This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PR68577: Handle narrowing for vector popcount, etc.


On Tue, Dec 1, 2015 at 10:14 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> This patch adds support for simple cases where a vector internal
> function returns wider results than the scalar equivalent.  It punts
> on other cases.
>
> Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Thanks,
> Richard
>
>
> gcc/
>         PR tree-optimization/68577
>         * tree-vect-stmts.c (simple_integer_narrowing): New function.
>         (vectorizable_call): Restrict internal function handling
>         to NONE and NARROW cases, using simple_integer_narrowing
>         to test for the latter.  Add cost of narrowing operation
>         and insert it where necessary.
>
> gcc/testsuite/
>         PR tree-optimization/68577
>         * gcc.dg/vect/pr68577.c: New test.
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr68577.c b/gcc/testsuite/gcc.dg/vect/pr68577.c
> new file mode 100644
> index 0000000..999c1c8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr68577.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +
> +int a, b;
> +
> +void
> +__sched_cpucount (void)
> +{
> +  while (b)
> +    {
> +      long l = b++;
> +      a += __builtin_popcountl(l);
> +    }
> +}
> +
> +void
> +slp_test (int *x, long *y)
> +{
> +  for (int i = 0; i < 512; i += 4)
> +    {
> +      x[i] = __builtin_popcountl(y[i]);
> +      x[i + 1] = __builtin_popcountl(y[i + 1]);
> +      x[i + 2] = __builtin_popcountl(y[i + 2]);
> +      x[i + 3] = __builtin_popcountl(y[i + 3]);
> +    }
> +}
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 3b078da..af86bce 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -2122,6 +2122,40 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
>    return true;
>  }
>
> +/* Return true if vector type VECTYPE_OUT has integer elements and
> +   if we can narrow two integer vectors with the same shape as
> +   VECTYPE_IN to VECTYPE_OUT in a single step.  On success,
> +   return the binary pack code in *CONVERT_CODE and the types
> +   of the input vectors in *CONVERT_FROM.  */
> +
> +static bool
> +simple_integer_narrowing (tree vectype_out, tree vectype_in,
> +                         tree_code *convert_code, tree *convert_from)
> +{
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)))
> +    return false;
> +
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
> +    {
> +      unsigned int bits
> +       = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype_in)));
> +      tree scalar_type = build_nonstandard_integer_type (bits, 0);
> +      vectype_in = get_same_sized_vectype (scalar_type, vectype_in);
> +    }
> +

Any reason for supporting non-integer types on the input?  It seems to me
you are doing this for the lrint case?  If so, isn't the "question" wrong,
and shouldn't you pass the integer type the IFN returns as vectype_in instead?

That said, this conversion doesn't seem to belong to simple_integer_narrowing.

The patch is OK with that conversion simply removed.

Thanks,
Richard.

> +  tree_code code;
> +  int multi_step_cvt = 0;
> +  auto_vec <tree, 8> interm_types;
> +  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
> +                                       &code, &multi_step_cvt,
> +                                       &interm_types)
> +      || multi_step_cvt)
> +    return false;
> +
> +  *convert_code = code;
> +  *convert_from = vectype_in;
> +  return true;
> +}
>
>  /* Function vectorizable_call.
>
> @@ -2288,7 +2322,13 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>    tree callee = gimple_call_fndecl (stmt);
>
>    /* First try using an internal function.  */
> -  if (cfn != CFN_LAST)
> +  tree_code convert_code = ERROR_MARK;
> +  tree convert_from = NULL_TREE;
> +  if (cfn != CFN_LAST
> +      && (modifier == NONE
> +         || (modifier == NARROW
> +             && simple_integer_narrowing (vectype_out, vectype_in,
> +                                          &convert_code, &convert_from))))
>      ifn = vectorizable_internal_function (cfn, callee, vectype_out,
>                                           vectype_in);
>
> @@ -2328,7 +2368,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>
>    if (slp_node || PURE_SLP_STMT (stmt_info))
>      ncopies = 1;
> -  else if (modifier == NARROW)
> +  else if (modifier == NARROW && ifn == IFN_LAST)
>      ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
>    else
>      ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
> @@ -2344,6 +2384,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>          dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
>                           "\n");
>        vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
> +      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
> +       add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
> +                      vec_promote_demote, stmt_info, 0, vect_body);
> +
>        return true;
>      }
>
> @@ -2357,9 +2401,9 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
>
>    prev_stmt_info = NULL;
> -  switch (modifier)
> +  if (modifier == NONE || ifn != IFN_LAST)
>      {
> -    case NONE:
> +      tree prev_res = NULL_TREE;
>        for (j = 0; j < ncopies; ++j)
>         {
>           /* Build argument list for the vectorized call.  */
> @@ -2387,12 +2431,30 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>                       vec<tree> vec_oprndsk = vec_defs[k];
>                       vargs[k] = vec_oprndsk[i];
>                     }
> -                 if (ifn != IFN_LAST)
> -                   new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                 if (modifier == NARROW)
> +                   {
> +                     tree half_res = make_ssa_name (convert_from);
> +                     new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                     gimple_call_set_lhs (new_stmt, half_res);
> +                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +                     if ((i & 1) == 0)
> +                       {
> +                         prev_res = half_res;
> +                         continue;
> +                       }
> +                     new_temp = make_ssa_name (vec_dest);
> +                     new_stmt = gimple_build_assign (new_temp, convert_code,
> +                                                     prev_res, half_res);
> +                   }
>                   else
> -                   new_stmt = gimple_build_call_vec (fndecl, vargs);
> -                 new_temp = make_ssa_name (vec_dest, new_stmt);
> -                 gimple_call_set_lhs (new_stmt, new_temp);
> +                   {
> +                     if (ifn != IFN_LAST)
> +                       new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +                     else
> +                       new_stmt = gimple_build_call_vec (fndecl, vargs);
> +                     new_temp = make_ssa_name (vec_dest, new_stmt);
> +                     gimple_call_set_lhs (new_stmt, new_temp);
> +                   }
>                   vect_finish_stmt_generation (stmt, new_stmt, gsi);
>                   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
>                 }
> @@ -2436,6 +2498,21 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>               new_temp = make_ssa_name (vec_dest);
>               new_stmt = gimple_build_assign (new_temp, new_var);
>             }
> +         else if (modifier == NARROW)
> +           {
> +             tree half_res = make_ssa_name (convert_from);
> +             new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> +             gimple_call_set_lhs (new_stmt, half_res);
> +             vect_finish_stmt_generation (stmt, new_stmt, gsi);
> +             if ((j & 1) == 0)
> +               {
> +                 prev_res = half_res;
> +                 continue;
> +               }
> +             new_temp = make_ssa_name (vec_dest);
> +             new_stmt = gimple_build_assign (new_temp, convert_code,
> +                                             prev_res, half_res);
> +           }
>           else
>             {
>               if (ifn != IFN_LAST)
> @@ -2447,17 +2524,16 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>             }
>           vect_finish_stmt_generation (stmt, new_stmt, gsi);
>
> -         if (j == 0)
> +         if (j == (modifier == NARROW ? 1 : 0))
>             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
>           else
>             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
>
>           prev_stmt_info = vinfo_for_stmt (new_stmt);
>         }
> -
> -      break;
> -
> -    case NARROW:
> +    }
> +  else if (modifier == NARROW)
> +    {
>        for (j = 0; j < ncopies; ++j)
>         {
>           /* Build argument list for the vectorized call.  */
> @@ -2528,10 +2604,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>               vargs.quick_push (vec_oprnd1);
>             }
>
> -         if (ifn != IFN_LAST)
> -           new_stmt = gimple_build_call_internal_vec (ifn, vargs);
> -         else
> -           new_stmt = gimple_build_call_vec (fndecl, vargs);
> +         new_stmt = gimple_build_call_vec (fndecl, vargs);
>           new_temp = make_ssa_name (vec_dest, new_stmt);
>           gimple_call_set_lhs (new_stmt, new_temp);
>           vect_finish_stmt_generation (stmt, new_stmt, gsi);
> @@ -2545,13 +2618,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
>         }
>
>        *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
> -
> -      break;
> -
> -    case WIDEN:
> -      /* No current target implements this case.  */
> -      return false;
>      }
> +  else
> +    /* No current target implements this case.  */
> +    return false;
>
>    vargs.release ();
>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]