[PATCH] vect: Missed opportunity to use [SU]ABD

Wed May 10 09:01:20 GMT 2023

Oluwatamilore Adebayo <Oluwatamilore.Adebayo@arm.com> writes:
> From 0b5f469171c340ef61a48a31877d495bb77bd35f Mon Sep 17 00:00:00 2001
> From: oluade01 <oluwatamilore.adebayo@arm.com>
> Date: Fri, 14 Apr 2023 10:24:43 +0100
> Subject: [PATCH 1/4] Missed opportunity to use [SU]ABD
>
> This adds a recognition pattern for the non-widening
> absolute difference (ABD).
>
> gcc/ChangeLog:
>
>         * doc/md.texi (sabd, uabd): Document them.
>         * internal-fn.def (ABD): Use new optab.
>         * optabs.def (sabd_optab, uabd_optab): New optabs,
>         * tree-vect-patterns.cc (vect_recog_absolute_difference):
>         Recognize the following idiom abs (a - b).
>         (vect_recog_sad_pattern): Refactor to use
>         vect_recog_absolute_difference.
>         (vect_recog_abd_pattern): Use patterns found by
>         vect_recog_absolute_difference to build a new ABD
>         internal call.
> ---
>  gcc/doc/md.texi           |  10 ++
>  gcc/internal-fn.def       |   3 +
>  gcc/optabs.def            |   2 +
>  gcc/tree-vect-patterns.cc | 250 +++++++++++++++++++++++++++++++++-----
>  4 files changed, 234 insertions(+), 31 deletions(-)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..0ad546c63a8deebb4b6db894f437d1e21f0245a8 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5778,6 +5778,16 @@ Other shift and rotate instructions, analogous to the
>  Vector shift and rotate instructions that take vectors as operand 2
>  instead of a scalar type.
>
> +@cindex @code{uabd@var{m}} instruction pattern
> +@cindex @code{sabd@var{m}} instruction pattern
> +@item @samp{uabd@var{m}}, @samp{sabd@var{m}}
> +Signed and unsigned absolute difference instructions.  These
> +instructions find the difference between operands 1 and 2
> +then return the absolute value.  A C code equivalent would be:
> +@smallexample
> +op0 = abs (op0 - op1)

op0 = abs (op1 - op2)

But that isn't the correct calculation for unsigned (where abs doesn't
really work).  It also doesn't handle some cases correctly for signed.

I think it's more:

  op0 = op1 > op2 ? (unsigned type) op1 - op2 : (unsigned type) op2 - op1

or (conceptually) max minus min.

E.g. for 16-bit values, the absolute difference between signed 0x7fff
and signed -0x8000 is 0xffff (reinterpreted as -1 if you cast back
to signed).  But, ignoring undefined behaviour:

  0x7fff - 0x8000 = -1
  abs(-1) = 1

which gives the wrong answer.

We might still be able to fold C abs(a - b) to abd for signed a and b
by relying on undefined behaviour (TYPE_OVERFLOW_UNDEFINED).  But we
can't do it for -fwrapv.

Richi knows better than me what would be appropriate here.

Thanks,
Richard

> +@end smallexample
> +
>  @cindex @code{avg@var{m}3_floor} instruction pattern
>  @cindex @code{uavg@var{m}3_floor} instruction pattern
>  @item @samp{avg@var{m}3_floor}
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..0f1724ecf37a31c231572edf90b5577e2d82f468 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -167,6 +167,9 @@ DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary)
>  DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (ABD, ECF_CONST | ECF_NOTHROW, first,
> +                             sabd, uabd, binary)
> +
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first,
>                               savg_floor, uavg_floor, binary)
>  DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 695f5911b300c9ca5737de9be809fa01aabe5e01..29bc92281a2175f898634cbe6af63c18021e5268 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -359,6 +359,8 @@ OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
>  OPTAB_D (extract_last_optab, "extract_last_$a")
>  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
>
> +OPTAB_D (uabd_optab, "uabd$a3")
> +OPTAB_D (sabd_optab, "sabd$a3")
>  OPTAB_D (savg_floor_optab, "avg$a3_floor")
>  OPTAB_D (uavg_floor_optab, "uavg$a3_floor")
>  OPTAB_D (savg_ceil_optab, "avg$a3_ceil")
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index a49b09539776c0056e77f99b10365d0a8747fbc5..91e1f9d4b610275dd833ec56dc77f76367ee7886 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -770,6 +770,89 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
>      }
>  }
>
> +/* Look for the following pattern
> +       X = x[i]
> +       Y = y[i]
> +       DIFF = X - Y
> +       DAD = ABS_EXPR<DIFF>
> + */
> +static bool
> +vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
> +                               tree *half_type, bool reject_unsigned,
> +                               vect_unpromoted_value unprom[2],
> +                               tree diff_oprnds[2])
> +{
> +  if (!abs_stmt)
> +    return false;
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  enum tree_code code = gimple_assign_rhs_code (abs_stmt);
> +  if (code != ABS_EXPR && code != ABSU_EXPR)
> +    return false;
> +
> +  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> +  tree abs_type = TREE_TYPE (abs_oprnd);
> +  if (!abs_oprnd)
> +    return false;
> +  if (reject_unsigned && TYPE_UNSIGNED (abs_type))
> +    return false;
> +  if (!ANY_INTEGRAL_TYPE_P (abs_type) || TYPE_OVERFLOW_WRAPS (abs_type))
> +    return false;
> +
> +  /* Peel off conversions from the ABS input.  This can involve sign
> +     changes (e.g.  from an unsigned subtraction to a signed ABS input)
> +     or signed promotion, but it can't include unsigned promotion.
> +     (Note that ABS of an unsigned promotion should have been folded
> +     away before now anyway.)  */
> +  vect_unpromoted_value unprom_diff;
> +  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> +                                                   &unprom_diff);
> +  if (!abs_oprnd)
> +    return false;
> +  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> +      && TYPE_UNSIGNED (unprom_diff.type))
> +    if (!reject_unsigned)
> +      return false;
> +
> +  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> +  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> +  if (!diff_stmt_vinfo)
> +    return false;
> +
> +  bool assigned_oprnds = false;
> +  gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
> +  if (diff_oprnds && diff && gimple_assign_rhs_code (diff) == MINUS_EXPR)
> +  {
> +    assigned_oprnds = true;
> +    diff_oprnds[0] = gimple_assign_rhs1 (diff);
> +    diff_oprnds[1] = gimple_assign_rhs2 (diff);
> +  }
> +
> +  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> +     inside the loop (in case we are analyzing an outer-loop).  */
> +  if (vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR,
> +                            WIDEN_MINUS_EXPR,
> +                            false, 2, unprom, half_type))
> +  {
> +    if (diff_oprnds && !assigned_oprnds)
> +    {
> +      diff_oprnds[0] = unprom[0].op;
> +      diff_oprnds[1] = unprom[1].op;
> +    }
> +  }
> +  else if (!assigned_oprnds)
> +  {
> +    return false;
> +  }
> +  else
> +  {
> +    *half_type = NULL_TREE;
> +  }
> +
> +  return true;
> +}
> +
>  /* Convert UNPROM to TYPE and return the result, adding new statements
>     to STMT_INFO's pattern definition statements if no better way is
>     available.  VECTYPE is the vector form of TYPE.
> @@ -1308,40 +1391,13 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
>       inside the loop (in case we are analyzing an outer-loop).  */
>    gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
> -  if (!abs_stmt
> -      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
> -         && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
> -    return NULL;
> -
> -  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
> -  tree abs_type = TREE_TYPE (abs_oprnd);
> -  if (TYPE_UNSIGNED (abs_type))
> -    return NULL;
> -
> -  /* Peel off conversions from the ABS input.  This can involve sign
> -     changes (e.g. from an unsigned subtraction to a signed ABS input)
> -     or signed promotion, but it can't include unsigned promotion.
> -     (Note that ABS of an unsigned promotion should have been folded
> -     away before now anyway.)  */
> -  vect_unpromoted_value unprom_diff;
> -  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
> -                                                   &unprom_diff);
> -  if (!abs_oprnd)
> -    return NULL;
> -  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
> -      && TYPE_UNSIGNED (unprom_diff.type))
> -    return NULL;
>
> -  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
> -  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
> -  if (!diff_stmt_vinfo)
> +  vect_unpromoted_value unprom[2];
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +                                      true, unprom, NULL))
>      return NULL;
>
> -  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
> -     inside the loop (in case we are analyzing an outer-loop).  */
> -  vect_unpromoted_value unprom[2];
> -  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
> -                            false, 2, unprom, &half_type))
> +  if (!half_type)
>      return NULL;
>
>    vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
> @@ -1363,6 +1419,137 @@ vect_recog_sad_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>
> +/* Function vect_recog_abd_pattern
> +
> +   Try to find the following ABsolute Difference (ABD) pattern:
> +
> +     VTYPE x, y, out;
> +     type diff;
> +   loop i in range:
> +     S1 diff = x[i] - y[i]
> +     S2 out[i] = ABS_EXPR <diff>;
> +
> +   where 'type' is a integer and 'VTYPE' is a vector of integers
> +   the same size as 'type'
> +
> +   Input:
> +
> +   * STMT_VINFO: The stmt from which the pattern search begins
> +
> +   Output:
> +
> +   * TYPE_out: The type of the output of this pattern
> +
> +   * Return value: A new stmt that will be used to replace the sequence of
> +     stmts that constitute the pattern; either SABD or UABD:
> +       SABD_EXPR<x, y, out>
> +       UABD_EXPR<x, y, out>
> +
> +      UABD expressions are used when the input types are
> +      narrower than the output types or the output type is narrower
> +      than 32 bits
> + */
> +
> +static gimple *
> +vect_recog_abd_pattern (vec_info *vinfo,
> +               stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  /* Look for the following patterns
> +       X = x[i]
> +       Y = y[i]
> +       DIFF = X - Y
> +       DAD = ABS_EXPR<DIFF>
> +       out[i] = DAD
> +
> +     In which
> +      - X, Y, DIFF, DAD all have the same type
> +      - x, y, out are all vectors of the same type
> +  */
> +  gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> +  if (!last_stmt)
> +    return NULL;
> +
> +  tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> +
> +  gassign *abs_stmt = last_stmt;
> +  if (gimple_assign_cast_p (last_stmt))
> +  {
> +    tree last_rhs = gimple_assign_rhs1 (last_stmt);
> +    if (!SSA_VAR_P (last_rhs))
> +      return NULL;
> +
> +    abs_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (last_rhs));
> +    if (!abs_stmt)
> +      return NULL;
> +  }
> +
> +  vect_unpromoted_value unprom[2];
> +  tree diff_oprnds[2];
> +  tree half_type;
> +  if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
> +                                      false, unprom, diff_oprnds))
> +    return NULL;
> +
> +#define SAME_TYPE(A, B) (TYPE_PRECISION (A) == TYPE_PRECISION (B))
> +
> +  tree abd_oprnds[2];
> +  if (half_type)
> +  {
> +    if (!SAME_TYPE (unprom[0].type, unprom[1].type))
> +      return NULL;
> +
> +    tree diff_type = TREE_TYPE (diff_oprnds[0]);
> +    if (TYPE_PRECISION (out_type) != TYPE_PRECISION (diff_type))
> +    {
> +      vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds, half_type, unprom,
> +                          get_vectype_for_scalar_type (vinfo, half_type));
> +    }
> +    else
> +    {
> +      abd_oprnds[0] = diff_oprnds[0];
> +      abd_oprnds[1] = diff_oprnds[1];
> +    }
> +  }
> +  else
> +  {
> +    if (unprom[0].op && unprom[1].op
> +       && (!SAME_TYPE (unprom[0].type, unprom[1].type)
> +       || !SAME_TYPE (unprom[0].type, out_type)))
> +      return NULL;
> +
> +    unprom[0].op = diff_oprnds[0];
> +    unprom[1].op = diff_oprnds[1];
> +    tree signed_out = signed_type_for (out_type);
> +    tree signed_out_vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> +    vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> +                        signed_out, unprom, signed_out_vectype);
> +
> +    if (!SAME_TYPE (TREE_TYPE (diff_oprnds[0]), TREE_TYPE (abd_oprnds[0])))
> +      return NULL;
> +  }
> +
> +  if (!SAME_TYPE (TREE_TYPE (abd_oprnds[0]), TREE_TYPE (abd_oprnds[1]))
> +      || !SAME_TYPE (TREE_TYPE (abd_oprnds[0]), out_type))
> +    return NULL;
> +
> +  vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> +  tree vectype = get_vectype_for_scalar_type (vinfo, out_type);
> +  if (!vectype
> +      || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> +                                         OPTIMIZE_FOR_SPEED))
> +    return NULL;
> +
> +  *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
> +
> +  tree var = vect_recog_temp_ssa_var (out_type, NULL);
> +  gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> +                                               abd_oprnds[0], abd_oprnds[1]);
> +  gimple_call_set_lhs (abd_stmt, var);
> +  gimple_set_location (abd_stmt, gimple_location (last_stmt));
> +  return abd_stmt;
> +}
> +
>  /* Recognize an operation that performs ORIG_CODE on widened inputs,
>     so that it can be treated as though it had the form:
>
> @@ -6439,6 +6626,7 @@ struct vect_recog_func
>  static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
>    { vect_recog_bit_insert_pattern, "bit_insert" },
> +  { vect_recog_abd_pattern, "abd" },
>    { vect_recog_over_widening_pattern, "over_widening" },
>    /* Must come after over_widening, which narrows the shift as much as
>       possible beforehand.  */
> --
> 2.25.1