This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, RFC, rs6000] enable GIMPLE folding of vec_splat


On Tue, Aug 7, 2018 at 9:25 PM Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>
> Hi
> Enable GIMPLE folding of the vec_splat() intrinsic.
>
> For review.. feedback is expected. :-)
>
> I came up with the following after spending some time poking around
> at the tree_vec_extract() and vector_element() functions as seen
> in tree-vect-generic.c looking for insights.  Some of this seems a
> bit clunky to me yet, but this is functional as far as make-check
> can tell, and is as far as I can get without additional input.
>
> This uses the tree_vec_extract() function out of tree-vect-generic.c
> to retrieve the splat value, which is a BIT_FIELD_REF.   That function is
> made non-static as part of this change.
>
> In review of the .gimple output, this folding takes a sample testcase
> of
>         vector bool int testb_0  (vector bool int x)
>         {
>           return vec_splat (x, 0b00000);
>         }
> from:
> testb_0 (__vector __bool int x)
> {
>   __vector __bool intD.1486 D.2855;
>
>   _1 = VIEW_CONVERT_EXPR<__vector signed intD.1468>(xD.2778);
>   _2 = __builtin_altivec_vspltwD.1919 (_1, 0);
>   D.2855 = VIEW_CONVERT_EXPR<__vector __bool intD.1486>(_2);
>   return D.2855;
> }
> to:
>  testb_0 (__vector __bool int x)
> {
>   __vector __bool intD.1486 D.2855;
>
>   _1 = VIEW_CONVERT_EXPR<__vector signed intD.1468>(xD.2778);
>   D.2856 = BIT_FIELD_REF <_1, 32, 0>;
>   _2 = {D.2856, D.2856, D.2856, D.2856};
>   D.2855 = VIEW_CONVERT_EXPR<__vector __bool intD.1486>(_2);
>   return D.2855;
> }
>
>
> Testcases are being posted as a separate patch.
>
> OK for trunk?

It certainly works, nowadays we have VEC_DUPLICATE_EXPR though
so you could simply do

 _1 = VIEW_CONVERT_EXPR<__vector signed intD.1468>(xD.2778);
 D.2856 = BIT_FIELD_REF <_1, 32, 0>;
 D.2855 = VEC_DUPLICATE_EXPR <__vector __bool intD.1486> (D.2856);

not sure what variant is better for followup optimizations and whether
we already fold that { D.2856, D.2856, D.2856, D.2856 }; to VEC_DUPLICATE_EXPR.

Richard may know more.

Richard.

> Thanks,
> -Will
>
> [gcc]
>
> 2018-08-07  Will Schmidt  <will_schmidt@vnet.ibm.com>
>
>         * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
>           early gimple folding of vec_splat().
>         * tree-vect-generic.c: Remove static from tree_vec_extract() definition.
>         * gimple-fold.h:  Add an extern define for tree_vec_extract().
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 35c32be..acc6b49 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -15764,10 +15764,56 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>          tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
>          g = gimple_build_assign (lhs, splat_tree);
>          gimple_set_location (g, gimple_location (stmt));
>          gsi_replace (gsi, g, true);
>          return true;
> +       }
> +
> +    /* Flavors of vec_splat.  */
> +    // a = vec_splat (b, 0x3) ;  // a = { b[3],b[3],b[3],...};
> +    case ALTIVEC_BUILTIN_VSPLTB:
> +    case ALTIVEC_BUILTIN_VSPLTH:
> +    case ALTIVEC_BUILTIN_VSPLTW:
> +    case VSX_BUILTIN_XXSPLTD_V2DI:
> +    case VSX_BUILTIN_XXSPLTD_V2DF:
> +      {
> +       arg0 = gimple_call_arg (stmt, 0); /* input vector.  */
> +       arg1 = gimple_call_arg (stmt, 1); /* index into arg0.  */
> +       /* Only fold the vec_splat_*() if arg1 is a constant
> +          5-bit unsigned literal.  */
> +       if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 0x1f)
> +         return false;
> +
> +       lhs = gimple_call_lhs (stmt);
> +       tree lhs_type = TREE_TYPE (lhs);
> +
> +       tree splat;
> +       if (TREE_CODE (arg0) == VECTOR_CST)
> +         splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
> +       else
> +         {
> +           /* Determine (in bits) the length and start location of the
> +              splat value for a call to the tree_vec_extract helper.  */
> +           int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (lhs_type))
> +                                   * BITS_PER_UNIT;
> +           int splat_elem_size = tree_size_in_bits / VECTOR_CST_NELTS (arg0);
> +           int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
> +           /* Do not attempt to early-fold if the size + specified offset into
> +              the vector would touch outside of the source vector.  */
> +           if ((splat_start_bit + splat_elem_size) > tree_size_in_bits)
> +             return false;
> +           tree len = build_int_cst (bitsizetype, splat_elem_size);
> +           tree start = build_int_cst (bitsizetype, splat_start_bit);
> +           splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
> +                                     len, start);
> +       }
> +       /* And finally, build the new vector.  */
> +       tree splat_tree = build_vector_from_val (lhs_type, splat);
> +       g = gimple_build_assign (lhs, splat_tree);
> +       gimple_set_location (g, gimple_location (stmt));
> +       gsi_replace (gsi, g, true);
> +       return true;
>        }
>
>      /* vec_mergel (integrals).  */
>      case ALTIVEC_BUILTIN_VMRGLH:
>      case ALTIVEC_BUILTIN_VMRGLW:
> diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
> index 04e9bfa..e634180 100644
> --- a/gcc/gimple-fold.h
> +++ b/gcc/gimple-fold.h
> @@ -59,10 +59,11 @@ extern tree gimple_fold_indirect_ref (tree);
>  extern bool gimple_fold_builtin_sprintf (gimple_stmt_iterator *);
>  extern bool gimple_fold_builtin_snprintf (gimple_stmt_iterator *);
>  extern bool arith_code_with_undefined_signed_overflow (tree_code);
>  extern gimple_seq rewrite_to_defined_overflow (gimple *);
>  extern void replace_call_with_value (gimple_stmt_iterator *, tree);
> +extern tree tree_vec_extract (gimple_stmt_iterator *, tree, tree, tree, tree);
>
>  /* gimple_build, functionally matching fold_buildN, outputs stmts
>     int the provided sequence, matching and simplifying them on-the-fly.
>     Supposed to replace force_gimple_operand (fold_buildN (...), ...).  */
>  extern tree gimple_build (gimple_seq *, location_t,
> diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
> index 909f790..1c9701d 100644
> --- a/gcc/tree-vect-generic.c
> +++ b/gcc/tree-vect-generic.c
> @@ -118,11 +118,11 @@ build_word_mode_vector_type (int nunits)
>
>  typedef tree (*elem_op_func) (gimple_stmt_iterator *,
>                               tree, tree, tree, tree, tree, enum tree_code,
>                               tree);
>
> -static inline tree
> +tree
>  tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
>                   tree t, tree bitsize, tree bitpos)
>  {
>    if (TREE_CODE (t) == SSA_NAME)
>      {
>
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]