This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: SLP for vectors


On Sun, Jan 27, 2013 at 4:28 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this message is to check that I am not doing something absurd and ask for a
> bit of advice.
>
> In the attached patch, I let SLP recognize vector loads/stores just like it
> recognizes those in an array. It has a few issues: the cost of the
> vectorized version is overestimated, the base object is wrong (doesn't strip
> the bit_field_ref, but since the base address is right and the base object
> is almost unused...), but most importantly it only works if the vectors have
> their address taken, otherwise (after asking gimple_vuse?) SLP doesn't
> detect their use as loads or stores at all.

Yes, if they have not their address taken they are not loads.

> Also, it only works if you write
> result[0]=..., result[1]=... and does not recognize a constructor as a
> series of stores.
>
> Is slowly extending SLP the right way to go? Should get_references_in_stmt
> report vector element accesses?

It does if it is a memory access.

You didn't provide a new testcase so it's hard for me to guess what you
are trying to do.  I suppose you want to vectorize

   w[0] = v[0] + v[0];
   w[1] = v[1] + v[1];

into

   w = v + v;

As it would work if w and v are array accesses instead of vector subscripts?
Note that similar issues (and bugreports) exist for complex component accesses.

So yes, handling BIT_FIELD_REF in the vectorizer looks like the correct
way to do - but mind that you should constrain the BIT_FIELD_REFs you
allow (I suppose in the end that's properly done by other part of the analysis).

As of handling non-memory BIT_FIELD_REFs - I suggest to split out
the test of whether a stmt is considered a "load" for the purpose of
vectorization and simply special-case this therein, not requiring a VUSE.

I suppose the data-ref analysis parts are not strictly required, nor
the get_addr_base_and_unit_offset_1 parts?  They should be split out
and separately tested anyway.  The data-ref part at least will confuse
analysis of 'a[0]' or 'a[1]' vs. 'a', but I suppose independent of the patch.

Richard.

> (the patch as is passes the testsuite on x86_64-linux and vectorizes the few
> examples I tried)
>
> --
> Marc Glisse
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c       (revision 195493)
> +++ gcc/tree-vect-stmts.c       (working copy)
> @@ -3860,20 +3860,21 @@ vectorizable_store (gimple stmt, gimple_
>    /* Is vectorizable store? */
>
>    if (!is_gimple_assign (stmt))
>      return false;
>
>    scalar_dest = gimple_assign_lhs (stmt);
>    if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
>        && is_pattern_stmt_p (stmt_info))
>      scalar_dest = TREE_OPERAND (scalar_dest, 0);
>    if (TREE_CODE (scalar_dest) != ARRAY_REF
> +      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
>        && TREE_CODE (scalar_dest) != INDIRECT_REF
>        && TREE_CODE (scalar_dest) != COMPONENT_REF
>        && TREE_CODE (scalar_dest) != IMAGPART_EXPR
>        && TREE_CODE (scalar_dest) != REALPART_EXPR
>        && TREE_CODE (scalar_dest) != MEM_REF)
>      return false;
>
>    gcc_assert (gimple_assign_single_p (stmt));
>    op = gimple_assign_rhs1 (stmt);
>    if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
> @@ -4394,20 +4395,21 @@ vectorizable_load (gimple stmt, gimple_s
>    /* Is vectorizable load? */
>    if (!is_gimple_assign (stmt))
>      return false;
>
>    scalar_dest = gimple_assign_lhs (stmt);
>    if (TREE_CODE (scalar_dest) != SSA_NAME)
>      return false;
>
>    code = gimple_assign_rhs_code (stmt);
>    if (code != ARRAY_REF
> +      && code != BIT_FIELD_REF
>        && code != INDIRECT_REF
>        && code != COMPONENT_REF
>        && code != IMAGPART_EXPR
>        && code != REALPART_EXPR
>        && code != MEM_REF
>        && TREE_CODE_CLASS (code) != tcc_declaration)
>      return false;
>
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c (revision 195493)
> +++ gcc/tree-vect-slp.c (working copy)
> @@ -660,20 +660,21 @@ vect_build_slp_tree (loop_vec_info loop_
>         }
>        else
>         {
>           if (first_stmt_code != rhs_code
>               && (first_stmt_code != IMAGPART_EXPR
>                   || rhs_code != REALPART_EXPR)
>               && (first_stmt_code != REALPART_EXPR
>                   || rhs_code != IMAGPART_EXPR)
>                && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
>                     && (first_stmt_code == ARRAY_REF
> +                       || first_stmt_code == BIT_FIELD_REF
>                         || first_stmt_code == INDIRECT_REF
>                         || first_stmt_code == COMPONENT_REF
>                         || first_stmt_code == MEM_REF)))
>             {
>               if (dump_enabled_p ())
>                 {
>                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>                                    "Build SLP failed: different operation "
>                                    "in stmt ");
>                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
> 0);
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c (revision 195493)
> +++ gcc/tree-data-ref.c (working copy)
> @@ -854,20 +854,24 @@ dr_analyze_indices (struct data_referenc
>    /* Analyze access functions of dimensions we know to be independent.  */
>    while (handled_component_p (ref))
>      {
>        if (TREE_CODE (ref) == ARRAY_REF)
>         {
>           op = TREE_OPERAND (ref, 1);
>           access_fn = analyze_scalar_evolution (loop, op);
>           access_fn = instantiate_scev (before_loop, loop, access_fn);
>           access_fns.safe_push (access_fn);
>         }
> +      else if (TREE_CODE (ref) == BIT_FIELD_REF)
> +       {
> +         access_fns.safe_push (TREE_OPERAND (ref, 2));
> +       }
>        else if (TREE_CODE (ref) == COMPONENT_REF
>                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) ==
> RECORD_TYPE)
>         {
>           /* For COMPONENT_REFs of records (but not unions!) use the
>              FIELD_DECL offset as constant access function so we can
>              disambiguate a[i].f1 and a[i].f2.  */
>           tree off = component_ref_field_offset (ref);
>           off = size_binop (PLUS_EXPR,
>                             size_binop (MULT_EXPR,
>                                         fold_convert (bitsizetype, off),
> Index: gcc/tree-flow-inline.h
> ===================================================================
> --- gcc/tree-flow-inline.h      (revision 195493)
> +++ gcc/tree-flow-inline.h      (working copy)
> @@ -1239,21 +1239,24 @@ get_addr_base_and_unit_offset_1 (tree ex
>  {
>    HOST_WIDE_INT byte_offset = 0;
>
>    /* Compute cumulative byte-offset for nested component-refs and
> array-refs,
>       and find the ultimate containing object.  */
>    while (1)
>      {
>        switch (TREE_CODE (exp))
>         {
>         case BIT_FIELD_REF:
> -         return NULL_TREE;
> +         // Exit the loop?
> +         byte_offset += (TREE_INT_CST_LOW (TREE_OPERAND (exp, 2))
> +                         / BITS_PER_UNIT);
> +         break;
>
>         case COMPONENT_REF:
>           {
>             tree field = TREE_OPERAND (exp, 1);
>             tree this_offset = component_ref_field_offset (exp);
>             HOST_WIDE_INT hthis_offset;
>
>             if (!this_offset
>                 || TREE_CODE (this_offset) != INTEGER_CST
>                 || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c   (revision 195493)
> +++ gcc/tree-vect-data-refs.c   (working copy)
> @@ -3789,20 +3789,22 @@ vect_create_data_ref_ptr (gimple stmt, t
>
>    if (dump_enabled_p ())
>      {
>        tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
>        dump_printf_loc (MSG_NOTE, vect_location,
>                         "create %s-pointer variable to type: ",
>                         tree_code_name[(int) TREE_CODE (aggr_type)]);
>        dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
>        if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
>          dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
> +      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
> +        dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
>        else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
>          dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
>        else
>          dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
>        dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
>      }
>
>    /* (1) Create the new aggregate-pointer variable.  */
>    aggr_ptr_type = build_pointer_type (aggr_type);
>    base = get_base_address (DR_REF (dr));
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]