This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: SLP for vectors
On Sun, Jan 27, 2013 at 4:28 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this message is to check that I am not doing something absurd and ask for a
> bit of advice.
>
> In the attached patch, I let SLP recognize vector loads/stores just like it
> recognizes those in an array. It has a few issues: the cost of the
> vectorized version is overestimated, the base object is wrong (doesn't strip
> the bit_field_ref, but since the base address is right and the base object
> is almost unused...), but most importantly it only works if the vectors have
> their address taken, otherwise (after asking gimple_vuse?) SLP doesn't
> detect their use as loads or stores at all.
Yes, if they do not have their address taken they are not loads.
> Also, it only works if you write
> result[0]=..., result[1]=... and does not recognize a constructor as a
> series of stores.
>
> Is slowly extending SLP the right way to go? Should get_references_in_stmt
> report vector element accesses?
It does if it is a memory access.
You didn't provide a new testcase so it's hard for me to guess what you
are trying to do. I suppose you want to vectorize
w[0] = v[0] + v[0];
w[1] = v[1] + v[1];
into
w = v + v;
As it would work if w and v are array accesses instead of vector subscripts?
Note that similar issues (and bugreports) exist for complex component accesses.
So yes, handling BIT_FIELD_REF in the vectorizer looks like the correct
way to go - but mind that you should constrain the BIT_FIELD_REFs you
allow (I suppose in the end that's properly done by other parts of the analysis).
As of handling non-memory BIT_FIELD_REFs - I suggest to split out
the test of whether a stmt is considered a "load" for the purpose of
vectorization and simply special-case this therein, not requiring a VUSE.
I suppose the data-ref analysis parts are not strictly required, nor
the get_addr_base_and_unit_offset_1 parts? They should be split out
and separately tested anyway. The data-ref part at least will confuse
analysis of 'a[0]' or 'a[1]' vs. 'a', but I suppose independent of the patch.
Richard.
> (the patch as is passes the testsuite on x86_64-linux and vectorizes the few
> examples I tried)
>
> --
> Marc Glisse
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c (revision 195493)
> +++ gcc/tree-vect-stmts.c (working copy)
> @@ -3860,20 +3860,21 @@ vectorizable_store (gimple stmt, gimple_
> /* Is vectorizable store? */
>
> if (!is_gimple_assign (stmt))
> return false;
>
> scalar_dest = gimple_assign_lhs (stmt);
> if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> && is_pattern_stmt_p (stmt_info))
> scalar_dest = TREE_OPERAND (scalar_dest, 0);
> if (TREE_CODE (scalar_dest) != ARRAY_REF
> + && TREE_CODE (scalar_dest) != BIT_FIELD_REF
> && TREE_CODE (scalar_dest) != INDIRECT_REF
> && TREE_CODE (scalar_dest) != COMPONENT_REF
> && TREE_CODE (scalar_dest) != IMAGPART_EXPR
> && TREE_CODE (scalar_dest) != REALPART_EXPR
> && TREE_CODE (scalar_dest) != MEM_REF)
> return false;
>
> gcc_assert (gimple_assign_single_p (stmt));
> op = gimple_assign_rhs1 (stmt);
> if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
> @@ -4394,20 +4395,21 @@ vectorizable_load (gimple stmt, gimple_s
> /* Is vectorizable load? */
> if (!is_gimple_assign (stmt))
> return false;
>
> scalar_dest = gimple_assign_lhs (stmt);
> if (TREE_CODE (scalar_dest) != SSA_NAME)
> return false;
>
> code = gimple_assign_rhs_code (stmt);
> if (code != ARRAY_REF
> + && code != BIT_FIELD_REF
> && code != INDIRECT_REF
> && code != COMPONENT_REF
> && code != IMAGPART_EXPR
> && code != REALPART_EXPR
> && code != MEM_REF
> && TREE_CODE_CLASS (code) != tcc_declaration)
> return false;
>
> if (!STMT_VINFO_DATA_REF (stmt_info))
> return false;
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c (revision 195493)
> +++ gcc/tree-vect-slp.c (working copy)
> @@ -660,20 +660,21 @@ vect_build_slp_tree (loop_vec_info loop_
> }
> else
> {
> if (first_stmt_code != rhs_code
> && (first_stmt_code != IMAGPART_EXPR
> || rhs_code != REALPART_EXPR)
> && (first_stmt_code != REALPART_EXPR
> || rhs_code != IMAGPART_EXPR)
> && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
> && (first_stmt_code == ARRAY_REF
> + || first_stmt_code == BIT_FIELD_REF
> || first_stmt_code == INDIRECT_REF
> || first_stmt_code == COMPONENT_REF
> || first_stmt_code == MEM_REF)))
> {
> if (dump_enabled_p ())
> {
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> "Build SLP failed: different operation "
> "in stmt ");
> dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
> 0);
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c (revision 195493)
> +++ gcc/tree-data-ref.c (working copy)
> @@ -854,20 +854,24 @@ dr_analyze_indices (struct data_referenc
> /* Analyze access functions of dimensions we know to be independent. */
> while (handled_component_p (ref))
> {
> if (TREE_CODE (ref) == ARRAY_REF)
> {
> op = TREE_OPERAND (ref, 1);
> access_fn = analyze_scalar_evolution (loop, op);
> access_fn = instantiate_scev (before_loop, loop, access_fn);
> access_fns.safe_push (access_fn);
> }
> + else if (TREE_CODE (ref) == BIT_FIELD_REF)
> + {
> + access_fns.safe_push (TREE_OPERAND (ref, 2));
> + }
> else if (TREE_CODE (ref) == COMPONENT_REF
> && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) ==
> RECORD_TYPE)
> {
> /* For COMPONENT_REFs of records (but not unions!) use the
> FIELD_DECL offset as constant access function so we can
> disambiguate a[i].f1 and a[i].f2. */
> tree off = component_ref_field_offset (ref);
> off = size_binop (PLUS_EXPR,
> size_binop (MULT_EXPR,
> fold_convert (bitsizetype, off),
> Index: gcc/tree-flow-inline.h
> ===================================================================
> --- gcc/tree-flow-inline.h (revision 195493)
> +++ gcc/tree-flow-inline.h (working copy)
> @@ -1239,21 +1239,24 @@ get_addr_base_and_unit_offset_1 (tree ex
> {
> HOST_WIDE_INT byte_offset = 0;
>
> /* Compute cumulative byte-offset for nested component-refs and
> array-refs,
> and find the ultimate containing object. */
> while (1)
> {
> switch (TREE_CODE (exp))
> {
> case BIT_FIELD_REF:
> - return NULL_TREE;
> + // Exit the loop?
> + byte_offset += (TREE_INT_CST_LOW (TREE_OPERAND (exp, 2))
> + / BITS_PER_UNIT);
> + break;
>
> case COMPONENT_REF:
> {
> tree field = TREE_OPERAND (exp, 1);
> tree this_offset = component_ref_field_offset (exp);
> HOST_WIDE_INT hthis_offset;
>
> if (!this_offset
> || TREE_CODE (this_offset) != INTEGER_CST
> || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c (revision 195493)
> +++ gcc/tree-vect-data-refs.c (working copy)
> @@ -3789,20 +3789,22 @@ vect_create_data_ref_ptr (gimple stmt, t
>
> if (dump_enabled_p ())
> {
> tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
> dump_printf_loc (MSG_NOTE, vect_location,
> "create %s-pointer variable to type: ",
> tree_code_name[(int) TREE_CODE (aggr_type)]);
> dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
> if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
> dump_printf (MSG_NOTE, " vectorizing an array ref: ");
> + else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
> + dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
> else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
> dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
> else
> dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
> dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
> }
>
> /* (1) Create the new aggregate-pointer variable. */
> aggr_ptr_type = build_pointer_type (aggr_type);
> base = get_base_address (DR_REF (dr));
>