This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Fix PR43432, vectorize backwards stepping loads
- From: Michael Matz <matz at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 7 Sep 2010 14:58:56 +0200 (CEST)
- Subject: Fix PR43432, vectorize backwards stepping loads
Hi,
this implements handling of consecutive loads whose indices run backwards.
I'm handling the easy cases only, namely when no explicit realignment
instructions need to be generated and when we need to emit only one load
per original load (i.e. no multiple type width in the loop).
This all requires that we be able to reverse the elements of a vector,
hence two new helper functions for that are added.
Reversed stores aren't handled but should be reasonably easy to extend.
Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
target-supports.exp/vect_perm recognition which in turn makes
vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably
because while x86_64 supports permutation for 4 and 8-sized elements it
doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going
to investigate this but seek feedback on the patch itself already now.
Ciao,
Michael.
--
PR tree-optimization/43432
* tree-vect-data-refs.c (vect_analyze_data_ref_access):
Accept backwards consecutive accesses.
(vect_create_data_ref_ptr): If step is negative generate
decreasing IVs.
* tree-vect-stmts.c (vectorizable_store): Reject negative steps.
(perm_mask_for_reverse, reverse_vec_elements): New functions.
(vectorizable_load): Handle loads with negative steps when easily
possible.
testsuite/
PR tree-optimization/43432
* gcc.dg/vect/pr43432.c: New test.
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c (revision 163773)
+++ tree-vect-data-refs.c (working copy)
@@ -2281,7 +2281,9 @@ vect_analyze_data_ref_access (struct dat
}
/* Consecutive? */
- if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+ if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+ || (dr_step < 0
+ && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
{
/* Mark that it is not interleaving. */
DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2953,6 +2955,7 @@ vect_create_data_ref_ptr (gimple stmt, s
tree vptr;
gimple_stmt_iterator incr_gsi;
bool insert_after;
+ bool negative;
tree indx_before_incr, indx_after_incr;
gimple incr;
tree step;
@@ -2985,6 +2988,7 @@ vect_create_data_ref_ptr (gimple stmt, s
*inv_p = true;
else
*inv_p = false;
+ negative = tree_int_cst_compare (step, size_zero_node) < 0;
/* Create an expression for the first address accessed by this load
in LOOP. */
@@ -3144,6 +3148,8 @@ vect_create_data_ref_ptr (gimple stmt, s
LOOP is zero. In this case the step here is also zero. */
if (*inv_p)
step = size_zero_node;
+ else if (negative)
+ step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c (revision 163773)
+++ tree-vect-stmts.c (working copy)
@@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
+ if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step for store.");
+ return false;
+ }
+
if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
{
strided_store = true;
@@ -3400,6 +3407,68 @@ vectorizable_store (gimple stmt, gimple_
return true;
}
+/* Given a vector type VECTYPE returns a builtin DECL to be used
+ for vector permutation and stores a mask into *MASK that implements
+ reversal of the vector elements. If that is impossible to do
+ returns NULL (and *MASK is unchanged). */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+ tree builtin_decl;
+ tree mask_element_type, mask_type;
+ tree mask_vec = NULL;
+ int i;
+ int nunits;
+ if (!targetm.vectorize.builtin_vec_perm)
+ return NULL;
+
+ builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+ &mask_element_type);
+ if (!builtin_decl || !mask_element_type)
+ return NULL;
+
+ mask_type = get_vectype_for_scalar_type (mask_element_type);
+ nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+ return NULL;
+
+ for (i = 0; i < nunits; i++)
+ mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+ mask_vec = build_vector (mask_type, mask_vec);
+
+ if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+ return NULL;
+ if (mask)
+ *mask = mask_vec;
+ return builtin_decl;
+}
+
+/* Given a vector variable X, that was generated for the scalar LHS of
+ STMT, generate instructions to reverse the vector elements of X,
+ insert them a *GSI and return the permuted vector variable. */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+ tree vectype = TREE_TYPE (x);
+ tree mask_vec, builtin_decl;
+ tree perm_dest, data_ref;
+ gimple perm_stmt;
+
+ builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+ perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+ /* Generate the permute statement. */
+ perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+ data_ref = make_ssa_name (perm_dest, perm_stmt);
+ gimple_call_set_lhs (perm_stmt, data_ref);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+ return data_ref;
+}
+
/* vectorizable_load.
Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3442,6 +3511,7 @@ vectorizable_load (gimple stmt, gimple_s
gimple first_stmt;
tree scalar_type;
bool inv_p;
+ bool negative;
bool compute_in_loop = false;
struct loop *at_loop;
int vec_num;
@@ -3504,6 +3574,14 @@ vectorizable_load (gimple stmt, gimple_s
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
+ negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+ if (negative && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types with negative step.");
+ return false;
+ }
+
scalar_type = TREE_TYPE (DR_REF (dr));
mode = TYPE_MODE (vectype);
@@ -3538,6 +3616,25 @@ vectorizable_load (gimple stmt, gimple_s
return false;
}
+ if (negative)
+ {
+ gcc_assert (!strided_load);
+ alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+ if (alignment_support_scheme != dr_aligned
+ && alignment_support_scheme != dr_unaligned_supported)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step but alignment required.");
+ return false;
+ }
+ if (!perm_mask_for_reverse (vectype, NULL))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step and reversing not supported.");
+ return false;
+ }
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3712,6 +3809,9 @@ vectorizable_load (gimple stmt, gimple_s
else
at_loop = loop;
+ if (negative)
+ offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
prev_stmt_info = NULL;
for (j = 0; j < ncopies; j++)
{
@@ -3885,6 +3985,12 @@ vectorizable_load (gimple stmt, gimple_s
gcc_unreachable (); /* FORNOW. */
}
+ if (negative)
+ {
+ new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
+ }
+
/* Collect vector loads and later create their permutation in
vect_transform_strided_load (). */
if (strided_load || slp_perm)
Index: testsuite/gcc.dg/vect/pr43432.c
===================================================================
--- testsuite/gcc.dg/vect/pr43432.c (revision 0)
+++ testsuite/gcc.dg/vect/pr43432.c (revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+ int i;
+ src1 += len-1;
+ for(i=0; i<len; i++)
+ dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */