This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Fix PR66253 (GemsFDTD miscompile)
- From: Michael Matz <matz at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 17 Jun 2015 17:32:05 +0200 (CEST)
- Subject: Fix PR66253 (GemsFDTD miscompile)
- Authentication-results: sourceware.org; auth=none
Hi,
this implements support for strided grouped stores in the non-SLP case
(the SLP case existed already). Before, we were ignoring all but the
last store in a group, which led to a miscompile of GemsFDTD; the
testcase reflects that situation.
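To make the situation concrete, here is a minimal example of my own
(not taken from the PR), a grouped strided store of group size two,
written with plain doubles rather than the _Complex double of the
testcase below:

  /* Group of two adjacent stores per iteration, both advancing by a
     runtime stride; before this patch only the last member of the
     group was actually emitted in the non-SLP case.  */
  void
  store_group (double *a, double x, double y, int stride, int n)
  {
    for (int i = 0; i < n; i++)
      {
        a[2*i*stride]     = x;  /* group member 0 */
        a[2*i*stride + 1] = y;  /* group member 1 */
      }
  }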
Also, since r224511 yesterday, grouped strided non-SLP loads were
broken: all loads in a group were using the same base address, which is
okay only for the SLP case as the code is structured right now (only
the SLP case uses the permutation path; non-SLP emits scalar loads
directly).
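Again as an illustration of mine, the load side of the same pattern;
the scalar loads emitted for group member 1 must use member 1's own
address, not member 0's:

  /* Group of two strided loads (the shape a _Complex double read
     produces).  After r224511 the non-SLP path wrongly used the
     first member's base address for both loads of the group.  */
  double
  load_group (const double *b, int stride, int n)
  {
    double s = 0.0;
    for (int i = 0; i < n; i++)
      s += b[2*i*stride]          /* member 0: base            */
           + b[2*i*stride + 1];   /* member 1: base + 8 bytes  */
    return s;
  }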
Regstrapping on x86-64-linux in progress; okay if that passes?
Ciao,
Michael.
PR middle-end/66253
* tree-vect-stmts.c (vectorizable_store): Implement non-SLP
grouped strided stores.
(vectorizable_load): Don't use the DR from first_stmt in
the non-SLP grouped strided case.
testsuite/
* gcc.dg/vect/pr66253.c: New testcase.
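For reference, a hand-written sketch (mine, not compiler output) of the
scalar code shape the new vectorizable_store path produces for a group
of two: one running pointer per group member, started at the common
base plus g elements, with all member-0 stores emitted first and then
all member-1 stores:

  /* n stands in for ncopies * nstores; stride for stride_step in
     elements.  */
  void
  emitted_shape (double *base, const double *v0, const double *v1,
                 long stride, int n)
  {
    double *off0 = base;        /* g == 0: the common base           */
    for (int i = 0; i < n; i++)
      {
        *off0 = v0[i];          /* scalar store of one vector element */
        off0 += stride;
      }
    double *off1 = base + 1;    /* g == 1: one element further in    */
    for (int i = 0; i < n; i++)
      {
        *off1 = v1[i];
        off1 += stride;
      }
  }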
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c (revision 224562)
+++ tree-vect-stmts.c (working copy)
@@ -5262,16 +5262,17 @@ vectorizable_store (gimple stmt, gimple_
gimple_seq stmts = NULL;
tree stride_base, stride_step, alias_off;
tree vec_oprnd;
+ unsigned int g;
gcc_assert (!nested_in_vect_loop_p (loop, stmt));
stride_base
= fold_build_pointer_plus
- (unshare_expr (DR_BASE_ADDRESS (dr)),
+ (unshare_expr (DR_BASE_ADDRESS (first_dr)),
size_binop (PLUS_EXPR,
- convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
- convert_to_ptrofftype (DR_INIT(dr))));
- stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+ convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
+ convert_to_ptrofftype (DR_INIT(first_dr))));
+ stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
/* For a store with loop-invariant (but other than power-of-2)
stride (i.e. not a grouped access) like so:
@@ -5302,6 +5303,7 @@ vectorizable_store (gimple stmt, gimple_
ltype = vectype;
ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ group_size = 1;
}
ivstep = stride_step;
@@ -5322,65 +5324,89 @@ vectorizable_store (gimple stmt, gimple_
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
prev_stmt_info = NULL;
- running_off = offvar;
- alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
- for (j = 0; j < ncopies; j++)
+ alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
+ next_stmt = first_stmt;
+ for (g = 0; g < group_size; g++)
{
- /* We've set op and dt above, from gimple_assign_rhs1(stmt),
- and first_stmt == stmt. */
- if (j == 0)
- {
- if (slp)
- {
- vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
- slp_node, -1);
- vec_oprnd = vec_oprnds[0];
- }
- else
- vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
- }
- else
+ running_off = offvar;
+ if (g)
{
- if (slp)
- vec_oprnd = vec_oprnds[j];
- else
- vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
- }
-
- for (i = 0; i < nstores; i++)
- {
- tree newref, newoff;
- gimple incr, assign;
- tree size = TYPE_SIZE (ltype);
- /* Extract the i'th component. */
- tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
+ tree size = TYPE_SIZE_UNIT (ltype);
+ tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
size);
- tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
- size, pos);
-
- elem = force_gimple_operand_gsi (gsi, elem, true,
- NULL_TREE, true,
- GSI_SAME_STMT);
-
- newref = build2 (MEM_REF, ltype,
- running_off, alias_off);
-
- /* And store it to *running_off. */
- assign = gimple_build_assign (newref, elem);
- vect_finish_stmt_generation (stmt, assign, gsi);
-
- newoff = copy_ssa_name (running_off, NULL);
+ tree newoff = copy_ssa_name (running_off, NULL);
incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
- running_off, stride_step);
+ running_off, pos);
vect_finish_stmt_generation (stmt, incr, gsi);
-
running_off = newoff;
- if (j == 0 && i == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+ }
+ for (j = 0; j < ncopies; j++)
+ {
+ /* We've set op and dt above, from gimple_assign_rhs1(stmt),
+ and first_stmt == stmt. */
+ if (j == 0)
+ {
+ if (slp)
+ {
+ vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
+ slp_node, -1);
+ vec_oprnd = vec_oprnds[0];
+ }
+ else
+ {
+ gcc_assert (gimple_assign_single_p (next_stmt));
+ op = gimple_assign_rhs1 (next_stmt);
+ vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
+ NULL);
+ }
+ }
else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
- prev_stmt_info = vinfo_for_stmt (assign);
+ {
+ if (slp)
+ vec_oprnd = vec_oprnds[j];
+ else
+ vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+ }
+
+ for (i = 0; i < nstores; i++)
+ {
+ tree newref, newoff;
+ gimple incr, assign;
+ tree size = TYPE_SIZE (ltype);
+ /* Extract the i'th component. */
+ tree pos = fold_build2 (MULT_EXPR, bitsizetype,
+ bitsize_int (i), size);
+ tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
+ size, pos);
+
+ elem = force_gimple_operand_gsi (gsi, elem, true,
+ NULL_TREE, true,
+ GSI_SAME_STMT);
+
+ newref = build2 (MEM_REF, ltype,
+ running_off, alias_off);
+
+ /* And store it to *running_off. */
+ assign = gimple_build_assign (newref, elem);
+ vect_finish_stmt_generation (stmt, assign, gsi);
+
+ newoff = copy_ssa_name (running_off, NULL);
+ incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+ running_off, stride_step);
+ vect_finish_stmt_generation (stmt, incr, gsi);
+
+ running_off = newoff;
+ if (g == group_size - 1)
+ {
+ if (j == 0 && i == 0)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
+ prev_stmt_info = vinfo_for_stmt (assign);
+ }
+ }
}
+ next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
}
return true;
}
@@ -6265,7 +6291,7 @@ vectorizable_load (gimple stmt, gimple_s
gcc_assert (!nested_in_vect_loop);
- if (grouped_load)
+ if (slp && grouped_load)
first_dr = STMT_VINFO_DATA_REF
(vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
else
Index: testsuite/gcc.dg/vect/pr66253.c
===================================================================
--- testsuite/gcc.dg/vect/pr66253.c (revision 0)
+++ testsuite/gcc.dg/vect/pr66253.c (working copy)
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_hw_misalign } */
+
+#include "tree-vect.h"
+
+void __attribute__((noinline,noclone))
+test1(_Complex double * __restrict__ a, _Complex double * __restrict__ b,
+ double * __restrict__ c, int stride, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i*stride] = 0.5 * b[i*stride] * c[i*stride];
+ }
+}
+
+double ca[256];
+_Complex double ia[256];
+_Complex double da[256];
+
+extern void abort (void);
+
+int main ()
+{
+ int i;
+ int stride;
+
+ check_vect ();
+
+ for (stride = 1; stride < 15; stride++)
+ {
+ for (i = 0; i < 256; i++)
+ {
+ __real__ ia[i] = (i + stride) % 19;
+ __imag__ ia[i] = (i + stride) % 23;
+ ca[i] = (i + stride) % 29;
+ __asm__ volatile ("");
+ }
+
+ test1(da, ia, ca, stride, 256/stride);
+
+ for (i = 0; i < 256/stride; i++)
+ {
+ if (da[i*stride] != 0.5 * ia[i*stride] * ca[i*stride])
+ abort ();
+ }
+ }
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */