This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Fix PR66253 (GemsFDTD miscompile)


Hi,

this implements support for strided grouped stores in the non-SLP case
(the SLP case existed already).  Before, we were ignoring all but the
last store in a group, which led to a miscompile of GemsFDTD; the
testcase reflects that situation.
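
For illustration, here is a minimal sketch (not the PR's exact
reproducer) of the kind of loop affected; storing a _Complex double
is a group of two scalar stores per iteration, with a runtime stride:

/* Hypothetical sketch: each iteration performs a store group of
   size two (real and imaginary part of a[i*stride]) with a
   loop-invariant but runtime stride.  Before this patch the
   non-SLP strided path emitted code only for the last store in
   the group, dropping the other component.  */
void
store_group (_Complex double *a, _Complex double x, int stride, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i * stride] = x;
}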

Also, since r224511 yesterday, grouped strided non-SLP loads were
broken: all loads in a group were using the same base address.  That is
okay only for the SLP case as the code is structured right now (only
the SLP case uses the permutation path; non-SLP emits scalar loads
directly).
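
As a hedged sketch (again, not the exact reproducer), a grouped
strided load like reading a _Complex double consists of two scalar
loads whose addresses differ by one element; with the r224511
breakage, every load in the group was emitted against the same base
address:

/* Hypothetical sketch: reading b[i*stride] is a load group of size
   two (real part, then imaginary part one double further on).  With
   the regression, each scalar load in the group used the group
   leader's address, so the imaginary part re-read the real part.  */
void
load_group (double * __restrict__ out, _Complex double * __restrict__ b,
	    int stride, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = __real__ b[i * stride] - __imag__ b[i * stride];
}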

Regstrapping on x86-64-linux is in progress; okay if that passes?


Ciao,
Michael.

	PR middle-end/66253
	* tree-vect-stmts.c (vectorizable_store): Implement non-SLP
	grouped strided stores.
	(vectorizable_load): Don't use the DR from first_stmt in
	the non-SLP grouped strided case.

testsuite/
	* gcc.dg/vect/pr66253.c: New testcase.

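To make the vectorizable_store change concrete, here is a rough C
model (illustrative names only, not the generated GIMPLE) of the
scalar store sequence the patched code now emits: for each group
member g the running pointer starts g elements past the common base
taken from first_dr, and is then bumped by the full stride after
every extracted element:

/* Hedged C model of the emitted scalar stores; 'lanes' stands in
   for the elements extracted from the vector defs via BIT_FIELD_REF.
   All names here are illustrative assumptions.  */
static void
group_store_model (double *base, const double *lanes,
		   unsigned group_size, unsigned nelems,
		   unsigned long stride)
{
  unsigned g, i;
  for (g = 0; g < group_size; g++)
    {
      double *ptr = base + g;		/* g * TYPE_SIZE_UNIT offset */
      for (i = 0; i < nelems; i++)
	{
	  *ptr = lanes[g * nelems + i];	/* store extracted element */
	  ptr += stride;		/* advance by stride_step */
	}
    }
}
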
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 224562)
+++ tree-vect-stmts.c	(working copy)
@@ -5262,16 +5262,17 @@ vectorizable_store (gimple stmt, gimple_
       gimple_seq stmts = NULL;
       tree stride_base, stride_step, alias_off;
       tree vec_oprnd;
+      unsigned int g;
 
       gcc_assert (!nested_in_vect_loop_p (loop, stmt));
 
       stride_base
 	= fold_build_pointer_plus
-	    (unshare_expr (DR_BASE_ADDRESS (dr)),
+	    (unshare_expr (DR_BASE_ADDRESS (first_dr)),
 	     size_binop (PLUS_EXPR,
-			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
-			 convert_to_ptrofftype (DR_INIT(dr))));
-      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
+			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
+			 convert_to_ptrofftype (DR_INIT(first_dr))));
+      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
 
       /* For a store with loop-invariant (but other than power-of-2)
          stride (i.e. not a grouped access) like so:
@@ -5302,6 +5303,7 @@ vectorizable_store (gimple stmt, gimple_
 	    ltype = vectype;
 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+	  group_size = 1;
 	}
 
       ivstep = stride_step;
@@ -5322,65 +5324,89 @@ vectorizable_store (gimple stmt, gimple_
 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
 
       prev_stmt_info = NULL;
-      running_off = offvar;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
-      for (j = 0; j < ncopies; j++)
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
+      next_stmt = first_stmt;
+      for (g = 0; g < group_size; g++)
 	{
-	  /* We've set op and dt above, from gimple_assign_rhs1(stmt),
-	     and first_stmt == stmt.  */
-	  if (j == 0)
-	    {
-	      if (slp)
-		{
-		  vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
-				     slp_node, -1);
-		  vec_oprnd = vec_oprnds[0];
-		}
-	      else
-		vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
-	    }
-	  else
+	  running_off = offvar;
+	  if (g)
 	    {
-	      if (slp)
-		vec_oprnd = vec_oprnds[j];
-	      else
-		vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
-	    }
-
-	  for (i = 0; i < nstores; i++)
-	    {
-	      tree newref, newoff;
-	      gimple incr, assign;
-	      tree size = TYPE_SIZE (ltype);
-	      /* Extract the i'th component.  */
-	      tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
+	      tree size = TYPE_SIZE_UNIT (ltype);
+	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
 				      size);
-	      tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
-				       size, pos);
-
-	      elem = force_gimple_operand_gsi (gsi, elem, true,
-					       NULL_TREE, true,
-					       GSI_SAME_STMT);
-
-	      newref = build2 (MEM_REF, ltype,
-			       running_off, alias_off);
-
-	      /* And store it to *running_off.  */
-	      assign = gimple_build_assign (newref, elem);
-	      vect_finish_stmt_generation (stmt, assign, gsi);
-
-	      newoff = copy_ssa_name (running_off, NULL);
+	      tree newoff = copy_ssa_name (running_off, NULL);
 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-					  running_off, stride_step);
+					  running_off, pos);
 	      vect_finish_stmt_generation (stmt, incr, gsi);
-
 	      running_off = newoff;
-	      if (j == 0 && i == 0)
-		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+	    }
+	  for (j = 0; j < ncopies; j++)
+	    {
+	      /* We've set op and dt above, from gimple_assign_rhs1(stmt),
+		 and first_stmt == stmt.  */
+	      if (j == 0)
+		{
+		  if (slp)
+		    {
+		      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
+					 slp_node, -1);
+		      vec_oprnd = vec_oprnds[0];
+		    }
+		  else
+		    {
+		      gcc_assert (gimple_assign_single_p (next_stmt));
+		      op = gimple_assign_rhs1 (next_stmt);
+		      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
+								NULL);
+		    }
+		}
 	      else
-		STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
-	      prev_stmt_info = vinfo_for_stmt (assign);
+		{
+		  if (slp)
+		    vec_oprnd = vec_oprnds[j];
+		  else
+		    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
+		}
+
+	      for (i = 0; i < nstores; i++)
+		{
+		  tree newref, newoff;
+		  gimple incr, assign;
+		  tree size = TYPE_SIZE (ltype);
+		  /* Extract the i'th component.  */
+		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
+					  bitsize_int (i), size);
+		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
+					   size, pos);
+
+		  elem = force_gimple_operand_gsi (gsi, elem, true,
+						   NULL_TREE, true,
+						   GSI_SAME_STMT);
+
+		  newref = build2 (MEM_REF, ltype,
+				   running_off, alias_off);
+
+		  /* And store it to *running_off.  */
+		  assign = gimple_build_assign (newref, elem);
+		  vect_finish_stmt_generation (stmt, assign, gsi);
+
+		  newoff = copy_ssa_name (running_off, NULL);
+		  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+					      running_off, stride_step);
+		  vect_finish_stmt_generation (stmt, incr, gsi);
+
+		  running_off = newoff;
+		  if (g == group_size - 1)
+		    {
+		      if (j == 0 && i == 0)
+			STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
+		      else
+			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
+		      prev_stmt_info = vinfo_for_stmt (assign);
+		    }
+		}
 	    }
+	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
 	}
       return true;
     }
@@ -6265,7 +6291,7 @@ vectorizable_load (gimple stmt, gimple_s
 
       gcc_assert (!nested_in_vect_loop);
 
-      if (grouped_load)
+      if (slp && grouped_load)
 	first_dr = STMT_VINFO_DATA_REF
 	    (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
       else
Index: testsuite/gcc.dg/vect/pr66253.c
===================================================================
--- testsuite/gcc.dg/vect/pr66253.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr66253.c	(working copy)
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_hw_misalign } */
+
+#include "tree-vect.h"
+
+void __attribute__((noinline,noclone))
+test1(_Complex double * __restrict__ a, _Complex double * __restrict__ b,
+      double * __restrict__ c, int stride, int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    {
+      a[i*stride] = 0.5 * b[i*stride] * c[i*stride];
+    }
+}
+
+double ca[256];
+_Complex double ia[256];
+_Complex double da[256];
+
+extern void abort (void);
+
+int main ()
+{
+  int i;
+  int stride;
+
+  check_vect ();
+
+  for (stride = 1; stride < 15; stride++)
+    {
+      for (i = 0; i < 256; i++)
+	{
+	  __real__ ia[i] = (i + stride) % 19;
+	  __imag__ ia[i] = (i + stride) % 23;
+	  ca[i] = (i + stride) % 29;
+	  __asm__ volatile ("");
+	}
+
+      test1(da, ia, ca, stride, 256/stride);
+
+      for (i = 0; i < 256/stride; i++)
+	{
+	  if (da[i*stride] != 0.5 * ia[i*stride] * ca[i*stride])
+	    abort ();
+	}
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */

