This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Re: Fix PR66251 (wrong code with strided group stores)
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: Michael Matz <matz at suse dot de>, gcc-patches at gcc dot gnu dot org
- Date: Fri, 22 May 2015 18:31:56 +0200
- Subject: Re: Fix PR66251 (wrong code with strided group stores)
- Authentication-results: sourceware.org; auth=none
- References: <alpine dot LSU dot 2 dot 20 dot 1505221709220 dot 27315 at wotan dot suse dot de>
On May 22, 2015 5:13:16 PM GMT+02:00, Michael Matz <matz@suse.de> wrote:
>Hi,
>
>between Richi's improvements to grouped accesses and mine to strided
>stores there is an interaction that now leads to ICEs and wrong code
>after both are in, for instance PR66251.  The added testcase reflects
>this situation and uses both narrowing and widening (narrowing would
>still ICE, widening currently produces only wrong code).  The patch
>fixes the testcase(s).
>
>It's currently regstrapping on x86_64-linux, okay for trunk if that
>passes?
OK.
Thanks,
Richard.
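
For readers arriving from the PR: the affected shape is a strided group
store combined with a type conversion.  A distilled sketch of the
widening case (essentially test1 from the attached pr66251.c; the
function name here is illustrative):

    /* Widening int -> double store of a size-2 group (__real__ and
       __imag__) at a runtime stride -- the shape that was
       mis-vectorized before this fix.  */
    void
    widen_store (_Complex double *a, _Complex int *b, int stride, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        a[i * stride] = b[i * stride];
    }

A schematic of the code the new strided-group path emits follows after
the patch.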
>
>Ciao,
>Michael.
>
> PR middle-end/66251
>
> * tree-vect-stmts.c (vect_model_store_cost): Handle strided group
> stores.
> (vect_create_vectorized_demotion_stmts): Always set
> STMT_VINFO_VEC_STMT, also with SLP.
> (vectorizable_store): Handle strided group stores.
>
>testsuite/:
> PR middle-end/66251
> * gcc.dg/vect/pr66251.c: New test.
>
>Index: tree-vect-stmts.c
>===================================================================
>--- tree-vect-stmts.c (revision 223577)
>+++ tree-vect-stmts.c (working copy)
>@@ -1000,7 +1000,8 @@ vect_model_store_cost (stmt_vec_info stm
> equivalent to the cost of GROUP_SIZE separate stores. If a grouped
> access is instead being provided by a permute-and-store operation,
> include the cost of the permutes. */
>- if (!store_lanes_p && group_size > 1)
>+ if (!store_lanes_p && group_size > 1
>+ && !STMT_VINFO_STRIDED_P (stmt_info))
> {
> /* Uses a high and low interleave or shuffle operations for each
> needed permute. */
>@@ -1014,21 +1015,24 @@ vect_model_store_cost (stmt_vec_info stm
> group_size);
> }
>
>+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> /* Costs of the stores. */
>- if (STMT_VINFO_STRIDED_P (stmt_info))
>+ if (STMT_VINFO_STRIDED_P (stmt_info)
>+ && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
> {
> /* N scalar stores plus extracting the elements. */
>- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> inside_cost += record_stmt_cost (body_cost_vec,
> ncopies * TYPE_VECTOR_SUBPARTS (vectype),
> scalar_store, stmt_info, 0, vect_body);
>- inside_cost += record_stmt_cost (body_cost_vec,
>- ncopies * TYPE_VECTOR_SUBPARTS (vectype),
>- vec_to_scalar, stmt_info, 0, vect_body);
> }
> else
> vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
>
>+ if (STMT_VINFO_STRIDED_P (stmt_info))
>+ inside_cost += record_stmt_cost (body_cost_vec,
>+ ncopies * TYPE_VECTOR_SUBPARTS (vectype),
>+ vec_to_scalar, stmt_info, 0, vect_body);
>+
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> "vect_model_store_cost: inside_cost = %d, "
>@@ -3377,15 +3381,13 @@ vect_create_vectorized_demotion_stmts (v
> (or in STMT_VINFO_RELATED_STMT chain). */
> if (slp_node)
> SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
>+
>+ if (!*prev_stmt_info)
>+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
> else
>- {
>- if (!*prev_stmt_info)
>- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
>- else
>- STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
>+ STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
>
>- *prev_stmt_info = vinfo_for_stmt (new_stmt);
>- }
>+ *prev_stmt_info = vinfo_for_stmt (new_stmt);
> }
> }
>
>@@ -5155,15 +5157,27 @@ vectorizable_store (gimple stmt, gimple_
> {
> grouped_store = true;
> first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>- if (!slp && !PURE_SLP_STMT (stmt_info))
>+ group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
>+ if (!slp
>+ && !PURE_SLP_STMT (stmt_info)
>+ && !STMT_VINFO_STRIDED_P (stmt_info))
> {
>- group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> if (vect_store_lanes_supported (vectype, group_size))
> store_lanes_p = true;
> else if (!vect_grouped_store_supported (vectype, group_size))
> return false;
> }
>
>+ if (STMT_VINFO_STRIDED_P (stmt_info)
>+ && (slp || PURE_SLP_STMT (stmt_info))
>+ && (group_size > nunits
>+ || nunits % group_size != 0))
>+ {
>+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>+ "unhandled strided group store\n");
>+ return false;
>+ }
>+
> if (first_stmt == stmt)
> {
> /* STMT is the leader of the group. Check the operands of all the
>@@ -5286,10 +5300,23 @@ vectorizable_store (gimple stmt, gimple_
> ...
> */
>
>+ unsigned nstores = nunits;
>+ tree ltype = elem_type;
>+ if (slp)
>+ {
>+ nstores = nunits / group_size;
>+ if (group_size < nunits)
>+ ltype = build_vector_type (elem_type, group_size);
>+ else
>+ ltype = vectype;
>+ ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
>+ ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
>+ }
>+
> ivstep = stride_step;
> ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
> build_int_cst (TREE_TYPE (ivstep),
>- ncopies * nunits));
>+ ncopies * nstores));
>
> standard_iv_increment_position (loop, &incr_gsi, &insert_after);
>
>@@ -5315,22 +5342,22 @@ vectorizable_store (gimple stmt, gimple_
> else
> vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
>
>- for (i = 0; i < nunits; i++)
>+ for (i = 0; i < nstores; i++)
> {
> tree newref, newoff;
> gimple incr, assign;
>- tree size = TYPE_SIZE (elem_type);
>+ tree size = TYPE_SIZE (ltype);
> /* Extract the i'th component. */
> tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
> size);
>- tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
>+ tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
> size, pos);
>
> elem = force_gimple_operand_gsi (gsi, elem, true,
> NULL_TREE, true,
> GSI_SAME_STMT);
>
>- newref = build2 (MEM_REF, TREE_TYPE (vectype),
>+ newref = build2 (MEM_REF, ltype,
> running_off, alias_off);
>
> /* And store it to *running_off. */
>Index: testsuite/gcc.dg/vect/pr66251.c
>===================================================================
>--- testsuite/gcc.dg/vect/pr66251.c (revision 0)
>+++ testsuite/gcc.dg/vect/pr66251.c (working copy)
>@@ -0,0 +1,79 @@
>+/* { dg-require-effective-target vect_int } */
>+/* { dg-require-effective-target vect_double } */
>+/* { dg-require-effective-target vect_floatint_cvt } */
>+/* { dg-require-effective-target vect_intfloat_cvt } */
>+/* { dg-require-effective-target vect_pack_trunc } */
>+/* { dg-require-effective-target vect_unpack } */
>+/* { dg-require-effective-target vect_hw_misalign } */
>+
>+#include "tree-vect.h"
>+
>+void __attribute__((noinline,noclone))
>+test1(_Complex double *a, _Complex int *b, int stride, int n)
>+{
>+ int i;
>+ for (i = 0; i < n; i++)
>+ {
>+ a[i*stride] = b[i*stride];
>+ }
>+}
>+
>+void __attribute__((noinline,noclone))
>+test2(_Complex int *a, _Complex double *b, int stride, int n)
>+{
>+ int i;
>+ for (i = 0; i < n; i++)
>+ {
>+ a[i*stride] = b[i*stride];
>+ }
>+}
>+
>+_Complex int ia[256];
>+_Complex double da[256];
>+
>+extern void abort (void);
>+
>+int main ()
>+{
>+ int i;
>+ int stride;
>+
>+ check_vect ();
>+
>+ for (stride = 1; stride < 15; stride++)
>+ {
>+ for (i = 0; i < 256; i++)
>+ {
>+ __real__ ia[i] = (i + stride) % 19;
>+ __imag__ ia[i] = (i + stride) % 23;
>+ __asm__ volatile ("");
>+ }
>+
>+ test1(da, ia, stride, 256/stride);
>+
>+ for (i = 0; i < 256/stride; i++)
>+ {
>+ if (da[i*stride] != ia[i*stride])
>+ abort ();
>+ }
>+
>+ for (i = 0; i < 256; i++)
>+ {
>+ __real__ da[i] = (i + stride + 1) % 29;
>+ __imag__ da[i] = (i + stride + 1) % 31;
>+ __asm__ volatile ("");
>+ }
>+
>+ test2(ia, da, stride, 256/stride);
>+
>+ for (i = 0; i < 256/stride; i++)
>+ {
>+ if (da[i*stride] != ia[i*stride])
>+ abort ();
>+ }
>+ }
>+ return 0;
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
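
To illustrate the new SLP strided path in vectorizable_store: instead
of one vector store per copy, the vector operand is split into
group_size-wide pieces with BIT_FIELD_REF and each piece is stored at
the next strided offset.  A plain-C rendering of the sequence emitted
for one vector copy, assuming a V4SI operand and group_size == 2 (so
nstores == 2 and ltype is a 2 x int vector); all names below are
illustrative, not from the patch:

    typedef int v2si __attribute__ ((vector_size (8)));
    typedef int v4si __attribute__ ((vector_size (16)));

    static void
    store_pieces (v4si vec_oprnd, char *running_off, long stride_step)
    {
      /* BIT_FIELD_REF <vec_oprnd, 64, 0>: the first group of two.  */
      v2si piece0 = { vec_oprnd[0], vec_oprnd[1] };
      __builtin_memcpy (running_off, &piece0, sizeof piece0);
      running_off += stride_step;
      /* BIT_FIELD_REF <vec_oprnd, 64, 64>: the second group of two.  */
      v2si piece1 = { vec_oprnd[2], vec_oprnd[3] };
      __builtin_memcpy (running_off, &piece1, sizeof piece1);
      running_off += stride_step;
    }

This also explains the ivstep change from ncopies * nunits to
ncopies * nstores: the induction variable must advance by the number of
group stores actually emitted per copy, not by the number of vector
elements.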