This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix up vectorizer DDR_REVERSED_P handling (PR tree-optimization/59594, take 2)


On Tue, Jan 28, 2014 at 01:14:32PM +0100, Richard Biener wrote:
> > I admit I fully don't understand why exactly, but my experimentation so far
> > showed that for read/write and write/read ddrs it is ok and desirable to
> > ignore the dist > 0 && DDR_REVERSED_P (ddr) cases, but for write/write
> > ddrs it is undesirable.  See the PR for further tests, perhaps I could
> > turn them into further testcases.
> 
> Please.

New testcase in the patch.

> > -      if (dist > 0 && DDR_REVERSED_P (ddr))
> > +      if (dist > 0 && DDR_REVERSED_P (ddr)
> > +	  && (DR_IS_READ (dra) || DR_IS_READ (drb)))
> 
> I think that'snot sufficient.  It depends
> on the order of the stmts whether the dependence distance is really
> negative - we are trying to catch write-after-read negative distance
> here I think.  We can't rely on the DDRs being formed in stmt order
> (anymore, at least since 4.9 where we start to arbitrary re-order
> the vector of DRs).

As discussed on IRC, the actual bug was that vect_analyze_data_ref_accesses
reordered the data references before DDRs were constructed, thus DDR_A
wasn't necessarily before DDR_B lexically in the loop and thus using
DDR_REVERSED_P bit didn't really reflect whether it is negative or positive
distance.

Fixed by making sure the data refs aren't reordered (well, they are
reordered on a copy of the vector only for the purposes of
vect_analyze_data_ref_accesses).  Bootstrapped/regtested on x86_64-linux
and i686-linux, ok for trunk?

2014-01-28  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/59594
	* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Sort
	a copy of the datarefs vector rather than the vector itself.

	* gcc.dg/vect/no-vfa-vect-depend-2.c: New test.
	* gcc.dg/vect/no-vfa-vect-depend-3.c: New test.
	* gcc.dg/vect/pr59594.c: New test.

--- gcc/tree-vect-data-refs.c.jj	2014-01-23 10:52:26.766346677 +0100
+++ gcc/tree-vect-data-refs.c	2014-01-28 16:11:34.371698307 +0100
@@ -2484,19 +2484,21 @@ vect_analyze_data_ref_accesses (loop_vec
     return true;
 
   /* Sort the array of datarefs to make building the interleaving chains
-     linear.  */
-  qsort (datarefs.address (), datarefs.length (),
+     linear.  Don't modify the original vector's order, it is needed for
+     determining what dependencies are reversed.  */
+  vec<data_reference_p> datarefs_copy = datarefs.copy ();
+  qsort (datarefs_copy.address (), datarefs_copy.length (),
 	 sizeof (data_reference_p), dr_group_sort_cmp);
 
   /* Build the interleaving chains.  */
-  for (i = 0; i < datarefs.length () - 1;)
+  for (i = 0; i < datarefs_copy.length () - 1;)
     {
-      data_reference_p dra = datarefs[i];
+      data_reference_p dra = datarefs_copy[i];
       stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
       stmt_vec_info lastinfo = NULL;
-      for (i = i + 1; i < datarefs.length (); ++i)
+      for (i = i + 1; i < datarefs_copy.length (); ++i)
 	{
-	  data_reference_p drb = datarefs[i];
+	  data_reference_p drb = datarefs_copy[i];
 	  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
 
 	  /* ???  Imperfect sorting (non-compatible types, non-modulo
@@ -2573,7 +2575,7 @@ vect_analyze_data_ref_accesses (loop_vec
 	}
     }
 
-  FOR_EACH_VEC_ELT (datarefs, i, dr)
+  FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) 
         && !vect_analyze_data_ref_access (dr))
       {
@@ -2588,9 +2590,13 @@ vect_analyze_data_ref_accesses (loop_vec
             continue;
           }
         else
-          return false;
+	  {
+	    datarefs_copy.release ();
+	    return false;
+	  }
       }
 
+  datarefs_copy.release ();
   return true;
 }
 
--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c.jj	2014-01-28 14:06:10.818303424 +0100
+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c	2014-01-28 14:06:10.818303424 +0100
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 17
+
+int ia[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0};
+int ib[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0};
+int res[N] = {48,192,180,168,156,144,132,120,108,96,84,72,60,48,36,24,12};
+
+__attribute__ ((noinline))
+int main1 ()
+{
+  int i;
+
+  /* Not vectorizable due to data dependence: dependence distance 1.  */ 
+  for (i = N - 1; i >= 0; i--)
+    {
+      ia[i] = ia[i+1] * 4;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ia[i] != 0)
+	abort ();
+    } 
+
+  /* Vectorizable. Dependence distance -1.  */
+  for (i = N - 1; i >= 0; i--)
+    {
+      ib[i+1] = ib[i] * 4;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ib[i] != res[i])
+	abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect"  } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c.jj	2014-01-28 14:12:08.235470812 +0100
+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c	2014-01-28 16:18:44.876376404 +0100
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+int ia[N + 1];
+int ib[N + 1];
+
+/* Vectorizable. Dependence distance -1.  */
+__attribute__((noinline)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      ia[i + 1] = 1;
+      ib[i] = ia[i];
+    }
+}
+
+/* Not vectorizable due to data dependence: dependence distance 1.  */
+__attribute__((noinline)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      ia[i] = 1;
+      ib[i] = ia[i + 1];
+    }
+}
+
+/* Not vectorizable due to data dependence: dependence distance 1.  */
+__attribute__((noinline)) void
+f3 (void)
+{
+  int i;
+  for (i = N - 1; i >= 0; i--)
+    {
+      ia[i + 1] = 1;
+      ib[i] = ia[i];
+    }
+}
+
+/* Vectorizable. Dependence distance -1.  */
+__attribute__((noinline)) void
+f4 (void)
+{
+  int i;
+  for (i = N - 1; i >= 0; i--)
+    {
+      ia[i] = 1;
+      ib[i] = ia[i + 1];
+    }
+}
+
+/* Vectorizable. Dependence distance -1.  */
+__attribute__((noinline)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      ia[i + 1] = 1;
+      ia[i] = 2;
+    }
+}
+
+/* Not vectorizable due to data dependence: dependence distance 1.  */
+__attribute__((noinline)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      ia[i] = 1;
+      ia[i + 1] = 2;
+    }
+}
+
+/* Not vectorizable due to data dependence: dependence distance 1.  */
+__attribute__((noinline)) void
+f7 (void)
+{
+  int i;
+  for (i = N - 1; i >= 0; i--)
+    {
+      ia[i + 1] = 1;
+      ia[i] = 2;
+    }
+}
+
+/* Vectorizable. Dependence distance -1.  */
+__attribute__((noinline)) void
+f8 (void)
+{
+  int i;
+  for (i = N - 1; i >= 0; i--)
+    {
+      ia[i] = 1;
+      ia[i + 1] = 2;
+    }
+}
+
+__attribute__ ((noinline)) int
+main1 (void)
+{
+  int i, j;
+
+  for (j = 0; j < 8; j++)
+    {
+      for (i = 0; i <= N; i++)
+	{
+	  ia[i] = i + 3;
+	  ib[i] = i + N + 3;
+	  asm ("");
+	}
+
+      switch (j)
+	{
+	case 0: f1 (); break;
+	case 1: f2 (); break;
+	case 2: f3 (); break;
+	case 3: f4 (); break;
+	case 4: f5 (); break;
+	case 5: f6 (); break;
+	case 6: f7 (); break;
+	case 7: f8 (); break;
+	}
+
+      for (i = 0; i <= N; i++)
+	{
+	  int ea = i + 3;
+	  int eb = i + N + 3;
+	  switch (j)
+	    {
+	    case 0:
+	      if (i) ea = 1;
+	      if (i == 0) eb = 3;
+	      else if (i != N) eb = 1;
+	      break;
+	    case 1:
+	      if (i != N) ea = 1;
+	      if (i != N) eb = i + 4;
+	      break;
+	    case 2:
+	      if (i) ea = 1;
+	      if (i != N) eb = i + 3;
+	      break;
+	    case 3:
+	      if (i != N) ea = 1;
+	      if (i < N - 1) eb = 1;
+	      else if (i == N - 1) eb = 67;
+	      break;
+	    case 4:
+	      ea = 1 + (i != N);
+	      break;
+	    case 5:
+	      ea = 2 - (i != N);
+	      break;
+	    case 6:
+	      ea = 1 + (i == 0);
+	      break;
+	    case 7:
+	      ea = 2 - (i == 0);
+	      break;
+	    }
+	  if (ia[i] != ea || ib[i] != eb)
+	    abort ();
+	}
+    }
+
+  return 0;
+}
+
+int main ()
+{
+  check_vect ();
+
+  return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" {xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "dependence distance negative" 4 "vect"  } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/pr59594.c.jj	2014-01-28 14:06:10.819303452 +0100
+++ gcc/testsuite/gcc.dg/vect/pr59594.c	2014-01-28 14:06:10.818303424 +0100
@@ -0,0 +1,31 @@
+/* PR tree-optimization/59594 */
+
+#include "tree-vect.h"
+
+#define N 1024
+int b[N + 1];
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  for (i = 0; i < N + 1; i++)
+    {
+      b[i] = i;
+      asm ("");
+    }
+  for (i = N; i >= 0; i--)
+    {
+      b[i + 1] = b[i];
+      b[i] = 1;
+    }
+  if (b[0] != 1)
+    __builtin_abort ();
+  for (i = 0; i < N; i++)
+    if (b[i + 1] != i)
+      __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]