[PATCH] Fix up vectorizer DDR_REVERSED_P handling (PR tree-optimization/59594, take 2)

Richard Biener rguenther@suse.de
Wed Jan 29 08:28:00 GMT 2014


On January 28, 2014 11:32:40 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>On Tue, Jan 28, 2014 at 01:14:32PM +0100, Richard Biener wrote:
>> > I admit I fully don't understand why exactly, but my
>experimentation so far
>> > showed that for read/write and write/read ddrs it is ok and
>desirable to
>> > ignore the dist > 0 && DDR_REVERSED_P (ddr) cases, but for
>write/write
>> > ddrs it is undesirable.  See the PR for further tests, perhaps I
>could
>> > turn them into further testcases.
>> 
>> Please.
>
>New testcase in the patch.
>
>> > -      if (dist > 0 && DDR_REVERSED_P (ddr))
>> > +      if (dist > 0 && DDR_REVERSED_P (ddr)
>> > +	  && (DR_IS_READ (dra) || DR_IS_READ (drb)))
>> 
>> I think that'snot sufficient.  It depends
>> on the order of the stmts whether the dependence distance is really
>> negative - we are trying to catch write-after-read negative distance
>> here I think.  We can't rely on the DDRs being formed in stmt order
>> (anymore, at least since 4.9 where we start to arbitrary re-order
>> the vector of DRs).
>
>As discussed on IRC, the actual bug was that
>vect_analyze_data_ref_accesses
>reordered the data references before DDRs were constructed, thus DDR_A
>wasn't necessarily before DDR_B lexically in the loop and thus using
>DDR_REVERSED_P bit didn't really reflect whether it is negative or
>positive
>distance.
>
>Fixed by making sure the data refs aren't reordered (well, they are
>reordered on a copy of the vector only for the purposes of
>vect_analyze_data_ref_accesses).  Bootstrapped/regtested on
>x86_64-linux
>and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

>2014-01-28  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/59594
>	* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Sort
>	a copy of the datarefs vector rather than the vector itself.
>
>	* gcc.dg/vect/no-vfa-vect-depend-2.c: New test.
>	* gcc.dg/vect/no-vfa-vect-depend-3.c: New test.
>	* gcc.dg/vect/pr59594.c: New test.
>
>--- gcc/tree-vect-data-refs.c.jj	2014-01-23 10:52:26.766346677 +0100
>+++ gcc/tree-vect-data-refs.c	2014-01-28 16:11:34.371698307 +0100
>@@ -2484,19 +2484,21 @@ vect_analyze_data_ref_accesses (loop_vec
>     return true;
> 
> /* Sort the array of datarefs to make building the interleaving chains
>-     linear.  */
>-  qsort (datarefs.address (), datarefs.length (),
>+     linear.  Don't modify the original vector's order, it is needed
>for
>+     determining what dependencies are reversed.  */
>+  vec<data_reference_p> datarefs_copy = datarefs.copy ();
>+  qsort (datarefs_copy.address (), datarefs_copy.length (),
> 	 sizeof (data_reference_p), dr_group_sort_cmp);
> 
>   /* Build the interleaving chains.  */
>-  for (i = 0; i < datarefs.length () - 1;)
>+  for (i = 0; i < datarefs_copy.length () - 1;)
>     {
>-      data_reference_p dra = datarefs[i];
>+      data_reference_p dra = datarefs_copy[i];
>       stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
>       stmt_vec_info lastinfo = NULL;
>-      for (i = i + 1; i < datarefs.length (); ++i)
>+      for (i = i + 1; i < datarefs_copy.length (); ++i)
> 	{
>-	  data_reference_p drb = datarefs[i];
>+	  data_reference_p drb = datarefs_copy[i];
> 	  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
> 
> 	  /* ???  Imperfect sorting (non-compatible types, non-modulo
>@@ -2573,7 +2575,7 @@ vect_analyze_data_ref_accesses (loop_vec
> 	}
>     }
> 
>-  FOR_EACH_VEC_ELT (datarefs, i, dr)
>+  FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
>     if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) 
>         && !vect_analyze_data_ref_access (dr))
>       {
>@@ -2588,9 +2590,13 @@ vect_analyze_data_ref_accesses (loop_vec
>             continue;
>           }
>         else
>-          return false;
>+	  {
>+	    datarefs_copy.release ();
>+	    return false;
>+	  }
>       }
> 
>+  datarefs_copy.release ();
>   return true;
> }
> 
>--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c.jj	2014-01-28
>14:06:10.818303424 +0100
>+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c	2014-01-28
>14:06:10.818303424 +0100
>@@ -0,0 +1,55 @@
>+/* { dg-require-effective-target vect_int } */
>+
>+#include <stdarg.h>
>+#include "tree-vect.h"
>+
>+#define N 17
>+
>+int ia[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0};
>+int ib[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0};
>+int res[N] =
>{48,192,180,168,156,144,132,120,108,96,84,72,60,48,36,24,12};
>+
>+__attribute__ ((noinline))
>+int main1 ()
>+{
>+  int i;
>+
>+  /* Not vectorizable due to data dependence: dependence distance 1. 
>*/ 
>+  for (i = N - 1; i >= 0; i--)
>+    {
>+      ia[i] = ia[i+1] * 4;
>+    }
>+
>+  /* check results:  */
>+  for (i = 0; i < N; i++)
>+    {
>+      if (ia[i] != 0)
>+	abort ();
>+    } 
>+
>+  /* Vectorizable. Dependence distance -1.  */
>+  for (i = N - 1; i >= 0; i--)
>+    {
>+      ib[i+1] = ib[i] * 4;
>+    }
>+
>+  /* check results:  */
>+  for (i = 0; i < N; i++)
>+    {
>+      if (ib[i] != res[i])
>+	abort ();
>+    }
>+
>+  return 0;
>+}
>+
>+int main (void)
>+{
>+  check_vect ();
>+
>+  return main1 ();
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"
>{xfail vect_no_align } } } */
>+/* { dg-final { scan-tree-dump-times "dependence distance negative" 1
>"vect"  } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c.jj	2014-01-28
>14:12:08.235470812 +0100
>+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c	2014-01-28
>16:18:44.876376404 +0100
>@@ -0,0 +1,187 @@
>+/* { dg-require-effective-target vect_int } */
>+
>+#include <stdarg.h>
>+#include "tree-vect.h"
>+
>+#define N 64
>+
>+int ia[N + 1];
>+int ib[N + 1];
>+
>+/* Vectorizable. Dependence distance -1.  */
>+__attribute__((noinline)) void
>+f1 (void)
>+{
>+  int i;
>+  for (i = 0; i < N; i++)
>+    {
>+      ia[i + 1] = 1;
>+      ib[i] = ia[i];
>+    }
>+}
>+
>+/* Not vectorizable due to data dependence: dependence distance 1.  */
>+__attribute__((noinline)) void
>+f2 (void)
>+{
>+  int i;
>+  for (i = 0; i < N; i++)
>+    {
>+      ia[i] = 1;
>+      ib[i] = ia[i + 1];
>+    }
>+}
>+
>+/* Not vectorizable due to data dependence: dependence distance 1.  */
>+__attribute__((noinline)) void
>+f3 (void)
>+{
>+  int i;
>+  for (i = N - 1; i >= 0; i--)
>+    {
>+      ia[i + 1] = 1;
>+      ib[i] = ia[i];
>+    }
>+}
>+
>+/* Vectorizable. Dependence distance -1.  */
>+__attribute__((noinline)) void
>+f4 (void)
>+{
>+  int i;
>+  for (i = N - 1; i >= 0; i--)
>+    {
>+      ia[i] = 1;
>+      ib[i] = ia[i + 1];
>+    }
>+}
>+
>+/* Vectorizable. Dependence distance -1.  */
>+__attribute__((noinline)) void
>+f5 (void)
>+{
>+  int i;
>+  for (i = 0; i < N; i++)
>+    {
>+      ia[i + 1] = 1;
>+      ia[i] = 2;
>+    }
>+}
>+
>+/* Not vectorizable due to data dependence: dependence distance 1.  */
>+__attribute__((noinline)) void
>+f6 (void)
>+{
>+  int i;
>+  for (i = 0; i < N; i++)
>+    {
>+      ia[i] = 1;
>+      ia[i + 1] = 2;
>+    }
>+}
>+
>+/* Not vectorizable due to data dependence: dependence distance 1.  */
>+__attribute__((noinline)) void
>+f7 (void)
>+{
>+  int i;
>+  for (i = N - 1; i >= 0; i--)
>+    {
>+      ia[i + 1] = 1;
>+      ia[i] = 2;
>+    }
>+}
>+
>+/* Vectorizable. Dependence distance -1.  */
>+__attribute__((noinline)) void
>+f8 (void)
>+{
>+  int i;
>+  for (i = N - 1; i >= 0; i--)
>+    {
>+      ia[i] = 1;
>+      ia[i + 1] = 2;
>+    }
>+}
>+
>+__attribute__ ((noinline)) int
>+main1 (void)
>+{
>+  int i, j;
>+
>+  for (j = 0; j < 8; j++)
>+    {
>+      for (i = 0; i <= N; i++)
>+	{
>+	  ia[i] = i + 3;
>+	  ib[i] = i + N + 3;
>+	  asm ("");
>+	}
>+
>+      switch (j)
>+	{
>+	case 0: f1 (); break;
>+	case 1: f2 (); break;
>+	case 2: f3 (); break;
>+	case 3: f4 (); break;
>+	case 4: f5 (); break;
>+	case 5: f6 (); break;
>+	case 6: f7 (); break;
>+	case 7: f8 (); break;
>+	}
>+
>+      for (i = 0; i <= N; i++)
>+	{
>+	  int ea = i + 3;
>+	  int eb = i + N + 3;
>+	  switch (j)
>+	    {
>+	    case 0:
>+	      if (i) ea = 1;
>+	      if (i == 0) eb = 3;
>+	      else if (i != N) eb = 1;
>+	      break;
>+	    case 1:
>+	      if (i != N) ea = 1;
>+	      if (i != N) eb = i + 4;
>+	      break;
>+	    case 2:
>+	      if (i) ea = 1;
>+	      if (i != N) eb = i + 3;
>+	      break;
>+	    case 3:
>+	      if (i != N) ea = 1;
>+	      if (i < N - 1) eb = 1;
>+	      else if (i == N - 1) eb = 67;
>+	      break;
>+	    case 4:
>+	      ea = 1 + (i != N);
>+	      break;
>+	    case 5:
>+	      ea = 2 - (i != N);
>+	      break;
>+	    case 6:
>+	      ea = 1 + (i == 0);
>+	      break;
>+	    case 7:
>+	      ea = 2 - (i == 0);
>+	      break;
>+	    }
>+	  if (ia[i] != ea || ib[i] != eb)
>+	    abort ();
>+	}
>+    }
>+
>+  return 0;
>+}
>+
>+int main ()
>+{
>+  check_vect ();
>+
>+  return main1 ();
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect"
>{xfail vect_no_align } } } */
>+/* { dg-final { scan-tree-dump-times "dependence distance negative" 4
>"vect"  } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.dg/vect/pr59594.c.jj	2014-01-28
>14:06:10.819303452 +0100
>+++ gcc/testsuite/gcc.dg/vect/pr59594.c	2014-01-28 14:06:10.818303424
>+0100
>@@ -0,0 +1,31 @@
>+/* PR tree-optimization/59594 */
>+
>+#include "tree-vect.h"
>+
>+#define N 1024
>+int b[N + 1];
>+
>+int
>+main ()
>+{
>+  int i;
>+  check_vect ();
>+  for (i = 0; i < N + 1; i++)
>+    {
>+      b[i] = i;
>+      asm ("");
>+    }
>+  for (i = N; i >= 0; i--)
>+    {
>+      b[i + 1] = b[i];
>+      b[i] = 1;
>+    }
>+  if (b[0] != 1)
>+    __builtin_abort ();
>+  for (i = 0; i < N; i++)
>+    if (b[i + 1] != i)
>+      __builtin_abort ();
>+  return 0;
>+}
>+
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>
>
>	Jakub




More information about the Gcc-patches mailing list