This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix up vectorizer DDR_REVERSED_P handling (PR tree-optimization/59594, take 2)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Tue, 28 Jan 2014 23:32:40 +0100
- Subject: [PATCH] Fix up vectorizer DDR_REVERSED_P handling (PR tree-optimization/59594, take 2)
- Authentication-results: sourceware.org; auth=none
- References: <20140124211801 dot GZ892 at tucnak dot redhat dot com> <alpine dot LSU dot 2 dot 11 dot 1401281304000 dot 469 at zhemvz dot fhfr dot qr>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Tue, Jan 28, 2014 at 01:14:32PM +0100, Richard Biener wrote:
> > I admit I fully don't understand why exactly, but my experimentation so far
> > showed that for read/write and write/read ddrs it is ok and desirable to
> > ignore the dist > 0 && DDR_REVERSED_P (ddr) cases, but for write/write
> > ddrs it is undesirable. See the PR for further tests, perhaps I could
> > turn them into further testcases.
>
> Please.
New testcase in the patch.
> > - if (dist > 0 && DDR_REVERSED_P (ddr))
> > + if (dist > 0 && DDR_REVERSED_P (ddr)
> > + && (DR_IS_READ (dra) || DR_IS_READ (drb)))
>
> I think that'snot sufficient. It depends
> on the order of the stmts whether the dependence distance is really
> negative - we are trying to catch write-after-read negative distance
> here I think. We can't rely on the DDRs being formed in stmt order
> (anymore, at least since 4.9 where we start to arbitrary re-order
> the vector of DRs).
As discussed on IRC, the actual bug was that vect_analyze_data_ref_accesses
reordered the data references before DDRs were constructed, thus DDR_A
wasn't necessarily before DDR_B lexically in the loop and thus using
DDR_REVERSED_P bit didn't really reflect whether it is negative or positive
distance.
Fixed by making sure the data refs aren't reordered (well, they are
reordered on a copy of the vector only for the purposes of
vect_analyze_data_ref_accesses). Bootstrapped/regtested on x86_64-linux
and i686-linux, ok for trunk?
2014-01-28 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/59594
* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Sort
a copy of the datarefs vector rather than the vector itself.
* gcc.dg/vect/no-vfa-vect-depend-2.c: New test.
* gcc.dg/vect/no-vfa-vect-depend-3.c: New test.
* gcc.dg/vect/pr59594.c: New test.
--- gcc/tree-vect-data-refs.c.jj 2014-01-23 10:52:26.766346677 +0100
+++ gcc/tree-vect-data-refs.c 2014-01-28 16:11:34.371698307 +0100
@@ -2484,19 +2484,21 @@ vect_analyze_data_ref_accesses (loop_vec
return true;
/* Sort the array of datarefs to make building the interleaving chains
- linear. */
- qsort (datarefs.address (), datarefs.length (),
+ linear. Don't modify the original vector's order, it is needed for
+ determining what dependencies are reversed. */
+ vec<data_reference_p> datarefs_copy = datarefs.copy ();
+ qsort (datarefs_copy.address (), datarefs_copy.length (),
sizeof (data_reference_p), dr_group_sort_cmp);
/* Build the interleaving chains. */
- for (i = 0; i < datarefs.length () - 1;)
+ for (i = 0; i < datarefs_copy.length () - 1;)
{
- data_reference_p dra = datarefs[i];
+ data_reference_p dra = datarefs_copy[i];
stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
stmt_vec_info lastinfo = NULL;
- for (i = i + 1; i < datarefs.length (); ++i)
+ for (i = i + 1; i < datarefs_copy.length (); ++i)
{
- data_reference_p drb = datarefs[i];
+ data_reference_p drb = datarefs_copy[i];
stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
/* ??? Imperfect sorting (non-compatible types, non-modulo
@@ -2573,7 +2575,7 @@ vect_analyze_data_ref_accesses (loop_vec
}
}
- FOR_EACH_VEC_ELT (datarefs, i, dr)
+ FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
&& !vect_analyze_data_ref_access (dr))
{
@@ -2588,9 +2590,13 @@ vect_analyze_data_ref_accesses (loop_vec
continue;
}
else
- return false;
+ {
+ datarefs_copy.release ();
+ return false;
+ }
}
+ datarefs_copy.release ();
return true;
}
--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c.jj 2014-01-28 14:06:10.818303424 +0100
+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c 2014-01-28 14:06:10.818303424 +0100
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 17
+
+int ia[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0};
+int ib[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0};
+int res[N] = {48,192,180,168,156,144,132,120,108,96,84,72,60,48,36,24,12};
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i;
+
+ /* Not vectorizable due to data dependence: dependence distance 1. */
+ for (i = N - 1; i >= 0; i--)
+ {
+ ia[i] = ia[i+1] * 4;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ia[i] != 0)
+ abort ();
+ }
+
+ /* Vectorizable. Dependence distance -1. */
+ for (i = N - 1; i >= 0; i--)
+ {
+ ib[i+1] = ib[i] * 4;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ib[i] != res[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c.jj 2014-01-28 14:12:08.235470812 +0100
+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c 2014-01-28 16:18:44.876376404 +0100
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+int ia[N + 1];
+int ib[N + 1];
+
+/* Vectorizable. Dependence distance -1. */
+__attribute__((noinline)) void
+f1 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ ia[i + 1] = 1;
+ ib[i] = ia[i];
+ }
+}
+
+/* Not vectorizable due to data dependence: dependence distance 1. */
+__attribute__((noinline)) void
+f2 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = 1;
+ ib[i] = ia[i + 1];
+ }
+}
+
+/* Not vectorizable due to data dependence: dependence distance 1. */
+__attribute__((noinline)) void
+f3 (void)
+{
+ int i;
+ for (i = N - 1; i >= 0; i--)
+ {
+ ia[i + 1] = 1;
+ ib[i] = ia[i];
+ }
+}
+
+/* Vectorizable. Dependence distance -1. */
+__attribute__((noinline)) void
+f4 (void)
+{
+ int i;
+ for (i = N - 1; i >= 0; i--)
+ {
+ ia[i] = 1;
+ ib[i] = ia[i + 1];
+ }
+}
+
+/* Vectorizable. Dependence distance -1. */
+__attribute__((noinline)) void
+f5 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ ia[i + 1] = 1;
+ ia[i] = 2;
+ }
+}
+
+/* Not vectorizable due to data dependence: dependence distance 1. */
+__attribute__((noinline)) void
+f6 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = 1;
+ ia[i + 1] = 2;
+ }
+}
+
+/* Not vectorizable due to data dependence: dependence distance 1. */
+__attribute__((noinline)) void
+f7 (void)
+{
+ int i;
+ for (i = N - 1; i >= 0; i--)
+ {
+ ia[i + 1] = 1;
+ ia[i] = 2;
+ }
+}
+
+/* Vectorizable. Dependence distance -1. */
+__attribute__((noinline)) void
+f8 (void)
+{
+ int i;
+ for (i = N - 1; i >= 0; i--)
+ {
+ ia[i] = 1;
+ ia[i + 1] = 2;
+ }
+}
+
+__attribute__ ((noinline)) int
+main1 (void)
+{
+ int i, j;
+
+ for (j = 0; j < 8; j++)
+ {
+ for (i = 0; i <= N; i++)
+ {
+ ia[i] = i + 3;
+ ib[i] = i + N + 3;
+ asm ("");
+ }
+
+ switch (j)
+ {
+ case 0: f1 (); break;
+ case 1: f2 (); break;
+ case 2: f3 (); break;
+ case 3: f4 (); break;
+ case 4: f5 (); break;
+ case 5: f6 (); break;
+ case 6: f7 (); break;
+ case 7: f8 (); break;
+ }
+
+ for (i = 0; i <= N; i++)
+ {
+ int ea = i + 3;
+ int eb = i + N + 3;
+ switch (j)
+ {
+ case 0:
+ if (i) ea = 1;
+ if (i == 0) eb = 3;
+ else if (i != N) eb = 1;
+ break;
+ case 1:
+ if (i != N) ea = 1;
+ if (i != N) eb = i + 4;
+ break;
+ case 2:
+ if (i) ea = 1;
+ if (i != N) eb = i + 3;
+ break;
+ case 3:
+ if (i != N) ea = 1;
+ if (i < N - 1) eb = 1;
+ else if (i == N - 1) eb = 67;
+ break;
+ case 4:
+ ea = 1 + (i != N);
+ break;
+ case 5:
+ ea = 2 - (i != N);
+ break;
+ case 6:
+ ea = 1 + (i == 0);
+ break;
+ case 7:
+ ea = 2 - (i == 0);
+ break;
+ }
+ if (ia[i] != ea || ib[i] != eb)
+ abort ();
+ }
+ }
+
+ return 0;
+}
+
+int main ()
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" {xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "dependence distance negative" 4 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/pr59594.c.jj 2014-01-28 14:06:10.819303452 +0100
+++ gcc/testsuite/gcc.dg/vect/pr59594.c 2014-01-28 14:06:10.818303424 +0100
@@ -0,0 +1,31 @@
+/* PR tree-optimization/59594 */
+
+#include "tree-vect.h"
+
+#define N 1024
+int b[N + 1];
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ for (i = 0; i < N + 1; i++)
+ {
+ b[i] = i;
+ asm ("");
+ }
+ for (i = N; i >= 0; i--)
+ {
+ b[i + 1] = b[i];
+ b[i] = 1;
+ }
+ if (b[0] != 1)
+ __builtin_abort ();
+ for (i = 0; i < N; i++)
+ if (b[i + 1] != i)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Jakub