This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix up vectorizer DDR_REVERSED_P handling (PR tree-optimization/59594, take 2)
- From: Richard Biener <rguenther at suse dot de>
- To: Jakub Jelinek <jakub at redhat dot com>,Jakub Jelinek <jakub at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 29 Jan 2014 09:27:11 +0100
- Subject: Re: [PATCH] Fix up vectorizer DDR_REVERSED_P handling (PR tree-optimization/59594, take 2)
- Authentication-results: sourceware.org; auth=none
- References: <20140124211801 dot GZ892 at tucnak dot redhat dot com> <alpine dot LSU dot 2 dot 11 dot 1401281304000 dot 469 at zhemvz dot fhfr dot qr> <20140128223240 dot GP892 at tucnak dot redhat dot com>
On January 28, 2014 11:32:40 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>On Tue, Jan 28, 2014 at 01:14:32PM +0100, Richard Biener wrote:
>> > I admit I fully don't understand why exactly, but my
>experimentation so far
>> > showed that for read/write and write/read ddrs it is ok and
>desirable to
>> > ignore the dist > 0 && DDR_REVERSED_P (ddr) cases, but for
>write/write
>> > ddrs it is undesirable. See the PR for further tests, perhaps I
>could
>> > turn them into further testcases.
>>
>> Please.
>
>New testcase in the patch.
>
>> > - if (dist > 0 && DDR_REVERSED_P (ddr))
>> > + if (dist > 0 && DDR_REVERSED_P (ddr)
>> > + && (DR_IS_READ (dra) || DR_IS_READ (drb)))
>>
>> I think that'snot sufficient. It depends
>> on the order of the stmts whether the dependence distance is really
>> negative - we are trying to catch write-after-read negative distance
>> here I think. We can't rely on the DDRs being formed in stmt order
>> (anymore, at least since 4.9 where we start to arbitrary re-order
>> the vector of DRs).
>
>As discussed on IRC, the actual bug was that
>vect_analyze_data_ref_accesses
>reordered the data references before DDRs were constructed, thus DDR_A
>wasn't necessarily before DDR_B lexically in the loop and thus using
>DDR_REVERSED_P bit didn't really reflect whether it is negative or
>positive
>distance.
>
>Fixed by making sure the data refs aren't reordered (well, they are
>reordered on a copy of the vector only for the purposes of
>vect_analyze_data_ref_accesses). Bootstrapped/regtested on
>x86_64-linux
>and i686-linux, ok for trunk?
Ok.
Thanks,
Richard.
>2014-01-28 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/59594
> * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Sort
> a copy of the datarefs vector rather than the vector itself.
>
> * gcc.dg/vect/no-vfa-vect-depend-2.c: New test.
> * gcc.dg/vect/no-vfa-vect-depend-3.c: New test.
> * gcc.dg/vect/pr59594.c: New test.
>
>--- gcc/tree-vect-data-refs.c.jj 2014-01-23 10:52:26.766346677 +0100
>+++ gcc/tree-vect-data-refs.c 2014-01-28 16:11:34.371698307 +0100
>@@ -2484,19 +2484,21 @@ vect_analyze_data_ref_accesses (loop_vec
> return true;
>
> /* Sort the array of datarefs to make building the interleaving chains
>- linear. */
>- qsort (datarefs.address (), datarefs.length (),
>+ linear. Don't modify the original vector's order, it is needed
>for
>+ determining what dependencies are reversed. */
>+ vec<data_reference_p> datarefs_copy = datarefs.copy ();
>+ qsort (datarefs_copy.address (), datarefs_copy.length (),
> sizeof (data_reference_p), dr_group_sort_cmp);
>
> /* Build the interleaving chains. */
>- for (i = 0; i < datarefs.length () - 1;)
>+ for (i = 0; i < datarefs_copy.length () - 1;)
> {
>- data_reference_p dra = datarefs[i];
>+ data_reference_p dra = datarefs_copy[i];
> stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
> stmt_vec_info lastinfo = NULL;
>- for (i = i + 1; i < datarefs.length (); ++i)
>+ for (i = i + 1; i < datarefs_copy.length (); ++i)
> {
>- data_reference_p drb = datarefs[i];
>+ data_reference_p drb = datarefs_copy[i];
> stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
>
> /* ??? Imperfect sorting (non-compatible types, non-modulo
>@@ -2573,7 +2575,7 @@ vect_analyze_data_ref_accesses (loop_vec
> }
> }
>
>- FOR_EACH_VEC_ELT (datarefs, i, dr)
>+ FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
> if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
> && !vect_analyze_data_ref_access (dr))
> {
>@@ -2588,9 +2590,13 @@ vect_analyze_data_ref_accesses (loop_vec
> continue;
> }
> else
>- return false;
>+ {
>+ datarefs_copy.release ();
>+ return false;
>+ }
> }
>
>+ datarefs_copy.release ();
> return true;
> }
>
>--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c.jj 2014-01-28
>14:06:10.818303424 +0100
>+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c 2014-01-28
>14:06:10.818303424 +0100
>@@ -0,0 +1,55 @@
>+/* { dg-require-effective-target vect_int } */
>+
>+#include <stdarg.h>
>+#include "tree-vect.h"
>+
>+#define N 17
>+
>+int ia[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0};
>+int ib[N] = {48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0};
>+int res[N] =
>{48,192,180,168,156,144,132,120,108,96,84,72,60,48,36,24,12};
>+
>+__attribute__ ((noinline))
>+int main1 ()
>+{
>+ int i;
>+
>+ /* Not vectorizable due to data dependence: dependence distance 1.
>*/
>+ for (i = N - 1; i >= 0; i--)
>+ {
>+ ia[i] = ia[i+1] * 4;
>+ }
>+
>+ /* check results: */
>+ for (i = 0; i < N; i++)
>+ {
>+ if (ia[i] != 0)
>+ abort ();
>+ }
>+
>+ /* Vectorizable. Dependence distance -1. */
>+ for (i = N - 1; i >= 0; i--)
>+ {
>+ ib[i+1] = ib[i] * 4;
>+ }
>+
>+ /* check results: */
>+ for (i = 0; i < N; i++)
>+ {
>+ if (ib[i] != res[i])
>+ abort ();
>+ }
>+
>+ return 0;
>+}
>+
>+int main (void)
>+{
>+ check_vect ();
>+
>+ return main1 ();
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"
>{xfail vect_no_align } } } */
>+/* { dg-final { scan-tree-dump-times "dependence distance negative" 1
>"vect" } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c.jj 2014-01-28
>14:12:08.235470812 +0100
>+++ gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-3.c 2014-01-28
>16:18:44.876376404 +0100
>@@ -0,0 +1,187 @@
>+/* { dg-require-effective-target vect_int } */
>+
>+#include <stdarg.h>
>+#include "tree-vect.h"
>+
>+#define N 64
>+
>+int ia[N + 1];
>+int ib[N + 1];
>+
>+/* Vectorizable. Dependence distance -1. */
>+__attribute__((noinline)) void
>+f1 (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ {
>+ ia[i + 1] = 1;
>+ ib[i] = ia[i];
>+ }
>+}
>+
>+/* Not vectorizable due to data dependence: dependence distance 1. */
>+__attribute__((noinline)) void
>+f2 (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ {
>+ ia[i] = 1;
>+ ib[i] = ia[i + 1];
>+ }
>+}
>+
>+/* Not vectorizable due to data dependence: dependence distance 1. */
>+__attribute__((noinline)) void
>+f3 (void)
>+{
>+ int i;
>+ for (i = N - 1; i >= 0; i--)
>+ {
>+ ia[i + 1] = 1;
>+ ib[i] = ia[i];
>+ }
>+}
>+
>+/* Vectorizable. Dependence distance -1. */
>+__attribute__((noinline)) void
>+f4 (void)
>+{
>+ int i;
>+ for (i = N - 1; i >= 0; i--)
>+ {
>+ ia[i] = 1;
>+ ib[i] = ia[i + 1];
>+ }
>+}
>+
>+/* Vectorizable. Dependence distance -1. */
>+__attribute__((noinline)) void
>+f5 (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ {
>+ ia[i + 1] = 1;
>+ ia[i] = 2;
>+ }
>+}
>+
>+/* Not vectorizable due to data dependence: dependence distance 1. */
>+__attribute__((noinline)) void
>+f6 (void)
>+{
>+ int i;
>+ for (i = 0; i < N; i++)
>+ {
>+ ia[i] = 1;
>+ ia[i + 1] = 2;
>+ }
>+}
>+
>+/* Not vectorizable due to data dependence: dependence distance 1. */
>+__attribute__((noinline)) void
>+f7 (void)
>+{
>+ int i;
>+ for (i = N - 1; i >= 0; i--)
>+ {
>+ ia[i + 1] = 1;
>+ ia[i] = 2;
>+ }
>+}
>+
>+/* Vectorizable. Dependence distance -1. */
>+__attribute__((noinline)) void
>+f8 (void)
>+{
>+ int i;
>+ for (i = N - 1; i >= 0; i--)
>+ {
>+ ia[i] = 1;
>+ ia[i + 1] = 2;
>+ }
>+}
>+
>+__attribute__ ((noinline)) int
>+main1 (void)
>+{
>+ int i, j;
>+
>+ for (j = 0; j < 8; j++)
>+ {
>+ for (i = 0; i <= N; i++)
>+ {
>+ ia[i] = i + 3;
>+ ib[i] = i + N + 3;
>+ asm ("");
>+ }
>+
>+ switch (j)
>+ {
>+ case 0: f1 (); break;
>+ case 1: f2 (); break;
>+ case 2: f3 (); break;
>+ case 3: f4 (); break;
>+ case 4: f5 (); break;
>+ case 5: f6 (); break;
>+ case 6: f7 (); break;
>+ case 7: f8 (); break;
>+ }
>+
>+ for (i = 0; i <= N; i++)
>+ {
>+ int ea = i + 3;
>+ int eb = i + N + 3;
>+ switch (j)
>+ {
>+ case 0:
>+ if (i) ea = 1;
>+ if (i == 0) eb = 3;
>+ else if (i != N) eb = 1;
>+ break;
>+ case 1:
>+ if (i != N) ea = 1;
>+ if (i != N) eb = i + 4;
>+ break;
>+ case 2:
>+ if (i) ea = 1;
>+ if (i != N) eb = i + 3;
>+ break;
>+ case 3:
>+ if (i != N) ea = 1;
>+ if (i < N - 1) eb = 1;
>+ else if (i == N - 1) eb = 67;
>+ break;
>+ case 4:
>+ ea = 1 + (i != N);
>+ break;
>+ case 5:
>+ ea = 2 - (i != N);
>+ break;
>+ case 6:
>+ ea = 1 + (i == 0);
>+ break;
>+ case 7:
>+ ea = 2 - (i == 0);
>+ break;
>+ }
>+ if (ia[i] != ea || ib[i] != eb)
>+ abort ();
>+ }
>+ }
>+
>+ return 0;
>+}
>+
>+int main ()
>+{
>+ check_vect ();
>+
>+ return main1 ();
>+}
>+
>+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect"
>{xfail vect_no_align } } } */
>+/* { dg-final { scan-tree-dump-times "dependence distance negative" 4
>"vect" } } */
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>--- gcc/testsuite/gcc.dg/vect/pr59594.c.jj 2014-01-28
>14:06:10.819303452 +0100
>+++ gcc/testsuite/gcc.dg/vect/pr59594.c 2014-01-28 14:06:10.818303424
>+0100
>@@ -0,0 +1,31 @@
>+/* PR tree-optimization/59594 */
>+
>+#include "tree-vect.h"
>+
>+#define N 1024
>+int b[N + 1];
>+
>+int
>+main ()
>+{
>+ int i;
>+ check_vect ();
>+ for (i = 0; i < N + 1; i++)
>+ {
>+ b[i] = i;
>+ asm ("");
>+ }
>+ for (i = N; i >= 0; i--)
>+ {
>+ b[i + 1] = b[i];
>+ b[i] = 1;
>+ }
>+ if (b[0] != 1)
>+ __builtin_abort ();
>+ for (i = 0; i < N; i++)
>+ if (b[i + 1] != i)
>+ __builtin_abort ();
>+ return 0;
>+}
>+
>+/* { dg-final { cleanup-tree-dump "vect" } } */
>
>
> Jakub