This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, x86] Fix pblendv expand.


On Wed, Dec 10, 2014 at 02:37:13AM +0300, Evgeny Stupachenko wrote:
> 2014-12-10  Evgeny Stupachenko  <evstupac@gmail.com>

I went ahead and filed a PR, so we have something to refer to in the
ChangeLog and name the testcases.

> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -47546,6 +47546,7 @@ expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
>      dcopy.op0 = dcopy.op1 = d->op1;
>    else
>      dcopy.op0 = dcopy.op1 = d->op0;
> +  dcopy.target = gen_reg_rtx (vmode);
>    dcopy.one_operand_p = true;
> 
>    for (i = 0; i < nelt; ++i)

This is incorrect if d->testing_p is true (can happen e.g. on the testcase
below; generally, when testing_p is true, we should not use gen_reg_rtx
because it can be called from GIMPLE optimizers before init_emit is called.
See PR57896 for details.
If d->testing_p is true, target is never the same as any of the operands,
all 3 are virtual registers, so there is no overlap (and even if there would
be, it should not matter).

> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/blend.c
> @@ -0,0 +1,63 @@
> +/* Test correctness of size 3 store groups permutation.  */
> +/* { dg-do run } */

The testcase as is does not fail even with unfixed compiler.  I had to add
some dg-additional-options.

> +  bar(2, q);
> +  for (i = 0; i < N; i++)
> +    if (q[0].a[i].f != 0 || q[0].a[i].c != i || q[0].a[i].p != -1)
> +      return 1;

Furthermore, tests in the testsuite should fail through abort ()
/ __builtin_abort () instead of just returning non-zero exit code.

So, here is complete updated patch I'm going to bootstrap/regtest.

2014-12-10  Jakub Jelinek  <jakub@redhat.com>
	    Evgeny Stupachenko  <evstupac@gmail.com>

	* config/i386/i386.c (expand_vec_perm_pblendv): If not testing_p,
	set dcopy.target to a new pseudo.

	* gcc.dg/vect/pr64252.c: New test.
	* gcc.dg/pr64252.c: New test.
	* gcc.target/i386/avx2-pr64252.c: New test.

--- gcc/config/i386/i386.c.jj	2014-12-10 09:45:15.000000000 +0100
+++ gcc/config/i386/i386.c	2014-12-10 11:38:12.530795610 +0100
@@ -47554,6 +47554,8 @@ expand_vec_perm_pblendv (struct expand_v
     dcopy.op0 = dcopy.op1 = d->op1;
   else
     dcopy.op0 = dcopy.op1 = d->op0;
+  if (!d->testing_p)
+    dcopy.target = gen_reg_rtx (vmode);
   dcopy.one_operand_p = true;
 
   for (i = 0; i < nelt; ++i)
--- gcc/testsuite/gcc.dg/vect/pr64252.c.jj	2014-12-10 11:42:47.669991028 +0100
+++ gcc/testsuite/gcc.dg/vect/pr64252.c	2014-12-10 11:47:43.895818223 +0100
@@ -0,0 +1,66 @@
+/* PR target/64252 */
+/* Test correctness of size 3 store groups permutation.  */
+/* { dg-do run } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#define N 50
+
+enum num3
+{
+  a, b, c
+};
+
+struct flags
+{
+  enum num3 f;
+  unsigned int c;
+  unsigned int p;
+};
+
+struct flagsN
+{
+  struct flags a[N];
+};
+
+void
+bar (int n, struct flagsN *ff)
+{
+  struct flagsN *fc;
+  for (fc = ff + 1; fc < (ff + n); fc++)
+    {
+      int i;
+      for (i = 0; i < N; ++i)
+       {
+         ff->a[i].f = 0;
+         ff->a[i].c = i;
+         ff->a[i].p = -1;
+       }
+      for (i = 0; i < n; i++)
+       {
+         int j;
+         for (j = 0; j < N - n; ++j)
+           {
+             fc->a[i + j].f = 0;
+             fc->a[i + j].c = j + i;
+             fc->a[i + j].p = -1;
+           }
+       }
+    }
+}
+
+struct flagsN q[2];
+
+int main()
+{
+  int i;
+  check_vect ();
+  bar(2, q);
+  for (i = 0; i < N; i++)
+    if (q[0].a[i].f != 0 || q[0].a[i].c != i || q[0].a[i].p != -1)
+      abort ();
+  return 0;
+}
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/pr64252.c.jj	2014-12-10 11:26:05.649467180 +0100
+++ gcc/testsuite/gcc.dg/pr64252.c	2014-12-10 11:25:27.057139759 +0100
@@ -0,0 +1,30 @@
+/* PR target/64252 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+typedef unsigned int V __attribute__((vector_size (32)));
+
+__attribute__((noinline, noclone)) void
+foo (V *a, V *b, V *c, V *d, V *e)
+{
+  V t = __builtin_shuffle (*a, *b, *c);
+  V v = __builtin_shuffle (t, (V) { ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U }, (V) { 0, 1, 8, 3, 4, 5, 9, 7 });
+  v = v + *d;
+  *e = v;
+}
+
+int
+main ()
+{
+  V a, b, c, d, e;
+  int i;
+  a = (V) { 1, 2, 3, 4, 5, 6, 7, 8 };
+  b = (V) { 9, 10, 11, 12, 13, 14, 15, 16 };
+  c = (V) { 1, 3, 5, 7, 9, 11, 13, 15 };
+  d = (V) { 0, 0, 0, 0, 0, 0, 0, 0 };
+  foo (&a, &b, &c, &d, &e);
+  for (i = 0; i < 8; i++)
+    if (e[i] != ((i == 2 || i == 6) ? ~0U : 2 + 2 * i))
+      __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/avx2-pr64252.c.jj	2014-12-10 11:27:36.868877426 +0100
+++ gcc/testsuite/gcc.target/i386/avx2-pr64252.c	2014-12-10 11:30:49.500520279 +0100
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define main() do_main ()
+
+#include "../../gcc.dg/pr64252.c"
+
+static void
+avx2_test (void)
+{
+  do_main ();
+}


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]