This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix AVX vector permutation handling (PR target/43107)


Hi!

The attached testcase ICEs, because
(insn 23 22 0 pr43107.c:13 (set (reg:V4SF 65 [ vect_perm_even.21 ])
        (vec_select:V4SF (reg:V4SF 58 [ vect_perm_even.17 ])
            (parallel [
                    (const_int 0 [0x0])
                    (const_int 2 [0x2])
                    (const_int 4 [0x4])
                    (const_int 6 [0x6])
                ]))) 2007 {*avx_vpermilpv4sf} (nil))
(which is invalid, given that reg 58 is just 4 elements long and thus
indexes 4 and 6 are out of bounds) is being simplified when reg 58 is found
to be initialized with a constant, and of course dereferencing the 4th and
6th elements of something that has only elements 0 through 3 leads to
accessing uninitialized memory or random garbage.

avx_vpermilp_parallel originally rejected indexes greater than or equal to
nelt (this got changed on 2009-11-30), and I believe the original was
correct.  So this patch reverts that change and instead, when
expand_vec_perm_1 optimizes the d->op0 == d->op1 case (BTW, why doesn't
this call rtx_equal_p (d->op0, d->op1)?), ensures that the indexes are in
range for the single operand by masking them with nelt - 1 (as both operands
are the same, index i and index i + nelt select the same element).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2010-02-18  Jakub Jelinek  <jakub@redhat.com>

	PR target/43107
	* config/i386/i386.c (avx_vpermilp_parallel): Reject indexes
	greater than or equal to nelt instead of 2 * nelt.
	(expand_vec_perm_1): When op0 and op1 are equal, mask indexes
	with nelt - 1.

	* gcc.target/i386/pr43107.c: New test.

--- gcc/config/i386/i386.c.jj	2010-02-17 10:52:00.000000000 +0100
+++ gcc/config/i386/i386.c	2010-02-18 10:32:47.000000000 +0100
@@ -24656,7 +24656,7 @@ avx_vpermilp_parallel (rtx par, enum mac
       if (!CONST_INT_P (er))
 	return 0;
       ei = INTVAL (er);
-      if (ei >= 2 * nelt)
+      if (ei >= nelt)
 	return 0;
       ipar[i] = ei;
     }
@@ -29248,7 +29248,10 @@ expand_vec_perm_1 (struct expand_vec_per
      input where SEL+CONCAT may not.  */
   if (d->op0 == d->op1)
     {
-      if (expand_vselect (d->target, d->op0, d->perm, nelt))
+      for (i = 0; i < nelt; i++)
+	perm2[i] = d->perm[i] & (nelt - 1);
+
+      if (expand_vselect (d->target, d->op0, perm2, nelt))
 	return true;
 
       /* There are plenty of patterns in sse.md that are written for
@@ -29259,8 +29262,8 @@ expand_vec_perm_1 (struct expand_vec_per
 	 every other permutation operand.  */
       for (i = 0; i < nelt; i += 2)
 	{
-	  perm2[i] = d->perm[i];
-	  perm2[i+1] = d->perm[i+1] + nelt;
+	  perm2[i] = d->perm[i] & (nelt - 1);
+	  perm2[i + 1] = (d->perm[i + 1] & (nelt - 1)) + nelt;
 	}
       if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
 	return true;
@@ -29268,11 +29271,12 @@ expand_vec_perm_1 (struct expand_vec_per
       /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
       if (nelt >= 4)
 	{
-	  memcpy (perm2, d->perm, nelt);
-	  for (i = 2; i < nelt; i += 4)
+	  for (i = 0; i < nelt; i += 4)
 	    {
-	      perm2[i+0] += nelt;
-	      perm2[i+1] += nelt;
+	      perm2[i + 0] = d->perm[i + 0] & (nelt - 1);
+	      perm2[i + 1] = d->perm[i + 1] & (nelt - 1);
+	      perm2[i + 2] = (d->perm[i + 2] & (nelt - 1)) + nelt;
+	      perm2[i + 3] = (d->perm[i + 3] & (nelt - 1)) + nelt;
 	    }
 
 	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
--- gcc/testsuite/gcc.target/i386/pr43107.c.jj	2010-02-18 10:40:28.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr43107.c	2010-02-18 10:41:16.000000000 +0100
@@ -0,0 +1,20 @@
+/* PR target/43107 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx" } */
+
+extern void bar (float b[4][4]);
+
+void
+foo ()
+{
+  float a[4][4], b[4][4];
+  int i, j;
+  for (i = 0; i < 4; i++)
+    {
+      for (j = 0; j < 4; j++)
+	a[i][j] = 0;
+      for (j = 0; j < 4; j++)
+	b[i][j] = a[i][j];
+    }
+  bar (b);
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]