[PATCH] Don't lower VEC_PERM_EXPR if it can be expanded using vec_shr optab (PR target/68483)

Jakub Jelinek jakub@redhat.com
Mon Nov 23 19:26:00 GMT 2015


Hi!

The patches that removed VEC_RSHIFT_EXPR regressed the first of these
testcases on i?86/-msse2, because can_vec_perm_p returns false for that
permutation; indeed, as can_vec_perm_p is given only the mode and mask
indices, there is nothing it can do about it.  The former VEC_RSHIFT_EXPR
is a special VEC_PERM_EXPR with zero (bitwise, so not -0.0) as the second
argument, and we can use vec_shr in that case.  The expander knows that, but
veclower hasn't been taught about it; teaching it is what this patch does.

The patch also fixes up the shift_amt_for_vec_perm_mask function.  If the
first index is >= nelt, then it certainly is not a vector shift, but an
all zeros result (we should have folded it).  And when first is < nelt,
it doesn't make sense to mask the expected index: even for
first == nelt - 1, the largest expected value, first + nelt - 1, is
<= 2 * nelt - 1, so the mask never changes anything.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/5.3?

2015-11-23  Jakub Jelinek  <jakub@redhat.com>

	PR target/68483
	* tree-vect-generic.c (lower_vec_perm): If VEC_PERM_EXPR
	is valid vec_shr pattern, don't lower it even if can_vec_perm_p
	returns false.
	* optabs.c (shift_amt_for_vec_perm_mask): Return NULL_RTX
	whenever first is nelt or above.  Don't mask expected with
	2 * nelt - 1.

	* gcc.target/i386/pr68483-1.c: New test.
	* gcc.target/i386/pr68483-2.c: New test.

--- gcc/tree-vect-generic.c.jj	2015-11-23 13:29:41.959236201 +0100
+++ gcc/tree-vect-generic.c	2015-11-23 14:13:10.378094173 +0100
@@ -1272,6 +1272,30 @@ lower_vec_perm (gimple_stmt_iterator *gs
 	  update_stmt (stmt);
 	  return;
 	}
+      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
+	 vector as VEC1 and a right element shift MASK.  */
+      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
+	  != CODE_FOR_nothing
+	  && TREE_CODE (vec1) == VECTOR_CST
+	  && initializer_zerop (vec1)
+	  && sel_int[0]
+	  && sel_int[0] < elements)
+	{
+	  for (i = 1; i < elements; ++i)
+	    {
+	      unsigned int expected = i + sel_int[0];
+	      /* Indices into the second vector are all equivalent.  */
+	      if (MIN (elements, (unsigned) sel_int[i])
+		  != MIN (elements, expected))
+ 		break;
+	    }
+	  if (i == elements)
+	    {
+	      gimple_assign_set_rhs3 (stmt, mask);
+	      update_stmt (stmt);
+	      return;
+	    }
+	}
     }
   else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
     return;
--- gcc/optabs.c.jj	2015-11-23 13:29:41.706239800 +0100
+++ gcc/optabs.c	2015-11-23 13:33:14.857205132 +0100
@@ -5232,12 +5232,12 @@ shift_amt_for_vec_perm_mask (rtx sel)
     return NULL_RTX;
 
   first = INTVAL (CONST_VECTOR_ELT (sel, 0));
-  if (first >= 2*nelt)
+  if (first >= nelt)
     return NULL_RTX;
   for (i = 1; i < nelt; i++)
     {
       int idx = INTVAL (CONST_VECTOR_ELT (sel, i));
-      unsigned int expected = (i + first) & (2 * nelt - 1);
+      unsigned int expected = i + first;
       /* Indices into the second vector are all equivalent.  */
       if (idx < 0 || (MIN (nelt, (unsigned) idx) != MIN (nelt, expected)))
 	return NULL_RTX;
--- gcc/testsuite/gcc.target/i386/pr68483-1.c.jj	2015-11-23 14:27:54.213534756 +0100
+++ gcc/testsuite/gcc.target/i386/pr68483-1.c	2015-11-23 14:33:57.810362424 +0100
@@ -0,0 +1,22 @@
+/* PR target/68483 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2 -mno-sse3" } */
+
+void
+test (int *input, int *out, unsigned x1, unsigned x2)
+{
+  unsigned i, j;
+  unsigned end = x1;
+
+  for (i = j = 0; i < 1000; i++)
+    {
+      int sum = 0;
+      end += x2;
+      for (; j < end; j++)
+	sum += input[j];
+      out[i] = sum;
+    }
+}
+
+/* { dg-final { scan-assembler "psrldq\[^\n\r]*(8,|, 8)" { target ia32 } } } */
+/* { dg-final { scan-assembler "psrldq\[^\n\r]*(4,|, 4)" { target ia32 } } } */
--- gcc/testsuite/gcc.target/i386/pr68483-2.c.jj	2015-11-23 14:33:22.436865628 +0100
+++ gcc/testsuite/gcc.target/i386/pr68483-2.c	2015-11-23 14:34:33.716851638 +0100
@@ -0,0 +1,15 @@
+/* PR target/68483 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3" } */
+
+typedef int V __attribute__((vector_size (16)));
+
+void
+foo (V *a, V *b)
+{
+  V c = { 0, 0, 0, 0 };
+  V d = { 1, 2, 3, 4 };
+  *a = __builtin_shuffle (*b, c, d);
+}
+
+/* { dg-final { scan-assembler "psrldq\[^\n\r]*(4,|, 4)" } } */

	Jakub



More information about the Gcc-patches mailing list