[PATCH] Re: vector shift regression on sparc

Jakub Jelinek jakub@redhat.com
Mon Oct 31 11:14:00 GMT 2011


On Sun, Oct 30, 2011 at 12:38:32AM -0400, David Miller wrote:
> gcc.dg/pr48616.c segfaults on sparc as of a day or two ago
> 
> vectorizable_shift() crashes because op1_vectype is NULL and
> we hit this code path:
> 
>   /* Vector shifted by vector.  */
>   if (!scalar_shift_arg)
>     {
>       optab = optab_for_tree_code (code, vectype, optab_vector);
>       if (vect_print_dump_info (REPORT_DETAILS))
> 	fprintf (vect_dump, "vector/vector shift/rotate found.");
> =>    if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
> 
> dt[1] is vect_external_def and slp_node is non-NULL.
> 
> Indeed, when the 'dt' arg to vect_is_simple_use_1() is
> vect_external_def *vectype will be set to NULL.

Here is a fix for that (and other issues that show up on these
testcases with -O3 -mxop if I disable all vector/scalar shift expanders
in sse.md).
For SLP it currently gives up more often than for loop vectorization.
I assume we could handle all dt[1] == vect_constant_def
and dt[1] == vect_external_def cases for SLP (and at least the former
even if the constants differ between nodes) by building the vectors by hand,
though the current vect_get_vec_defs/vect_get_vec_defs_for_stmt_copy can't
be used for that as is.

2011-10-28  Jakub Jelinek  <jakub@redhat.com>

	* tree-vect-stmts.c (vectorizable_shift): If op1 is vect_external_def
	in a loop and has different type from op0, cast it to op0's type
	before the loop first.  For slp give up.  Don't crash if op1_vectype
	is NULL.

	* gcc.dg/vshift-3.c: New test.
	* gcc.dg/vshift-4.c: New test.
	* gcc.dg/vshift-5.c: New test.

--- gcc/tree-vect-stmts.c.jj	2011-10-28 16:21:06.000000000 +0200
+++ gcc/tree-vect-stmts.c	2011-10-31 10:27:57.000000000 +0100
@@ -2446,7 +2446,10 @@ vectorizable_shift (gimple stmt, gimple_
       optab = optab_for_tree_code (code, vectype, optab_vector);
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "vector/vector shift/rotate found.");
-      if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+      if (!op1_vectype)
+	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
+      if (op1_vectype == NULL_TREE
+	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
 	{
 	  if (vect_print_dump_info (REPORT_DETAILS))
 	    fprintf (vect_dump, "unusable type for last operand in"
@@ -2480,9 +2483,28 @@ vectorizable_shift (gimple stmt, gimple_
               /* Unlike the other binary operators, shifts/rotates have
                  the rhs being int, instead of the same type as the lhs,
                  so make sure the scalar is the right type if we are
-                 dealing with vectors of short/char.  */
+		 dealing with vectors of long long/long/short/char.  */
               if (dt[1] == vect_constant_def)
                 op1 = fold_convert (TREE_TYPE (vectype), op1);
+	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
+						   TREE_TYPE (op1)))
+		{
+		  if (slp_node
+		      && TYPE_MODE (TREE_TYPE (vectype))
+			 != TYPE_MODE (TREE_TYPE (op1)))
+		    {
+		      if (vect_print_dump_info (REPORT_DETAILS))
+		      fprintf (vect_dump, "unusable type for last operand in"
+					  " vector/vector shift/rotate.");
+			return false;
+		    }
+		  if (vec_stmt && !slp_node)
+		    {
+		      op1 = fold_convert (TREE_TYPE (vectype), op1);
+		      op1 = vect_init_vector (stmt, op1,
+					      TREE_TYPE (vectype), NULL);
+		    }
+		}
             }
         }
     }
--- gcc/testsuite/gcc.dg/vshift-3.c.jj	2011-10-31 10:00:57.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-3.c	2011-10-31 10:00:42.000000000 +0100
@@ -0,0 +1,136 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <stdlib.h>
+
+#define N 64
+
+#ifndef TYPE1
+#define TYPE1 int
+#define TYPE2 long long
+#endif
+
+signed TYPE1 a[N], b, g[N];
+unsigned TYPE1 c[N], h[N];
+signed TYPE2 d[N], e, j[N];
+unsigned TYPE2 f[N], k[N];
+
+#ifndef S
+#define S(x) x
+#endif
+
+__attribute__((noinline)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    g[i] = a[i] << S (b);
+}
+
+__attribute__((noinline)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    g[i] = a[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    h[i] = c[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    j[i] = d[i] << S (e);
+}
+
+__attribute__((noinline)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    j[i] = d[i] >> S (e);
+}
+
+__attribute__((noinline)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    k[i] = f[i] >> S (e);
+}
+
+__attribute__((noinline)) void
+f7 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    j[i] = d[i] << S (b);
+}
+
+__attribute__((noinline)) void
+f8 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    j[i] = d[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f9 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    k[i] = f[i] >> S (b);
+}
+
+int
+main ()
+{
+  int i;
+  b = 7;
+  e = 12;
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      c[i] = (random () << 1) | (random () & 1);
+      a[i] = c[i];
+      d[i] = (random () << 1) | (random () & 1);
+      d[i] |= (unsigned long long) c[i] << 32;
+      f[i] = d[i];
+    }
+  f1 ();
+  f3 ();
+  f4 ();
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (g[i] != (signed TYPE1) (a[i] << S (b))
+	|| h[i] != (unsigned TYPE1) (c[i] >> S (b))
+	|| j[i] != (signed TYPE2) (d[i] << S (e))
+	|| k[i] != (unsigned TYPE2) (f[i] >> S (e)))
+      abort ();
+  f2 ();
+  f5 ();
+  f9 ();
+  for (i = 0; i < N; i++)
+    if (g[i] != (signed TYPE1) (a[i] >> S (b))
+	|| j[i] != (signed TYPE2) (d[i] >> S (e))
+	|| k[i] != (unsigned TYPE2) (f[i] >> S (b)))
+      abort ();
+  f7 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != (signed TYPE2) (d[i] << S (b)))
+      abort ();
+  f8 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != (signed TYPE2) (d[i] >> S (b)))
+      abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/vshift-4.c.jj	2011-10-31 10:01:08.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-4.c	2011-10-31 10:01:22.000000000 +0100
@@ -0,0 +1,6 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define S(x) 3
+
+#include "vshift-3.c"
--- gcc/testsuite/gcc.dg/vshift-5.c.jj	2011-10-31 10:33:09.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-5.c	2011-10-31 10:32:57.000000000 +0100
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+extern void abort (void);
+long long a[16];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 3;
+  a2 = a2 << 4;
+  a3 = a3 << 5;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;
+  a1 = a1 << x;
+  a2 = a2 << x;
+  a3 = a3 << x;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+int
+main ()
+{
+  a[0] = 4LL;
+  a[1] = 3LL;
+  a[2] = 2LL;
+  a[3] = 1LL;
+  f1 ();
+  if (a[0] != (4LL << 2) || a[1] != (3LL << 3)
+      || a[2] != (2LL << 4) || a[3] != (1LL << 5))
+    abort ();
+  f2 ();
+  if (a[0] != (4LL << 4) || a[1] != (3LL << 5)
+      || a[2] != (2LL << 6) || a[3] != (1LL << 7))
+    abort ();
+  f3 (3);
+  if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
+      || a[2] != (2LL << 9) || a[3] != (1LL << 10))
+    abort ();
+}


	Jakub



More information about the Gcc-patches mailing list