[PATCH] Re: vector shift regression on sparc
Jakub Jelinek
jakub@redhat.com
Mon Oct 31 11:14:00 GMT 2011
On Sun, Oct 30, 2011 at 12:38:32AM -0400, David Miller wrote:
> gcc.dg/pr48616.c segfaults on sparc as of a day or two ago
>
> vectorizable_shift() crashes because op1_vectype is NULL and
> we hit this code path:
>
> /* Vector shifted by vector. */
> if (!scalar_shift_arg)
> {
> optab = optab_for_tree_code (code, vectype, optab_vector);
> if (vect_print_dump_info (REPORT_DETAILS))
> fprintf (vect_dump, "vector/vector shift/rotate found.");
> => if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>
> dt[1] is vect_external_def and slp_node is non-NULL.
>
> Indeed, when the 'dt' arg to vect_is_simple_use_1() is
> vect_external_def *vectype will be set to NULL.
Here is a fix for that (and other issues that show up on these
testcases with -O3 -mxop if I disable all vector/scalar shift expanders
in sse.md).
For SLP it currently gives up more often than for loop vectorization,
I assume we could handle all dt[1] == vect_constant_def
and dt[2] == vect_external_def cases for SLP (and at least the former
even if the constants differ between nodes) by building the vectors by hand,
though the current vect_get_vec_defs/vect_get_vec_defs_for_stmt_copy can't
be used for that as is.
2011-10-28 Jakub Jelinek <jakub@redhat.com>
* tree-vect-stmts.c (vectorizable_shift): If op1 is vect_external_def
in a loop and has different type from op0, cast it to op0's type
before the loop first. For slp give up. Don't crash if op1_vectype
is NULL.
* gcc.dg/vshift-3.c: New test.
* gcc.dg/vshift-4.c: New test.
* gcc.dg/vshift-5.c: New test.
--- gcc/tree-vect-stmts.c.jj 2011-10-28 16:21:06.000000000 +0200
+++ gcc/tree-vect-stmts.c 2011-10-31 10:27:57.000000000 +0100
@@ -2446,7 +2446,10 @@ vectorizable_shift (gimple stmt, gimple_
optab = optab_for_tree_code (code, vectype, optab_vector);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vector/vector shift/rotate found.");
- if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+ if (!op1_vectype)
+ op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
+ if (op1_vectype == NULL_TREE
+ || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unusable type for last operand in"
@@ -2480,9 +2483,28 @@ vectorizable_shift (gimple stmt, gimple_
/* Unlike the other binary operators, shifts/rotates have
the rhs being int, instead of the same type as the lhs,
so make sure the scalar is the right type if we are
- dealing with vectors of short/char. */
+ dealing with vectors of long long/long/short/char. */
if (dt[1] == vect_constant_def)
op1 = fold_convert (TREE_TYPE (vectype), op1);
+ else if (!useless_type_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (op1)))
+ {
+ if (slp_node
+ && TYPE_MODE (TREE_TYPE (vectype))
+ != TYPE_MODE (TREE_TYPE (op1)))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "unusable type for last operand in"
+ " vector/vector shift/rotate.");
+ return false;
+ }
+ if (vec_stmt && !slp_node)
+ {
+ op1 = fold_convert (TREE_TYPE (vectype), op1);
+ op1 = vect_init_vector (stmt, op1,
+ TREE_TYPE (vectype), NULL);
+ }
+ }
}
}
}
--- gcc/testsuite/gcc.dg/vshift-3.c.jj 2011-10-31 10:00:57.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-3.c 2011-10-31 10:00:42.000000000 +0100
@@ -0,0 +1,136 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <stdlib.h>
+
+#define N 64
+
+#ifndef TYPE1
+#define TYPE1 int
+#define TYPE2 long long
+#endif
+
+signed TYPE1 a[N], b, g[N];
+unsigned TYPE1 c[N], h[N];
+signed TYPE2 d[N], e, j[N];
+unsigned TYPE2 f[N], k[N];
+
+#ifndef S
+#define S(x) x
+#endif
+
+__attribute__((noinline)) void
+f1 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ g[i] = a[i] << S (b);
+}
+
+__attribute__((noinline)) void
+f2 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ g[i] = a[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f3 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ h[i] = c[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f4 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ j[i] = d[i] << S (e);
+}
+
+__attribute__((noinline)) void
+f5 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ j[i] = d[i] >> S (e);
+}
+
+__attribute__((noinline)) void
+f6 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ k[i] = f[i] >> S (e);
+}
+
+__attribute__((noinline)) void
+f7 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ j[i] = d[i] << S (b);
+}
+
+__attribute__((noinline)) void
+f8 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ j[i] = d[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f9 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ k[i] = f[i] >> S (b);
+}
+
+int
+main ()
+{
+ int i;
+ b = 7;
+ e = 12;
+ for (i = 0; i < N; i++)
+ {
+ asm ("");
+ c[i] = (random () << 1) | (random () & 1);
+ a[i] = c[i];
+ d[i] = (random () << 1) | (random () & 1);
+ d[i] |= (unsigned long long) c[i] << 32;
+ f[i] = d[i];
+ }
+ f1 ();
+ f3 ();
+ f4 ();
+ f6 ();
+ for (i = 0; i < N; i++)
+ if (g[i] != (signed TYPE1) (a[i] << S (b))
+ || h[i] != (unsigned TYPE1) (c[i] >> S (b))
+ || j[i] != (signed TYPE2) (d[i] << S (e))
+ || k[i] != (unsigned TYPE2) (f[i] >> S (e)))
+ abort ();
+ f2 ();
+ f5 ();
+ f9 ();
+ for (i = 0; i < N; i++)
+ if (g[i] != (signed TYPE1) (a[i] >> S (b))
+ || j[i] != (signed TYPE2) (d[i] >> S (e))
+ || k[i] != (unsigned TYPE2) (f[i] >> S (b)))
+ abort ();
+ f7 ();
+ for (i = 0; i < N; i++)
+ if (j[i] != (signed TYPE2) (d[i] << S (b)))
+ abort ();
+ f8 ();
+ for (i = 0; i < N; i++)
+ if (j[i] != (signed TYPE2) (d[i] >> S (b)))
+ abort ();
+ return 0;
+}
--- gcc/testsuite/gcc.dg/vshift-4.c.jj 2011-10-31 10:01:08.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-4.c 2011-10-31 10:01:22.000000000 +0100
@@ -0,0 +1,6 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define S(x) 3
+
+#include "vshift-3.c"
--- gcc/testsuite/gcc.dg/vshift-5.c.jj 2011-10-31 10:33:09.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-5.c 2011-10-31 10:32:57.000000000 +0100
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+extern void abort (void);
+long long a[16];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+ long long a0, a1, a2, a3;
+ a0 = a[0];
+ a1 = a[1];
+ a2 = a[2];
+ a3 = a[3];
+ a0 = a0 << 2;
+ a1 = a1 << 3;
+ a2 = a2 << 4;
+ a3 = a3 << 5;
+ a[0] = a0;
+ a[1] = a1;
+ a[2] = a2;
+ a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+ long long a0, a1, a2, a3;
+ a0 = a[0];
+ a1 = a[1];
+ a2 = a[2];
+ a3 = a[3];
+ a0 = a0 << 2;
+ a1 = a1 << 2;
+ a2 = a2 << 2;
+ a3 = a3 << 2;
+ a[0] = a0;
+ a[1] = a1;
+ a[2] = a2;
+ a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+ long long a0, a1, a2, a3;
+ a0 = a[0];
+ a1 = a[1];
+ a2 = a[2];
+ a3 = a[3];
+ a0 = a0 << x;
+ a1 = a1 << x;
+ a2 = a2 << x;
+ a3 = a3 << x;
+ a[0] = a0;
+ a[1] = a1;
+ a[2] = a2;
+ a[3] = a3;
+}
+
+int
+main ()
+{
+ a[0] = 4LL;
+ a[1] = 3LL;
+ a[2] = 2LL;
+ a[3] = 1LL;
+ f1 ();
+ if (a[0] != (4LL << 2) || a[1] != (3LL << 3)
+ || a[2] != (2LL << 4) || a[3] != (1LL << 5))
+ abort ();
+ f2 ();
+ if (a[0] != (4LL << 4) || a[1] != (3LL << 5)
+ || a[2] != (2LL << 6) || a[3] != (1LL << 7))
+ abort ();
+ f3 (3);
+ if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
+ || a[2] != (2LL << 9) || a[3] != (1LL << 10))
+ abort ();
+}
Jakub
More information about the Gcc-patches
mailing list