This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH: PR target/42542: Vectorizer produces incorrect results on max of signed intergers


Hi,

x86 integer vector subtraction insns don't set EFLAGS and there is no
parallel unsigned V4SI saturating subtraction. We can't easily detect
unsigned underflow. We can only enable umaxv4si3 and uminv4si3 for
SSE4.1 and XOP.  Tested on Linux/Intel Core i7.  OK for trunk and 4.3/4.4?

Thanks.


H.J.
---
gcc/

2009-12-30  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* config/i386/i386.c (ix86_expand_int_vcond): Don't convert
	GTU to GT for V4SI and V2DI.

	* config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
	(umin<mode>3): Enabled for SSE2 with V8HI as well as SSE4.1
	and XOP.

gcc/testsuite/

2009-12-30  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* gcc.dg/vect/vect-reduc-1.c: Don't scan "vectorized 3 loops"
	for i?86-*-* nor x86_64-*-*.

	* gcc.target/i386/pr42542-1.c: New.
	* gcc.target/i386/pr42542-1a.c: Likewise.
	* gcc.target/i386/pr42542-1b.c: Likewise.
	* gcc.target/i386/pr42542-2.c: Likewise.
	* gcc.target/i386/pr42542-2a.c: Likewise.
	* gcc.target/i386/pr42542-2b.c: Likewise.
	* gcc.target/i386/pr42542-3.c: Likewise.
	* gcc.target/i386/pr42542-3a.c: Likewise.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3e15b9d..fdcc6b0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -16252,37 +16252,6 @@ ix86_expand_int_vcond (rtx operands[])
 
 	  switch (mode)
 	    {
-	    case V4SImode:
-	    case V2DImode:
-		{
-		  rtx t1, t2, mask;
-
-		  /* Perform a parallel modulo subtraction.  */
-		  t1 = gen_reg_rtx (mode);
-		  emit_insn ((mode == V4SImode
-			      ? gen_subv4si3
-			      : gen_subv2di3) (t1, cop0, cop1));
-
-		  /* Extract the original sign bit of op0.  */
-		  mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
-						  true, false);
-		  t2 = gen_reg_rtx (mode);
-		  emit_insn ((mode == V4SImode
-			      ? gen_andv4si3
-			      : gen_andv2di3) (t2, cop0, mask));
-
-		  /* XOR it back into the result of the subtraction.
-		     This results in the sign bit set iff we saw
-		     unsigned underflow.  */
-		  x = gen_reg_rtx (mode);
-		  emit_insn ((mode == V4SImode
-			      ? gen_xorv4si3
-			      : gen_xorv2di3) (x, t1, t2));
-
-		  code = GT;
-		}
-	      break;
-
 	    case V16QImode:
 	    case V8HImode:
 	      /* Perform a parallel unsigned saturating subtraction.  */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9bbea80..ef4b21b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6138,7 +6138,7 @@
   [(set (match_operand:V4SI 0 "register_operand" "")
 	(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
 		   (match_operand:V4SI 2 "register_operand" "")))]
-  "TARGET_SSE2"
+  "TARGET_SSE4_1 || TARGET_XOP"
 {
   if (TARGET_SSE4_1)
     ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
@@ -6199,7 +6199,9 @@
   [(set (match_operand:SSEMODE24 0 "register_operand" "")
 	(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
 			(match_operand:SSEMODE24 2 "register_operand" "")))]
-  "TARGET_SSE2"
+  "(TARGET_SSE2 && <MODE>mode == V8HImode)
+   || TARGET_SSE4_1
+   || TARGET_XOP"
 {
   if (TARGET_SSE4_1)
     ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
index dbb154d..8da7fb0 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
@@ -52,5 +52,5 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail { vect_no_int_add || vect_no_int_max } } } } */
+/* { dg-final { if { ![ istarget i?86-*-* ] && ![ istarget x86_64-*-* ] } { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail { vect_no_int_add || vect_no_int_max } } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1.c b/gcc/testsuite/gcc.target/i386/pr42542-1.c
new file mode 100644
index 0000000..6e115c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-1.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned int v1[] __attribute__ ((aligned(16))) =
+{
+  0x80000000, 1, 0xa0000000, 2,
+  3, 0xd0000000, 0xf0000000, 0xe0000000
+};
+unsigned int v2[] __attribute__ ((aligned(16))) =
+{
+  4, 0xb0000000, 5, 0xc0000000,
+  0xd0000000, 6, 7, 8
+};
+
+unsigned int max[] =
+{
+  0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
+  0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
+};
+
+unsigned int min[] =
+{
+  4, 1, 5, 2,
+  3, 6, 7, 8
+};
+
+unsigned int res[16] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 8; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 8; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 8; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 8; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1a.c b/gcc/testsuite/gcc.target/i386/pr42542-1a.c
new file mode 100644
index 0000000..cd77175
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-1a.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1b.c b/gcc/testsuite/gcc.target/i386/pr42542-1b.c
new file mode 100644
index 0000000..7651f07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-1b.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
+
+/* { dg-final { scan-assembler "pmaxud" } } */
+/* { dg-final { scan-assembler "pminud" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2.c b/gcc/testsuite/gcc.target/i386/pr42542-2.c
new file mode 100644
index 0000000..fc59534
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned short v1[] __attribute__ ((aligned(16))) =
+{
+  0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
+  3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
+};
+unsigned short v2[] __attribute__ ((aligned(16))) =
+{
+  4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
+  0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
+};
+
+unsigned short max[] =
+{
+  0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
+  0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
+};
+
+unsigned short min[] =
+{
+  4, 40, 1, 10, 5, 50, 2, 20,
+  3, 30, 6, 65, 7, 75, 25, 30
+};
+
+unsigned short res[16] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 16; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 16; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2a.c b/gcc/testsuite/gcc.target/i386/pr42542-2a.c
new file mode 100644
index 0000000..bcefa9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-2a.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2b.c b/gcc/testsuite/gcc.target/i386/pr42542-2b.c
new file mode 100644
index 0000000..ddb539b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-2b.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
+
+/* { dg-final { scan-assembler "pmaxuw" } } */
+/* { dg-final { scan-assembler "pminuw" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-3.c b/gcc/testsuite/gcc.target/i386/pr42542-3.c
new file mode 100644
index 0000000..028d2f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-3.c
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned char v1[] __attribute__ ((aligned(16))) =
+{
+  0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
+  0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
+  3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
+  0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
+};
+unsigned char v2[] __attribute__ ((aligned(16))) =
+{
+  4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
+  5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
+  0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
+  7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char max[] =
+{
+  0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
+  0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
+  0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
+  0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char min[] =
+{
+  4, 44, 40, 48, 1, 15, 10, 15,
+  5, 55, 50, 51, 2, 25, 20, 35,
+  3, 34, 30, 36, 6, 61, 65, 68,
+  7, 76, 75, 81, 25, 34, 30, 40
+};
+
+unsigned char res[32] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 32; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 32; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-3a.c b/gcc/testsuite/gcc.target/i386/pr42542-3a.c
new file mode 100644
index 0000000..754e59e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-3a.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#include "pr42542-3.c"
+
+/* { dg-final { scan-assembler "pmaxub" } } */
+/* { dg-final { scan-assembler "pminub" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]