PATCH: PR target/42542: Vectorizer produces incorrect results on max of signed integers

H.J. Lu hjl.tools@gmail.com
Thu Jan 7 19:51:00 GMT 2010


On Tue, Jan 5, 2010 at 12:31 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On 01/05/2010 08:47 PM, H.J. Lu wrote:
>
>>>> Subtracting 0x80000000 from a number in the range 0 to 0xFFFFFFFF will
>>>> produce a result in the range -0x80000000 to 0x7FFFFFFF which is exactly
>>>> two's complement. XOR just happens to give the same result.
>>>>
>>>> It's a very well known trick IMNSHO. Please provide a counterexample if
>>>> you think there is one.
>>>>
>>>
>>> I don't have one. 0x80000000 was all that worried me.
>>>
>>> H.J., can you please revert your patch and fix the PR with Paolo's
>>> proposed
>>> solution?
>>>
>>>
>>
>> Here is the patch. OK for trunk?
>>
>
>> 2010-01-05  Paolo Bonzini  <bonzini@gnu.org>
>>             H.J. Lu  <hongjiu.lu@intel.com>
>>
>>     PR target/42542
>>     * config/i386/i386.c (ix86_expand_int_vcond): Convert GTU to
>>     GT for V4SI and V2DI by subtracting (-(INT MAX) - 1) from both
>>     operands to make them signed.
>>     * config/i386/sse.md (umaxv4si3): Revert the last change.
>>     (umin<mode>3): Likewise.
>>     (uminv8hi3): Removed.
>>     (uminv4si3): Likewise.
>>
>> gcc/testsuite/
>>
>> 2010-01-05  H.J. Lu  <hongjiu.lu@intel.com>
>>
>>     * gcc.target/i386/pr42542-1.c (res): Make it 8 elements.
>>
>
> This is OK for mainline and release branches after a couple of days without
> problems.
>

I am checking this fix into 4.3/4.4.

Thanks.


-- 
H.J.
-------------- next part --------------
gcc/

2010-01-05  H.J. Lu  <hongjiu.lu@intel.com>

	Backport from mainline
	2010-01-05  Paolo Bonzini  <bonzini@gnu.org>
		    H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* config/i386/i386.c (ix86_expand_int_vcond): Convert GTU to GT
	for V4SI and V2DI by subtracting (-(INT MAX) - 1) from both
	operands to make them signed.

gcc/testsuite/

2010-01-05  H.J. Lu  <hongjiu.lu@intel.com>

	Backport from mainline
	2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* gcc.target/i386/pr42542-1.c: New.
	* gcc.target/i386/pr42542-1a.c: Likewise.
	* gcc.target/i386/pr42542-1b.c: Likewise.
	* gcc.target/i386/pr42542-2.c: Likewise.
	* gcc.target/i386/pr42542-2a.c: Likewise.
	* gcc.target/i386/pr42542-2b.c: Likewise.
	* gcc.target/i386/pr42542-3.c: Likewise.
	* gcc.target/i386/pr42542-3a.c: Likewise.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d9f9b8c..25336d9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13418,28 +13418,22 @@ ix86_expand_int_vcond (rtx operands[])
 	case V2DImode:
 	  {
 	    rtx t1, t2, mask;
+	    rtx (*gen_sub3) (rtx, rtx, rtx);
 
-	    /* Perform a parallel modulo subtraction.  */
-	    t1 = gen_reg_rtx (mode);
-	    emit_insn ((mode == V4SImode
-			? gen_subv4si3
-			: gen_subv2di3) (t1, cop0, cop1));
-
-	    /* Extract the original sign bit of op0.  */
+	    /* Subtract (-(INT MAX) - 1) from both operands to make
+	       them signed.  */
 	    mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
 					    true, false);
-	    t2 = gen_reg_rtx (mode);
-	    emit_insn ((mode == V4SImode
-			? gen_andv4si3
-			: gen_andv2di3) (t2, cop0, mask));
+	    gen_sub3 = (mode == V4SImode
+			? gen_subv4si3 : gen_subv2di3);
+	    t1 = gen_reg_rtx (mode);
+	    emit_insn (gen_sub3 (t1, cop0, mask));
 
-	    /* XOR it back into the result of the subtraction.  This results
-	       in the sign bit set iff we saw unsigned underflow.  */
-	    x = gen_reg_rtx (mode);
-	    emit_insn ((mode == V4SImode
-			? gen_xorv4si3
-			: gen_xorv2di3) (x, t1, t2));
+	    t2 = gen_reg_rtx (mode);
+	    emit_insn (gen_sub3 (t2, cop1, mask));
 
+	    cop0 = t1;
+	    cop1 = t2;
 	    code = GT;
 	  }
 	  break;
@@ -13451,6 +13445,8 @@ ix86_expand_int_vcond (rtx operands[])
 	  emit_insn (gen_rtx_SET (VOIDmode, x,
 				  gen_rtx_US_MINUS (mode, cop0, cop1)));
 
+	  cop0 = x;
+	  cop1 = CONST0_RTX (mode);
 	  code = EQ;
 	  negate = !negate;
 	  break;
@@ -13458,9 +13454,6 @@ ix86_expand_int_vcond (rtx operands[])
 	default:
 	  gcc_unreachable ();
 	}
-
-      cop0 = x;
-      cop1 = CONST0_RTX (mode);
     }
 
   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1a.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1b.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
+
+/* { dg-final { scan-assembler "pmaxud" } } */
+/* { dg-final { scan-assembler "pminud" } } */
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1.c	2010-01-05 11:27:44.000000000 -0800
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned int v1[] __attribute__ ((aligned(16))) =
+{
+  0x80000000, 1, 0xa0000000, 2,
+  3, 0xd0000000, 0xf0000000, 0xe0000000
+};
+unsigned int v2[] __attribute__ ((aligned(16))) =
+{
+  4, 0xb0000000, 5, 0xc0000000,
+  0xd0000000, 6, 7, 8
+};
+
+unsigned int max[] =
+{
+  0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
+  0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
+};
+
+unsigned int min[] =
+{
+  4, 1, 5, 2,
+  3, 6, 7, 8
+};
+
+unsigned int res[8] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 8; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 8; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 8; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 8; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2a.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2b.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
+
+/* { dg-final { scan-assembler "pmaxuw" } } */
+/* { dg-final { scan-assembler "pminuw" } } */
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned short v1[] __attribute__ ((aligned(16))) =
+{
+  0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
+  3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
+};
+unsigned short v2[] __attribute__ ((aligned(16))) =
+{
+  4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
+  0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
+};
+
+unsigned short max[] =
+{
+  0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
+  0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
+};
+
+unsigned short min[] =
+{
+  4, 40, 1, 10, 5, 50, 2, 20,
+  3, 30, 6, 65, 7, 75, 25, 30
+};
+
+unsigned short res[16] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 16; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 16; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-3a.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#include "pr42542-3.c"
+
+/* { dg-final { scan-assembler "pmaxub" } } */
+/* { dg-final { scan-assembler "pminub" } } */
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-3.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned char v1[] __attribute__ ((aligned(16))) =
+{
+  0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
+  0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
+  3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
+  0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
+};
+unsigned char v2[] __attribute__ ((aligned(16))) =
+{
+  4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
+  5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
+  0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
+  7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char max[] =
+{
+  0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
+  0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
+  0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
+  0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char min[] =
+{
+  4, 44, 40, 48, 1, 15, 10, 15,
+  5, 55, 50, 51, 2, 25, 20, 35,
+  3, 34, 30, 36, 6, 61, 65, 68,
+  7, 76, 75, 81, 25, 34, 30, 40
+};
+
+unsigned char res[32] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 32; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 32; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}
-------------- next part --------------
gcc/

2010-01-05  H.J. Lu  <hongjiu.lu@intel.com>

	Backport from mainline
	2010-01-05  Paolo Bonzini  <bonzini@gnu.org>
		    H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* config/i386/i386.c (ix86_expand_int_vcond): Convert GTU to GT
	for V4SI and V2DI by subtracting (-(INT MAX) - 1) from both
	operands to make them signed.

gcc/testsuite/

2010-01-05  H.J. Lu  <hongjiu.lu@intel.com>

	Backport from mainline
	2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* gcc.target/i386/pr42542-1.c: New.
	* gcc.target/i386/pr42542-1a.c: Likewise.
	* gcc.target/i386/pr42542-1b.c: Likewise.
	* gcc.target/i386/pr42542-2.c: Likewise.
	* gcc.target/i386/pr42542-2a.c: Likewise.
	* gcc.target/i386/pr42542-2b.c: Likewise.
	* gcc.target/i386/pr42542-3.c: Likewise.
	* gcc.target/i386/pr42542-3a.c: Likewise.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6bb5fc6..de40c10 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15696,8 +15696,9 @@ ix86_expand_int_vcond (rtx operands[])
 	    }
 	}
 
-      /* Unsigned parallel compare is not supported by the hardware.  Play some
-	 tricks to turn this into a signed comparison against 0.  */
+      /* Unsigned parallel compare is not supported by the hardware.
+	 Play some tricks to turn this into a signed comparison
+	 against 0.  */
       if (code == GTU)
 	{
 	  cop0 = force_reg (mode, cop0);
@@ -15706,32 +15707,26 @@ ix86_expand_int_vcond (rtx operands[])
 	    {
 	    case V4SImode:
 	    case V2DImode:
-	      {
-		rtx t1, t2, mask;
-
-		/* Perform a parallel modulo subtraction.  */
-		t1 = gen_reg_rtx (mode);
-		emit_insn ((mode == V4SImode
-			    ? gen_subv4si3
-			    : gen_subv2di3) (t1, cop0, cop1));
-
-		/* Extract the original sign bit of op0.  */
-		mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
-						true, false);
-		t2 = gen_reg_rtx (mode);
-		emit_insn ((mode == V4SImode
-			    ? gen_andv4si3
-			    : gen_andv2di3) (t2, cop0, mask));
-
-		/* XOR it back into the result of the subtraction.  This results
-		   in the sign bit set iff we saw unsigned underflow.  */
-		x = gen_reg_rtx (mode);
-		emit_insn ((mode == V4SImode
-			    ? gen_xorv4si3
-			    : gen_xorv2di3) (x, t1, t2));
-
-		code = GT;
-	      }
+		{
+		  rtx t1, t2, mask;
+		  rtx (*gen_sub3) (rtx, rtx, rtx);
+
+		  /* Subtract (-(INT MAX) - 1) from both operands to make
+		     them signed.  */
+		  mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
+						  true, false);
+		  gen_sub3 = (mode == V4SImode
+			      ? gen_subv4si3 : gen_subv2di3);
+		  t1 = gen_reg_rtx (mode);
+		  emit_insn (gen_sub3 (t1, cop0, mask));
+
+		  t2 = gen_reg_rtx (mode);
+		  emit_insn (gen_sub3 (t2, cop1, mask));
+
+		  cop0 = t1;
+		  cop1 = t2;
+		  code = GT;
+		}
 	      break;
 
 	    case V16QImode:
@@ -15741,6 +15736,8 @@ ix86_expand_int_vcond (rtx operands[])
 	      emit_insn (gen_rtx_SET (VOIDmode, x,
 				      gen_rtx_US_MINUS (mode, cop0, cop1)));
 
+	      cop0 = x;
+	      cop1 = CONST0_RTX (mode);
 	      code = EQ;
 	      negate = !negate;
 	      break;
@@ -15748,9 +15745,6 @@ ix86_expand_int_vcond (rtx operands[])
 	    default:
 	      gcc_unreachable ();
 	    }
-
-	  cop0 = x;
-	  cop1 = CONST0_RTX (mode);
 	}
     }
 
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1a.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1b.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
+
+/* { dg-final { scan-assembler "pmaxud" } } */
+/* { dg-final { scan-assembler "pminud" } } */
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1.c	2010-01-05 11:27:44.000000000 -0800
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned int v1[] __attribute__ ((aligned(16))) =
+{
+  0x80000000, 1, 0xa0000000, 2,
+  3, 0xd0000000, 0xf0000000, 0xe0000000
+};
+unsigned int v2[] __attribute__ ((aligned(16))) =
+{
+  4, 0xb0000000, 5, 0xc0000000,
+  0xd0000000, 6, 7, 8
+};
+
+unsigned int max[] =
+{
+  0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
+  0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
+};
+
+unsigned int min[] =
+{
+  4, 1, 5, 2,
+  3, 6, 7, 8
+};
+
+unsigned int res[8] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 8; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 8; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 8; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 8; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2a.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2b.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
+
+/* { dg-final { scan-assembler "pmaxuw" } } */
+/* { dg-final { scan-assembler "pminuw" } } */
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned short v1[] __attribute__ ((aligned(16))) =
+{
+  0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
+  3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
+};
+unsigned short v2[] __attribute__ ((aligned(16))) =
+{
+  4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
+  0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
+};
+
+unsigned short max[] =
+{
+  0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
+  0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
+};
+
+unsigned short min[] =
+{
+  4, 40, 1, 10, 5, 50, 2, 20,
+  3, 30, 6, 65, 7, 75, 25, 30
+};
+
+unsigned short res[16] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 16; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 16; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 16; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-3a.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#include "pr42542-3.c"
+
+/* { dg-final { scan-assembler "pmaxub" } } */
+/* { dg-final { scan-assembler "pminub" } } */
--- /dev/null	2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-3.c	2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned char v1[] __attribute__ ((aligned(16))) =
+{
+  0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
+  0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
+  3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
+  0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
+};
+unsigned char v2[] __attribute__ ((aligned(16))) =
+{
+  4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
+  5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
+  0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
+  7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char max[] =
+{
+  0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
+  0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
+  0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
+  0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char min[] =
+{
+  4, 44, 40, 48, 1, 15, 10, 15,
+  5, 55, 50, 51, 2, 25, 20, 35,
+  3, 34, 30, 36, 6, 61, 65, 68,
+  7, 76, 75, 81, 25, 34, 30, 40
+};
+
+unsigned char res[32] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+  int i;
+
+  for (i = 0; i < 32; i++)
+    res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int err = 0;
+
+  find_max ();
+  for (i = 0; i < 32; i++)
+    if (res[i] != max[i])
+      err++;
+
+  find_min ();
+  for (i = 0; i < 32; i++)
+    if (res[i] != min[i])
+      err++;
+
+  if (err)
+    abort ();
+}


More information about the Gcc-patches mailing list