PATCH: PR target/42542: Vectorizer produces incorrect results on max of signed integers
H.J. Lu
hjl.tools@gmail.com
Thu Jan 7 19:51:00 GMT 2010
On Tue, Jan 5, 2010 at 12:31 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On 01/05/2010 08:47 PM, H.J. Lu wrote:
>
>>>> Subtracting 0x80000000 from a number in the range 0 to 0xFFFFFFFF will
>>>> produce a result in the range -0x80000000 to 0x7FFFFFFF which is exactly
>>>> two's complement.  XOR just happens to give the same result.
>>>>
>>>> It's a very well known trick IMNSHO.  Please provide a counterexample if
>>>> you think there is one.
>>>>
>>>
>>> I don't have one. 0x80000000 was all that worried me.
>>>
>>> H.J., can you please revert your patch and fix the PR with Paolo's
>>> proposed
>>> solution?
>>>
>>>
>>
>> Here is the patch. OK for trunk?
>>
>
>> 2010-01-05 Paolo Bonzini <bonzinI@gnu.rg> H.J. Lu <hongjiu.lu@intel.com>
>> PR target/42542 * config/i386/i386.c (ix86_expand_int_vcond): Convert GTU to
>> GT for V4SI and V2DI by subtracting (-(INT MAX) - 1) from both operands to
>> make them signed. * config/i386/sse.md (umaxv4si3): Revert the last change.
>> (umin<mode>3): Likewise. (uminv8hi3): Removed. (uminv4si3): Likewise.
>> gcc/testsuite/ 2010-01-05 H.J. Lu <hongjiu.lu@intel.com> *
>> gcc.target/i386/pr42542-1.c (res): Make it 8 elements.
>>
>
> This is OK for mainline and release branches after a couple of days without
> problems.
>
I am checking this fix into 4.3/4.4.
Thanks.
--
H.J.
-------------- next part --------------
gcc/
2010-01-05 H.J. Lu <hongjiu.lu@intel.com>
Backport from mainline
2010-01-05 Paolo Bonzini <bonzinI@gnu.rg>
H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* config/i386/i386.c (ix86_expand_int_vcond): Convert GTU to GT
for V4SI and V2DI by subtracting (-(INT MAX) - 1) from both
operands to make them signed.
gcc/testsuite/
2010-01-05 H.J. Lu <hongjiu.lu@intel.com>
Backport from mainline
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* gcc.target/i386/pr42542-1.c: New.
* gcc.target/i386/pr42542-1a.c: Likewise.
* gcc.target/i386/pr42542-1b.c: Likewise.
* gcc.target/i386/pr42542-2.c: Likewise.
* gcc.target/i386/pr42542-2a.c: Likewise.
* gcc.target/i386/pr42542-2b.c: Likewise.
* gcc.target/i386/pr42542-3.c: Likewise.
* gcc.target/i386/pr42542-3a.c: Likewise.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d9f9b8c..25336d9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13418,28 +13418,22 @@ ix86_expand_int_vcond (rtx operands[])
case V2DImode:
{
rtx t1, t2, mask;
+ rtx (*gen_sub3) (rtx, rtx, rtx);
- /* Perform a parallel modulo subtraction. */
- t1 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_subv4si3
- : gen_subv2di3) (t1, cop0, cop1));
-
- /* Extract the original sign bit of op0. */
+ /* Subtract (-(INT MAX) - 1) from both operands to make
+ them signed. */
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
true, false);
- t2 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_andv4si3
- : gen_andv2di3) (t2, cop0, mask));
+ gen_sub3 = (mode == V4SImode
+ ? gen_subv4si3 : gen_subv2di3);
+ t1 = gen_reg_rtx (mode);
+ emit_insn (gen_sub3 (t1, cop0, mask));
- /* XOR it back into the result of the subtraction. This results
- in the sign bit set iff we saw unsigned underflow. */
- x = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_xorv4si3
- : gen_xorv2di3) (x, t1, t2));
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_sub3 (t2, cop1, mask));
+ cop0 = t1;
+ cop1 = t2;
code = GT;
}
break;
@@ -13451,6 +13445,8 @@ ix86_expand_int_vcond (rtx operands[])
emit_insn (gen_rtx_SET (VOIDmode, x,
gen_rtx_US_MINUS (mode, cop0, cop1)));
+ cop0 = x;
+ cop1 = CONST0_RTX (mode);
code = EQ;
negate = !negate;
break;
@@ -13458,9 +13454,6 @@ ix86_expand_int_vcond (rtx operands[])
default:
gcc_unreachable ();
}
-
- cop0 = x;
- cop1 = CONST0_RTX (mode);
}
x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1a.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1b.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
+
+/* { dg-final { scan-assembler "pmaxud" } } */
+/* { dg-final { scan-assembler "pminud" } } */
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1.c 2010-01-05 11:27:44.000000000 -0800
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned int v1[] __attribute__ ((aligned(16))) =
+{
+ 0x80000000, 1, 0xa0000000, 2,
+ 3, 0xd0000000, 0xf0000000, 0xe0000000
+};
+unsigned int v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 0xb0000000, 5, 0xc0000000,
+ 0xd0000000, 6, 7, 8
+};
+
+unsigned int max[] =
+{
+ 0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
+ 0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
+};
+
+unsigned int min[] =
+{
+ 4, 1, 5, 2,
+ 3, 6, 7, 8
+};
+
+unsigned int res[8] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 8; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 8; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2a.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2b.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
+
+/* { dg-final { scan-assembler "pmaxuw" } } */
+/* { dg-final { scan-assembler "pminuw" } } */
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned short v1[] __attribute__ ((aligned(16))) =
+{
+ 0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
+ 3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
+};
+unsigned short v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
+ 0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
+};
+
+unsigned short max[] =
+{
+ 0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
+ 0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
+};
+
+unsigned short min[] =
+{
+ 4, 40, 1, 10, 5, 50, 2, 20,
+ 3, 30, 6, 65, 7, 75, 25, 30
+};
+
+unsigned short res[16] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 16; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 16; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-3a.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#include "pr42542-3.c"
+
+/* { dg-final { scan-assembler "pmaxub" } } */
+/* { dg-final { scan-assembler "pminub" } } */
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-3.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned char v1[] __attribute__ ((aligned(16))) =
+{
+ 0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
+ 0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
+ 3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
+ 0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
+};
+unsigned char v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
+ 5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
+ 0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
+ 7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char max[] =
+{
+ 0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
+ 0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
+ 0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
+ 0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char min[] =
+{
+ 4, 44, 40, 48, 1, 15, 10, 15,
+ 5, 55, 50, 51, 2, 25, 20, 35,
+ 3, 34, 30, 36, 6, 61, 65, 68,
+ 7, 76, 75, 81, 25, 34, 30, 40
+};
+
+unsigned char res[32] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 32; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 32; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
-------------- next part --------------
gcc/
2010-01-05 H.J. Lu <hongjiu.lu@intel.com>
Backport from mainline
2010-01-05 Paolo Bonzini <bonzinI@gnu.rg>
H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* config/i386/i386.c (ix86_expand_int_vcond): Convert GTU to GT
for V4SI and V2DI by subtracting (-(INT MAX) - 1) from both
operands to make them signed.
gcc/testsuite/
2010-01-05 H.J. Lu <hongjiu.lu@intel.com>
Backport from mainline
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
PR target/42542
* gcc.target/i386/pr42542-1.c: New.
* gcc.target/i386/pr42542-1a.c: Likewise.
* gcc.target/i386/pr42542-1b.c: Likewise.
* gcc.target/i386/pr42542-2.c: Likewise.
* gcc.target/i386/pr42542-2a.c: Likewise.
* gcc.target/i386/pr42542-2b.c: Likewise.
* gcc.target/i386/pr42542-3.c: Likewise.
* gcc.target/i386/pr42542-3a.c: Likewise.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6bb5fc6..de40c10 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15696,8 +15696,9 @@ ix86_expand_int_vcond (rtx operands[])
}
}
- /* Unsigned parallel compare is not supported by the hardware. Play some
- tricks to turn this into a signed comparison against 0. */
+ /* Unsigned parallel compare is not supported by the hardware.
+ Play some tricks to turn this into a signed comparison
+ against 0. */
if (code == GTU)
{
cop0 = force_reg (mode, cop0);
@@ -15706,32 +15707,26 @@ ix86_expand_int_vcond (rtx operands[])
{
case V4SImode:
case V2DImode:
- {
- rtx t1, t2, mask;
-
- /* Perform a parallel modulo subtraction. */
- t1 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_subv4si3
- : gen_subv2di3) (t1, cop0, cop1));
-
- /* Extract the original sign bit of op0. */
- mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
- true, false);
- t2 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_andv4si3
- : gen_andv2di3) (t2, cop0, mask));
-
- /* XOR it back into the result of the subtraction. This results
- in the sign bit set iff we saw unsigned underflow. */
- x = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_xorv4si3
- : gen_xorv2di3) (x, t1, t2));
-
- code = GT;
- }
+ {
+ rtx t1, t2, mask;
+ rtx (*gen_sub3) (rtx, rtx, rtx);
+
+ /* Subtract (-(INT MAX) - 1) from both operands to make
+ them signed. */
+ mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
+ true, false);
+ gen_sub3 = (mode == V4SImode
+ ? gen_subv4si3 : gen_subv2di3);
+ t1 = gen_reg_rtx (mode);
+ emit_insn (gen_sub3 (t1, cop0, mask));
+
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_sub3 (t2, cop1, mask));
+
+ cop0 = t1;
+ cop1 = t2;
+ code = GT;
+ }
break;
case V16QImode:
@@ -15741,6 +15736,8 @@ ix86_expand_int_vcond (rtx operands[])
emit_insn (gen_rtx_SET (VOIDmode, x,
gen_rtx_US_MINUS (mode, cop0, cop1)));
+ cop0 = x;
+ cop1 = CONST0_RTX (mode);
code = EQ;
negate = !negate;
break;
@@ -15748,9 +15745,6 @@ ix86_expand_int_vcond (rtx operands[])
default:
gcc_unreachable ();
}
-
- cop0 = x;
- cop1 = CONST0_RTX (mode);
}
}
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1a.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1b.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
+
+/* { dg-final { scan-assembler "pmaxud" } } */
+/* { dg-final { scan-assembler "pminud" } } */
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-1.c 2010-01-05 11:27:44.000000000 -0800
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned int v1[] __attribute__ ((aligned(16))) =
+{
+ 0x80000000, 1, 0xa0000000, 2,
+ 3, 0xd0000000, 0xf0000000, 0xe0000000
+};
+unsigned int v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 0xb0000000, 5, 0xc0000000,
+ 0xd0000000, 6, 7, 8
+};
+
+unsigned int max[] =
+{
+ 0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
+ 0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
+};
+
+unsigned int min[] =
+{
+ 4, 1, 5, 2,
+ 3, 6, 7, 8
+};
+
+unsigned int res[8] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 8; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 8; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2a.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2b.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
+
+/* { dg-final { scan-assembler "pmaxuw" } } */
+/* { dg-final { scan-assembler "pminuw" } } */
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-2.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned short v1[] __attribute__ ((aligned(16))) =
+{
+ 0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
+ 3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
+};
+unsigned short v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
+ 0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
+};
+
+unsigned short max[] =
+{
+ 0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
+ 0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
+};
+
+unsigned short min[] =
+{
+ 4, 40, 1, 10, 5, 50, 2, 20,
+ 3, 30, 6, 65, 7, 75, 25, 30
+};
+
+unsigned short res[16] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 16; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 16; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-3a.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#include "pr42542-3.c"
+
+/* { dg-final { scan-assembler "pmaxub" } } */
+/* { dg-final { scan-assembler "pminub" } } */
--- /dev/null 2010-01-05 14:44:43.237240771 -0800
+++ gcc-4.4/gcc/testsuite/gcc.target/i386/pr42542-3.c 2010-01-04 12:16:02.000000000 -0800
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned char v1[] __attribute__ ((aligned(16))) =
+{
+ 0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
+ 0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
+ 3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
+ 0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
+};
+unsigned char v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
+ 5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
+ 0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
+ 7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char max[] =
+{
+ 0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
+ 0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
+ 0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
+ 0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char min[] =
+{
+ 4, 44, 40, 48, 1, 15, 10, 15,
+ 5, 55, 50, 51, 2, 25, 20, 35,
+ 3, 34, 30, 36, 6, 61, 65, 68,
+ 7, 76, 75, 81, 25, 34, 30, 40
+};
+
+unsigned char res[32] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 32; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 32; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
More information about the Gcc-patches
mailing list