[PATCH] Fix combiner on VEC_SELECT and ix86_expand_vector_set (PR rtl-optimization/21239)
Jakub Jelinek
jakub@redhat.com
Tue May 3 17:22:00 GMT 2005
Hi!
This patch actually includes 2 fixes, included together just because they
exhibit themselves on the same testcase.
The first one is a fix for ix86_expand_vector_set in V4SFmode:
given a TARGET vector ABCD, the function should replace its ELT-th element with X.
For ELT 0 and 1 it does that, but for 2 instead of ABXD it was actually
returning XBAD and for ELT 3 instead of ABCX returned XBCA.
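The second bug is in the combiner's VEC_SELECT simplification of a
VEC_CONCAT operand: because of a typo (the operands of the size/offset
comparison were swapped) it picked the wrong element (and offset could
go negative).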
Tested on x86_64-linux, ok for HEAD/4.0?
2005-05-03 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.c (ix86_expand_vector_set): Fix setting 3rd and 4th
item in V4SF mode.
PR rtl-optimization/21239
* combine.c (combine_simplify_rtx) <case VEC_SELECT>: Fix a typo.
* gcc.dg/i386-sse-11.c: New test.
--- gcc/config/i386/i386.c.jj 2005-05-02 14:56:24.000000000 +0200
+++ gcc/config/i386/i386.c 2005-05-03 18:19:45.000000000 +0200
@@ -17022,32 +17022,35 @@ ix86_expand_vector_set (bool mmx_ok, rtx
break;
case 1:
- /* tmp = op0 = A B C D */
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
-
- /* op0 = C C D D */
+ /* target = A A B B */
emit_insn (gen_sse_unpcklps (target, target, target));
-
- /* op0 = C C D X */
+ /* target = X A B B */
ix86_expand_vector_set (false, target, val, 0);
-
- /* op0 = A B X D */
+ /* target = A X C D */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (1), GEN_INT (0),
GEN_INT (2+4), GEN_INT (3+4)));
return;
case 2:
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
- ix86_expand_vector_set (false, target, val, 0);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B X D */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (0+4), GEN_INT (3+4)));
return;
case 3:
+ /* tmp = target = A B C D */
tmp = copy_to_reg (target);
- ix86_expand_vector_set (false, target, val, 0);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B C X */
emit_insn (gen_sse_shufps_1 (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (2+4), GEN_INT (0+4)));
--- gcc/combine.c.jj 2005-04-25 11:55:54.000000000 +0200
+++ gcc/combine.c 2005-05-03 15:59:29.000000000 +0200
@@ -4742,7 +4742,7 @@ combine_simplify_rtx (rtx x, enum machin
if (GET_CODE (op0) == VEC_CONCAT)
{
HOST_WIDE_INT op0_size = GET_MODE_SIZE (GET_MODE (XEXP (op0, 0)));
- if (op0_size < offset)
+ if (offset < op0_size)
op0 = XEXP (op0, 0);
else
{
--- gcc/testsuite/gcc.dg/i386-sse-11.c.jj 2005-05-03 16:08:18.000000000 +0200
+++ gcc/testsuite/gcc.dg/i386-sse-11.c 2005-05-03 18:22:42.000000000 +0200
@@ -0,0 +1,92 @@
+/* PR rtl-optimization/21239 */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse2" } */
+#include <emmintrin.h>
+#include "i386-cpuid.h"
+
+extern void abort (void);
+
+void
+foo (unsigned int x, double *y, const double *z)
+{
+ __m128d tmp;
+ while (x)
+ {
+ tmp = _mm_load_sd (z);
+ _mm_store_sd (y, tmp);
+ --x; ++z; ++y;
+ }
+}
+
+void
+bar (unsigned int x, float *y, const float *z)
+{
+ __m128 tmp;
+ unsigned int i;
+ for (i = 0; i < x; ++i)
+ {
+ tmp = (__m128) { *z, 0, 0, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 0);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i)
+ {
+ tmp = (__m128) { 0, *z, 0, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 1);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i)
+ {
+ tmp = (__m128) { 0, 0, *z, 0 };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 2);
+ ++z; ++y;
+ }
+ for (i = 0; i < x; ++i)
+ {
+ tmp = (__m128) { 0, 0, 0, *z };
+ *y = __builtin_ia32_vec_ext_v4sf (tmp, 3);
+ ++z; ++y;
+ }
+}
+
+void __attribute__((noinline))
+run_tests (void)
+{
+ unsigned int i;
+ double a[16], b[16];
+ float c[16], d[16];
+ for (i = 0; i < 16; ++i)
+ {
+ a[i] = 1;
+ b[i] = 2;
+ c[i] = 3;
+ d[i] = 4;
+ }
+ foo (16, a, b);
+ bar (4, c, d);
+ for (i = 0; i < 16; ++i)
+ {
+ if (a[i] != 2)
+ abort ();
+ if (c[i] != 4)
+ abort ();
+ }
+}
+
+int
+main ()
+{
+ unsigned long cpu_facilities;
+ unsigned int i;
+ double a[19], b[19];
+
+ cpu_facilities = i386_cpuid ();
+
+ if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ /* If host has no vector support, pass. */
+ return 0;
+
+ run_tests ();
+ return 0;
+}
Jakub
More information about the Gcc-patches
mailing list