[PATCH] Fix combiner on VEC_SELECT and ix86_expand_vector_set (PR rtl-optimization/21239)

Jakub Jelinek jakub@redhat.com
Tue May 3 17:22:00 GMT 2005


Hi!

This patch actually includes 2 fixes, included together just because they
exhibit themselves on the same testcase.

The first one is a fix for ix86_expand_vector_set in V4SFmode:
for a TARGET of ABCD, the function should replace the ELT-th element with X.
For ELT 0 and 1 it does that, but for 2 instead of ABXD it was actually
returning XBAD and for ELT 3 instead of ABCX returned XBCA.

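The second bug is in the VEC_SELECT case of combine_simplify_rtx: when the
operand is a VEC_CONCAT, a typo in the comparison of offset against the size
of the VEC_CONCAT's first operand made it pick the wrong element (and offset
could go negative).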

Tested on x86_64-linux, ok for HEAD/4.0?

2005-05-03  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (ix86_expand_vector_set): Fix setting 3rd and 4th
	item in V4SF mode.

	PR rtl-optimization/21239
	* combine.c (combine_simplify_rtx) <case VEC_SELECT>: Fix a typo.

	* gcc.dg/i386-sse-11.c: New test.

--- gcc/config/i386/i386.c.jj	2005-05-02 14:56:24.000000000 +0200
+++ gcc/config/i386/i386.c	2005-05-03 18:19:45.000000000 +0200
@@ -17022,32 +17022,35 @@ ix86_expand_vector_set (bool mmx_ok, rtx
 	  break;
 
 	case 1:
-	  /* tmp = op0 = A B C D */
+	  /* tmp = target = A B C D */
 	  tmp = copy_to_reg (target);
-
-	  /* op0 = C C D D */
+	  /* target = A A B B */
 	  emit_insn (gen_sse_unpcklps (target, target, target));
-
-	  /* op0 = C C D X */
+	  /* target = X A B B */
 	  ix86_expand_vector_set (false, target, val, 0);
-
-	  /* op0 = A B X D  */
+	  /* target = A X C D  */
 	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
 				       GEN_INT (1), GEN_INT (0),
 				       GEN_INT (2+4), GEN_INT (3+4)));
 	  return;
 
 	case 2:
+	  /* tmp = target = A B C D */
 	  tmp = copy_to_reg (target);
-	  ix86_expand_vector_set (false, target, val, 0);
+	  /* tmp = X B C D */
+	  ix86_expand_vector_set (false, tmp, val, 0);
+	  /* target = A B X D */
 	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
 				       GEN_INT (0), GEN_INT (1),
 				       GEN_INT (0+4), GEN_INT (3+4)));
 	  return;
 
 	case 3:
+	  /* tmp = target = A B C D */
 	  tmp = copy_to_reg (target);
-	  ix86_expand_vector_set (false, target, val, 0);
+	  /* tmp = X B C D */
+	  ix86_expand_vector_set (false, tmp, val, 0);
+	  /* target = A B C X */
 	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
 				       GEN_INT (0), GEN_INT (1),
 				       GEN_INT (2+4), GEN_INT (0+4)));
--- gcc/combine.c.jj	2005-04-25 11:55:54.000000000 +0200
+++ gcc/combine.c	2005-05-03 15:59:29.000000000 +0200
@@ -4742,7 +4742,7 @@ combine_simplify_rtx (rtx x, enum machin
 		if (GET_CODE (op0) == VEC_CONCAT)
 		  {
 		    HOST_WIDE_INT op0_size = GET_MODE_SIZE (GET_MODE (XEXP (op0, 0)));
-		    if (op0_size < offset)
+		    if (offset < op0_size)
 		      op0 = XEXP (op0, 0);
 		    else
 		      {
--- gcc/testsuite/gcc.dg/i386-sse-11.c.jj	2005-05-03 16:08:18.000000000 +0200
+++ gcc/testsuite/gcc.dg/i386-sse-11.c	2005-05-03 18:22:42.000000000 +0200
@@ -0,0 +1,92 @@
+/* PR rtl-optimization/21239 */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse2" } */
+#include <emmintrin.h>
+#include "i386-cpuid.h"
+
+extern void abort (void);
+
+void
+foo (unsigned int x, double *y, const double *z)
+{
+  __m128d tmp;
+  while (x)
+    {
+      tmp = _mm_load_sd (z);
+      _mm_store_sd (y, tmp);
+      --x; ++z; ++y;
+    }
+}
+
+void
+bar (unsigned int x, float *y, const float *z)
+{
+  __m128 tmp;
+  unsigned int i;
+  for (i = 0; i < x; ++i)
+    {
+      tmp = (__m128) { *z, 0, 0, 0 };
+      *y = __builtin_ia32_vec_ext_v4sf (tmp, 0);
+      ++z; ++y;
+    }
+  for (i = 0; i < x; ++i)
+    {
+      tmp = (__m128) { 0, *z, 0, 0 };
+      *y = __builtin_ia32_vec_ext_v4sf (tmp, 1);
+      ++z; ++y;
+    }
+  for (i = 0; i < x; ++i)
+    {
+      tmp = (__m128) { 0, 0, *z, 0 };
+      *y = __builtin_ia32_vec_ext_v4sf (tmp, 2);
+      ++z; ++y;
+    }
+  for (i = 0; i < x; ++i)
+    {
+      tmp = (__m128) { 0, 0, 0, *z };
+      *y = __builtin_ia32_vec_ext_v4sf (tmp, 3);
+      ++z; ++y;
+    }
+}
+
+void __attribute__((noinline))
+run_tests (void)
+{
+  unsigned int i;
+  double a[16], b[16];
+  float c[16], d[16];
+  for (i = 0; i < 16; ++i)
+    {
+      a[i] = 1;
+      b[i] = 2;
+      c[i] = 3;
+      d[i] = 4;
+    }
+  foo (16, a, b);
+  bar (4, c, d);
+  for (i = 0; i < 16; ++i)
+    {
+      if (a[i] != 2)
+	abort ();
+      if (c[i] != 4)
+	abort ();
+    }
+}
+
+int
+main ()
+{
+  unsigned long cpu_facilities;
+  unsigned int i;
+  double a[19], b[19];
+
+  cpu_facilities = i386_cpuid ();
+
+  if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+      != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+    /* If host has no vector support, pass.  */
+    return 0;
+
+  run_tests ();
+  return 0;
+}

	Jakub



More information about the Gcc-patches mailing list