This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH: PR target/40470: unable to find a register to spill in class 'SSE_FIRST_REG'


Hi,

Some SSE4 instructions have an implicit XMM0 operand. This patch helps the
register allocator (RA) deal with them by forcing the operand into XMM0
during expand. OK for trunk and 4.4?

Thanks.


H.J.
----
gcc/

2009-06-17  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/40470
	* config/i386/sse.md (sse4_1_blendvp<ssemodesuffixf2c>): Renamed
	to ...
	(*sse4_1_blendvp<ssemodesuffixf2c>): This.
	(sse4_1_pblendvb): Renamed to ...
	(*sse4_1_pblendvb): This.
	(sse4_1_blendvp<ssemodesuffixf2c>): New.
	(sse4_1_pblendvb): Likewise.
	(sse4_2_pcmpestr): Force operand 1 into XMM0.
	(sse4_2_pcmpistr): Likewise.

gcc/testsuite/

2009-06-17  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/40470
	* gcc.dg/torture/pr40470-1.c: New.
	* gcc.dg/torture/pr40470-2.c: Likewise.

--- gcc/config/i386/sse.md.xmm0	2009-05-27 07:42:18.000000000 -0700
+++ gcc/config/i386/sse.md	2009-06-17 06:50:15.000000000 -0700
@@ -8911,7 +8911,28 @@
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
+(define_expand "sse4_1_blendvp<ssemodesuffixf2c>"
+  [(match_operand:SSEMODEF2P 0 "register_operand" "")
+   (match_operand:SSEMODEF2P 1 "register_operand"  "")
+   (match_operand:SSEMODEF2P 2 "register_operand" "")
+   (match_operand:SSEMODEF2P 3 "register_operand" "")]
+  "TARGET_SSE4_1"
+{
+  /* Force operand 3 into XMM0.  */
+  rtx xmm0 = gen_rtx_REG (<MODE>mode, XMM0_REG);
+  rtx unspec;
+
+  emit_insn (gen_rtx_SET (VOIDmode, xmm0, operands[3]));
+  unspec = gen_rtx_UNSPEC (<MODE>mode,
+			   gen_rtvec (3, operands[1],
+				      operands[2],
+				      xmm0),
+			   UNSPEC_BLENDV);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], unspec));
+  DONE;
+})
+
+(define_insn "*sse4_1_blendvp<ssemodesuffixf2c>"
   [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
 	(unspec:SSEMODEF2P
 	  [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
@@ -9023,7 +9044,28 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse4_1_pblendvb"
+(define_expand "sse4_1_pblendvb"
+  [(match_operand:V16QI 0 "register_operand" "")
+   (match_operand:V16QI 1 "register_operand"  "")
+   (match_operand:V16QI 2 "register_operand" "")
+   (match_operand:V16QI 3 "register_operand" "")]
+  "TARGET_SSE4_1"
+{
+  /* Force operand 3 into XMM0.  */
+  rtx xmm0 = gen_rtx_REG (V16QImode, XMM0_REG);
+  rtx unspec;
+
+  emit_insn (gen_rtx_SET (VOIDmode, xmm0, operands[3]));
+  unspec = gen_rtx_UNSPEC (V16QImode,
+			   gen_rtvec (3, operands[1],
+				      operands[2],
+				      xmm0),
+			   UNSPEC_BLENDV);
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], unspec));
+  DONE;
+})
+
+(define_insn "*sse4_1_pblendvb"
   [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
 	(unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand"  "0")
 		       (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
@@ -9597,9 +9639,14 @@
 				     operands[3], operands[4],
 				     operands[5], operands[6]));
   if (xmm0)
-    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
-				     operands[3], operands[4],
-				     operands[5], operands[6]));
+    {
+      /* Force operand 1 into XMM0.  */
+      rtx sse0 = gen_rtx_REG (V16QImode, XMM0_REG);
+      emit_insn (gen_sse4_2_pcmpestrm (sse0, operands[2],
+				       operands[3], operands[4],
+				       operands[5], operands[6]));
+      emit_insn (gen_rtx_SET (VOIDmode, operands[1], sse0));
+    }
   if (flags && !(ecx || xmm0))
     emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
 					   operands[2], operands[3],
@@ -9722,8 +9769,13 @@
     emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
 				     operands[3], operands[4]));
   if (xmm0)
-    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
-				     operands[3], operands[4]));
+    {
+      /* Force operand 1 into XMM0.  */
+      rtx sse0 = gen_rtx_REG (V16QImode, XMM0_REG);
+      emit_insn (gen_sse4_2_pcmpistrm (sse0, operands[2],
+				       operands[3], operands[4]));
+      emit_insn (gen_rtx_SET (VOIDmode, operands[1], sse0));
+    }
   if (flags && !(ecx || xmm0))
     emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
 					   operands[2], operands[3],
--- gcc/testsuite/gcc.dg/torture/pr40470-1.c.xmm0	2009-06-17 06:45:34.000000000 -0700
+++ gcc/testsuite/gcc.dg/torture/pr40470-1.c	2009-06-17 06:45:06.000000000 -0700
@@ -0,0 +1,22 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-msse4" } */
+#include <nmmintrin.h>
+__m128i load (char *);
+char *
+foo (char *p1, char *p2,
+     int bmsk, __m128i mask1, __m128i mask2)
+{
+  int len = 0;
+  __m128i frag1, frag2;
+  int  cmp_s;
+  if( !p2[0]) return p1;
+  if( !p1[0] ) return NULL;
+  frag2 = load (p2); 
+  frag1 = load (p1);
+  frag2 = _mm_blendv_epi8(frag2, mask2, mask1);
+  frag1 = _mm_blendv_epi8(frag1, mask2, mask1);
+  cmp_s = _mm_cmpistrs(frag2, frag1, 0x0c);
+  if( cmp_s )
+    __asm("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk) );
+  return p1 + len;
+}
--- gcc/testsuite/gcc.dg/torture/pr40470-2.c.xmm0	2009-06-17 06:45:39.000000000 -0700
+++ gcc/testsuite/gcc.dg/torture/pr40470-2.c	2009-06-17 06:49:46.000000000 -0700
@@ -0,0 +1,27 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-msse4" } */
+#include <nmmintrin.h>
+__m128i load (char *);
+char *
+foo (const unsigned char *s1, const unsigned char *s2,
+     int bmsk, __m128i frag2)
+{
+  int len = 0;
+  char *p1 = (char  *) s1;
+  char *p2 = (char  *) s2;
+  __m128i frag1, fruc, mask;
+  int cmp_c, cmp_s;
+  if( !p2[0]) return (char *) s1;
+  if( !p1[0] ) return NULL;
+  if( p2[1]) frag2 = load (p2); 
+  frag1 = load (p1);
+  fruc = _mm_loadu_si128 ((__m128i *) s1);
+  mask = _mm_cmpistrm(fruc, frag2, 0x44);
+  frag2 = _mm_blendv_epi8(frag2, mask, mask);
+  frag1 = _mm_blendv_epi8(frag1, mask, mask);
+  cmp_c = _mm_cmpistrc(frag2, frag1, 0x0c);
+  cmp_s = _mm_cmpistrs(frag2, frag1, 0x0c);
+  if( cmp_s  & cmp_c  )
+    __asm("bsfl %[bmsk], %[len]" : [len] "=r" (len) : [bmsk] "r" (bmsk) );
+  return p2 + len;
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]