This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: PR target/14631: Fix pinsrw instruction


Hello!

This patch is based on Richard Henderson's proposed fix for PR target/14631 (http://gcc.gnu.org/ml/gcc-patches/2004-09/msg02450.html). The existing selector handling for the pinsrw insn is wrong.

The proposed fix generates invalid asm code in the input-register case, because gen_lowpart on a register operand produces a 16-bit register operand:

pinsrw $0, %ax, %xmm1

This code is wrong, as pinsrw expects either a 32-bit register or a 16-bit memory operand.

The attached patch fixes the wrong selector handling for the pinsrw insn. It also fixes the handling of register operands by using subregs. The testcase from PR target/14631 now passes at all optimization levels. The handling of memory operands was also checked, and the generated code looks as expected:

pinsrw $0, %eax, %xmm1

...or...

pinsrw $0, 12(%ebp), %xmm1

The patch was bootstrapped on pentium4-pc-linux-gnu; regression testing is in progress.

2004-12-28  Richard Henderson  <rth@redhat.com>
       Uros Bizjak  <uros@kss-loka.si>

   PR target/14631
   * config/i386/i386.c (ix86_expand_builtin): [IX86_BUILTIN_PINSRW,
   IX86_BUILTIN_PINSRW128]: Fix wrong selector range in error message.
   * config/i386/i386.md (mmx_pinsrw, sse2_pinsrw): Fix selector
   handling.
   (*mmx_pinsrw_reg, *mmx_pinsrw_mem): New patterns.
   (*sse2_pinsrw_reg, *sse2_pinsrw_mem): New patterns.
   * config/i386/predicates.md (const_pow2_1_to_8_operand,
   const_pow2_1_to_128_operand): New predicates.

Uros.
Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.766
diff -u -p -r1.766 i386.c
--- i386.c	28 Dec 2004 05:26:23 -0000	1.766
+++ i386.c	28 Dec 2004 08:28:59 -0000
@@ -13437,7 +13437,7 @@ ix86_expand_builtin (tree exp, rtx targe
       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
 	{
 	  error ("selector must be an integer constant in the range 0..%i",
-		  fcode == IX86_BUILTIN_PINSRW ? 15:255);
+		  fcode == IX86_BUILTIN_PINSRW ? 3:7);
 	  return const0_rtx;
 	}
       if (target == 0
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.594
diff -u -p -r1.594 i386.md
--- i386.md	27 Dec 2004 03:50:51 -0000	1.594
+++ i386.md	28 Dec 2004 08:29:03 -0000
@@ -21446,14 +21446,50 @@
 
 ;; MMX insert/extract/shuffle
 
-(define_insn "mmx_pinsrw"
+(define_expand "mmx_pinsrw"
+  [(set (match_operand:V4HI 0 "register_operand" "")
+        (vec_merge:V4HI
+	  (match_operand:V4HI 1 "register_operand" "")
+          (vec_duplicate:V4HI
+            (match_operand:SI 2 "nonimmediate_operand" ""))
+          (match_operand:SI 3 "const_0_to_3_operand" "")))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+{
+  if (REG_P (operands[2]))
+    operands[2] = simplify_gen_subreg (HImode, operands[2], SImode, 0);
+  else
+    operands[2] = gen_lowpart (HImode, operands[2]);
+  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
+})
+
+(define_insn "*mmx_pinsrw_reg"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (vec_merge:V4HI
+	  (match_operand:V4HI 1 "register_operand" "0")
+          (vec_duplicate:V4HI
+            (subreg:HI
+	      (match_operand:SI 2 "register_operand" "r") 0))
+          (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+}
+  [(set_attr "type" "mmxcvt")
+   (set_attr "mode" "DI")])
+
+(define_insn "*mmx_pinsrw_mem"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0")
-			(vec_duplicate:V4HI
-			 (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
-			(match_operand:SI 3 "const_0_to_15_operand" "N")))]
+        (vec_merge:V4HI
+	  (match_operand:V4HI 1 "register_operand" "0")
+          (vec_duplicate:V4HI
+            (match_operand:HI 2 "memory_operand" "m"))
+          (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "pinsrw\t{%3, %2, %0|%0, %2, %3}"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+}
   [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
@@ -23141,15 +23177,50 @@
 
 ;; MMX insert/extract/shuffle
 
-(define_insn "sse2_pinsrw"
+(define_expand "sse2_pinsrw"
+  [(set (match_operand:V8HI 0 "register_operand" "")
+        (vec_merge:V8HI
+	  (match_operand:V8HI 1 "register_operand" "")
+          (vec_duplicate:V8HI
+            (match_operand:SI 2 "nonimmediate_operand" ""))
+          (match_operand:SI 3 "const_0_to_7_operand" "")))]
+  "TARGET_SSE2"
+{
+  if (REG_P (operands[2]))
+    operands[2] = simplify_gen_subreg (HImode, operands[2], SImode, 0);
+  else
+    operands[2] = gen_lowpart (HImode, operands[2]);
+  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
+})
+
+(define_insn "*sse2_pinsrw_reg"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (vec_merge:V8HI
+	  (match_operand:V8HI 1 "register_operand" "0")
+          (vec_duplicate:V8HI
+            (subreg:HI
+	      (match_operand:SI 2 "register_operand" "r") 0))
+          (match_operand:SI 3 "const_pow2_1_to_128_operand" "N")))]
+  "TARGET_SSE2"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+}
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pinsrw_mem"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0")
-			(vec_duplicate:V8HI
-			 (truncate:HI
-			   (match_operand:SI 2 "nonimmediate_operand" "rm")))
-			(match_operand:SI 3 "const_0_to_255_operand" "N")))]
+        (vec_merge:V8HI
+	  (match_operand:V8HI 1 "register_operand" "0")
+          (vec_duplicate:V8HI
+            (match_operand:HI 2 "memory_operand" "m"))
+          (match_operand:SI 3 "const_pow2_1_to_128_operand" "N")))]
   "TARGET_SSE2"
-  "pinsrw\t{%3, %2, %0|%0, %2, %3}"
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+  return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+}
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "TI")])
 
Index: predicates.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/predicates.md,v
retrieving revision 1.10
diff -u -p -r1.10 predicates.md
--- predicates.md	23 Dec 2004 06:18:22 -0000	1.10
+++ predicates.md	28 Dec 2004 08:29:03 -0000
@@ -546,6 +546,22 @@
   (and (match_code "const_int")
        (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 255")))
 
+;; Match exactly one bit in 4-bit mask.
+(define_predicate "const_pow2_1_to_8_operand"
+  (match_code "const_int")
+{
+  unsigned int log = exact_log2 (INTVAL (op));
+  return log <= 3;
+})
+
+;; Match exactly one bit in 8-bit mask.
+(define_predicate "const_pow2_1_to_128_operand"
+  (match_code "const_int")
+{
+  unsigned int log = exact_log2 (INTVAL (op));
+  return log <= 7;
+})
+
 ;; True if this is a constant appropriate for an increment or decrement.
 (define_predicate "incdec_operand"
   (match_code "const_int")

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]