This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Committed; Use "z" register constraint in SSE4.1 variable blend instructions


Hello!

SSE4.1 variable blend instructions can now benefit from the "z" register constraint, too. The attached patch no longer forces the third argument into xmm0 by hand, but lets reload do its job. Additionally, a safe_vector_operand() guard was added for VECTOR_MODE_P operands.

Also, there is no need for the extra "optimize && !register_operand (...)" check on an input when the insn operand predicate is already "register_operand"; the predicate check alone is sufficient.

The patch was bootstrapped on x86_64-pc-linux-gnu and regression tested for all default languages. The patch has been committed to SVN.

2007-06-02 Uros Bizjak <ubizjak@gmail.com>

* config/i386/sse.md ("sse4_1_blendvpd"): Require "z" class XMM
register for operand[3]. Adjust asm template.
("sse4_1_blendvps"): Ditto.
("sse4_1_pblendvb"): Ditto.
* config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Call
safe_vector_operand() if input operand is VECTOR_MODE_P operand. Do not
force operands[3] into xmm0 register for variable blend instructions.
(ix86_expand_sse_pcmpestr): Do not check operands for
"register_operand", when insn operand predicate is "register_operand".
(ix86_expand_sse_pcmpistr): Ditto.


Uros.
Index: sse.md
===================================================================
--- sse.md	(revision 125279)
+++ sse.md	(working copy)
@@ -5844,10 +5844,10 @@
   [(set (match_operand:V2DF 0 "register_operand" "=x")
 	(unspec:V2DF [(match_operand:V2DF 1 "register_operand"  "0")
 		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
-		      (reg:V2DF 21)]
+		      (match_operand:V2DF 3 "register_operand" "z")]
 		     UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
-  "blendvpd\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+  "blendvpd\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V2DF")])
@@ -5856,10 +5856,10 @@
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
 		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-		      (reg:V4SF 21)]
+		      (match_operand:V4SF 3 "register_operand" "z")]
 		     UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
-  "blendvps\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+  "blendvps\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V4SF")])
@@ -5927,10 +5927,10 @@
   [(set (match_operand:V16QI 0 "register_operand" "=x")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand"  "0")
 		       (match_operand:V16QI 2 "nonimmediate_operand" "xm")
-		       (reg:V16QI 21)]
+		       (match_operand:V16QI 3 "register_operand" "z")]
 		      UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
-  "pblendvb\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+  "pblendvb\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
Index: i386.c
===================================================================
--- i386.c	(revision 125279)
+++ i386.c	(working copy)
@@ -16929,8 +16929,7 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, 0, 0 },
 };
 
-/* SSE builtins with 3 arguments and the last argument must be a 8 bit
-   constant or xmm0.  */
+/* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0.  */
 static const struct builtin_description bdesc_sse_3arg[] =
 {
   /* SSE4.1 */
@@ -18279,51 +18278,48 @@ ix86_expand_sse_4_operands_builtin (enum
   rtx op1 = expand_normal (arg1);
   rtx op2 = expand_normal (arg2);
   enum machine_mode tmode = insn_data[icode].operand[0].mode;
-  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
-  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
-  enum machine_mode mode2;
-  rtx xmm0;
-
-  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
-    op0 = copy_to_mode_reg (mode0, op0);
-  if ((optimize && !register_operand (op1, mode1))
-      || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
-    op1 = copy_to_mode_reg (mode1, op1);
+  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+  enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+  enum machine_mode mode3 = insn_data[icode].operand[3].mode;
 
-  switch (icode)
-    {
-    case CODE_FOR_sse4_1_blendvpd:
-    case CODE_FOR_sse4_1_blendvps:
-    case CODE_FOR_sse4_1_pblendvb:
-      /* The third argument of variable blends must be xmm0.  */
-      xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG);
-      emit_move_insn (xmm0, op2);
-      op2 = xmm0;
-      break;
-    default:
-      mode2 = insn_data[icode].operand[2].mode;
-      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
-	{
-	  switch (icode)
-	    {
-	    case CODE_FOR_sse4_1_roundsd:
-	    case CODE_FOR_sse4_1_roundss:
-	      error ("the third argument must be a 4-bit immediate");
-	      break;
-	    default:
-	      error ("the third argument must be a 8-bit immediate");
-	      break;
-	    }
-	  return const0_rtx;
-	}
-      break;
-    }
+  if (VECTOR_MODE_P (mode1))
+    op0 = safe_vector_operand (op0, mode1);
+  if (VECTOR_MODE_P (mode2))
+    op1 = safe_vector_operand (op1, mode2);
+  if (VECTOR_MODE_P (mode3))
+    op2 = safe_vector_operand (op2, mode3);
 
   if (optimize
       || target == 0
       || GET_MODE (target) != tmode
       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
     target = gen_reg_rtx (tmode);
+
+  if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+    op0 = copy_to_mode_reg (mode1, op0);
+  if ((optimize && !register_operand (op1, mode2))
+      || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
+    op1 = copy_to_mode_reg (mode2, op1);
+
+  if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+    switch (icode)
+      {
+      case CODE_FOR_sse4_1_blendvpd:
+      case CODE_FOR_sse4_1_blendvps:
+      case CODE_FOR_sse4_1_pblendvb:
+	op2 = copy_to_mode_reg (mode3, op2);
+	break;
+
+      case CODE_FOR_sse4_1_roundsd:
+      case CODE_FOR_sse4_1_roundss:
+	error ("the third argument must be a 4-bit immediate");
+	return const0_rtx;
+
+      default:
+	error ("the third argument must be an 8-bit immediate");
+	return const0_rtx;
+      }
+
   pat = GEN_FCN (icode) (target, op0, op1, op2);
   if (! pat)
     return 0;
@@ -18732,17 +18728,14 @@ ix86_expand_sse_pcmpestr (const struct b
   if (VECTOR_MODE_P (modev4))
     op2 = safe_vector_operand (op2, modev4);
 
-  if ((optimize && !register_operand (op0, modev2))
-      || !(*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
     op0 = copy_to_mode_reg (modev2, op0);
-  if ((optimize && !register_operand (op1, modei3))
-      || !(*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+  if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
     op1 = copy_to_mode_reg (modei3, op1);
   if ((optimize && !register_operand (op2, modev4))
       || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
     op2 = copy_to_mode_reg (modev4, op2);
-  if ((optimize && !register_operand (op3, modei5))
-      || !(*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+  if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
     op3 = copy_to_mode_reg (modei5, op3);
 
   if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
@@ -18833,8 +18826,7 @@ ix86_expand_sse_pcmpistr (const struct b
   if (VECTOR_MODE_P (modev3))
     op1 = safe_vector_operand (op1, modev3);
 
-  if ((optimize && !register_operand (op0, modev2))
-      || !(*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
     op0 = copy_to_mode_reg (modev2, op0);
   if ((optimize && !register_operand (op1, modev3))
       || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]