This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] try to generate FP and/or/xor instructions for SSE


Paolo Bonzini wrote:

0000000000400a40 <bar(float __vector, float __vector, float __vector)>:
  400a40:       por    %xmm2,%xmm1
  400a44:       addps  %xmm1,%xmm0
  400a47:       retq

I'm surely not qualified to argue about typing, but you'd need a rather strong distortion field to not characterize that as a regression.

This patch tries to prefer the ps/pd variants of the bitwise instructions: ps if at least one argument is a vector float, and pd if at least one argument is a vector double or __float128. I'm not sure it is the right design, as it may be a little too aggressive.


I didn't bootstrap/test it, but it fixes the XFAILed testcase I added this morning.

Uros, what do you think?

Not my day.


Paolo
2007-08-23  Paolo Bonzini  <bonzini@gnu.org>

	* config/i386/predicates.md (sse_subreg_operand): New.
	* config/i386/sse.md (andv4sf3, iorv4sf3, xorv4sf3, andv2df3,
	iorv2df3, xorv2df3, andtf3, iortf3, xortf3, and<mode>3, ior<mode>3,
	xor<mode>3): Use ix86_expand_binary_operator.
	(andtf3, iortf3, nandtf3, xortf3): Use pd version.
	* config/i386/i386-protos.h (ix86_fixup_binary_operands): Remove.
	* config/i386/i386.c (ix86_fixup_binary_operands): Make static.
	Return new machine mode.  Try converting the operation to FP
	if there is an SSE subreg-ed operand.
	(ix86_expand_binop_builtin): Adjust calling convention.
	(ix86_fixup_binary_operands_no_copy): Check that mode does not change.
	(ix86_expand_binary_operator): Don't clobber flags for non-int modes.

Index: config/i386/predicates.md
===================================================================
--- config/i386/predicates.md	(revision 127728)
+++ config/i386/predicates.md	(working copy)
@@ -752,6 +752,17 @@
   return 0;
 })
 
+(define_special_predicate "sse_subreg_operand"
+  (match_code "subreg")
+{
+  enum machine_mode opmode = GET_MODE (SUBREG_REG (op));
+  if (opmode == TFmode)
+    opmode = V2DFmode;
+
+  return ((opmode == V4SFmode || opmode == V2DFmode)
+          && (mode == VOIDmode || opmode == mode));
+})
+
 ;; Return true if OP is a register or a zero.
 (define_predicate "reg_or_0_operand"
   (ior (match_operand 0 "register_operand")
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 127735)
+++ config/i386/sse.md	(working copy)
@@ -917,7 +917,7 @@
 	(and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
 		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
   "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
+  "ix86_expand_binary_operator (AND, V4SFmode, operands); DONE;")
 
 (define_insn "*andv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -942,7 +942,7 @@
 	(ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
 		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
   "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
+  "ix86_expand_binary_operator (IOR, V4SFmode, operands); DONE;")
 
 (define_insn "*iorv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -958,7 +958,7 @@
 	(xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
 		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
   "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
+  "ix86_expand_binary_operator (XOR, V4SFmode, operands); DONE;")
 
 (define_insn "*xorv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -1942,7 +1942,7 @@
 	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
 		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
+  "ix86_expand_binary_operator (AND, V2DFmode, operands); DONE;")
 
 (define_insn "*andv2df3"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -1967,7 +1967,7 @@
 	(ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
 		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
+  "ix86_expand_binary_operator (IOR, V2DFmode, operands); DONE;")
 
 (define_insn "*iorv2df3"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -1983,7 +1983,7 @@
 	(xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
 		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
+  "ix86_expand_binary_operator (XOR, V2DFmode, operands); DONE;")
 
 (define_insn "*xorv2df3"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -3774,7 +3774,7 @@
 	(and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
 		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
+  "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;")
 
 (define_insn "*sse_and<mode>3"
   [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
@@ -3824,7 +3824,7 @@
 	(and:TF (match_operand:TF 1 "nonimmediate_operand" "")
 		(match_operand:TF 2 "nonimmediate_operand" "")))]
   "TARGET_64BIT"
-  "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
+  "ix86_expand_binary_operator (AND, TFmode, operands); DONE;")
 
 (define_insn "*andtf3"
   [(set (match_operand:TF 0 "register_operand" "=x")
@@ -3832,7 +3832,7 @@
 	  (match_operand:TF 1 "nonimmediate_operand" "%0")
 	  (match_operand:TF 2 "nonimmediate_operand" "xm")))]
   "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
-  "pand\t{%2, %0|%0, %2}"
+  "andpd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
@@ -3843,7 +3843,7 @@
 	  (not:TF (match_operand:TF 1 "register_operand" "0"))
 	  (match_operand:TF 2 "nonimmediate_operand" "xm")))]
   "TARGET_64BIT"
-  "pandn\t{%2, %0|%0, %2}"
+  "andnpd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
@@ -3853,7 +3853,7 @@
 	(ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
 		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
+  "ix86_expand_binary_operator (IOR, <MODE>mode, operands); DONE;")
 
 (define_insn "*sse_ior<mode>3"
   [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
@@ -3882,7 +3882,7 @@
 	(ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
 		(match_operand:TF 2 "nonimmediate_operand" "")))]
   "TARGET_64BIT"
-  "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
+  "ix86_expand_binary_operator (IOR, TFmode, operands); DONE;")
 
 (define_insn "*iortf3"
   [(set (match_operand:TF 0 "register_operand" "=x")
@@ -3890,7 +3890,7 @@
 	  (match_operand:TF 1 "nonimmediate_operand" "%0")
 	  (match_operand:TF 2 "nonimmediate_operand" "xm")))]
   "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
-  "por\t{%2, %0|%0, %2}"
+  "orpd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
@@ -3900,7 +3900,7 @@
 	(xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
 		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE"
-  "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
+  "ix86_expand_binary_operator (XOR, <MODE>mode, operands); DONE;")
 
 (define_insn "*sse_xor<mode>3"
   [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
@@ -3929,7 +3929,7 @@
 	(xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
 		(match_operand:TF 2 "nonimmediate_operand" "")))]
   "TARGET_64BIT"
-  "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
+  "ix86_expand_binary_operator (XOR, TFmode, operands); DONE;")
 
 (define_insn "*xortf3"
   [(set (match_operand:TF 0 "register_operand" "=x")
@@ -3937,7 +3937,7 @@
 	  (match_operand:TF 1 "nonimmediate_operand" "%0")
 	  (match_operand:TF 2 "nonimmediate_operand" "xm")))]
   "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
-  "pxor\t{%2, %0|%0, %2}"
+  "xorpd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h	(revision 127728)
+++ config/i386/i386-protos.h	(working copy)
@@ -79,8 +79,6 @@ extern void ix86_expand_move (enum machi
 extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
 extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
 extern void ix86_expand_push (enum machine_mode, rtx);
-extern rtx ix86_fixup_binary_operands (enum rtx_code,
-				       enum machine_mode, rtx[]);
 extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
 						enum machine_mode, rtx[]);
 extern void ix86_expand_binary_operator (enum rtx_code,
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 127728)
+++ config/i386/i386.c	(working copy)
@@ -10219,7 +10219,7 @@ ix86_swap_binary_operands_p (enum rtx_co
    destination to use for the operation.  If different from the true
    destination in operands[0], a copy operation will be required.  */
 
-rtx
+static enum machine_mode
 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
 			    rtx operands[])
 {
@@ -10261,9 +10261,26 @@ ix86_fixup_binary_operands (enum rtx_cod
   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
     src1 = force_reg (mode, src1);
 
+  /* Prefer V2DF or V4SF bitwise operations if one of the operands is in a
+     vector floating-point mode.  */
+  if ((code == AND || code == IOR || code == XOR)
+      && (sse_subreg_operand (src1, VOIDmode)
+          || sse_subreg_operand (src2, VOIDmode)))
+    {
+      if (sse_subreg_operand (src1, V4SFmode)
+	  || sse_subreg_operand (src2, V4SFmode))
+	mode = V4SFmode;
+      else
+	mode = V2DFmode;
+      dst = gen_lowpart (mode, dst);
+      src1 = gen_lowpart (mode, src1);
+      src2 = gen_lowpart (mode, src2);
+    }
+
+  operands[0] = dst;
   operands[1] = src1;
   operands[2] = src2;
-  return dst;
+  return mode;
 }
 
 /* Similarly, but assume that the destination has already been
@@ -10273,7 +10290,9 @@ void
 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
 				    enum machine_mode mode, rtx operands[])
 {
-  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+  rtx dst = operands[0];
+  enum machine_mode opmode = ix86_fixup_binary_operands (code, mode, operands);
+  gcc_assert (opmode == mode);
   gcc_assert (dst == operands[0]);
 }
 
@@ -10287,7 +10306,8 @@ ix86_expand_binary_operator (enum rtx_co
 {
   rtx src1, src2, dst, op, clob;
 
-  dst = ix86_fixup_binary_operands (code, mode, operands);
+  mode = ix86_fixup_binary_operands (code, mode, operands);
+  dst = operands[0];
   src1 = operands[1];
   src2 = operands[2];
 
@@ -10301,11 +10321,13 @@ ix86_expand_binary_operator (enum rtx_co
       gcc_assert (code == PLUS);
       emit_insn (op);
     }
-  else
+  else if (GET_MODE_CLASS (mode) == MODE_INT)
     {
       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
     }
+  else /* SSE */
+    emit_insn (op);
 
   /* Fix up the destination if needed.  */
   if (dst != operands[0])
@@ -18556,7 +18578,8 @@ ix86_expand_binop_builtin (enum insn_cod
 
   if (tmode == mode0 && tmode == mode1)
     {
-      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
+      ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
+      target = xops[0];
       op0 = xops[1];
       op1 = xops[2];
     }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]