This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [RFC] try to generate FP and/or/xor instructions for SSE
- From: Paolo Bonzini <bonzini at gnu dot org>
- Cc: tbp <tbptbp at gmail dot com>, Ross Ridge <rridge at csclub dot uwaterloo dot ca>, GCC Patches <gcc-patches at gcc dot gnu dot org>, Uros Bizjak <ubizjak at gmail dot com>
- Date: Thu, 23 Aug 2007 15:41:53 +0200
- Subject: Re: [RFC] try to generate FP and/or/xor instructions for SSE
- Newsgroups: gmane.comp.gcc.patches
- References: <20070823013503.426FE74B95@caffeine.csclub.uwaterloo.ca> <46CD273E.6060009@gnu.org> <46CD705E.90701@gmail.com> <46CD72E0.3000005@lu.unisi.ch> <46CD7ACA.8050904@gmail.com> <46CD8E69.4060402@gnu.org>
Paolo Bonzini wrote:
0000000000400a40 <bar(float __vector, float __vector, float __vector)>:
400a40: por %xmm2,%xmm1
400a44: addps %xmm1,%xmm0
400a47: retq
I'm surely not qualified to argue about typing, but you'd need a
rather strong distortion field to not characterize that as a regression.
This patch tries to prefer the ps variants if at least one argument is a
vector float, and the pd variants if at least one argument is a vector
double or __float128. I'm not sure it is the right design, as it may be
a little too aggressive.
I haven't bootstrapped or tested it, but it fixes the XFAILed testcase I
added this morning.
Uros, what do you think?
Not my day.
Paolo
2007-08-23 Paolo Bonzini <bonzini@gnu.org>
* config/i386/predicates.md (sse_subreg_operand): New.
* config/i386/sse.md (andv4sf3, iorv4sf3, xorv4sf3, andv2df3,
iorv2df3, xorv2df3, andtf3, iortf3, xortf3, and<mode>3, ior<mode>3,
xor<mode>3): Use ix86_expand_binary_operator.
(andtf3, iortf3, nandtf3, xortf3): Use pd version.
* config/i386/i386-protos.h (ix86_fixup_binary_operands): Remove.
* config/i386/i386.c (ix86_fixup_binary_operands): Make static.
Return new machine mode. Try converting the operation to FP
if there is an SSE subreg-ed operand.
(ix86_expand_binop_builtin): Adjust calling convention.
(ix86_fixup_binary_operands_no_copy): Check that mode does not change.
(ix86_expand_binary_operator): Don't clobber flags for non-int modes.
Index: config/i386/predicates.md
===================================================================
--- config/i386/predicates.md (revision 127728)
+++ config/i386/predicates.md (working copy)
@@ -752,6 +752,17 @@
return 0;
})
+(define_special_predicate "sse_subreg_operand"
+ (match_code "subreg")
+{
+ enum machine_mode opmode = GET_MODE (SUBREG_REG (op));
+ if (opmode == TFmode)
+ opmode = V2DFmode;
+
+ return ((opmode == V4SFmode || opmode == V2DFmode)
+ && (mode == VOIDmode || opmode == mode));
+})
+
;; Return true if OP is a register or a zero.
(define_predicate "reg_or_0_operand"
(ior (match_operand 0 "register_operand")
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 127735)
+++ config/i386/sse.md (working copy)
@@ -917,7 +917,7 @@
(and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
(match_operand:V4SF 2 "nonimmediate_operand" "")))]
"TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
+ "ix86_expand_binary_operator (AND, V4SFmode, operands); DONE;")
(define_insn "*andv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -942,7 +942,7 @@
(ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
(match_operand:V4SF 2 "nonimmediate_operand" "")))]
"TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
+ "ix86_expand_binary_operator (IOR, V4SFmode, operands); DONE;")
(define_insn "*iorv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -958,7 +958,7 @@
(xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
(match_operand:V4SF 2 "nonimmediate_operand" "")))]
"TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
+ "ix86_expand_binary_operator (XOR, V4SFmode, operands); DONE;")
(define_insn "*xorv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -1942,7 +1942,7 @@
(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
(match_operand:V2DF 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
+ "ix86_expand_binary_operator (AND, V2DFmode, operands); DONE;")
(define_insn "*andv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -1967,7 +1967,7 @@
(ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
(match_operand:V2DF 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
+ "ix86_expand_binary_operator (IOR, V2DFmode, operands); DONE;")
(define_insn "*iorv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -1983,7 +1983,7 @@
(xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
(match_operand:V2DF 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
+ "ix86_expand_binary_operator (XOR, V2DFmode, operands); DONE;")
(define_insn "*xorv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -3774,7 +3774,7 @@
(and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
(match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
"TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
+ "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;")
(define_insn "*sse_and<mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
@@ -3824,7 +3824,7 @@
(and:TF (match_operand:TF 1 "nonimmediate_operand" "")
(match_operand:TF 2 "nonimmediate_operand" "")))]
"TARGET_64BIT"
- "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
+ "ix86_expand_binary_operator (AND, TFmode, operands); DONE;")
(define_insn "*andtf3"
[(set (match_operand:TF 0 "register_operand" "=x")
@@ -3832,7 +3832,7 @@
(match_operand:TF 1 "nonimmediate_operand" "%0")
(match_operand:TF 2 "nonimmediate_operand" "xm")))]
"TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
- "pand\t{%2, %0|%0, %2}"
+ "andpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -3843,7 +3843,7 @@
(not:TF (match_operand:TF 1 "register_operand" "0"))
(match_operand:TF 2 "nonimmediate_operand" "xm")))]
"TARGET_64BIT"
- "pandn\t{%2, %0|%0, %2}"
+ "andnpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -3853,7 +3853,7 @@
(ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
(match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
"TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
+ "ix86_expand_binary_operator (IOR, <MODE>mode, operands); DONE;")
(define_insn "*sse_ior<mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
@@ -3882,7 +3882,7 @@
(ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
(match_operand:TF 2 "nonimmediate_operand" "")))]
"TARGET_64BIT"
- "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
+ "ix86_expand_binary_operator (IOR, TFmode, operands); DONE;")
(define_insn "*iortf3"
[(set (match_operand:TF 0 "register_operand" "=x")
@@ -3890,7 +3890,7 @@
(match_operand:TF 1 "nonimmediate_operand" "%0")
(match_operand:TF 2 "nonimmediate_operand" "xm")))]
"TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
- "por\t{%2, %0|%0, %2}"
+ "orpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -3900,7 +3900,7 @@
(xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
(match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
"TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
+ "ix86_expand_binary_operator (XOR, <MODE>mode, operands); DONE;")
(define_insn "*sse_xor<mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
@@ -3929,7 +3929,7 @@
(xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
(match_operand:TF 2 "nonimmediate_operand" "")))]
"TARGET_64BIT"
- "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
+ "ix86_expand_binary_operator (XOR, TFmode, operands); DONE;")
(define_insn "*xortf3"
[(set (match_operand:TF 0 "register_operand" "=x")
@@ -3937,7 +3937,7 @@
(match_operand:TF 1 "nonimmediate_operand" "%0")
(match_operand:TF 2 "nonimmediate_operand" "xm")))]
"TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
- "pxor\t{%2, %0|%0, %2}"
+ "xorpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h (revision 127728)
+++ config/i386/i386-protos.h (working copy)
@@ -79,8 +79,6 @@ extern void ix86_expand_move (enum machi
extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
extern void ix86_expand_push (enum machine_mode, rtx);
-extern rtx ix86_fixup_binary_operands (enum rtx_code,
- enum machine_mode, rtx[]);
extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
enum machine_mode, rtx[]);
extern void ix86_expand_binary_operator (enum rtx_code,
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 127728)
+++ config/i386/i386.c (working copy)
@@ -10219,7 +10219,7 @@ ix86_swap_binary_operands_p (enum rtx_co
destination to use for the operation. If different from the true
destination in operands[0], a copy operation will be required. */
-rtx
+static enum machine_mode
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
rtx operands[])
{
@@ -10261,9 +10261,26 @@ ix86_fixup_binary_operands (enum rtx_cod
if (MEM_P (src1) && !rtx_equal_p (dst, src1))
src1 = force_reg (mode, src1);
+ /* Prefer V2DF or V4SF bitwise operations if one of the operands is in a
+ vector floating-point mode. */
+ if ((code == AND || code == IOR || code == XOR)
+ && (sse_subreg_operand (src1, VOIDmode)
+ || sse_subreg_operand (src2, VOIDmode)))
+ {
+ if (sse_subreg_operand (src1, V4SFmode)
+ || sse_subreg_operand (src2, V4SFmode))
+ mode = V4SFmode;
+ else
+ mode = V2DFmode;
+ dst = gen_lowpart (mode, dst);
+ src1 = gen_lowpart (mode, src1);
+ src2 = gen_lowpart (mode, src2);
+ }
+
+ operands[0] = dst;
operands[1] = src1;
operands[2] = src2;
- return dst;
+ return mode;
}
/* Similarly, but assume that the destination has already been
@@ -10273,7 +10290,9 @@ void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
enum machine_mode mode, rtx operands[])
{
- rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+ rtx dst = operands[0];
+ enum machine_mode opmode = ix86_fixup_binary_operands (code, mode, operands);
+ gcc_assert (opmode == mode);
gcc_assert (dst == operands[0]);
}
@@ -10287,7 +10306,8 @@ ix86_expand_binary_operator (enum rtx_co
{
rtx src1, src2, dst, op, clob;
- dst = ix86_fixup_binary_operands (code, mode, operands);
+ mode = ix86_fixup_binary_operands (code, mode, operands);
+ dst = operands[0];
src1 = operands[1];
src2 = operands[2];
@@ -10301,11 +10321,13 @@ ix86_expand_binary_operator (enum rtx_co
gcc_assert (code == PLUS);
emit_insn (op);
}
- else
+ else if (GET_MODE_CLASS (mode) == MODE_INT)
{
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
+ else /* SSE */
+ emit_insn (op);
/* Fix up the destination if needed. */
if (dst != operands[0])
@@ -18556,7 +18578,8 @@ ix86_expand_binop_builtin (enum insn_cod
if (tmode == mode0 && tmode == mode1)
{
- target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
+ ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
+ target = xops[0];
op0 = xops[1];
op1 = xops[2];
}