This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
revamp i386 sse fp conditional move
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 12 Apr 2005 22:06:40 -0700
- Subject: revamp i386 sse fp conditional move
In preparation for implementing vcond, I wanted to split out some code
that I want to share. But I also wanted to make sure I wasn't harming
our optimization of conditional moves. The new code expands to logical
operations before reload rather than after, which means we have to do
much less work overall (the post-reload splitter had to do its own local
register allocation).
Bootstrapped and tested on i686 and amd64.
I ran the povray benchmark on a pentium-m, which gave somewhat unbelievable
results (new code ~45% faster) that aren't suggested by diffs of the
assembly. Either I've done something wrong that isn't immediately obvious,
or the new code Just Gets Lucky with the register allocator in some inner
loop, or (since this was my laptop) some other system interaction meant
that things weren't quite as idle as they seemed.
Anyway, committed. Bitch if you find the bug.
r~
* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ...
(ix86_expand_sse_fp_minmax): ... from ...
(ix86_expand_fp_movcc): ... here.
(ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc.
* config/i386/i386-protos.h: Update.
* config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New.
(sse_setccsf, sse_setccdf): Allow before reload.
(movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove.
(movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove.
(ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New.
* config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New.
(anddf3, nanddf3, iordf3, xordf3): New.
Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.135
diff -u -p -d -r1.135 i386-protos.h
--- config/i386/i386-protos.h 5 Apr 2005 20:20:29 -0000 1.135
+++ config/i386/i386-protos.h 13 Apr 2005 04:53:07 -0000
@@ -150,7 +150,6 @@ extern void ix86_expand_branch (enum rtx
extern int ix86_expand_setcc (enum rtx_code, rtx);
extern int ix86_expand_int_movcc (rtx[]);
extern int ix86_expand_fp_movcc (rtx[]);
-extern void ix86_split_sse_movcc (rtx[]);
extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.808
diff -u -p -d -r1.808 i386.c
--- config/i386/i386.c 12 Apr 2005 01:46:28 -0000 1.808
+++ config/i386/i386.c 13 Apr 2005 04:53:10 -0000
@@ -10025,6 +10025,180 @@ ix86_expand_int_movcc (rtx operands[])
return 1; /* DONE */
}
+/* Swap, force into registers, or otherwise massage the two operands
+ to an sse comparison with a mask result. Thus we differ a bit from
+ ix86_prepare_fp_compare_args which expects to produce a flags result.
+
+ The DEST operand exists to help determine whether to commute commutative
+ operators. The POP0/POP1 operands are updated in place. The new
+ comparison code is returned, or UNKNOWN if not implementable. */
+
+static enum rtx_code
+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
+ rtx *pop0, rtx *pop1)
+{
+ rtx tmp;
+
+ switch (code)
+ {
+ case LTGT:
+ case UNEQ:
+ /* We have no LTGT as an operator. We could implement it with
+ NE & ORDERED, but this requires an extra temporary. It's
+ not clear that it's worth it. */
+ return UNKNOWN;
+
+ case LT:
+ case LE:
+ case UNGT:
+ case UNGE:
+ /* These are supported directly. */
+ break;
+
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ /* For commutative operators, try to canonicalize the destination
+ operand to be first in the comparison - this helps reload to
+ avoid extra moves. */
+ if (!dest || !rtx_equal_p (dest, *pop1))
+ break;
+ /* FALLTHRU */
+
+ case GE:
+ case GT:
+ case UNLE:
+ case UNLT:
+ /* These are not supported directly. Swap the comparison operands
+ to transform into something that is supported. */
+ tmp = *pop0;
+ *pop0 = *pop1;
+ *pop1 = tmp;
+ code = swap_condition (code);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return code;
+}
+
+/* Detect conditional moves that exactly match min/max operational
+ semantics. Note that this is IEEE safe, as long as we don't
+ interchange the operands.
+
+ Returns FALSE if this conditional move doesn't match a MIN/MAX,
+ and TRUE if the operation is successful and instructions are emitted. */
+
+static bool
+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
+ rtx cmp_op1, rtx if_true, rtx if_false)
+{
+ enum machine_mode mode;
+ bool is_min;
+ rtx tmp;
+
+ if (code == LT)
+ ;
+ else if (code == UNGE)
+ {
+ tmp = if_true;
+ if_true = if_false;
+ if_false = tmp;
+ }
+ else
+ return false;
+
+ if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
+ is_min = true;
+ else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
+ is_min = false;
+ else
+ return false;
+
+ mode = GET_MODE (dest);
+
+ /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
+ but MODE may be a vector mode and thus not appropriate. */
+ if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+ {
+ int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
+ rtvec v;
+
+ if_true = force_reg (mode, if_true);
+ v = gen_rtvec (2, if_true, if_false);
+ tmp = gen_rtx_UNSPEC (mode, v, u);
+ }
+ else
+ {
+ code = is_min ? SMIN : SMAX;
+ tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+ return true;
+}
+
+static void
+ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
+ rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx t1, t2, t3, x;
+
+ cmp_op0 = force_reg (mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, mode))
+ cmp_op1 = force_reg (mode, cmp_op1);
+
+ if (optimize
+ || reg_overlap_mentioned_p (dest, op_true)
+ || reg_overlap_mentioned_p (dest, op_false))
+ t1 = gen_reg_rtx (mode);
+ else
+ t1 = dest;
+
+ x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+ gcc_assert (sse_comparison_operator (x, VOIDmode));
+ emit_insn (gen_rtx_SET (VOIDmode, t1, x));
+
+ if (op_false == CONST0_RTX (mode))
+ {
+ op_true = force_reg (mode, op_true);
+ x = gen_rtx_AND (mode, t1, op_true);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else if (op_true == CONST0_RTX (mode))
+ {
+ op_false = force_reg (mode, op_false);
+ x = gen_rtx_NOT (mode, t1);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else
+ {
+ op_true = force_reg (mode, op_true);
+ op_false = force_reg (mode, op_false);
+
+ t2 = gen_reg_rtx (mode);
+ if (optimize)
+ t3 = gen_reg_rtx (mode);
+ else
+ t3 = dest;
+
+ x = gen_rtx_AND (mode, op_true, t1);
+ emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+
+ x = gen_rtx_NOT (mode, t1);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+
+ x = gen_rtx_IOR (mode, t3, t2);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
int
ix86_expand_fp_movcc (rtx operands[])
{
@@ -10034,88 +10208,30 @@ ix86_expand_fp_movcc (rtx operands[])
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
{
- rtx cmp_op0, cmp_op1, if_true, if_false;
- rtx clob;
- enum machine_mode vmode, cmode;
- bool is_minmax = false;
-
- cmp_op0 = ix86_compare_op0;
- cmp_op1 = ix86_compare_op1;
- if_true = operands[2];
- if_false = operands[3];
+ enum machine_mode cmode;
/* Since we've no cmove for sse registers, don't force bad register
allocation just to gain access to it. Deny movcc when the
comparison mode doesn't match the move mode. */
- cmode = GET_MODE (cmp_op0);
+ cmode = GET_MODE (ix86_compare_op0);
if (cmode == VOIDmode)
- cmode = GET_MODE (cmp_op1);
+ cmode = GET_MODE (ix86_compare_op1);
if (cmode != mode)
return 0;
- /* We have no LTGT as an operator. We could implement it with
- NE & ORDERED, but this requires an extra temporary. It's
- not clear that it's worth it. */
- if (code == LTGT || code == UNEQ)
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &ix86_compare_op0,
+ &ix86_compare_op1);
+ if (code == UNKNOWN)
return 0;
- /* Massage condition to satisfy sse_comparison_operator. Try
- to canonicalize the destination operand to be first in the
- comparison - this helps reload to avoid extra moves. */
- if (!sse_comparison_operator (operands[1], VOIDmode)
- || (COMMUTATIVE_P (operands[1])
- && rtx_equal_p (operands[0], cmp_op1)))
- {
- tmp = cmp_op0;
- cmp_op0 = cmp_op1;
- cmp_op1 = tmp;
- code = swap_condition (code);
- }
-
- /* Detect conditional moves that exactly match min/max operational
- semantics. Note that this is IEEE safe, as long as we don't
- interchange the operands. Which is why we keep this in the form
- if an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
- if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
- {
- if (((cmp_op0 == if_true && cmp_op1 == if_false)
- || (cmp_op0 == if_false && cmp_op1 == if_true)))
- {
- is_minmax = true;
- if (code == UNGE)
- {
- code = LT;
- tmp = if_true;
- if_true = if_false;
- if_false = tmp;
- }
- }
- }
-
- if (mode == SFmode)
- vmode = V4SFmode;
- else if (mode == DFmode)
- vmode = V2DFmode;
- else
- gcc_unreachable ();
-
- cmp_op0 = force_reg (mode, cmp_op0);
- if (!nonimmediate_operand (cmp_op1, mode))
- cmp_op1 = force_reg (mode, cmp_op1);
-
- tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
- gcc_assert (sse_comparison_operator (tmp, VOIDmode));
-
- tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
- tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-
- if (!is_minmax)
- {
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
- }
+ if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2],
+ operands[3]))
+ return 1;
- emit_insn (tmp);
+ ix86_expand_sse_movcc (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2], operands[3]);
return 1;
}
@@ -10166,100 +10282,6 @@ ix86_expand_fp_movcc (rtx operands[])
return 1;
}
-void
-ix86_split_sse_movcc (rtx operands[])
-{
- rtx dest, scratch, cmp, op_true, op_false, x;
- enum machine_mode mode, vmode;
-
- /* Note that the operator CMP has been set up with matching constraints
- such that dest is valid for the comparison. Unless one of the true
- or false operands are zero, the true operand has already been placed
- in SCRATCH. */
- dest = operands[0];
- scratch = operands[1];
- op_true = operands[2];
- op_false = operands[3];
- cmp = operands[4];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (scratch);
-
- /* We need to make sure that the TRUE and FALSE operands are out of the
- way of the destination. Marking the destination earlyclobber doesn't
- work, since we want matching constraints for the actual comparison, so
- at some point we always wind up having to do a copy ourselves here.
- We very much prefer the TRUE value to be in SCRATCH. If it turns out
- that FALSE overlaps DEST, then we invert the comparison so that we
- still only have to do one move. */
- if (rtx_equal_p (op_false, dest))
- {
- enum rtx_code code;
-
- if (rtx_equal_p (op_true, dest))
- {
- /* ??? Really ought not happen. It means some optimizer managed
- to prove the operands were identical, but failed to fold the
- conditional move to a straight move. Do so here, because
- otherwise we'll generate incorrect code. And since they're
- both already in the destination register, nothing to do. */
- return;
- }
-
- x = gen_rtx_REG (mode, REGNO (scratch));
- emit_move_insn (x, op_false);
- op_false = op_true;
- op_true = x;
-
- code = GET_CODE (cmp);
- code = reverse_condition_maybe_unordered (code);
- cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
- }
- else if (op_true == CONST0_RTX (mode))
- ;
- else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
- ;
- else
- {
- x = gen_rtx_REG (mode, REGNO (scratch));
- emit_move_insn (x, op_true);
- op_true = x;
- }
-
- emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
- dest = simplify_gen_subreg (vmode, dest, mode, 0);
-
- if (op_false == CONST0_RTX (mode))
- {
- op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
- x = gen_rtx_AND (vmode, dest, op_true);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- else
- {
- op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
-
- if (op_true == CONST0_RTX (mode))
- {
- x = gen_rtx_NOT (vmode, dest);
- x = gen_rtx_AND (vmode, x, op_false);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- else
- {
- x = gen_rtx_AND (vmode, scratch, dest);
- emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
-
- x = gen_rtx_NOT (vmode, dest);
- x = gen_rtx_AND (vmode, x, op_false);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-
- x = gen_rtx_IOR (vmode, dest, scratch);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- }
-}
-
/* Expand conditional increment or decrement using adb/sbb instructions.
The default case using setcc followed by the conditional move can be
done by generic code. */
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.627
diff -u -p -d -r1.627 i386.md
--- config/i386/i386.md 12 Apr 2005 18:14:54 -0000 1.627
+++ config/i386/i386.md 13 Apr 2005 04:53:14 -0000
@@ -104,6 +104,8 @@
; Generic math support
(UNSPEC_COPYSIGN 50)
+ (UNSPEC_IEEE_MIN 51) ; not commutative
+ (UNSPEC_IEEE_MAX 52) ; not commutative
; x87 Floating point
(UNSPEC_SIN 60)
@@ -12462,17 +12464,14 @@
;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
;; subsequent logical operations are used to imitate conditional moves.
;; 0xffffffff is NaN, but not in normalized form, so we can't represent
-;; it directly. Further holding this value in pseudo register might bring
-;; problem in implicit normalization in spill code.
-;; So we don't define FLOAT_STORE_FLAG_VALUE and create these
-;; instructions after reload by splitting the conditional move patterns.
+;; it directly.
(define_insn "*sse_setccsf"
[(set (match_operand:SF 0 "register_operand" "=x")
(match_operator:SF 1 "sse_comparison_operator"
[(match_operand:SF 2 "register_operand" "0")
(match_operand:SF 3 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE && reload_completed"
+ "TARGET_SSE"
"cmp%D1ss\t{%3, %0|%0, %3}"
[(set_attr "type" "ssecmp")
(set_attr "mode" "SF")])
@@ -12482,7 +12481,7 @@
(match_operator:DF 1 "sse_comparison_operator"
[(match_operand:DF 2 "register_operand" "0")
(match_operand:DF 3 "nonimmediate_operand" "Ym")]))]
- "TARGET_SSE2 && reload_completed"
+ "TARGET_SSE2"
"cmp%D1sd\t{%3, %0|%0, %3}"
[(set_attr "type" "ssecmp")
(set_attr "mode" "DF")])
@@ -17707,51 +17706,6 @@
"(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
"if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-;; These versions of min/max are aware of the instruction's behavior
-;; wrt -0.0 and NaN inputs. If we don't care about either, then we
-;; should have used the smin/smax expanders in the first place.
-(define_insn "*movsfcc_1_sse_min"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (if_then_else:SF
- (lt:SF (match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_SSE_MATH"
- "minss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_insn "*movsfcc_1_sse_max"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (if_then_else:SF
- (lt:SF (match_operand:SF 2 "nonimmediate_operand" "xm")
- (match_operand:SF 1 "nonimmediate_operand" "0"))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_SSE_MATH"
- "maxss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_insn_and_split "*movsfcc_1_sse"
- [(set (match_operand:SF 0 "register_operand" "=x,x,x")
- (if_then_else:SF
- (match_operator:SF 4 "sse_comparison_operator"
- [(match_operand:SF 5 "register_operand" "0,0,0")
- (match_operand:SF 6 "nonimmediate_operand" "xm,xm,xm")])
- (match_operand:SF 2 "reg_or_0_operand" "C,x,x")
- (match_operand:SF 3 "reg_or_0_operand" "x,C,x")))
- (clobber (match_scratch:V4SF 1 "=&x,&x,&x"))]
- "TARGET_SSE_MATH"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- ix86_split_sse_movcc (operands);
- DONE;
-})
-
(define_insn "*movsfcc_1_387"
[(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f")
(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
@@ -17776,51 +17730,6 @@
"(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
"if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-;; These versions of min/max are aware of the instruction's behavior
-;; wrt -0.0 and NaN inputs. If we don't care about either, then we
-;; should have used the smin/smax expanders in the first place.
-(define_insn "*movdfcc_1_sse_min"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (if_then_else:DF
- (lt:DF (match_operand:DF 1 "register_operand" "0")
- (match_operand:DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "minsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
-(define_insn "*movdfcc_1_sse_max"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (if_then_else:DF
- (lt:DF (match_operand:DF 2 "nonimmediate_operand" "xm")
- (match_operand:DF 1 "nonimmediate_operand" "0"))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "maxsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
-(define_insn_and_split "*movdfcc_1_sse"
- [(set (match_operand:DF 0 "register_operand" "=x,x,x")
- (if_then_else:DF
- (match_operator:DF 4 "sse_comparison_operator"
- [(match_operand:DF 5 "register_operand" "0,0,0")
- (match_operand:DF 6 "nonimmediate_operand" "xm,xm,xm")])
- (match_operand:DF 2 "reg_or_0_operand" "C,x,x")
- (match_operand:DF 3 "reg_or_0_operand" "x,C,x")))
- (clobber (match_scratch:V2DF 1 "=&x,&x,&x"))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- ix86_split_sse_movcc (operands);
- DONE;
-})
-
(define_insn "*movdfcc_1"
[(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
@@ -17935,6 +17844,52 @@
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "*ieee_sminsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE_MATH"
+ "minss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_insn "*ieee_smaxsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE_MATH"
+ "maxss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_insn "*ieee_smindf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "minsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_insn "*ieee_smaxdf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "maxsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
;; Conditional addition patterns
(define_expand "addqicc"
[(match_operand:QI 0 "register_operand" "")
Index: config/i386/sse.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/sse.md,v
retrieving revision 1.7
diff -u -p -d -r1.7 sse.md
--- config/i386/sse.md 23 Jan 2005 15:05:45 -0000 1.7
+++ config/i386/sse.md 13 Apr 2005 04:53:15 -0000
@@ -773,6 +773,47 @@
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
+;; Also define scalar versions. These are used for abs, neg, and
+;; conditional move. Using subregs into vector modes causes register
+;; allocation lossage. These patterns do not allow memory operands
+;; because the native instructions read the full 128-bits.
+
+(define_insn "*andsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (and:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "andps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*nandsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "andnps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*iorsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (ior:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "orps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*xorsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (xor:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "xorps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
@@ -1624,7 +1665,7 @@
[(set (match_operand:V2DF 0 "register_operand" "=x")
(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (AND, V4SFmode, operands)"
+ "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
"andpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
@@ -1670,6 +1711,47 @@
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
+;; Also define scalar versions. These are used for abs, neg, and
+;; conditional move. Using subregs into vector modes causes register
+;; allocation lossage. These patterns do not allow memory operands
+;; because the native instructions read the full 128-bits.
+
+(define_insn "*anddf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (and:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "andpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*nanddf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "andnpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*iordf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (ior:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "orpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*xordf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (xor:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "xorpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations