* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ...
(ix86_expand_sse_fp_minmax): ... from ...
(ix86_expand_fp_movcc): ... here.
(ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc.
* config/i386/i386-protos.h: Update.
* config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New.
(sse_setccsf, sse_setccdf): Allow before reload.
(movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove.
(movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove.
(ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New.
* config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New.
(anddf3, nanddf3, iordf3, xordf3): New.
From-SVN: r98068
+2005-04-12 Richard Henderson <rth@redhat.com>
+
+ * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ...
+ (ix86_expand_sse_fp_minmax): ... from ...
+ (ix86_expand_fp_movcc): ... here.
+ (ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc.
+ * config/i386/i386-protos.h: Update.
+ * config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New.
+ (sse_setccsf, sse_setccdf): Allow before reload.
+ (movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove.
+ (movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove.
+ (ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New.
+ * config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New.
+ (anddf3, nanddf3, iordf3, xordf3): New.
+
2005-04-12 Jeff Law <law@redhat.com>
* Makefile.in (OBJS-common): Add tree-ssa-uncprop.o.
extern int ix86_expand_setcc (enum rtx_code, rtx);
extern int ix86_expand_int_movcc (rtx[]);
extern int ix86_expand_fp_movcc (rtx[]);
-extern void ix86_split_sse_movcc (rtx[]);
extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
return 1; /* DONE */
}
+/* Swap, force into registers, or otherwise massage the two operands
+ to an sse comparison with a mask result. Thus we differ a bit from
+ ix86_prepare_fp_compare_args which expects to produce a flags result.
+
+ The DEST operand exists to help determine whether to commute commutative
+ operators. The POP0/POP1 operands are updated in place. The new
+ comparison code is returned, or UNKNOWN if not implementable. */
+
+static enum rtx_code
+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
+ rtx *pop0, rtx *pop1)
+{
+ rtx tmp;
+
+ switch (code)
+ {
+ case LTGT:
+ case UNEQ:
+ /* We have no LTGT as an operator. We could implement it with
+ NE & ORDERED, but this requires an extra temporary. It's
+ not clear that it's worth it. */
+ return UNKNOWN;
+
+ case LT:
+ case LE:
+ case UNGT:
+ case UNGE:
+ /* These are supported directly. */
+ break;
+
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ /* For commutative operators, try to canonicalize the destination
+ operand to be first in the comparison - this helps reload to
+ avoid extra moves. */
+ if (!dest || !rtx_equal_p (dest, *pop1))
+ break;
+ /* FALLTHRU */
+
+ case GE:
+ case GT:
+ case UNLE:
+ case UNLT:
+ /* These are not supported directly. Swap the comparison operands
+ to transform into something that is supported. */
+ tmp = *pop0;
+ *pop0 = *pop1;
+ *pop1 = tmp;
+ code = swap_condition (code);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return code;
+}
+
+/* Detect conditional moves that exactly match min/max operational
+ semantics. Note that this is IEEE safe, as long as we don't
+ interchange the operands.
+
+ Returns FALSE if this conditional move doesn't match a MIN/MAX,
+ and TRUE if the operation is successful and instructions are emitted. */
+
+static bool
+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
+ rtx cmp_op1, rtx if_true, rtx if_false)
+{
+ enum machine_mode mode;
+ bool is_min;
+ rtx tmp;
+
+ if (code == LT)
+ ;
+ else if (code == UNGE)
+ {
+ tmp = if_true;
+ if_true = if_false;
+ if_false = tmp;
+ }
+ else
+ return false;
+
+ if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
+ is_min = true;
+ else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
+ is_min = false;
+ else
+ return false;
+
+ mode = GET_MODE (dest);
+
+ /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
+ but MODE may be a vector mode and thus not appropriate. */
+ if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+ {
+ int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
+ rtvec v;
+
+ if_true = force_reg (mode, if_true);
+ v = gen_rtvec (2, if_true, if_false);
+ tmp = gen_rtx_UNSPEC (mode, v, u);
+ }
+ else
+ {
+ code = is_min ? SMIN : SMAX;
+ tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+ return true;
+}
+
+static void
+ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
+ rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx t1, t2, t3, x;
+
+ cmp_op0 = force_reg (mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, mode))
+ cmp_op1 = force_reg (mode, cmp_op1);
+
+ if (optimize
+ || reg_overlap_mentioned_p (dest, op_true)
+ || reg_overlap_mentioned_p (dest, op_false))
+ t1 = gen_reg_rtx (mode);
+ else
+ t1 = dest;
+
+ x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+ gcc_assert (sse_comparison_operator (x, VOIDmode));
+ emit_insn (gen_rtx_SET (VOIDmode, t1, x));
+
+ if (op_false == CONST0_RTX (mode))
+ {
+ op_true = force_reg (mode, op_true);
+ x = gen_rtx_AND (mode, t1, op_true);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else if (op_true == CONST0_RTX (mode))
+ {
+ op_false = force_reg (mode, op_false);
+ x = gen_rtx_NOT (mode, t1);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else
+ {
+ op_true = force_reg (mode, op_true);
+ op_false = force_reg (mode, op_false);
+
+ t2 = gen_reg_rtx (mode);
+ if (optimize)
+ t3 = gen_reg_rtx (mode);
+ else
+ t3 = dest;
+
+ x = gen_rtx_AND (mode, op_true, t1);
+ emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+
+ x = gen_rtx_NOT (mode, t1);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+
+ x = gen_rtx_IOR (mode, t3, t2);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
int
ix86_expand_fp_movcc (rtx operands[])
{
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
{
- rtx cmp_op0, cmp_op1, if_true, if_false;
- rtx clob;
- enum machine_mode vmode, cmode;
- bool is_minmax = false;
-
- cmp_op0 = ix86_compare_op0;
- cmp_op1 = ix86_compare_op1;
- if_true = operands[2];
- if_false = operands[3];
+ enum machine_mode cmode;
/* Since we've no cmove for sse registers, don't force bad register
allocation just to gain access to it. Deny movcc when the
comparison mode doesn't match the move mode. */
- cmode = GET_MODE (cmp_op0);
+ cmode = GET_MODE (ix86_compare_op0);
if (cmode == VOIDmode)
- cmode = GET_MODE (cmp_op1);
+ cmode = GET_MODE (ix86_compare_op1);
if (cmode != mode)
return 0;
- /* We have no LTGT as an operator. We could implement it with
- NE & ORDERED, but this requires an extra temporary. It's
- not clear that it's worth it. */
- if (code == LTGT || code == UNEQ)
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &ix86_compare_op0,
+ &ix86_compare_op1);
+ if (code == UNKNOWN)
return 0;
- /* Massage condition to satisfy sse_comparison_operator. Try
- to canonicalize the destination operand to be first in the
- comparison - this helps reload to avoid extra moves. */
- if (!sse_comparison_operator (operands[1], VOIDmode)
- || (COMMUTATIVE_P (operands[1])
- && rtx_equal_p (operands[0], cmp_op1)))
- {
- tmp = cmp_op0;
- cmp_op0 = cmp_op1;
- cmp_op1 = tmp;
- code = swap_condition (code);
- }
-
- /* Detect conditional moves that exactly match min/max operational
- semantics. Note that this is IEEE safe, as long as we don't
- interchange the operands. Which is why we keep this in the form
- if an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
- if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
- {
- if (((cmp_op0 == if_true && cmp_op1 == if_false)
- || (cmp_op0 == if_false && cmp_op1 == if_true)))
- {
- is_minmax = true;
- if (code == UNGE)
- {
- code = LT;
- tmp = if_true;
- if_true = if_false;
- if_false = tmp;
- }
- }
- }
-
- if (mode == SFmode)
- vmode = V4SFmode;
- else if (mode == DFmode)
- vmode = V2DFmode;
- else
- gcc_unreachable ();
-
- cmp_op0 = force_reg (mode, cmp_op0);
- if (!nonimmediate_operand (cmp_op1, mode))
- cmp_op1 = force_reg (mode, cmp_op1);
-
- tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
- gcc_assert (sse_comparison_operator (tmp, VOIDmode));
-
- tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
- tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-
- if (!is_minmax)
- {
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
- }
+ if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2],
+ operands[3]))
+ return 1;
- emit_insn (tmp);
+ ix86_expand_sse_movcc (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2], operands[3]);
return 1;
}
return 1;
}
-void
-ix86_split_sse_movcc (rtx operands[])
-{
- rtx dest, scratch, cmp, op_true, op_false, x;
- enum machine_mode mode, vmode;
-
- /* Note that the operator CMP has been set up with matching constraints
- such that dest is valid for the comparison. Unless one of the true
- or false operands are zero, the true operand has already been placed
- in SCRATCH. */
- dest = operands[0];
- scratch = operands[1];
- op_true = operands[2];
- op_false = operands[3];
- cmp = operands[4];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (scratch);
-
- /* We need to make sure that the TRUE and FALSE operands are out of the
- way of the destination. Marking the destination earlyclobber doesn't
- work, since we want matching constraints for the actual comparison, so
- at some point we always wind up having to do a copy ourselves here.
- We very much prefer the TRUE value to be in SCRATCH. If it turns out
- that FALSE overlaps DEST, then we invert the comparison so that we
- still only have to do one move. */
- if (rtx_equal_p (op_false, dest))
- {
- enum rtx_code code;
-
- if (rtx_equal_p (op_true, dest))
- {
- /* ??? Really ought not happen. It means some optimizer managed
- to prove the operands were identical, but failed to fold the
- conditional move to a straight move. Do so here, because
- otherwise we'll generate incorrect code. And since they're
- both already in the destination register, nothing to do. */
- return;
- }
-
- x = gen_rtx_REG (mode, REGNO (scratch));
- emit_move_insn (x, op_false);
- op_false = op_true;
- op_true = x;
-
- code = GET_CODE (cmp);
- code = reverse_condition_maybe_unordered (code);
- cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
- }
- else if (op_true == CONST0_RTX (mode))
- ;
- else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
- ;
- else
- {
- x = gen_rtx_REG (mode, REGNO (scratch));
- emit_move_insn (x, op_true);
- op_true = x;
- }
-
- emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
- dest = simplify_gen_subreg (vmode, dest, mode, 0);
-
- if (op_false == CONST0_RTX (mode))
- {
- op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
- x = gen_rtx_AND (vmode, dest, op_true);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- else
- {
- op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
-
- if (op_true == CONST0_RTX (mode))
- {
- x = gen_rtx_NOT (vmode, dest);
- x = gen_rtx_AND (vmode, x, op_false);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- else
- {
- x = gen_rtx_AND (vmode, scratch, dest);
- emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
-
- x = gen_rtx_NOT (vmode, dest);
- x = gen_rtx_AND (vmode, x, op_false);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-
- x = gen_rtx_IOR (vmode, dest, scratch);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- }
-}
-
/* Expand conditional increment or decrement using adb/sbb instructions.
The default case using setcc followed by the conditional move can be
done by generic code. */
; Generic math support
(UNSPEC_COPYSIGN 50)
+ (UNSPEC_IEEE_MIN 51) ; not commutative
+ (UNSPEC_IEEE_MAX 52) ; not commutative
; x87 Floating point
(UNSPEC_SIN 60)
;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
;; subsequent logical operations are used to imitate conditional moves.
;; 0xffffffff is NaN, but not in normalized form, so we can't represent
-;; it directly. Further holding this value in pseudo register might bring
-;; problem in implicit normalization in spill code.
-;; So we don't define FLOAT_STORE_FLAG_VALUE and create these
-;; instructions after reload by splitting the conditional move patterns.
+;; it directly.
(define_insn "*sse_setccsf"
[(set (match_operand:SF 0 "register_operand" "=x")
(match_operator:SF 1 "sse_comparison_operator"
[(match_operand:SF 2 "register_operand" "0")
(match_operand:SF 3 "nonimmediate_operand" "xm")]))]
- "TARGET_SSE && reload_completed"
+ "TARGET_SSE"
"cmp%D1ss\t{%3, %0|%0, %3}"
[(set_attr "type" "ssecmp")
(set_attr "mode" "SF")])
(match_operator:DF 1 "sse_comparison_operator"
[(match_operand:DF 2 "register_operand" "0")
(match_operand:DF 3 "nonimmediate_operand" "Ym")]))]
- "TARGET_SSE2 && reload_completed"
+ "TARGET_SSE2"
"cmp%D1sd\t{%3, %0|%0, %3}"
[(set_attr "type" "ssecmp")
(set_attr "mode" "DF")])
"(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
"if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-;; These versions of min/max are aware of the instruction's behavior
-;; wrt -0.0 and NaN inputs. If we don't care about either, then we
-;; should have used the smin/smax expanders in the first place.
-(define_insn "*movsfcc_1_sse_min"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (if_then_else:SF
- (lt:SF (match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_SSE_MATH"
- "minss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_insn "*movsfcc_1_sse_max"
- [(set (match_operand:SF 0 "register_operand" "=x")
- (if_then_else:SF
- (lt:SF (match_operand:SF 2 "nonimmediate_operand" "xm")
- (match_operand:SF 1 "nonimmediate_operand" "0"))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_SSE_MATH"
- "maxss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "SF")])
-
-(define_insn_and_split "*movsfcc_1_sse"
- [(set (match_operand:SF 0 "register_operand" "=x,x,x")
- (if_then_else:SF
- (match_operator:SF 4 "sse_comparison_operator"
- [(match_operand:SF 5 "register_operand" "0,0,0")
- (match_operand:SF 6 "nonimmediate_operand" "xm,xm,xm")])
- (match_operand:SF 2 "reg_or_0_operand" "C,x,x")
- (match_operand:SF 3 "reg_or_0_operand" "x,C,x")))
- (clobber (match_scratch:V4SF 1 "=&x,&x,&x"))]
- "TARGET_SSE_MATH"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- ix86_split_sse_movcc (operands);
- DONE;
-})
-
(define_insn "*movsfcc_1_387"
[(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f")
(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
"(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
"if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-;; These versions of min/max are aware of the instruction's behavior
-;; wrt -0.0 and NaN inputs. If we don't care about either, then we
-;; should have used the smin/smax expanders in the first place.
-(define_insn "*movdfcc_1_sse_min"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (if_then_else:DF
- (lt:DF (match_operand:DF 1 "register_operand" "0")
- (match_operand:DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "minsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
-(define_insn "*movdfcc_1_sse_max"
- [(set (match_operand:DF 0 "register_operand" "=x")
- (if_then_else:DF
- (lt:DF (match_operand:DF 2 "nonimmediate_operand" "xm")
- (match_operand:DF 1 "nonimmediate_operand" "0"))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "maxsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
-(define_insn_and_split "*movdfcc_1_sse"
- [(set (match_operand:DF 0 "register_operand" "=x,x,x")
- (if_then_else:DF
- (match_operator:DF 4 "sse_comparison_operator"
- [(match_operand:DF 5 "register_operand" "0,0,0")
- (match_operand:DF 6 "nonimmediate_operand" "xm,xm,xm")])
- (match_operand:DF 2 "reg_or_0_operand" "C,x,x")
- (match_operand:DF 3 "reg_or_0_operand" "x,C,x")))
- (clobber (match_scratch:V2DF 1 "=&x,&x,&x"))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- ix86_split_sse_movcc (operands);
- DONE;
-})
-
(define_insn "*movdfcc_1"
[(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "*ieee_sminsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE_MATH"
+ "minss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_insn "*ieee_smaxsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE_MATH"
+ "maxss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
+
+(define_insn "*ieee_smindf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "minsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_insn "*ieee_smaxdf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "maxsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
;; Conditional addition patterns
(define_expand "addqicc"
[(match_operand:QI 0 "register_operand" "")
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
+;; Also define scalar versions.  These are used for abs, neg, and
+;; conditional move.  Using subregs into vector modes causes register
+;; allocation lossage.  These patterns do not allow memory operands
+;; because the native instructions read the full 128 bits.
+
+(define_insn "*andsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (and:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "andps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*nandsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "andnps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*iorsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (ior:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "orps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*xorsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (xor:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "x")))]
+ "TARGET_SSE"
+ "xorps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
[(set (match_operand:V2DF 0 "register_operand" "=x")
(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (AND, V4SFmode, operands)"
+ "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
"andpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
+;; Also define scalar versions.  These are used for abs, neg, and
+;; conditional move.  Using subregs into vector modes causes register
+;; allocation lossage.  These patterns do not allow memory operands
+;; because the native instructions read the full 128 bits.
+
+(define_insn "*anddf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (and:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "andpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*nanddf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "andnpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*iordf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (ior:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "orpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*xordf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (xor:DF (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "register_operand" "x")))]
+ "TARGET_SSE2"
+ "xorpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations