This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[4.0] fix PR 21169


 This patch is a backport from mainline that fixes bug PR 21169.
Built and tested on i686-pc-linux-gnu and x86_64-unknown-linux-gnu.
 Here is the original patch:

 2005-04-12  Richard Henderson  <rth@redhat.com>

        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ...
        (ix86_expand_sse_fp_minmax): ... from ...
diff -ruN gcc-4.0.2-20060112-orig/gcc/config/i386/i386.c gcc-4.0.2-20060112/gcc/config/i386/i386.c
--- gcc-4.0.2-20060112-orig/gcc/config/i386/i386.c	2006-02-06 12:24:52.000000000 +0300
+++ gcc-4.0.2-20060112/gcc/config/i386/i386.c	2006-02-06 12:42:46.000000000 +0300
@@ -9999,6 +9999,181 @@
   return 1; /* DONE */
 }
 
+
+/* Swap, force into registers, or otherwise massage the two operands
+   to an sse comparison with a mask result.  Thus we differ a bit from
+   ix86_prepare_fp_compare_args which expects to produce a flags result.
+
+   The DEST operand exists to help determine whether to commute commutative
+   operators.  The POP0/POP1 operands are updated in place.  The new
+   comparison code is returned, or UNKNOWN if not implementable.  */
+
+static enum rtx_code
+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
+                                  rtx *pop0, rtx *pop1)
+{
+  rtx tmp;
+
+  switch (code)
+    {
+    case LTGT:
+    case UNEQ:
+      /* We have no LTGT as an operator.  We could implement it with
+         NE & ORDERED, but this requires an extra temporary.  It's
+         not clear that it's worth it.  */
+      return UNKNOWN;
+
+    case LT:
+    case LE:
+    case UNGT:
+    case UNGE:
+      /* These are supported directly.  */
+      break;
+
+    case EQ:
+    case NE:
+    case UNORDERED:
+    case ORDERED:
+      /* For commutative operators, try to canonicalize the destination
+         operand to be first in the comparison - this helps reload to
+         avoid extra moves.  */
+      if (!dest || !rtx_equal_p (dest, *pop1))
+        break;
+      /* FALLTHRU */
+
+    case GE:
+    case GT:
+    case UNLE:
+    case UNLT:
+      /* These are not supported directly.  Swap the comparison operands
+         to transform into something that is supported.  */
+      tmp = *pop0;
+      *pop0 = *pop1;
+      *pop1 = tmp;
+      code = swap_condition (code);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return code;
+}
+
+/* Detect conditional moves that exactly match min/max operational
+   semantics.  Note that this is IEEE safe, as long as we don't
+   interchange the operands.
+
+   Returns FALSE if this conditional move doesn't match a MIN/MAX,
+   and TRUE if the operation is successful and instructions are emitted.  */
+
+static bool
+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
+                           rtx cmp_op1, rtx if_true, rtx if_false)
+{
+  enum machine_mode mode;
+  bool is_min;
+  rtx tmp;
+
+  if (code == LT)
+    ;
+  else if (code == UNGE)
+    {
+      tmp = if_true;
+      if_true = if_false;
+      if_false = tmp;
+    }
+  else
+    return false;
+
+  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
+    is_min = true;
+  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
+    is_min = false;
+  else
+    return false;
+
+  mode = GET_MODE (dest);
+
+  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
+     but MODE may be a vector mode and thus not appropriate.  */
+  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+    {
+      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
+      rtvec v;
+
+      if_true = force_reg (mode, if_true);
+      v = gen_rtvec (2, if_true, if_false);
+      tmp = gen_rtx_UNSPEC (mode, v, u);
+    }
+  else
+    {
+      code = is_min ? SMIN : SMAX;
+      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
+    }
+
+  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+  return true;
+}
+
+static void
+ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
+                       rtx op_true, rtx op_false)
+{
+  enum machine_mode mode = GET_MODE (dest);
+  rtx t1, t2, t3, x;
+
+  cmp_op0 = force_reg (mode, cmp_op0);
+  if (!nonimmediate_operand (cmp_op1, mode))
+    cmp_op1 = force_reg (mode, cmp_op1);
+
+  if (optimize
+      || reg_overlap_mentioned_p (dest, op_true)
+      || reg_overlap_mentioned_p (dest, op_false))
+    t1 = gen_reg_rtx (mode);
+  else
+    t1 = dest;
+
+  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+  gcc_assert (sse_comparison_operator (x, VOIDmode));
+  emit_insn (gen_rtx_SET (VOIDmode, t1, x));
+
+  if (op_false == CONST0_RTX (mode))
+    {
+      op_true = force_reg (mode, op_true);
+      x = gen_rtx_AND (mode, t1, op_true);
+      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+    }
+  else if (op_true == CONST0_RTX (mode))
+    {
+      op_false = force_reg (mode, op_false);
+      x = gen_rtx_NOT (mode, t1);
+      x = gen_rtx_AND (mode, x, op_false);
+      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+    }
+  else
+    {
+      op_true = force_reg (mode, op_true);
+      op_false = force_reg (mode, op_false);
+
+      t2 = gen_reg_rtx (mode);
+      if (optimize)
+        t3 = gen_reg_rtx (mode);
+      else
+        t3 = dest;
+
+      x = gen_rtx_AND (mode, op_true, t1);
+      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+
+      x = gen_rtx_NOT (mode, t1);
+      x = gen_rtx_AND (mode, x, op_false);
+      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+
+      x = gen_rtx_IOR (mode, t3, t2);
+      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+    }
+}
+
 int
 ix86_expand_fp_movcc (rtx operands[])
 {
@@ -10008,88 +10183,30 @@
 
   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
     {
-      rtx cmp_op0, cmp_op1, if_true, if_false;
-      rtx clob;
-      enum machine_mode vmode, cmode;
-      bool is_minmax = false;
-
-      cmp_op0 = ix86_compare_op0;
-      cmp_op1 = ix86_compare_op1;
-      if_true = operands[2];
-      if_false = operands[3];
+      enum machine_mode cmode;
 
       /* Since we've no cmove for sse registers, don't force bad register
-	 allocation just to gain access to it.  Deny movcc when the
-	 comparison mode doesn't match the move mode.  */
-      cmode = GET_MODE (cmp_op0);
+         allocation just to gain access to it.  Deny movcc when the
+         comparison mode doesn't match the move mode.  */
+      cmode = GET_MODE (ix86_compare_op0);
       if (cmode == VOIDmode)
-	cmode = GET_MODE (cmp_op1);
+        cmode = GET_MODE (ix86_compare_op1);
       if (cmode != mode)
-	return 0;
+        return 0;
 
-      /* We have no LTGT as an operator.  We could implement it with
-	 NE & ORDERED, but this requires an extra temporary.  It's
-	 not clear that it's worth it.  */
-      if (code == LTGT || code == UNEQ)
-	return 0;
+      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+                                               &ix86_compare_op0,
+                                               &ix86_compare_op1);
+      if (code == UNKNOWN)
+        return 0;
+
+      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
+                                     ix86_compare_op1, operands[2],
+                                     operands[3]))
+        return 1;
 
-      /* Massage condition to satisfy sse_comparison_operator.  Try
-	 to canonicalize the destination operand to be first in the
-	 comparison - this helps reload to avoid extra moves.  */
-      if (!sse_comparison_operator (operands[1], VOIDmode)
-	  || (COMMUTATIVE_P (operands[1])
-	      && rtx_equal_p (operands[0], cmp_op1)))
-	{
-	  tmp = cmp_op0;
-	  cmp_op0 = cmp_op1;
-	  cmp_op1 = tmp;
-	  code = swap_condition (code);
-	}
-
-      /* Detect conditional moves that exactly match min/max operational
-	 semantics.  Note that this is IEEE safe, as long as we don't
-	 interchange the operands.  Which is why we keep this in the form
-	 if an IF_THEN_ELSE instead of reducing to SMIN/SMAX.  */
-      if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
-	{
-	  if (((cmp_op0 == if_true && cmp_op1 == if_false)
-	      || (cmp_op0 == if_false && cmp_op1 == if_true)))
-	    {
-	      is_minmax = true;
-	      if (code == UNGE)
-		{
-		  code = LT;
-		  tmp = if_true;
-		  if_true = if_false;
-		  if_false = tmp;
-		}
-	    }
-	}
-
-      if (mode == SFmode)
-	vmode = V4SFmode;
-      else if (mode == DFmode)
-	vmode = V2DFmode;
-      else
-	gcc_unreachable ();
-
-      cmp_op0 = force_reg (mode, cmp_op0);
-      if (!nonimmediate_operand (cmp_op1, mode))
-	cmp_op1 = force_reg (mode, cmp_op1);
-
-      tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
-      gcc_assert (sse_comparison_operator (tmp, VOIDmode));
-
-      tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
-      tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-
-      if (!is_minmax)
-	{
-	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
-	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
-	}
-
-      emit_insn (tmp);
+      ix86_expand_sse_movcc (operands[0], code, ix86_compare_op0,
+                             ix86_compare_op1, operands[2], operands[3]);
       return 1;
     }
 
@@ -10104,7 +10221,7 @@
   if (!fcmov_comparison_operator (compare_op, VOIDmode))
     {
       if (second_test != NULL || bypass_test != NULL)
-	abort ();
+        abort ();
       tmp = gen_reg_rtx (QImode);
       ix86_expand_setcc (code, tmp);
       code = NE;
@@ -10126,115 +10243,20 @@
     }
 
   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
-						operands[2], operands[3])));
+                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
+                                                operands[2], operands[3])));
   if (bypass_test)
     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
-						  operands[3], operands[0])));
+                            gen_rtx_IF_THEN_ELSE (mode, bypass_test,
+                                                  operands[3], operands[0])));
   if (second_test)
     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-			    gen_rtx_IF_THEN_ELSE (mode, second_test,
-						  operands[2], operands[0])));
+                            gen_rtx_IF_THEN_ELSE (mode, second_test,
+                                                  operands[2], operands[0])));
 
   return 1;
 }
 
-void
-ix86_split_sse_movcc (rtx operands[])
-{
-  rtx dest, scratch, cmp, op_true, op_false, x;
-  enum machine_mode mode, vmode;
-
-  /* Note that the operator CMP has been set up with matching constraints
-     such that dest is valid for the comparison.  Unless one of the true
-     or false operands are zero, the true operand has already been placed
-     in SCRATCH.  */
-  dest = operands[0];
-  scratch = operands[1];
-  op_true = operands[2];
-  op_false = operands[3];
-  cmp = operands[4];
-
-  mode = GET_MODE (dest);
-  vmode = GET_MODE (scratch);
-
-  /* We need to make sure that the TRUE and FALSE operands are out of the
-     way of the destination.  Marking the destination earlyclobber doesn't
-     work, since we want matching constraints for the actual comparison, so
-     at some point we always wind up having to do a copy ourselves here.
-     We very much prefer the TRUE value to be in SCRATCH.  If it turns out
-     that FALSE overlaps DEST, then we invert the comparison so that we
-     still only have to do one move.  */
-  if (rtx_equal_p (op_false, dest))
-    {
-      enum rtx_code code;
-
-      if (rtx_equal_p (op_true, dest))
-	{
-	  /* ??? Really ought not happen.  It means some optimizer managed
-	     to prove the operands were identical, but failed to fold the
-	     conditional move to a straight move.  Do so here, because 
-	     otherwise we'll generate incorrect code.  And since they're
-	     both already in the destination register, nothing to do.  */
-	  emit_note (NOTE_INSN_DELETED);
-	  return;
-	}
-
-      x = gen_rtx_REG (mode, REGNO (scratch));
-      emit_move_insn (x, op_false);
-      op_false = op_true;
-      op_true = x;
-
-      code = GET_CODE (cmp);
-      code = reverse_condition_maybe_unordered (code);
-      cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
-    }
-  else if (op_true == CONST0_RTX (mode))
-    ;
-  else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
-    ;
-  else
-    {
-      x = gen_rtx_REG (mode, REGNO (scratch));
-      emit_move_insn (x, op_true);
-      op_true = x;
-    }
-
-  emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
-  dest = simplify_gen_subreg (vmode, dest, mode, 0);
-
-  if (op_false == CONST0_RTX (mode))
-    {
-      op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
-      x = gen_rtx_AND (vmode, dest, op_true);
-      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-    }
-  else
-    {
-      op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
-
-      if (op_true == CONST0_RTX (mode))
-	{
-	  x = gen_rtx_NOT (vmode, dest);
-	  x = gen_rtx_AND (vmode, x, op_false);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-	}
-      else
-	{
-	  x = gen_rtx_AND (vmode, scratch, dest);
-	  emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
-
-	  x = gen_rtx_NOT (vmode, dest);
-	  x = gen_rtx_AND (vmode, x, op_false);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-
-	  x = gen_rtx_IOR (vmode, dest, scratch);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-	}
-    }
-}
-
 /* Expand conditional increment or decrement using adb/sbb instructions.
    The default case using setcc followed by the conditional move can be
    done by generic code.  */
diff -ruN gcc-4.0.2-20060112-orig/gcc/config/i386/i386.md gcc-4.0.2-20060112/gcc/config/i386/i386.md
--- gcc-4.0.2-20060112-orig/gcc/config/i386/i386.md	2006-02-06 12:24:52.000000000 +0300
+++ gcc-4.0.2-20060112/gcc/config/i386/i386.md	2006-02-06 13:04:16.000000000 +0300
@@ -114,6 +114,10 @@
    (UNSPEC_LDQQU		76)
    (UNSPEC_MOVDDUP		77)
 
+   ; Generic math support
+   (UNSPEC_IEEE_MIN             111)     ; not commutative
+   (UNSPEC_IEEE_MAX             112)     ; not commutative
+
    ; x87 Floating point
    (UNSPEC_FPATAN		65)
    (UNSPEC_FYL2X		66)
@@ -12541,17 +12545,14 @@
 ;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
 ;; subsequent logical operations are used to imitate conditional moves.
 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
-;; it directly.  Further holding this value in pseudo register might bring
-;; problem in implicit normalization in spill code.
-;; So we don't define FLOAT_STORE_FLAG_VALUE and create these
-;; instructions after reload by splitting the conditional move patterns.
+;; it directly. 
 
 (define_insn "*sse_setccsf"
   [(set (match_operand:SF 0 "register_operand" "=x")
 	(match_operator:SF 1 "sse_comparison_operator"
 	  [(match_operand:SF 2 "register_operand" "0")
 	   (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE && reload_completed"
+  "TARGET_SSE"
   "cmp%D1ss\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "mode" "SF")])
@@ -12561,7 +12562,7 @@
 	(match_operator:DF 1 "sse_comparison_operator"
 	  [(match_operand:DF 2 "register_operand" "0")
 	   (match_operand:DF 3 "nonimmediate_operand" "Ym")]))]
-  "TARGET_SSE2 && reload_completed"
+  "TARGET_SSE2"
   "cmp%D1sd\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "mode" "DF")])
@@ -17679,51 +17680,6 @@
   "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
   "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
 
-;; These versions of min/max are aware of the instruction's behavior
-;; wrt -0.0 and NaN inputs.  If we don't care about either, then we
-;; should have used the smin/smax expanders in the first place.
-(define_insn "*movsfcc_1_sse_min"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(if_then_else:SF
-	  (lt:SF (match_operand:SF 1 "register_operand" "0")
-		 (match_operand:SF 2 "nonimmediate_operand" "xm"))
-	  (match_dup 1)
-	  (match_dup 2)))]
-  "TARGET_SSE_MATH"
-  "minss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn "*movsfcc_1_sse_max"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-	(if_then_else:SF
-	  (lt:SF (match_operand:SF 2 "nonimmediate_operand" "xm")
-		 (match_operand:SF 1 "nonimmediate_operand" "0"))
-	  (match_dup 1)
-	  (match_dup 2)))]
-  "TARGET_SSE_MATH"
-  "maxss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn_and_split "*movsfcc_1_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x,x,x")
-	(if_then_else:SF
-	  (match_operator:SF 4 "sse_comparison_operator"
-	    [(match_operand:SF 5 "register_operand" "0,0,0")
-	     (match_operand:SF 6 "nonimmediate_operand" "xm,xm,xm")])
-	  (match_operand:SF 2 "reg_or_0_operand" "C,x,x")
-	  (match_operand:SF 3 "reg_or_0_operand" "x,C,x")))
-   (clobber (match_scratch:V4SF 1 "=&x,&x,&x"))]
-  "TARGET_SSE_MATH"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-{
-  ix86_split_sse_movcc (operands);
-  DONE;
-})
-
 (define_insn "*movsfcc_1_387"
   [(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f")
 	(if_then_else:SF (match_operator 1 "fcmov_comparison_operator" 
@@ -17748,51 +17704,6 @@
   "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
   "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
 
-;; These versions of min/max are aware of the instruction's behavior
-;; wrt -0.0 and NaN inputs.  If we don't care about either, then we
-;; should have used the smin/smax expanders in the first place.
-(define_insn "*movdfcc_1_sse_min"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(if_then_else:DF
-	  (lt:DF (match_operand:DF 1 "register_operand" "0")
-		 (match_operand:DF 2 "nonimmediate_operand" "xm"))
-	  (match_dup 1)
-	  (match_dup 2)))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "minsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-(define_insn "*movdfcc_1_sse_max"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-	(if_then_else:DF
-	  (lt:DF (match_operand:DF 2 "nonimmediate_operand" "xm")
-		 (match_operand:DF 1 "nonimmediate_operand" "0"))
-	  (match_dup 1)
-	  (match_dup 2)))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "maxsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-(define_insn_and_split "*movdfcc_1_sse"
-  [(set (match_operand:DF 0 "register_operand" "=x,x,x")
-	(if_then_else:DF
-	  (match_operator:DF 4 "sse_comparison_operator"
-	    [(match_operand:DF 5 "register_operand" "0,0,0")
-	     (match_operand:DF 6 "nonimmediate_operand" "xm,xm,xm")])
-	  (match_operand:DF 2 "reg_or_0_operand" "C,x,x")
-	  (match_operand:DF 3 "reg_or_0_operand" "x,C,x")))
-   (clobber (match_scratch:V2DF 1 "=&x,&x,&x"))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-{
-  ix86_split_sse_movcc (operands);
-  DONE;
-})
-
 (define_insn "*movdfcc_1"
   [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f")
 	(if_then_else:DF (match_operator 1 "fcmov_comparison_operator" 
@@ -17907,6 +17818,52 @@
   [(set_attr "type" "sseadd")
    (set_attr "mode" "DF")])
 
+;; These versions of the min/max patterns implement exactly the operations
+;;   min = (op1 < op2 ? op1 : op2)
+;;   max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "*ieee_sminsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+        (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+                    (match_operand:SF 2 "nonimmediate_operand" "xm")]
+                   UNSPEC_IEEE_MIN))]
+  "TARGET_SSE_MATH"
+  "minss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
+
+(define_insn "*ieee_smaxsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+        (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+                    (match_operand:SF 2 "nonimmediate_operand" "xm")]
+                   UNSPEC_IEEE_MAX))]
+  "TARGET_SSE_MATH"
+  "maxss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
+
+(define_insn "*ieee_smindf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+        (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+                    (match_operand:DF 2 "nonimmediate_operand" "xm")]
+                   UNSPEC_IEEE_MIN))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "minsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+(define_insn "*ieee_smaxdf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+        (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+                    (match_operand:DF 2 "nonimmediate_operand" "xm")]
+                   UNSPEC_IEEE_MAX))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "maxsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
 ;; Conditional addition patterns
 (define_expand "addqicc"
   [(match_operand:QI 0 "register_operand" "")
diff -ruN gcc-4.0.2-20060112-orig/gcc/config/i386/i386-protos.h gcc-4.0.2-20060112/gcc/config/i386/i386-protos.h
--- gcc-4.0.2-20060112-orig/gcc/config/i386/i386-protos.h	2006-02-06 12:24:52.000000000 +0300
+++ gcc-4.0.2-20060112/gcc/config/i386/i386-protos.h	2006-02-06 12:52:24.000000000 +0300
@@ -103,7 +103,6 @@
 extern int ix86_expand_setcc (enum rtx_code, rtx);
 extern int ix86_expand_int_movcc (rtx[]);
 extern int ix86_expand_fp_movcc (rtx[]);
-extern void ix86_split_sse_movcc (rtx[]);
 extern int ix86_expand_int_addcc (rtx[]);
 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
diff -ruN gcc-4.0.2-20060112-orig/gcc/config/i386/sse.md gcc-4.0.2-20060112/gcc/config/i386/sse.md
--- gcc-4.0.2-20060112-orig/gcc/config/i386/sse.md	2006-02-06 12:24:52.000000000 +0300
+++ gcc-4.0.2-20060112/gcc/config/i386/sse.md	2006-02-06 12:58:22.000000000 +0300
@@ -773,6 +773,48 @@
   [(set_attr "type" "sselog")
    (set_attr "mode" "V4SF")])
 
+;; Also define scalar versions.  These are used for abs, neg, and
+;; conditional move.  Using subregs into vector modes causes register
+;; allocation lossage.  These patterns do not allow memory operands
+;; because the native instructions read the full 128-bits.
+
+(define_insn "*andsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+        (and:SF (match_operand:SF 1 "register_operand" "0")
+                (match_operand:SF 2 "register_operand" "x")))]
+  "TARGET_SSE"
+  "andps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog") 
+   (set_attr "mode" "V4SF")]) 
+
+(define_insn "*nandsf3" 
+  [(set (match_operand:SF 0 "register_operand" "=x") 
+        (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
+                (match_operand:SF 2 "register_operand" "x")))] 
+  "TARGET_SSE"
+  "andnps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "*iorsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+        (ior:SF (match_operand:SF 1 "register_operand" "0")
+                (match_operand:SF 2 "register_operand" "x")))]
+  "TARGET_SSE"
+  "orps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+  
+(define_insn "*xorsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+        (xor:SF (match_operand:SF 1 "register_operand" "0")
+                (match_operand:SF 2 "register_operand" "x")))]
+  "TARGET_SSE"
+  "xorps\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V4SF")])
+
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point conversion operations
@@ -1624,7 +1666,7 @@
   [(set (match_operand:V2DF 0 "register_operand" "=x")
 	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
 		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V4SFmode, operands)"
+  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
   "andpd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
    (set_attr "mode" "V2DF")])
@@ -1670,6 +1712,47 @@
   [(set_attr "type" "sselog")
    (set_attr "mode" "V2DF")])
 
+;; Also define scalar versions.  These are used for abs, neg, and
+;; conditional move.  Using subregs into vector modes causes register
+;; allocation lossage.  These patterns do not allow memory operands
+;; because the native instructions read the full 128-bits.
+
+(define_insn "*anddf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+        (and:DF (match_operand:DF 1 "register_operand" "0")
+                (match_operand:DF 2 "register_operand" "x")))]
+  "TARGET_SSE2"
+  "andpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog") 
+   (set_attr "mode" "V2DF")])
+
+(define_insn "*nanddf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+        (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
+                (match_operand:DF 2 "register_operand" "x")))]
+  "TARGET_SSE2"
+  "andnpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog") 
+   (set_attr "mode" "V2DF")])
+
+(define_insn "*iordf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+        (ior:DF (match_operand:DF 1 "register_operand" "0")
+                (match_operand:DF 2 "register_operand" "x")))]
+  "TARGET_SSE2"
+  "orpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+  
+(define_insn "*xordf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+        (xor:DF (match_operand:DF 1 "register_operand" "0")
+                (match_operand:DF 2 "register_operand" "x")))]
+  "TARGET_SSE2"
+  "xorpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "mode" "V2DF")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel double-precision floating point conversion operations

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]