This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
SSE min/max instruction support
- To: rth at cygnus dot com, gcc-patches at gcc dot gnu dot org, patches at x86-64 dot org
- Subject: SSE min/max instruction support
- From: Jan Hubicka <jh at suse dot cz>
- Date: Sat, 24 Feb 2001 02:20:13 +0100
Hi
This patch implements the simple but common case of SSE conditional moves:
support for the min and max instructions. It gives a 10% speedup on the byte
benchmark - whoo hoo, what a bad benchmark that package is :)
Sat Feb 24 02:22:47 CET 2001 Jan Hubicka <jh@suse.cz>
* i386.md (minsf*, mindf*, maxsf*, maxdf*): New instruction patterns and
splitters.
* i386.c (ix86_expand_fp_movcc): Recognize min and max instructions.
*** /p1/x86-64/egcs/gcc/config/i386/i386.c Fri Feb 16 18:36:32 2001
--- i386.c Sat Feb 24 02:17:00 2001
*************** ix86_expand_fp_movcc (operands)
*** 5947,5952 ****
--- 6065,6119 ----
rtx tmp;
rtx compare_op, second_test, bypass_test;
+ /* For SF/DFmode conditional moves based on comparisons
+ in same mode, we may want to use SSE min/max instructions. */
+ if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
+ || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
+ && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0]))
+ {
+ rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
+ code = GET_CODE (operands[1]);
+
+ /* See if we have (cross) match between comparison operands and
+ conditional move operands. */
+ if (rtx_equal_p (operands[2], op1))
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ code = reverse_condition_maybe_unordered (code);
+ }
+ if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
+ {
+ /* Check for min operation. */
+ if (code == LT
+ || (!TARGET_IEEE_FP && (code == LE || code == UNLT || code == UNLE)))
+ {
+ operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
+ if (memory_operand (op0, VOIDmode))
+ op0 = force_reg (GET_MODE (operands[0]), op0);
+ if (GET_MODE (operands[0]) == SFmode)
+ emit_insn (gen_minsf (operands[0], op0, op1));
+ else
+ emit_insn (gen_mindf (operands[0], op0, op1));
+ return 1;
+ }
+ /* Check for max operation. */
+ if (code == GT
+ || (!TARGET_IEEE_FP && (code == GE || code == UNGT || code == UNGE)))
+ {
+ operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
+ if (memory_operand (op0, VOIDmode))
+ op0 = force_reg (GET_MODE (operands[0]), op0);
+ if (GET_MODE (operands[0]) == SFmode)
+ emit_insn (gen_maxsf (operands[0], op0, op1));
+ else
+ emit_insn (gen_maxdf (operands[0], op0, op1));
+ return 1;
+ }
+ }
+ }
+
/* The floating point conditional move instructions don't directly
support conditions resulting from a signed integer comparison. */
*** /p1/x86-64/egcs/gcc/config/i386/i386.md Fri Feb 16 18:34:23 2001
--- i386.md Sat Feb 24 02:23:59 2001
***************
*** 12098,12103 ****
--- 12235,12486 ----
fcmov%f1\\t{%3, %0|%0, %3}"
[(set_attr "type" "fcmov")
(set_attr "mode" "XF")])
+
+ ;; SFmode minimum, enabled when TARGET_SSE && TARGET_IEEE_FP:
+ ;; operand 0 = (operand 1 < operand 2) ? operand 1 : operand 2.
+ ;; Three alternatives; in the last one operand 0 is tied to operand 2
+ ;; instead of operand 1.  The "#" template means the insn must be split
+ ;; before it can be output.
+ (define_insn "minsf"
+ [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x")
+ (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0,0,f#x")
+ (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0"))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE && TARGET_IEEE_FP"
+ "#")
+
+ ;; SFmode minimum, enabled when TARGET_SSE && !TARGET_IEEE_FP.
+ ;; The "%" on operand 1 declares operands 1 and 2 commutative, so only
+ ;; two alternatives are listed.  Output is "#": the insn must be split.
+ (define_insn "*minsf_nonieee"
+ [(set (match_operand:SF 0 "register_operand" "=x#f,f#x")
+ (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "%0,0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm#f,fm#x"))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE && !TARGET_IEEE_FP"
+ "#")
+
+ ;; After reload, when operand 0 satisfies SSE_REG_P, re-emit the SFmode
+ ;; min unchanged except that the CC clobber is dropped; the result is the
+ ;; clobber-free form that *minsf_sse matches.
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "nonimmediate_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "SSE_REG_P (operands[0]) && reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SF (lt (match_dup 1)
+ (match_dup 2))
+ (match_dup 1)
+ (match_dup 2)))])
+
+ ;; After reload, when operand 0 satisfies FP_REG_P (presumably an x87
+ ;; register -- confirm against the FP_REG_P definition), lower the SFmode
+ ;; min to an explicit CCFP compare followed by a conditional move.
+ ;; We can't represent the LT test directly.  Do this by swapping the
+ ;; operands: compare operand 2 against operand 1 and select operand 1 on GE.
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "register_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "FP_REG_P (operands[0]) && reload_completed"
+ [(set (reg:CCFP 17)
+ (compare:CCFP (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (if_then_else:SF (ge (reg:CCFP 17) (const_int 0))
+ (match_dup 1)
+ (match_dup 2)))])
+
+ ;; Post-reload SSE form of the SFmode min (no CC clobber).  Operand 1 is
+ ;; tied to operand 0 by the "0" constraint, so the instruction's source
+ ;; must be operand 2; printing %1 would name the destination register
+ ;; twice and ignore operand 2 entirely.
+ (define_insn "*minsf_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_SSE && reload_completed"
+ "minss\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+ ;; DFmode minimum, enabled when TARGET_SSE2 && TARGET_IEEE_FP:
+ ;; operand 0 = (operand 1 < operand 2) ? operand 1 : operand 2.
+ ;; Three alternatives; in the last one operand 0 is tied to operand 2
+ ;; instead of operand 1.  The "#" template means the insn must be split
+ ;; before it can be output.
+ (define_insn "mindf"
+ [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y")
+ (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0,0,f#Y")
+ (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0"))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE2 && TARGET_IEEE_FP"
+ "#")
+
+ ;; DFmode minimum, enabled when TARGET_SSE2 && !TARGET_IEEE_FP.
+ ;; The "%" on operand 1 declares operands 1 and 2 commutative, so only
+ ;; two alternatives are listed.  Output is "#": the insn must be split.
+ (define_insn "*mindf_nonieee"
+ [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y")
+ (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "%0,0")
+ (match_operand:DF 2 "nonimmediate_operand" "Ym#f,fm#Y"))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE2 && !TARGET_IEEE_FP"
+ "#")
+
+ ;; After reload, when operand 0 satisfies SSE_REG_P, re-emit the DFmode
+ ;; min unchanged except that the CC clobber is dropped; the result is the
+ ;; clobber-free form that *mindf_sse matches.
+ (define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "nonimmediate_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "SSE_REG_P (operands[0]) && reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:DF (lt (match_dup 1)
+ (match_dup 2))
+ (match_dup 1)
+ (match_dup 2)))])
+
+ ;; After reload, when operand 0 satisfies FP_REG_P (presumably an x87
+ ;; register -- confirm against the FP_REG_P definition), lower the DFmode
+ ;; min to an explicit CCFP compare followed by a conditional move.
+ ;; We can't represent the LT test directly.  Do this by swapping the
+ ;; operands: compare operand 2 against operand 1 (not against itself) and
+ ;; select operand 1 on GE, exactly as the SFmode splitter does.
+ (define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "register_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "FP_REG_P (operands[0]) && reload_completed"
+ [(set (reg:CCFP 17)
+ (compare:CCFP (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (if_then_else:DF (ge (reg:CCFP 17) (const_int 0))
+ (match_dup 1)
+ (match_dup 2)))])
+
+ ;; Post-reload SSE2 form of the DFmode min (no CC clobber).  Operand 1 is
+ ;; tied to operand 0 by the "0" constraint, so the instruction's source
+ ;; must be operand 2; printing %1 would name the destination register
+ ;; twice and ignore operand 2 entirely.
+ (define_insn "*mindf_sse"
+ [(set (match_operand:DF 0 "register_operand" "=Y")
+ (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "Ym"))
+ (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_SSE2 && reload_completed"
+ "minsd\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "DF")])
+
+ ;; SFmode maximum, enabled when TARGET_SSE && TARGET_IEEE_FP:
+ ;; operand 0 = (operand 1 > operand 2) ? operand 1 : operand 2.
+ ;; Three alternatives; in the last one operand 0 is tied to operand 2
+ ;; instead of operand 1.  The "#" template means the insn must be split
+ ;; before it can be output.
+ (define_insn "maxsf"
+ [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x")
+ (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0,0,f#x")
+ (match_operand:SF 2 "nonimmediate_operand" "xm#f,fm#x,0"))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE && TARGET_IEEE_FP"
+ "#")
+
+ ;; SFmode maximum, enabled when TARGET_SSE && !TARGET_IEEE_FP.
+ ;; The "%" on operand 1 declares operands 1 and 2 commutative, so only
+ ;; two alternatives are listed.  Output is "#": the insn must be split.
+ (define_insn "*maxsf_nonieee"
+ [(set (match_operand:SF 0 "register_operand" "=x#f,f#x")
+ (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "%0,0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm#f,fm#x"))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE && !TARGET_IEEE_FP"
+ "#")
+
+ ;; After reload, when operand 0 satisfies SSE_REG_P, re-emit the SFmode
+ ;; max unchanged except that the CC clobber is dropped; the result is the
+ ;; clobber-free form that *maxsf_sse matches.
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "nonimmediate_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "SSE_REG_P (operands[0]) && reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SF (gt (match_dup 1)
+ (match_dup 2))
+ (match_dup 1)
+ (match_dup 2)))])
+
+ ;; After reload, when operand 0 satisfies FP_REG_P (presumably an x87
+ ;; register -- confirm against the FP_REG_P definition), lower the SFmode
+ ;; max to a CCFP compare of operand 1 against operand 2 followed by a
+ ;; conditional move that selects operand 1 on GT.
+ (define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "register_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "FP_REG_P (operands[0]) && reload_completed"
+ [(set (reg:CCFP 17)
+ (compare:CCFP (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:SF (gt (reg:CCFP 17) (const_int 0))
+ (match_dup 1)
+ (match_dup 2)))])
+
+ ;; Post-reload SSE form of the SFmode max (no CC clobber).  Operand 1 is
+ ;; tied to operand 0 by the "0" constraint, so the instruction's source
+ ;; must be operand 2; printing %1 would name the destination register
+ ;; twice and ignore operand 2 entirely.
+ (define_insn "*maxsf_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_SSE && reload_completed"
+ "maxss\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+ ;; DFmode maximum, enabled when TARGET_SSE2 && TARGET_IEEE_FP:
+ ;; operand 0 = (operand 1 > operand 2) ? operand 1 : operand 2.
+ ;; Three alternatives; in the last one operand 0 is tied to operand 2
+ ;; instead of operand 1.  The "#" template means the insn must be split
+ ;; before it can be output.
+ (define_insn "maxdf"
+ [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y")
+ (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0,0,f#Y")
+ (match_operand:DF 2 "nonimmediate_operand" "Ym#f,fm#Y,0"))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE2 && TARGET_IEEE_FP"
+ "#")
+
+ ;; DFmode maximum, enabled when TARGET_SSE2 && !TARGET_IEEE_FP.
+ ;; The "%" on operand 1 declares operands 1 and 2 commutative, so only
+ ;; two alternatives are listed.  Output is "#": the insn must be split.
+ (define_insn "*maxdf_nonieee"
+ [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y")
+ (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "%0,0")
+ (match_operand:DF 2 "nonimmediate_operand" "Ym#f,fm#Y"))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "TARGET_SSE2 && !TARGET_IEEE_FP"
+ "#")
+
+ ;; After reload, when operand 0 satisfies SSE_REG_P, re-emit the DFmode
+ ;; max unchanged except that the CC clobber is dropped; the result is the
+ ;; clobber-free form that *maxdf_sse matches.
+ (define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "nonimmediate_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "SSE_REG_P (operands[0]) && reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:DF (gt (match_dup 1)
+ (match_dup 2))
+ (match_dup 1)
+ (match_dup 2)))])
+
+ ;; After reload, when operand 0 satisfies FP_REG_P (presumably an x87
+ ;; register -- confirm against the FP_REG_P definition), lower the DFmode
+ ;; max to a CCFP compare of operand 1 against operand 2 followed by a
+ ;; conditional move that selects operand 1 on GT.
+ (define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "register_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))]
+ "FP_REG_P (operands[0]) && reload_completed"
+ [(set (reg:CCFP 17)
+ (compare:CCFP (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:DF (gt (reg:CCFP 17) (const_int 0))
+ (match_dup 1)
+ (match_dup 2)))])
+
+ ;; Post-reload SSE2 form of the DFmode max (no CC clobber).  Operand 1 is
+ ;; tied to operand 0 by the "0" constraint, so the instruction's source
+ ;; must be operand 2; printing %1 would name the destination register
+ ;; twice and ignore operand 2 entirely.
+ (define_insn "*maxdf_sse"
+ [(set (match_operand:DF 0 "register_operand" "=Y")
+ (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "Ym"))
+ (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_SSE2 && reload_completed"
+ "maxsd\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "DF")])
;; Misc patterns (?)