This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, i386]: Fix PR target/29852: Use fprem and fprem1 insns for SSE math
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Richard Guenther <rguenther at suse dot de>
- Date: Wed, 29 Nov 2006 23:02:33 +0100
- Subject: [PATCH, i386]: Fix PR target/29852: Use fprem and fprem1 insns for SSE math
Hello!
This patch implements the fmod and remainder intrinsics using x87
instructions for SSE math as well. In order to shorten the truncation
sequences and x87->SSE register reloads, the truncxfsf2_mixed and
truncxfdf2_mixed patterns have to be enabled for non-mixed SSE/387 math, too.
The testcase from PR:
/* Testcase from PR target/29852: takes fmod of A with the constant
   divisor 1.1 and returns that result plus B.  */
double foo(double a, double b)
{
double x = fmod(a, 1.1);
return x + b;
}
compiles for x86_64 target to (-O2 -mno-math-errno for clarity):
movsd %xmm0, -16(%rsp)
fldl -16(%rsp)
fldl .LC0(%rip)
fxch %st(1)
.L2:
fprem
fnstsw %ax
testb $4, %ah
jne .L2
fstp %st(1)
fstpl -8(%rsp) <<- this is the truncation insn
movsd -8(%rsp), %xmm0
addsd %xmm1, %xmm0
ret
As shown in the PR, with this patch the synthetic fmod() testcase runs more
than 4 times faster than with unpatched gcc and almost 2 times faster than with icc.
2006-11-29 Uros Bizjak <ubizjak@gmail.com>
PR target/29852
* config/i386/i386.md (*truncxfsf2_mixed, *truncxfdf2_mixed): Enable
insn patterns for TARGET_80387.
(*truncxfsf2_i387, *truncxfdf2_i387): Remove.
(*truncxfsf2_i387_1): Rename to *truncxfsf2_i387.
(*truncxfdf2_i387_1): Rename to *truncxfdf2_i387.
(fmod<mode>3, remainder<mode>3): Enable expanders for SSE math.
Generate truncxf<mode>2 insn patterns for strict SSE math.
Patch was bootstrapped on x86_64-pc-linux-gnu and regression tested for
c, c++ and fortran.
OK for mainline?
Uros.
Index: i386.md
===================================================================
--- i386.md (revision 119334)
+++ i386.md (working copy)
@@ -3941,7 +3941,7 @@
(float_truncate:SF
(match_operand:XF 1 "register_operand" "f,f,f,f")))
(clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
- "TARGET_MIX_SSE_I387"
+ "TARGET_80387"
{
gcc_assert (!which_alternative);
if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
@@ -3962,23 +3962,6 @@
(set_attr "mode" "SF")])
(define_insn "*truncxfsf2_i387"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r")
- (float_truncate:SF
- (match_operand:XF 1 "register_operand" "f,f,f")))
- (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))]
- "TARGET_80387"
-{
- gcc_assert (!which_alternative);
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp%z0\t%y0";
- else
- return "fst%z0\t%y0";
-}
- [(set_attr "type" "fmov,multi,multi")
- (set_attr "unit" "*,i387,i387")
- (set_attr "mode" "SF")])
-
-(define_insn "*truncxfsf2_i387_1"
[(set (match_operand:SF 0 "memory_operand" "=m")
(float_truncate:SF
(match_operand:XF 1 "register_operand" "f")))]
@@ -4037,7 +4020,7 @@
(float_truncate:DF
(match_operand:XF 1 "register_operand" "f,f,f,f")))
(clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))]
- "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+ "TARGET_80387"
{
gcc_assert (!which_alternative);
if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
@@ -4058,23 +4041,6 @@
(set_attr "mode" "DF")])
(define_insn "*truncxfdf2_i387"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r")
- (float_truncate:DF
- (match_operand:XF 1 "register_operand" "f,f,f")))
- (clobber (match_operand:DF 2 "memory_operand" "=X,m,m"))]
- "TARGET_80387"
-{
- gcc_assert (!which_alternative);
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp%z0\t%y0";
- else
- return "fst%z0\t%y0";
-}
- [(set_attr "type" "fmov,multi,multi")
- (set_attr "unit" "*,i387,i387")
- (set_attr "mode" "DF")])
-
-(define_insn "*truncxfdf2_i387_1"
[(set (match_operand:DF 0 "memory_operand" "=m")
(float_truncate:DF
(match_operand:XF 1 "register_operand" "f")))]
@@ -15667,23 +15633,27 @@
[(use (match_operand:X87MODEF12 0 "register_operand" ""))
(use (match_operand:X87MODEF12 1 "general_operand" ""))
(use (match_operand:X87MODEF12 2 "general_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || TARGET_MIX_SSE_I387)"
+ "TARGET_USE_FANCY_MATH_387"
{
rtx label = gen_label_rtx ();
rtx op1 = gen_reg_rtx (XFmode);
rtx op2 = gen_reg_rtx (XFmode);
- emit_insn(gen_extend<mode>xf2 (op1, operands[1]));
- emit_insn(gen_extend<mode>xf2 (op2, operands[2]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
emit_label (label);
emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
ix86_emit_fp_unordered_jump (label);
- emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+ /* Truncate the result properly for strict SSE math. */
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !TARGET_MIX_SSE_I387)
+ emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+ else
+ emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+
DONE;
})
@@ -15724,24 +15694,28 @@
[(use (match_operand:X87MODEF12 0 "register_operand" ""))
(use (match_operand:X87MODEF12 1 "general_operand" ""))
(use (match_operand:X87MODEF12 2 "general_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || TARGET_MIX_SSE_I387)"
+ "TARGET_USE_FANCY_MATH_387"
{
rtx label = gen_label_rtx ();
rtx op1 = gen_reg_rtx (XFmode);
rtx op2 = gen_reg_rtx (XFmode);
- emit_insn(gen_extend<mode>xf2 (op1, operands[1]));
- emit_insn(gen_extend<mode>xf2 (op2, operands[2]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
emit_label (label);
emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
ix86_emit_fp_unordered_jump (label);
- emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+ /* Truncate the result properly for strict SSE math. */
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !TARGET_MIX_SSE_I387)
+ emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+ else
+ emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+
DONE;
})