This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch, i386]: Commited: macroize fmod and remainder x87 patterns
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Sat, 25 Nov 2006 16:02:16 +0100
- Subject: [Patch, i386]: Commited: macroize fmod and remainder x87 patterns
Hello!
This relatively boring patch macroizes the fmod and remainder i387 patterns.
However, as the fprem{,1}xf4_i387 patterns are inserted in a tight loop,
an implementation using float_extend patterns produces quite suboptimal code.
To maintain optimal register liveness information, the fprem pattern must be
generated as:
emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
where op1 and op2 represent registers holding the input values, already
extended to XFmode before the loop.
Suppose the fpremxf4_i387 pattern were instead implemented as
(define_insn "fpremxf4_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(float_extend:XF (match_operand:DF 2
"register_operand" "0"))
(float_extend:XF (match_operand:DF 3
"register_operand" "1"))]
UNSPEC_FPREM_F))
...
Such a pattern would be generated from the fmoddf3 expander as:
emit_insn (gen_fpremxf4_i387 (op1, op2, operands[1], operands[2]));
Unfortunately, in this case life analysis figures out that operands[2]
dies inside the loop, and a couple of compensating fstp and fld
instructions are emitted inside the loop. So, the current fpremxf4_i387
pattern (without the inherent XFmode extension) produces better code
(a tighter fmod reduction loop).
Other than a straightforward DFmode and SFmode macroization, this patch
relaxes the operand constraints a bit. Since all input operands are moved
through the extend<mode>xf2 pattern, they can be constrained as
"general_operand", to match the extend<mode>xf2 input operands.
2006-11-25 Uros Bizjak <ubizjak@gmail.com>
config/i386/i386.md (fpremxf4): Rename to fpremxf4_i387.
(fprem1xf4): Rename to fprem1xf4_i387.
(fmodsf3, fmoddf3): Macroize patterns using X87MODEF12 mode macro.
Rename patterns to fmod<mode>3. Use general_operand operand
constraint for operands 1 and 2. Use SSE_FLOAT_MODE_P to disable
patterns for SSE math.
(remaindersf3, remainderdf3): Ditto.
The patch was regression tested on i686-pc-linux-gnu for C, C++ and Fortran.
In the end, it distilled down to quite a trivial patch, so I guess it can
be committed to SVN under the obvious rule. (The patch also includes a trivial
rename of "sqrt<mode>xf2_i387" to "sqrt_extend<mode>xf2_i387",
mistakenly left out of my previous patch.)
Uros.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 119188)
+++ config/i386/i386.md (working copy)
@@ -15549,7 +15549,7 @@
(set_attr "mode" "XF")
(set_attr "athlon_decode" "direct")])
-(define_insn "sqrt<mode>xf2_i387"
+(define_insn "sqrt_extend<mode>xf2_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
(sqrt:XF
(float_extend:XF
@@ -15582,13 +15582,13 @@
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = force_reg (<MODE>mode, operands[1]);
- emit_insn (gen_sqrt<mode>xf2_i387 (op0, op1));
+ emit_insn (gen_sqrt_extend<mode>xf2_i387 (op0, op1));
emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
DONE;
}
})
-(define_insn "fpremxf4"
+(define_insn "fpremxf4_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 2 "register_operand" "0")
(match_operand:XF 3 "register_operand" "1")]
@@ -15603,73 +15603,49 @@
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")])
-(define_expand "fmodsf3"
- [(use (match_operand:SF 0 "register_operand" ""))
- (use (match_operand:SF 1 "register_operand" ""))
- (use (match_operand:SF 2 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+(define_expand "fmodxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))
+ (use (match_operand:XF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
{
rtx label = gen_label_rtx ();
- rtx op1 = gen_reg_rtx (XFmode);
- rtx op2 = gen_reg_rtx (XFmode);
-
- emit_insn(gen_extendsfxf2 (op1, operands[1]));
- emit_insn(gen_extendsfxf2 (op2, operands[2]));
-
emit_label (label);
- emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+ emit_insn (gen_fpremxf4_i387 (operands[1], operands[2],
+ operands[1], operands[2]));
ix86_emit_fp_unordered_jump (label);
- emit_insn (gen_truncxfsf2_i387_noop_unspec (operands[0], op1));
+ emit_move_insn (operands[0], operands[1]);
DONE;
})
-(define_expand "fmoddf3"
- [(use (match_operand:DF 0 "register_operand" ""))
- (use (match_operand:DF 1 "register_operand" ""))
- (use (match_operand:DF 2 "register_operand" ""))]
+(define_expand "fmod<mode>3"
+ [(use (match_operand:X87MODEF12 0 "register_operand" ""))
+ (use (match_operand:X87MODEF12 1 "general_operand" ""))
+ (use (match_operand:X87MODEF12 2 "general_operand" ""))]
"TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)"
{
rtx label = gen_label_rtx ();
rtx op1 = gen_reg_rtx (XFmode);
rtx op2 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_extenddfxf2 (op2, operands[2]));
+ emit_insn(gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn(gen_extend<mode>xf2 (op2, operands[2]));
emit_label (label);
-
- emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+ emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
ix86_emit_fp_unordered_jump (label);
- emit_insn (gen_truncxfdf2_i387_noop_unspec (operands[0], op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
DONE;
})
-(define_expand "fmodxf3"
- [(use (match_operand:XF 0 "register_operand" ""))
- (use (match_operand:XF 1 "register_operand" ""))
- (use (match_operand:XF 2 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387"
-{
- rtx label = gen_label_rtx ();
-
- emit_label (label);
-
- emit_insn (gen_fpremxf4 (operands[1], operands[2],
- operands[1], operands[2]));
- ix86_emit_fp_unordered_jump (label);
-
- emit_move_insn (operands[0], operands[1]);
- DONE;
-})
-
-(define_insn "fprem1xf4"
+(define_insn "fprem1xf4_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 2 "register_operand" "0")
(match_operand:XF 3 "register_operand" "1")]
@@ -15684,72 +15660,49 @@
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")])
-(define_expand "remaindersf3"
- [(use (match_operand:SF 0 "register_operand" ""))
- (use (match_operand:SF 1 "register_operand" ""))
- (use (match_operand:SF 2 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+(define_expand "remainderxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))
+ (use (match_operand:XF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
{
rtx label = gen_label_rtx ();
- rtx op1 = gen_reg_rtx (XFmode);
- rtx op2 = gen_reg_rtx (XFmode);
-
- emit_insn(gen_extendsfxf2 (op1, operands[1]));
- emit_insn(gen_extendsfxf2 (op2, operands[2]));
-
emit_label (label);
- emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+ emit_insn (gen_fprem1xf4_i387 (operands[1], operands[2],
+ operands[1], operands[2]));
ix86_emit_fp_unordered_jump (label);
- emit_insn (gen_truncxfsf2_i387_noop_unspec (operands[0], op1));
+ emit_move_insn (operands[0], operands[1]);
DONE;
})
-(define_expand "remainderdf3"
- [(use (match_operand:DF 0 "register_operand" ""))
- (use (match_operand:DF 1 "register_operand" ""))
- (use (match_operand:DF 2 "register_operand" ""))]
+(define_expand "remainder<mode>3"
+ [(use (match_operand:X87MODEF12 0 "register_operand" ""))
+ (use (match_operand:X87MODEF12 1 "general_operand" ""))
+ (use (match_operand:X87MODEF12 2 "general_operand" ""))]
"TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)"
{
rtx label = gen_label_rtx ();
rtx op1 = gen_reg_rtx (XFmode);
rtx op2 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_extenddfxf2 (op2, operands[2]));
+ emit_insn(gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn(gen_extend<mode>xf2 (op2, operands[2]));
emit_label (label);
- emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+ emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
ix86_emit_fp_unordered_jump (label);
- emit_insn (gen_truncxfdf2_i387_noop_unspec (operands[0], op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
DONE;
})
-(define_expand "remainderxf3"
- [(use (match_operand:XF 0 "register_operand" ""))
- (use (match_operand:XF 1 "register_operand" ""))
- (use (match_operand:XF 2 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387"
-{
- rtx label = gen_label_rtx ();
-
- emit_label (label);
-
- emit_insn (gen_fprem1xf4 (operands[1], operands[2],
- operands[1], operands[2]));
- ix86_emit_fp_unordered_jump (label);
-
- emit_move_insn (operands[0], operands[1]);
- DONE;
-})
-
(define_insn "*sindf2"
[(set (match_operand:DF 0 "register_operand" "=f")
(unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))]