This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch, i386]: Committed: macroize fmod and remainder x87 patterns


Hello!

This relatively boring patch macroizes fmod and remainder i387 patterns.

However, as the fprem{,1}xf4_i387 patterns are inserted in a tight loop, an implementation using float_extend patterns produces quite suboptimal code. To maintain optimal register liveness information, the fprem pattern must be generated as:

emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2))

where op1 and op2 represent registers holding values that were extended to XFmode before the loop.

When the fpremxf4_i387 pattern is implemented as

(define_insn "fpremxf4_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(float_extend:XF (match_operand:DF 2 "register_operand" "0"))
(float_extend:XF (match_operand:DF 3 "register_operand" "1"))]
UNSPEC_FPREM_F))
...


then this pattern is generated from the fmoddf3 expander as:

emit_insn (gen_fpremxf4_i387 (op1, op2, operands[1], operands[2]))

Unfortunately, in this case life analysis figures out that operands[2] dies inside the loop, and a couple of compensating fstp and fld instructions are emitted inside the loop. So, the current fpremxf4_i387 pattern (without inherent XFmode extension) produces better code (a tighter fmod reduction loop).

Other than a straightforward DFmode and SFmode macroization, this patch relaxes operand constraints a bit. Since all input operands are moved through the extend<mode>xf2 pattern, they can be constrained as "general_operand", to match the extend<mode>xf2 input operands.

2006-11-25 Uros Bizjak <ubizjak@gmail.com>

       config/i386/i386.md (fpremxf4): Rename to fpremxf4_i387.
       (fprem1xf4): Rename to fprem1xf4_i387.

       (fmodsf3, fmoddf3): Macroize patterns using X87MODEF12 mode macro.
       Rename patterns to fmod<mode>3. Use general_operand operand
       constraint for operands 1 and 2. Use SSE_FLOAT_MODE_P to disable
       patterns for SSE math.
       (remaindersf3, remainderdf3): Ditto.

The patch was regression tested on i686-pc-linux-gnu for c, c++ and fortran. In the end, it distilled down to a quite trivial patch, so I guess it can be committed to SVN under the obvious rule. (The patch also includes a trivial rename of "sqrt<mode>xf2_i387" to "sqrt_extend<mode>xf2_i387", mistakenly left out of my previous patch.)

Uros.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 119188)
+++ config/i386/i386.md	(working copy)
@@ -15549,7 +15549,7 @@
    (set_attr "mode" "XF")
    (set_attr "athlon_decode" "direct")])
 
-(define_insn "sqrt<mode>xf2_i387"
+(define_insn "sqrt_extend<mode>xf2_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(sqrt:XF
 	  (float_extend:XF
@@ -15582,13 +15582,13 @@
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = force_reg (<MODE>mode, operands[1]);
 
-      emit_insn (gen_sqrt<mode>xf2_i387 (op0, op1));
+      emit_insn (gen_sqrt_extend<mode>xf2_i387 (op0, op1));
       emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
       DONE;
    }
 })
 
-(define_insn "fpremxf4"
+(define_insn "fpremxf4_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
 		    (match_operand:XF 3 "register_operand" "1")]
@@ -15603,73 +15603,49 @@
   [(set_attr "type" "fpspc")
    (set_attr "mode" "XF")])
 
-(define_expand "fmodsf3"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))
-   (use (match_operand:SF 2 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+(define_expand "fmodxf3"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))
+   (use (match_operand:XF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387"
 {
   rtx label = gen_label_rtx ();
 
-  rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = gen_reg_rtx (XFmode);
-
-  emit_insn(gen_extendsfxf2 (op1, operands[1]));
-  emit_insn(gen_extendsfxf2 (op2, operands[2]));
-
   emit_label (label);
 
-  emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+  emit_insn (gen_fpremxf4_i387 (operands[1], operands[2],
+				operands[1], operands[2]));
   ix86_emit_fp_unordered_jump (label);
 
-  emit_insn (gen_truncxfsf2_i387_noop_unspec (operands[0], op1));
+  emit_move_insn (operands[0], operands[1]);
   DONE;
 })
 
-(define_expand "fmoddf3"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))
-   (use (match_operand:DF 2 "register_operand" ""))]
+(define_expand "fmod<mode>3"
+  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
+   (use (match_operand:X87MODEF12 1 "general_operand" ""))
+   (use (match_operand:X87MODEF12 2 "general_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
 {
   rtx label = gen_label_rtx ();
 
   rtx op1 = gen_reg_rtx (XFmode);
   rtx op2 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_extenddfxf2 (op1, operands[1]));
-  emit_insn (gen_extenddfxf2 (op2, operands[2]));
+  emit_insn(gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn(gen_extend<mode>xf2 (op2, operands[2]));
 
   emit_label (label);
-
-  emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+  emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
 
-  emit_insn (gen_truncxfdf2_i387_noop_unspec (operands[0], op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
   DONE;
 })
 
-(define_expand "fmodxf3"
-  [(use (match_operand:XF 0 "register_operand" ""))
-   (use (match_operand:XF 1 "register_operand" ""))
-   (use (match_operand:XF 2 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387"
-{
-  rtx label = gen_label_rtx ();
-
-  emit_label (label);
-
-  emit_insn (gen_fpremxf4 (operands[1], operands[2],
-			   operands[1], operands[2]));
-  ix86_emit_fp_unordered_jump (label);
-
-  emit_move_insn (operands[0], operands[1]);
-  DONE;
-})
-
-(define_insn "fprem1xf4"
+(define_insn "fprem1xf4_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
 		    (match_operand:XF 3 "register_operand" "1")]
@@ -15684,72 +15660,49 @@
   [(set_attr "type" "fpspc")
    (set_attr "mode" "XF")])
 
-(define_expand "remaindersf3"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))
-   (use (match_operand:SF 2 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+(define_expand "remainderxf3"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))
+   (use (match_operand:XF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387"
 {
   rtx label = gen_label_rtx ();
 
-  rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = gen_reg_rtx (XFmode);
-
-  emit_insn(gen_extendsfxf2 (op1, operands[1]));
-  emit_insn(gen_extendsfxf2 (op2, operands[2]));
-
   emit_label (label);
 
-  emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+  emit_insn (gen_fprem1xf4_i387 (operands[1], operands[2],
+				 operands[1], operands[2]));
   ix86_emit_fp_unordered_jump (label);
 
-  emit_insn (gen_truncxfsf2_i387_noop_unspec (operands[0], op1));
+  emit_move_insn (operands[0], operands[1]);
   DONE;
 })
 
-(define_expand "remainderdf3"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))
-   (use (match_operand:DF 2 "register_operand" ""))]
+(define_expand "remainder<mode>3"
+  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
+   (use (match_operand:X87MODEF12 1 "general_operand" ""))
+   (use (match_operand:X87MODEF12 2 "general_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
 {
   rtx label = gen_label_rtx ();
 
   rtx op1 = gen_reg_rtx (XFmode);
   rtx op2 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_extenddfxf2 (op1, operands[1]));
-  emit_insn (gen_extenddfxf2 (op2, operands[2]));
+  emit_insn(gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn(gen_extend<mode>xf2 (op2, operands[2]));
 
   emit_label (label);
 
-  emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+  emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
 
-  emit_insn (gen_truncxfdf2_i387_noop_unspec (operands[0], op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
   DONE;
 })
 
-(define_expand "remainderxf3"
-  [(use (match_operand:XF 0 "register_operand" ""))
-   (use (match_operand:XF 1 "register_operand" ""))
-   (use (match_operand:XF 2 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387"
-{
-  rtx label = gen_label_rtx ();
-
-  emit_label (label);
-
-  emit_insn (gen_fprem1xf4 (operands[1], operands[2],
-			    operands[1], operands[2]));
-  ix86_emit_fp_unordered_jump (label);
-
-  emit_move_insn (operands[0], operands[1]);
-  DONE;
-})
-
 (define_insn "*sindf2"
   [(set (match_operand:DF 0 "register_operand" "=f")
 	(unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))]

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]