This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Fix PR target/29852: Use fprem and fprem1 insns for SSE math


Hello!

This patch implements the fmod and remainder intrinsics using x87 instructions for SSE math as well. In order to shorten the truncation sequences and the x87->SSE register reloads, the truncxfsf2_mixed and truncxfdf2_mixed patterns also have to be enabled for non-mixed SSE/387 math.

The testcase from PR:

double foo(double a, double b)
{
 double x = fmod(a, 1.1);
 return x + b;
}

compiles for x86_64 target to (-O2 -mno-math-errno for clarity):

       movsd   %xmm0, -16(%rsp)
       fldl    -16(%rsp)
       fldl    .LC0(%rip)
       fxch    %st(1)
.L2:
       fprem
       fnstsw  %ax
       testb   $4, %ah
       jne     .L2
       fstp    %st(1)
       fstpl   -8(%rsp)		<<- this is the truncation insn
       movsd   -8(%rsp), %xmm0
       addsd   %xmm1, %xmm0
       ret

As shown in the PR, with this patch the synthetic fmod() testcase executes more than 4 times faster than with unpatched gcc and almost 2 times faster than with icc.

2006-11-29 Uros Bizjak <ubizjak@gmail.com>

       PR target/29852
       * config/i386/i386.md (*truncxfsf2_mixed, *truncxfdf2_mixed): Enable
       insn patterns for TARGET_80387.
       (*truncxfsf2_i387, *truncxfdf2_i387): Remove.
       (*truncxfsf2_i387_1): Rename to *truncxfsf2_i387.
       (*truncxfdf2_i387_1): Rename to *truncxfdf2_i387.
       (fmod<mode>3, remainder<mode>3): Enable expanders for SSE math.
       Generate truncxf<mode>2 insn patterns for strict SSE math.

The patch was bootstrapped on x86_64-pc-linux-gnu and regression tested for C, C++ and Fortran.

OK for mainline?

Uros.
Index: i386.md
===================================================================
--- i386.md	(revision 119334)
+++ i386.md	(working copy)
@@ -3941,7 +3941,7 @@
 	(float_truncate:SF
 	 (match_operand:XF 1 "register_operand" "f,f,f,f")))
    (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
-  "TARGET_MIX_SSE_I387"
+  "TARGET_80387"
 {
   gcc_assert (!which_alternative);
   if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
@@ -3962,23 +3962,6 @@
    (set_attr "mode" "SF")])
 
 (define_insn "*truncxfsf2_i387"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r")
-	(float_truncate:SF
-	 (match_operand:XF 1 "register_operand" "f,f,f")))
-   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))]
-  "TARGET_80387"
-{
-  gcc_assert (!which_alternative);
-  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-    return "fstp%z0\t%y0";
-   else
-     return "fst%z0\t%y0";
-}
-  [(set_attr "type" "fmov,multi,multi")
-   (set_attr "unit" "*,i387,i387")
-   (set_attr "mode" "SF")])
-
-(define_insn "*truncxfsf2_i387_1"
   [(set (match_operand:SF 0 "memory_operand" "=m")
 	(float_truncate:SF
 	 (match_operand:XF 1 "register_operand" "f")))]
@@ -4037,7 +4020,7 @@
 	(float_truncate:DF
 	 (match_operand:XF 1 "register_operand" "f,f,f,f")))
    (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))]
-  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+  "TARGET_80387"
 {
   gcc_assert (!which_alternative);
   if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
@@ -4058,23 +4041,6 @@
    (set_attr "mode" "DF")])
 
 (define_insn "*truncxfdf2_i387"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r")
-	(float_truncate:DF
-	 (match_operand:XF 1 "register_operand" "f,f,f")))
-   (clobber (match_operand:DF 2 "memory_operand" "=X,m,m"))]
-  "TARGET_80387"
-{
-  gcc_assert (!which_alternative);
-  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-    return "fstp%z0\t%y0";
-  else
-    return "fst%z0\t%y0";
-}
-  [(set_attr "type" "fmov,multi,multi")
-   (set_attr "unit" "*,i387,i387")
-   (set_attr "mode" "DF")])
-
-(define_insn "*truncxfdf2_i387_1"
   [(set (match_operand:DF 0 "memory_operand" "=m")
 	(float_truncate:DF
 	  (match_operand:XF 1 "register_operand" "f")))]
@@ -15667,23 +15633,27 @@
   [(use (match_operand:X87MODEF12 0 "register_operand" ""))
    (use (match_operand:X87MODEF12 1 "general_operand" ""))
    (use (match_operand:X87MODEF12 2 "general_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)"
+  "TARGET_USE_FANCY_MATH_387"
 {
   rtx label = gen_label_rtx ();
 
   rtx op1 = gen_reg_rtx (XFmode);
   rtx op2 = gen_reg_rtx (XFmode);
 
-  emit_insn(gen_extend<mode>xf2 (op1, operands[1]));
-  emit_insn(gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
 
   emit_label (label);
   emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
 
-  emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+  /* Truncate the result properly for strict SSE math.  */
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !TARGET_MIX_SSE_I387)
+    emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+  else
+    emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+
   DONE;
 })
 
@@ -15724,24 +15694,28 @@
   [(use (match_operand:X87MODEF12 0 "register_operand" ""))
    (use (match_operand:X87MODEF12 1 "general_operand" ""))
    (use (match_operand:X87MODEF12 2 "general_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-       || TARGET_MIX_SSE_I387)"
+  "TARGET_USE_FANCY_MATH_387"
 {
   rtx label = gen_label_rtx ();
 
   rtx op1 = gen_reg_rtx (XFmode);
   rtx op2 = gen_reg_rtx (XFmode);
 
-  emit_insn(gen_extend<mode>xf2 (op1, operands[1]));
-  emit_insn(gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
 
   emit_label (label);
 
   emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
 
-  emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+  /* Truncate the result properly for strict SSE math.  */
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !TARGET_MIX_SSE_I387)
+    emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+  else
+    emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+
   DONE;
 })
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]