This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Rewrite x87 sqrt patterns


Hello!

The i386 sqrt patterns don't truncate the result of the x87 fsqrt instruction. This can be annoying because these patterns are also used outside -ffast-math. The proposed patch solves this problem by extending the SFmode/DFmode argument of the sqrt instruction to XFmode; an XFmode sqrt (this is what x87 _really_ does) is then emitted, and the result is truncated back to SFmode or DFmode.

The testcase:
double test(float a)
{
 return sqrt(a + 2.0);
}

is compiled (gcc -O2 -fomit-frame-pointer -fno-math-errno) to:
test:
       subl    $12, %esp
       flds    .LC0
       fadds   16(%esp)
       fsqrt
       fstpl   (%esp)
       fldl    (%esp)
       addl    $12, %esp
       ret

In addition to the truncation, no extra register moves are introduced, due to the way float_extend patterns for x87 are written. This patch also removes fsqrt patterns that implicitly extend their operands, and removes "mixed" fsqrt patterns, which were wrong anyway (x87 fsqrt operates in XFmode, whereas SSE sqrt operates in DFmode or SFmode).

The patch was bootstrapped on x86_64-pc-linux-gnu. A regression test on both x86_64 and i686 (c, c++ and fortran) will finish overnight. OK for mainline if the tests pass?

BTW: I'd like to implement the above approach for transcendental functions (sin, cos, exp, etc.). Some of them are already implemented as proposed above, but the trigonometric patterns still include implicit extend operators.

test:
       subl    $12, %esp
       flds    .LC0
       fadds   16(%esp)
       fsqrt
       fstpl   (%esp)
       fldl    (%esp)
       addl    $12, %esp
       ret

Uros.
Index: i386.md
===================================================================
--- i386.md	(revision 119061)
+++ i386.md	(working copy)
@@ -15525,26 +15525,32 @@
 
 ;; FPU special functions.
 
+(define_insn "sqrtxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fsqrt"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")
+   (set_attr "athlon_decode" "direct")])
+
 (define_expand "sqrtsf2"
   [(set (match_operand:SF 0 "register_operand" "")
 	(sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "")))]
   "TARGET_USE_FANCY_MATH_387 || TARGET_SSE_MATH"
 {
   if (!TARGET_SSE_MATH)
-    operands[1] = force_reg (SFmode, operands[1]);
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
+
+      emit_insn (gen_extendsfxf2 (op1, operands[1]));
+      emit_insn (gen_sqrtxf2 (op0, op1));
+      emit_insn (gen_truncxfsf2 (operands[0], op0));
+      DONE;
+   }
 })
 
-(define_insn "*sqrtsf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,x")
-	(sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm")))]
-  "TARGET_USE_FANCY_MATH_387 && TARGET_MIX_SSE_I387"
-  "@
-   fsqrt
-   sqrtss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fpspc,sse")
-   (set_attr "mode" "SF,SF")
-   (set_attr "athlon_decode" "direct,*")])
-
 (define_insn "*sqrtsf2_sse"
   [(set (match_operand:SF 0 "register_operand" "=x")
 	(sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
@@ -15554,35 +15560,23 @@
    (set_attr "mode" "SF")
    (set_attr "athlon_decode" "*")])
 
-(define_insn "*sqrtsf2_i387"
-  [(set (match_operand:SF 0 "register_operand" "=f")
-	(sqrt:SF (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_USE_FANCY_MATH_387"
-  "fsqrt"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "direct")])
-
 (define_expand "sqrtdf2"
   [(set (match_operand:DF 0 "register_operand" "")
 	(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
   "TARGET_USE_FANCY_MATH_387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
 {
   if (!(TARGET_SSE2 && TARGET_SSE_MATH))
-    operands[1] = force_reg (DFmode, operands[1]);
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
+
+      emit_insn (gen_extenddfxf2 (op1, operands[1]));
+      emit_insn (gen_sqrtxf2 (op0, op1));
+      emit_insn (gen_truncxfdf2 (operands[0], op0));
+      DONE;
+   }
 })
 
-(define_insn "*sqrtdf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f,Y")
-	(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym")))]
-  "TARGET_USE_FANCY_MATH_387 && TARGET_SSE2 && TARGET_MIX_SSE_I387"
-  "@
-   fsqrt
-   sqrtsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fpspc,sse")
-   (set_attr "mode" "DF,DF")
-   (set_attr "athlon_decode" "direct,*")])
-
 (define_insn "*sqrtdf2_sse"
   [(set (match_operand:DF 0 "register_operand" "=Y")
 	(sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "Ym")))]
@@ -15592,55 +15586,6 @@
    (set_attr "mode" "DF")
    (set_attr "athlon_decode" "*")])
 
-(define_insn "*sqrtdf2_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-	(sqrt:DF (match_operand:DF 1 "register_operand" "0")))]
-  "TARGET_USE_FANCY_MATH_387"
-  "fsqrt"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "direct")])
-
-(define_insn "*sqrtextendsfdf2_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-	(sqrt:DF (float_extend:DF
-		  (match_operand:SF 1 "register_operand" "0"))))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
-  "fsqrt"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "direct")])
-
-(define_insn "sqrtxf2"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
-  "TARGET_USE_FANCY_MATH_387"
-  "fsqrt"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "XF")
-   (set_attr "athlon_decode" "direct")])
-
-(define_insn "*sqrtextendsfxf2_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(sqrt:XF (float_extend:XF
-		  (match_operand:SF 1 "register_operand" "0"))))]
-  "TARGET_USE_FANCY_MATH_387"
-  "fsqrt"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "XF")
-   (set_attr "athlon_decode" "direct")])
-
-(define_insn "*sqrtextenddfxf2_i387"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(sqrt:XF (float_extend:XF
-		  (match_operand:DF 1 "register_operand" "0"))))]
-  "TARGET_USE_FANCY_MATH_387"
-  "fsqrt"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "XF")
-   (set_attr "athlon_decode" "direct")])
-
 (define_insn "fpremxf4"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]