This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH,i386]: Implement hypot{,f,l} functions as built-in x87 intrinsics


Roger Sayle wrote:

This patch implements hypot{,f,l} functions as built-in x87 intrinsics.
Patch is bootstrapped on pentium4-pc-linux-gnu, regtested for c and c++.


I don't see why you're doing this one specific to x86. Any target
that supports sqrt should handle it just the same, surely?



I believe the hypot function is intended to be used in situations where x*x, y*y or x*x+y*y may potentially overflow/underflow, i.e. it's supposed to be more accurate than sqrt(x*x+y*y).

This is one of those rare cases where x87's extended precision is
actually a help, as x*x + y*y can be safely calculated in 80-bit
XFmode precision without problems.  Most other glibc targets need
to mess around with argument reduction in their hypot implementations.



In May 2004 we had a long discussion [1] about fma() and hypot() implementation. It was suggested that on x87, XFmode can be used to produce perfectly good results for SFmode and DFmode; however, the XFmode calculation should be protected by flag_unsafe_math_optimizations. Nevertheless, I wanted to be on the safe side and used flag_unsafe_math_optimizations everywhere.

The (second revision) patch attached to this message implements all of Roger's suggestions from [1]:

- DFmode and SFmode are expanded even without flag_unsafe_math
- XFmode is still protected by flag_unsafe_math
- Builtins are disabled for -mfpmath=sse

Without -ffast-math, this code is produced for hypot(x,y):
       pushl %ebp
       movl  %esp, %ebp
       subl  $8, %esp
       fldl  8(%ebp)
       fldl  16(%ebp)
       fld   %st(1)
       fmulp %st, %st(2)
       fld   %st(0)
       fmulp %st, %st(1)
       faddp %st, %st(1)
       fsqrt
       fstpl -8(%ebp)
       fldl  -8(%ebp)
       fucomi  %st(0), %st
       jp    .L14
       je    .L10
.L14:
       fstp  %st(0)
       leave
       jmp   hypot
       .p2align 4,,7
.L10:
       leave
       .p2align 4,,4
       ret

And with -ffast-math:
       pushl %ebp
       movl  %esp, %ebp
       fldl  8(%ebp)
       fldl  16(%ebp)
       fxch  %st(1)
       fmul  %st(0), %st
       fxch  %st(1)
       popl  %ebp
       fmul  %st(0), %st
       faddp %st, %st(1)
       fsqrt
       ret

2005-02-15 Uros Bizjak <uros@kss-loka.si>

   * optabs.h (enum optab_index): Add new OTI_hypot.
   (hypot_optab): Define corresponding macro.
   * optabs.c (init_optabs): Initialize hypot_optab.
   * genopinit.c (optabs): Implement hypot_optab using hypot?f3
   patterns.
   * builtins.c (expand_builtin_mathfn_2): Handle BUILT_IN_HYPOT{,F,L}
   using hypot_optab.
   (expand_builtin): Expand BUILT_IN_HYPOT{,F,L} using
   expand_builtin_mathfn_2.

   * config/i386/i386.md (hypotxf3_core, hypotsf3, hypotdf3, hypotxf3):
   New expanders to implement hypotf, hypot and hypotl built-ins as
   inline x87 intrinsics.

testsuite:

* gcc.dg/builtins-34.c: Also check hypot*.

Of course, with -ffast-math, hypot(x,y) is reasonably implemented
as sqrt(x*x+y*y) on all platforms.



BTW: The question from [1] whether hypot/fma should be expanded generically for IEEE targets remained unanswered.


BTW2: Regarding fma, there was a proposal [2] to introduce MULTPLUS rtx, for proper constant-folding. IIRC, powerpc has a real fma insn that can be used in this case. I think that various multiply-accumulate (MAC) insns can also be described with MULTPLUS.

[1] http://gcc.gnu.org/ml/gcc-patches/2004-05/msg00735.html
[2] http://gcc.gnu.org/ml/gcc-patches/2004-05/msg00771.html

Uros.
Index: builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.422
diff -u -p -r1.422 builtins.c
--- builtins.c	12 Feb 2005 11:34:20 -0000	1.422
+++ builtins.c	15 Feb 2005 08:23:45 -0000
@@ -1887,6 +1887,10 @@ expand_builtin_mathfn_2 (tree exp, rtx t
     case BUILT_IN_POWF:
     case BUILT_IN_POWL:
       builtin_optab = pow_optab; break;
+    case BUILT_IN_HYPOT:
+    case BUILT_IN_HYPOTF:
+    case BUILT_IN_HYPOTL:
+      builtin_optab = hypot_optab; break;
     case BUILT_IN_ATAN2:
     case BUILT_IN_ATAN2F:
     case BUILT_IN_ATAN2L:
@@ -5280,6 +5284,9 @@ expand_builtin (tree exp, rtx target, rt
     case BUILT_IN_DREML:
       if (! flag_unsafe_math_optimizations)
 	break;
+    case BUILT_IN_HYPOT:
+    case BUILT_IN_HYPOTF:
+    case BUILT_IN_HYPOTL:
       target = expand_builtin_mathfn_2 (exp, target, subtarget);
       if (target)
 	return target;
Index: genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.87
diff -u -p -r1.87 genopinit.c
--- genopinit.c	12 Feb 2005 11:34:21 -0000	1.87
+++ genopinit.c	15 Feb 2005 08:23:45 -0000
@@ -118,6 +118,7 @@ static const char * const optabs[] =
   "absv_optab->handlers[$A].insn_code = CODE_FOR_$(absv$I$a2$)",
   "copysign_optab->handlers[$A].insn_code = CODE_FOR_$(copysign$F$a3$)",
   "sqrt_optab->handlers[$A].insn_code = CODE_FOR_$(sqrt$a2$)",
+  "hypot_optab->handlers[$A].insn_code = CODE_FOR_$(hypot$a3$)",
   "floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
   "ceil_optab->handlers[$A].insn_code = CODE_FOR_$(ceil$a2$)",
   "round_optab->handlers[$A].insn_code = CODE_FOR_$(round$a2$)",
Index: optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.260
diff -u -p -r1.260 optabs.c
--- optabs.c	12 Feb 2005 11:34:21 -0000	1.260
+++ optabs.c	15 Feb 2005 08:23:46 -0000
@@ -5012,6 +5012,7 @@ init_optabs (void)
   popcount_optab = init_optab (POPCOUNT);
   parity_optab = init_optab (PARITY);
   sqrt_optab = init_optab (SQRT);
+  hypot_optab = init_optab (UNKNOWN);
   floor_optab = init_optab (UNKNOWN);
   ceil_optab = init_optab (UNKNOWN);
   round_optab = init_optab (UNKNOWN);
Index: optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.49
diff -u -p -r1.49 optabs.h
--- optabs.h	12 Feb 2005 11:34:21 -0000	1.49
+++ optabs.h	15 Feb 2005 08:23:46 -0000
@@ -153,6 +153,8 @@ enum optab_index
   OTI_parity,
   /* Square root */
   OTI_sqrt,
+  /* Euclidean distance */
+  OTI_hypot,
   /* Sine-Cosine */
   OTI_sincos,
   /* Sine */
@@ -295,6 +297,7 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define popcount_optab (optab_table[OTI_popcount])
 #define parity_optab (optab_table[OTI_parity])
 #define sqrt_optab (optab_table[OTI_sqrt])
+#define hypot_optab (optab_table[OTI_hypot])
 #define sincos_optab (optab_table[OTI_sincos])
 #define sin_optab (optab_table[OTI_sin])
 #define asin_optab (optab_table[OTI_asin])
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.618
diff -u -p -r1.618 i386.md
--- config/i386/i386.md	12 Feb 2005 11:34:22 -0000	1.618
+++ config/i386/i386.md	15 Feb 2005 08:23:50 -0000
@@ -14689,6 +14689,72 @@
    (set_attr "mode" "XF")
    (set_attr "athlon_decode" "direct")])
 
+(define_expand "hypotxf3_core"
+  [(set (match_dup 3)
+	(mult:XF (match_operand:XF 1 "register_operand" "") (match_dup 1)))
+   (set (match_dup 4)
+	(mult:XF (match_operand:XF 2 "register_operand" "") (match_dup 2)))
+   (set (match_dup 5)
+	(plus:XF (match_dup 3) (match_dup 4)))
+   (set (match_operand:XF 0 "register_operand" "")
+	(sqrt:XF (match_dup 5)))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  int i;
+
+  for (i=3; i<6; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "hypotxf3"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))
+   (use (match_operand:XF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_hypotxf3_core (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "hypotdf3"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))
+   (use (match_operand:DF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_extenddfxf2 (op2, operands[2]));
+  emit_insn (gen_hypotxf3_core (op0, op1, op2));
+
+  emit_insn (gen_truncxfdf2 (operands[0], op0));
+  DONE;
+})
+
+(define_expand "hypotsf3"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))
+   (use (match_operand:SF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_extendsfxf2 (op2, operands[2]));
+  emit_insn (gen_hypotxf3_core (op0, op1, op2));
+
+  emit_insn (gen_truncxfsf2 (operands[0], op0));
+  DONE;
+})
+
 (define_insn "fpremxf4"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
Index: testsuite/gcc.dg/builtins-34.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/builtins-34.c,v
retrieving revision 1.4
diff -u -p -r1.4 builtins-34.c
--- testsuite/gcc.dg/builtins-34.c	12 Feb 2005 11:34:24 -0000	1.4
+++ testsuite/gcc.dg/builtins-34.c	15 Feb 2005 08:23:53 -0000
@@ -13,16 +13,19 @@ extern double exp2(double);
 extern double pow10(double);
 extern double expm1(double);
 extern double ldexp(double, int);
+extern double hypot(double, double);
 extern float exp10f(float);
 extern float exp2f(float);
 extern float pow10f(float);
 extern float expm1f(float);
 extern float ldexpf(float, int);
+extern float hypotf(float, float);
 extern long double exp10l(long double);
 extern long double exp2l(long double);
 extern long double pow10l(long double);
 extern long double expm1l(long double);
 extern long double ldexpl(long double, int);
+extern long double hypotl(long double, long double);
 
 
 double test1(double x)
@@ -50,6 +53,11 @@ double test5(double x, int exp)
   return ldexp(x, exp);
 }
 
+double test6(double x, double y)
+{
+  return hypot(x, y);
+}
+
 float test1f(float x)
 {
   return exp10f(x);
@@ -75,6 +83,11 @@ float test5f(float x, int exp)
   return ldexpf(x, exp);
 }
 
+float test6f(float x, float y)
+{
+  return hypotf(x, y);
+}
+
 long double test1l(long double x)
 {
   return exp10l(x);
@@ -99,3 +112,8 @@ long double test5l(long double x, int ex
 {
   return ldexpl(x, exp);
 }
+
+long double test6l(long double x, long double y)
+{
+  return hypotl(x, y);
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]