This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] implement expm1() as built-in x87 intrinsics


Hello!

Attached to this message, please find a patch that implements expm1() as built-in x87 intrinsics. The patch was tested by bootstrapping gcc on i686-pc-linux-gnu, and tested with the new tests in gcc.dg/builtins-34.c and some random torture tests.

2004-04-30 Uros Bizjak <uros@kss-loka.si>

       * optabs.h (enum optab_index): Add new OTI_expm1.
       (expm1_optab): Define corresponding macro.
       * optabs.c (init_optabs): Initialize expm1_optab.
       * genopinit.c (optabs): Implement expm1_optab using expm1?f2
       patterns.
       * builtins.c (expand_builtin_mathfn): Handle BUILT_IN_EXPM1{,F,L}
       using expm1_optab.
       (expand_builtin): Expand BUILT_IN_EXPM1{,F,L} using
       expand_builtin_mathfn if flag_unsafe_math_optimizations is set.

       * config/i386/i386.md (expm1df2, expm1sf2, expm1xf2): New expanders
       to implement expm1, expm1f and expm1l built-ins as inline x87
       intrinsics.

testsuite:

* gcc.dg/builtins-34.c: Also check expm1*.


The expm1?f2 patterns are modelled after the assembly code in mathinline.h, but they skip the final check of _temp (which could not be zero anyway). In any case, the implemented expm1() has the same negative range as the exp() intrinsic (~ -1.0e-300), so this testcase:


--cut here--
/* Demonstration program: prints each expm1 variant next to the equivalent
   exp(x) - 1 expression for large-magnitude negative arguments.
   NOTE(review): no #include lines are shown in this snippet; <stdio.h> and
   <math.h> are presumably required -- confirm.  */
int main() {
 double d = -4.123e300;
 /* Both constants below are unsuffixed and therefore of type double.
    -4.123e300 is outside the range of float, and -4.123e320 is outside the
    range of an IEEE double, so f and ld presumably end up as -infinity
    rather than as finite values -- TODO confirm on the target.  */
 float f = -4.123e300;
 long double ld = -4.123e320;

 /* One line per precision: expm1 result first, exp(x) - 1 second.  */
 printf("%f %f\n", expm1(d), exp(d)-1.0);
 printf("%f %f\n", expm1f(f), expf(f)-1.0f);
 printf("%Lf %Lf\n", expm1l(ld), expl(ld)-1.0f);

 return 0;
}
--cut here--

produces:
-1.000000 -1.000000
nan nan
nan nan

However, the manpage says that "The value of expm1(x) may be more accurate than exp(x)-1.0 for _small_ values of x.", and indeed, running the above testcase with input parameters {d,f,ld} = -4.123e-100 produces:

-4.123e-100 0
0 0
-4.123e-100 0

OK to commit it to mainline CVS?

Uros.
Index: gcc/builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.316
diff -u -p -r1.316 builtins.c
--- gcc/builtins.c	29 Apr 2004 15:39:12 -0000	1.316
+++ gcc/builtins.c	30 Apr 2004 07:10:00 -0000
@@ -1600,6 +1600,10 @@ expand_builtin_mathfn (tree exp, rtx tar
     case BUILT_IN_EXP2F:
     case BUILT_IN_EXP2L:
       errno_set = true; builtin_optab = exp2_optab; break;
+    case BUILT_IN_EXPM1:
+    case BUILT_IN_EXPM1F:
+    case BUILT_IN_EXPM1L:
+      errno_set = true; builtin_optab = expm1_optab; break;
     case BUILT_IN_LOGB:
     case BUILT_IN_LOGBF:
     case BUILT_IN_LOGBL:
@@ -5292,6 +5296,9 @@ expand_builtin (tree exp, rtx target, rt
     case BUILT_IN_EXP2:
     case BUILT_IN_EXP2F:
     case BUILT_IN_EXP2L:
+    case BUILT_IN_EXPM1:
+    case BUILT_IN_EXPM1F:
+    case BUILT_IN_EXPM1L:
     case BUILT_IN_LOGB:
     case BUILT_IN_LOGBF:
     case BUILT_IN_LOGBL:
Index: gcc/genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.72
diff -u -p -r1.72 genopinit.c
--- gcc/genopinit.c	20 Apr 2004 19:40:25 -0000	1.72
+++ gcc/genopinit.c	30 Apr 2004 07:10:00 -0000
@@ -130,6 +130,7 @@ static const char * const optabs[] =
   "exp_optab->handlers[$A].insn_code = CODE_FOR_$(exp$a2$)",
   "exp10_optab->handlers[$A].insn_code = CODE_FOR_$(exp10$a2$)",
   "exp2_optab->handlers[$A].insn_code = CODE_FOR_$(exp2$a2$)",
+  "expm1_optab->handlers[$A].insn_code = CODE_FOR_$(expm1$a2$)",
   "logb_optab->handlers[$A].insn_code = CODE_FOR_$(logb$a2$)",
   "ilogb_optab->handlers[$A].insn_code = CODE_FOR_$(ilogb$a2$)",
   "log_optab->handlers[$A].insn_code = CODE_FOR_$(log$a2$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.217
diff -u -p -r1.217 optabs.c
--- gcc/optabs.c	20 Apr 2004 19:40:26 -0000	1.217
+++ gcc/optabs.c	30 Apr 2004 07:10:01 -0000
@@ -5386,6 +5386,7 @@ init_optabs (void)
   exp_optab = init_optab (UNKNOWN);
   exp10_optab = init_optab (UNKNOWN);
   exp2_optab = init_optab (UNKNOWN);
+  expm1_optab = init_optab (UNKNOWN);
   logb_optab = init_optab (UNKNOWN);
   ilogb_optab = init_optab (UNKNOWN);
   log_optab = init_optab (UNKNOWN);
Index: gcc/optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.26
diff -u -p -r1.26 optabs.h
--- gcc/optabs.h	20 Apr 2004 19:40:26 -0000	1.26
+++ gcc/optabs.h	30 Apr 2004 07:10:02 -0000
@@ -164,6 +164,8 @@ enum optab_index
   OTI_exp10,
   /* Base-2 Exponential */
   OTI_exp2,
+  /* Exponential - 1 */
+  OTI_expm1,
   /* Radix-independent exponent */
   OTI_logb,
   OTI_ilogb,
@@ -281,6 +283,7 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define exp_optab (optab_table[OTI_exp])
 #define exp10_optab (optab_table[OTI_exp10])
 #define exp2_optab (optab_table[OTI_exp2])
+#define expm1_optab (optab_table[OTI_expm1])
 #define logb_optab (optab_table[OTI_logb])
 #define ilogb_optab (optab_table[OTI_ilogb])
 #define log_optab (optab_table[OTI_log])
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.531
diff -u -p -r1.531 i386.md
--- gcc/config/i386/i386.md	30 Apr 2004 06:04:57 -0000	1.531
+++ gcc/config/i386/i386.md	30 Apr 2004 07:10:08 -0000
@@ -15982,6 +15982,112 @@
     operands[i] = gen_reg_rtx (XFmode);
   emit_move_insn (operands[6], CONST1_RTX (XFmode));  /* fld1 */
 })
+
+(define_expand "expm1df2"
+  [(set (match_dup 2)
+	(float_extend:XF (match_operand:DF 1 "register_operand" "")))
+   (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+   (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+   (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+   (parallel [(set (match_dup 8)
+		   (unspec:XF [(match_dup 7) (match_dup 5)]
+			      UNSPEC_FSCALE_FRACT))
+		   (set (match_dup 9)
+		   (unspec:XF [(match_dup 7) (match_dup 5)]
+			      UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 11)
+		   (unspec:XF [(match_dup 10) (match_dup 9)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 12)
+		   (unspec:XF [(match_dup 10) (match_dup 9)]
+			      UNSPEC_FSCALE_EXP))])
+   (set (match_dup 13) (minus:XF (match_dup 11) (match_dup 10)))
+   (set (match_dup 14) (plus:XF (match_dup 13) (match_dup 8)))
+   (set (match_operand:DF 0 "register_operand" "")
+	(float_truncate:DF (match_dup 14)))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx temp;
+  int i;
+
+  for (i=2; i<15; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  temp = standard_80387_constant_rtx (5); /* fldl2e */
+  emit_move_insn (operands[3], temp);
+  emit_move_insn (operands[10], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "expm1sf2"
+  [(set (match_dup 2)
+	(float_extend:XF (match_operand:SF 1 "register_operand" "")))
+   (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+   (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+   (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+   (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+   (parallel [(set (match_dup 8)
+		   (unspec:XF [(match_dup 7) (match_dup 5)]
+			      UNSPEC_FSCALE_FRACT))
+		   (set (match_dup 9)
+		   (unspec:XF [(match_dup 7) (match_dup 5)]
+			      UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 11)
+		   (unspec:XF [(match_dup 10) (match_dup 9)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 12)
+		   (unspec:XF [(match_dup 10) (match_dup 9)]
+			      UNSPEC_FSCALE_EXP))])
+   (set (match_dup 13) (minus:XF (match_dup 11) (match_dup 10)))
+   (set (match_dup 14) (plus:XF (match_dup 13) (match_dup 8)))
+   (set (match_operand:SF 0 "register_operand" "")
+	(float_truncate:SF (match_dup 14)))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx temp;
+  int i;
+
+  for (i=2; i<15; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  temp = standard_80387_constant_rtx (5); /* fldl2e */
+  emit_move_insn (operands[3], temp);
+  emit_move_insn (operands[10], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "expm1xf2"
+  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)))
+   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+   (parallel [(set (match_dup 7)
+		   (unspec:XF [(match_dup 6) (match_dup 4)]
+			      UNSPEC_FSCALE_FRACT))
+		   (set (match_dup 8)
+		   (unspec:XF [(match_dup 6) (match_dup 4)]
+			      UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 10)
+		   (unspec:XF [(match_dup 9) (match_dup 8)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 11)
+		   (unspec:XF [(match_dup 9) (match_dup 8)]
+			      UNSPEC_FSCALE_EXP))])
+   (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
+   (set (match_operand:XF 0 "register_operand" "")
+	(plus:XF (match_dup 12) (match_dup 7)))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx temp;
+  int i;
+
+  for (i=2; i<13; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+  temp = standard_80387_constant_rtx (5); /* fldl2e */
+  emit_move_insn (operands[2], temp);
+  emit_move_insn (operands[9], CONST1_RTX (XFmode));  /* fld1 */
+})
 
 ;; Block operation instructions
 
Index: gcc/testsuite/gcc.dg/builtins-34.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/builtins-34.c,v
retrieving revision 1.2
diff -u -p -r1.2 builtins-34.c
--- gcc/testsuite/gcc.dg/builtins-34.c	12 Mar 2004 13:33:42 -0000	1.2
+++ gcc/testsuite/gcc.dg/builtins-34.c	30 Apr 2004 07:10:14 -0000
@@ -1,7 +1,7 @@
 /* Copyright (C) 2004 Free Software Foundation.
 
-   Check that exp10, exp10f, exp10l, exp2, exp2f, exp2l, pow10, pow10f
-   and pow10l built-in functions compile.
+   Check that exp10, exp10f, exp10l, exp2, exp2f, exp2l, pow10, pow10f,
+   pow10l, expm1, expm1f and expm1l built-in functions compile.
 
    Written by Uros Bizjak, 13th February 2004.  */
 
@@ -11,12 +11,15 @@
 extern double exp10(double);
 extern double exp2(double);
 extern double pow10(double);
+extern double expm1(double);
 extern float exp10f(float);
 extern float exp2f(float);
 extern float pow10f(float);
+extern float expm1f(float);
 extern long double exp10l(long double);
 extern long double exp2l(long double);
 extern long double pow10l(long double);
+extern long double expm1l(long double);
 
 
 double test1(double x)
@@ -34,6 +37,11 @@ double test3(double x)
   return pow10(x);
 }
 
+double test4(double x)
+{
+  return expm1(x);
+}
+
 float test1f(float x)
 {
   return exp10f(x);
@@ -49,6 +57,11 @@ float test3f(float x)
   return pow10f(x);
 }
 
+float test4f(float x)
+{
+  return expm1f(x);
+}
+
 long double test1l(long double x)
 {
   return exp10l(x);
@@ -62,5 +75,10 @@ long double test2l(long double x)
 long double test3l(long double x)
 {
   return pow10l(x);
+}
+
+long double test4l(long double x)
+{
+  return expm1l(x);
 }
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]