This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] implement expm1() as built-in x87 intrinsics
- From: Uros Bizjak <uros at kss-loka dot si>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Roger Sayle <roger at eyesopen dot com>
- Date: Fri, 30 Apr 2004 09:37:18 +0200
- Subject: [PATCH] implement expm1() as built-in x87 intrinsics
Hello!
Attached to this message, please find a patch that implements expm1()
as built-in x87 intrinsics. The patch was tested by bootstrapping gcc on
i686-pc-linux-gnu, with the new tests in gcc.dg/builtins-34.c, and with
some random torture tests.
2004-04-30 Uros Bizjak <uros@kss-loka.si>
* optabs.h (enum optab_index): Add new OTI_expm1.
(expm1_optab): Define corresponding macro.
* optabs.c (init_optabs): Initialize expm1_optab.
* genopinit.c (optabs): Implement expm1_optab using expm1?f2
patterns.
* builtins.c (expand_builtin_mathfn): Handle BUILT_IN_EXPM1{,F,L}
using expm1_optab.
(expand_builtin): Expand BUILT_IN_EXPM1{,F,L} using
expand_builtin_mathfn if flag_unsafe_math_optimizations is set.
* config/i386/i386.md (expm1df2, expm1sf2, expm1xf2): New expanders
to implement expm1, expm1f and expm1l built-ins as inline x87
intrinsics.
testsuite:
* gcc.dg/builtins-34.c: Also check expm1*.
The expm1?f2 patterns are modelled after the assembly code in mathinline.h,
but they skip the final check of _temp (which could not be zero anyway).
Anyway, the implemented expm1() has the same negative range as the exp()
intrinsic (~ -1.0e-300), so this testcase:
--cut here--
/* Range check: print each expm1 variant next to the matching exp(x)-1
   expression for double, float and long double inputs.
   As posted, the snippet carries no #include lines for printf or the
   math functions. */
int main() {
double d = -4.123e300;
/* NOTE(review): -4.123e300 is far beyond the IEEE single-precision range;
   presumably this converts to -inf, which would match the "nan nan" line
   in the quoted output -- confirm. */
float f = -4.123e300;
/* NOTE(review): the constant has no L suffix, so it is a double constant,
   and 4.123e320 is beyond the IEEE double range; presumably it is already
   infinite before being widened to long double -- confirm. */
long double ld = -4.123e320;
printf("%f %f\n", expm1(d), exp(d)-1.0);
printf("%f %f\n", expm1f(f), expf(f)-1.0f);
printf("%Lf %Lf\n", expm1l(ld), expl(ld)-1.0f);
return 0;
}
--cut here--
produces:
-1.000000 -1.000000
nan nan
nan nan
However, the manpage says that "The value of expm1(x) may be more accurate
than exp(x)-1.0 for _small_ values of x.", and indeed, running the above
testcase for input parameters {d,f,ld} = -4.123e-100 produces:
-4.123e-100 0
0 0
-4.123e-100 0
OK to commit it to mainline CVS?
Uros.
Index: gcc/builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.316
diff -u -p -r1.316 builtins.c
--- gcc/builtins.c 29 Apr 2004 15:39:12 -0000 1.316
+++ gcc/builtins.c 30 Apr 2004 07:10:00 -0000
@@ -1600,6 +1600,10 @@ expand_builtin_mathfn (tree exp, rtx tar
case BUILT_IN_EXP2F:
case BUILT_IN_EXP2L:
errno_set = true; builtin_optab = exp2_optab; break;
+ case BUILT_IN_EXPM1:
+ case BUILT_IN_EXPM1F:
+ case BUILT_IN_EXPM1L:
+ errno_set = true; builtin_optab = expm1_optab; break;
case BUILT_IN_LOGB:
case BUILT_IN_LOGBF:
case BUILT_IN_LOGBL:
@@ -5292,6 +5296,9 @@ expand_builtin (tree exp, rtx target, rt
case BUILT_IN_EXP2:
case BUILT_IN_EXP2F:
case BUILT_IN_EXP2L:
+ case BUILT_IN_EXPM1:
+ case BUILT_IN_EXPM1F:
+ case BUILT_IN_EXPM1L:
case BUILT_IN_LOGB:
case BUILT_IN_LOGBF:
case BUILT_IN_LOGBL:
Index: gcc/genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.72
diff -u -p -r1.72 genopinit.c
--- gcc/genopinit.c 20 Apr 2004 19:40:25 -0000 1.72
+++ gcc/genopinit.c 30 Apr 2004 07:10:00 -0000
@@ -130,6 +130,7 @@ static const char * const optabs[] =
"exp_optab->handlers[$A].insn_code = CODE_FOR_$(exp$a2$)",
"exp10_optab->handlers[$A].insn_code = CODE_FOR_$(exp10$a2$)",
"exp2_optab->handlers[$A].insn_code = CODE_FOR_$(exp2$a2$)",
+ "expm1_optab->handlers[$A].insn_code = CODE_FOR_$(expm1$a2$)",
"logb_optab->handlers[$A].insn_code = CODE_FOR_$(logb$a2$)",
"ilogb_optab->handlers[$A].insn_code = CODE_FOR_$(ilogb$a2$)",
"log_optab->handlers[$A].insn_code = CODE_FOR_$(log$a2$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.217
diff -u -p -r1.217 optabs.c
--- gcc/optabs.c 20 Apr 2004 19:40:26 -0000 1.217
+++ gcc/optabs.c 30 Apr 2004 07:10:01 -0000
@@ -5386,6 +5386,7 @@ init_optabs (void)
exp_optab = init_optab (UNKNOWN);
exp10_optab = init_optab (UNKNOWN);
exp2_optab = init_optab (UNKNOWN);
+ expm1_optab = init_optab (UNKNOWN);
logb_optab = init_optab (UNKNOWN);
ilogb_optab = init_optab (UNKNOWN);
log_optab = init_optab (UNKNOWN);
Index: gcc/optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.26
diff -u -p -r1.26 optabs.h
--- gcc/optabs.h 20 Apr 2004 19:40:26 -0000 1.26
+++ gcc/optabs.h 30 Apr 2004 07:10:02 -0000
@@ -164,6 +164,8 @@ enum optab_index
OTI_exp10,
/* Base-2 Exponential */
OTI_exp2,
+ /* Exponential - 1*/
+ OTI_expm1,
/* Radix-independent exponent */
OTI_logb,
OTI_ilogb,
@@ -281,6 +283,7 @@ extern GTY(()) optab optab_table[OTI_MAX
#define exp_optab (optab_table[OTI_exp])
#define exp10_optab (optab_table[OTI_exp10])
#define exp2_optab (optab_table[OTI_exp2])
+#define expm1_optab (optab_table[OTI_expm1])
#define logb_optab (optab_table[OTI_logb])
#define ilogb_optab (optab_table[OTI_ilogb])
#define log_optab (optab_table[OTI_log])
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.531
diff -u -p -r1.531 i386.md
--- gcc/config/i386/i386.md 30 Apr 2004 06:04:57 -0000 1.531
+++ gcc/config/i386/i386.md 30 Apr 2004 07:10:08 -0000
@@ -15982,6 +15982,112 @@
operands[i] = gen_reg_rtx (XFmode);
emit_move_insn (operands[6], CONST1_RTX (XFmode)); /* fld1 */
})
+
+(define_expand "expm1df2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+ (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+ (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+ (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+ (parallel [(set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 7) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (parallel [(set (match_dup 11)
+ (unspec:XF [(match_dup 10) (match_dup 9)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 12)
+ (unspec:XF [(match_dup 10) (match_dup 9)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_dup 13) (minus:XF (match_dup 11) (match_dup 10)))
+ (set (match_dup 14) (plus:XF (match_dup 13) (match_dup 8)))
+ (set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF (match_dup 14)))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<15; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[3], temp);
+ emit_move_insn (operands[10], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expm1sf2"
+ [(set (match_dup 2)
+ (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+ (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3)))
+ (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT))
+ (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5)))
+ (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1))
+ (parallel [(set (match_dup 8)
+ (unspec:XF [(match_dup 7) (match_dup 5)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 7) (match_dup 5)]
+ UNSPEC_FSCALE_EXP))])
+ (parallel [(set (match_dup 11)
+ (unspec:XF [(match_dup 10) (match_dup 9)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 12)
+ (unspec:XF [(match_dup 10) (match_dup 9)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_dup 13) (minus:XF (match_dup 11) (match_dup 10)))
+ (set (match_dup 14) (plus:XF (match_dup 13) (match_dup 8)))
+ (set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF (match_dup 14)))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<15; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[3], temp);
+ emit_move_insn (operands[10], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "expm1xf2"
+ [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 2)))
+ (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+ (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+ (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+ (parallel [(set (match_dup 7)
+ (unspec:XF [(match_dup 6) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 8)
+ (unspec:XF [(match_dup 6) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 8)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 8)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
+ (set (match_operand:XF 0 "register_operand" "")
+ (plus:XF (match_dup 12) (match_dup 7)))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx temp;
+ int i;
+
+ for (i=2; i<13; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+ temp = standard_80387_constant_rtx (5); /* fldl2e */
+ emit_move_insn (operands[2], temp);
+ emit_move_insn (operands[9], CONST1_RTX (XFmode)); /* fld1 */
+})
;; Block operation instructions
Index: gcc/testsuite/gcc.dg/builtins-34.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/builtins-34.c,v
retrieving revision 1.2
diff -u -p -r1.2 builtins-34.c
--- gcc/testsuite/gcc.dg/builtins-34.c 12 Mar 2004 13:33:42 -0000 1.2
+++ gcc/testsuite/gcc.dg/builtins-34.c 30 Apr 2004 07:10:14 -0000
@@ -1,7 +1,7 @@
/* Copyright (C) 2004 Free Software Foundation.
- Check that exp10, exp10f, exp10l, exp2, exp2f, exp2l, pow10, pow10f
- and pow10l built-in functions compile.
+ Check that exp10, exp10f, exp10l, exp2, exp2f, exp2l, pow10, pow10f,
+ pow10l, expm1, expm1f and expm1l built-in functions compile.
Written by Uros Bizjak, 13th February 2004. */
@@ -11,12 +11,15 @@
extern double exp10(double);
extern double exp2(double);
extern double pow10(double);
+extern double expm1(double);
extern float exp10f(float);
extern float exp2f(float);
extern float pow10f(float);
+extern float expm1f(float);
extern long double exp10l(long double);
extern long double exp2l(long double);
extern long double pow10l(long double);
+extern long double expm1l(long double);
double test1(double x)
@@ -34,6 +37,11 @@ double test3(double x)
return pow10(x);
}
+double test4(double x)
+{
+ return expm1(x);
+}
+
float test1f(float x)
{
return exp10f(x);
@@ -49,6 +57,11 @@ float test3f(float x)
return pow10f(x);
}
+float test4f(float x)
+{
+ return expm1f(x);
+}
+
long double test1l(long double x)
{
return exp10l(x);
@@ -62,5 +75,10 @@ long double test2l(long double x)
long double test3l(long double x)
{
return pow10l(x);
+}
+
+long double test4l(long double x)
+{
+ return expm1l(x);
}