This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, take 2] implement drem() and fmod() as built-in x87 intrinsic
- From: Uros Bizjak <uros at kss-loka dot si>
- To: Roger Sayle <roger at eyesopen dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 05 May 2004 11:41:52 +0200
- Subject: [PATCH, take 2] implement drem() and fmod() as built-in x87 intrinsic
- References: <Pine.LNX.4.44.0405040537590.18609-100000@www.eyesopen.com>
Roger Sayle wrote:
In your current patch you create a forward conditional "ordered" jump
over a backward unconditional jump. A better approach is just generate a
single backward conditional "unordered" jump. gen_rtx_UNORDERED and
and gen_rtx_LABEL_REF (VOIDmode, label1). This should reduce the amount
of initial RTL.
The final fmoddf3 expanders should look like:
Roger,
I have (hopefully) addressed all your suggestions, and new patch is
attached to this message. This patch now implements both drem and fmod
instructions. Patch is tested by bootstrapping gcc on i686-linux-gnu,
and attached builtins-40.c testcase was compiled.
BTW: I had to recode fprem{,1} instructions as two input, two output
instruction, otherwise this testcase failed:
--cut here--
#define OP1 37865432.1234e300
#define OP2 0.234786234
int main() {
float xf = OP1;
float yf = OP2;
double x = OP1;
double y = OP2;
long double xl = OP1;
long double yl = OP2;
printf("%f, %f, %Lf\n", fmodf(xf,yf), fmod(x,y), fmodl(xl,yl));
printf("%f, %f, %Lf\n", dremf(xf,yf), drem(x,y), dreml(xl,yl));
return 0;
}
--cut here--
Input register for x86_fnstsw_1 pattern was changed to (reg:CCFP 18) and
everything works as expected now.
OK for mainline CVS?
2004-05-05 Uros Bizjak <uros@kss-loka.si>
* optabs.h (enum optab_index): Add new OTI_fmod and OTI_drem.
(fmod_optab): Define corresponding macros.
* optabs.c (init_optabs): Initialize fmod_optab and drem_optab.
* genopinit.c (optabs): Implement fmod_optab and drem_optab
using fmod?f3 and drem?f3 patterns.
* builtins.c (expand_builtin_mathfn_2): Handle BUILT_IN_FMOD{,F,L}
using fmod_optab and BUILT_IN_DREM{,F,L} using drem_optab.
(expand_builtin): Expand BUILT_IN_FMOD{,F,L} and
BUILT_IN_DREM{,F,L} using expand_builtin_mathfn_2 if
flag_unsafe_math_optimizations is set.
* reg-stack.c (subst_stack_regs_pat): Handle UNSPEC_FPREM_F,
UNSPEC_FPREM_U, UNSPEC_FPREM1_F and UNSPEC_FPREM1_U.
* config/i386/i386.c (ix86_emit_fp_unordered_jump): New function.
* config/i386/i386-protos.h (ix86_emit_fp_unordered_jump):
Prototype here.
* config/i386/i386.md (UNSPEC_FPREM_F, UNSPEC_FPREM_U,
UNSPEC_FPREM1_F, UNSPEC_FPREM1_U): New unspecs to represent x87's
fprem and fprem1 instructions.
(*x86_fnstsw_1): Change input parameter to (reg:CCFP 18).
Rename insn definition to x86_fnstsw_1.
(fpremxf4, fprem1xf4): New patterns to implement fprem and fprem1
x87 instructions.
(fmodsf3, fmoddf3, fmodxf3): New expanders to implement fmodf, fmod
and fmodl built-ins as inline x87 intrinsics.
(dremsf3, dremdf3, dremxf3): New expanders to implement dremf, drem
and dreml built-ins as inline x87 intrinsics.
testsuite:
* testsuite/gcc.dg/builtins-40.c: New test.
Uros.
Index: gcc/builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.319
diff -u -p -r1.319 builtins.c
--- gcc/builtins.c 3 May 2004 05:31:39 -0000 1.319
+++ gcc/builtins.c 5 May 2004 09:30:24 -0000
@@ -1793,6 +1793,14 @@ expand_builtin_mathfn_2 (tree exp, rtx t
case BUILT_IN_ATAN2F:
case BUILT_IN_ATAN2L:
builtin_optab = atan2_optab; break;
+ case BUILT_IN_FMOD:
+ case BUILT_IN_FMODF:
+ case BUILT_IN_FMODL:
+ builtin_optab = fmod_optab; break;
+ case BUILT_IN_DREM:
+ case BUILT_IN_DREMF:
+ case BUILT_IN_DREML:
+ builtin_optab = drem_optab; break;
default:
abort ();
}
@@ -5364,6 +5372,12 @@ expand_builtin (tree exp, rtx target, rt
case BUILT_IN_ATAN2:
case BUILT_IN_ATAN2F:
case BUILT_IN_ATAN2L:
+ case BUILT_IN_FMOD:
+ case BUILT_IN_FMODF:
+ case BUILT_IN_FMODL:
+ case BUILT_IN_DREM:
+ case BUILT_IN_DREMF:
+ case BUILT_IN_DREML:
if (! flag_unsafe_math_optimizations)
break;
target = expand_builtin_mathfn_2 (exp, target, subtarget);
Index: gcc/genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.73
diff -u -p -r1.73 genopinit.c
--- gcc/genopinit.c 3 May 2004 05:31:40 -0000 1.73
+++ gcc/genopinit.c 5 May 2004 09:30:25 -0000
@@ -91,6 +91,8 @@ static const char * const optabs[] =
"udivmod_optab->handlers[$A].insn_code = CODE_FOR_$(udivmod$a4$)",
"smod_optab->handlers[$A].insn_code = CODE_FOR_$(mod$a3$)",
"umod_optab->handlers[$A].insn_code = CODE_FOR_$(umod$a3$)",
+ "fmod_optab->handlers[$A].insn_code = CODE_FOR_$(fmod$a3$)",
+ "drem_optab->handlers[$A].insn_code = CODE_FOR_$(drem$a3$)",
"ftrunc_optab->handlers[$A].insn_code = CODE_FOR_$(ftrunc$F$a2$)",
"and_optab->handlers[$A].insn_code = CODE_FOR_$(and$a3$)",
"ior_optab->handlers[$A].insn_code = CODE_FOR_$(ior$a3$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.219
diff -u -p -r1.219 optabs.c
--- gcc/optabs.c 3 May 2004 05:31:40 -0000 1.219
+++ gcc/optabs.c 5 May 2004 09:30:25 -0000
@@ -5328,6 +5328,8 @@ init_optabs (void)
udivmod_optab = init_optab (UNKNOWN);
smod_optab = init_optab (MOD);
umod_optab = init_optab (UMOD);
+ fmod_optab = init_optab (UNKNOWN);
+ drem_optab = init_optab (UNKNOWN);
ftrunc_optab = init_optab (UNKNOWN);
and_optab = init_optab (AND);
ior_optab = init_optab (IOR);
Index: gcc/optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.27
diff -u -p -r1.27 optabs.h
--- gcc/optabs.h 3 May 2004 05:31:40 -0000 1.27
+++ gcc/optabs.h 5 May 2004 09:30:26 -0000
@@ -93,6 +93,9 @@ enum optab_index
/* Signed remainder */
OTI_smod,
OTI_umod,
+ /* Floating point remainder functions */
+ OTI_fmod,
+ OTI_drem,
/* Convert float to integer in float fmt */
OTI_ftrunc,
@@ -245,6 +248,8 @@ extern GTY(()) optab optab_table[OTI_MAX
#define udivmod_optab (optab_table[OTI_udivmod])
#define smod_optab (optab_table[OTI_smod])
#define umod_optab (optab_table[OTI_umod])
+#define fmod_optab (optab_table[OTI_fmod])
+#define drem_optab (optab_table[OTI_drem])
#define ftrunc_optab (optab_table[OTI_ftrunc])
#define and_optab (optab_table[OTI_and])
#define ior_optab (optab_table[OTI_ior])
Index: gcc/reg-stack.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.151
diff -u -p -r1.151 reg-stack.c
--- gcc/reg-stack.c 30 Apr 2004 16:27:19 -0000 1.151
+++ gcc/reg-stack.c 5 May 2004 09:30:26 -0000
@@ -1779,6 +1779,8 @@ subst_stack_regs_pat (rtx insn, stack re
break;
case UNSPEC_FSCALE_FRACT:
+ case UNSPEC_FPREM_F:
+ case UNSPEC_FPREM1_F:
/* These insns operate on the top two stack slots.
first part of double input, double output insn. */
@@ -1808,6 +1810,8 @@ subst_stack_regs_pat (rtx insn, stack re
break;
case UNSPEC_FSCALE_EXP:
+ case UNSPEC_FPREM_U:
+ case UNSPEC_FPREM1_U:
/* These insns operate on the top two stack slots./
second part of double input, double output insn. */
Index: gcc/config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.109
diff -u -p -r1.109 i386-protos.h
--- gcc/config/i386/i386-protos.h 12 Apr 2004 23:23:15 -0000 1.109
+++ gcc/config/i386/i386-protos.h 5 May 2004 09:30:26 -0000
@@ -182,6 +182,7 @@ extern bool ix86_fp_jump_nontrivial_p (e
extern void x86_order_regs_for_local_alloc (void);
extern void x86_function_profiler (FILE *, int);
extern void x86_emit_floatuns (rtx [2]);
+extern void ix86_emit_fp_unordered_jump (rtx);
extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
Index: gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.663
diff -u -p -r1.663 i386.c
--- gcc/config/i386/i386.c 30 Apr 2004 16:27:25 -0000 1.663
+++ gcc/config/i386/i386.c 5 May 2004 09:30:29 -0000
@@ -15928,4 +15928,25 @@ output_387_reg_move (rtx insn, rtx *oper
return "fst\t%y0";
}
+/* Output code to perform a conditional jump to LABEL, if C2 flag in
+ FP status register is set. */
+
+void
+ix86_emit_fp_unordered_jump (rtx label)
+{
+ rtx reg = gen_reg_rtx (HImode);
+ rtx temp;
+
+ emit_insn (gen_x86_fnstsw_1 (reg));
+ emit_insn (gen_x86_sahf_1 (reg));
+
+ temp = gen_rtx_REG (CCmode, FLAGS_REG);
+ temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+ emit_jump_insn (temp);
+}
+
#include "gt-i386.h"
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.534
diff -u -p -r1.534 i386.md
--- gcc/config/i386/i386.md 3 May 2004 13:20:57 -0000 1.534
+++ gcc/config/i386/i386.md 5 May 2004 09:30:33 -0000
@@ -129,6 +129,10 @@
(UNSPEC_XTRACT_EXP 85)
(UNSPEC_FSCALE_FRACT 86)
(UNSPEC_FSCALE_EXP 87)
+ (UNSPEC_FPREM_F 88)
+ (UNSPEC_FPREM_U 89)
+ (UNSPEC_FPREM1_F 90)
+ (UNSPEC_FPREM1_U 91)
; REP instruction
(UNSPEC_REP 75)
@@ -941,9 +945,9 @@
;; FP compares, step 2
;; Move the fpsw to ax.
-(define_insn "*x86_fnstsw_1"
+(define_insn "x86_fnstsw_1"
[(set (match_operand:HI 0 "register_operand" "=a")
- (unspec:HI [(reg 18)] UNSPEC_FNSTSW))]
+ (unspec:HI [(reg:CCFP 18)] UNSPEC_FNSTSW))]
"TARGET_80387"
"fnstsw\t%0"
[(set_attr "length" "2")
@@ -14857,6 +14861,172 @@
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
(set_attr "athlon_decode" "direct")])
+
+(define_insn "fpremxf4"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FPREM_F))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FPREM_U))
+ (set (reg:CCFP 18)
+ (unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+ "fprem"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "fmodsf3"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:SF 2 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn(gen_extendsfxf2 (op1, operands[1]));
+ emit_insn(gen_extendsfxf2 (op2, operands[2]));
+
+ emit_label (label);
+
+ emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_insn (gen_truncxfsf2_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "fmoddf3"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:DF 2 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_extenddfxf2 (op2, operands[2]));
+
+ emit_label (label);
+
+ emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_insn (gen_truncxfdf2_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "fmodxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))
+ (use (match_operand:XF 2 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ emit_label (label);
+
+ emit_insn (gen_fpremxf4 (operands[1], operands[2],
+ operands[1], operands[2]));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "fprem1xf4"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FPREM1_F))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FPREM1_U))
+ (set (reg:CCFP 18)
+ (unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+ "fprem1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "dremsf3"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:SF 2 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn(gen_extendsfxf2 (op1, operands[1]));
+ emit_insn(gen_extendsfxf2 (op2, operands[2]));
+
+ emit_label (label);
+
+ emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_insn (gen_truncxfsf2_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "dremdf3"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:DF 2 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_extenddfxf2 (op2, operands[2]));
+
+ emit_label (label);
+
+ emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_insn (gen_truncxfdf2_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "dremxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))
+ (use (match_operand:XF 2 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx label = gen_label_rtx ();
+
+ emit_label (label);
+
+ emit_insn (gen_fprem1xf4 (operands[1], operands[2],
+ operands[1], operands[2]));
+ ix86_emit_fp_unordered_jump (label);
+
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
(define_insn "*sindf2"
[(set (match_operand:DF 0 "register_operand" "=f")
/* Copyright (C) 2004 Free Software Foundation.
Check that fmod, fmodf, fmodl, drem, dremf and dreml
built-in functions compile.
Written by Uros Bizjak, 5th May 2004. */
/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math" } */
extern double fmod(double, double);
extern double drem(double, double);
extern float fmodf(float, float);
extern float dremf(float, float);
extern long double fmodl(long double, long double);
extern long double dreml(long double, long double);
double test1(double x, double y)
{
return fmod(x, y);
}
double test2(double x, double y)
{
return drem(x, y);
}
float test1f(float x, float y)
{
return fmodf(x, y);
}
float test2f(float x, float y)
{
return dremf(x, y);
}
long double test1l(long double x, long double y)
{
return fmodl(x, y);
}
long double test2l(long double x, long double y)
{
return dreml(x, y);
}