This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, take 2] implement drem() and fmod() as built-in x87 intrinsic


Roger Sayle wrote:

In your current patch you create a forward conditional "ordered" jump
over a backward unconditional jump.  A better approach is to just generate
a single backward conditional "unordered" jump, i.e. use gen_rtx_UNORDERED
and gen_rtx_LABEL_REF (VOIDmode, label1).  This should reduce the amount
of initial RTL.

The final fmoddf3 expanders should look like:

Roger,

I have (hopefully) addressed all your suggestions, and a new patch is attached to this message. This patch now implements both the drem and fmod built-ins. The patch was tested by bootstrapping gcc on i686-linux-gnu, and the attached builtins-40.c testcase compiles.

BTW: I had to recode the fprem{,1} instructions as two-input, two-output instructions, otherwise this testcase failed:

--cut here--
#include <math.h>   /* fmod*(); drem*() are BSD/GNU extensions */
#include <stdio.h>  /* printf() */

#define OP1 37865432.1234e300
#define OP2 0.234786234

/* Print the fmod and drem results for OP1 and OP2 in each of the three
   floating-point types (float, double, long double).

   NOTE(review): OP1 is far above FLT_MAX, so xf is presumably +Inf on
   IEEE targets after the conversion to float -- confirm this is the
   intended input for the float variants.  */
int main(void) {
       float xf = OP1;
       float yf = OP2;

       double x = OP1;
       double y = OP2;

       long double xl = OP1;
       long double yl = OP2;

       /* The float results are promoted to double by the variadic call,
          hence %f for the first two values and %Lf for the long double.  */
       printf("%f, %f, %Lf\n", fmodf(xf,yf), fmod(x,y), fmodl(xl,yl));
       printf("%f, %f, %Lf\n", dremf(xf,yf), drem(x,y), dreml(xl,yl));
       return 0;
}
--cut here--

The input register of the x86_fnstsw_1 pattern was changed to (reg:CCFP 18), and everything works as expected now.

OK for mainline CVS?

2004-05-05 Uros Bizjak <uros@kss-loka.si>

   * optabs.h (enum optab_index): Add new OTI_fmod and OTI_drem.
   (fmod_optab, drem_optab): Define corresponding macros.
   * optabs.c (init_optabs): Initialize fmod_optab and drem_optab.
   * genopinit.c (optabs): Implement fmod_optab and drem_optab
   using fmod?f3 and drem?f3 patterns.
   * builtins.c (expand_builtin_mathfn_2): Handle BUILT_IN_FMOD{,F,L}
   using fmod_optab and BUILT_IN_DREM{,F,L} using drem_optab.
   (expand_builtin): Expand BUILT_IN_FMOD{,F,L} and
   BUILT_IN_DREM{,F,L} using expand_builtin_mathfn_2 if
   flag_unsafe_math_optimizations is set.

   * reg-stack.c (subst_stack_regs_pat): Handle UNSPEC_FPREM_F,
   UNSPEC_FPREM_U, UNSPEC_FPREM1_F and UNSPEC_FPREM1_U.

   * config/i386/i386.c (ix86_emit_fp_unordered_jump): New function.
   * config/i386/i386-protos.h (ix86_emit_fp_unordered_jump):
   Prototype here.
   * config/i386/i386.md (UNSPEC_FPREM_F, UNSPEC_FPREM_U,
   UNSPEC_FPREM1_F, UNSPEC_FPREM1_U): New unspecs to represent x87's
   fprem and fprem1 instructions.
   (*x86_fnstsw_1): Change input parameter to (reg:CCFP 18).
   Rename insn definition to x86_fnstsw_1.
   (fpremxf4, fprem1xf4): New patterns to implement fprem and fprem1
   x87 instructions.
   (fmodsf3, fmoddf3, fmodxf3): New expanders to implement fmodf, fmod
   and fmodl built-ins as inline x87 intrinsics.
   (dremsf3, dremdf3, dremxf3): New expanders to implement dremf, drem
   and dreml built-ins as inline x87 intrinsics.

testsuite:

* testsuite/gcc.dg/builtins-40.c: New test.

Uros.
Index: gcc/builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.319
diff -u -p -r1.319 builtins.c
--- gcc/builtins.c	3 May 2004 05:31:39 -0000	1.319
+++ gcc/builtins.c	5 May 2004 09:30:24 -0000
@@ -1793,6 +1793,14 @@ expand_builtin_mathfn_2 (tree exp, rtx t
     case BUILT_IN_ATAN2F:
     case BUILT_IN_ATAN2L:
       builtin_optab = atan2_optab; break;
+    case BUILT_IN_FMOD:
+    case BUILT_IN_FMODF:
+    case BUILT_IN_FMODL:
+      builtin_optab = fmod_optab; break;
+    case BUILT_IN_DREM:
+    case BUILT_IN_DREMF:
+    case BUILT_IN_DREML:
+      builtin_optab = drem_optab; break;
     default:
       abort ();
     }
@@ -5364,6 +5372,12 @@ expand_builtin (tree exp, rtx target, rt
     case BUILT_IN_ATAN2:
     case BUILT_IN_ATAN2F:
     case BUILT_IN_ATAN2L:
+    case BUILT_IN_FMOD:
+    case BUILT_IN_FMODF:
+    case BUILT_IN_FMODL:
+    case BUILT_IN_DREM:
+    case BUILT_IN_DREMF:
+    case BUILT_IN_DREML:
       if (! flag_unsafe_math_optimizations)
 	break;
       target = expand_builtin_mathfn_2 (exp, target, subtarget);
Index: gcc/genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.73
diff -u -p -r1.73 genopinit.c
--- gcc/genopinit.c	3 May 2004 05:31:40 -0000	1.73
+++ gcc/genopinit.c	5 May 2004 09:30:25 -0000
@@ -91,6 +91,8 @@ static const char * const optabs[] =
   "udivmod_optab->handlers[$A].insn_code = CODE_FOR_$(udivmod$a4$)",
   "smod_optab->handlers[$A].insn_code = CODE_FOR_$(mod$a3$)",
   "umod_optab->handlers[$A].insn_code = CODE_FOR_$(umod$a3$)",
+  "fmod_optab->handlers[$A].insn_code = CODE_FOR_$(fmod$a3$)",
+  "drem_optab->handlers[$A].insn_code = CODE_FOR_$(drem$a3$)",
   "ftrunc_optab->handlers[$A].insn_code = CODE_FOR_$(ftrunc$F$a2$)",
   "and_optab->handlers[$A].insn_code = CODE_FOR_$(and$a3$)",
   "ior_optab->handlers[$A].insn_code = CODE_FOR_$(ior$a3$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.219
diff -u -p -r1.219 optabs.c
--- gcc/optabs.c	3 May 2004 05:31:40 -0000	1.219
+++ gcc/optabs.c	5 May 2004 09:30:25 -0000
@@ -5328,6 +5328,8 @@ init_optabs (void)
   udivmod_optab = init_optab (UNKNOWN);
   smod_optab = init_optab (MOD);
   umod_optab = init_optab (UMOD);
+  fmod_optab = init_optab (UNKNOWN);
+  drem_optab = init_optab (UNKNOWN);
   ftrunc_optab = init_optab (UNKNOWN);
   and_optab = init_optab (AND);
   ior_optab = init_optab (IOR);
Index: gcc/optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.27
diff -u -p -r1.27 optabs.h
--- gcc/optabs.h	3 May 2004 05:31:40 -0000	1.27
+++ gcc/optabs.h	5 May 2004 09:30:26 -0000
@@ -93,6 +93,9 @@ enum optab_index
   /* Signed remainder */
   OTI_smod,
   OTI_umod,
+  /* Floating point remainder functions */
+  OTI_fmod,
+  OTI_drem,
   /* Convert float to integer in float fmt */
   OTI_ftrunc,
 
@@ -245,6 +248,8 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define udivmod_optab (optab_table[OTI_udivmod])
 #define smod_optab (optab_table[OTI_smod])
 #define umod_optab (optab_table[OTI_umod])
+#define fmod_optab (optab_table[OTI_fmod])
+#define drem_optab (optab_table[OTI_drem])
 #define ftrunc_optab (optab_table[OTI_ftrunc])
 #define and_optab (optab_table[OTI_and])
 #define ior_optab (optab_table[OTI_ior])
Index: gcc/reg-stack.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.151
diff -u -p -r1.151 reg-stack.c
--- gcc/reg-stack.c	30 Apr 2004 16:27:19 -0000	1.151
+++ gcc/reg-stack.c	5 May 2004 09:30:26 -0000
@@ -1779,6 +1779,8 @@ subst_stack_regs_pat (rtx insn, stack re
 		break;
 
 	      case UNSPEC_FSCALE_FRACT:
+	      case UNSPEC_FPREM_F:
+	      case UNSPEC_FPREM1_F:
 		/* These insns operate on the top two stack slots.
 		   first part of double input, double output insn.  */
 
@@ -1808,6 +1810,8 @@ subst_stack_regs_pat (rtx insn, stack re
 		break;
 
 	      case UNSPEC_FSCALE_EXP:
+	      case UNSPEC_FPREM_U:
+	      case UNSPEC_FPREM1_U:
 		/* These insns operate on the top two stack slots./
 		   second part of double input, double output insn.  */
 
Index: gcc/config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.109
diff -u -p -r1.109 i386-protos.h
--- gcc/config/i386/i386-protos.h	12 Apr 2004 23:23:15 -0000	1.109
+++ gcc/config/i386/i386-protos.h	5 May 2004 09:30:26 -0000
@@ -182,6 +182,7 @@ extern bool ix86_fp_jump_nontrivial_p (e
 extern void x86_order_regs_for_local_alloc (void);
 extern void x86_function_profiler (FILE *, int);
 extern void x86_emit_floatuns (rtx [2]);
+extern void ix86_emit_fp_unordered_jump (rtx);
 
 extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
 
Index: gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.663
diff -u -p -r1.663 i386.c
--- gcc/config/i386/i386.c	30 Apr 2004 16:27:25 -0000	1.663
+++ gcc/config/i386/i386.c	5 May 2004 09:30:29 -0000
@@ -15928,4 +15928,25 @@ output_387_reg_move (rtx insn, rtx *oper
   return "fst\t%y0";
 }
 
+/* Output code to perform a conditional jump to LABEL, if C2 flag in
+   FP status register is set.  */
+
+void
+ix86_emit_fp_unordered_jump (rtx label)
+{
+  rtx reg = gen_reg_rtx (HImode);
+  rtx temp;
+
+  emit_insn (gen_x86_fnstsw_1 (reg));
+  emit_insn (gen_x86_sahf_1 (reg));
+  
+  temp = gen_rtx_REG (CCmode, FLAGS_REG); 
+  temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
+  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+			      gen_rtx_LABEL_REF (VOIDmode, label),
+			      pc_rtx);
+  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+  emit_jump_insn (temp);
+}
+
 #include "gt-i386.h"
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.534
diff -u -p -r1.534 i386.md
--- gcc/config/i386/i386.md	3 May 2004 13:20:57 -0000	1.534
+++ gcc/config/i386/i386.md	5 May 2004 09:30:33 -0000
@@ -129,6 +129,10 @@
    (UNSPEC_XTRACT_EXP		85)
    (UNSPEC_FSCALE_FRACT		86)
    (UNSPEC_FSCALE_EXP		87)
+   (UNSPEC_FPREM_F		88)
+   (UNSPEC_FPREM_U		89)
+   (UNSPEC_FPREM1_F		90)
+   (UNSPEC_FPREM1_U		91)
 
    ; REP instruction
    (UNSPEC_REP			75)
@@ -941,9 +945,9 @@
 ;; FP compares, step 2
 ;; Move the fpsw to ax.
 
-(define_insn "*x86_fnstsw_1"
+(define_insn "x86_fnstsw_1"
   [(set (match_operand:HI 0 "register_operand" "=a")
-	(unspec:HI [(reg 18)] UNSPEC_FNSTSW))]
+	(unspec:HI [(reg:CCFP 18)] UNSPEC_FNSTSW))]
   "TARGET_80387"
   "fnstsw\t%0"
   [(set_attr "length" "2")
@@ -14857,6 +14861,172 @@
   [(set_attr "type" "fpspc")
    (set_attr "mode" "XF")
    (set_attr "athlon_decode" "direct")])
+
+(define_insn "fpremxf4"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
+		    (match_operand:XF 3 "register_operand" "1")]
+		   UNSPEC_FPREM_F))
+   (set (match_operand:XF 1 "register_operand" "=u")
+	(unspec:XF [(match_dup 2) (match_dup 3)]
+		   UNSPEC_FPREM_U))
+   (set (reg:CCFP 18)
+	(unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+  "fprem"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "fmodsf3"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))
+   (use (match_operand:SF 2 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn(gen_extendsfxf2 (op1, operands[1]));
+  emit_insn(gen_extendsfxf2 (op2, operands[2]));
+
+  emit_label (label);
+
+  emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+
+  emit_insn (gen_truncxfsf2_noop (operands[0], op1));
+  DONE;
+})
+
+(define_expand "fmoddf3"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))
+   (use (match_operand:DF 2 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_extenddfxf2 (op2, operands[2]));
+
+  emit_label (label);
+
+  emit_insn (gen_fpremxf4 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+
+  emit_insn (gen_truncxfdf2_noop (operands[0], op1));
+  DONE;
+})
+
+(define_expand "fmodxf3"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))
+   (use (match_operand:XF 2 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label = gen_label_rtx ();
+
+  emit_label (label);
+
+  emit_insn (gen_fpremxf4 (operands[1], operands[2],
+			   operands[1], operands[2]));
+  ix86_emit_fp_unordered_jump (label);
+
+  emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
+(define_insn "fprem1xf4"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
+		    (match_operand:XF 3 "register_operand" "1")]
+		   UNSPEC_FPREM1_F))
+   (set (match_operand:XF 1 "register_operand" "=u")
+	(unspec:XF [(match_dup 2) (match_dup 3)]
+		   UNSPEC_FPREM1_U))
+   (set (reg:CCFP 18)
+	(unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+  "fprem1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "dremsf3"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))
+   (use (match_operand:SF 2 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn(gen_extendsfxf2 (op1, operands[1]));
+  emit_insn(gen_extendsfxf2 (op2, operands[2]));
+
+  emit_label (label);
+
+  emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+
+  emit_insn (gen_truncxfsf2_noop (operands[0], op1));
+  DONE;
+})
+
+(define_expand "dremdf3"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))
+   (use (match_operand:DF 2 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_extenddfxf2 (op2, operands[2]));
+
+  emit_label (label);
+
+  emit_insn (gen_fprem1xf4 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+
+  emit_insn (gen_truncxfdf2_noop (operands[0], op1));
+  DONE;
+})
+
+(define_expand "dremxf3"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))
+   (use (match_operand:XF 2 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label = gen_label_rtx ();
+
+  emit_label (label);
+
+  emit_insn (gen_fprem1xf4 (operands[1], operands[2],
+			    operands[1], operands[2]));
+  ix86_emit_fp_unordered_jump (label);
+
+  emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
 
 (define_insn "*sindf2"
   [(set (match_operand:DF 0 "register_operand" "=f")
/* Copyright (C) 2004 Free Software Foundation.

   Check that fmod, fmodf, fmodl, drem, dremf and dreml
   built-in functions compile.

   Written by Uros Bizjak, 5th May 2004.  */

/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math" } */

extern double fmod(double, double);
extern double drem(double, double);
extern float fmodf(float, float);
extern float dremf(float, float);
extern long double fmodl(long double, long double);
extern long double dreml(long double, long double);


double test1(double x, double y)
{
  return fmod(x, y);
}

/* Call the double-precision drem built-in on X and Y and hand back
   its result unchanged.  */
double test2(double x, double y)
{
  double result = drem(x, y);

  return result;
}

float test1f(float x, float y)
{
  return fmodf(x, y);
}

/* Call the single-precision dremf built-in on X and Y and hand back
   its result unchanged.  */
float test2f(float x, float y)
{
  float result = dremf(x, y);

  return result;
}

long double test1l(long double x, long double y)
{
  return fmodl(x, y);
}

/* Call the extended-precision dreml built-in on X and Y and hand back
   its result unchanged.  */
long double test2l(long double x, long double y)
{
  long double result = dreml(x, y);

  return result;
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]