This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] implement fmod() as built-in x87 intrinsic


Hello!

Attached to this message, please find a patch that implements fmod() as a built-in x87 intrinsic.

Patch is tested on i686-pc-linux-gnu:
- bootstrapped gcc
- compilation with attached builtins-40.c test
- compared output of fmod(), fmodf() and fmodl() with -O2, with and without -ffast-math
- built almabench ( ~1 second faster: user 0m14.328s)


2004-05-04 Uros Bizjak <uros@kss-loka.si>

   * optabs.h (enum optab_index): Add new OTI_fmod.
   (fmod_optab): Define corresponding macro.
   * optabs.c (init_optabs): Initialize fmod_optab.
   * genopinit.c (optabs): Implement fmod_optab using fmod?f3
   patterns.
   * builtins.c (expand_builtin_mathfn_2): Handle BUILT_IN_FMOD{,F,L}
   using fmod_optab.
   (expand_builtin): Expand BUILT_IN_FMOD{,F,L} using
   expand_builtin_mathfn_2 if flag_unsafe_math_optimizations is set.

   * reg-stack.c (subst_stack_regs_pat): Handle UNSPEC_FPREM.

   * config/i386/i386.md (UNSPEC_FPREM): New unspec to represent x87's
   fprem insn.
   (fpremxf_1): New pattern to implement fprem x87 instruction.
   (fmodsf3, fmoddf3, fmodxf3): New expanders to implement fmodf, fmod
   and fmodl built-ins as inline x87 intrinsics.

testsuite:

   * testsuite/gcc.dg/builtins-40.c: New test.

It looks as though there are some problems with gcc's reg-stack and loop optimization. The RTL that the patch generates is OK, but the asm code produced by gcc is not optimal, because gcc does not know that two fxchs cancel each other. Optimally, the fxch on the input operands could be "implemented" by changing the operand loading order... This effect can be observed in almabench, around the fprem instruction in the asm dumps [look at the ???.c.35.stack dump for further analysis]. I guess that because my patch only exposes gcc's weakness, it is still OK to commit it to mainline CVS.

test1d:
       fldl 4(%esp)
       fldl 12(%esp)
       jmp  .L9
       .p2align 4,,7
.L13:
       fxch %st(1)    <- this should be moved out of loop
.L9:
       fxch %st(1)    <- this should be moved out of loop
       fprem
       fnstsw  %ax
       sahf
       jp   .L13
       fstp %st(1)
       ret

BTW: drem() could be implemented the same way; fprem would just need to be replaced with fprem1. This will come in a follow-up patch.

Uros.

Index: gcc/builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.319
diff -u -p -r1.319 builtins.c
--- gcc/builtins.c	3 May 2004 05:31:39 -0000	1.319
+++ gcc/builtins.c	4 May 2004 11:35:39 -0000
@@ -1793,6 +1793,10 @@ expand_builtin_mathfn_2 (tree exp, rtx t
     case BUILT_IN_ATAN2F:
     case BUILT_IN_ATAN2L:
       builtin_optab = atan2_optab; break;
+    case BUILT_IN_FMOD:
+    case BUILT_IN_FMODF:
+    case BUILT_IN_FMODL:
+      builtin_optab = fmod_optab; break;
     default:
       abort ();
     }
@@ -5364,6 +5368,9 @@ expand_builtin (tree exp, rtx target, rt
     case BUILT_IN_ATAN2:
     case BUILT_IN_ATAN2F:
     case BUILT_IN_ATAN2L:
+    case BUILT_IN_FMOD:
+    case BUILT_IN_FMODF:
+    case BUILT_IN_FMODL:
       if (! flag_unsafe_math_optimizations)
 	break;
       target = expand_builtin_mathfn_2 (exp, target, subtarget);
Index: gcc/genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.73
diff -u -p -r1.73 genopinit.c
--- gcc/genopinit.c	3 May 2004 05:31:40 -0000	1.73
+++ gcc/genopinit.c	4 May 2004 11:35:39 -0000
@@ -91,6 +91,7 @@ static const char * const optabs[] =
   "udivmod_optab->handlers[$A].insn_code = CODE_FOR_$(udivmod$a4$)",
   "smod_optab->handlers[$A].insn_code = CODE_FOR_$(mod$a3$)",
   "umod_optab->handlers[$A].insn_code = CODE_FOR_$(umod$a3$)",
+  "fmod_optab->handlers[$A].insn_code = CODE_FOR_$(fmod$a3$)",
   "ftrunc_optab->handlers[$A].insn_code = CODE_FOR_$(ftrunc$F$a2$)",
   "and_optab->handlers[$A].insn_code = CODE_FOR_$(and$a3$)",
   "ior_optab->handlers[$A].insn_code = CODE_FOR_$(ior$a3$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.219
diff -u -p -r1.219 optabs.c
--- gcc/optabs.c	3 May 2004 05:31:40 -0000	1.219
+++ gcc/optabs.c	4 May 2004 11:35:40 -0000
@@ -5328,6 +5328,7 @@ init_optabs (void)
   udivmod_optab = init_optab (UNKNOWN);
   smod_optab = init_optab (MOD);
   umod_optab = init_optab (UMOD);
+  fmod_optab = init_optab (UNKNOWN);
   ftrunc_optab = init_optab (UNKNOWN);
   and_optab = init_optab (AND);
   ior_optab = init_optab (IOR);
Index: gcc/optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.27
diff -u -p -r1.27 optabs.h
--- gcc/optabs.h	3 May 2004 05:31:40 -0000	1.27
+++ gcc/optabs.h	4 May 2004 11:35:40 -0000
@@ -93,6 +93,8 @@ enum optab_index
   /* Signed remainder */
   OTI_smod,
   OTI_umod,
+  /* Floating point remainder */
+  OTI_fmod,
   /* Convert float to integer in float fmt */
   OTI_ftrunc,
 
@@ -245,6 +247,7 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define udivmod_optab (optab_table[OTI_udivmod])
 #define smod_optab (optab_table[OTI_smod])
 #define umod_optab (optab_table[OTI_umod])
+#define fmod_optab (optab_table[OTI_fmod])
 #define ftrunc_optab (optab_table[OTI_ftrunc])
 #define and_optab (optab_table[OTI_and])
 #define ior_optab (optab_table[OTI_ior])
Index: gcc/reg-stack.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.151
diff -u -p -r1.151 reg-stack.c
--- gcc/reg-stack.c	30 Apr 2004 16:27:19 -0000	1.151
+++ gcc/reg-stack.c	4 May 2004 11:35:41 -0000
@@ -1723,6 +1723,27 @@ subst_stack_regs_pat (rtx insn, stack re
 	  case UNSPEC:
 	    switch (XINT (pat_src, 1))
 	      {
+	      case UNSPEC_FPREM:
+		src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
+		src2 = get_true_reg (&XVECEXP (pat_src, 0, 1));
+
+		src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
+		src2_note = find_regno_note (insn, REG_DEAD, REGNO (*src2));
+
+		/* Inputs should never die, they are
+		   replaced with outputs.  */
+		if ((src1_note) || (src2_note))
+		  abort();
+
+		swap_to_top (insn, regstack, *src1, *src2);
+
+		if (STACK_REG_P (*dest))
+		  replace_reg (dest, FIRST_STACK_REG);
+
+		replace_reg (src1, FIRST_STACK_REG);
+		replace_reg (src2, FIRST_STACK_REG + 1);
+		break;
+
 	      case UNSPEC_SIN:
 	      case UNSPEC_COS:
 	      case UNSPEC_FRNDINT:
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.534
diff -u -p -r1.534 i386.md
--- gcc/config/i386/i386.md	3 May 2004 13:20:57 -0000	1.534
+++ gcc/config/i386/i386.md	4 May 2004 11:35:44 -0000
@@ -117,6 +117,7 @@
    ; x87 Floating point
    (UNSPEC_FPATAN		65)
    (UNSPEC_FYL2X		66)
+   (UNSPEC_FPREM		67)
    (UNSPEC_FRNDINT		68)
    (UNSPEC_F2XM1		69)
 
@@ -14857,6 +14858,128 @@
   [(set_attr "type" "fpspc")
    (set_attr "mode" "XF")
    (set_attr "athlon_decode" "direct")])
+
+(define_insn "fpremxf_1"  ; emits "fprem" followed by "fnstsw %1"
+  [(set (match_operand:XF 0 "register_operand" "=f")  ; operand 0: XFmode result
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")  ; operand 2: input tied to operand 0
+		    (match_operand:XF 3 "register_operand" "u")]  ; operand 3: second input, only read
+	           UNSPEC_FPREM))
+   (set (match_operand:HI 1 "register_operand" "=a")  ; operand 1: HImode target of fnstsw
+	(unspec:HI [(const_int 0)] UNSPEC_NOP))
+   (clobber (reg:CC 18))]  ; NOTE(review): hard reg 18 is presumably the flags register -- confirm
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+  "fprem\n\tfnstsw\t%1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "fmodsf3"  ; SFmode instance of the fmod$a3 name used for fmod_optab
+  [(use (match_operand:SF 0 "register_operand" ""))  ; operand 0: result
+   (use (match_operand:SF 1 "register_operand" ""))  ; operand 1: first input
+   (use (match_operand:SF 2 "register_operand" ""))]  ; operand 2: second input
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label1 = gen_label_rtx ();  /* Top of the fprem loop.  */
+  rtx label2 = gen_label_rtx ();  /* Loop exit.  */
+
+  rtx op1 = gen_reg_rtx (XFmode);  /* XFmode copies of the two inputs.  */
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  rtx tmp;
+
+  emit_insn(gen_extendsfxf2 (op1, operands[1]));
+  emit_insn(gen_extendsfxf2 (op2, operands[2]));
+
+  emit_label (label1);
+  tmp = gen_reg_rtx (HImode);  /* HImode output operand of fpremxf_1.  */
+  emit_insn (gen_fpremxf_1 (op1, tmp, op1, op2));  /* op1 is input and result.  */
+  emit_insn (gen_x86_sahf_1 (tmp));
+  
+  tmp = gen_rtx_REG (CCmode, FLAGS_REG); /* Build: if (flags ORDERED 0) goto label2.  */
+  tmp = gen_rtx_ORDERED (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label2),
+			      pc_rtx);
+  tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
+
+  emit_jump_insn (tmp);  /* Leave the loop when the test holds...  */
+  emit_jump (label1);  /* ...otherwise run fpremxf_1 again.  */
+
+  emit_label (label2);
+  emit_insn (gen_truncxfsf2_noop (operands[0], op1));  /* Hand op1 to the SFmode result.  */
+  DONE;
+})
+
+(define_expand "fmoddf3"  ; DFmode instance of the fmod$a3 name used for fmod_optab
+  [(use (match_operand:DF 0 "register_operand" ""))  ; operand 0: result
+   (use (match_operand:DF 1 "register_operand" ""))  ; operand 1: first input
+   (use (match_operand:DF 2 "register_operand" ""))]  ; operand 2: second input
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label1 = gen_label_rtx ();  /* Top of the fprem loop.  */
+  rtx label2 = gen_label_rtx ();  /* Loop exit.  */
+
+  rtx op1 = gen_reg_rtx (XFmode);  /* XFmode copies of the two inputs.  */
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  rtx tmp;
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));  /* Inputs are DFmode: use the DF->XF extend.  */
+  emit_insn (gen_extenddfxf2 (op2, operands[2]));
+
+  emit_label (label1);
+  tmp = gen_reg_rtx (HImode);  /* HImode output operand of fpremxf_1.  */
+  emit_insn (gen_fpremxf_1 (op1, tmp, op1, op2));  /* op1 is input and result.  */
+  emit_insn (gen_x86_sahf_1 (tmp));
+
+  tmp = gen_rtx_REG (CCmode, FLAGS_REG);  /* Build: if (flags ORDERED 0) goto label2.  */
+  tmp = gen_rtx_ORDERED (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label2),
+			      pc_rtx);
+  tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
+
+  emit_jump_insn (tmp);  /* Leave the loop when the test holds...  */
+  emit_jump (label1);  /* ...otherwise run fpremxf_1 again.  */
+
+  emit_label (label2);
+  emit_insn (gen_truncxfdf2_noop (operands[0], op1));  /* Hand op1 to the DFmode result.  */
+  DONE;
+})
+
+(define_expand "fmodxf3"  ; XFmode instance of the fmod$a3 name used for fmod_optab
+  [(use (match_operand:XF 0 "register_operand" ""))  ; operand 0: result
+   (use (match_operand:XF 1 "register_operand" ""))  ; operand 1: first input
+   (use (match_operand:XF 2 "register_operand" ""))]  ; operand 2: second input
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+{
+  rtx label1 = gen_label_rtx ();  /* Top of the fprem loop.  */
+  rtx label2 = gen_label_rtx ();  /* Loop exit.  */
+  rtx op1 = gen_reg_rtx (XFmode);  /* Scratch register the loop updates.  */
+  rtx tmp;
+
+  emit_move_insn (op1, operands[1]);  /* Copy, so the input operand is not overwritten.  */
+  emit_label (label1);
+  tmp = gen_reg_rtx (HImode);  /* HImode output operand of fpremxf_1.  */
+  emit_insn (gen_fpremxf_1 (op1, tmp, op1, operands[2]));  /* op1 is input and result.  */
+  emit_insn (gen_x86_sahf_1 (tmp));
+  tmp = gen_rtx_REG (CCmode, FLAGS_REG);  /* Build: if (flags ORDERED 0) goto label2.  */
+  tmp = gen_rtx_ORDERED (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label2),
+			      pc_rtx);
+  tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
+
+  emit_jump_insn (tmp);  /* Leave the loop when the test holds...  */
+  emit_jump (label1);  /* ...otherwise run fpremxf_1 again.  */
+
+  emit_label (label2);
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
 
 (define_insn "*sindf2"
   [(set (match_operand:DF 0 "register_operand" "=f")
/* Copyright (C) 2004 Free Software Foundation.

   Check that fmod, fmodf and fmodl built-in functions compile.

   Written by Uros Bizjak, 4th May 2004.  */

/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math" } */

extern double fmod(double, double);
extern float fmodf(float, float);
extern long double fmodl(long double, long double);


double test1(double x, double y)
{
  return fmod(x, y);
}

float test1f(float x, float y)
{
  return fmodf(x, y);
}

long double test1l(long double x, long double y)
{
  return fmodl(x, y);
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]