This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] implement rint{,f,l}, floor{,f,l}, ceil{,f,l}, trunc{,f,l} and nearbyint{,f,l} as x87 built-in functions


Hello!

Attached patch implements rint{,f,l}, floor{,f,l}, ceil{,f,l}, trunc{,f,l} and nearbyint{,f,l} as x87 built-in functions. i387 rounding mode changes are implemented with the OPTIMIZE_MODE_SWITCHING machinery, so the generated code is well optimized.

The patch passed "make bootstrap" on i386 and successfully compiled the attached builtins-46.c with and without "-ffast-math". OK for mainline?

BTW: btrunc_optab was needed to prevent clash of new trunc?f2 patterns with existing trunc?f?f2 patterns. Also, BUILT_IN_RINT{,F,L} can be treated as BUILT_IN_NEARBYINT{,F,L}. These two issues are fixed with this patch.

2004-08-25  Uros Bizjak  <uros@kss-loka.si>

       * builtins.c (expand_builtin_mathfn): Handle BUILT_IN_RINT{,F,L}
       using rint_optab.
       (expand_builtin): Expand BUILT_IN_RINT{,F,L} using
       expand_builtin_mathfn.
       * genopinit.c (optabs): Rename trunc_optab to btrunc_optab. Use
       btrunc?f patterns for btrunc_optab. Implement rint_optab using
       rint?f patterns.
       * optabs.c (init_optabs): Initialize rint_optab.
       * optabs.h (enum optab_index): Rename OTI_trunc to OTI_btrunc.
       Add new OTI_rint.
       (btrunc_optab): Rename macro from trunc_optab.
       (rint_optab): Define corresponding macro.

       * reg-stack.c (subst_stack_regs_pat): Handle UNSPEC_FRNDINT_FLOOR,
       UNSPEC_FRNDINT_CEIL, UNSPEC_FRNDINT_TRUNC, UNSPEC_FRNDINT_EXCEPTION.

       * config/i386/i386-protos.h (emit_i387_cw_initialization):
       Change prototype. Use new enum i387_cw_mode parameter.
       * config/i386/i386.c (emit_i387_cw_initialization):
       Handle new rounding modes.

       * config/i386/i386.h (enum fp_cw_mode): Delete.
       (enum i387_cw_mode): New enum.
       (MODE_NEEDED): Handle new rounding modes.
       (EMIT_MODE_SET): Change condition to handle new rounding modes.

       * config/i386/i386.md (UNSPEC_FRNDINT_FLOOR, UNSPEC_FRNDINT_CEIL,
       UNSPEC_FRNDINT_TRUNC, UNSPEC_FRNDINT_EXCEPTION): New unspecs to
       represent different rounding modes of frndint insn.
       (i387cw): New attribute definition.
       (*fix_truncdi_1): Add "i387cw" attribute defined to "trunc".
       (fix_truncdi_nomemory): Same.
       (fix_truncdi_memory): Same.
       (*fix_truncsi_1): Same.
       (fix_truncsi_nomemory): Same.
       (fix_truncsi_memory): Same.
       (*fix_trunchi_1): Same.
       (fix_trunchi_nomemory): Same.
       (fix_trunchi_memory): Same.

       (x86_fnstcw_1): Remove comment.

       (*frndintxf2): Rename insn definition to frndintxf2. Move
       insn definition near rint?f2 expanders.
       (rintdf2, rintsf2, rintxf2): New expanders to implement rint,
       rintf and rintl built-ins as inline x87 intrinsics.
       (frndintxf4_floor): New pattern to implement floor rounding
       mode with frndint x87 instruction.
       (floordf2, floorsf2, floorxf2): New expanders to implement floor,
       floorf and floorl built-ins as inline x87 intrinsics.
       (frndintxf4_ceil): New pattern to implement ceil rounding
       mode with frndint x87 instruction.
       (ceildf2, ceilsf2, ceilxf2): New expanders to implement ceil,
       ceilf and ceill built-ins as inline x87 intrinsics.
       (frndintxf4_trunc): New pattern to implement trunc rounding
       mode with frndint x87 instruction.
       (btruncdf2, btruncsf2, btruncxf2): New expanders to implement trunc,
       truncf and truncl built-ins as inline x87 intrinsics.
       (frndintxf4_ex): New pattern to implement rounding
       mode with exceptions with frndint x87 instruction.
       (nearbyintdf2, nearbyintsf2, nearbyintxf2): New expanders to
       implement nearbyint, nearbyintf and nearbyintl built-ins as
       inline x87 intrinsics.

       * testsuite/gcc.dg/builtins-46.c: New.


Consider for example this function: int test(double a) { return (int)floor (a); }

Generated code with "-O2 -ffast-math -fomit-frame-pointer":
test:
       subl    $8, %esp
       fnstcw  6(%esp)
       fldl    12(%esp)
       movzwl  6(%esp), %eax
       andw    $-3073, %ax
       orw     $1024, %ax
       movw    %ax, 4(%esp)
       movzwl  6(%esp), %eax
       fldcw   4(%esp)
       frndint
       fldcw   6(%esp)
       orw     $3072, %ax
       movw    %ax, 4(%esp)
       fldcw   4(%esp)
       fistpl  (%esp)
       fldcw   6(%esp)
       movl    (%esp), %eax
       addl    $8, %esp
       ret

Only one fnstcw insn is needed to calculate MODE_FLOOR and MODE_TRUNC. The resulting code is still not optimal: (int)floor(x) could be replaced with some kind of "lfloor(x)" [as is the case with (int)rint -> lrint()], but that (fp)->(int) functionality is not yet implemented.

Unfortunately, round{,f,l} functions can't be implemented this way, because the x87 round-to-nearest mode breaks ties to even (it rounds 0.5 to 0.0 and 1.5 to 2.0), whereas round() must round halfway cases away from zero.

Uros.



Index: gcc/builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.372
diff -u -p -r1.372 builtins.c
--- gcc/builtins.c	15 Aug 2004 15:44:49 -0000	1.372
+++ gcc/builtins.c	25 Aug 2004 10:20:33 -0000
@@ -1741,6 +1741,10 @@ expand_builtin_mathfn (tree exp, rtx tar
     case BUILT_IN_NEARBYINTF:
     case BUILT_IN_NEARBYINTL:
       builtin_optab = nearbyint_optab; break;
+    case BUILT_IN_RINT:
+    case BUILT_IN_RINTF:
+    case BUILT_IN_RINTL:
+      builtin_optab = rint_optab; break;
     default:
       abort ();
     }
@@ -5637,6 +5641,9 @@ expand_builtin (tree exp, rtx target, rt
     case BUILT_IN_NEARBYINT:
     case BUILT_IN_NEARBYINTF:
     case BUILT_IN_NEARBYINTL:
+    case BUILT_IN_RINT:
+    case BUILT_IN_RINTF:
+    case BUILT_IN_RINTL:
       target = expand_builtin_mathfn (exp, target, subtarget);
       if (target)
 	return target;
Index: gcc/genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.78
diff -u -p -r1.78 genopinit.c
--- gcc/genopinit.c	3 Aug 2004 23:30:44 -0000	1.78
+++ gcc/genopinit.c	25 Aug 2004 10:20:33 -0000
@@ -122,8 +122,9 @@ static const char * const optabs[] =
   "floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
   "ceil_optab->handlers[$A].insn_code = CODE_FOR_$(ceil$a2$)",
   "round_optab->handlers[$A].insn_code = CODE_FOR_$(round$a2$)",
-  "trunc_optab->handlers[$A].insn_code = CODE_FOR_$(trunc$a2$)",
+  "btrunc_optab->handlers[$A].insn_code = CODE_FOR_$(btrunc$a2$)",
   "nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
+  "rint_optab->handlers[$A].insn_code = CODE_FOR_$(rint$a2$)",
   "sincos_optab->handlers[$A].insn_code = CODE_FOR_$(sincos$a3$)",
   "sin_optab->handlers[$A].insn_code = CODE_FOR_$(sin$a2$)",
   "asin_optab->handlers[$A].insn_code = CODE_FOR_$(asin$a2$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.235
diff -u -p -r1.235 optabs.c
--- gcc/optabs.c	19 Aug 2004 22:24:54 -0000	1.235
+++ gcc/optabs.c	25 Aug 2004 10:20:33 -0000
@@ -5430,6 +5430,7 @@ init_optabs (void)
   round_optab = init_optab (UNKNOWN);
   btrunc_optab = init_optab (UNKNOWN);
   nearbyint_optab = init_optab (UNKNOWN);
+  rint_optab = init_optab (UNKNOWN);
   sincos_optab = init_optab (UNKNOWN);
   sin_optab = init_optab (UNKNOWN);
   asin_optab = init_optab (UNKNOWN);
Index: gcc/optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.33
diff -u -p -r1.33 optabs.h
--- gcc/optabs.h	11 Aug 2004 02:50:06 -0000	1.33
+++ gcc/optabs.h	25 Aug 2004 10:20:33 -0000
@@ -183,9 +183,10 @@ enum optab_index
   /* Rounding functions */
   OTI_floor,
   OTI_ceil,
-  OTI_trunc,
+  OTI_btrunc,
   OTI_round,
   OTI_nearbyint,
+  OTI_rint,
   /* Tangent */
   OTI_tan,
   /* Inverse tangent */
@@ -299,9 +300,10 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define log1p_optab (optab_table[OTI_log1p])
 #define floor_optab (optab_table[OTI_floor])
 #define ceil_optab (optab_table[OTI_ceil])
-#define btrunc_optab (optab_table[OTI_trunc])
+#define btrunc_optab (optab_table[OTI_btrunc])
 #define round_optab (optab_table[OTI_round])
 #define nearbyint_optab (optab_table[OTI_nearbyint])
+#define rint_optab (optab_table[OTI_rint])
 #define tan_optab (optab_table[OTI_tan])
 #define atan_optab (optab_table[OTI_atan])
 
Index: gcc/reg-stack.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.161
diff -u -p -r1.161 reg-stack.c
--- gcc/reg-stack.c	18 Aug 2004 16:21:54 -0000	1.161
+++ gcc/reg-stack.c	25 Aug 2004 10:20:34 -0000
@@ -1728,6 +1728,12 @@ subst_stack_regs_pat (rtx insn, stack re
 	      case UNSPEC_COS:
 	      case UNSPEC_FRNDINT:
 	      case UNSPEC_F2XM1:
+
+	      case UNSPEC_FRNDINT_FLOOR:
+	      case UNSPEC_FRNDINT_CEIL:
+	      case UNSPEC_FRNDINT_TRUNC:
+	      case UNSPEC_FRNDINT_EXCEPTION:
+
 		/* These insns only operate on the top of the stack.  */
 
 		src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
Index: gcc/config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.117
diff -u -p -r1.117 i386-protos.h
--- gcc/config/i386/i386-protos.h	13 Aug 2004 04:29:01 -0000	1.117
+++ gcc/config/i386/i386-protos.h	25 Aug 2004 10:20:34 -0000
@@ -174,7 +174,7 @@ extern int ix86_secondary_memory_needed 
 					 enum machine_mode, int);
 extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
 extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
-extern void emit_i387_cw_initialization (rtx, rtx);
+extern void emit_i387_cw_initialization (rtx, rtx, enum i387_cw_mode);
 extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
 extern void x86_order_regs_for_local_alloc (void);
 extern void x86_function_profiler (FILE *, int);
Index: gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.714
diff -u -p -r1.714 i386.c
--- gcc/config/i386/i386.c	21 Aug 2004 06:49:14 -0000	1.714
+++ gcc/config/i386/i386.c	25 Aug 2004 10:20:36 -0000
@@ -7110,22 +7110,52 @@ output_387_binary_op (rtx insn, rtx *ope
   return buf;
 }
 
-/* Output code to initialize control word copies used by
-   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
-   is set to control word rounding downwards.  */
+/* Output code to initialize control word copies used by trunc?f?i and
+   rounding patterns.  CURRENT_MODE is set to current control word,
+   while NEW_MODE is set to new control word.  */
+
 void
-emit_i387_cw_initialization (rtx normal, rtx round_down)
+emit_i387_cw_initialization (rtx current_mode, rtx new_mode,
+			     enum i387_cw_mode mode)
 {
   rtx reg = gen_reg_rtx (HImode);
 
-  emit_insn (gen_x86_fnstcw_1 (normal));
-  emit_move_insn (reg, normal);
-  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
-      && !TARGET_64BIT)
-    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
-  else
-    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
-  emit_move_insn (round_down, reg);
+  emit_insn (gen_x86_fnstcw_1 (current_mode));
+  emit_move_insn (reg, current_mode);
+
+  switch (mode)
+    {
+    case FP_CW_FLOOR:
+      /* round down toward -oo */
+      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
+      break;
+
+    case FP_CW_CEIL:
+      /* round up toward +oo */
+      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
+      break;
+
+    case FP_CW_TRUNC:
+      /* round toward zero (truncate) */
+      if (!TARGET_PARTIAL_REG_STALL && !optimize_size
+	  && !TARGET_64BIT)
+	emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
+      else
+	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
+      break;
+
+    case FP_CW_EXCEPTION:
+      /* mask precision (inexact) exception for nearbyint */
+      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+      break;
+
+    default:
+      abort();
+    }
+
+  emit_move_insn (new_mode, reg);
 }
 
 /* Output code for INSN to convert a float to a signed int.  OPERANDS
Index: gcc/config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.397
diff -u -p -r1.397 i386.h
--- gcc/config/i386/i386.h	18 Aug 2004 15:07:55 -0000	1.397
+++ gcc/config/i386/i386.h	25 Aug 2004 10:20:37 -0000
@@ -2943,7 +2943,8 @@ extern rtx ix86_compare_op1;	/* operand 
    Post-reload pass may be later used to eliminate the redundant fildcw if
    needed.  */
 
-enum fp_cw_mode {FP_CW_STORED, FP_CW_UNINITIALIZED, FP_CW_ANY};
+enum i387_cw_mode {FP_CW_FLOOR, FP_CW_CEIL, FP_CW_TRUNC,
+		   FP_CW_EXCEPTION, FP_CW_UNINITIALIZED, FP_CW_ANY};
 
 /* Define this macro if the port needs extra instructions inserted
    for mode switching in an optimizing compilation.  */
@@ -2971,9 +2972,15 @@ enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
    || (GET_CODE (I) == INSN && (asm_noperands (PATTERN (I)) >= 0 	\
 				|| GET_CODE (PATTERN (I)) == ASM_INPUT))\
    ? FP_CW_UNINITIALIZED						\
-   : recog_memoized (I) < 0 || get_attr_type (I) != TYPE_FISTP		\
+   : recog_memoized (I) < 0 || get_attr_i387cw (I) == I387CW_ANY	\
    ? FP_CW_ANY								\
-   : FP_CW_STORED)
+   : get_attr_i387cw (I) == I387CW_FLOOR				\
+   ? FP_CW_FLOOR							\
+   : get_attr_i387cw (I) == I387CW_CEIL					\
+   ? FP_CW_CEIL								\
+   : get_attr_i387cw (I) == I387CW_EXCEPTION				\
+   ? FP_CW_EXCEPTION							\
+   : FP_CW_TRUNC)
 
 /* This macro specifies the order in which modes for ENTITY are
    processed.  0 is the highest priority.  */
@@ -2985,9 +2992,10 @@ enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
    are to be inserted.  */
 
 #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) 			\
-  ((MODE) == FP_CW_STORED						\
+  ((MODE) != FP_CW_ANY && (MODE) != FP_CW_UNINITIALIZED			\
    ? emit_i387_cw_initialization (assign_386_stack_local (HImode, 1),	\
-				  assign_386_stack_local (HImode, 2)), 0\
+				  assign_386_stack_local (HImode, 2),   \
+				  MODE), 0				\
    : 0)
 
 /* Avoid renaming of stack registers, as doing so in combination with
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.556
diff -u -p -r1.556 i386.md
--- gcc/config/i386/i386.md	14 Aug 2004 02:59:18 -0000	1.556
+++ gcc/config/i386/i386.md	25 Aug 2004 10:20:40 -0000
@@ -134,6 +134,12 @@
    (UNSPEC_FPREM1_F		90)
    (UNSPEC_FPREM1_U		91)
 
+   ; x87 Rounding
+   (UNSPEC_FRNDINT_FLOOR	96)
+   (UNSPEC_FRNDINT_CEIL 	97)
+   (UNSPEC_FRNDINT_TRUNC	98)
+   (UNSPEC_FRNDINT_EXCEPTION	99)
+
    ; REP instruction
    (UNSPEC_REP			75)
 
@@ -420,6 +426,11 @@
 (define_attr "fp_int_src" "false,true"
   (const_string "false"))
 
+;; Defines rounding mode of an FP operation.
+
+(define_attr "i387cw" "any,floor,ceil,trunc,exception"
+  (const_string "any"))
+
 ;; Describe a user's asm statement.
 (define_asm_attributes
   [(set_attr "length" "128")
@@ -4098,6 +4109,7 @@
   DONE;
 }
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "DI")])
 
 (define_insn "fix_truncdi_nomemory"
@@ -4111,6 +4123,7 @@
    && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
   "#"
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "DI")])
 
 (define_insn "fix_truncdi_memory"
@@ -4123,6 +4136,7 @@
    && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
   "* operands[5] = operands[4]; return output_fix_trunc (insn, operands);"
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "DI")])
 
 (define_split 
@@ -4263,6 +4277,7 @@
   DONE;
 }
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "SI")])
 
 (define_insn "fix_truncsi_nomemory"
@@ -4275,6 +4290,7 @@
    && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
   "#"
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "SI")])
 
 (define_insn "fix_truncsi_memory"
@@ -4286,6 +4302,7 @@
    && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
   "* return output_fix_trunc (insn, operands);"
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "SI")])
 
 ;; When SSE available, it is always faster to use it!
@@ -4404,6 +4421,7 @@
   DONE;
 }
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "HI")])
 
 (define_insn "fix_trunchi_nomemory"
@@ -4416,6 +4434,7 @@
    && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
   "#"
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "HI")])
 
 (define_insn "fix_trunchi_memory"
@@ -4427,6 +4446,7 @@
    && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
   "* return output_fix_trunc (insn, operands);"
   [(set_attr "type" "fistp")
+   (set_attr "i387cw" "trunc")
    (set_attr "mode" "HI")])
 
 (define_split 
@@ -4455,7 +4475,6 @@
    (set (match_dup 0) (match_dup 4))]
   "")
 
-;; %% Not used yet.
 (define_insn "x86_fnstcw_1"
   [(set (match_operand:HI 0 "memory_operand" "=m")
 	(unspec:HI [(reg:HI FPSR_REG)] UNSPEC_FSTCW))]
@@ -16040,16 +16059,6 @@
   operands[3] = gen_reg_rtx (XFmode);
 })
 
-(define_insn "*frndintxf2"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-	 UNSPEC_FRNDINT))]
-  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
-   && flag_unsafe_math_optimizations"
-  "frndint"
-  [(set_attr "type" "fpspc")
-   (set_attr "mode" "XF")])
-
 (define_insn "*f2xm1xf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
@@ -16420,6 +16429,332 @@
   emit_move_insn (operands[9], CONST1_RTX (XFmode));  /* fld1 */
 })
 
+
+(define_insn "frndintxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+  "frndint"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "rintdf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2 (op0, op1));
+
+  emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "rintsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2 (op0, op1));
+
+  emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "rintxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_frndintxf2 (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "frndintxf4_floor"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "fpspc")
+   (set_attr "i387cw" "floor")
+   (set_attr "mode" "XF")])
+
+(define_expand "floordf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf4_floor (op0, op1, op2, op3));
+
+  emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "floorsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf4_floor (op0, op1, op2, op3));
+
+  emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "floorxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_frndintxf4_floor (operands[0], operands[1], op2, op3));
+  DONE;
+})
+
+(define_insn "frndintxf4_ceil"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "fpspc")
+   (set_attr "i387cw" "ceil")
+   (set_attr "mode" "XF")])
+
+(define_expand "ceildf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf4_ceil (op0, op1, op2, op3));
+
+  emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "ceilsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf4_ceil (op0, op1, op2, op3));
+
+  emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "ceilxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_frndintxf4_ceil (operands[0], operands[1], op2, op3));
+  DONE;
+})
+
+(define_insn "frndintxf4_trunc"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT_TRUNC))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "fpspc")
+   (set_attr "i387cw" "trunc")
+   (set_attr "mode" "XF")])
+
+(define_expand "btruncdf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf4_trunc (op0, op1, op2, op3));
+
+  emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "btruncsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf4_trunc (op0, op1, op2, op3));
+
+  emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "btruncxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_frndintxf4_trunc (operands[0], operands[1], op2, op3));
+  DONE;
+})
+
+(define_insn "frndintxf4_ex"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+	 UNSPEC_FRNDINT_EXCEPTION))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+  [(set_attr "type" "fpspc")
+   (set_attr "i387cw" "exception")
+   (set_attr "mode" "XF")])
+
+(define_expand "nearbyintdf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf4_ex (op0, op1, op2, op3));
+
+  emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "nearbyintsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf4_ex (op0, op1, op2, op3));
+
+  emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "nearbyintxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2 = assign_386_stack_local (HImode, 1);
+  rtx op3 = assign_386_stack_local (HImode, 2);
+	
+  ix86_optimize_mode_switching = 1;
+
+  emit_insn (gen_frndintxf4_ex (operands[0], operands[1], op2, op3));
+  DONE;
+})
+
+
 ;; Block operation instructions
 
 (define_insn "cld"
/* Copyright (C) 2004 Free Software Foundation.

   Check that rint, rintf, rintl, floor, floorf, floorl,
   ceil, ceilf, ceill, trunc, truncf, truncl,
   nearbyint, nearbyintf and nearbyintl
   built-in functions compile.

   Written by Uros Bizjak, 25th Aug 2004.  */

/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math" } */

extern double rint(double);
extern double floor(double);
extern double ceil(double);
extern double trunc(double);
extern double nearbyint(double);

extern float rintf(float);
extern float floorf(float);
extern float ceilf(float);
extern float truncf(float);
extern float nearbyintf(float);

extern long double rintl(long double);
extern long double floorl(long double);
extern long double ceill(long double);
extern long double truncl(long double);
extern long double nearbyintl(long double);


double test1(double x)
{
  return rint(x);
}

double test2(double x)
{
  return floor(x);
}

double test3(double x)
{
  return ceil(x);
}

double test4(double x)
{
  return trunc(x);
}

double test5(double x)
{
  return nearbyint(x);
}

float test1f(float x)
{
  return rintf(x);
}

float test2f(float x)
{
  return floorf(x);
}

float test3f(float x)
{
  return ceilf(x);
}

float test4f(float x)
{
  return truncf(x);
}

float test5f(float x)
{
  return nearbyintf(x);
}

long double test1l(long double x)
{
  return rintl(x);
}

long double test2l(long double x)
{
  return floorl(x);
}

long double test3l(long double x)
{
  return ceill(x);
}

long double test4l(long double x)
{
  return truncl(x);
}

long double test5l(long double x)
{
  return nearbyintl(x);
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]