This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] implement rint{,f,l}, floor{,f,l}. ceil{,f,l}, trunc{,f,l}and nearbyint{,f,l} as x87 built-in functions
- From: Uros Bizjak <uros at kss-loka dot si>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Roger Sayle <roger at eyesopen dot com>
- Date: Wed, 25 Aug 2004 13:44:47 +0200
- Subject: [PATCH] implement rint{,f,l}, floor{,f,l}. ceil{,f,l}, trunc{,f,l}and nearbyint{,f,l} as x87 built-in functions
Hello!
The attached patch implements rint{,f,l}, floor{,f,l}, ceil{,f,l},
trunc{,f,l} and nearbyint{,f,l} as x87 built-in functions. i387 rounding
mode changes are implemented with the OPTIMIZE_MODE_SWITCHING machinery, so
the generated code is quite optimized.
The patch passed "make bootstrap" on i386 and successfully compiled the
attached builtins-46.c with and without "-ffast-math". OK for mainline?
BTW: btrunc_optab was needed to prevent clash of new trunc?f2 patterns
with existing trunc?f?f2 patterns. Also, BUILT_IN_RINT{,F,L} can be
treated as BUILT_IN_NEARBYINT{,F,L}. These two issues are fixed with
this patch.
2004-08-25 Uros Bizjak <uros@kss-loka.si>
* builtins.c (expand_builtin_mathfn): Handle BUILT_IN_RINT{,F,L}
using rint_optab.
(expand_builtin): Expand BUILT_IN_RINT{,F,L} using
expand_builtin_mathfn.
* genopinit.c (optabs): Rename trunc_optab to btrunc_optab. Use
btrunc?f patterns for btrunc_optab. Implement rint_optab using
rint?f patterns.
* optabs.c (init_optabs): Initialize rint_optab.
* optabs.h (enum optab_index): Rename OTI_trunc to OTI_btrunc.
Add new OTI_rint.
(btrunc_optab): Rename macro from trunc_optab.
(rint_optab): Define corresponding macro.
* reg-stack.c (subst_stack_regs_pat): Handle UNSPEC_FRNDINT_FLOOR,
UNSPEC_FRNDINT_CEIL, UNSPEC_FRNDINT_TRUNC, UNSPEC_FRNDINT_EXCEPTION.
* config/i386/i386-protos.h (emit_i387_cw_initialization):
Change prototype. Use new enum i387_cw_mode parameter.
* config/i386/i386.c (emit_i387_cw_initialization):
Handle new rounding modes.
* config/i386/i386.h (enum fp_cw_mode): Delete.
(enum i387_cw_mode): New enum.
(MODE_NEEDED): Handle new rounding modes.
(EMIT_MODE_SET): Change condition to handle new rounding modes.
* config/i386/i386.md (UNSPEC_FRNDINT_FLOOR, UNSPEC_FRNDINT_CEIL,
UNSPEC_FRNDINT_TRUNC, UNSPEC_FRNDINT_EXCEPTION): New unspecs to
represent different rounding modes of frndint insn.
(i387cw): New attribute definition.
(*fix_truncdi_1): Add "i387cw" attribute defined to "trunc".
(fix_truncdi_nomemory): Same.
(fix_truncdi_memory): Same.
(*fix_truncsi_1): Same.
(fix_truncsi_nomemory): Same.
(fix_truncsi_memory): Same.
(*fix_trunchi_1): Same.
(fix_trunchi_nomemory): Same.
(fix_trunchi_memory): Same.
(x86_fnstcw_1): Remove comment.
(*frndintxf2): Rename insn definition to frndintxf2. Move
insn definition near rint?f2 expanders.
(rintdf2, rintsf2, rintxf2): New expanders to implement rint,
rintf and rintl built-ins as inline x87 intrinsics.
(frndintxf4_floor): New pattern to implement floor rounding
mode with frndint x87 instruction.
(floordf2, floorsf2, floorxf2): New expanders to implement floor,
floorf and floorl built-ins as inline x87 intrinsics.
(frndintxf4_ceil): New pattern to implement ceil rounding
mode with frndint x87 instruction.
(ceildf2, ceilsf2, ceilxf2): New expanders to implement ceil,
ceilf and ceill built-ins as inline x87 intrinsics.
(frndintxf4_trunc): New pattern to implement trunc rounding
mode with frndint x87 instruction.
(btruncdf2, btruncsf2, btruncxf2): New expanders to implement trunc,
truncf and truncl built-ins as inline x87 intrinsics.
(frndintxf4_ex): New pattern to implement rounding
mode with exceptions with frndint x87 instruction.
(nearbyintdf2, nearbyintsf2, nearbyintxf2): New expanders to
implement nearbyint, nearbyintf and nearbyintl built-ins as
inline x87 intrinsics.
* testsuite/gcc.dg/builtins-46.c: New.
Consider for example this function:
int test(double a) {
return (int)floor (a);
}
Generated code with "-O2 -ffast-math -fomit-frame-pointer":
test:
subl $8, %esp
fnstcw 6(%esp)
fldl 12(%esp)
movzwl 6(%esp), %eax
andw $-3073, %ax
orw $1024, %ax
movw %ax, 4(%esp)
movzwl 6(%esp), %eax
fldcw 4(%esp)
frndint
fldcw 6(%esp)
orw $3072, %ax
movw %ax, 4(%esp)
fldcw 4(%esp)
fistpl (%esp)
fldcw 6(%esp)
movl (%esp), %eax
addl $8, %esp
ret
Only one fnstcw insn is needed to calculate both MODE_FLOOR and MODE_TRUNC.
The resulting code is still not optimal: (int)floor(x) could be substituted
with some kind of "lfloor(x)" [as is the case with (int)rint -> lrint()];
however, (fp)->(int) functionality is not yet implemented.
Unfortunately, round{,f,l} functions can't be implemented this way,
because x87 round-to-nearest breaks ties toward the even integer: it rounds
0.5 to 0.0 and 1.5 to 2.0, whereas round() must round halfway cases away
from zero (0.5 to 1.0).
Uros.
Index: gcc/builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.372
diff -u -p -r1.372 builtins.c
--- gcc/builtins.c 15 Aug 2004 15:44:49 -0000 1.372
+++ gcc/builtins.c 25 Aug 2004 10:20:33 -0000
@@ -1741,6 +1741,10 @@ expand_builtin_mathfn (tree exp, rtx tar
case BUILT_IN_NEARBYINTF:
case BUILT_IN_NEARBYINTL:
builtin_optab = nearbyint_optab; break;
+ case BUILT_IN_RINT:
+ case BUILT_IN_RINTF:
+ case BUILT_IN_RINTL:
+ builtin_optab = rint_optab; break;
default:
abort ();
}
@@ -5637,6 +5641,9 @@ expand_builtin (tree exp, rtx target, rt
case BUILT_IN_NEARBYINT:
case BUILT_IN_NEARBYINTF:
case BUILT_IN_NEARBYINTL:
+ case BUILT_IN_RINT:
+ case BUILT_IN_RINTF:
+ case BUILT_IN_RINTL:
target = expand_builtin_mathfn (exp, target, subtarget);
if (target)
return target;
Index: gcc/genopinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/genopinit.c,v
retrieving revision 1.78
diff -u -p -r1.78 genopinit.c
--- gcc/genopinit.c 3 Aug 2004 23:30:44 -0000 1.78
+++ gcc/genopinit.c 25 Aug 2004 10:20:33 -0000
@@ -122,8 +122,9 @@ static const char * const optabs[] =
"floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
"ceil_optab->handlers[$A].insn_code = CODE_FOR_$(ceil$a2$)",
"round_optab->handlers[$A].insn_code = CODE_FOR_$(round$a2$)",
- "trunc_optab->handlers[$A].insn_code = CODE_FOR_$(trunc$a2$)",
+ "btrunc_optab->handlers[$A].insn_code = CODE_FOR_$(btrunc$a2$)",
"nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
+ "rint_optab->handlers[$A].insn_code = CODE_FOR_$(rint$a2$)",
"sincos_optab->handlers[$A].insn_code = CODE_FOR_$(sincos$a3$)",
"sin_optab->handlers[$A].insn_code = CODE_FOR_$(sin$a2$)",
"asin_optab->handlers[$A].insn_code = CODE_FOR_$(asin$a2$)",
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.235
diff -u -p -r1.235 optabs.c
--- gcc/optabs.c 19 Aug 2004 22:24:54 -0000 1.235
+++ gcc/optabs.c 25 Aug 2004 10:20:33 -0000
@@ -5430,6 +5430,7 @@ init_optabs (void)
round_optab = init_optab (UNKNOWN);
btrunc_optab = init_optab (UNKNOWN);
nearbyint_optab = init_optab (UNKNOWN);
+ rint_optab = init_optab (UNKNOWN);
sincos_optab = init_optab (UNKNOWN);
sin_optab = init_optab (UNKNOWN);
asin_optab = init_optab (UNKNOWN);
Index: gcc/optabs.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.h,v
retrieving revision 1.33
diff -u -p -r1.33 optabs.h
--- gcc/optabs.h 11 Aug 2004 02:50:06 -0000 1.33
+++ gcc/optabs.h 25 Aug 2004 10:20:33 -0000
@@ -183,9 +183,10 @@ enum optab_index
/* Rounding functions */
OTI_floor,
OTI_ceil,
- OTI_trunc,
+ OTI_btrunc,
OTI_round,
OTI_nearbyint,
+ OTI_rint,
/* Tangent */
OTI_tan,
/* Inverse tangent */
@@ -299,9 +300,10 @@ extern GTY(()) optab optab_table[OTI_MAX
#define log1p_optab (optab_table[OTI_log1p])
#define floor_optab (optab_table[OTI_floor])
#define ceil_optab (optab_table[OTI_ceil])
-#define btrunc_optab (optab_table[OTI_trunc])
+#define btrunc_optab (optab_table[OTI_btrunc])
#define round_optab (optab_table[OTI_round])
#define nearbyint_optab (optab_table[OTI_nearbyint])
+#define rint_optab (optab_table[OTI_rint])
#define tan_optab (optab_table[OTI_tan])
#define atan_optab (optab_table[OTI_atan])
Index: gcc/reg-stack.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reg-stack.c,v
retrieving revision 1.161
diff -u -p -r1.161 reg-stack.c
--- gcc/reg-stack.c 18 Aug 2004 16:21:54 -0000 1.161
+++ gcc/reg-stack.c 25 Aug 2004 10:20:34 -0000
@@ -1728,6 +1728,12 @@ subst_stack_regs_pat (rtx insn, stack re
case UNSPEC_COS:
case UNSPEC_FRNDINT:
case UNSPEC_F2XM1:
+
+ case UNSPEC_FRNDINT_FLOOR:
+ case UNSPEC_FRNDINT_CEIL:
+ case UNSPEC_FRNDINT_TRUNC:
+ case UNSPEC_FRNDINT_EXCEPTION:
+
/* These insns only operate on the top of the stack. */
src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
Index: gcc/config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.117
diff -u -p -r1.117 i386-protos.h
--- gcc/config/i386/i386-protos.h 13 Aug 2004 04:29:01 -0000 1.117
+++ gcc/config/i386/i386-protos.h 25 Aug 2004 10:20:34 -0000
@@ -174,7 +174,7 @@ extern int ix86_secondary_memory_needed
enum machine_mode, int);
extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
-extern void emit_i387_cw_initialization (rtx, rtx);
+extern void emit_i387_cw_initialization (rtx, rtx, enum i387_cw_mode);
extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
extern void x86_order_regs_for_local_alloc (void);
extern void x86_function_profiler (FILE *, int);
Index: gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.714
diff -u -p -r1.714 i386.c
--- gcc/config/i386/i386.c 21 Aug 2004 06:49:14 -0000 1.714
+++ gcc/config/i386/i386.c 25 Aug 2004 10:20:36 -0000
@@ -7110,22 +7110,52 @@ output_387_binary_op (rtx insn, rtx *ope
return buf;
}
-/* Output code to initialize control word copies used by
- trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
- is set to control word rounding downwards. */
+/* Output code to initialize control word copies used by trunc?f?i and
+ rounding patterns. CURRENT_MODE is set to current control word,
+ while NEW_MODE is set to new control word. */
+
void
-emit_i387_cw_initialization (rtx normal, rtx round_down)
+emit_i387_cw_initialization (rtx current_mode, rtx new_mode,
+ enum i387_cw_mode mode)
{
rtx reg = gen_reg_rtx (HImode);
- emit_insn (gen_x86_fnstcw_1 (normal));
- emit_move_insn (reg, normal);
- if (!TARGET_PARTIAL_REG_STALL && !optimize_size
- && !TARGET_64BIT)
- emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
- else
- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
- emit_move_insn (round_down, reg);
+ emit_insn (gen_x86_fnstcw_1 (current_mode));
+ emit_move_insn (reg, current_mode);
+
+ switch (mode)
+ {
+ case FP_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
+ break;
+
+ case FP_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
+ break;
+
+ case FP_CW_TRUNC:
+ /* round toward zero (truncate) */
+ if (!TARGET_PARTIAL_REG_STALL && !optimize_size
+ && !TARGET_64BIT)
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
+ else
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
+ break;
+
+ case FP_CW_EXCEPTION:
+ /* mask the precision (inexact) exception for nearbyint */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ break;
+
+ default:
+ abort();
+ }
+
+ emit_move_insn (new_mode, reg);
}
/* Output code for INSN to convert a float to a signed int. OPERANDS
Index: gcc/config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.397
diff -u -p -r1.397 i386.h
--- gcc/config/i386/i386.h 18 Aug 2004 15:07:55 -0000 1.397
+++ gcc/config/i386/i386.h 25 Aug 2004 10:20:37 -0000
@@ -2943,7 +2943,8 @@ extern rtx ix86_compare_op1; /* operand
Post-reload pass may be later used to eliminate the redundant fildcw if
needed. */
-enum fp_cw_mode {FP_CW_STORED, FP_CW_UNINITIALIZED, FP_CW_ANY};
+enum i387_cw_mode {FP_CW_FLOOR, FP_CW_CEIL, FP_CW_TRUNC,
+ FP_CW_EXCEPTION, FP_CW_UNINITIALIZED, FP_CW_ANY};
/* Define this macro if the port needs extra instructions inserted
for mode switching in an optimizing compilation. */
@@ -2971,9 +2972,15 @@ enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
|| (GET_CODE (I) == INSN && (asm_noperands (PATTERN (I)) >= 0 \
|| GET_CODE (PATTERN (I)) == ASM_INPUT))\
? FP_CW_UNINITIALIZED \
- : recog_memoized (I) < 0 || get_attr_type (I) != TYPE_FISTP \
+ : recog_memoized (I) < 0 || get_attr_i387cw (I) == I387CW_ANY \
? FP_CW_ANY \
- : FP_CW_STORED)
+ : get_attr_i387cw (I) == I387CW_FLOOR \
+ ? FP_CW_FLOOR \
+ : get_attr_i387cw (I) == I387CW_CEIL \
+ ? FP_CW_CEIL \
+ : get_attr_i387cw (I) == I387CW_EXCEPTION \
+ ? FP_CW_EXCEPTION \
+ : FP_CW_TRUNC)
/* This macro specifies the order in which modes for ENTITY are
processed. 0 is the highest priority. */
@@ -2985,9 +2992,10 @@ enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
are to be inserted. */
#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
- ((MODE) == FP_CW_STORED \
+ ((MODE) != FP_CW_ANY && (MODE) != FP_CW_UNINITIALIZED \
? emit_i387_cw_initialization (assign_386_stack_local (HImode, 1), \
- assign_386_stack_local (HImode, 2)), 0\
+ assign_386_stack_local (HImode, 2), \
+ MODE), 0 \
: 0)
/* Avoid renaming of stack registers, as doing so in combination with
Index: gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.556
diff -u -p -r1.556 i386.md
--- gcc/config/i386/i386.md 14 Aug 2004 02:59:18 -0000 1.556
+++ gcc/config/i386/i386.md 25 Aug 2004 10:20:40 -0000
@@ -134,6 +134,12 @@
(UNSPEC_FPREM1_F 90)
(UNSPEC_FPREM1_U 91)
+ ; x87 Rounding
+ (UNSPEC_FRNDINT_FLOOR 96)
+ (UNSPEC_FRNDINT_CEIL 97)
+ (UNSPEC_FRNDINT_TRUNC 98)
+ (UNSPEC_FRNDINT_EXCEPTION 99)
+
; REP instruction
(UNSPEC_REP 75)
@@ -420,6 +426,11 @@
(define_attr "fp_int_src" "false,true"
(const_string "false"))
+;; Defines rounding mode of an FP operation.
+
+(define_attr "i387cw" "any,floor,ceil,trunc,exception"
+ (const_string "any"))
+
;; Describe a user's asm statement.
(define_asm_attributes
[(set_attr "length" "128")
@@ -4098,6 +4109,7 @@
DONE;
}
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "DI")])
(define_insn "fix_truncdi_nomemory"
@@ -4111,6 +4123,7 @@
&& (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
"#"
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "DI")])
(define_insn "fix_truncdi_memory"
@@ -4123,6 +4136,7 @@
&& (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
"* operands[5] = operands[4]; return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "DI")])
(define_split
@@ -4263,6 +4277,7 @@
DONE;
}
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "SI")])
(define_insn "fix_truncsi_nomemory"
@@ -4275,6 +4290,7 @@
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"#"
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "SI")])
(define_insn "fix_truncsi_memory"
@@ -4286,6 +4302,7 @@
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"* return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "SI")])
;; When SSE available, it is always faster to use it!
@@ -4404,6 +4421,7 @@
DONE;
}
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "HI")])
(define_insn "fix_trunchi_nomemory"
@@ -4416,6 +4434,7 @@
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"#"
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "HI")])
(define_insn "fix_trunchi_memory"
@@ -4427,6 +4446,7 @@
&& !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
"* return output_fix_trunc (insn, operands);"
[(set_attr "type" "fistp")
+ (set_attr "i387cw" "trunc")
(set_attr "mode" "HI")])
(define_split
@@ -4455,7 +4475,6 @@
(set (match_dup 0) (match_dup 4))]
"")
-;; %% Not used yet.
(define_insn "x86_fnstcw_1"
[(set (match_operand:HI 0 "memory_operand" "=m")
(unspec:HI [(reg:HI FPSR_REG)] UNSPEC_FSTCW))]
@@ -16040,16 +16059,6 @@
operands[3] = gen_reg_rtx (XFmode);
})
-(define_insn "*frndintxf2"
- [(set (match_operand:XF 0 "register_operand" "=f")
- (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
- UNSPEC_FRNDINT))]
- "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
- && flag_unsafe_math_optimizations"
- "frndint"
- [(set_attr "type" "fpspc")
- (set_attr "mode" "XF")])
-
(define_insn "*f2xm1xf2"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
@@ -16420,6 +16429,332 @@
emit_move_insn (operands[9], CONST1_RTX (XFmode)); /* fld1 */
})
+
+(define_insn "frndintxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+ "frndint"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "rintdf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2 (op0, op1));
+
+ emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "rintsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2 (op0, op1));
+
+ emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "rintxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ emit_insn (gen_frndintxf2 (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "frndintxf4_floor"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "fpspc")
+ (set_attr "i387cw" "floor")
+ (set_attr "mode" "XF")])
+
+(define_expand "floordf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf4_floor (op0, op1, op2, op3));
+
+ emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "floorsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf4_floor (op0, op1, op2, op3));
+
+ emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "floorxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_frndintxf4_floor (operands[0], operands[1], op2, op3));
+ DONE;
+})
+
+(define_insn "frndintxf4_ceil"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "fpspc")
+ (set_attr "i387cw" "ceil")
+ (set_attr "mode" "XF")])
+
+(define_expand "ceildf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf4_ceil (op0, op1, op2, op3));
+
+ emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "ceilsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf4_ceil (op0, op1, op2, op3));
+
+ emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "ceilxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_frndintxf4_ceil (operands[0], operands[1], op2, op3));
+ DONE;
+})
+
+(define_insn "frndintxf4_trunc"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_TRUNC))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "fpspc")
+ (set_attr "i387cw" "trunc")
+ (set_attr "mode" "XF")])
+
+(define_expand "btruncdf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf4_trunc (op0, op1, op2, op3));
+
+ emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "btruncsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf4_trunc (op0, op1, op2, op3));
+
+ emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "btruncxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_frndintxf4_trunc (operands[0], operands[1], op2, op3));
+ DONE;
+})
+
+(define_insn "frndintxf4_ex"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_EXCEPTION))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+ [(set_attr "type" "fpspc")
+ (set_attr "i387cw" "exception")
+ (set_attr "mode" "XF")])
+
+(define_expand "nearbyintdf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf4_ex (op0, op1, op2, op3));
+
+ emit_insn (gen_truncxfdf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "nearbyintsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf4_ex (op0, op1, op2, op3));
+
+ emit_insn (gen_truncxfsf2_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "nearbyintxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2 = assign_386_stack_local (HImode, 1);
+ rtx op3 = assign_386_stack_local (HImode, 2);
+
+ ix86_optimize_mode_switching = 1;
+
+ emit_insn (gen_frndintxf4_ex (operands[0], operands[1], op2, op3));
+ DONE;
+})
+
+
;; Block operation instructions
(define_insn "cld"
/* Copyright (C) 2004 Free Software Foundation.
Check that rint, rintf, rintl, floor, floorf, floorl,
ceil, ceilf, ceill, trunc, truncf, truncl,
nearbyint, nearbyintf and nearbyintl
built-in functions compile.
Written by Uros Bizjak, 25th Aug 2004. */
/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math" } */
extern double rint(double);
extern double floor(double);
extern double ceil(double);
extern double trunc(double);
extern double nearbyint(double);
extern float rintf(float);
extern float floorf(float);
extern float ceilf(float);
extern float truncf(float);
extern float nearbyintf(float);
extern long double rintl(long double);
extern long double floorl(long double);
extern long double ceill(long double);
extern long double truncl(long double);
extern long double nearbyintl(long double);
double test1(double x)
{
return rint(x);
}
double test2(double x)
{
return floor(x);
}
double test3(double x)
{
return ceil(x);
}
double test4(double x)
{
return trunc(x);
}
double test5(double x)
{
return nearbyint(x);
}
float test1f(float x)
{
return rintf(x);
}
float test2f(float x)
{
return floorf(x);
}
float test3f(float x)
{
return ceilf(x);
}
float test4f(float x)
{
return truncf(x);
}
float test5f(float x)
{
return nearbyintf(x);
}
long double test1l(long double x)
{
return rintl(x);
}
long double test2l(long double x)
{
return floorl(x);
}
long double test3l(long double x)
{
return ceill(x);
}
long double test4l(long double x)
{
return truncl(x);
}
long double test5l(long double x)
{
return nearbyintl(x);
}
- Follow-Ups:
- Re: [PATCH] implement rint{,f,l}, floor{,f,l}. ceil{,f,l}, trunc{,f,l}and nearbyint{,f,l} as x87 built-in functions
- Re: [PATCH] implement rint{,f,l}, floor{,f,l}. ceil{,f,l}, trunc{,f,l}and nearbyint{,f,l} as x87 built-in functions
- Re: [PATCH] implement rint{,f,l}, floor{,f,l}. ceil{,f,l}, trunc{,f,l} and nearbyint{,f,l} as x87 built-in functions