This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][PING] Expand lrint inline for x86_64/i?86 SSE math
- From: Richard Guenther <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Cc: geoffk at geoffk dot org
- Date: Tue, 24 Oct 2006 13:45:02 +0200 (CEST)
- Subject: [PATCH][PING] Expand lrint inline for x86_64/i?86 SSE math
This is a resend (and re-diff, some parts gone in already) of the first
patch in the series to expand C99 rounding functions inline for SSE math.
This expands {ll,l}rint{,f} inline if possible using the cvts{d,s}2s{i,d}
SSE intrinsic.
Bootstrapped and tested on {x86_64,i686,ppc,ia64}-linux-gnu.
Ok for mainline?
(I know this unfortunately needs a testsuite, a middle-end and a
i686 and x86_64 maintainer to approve, so CCing Geoff as Apple might
be interested in this as well ;))
Thanks,
Richard.
2006-08-23 Richard Guenther <rguenther@suse.de>
* optabs.h (enum optab_index): Remove OTI_lrint.
(enum convert_optab_index): Add COI_lrint.
(lrint_optab): Adjust.
(expand_sfix_optab): Declare.
* optabs.c (expand_sfix_optab): New function.
(init_optabs): Init lrint_optab as conversion optab.
* genopinit.c (lrint_optab): Change to a conversion optab.
* builtins.c (expand_builtin_int_roundingfn_2): Adjust to
expansion via conversion optab.
* config/i386/i386.md (*fistdi2_1): Remove
flag_unsafe_math_optimizations guard.
(fistdi2, fistdi2_with_temp, *fist<mode>2_1, fist<mode>2,
fist<mode>2_with_temp): Likewise.
(lrint<mode>2): Split into...
(lrintxf<mode>2): ... x87 part
(lrint<mode>di2, lrint<mode>si2): ... and SSE parts.
* config/i386/sse.md (sse_cvtss2si_2, sse_cvtss2_siq_2,
sse2_cvtsd2si_2, sse2_cvtsd2siq_2): New insns for
UNSPEC_FIX_NOTRUNC matching non-vector float modes.
* doc/md.texi (lrintMN2): Document.
* gcc.target/i386/math-torture/math-torture.exp: Torture
for interesting ia32 math options.
* gcc.target.i386/math-torture/lrint.c: New testcase.
Index: gcc/builtins.c
===================================================================
--- gcc.orig/builtins.c
+++ gcc/builtins.c
@@ -2320,7 +2320,7 @@ expand_builtin_int_roundingfn (tree exp,
static rtx
expand_builtin_int_roundingfn_2 (tree exp, rtx target, rtx subtarget)
{
- optab builtin_optab;
+ convert_optab builtin_optab;
rtx op0, insns;
tree fndecl = get_callee_fndecl (exp);
tree arglist = TREE_OPERAND (exp, 1);
@@ -2348,45 +2348,37 @@ expand_builtin_int_roundingfn_2 (tree ex
/* Make a suitable register to place result in. */
mode = TYPE_MODE (TREE_TYPE (exp));
- /* Before working hard, check whether the instruction is available. */
- if (builtin_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
- {
- target = gen_reg_rtx (mode);
-
- /* Wrap the computation of the argument in a SAVE_EXPR, as we may
- need to expand the argument again. This way, we will not perform
- side-effects more the once. */
- narg = builtin_save_expr (arg);
- if (narg != arg)
- {
- arg = narg;
- arglist = build_tree_list (NULL_TREE, arg);
- exp = build_function_call_expr (fndecl, arglist);
- }
+ target = gen_reg_rtx (mode);
- op0 = expand_expr (arg, subtarget, VOIDmode, 0);
-
- start_sequence ();
+ /* Wrap the computation of the argument in a SAVE_EXPR, as we may
+ need to expand the argument again. This way, we will not perform
+ side-effects more the once. */
+ narg = builtin_save_expr (arg);
+ if (narg != arg)
+ {
+ arg = narg;
+ arglist = build_tree_list (NULL_TREE, arg);
+ exp = build_function_call_expr (fndecl, arglist);
+ }
- /* Compute into TARGET.
- Set TARGET to wherever the result comes back. */
- target = expand_unop (mode, builtin_optab, op0, target, 0);
+ op0 = expand_expr (arg, subtarget, VOIDmode, 0);
- if (target != 0)
- {
- /* Output the entire sequence. */
- insns = get_insns ();
- end_sequence ();
- emit_insn (insns);
- return target;
- }
+ start_sequence ();
- /* If we were unable to expand via the builtin, stop the sequence
- (without outputting the insns) and call to the library function
- with the stabilized argument list. */
+ if (expand_sfix_optab (target, op0, builtin_optab))
+ {
+ /* Output the entire sequence. */
+ insns = get_insns ();
end_sequence ();
+ emit_insn (insns);
+ return target;
}
+ /* If we were unable to expand via the builtin, stop the sequence
+ (without outputting the insns) and call to the library function
+ with the stabilized argument list. */
+ end_sequence ();
+
target = expand_call (exp, target, target == const0_rtx);
return target;
Index: gcc/config/i386/i386.md
===================================================================
--- gcc.orig/config/i386/i386.md
+++ gcc/config/i386/i386.md
@@ -17211,7 +17211,6 @@
(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
UNSPEC_FIST))]
"TARGET_USE_FANCY_MATH_387
- && flag_unsafe_math_optimizations
&& !(reload_completed || reload_in_progress)"
"#"
"&& 1"
@@ -17235,8 +17234,7 @@
(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
UNSPEC_FIST))
(clobber (match_scratch:XF 2 "=&1f"))]
- "TARGET_USE_FANCY_MATH_387
- && flag_unsafe_math_optimizations"
+ "TARGET_USE_FANCY_MATH_387"
"* return output_fix_trunc (insn, operands, 0);"
[(set_attr "type" "fpspc")
(set_attr "mode" "DI")])
@@ -17247,8 +17245,7 @@
UNSPEC_FIST))
(clobber (match_operand:DI 2 "memory_operand" "=m,m"))
(clobber (match_scratch:XF 3 "=&1f,&1f"))]
- "TARGET_USE_FANCY_MATH_387
- && flag_unsafe_math_optimizations"
+ "TARGET_USE_FANCY_MATH_387"
"#"
[(set_attr "type" "fpspc")
(set_attr "mode" "DI")])
@@ -17281,7 +17278,6 @@
(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
UNSPEC_FIST))]
"TARGET_USE_FANCY_MATH_387
- && flag_unsafe_math_optimizations
&& !(reload_completed || reload_in_progress)"
"#"
"&& 1"
@@ -17299,8 +17295,7 @@
[(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
UNSPEC_FIST))]
- "TARGET_USE_FANCY_MATH_387
- && flag_unsafe_math_optimizations"
+ "TARGET_USE_FANCY_MATH_387"
"* return output_fix_trunc (insn, operands, 0);"
[(set_attr "type" "fpspc")
(set_attr "mode" "<MODE>")])
@@ -17310,8 +17305,7 @@
(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
UNSPEC_FIST))
(clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
- "TARGET_USE_FANCY_MATH_387
- && flag_unsafe_math_optimizations"
+ "TARGET_USE_FANCY_MATH_387"
"#"
[(set_attr "type" "fpspc")
(set_attr "mode" "<MODE>")])
@@ -17337,13 +17331,25 @@
UNSPEC_FIST))]
"")
-(define_expand "lrint<mode>2"
+(define_expand "lrintxf<mode>2"
[(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
- (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
- UNSPEC_FIST))]
- "TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations"
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387"
+ "")
+
+(define_expand "lrint<mode>di2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (unspec:DI [(match_operand:SSEMODEF 1 "register_operand" "")]
+ UNSPEC_FIX_NOTRUNC))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT"
+ "")
+
+(define_expand "lrint<mode>si2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (unspec:SI [(match_operand:SSEMODEF 1 "register_operand" "")]
+ UNSPEC_FIX_NOTRUNC))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
"")
;; Rounding mode control word calculation could clobber FLAGS_REG.
Index: gcc/config/i386/sse.md
===================================================================
--- gcc.orig/config/i386/sse.md
+++ gcc/config/i386/sse.md
@@ -974,6 +974,16 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SI")])
+(define_insn "sse_cvtss2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "cvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "SI")])
+
(define_insn "sse_cvtss2siq"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(unspec:DI
@@ -987,6 +997,16 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "DI")])
+(define_insn "sse_cvtss2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "DI")])
+
(define_insn "sse_cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(fix:SI
@@ -1932,6 +1952,16 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SI")])
+(define_insn "sse2_cvtsd2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "SI")])
+
(define_insn "sse2_cvtsd2siq"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(unspec:DI
@@ -1945,6 +1975,16 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "DI")])
+(define_insn "sse2_cvtsd2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "DI")])
+
(define_insn "sse2_cvttsd2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(fix:SI
Index: gcc/doc/md.texi
===================================================================
--- gcc.orig/doc/md.texi
+++ gcc/doc/md.texi
@@ -3682,6 +3682,12 @@ corresponds to the C data type @code{dou
built-in function uses the mode which corresponds to the C data
type @code{float}.
+@cindex @code{lrint@var{m}@var{n}2}
+@item @samp{lrint@var{m}@var{n}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number according to the current
+rounding mode and store in operand 0 (which has mode @var{n}).
+
@cindex @code{copysign@var{m}3} instruction pattern
@item @samp{copysign@var{m}3}
Store a value with the magnitude of operand 1 and the sign of operand
Index: gcc/genopinit.c
===================================================================
--- gcc.orig/genopinit.c
+++ gcc/genopinit.c
@@ -127,7 +127,7 @@ static const char * const optabs[] =
"btrunc_optab->handlers[$A].insn_code = CODE_FOR_$(btrunc$a2$)",
"nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
"rint_optab->handlers[$A].insn_code = CODE_FOR_$(rint$a2$)",
- "lrint_optab->handlers[$A].insn_code = CODE_FOR_$(lrint$a2$)",
+ "lrint_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lrint$F$a$I$b2$)",
"sincos_optab->handlers[$A].insn_code = CODE_FOR_$(sincos$a3$)",
"sin_optab->handlers[$A].insn_code = CODE_FOR_$(sin$a2$)",
"asin_optab->handlers[$A].insn_code = CODE_FOR_$(asin$a2$)",
Index: gcc/optabs.c
===================================================================
--- gcc.orig/optabs.c
+++ gcc/optabs.c
@@ -4861,6 +4861,46 @@ expand_fix (rtx to, rtx from, int unsign
convert_move (to, target, 0);
}
}
+
+/* Generate code to convert FROM to fixed point and store in TO. FROM
+ must be floating point, TO must be signed. Use the conversion optab
+ TAB to do the conversion. */
+
+bool
+expand_sfix_optab (rtx to, rtx from, convert_optab tab)
+{
+ enum insn_code icode;
+ rtx target = to;
+ enum machine_mode fmode, imode;
+
+ /* We first try to find a pair of modes, one real and one integer, at
+ least as wide as FROM and TO, respectively, in which we can open-code
+ this conversion. If the integer mode is wider than the mode of TO,
+ we can do the conversion either signed or unsigned. */
+
+ for (fmode = GET_MODE (from); fmode != VOIDmode;
+ fmode = GET_MODE_WIDER_MODE (fmode))
+ for (imode = GET_MODE (to); imode != VOIDmode;
+ imode = GET_MODE_WIDER_MODE (imode))
+ {
+ icode = tab->handlers[imode][fmode].insn_code;
+ if (icode != CODE_FOR_nothing)
+ {
+ if (fmode != GET_MODE (from))
+ from = convert_to_mode (fmode, from, 0);
+
+ if (imode != GET_MODE (to))
+ target = gen_reg_rtx (imode);
+
+ emit_unop_insn (icode, target, from, UNKNOWN);
+ if (target != to)
+ convert_move (to, target, 0);
+ return true;
+ }
+ }
+
+ return false;
+}
/* Report whether we have an instruction to perform the operation
specified by CODE on operands of mode MODE. */
@@ -5266,7 +5306,6 @@ init_optabs (void)
btrunc_optab = init_optab (UNKNOWN);
nearbyint_optab = init_optab (UNKNOWN);
rint_optab = init_optab (UNKNOWN);
- lrint_optab = init_optab (UNKNOWN);
sincos_optab = init_optab (UNKNOWN);
sin_optab = init_optab (UNKNOWN);
asin_optab = init_optab (UNKNOWN);
@@ -5325,6 +5364,7 @@ init_optabs (void)
ufixtrunc_optab = init_convert_optab (UNKNOWN);
sfloat_optab = init_convert_optab (FLOAT);
ufloat_optab = init_convert_optab (UNSIGNED_FLOAT);
+ lrint_optab = init_convert_optab (UNKNOWN);
for (i = 0; i < NUM_MACHINE_MODES; i++)
{
@@ -5444,6 +5484,8 @@ init_optabs (void)
MODE_DECIMAL_FLOAT, MODE_INT);
init_interclass_conv_libfuncs (ufloat_optab, "floatuns",
MODE_INT, MODE_DECIMAL_FLOAT);
+ init_interclass_conv_libfuncs (lrint_optab, "lrint",
+ MODE_INT, MODE_FLOAT);
/* sext_optab is also used for FLOAT_EXTEND. */
init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, true);
Index: gcc/optabs.h
===================================================================
--- gcc.orig/optabs.h
+++ gcc/optabs.h
@@ -196,7 +196,6 @@ enum optab_index
OTI_round,
OTI_nearbyint,
OTI_rint,
- OTI_lrint,
/* Tangent */
OTI_tan,
/* Inverse tangent */
@@ -345,7 +344,6 @@ extern GTY(()) optab optab_table[OTI_MAX
#define round_optab (optab_table[OTI_round])
#define nearbyint_optab (optab_table[OTI_nearbyint])
#define rint_optab (optab_table[OTI_rint])
-#define lrint_optab (optab_table[OTI_lrint])
#define tan_optab (optab_table[OTI_tan])
#define atan_optab (optab_table[OTI_atan])
#define copysign_optab (optab_table[OTI_copysign])
@@ -407,6 +405,8 @@ enum convert_optab_index
COI_sfloat,
COI_ufloat,
+ COI_lrint,
+
COI_MAX
};
@@ -421,6 +421,7 @@ extern GTY(()) convert_optab convert_opt
#define ufixtrunc_optab (convert_optab_table[COI_ufixtrunc])
#define sfloat_optab (convert_optab_table[COI_sfloat])
#define ufloat_optab (convert_optab_table[COI_ufloat])
+#define lrint_optab (convert_optab_table[COI_lrint])
/* These arrays record the insn_code of insns that may be needed to
perform input and output reloads of special objects. They provide a
@@ -597,6 +598,9 @@ extern void expand_float (rtx, rtx, int)
/* Generate code for a FIX_EXPR. */
extern void expand_fix (rtx, rtx, int);
+/* Generate code for float to integral conversion. */
+extern bool expand_sfix_optab (rtx, rtx, convert_optab);
+
/* Return tree if target supports vector operations for COND_EXPR. */
bool expand_vec_cond_expr_p (tree, enum machine_mode);
Index: gcc/testsuite/gcc.target/i386/math-torture/lrint.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/lrint.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+ return __builtin_lrintf (x);
+}
+long testl (double x)
+{
+ return __builtin_lrint (x);
+}
+long testll (long double x)
+{
+ return __builtin_lrintl (x);
+}
+long long testllf (float x)
+{
+ return __builtin_llrintf (x);
+}
+long long testll_ (double x)
+{
+ return __builtin_llrint (x);
+}
+long long testlll (long double x)
+{
+ return __builtin_llrintl (x);
+}
Index: gcc/testsuite/gcc.target/i386/math-torture/math-torture.exp
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/math-torture.exp
@@ -0,0 +1,27 @@
+# This harness is for tests that should be run at all optimisation levels.
+
+set TORTURE_OPTIONS [list \
+ { -O0 } \
+ { -O0 -msse -mno-sse2 -mfpmath=sse } \
+ { -O0 -msse -msse2 -mfpmath=sse } \
+ { -O0 -msse -mno-sse2 -mfpmath=sse,387 } \
+ { -O0 -msse -msse2 -mfpmath=sse,387 } \
+ { -O0 -msse -mno-sse2 -mfpmath=sse -ffast-math } \
+ { -O0 -msse -msse2 -mfpmath=sse -ffast-math } \
+ { -O0 -msse -mno-sse2 -mfpmath=sse,387 -ffast-math } \
+ { -O0 -msse -msse2 -mfpmath=sse,387 -ffast-math } \
+ { -O2 } \
+ { -O2 -msse -mno-sse2 -mfpmath=sse } \
+ { -O2 -msse -msse2 -mfpmath=sse } \
+ { -O2 -msse -mno-sse2 -mfpmath=sse,387 } \
+ { -O2 -msse -msse2 -mfpmath=sse,387 } \
+ { -O2 -msse -mno-sse2 -mfpmath=sse -ffast-math } \
+ { -O2 -msse -msse2 -mfpmath=sse -ffast-math } \
+ { -O2 -msse -mno-sse2 -mfpmath=sse,387 -ffast-math } \
+ { -O2 -msse -msse2 -mfpmath=sse,387 -ffast-math } \
+]
+load_lib gcc-dg.exp
+
+dg-init
+gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] ""
+dg-finish