This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Expand lfloor and lceil inline as SSE on x86_64/i?86
- From: Richard Guenther <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 23 Aug 2006 14:20:52 +0200 (CEST)
- Subject: [PATCH] Expand lfloor and lceil inline as SSE on x86_64/i?86
This patch, part of the series that expands rounding functions inline on
x86_64, handles lfloor and lceil: it converts them to conversion optabs
and adds expanders that target SSE math.
Bootstrapped and tested on x86_64-unknown-linux-gnu, tested on
i686-pc-linux-gnu.
Ok for 4.3?
Thanks,
Richard.
:ADDPATCH middle-end,i386:
2006-08-23 Richard Guenther <rguenther@suse.de>
* genopinit.c (optabs): Change lfloor_optab and lceil_optab
to conversion optabs.
* optabs.c (init_optabs): Initialize lfloor_optab and lceil_optab
as conversion optab.
* optabs.h (enum optab_index): Remove OTI_lfloor and OTI_lceil.
(enum convert_optab_index): Add COI_lfloor and COI_lceil.
(lfloor_optab, lceil_optab): Adjust defines.
* builtins.c (expand_builtin_int_roundingfn): Adjust for
lfloor and lceil optabs now being conversion optabs.
* config/i386/i386-protos.h (ix86_expand_lfloorceil): Declare.
* config/i386/i386.c (ix86_expand_sse_compare_and_jump):
New static helper function.
(ix86_expand_lfloorceil): New function to expand lfloor and
lceil inline.
* config/i386/i386.md (lfloor<mode>2): Split into ...
(lfloorxf<mode>2): ... x87 variant
(lfloor<mode>di2, lfloor<mode>si2): ... and SSE variants
using ix86_expand_lfloorceil.
(lceil<mode>2, lceilxf<mode>2, lceil<mode>di2, lceil<mode>si2):
Likewise.
* doc/md.texi (lfloorMN, lceilMN): Document.
* gcc.target/i386/math-torture/lfloor.c: New testcase.
* gcc.target/i386/math-torture/lceil.c: Likewise.
Index: gcc/builtins.c
===================================================================
--- gcc.orig/builtins.c
+++ gcc/builtins.c
@@ -2235,7 +2235,7 @@ expand_builtin_sincos (tree exp)
static rtx
expand_builtin_int_roundingfn (tree exp, rtx target, rtx subtarget)
{
- optab builtin_optab;
+ convert_optab builtin_optab;
rtx op0, insns, tmp;
tree fndecl = get_callee_fndecl (exp);
tree arglist = TREE_OPERAND (exp, 1);
@@ -2270,44 +2270,37 @@ expand_builtin_int_roundingfn (tree exp,
/* Make a suitable register to place result in. */
mode = TYPE_MODE (TREE_TYPE (exp));
- /* Before working hard, check whether the instruction is available. */
- if (builtin_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
- {
- target = gen_reg_rtx (mode);
-
- /* Wrap the computation of the argument in a SAVE_EXPR, as we may
- need to expand the argument again. This way, we will not perform
- side-effects more the once. */
- narg = builtin_save_expr (arg);
- if (narg != arg)
- {
- arg = narg;
- arglist = build_tree_list (NULL_TREE, arg);
- exp = build_function_call_expr (fndecl, arglist);
- }
-
- op0 = expand_expr (arg, subtarget, VOIDmode, 0);
+ target = gen_reg_rtx (mode);
- start_sequence ();
+ /* Wrap the computation of the argument in a SAVE_EXPR, as we may
+ need to expand the argument again. This way, we will not perform
+ side-effects more than once. */
+ narg = builtin_save_expr (arg);
+ if (narg != arg)
+ {
+ arg = narg;
+ arglist = build_tree_list (NULL_TREE, arg);
+ exp = build_function_call_expr (fndecl, arglist);
+ }
- /* Compute into TARGET.
- Set TARGET to wherever the result comes back. */
- target = expand_unop (mode, builtin_optab, op0, target, 0);
+ op0 = expand_expr (arg, subtarget, VOIDmode, 0);
- if (target != 0)
- {
- /* Output the entire sequence. */
- insns = get_insns ();
- end_sequence ();
- emit_insn (insns);
- return target;
- }
+ start_sequence ();
- /* If we were unable to expand via the builtin, stop the sequence
- (without outputting the insns). */
+ /* Compute into TARGET. */
+ if (expand_sfix_optab (target, op0, builtin_optab))
+ {
+ /* Output the entire sequence. */
+ insns = get_insns ();
end_sequence ();
+ emit_insn (insns);
+ return target;
}
+ /* If we were unable to expand via the builtin, stop the sequence
+ (without outputting the insns). */
+ end_sequence ();
+
/* Fall back to floating point rounding optab. */
fallback_fndecl = mathfn_built_in (TREE_TYPE (arg), fallback_fn);
/* We shouldn't get here on targets without TARGET_C99_FUNCTIONS.
Index: gcc/config/i386/i386-protos.h
===================================================================
--- gcc.orig/config/i386/i386-protos.h
+++ gcc/config/i386/i386-protos.h
@@ -158,6 +158,7 @@ extern void ix86_emit_i387_log1p (rtx, r
extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
extern void ix86_expand_lround (rtx, rtx);
+extern void ix86_expand_lfloorceil (rtx, rtx, bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc.orig/config/i386/i386.c
+++ gcc/config/i386/i386.c
@@ -18999,6 +18999,36 @@ ix86_sse_copysign_to_positive (rtx resul
gen_rtx_IOR (mode, abs_value, sgn)));
}
+/* Expands a comparison of OP0 with OP1 using comparison code CODE,
+ swapping the operands if SWAP_OPERANDS is true. The expanded
+ code is a forward jump to a newly created label in case the
+ comparison is true. The generated label rtx is returned. */
+static rtx
+ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
+ bool swap_operands)
+{
+ rtx label, tmp;
+
+ if (swap_operands)
+ {
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+
+ label = gen_label_rtx ();
+ tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_COMPARE (CCFPUmode, op0, op1)));
+ tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ return label;
+}
+
/* Expand SSE sequence for computing lround from OP1 storing
into OP0. */
void
@@ -19022,4 +19052,42 @@ ix86_expand_lround (rtx op0, rtx op1)
expand_fix (op0, adj, 0);
}
+/* Expand SSE2 sequence for computing lfloor (if DO_FLOOR) or lceil
+ (otherwise) from OP1 storing into OP0. */
+void
+ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
+{
+ /* C code for the stuff we're doing below (for do_floor):
+ xi = (long)op1;
+ dx = (double)xi - op1;
+ xi -= dx > 0 ? 1 : 0;
+ return xi;
+ */
+ enum machine_mode fmode = GET_MODE (op1);
+ enum machine_mode imode = GET_MODE (op0);
+ rtx ireg, freg, label, zero;
+
+ /* ireg = (long)op1 */
+ ireg = gen_reg_rtx (imode);
+ expand_fix (ireg, op1, 0);
+
+ /* freg = (double)ireg */
+ freg = gen_reg_rtx (fmode);
+ expand_float (freg, ireg, 0);
+
+ /* freg = freg - op1 */
+ expand_simple_binop (fmode, MINUS, freg, op1, freg, 0, OPTAB_DIRECT);
+
+ /* ireg = (freg > 0) ? ireg - 1 : ireg */
+ zero = force_reg (fmode, const_double_from_real_value (dconst0, fmode));
+ label = ix86_expand_sse_compare_and_jump (UNLE,
+ freg, zero, !do_floor);
+ expand_simple_binop (imode, do_floor ? MINUS : PLUS,
+ ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (op0, ireg);
+}
+
#include "gt-i386.h"
Index: gcc/config/i386/i386.md
===================================================================
--- gcc.orig/config/i386/i386.md
+++ gcc/config/i386/i386.md
@@ -17148,7 +17148,7 @@
(use (match_dup 3))])]
"")
-(define_expand "lfloor<mode>2"
+(define_expand "lfloorxf<mode>2"
[(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
(unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
UNSPEC_FIST_FLOOR))
@@ -17158,6 +17158,24 @@
&& flag_unsafe_math_optimizations"
"")
+(define_expand "lfloor<mode>di2"
+ [(match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT"
+{
+ ix86_expand_lfloorceil (operand0, operand1, true);
+ DONE;
+})
+
+(define_expand "lfloor<mode>si2"
+ [(match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+{
+ ix86_expand_lfloorceil (operand0, operand1, true);
+ DONE;
+})
+
;; Rounding mode control word calculation could clobber FLAGS_REG.
(define_insn_and_split "frndintxf2_ceil"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -17389,7 +17407,7 @@
(use (match_dup 3))])]
"")
-(define_expand "lceil<mode>2"
+(define_expand "lceilxf<mode>2"
[(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
(unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
UNSPEC_FIST_CEIL))
@@ -17399,6 +17417,24 @@
&& flag_unsafe_math_optimizations"
"")
+(define_expand "lceil<mode>di2"
+ [(match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT"
+{
+ ix86_expand_lfloorceil (operand0, operand1, false);
+ DONE;
+})
+
+(define_expand "lceil<mode>si2"
+ [(match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+{
+ ix86_expand_lfloorceil (operand0, operand1, false);
+ DONE;
+})
+
;; Rounding mode control word calculation could clobber FLAGS_REG.
(define_insn_and_split "frndintxf2_trunc"
[(set (match_operand:XF 0 "register_operand" "=f")
Index: gcc/doc/md.texi
===================================================================
--- gcc.orig/doc/md.texi
+++ gcc/doc/md.texi
@@ -3613,6 +3613,18 @@ Convert operand 1 (valid for floating po
point mode @var{n} as a signed number rounding to nearest and away
from zero and store in operand 0 (which has mode @var{n}).
+@cindex @code{lfloor@var{m}@var{n}2}
+@item @samp{lfloor@var{m}@var{n}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number rounding down and store in
+operand 0 (which has mode @var{n}).
+
+@cindex @code{lceil@var{m}@var{n}2}
+@item @samp{lceil@var{m}@var{n}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number rounding up and store in
+operand 0 (which has mode @var{n}).
+
@cindex @code{copysign@var{m}3} instruction pattern
@item @samp{copysign@var{m}3}
Store a value with the magnitude of operand 1 and the sign of operand
Index: gcc/genopinit.c
===================================================================
--- gcc.orig/genopinit.c
+++ gcc/genopinit.c
@@ -120,9 +120,9 @@ static const char * const optabs[] =
"copysign_optab->handlers[$A].insn_code = CODE_FOR_$(copysign$F$a3$)",
"sqrt_optab->handlers[$A].insn_code = CODE_FOR_$(sqrt$a2$)",
"floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
- "lfloor_optab->handlers[$A].insn_code = CODE_FOR_$(lfloor$a2$)",
+ "lfloor_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lfloor$F$a$I$b2$)",
"ceil_optab->handlers[$A].insn_code = CODE_FOR_$(ceil$a2$)",
- "lceil_optab->handlers[$A].insn_code = CODE_FOR_$(lceil$a2$)",
+ "lceil_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lceil$F$a$I$b2$)",
"round_optab->handlers[$A].insn_code = CODE_FOR_$(round$a2$)",
"btrunc_optab->handlers[$A].insn_code = CODE_FOR_$(btrunc$a2$)",
"nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
Index: gcc/optabs.c
===================================================================
--- gcc.orig/optabs.c
+++ gcc/optabs.c
@@ -5299,9 +5299,7 @@ init_optabs (void)
parity_optab = init_optab (PARITY);
sqrt_optab = init_optab (SQRT);
floor_optab = init_optab (UNKNOWN);
- lfloor_optab = init_optab (UNKNOWN);
ceil_optab = init_optab (UNKNOWN);
- lceil_optab = init_optab (UNKNOWN);
round_optab = init_optab (UNKNOWN);
btrunc_optab = init_optab (UNKNOWN);
nearbyint_optab = init_optab (UNKNOWN);
@@ -5366,6 +5364,8 @@ init_optabs (void)
ufloat_optab = init_convert_optab (UNSIGNED_FLOAT);
lrint_optab = init_convert_optab (UNKNOWN);
lround_optab = init_convert_optab (UNKNOWN);
+ lfloor_optab = init_convert_optab (UNKNOWN);
+ lceil_optab = init_convert_optab (UNKNOWN);
for (i = 0; i < NUM_MACHINE_MODES; i++)
{
@@ -5489,6 +5489,10 @@ init_optabs (void)
MODE_INT, MODE_FLOAT);
init_interclass_conv_libfuncs (lround_optab, "lround",
MODE_INT, MODE_FLOAT);
+ init_interclass_conv_libfuncs (lfloor_optab, "lfloor",
+ MODE_INT, MODE_FLOAT);
+ init_interclass_conv_libfuncs (lceil_optab, "lceil",
+ MODE_INT, MODE_FLOAT);
/* sext_optab is also used for FLOAT_EXTEND. */
init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, true);
Index: gcc/optabs.h
===================================================================
--- gcc.orig/optabs.h
+++ gcc/optabs.h
@@ -189,9 +189,7 @@ enum optab_index
OTI_log1p,
/* Rounding functions */
OTI_floor,
- OTI_lfloor,
OTI_ceil,
- OTI_lceil,
OTI_btrunc,
OTI_round,
OTI_nearbyint,
@@ -337,9 +335,7 @@ extern GTY(()) optab optab_table[OTI_MAX
#define log2_optab (optab_table[OTI_log2])
#define log1p_optab (optab_table[OTI_log1p])
#define floor_optab (optab_table[OTI_floor])
-#define lfloor_optab (optab_table[OTI_lfloor])
#define ceil_optab (optab_table[OTI_ceil])
-#define lceil_optab (optab_table[OTI_lceil])
#define btrunc_optab (optab_table[OTI_btrunc])
#define round_optab (optab_table[OTI_round])
#define nearbyint_optab (optab_table[OTI_nearbyint])
@@ -407,6 +403,8 @@ enum convert_optab_index
COI_lrint,
COI_lround,
+ COI_lfloor,
+ COI_lceil,
COI_MAX
};
@@ -424,6 +422,8 @@ extern GTY(()) convert_optab convert_opt
#define ufloat_optab (convert_optab_table[COI_ufloat])
#define lrint_optab (convert_optab_table[COI_lrint])
#define lround_optab (convert_optab_table[COI_lround])
+#define lfloor_optab (convert_optab_table[COI_lfloor])
+#define lceil_optab (convert_optab_table[COI_lceil])
/* These arrays record the insn_code of insns that may be needed to
perform input and output reloads of special objects. They provide a
Index: gcc/testsuite/gcc.target/i386/math-torture/lceil.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/lceil.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+ return __builtin_lceilf (x);
+}
+long testl (double x)
+{
+ return __builtin_lceil (x);
+}
+long testll (long double x)
+{
+ return __builtin_lceill (x);
+}
+long long testllf (float x)
+{
+ return __builtin_llceilf (x);
+}
+long long testll_ (double x)
+{
+ return __builtin_llceil (x);
+}
+long long testlll (long double x)
+{
+ return __builtin_llceill (x);
+}
Index: gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+ return __builtin_lfloorf (x);
+}
+long testl (double x)
+{
+ return __builtin_lfloor (x);
+}
+long testll (long double x)
+{
+ return __builtin_lfloorl (x);
+}
+long long testllf (float x)
+{
+ return __builtin_llfloorf (x);
+}
+long long testll_ (double x)
+{
+ return __builtin_llfloor (x);
+}
+long long testlll (long double x)
+{
+ return __builtin_llfloorl (x);
+}