This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH]: Expand finite() as inline i386 asm
Richard Guenther wrote:
Regarding the implementation, by representing "finite" optab as a
conversion optab, existing infrastructure that handles conversion
optabs could be used with minimum middle-end changes.
A conversion optab has a non-negligible size overhead and in this case
we only have one target mode (SImode), so I'd rather have you use
a regular unoptab.
Regular unoptab is _not_ appropriate for intermode optabs like this:
(define_expand "isinf<mode>2"
[(use (match_operand:SI 0 "register_operand" ""))
(use (match_operand:X87MODEF 1 "register_operand" ""))]
1) This optab must expand in SImode. If we try to expand it in float
mode, we crash in emit_move_insn, which tries to move the SImode output
argument into a DFmode target register. If SImode expansion is necessary,
we are stuck with "isinfsi2" name - and we can't use macros as this
would produce 3 functions with the same name.
2) It is possible to use only an XFmode input argument and have
expand_unop() expand the input argument to XFmode. However, the following code
will produce quite suboptimal asm due to copying of the input argument (a)
from DFmode to XFmode:
if (__builtin_isinf (a))
return a;
else
return log(a);
gcc -O2:
test:
pushl %ebp
movl %esp, %ebp
fldl 8(%ebp)
fld %st(0)
fxam
fnstsw %ax
fstp %st(0)
andb $69, %ah
shrl $8, %eax
cmpb $5, %al
jne .L6
popl %ebp
ret
.p2align 4,,7
.L6:
fstp %st(0)
popl %ebp
jmp sin
(also, the pattern can't be selectively disabled for SSE and SSE2 -
just like the ilogbsi2 pattern ATM).
It could be
argued that a function named "expand_builtin_int_roundingfn_2" is not
exactly the best name for a function that also handles the finite (and
eventually ilogb) optab - perhaps it could be renamed to a better name
like "expand_builtin_intermodefn".
Just don't use that function and implement expand_builtin_classify ().
And this function will call expand_sfix_optab() or something very
similar due to intermode optab...
I think the testcase will fail on targets not providing a library
fallback
for finite{,f,l} as it seems to conform to BSD (the C99 variant is called
isfinite ()).
Hm, indeed. Attached (proto-) patch thus implements C99
__builtin_isfinite() for x87 math. It should be noted that this
intrinsic function is 25% faster than a call to the library function...
Please also see optimizers in action around cmp/jmp insns.
Uros.
Index: optabs.c
===================================================================
--- optabs.c (revision 121254)
+++ optabs.c (working copy)
@@ -5339,6 +5339,8 @@
le_optab = init_optab (LE);
unord_optab = init_optab (UNORDERED);
+ isinf_optab = init_optab (UNKNOWN);
+
neg_optab = init_optab (NEG);
negv_optab = init_optabv (NEG);
abs_optab = init_optab (ABS);
Index: optabs.h
===================================================================
--- optabs.h (revision 121254)
+++ optabs.h (working copy)
@@ -220,6 +220,9 @@
OTI_le,
OTI_unord,
+ /* Floating point classification optabs */
+ OTI_isinf,
+
/* String length */
OTI_strlen,
@@ -382,6 +385,8 @@
#define le_optab (optab_table[OTI_le])
#define unord_optab (optab_table[OTI_unord])
+#define isinf_optab (optab_table[OTI_isinf])
+
#define strlen_optab (optab_table[OTI_strlen])
#define cbranch_optab (optab_table[OTI_cbranch])
Index: genopinit.c
===================================================================
--- genopinit.c (revision 121254)
+++ genopinit.c (working copy)
@@ -118,6 +118,7 @@
abs_optab->handlers[$A].insn_code = CODE_FOR_$(abs$F$a2$)",
"absv_optab->handlers[$A].insn_code = CODE_FOR_$(absv$I$a2$)",
"copysign_optab->handlers[$A].insn_code = CODE_FOR_$(copysign$F$a3$)",
+ "isinf_optab->handlers[$A].insn_code = CODE_FOR_$(isinf$a2$)",
"sqrt_optab->handlers[$A].insn_code = CODE_FOR_$(sqrt$a2$)",
"floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
"lfloor_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lfloor$F$a$I$b2$)",
Index: builtins.c
===================================================================
--- builtins.c (revision 121254)
+++ builtins.c (working copy)
@@ -94,6 +94,7 @@
static rtx expand_builtin_mathfn (tree, rtx, rtx);
static rtx expand_builtin_mathfn_2 (tree, rtx, rtx);
static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
+static rtx expand_builtin_classify (tree, rtx, rtx);
static rtx expand_builtin_sincos (tree);
static rtx expand_builtin_cexpi (tree, rtx, rtx);
static rtx expand_builtin_int_roundingfn (tree, rtx, rtx);
@@ -1674,6 +1675,7 @@
CASE_MATHFN (BUILT_IN_HYPOT)
CASE_MATHFN (BUILT_IN_ILOGB)
CASE_MATHFN (BUILT_IN_INF)
+ CASE_MATHFN (BUILT_IN_ISINF)
CASE_MATHFN (BUILT_IN_J0)
CASE_MATHFN (BUILT_IN_J1)
CASE_MATHFN (BUILT_IN_JN)
@@ -2169,6 +2171,78 @@
return target;
}
+static rtx
+expand_builtin_classify (tree exp, rtx target, rtx subtarget)
+{
+ optab builtin_optab;
+ rtx op0, insns, before_call;
+ tree fndecl = get_callee_fndecl (exp);
+ tree arglist = TREE_OPERAND (exp, 1);
+ enum machine_mode mode;
+ tree arg, narg;
+
+ if (!validate_arglist (arglist, REAL_TYPE, VOID_TYPE))
+ return 0;
+
+ arg = TREE_VALUE (arglist);
+
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ CASE_FLT_FN (BUILT_IN_ISINF):
+ builtin_optab = isinf_optab; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Make a suitable register to place result in. */
+ mode = TYPE_MODE (TREE_TYPE (exp));
+
+ /* Before working hard, check whether the instruction is available. */
+ if (builtin_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
+ {
+ target = gen_reg_rtx (mode);
+
+ /* Wrap the computation of the argument in a SAVE_EXPR, as we may
+ need to expand the argument again. This way, we will not perform
+ side-effects more the once. */
+ narg = builtin_save_expr (arg);
+ if (narg != arg)
+ {
+ arg = narg;
+ arglist = build_tree_list (NULL_TREE, arg);
+ exp = build_function_call_expr (fndecl, arglist);
+ }
+
+ op0 = expand_expr (arg, subtarget, VOIDmode, 0);
+
+ start_sequence ();
+
+ /* Compute into TARGET.
+ Set TARGET to wherever the result comes back. */
+ target = expand_unop (mode, builtin_optab, op0, target, 0);
+
+ if (target != 0)
+ {
+ /* Output the entire sequence. */
+ insns = get_insns ();
+ end_sequence ();
+ emit_insn (insns);
+ return target;
+ }
+
+ /* If we were unable to expand via the builtin, stop the sequence
+ (without outputting the insns) and call to the library function
+ with the stabilized argument list. */
+ end_sequence ();
+ }
+
+ before_call = get_last_insn ();
+
+ target = expand_call (exp, target, target == const0_rtx);
+
+ return target;
+}
+
/* Expand a call to the builtin sincos math function.
Return 0 if a normal call should be emitted rather than expanding the
function in-line. EXP is the expression that is a call to the builtin
@@ -5826,6 +5900,12 @@
return target;
break;
+ CASE_FLT_FN (BUILT_IN_ISINF):
+ target = expand_builtin_classify (exp, target, subtarget);
+ if (target)
+ return target;
+ break;
+
CASE_FLT_FN (BUILT_IN_LCEIL):
CASE_FLT_FN (BUILT_IN_LLCEIL):
CASE_FLT_FN (BUILT_IN_LFLOOR):
Index: reg-stack.c
===================================================================
--- reg-stack.c (revision 121254)
+++ reg-stack.c (working copy)
@@ -1629,6 +1629,25 @@
replace_reg (src1, FIRST_STACK_REG);
break;
+ case UNSPEC_FXAM:
+
+ src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
+ emit_swap_insn (insn, regstack, *src1);
+
+ src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
+
+ replace_reg (src1, FIRST_STACK_REG);
+
+ if (src1_note)
+ {
+ remove_regno_note (insn, REG_DEAD,
+ REGNO (XEXP (src1_note, 0)));
+ emit_pop_insn (insn, regstack, XEXP (src1_note, 0),
+ EMIT_AFTER);
+ }
+
+ break;
+
case UNSPEC_SIN:
case UNSPEC_COS:
case UNSPEC_FRNDINT:
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 121254)
+++ config/i386/i386.md (working copy)
@@ -122,6 +122,7 @@
(UNSPEC_FIST 66)
(UNSPEC_F2XM1 67)
(UNSPEC_TAN 68)
+ (UNSPEC_FXAM 69)
; x87 Rounding
(UNSPEC_FRNDINT_FLOOR 70)
@@ -17584,6 +17585,42 @@
DONE;
})
+(define_insn "fxamxf2_i387"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=a")
+ (unspec:HI
+ [(match_operand:DF 1 "register_operand" "f")]
+ UNSPEC_FXAM))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fxam\n\tfnstsw\t%0"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "XF")])
+
+(define_expand "isinfsi2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+{
+ rtx mask = GEN_INT (0x45);
+ rtx val = GEN_INT (0x05);
+
+ rtx cond;
+
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx res = gen_reg_rtx (QImode);
+
+ emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+ emit_insn (gen_cmpqi_ext_3 (scratch, val));
+ cond = gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+ emit_insn (gen_zero_extendqisi2 (operands[0], res));
+ DONE;
+})
+
;; Block operation instructions