This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [PATCH]: Expand finite() as inline i386 asm

From: Uros Bizjak <ubizjak at gmail dot com>
To: Richard Guenther <richard dot guenther at gmail dot com>
Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Jan Hubicka <jh at suse dot cz>
Date: Sun, 28 Jan 2007 14:00:49 +0100
Subject: Re: [PATCH]: Expand finite() as inline i386 asm
References: <5787cf470701250731rb6cba27l360293b491b972a1@mail.gmail.com> <84fc9c000701250748g17d5f4f0nf37def879a26ad37@mail.gmail.com>

Richard Guenther wrote:

Regarding the implementation, by representing "finite" optab as a
conversion optab, existing infrastructure that handles conversion
optabs could be used with minimum middle-end changes.

A conversion optab has a non-negligible size overhead and in this case
we only have one target mode (SImode), so I'd rather have you use
a regular unoptab.

Regular unoptab is _not_ appropriate for intermode optabs like this:

(define_expand "isinf<mode>2"
 [(use (match_operand:SI 0 "register_operand" ""))
  (use (match_operand:X87MODEF 1 "register_operand" ""))]

1) This optab must expand in SImode. If we try to expand it in float mode, we crash in emit_move_insn that tries to move SImode output argument into DFmode target register. If SImode expansion is necessary, we are stuck with "isinfsi2" name - and we can't use macros as this would produce 3 functions with the same name.

2) It is possible to use only XFmode input argument and have expand_unop() expand input argument to XFmode. However, the following code will produce quite suboptimal asm due to copying of input argument (a) from DFmode to XFmode:

if (__builtin_isinf (a))
 return a;
else
 return log(a);

gcc -O2:

test:
       pushl   %ebp
       movl    %esp, %ebp
       fldl    8(%ebp)
       fld     %st(0)
       fxam
       fnstsw  %ax
       fstp    %st(0)
       andb    $69, %ah
       shrl    $8, %eax
       cmpb    $5, %al
       jne     .L6
       popl    %ebp
       ret
       .p2align 4,,7
.L6:
       fstp    %st(0)
       popl    %ebp
       jmp     sin

(also, the pattern can't be selectively disabled for SSE and SSE2 - just like ilogbsi2 pattern ATM).

It could be
argued, that function named "expand_builtin_int_roundingfn_2" is not
exactly the best name for the function that also handles finite (and
eventually ilogb) optab - perhaps it could be renamed to a better name
like "expand_builtin_intermodefn".

Just don't use that function and implement expand_builtin_classify ().

And this function will call expand_sfix_optab() or something very similar due to intermode optab...

I think the testcase will fail on targets not providing a library fallback for finite{,f,l} as it seems to conform to BSD (the C99 variant is called isfinite ()).

Hm, indeed. Attached (proto-) patch thus implements C99 __builtin_isfinite() for x87 math. It should be noted that this intrinsic function is 25% faster than the call to library function... Please also see optimizers in action around cmp/jmp insns.

Uros.

Index: optabs.c
===================================================================
--- optabs.c	(revision 121254)
+++ optabs.c	(working copy)
@@ -5339,6 +5339,8 @@
   le_optab = init_optab (LE);
   unord_optab = init_optab (UNORDERED);
 
+  isinf_optab = init_optab (UNKNOWN);
+
   neg_optab = init_optab (NEG);
   negv_optab = init_optabv (NEG);
   abs_optab = init_optab (ABS);
Index: optabs.h
===================================================================
--- optabs.h	(revision 121254)
+++ optabs.h	(working copy)
@@ -220,6 +220,9 @@
   OTI_le,
   OTI_unord,
 
+  /* Floating point classification optabs */
+  OTI_isinf,
+
   /* String length */
   OTI_strlen,
 
@@ -382,6 +385,8 @@
 #define le_optab (optab_table[OTI_le])
 #define unord_optab (optab_table[OTI_unord])
 
+#define isinf_optab (optab_table[OTI_isinf])
+
 #define strlen_optab (optab_table[OTI_strlen])
 
 #define cbranch_optab (optab_table[OTI_cbranch])
Index: genopinit.c
===================================================================
--- genopinit.c	(revision 121254)
+++ genopinit.c	(working copy)
@@ -118,6 +118,7 @@
     abs_optab->handlers[$A].insn_code = CODE_FOR_$(abs$F$a2$)",
   "absv_optab->handlers[$A].insn_code = CODE_FOR_$(absv$I$a2$)",
   "copysign_optab->handlers[$A].insn_code = CODE_FOR_$(copysign$F$a3$)",
+  "isinf_optab->handlers[$A].insn_code = CODE_FOR_$(isinf$a2$)",
   "sqrt_optab->handlers[$A].insn_code = CODE_FOR_$(sqrt$a2$)",
   "floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
   "lfloor_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lfloor$F$a$I$b2$)",
Index: builtins.c
===================================================================
--- builtins.c	(revision 121254)
+++ builtins.c	(working copy)
@@ -94,6 +94,7 @@
 static rtx expand_builtin_mathfn (tree, rtx, rtx);
 static rtx expand_builtin_mathfn_2 (tree, rtx, rtx);
 static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
+static rtx expand_builtin_classify (tree, rtx, rtx);
 static rtx expand_builtin_sincos (tree);
 static rtx expand_builtin_cexpi (tree, rtx, rtx);
 static rtx expand_builtin_int_roundingfn (tree, rtx, rtx);
@@ -1674,6 +1675,7 @@
       CASE_MATHFN (BUILT_IN_HYPOT)
       CASE_MATHFN (BUILT_IN_ILOGB)
       CASE_MATHFN (BUILT_IN_INF)
+      CASE_MATHFN (BUILT_IN_ISINF)
       CASE_MATHFN (BUILT_IN_J0)
       CASE_MATHFN (BUILT_IN_J1)
       CASE_MATHFN (BUILT_IN_JN)
@@ -2169,6 +2171,78 @@
   return target;
 }
 
+static rtx
+expand_builtin_classify (tree exp, rtx target, rtx subtarget)
+{
+  optab builtin_optab;
+  rtx op0, insns, before_call;
+  tree fndecl = get_callee_fndecl (exp);
+  tree arglist = TREE_OPERAND (exp, 1);
+  enum machine_mode mode;
+  tree arg, narg;
+
+  if (!validate_arglist (arglist, REAL_TYPE, VOID_TYPE))
+    return 0;
+
+  arg = TREE_VALUE (arglist);
+
+  switch (DECL_FUNCTION_CODE (fndecl))
+    {
+    CASE_FLT_FN (BUILT_IN_ISINF):
+      builtin_optab = isinf_optab; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* MODE is the mode of EXP's type; it indexes the optab handlers.  */
+  mode = TYPE_MODE (TREE_TYPE (exp));
+
+  /* Before working hard, check whether the instruction is available.  */
+  if (builtin_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
+    {
+      target = gen_reg_rtx (mode);
+
+      /* Wrap the computation of the argument in a SAVE_EXPR, as we may
+	 need to expand the argument again.  This way, we will not perform
+	 side-effects more than once.  */
+      narg = builtin_save_expr (arg);
+      if (narg != arg)
+	{
+	  arg = narg;
+	  arglist = build_tree_list (NULL_TREE, arg);
+	  exp = build_function_call_expr (fndecl, arglist);
+	}
+
+      op0 = expand_expr (arg, subtarget, VOIDmode, 0);
+
+      start_sequence ();
+
+      /* Compute into TARGET.
+	 Set TARGET to wherever the result comes back.  */
+      target = expand_unop (mode, builtin_optab, op0, target, 0);
+
+      if (target != 0)
+	{
+	  /* Output the entire sequence.  */
+	  insns = get_insns ();
+	  end_sequence ();
+	  emit_insn (insns);
+	  return target;
+	}
+
+      /* If we were unable to expand via the builtin, stop the sequence
+	 (without outputting the insns) and call to the library function
+	 with the stabilized argument list.  */
+      end_sequence ();
+    }
+
+  before_call = get_last_insn ();  /* NOTE(review): never read afterwards.  */
+
+  target = expand_call (exp, target, target == const0_rtx);
+
+  return target;
+}
+
 /* Expand a call to the builtin sincos math function.
    Return 0 if a normal call should be emitted rather than expanding the
    function in-line.  EXP is the expression that is a call to the builtin
@@ -5826,6 +5900,12 @@
 	return target;
       break;
 
+    CASE_FLT_FN (BUILT_IN_ISINF):
+      target = expand_builtin_classify (exp, target, subtarget);
+      if (target)
+	return target;
+      break;
+
     CASE_FLT_FN (BUILT_IN_LCEIL):
     CASE_FLT_FN (BUILT_IN_LLCEIL):
     CASE_FLT_FN (BUILT_IN_LFLOOR):
Index: reg-stack.c
===================================================================
--- reg-stack.c	(revision 121254)
+++ reg-stack.c	(working copy)
@@ -1629,6 +1629,25 @@
 		replace_reg (src1, FIRST_STACK_REG);
 		break;
 
+	      case UNSPEC_FXAM:
+
+		src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
+		emit_swap_insn (insn, regstack, *src1);
+
+		src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
+
+		replace_reg (src1, FIRST_STACK_REG);
+
+		if (src1_note)
+		  {
+		    remove_regno_note (insn, REG_DEAD,
+				       REGNO (XEXP (src1_note, 0)));
+		    emit_pop_insn (insn, regstack, XEXP (src1_note, 0),
+				   EMIT_AFTER);
+		  }
+
+		break;
+
 	      case UNSPEC_SIN:
 	      case UNSPEC_COS:
 	      case UNSPEC_FRNDINT:
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 121254)
+++ config/i386/i386.md	(working copy)
@@ -122,6 +122,7 @@
    (UNSPEC_FIST			66)
    (UNSPEC_F2XM1		67)
    (UNSPEC_TAN			68)
+   (UNSPEC_FXAM			69)
 
    ; x87 Rounding
    (UNSPEC_FRNDINT_FLOOR	70)
@@ -17584,6 +17585,42 @@
   DONE;
 })
 
+(define_insn "fxamxf2_i387"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=a")
+	(unspec:HI
+	  [(match_operand:DF 1 "register_operand" "f")]
+	  UNSPEC_FXAM))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fxam\n\tfnstsw\t%0"
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "XF")])
+
+(define_expand "isinfsi2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+{
+  rtx mask = GEN_INT (0x45);
+  rtx val = GEN_INT (0x05);
+
+  rtx cond;
+
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx res = gen_reg_rtx (QImode);
+
+  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+  emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+  emit_insn (gen_cmpqi_ext_3 (scratch, val));
+  cond = gen_rtx_fmt_ee (EQ, QImode,
+			 gen_rtx_REG (CCmode, FLAGS_REG),
+			 const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+  emit_insn (gen_zero_extendqisi2 (operands[0], res));
+  DONE;
+})
+
 
 ;; Block operation instructions

References:
- [PATCH]: Expand finite() as inline i386 asm
  - From: Uros Bizjak
- Re: [PATCH]: Expand finite() as inline i386 asm
  - From: Richard Guenther

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]