This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH][4.3] Expand lfloor/lceil inline for x86_64/i?86 SSE math

From: Richard Guenther <rguenther at suse dot de>
To: gcc-patches at gcc dot gnu dot org
Date: Wed, 18 Oct 2006 16:21:51 +0200 (CEST)
Subject: [PATCH][4.3] Expand lfloor/lceil inline for x86_64/i?86 SSE math
This third patch in the series adds expanders for lfloor and lceil.  It
changes the existing optabs from optabs that only take the float mode to
conversion optabs (because in principle we can expand lfloor to SImode
directly - but we don't do so at the moment because we don't have a
builtin for that).  It basically avoids hassle with the tricks we play
for the x87 lfloor/lceil expanders.  And it's more correct, of course.

[ We expand lfloor, for example, to
        cvttsd2siq      %xmm0, %rax
        cvtsi2sdq       %rax, %xmm1
        leaq    -1(%rax), %rdx
        ucomisd %xmm0, %xmm1
        cmova   %rdx, %rax
        ret
]

Bootstrapped and tested on {x86_64,i686}-linux-gnu.

Ok for 4.3?  (I'll pause sending the series after these three; four
more are to come, for expanding rint, floor/ceil, round and trunc.)

Thanks,
Richard.

--
Richard Guenther <rguenther@suse.de>
Novell / SUSE Labs

2006-08-23  Richard Guenther  <rguenther@suse.de>

	* genopinit.c (optabs): Change lfloor_optab and lceil_optab
	to conversion optabs.
	* optabs.c (init_optabs): Initialize lfloor_optab and lceil_optab
	as conversion optabs.
	* optabs.h (enum optab_index): Remove OTI_lfloor and OTI_lceil.
	(enum convert_optab_index): Add COI_lfloor and COI_lceil.
	(lfloor_optab, lceil_optab): Adjust defines.
	* builtins.c (expand_builtin_int_roundingfn): Adjust for
	lfloor and lceil optabs now being conversion optabs.
	* config/i386/i386-protos.h (ix86_expand_lfloorceil): Declare.
	* config/i386/i386.c (ix86_expand_sse_compare_and_jump):
	New static helper function.
	(ix86_expand_lfloorceil): New function to expand lfloor and
	lceil inline.
	* config/i386/i386.md (lfloor<mode>2): Split into ...
	(lfloorxf<mode>2): ... x87 variant
	(lfloor<mode>di2, lfloor<mode>si2): ... and SSE variants
	using ix86_expand_lfloorceil.
	(lceil<mode>2, lceilxf<mode>2, lceil<mode>di2, lceil<mode>si2):
	Likewise.
	* doc/md.texi (lfloorMN, lceilMN): Document.

	* gcc.target/i386/math-torture/lfloor.c: New testcase.
	* gcc.target/i386/math-torture/lceil.c: Likewise.

Index: gcc/builtins.c
===================================================================
--- gcc.orig/builtins.c
+++ gcc/builtins.c
@@ -2235,7 +2235,7 @@ expand_builtin_sincos (tree exp)
 static rtx
 expand_builtin_int_roundingfn (tree exp, rtx target, rtx subtarget)
 {
-  optab builtin_optab;
+  convert_optab builtin_optab;
   rtx op0, insns, tmp;
   tree fndecl = get_callee_fndecl (exp);
   tree arglist = TREE_OPERAND (exp, 1);
@@ -2270,44 +2270,37 @@ expand_builtin_int_roundingfn (tree exp,
   /* Make a suitable register to place result in.  */
   mode = TYPE_MODE (TREE_TYPE (exp));
 
-  /* Before working hard, check whether the instruction is available.  */
-  if (builtin_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
-    {
-      target = gen_reg_rtx (mode);
-
-      /* Wrap the computation of the argument in a SAVE_EXPR, as we may
-	 need to expand the argument again.  This way, we will not perform
-	 side-effects more the once.  */
-      narg = builtin_save_expr (arg);
-      if (narg != arg)
-	{
-	  arg = narg;
-	  arglist = build_tree_list (NULL_TREE, arg);
-	  exp = build_function_call_expr (fndecl, arglist);
-	}
-
-      op0 = expand_expr (arg, subtarget, VOIDmode, 0);
+  target = gen_reg_rtx (mode);
 
-      start_sequence ();
+  /* Wrap the computation of the argument in a SAVE_EXPR, as we may
+     need to expand the argument again.  This way, we will not perform
+     side-effects more than once.  */
+  narg = builtin_save_expr (arg);
+  if (narg != arg)
+    {
+      arg = narg;
+      arglist = build_tree_list (NULL_TREE, arg);
+      exp = build_function_call_expr (fndecl, arglist);
+    }
 
-      /* Compute into TARGET.
-	 Set TARGET to wherever the result comes back.  */
-      target = expand_unop (mode, builtin_optab, op0, target, 0);
+  op0 = expand_expr (arg, subtarget, VOIDmode, 0);
 
-      if (target != 0)
-	{
-	  /* Output the entire sequence.  */
-	  insns = get_insns ();
-	  end_sequence ();
-	  emit_insn (insns);
-	  return target;
-	}
+  start_sequence ();
 
-      /* If we were unable to expand via the builtin, stop the sequence
-	 (without outputting the insns).  */
+  /* Compute into TARGET.  */
+  if (expand_sfix_optab (target, op0, builtin_optab))
+    {
+      /* Output the entire sequence.  */
+      insns = get_insns ();
       end_sequence ();
+      emit_insn (insns);
+      return target;
     }
 
+  /* If we were unable to expand via the builtin, stop the sequence
+     (without outputting the insns).  */
+  end_sequence ();
+
   /* Fall back to floating point rounding optab.  */
   fallback_fndecl = mathfn_built_in (TREE_TYPE (arg), fallback_fn);
   /* We shouldn't get here on targets without TARGET_C99_FUNCTIONS.
Index: gcc/config/i386/i386-protos.h
===================================================================
--- gcc.orig/config/i386/i386-protos.h
+++ gcc/config/i386/i386-protos.h
@@ -158,6 +158,7 @@ extern void ix86_emit_i387_log1p (rtx, r
 extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
 
 extern void ix86_expand_lround (rtx, rtx);
+extern void ix86_expand_lfloorceil (rtx, rtx, bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc.orig/config/i386/i386.c
+++ gcc/config/i386/i386.c
@@ -18999,6 +18999,36 @@ ix86_sse_copysign_to_positive (rtx resul
 			  gen_rtx_IOR (mode, abs_value, sgn)));
 }
 
+/* Expands a comparison of OP0 with OP1 using comparison code CODE,
+   swapping the operands if SWAP_OPERANDS is true.  The expanded
+   code is a forward jump to a newly created label in case the
+   comparison is true.  The generated label rtx is returned.  */
+static rtx
+ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
+                                  bool swap_operands)
+{
+  rtx label, tmp;
+
+  if (swap_operands)
+    {
+      tmp = op0;
+      op0 = op1;
+      op1 = tmp;
+    }
+
+  label = gen_label_rtx ();
+  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
+  emit_insn (gen_rtx_SET (VOIDmode, tmp,
+			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
+  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  return label;
+}
+
 /* Expand SSE sequence for computing lround from OP1 storing
    into OP0.  */
 void
@@ -19022,4 +19052,37 @@ ix86_expand_lround (rtx op0, rtx op1)
   expand_fix (op0, adj, 0);
 }
 
+/* Expand SSE2 sequence for computing lfloor (if DO_FLOOR is true) or
+   lceil (otherwise) from OP1 storing into OP0.  */
+void
+ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
+{
+  /* C code for the stuff we're doing below (for do_floor):
+	xi = (long)op1;
+        xi -= (double)xi > op1 ? 1 : 0;
+        return xi;
+   */
+  enum machine_mode fmode = GET_MODE (op1);
+  enum machine_mode imode = GET_MODE (op0);
+  rtx ireg, freg, label;
+
+  /* reg = (long)op1 */
+  ireg = gen_reg_rtx (imode);
+  expand_fix (ireg, op1, 0);
+
+  /* freg = (double)reg */
+  freg = gen_reg_rtx (fmode);
+  expand_float (freg, ireg, 0);
+
+  /* ireg = (freg > op1) ? ireg - 1 : ireg */
+  label = ix86_expand_sse_compare_and_jump (UNLE,
+					    freg, op1, !do_floor);
+  expand_simple_binop (imode, do_floor ? MINUS : PLUS,
+                       ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (op0, ireg);
+}
+
 #include "gt-i386.h"
Index: gcc/config/i386/i386.md
===================================================================
--- gcc.orig/config/i386/i386.md
+++ gcc/config/i386/i386.md
@@ -17150,7 +17150,7 @@
 	      (use (match_dup 3))])]
   "")
 
-(define_expand "lfloor<mode>2"
+(define_expand "lfloorxf<mode>2"
   [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
 		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
 		    UNSPEC_FIST_FLOOR))
@@ -17160,6 +17160,26 @@
    && flag_unsafe_math_optimizations"
   "")
 
+(define_expand "lfloor<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
+(define_expand "lfloor<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_ceil"
   [(set (match_operand:XF 0 "register_operand" "=f")
@@ -17391,7 +17411,7 @@
 	      (use (match_dup 3))])]
   "")
 
-(define_expand "lceil<mode>2"
+(define_expand "lceilxf<mode>2"
   [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
 		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
 		    UNSPEC_FIST_CEIL))
@@ -17401,6 +17421,26 @@
    && flag_unsafe_math_optimizations"
   "")
 
+(define_expand "lceil<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
+(define_expand "lceil<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_trunc"
   [(set (match_operand:XF 0 "register_operand" "=f")
Index: gcc/doc/md.texi
===================================================================
--- gcc.orig/doc/md.texi
+++ gcc/doc/md.texi
@@ -3613,6 +3613,18 @@ Convert operand 1 (valid for floating po
 point mode @var{n} as a signed number rounding to nearest and away
 from zero and store in operand 0 (which has mode @var{n}).
 
+@cindex @code{lfloor@var{m}@var{n}2}
+@item @samp{lfloor@var{m}@var{n}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number rounding down and store in
+operand 0 (which has mode @var{n}).
+
+@cindex @code{lceil@var{m}@var{n}2}
+@item @samp{lceil@var{m}@var{n}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number rounding up and store in
+operand 0 (which has mode @var{n}).
+
 @cindex @code{copysign@var{m}3} instruction pattern
 @item @samp{copysign@var{m}3}
 Store a value with the magnitude of operand 1 and the sign of operand
Index: gcc/genopinit.c
===================================================================
--- gcc.orig/genopinit.c
+++ gcc/genopinit.c
@@ -120,9 +120,9 @@ static const char * const optabs[] =
   "copysign_optab->handlers[$A].insn_code = CODE_FOR_$(copysign$F$a3$)",
   "sqrt_optab->handlers[$A].insn_code = CODE_FOR_$(sqrt$a2$)",
   "floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
-  "lfloor_optab->handlers[$A].insn_code = CODE_FOR_$(lfloor$a2$)",
+  "lfloor_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lfloor$F$a$I$b2$)",
   "ceil_optab->handlers[$A].insn_code = CODE_FOR_$(ceil$a2$)",
-  "lceil_optab->handlers[$A].insn_code = CODE_FOR_$(lceil$a2$)",
+  "lceil_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lceil$F$a$I$b2$)",
   "round_optab->handlers[$A].insn_code = CODE_FOR_$(round$a2$)",
   "btrunc_optab->handlers[$A].insn_code = CODE_FOR_$(btrunc$a2$)",
   "nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
Index: gcc/optabs.c
===================================================================
--- gcc.orig/optabs.c
+++ gcc/optabs.c
@@ -5299,9 +5299,7 @@ init_optabs (void)
   parity_optab = init_optab (PARITY);
   sqrt_optab = init_optab (SQRT);
   floor_optab = init_optab (UNKNOWN);
-  lfloor_optab = init_optab (UNKNOWN);
   ceil_optab = init_optab (UNKNOWN);
-  lceil_optab = init_optab (UNKNOWN);
   round_optab = init_optab (UNKNOWN);
   btrunc_optab = init_optab (UNKNOWN);
   nearbyint_optab = init_optab (UNKNOWN);
@@ -5366,6 +5364,8 @@ init_optabs (void)
   ufloat_optab = init_convert_optab (UNSIGNED_FLOAT);
   lrint_optab = init_convert_optab (UNKNOWN);
   lround_optab = init_convert_optab (UNKNOWN);
+  lfloor_optab = init_convert_optab (UNKNOWN);
+  lceil_optab = init_convert_optab (UNKNOWN);
 
   for (i = 0; i < NUM_MACHINE_MODES; i++)
     {
@@ -5489,6 +5489,10 @@ init_optabs (void)
 				 MODE_INT, MODE_FLOAT);
   init_interclass_conv_libfuncs (lround_optab, "lround",
 				 MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (lfloor_optab, "lfloor",
+				 MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (lceil_optab, "lceil",
+				 MODE_INT, MODE_FLOAT);
 
   /* sext_optab is also used for FLOAT_EXTEND.  */
   init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, true);
Index: gcc/optabs.h
===================================================================
--- gcc.orig/optabs.h
+++ gcc/optabs.h
@@ -189,9 +189,7 @@ enum optab_index
   OTI_log1p,
   /* Rounding functions */
   OTI_floor,
-  OTI_lfloor,
   OTI_ceil,
-  OTI_lceil,
   OTI_btrunc,
   OTI_round,
   OTI_nearbyint,
@@ -337,9 +335,7 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define log2_optab (optab_table[OTI_log2])
 #define log1p_optab (optab_table[OTI_log1p])
 #define floor_optab (optab_table[OTI_floor])
-#define lfloor_optab (optab_table[OTI_lfloor])
 #define ceil_optab (optab_table[OTI_ceil])
-#define lceil_optab (optab_table[OTI_lceil])
 #define btrunc_optab (optab_table[OTI_btrunc])
 #define round_optab (optab_table[OTI_round])
 #define nearbyint_optab (optab_table[OTI_nearbyint])
@@ -407,6 +403,8 @@ enum convert_optab_index
 
   COI_lrint,
   COI_lround,
+  COI_lfloor,
+  COI_lceil,
 
   COI_MAX
 };
@@ -424,6 +422,8 @@ extern GTY(()) convert_optab convert_opt
 #define ufloat_optab (convert_optab_table[COI_ufloat])
 #define lrint_optab (convert_optab_table[COI_lrint])
 #define lround_optab (convert_optab_table[COI_lround])
+#define lfloor_optab (convert_optab_table[COI_lfloor])
+#define lceil_optab (convert_optab_table[COI_lceil])
 
 /* These arrays record the insn_code of insns that may be needed to
    perform input and output reloads of special objects.  They provide a
Index: gcc/testsuite/gcc.target/i386/math-torture/lceil.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/lceil.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+  return __builtin_lceilf (x);
+}
+long testl (double x)
+{
+  return __builtin_lceil (x);
+}
+long testll (long double x)
+{
+  return __builtin_lceill (x);
+}
+long long testllf (float x)
+{
+  return __builtin_llceilf (x);
+}
+long long testll_ (double x)
+{
+  return __builtin_llceil (x);
+}
+long long testlll (long double x)
+{
+  return __builtin_llceill (x);
+}
Index: gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+  return __builtin_lfloorf (x);
+}
+long testl (double x)
+{
+  return __builtin_lfloor (x);
+}
+long testll (long double x)
+{
+  return __builtin_lfloorl (x);
+}
+long long testllf (float x)
+{
+  return __builtin_llfloorf (x);
+}
+long long testll_ (double x)
+{
+  return __builtin_llfloor (x);
+}
+long long testlll (long double x)
+{
+  return __builtin_llfloorl (x);
+}
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]