This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH] Expand lfloor and lceil inline as SSE on x86_64/i?86

From: Richard Guenther <rguenther at suse dot de>
To: gcc-patches at gcc dot gnu dot org
Date: Wed, 23 Aug 2006 14:20:52 +0200 (CEST)
Subject: [PATCH] Expand lfloor and lceil inline as SSE on x86_64/i?86
This patch, part of the series that expands rounding functions inline on
x86_64, handles lfloor and lceil: it converts them to conversion optabs
and adds expanders that target SSE math.

Bootstrapped and tested on x86_64-unknown-linux-gnu, tested on 
i686-pc-linux-gnu.

Ok for 4.3?

Thanks,
Richard.

:ADDPATCH middle-end,i386:

2006-08-23  Richard Guenther  <rguenther@suse.de>

	* genopinit.c (optabs): Change lfloor_optab and lceil_optab
	to conversion optabs.
	* optabs.c (init_optabs): Initialize lfloor_optab and lceil_optab
	as conversion optab.
	* optabs.h (enum optab_index): Remove OTI_lfloor and OTI_lceil.
	(enum convert_optab_index): Add COI_lfloor and COI_lceil.
	(lfloor_optab, lceil_optab): Adjust defines.
	* builtins.c (expand_builtin_int_roundingfn): Adjust for
	lfloor and lceil optabs now being conversion optabs.
	* config/i386/i386-protos.h (ix86_expand_lfloorceil): Declare.
	* config/i386/i386.c (ix86_expand_sse_compare_and_jump):
	New static helper function.
	(ix86_expand_lfloorceil): New function to expand lfloor and
	lceil inline.
	* config/i386/i386.md (lfloor<mode>2): Split into ...
	(lfloorxf<mode>2): ... x87 variant
	(lfloor<mode>di2, lfloor<mode>si2): ... and SSE variants
	using ix86_expand_lfloorceil.
	(lceil<mode>2, lceilxf<mode>2, lceil<mode>di2, lceil<mode>si2):
	Likewise.
	* doc/md.texi (lfloorMN, lceilMN): Document.

	* gcc.target/i386/math-torture/lfloor.c: New testcase.
	* gcc.target/i386/math-torture/lceil.c: Likewise.

Index: gcc/builtins.c
===================================================================
--- gcc.orig/builtins.c
+++ gcc/builtins.c
@@ -2235,7 +2235,7 @@ expand_builtin_sincos (tree exp)
 static rtx
 expand_builtin_int_roundingfn (tree exp, rtx target, rtx subtarget)
 {
-  optab builtin_optab;
+  convert_optab builtin_optab;
   rtx op0, insns, tmp;
   tree fndecl = get_callee_fndecl (exp);
   tree arglist = TREE_OPERAND (exp, 1);
@@ -2270,44 +2270,37 @@ expand_builtin_int_roundingfn (tree exp,
   /* Make a suitable register to place result in.  */
   mode = TYPE_MODE (TREE_TYPE (exp));
 
-  /* Before working hard, check whether the instruction is available.  */
-  if (builtin_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
-    {
-      target = gen_reg_rtx (mode);
-
-      /* Wrap the computation of the argument in a SAVE_EXPR, as we may
-	 need to expand the argument again.  This way, we will not perform
-	 side-effects more the once.  */
-      narg = builtin_save_expr (arg);
-      if (narg != arg)
-	{
-	  arg = narg;
-	  arglist = build_tree_list (NULL_TREE, arg);
-	  exp = build_function_call_expr (fndecl, arglist);
-	}
-
-      op0 = expand_expr (arg, subtarget, VOIDmode, 0);
+  target = gen_reg_rtx (mode);
 
-      start_sequence ();
+  /* Wrap the computation of the argument in a SAVE_EXPR, as we may
+     need to expand the argument again.  This way, we will not perform
+     side-effects more than once.  */
+  narg = builtin_save_expr (arg);
+  if (narg != arg)
+    {
+      arg = narg;
+      arglist = build_tree_list (NULL_TREE, arg);
+      exp = build_function_call_expr (fndecl, arglist);
+    }
 
-      /* Compute into TARGET.
-	 Set TARGET to wherever the result comes back.  */
-      target = expand_unop (mode, builtin_optab, op0, target, 0);
+  op0 = expand_expr (arg, subtarget, VOIDmode, 0);
 
-      if (target != 0)
-	{
-	  /* Output the entire sequence.  */
-	  insns = get_insns ();
-	  end_sequence ();
-	  emit_insn (insns);
-	  return target;
-	}
+  start_sequence ();
 
-      /* If we were unable to expand via the builtin, stop the sequence
-	 (without outputting the insns).  */
+  /* Compute into TARGET.  */
+  if (expand_sfix_optab (target, op0, builtin_optab))
+    {
+      /* Output the entire sequence.  */
+      insns = get_insns ();
       end_sequence ();
+      emit_insn (insns);
+      return target;
     }
 
+  /* If we were unable to expand via the builtin, stop the sequence
+     (without outputting the insns).  */
+  end_sequence ();
+
   /* Fall back to floating point rounding optab.  */
   fallback_fndecl = mathfn_built_in (TREE_TYPE (arg), fallback_fn);
   /* We shouldn't get here on targets without TARGET_C99_FUNCTIONS.
Index: gcc/config/i386/i386-protos.h
===================================================================
--- gcc.orig/config/i386/i386-protos.h
+++ gcc/config/i386/i386-protos.h
@@ -158,6 +158,7 @@ extern void ix86_emit_i387_log1p (rtx, r
 extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
 
 extern void ix86_expand_lround (rtx, rtx);
+extern void ix86_expand_lfloorceil (rtx, rtx, bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc.orig/config/i386/i386.c
+++ gcc/config/i386/i386.c
@@ -18999,6 +18999,36 @@ ix86_sse_copysign_to_positive (rtx resul
 			  gen_rtx_IOR (mode, abs_value, sgn)));
 }
 
+/* Expands a comparison of OP0 with OP1 using comparison code CODE,
+   swapping the operands if SWAP_OPERANDS is true.  The expanded
+   code is a forward jump to a newly created label in case the
+   comparison is true.  The generated label rtx is returned.  */
+static rtx
+ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
+                                  bool swap_operands)
+{
+  rtx label, tmp;
+
+  if (swap_operands)
+    {
+      tmp = op0;
+      op0 = op1;
+      op1 = tmp;
+    }
+
+  label = gen_label_rtx ();
+  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
+  emit_insn (gen_rtx_SET (VOIDmode, tmp,
+			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
+  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  return label;
+}
+
 /* Expand SSE sequence for computing lround from OP1 storing
    into OP0.  */
 void
@@ -19022,4 +19052,42 @@ ix86_expand_lround (rtx op0, rtx op1)
   expand_fix (op0, adj, 0);
 }
 
+/* Expand SSE2 sequence for computing lfloor (DO_FLOOR true) or lceil
+   (DO_FLOOR false) from OP1 storing into OP0.  */
+void
+ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
+{
+  /* C code for the stuff we're doing below (for do_floor):
+	xi = (long)op1;
+        dx = (double)xi - op1;
+        xi -= dx > 0 ? 1 : 0;
+        return xi;
+   */
+  enum machine_mode fmode = GET_MODE (op1);
+  enum machine_mode imode = GET_MODE (op0);
+  rtx ireg, freg, label, zero;
+
+  /* ireg = (long)op1 */
+  ireg = gen_reg_rtx (imode);
+  expand_fix (ireg, op1, 0);
+
+  /* freg = (double)ireg */
+  freg = gen_reg_rtx (fmode);
+  expand_float (freg, ireg, 0);
+
+  /* freg = freg - op1 */
+  expand_simple_binop (fmode, MINUS, freg, op1, freg, 0, OPTAB_DIRECT);
+
+  /* floor: if (freg > 0) ireg -= 1;  ceil: if (freg < 0) ireg += 1 */
+  zero = force_reg (fmode, const_double_from_real_value (dconst0, fmode));
+  label = ix86_expand_sse_compare_and_jump (UNLE,
+					    freg, zero, !do_floor);
+  expand_simple_binop (imode, do_floor ? MINUS : PLUS,
+                       ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (op0, ireg);
+}
+
 #include "gt-i386.h"
Index: gcc/config/i386/i386.md
===================================================================
--- gcc.orig/config/i386/i386.md
+++ gcc/config/i386/i386.md
@@ -17148,7 +17148,7 @@
 	      (use (match_dup 3))])]
   "")
 
-(define_expand "lfloor<mode>2"
+(define_expand "lfloorxf<mode>2"
   [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
 		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
 		    UNSPEC_FIST_FLOOR))
@@ -17158,6 +17158,24 @@
    && flag_unsafe_math_optimizations"
   "")
 
+(define_expand "lfloor<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT"
+{
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
+(define_expand "lfloor<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+{
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_ceil"
   [(set (match_operand:XF 0 "register_operand" "=f")
@@ -17389,7 +17407,7 @@
 	      (use (match_dup 3))])]
   "")
 
-(define_expand "lceil<mode>2"
+(define_expand "lceilxf<mode>2"
   [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
 		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
 		    UNSPEC_FIST_CEIL))
@@ -17399,6 +17417,24 @@
    && flag_unsafe_math_optimizations"
   "")
 
+(define_expand "lceil<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
+(define_expand "lceil<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:SSEMODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_trunc"
   [(set (match_operand:XF 0 "register_operand" "=f")
Index: gcc/doc/md.texi
===================================================================
--- gcc.orig/doc/md.texi
+++ gcc/doc/md.texi
@@ -3613,6 +3613,18 @@ Convert operand 1 (valid for floating po
 point mode @var{n} as a signed number rounding to nearest and away
 from zero and store in operand 0 (which has mode @var{n}).
 
+@cindex @code{lfloor@var{m}@var{n}2}
+@item @samp{lfloor@var{m}@var{n}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number rounding down and store in
+operand 0 (which has mode @var{n}).
+
+@cindex @code{lceil@var{m}@var{n}2}
+@item @samp{lceil@var{m}@var{n}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number rounding up and store in
+operand 0 (which has mode @var{n}).
+
 @cindex @code{copysign@var{m}3} instruction pattern
 @item @samp{copysign@var{m}3}
 Store a value with the magnitude of operand 1 and the sign of operand
Index: gcc/genopinit.c
===================================================================
--- gcc.orig/genopinit.c
+++ gcc/genopinit.c
@@ -120,9 +120,9 @@ static const char * const optabs[] =
   "copysign_optab->handlers[$A].insn_code = CODE_FOR_$(copysign$F$a3$)",
   "sqrt_optab->handlers[$A].insn_code = CODE_FOR_$(sqrt$a2$)",
   "floor_optab->handlers[$A].insn_code = CODE_FOR_$(floor$a2$)",
-  "lfloor_optab->handlers[$A].insn_code = CODE_FOR_$(lfloor$a2$)",
+  "lfloor_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lfloor$F$a$I$b2$)",
   "ceil_optab->handlers[$A].insn_code = CODE_FOR_$(ceil$a2$)",
-  "lceil_optab->handlers[$A].insn_code = CODE_FOR_$(lceil$a2$)",
+  "lceil_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lceil$F$a$I$b2$)",
   "round_optab->handlers[$A].insn_code = CODE_FOR_$(round$a2$)",
   "btrunc_optab->handlers[$A].insn_code = CODE_FOR_$(btrunc$a2$)",
   "nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
Index: gcc/optabs.c
===================================================================
--- gcc.orig/optabs.c
+++ gcc/optabs.c
@@ -5299,9 +5299,7 @@ init_optabs (void)
   parity_optab = init_optab (PARITY);
   sqrt_optab = init_optab (SQRT);
   floor_optab = init_optab (UNKNOWN);
-  lfloor_optab = init_optab (UNKNOWN);
   ceil_optab = init_optab (UNKNOWN);
-  lceil_optab = init_optab (UNKNOWN);
   round_optab = init_optab (UNKNOWN);
   btrunc_optab = init_optab (UNKNOWN);
   nearbyint_optab = init_optab (UNKNOWN);
@@ -5366,6 +5364,8 @@ init_optabs (void)
   ufloat_optab = init_convert_optab (UNSIGNED_FLOAT);
   lrint_optab = init_convert_optab (UNKNOWN);
   lround_optab = init_convert_optab (UNKNOWN);
+  lfloor_optab = init_convert_optab (UNKNOWN);
+  lceil_optab = init_convert_optab (UNKNOWN);
 
   for (i = 0; i < NUM_MACHINE_MODES; i++)
     {
@@ -5489,6 +5489,10 @@ init_optabs (void)
 				 MODE_INT, MODE_FLOAT);
   init_interclass_conv_libfuncs (lround_optab, "lround",
 				 MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (lfloor_optab, "lfloor",
+				 MODE_INT, MODE_FLOAT);
+  init_interclass_conv_libfuncs (lceil_optab, "lceil",
+				 MODE_INT, MODE_FLOAT);
 
   /* sext_optab is also used for FLOAT_EXTEND.  */
   init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, true);
Index: gcc/optabs.h
===================================================================
--- gcc.orig/optabs.h
+++ gcc/optabs.h
@@ -189,9 +189,7 @@ enum optab_index
   OTI_log1p,
   /* Rounding functions */
   OTI_floor,
-  OTI_lfloor,
   OTI_ceil,
-  OTI_lceil,
   OTI_btrunc,
   OTI_round,
   OTI_nearbyint,
@@ -337,9 +335,7 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define log2_optab (optab_table[OTI_log2])
 #define log1p_optab (optab_table[OTI_log1p])
 #define floor_optab (optab_table[OTI_floor])
-#define lfloor_optab (optab_table[OTI_lfloor])
 #define ceil_optab (optab_table[OTI_ceil])
-#define lceil_optab (optab_table[OTI_lceil])
 #define btrunc_optab (optab_table[OTI_btrunc])
 #define round_optab (optab_table[OTI_round])
 #define nearbyint_optab (optab_table[OTI_nearbyint])
@@ -407,6 +403,8 @@ enum convert_optab_index
 
   COI_lrint,
   COI_lround,
+  COI_lfloor,
+  COI_lceil,
 
   COI_MAX
 };
@@ -424,6 +422,8 @@ extern GTY(()) convert_optab convert_opt
 #define ufloat_optab (convert_optab_table[COI_ufloat])
 #define lrint_optab (convert_optab_table[COI_lrint])
 #define lround_optab (convert_optab_table[COI_lround])
+#define lfloor_optab (convert_optab_table[COI_lfloor])
+#define lceil_optab (convert_optab_table[COI_lceil])
 
 /* These arrays record the insn_code of insns that may be needed to
    perform input and output reloads of special objects.  They provide a
Index: gcc/testsuite/gcc.target/i386/math-torture/lceil.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/lceil.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+  return __builtin_lceilf (x);
+}
+long testl (double x)
+{
+  return __builtin_lceil (x);
+}
+long testll (long double x)
+{
+  return __builtin_lceill (x);
+}
+long long testllf (float x)
+{
+  return __builtin_llceilf (x);
+}
+long long testll_ (double x)
+{
+  return __builtin_llceil (x);
+}
+long long testlll (long double x)
+{
+  return __builtin_llceill (x);
+}
Index: gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/lfloor.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+  return __builtin_lfloorf (x);
+}
+long testl (double x)
+{
+  return __builtin_lfloor (x);
+}
+long testll (long double x)
+{
+  return __builtin_lfloorl (x);
+}
+long long testllf (float x)
+{
+  return __builtin_llfloorf (x);
+}
+long long testll_ (double x)
+{
+  return __builtin_llfloor (x);
+}
+long long testlll (long double x)
+{
+  return __builtin_llfloorl (x);
+}
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]