This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH][PING] Expand lrint inline for x86_64/i?86 SSE math

From: Richard Guenther <rguenther at suse dot de>
To: gcc-patches at gcc dot gnu dot org
Cc: geoffk at geoffk dot org
Date: Tue, 24 Oct 2006 13:45:02 +0200 (CEST)
Subject: [PATCH][PING] Expand lrint inline for x86_64/i?86 SSE math
This is a resend (and re-diff, as some parts have gone in already) of the
first patch in the series to expand C99 rounding functions inline for SSE math.

This expands {ll,l}rint{,f} inline if possible using the cvts{d,s}2si{,q}
SSE instructions.

Bootstrapped and tested on {x86_64,i686,ppc,ia64}-linux-gnu.

Ok for mainline?

(I know this unfortunately needs a testsuite, a middle-end and an
i686 and x86_64 maintainer to approve, so CCing Geoff as Apple might
be interested in this as well ;))

Thanks,
Richard.


2006-08-23  Richard Guenther  <rguenther@suse.de>

	* optabs.h (enum optab_index): Remove OTI_lrint.
	(enum convert_optab_index): Add COI_lrint.
	(lrint_optab): Adjust.
	(expand_sfix_optab): Declare.
	* optabs.c (expand_sfix_optab): New function.
	(init_optabs): Init lrint_optab as conversion optab.
	* genopinit.c (lrint_optab): Change to a conversion optab.
	* builtins.c (expand_builtin_int_roundingfn_2): Adjust to
	expansion via conversion optab.
	* config/i386/i386.md (*fistdi2_1): Remove
	flag_unsafe_math_optimizations guard.
	(fistdi2, fistdi2_with_temp, *fist<mode>2_1, fist<mode>2,
	fist<mode>2_with_temp): Likewise.
	(lrint<mode>2): Split into...
	(lrintxf<mode>2): ... x87 part
	(lrint<mode>di2, lrint<mode>si2): ... and SSE parts.
	* config/i386/sse.md (sse_cvtss2si_2, sse_cvtss2siq_2,
	sse2_cvtsd2si_2, sse2_cvtsd2siq_2): New insns for
	UNSPEC_FIX_NOTRUNC matching non-vector float modes.
	* doc/md.texi (lrintMN2): Document.

	* gcc.target/i386/math-torture/math-torture.exp: Torture
	for interesting ia32 math options.
	* gcc.target/i386/math-torture/lrint.c: New testcase.

Index: gcc/builtins.c
===================================================================
--- gcc.orig/builtins.c
+++ gcc/builtins.c
@@ -2320,7 +2320,7 @@ expand_builtin_int_roundingfn (tree exp,
 static rtx
 expand_builtin_int_roundingfn_2 (tree exp, rtx target, rtx subtarget)
 {
-  optab builtin_optab;
+  convert_optab builtin_optab;
   rtx op0, insns;
   tree fndecl = get_callee_fndecl (exp);
   tree arglist = TREE_OPERAND (exp, 1);
@@ -2348,45 +2348,37 @@ expand_builtin_int_roundingfn_2 (tree ex
   /* Make a suitable register to place result in.  */
   mode = TYPE_MODE (TREE_TYPE (exp));
 
-  /* Before working hard, check whether the instruction is available.  */
-  if (builtin_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
-    {
-      target = gen_reg_rtx (mode);
-
-      /* Wrap the computation of the argument in a SAVE_EXPR, as we may
-	 need to expand the argument again.  This way, we will not perform
-	 side-effects more the once.  */
-      narg = builtin_save_expr (arg);
-      if (narg != arg)
-	{
-	  arg = narg;
-	  arglist = build_tree_list (NULL_TREE, arg);
-	  exp = build_function_call_expr (fndecl, arglist);
-	}
+  target = gen_reg_rtx (mode);
 
-      op0 = expand_expr (arg, subtarget, VOIDmode, 0);
-
-      start_sequence ();
+  /* Wrap the computation of the argument in a SAVE_EXPR, as we may
+     need to expand the argument again.  This way, we will not perform
+     side-effects more than once.  */
+  narg = builtin_save_expr (arg);
+  if (narg != arg)
+    {
+      arg = narg;
+      arglist = build_tree_list (NULL_TREE, arg);
+      exp = build_function_call_expr (fndecl, arglist);
+    }
 
-      /* Compute into TARGET.
-	 Set TARGET to wherever the result comes back.  */
-      target = expand_unop (mode, builtin_optab, op0, target, 0);
+  op0 = expand_expr (arg, subtarget, VOIDmode, 0);
 
-      if (target != 0)
-	{
-	  /* Output the entire sequence.  */
-	  insns = get_insns ();
-	  end_sequence ();
-	  emit_insn (insns);
-	  return target;
-	}
+  start_sequence ();
 
-      /* If we were unable to expand via the builtin, stop the sequence
-	 (without outputting the insns) and call to the library function
-	 with the stabilized argument list.  */
+  if (expand_sfix_optab (target, op0, builtin_optab))
+    {
+      /* Output the entire sequence.  */
+      insns = get_insns ();
       end_sequence ();
+      emit_insn (insns);
+      return target;
     }
 
+  /* If we were unable to expand via the builtin, stop the sequence
+     (without outputting the insns) and call to the library function
+     with the stabilized argument list.  */
+  end_sequence ();
+
   target = expand_call (exp, target, target == const0_rtx);
 
   return target;
Index: gcc/config/i386/i386.md
===================================================================
--- gcc.orig/config/i386/i386.md
+++ gcc/config/i386/i386.md
@@ -17211,7 +17211,6 @@
 	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
 	 UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations
    && !(reload_completed || reload_in_progress)"
   "#"
   "&& 1"
@@ -17235,8 +17234,7 @@
 	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
 	 UNSPEC_FIST))
    (clobber (match_scratch:XF 2 "=&1f"))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
+  "TARGET_USE_FANCY_MATH_387"
   "* return output_fix_trunc (insn, operands, 0);"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "DI")])
@@ -17247,8 +17245,7 @@
 	 UNSPEC_FIST))
    (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
    (clobber (match_scratch:XF 3 "=&1f,&1f"))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
+  "TARGET_USE_FANCY_MATH_387"
   "#"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "DI")])
@@ -17281,7 +17278,6 @@
 	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
 	 UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations
    && !(reload_completed || reload_in_progress)"
   "#"
   "&& 1"
@@ -17299,8 +17295,7 @@
   [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
 	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
 	 UNSPEC_FIST))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
+  "TARGET_USE_FANCY_MATH_387"
   "* return output_fix_trunc (insn, operands, 0);"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "<MODE>")])
@@ -17310,8 +17305,7 @@
 	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
 	 UNSPEC_FIST))
    (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
+  "TARGET_USE_FANCY_MATH_387"
   "#"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "<MODE>")])
@@ -17337,13 +17331,25 @@
 		       UNSPEC_FIST))]
   "")
 
-(define_expand "lrint<mode>2"
+(define_expand "lrintxf<mode>2"
   [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
-	(unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
-	 UNSPEC_FIST))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
+     (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+      UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387"
+  "")
+
+(define_expand "lrint<mode>di2"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+     (unspec:DI [(match_operand:SSEMODEF 1 "register_operand" "")]
+      UNSPEC_FIX_NOTRUNC))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT"
+  "")
+
+(define_expand "lrint<mode>si2"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+     (unspec:SI [(match_operand:SSEMODEF 1 "register_operand" "")]
+      UNSPEC_FIX_NOTRUNC))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
   "")
 
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
Index: gcc/config/i386/sse.md
===================================================================
--- gcc.orig/config/i386/sse.md
+++ gcc/config/i386/sse.md
@@ -974,6 +974,16 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "mode" "SI")])
 
+(define_insn "sse_cvtss2si_2"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+	 UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE"
+  "cvtss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "SI")])
+
 (define_insn "sse_cvtss2siq"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(unspec:DI
@@ -987,6 +997,16 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "mode" "DI")])
 
+(define_insn "sse_cvtss2siq_2"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+	 UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "DI")])
+
 (define_insn "sse_cvttss2si"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(fix:SI
@@ -1932,6 +1952,16 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "mode" "SI")])
 
+(define_insn "sse2_cvtsd2si_2"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+	 UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2"
+  "cvtsd2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "SI")])
+
 (define_insn "sse2_cvtsd2siq"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(unspec:DI
@@ -1945,6 +1975,16 @@
    (set_attr "athlon_decode" "double,vector")
    (set_attr "mode" "DI")])
 
+(define_insn "sse2_cvtsd2siq_2"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+	 UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "cvtsd2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "mode" "DI")])
+
 (define_insn "sse2_cvttsd2si"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(fix:SI
Index: gcc/doc/md.texi
===================================================================
--- gcc.orig/doc/md.texi
+++ gcc/doc/md.texi
@@ -3682,6 +3682,12 @@ corresponds to the C data type @code{dou
 built-in function uses the mode which corresponds to the C data
 type @code{float}.
 
+@cindex @code{lrint@var{m}@var{n}2}
+@item @samp{lrint@var{m}@var{n}2}
+Convert operand 1 (valid for floating point mode @var{m}) to fixed
+point mode @var{n} as a signed number according to the current
+rounding mode and store in operand 0 (which has mode @var{n}).
+
 @cindex @code{copysign@var{m}3} instruction pattern
 @item @samp{copysign@var{m}3}
 Store a value with the magnitude of operand 1 and the sign of operand
Index: gcc/genopinit.c
===================================================================
--- gcc.orig/genopinit.c
+++ gcc/genopinit.c
@@ -127,7 +127,7 @@ static const char * const optabs[] =
   "btrunc_optab->handlers[$A].insn_code = CODE_FOR_$(btrunc$a2$)",
   "nearbyint_optab->handlers[$A].insn_code = CODE_FOR_$(nearbyint$a2$)",
   "rint_optab->handlers[$A].insn_code = CODE_FOR_$(rint$a2$)",
-  "lrint_optab->handlers[$A].insn_code = CODE_FOR_$(lrint$a2$)",
+  "lrint_optab->handlers[$B][$A].insn_code = CODE_FOR_$(lrint$F$a$I$b2$)",
   "sincos_optab->handlers[$A].insn_code = CODE_FOR_$(sincos$a3$)",
   "sin_optab->handlers[$A].insn_code = CODE_FOR_$(sin$a2$)",
   "asin_optab->handlers[$A].insn_code = CODE_FOR_$(asin$a2$)",
Index: gcc/optabs.c
===================================================================
--- gcc.orig/optabs.c
+++ gcc/optabs.c
@@ -4861,6 +4861,46 @@ expand_fix (rtx to, rtx from, int unsign
         convert_move (to, target, 0);
     }
 }
+
+/* Generate code to convert FROM to fixed point and store in TO.  FROM
+   must be floating point, TO must be signed.  Use the conversion optab
+   TAB to do the conversion.  */
+
+bool
+expand_sfix_optab (rtx to, rtx from, convert_optab tab)
+{
+  enum insn_code icode;
+  rtx target = to;
+  enum machine_mode fmode, imode;
+
+  /* We first try to find a pair of modes, one real and one integer, at
+     least as wide as FROM and TO, respectively, in which we can open-code
+     this conversion.  If the integer mode is wider than the mode of TO,
+     we can do the conversion either signed or unsigned.  */
+
+  for (fmode = GET_MODE (from); fmode != VOIDmode;
+       fmode = GET_MODE_WIDER_MODE (fmode))
+    for (imode = GET_MODE (to); imode != VOIDmode;
+	 imode = GET_MODE_WIDER_MODE (imode))
+      {
+	icode = tab->handlers[imode][fmode].insn_code;
+	if (icode != CODE_FOR_nothing)
+	  {
+	    if (fmode != GET_MODE (from))
+	      from = convert_to_mode (fmode, from, 0);
+
+	    if (imode != GET_MODE (to))
+	      target = gen_reg_rtx (imode);
+
+	    emit_unop_insn (icode, target, from, UNKNOWN);
+	    if (target != to)
+	      convert_move (to, target, 0);
+	    return true;
+	  }
+      }
+
+  return false;
+}
 
 /* Report whether we have an instruction to perform the operation
    specified by CODE on operands of mode MODE.  */
@@ -5266,7 +5306,6 @@ init_optabs (void)
   btrunc_optab = init_optab (UNKNOWN);
   nearbyint_optab = init_optab (UNKNOWN);
   rint_optab = init_optab (UNKNOWN);
-  lrint_optab = init_optab (UNKNOWN);
   sincos_optab = init_optab (UNKNOWN);
   sin_optab = init_optab (UNKNOWN);
   asin_optab = init_optab (UNKNOWN);
@@ -5325,6 +5364,7 @@ init_optabs (void)
   ufixtrunc_optab = init_convert_optab (UNKNOWN);
   sfloat_optab = init_convert_optab (FLOAT);
   ufloat_optab = init_convert_optab (UNSIGNED_FLOAT);
+  lrint_optab = init_convert_optab (UNKNOWN);
 
   for (i = 0; i < NUM_MACHINE_MODES; i++)
     {
@@ -5444,6 +5484,8 @@ init_optabs (void)
 				 MODE_DECIMAL_FLOAT, MODE_INT);
   init_interclass_conv_libfuncs (ufloat_optab, "floatuns",
 				 MODE_INT, MODE_DECIMAL_FLOAT);
+  init_interclass_conv_libfuncs (lrint_optab, "lrint",
+				 MODE_INT, MODE_FLOAT);
 
   /* sext_optab is also used for FLOAT_EXTEND.  */
   init_intraclass_conv_libfuncs (sext_optab, "extend", MODE_FLOAT, true);
Index: gcc/optabs.h
===================================================================
--- gcc.orig/optabs.h
+++ gcc/optabs.h
@@ -196,7 +196,6 @@ enum optab_index
   OTI_round,
   OTI_nearbyint,
   OTI_rint,
-  OTI_lrint,
   /* Tangent */
   OTI_tan,
   /* Inverse tangent */
@@ -345,7 +344,6 @@ extern GTY(()) optab optab_table[OTI_MAX
 #define round_optab (optab_table[OTI_round])
 #define nearbyint_optab (optab_table[OTI_nearbyint])
 #define rint_optab (optab_table[OTI_rint])
-#define lrint_optab (optab_table[OTI_lrint])
 #define tan_optab (optab_table[OTI_tan])
 #define atan_optab (optab_table[OTI_atan])
 #define copysign_optab (optab_table[OTI_copysign])
@@ -407,6 +405,8 @@ enum convert_optab_index
   COI_sfloat,
   COI_ufloat,
 
+  COI_lrint,
+
   COI_MAX
 };
 
@@ -421,6 +421,7 @@ extern GTY(()) convert_optab convert_opt
 #define ufixtrunc_optab (convert_optab_table[COI_ufixtrunc])
 #define sfloat_optab (convert_optab_table[COI_sfloat])
 #define ufloat_optab (convert_optab_table[COI_ufloat])
+#define lrint_optab (convert_optab_table[COI_lrint])
 
 /* These arrays record the insn_code of insns that may be needed to
    perform input and output reloads of special objects.  They provide a
@@ -597,6 +598,9 @@ extern void expand_float (rtx, rtx, int)
 /* Generate code for a FIX_EXPR.  */
 extern void expand_fix (rtx, rtx, int);
 
+/* Generate code for float to integral conversion.  */
+extern bool expand_sfix_optab (rtx, rtx, convert_optab);
+
 /* Return tree if target supports vector operations for COND_EXPR.  */
 bool expand_vec_cond_expr_p (tree, enum machine_mode);
 
Index: gcc/testsuite/gcc.target/i386/math-torture/lrint.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/lrint.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+
+long testlf (float x)
+{
+  return __builtin_lrintf (x);
+}
+long testl (double x)
+{
+  return __builtin_lrint (x);
+}
+long testll (long double x)
+{
+  return __builtin_lrintl (x);
+}
+long long testllf (float x)
+{
+  return __builtin_llrintf (x);
+}
+long long testll_ (double x)
+{
+  return __builtin_llrint (x);
+}
+long long testlll (long double x)
+{
+  return __builtin_llrintl (x);
+}
Index: gcc/testsuite/gcc.target/i386/math-torture/math-torture.exp
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/math-torture.exp
@@ -0,0 +1,27 @@
+# This harness is for tests that should be run at all optimisation levels.
+
+set TORTURE_OPTIONS [list \
+        { -O0 } \
+        { -O0 -msse -mno-sse2 -mfpmath=sse } \
+        { -O0 -msse -msse2 -mfpmath=sse } \
+        { -O0 -msse -mno-sse2 -mfpmath=sse,387 } \
+        { -O0 -msse -msse2 -mfpmath=sse,387 } \
+        { -O0 -msse -mno-sse2 -mfpmath=sse -ffast-math } \
+        { -O0 -msse -msse2 -mfpmath=sse -ffast-math } \
+        { -O0 -msse -mno-sse2 -mfpmath=sse,387 -ffast-math } \
+        { -O0 -msse -msse2 -mfpmath=sse,387 -ffast-math } \
+        { -O2 } \
+        { -O2 -msse -mno-sse2 -mfpmath=sse } \
+        { -O2 -msse -msse2 -mfpmath=sse } \
+        { -O2 -msse -mno-sse2 -mfpmath=sse,387 } \
+        { -O2 -msse -msse2 -mfpmath=sse,387 } \
+        { -O2 -msse -mno-sse2 -mfpmath=sse -ffast-math } \
+        { -O2 -msse -msse2 -mfpmath=sse -ffast-math } \
+        { -O2 -msse -mno-sse2 -mfpmath=sse,387 -ffast-math } \
+        { -O2 -msse -msse2 -mfpmath=sse,387 -ffast-math } \
+]
+load_lib gcc-dg.exp
+
+dg-init
+gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] ""
+dg-finish
Follow-Ups:
- Re: [PATCH][PING] Expand lrint inline for x86_64/i?86 SSE math
  - From: Roger Sayle
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]