This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][4.3] Expand rint inline for x86_64/i?86 SSE math


This fourth patch in the series adds an inline expander for rint/nearbyint.
For double, the expansion is equivalent to
      if (!isless (fabs (operand1), 2**52))
        return operand1;
      tmp = copysign (2**52, operand1);
      return operand1 + tmp - tmp;
(With value-range information about operand1 we could do even better
and avoid the comparison in some cases.)  For double, this results in:

        movsd   .LC3(%rip), %xmm3
        movapd  %xmm0, %xmm1
        movsd   .LC4(%rip), %xmm2
        andpd   %xmm3, %xmm0
        ucomisd %xmm0, %xmm2
        jbe     .L18
        movapd  %xmm3, %xmm0
        andnpd  %xmm1, %xmm0
        orpd    %xmm2, %xmm0
        addsd   %xmm0, %xmm1
        subsd   %xmm0, %xmm1
.L18:
        movapd  %xmm1, %xmm0

Bootstrapped and regtested on {x86_64,i686}-unknown-linux-gnu.

Ok for 4.3?

Thanks,
Richard.


2006-08-23  Richard Guenther  <rguenther@suse.de>

	* builtins.c (expand_builtin_mathfn): Expand nearbyint as
	rint in case -fno-trapping-math is enabled.
	* config/i386/i386-protos.h (ix86_expand_rint): Declare.
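	* config/i386/i386.c (ix86_sse_copysign_to_positive): Add MASK
	argument.
	(ix86_expand_lround): Adjust call to ix86_sse_copysign_to_positive.
	(ix86_gen_TWO52): New static helper function.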
	(ix86_expand_sse_fabs): Likewise.
	(ix86_expand_rint): New function expanding rint for SSE math.
	* config/i386/i386.md (rintdf2): Enable for SSE math if -fno-trapping-math
	is enabled, use ix86_expand_rint for expansion.
	(rintsf2): Likewise.

	* gcc.target/i386/math-torture/rint.c: New testcase.
	* gcc.target/i386/math-torture/nearbyint.c: Likewise.

Index: gcc/builtins.c
===================================================================
--- gcc.orig/builtins.c
+++ gcc/builtins.c
@@ -1833,7 +1833,10 @@ expand_builtin_mathfn (tree exp, rtx tar
     CASE_FLT_FN (BUILT_IN_ROUND):
       builtin_optab = round_optab; break;
     CASE_FLT_FN (BUILT_IN_NEARBYINT):
-      builtin_optab = nearbyint_optab; break;
+      builtin_optab = nearbyint_optab;
+      if (flag_trapping_math)
+	break;
+      /* Else fallthrough and expand as rint.  */
     CASE_FLT_FN (BUILT_IN_RINT):
       builtin_optab = rint_optab; break;
     default:
Index: gcc/config/i386/i386-protos.h
===================================================================
--- gcc.orig/config/i386/i386-protos.h
+++ gcc/config/i386/i386-protos.h
@@ -159,6 +159,7 @@ extern enum rtx_code ix86_reverse_condit
 
 extern void ix86_expand_lround (rtx, rtx);
 extern void ix86_expand_lfloorceil (rtx, rtx, bool);
+extern void ix86_expand_rint (rtx, rtx);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc.orig/config/i386/i386.c
+++ gcc/config/i386/i386.c
@@ -19016,13 +19016,44 @@ asm_preferred_eh_data_format (int code, 
 }
 
 /* Expand copysign from SIGN to the positive value ABS_VALUE
-   storing in RESULT.  */
+   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
+   the sign-bit.  */
 static void
-ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign)
+ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
 {
   enum machine_mode mode = GET_MODE (sign);
   rtx sgn = gen_reg_rtx (mode);
-  rtx mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+  if (mask == NULL_RTX)
+    {
+      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+      if (!VECTOR_MODE_P (mode))
+	{
+	  /* We need to generate a scalar mode mask in this case.  */
+	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
+	  mask = gen_reg_rtx (mode);
+	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
+	}
+    }
+  else
+    mask = gen_rtx_NOT (mode, mask);
+  emit_insn (gen_rtx_SET (VOIDmode, sgn,
+			  gen_rtx_AND (mode, mask, sign)));
+  emit_insn (gen_rtx_SET (VOIDmode, result,
+			  gen_rtx_IOR (mode, abs_value, sgn)));
+}
+
+/* Expand fabs (OP0) and return a new rtx that holds the result.  The
+   mask for masking out the sign-bit is stored in *SMASK, if that is
+   non-null.  */
+static rtx
+ix86_expand_sse_fabs (rtx op0, rtx *smask)
+{
+  enum machine_mode mode = GET_MODE (op0);
+  rtx xa, mask;
+
+  xa = gen_reg_rtx (mode);
+  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
   if (!VECTOR_MODE_P (mode))
     {
       /* We need to generate a scalar mode mask in this case.  */
@@ -19031,10 +19062,13 @@ ix86_sse_copysign_to_positive (rtx resul
       mask = gen_reg_rtx (mode);
       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
     }
-  emit_insn (gen_rtx_SET (VOIDmode, sgn,
-			  gen_rtx_AND (mode, mask, sign)));
-  emit_insn (gen_rtx_SET (VOIDmode, result,
-			  gen_rtx_IOR (mode, abs_value, sgn)));
+  emit_insn (gen_rtx_SET (VOIDmode, xa,
+			  gen_rtx_AND (mode, op0, mask)));
+
+  if (smask)
+    *smask = mask;
+
+  return xa;
 }
 
 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
@@ -19067,6 +19101,21 @@ ix86_expand_sse_compare_and_jump (enum r
   return label;
 }
 
+/* Generate and return a rtx of mode MODE for 2**n where n is the number
+   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
+static rtx
+ix86_gen_TWO52 (enum machine_mode mode)
+{
+  REAL_VALUE_TYPE TWO52r;
+  rtx TWO52;
+
+  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
+  TWO52 = const_double_from_real_value (TWO52r, mode);
+  TWO52 = force_reg (mode, TWO52);
+
+  return TWO52;
+}
+
 /* Expand SSE sequence for computing lround from OP1 storing
    into OP0.  */
 void
@@ -19088,7 +19137,7 @@ ix86_expand_lround (rtx op0, rtx op1)
 
   /* adj = copysign (0.5, op1) */
   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
-  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1));
+  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
 
   /* adj = op1 + adj */
   expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
@@ -19130,4 +19179,39 @@ ix86_expand_lfloorceil (rtx op0, rtx op1
   emit_move_insn (op0, ireg);
 }
 
+/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
+   result in OPERAND0.  */
+void
+ix86_expand_rint (rtx operand0, rtx operand1)
+{
+  /* C code for the stuff we're doing below:
+        if (!isless (fabs (operand1), 2**52))
+	  return operand1;
+        tmp = copysign (2**52, operand1);
+        return operand1 + tmp - tmp;
+   */
+  enum machine_mode mode = GET_MODE (operand0);
+  rtx res, xa, label, TWO52, mask;
+
+  res = gen_reg_rtx (mode);
+  emit_move_insn (res, operand1);
+
+  /* xa = abs (operand1) */
+  xa = ix86_expand_sse_fabs (res, &mask);
+
+  /* if (!isless (xa, TWO52)) goto label; */
+  TWO52 = ix86_gen_TWO52 (mode);
+  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+  ix86_sse_copysign_to_positive (TWO52, TWO52, res, mask);
+
+  expand_simple_binop (mode, PLUS, res, TWO52, res, 0, OPTAB_DIRECT);
+  expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operand0, res);
+}
+
 #include "gt-i386.h"
Index: gcc/config/i386/i386.md
===================================================================
--- gcc.orig/config/i386/i386.md
+++ gcc/config/i386/i386.md
@@ -17154,10 +17154,17 @@
 (define_expand "rintdf2"
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    ix86_expand_rint (operand0, operand1);
+  else
+    {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
@@ -17165,16 +17172,24 @@
   emit_insn (gen_frndintxf2 (op0, op1));
 
   emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+    }
   DONE;
 })
 
 (define_expand "rintsf2"
   [(use (match_operand:SF 0 "register_operand" ""))
    (use (match_operand:SF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
+  "(TARGET_USE_FANCY_MATH_387
+    && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    ix86_expand_rint (operand0, operand1);
+  else
+    {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
@@ -17182,6 +17197,7 @@
   emit_insn (gen_frndintxf2 (op0, op1));
 
   emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+    }
   DONE;
 })
 
Index: gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/nearbyint.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+
+float testlf (float x)
+{
+  return __builtin_nearbyintf (x);
+}
+double testl (double x)
+{
+  return __builtin_nearbyint (x);
+}
+long double testll (long double x)
+{
+  return __builtin_nearbyintl (x);
+}
+
Index: gcc/testsuite/gcc.target/i386/math-torture/rint.c
===================================================================
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/math-torture/rint.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+
+float testlf (float x)
+{
+  return __builtin_rintf (x);
+}
+double testl (double x)
+{
+  return __builtin_rint (x);
+}
+long double testll (long double x)
+{
+  return __builtin_rintl (x);
+}
+


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]