This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][committed] Expand trunc inline with SSE math on x86_64 and i?86


This expands C99 trunc inline using SSE instructions.  On x86_64 and
for i686 SFmode we can do

        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        return (double)(long)x;

where i686 DFmode requires compensation code like

        double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;

Bootstrapped and tested on {x86_64,i686}-linux-gnu.  Applied to mainline
as preapproved on IRC by Roger.

This completes the series and as suggested I will go back and add
appropriate !optimize_size checks where not already done and also
audit the other intrinsics in the i386 backend.

Richard.

2006-10-29  Richard Guenther  <rguenther@suse.de>

	* config/i386/i386-protos.h (ix86_expand_trunc): Declare.
	(ix86_expand_truncdf_32): Likewise.
	* config/i386/i386.c (ix86_expand_trunc): New function expanding
	trunc inline for SSE math and -fno-trapping-math and if not
	optimizing for size.
	(ix86_expand_truncdf_32): Same for DFmode on 32bit archs.
	* config/i386/i386.md (btruncsf2, btruncdf2): Adjust expanders
	for expanding btrunc inline for SSE math.

	* gcc.target/i386/math-torture/trunc.c: New testcase.

Index: gcc/config/i386/i386-protos.h
===================================================================
--- gcc.orig/config/i386/i386-protos.h	2006-10-29 17:30:50.000000000 +0100
+++ gcc/config/i386/i386-protos.h	2006-10-29 17:35:41.000000000 +0100
@@ -164,6 +164,8 @@ extern void ix86_expand_floorceil (rtx, 
 extern void ix86_expand_floorceildf_32 (rtx, rtx, bool);
 extern void ix86_expand_round (rtx, rtx);
 extern void ix86_expand_rounddf_32 (rtx, rtx);
+extern void ix86_expand_trunc (rtx, rtx);
+extern void ix86_expand_truncdf_32 (rtx, rtx);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
Index: gcc/config/i386/i386.c
===================================================================
--- gcc.orig/config/i386/i386.c	2006-10-29 17:30:50.000000000 +0100
+++ gcc/config/i386/i386.c	2006-10-29 17:35:41.000000000 +0100
@@ -19639,6 +19639,100 @@ ix86_expand_rounddf_32 (rtx operand0, rt
   emit_move_insn (operand0, res);
 }
 
+/* Expand SSE sequence for computing trunc from OPERAND1 storing
+   into OPERAND0.  */
+void
+ix86_expand_trunc (rtx operand0, rtx operand1)
+{
+  /* C code for SSE variant we expand below.
+        double xa = fabs (x), x2;
+        if (!isless (xa, TWO52))
+          return x;
+        return (double)(long)x;
+   */
+  enum machine_mode mode = GET_MODE (operand0);
+  rtx xa, xi, TWO52, label, res;
+
+  TWO52 = ix86_gen_TWO52 (mode);
+
+  /* Temporary for holding the result, initialized to the input
+     operand to ease control flow.  */
+  res = gen_reg_rtx (mode);
+  emit_move_insn (res, operand1);
+
+  /* xa = abs (operand1) */
+  xa = ix86_expand_sse_fabs (res, NULL);
+
+  /* if (!isless (xa, TWO52)) goto label; */
+  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+  /* x = (double)(long)x */
+  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
+  expand_fix (xi, res, 0);
+  expand_float (res, xi, 0);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing trunc from DFmode OPERAND1 storing
+   into OPERAND0 on 32bit targets, avoiding the DImode round-trip.  */
+void
+ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
+{
+  enum machine_mode mode = GET_MODE (operand0);
+  rtx xa, mask, TWO52, label, one, res, smask;
+
+  /* C code for SSE variant we expand below.
+        double xa = fabs (x), xa2, x2;
+        if (!isless (xa, TWO52))
+          return x;
+        xa2 = xa + TWO52 - TWO52;
+     Compensate:
+        if (xa2 > xa)
+          xa2 -= 1.0;
+        x2 = copysign (xa2, x);
+        return x2;
+   */
+
+  TWO52 = ix86_gen_TWO52 (mode);
+
+  /* Temporary for holding the result, initialized to the input
+     operand to ease control flow.  */
+  res = gen_reg_rtx (mode);
+  emit_move_insn (res, operand1);
+
+  /* xa = abs (operand1) */
+  xa = ix86_expand_sse_fabs (res, &smask);
+
+  /* if (!isless (xa, TWO52)) goto label; */
+  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+  /* res = xa + TWO52 - TWO52; */
+  expand_simple_binop (mode, PLUS, xa, TWO52, res, 0, OPTAB_DIRECT);
+  expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
+
+  /* generate 1.0 */
+  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
+
+  /* Compensate: res = res - (res > xa ? 1 : 0)  */
+  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
+  emit_insn (gen_rtx_SET (VOIDmode, mask,
+                          gen_rtx_AND (mode, mask, one)));
+  expand_simple_binop (mode, MINUS,
+                       res, mask, res, 0, OPTAB_DIRECT);
+
+  /* res = copysign (res, operand1) */
+  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operand0, res);
+}
+
 /* Expand SSE sequence for computing round from OPERAND1 storing
    into OPERAND0.  */
 void
Index: gcc/config/i386/i386.md
===================================================================
--- gcc.orig/config/i386/i386.md	2006-10-29 17:31:30.000000000 +0100
+++ gcc/config/i386/i386.md	2006-10-29 17:37:09.000000000 +0100
@@ -18032,34 +18032,59 @@
 (define_expand "btruncdf2"
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+       && !flag_trapping_math
+       && !optimize_size)"
+{
+  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && !optimize_size)
+    {
+      if (TARGET_64BIT)
+	ix86_expand_trunc (operand0, operand1);
+      else
+	ix86_expand_truncdf_32 (operand0, operand1);
+    }
+  else
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_extenddfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_trunc (op0, op1));
+      emit_insn (gen_extenddfxf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_trunc (op0, op1));
 
-  emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+    }
   DONE;
 })
 
 (define_expand "btruncsf2"
   [(use (match_operand:SF 0 "register_operand" ""))
    (use (match_operand:SF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  "(TARGET_USE_FANCY_MATH_387
+    && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+       && !flag_trapping_math
+       && !optimize_size)"
+{
+  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && !optimize_size)
+    ix86_expand_trunc (operand0, operand1);
+  else
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_extendsfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_trunc (op0, op1));
+      emit_insn (gen_extendsfxf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_trunc (op0, op1));
 
-  emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+    }
   DONE;
 })
 
Index: gcc/testsuite/gcc.target/i386/math-torture/trunc.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ gcc/testsuite/gcc.target/i386/math-torture/trunc.c	2006-10-29 17:35:41.000000000 +0100
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+
+float testlf (float x)
+{
+  return __builtin_truncf (x);
+}
+double testl (double x)
+{
+  return __builtin_trunc (x);
+}
+long double testll (long double x)
+{
+  return __builtin_truncl (x);
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]