This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix signed zeros issues with recent SSE expansion patches


This patch fixes issues with preserving signed zeros in the recent
series of SSE expansion patches (which slipped through my testing because
-0.0 == 0.0 ...).  It affects rint (easy to fix without a runtime penalty),
the 32bit floor/ceil expansion (fixed by always subtracting the possibly-zero
compensation value instead of adding it) and floor/ceil/trunc, where an extra
sign copy is required (only if we honor signed zeros, so there is no runtime
penalty for -ffast-math).

It also adds a (first) runtime correctness test for the floating-point to
floating-point rounding functions in double precision (which covers all
interesting paths when tested on both i686 and x86_64).

Bootstrapped and tested on x86_64-linux-gnu, tested on i686-pc-linux-gnu.

Ok for mainline?

Thanks,
Richard.

2006-10-30  Richard Guenther  <rguenther@suse.de>

	* config/i386/i386.c (ix86_expand_rint): Fix issues with
	signed zeros.
	(ix86_expand_floorceildf_32): Likewise.
	(ix86_expand_floorceil): Likewise.
	(ix86_expand_trunc): Likewise.

	* testsuite/gcc.target/i386/fpprec-1.c: New testcase.

Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 118179)
--- config/i386/i386.c	(working copy)
*************** void
*** 19421,19430 ****
  ix86_expand_rint (rtx operand0, rtx operand1)
  {
    /* C code for the stuff we're doing below:
!         if (!isless (fabs (operand1), 2**52))
  	  return operand1;
!         tmp = copysign (2**52, operand1);
!         return operand1 + tmp - tmp;
     */
    enum machine_mode mode = GET_MODE (operand0);
    rtx res, xa, label, TWO52, mask;
--- 19421,19431 ----
  ix86_expand_rint (rtx operand0, rtx operand1)
  {
    /* C code for the stuff we're doing below:
! 	xa = fabs (operand1);
!         if (!isless (xa, 2**52))
  	  return operand1;
!         xa = xa + 2**52 - 2**52;
!         return copysign (xa, operand1);
     */
    enum machine_mode mode = GET_MODE (operand0);
    rtx res, xa, label, TWO52, mask;
*************** ix86_expand_rint (rtx operand0, rtx oper
*** 19439,19448 ****
    TWO52 = ix86_gen_TWO52 (mode);
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
!   ix86_sse_copysign_to_positive (TWO52, TWO52, res, mask);
  
!   expand_simple_binop (mode, PLUS, res, TWO52, res, 0, OPTAB_DIRECT);
!   expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
  
    emit_label (label);
    LABEL_NUSES (label) = 1;
--- 19440,19449 ----
    TWO52 = ix86_gen_TWO52 (mode);
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
  
!   expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
!   expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  
!   ix86_sse_copysign_to_positive (res, xa, res, mask);
  
    emit_label (label);
    LABEL_NUSES (label) = 1;
*************** ix86_expand_floorceildf_32 (rtx operand0
*** 19466,19472 ****
            x2 -= 1;
       Compensate.  Ceil:
          if (x2 < x)
!           x2 += 1;
          return x2;
     */
    enum machine_mode mode = GET_MODE (operand0);
--- 19467,19473 ----
            x2 -= 1;
       Compensate.  Ceil:
          if (x2 < x)
!           x2 -= -1;
          return x2;
     */
    enum machine_mode mode = GET_MODE (operand0);
*************** ix86_expand_floorceildf_32 (rtx operand0
*** 19492,19505 ****
    /* xa = copysign (xa, operand1) */
    ix86_sse_copysign_to_positive (xa, xa, res, mask);
  
!   /* generate 1.0 */
!   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
  
    /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
    tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
!   expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                         xa, tmp, res, 0, OPTAB_DIRECT);
  
    emit_label (label);
--- 19493,19509 ----
    /* xa = copysign (xa, operand1) */
    ix86_sse_copysign_to_positive (xa, xa, res, mask);
  
!   /* generate 1.0 or -1.0 */
!   one = force_reg (mode,
! 	           const_double_from_real_value (do_floor
! 						 ? dconst1 : dconstm1, mode));
  
    /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
    tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
    emit_insn (gen_rtx_SET (VOIDmode, tmp,
                            gen_rtx_AND (mode, one, tmp)));
!   /* We always need to subtract here to preserve signed zero.  */
!   expand_simple_binop (mode, MINUS,
                         xa, tmp, res, 0, OPTAB_DIRECT);
  
    emit_label (label);
*************** ix86_expand_floorceil (rtx operand0, rtx
*** 19524,19533 ****
       Compensate.  Ceil:
  	if (x2 < x)
  	  x2 += 1;
  	return x2;
     */
    enum machine_mode mode = GET_MODE (operand0);
!   rtx xa, xi, TWO52, tmp, label, one, res;
  
    TWO52 = ix86_gen_TWO52 (mode);
  
--- 19528,19539 ----
       Compensate.  Ceil:
  	if (x2 < x)
  	  x2 += 1;
+ 	if (HONOR_SIGNED_ZEROS (mode))
+ 	  return copysign (x2, x);
  	return x2;
     */
    enum machine_mode mode = GET_MODE (operand0);
!   rtx xa, xi, TWO52, tmp, label, one, res, mask;
  
    TWO52 = ix86_gen_TWO52 (mode);
  
*************** ix86_expand_floorceil (rtx operand0, rtx
*** 19537,19543 ****
    emit_move_insn (res, operand1);
  
    /* xa = abs (operand1) */
!   xa = ix86_expand_sse_fabs (res, NULL);
  
    /* if (!isless (xa, TWO52)) goto label; */
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
--- 19543,19549 ----
    emit_move_insn (res, operand1);
  
    /* xa = abs (operand1) */
!   xa = ix86_expand_sse_fabs (res, &mask);
  
    /* if (!isless (xa, TWO52)) goto label; */
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
*************** ix86_expand_floorceil (rtx operand0, rtx
*** 19557,19562 ****
--- 19563,19571 ----
    expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                         xa, tmp, res, 0, OPTAB_DIRECT);
  
+   if (HONOR_SIGNED_ZEROS (mode))
+     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
+ 
    emit_label (label);
    LABEL_NUSES (label) = 1;
  
*************** ix86_expand_trunc (rtx operand0, rtx ope
*** 19648,19657 ****
          double xa = fabs (x), x2;
          if (!isless (xa, TWO52))
            return x;
!         return (double)(long)x;
     */
    enum machine_mode mode = GET_MODE (operand0);
!   rtx xa, xi, TWO52, label, res;
  
    TWO52 = ix86_gen_TWO52 (mode);
  
--- 19657,19669 ----
          double xa = fabs (x), x2;
          if (!isless (xa, TWO52))
            return x;
!         x2 = (double)(long)x;
! 	if (HONOR_SIGNED_ZEROS (mode))
! 	  return copysign (x2, x);
! 	return x2;
     */
    enum machine_mode mode = GET_MODE (operand0);
!   rtx xa, xi, TWO52, label, res, mask;
  
    TWO52 = ix86_gen_TWO52 (mode);
  
*************** ix86_expand_trunc (rtx operand0, rtx ope
*** 19661,19667 ****
    emit_move_insn (res, operand1);
  
    /* xa = abs (operand1) */
!   xa = ix86_expand_sse_fabs (res, NULL);
  
    /* if (!isless (xa, TWO52)) goto label; */
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
--- 19673,19679 ----
    emit_move_insn (res, operand1);
  
    /* xa = abs (operand1) */
!   xa = ix86_expand_sse_fabs (res, &mask);
  
    /* if (!isless (xa, TWO52)) goto label; */
    label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
*************** ix86_expand_trunc (rtx operand0, rtx ope
*** 19671,19676 ****
--- 19683,19691 ----
    expand_fix (xi, res, 0);
    expand_float (res, xi, 0);
  
+   if (HONOR_SIGNED_ZEROS (mode))
+     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
+ 
    emit_label (label);
    LABEL_NUSES (label) = 1;
  
Index: testsuite/gcc.target/i386/fpprec-1.c
===================================================================
*** testsuite/gcc.target/i386/fpprec-1.c	(revision 0)
--- testsuite/gcc.target/i386/fpprec-1.c	(revision 0)
***************
*** 0 ****
--- 1,90 ----
+ /* { dg-do run } */
+ /* { dg-options "-O2 -fno-math-errno -fno-trapping-math -msse2 -mfpmath=sse" } */
+ 
+ #include "../../gcc.dg/i386-cpuid.h"
+ 
+ extern void abort(void);
+ extern int printf(const char *format, ...);
+ 
+ double x[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+ 	-0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,  /* +-DBL_MAX */
+ 	-0x1p-52, 0x1p-52,				    /* +-DBL_EPSILON */
+ 	/* nextafter/before 0.5, 1.0 and 1.5 */
+ 	0x1.0000000000001p-1, 0x1.fffffffffffffp-2,
+ 	0x1.0000000000001p+0, 0x1.fffffffffffffp-1,
+ 	0x1.8000000000001p+0, 0x1.7ffffffffffffp+0,
+ 	-0.0, 0.0, -0.5, 0.5, -1.0, 1.0, -1.5, 1.5, -2.0, 2.0,
+ 	-2.5, 2.5 };
+ #define NUM (sizeof(x)/sizeof(double))
+ 
+ double expect_round[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+ 	-0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+ 	-0.0, 0.0,
+ 	1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
+ 	-0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
+ 	-3.0, 3.0 };
+ 
+ double expect_rint[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+         -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+         -0.0, 0.0,
+         1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
+         -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
+         -2.0, 2.0 };
+ 
+ double expect_floor[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+         -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+         -1.0, 0.0,
+         0.0, 0.0, 1.0, 0.0, 1.0, 1.0,
+         -0.0, 0.0, -1.0, 0.0, -1.0, 1.0, -2.0, 1.0, -2.0, 2.0,
+         -3.0, 2.0 };
+ 
+ double expect_ceil[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+         -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+         -0.0, 1.0,
+         1.0, 1.0, 2.0, 1.0, 2.0, 2.0,
+         -0.0, 0.0, -0.0, 1.0, -1.0, 1.0, -1.0, 2.0, -2.0, 2.0,
+         -2.0, 3.0 };
+ 
+ double expect_trunc[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+         -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+         -0.0, 0.0,
+         0.0, 0.0, 1.0, 0.0, 1.0, 1.0,
+         -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0,
+         -2.0, 2.0 };
+ 
+ 
+ #define CHECK(fn) \
+ void check_ ## fn (void) \
+ { \
+   int i; \
+   for (i = 0; i < NUM; ++i) \
+     { \
+       double res = __builtin_ ## fn (x[i]); \
+       if (__builtin_memcmp (&res, &expect_ ## fn [i], sizeof(double)) != 0) \
+         printf( # fn " [%i]: %.18e %.18e\n", i, expect_ ## fn [i], res), abort (); \
+     } \
+ }
+ 
+ CHECK(round)
+ CHECK(rint)
+ CHECK(floor)
+ CHECK(ceil)
+ CHECK(trunc)
+ 
+ int main()
+ {
+   unsigned long cpu_facilities;
+ 
+   cpu_facilities = i386_cpuid ();
+ 
+   if ((cpu_facilities & bit_SSE2) != bit_SSE2)
+     /* If host has no SSE2 support, pass.  */
+     return 0;
+ 
+   check_round ();
+   check_rint ();
+   check_floor ();
+   check_ceil ();
+   check_trunc ();
+   return 0;
+ }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]