[PATCH] Fix signed zeros issues with recent SSE expansion patches
Richard Guenther
rguenther@suse.de
Tue Oct 31 10:03:00 GMT 2006
This patch fixes issues with preserving signed zeros in the recent
series of SSE expansion patches (which slipped through my testing because
-0.0 == 0.0 ...). It affects rint (easy to fix without a runtime penalty),
the 32-bit floor/ceil expansion (fixed by always subtracting the
compensation term, so that a zero is subtracted rather than added),
and floor/ceil/trunc, where an extra sign copy is required (only if we
honor signed zeros, so there is no runtime penalty for -ffast-math).
It also adds a (first) runtime correctness check for the float -> float
rounding functions in double precision (which covers all interesting
code paths when the test is run on both i686 and x86_64).
Bootstrapped and tested on x86_64-linux-gnu, tested on i686-pc-linux-gnu.
Ok for mainline?
Thanks,
Richard.
2006-10-30 Richard Guenther <rguenther@suse.de>
* config/i386/i386.c (ix86_expand_rint): Fix issues with
signed zeros.
(ix86_expand_floorceildf_32): Likewise.
(ix86_expand_floorceil): Likewise.
(ix86_expand_trunc): Likewise.
* testsuite/gcc.target/i386/fpprec-1.c: New testcase.
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c (revision 118179)
--- config/i386/i386.c (working copy)
*************** void
*** 19421,19430 ****
ix86_expand_rint (rtx operand0, rtx operand1)
{
/* C code for the stuff we're doing below:
! if (!isless (fabs (operand1), 2**52))
return operand1;
! tmp = copysign (2**52, operand1);
! return operand1 + tmp - tmp;
*/
enum machine_mode mode = GET_MODE (operand0);
rtx res, xa, label, TWO52, mask;
--- 19421,19431 ----
ix86_expand_rint (rtx operand0, rtx operand1)
{
/* C code for the stuff we're doing below:
! xa = fabs (operand1);
! if (!isless (xa, 2**52))
return operand1;
! xa = xa + 2**52 - 2**52;
! return copysign (xa, operand1);
*/
enum machine_mode mode = GET_MODE (operand0);
rtx res, xa, label, TWO52, mask;
*************** ix86_expand_rint (rtx operand0, rtx oper
*** 19439,19448 ****
TWO52 = ix86_gen_TWO52 (mode);
label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
! ix86_sse_copysign_to_positive (TWO52, TWO52, res, mask);
! expand_simple_binop (mode, PLUS, res, TWO52, res, 0, OPTAB_DIRECT);
! expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
emit_label (label);
LABEL_NUSES (label) = 1;
--- 19440,19449 ----
TWO52 = ix86_gen_TWO52 (mode);
label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
! expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
! expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
! ix86_sse_copysign_to_positive (res, xa, res, mask);
emit_label (label);
LABEL_NUSES (label) = 1;
*************** ix86_expand_floorceildf_32 (rtx operand0
*** 19466,19472 ****
x2 -= 1;
Compensate. Ceil:
if (x2 < x)
! x2 += 1;
return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
--- 19467,19473 ----
x2 -= 1;
Compensate. Ceil:
if (x2 < x)
! x2 -= -1;
return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
*************** ix86_expand_floorceildf_32 (rtx operand0
*** 19492,19505 ****
/* xa = copysign (xa, operand1) */
ix86_sse_copysign_to_positive (xa, xa, res, mask);
! /* generate 1.0 */
! one = force_reg (mode, const_double_from_real_value (dconst1, mode));
/* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
emit_insn (gen_rtx_SET (VOIDmode, tmp,
gen_rtx_AND (mode, one, tmp)));
! expand_simple_binop (mode, do_floor ? MINUS : PLUS,
xa, tmp, res, 0, OPTAB_DIRECT);
emit_label (label);
--- 19493,19509 ----
/* xa = copysign (xa, operand1) */
ix86_sse_copysign_to_positive (xa, xa, res, mask);
! /* generate 1.0 or -1.0 */
! one = force_reg (mode,
! const_double_from_real_value (do_floor
! ? dconst1 : dconstm1, mode));
/* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
emit_insn (gen_rtx_SET (VOIDmode, tmp,
gen_rtx_AND (mode, one, tmp)));
! /* We always need to subtract here to preserve signed zero. */
! expand_simple_binop (mode, MINUS,
xa, tmp, res, 0, OPTAB_DIRECT);
emit_label (label);
*************** ix86_expand_floorceil (rtx operand0, rtx
*** 19524,19533 ****
Compensate. Ceil:
if (x2 < x)
x2 += 1;
return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
! rtx xa, xi, TWO52, tmp, label, one, res;
TWO52 = ix86_gen_TWO52 (mode);
--- 19528,19539 ----
Compensate. Ceil:
if (x2 < x)
x2 += 1;
+ if (HONOR_SIGNED_ZEROS (mode))
+ return copysign (x2, x);
return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
! rtx xa, xi, TWO52, tmp, label, one, res, mask;
TWO52 = ix86_gen_TWO52 (mode);
*************** ix86_expand_floorceil (rtx operand0, rtx
*** 19537,19543 ****
emit_move_insn (res, operand1);
/* xa = abs (operand1) */
! xa = ix86_expand_sse_fabs (res, NULL);
/* if (!isless (xa, TWO52)) goto label; */
label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
--- 19543,19549 ----
emit_move_insn (res, operand1);
/* xa = abs (operand1) */
! xa = ix86_expand_sse_fabs (res, &mask);
/* if (!isless (xa, TWO52)) goto label; */
label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
*************** ix86_expand_floorceil (rtx operand0, rtx
*** 19557,19562 ****
--- 19563,19571 ----
expand_simple_binop (mode, do_floor ? MINUS : PLUS,
xa, tmp, res, 0, OPTAB_DIRECT);
+ if (HONOR_SIGNED_ZEROS (mode))
+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
+
emit_label (label);
LABEL_NUSES (label) = 1;
*************** ix86_expand_trunc (rtx operand0, rtx ope
*** 19648,19657 ****
double xa = fabs (x), x2;
if (!isless (xa, TWO52))
return x;
! return (double)(long)x;
*/
enum machine_mode mode = GET_MODE (operand0);
! rtx xa, xi, TWO52, label, res;
TWO52 = ix86_gen_TWO52 (mode);
--- 19657,19669 ----
double xa = fabs (x), x2;
if (!isless (xa, TWO52))
return x;
! x2 = (double)(long)x;
! if (HONOR_SIGNED_ZEROS (mode))
! return copysign (x2, x);
! return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
! rtx xa, xi, TWO52, label, res, mask;
TWO52 = ix86_gen_TWO52 (mode);
*************** ix86_expand_trunc (rtx operand0, rtx ope
*** 19661,19667 ****
emit_move_insn (res, operand1);
/* xa = abs (operand1) */
! xa = ix86_expand_sse_fabs (res, NULL);
/* if (!isless (xa, TWO52)) goto label; */
label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
--- 19673,19679 ----
emit_move_insn (res, operand1);
/* xa = abs (operand1) */
! xa = ix86_expand_sse_fabs (res, &mask);
/* if (!isless (xa, TWO52)) goto label; */
label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
*************** ix86_expand_trunc (rtx operand0, rtx ope
*** 19671,19676 ****
--- 19683,19691 ----
expand_fix (xi, res, 0);
expand_float (res, xi, 0);
+ if (HONOR_SIGNED_ZEROS (mode))
+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
+
emit_label (label);
LABEL_NUSES (label) = 1;
Index: testsuite/gcc.target/i386/fpprec-1.c
===================================================================
*** testsuite/gcc.target/i386/fpprec-1.c (revision 0)
--- testsuite/gcc.target/i386/fpprec-1.c (revision 0)
***************
*** 0 ****
--- 1,90 ----
+ /* { dg-do run } */
+ /* { dg-options "-O2 -fno-math-errno -fno-trapping-math -msse2 -mfpmath=sse" } */
+
+ #include "../../gcc.dg/i386-cpuid.h"
+
+ extern void abort(void);
+ extern int printf(const char *format, ...);
+
+ double x[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+ -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023, /* +-DBL_MAX */
+ -0x1p-52, 0x1p-52, /* +-DBL_EPSILON */
+ /* The doubles immediately above and below 0.5, 1.0 and 1.5.  */
+ 0x1.0000000000001p-1, 0x1.fffffffffffffp-2,
+ 0x1.0000000000001p+0, 0x1.fffffffffffffp-1,
+ 0x1.8000000000001p+0, 0x1.7ffffffffffffp+0,
+ -0.0, 0.0, -0.5, 0.5, -1.0, 1.0, -1.5, 1.5, -2.0, 2.0, /* signed zeros, halves, integers */
+ -2.5, 2.5 };
+ #define NUM (sizeof(x)/sizeof(double)) /* number of inputs in x[] */
+ /* expect_<fn>[i] is the bit-exact result required from <fn> (x[i]).  */
+ double expect_round[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+ -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+ -0.0, 0.0, /* +-DBL_EPSILON: the zero result keeps the input's sign */
+ 1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
+ -0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
+ -3.0, 3.0 }; /* halfway cases round away from zero */
+
+ double expect_rint[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+ -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+ -0.0, 0.0,
+ 1.0, 0.0, 1.0, 1.0, 2.0, 1.0,
+ -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -2.0, 2.0, -2.0, 2.0,
+ -2.0, 2.0 }; /* halfway cases are expected to round to even */
+
+ double expect_floor[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+ -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+ -1.0, 0.0, /* floor (-DBL_EPSILON) is -1.0 */
+ 0.0, 0.0, 1.0, 0.0, 1.0, 1.0,
+ -0.0, 0.0, -1.0, 0.0, -1.0, 1.0, -2.0, 1.0, -2.0, 2.0,
+ -3.0, 2.0 };
+
+ double expect_ceil[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+ -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+ -0.0, 1.0, /* ceil (-DBL_EPSILON) is -0.0, not 0.0 */
+ 1.0, 1.0, 2.0, 1.0, 2.0, 2.0,
+ -0.0, 0.0, -0.0, 1.0, -1.0, 1.0, -1.0, 2.0, -2.0, 2.0,
+ -2.0, 3.0 };
+
+ double expect_trunc[] = { __builtin_nan(""), __builtin_inf(), -__builtin_inf(),
+ -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023,
+ -0.0, 0.0, /* truncation to zero keeps the input's sign */
+ 0.0, 0.0, 1.0, 0.0, 1.0, 1.0,
+ -0.0, 0.0, -0.0, 0.0, -1.0, 1.0, -1.0, 1.0, -2.0, 2.0,
+ -2.0, 2.0 };
+
+ /* CHECK(fn) defines check_fn (): compare __builtin_fn (x[i]) with expect_fn[i] via memcmp (so -0.0 != 0.0); print and abort on mismatch.  */
+ #define CHECK(fn) \
+ void check_ ## fn (void) \
+ { \
+ int i; \
+ for (i = 0; i < NUM; ++i) \
+ { \
+ double res = __builtin_ ## fn (x[i]); \
+ if (__builtin_memcmp (&res, &expect_ ## fn [i], sizeof(double)) != 0) \
+ printf( # fn " [%i]: %.18e %.18e\n", i, expect_ ## fn [i], res), abort (); \
+ } \
+ }
+
+ CHECK(round)
+ CHECK(rint)
+ CHECK(floor)
+ CHECK(ceil)
+ CHECK(trunc)
+ /* Run all five rounding checks when the CPU reports SSE2; return 0 on success or when skipped.  */
+ int main()
+ {
+ unsigned long cpu_facilities; /* value returned by i386_cpuid () */
+
+ cpu_facilities = i386_cpuid ();
+
+ if ((cpu_facilities & bit_SSE2) != bit_SSE2)
+ /* The host has no SSE2 support: skip the checks and report success.  */
+ return 0;
+
+ check_round ();
+ check_rint ();
+ check_floor ();
+ check_ceil ();
+ check_trunc ();
+ return 0;
+ }
More information about the Gcc-patches
mailing list