This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix i?86/x86_64 pre-SSE4.1 rint expansion (PR target/81906)
- From: Uros Bizjak <ubizjak at gmail dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Thu, 7 Dec 2017 18:23:03 +0100
- Subject: Re: [PATCH] Fix i?86/x86_64 pre-SSE4.1 rint expansion (PR target/81906)
- Authentication-results: sourceware.org; auth=none
- References: <20171207164831.GM2353@tucnak>
On Thu, Dec 7, 2017 at 5:48 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As mentioned in the PR, the code emitted by ix86_expand_rint
> doesn't work with rounding to +/- infinity.
> This patch adjusts it, when flag_rounding_math is set, to do something that
> works well even in that case (it should be just one insn longer).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-12-07 Joseph Myers <joseph@codesourcery.com>
> Alexander Monakov <amonakov@ispras.ru>
> Jakub Jelinek <jakub@redhat.com>
>
> PR target/81906
> * config/i386/i386.c (ix86_expand_rint): Handle flag_rounding_math.
>
> * gcc.target/i386/pr81906.c: New test.
OK for trunk and release branches.
Thanks,
Uros.
> --- gcc/config/i386/i386.c.jj 2017-12-05 10:15:31.000000000 +0100
> +++ gcc/config/i386/i386.c 2017-12-07 11:58:15.159881741 +0100
> @@ -44255,8 +44255,7 @@ ix86_expand_lfloorceil (rtx op0, rtx op1
> emit_move_insn (op0, ireg);
> }
>
> -/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
> - result in OPERAND0. */
> +/* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */
> void
> ix86_expand_rint (rtx operand0, rtx operand1)
> {
> @@ -44264,11 +44263,17 @@ ix86_expand_rint (rtx operand0, rtx oper
> xa = fabs (operand1);
> if (!isless (xa, 2**52))
> return operand1;
> - xa = xa + 2**52 - 2**52;
> + two52 = 2**52;
> + if (flag_rounding_math)
> + {
> + two52 = copysign (two52, operand1);
> + xa = operand1;
> + }
> + xa = xa + two52 - two52;
> return copysign (xa, operand1);
> */
> machine_mode mode = GET_MODE (operand0);
> - rtx res, xa, TWO52, mask;
> + rtx res, xa, TWO52, two52, mask;
> rtx_code_label *label;
>
> res = gen_reg_rtx (mode);
> @@ -44281,8 +44286,16 @@ ix86_expand_rint (rtx operand0, rtx oper
> TWO52 = ix86_gen_TWO52 (mode);
> label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
>
> - xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
> - xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
> + two52 = TWO52;
> + if (flag_rounding_math)
> + {
> + two52 = gen_reg_rtx (mode);
> + ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
> + xa = res;
> + }
> +
> + xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
> + xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
>
> ix86_sse_copysign_to_positive (res, xa, res, mask);
>
> --- gcc/testsuite/gcc.target/i386/pr81906.c.jj 2017-12-07 11:38:06.730812658 +0100
> +++ gcc/testsuite/gcc.target/i386/pr81906.c 2017-12-07 11:38:14.488716544 +0100
> @@ -0,0 +1,37 @@
> +/* PR target/81906 */
> +/* { dg-do run { target *-*-linux* *-*-gnu* } } */
> +/* { dg-options "-O2 -frounding-math" } */
> +
> +#include <fenv.h>
> +
> +int
> +main ()
> +{
> + #define N 12
> + double a[N] = { 2.0, 2.25, 2.5, 2.75, 3.5, -2.0, -2.25, -2.5, -2.75, -3.5, 0x2.0p53, -0x2.0p53 };
> + double b[N], c[N], d[N], e[N];
> + double be[N] = { 2.0, 2.0, 2.0, 3.0, 4.0, -2.0, -2.0, -2.0, -3.0, -4.0, 0x2.0p53, -0x2.0p53 };
> + double ce[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -3.0, -3.0, -3.0, -4.0, 0x2.0p53, -0x2.0p53 };
> + double de[N] = { 2.0, 3.0, 3.0, 3.0, 4.0, -2.0, -2.0, -2.0, -2.0, -3.0, 0x2.0p53, -0x2.0p53 };
> + double ee[N] = { 2.0, 2.0, 2.0, 2.0, 3.0, -2.0, -2.0, -2.0, -2.0, -3.0, 0x2.0p53, -0x2.0p53 };
> + asm volatile ("" : : "g" (a), "g" (be), "g" (ce), "g" (de), "g" (ee) : "memory");
> +
> + int i;
> + fesetround (FE_TONEAREST);
> + for (i = 0; i < N; ++i)
> + b[i] = __builtin_rint (a[i]);
> + fesetround (FE_DOWNWARD);
> + for (i = 0; i < N; ++i)
> + c[i] = __builtin_rint (a[i]);
> + fesetround (FE_UPWARD);
> + for (i = 0; i < N; ++i)
> + d[i] = __builtin_rint (a[i]);
> + fesetround (FE_TOWARDZERO);
> + for (i = 0; i < N; ++i)
> + e[i] = __builtin_rint (a[i]);
> + fesetround (FE_TONEAREST);
> + for (i = 0; i < N; ++i)
> + if (b[i] != be[i] || c[i] != ce[i] || d[i] != de[i] || e[i] != ee[i])
> + __builtin_abort ();
> + return 0;
> +}
>
> Jakub