This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

RE: Reverted: [RFC PATCH, i386]: Convert 1.0/sqrtf (x) into rsqrtf even without -mrecip

From: "Ye, Joey" <joey dot ye at intel dot com>
To: "Uros Bizjak" <ubizjak at gmail dot com>, "H. J. Lu" <hjl at lucon dot org>
Cc: "GCC Patches" <gcc-patches at gcc dot gnu dot org>
Date: Thu, 17 Jan 2008 18:09:16 +0800
Subject: RE: Reverted: [RFC PATCH, i386]: Convert 1.0/sqrtf (x) into rsqrtf even without -mrecip
References: <477F7FEA.9040505@gmail.com> <84fc9c000801050511u160c0253l5151d5f6e7e00d44@mail.gmail.com> <477FE99F.803@gmail.com> <4783CD89.7050301@gmail.com>

Uros Bizjak wrote:
> It turns out that rsqrtss conversion causes SPEC2006 481.wrf to
> segfault, probably because it directly compares two FP operands. So,
> the above change is reverted in order to keep all reciprocal
> transformations out of the -ffast-math for x86. This fixes PR
> target/34709.

Uros,

I have a small case showing the root cause:
> cat a.f90
      module my_mod
      real :: zero = 0.0
      contains
      real function my_rsqrt (r)
        implicit none
        real :: r
        my_rsqrt = 1.0/sqrt(r)
      end function
      end module

      program main
        use my_mod
        implicit none
        real :: sqr
        sqr = my_rsqrt(0.0)
        write(*,*), 'rsqrt(0)=', sqr
        sqr = my_rsqrt(sqr)
        write(*,*), 'rsqrt(INF)=', sqr
        sqr = my_rsqrt(-0.0)
        write(*,*), 'rsqrt(-0)=', sqr
      end program
> gfortran -v
Target: x86_64-unknown-linux-gnu
gcc version 4.3.0 20080105 (experimental) [trunk revision 131342] (GCC)
> gfortran -O2 -ffast-math a.f90 -o 342.exe
> ./342.exe
 rsqrt(0)=             NaN
 rsqrt(INF)=             NaN
 rsqrt(-0)=             NaN

> gfortran -v
Target: x86_64-unknown-linux-gnu
gcc version 4.3.0 20080105 (experimental) [trunk revision 131341] (GCC)
> gfortran -O2 -ffast-math a.f90 -o 341.exe
> ./341.exe
 rsqrt(0)=       +Infinity
 rsqrt(INF)=   0.0000000   
 rsqrt(-0)=       -Infinity

The following patch on 131342 passes the small case as well as 481.wrf
and bootstrap. It may not be well optimized, and its performance impact
has not been tested yet. Also, a == infinity is not handled when
recip == 0. It is meant more as solid proof of the regression's root
cause than as a finished fix.

Thanks - Joey

Index: gcc/real.h
===================================================================
--- gcc/real.h  (revision 131342)
+++ gcc/real.h  (working copy)
@@ -387,6 +387,7 @@
 extern REAL_VALUE_TYPE dconstthird;
 extern REAL_VALUE_TYPE dconstsqrt2;
 extern REAL_VALUE_TYPE dconste;
+extern REAL_VALUE_TYPE dconstinf;

 /* Function to return a real value (not a tree node)
    from a given integer constant.  */
Index: gcc/emit-rtl.c
===================================================================
--- gcc/emit-rtl.c      (revision 131342)
+++ gcc/emit-rtl.c      (working copy)
@@ -108,6 +108,7 @@
 REAL_VALUE_TYPE dconstthird;
 REAL_VALUE_TYPE dconstsqrt2;
 REAL_VALUE_TYPE dconste;
+REAL_VALUE_TYPE dconstinf;

 /* Record fixed-point constant 0 and 1.  */
 FIXED_VALUE_TYPE fconst0[MAX_FCONST0];
@@ -5247,6 +5248,7 @@
   SET_REAL_EXP (&dconsthalf, REAL_EXP (&dconsthalf) - 1);

   real_arithmetic (&dconstthird, RDIV_EXPR, &dconst1, &dconst3);
+  real_arithmetic (&dconstinf, RDIV_EXPR, &dconst1, &dconst0);

   /* Initialize mathematical constants for constant folding builtins.
     These constants need to be given to at least 160 bits precision.  */
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c      (revision 131342)
+++ gcc/config/i386/i386.c      (working copy)
@@ -24211,7 +24211,8 @@
 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
 {
-  rtx x0, e0, e1, e2, e3, three, half, zero, mask;
+  rtx x0, e0, e1, e2, e3, three, half, zero, inf, zero_or_inf;
+  rtx mask, not_mask, inf_mask;

   x0 = gen_reg_rtx (mode);
   e0 = gen_reg_rtx (mode);
@@ -24221,17 +24222,23 @@

   three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
   half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
+  inf = CONST_DOUBLE_FROM_REAL_VALUE (dconstinf, SFmode);

+  zero_or_inf = gen_reg_rtx (mode);
   mask = gen_reg_rtx (mode);
+  not_mask = gen_reg_rtx (mode);
+  inf_mask = gen_reg_rtx (mode);

   if (VECTOR_MODE_P (mode))
     {
       three = ix86_build_const_vector (SFmode, true, three);
       half = ix86_build_const_vector (SFmode, true, half);
+      inf = ix86_build_const_vector (SFmode, true, inf);
     }

   three = force_reg (mode, three);
   half = force_reg (mode, half);
+  inf = force_reg (mode, inf);

   zero = force_reg (mode, CONST0_RTX(mode));

@@ -24239,16 +24246,30 @@
      1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */

   /* Compare a to zero.  */
-  if (!recip)
-    emit_insn (gen_rtx_SET (VOIDmode, mask,
+  emit_insn (gen_rtx_SET (VOIDmode, mask,
                            gen_rtx_NE (mode, zero, a)));
+  /* For reciprocal, both zero and infinit need special handle.
+     TODO: need to handle infinit when recip == 0 */
+  if (recip)
+    {
+      /* Compare a to zero. */
+      emit_insn (gen_rtx_SET (VOIDmode, not_mask,
+                           gen_rtx_EQ (mode, zero, a)));
+      /* Compare a to infinit. */
+      emit_insn (gen_rtx_SET (VOIDmode, inf_mask,
+                           gen_rtx_NE (mode, inf, a)));
+    }

   /* x0 = 1./sqrt(a) estimate */
   emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          UNSPEC_RSQRT)));
   /* Filter out infinity.  */
-  if (!recip)
+  if (recip)
+    /* zero_or_inf = x0 & (a == 0) */
+    emit_insn (gen_rtx_SET (VOIDmode, zero_or_inf,
+                           gen_rtx_AND (mode, x0, not_mask)));
+  else
     emit_insn (gen_rtx_SET (VOIDmode, x0,
                            gen_rtx_AND (mode, x0, mask)));
   /* e0 = x0 * a */
@@ -24271,6 +24292,19 @@
   /* ret = e2 * e3 */
   emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
+  if (recip)
+    {
+      /* ret = ret & mask, to clean possible NAN into zero */
+      emit_insn (gen_rtx_SET (VOIDmode, res,
+                         gen_rtx_AND (mode, res, mask)));
+      /* ret = ret + zero_or_inf */
+      emit_insn (gen_rtx_SET (VOIDmode, res,
+                         gen_rtx_PLUS (mode, zero_or_inf, res)));
+
+      /* And with inf_mask. res = (a==inf)? 0 : res */
+      emit_insn (gen_rtx_SET (VOIDmode, res,
+                         gen_rtx_AND (mode, res, inf_mask)));
+    }
 }

 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

Follow-Ups:
- Re: Reverted: [RFC PATCH, i386]: Convert 1.0/sqrtf (x) into rsqrtf even without -mrecip
  - From: Uros Bizjak

References:
- [RFC PATCH, i386]: Convert 1.0/sqrtf (x) into rsqrtf even without -mrecip
  - From: Uros Bizjak
- Re: [RFC PATCH, i386]: Convert 1.0/sqrtf (x) into rsqrtf even without -mrecip
  - From: Richard Guenther
- Re: [RFC PATCH, i386]: Convert 1.0/sqrtf (x) into rsqrtf even without -mrecip
  - From: Uros Bizjak
- Reverted: [RFC PATCH, i386]: Convert 1.0/sqrtf (x) into rsqrtf even without -mrecip
  - From: Uros Bizjak

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]