This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
RE: Reverted: [RFC PATCH, i386]: Convert 1.0/sqrtf (x) into rsqrtf even without -mrecip
Uros Bizjak wrote:
> It turns out that rsqrtss conversion causes SPEC2006 481.wrf to
> segfault, probably because it directly compares two FP operands. So, the
> above change is reverted in order to keep all reciprocal transformations
> out of the -ffast-math for x86. This fixes PR target/34709.
Uros,
I have a small case showing the root cause:
> cat a.f90
!> Minimal reproducer module for the 1.0/sqrt(x) regression discussed in
!> this message. It only provides a reciprocal square root in default
!> (single) precision.
module my_mod
  implicit none
  ! Not referenced by the reproducer; kept so the module's public names
  ! are unchanged.
  real :: zero = 0.0
contains
  !> Return 1.0/sqrt(r).
  !> The expression is deliberately written as a plain division by sqrt
  !> with no special-casing of r = 0, r = -0 or r = +Infinity, because
  !> those are exactly the inputs the driver program probes.
  real function my_rsqrt (r)
    implicit none
    real, intent(in) :: r   ! read-only input; callers pass literals
    my_rsqrt = 1.0/sqrt(r)
  end function my_rsqrt
end module my_mod
!> Driver for the reproducer: evaluates my_rsqrt at +0, at the result of
!> that first call, and at -0, printing each value with list-directed
!> output.
program main
  use my_mod, only: my_rsqrt
  implicit none
  real :: sqr
  sqr = my_rsqrt(0.0)
  ! No comma between the control list and the output list: the
  ! "write(*,*)," form is a non-standard legacy extension.
  write(*,*) 'rsqrt(0)=', sqr
  ! Feed the previous result back in; the label assumes the first call
  ! produced +Infinity.
  sqr = my_rsqrt(sqr)
  write(*,*) 'rsqrt(INF)=', sqr
  sqr = my_rsqrt(-0.0)
  write(*,*) 'rsqrt(-0)=', sqr
end program main
> gfortran -v
Target: x86_64-unknown-linux-gnu
gcc version 4.3.0 20080105 (experimental) [trunk revision 131342] (GCC)
> gfortran -O2 -ffast-math a.f90 -o 342.exe
> ./342.exe
rsqrt(0)= NaN
rsqrt(INF)= NaN
rsqrt(-0)= NaN
> gfortran -v
Target: x86_64-unknown-linux-gnu
gcc version 4.3.0 20080105 (experimental) [trunk revision 131341] (GCC)
> gfortran -O2 -ffast-math a.f90 -o 341.exe
> ./341.exe
rsqrt(0)= +Infinity
rsqrt(INF)= 0.0000000
rsqrt(-0)= -Infinity
The following patch, applied on top of revision 131342, passes the small
test case above as well as 481.wrf and bootstrap. It may not be well
optimized, and its performance impact has not been tested yet. Also, the
case a == infinity is not handled when recip == 0. The patch is meant
more as solid proof of the root cause of the regression than as a final fix.
Thanks - Joey
Index: gcc/real.h
===================================================================
--- gcc/real.h (revision 131342)
+++ gcc/real.h (working copy)
@@ -387,6 +387,7 @@
extern REAL_VALUE_TYPE dconstthird;
extern REAL_VALUE_TYPE dconstsqrt2;
extern REAL_VALUE_TYPE dconste;
+extern REAL_VALUE_TYPE dconstinf;
/* Function to return a real value (not a tree node)
from a given integer constant. */
Index: gcc/emit-rtl.c
===================================================================
--- gcc/emit-rtl.c (revision 131342)
+++ gcc/emit-rtl.c (working copy)
@@ -108,6 +108,7 @@
REAL_VALUE_TYPE dconstthird;
REAL_VALUE_TYPE dconstsqrt2;
REAL_VALUE_TYPE dconste;
+REAL_VALUE_TYPE dconstinf;
/* Record fixed-point constant 0 and 1. */
FIXED_VALUE_TYPE fconst0[MAX_FCONST0];
@@ -5247,6 +5248,7 @@
SET_REAL_EXP (&dconsthalf, REAL_EXP (&dconsthalf) - 1);
real_arithmetic (&dconstthird, RDIV_EXPR, &dconst1, &dconst3);
+ real_arithmetic (&dconstinf, RDIV_EXPR, &dconst1, &dconst0);
/* Initialize mathematical constants for constant folding builtins.
These constants need to be given to at least 160 bits precision.
*/
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 131342)
+++ gcc/config/i386/i386.c (working copy)
@@ -24211,7 +24211,8 @@
void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
bool recip)
{
- rtx x0, e0, e1, e2, e3, three, half, zero, mask;
+ rtx x0, e0, e1, e2, e3, three, half, zero, inf, zero_or_inf;
+ rtx mask, not_mask, inf_mask;
x0 = gen_reg_rtx (mode);
e0 = gen_reg_rtx (mode);
@@ -24221,17 +24222,23 @@
three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
+ inf = CONST_DOUBLE_FROM_REAL_VALUE (dconstinf, SFmode);
+ zero_or_inf = gen_reg_rtx (mode);
mask = gen_reg_rtx (mode);
+ not_mask = gen_reg_rtx (mode);
+ inf_mask = gen_reg_rtx (mode);
if (VECTOR_MODE_P (mode))
{
three = ix86_build_const_vector (SFmode, true, three);
half = ix86_build_const_vector (SFmode, true, half);
+ inf = ix86_build_const_vector (SFmode, true, inf);
}
three = force_reg (mode, three);
half = force_reg (mode, half);
+ inf = force_reg (mode, inf);
zero = force_reg (mode, CONST0_RTX(mode));
@@ -24239,16 +24246,30 @@
1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) *
rsqrtss(a)) */
/* Compare a to zero. */
- if (!recip)
- emit_insn (gen_rtx_SET (VOIDmode, mask,
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
gen_rtx_NE (mode, zero, a)));
+ /* For reciprocal, both zero and infinit need special handle.
+ TODO: need to handle infinit when recip == 0 */
+ if (recip)
+ {
+ /* Compare a to zero. */
+ emit_insn (gen_rtx_SET (VOIDmode, not_mask,
+ gen_rtx_EQ (mode, zero, a)));
+ /* Compare a to infinit. */
+ emit_insn (gen_rtx_SET (VOIDmode, inf_mask,
+ gen_rtx_NE (mode, inf, a)));
+ }
/* x0 = 1./sqrt(a) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
UNSPEC_RSQRT)));
/* Filter out infinity. */
- if (!recip)
+ if (recip)
+ /* zero_or_inf = x0 & (a == 0) */
+ emit_insn (gen_rtx_SET (VOIDmode, zero_or_inf,
+ gen_rtx_AND (mode, x0, not_mask)));
+ else
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
@@ -24271,6 +24292,19 @@
/* ret = e2 * e3 */
emit_insn (gen_rtx_SET (VOIDmode, res,
gen_rtx_MULT (mode, e2, e3)));
+ if (recip)
+ {
+ /* ret = ret & mask, to clean possible NAN into zero */
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_AND (mode, res, mask)));
+ /* ret = ret + zero_or_inf */
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_PLUS (mode, zero_or_inf, res)));
+
+ /* And with inf_mask. res = (a==inf)? 0 : res */
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_AND (mode, res, inf_mask)));
+ }
}
/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */