[PATCH] Fix PR25620, pow() expansion missed-optimization, 2nd try
Richard Guenther
rguenther@suse.de
Mon Nov 13 14:36:00 GMT 2006
On Sun, 12 Nov 2006, Roger Sayle wrote:
>
> Hi Richard,
>
> On Sun, 12 Nov 2006, Richard Guenther wrote:
> > Bootstrapped and regtested on x86_64-unknown-linux-gnu. SPEC 2k
> > doesn't show a difference, Polyhedron has an improvement for air
> > (down from 21s to 16.5s runtime).
>
> Cool! I hadn't appreciated this had such a significant impact on
> polyhedron.
So, here's the 3rd version. I fixed another issue (handling of negative
exponents was wrong) on the way and re-shuffled the code to check early
for the fallback function. As I don't like deeply indented code too much,
I have duplicated the fallback to expand_builtin_mathfn_2 ()
(I also made it correct for errno handling and removed the restriction on
unsafe_math for the fallback from expand_builtin_pow - you added that
back in 2004, but unfortunately the mailing list archives for that year
are corrupt, so I cannot figure out why, and I cannot think of any
reason ;)). I chose not to add another indent level after the
real_to_integer calls, where we could check for the
! && ((!optimize_size
! && flag_unsafe_math_optimizations
! && powi_cost (n/3) <= POWI_MAX_MULTS)
! || n == 1))
part.
I chose to use OPTAB_LIB_WIDEN and special-cased the expansion to a pure
sqrt () or cbrt () call, which we can do even for !unsafe_math (I did
not update fold_builtin_pow (yet)).
Bootstrapped and tested on x86_64-unknown-linux-gnu.
Ok for mainline?
Thanks,
Richard.
2006-11-13 Richard Guenther <rguenther@suse.de>
PR middle-end/25620
* builtins.c (expand_builtin_mathfn_2): Handle setting errno
if flag_errno_math is set, apart from remainder and drem, which
do not set errno at all.
(expand_builtin_pow): Optimize non-integer-valued constant
exponents using sqrt or cbrt if possible. Always fall back to
expanding via optabs.
* gcc.target/i386/pow-1.c: New testcase.
* gcc.dg/builtins-58.c: Likewise.
Index: builtins.c
===================================================================
*** builtins.c (revision 118752)
--- builtins.c (working copy)
*************** expand_builtin_mathfn_2 (tree exp, rtx t
*** 1960,1966 ****
tree arglist = TREE_OPERAND (exp, 1);
tree arg0, arg1, temp, narg;
enum machine_mode mode;
! bool errno_set = true;
bool stable = true;
if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LDEXP)
--- 1960,1966 ----
tree arglist = TREE_OPERAND (exp, 1);
tree arg0, arg1, temp, narg;
enum machine_mode mode;
! bool errno_set = flag_errno_math;
bool stable = true;
if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LDEXP)
*************** expand_builtin_mathfn_2 (tree exp, rtx t
*** 1986,1992 ****
builtin_optab = fmod_optab; break;
CASE_FLT_FN (BUILT_IN_REMAINDER):
CASE_FLT_FN (BUILT_IN_DREM):
! builtin_optab = remainder_optab; break;
default:
gcc_unreachable ();
}
--- 1986,1994 ----
builtin_optab = fmod_optab; break;
CASE_FLT_FN (BUILT_IN_REMAINDER):
CASE_FLT_FN (BUILT_IN_DREM):
! builtin_optab = remainder_optab;
! errno_set = false;
! break;
default:
gcc_unreachable ();
}
*************** expand_builtin_mathfn_2 (tree exp, rtx t
*** 2000,2008 ****
target = gen_reg_rtx (mode);
- if (! flag_errno_math || ! HONOR_NANS (mode))
- errno_set = false;
-
/* Always stabilize the argument list. */
narg = builtin_save_expr (arg1);
if (narg != arg1)
--- 2002,2007 ----
*************** expand_powi (rtx x, enum machine_mode mo
*** 2601,2608 ****
static rtx
expand_builtin_pow (tree exp, rtx target, rtx subtarget)
{
tree arglist = TREE_OPERAND (exp, 1);
! tree arg0, arg1;
if (! validate_arglist (arglist, REAL_TYPE, REAL_TYPE, VOID_TYPE))
return 0;
--- 2600,2612 ----
static rtx
expand_builtin_pow (tree exp, rtx target, rtx subtarget)
{
+ tree arg0, arg1, fn, narg0, narglist;
tree arglist = TREE_OPERAND (exp, 1);
! tree type = TREE_TYPE (exp);
! REAL_VALUE_TYPE cint, c, c2;
! HOST_WIDE_INT n;
! rtx op, op2;
! enum machine_mode mode = TYPE_MODE (type);
if (! validate_arglist (arglist, REAL_TYPE, REAL_TYPE, VOID_TYPE))
return 0;
*************** expand_builtin_pow (tree exp, rtx target
*** 2610,2645 ****
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
! if (TREE_CODE (arg1) == REAL_CST
! && ! TREE_CONSTANT_OVERFLOW (arg1))
{
! REAL_VALUE_TYPE cint;
! REAL_VALUE_TYPE c;
! HOST_WIDE_INT n;
! c = TREE_REAL_CST (arg1);
! n = real_to_integer (&c);
real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
! if (real_identical (&c, &cint))
! {
! /* If the exponent is -1, 0, 1 or 2, then expand_powi is exact.
! Otherwise, check the number of multiplications required.
! Note that pow never sets errno for an integer exponent. */
! if ((n >= -1 && n <= 2)
! || (flag_unsafe_math_optimizations
! && ! optimize_size
! && powi_cost (n) <= POWI_MAX_MULTS))
{
! enum machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
! rtx op = expand_expr (arg0, subtarget, VOIDmode, 0);
! op = force_reg (mode, op);
! return expand_powi (op, mode, n);
}
}
}
! if (! flag_unsafe_math_optimizations)
! return NULL_RTX;
return expand_builtin_mathfn_2 (exp, target, subtarget);
}
--- 2614,2721 ----
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
! if (TREE_CODE (arg1) != REAL_CST
! || TREE_CONSTANT_OVERFLOW (arg1))
! return expand_builtin_mathfn_2 (exp, target, subtarget);
!
! /* Handle constant exponents. */
!
! /* For integer valued exponents we can expand to an optimal multiplication
! sequence using expand_powi. */
! c = TREE_REAL_CST (arg1);
! n = real_to_integer (&c);
! real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
! if (real_identical (&c, &cint)
! && ((n >= -1 && n <= 2)
! || (flag_unsafe_math_optimizations
! && !optimize_size
! && powi_cost (n) <= POWI_MAX_MULTS)))
! {
! op = expand_expr (arg0, subtarget, VOIDmode, 0);
! if (n != 1)
! {
! op = force_reg (mode, op);
! op = expand_powi (op, mode, n);
! }
! return op;
! }
!
! narg0 = builtin_save_expr (arg0);
! narglist = build_tree_list (NULL_TREE, narg0);
!
! /* If the exponent is not integer valued, check if it is half of an integer.
! In this case we can expand to sqrt (x) * x**(n/2). */
! fn = mathfn_built_in (type, BUILT_IN_SQRT);
! if (fn != NULL_TREE)
{
! real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
! n = real_to_integer (&c2);
! real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
! if (real_identical (&c2, &cint)
! && ((flag_unsafe_math_optimizations
! && !optimize_size
! && powi_cost (n/2) <= POWI_MAX_MULTS)
! || n == 1))
! {
! tree call_expr = build_function_call_expr (fn, narglist);
! op = expand_builtin (call_expr, NULL_RTX, subtarget, mode, 0);
! if (n != 1)
! {
! op2 = expand_expr (narg0, subtarget, VOIDmode, 0);
! op2 = force_reg (mode, op2);
! op2 = expand_powi (op2, mode, abs (n / 2));
! op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX,
! 0, OPTAB_LIB_WIDEN);
! /* If the original exponent was negative, reciprocate the
! result. */
! if (n < 0)
! op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode),
! op, NULL_RTX, 0, OPTAB_LIB_WIDEN);
! }
! return op;
! }
! }
! /* Try if the exponent is a third of an integer. In this case
! we can expand to x**(n/3) * cbrt(x)**(n%3). */
! fn = mathfn_built_in (type, BUILT_IN_CBRT);
! if (fn != NULL_TREE)
! {
! real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
! real_round (&c2, mode, &c2);
! n = real_to_integer (&c2);
real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
! real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
! real_convert (&c2, mode, &c2);
! if (real_identical (&c2, &c)
! && ((!optimize_size
! && flag_unsafe_math_optimizations
! && powi_cost (n/3) <= POWI_MAX_MULTS)
! || n == 1))
! {
! tree call_expr = build_function_call_expr (fn, narglist);
! op = expand_builtin (call_expr, NULL_RTX, subtarget, mode, 0);
! if (abs (n) % 3 == 2)
! op = expand_simple_binop (mode, MULT, op, op, op,
! 0, OPTAB_LIB_WIDEN);
! if (n != 1)
{
! op2 = expand_expr (narg0, subtarget, VOIDmode, 0);
! op2 = force_reg (mode, op2);
! op2 = expand_powi (op2, mode, abs (n / 3));
! op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX,
! 0, OPTAB_LIB_WIDEN);
! /* If the original exponent was negative, reciprocate the
! result. */
! if (n < 0)
! op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode),
! op, NULL_RTX, 0, OPTAB_LIB_WIDEN);
}
+ return op;
}
}
! /* Fall back to optab expansion. */
return expand_builtin_mathfn_2 (exp, target, subtarget);
}
Index: testsuite/gcc.dg/builtins-58.c
===================================================================
*** testsuite/gcc.dg/builtins-58.c (revision 0)
--- testsuite/gcc.dg/builtins-58.c (revision 0)
***************
*** 0 ****
--- 1,33 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O -ffast-math -std=c99" } */
+
+ #include "builtins-config.h"
+
+ #ifdef HAVE_C99_RUNTIME
+ double test1 (double x)
+ {
+ return __builtin_pow (x, 1./3.);
+ }
+
+ double test2 (double x)
+ {
+ return __builtin_pow (x, 4./3.);
+ }
+
+ double test3a (double x)
+ {
+ return __builtin_pow (x, 5./3.);
+ }
+
+ double test3b (double x)
+ {
+ return __builtin_pow (x, -5./3.);
+ }
+
+ double test4 (double x)
+ {
+ return __builtin_pow (x, 7./3.);
+ }
+ #endif
+
+ /* { dg-final { scan-assembler-not "pow" } } */
Index: testsuite/gcc.target/i386/pow-1.c
===================================================================
*** testsuite/gcc.target/i386/pow-1.c (revision 0)
--- testsuite/gcc.target/i386/pow-1.c (revision 0)
***************
*** 0 ****
--- 1,24 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O -ffast-math" } */
+
+ double test1 (double x)
+ {
+ return __builtin_pow (x, 1./2.);
+ }
+
+ double test2 (double x)
+ {
+ return __builtin_pow (x, 3./2.);
+ }
+
+ double test3 (double x)
+ {
+ return __builtin_pow (x, 5./2.);
+ }
+
+ double test4 (double x)
+ {
+ return __builtin_pow (x, -5./2.);
+ }
+
+ /* { dg-final { scan-assembler-not "call" } } */
More information about the Gcc-patches
mailing list