[PATCH] Fix PR25620, pow() expansion missed-optimization, 2nd try

Richard Guenther rguenther@suse.de
Mon Nov 13 14:36:00 GMT 2006


On Sun, 12 Nov 2006, Roger Sayle wrote:

> 
> Hi Richard,
> 
> On Sun, 12 Nov 2006, Richard Guenther wrote:
> > Bootstrapped and regtested on x86_64-unknown-linux-gnu.  SPEC 2k
> > doesn't show a difference, Polyhedron has an improvement for air
> > (down from 21s to 16.5s runtime).
> 
> Cool!  I hadn't appreciated this had such a significant impact on
> polyhedron.

So, here's the 3rd version.  Along the way I fixed another issue (handling
of negative exponents was wrong) and re-shuffled the code to check for the
fallback function early.  As I don't much like deeply indented code, I
have duplicated the fallback call to expand_builtin_mathfn_2 ()
(I also made it correct for errno handling and removed the restriction on
unsafe_math for the fallback from expand_builtin_pow - you added that
back in 2004 but unfortunately the mailing list archives for that year
are corrupt, so I cannot figure out why, and I cannot think of any
reason ;)).  I chose not to add another indent level after the
real_to_integer calls, where we could check for the
!         && ((!optimize_size
!              && flag_unsafe_math_optimizations
!              && powi_cost (n/3) <= POWI_MAX_MULTS)
!             || n == 1))
part.

I chose to use OPTAB_LIB_WIDEN, and I special-cased the expansion to a
plain sqrt () or cbrt () call, which we can do even for !unsafe_math
(I have not updated fold_builtin_pow yet).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Ok for mainline?

Thanks,
Richard.

2006-11-13  Richard Guenther  <rguenther@suse.de>

	PR middle-end/25620
	* builtins.c (expand_builtin_mathfn_2): Handle setting errno
	if flag_errno_math is set, except for remainder and drem, which
	do not set errno at all.
	(expand_builtin_pow): Optimize non integer valued constant
	exponents using sqrt or cbrt if possible.  Always fall back to
	expanding via optabs.

	* gcc.target/i386/pow-1.c: New testcase.
	* gcc.dg/builtins-58.c: Likewise.

Index: builtins.c
===================================================================
*** builtins.c	(revision 118752)
--- builtins.c	(working copy)
*************** expand_builtin_mathfn_2 (tree exp, rtx t
*** 1960,1966 ****
    tree arglist = TREE_OPERAND (exp, 1);
    tree arg0, arg1, temp, narg;
    enum machine_mode mode;
!   bool errno_set = true;
    bool stable = true;
  
    if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LDEXP)
--- 1960,1966 ----
    tree arglist = TREE_OPERAND (exp, 1);
    tree arg0, arg1, temp, narg;
    enum machine_mode mode;
!   bool errno_set = flag_errno_math;
    bool stable = true;
  
    if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LDEXP)
*************** expand_builtin_mathfn_2 (tree exp, rtx t
*** 1986,1992 ****
        builtin_optab = fmod_optab; break;
      CASE_FLT_FN (BUILT_IN_REMAINDER):
      CASE_FLT_FN (BUILT_IN_DREM):
!       builtin_optab = remainder_optab; break;
      default:
        gcc_unreachable ();
      }
--- 1986,1994 ----
        builtin_optab = fmod_optab; break;
      CASE_FLT_FN (BUILT_IN_REMAINDER):
      CASE_FLT_FN (BUILT_IN_DREM):
!       builtin_optab = remainder_optab;
!       errno_set = false;
!       break;
      default:
        gcc_unreachable ();
      }
*************** expand_builtin_mathfn_2 (tree exp, rtx t
*** 2000,2008 ****
  
    target = gen_reg_rtx (mode);
  
-   if (! flag_errno_math || ! HONOR_NANS (mode))
-     errno_set = false;
- 
    /* Always stabilize the argument list.  */
    narg = builtin_save_expr (arg1);
    if (narg != arg1)
--- 2002,2007 ----
*************** expand_powi (rtx x, enum machine_mode mo
*** 2601,2608 ****
  static rtx
  expand_builtin_pow (tree exp, rtx target, rtx subtarget)
  {
    tree arglist = TREE_OPERAND (exp, 1);
!   tree arg0, arg1;
  
    if (! validate_arglist (arglist, REAL_TYPE, REAL_TYPE, VOID_TYPE))
      return 0;
--- 2600,2612 ----
  static rtx
  expand_builtin_pow (tree exp, rtx target, rtx subtarget)
  {
+   tree arg0, arg1, fn, narg0, narglist;
    tree arglist = TREE_OPERAND (exp, 1);
!   tree type = TREE_TYPE (exp);
!   REAL_VALUE_TYPE cint, c, c2;
!   HOST_WIDE_INT n;
!   rtx op, op2;
!   enum machine_mode mode = TYPE_MODE (type);
  
    if (! validate_arglist (arglist, REAL_TYPE, REAL_TYPE, VOID_TYPE))
      return 0;
*************** expand_builtin_pow (tree exp, rtx target
*** 2610,2645 ****
    arg0 = TREE_VALUE (arglist);
    arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  
!   if (TREE_CODE (arg1) == REAL_CST
!       && ! TREE_CONSTANT_OVERFLOW (arg1))
      {
!       REAL_VALUE_TYPE cint;
!       REAL_VALUE_TYPE c;
!       HOST_WIDE_INT n;
  
!       c = TREE_REAL_CST (arg1);
!       n = real_to_integer (&c);
        real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
!       if (real_identical (&c, &cint))
! 	{
! 	  /* If the exponent is -1, 0, 1 or 2, then expand_powi is exact.
! 	     Otherwise, check the number of multiplications required.
! 	     Note that pow never sets errno for an integer exponent.  */
! 	  if ((n >= -1 && n <= 2)
! 	      || (flag_unsafe_math_optimizations
! 		  && ! optimize_size
! 		  && powi_cost (n) <= POWI_MAX_MULTS))
  	    {
! 	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
! 	      rtx op = expand_expr (arg0, subtarget, VOIDmode, 0);
! 	      op = force_reg (mode, op);
! 	      return expand_powi (op, mode, n);
  	    }
  	}
      }
  
!   if (! flag_unsafe_math_optimizations)
!     return NULL_RTX;
    return expand_builtin_mathfn_2 (exp, target, subtarget);
  }
  
--- 2614,2721 ----
    arg0 = TREE_VALUE (arglist);
    arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  
!   if (TREE_CODE (arg1) != REAL_CST
!       || TREE_CONSTANT_OVERFLOW (arg1))
!     return expand_builtin_mathfn_2 (exp, target, subtarget);
! 
!   /* Handle constant exponents.  */
! 
!   /* For integer valued exponents we can expand to an optimal multiplication
!      sequence using expand_powi.  */
!   c = TREE_REAL_CST (arg1);
!   n = real_to_integer (&c);
!   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
!   if (real_identical (&c, &cint)
!       && ((n >= -1 && n <= 2)
! 	  || (flag_unsafe_math_optimizations
! 	      && !optimize_size
! 	      && powi_cost (n) <= POWI_MAX_MULTS)))
!     {
!       op = expand_expr (arg0, subtarget, VOIDmode, 0);
!       if (n != 1)
! 	{
! 	  op = force_reg (mode, op);
! 	  op = expand_powi (op, mode, n);
! 	}
!       return op;
!     }
! 
!   narg0 = builtin_save_expr (arg0);
!   narglist = build_tree_list (NULL_TREE, narg0);
! 
!   /* If the exponent is not integer valued, check if it is half of an integer.
!      In this case we can expand to sqrt (x) * x**(n/2).  */
!   fn = mathfn_built_in (type, BUILT_IN_SQRT);
!   if (fn != NULL_TREE)
      {
!       real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
!       n = real_to_integer (&c2);
!       real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
!       if (real_identical (&c2, &cint)
! 	  && ((flag_unsafe_math_optimizations
! 	       && !optimize_size
! 	       && powi_cost (n/2) <= POWI_MAX_MULTS)
! 	      || n == 1))
! 	{
! 	  tree call_expr = build_function_call_expr (fn, narglist);
! 	  op = expand_builtin (call_expr, NULL_RTX, subtarget, mode, 0);
! 	  if (n != 1)
! 	    {
! 	      op2 = expand_expr (narg0, subtarget, VOIDmode, 0);
! 	      op2 = force_reg (mode, op2);
! 	      op2 = expand_powi (op2, mode, abs (n / 2));
! 	      op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX,
! 					0, OPTAB_LIB_WIDEN);
! 	      /* If the original exponent was negative, reciprocate the
! 		 result.  */
! 	      if (n < 0)
! 		op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode),
! 				   op, NULL_RTX, 0, OPTAB_LIB_WIDEN);
! 	    }
! 	  return op;
! 	}
!     }
  
!   /* Try if the exponent is a third of an integer.  In this case
!      we can expand to x**(n/3) * cbrt(x)**(n%3).  */
!   fn = mathfn_built_in (type, BUILT_IN_CBRT);
!   if (fn != NULL_TREE)
!     {
!       real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
!       real_round (&c2, mode, &c2);
!       n = real_to_integer (&c2);
        real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
!       real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
!       real_convert (&c2, mode, &c2);
!       if (real_identical (&c2, &c)
! 	  && ((!optimize_size
! 	       && flag_unsafe_math_optimizations
! 	       && powi_cost (n/3) <= POWI_MAX_MULTS)
! 	      || n == 1))
! 	{
! 	  tree call_expr = build_function_call_expr (fn, narglist);
! 	  op = expand_builtin (call_expr, NULL_RTX, subtarget, mode, 0);
! 	  if (abs (n) % 3 == 2)
! 	    op = expand_simple_binop (mode, MULT, op, op, op,
! 				      0, OPTAB_LIB_WIDEN);
! 	  if (n != 1)
  	    {
! 	      op2 = expand_expr (narg0, subtarget, VOIDmode, 0);
! 	      op2 = force_reg (mode, op2);
! 	      op2 = expand_powi (op2, mode, abs (n / 3));
! 	      op = expand_simple_binop (mode, MULT, op, op2, NULL_RTX,
! 					0, OPTAB_LIB_WIDEN);
! 	      /* If the original exponent was negative, reciprocate the
! 		 result.  */
! 	      if (n < 0)
! 		op = expand_binop (mode, sdiv_optab, CONST1_RTX (mode),
! 				   op, NULL_RTX, 0, OPTAB_LIB_WIDEN);
  	    }
+ 	  return op;
  	}
      }
  
!   /* Fall back to optab expansion.  */
    return expand_builtin_mathfn_2 (exp, target, subtarget);
  }
  
Index: testsuite/gcc.dg/builtins-58.c
===================================================================
*** testsuite/gcc.dg/builtins-58.c	(revision 0)
--- testsuite/gcc.dg/builtins-58.c	(revision 0)
***************
*** 0 ****
--- 1,33 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O -ffast-math -std=c99" } */
+ 
+ #include "builtins-config.h"
+ 
+ #ifdef HAVE_C99_RUNTIME
+ double test1 (double x)
+ {
+   return __builtin_pow (x, 1./3.);
+ }
+ 
+ double test2 (double x)
+ {
+   return __builtin_pow (x, 4./3.);
+ }
+ 
+ double test3a (double x)
+ {
+   return __builtin_pow (x, 5./3.);
+ }
+ 
+ double test3b (double x)
+ {
+   return __builtin_pow (x, -5./3.);
+ }
+ 
+ double test4 (double x)
+ {
+   return __builtin_pow (x, 7./3.);
+ }
+ #endif
+ 
+ /* { dg-final { scan-assembler-not "pow" } } */
Index: testsuite/gcc.target/i386/pow-1.c
===================================================================
*** testsuite/gcc.target/i386/pow-1.c	(revision 0)
--- testsuite/gcc.target/i386/pow-1.c	(revision 0)
***************
*** 0 ****
--- 1,24 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O -ffast-math" } */
+ 
+ double test1 (double x)
+ {
+   return __builtin_pow (x, 1./2.);
+ }
+ 
+ double test2 (double x)
+ {
+   return __builtin_pow (x, 3./2.);
+ }
+ 
+ double test3 (double x)
+ {
+   return __builtin_pow (x, 5./2.);
+ }
+ 
+ double test4 (double x)
+ {
+   return __builtin_pow (x, -5./2.);
+ }
+ 
+ /* { dg-final { scan-assembler-not "call" } } */



More information about the Gcc-patches mailing list