This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix PR25620, pow() expansion missed-optimization


This implements expansion of pow (x, N/2) and pow (x, N/3) to sqrt/cbrt
and mult/add sequences.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Ok for mainline?

(There is a related Fortran change to make __builtin_cbrt available there,
but Fortran bootstrap is currently broken for me due to the C99 inline
changes, so I left that part out.)

Thanks,
Richard.

:ADDPATCH middle-end:

2006-11-04  Richard Guenther  <rguenther@suse.de>

	PR middle-end/25620
	* builtins.c (expand_builtin_pow, expand_builtin_powi): Split
	constant exponent expansion ...
	(expand_constant_power): ... here.  Use sqrt or cbrt if that
	makes the remaining exponent integer.

	* gcc.target/i386/pow-1.c: New testcase.
	* gcc.dg/builtins-58.c: Likewise.

Index: builtins.c
===================================================================
*** builtins.c	(revision 118450)
--- builtins.c	(working copy)
*************** expand_powi (rtx x, enum machine_mode mo
*** 2590,2595 ****
--- 2590,2683 ----
    return result;
  }
  
+ /* Expand ARG0 raised to the constant ARG1 as an expand_powi sequence,
+    applied to sqrt/cbrt of ARG0 when a real ARG1 is N/2 or N/3.  TYPE
+    gives the mode and the root builtin; SUBTARGET is passed on when
+    expanding ARG0.  Unless UNSAFE_MATH, only integer exponents -1..2
+    (exact) are expanded.  Return NULL_RTX if nothing is emitted.  */
+ 
+ static rtx
+ expand_constant_power (tree type, tree arg0, tree arg1, 
+ 		       rtx subtarget, bool unsafe_math)
+ {
+   enum machine_mode mode = TYPE_MODE (type);
+   HOST_WIDE_INT n;
+   tree fn = NULL_TREE;
+ 
+   /* Try to extract the constant integer valued exponent from either
+      a constant real exponent or a valid constant integer exponent.  */
+   if (TREE_CODE (arg1) == REAL_CST
+       && ! TREE_CONSTANT_OVERFLOW (arg1))
+     {
+       REAL_VALUE_TYPE cint;
+       REAL_VALUE_TYPE c, c2;
+ 
+       c = TREE_REAL_CST (arg1);
+       n = real_to_integer (&c);
+       real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+       if (!real_identical (&c, &cint))
+         {
+ 	  /* If the exponent is not integer valued, check if it is N/2 or
+ 	     N/3 for an integer N, choosing sqrt or cbrt respectively.  */
+ 	  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
+ 	  n = real_to_integer (&c2);
+ 	  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+ 	  if (!real_identical (&c2, &cint))
+ 	    {
+ 	      real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
+ 	      real_round (&c2, mode, &c2);
+ 	      n = real_to_integer (&c2);
+ 	      real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+ 	      real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
+ 	      real_convert (&c2, mode, &c2);
+ 	      if (real_identical (&c2, &c))
+ 	        fn = mathfn_built_in (type, BUILT_IN_CBRT);
+ 	    }
+ 	  else
+ 	    fn = mathfn_built_in (type, BUILT_IN_SQRT);
+ 	  if (!fn)
+ 	    return NULL_RTX;
+ 	}
+     }
+   else if (host_integerp (arg1, 0)
+ 	   && ! TREE_CONSTANT_OVERFLOW (arg1))
+     n = TREE_INT_CST_LOW (arg1);
+   else
+     return NULL_RTX;
+ 
+   /* Integer exponents -1, 0, 1 and 2 are exact in expand_powi.  Any other
+      case, including a sqrt/cbrt step, needs UNSAFE_MATH and a bounded
+      multiplication count.  pow never sets errno for an integer exponent.  */
+   if ((n >= -1 && n <= 2 && !fn)
+       || (unsafe_math
+ 	  && ! optimize_size
+ 	  && powi_cost (n) <= POWI_MAX_MULTS))
+     {
+       rtx op;
+ 
+       /* For N/2 and N/3 exponents, raise sqrt/cbrt (ARG0) to N instead.  */
+       if (fn)
+ 	{
+ 	  tree arg = build_tree_list (NULL_TREE, arg0);
+ 	  tree call_expr = build_function_call_expr (fn, arg);
+ 	  op = expand_builtin (call_expr, NULL_RTX, subtarget, mode, 0);
+ 	}
+       else
+ 	op = expand_expr (arg0, subtarget, VOIDmode, 0);
+ 
+       /* Raise OP to the integer power N; N == 1 needs no multiply.  */
+       if (n != 1)
+         {
+ 	  op = force_reg (mode, op);
+ 	  return expand_powi (op, mode, n);
+ 	}
+       else
+         return op;
+     }
+ 
+   return NULL_RTX;
+ }
+ 
+ 
  /* Expand a call to the pow built-in mathematical function.  Return 0 if
     a normal call should be emitted rather than expanding the function
     in-line.  EXP is the expression that is a call to the builtin
*************** expand_builtin_pow (tree exp, rtx target
*** 2607,2639 ****
    arg0 = TREE_VALUE (arglist);
    arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  
!   if (TREE_CODE (arg1) == REAL_CST
!       && ! TREE_CONSTANT_OVERFLOW (arg1))
!     {
!       REAL_VALUE_TYPE cint;
!       REAL_VALUE_TYPE c;
!       HOST_WIDE_INT n;
! 
!       c = TREE_REAL_CST (arg1);
!       n = real_to_integer (&c);
!       real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
!       if (real_identical (&c, &cint))
! 	{
! 	  /* If the exponent is -1, 0, 1 or 2, then expand_powi is exact.
! 	     Otherwise, check the number of multiplications required.
! 	     Note that pow never sets errno for an integer exponent.  */
! 	  if ((n >= -1 && n <= 2)
! 	      || (flag_unsafe_math_optimizations
! 		  && ! optimize_size
! 		  && powi_cost (n) <= POWI_MAX_MULTS))
! 	    {
! 	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
! 	      rtx op = expand_expr (arg0, subtarget, VOIDmode, 0);
! 	      op = force_reg (mode, op);
! 	      return expand_powi (op, mode, n);
! 	    }
! 	}
!     }
  
    if (! flag_unsafe_math_optimizations)
      return NULL_RTX;
--- 2695,2705 ----
    arg0 = TREE_VALUE (arglist);
    arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  
!   /* Handle constant exponents.  */
!   target = expand_constant_power (TREE_TYPE (exp), arg0, arg1,
! 				  subtarget, flag_unsafe_math_optimizations);
!   if (target)
!     return target;
  
    if (! flag_unsafe_math_optimizations)
      return NULL_RTX;
*************** expand_builtin_powi (tree exp, rtx targe
*** 2661,2686 ****
    arg1 = TREE_VALUE (TREE_CHAIN (arglist));
    mode = TYPE_MODE (TREE_TYPE (exp));
  
!   /* Handle constant power.  */
! 
!   if (TREE_CODE (arg1) == INTEGER_CST
!       && ! TREE_CONSTANT_OVERFLOW (arg1))
!     {
!       HOST_WIDE_INT n = TREE_INT_CST_LOW (arg1);
  
-       /* If the exponent is -1, 0, 1 or 2, then expand_powi is exact.
- 	 Otherwise, check the number of multiplications required.  */
-       if ((TREE_INT_CST_HIGH (arg1) == 0
- 	   || TREE_INT_CST_HIGH (arg1) == -1)
- 	  && ((n >= -1 && n <= 2)
- 	      || (! optimize_size
- 		  && powi_cost (n) <= POWI_MAX_MULTS)))
- 	{
- 	  op0 = expand_expr (arg0, subtarget, VOIDmode, 0);
- 	  op0 = force_reg (mode, op0);
- 	  return expand_powi (op0, mode, n);
- 	}
-     }
  
    /* Emit a libcall to libgcc.  */
  
--- 2727,2737 ----
    arg1 = TREE_VALUE (TREE_CHAIN (arglist));
    mode = TYPE_MODE (TREE_TYPE (exp));
  
!   /* Handle constant exponents.  */
!   target = expand_constant_power (TREE_TYPE (exp), arg0, arg1, subtarget, true);
!   if (target)
!     return target;
  
  
    /* Emit a libcall to libgcc.  */
  
Index: testsuite/gcc.target/i386/pow-1.c
===================================================================
*** testsuite/gcc.target/i386/pow-1.c	(revision 0)
--- testsuite/gcc.target/i386/pow-1.c	(revision 0)
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O -ffast-math" } */
+ 
+ double test1 (double x)
+ {
+   return __builtin_pow (x, 1./2.);
+ }
+ 
+ double test2 (double x)
+ {
+   return __builtin_pow (x, 3./2.);
+ }
+ 
+ double test3 (double x)
+ {
+   return __builtin_pow (x, 5./2.);
+ }
+ 
+ /* { dg-final { scan-assembler-not "call" } } */
Index: testsuite/gcc.dg/builtins-58.c
===================================================================
*** testsuite/gcc.dg/builtins-58.c	(revision 0)
--- testsuite/gcc.dg/builtins-58.c	(revision 0)
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O -ffast-math -std=c99" } */
+ 
+ double test1 (double x)
+ {
+   return __builtin_pow (x, 1./3.);
+ }
+ 
+ double test2 (double x)
+ {
+   return __builtin_pow (x, 4./3.);
+ }
+ 
+ double test3 (double x)
+ {
+   return __builtin_pow (x, 7./3.);
+ }
+ 
+ /* { dg-final { scan-assembler-times "cbrt" 3 } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]