This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Handle fractional exponents in gimple_expand_builtin_pow (PR46728 patch 5)
- From: "William J. Schmidt" <wschmidt at linux dot vnet dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sun, 29 May 2011 12:06:47 -0500
- Subject: [PATCH] Handle fractional exponents in gimple_expand_builtin_pow (PR46728 patch 5)
This patch adds support for fractional exponents C where 2C or 3C is
equivalent to an integer. There is one FIXME in place for the issue
previously noted with respect to tree_expr_nonnegative_p, which I plan
to look at later this week.
Regtested on powerpc64-linux and examined code generation for
correctness and performance. OK for trunk?
Thanks,
Bill
2011-05-29 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* tree-ssa-math-opts.c (build_and_insert_binop): New.
(gimple_expand_pow_frac_exp): New.
(gimple_expand_builtin_pow): Use build_and_insert_binop and
gimple_expand_pow_frac_exp.
Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c (revision 174395)
+++ gcc/tree-ssa-math-opts.c (working copy)
@@ -1054,6 +1054,161 @@ build_and_insert_call (gimple_stmt_iterator *gsi,
return ssa_target;
}
+/* Build a gimple binary operation with the given CODE and arguments
+ ARG0, ARG1, assigning the result to a new SSA name for variable
+ TARGET. Insert the statement prior to GSI's current position, and
+ return the fresh SSA name.*/
+
+static tree
+build_and_insert_binop (gimple_stmt_iterator *gsi, enum tree_code code,
+ tree arg0, tree arg1, tree target, location_t loc)
+{
+ tree result = make_ssa_name (target, NULL);
+ gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
+ gimple_set_location (stmt, loc);
+ gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+ return result;
+}
+
+/* Attempt to optimize pow(ARG0,C), where C is a real constant not
+ equal to any integer. When 2C or 3C is an integer, we can sometimes
+ improve the code using sqrt and/or cbrt. */
+
+static tree
+gimple_expand_pow_frac_exp (gimple_stmt_iterator *gsi, location_t loc,
+ tree arg0, REAL_VALUE_TYPE c)
+{
+ REAL_VALUE_TYPE c2, cint, dconst3;
+ HOST_WIDE_INT n;
+ tree type = TREE_TYPE (arg0);
+ enum machine_mode mode = TYPE_MODE (type);
+ tree sqrtfn = mathfn_built_in (TREE_TYPE (arg0), BUILT_IN_SQRT);
+ tree cbrtfn;
+
+ /* Optimize pow(x,c), where n = 2c for some nonzero integer n, into
+
+ sqrt(x) * powi(x, n/2), n > 0;
+ 1.0 / (sqrt(x) * powi(x, abs(n/2))), n < 0.
+
+ Do not calculate the powi factor when n/2 = 0. */
+ real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
+ n = real_to_integer (&c2);
+ real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+
+ if (flag_unsafe_math_optimizations
+ && sqrtfn
+ && real_identical (&c2, &cint))
+ {
+ tree powi_x_ndiv2 = NULL_TREE;
+ tree sqrt_arg0, result;
+ tree target = NULL_TREE;
+
+ /* Attempt to fold powi(arg0, abs(n/2)) into multiplies. If not
+ possible or profitable, give up. Skip the degenerate case when
+ n is 1 or -1, where the result is always 1. */
+ if (abs (n) != 1)
+ {
+ powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0, abs(n/2));
+ if (!powi_x_ndiv2)
+ return NULL_TREE;
+ }
+
+ /* Calculate sqrt(x). When n is not 1 or -1, multiply it by the
+ result of the optimal multiply sequence just calculated. */
+ sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+ if (abs (n) == 1)
+ result = sqrt_arg0;
+ else
+ result = build_and_insert_binop (gsi, MULT_EXPR, sqrt_arg0,
+ powi_x_ndiv2, target, loc);
+
+ /* If n is negative, reciprocate the result. */
+ if (n < 0)
+ result = build_and_insert_binop (gsi, RDIV_EXPR,
+ build_real (type, dconst1),
+ result, target, loc);
+ return result;
+ }
+
+ /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
+
+ powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
+ 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
+
+ Do not calculate the first factor when n/3 = 0. As cbrt(x) is
+ different from pow(x, 1./3.) due to rounding and behavior with
+ negative x, we need to constrain this transformation to unsafe
+ math and positive x or finite math. */
+ cbrtfn = mathfn_built_in (TREE_TYPE (arg0), BUILT_IN_CBRT);
+
+ real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
+ real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
+ real_round (&c2, mode, &c2);
+ n = real_to_integer (&c2);
+ real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+ real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
+ real_convert (&c2, mode, &c2);
+
+ if (flag_unsafe_math_optimizations
+ && cbrtfn
+ /* FIXME: The following line was originally
+ && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)),
+ but since arg0 is a gimple value, the first predicate
+ will always return false. It needs to be replaced with a
+ call to a similar gimple_val_nonnegative_p function to be
+ added in gimple-fold.c. */
+ && !HONOR_NANS (mode)
+ && real_identical (&c2, &c)
+ && optimize_function_for_speed_p (cfun)
+ && powi_cost (n / 3) <= POWI_MAX_MULTS)
+ {
+ tree powi_x_ndiv3 = NULL_TREE;
+ tree cbrt_x, powi_cbrt_x, result;
+ tree target = NULL_TREE;
+
+ /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
+ possible or profitable, give up. Skip the degenerate case when
+ abs(n) < 3, where the result is always 1. */
+ if (abs (n) >= 3)
+ {
+ powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
+ abs (n / 3));
+ if (!powi_x_ndiv3)
+ return NULL_TREE;
+ }
+
+ /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
+ as that creates an unnecessary variable. Instead, just produce
+ either cbrt(x) or cbrt(x) * cbrt(x). */
+ cbrt_x = build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
+
+ if (abs (n) % 3 == 1)
+ powi_cbrt_x = cbrt_x;
+ else
+ powi_cbrt_x = build_and_insert_binop (gsi, MULT_EXPR, cbrt_x,
+ cbrt_x, target, loc);
+
+ /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
+ if (abs (n) < 3)
+ result = powi_cbrt_x;
+ else
+ result = build_and_insert_binop (gsi, MULT_EXPR, powi_x_ndiv3,
+ powi_cbrt_x, target, loc);
+
+ /* If n is negative, reciprocate the result. */
+ if (n < 0)
+ result = build_and_insert_binop (gsi, RDIV_EXPR,
+ build_real (type, dconst1),
+ result, target, loc);
+
+ return result;
+ }
+
+ /* No optimizations succeeded. */
+ return NULL_TREE;
+}
+
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
with location info LOC. If possible, create an equivalent and
less expensive sequence of statements prior to GSI, and return an
@@ -1065,11 +1220,10 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
{
REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
HOST_WIDE_INT n;
- tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, ssa_target;
+ tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt;
tree target = NULL_TREE;
enum machine_mode mode;
bool hw_sqrt_exists;
- gimple mult_stmt;
/* If the exponent isn't a constant, there's nothing of interest
to be done. */
@@ -1141,13 +1295,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
/* sqrt(x) * sqrt(sqrt(x)) */
- ssa_target = make_ssa_name (target, NULL);
- mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
- sqrt_arg0, sqrt_sqrt);
- gimple_set_location (mult_stmt, loc);
- gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
-
- return ssa_target;
+ return build_and_insert_binop (gsi, MULT_EXPR, sqrt_arg0, sqrt_sqrt,
+ target, loc);
}
/* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
@@ -1197,7 +1346,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, &target, loc);
}
- return NULL_TREE;
+ /* Attempt to optimize various other fractional exponents. */
+ return gimple_expand_pow_frac_exp (gsi, loc, arg0, c);
}
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1