This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3)


On Thu, May 26, 2011 at 7:50 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
>
> On Thu, 2011-05-26 at 11:22 +0200, Richard Guenther wrote:
>
> <snip>
>
>
>> There are several extra pre-conditions in the original code in
>> builtins.c as well as commens for reasonings (yes, there seem
>> to be duplicates of several of the transforms there ...). ?Please
>> try to preserve them. ?I noticed especially
>>
>> ? ? ? ? ? ? ? /* Optimize pow (x, 0.25) into sqrt (sqrt (x)). ?Assume on most
>> ? ? ? ? ? ? ? ? ?machines that a builtin sqrt instruction is smaller than a
>> ? ? ? ? ? ? ? ? ?call to pow with 0.25, so do this optimization even if
>> ? ? ? ? ? ? ? ? ?-Os. ?*/
>>
>> and
>>
>> ? ? ? /* Check whether we can do cbrt insstead of pow (x, 1./3.) and
>> ? ? ? ? ?cbrt/sqrts instead of pow (x, 1./6.). ?*/
>> ? ? ? if (cbrtfn && ! op
>> ? ? ? ? ? && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)))
>>
>> where you omit the non-negative check and the check for NANs.
>> Note that -funsafe-math-optimizations does not mean you can
>> ignore nans or signed zero issues. ?pow(x,1/3) -> cbrt(x) does not
>> need a -funsafe-math-optimization check either.
>
> OK. ?Good catch on the non-negative/NaN check -- that just slipped
> through the cracks. ?I've fixed this and beefed up the comments as you
> suggest.
>
>>
>> It would be nice if you could re-arrange this function so that before
>> each individual replacement all preconditions are checked (even if
>> that means duplicating some checks and code) - that way
>> reviewing and later maintainance should be much easier.
>>
>
> Done -- you're right, the result is much cleaner. ?Hopefully this
> version is easier to follow.
>
> Thanks,
> Bill
>
>
> 2011-05-25 ?Bill Schmidt ?<wschmidt@linux.vnet.ibm.com>
>
> ? ? ? ?PR tree-optimization/46728
> ? ? ? ?* tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
> ? ? ? ?(powi_as_mults): Add gimple_set_location.
> ? ? ? ?(build_and_insert_call): New.
> ? ? ? ?(gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
> ? ? ? ?0.5, 0.25, 0.75, 1./3., or 1./6.
>
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c ? ?(revision 174199)
> +++ gcc/tree-ssa-math-opts.c ? ?(working copy)
> @@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
> ? ? }
>
> ? mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
> + ?gimple_set_location (mult_stmt, loc);
> ? gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
>
> ? return ssa_target;
> @@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location
> ? div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? build_real (type, dconst1),
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? result);
> + ?gimple_set_location (div_stmt, loc);
> ? gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
>
> ? return target;
> @@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator *
> ? return NULL_TREE;
> ?}
>
> +/* Build a gimple call statement that calls FN with argument ARG.
> + ? Set the lhs of the call statement to a fresh SSA name for
> + ? variable VAR. ?If VAR is NULL, first allocate it. ?Insert the
> + ? statement prior to GSI's current position, and return the fresh
> + ? SSA name. ?*/
> +
> +static tree
> +build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
> + ? ? ? ? ? ? ? ? ? ? ?tree *var, location_t loc)
> +{
> + ?gimple call_stmt;
> + ?tree ssa_target;
> +
> + ?if (!*var)
> + ? ?{
> + ? ? ?*var = create_tmp_var (TREE_TYPE (arg), "powroot");
> + ? ? ?add_referenced_var (*var);
> + ? ?}
> +
> + ?call_stmt = gimple_build_call (fn, 1, arg);
> + ?ssa_target = make_ssa_name (*var, NULL);
> + ?gimple_set_lhs (call_stmt, ssa_target);
> + ?gimple_set_location (call_stmt, loc);
> + ?gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
> +
> + ?return ssa_target;
> +}
> +
> ?/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
> ? ?with location info LOC. ?If possible, create an equivalent and
> ? ?less expensive sequence of statements prior to GSI, and return an
> @@ -1033,16 +1063,21 @@ static tree
> ?gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
> ? ? ? ? ? ? ? ? ? ? ? ? ? tree arg0, tree arg1)
> ?{
> - ?REAL_VALUE_TYPE c, cint;
> + ?REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
> ? HOST_WIDE_INT n;
> + ?tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, ssa_target;
> + ?tree target = NULL_TREE;
> + ?enum machine_mode mode;
> + ?bool hw_sqrt_exists;
> + ?gimple mult_stmt;
>
> ? /* If the exponent isn't a constant, there's nothing of interest
> ? ? ?to be done. ?*/
> ? if (TREE_CODE (arg1) != REAL_CST)
> ? ? return NULL_TREE;
>
> - ?/* If the exponent is equivalent to an integer, expand it into
> - ? ? multiplies when profitable. ?*/
> + ?/* If the exponent is equivalent to an integer, expand to an optimal
> + ? ? multiplication sequence when profitable. ?*/
> ? c = TREE_REAL_CST (arg1);
> ? n = real_to_integer (&c);
> ? real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> @@ -1054,6 +1089,97 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
> ? ? ? ? ? ? ?&& powi_cost (n) <= POWI_MAX_MULTS)))
> ? ? return gimple_expand_builtin_powi (gsi, loc, arg0, n);
>
> + ?/* Attempt various optimizations using sqrt and cbrt. ?*/
> + ?type = TREE_TYPE (arg0);
> + ?mode = TYPE_MODE (type);
> + ?sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
> +
> + ?/* Optimize pow(x,0.5) = sqrt(x). ?This replacement is always safe
> + ? ? unless signed zeros must be maintained. ?pow(-0,0.5) = +0, while
> + ? ? sqrt(-0) = -0. ?*/
> + ?if (sqrtfn
> + ? ? ?&& REAL_VALUES_EQUAL (c, dconsthalf)
> + ? ? ?&& (flag_unsafe_math_optimizations
> + ? ? ? ? || !HONOR_SIGNED_ZEROS (mode)))

Drop flag_unsafe_math_optimizations - the replacement isn't safe
for -funsafe-math-optimizations -fsigned-zeros.

> + ? ?return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> + ?/* Optimize pow(x,0.25) = sqrt(sqrt(x)). ?Assume on most machines that
> + ? ? a builtin sqrt instruction is smaller than a call to pow with 0.25,
> + ? ? so do this optimization even if -Os. ?Don't do this optimization
> + ? ? if we don't have a hardware sqrt insn. ?*/
> + ?dconst1_4 = dconst1;
> + ?SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
> + ?hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
> +
> + ?if (flag_unsafe_math_optimizations
> + ? ? ?&& sqrtfn
> + ? ? ?&& REAL_VALUES_EQUAL (c, dconst1_4)
> + ? ? ?&& hw_sqrt_exists)
> + ? ?{
> + ? ? ?/* sqrt(x) ?*/
> + ? ? ?sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> + ? ? ?/* sqrt(sqrt(x)) ?*/
> + ? ? ?return build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
> + ? ?}
> +
> + ?/* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
> + ? ? optimizing for space. ?Don't do this optimization if we don't have
> + ? ? a hardware sqrt insn. ?*/
> + ?real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
> + ?SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
> +
> + ?if (flag_unsafe_math_optimizations
> + ? ? ?&& sqrtfn
> + ? ? ?&& optimize_function_for_speed_p (cfun)
> + ? ? ?&& REAL_VALUES_EQUAL (c, dconst3_4)
> + ? ? ?&& hw_sqrt_exists)
> + ? ?{
> + ? ? ?/* sqrt(x) ?*/
> + ? ? ?sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> + ? ? ?/* sqrt(sqrt(x)) ?*/
> + ? ? ?sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
> +
> + ? ? ?/* sqrt(x) * sqrt(sqrt(x)) ?*/
> + ? ? ?ssa_target = make_ssa_name (target, NULL);
> + ? ? ?mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sqrt_arg0, sqrt_sqrt);
> + ? ? ?gimple_set_location (mult_stmt, loc);
> + ? ? ?gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
> +
> + ? ? ?return ssa_target;
> + ? ?}
> +
> + ?/* Optimize pow(x,1./3.) = cbrt(x). ?This is always safe. ?*/
> + ?cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
> + ?dconst1_3 = real_value_truncate (mode, dconst_third ());
> +
> + ?if (cbrtfn
> + ? ? ?&& (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))
> + ? ? ?&& REAL_VALUES_EQUAL (c, dconst1_3))
> + ? ?return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);

As Joseph said this needs to be conditional on flag_unsafe_math_optimization
because of the inexactness of 1./3..
Please add rationale as explained by Joseph for the HONOR_NANS
check.

Now, the tree_expr_nonnegative_p check will always return false
because arg0 is a gimple value.  We would need to implement
either some propagation engine to derive properties for floating
point entities or mimic what tree_expr_nonnegative_p does by
looking at defining statements.  A very simple variant could be
like

bool
gimple_val_nonnegative_p (tree val)
{
  if (tree_expr_nonnegative_p (val))
    return true;

  if (TREE_CODE (val) == SSA_NAME)
   {
     gimple def_stmt = SSA_NAME_DEF_STMT (val);
     if (is_gimple_assign (def_stmt))
       return gimple_assign_rhs_code (def_stmt) == ABS_EXPR;
     else if (is_gimple_call (def_stmt))
       {
          tree fndecl = gimple_call_fndecl (def_stmt);
          if (fndecl
              && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
            {
               switch (DECL_FUNCTION_CODE (fndecl))
                 {
                 CASE_FLT_FN (BUILT_IN_FABS):
                     return true;
                 default:;
                 }
            }
       }
   }

  return false;
}

suitable for gimple-fold.c.

Alternatively you can drop the call and add a FIXME comment.

> +
> + ?/* Optimize pow(x,1./6.) = cbrt(sqrt(x)). ?Don't do this optimization
> + ? ? if we don't have a hardware sqrt insn. ?*/
> + ?dconst1_6 = dconst1_3;
> + ?SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
> +
> + ?if (flag_unsafe_math_optimizations
> + ? ? ?&& sqrtfn
> + ? ? ?&& cbrtfn
> + ? ? ?&& (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))

Likewise.

Ok with these changes.

Thanks,
Richard.

> + ? ? ?&& optimize_function_for_speed_p (cfun)
> + ? ? ?&& hw_sqrt_exists
> + ? ? ?&& REAL_VALUES_EQUAL (c, dconst1_6))
> + ? ?{
> + ? ? ?/* sqrt(x) ?*/
> + ? ? ?sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> + ? ? ?/* cbrt(sqrt(x)) ?*/
> + ? ? ?return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, &target, loc);
> + ? ?}
> +
> ? return NULL_TREE;
> ?}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]