This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: [RFA][PATCH][PR tree-optimization/45685]


On Wed, Dec 11, 2013 at 9:25 PM, Jeff Law <law@redhat.com> wrote:
> On 12/11/13 02:51, Richard Biener wrote:
>>
>>
>> That looks wrong - you want to look at HONOR_NANS on the mode
>> of one of the comparison operands, not of the actual value you want
>> to negate (it's integer and thus never has NaNs).
>>
>> The rest of the patch looks ok to me.
>
> Here's the updated version.  It addresses the two issues you raised.
> Specifically, it adds the hairy condition to avoid this code in cases
> where it's not likely to be useful, and it fixes the call to
> invert_tree_comparison.
>
>
> Bootstrapped and regression tested on x86_64-unknown-linux-gnu.
> OK for the trunk?

Ok.

Thanks,
Richard.
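
(For context on the HONOR_NANS point above: a floating-point comparison
cannot be inverted just by flipping its comparison code once NaNs are
involved, which is why the mode that matters is that of the comparison
operands rather than that of the integer value being negated.  A
standalone C illustration of the semantics, not code from the patch:)

  #include <math.h>
  #include <stdio.h>

  int
  main (void)
  {
    double a = NAN, b = 1.0;

    /* With a NaN operand both tests are false, so GE_EXPR is not the
       inverse of LT_EXPR when the comparison mode honors NaNs.  The
       value the patch negates is integral and never has NaNs, so
       querying HONOR_NANS on its mode would wrongly claim inversion
       is always safe.  */
    printf ("a < b : %d\n", a < b);   /* prints 0 */
    printf ("a >= b: %d\n", a >= b);  /* prints 0, not 1 */
    return 0;
  }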

> jeff
>
>
>         PR tree-optimization/45685
>         * tree-ssa-phiopt.c (neg_replacement): New function.
>         (tree_ssa_phiopt_worker): Call it.
>
>         PR tree-optimization/45685
>         * gcc.dg/tree-ssa/pr45685.c: New test.
>
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
> new file mode 100644
> index 0000000..0628943
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */
> +
> +typedef unsigned long int uint64_t;
> +typedef long int int64_t;
> +int summation_helper_1(int64_t* products, uint64_t count)
> +{
> +       int s = 0;
> +       uint64_t i;
> +       for(i=0; i<count; i++)
> +       {
> +               int64_t val = (products[i]>0) ? 1 : -1;
> +               products[i] *= val;
> +               if(products[i] != i)
> +                       val = -val;
> +               products[i] = val;
> +               s += val;
> +       }
> +       return s;
> +}
> +
> +
> +int summation_helper_2(int64_t* products, uint64_t count)
> +{
> +       int s = 0;
> +       uint64_t i;
> +       for(i=0; i<count; i++)
> +       {
> +               int val = (products[i]>0) ? 1 : -1;
> +               products[i] *= val;
> +               if(products[i] != i)
> +                       val = -val;
> +               products[i] = val;
> +               s += val;
> +       }
> +       return s;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "converted to straightline code" 2 "phiopt1" } } */
> +/* { dg-final { cleanup-tree-dump "phiopt1" } } */
> +
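
(To reproduce what the two dg-final directives above check, compiling
the test by hand with something like "gcc -O3 -fdump-tree-phiopt1-details
-S pr45685.c" should leave a dump file named pr45685.c.*t.phiopt1
containing two "converted to straightline code" notes; the numeric
component of the dump suffix varies across GCC versions.)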
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index 11e565f..96154fb 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -69,6 +69,8 @@ static bool minmax_replacement (basic_block, basic_block,
>                                 edge, edge, gimple, tree, tree);
>  static bool abs_replacement (basic_block, basic_block,
>                              edge, edge, gimple, tree, tree);
> +static bool neg_replacement (basic_block, basic_block,
> +                            edge, edge, gimple, tree, tree);
>  static bool cond_store_replacement (basic_block, basic_block, edge, edge,
>                                     struct pointer_set_t *);
>  static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
> @@ -336,6 +338,23 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
>      /* Calculate the set of non-trapping memory accesses.  */
>      nontrap = get_non_trapping ();
>
> +  /* The replacement of conditional negation with a non-branching
> +     sequence is really only a win when optimizing for speed and we
> +     can avoid transformations by gimple if-conversion that result
> +     in poor RTL generation.
> +
> +     Ideally either gimple if-conversion or the RTL expanders will
> +     be improved and the code to emit branchless conditional negation
> +     can be removed.  */
> +  bool replace_conditional_negation = false;
> +  if (!do_store_elim)
> +    replace_conditional_negation
> +      = ((!optimize_size && optimize >= 2)
> +        || (((flag_tree_loop_vectorize || cfun->has_force_vect_loops)
> +             && flag_tree_loop_if_convert != 0)
> +            || flag_tree_loop_if_convert == 1
> +            || flag_tree_loop_if_convert_stores == 1));
> +
>    /* Search every basic block for COND_EXPR we may be able to optimize.
>
>       We walk the blocks in order that guarantees that a block with
> @@ -489,6 +508,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
>             cfgchanged = true;
>           else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>             cfgchanged = true;
> +         else if (replace_conditional_negation
> +                  && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
> +           cfgchanged = true;
>           else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
>             cfgchanged = true;
>         }
> @@ -1285,6 +1307,143 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
>    return true;
>  }
>
> +/*  The function neg_replacement replaces conditional negation with
> +    equivalent straight line code.  Returns TRUE if replacement is done,
> +    otherwise returns FALSE.
> +
> +    COND_BB branches around negation occurring in MIDDLE_BB.
> +
> +    E0 and E1 are edges out of COND_BB.  E0 reaches MIDDLE_BB and
> +    E1 reaches the other successor which should contain PHI with
> +    arguments ARG0 and ARG1.
> +
> +    Assuming negation is to occur when the condition is true,
> +    then the non-branching sequence is:
> +
> +       result = (rhs ^ -cond) + cond
> +
> +    Inverting the condition or its result gives us negation
> +    when the original condition is false.  */
> +
> +static bool
> +neg_replacement (basic_block cond_bb, basic_block middle_bb,
> +                edge e0 ATTRIBUTE_UNUSED, edge e1,
> +                gimple phi, tree arg0, tree arg1)
> +{
> +  gimple new_stmt, cond;
> +  gimple_stmt_iterator gsi;
> +  gimple assign;
> +  edge true_edge, false_edge;
> +  tree rhs, lhs;
> +  enum tree_code cond_code;
> +  bool invert = false;
> +
> +  /* This transformation performs logical operations on the
> +     incoming arguments.  So require them to be integral types.  */
> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
> +    return false;
> +
> +  /* MIDDLE_BB must have only one executable statement which must have the
> +     form arg0 = -arg1 or arg1 = -arg0.  */
> +
> +  assign = last_and_only_stmt (middle_bb);
> +  /* If we did not find the proper negation assignment, then we cannot
> +     optimize.  */
> +  if (assign == NULL)
> +    return false;
> +
> +  /* If we got here, then we have found the only executable statement
> +     in MIDDLE_BB.  If it is anything other than arg0 = -arg1 or
> +     arg1 = -arg0, then we cannot optimize.  */
> +  if (gimple_code (assign) != GIMPLE_ASSIGN)
> +    return false;
> +
> +  lhs = gimple_assign_lhs (assign);
> +
> +  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
> +    return false;
> +
> +  rhs = gimple_assign_rhs1 (assign);
> +
> +  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
> +  if (!(lhs == arg0 && rhs == arg1)
> +      && !(lhs == arg1 && rhs == arg0))
> +    return false;
> +
> +  /* The basic sequence assumes we negate when the condition is true.
> +     If we need the opposite, then we will either need to invert the
> +     condition or its result.  */
> +  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
> +  invert = false_edge->dest == middle_bb;
> +
> +  /* Unlike abs_replacement, we can handle arbitrary conditionals here.  */
> +  cond = last_stmt (cond_bb);
> +  cond_code = gimple_cond_code (cond);
> +
> +  /* If inversion is needed, first try to invert the test since
> +     that's cheapest.  */
> +  if (invert)
> +    {
> +      bool honor_nans
> +       = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (cond))));
> +      enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
> +
> +      /* If invert_tree_comparison was successful, then use its return
> +        value as the new code and note that inversion is no longer
> +        needed.  */
> +      if (new_code != ERROR_MARK)
> +       {
> +         cond_code = new_code;
> +         invert = false;
> +       }
> +    }
> +
> +  tree cond_val = make_ssa_name (boolean_type_node, NULL);
> +  new_stmt = gimple_build_assign_with_ops (cond_code, cond_val,
> +                                          gimple_cond_lhs (cond),
> +                                          gimple_cond_rhs (cond));
> +  gsi = gsi_last_bb (cond_bb);
> +  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  /* If we still need inversion, then invert the result of the
> +     condition.  */
> +  if (invert)
> +    {
> +      tree tmp = make_ssa_name (boolean_type_node, NULL);
> +      new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp,
> +                                              cond_val, boolean_true_node);
> +      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +      cond_val = tmp;
> +    }
> +
> +  /* Get the condition in the right type so that we can perform
> +     logical and arithmetic operations on it.  */
> +  tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL);
> +  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, cond_val_converted,
> +                                          cond_val, NULL_TREE);
> +  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL);
> +  new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, neg_cond_val_converted,
> +                                          cond_val_converted, NULL_TREE);
> +  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  tree tmp = make_ssa_name (TREE_TYPE (rhs), NULL);
> +  new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp,
> +                                          rhs, neg_cond_val_converted);
> +  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  tree new_lhs = make_ssa_name (TREE_TYPE (rhs), NULL);
> +  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, new_lhs,
> +                                          tmp, cond_val_converted);
> +  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
> +
> +  replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
> +
> +  /* Note that we optimized this PHI.  */
> +  return true;
> +}
> +
>  /* Auxiliary functions to determine the set of memory accesses which
>     can't trap because they are preceded by accesses to the same memory
>     portion.  We do that for MEM_REFs, so we only need to track
>
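
(The core of the transformation is the identity documented in
neg_replacement's comment: result = (rhs ^ -cond) + cond.  Below is a
standalone C sketch, illustration only and not part of the patch,
checking that the straight-line sequence really is conditional negation
on a two's-complement target:)

  #include <assert.h>
  #include <stdint.h>

  /* Branchless conditional negation: -RHS when COND is 1, RHS when
     COND is 0.  COND == 1 yields (rhs ^ -1) + 1, i.e. ~rhs + 1, which
     is -rhs in two's complement; COND == 0 yields (rhs ^ 0) + 0.  */
  static int64_t
  cond_neg (int64_t rhs, int64_t cond)
  {
    return (rhs ^ -cond) + cond;
  }

  int
  main (void)
  {
    for (int64_t rhs = -4; rhs <= 4; rhs++)
      {
        assert (cond_neg (rhs, 1) == -rhs);
        assert (cond_neg (rhs, 0) == rhs);
      }

    /* When negation belongs on the false edge, the pass first tries
       invert_tree_comparison; only if that fails (e.g. a float
       compare where NaNs are honored) does it emit the extra
       "cond ^ 1" statement feeding the same sequence.  */
    assert (cond_neg (7, 0 ^ 1) == -7);
    return 0;
  }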

