This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Optimize n?rotate(x,n):x


On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 14 Apr 2014, Richard Biener wrote:
>
>>> +  /* If the special case has a high probability, keep it.  */
>>> +  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
>>
>>
>> I suppose Honza has a comment on how to test this properly
>> (not sure if ->probability or ->frequency is always initialized properly).
>> for example single_likely_edge tests profile_status_for_fn !=
>> PROFILE_ABSENT (and uses a fixed probability value ...).
>> Anyway, the comparison looks backwards to me, but maybe I'm
>> missing sth - I'd use >= PROB_LIKELY ;)
>
>
> Maybe the comment is confusing? middle_bb contains the expensive operation
> (say a/b) that the special case skips entirely. If the division happens in
> less than 50% of cases (that's the proba of the edge going from cond to
> middle_bb), then doing the comparison+jump may be cheaper and I abort the
> optimization. At least the testcase with __builtin_expect should prove that
> I didn't do it backwards.

Ah, indeed.  My mistake.

> value-prof seems to use 50% as the cut-off where it may become interesting
> to special case division, hence my choice of PROB_EVEN. I am not sure which
> way you want to use PROB_LIKELY (80%). If we have more than 80% chances of
> executing the division, always perform it? Or if we have more than 80%
> chances of skipping the division, keep the branch?

Ok, if it's from value-prof then that's fine.

The patch is ok if Honza doesn't have any comments on whether it's ok
to look at ->probability unconditionally.

Thanks,
Richard.

> Attached is the latest version (passed the testsuite).
> Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-phiopt1" } */
> +
> +int f(int a, int b, int c) {
> +  if (c > 5) return c;
> +  if (a == 0) return b;
> +  return a + b;
> +}
> +
> +unsigned rot(unsigned x, int n) {
> +  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
> +  return (n == 0) ? x : ((x << n) | (x >> (bits - n)));
> +}
> +
> +unsigned m(unsigned a, unsigned b) {
> +  if (a == 0)
> +    return 0;
> +  else
> +    return a & b;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt1" } } */
> +/* { dg-final { cleanup-tree-dump "phiopt1" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
> @@ -0,0 +1,19 @@
> +/* { dg-do compile { target x86_64-*-* } } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int f(int a, int b) {
> +  if (__builtin_expect(a == 0, 1)) return b;
> +  return a + b;
> +}
> +
> +// optab_handler can handle if(b==1) but not a/b
> +// so we consider a/b too expensive.
> +unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
> +  if (b == 1)
> +    return a;
> +  else
> +    return a / b;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "goto " 4 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/tree-ssa-phiopt.c
> ===================================================================
> --- gcc/tree-ssa-phiopt.c       (revision 209353)
> +++ gcc/tree-ssa-phiopt.c       (working copy)
> @@ -140,20 +140,37 @@ static bool gate_hoist_loads (void);
>         x = PHI (CONST, a)
>
>     Gets replaced with:
>       bb0:
>       bb2:
>         t1 = a == CONST;
>         t2 = b > c;
>         t3 = t1 & t2;
>         x = a;
>
> +
> +   It also replaces
> +
> +     bb0:
> +       if (a != 0) goto bb1; else goto bb2;
> +     bb1:
> +       c = a + b;
> +     bb2:
> +       x = PHI <c (bb1), b (bb0), ...>;
> +
> +   with
> +
> +     bb0:
> +       c = a + b;
> +     bb2:
> +       x = PHI <c (bb0), ...>;
> +
>     ABS Replacement
>     ---------------
>
>     This transformation, implemented in abs_replacement, replaces
>
>       bb0:
>         if (a >= 0) goto bb2; else goto bb1;
>       bb1:
>         x = -a;
>       bb2:
> @@ -809,20 +826,103 @@ operand_equal_for_value_replacement (con
>    if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
>      return true;
>
>    tmp = gimple_assign_rhs2 (def);
>    if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
>      return true;
>
>    return false;
>  }
>
> +/* Returns true if ARG is a neutral element for operation CODE
> +   on the RIGHT side.  */
> +
> +static bool
> +neutral_element_p (tree_code code, tree arg, bool right)
> +{
> +  switch (code)
> +    {
> +    case PLUS_EXPR:
> +    case BIT_IOR_EXPR:
> +    case BIT_XOR_EXPR:
> +      return integer_zerop (arg);
> +
> +    case LROTATE_EXPR:
> +    case RROTATE_EXPR:
> +    case LSHIFT_EXPR:
> +    case RSHIFT_EXPR:
> +    case MINUS_EXPR:
> +    case POINTER_PLUS_EXPR:
> +      return right && integer_zerop (arg);
> +
> +    case MULT_EXPR:
> +      return integer_onep (arg);
> +
> +    case TRUNC_DIV_EXPR:
> +    case CEIL_DIV_EXPR:
> +    case FLOOR_DIV_EXPR:
> +    case ROUND_DIV_EXPR:
> +    case EXACT_DIV_EXPR:
> +      return right && integer_onep (arg);
> +
> +    case BIT_AND_EXPR:
> +      return integer_all_onesp (arg);
> +
> +    default:
> +      return false;
> +    }
> +}
> +
> +/* Returns true if ARG is an absorbing element for operation CODE.  */
> +
> +static bool
> +absorbing_element_p (tree_code code, tree arg)
> +{
> +  switch (code)
> +    {
> +    case BIT_IOR_EXPR:
> +      return integer_all_onesp (arg);
> +
> +    case MULT_EXPR:
> +    case BIT_AND_EXPR:
> +      return integer_zerop (arg);
> +
> +    default:
> +      return false;
> +    }
> +}
> +
> +/* Returns true if the statement is cheap. The simple heuristic used here
> +   is that anything the optab knows is cheap.  */
> +
> +static bool
> +is_cheap_stmt (gimple stmt)
> +{
> +  tree type;
> +  if (is_gimple_assign (stmt))
> +    {
> +      type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> +      enum tree_code code = gimple_assign_rhs_code (stmt);
> +      optab op = optab_for_tree_code (code, type, optab_scalar);
> +      return (op != unknown_optab
> +             && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing);
> +    }
> +  else if (gimple_code (stmt) == GIMPLE_COND)
> +    {
> +      type = TREE_TYPE (gimple_cond_lhs (stmt));
> +      enum rtx_code code = (gimple_cond_code (stmt) == EQ_EXPR) ? EQ : NE;
> +      return can_compare_p (code, TYPE_MODE (type), ccp_jump);
> +    }
> +  else
> +    gcc_unreachable ();
> +}
> +
>  /*  The function value_replacement does the main work of doing the value
>      replacement.  Return non-zero if the replacement is done.  Otherwise
> return
>      0.  If we remove the middle basic block, return 2.
>      BB is the basic block where the replacement is going to be done on.
> ARG0
>      is argument 0 from the PHI.  Likewise for ARG1.  */
>
>  static int
>  value_replacement (basic_block cond_bb, basic_block middle_bb,
>                    edge e0, edge e1, gimple phi,
>                    tree arg0, tree arg1)
> @@ -833,23 +933,21 @@ value_replacement (basic_block cond_bb,
>    enum tree_code code;
>    bool emtpy_or_with_defined_p = true;
>
>    /* If the type says honor signed zeros we cannot do this
>       optimization.  */
>    if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
>      return 0;
>
>    /* If there is a statement in MIDDLE_BB that defines one of the PHI
>       arguments, then adjust arg0 or arg1.  */
> -  gsi = gsi_after_labels (middle_bb);
> -  if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
> -    gsi_next_nondebug (&gsi);
> +  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
>    while (!gsi_end_p (gsi))
>      {
>        gimple stmt = gsi_stmt (gsi);
>        tree lhs;
>        gsi_next_nondebug (&gsi);
>        if (!is_gimple_assign (stmt))
>         {
>           emtpy_or_with_defined_p = false;
>           continue;
>         }
> @@ -931,20 +1029,67 @@ value_replacement (basic_block cond_bb,
>               print_generic_expr (dump_file, gimple_phi_result (phi), 0);
>               fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
>                        cond_bb->index);
>               print_generic_expr (dump_file, arg, 0);
>               fprintf (dump_file, ".\n");
>              }
>            return 1;
>         }
>
>      }
> +
> +  /* Now optimize (x != 0) ? x + y : y to just y.
> +     The following condition is too restrictive, there can easily be
> another
> +     stmt in middle_bb, for instance a CONVERT_EXPR for the second
> argument.  */
> +  gimple assign = last_and_only_stmt (middle_bb);
> +  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
> +      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
> +      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> +         && !POINTER_TYPE_P (TREE_TYPE (arg0))))
> +    return 0;
> +
> +  /* assign may call a libgcc routine, which is slow.  */
> +  if (!is_cheap_stmt (assign) && is_cheap_stmt (cond))
> +    return 0;
> +
> +  /* If the special case has a high probability, keep it.  */
> +  if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
> +    return 0;
> +
> +  tree lhs = gimple_assign_lhs (assign);
> +  tree rhs1 = gimple_assign_rhs1 (assign);
> +  tree rhs2 = gimple_assign_rhs2 (assign);
> +  enum tree_code code_def = gimple_assign_rhs_code (assign);
> +  tree cond_lhs = gimple_cond_lhs (cond);
> +  tree cond_rhs = gimple_cond_rhs (cond);
> +
> +  if (((code == NE_EXPR && e1 == false_edge)
> +       || (code == EQ_EXPR && e1 == true_edge))
> +      && arg0 == lhs
> +      && ((arg1 == rhs1
> +          && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
> +          && neutral_element_p (code_def, cond_rhs, true))
> +         || (arg1 == rhs2
> +             && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
> +             && neutral_element_p (code_def, cond_rhs, false))
> +         || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
> +             && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
> +                 || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
> +             && absorbing_element_p (code_def, cond_rhs))))
> +    {
> +      gsi = gsi_for_stmt (cond);
> +      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
> +      gsi_move_before (&gsi_from, &gsi);
> +      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
> +      return 2;
> +    }
> +
>    return 0;
>  }
>
>  /*  The function minmax_replacement does the main work of doing the minmax
>      replacement.  Return true if the replacement is done.  Otherwise return
>      false.
>      BB is the basic block where the replacement is going to be done on.
> ARG0
>      is argument 0 from the PHI.  Likewise for ARG1.  */
>
>  static bool
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]