This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Optimize n?rotate(x,n):x
- From: Richard Biener <richard dot guenther at gmail dot com>
- To: Marc Glisse <marc dot glisse at inria dot fr>
- Cc: Jan Hubicka <hubicka at ucw dot cz>, GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 15 Apr 2014 09:54:42 +0200
- Subject: Re: Optimize n?rotate(x,n):x
- Authentication-results: sourceware.org; auth=none
- References: <alpine dot DEB dot 2 dot 02 dot 1403011240500 dot 28536 at stedding dot saclay dot inria dot fr> <CAFiYyc2wWfrV4kMMewyBzz6m55jZW2TxmiD6x0sqtombs+OLYA at mail dot gmail dot com> <alpine dot DEB dot 2 dot 10 dot 1404131432090 dot 3577 at laptop-mg dot saclay dot inria dot fr> <CAFiYyc0grD-PGzgt3GQ5H76JnMFnsQy=pPicMu3sqjehtKU6Fw at mail dot gmail dot com> <alpine dot DEB dot 2 dot 10 dot 1404141703140 dot 3714 at laptop-mg dot saclay dot inria dot fr>
On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 14 Apr 2014, Richard Biener wrote:
>
>>> + /* If the special case has a high probability, keep it. */
>>> + if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
>>
>>
>> I suppose Honza has a comment on how to test this properly
>> (not sure if ->probability or ->frequency is always initialized properly).
>> for example single_likely_edge tests profile_status_for_fn !=
>> PROFILE_ABSENT (and uses a fixed probability value ...).
>> Anyway, the comparison looks backwards to me, but maybe I'm
>> missing sth - I'd use >= PROB_LIKELY ;)
>
>
> Maybe the comment is confusing? middle_bb contains the expensive operation
> (say a/b) that the special case skips entirely. If the division happens in
> less than 50% of cases (that's the proba of the edge going from cond to
> middle_bb), then doing the comparison+jump may be cheaper and I abort the
> optimization. At least the testcase with __builtin_expect should prove that
> I didn't do it backwards.
Ah, indeed. My mistake.
> value-prof seems to use 50% as the cut-off where it may become interesting
> to special case division, hence my choice of PROB_EVEN. I am not sure which
> way you want to use PROB_LIKELY (80%). If we have more than 80% chances of
> executing the division, always perform it? Or if we have more than 80%
> chances of skipping the division, keep the branch?
Ok, if it's from value-prof then that's fine.
The patch is ok if Honza doesn't have any comments on whether it's ok
to look at ->probability unconditionally.
Thanks,
Richard.
> Attached is the latest version (passed the testsuite).
> Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c (working copy)
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-phiopt1" } */
> +
> +int f(int a, int b, int c) {
> + if (c > 5) return c;
> + if (a == 0) return b;
> + return a + b;
> +}
> +
> +unsigned rot(unsigned x, int n) {
> + const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
> + return (n == 0) ? x : ((x << n) | (x >> (bits - n)));
> +}
> +
> +unsigned m(unsigned a, unsigned b) {
> + if (a == 0)
> + return 0;
> + else
> + return a & b;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "goto" 2 "phiopt1" } } */
> +/* { dg-final { cleanup-tree-dump "phiopt1" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c (working copy)
> @@ -0,0 +1,19 @@
> +/* { dg-do compile { target x86_64-*-* } } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +int f(int a, int b) {
> + if (__builtin_expect(a == 0, 1)) return b;
> + return a + b;
> +}
> +
> +// optab_handler can handle if(b==1) but not a/b
> +// so we consider a/b too expensive.
> +unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
> + if (b == 1)
> + return a;
> + else
> + return a / b;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "goto " 4 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/tree-ssa-phiopt.c
> ===================================================================
> --- gcc/tree-ssa-phiopt.c (revision 209353)
> +++ gcc/tree-ssa-phiopt.c (working copy)
> @@ -140,20 +140,37 @@ static bool gate_hoist_loads (void);
> x = PHI (CONST, a)
>
> Gets replaced with:
> bb0:
> bb2:
> t1 = a == CONST;
> t2 = b > c;
> t3 = t1 & t2;
> x = a;
>
> +
> + It also replaces
> +
> + bb0:
> + if (a != 0) goto bb1; else goto bb2;
> + bb1:
> + c = a + b;
> + bb2:
> + x = PHI <c (bb1), b (bb0), ...>;
> +
> + with
> +
> + bb0:
> + c = a + b;
> + bb2:
> + x = PHI <c (bb0), ...>;
> +
> ABS Replacement
> ---------------
>
> This transformation, implemented in abs_replacement, replaces
>
> bb0:
> if (a >= 0) goto bb2; else goto bb1;
> bb1:
> x = -a;
> bb2:
> @@ -809,20 +826,103 @@ operand_equal_for_value_replacement (con
> if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
> return true;
>
> tmp = gimple_assign_rhs2 (def);
> if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
> return true;
>
> return false;
> }
>
> +/* Returns true if ARG is a neutral element for operation CODE
> + on the RIGHT side. */
> +
> +static bool
> +neutral_element_p (tree_code code, tree arg, bool right)
> +{
> + switch (code)
> + {
> + case PLUS_EXPR:
> + case BIT_IOR_EXPR:
> + case BIT_XOR_EXPR:
> + return integer_zerop (arg);
> +
> + case LROTATE_EXPR:
> + case RROTATE_EXPR:
> + case LSHIFT_EXPR:
> + case RSHIFT_EXPR:
> + case MINUS_EXPR:
> + case POINTER_PLUS_EXPR:
> + return right && integer_zerop (arg);
> +
> + case MULT_EXPR:
> + return integer_onep (arg);
> +
> + case TRUNC_DIV_EXPR:
> + case CEIL_DIV_EXPR:
> + case FLOOR_DIV_EXPR:
> + case ROUND_DIV_EXPR:
> + case EXACT_DIV_EXPR:
> + return right && integer_onep (arg);
> +
> + case BIT_AND_EXPR:
> + return integer_all_onesp (arg);
> +
> + default:
> + return false;
> + }
> +}
> +
> +/* Returns true if ARG is an absorbing element for operation CODE. */
> +
> +static bool
> +absorbing_element_p (tree_code code, tree arg)
> +{
> + switch (code)
> + {
> + case BIT_IOR_EXPR:
> + return integer_all_onesp (arg);
> +
> + case MULT_EXPR:
> + case BIT_AND_EXPR:
> + return integer_zerop (arg);
> +
> + default:
> + return false;
> + }
> +}
> +
> +/* Returns true if the statement is cheap. The simple heuristic used here
> + is that anything the optab knows is cheap. */
> +
> +static bool
> +is_cheap_stmt (gimple stmt)
> +{
> + tree type;
> + if (is_gimple_assign (stmt))
> + {
> + type = TREE_TYPE (gimple_assign_rhs1 (stmt));
> + enum tree_code code = gimple_assign_rhs_code (stmt);
> + optab op = optab_for_tree_code (code, type, optab_scalar);
> + return (op != unknown_optab
> + && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing);
> + }
> + else if (gimple_code (stmt) == GIMPLE_COND)
> + {
> + type = TREE_TYPE (gimple_cond_lhs (stmt));
> + enum rtx_code code = (gimple_cond_code (stmt) == EQ_EXPR) ? EQ : NE;
> + return can_compare_p (code, TYPE_MODE (type), ccp_jump);
> + }
> + else
> + gcc_unreachable ();
> +}
> +
> /* The function value_replacement does the main work of doing the value
> replacement. Return non-zero if the replacement is done. Otherwise
> return
> 0. If we remove the middle basic block, return 2.
> BB is the basic block where the replacement is going to be done on.
> ARG0
> is argument 0 from the PHI. Likewise for ARG1. */
>
> static int
> value_replacement (basic_block cond_bb, basic_block middle_bb,
> edge e0, edge e1, gimple phi,
> tree arg0, tree arg1)
> @@ -833,23 +933,21 @@ value_replacement (basic_block cond_bb,
> enum tree_code code;
> bool emtpy_or_with_defined_p = true;
>
> /* If the type says honor signed zeros we cannot do this
> optimization. */
> if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
> return 0;
>
> /* If there is a statement in MIDDLE_BB that defines one of the PHI
> arguments, then adjust arg0 or arg1. */
> - gsi = gsi_after_labels (middle_bb);
> - if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
> - gsi_next_nondebug (&gsi);
> + gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
> while (!gsi_end_p (gsi))
> {
> gimple stmt = gsi_stmt (gsi);
> tree lhs;
> gsi_next_nondebug (&gsi);
> if (!is_gimple_assign (stmt))
> {
> emtpy_or_with_defined_p = false;
> continue;
> }
> @@ -931,20 +1029,67 @@ value_replacement (basic_block cond_bb,
> print_generic_expr (dump_file, gimple_phi_result (phi), 0);
> fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
> cond_bb->index);
> print_generic_expr (dump_file, arg, 0);
> fprintf (dump_file, ".\n");
> }
> return 1;
> }
>
> }
> +
> + /* Now optimize (x != 0) ? x + y : y to just y.
> + The following condition is too restrictive, there can easily be
> another
> + stmt in middle_bb, for instance a CONVERT_EXPR for the second
> argument. */
> + gimple assign = last_and_only_stmt (middle_bb);
> + if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
> + || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
> + || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> + && !POINTER_TYPE_P (TREE_TYPE (arg0))))
> + return 0;
> +
> + /* assign may call a libgcc routine, which is slow. */
> + if (!is_cheap_stmt (assign) && is_cheap_stmt (cond))
> + return 0;
> +
> + /* If the special case has a high probability, keep it. */
> + if (EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN)
> + return 0;
> +
> + tree lhs = gimple_assign_lhs (assign);
> + tree rhs1 = gimple_assign_rhs1 (assign);
> + tree rhs2 = gimple_assign_rhs2 (assign);
> + enum tree_code code_def = gimple_assign_rhs_code (assign);
> + tree cond_lhs = gimple_cond_lhs (cond);
> + tree cond_rhs = gimple_cond_rhs (cond);
> +
> + if (((code == NE_EXPR && e1 == false_edge)
> + || (code == EQ_EXPR && e1 == true_edge))
> + && arg0 == lhs
> + && ((arg1 == rhs1
> + && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
> + && neutral_element_p (code_def, cond_rhs, true))
> + || (arg1 == rhs2
> + && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
> + && neutral_element_p (code_def, cond_rhs, false))
> + || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
> + && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
> + || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
> + && absorbing_element_p (code_def, cond_rhs))))
> + {
> + gsi = gsi_for_stmt (cond);
> + gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
> + gsi_move_before (&gsi_from, &gsi);
> + replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
> + return 2;
> + }
> +
> return 0;
> }
>
> /* The function minmax_replacement does the main work of doing the minmax
> replacement. Return true if the replacement is done. Otherwise return
> false.
> BB is the basic block where the replacement is going to be done on.
> ARG0
> is argument 0 from the PHI. Likewise for ARG1. */
>
> static bool
>