[PATCH] Slightly improve phiopt value_replacement optimization (PR middle-end/62263, PR middle-end/82498)
Richard Biener
rguenther@suse.de
Sun Oct 15 12:14:00 GMT 2017
On October 13, 2017 9:43:10 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>First of all, there was a typo, we are optimizing
>(x != 0) ? x + y : y
>into x + y rather than y.
>
>And, as the comment mentions, the condition that there is just a single
>stmt is too restrictive, and in the various patterns posted in the
>various rotate-related PRs there are cases where in addition to the last
>assign stmt there are some preparation statements (sometimes
>conversion,
>sometimes bit masking with constant, sometimes both).
>I think it is undesirable to turn the conditional code into
>always-executed code if it is too large, so this patch allows just a few
>very cheap preparation statements which feed each other, and it is
>easily possible to propagate the cond_rhs constant through those
>statements to compute what the result from those would be (and to make
>sure there is no UB).
>
>With this patch on top of the previous one, on the rotate-8.c testcase
>on x86_64-linux and i686-linux we get the most efficient code in all
>cases - just a rol/ror instruction, with perhaps some moves into
>registers needed to perform that, but without any conditionals,
>masking, etc.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.
Richard.
>2017-10-13 Jakub Jelinek <jakub@redhat.com>
>
> PR middle-end/62263
> PR middle-end/82498
> * tree-ssa-phiopt.c (value_replacement): Comment fix. Handle
> up to 2 preparation statements for ASSIGN in MIDDLE_BB.
>
> * c-c++-common/rotate-8.c: Expect no PHIs in optimized dump.
>
>--- gcc/tree-ssa-phiopt.c.jj 2017-10-10 22:04:08.000000000 +0200
>+++ gcc/tree-ssa-phiopt.c 2017-10-13 17:55:01.578450763 +0200
>@@ -995,11 +995,13 @@ value_replacement (basic_block cond_bb,
>
> }
>
>- /* Now optimize (x != 0) ? x + y : y to just y.
>- The following condition is too restrictive, there can easily be
>another
>- stmt in middle_bb, for instance a CONVERT_EXPR for the second
>argument. */
>- gimple *assign = last_and_only_stmt (middle_bb);
>- if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
>+ /* Now optimize (x != 0) ? x + y : y to just x + y. */
>+ gsi = gsi_last_nondebug_bb (middle_bb);
>+ if (gsi_end_p (gsi))
>+ return 0;
>+
>+ gimple *assign = gsi_stmt (gsi);
>+ if (!is_gimple_assign (assign)
> || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
> || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> && !POINTER_TYPE_P (TREE_TYPE (arg0))))
>@@ -1009,6 +1011,71 @@ value_replacement (basic_block cond_bb,
> if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
> return 0;
>
>+ /* Allow up to 2 cheap preparation statements that prepare argument
>+ for assign, e.g.:
>+ if (y_4 != 0)
>+ goto <bb 3>;
>+ else
>+ goto <bb 4>;
>+ <bb 3>:
>+ _1 = (int) y_4;
>+ iftmp.0_6 = x_5(D) r<< _1;
>+ <bb 4>:
>+ # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)>
>+ or:
>+ if (y_3(D) == 0)
>+ goto <bb 4>;
>+ else
>+ goto <bb 3>;
>+ <bb 3>:
>+ y_4 = y_3(D) & 31;
>+ _1 = (int) y_4;
>+ _6 = x_5(D) r<< _1;
>+ <bb 4>:
>+ # _2 = PHI <x_5(D)(2), _6(3)> */
>+ gimple *prep_stmt[2] = { NULL, NULL };
>+ int prep_cnt;
>+ for (prep_cnt = 0; ; prep_cnt++)
>+ {
>+ gsi_prev_nondebug (&gsi);
>+ if (gsi_end_p (gsi))
>+ break;
>+
>+ gimple *g = gsi_stmt (gsi);
>+ if (gimple_code (g) == GIMPLE_LABEL)
>+ break;
>+
>+ if (prep_cnt == 2 || !is_gimple_assign (g))
>+ return 0;
>+
>+ tree lhs = gimple_assign_lhs (g);
>+ tree rhs1 = gimple_assign_rhs1 (g);
>+ use_operand_p use_p;
>+ gimple *use_stmt;
>+ if (TREE_CODE (lhs) != SSA_NAME
>+ || TREE_CODE (rhs1) != SSA_NAME
>+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
>+ || !single_imm_use (lhs, &use_p, &use_stmt)
>+ || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign))
>+ return 0;
>+ switch (gimple_assign_rhs_code (g))
>+ {
>+ CASE_CONVERT:
>+ break;
>+ case PLUS_EXPR:
>+ case BIT_AND_EXPR:
>+ case BIT_IOR_EXPR:
>+ case BIT_XOR_EXPR:
>+ if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST)
>+ return 0;
>+ break;
>+ default:
>+ return 0;
>+ }
>+ prep_stmt[prep_cnt] = g;
>+ }
>+
> /* Only transform if it removes the condition. */
>if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
>e0, e1))
> return 0;
>@@ -1019,7 +1086,7 @@ value_replacement (basic_block cond_bb,
> && profile_status_for_fn (cfun) != PROFILE_ABSENT
>&& EDGE_PRED (middle_bb, 0)->probability < profile_probability::even ()
> /* If assign is cheap, there is no point avoiding it. */
>- && estimate_num_insns (assign, &eni_time_weights)
>+ && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights)
> >= 3 * estimate_num_insns (cond, &eni_time_weights))
> return 0;
>
>@@ -1030,6 +1097,32 @@ value_replacement (basic_block cond_bb,
> tree cond_lhs = gimple_cond_lhs (cond);
> tree cond_rhs = gimple_cond_rhs (cond);
>
>+ /* Propagate the cond_rhs constant through preparation stmts,
>+ make sure UB isn't invoked while doing that. */
>+ for (int i = prep_cnt - 1; i >= 0; --i)
>+ {
>+ gimple *g = prep_stmt[i];
>+ tree grhs1 = gimple_assign_rhs1 (g);
>+ if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1))
>+ return 0;
>+ cond_lhs = gimple_assign_lhs (g);
>+ cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs);
>+ if (TREE_CODE (cond_rhs) != INTEGER_CST
>+ || TREE_OVERFLOW (cond_rhs))
>+ return 0;
>+ if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS)
>+ {
>+ cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs,
>+ gimple_assign_rhs2 (g));
>+ if (TREE_OVERFLOW (cond_rhs))
>+ return 0;
>+ }
>+ cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs);
>+ if (TREE_CODE (cond_rhs) != INTEGER_CST
>+ || TREE_OVERFLOW (cond_rhs))
>+ return 0;
>+ }
>+
> if (((code == NE_EXPR && e1 == false_edge)
> || (code == EQ_EXPR && e1 == true_edge))
> && arg0 == lhs
>@@ -1071,7 +1164,15 @@ value_replacement (basic_block cond_bb,
> duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
> phires_range_info);
> }
>- gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
>+ gimple_stmt_iterator gsi_from;
>+ for (int i = prep_cnt - 1; i >= 0; --i)
>+ {
>+ tree plhs = gimple_assign_lhs (prep_stmt[i]);
>+ SSA_NAME_RANGE_INFO (plhs) = NULL;
>+ gsi_from = gsi_for_stmt (prep_stmt[i]);
>+ gsi_move_before (&gsi_from, &gsi);
>+ }
>+ gsi_from = gsi_for_stmt (assign);
> gsi_move_before (&gsi_from, &gsi);
> replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
> return 2;
>--- gcc/testsuite/c-c++-common/rotate-8.c.jj 2017-10-13
>15:55:32.000000000 +0200
>+++ gcc/testsuite/c-c++-common/rotate-8.c 2017-10-13 17:57:32.861627651
>+0200
>@@ -3,6 +3,7 @@
> /* { dg-do compile } */
> /* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
>/* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } }
>*/
>+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */
>
> unsigned int
> f1 (unsigned int x, unsigned char y)
>
> Jakub
More information about the Gcc-patches
mailing list