This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
[PATCH] More LCM speedup
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 16 Jan 2014 14:44:57 +0100 (CET)
- Subject: [PATCH] More LCM speedup
- Authentication-results: sourceware.org; auth=none
This fixes the two remaining dataflow iteration order issues
in lcm.c. On the testcase from PR46590 (long function with
many loops) we get an improvement from
PRE : 13.85 (42%) usr 0.08 (16%) sys 13.93 (41%) wall 1067 kB ( 1%) ggc
TOTAL : 33.35 0.50 33.87 207511 kB
to
PRE : 6.27 (24%) usr 0.03 ( 8%) sys 6.31 (24%) wall 1067 kB ( 1%) ggc
TOTAL : 25.62 0.36 25.97 207511 kB
Bootstrapped and tested on x86_64-unknown-linux, applied to trunk.
Richard.
2014-01-16 Richard Biener <rguenther@suse.de>
PR rtl-optimization/46590
* lcm.c (compute_antinout_edge): Use postorder iteration.
(compute_laterin): Use inverted postorder iteration.
Index: gcc/lcm.c
===================================================================
*** gcc/lcm.c (revision 206659)
--- gcc/lcm.c (working copy)
*************** compute_antinout_edge (sbitmap *antloc,
*** 109,119 ****
/* Put every block on the worklist; this is necessary because of the
optimistic initialization of ANTIN above. */
! FOR_EACH_BB_REVERSE_FN (bb, cfun)
{
*qin++ = bb;
bb->aux = bb;
}
qin = worklist;
qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
--- 109,123 ----
/* Put every block on the worklist; this is necessary because of the
optimistic initialization of ANTIN above. */
! int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
! int postorder_num = post_order_compute (postorder, false, false);
! for (int i = 0; i < postorder_num; ++i)
{
+ bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
*qin++ = bb;
bb->aux = bb;
}
+ free (postorder);
qin = worklist;
qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
*************** compute_laterin (struct edge_list *edge_
*** 281,291 ****
/* Add all the blocks to the worklist. This prevents an early exit from
the loop given our optimistic initialization of LATER above. */
! FOR_EACH_BB_FN (bb, cfun)
{
*qin++ = bb;
bb->aux = bb;
}
/* Note that we do not use the last allocated element for our queue,
as EXIT_BLOCK is never inserted into it. */
--- 285,302 ----
/* Add all the blocks to the worklist. This prevents an early exit from
the loop given our optimistic initialization of LATER above. */
! int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
! int postorder_num = inverted_post_order_compute (postorder);
! for (int i = 0; i < postorder_num; ++i)
{
+ bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
+ if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
+ || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ continue;
*qin++ = bb;
bb->aux = bb;
}
+ free (postorder);
/* Note that we do not use the last allocated element for our queue,
as EXIT_BLOCK is never inserted into it. */
*************** compute_laterin (struct edge_list *edge_
*** 307,320 ****
bitmap_ones (laterin[bb->index]);
FOR_EACH_EDGE (e, ei, bb->preds)
bitmap_and (laterin[bb->index], laterin[bb->index],
! later[(size_t)e->aux]);
/* Calculate LATER for all outgoing edges. */
FOR_EACH_EDGE (e, ei, bb->succs)
if (bitmap_ior_and_compl (later[(size_t) e->aux],
! earliest[(size_t) e->aux],
! laterin[e->src->index],
! antloc[e->src->index])
/* If LATER for an outgoing edge was changed, then we need
to add the target of the outgoing edge to the worklist. */
&& e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest->aux == 0)
--- 318,331 ----
bitmap_ones (laterin[bb->index]);
FOR_EACH_EDGE (e, ei, bb->preds)
bitmap_and (laterin[bb->index], laterin[bb->index],
! later[(size_t)e->aux]);
/* Calculate LATER for all outgoing edges. */
FOR_EACH_EDGE (e, ei, bb->succs)
if (bitmap_ior_and_compl (later[(size_t) e->aux],
! earliest[(size_t) e->aux],
! laterin[bb->index],
! antloc[bb->index])
/* If LATER for an outgoing edge was changed, then we need
to add the target of the outgoing edge to the worklist. */
&& e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest->aux == 0)
*************** compute_laterin (struct edge_list *edge_
*** 333,340 ****
bitmap_ones (laterin[last_basic_block_for_fn (cfun)]);
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
bitmap_and (laterin[last_basic_block_for_fn (cfun)],
! laterin[last_basic_block_for_fn (cfun)],
! later[(size_t) e->aux]);
clear_aux_for_edges ();
free (worklist);
--- 344,351 ----
bitmap_ones (laterin[last_basic_block_for_fn (cfun)]);
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
bitmap_and (laterin[last_basic_block_for_fn (cfun)],
! laterin[last_basic_block_for_fn (cfun)],
! later[(size_t) e->aux]);
clear_aux_for_edges ();
free (worklist);