This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch] Preserving loops in threading (updated)
Hello,
this is an updated version of
http://gcc.gnu.org/ml/gcc-patches/2006-10/msg01740.html
that also excludes the bugfixes that were originally included
with the patch. The actual code in tree-ssa-threadupdate.c changed
only very little, to reflect some cleanups to the loop infrastructure
in the last few months.
Bootstrapped & regtested on i686.
Zdenek
* tree-vrp.c (finalize_jump_threads): Do not care about dominance info.
(execute_vrp): Preserve loops through jump threading.
* tree-ssa-threadupdate.c (thread_single_edge,
dbds_continue_enumeration_p, determine_bb_domination_status,
thread_through_loop_header): New functions.
(create_edge_and_update_destination_phis): Set loops for the new blocks.
(prune_undesirable_thread_requests): Removed.
(redirect_edges): Do not pretend that redirect_edge_and_branch can
create new blocks.
(thread_block): Do not call prune_undesirable_thread_requests.
Update loops.
(mark_threaded_blocks): Select edges to thread here.
(thread_through_all_blocks): Take may_peel_loop_headers argument.
Thread edges through loop headers independently.
* cfgloopmanip.c (create_preheader, mfb_keep_just): Export.
* tree-pass.h (TODO_mark_first_instance): New.
(first_pass_instance): Declare.
* cfghooks.c (duplicate_block): Put the block to the original loop
if copy is not specified.
* tree-ssa-dom.c (tree_ssa_dominator_optimize): Preserve loops through
jump threading. Pass may_peel_loop_headers to
thread_through_all_blocks according to first_pass_instance.
* cfgloop.h (create_preheader): Declare.
* tree-flow.h (thread_through_all_blocks): Declaration changed.
* basic-block.h (mfb_keep_just, mfb_kj_edge): Declare.
* passes.c (first_pass_instance): New variable.
(next_pass_1): Set TODO_mark_first_instance.
(execute_todo): Set first_pass_instance.
* gcc.dg/tree-ssa/ssa-dom-thread-2.c: New test.
* gcc.dg/vect/vect-102.c, gcc.dg/vect/vect-103.c: Use more complex
construction to prevent vectorizing.
Index: tree-vrp.c
===================================================================
*** tree-vrp.c (revision 123968)
--- tree-vrp.c (working copy)
*************** identify_jump_threads (void)
*** 5779,5791 ****
static void
finalize_jump_threads (void)
{
! bool cfg_altered = false;
! cfg_altered = thread_through_all_blocks ();
!
! /* If we threaded jumps, then we need to recompute the dominance
! information. */
! if (cfg_altered)
! free_dominance_info (CDI_DOMINATORS);
VEC_free (tree, heap, stack);
}
--- 5779,5785 ----
static void
finalize_jump_threads (void)
{
! thread_through_all_blocks (false);
VEC_free (tree, heap, stack);
}
*************** vrp_finalize (void)
*** 5904,5925 ****
static unsigned int
execute_vrp (void)
{
! insert_range_assertions ();
!
! loop_optimizer_init (LOOPS_NORMAL);
if (current_loops)
! scev_initialize ();
vrp_initialize ();
ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node);
vrp_finalize ();
- if (current_loops)
- {
- scev_finalize ();
- loop_optimizer_finalize ();
- }
-
/* ASSERT_EXPRs must be removed before finalizing jump threads
as finalizing jump threads calls the CFG cleanup code which
does not properly handle ASSERT_EXPRs. */
--- 5898,5916 ----
static unsigned int
execute_vrp (void)
{
! loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
if (current_loops)
! {
! rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
! scev_initialize ();
! }
!
! insert_range_assertions ();
vrp_initialize ();
ssa_propagate (vrp_visit_stmt, vrp_visit_phi_node);
vrp_finalize ();
/* ASSERT_EXPRs must be removed before finalizing jump threads
as finalizing jump threads calls the CFG cleanup code which
does not properly handle ASSERT_EXPRs. */
*************** execute_vrp (void)
*** 5933,5938 ****
--- 5924,5935 ----
update_ssa (TODO_update_ssa);
finalize_jump_threads ();
+ if (current_loops)
+ {
+ scev_finalize ();
+ loop_optimizer_finalize ();
+ }
+
return 0;
}
Index: tree-ssa-threadupdate.c
===================================================================
*** tree-ssa-threadupdate.c (revision 123968)
--- tree-ssa-threadupdate.c (working copy)
*************** create_edge_and_update_destination_phis
*** 315,320 ****
--- 315,321 ----
e->probability = REG_BR_PROB_BASE;
e->count = rd->dup_block->count;
+ e->aux = rd->outgoing_edge->aux;
/* If there are any PHI nodes at the destination of the outgoing edge
from the duplicate block, then we will need to add a new argument
*************** fixup_template_block (void **slot, void
*** 385,583 ****
return 1;
}
- /* Not all jump threading requests are useful. In particular some
- jump threading requests can create irreducible regions which are
- undesirable.
-
- This routine will examine the BB's incoming edges for jump threading
- requests which, if acted upon, would create irreducible regions. Any
- such jump threading requests found will be pruned away. */
-
- static void
- prune_undesirable_thread_requests (basic_block bb)
- {
- edge e;
- edge_iterator ei;
- bool may_create_irreducible_region = false;
- unsigned int num_outgoing_edges_into_loop = 0;
-
- /* For the heuristics below, we need to know if BB has more than
- one outgoing edge into a loop. */
- FOR_EACH_EDGE (e, ei, bb->succs)
- num_outgoing_edges_into_loop += ((e->flags & EDGE_LOOP_EXIT) == 0);
-
- if (num_outgoing_edges_into_loop > 1)
- {
- edge backedge = NULL;
-
- /* Consider the effect of threading the edge (0, 1) to 2 on the left
- CFG to produce the right CFG:
-
-
- 0 0
- | |
- 1<--+ 2<--------+
- / \ | | |
- 2 3 | 4<----+ |
- \ / | / \ | |
- 4---+ E 1-- | --+
- | | |
- E 3---+
-
-
- Threading the (0, 1) edge to 2 effectively creates two loops
- (2, 4, 1) and (4, 1, 3) which are neither disjoint nor nested.
- This is not good.
-
- However, we do need to be able to thread (0, 1) to 2 or 3
- in the left CFG below (which creates the middle and right
- CFGs with nested loops).
-
- 0 0 0
- | | |
- 1<--+ 2<----+ 3<-+<-+
- /| | | | | | |
- 2 | | 3<-+ | 1--+ |
- \| | | | | | |
- 3---+ 1--+--+ 2-----+
-
-
- A safe heuristic appears to be to only allow threading if BB
- has a single incoming backedge from one of its direct successors. */
-
- FOR_EACH_EDGE (e, ei, bb->preds)
- {
- if (e->flags & EDGE_DFS_BACK)
- {
- if (backedge)
- {
- backedge = NULL;
- break;
- }
- else
- {
- backedge = e;
- }
- }
- }
-
- if (backedge && find_edge (bb, backedge->src))
- ;
- else
- may_create_irreducible_region = true;
- }
- else
- {
- edge dest = NULL;
-
- /* If we thread across the loop entry block (BB) into the
- loop and BB is still reached from outside the loop, then
- we would create an irreducible CFG. Consider the effect
- of threading the edge (1, 4) to 5 on the left CFG to produce
- the right CFG
-
- 0 0
- / \ / \
- 1 2 1 2
- \ / | |
- 4<----+ 5<->4
- / \ | |
- E 5---+ E
-
-
- Threading the (1, 4) edge to 5 creates two entry points
- into the loop (4, 5) (one from block 1, the other from
- block 2). A classic irreducible region.
-
- So look at all of BB's incoming edges which are not
- backedges and which are not threaded to the loop exit.
- If that subset of incoming edges do not all thread
- to the same block, then threading any of them will create
- an irreducible region. */
-
- FOR_EACH_EDGE (e, ei, bb->preds)
- {
- edge e2;
-
- /* We ignore back edges for now. This may need refinement
- as threading a backedge creates an inner loop which
- we would need to verify has a single entry point.
-
- If all backedges thread to new locations, then this
- block will no longer have incoming backedges and we
- need not worry about creating irreducible regions
- by threading through BB. I don't think this happens
- enough in practice to worry about it. */
- if (e->flags & EDGE_DFS_BACK)
- continue;
-
- /* If the incoming edge threads to the loop exit, then it
- is clearly safe. */
- e2 = e->aux;
- if (e2 && (e2->flags & EDGE_LOOP_EXIT))
- continue;
-
- /* E enters the loop header and is not threaded. We can
- not allow any other incoming edges to thread into
- the loop as that would create an irreducible region. */
- if (!e2)
- {
- may_create_irreducible_region = true;
- break;
- }
-
- /* We know that this incoming edge threads to a block inside
- the loop. This edge must thread to the same target in
- the loop as any previously seen threaded edges. Otherwise
- we will create an irreducible region. */
- if (!dest)
- dest = e2;
- else if (e2 != dest)
- {
- may_create_irreducible_region = true;
- break;
- }
- }
- }
-
- /* If we might create an irreducible region, then cancel any of
- the jump threading requests for incoming edges which are
- not backedges and which do not thread to the exit block. */
- if (may_create_irreducible_region)
- {
- FOR_EACH_EDGE (e, ei, bb->preds)
- {
- edge e2;
-
- /* Ignore back edges. */
- if (e->flags & EDGE_DFS_BACK)
- continue;
-
- e2 = e->aux;
-
- /* If this incoming edge was not threaded, then there is
- nothing to do. */
- if (!e2)
- continue;
-
- /* If this incoming edge threaded to the loop exit,
- then it can be ignored as it is safe. */
- if (e2->flags & EDGE_LOOP_EXIT)
- continue;
-
- if (e2)
- {
- /* This edge threaded into the loop and the jump thread
- request must be cancelled. */
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, " Not threading jump %d --> %d to %d\n",
- e->src->index, e->dest->index, e2->dest->index);
- e->aux = NULL;
- }
- }
- }
- }
-
/* Hash table traversal callback to redirect each incoming edge
associated with this hash table element to its new destination. */
--- 386,391 ----
*************** redirect_edges (void **slot, void *data)
*** 620,630 ****
/* Redirect the incoming edge to the appropriate duplicate
block. */
e2 = redirect_edge_and_branch (e, rd->dup_block);
flush_pending_stmts (e2);
-
- if ((dump_file && (dump_flags & TDF_DETAILS))
- && e->src != e2->src)
- fprintf (dump_file, " basic block %d created\n", e2->src->index);
}
else
{
--- 428,435 ----
/* Redirect the incoming edge to the appropriate duplicate
block. */
e2 = redirect_edge_and_branch (e, rd->dup_block);
+ gcc_assert (e == e2);
flush_pending_stmts (e2);
}
else
{
*************** redirection_block_p (basic_block bb)
*** 696,741 ****
successor of BB. We then revector the incoming edges into BB to
the appropriate duplicate of BB.
! BB and its duplicates will have assignments to the same set of
! SSA_NAMEs. Right now, we just call into update_ssa to update the
! SSA graph for those names.
!
! We are also going to experiment with a true incremental update
! scheme for the duplicated resources. One of the interesting
! properties we can exploit here is that all the resources set
! in BB will have the same IDFS, so we have one IDFS computation
! per block with incoming threaded edges, which can lower the
! cost of the true incremental update algorithm. */
static bool
! thread_block (basic_block bb)
{
/* E is an incoming edge into BB that we may or may not want to
redirect to a duplicate of BB. */
! edge e;
edge_iterator ei;
struct local_info local_info;
!
! /* FOUND_BACKEDGE indicates that we found an incoming backedge
! into BB, in which case we may ignore certain jump threads
! to avoid creating irreducible regions. */
! bool found_backedge = false;
/* ALL indicates whether or not all incoming edges into BB should
be threaded to a duplicate of BB. */
bool all = true;
- /* If optimizing for size, only thread this block if we don't have
- to duplicate it or it's an otherwise empty redirection block. */
- if (optimize_size
- && EDGE_COUNT (bb->preds) > 1
- && !redirection_block_p (bb))
- {
- FOR_EACH_EDGE (e, ei, bb->preds)
- e->aux = NULL;
- return false;
- }
-
/* To avoid scanning a linear array for the element we need we instead
use a hash table. For normal code there should be no noticeable
difference. However, if we have a block with a large number of
--- 501,523 ----
successor of BB. We then revector the incoming edges into BB to
the appropriate duplicate of BB.
! If NOLOOP_ONLY is true, we only perform the threading as long as it
! does not affect the structure of the loops in a nontrivial way. */
static bool
! thread_block (basic_block bb, bool noloop_only)
{
/* E is an incoming edge into BB that we may or may not want to
redirect to a duplicate of BB. */
! edge e, e2;
edge_iterator ei;
struct local_info local_info;
! struct loop *loop = bb->loop_father;
/* ALL indicates whether or not all incoming edges into BB should
be threaded to a duplicate of BB. */
bool all = true;
/* To avoid scanning a linear array for the element we need we instead
use a hash table. For normal code there should be no noticeable
difference. However, if we have a block with a large number of
*************** thread_block (basic_block bb)
*** 745,779 ****
redirection_data_eq,
free);
! FOR_EACH_EDGE (e, ei, bb->preds)
! found_backedge |= ((e->flags & EDGE_DFS_BACK) != 0);
! /* If BB has incoming backedges, then threading across BB might
! introduce an irreducible region, which would be undesirable
! as that inhibits various optimizations later. Prune away
! any jump threading requests which we know will result in
! an irreducible region. */
! if (found_backedge)
! prune_undesirable_thread_requests (bb);
/* Record each unique threaded destination into a hash table for
efficient lookups. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
! if (!e->aux)
{
all = false;
}
! else
! {
! edge e2 = e->aux;
! update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
! e->count, e->aux);
!
! /* Insert the outgoing edge into the hash table if it is not
! already in the hash table. */
! lookup_redirection_data (e2, e, INSERT);
! }
}
/* If we are going to thread all incoming edges to an outgoing edge, then
--- 527,571 ----
redirection_data_eq,
free);
! /* If we thread the latch of the loop to its exit, the loop ceases to
! exist. Make sure we do not restrict ourselves in order to preserve
! this loop. */
! if (current_loops && loop->header == bb)
! {
! e = loop_latch_edge (loop);
! e2 = e->aux;
! if (e2 && loop_exit_edge_p (loop, e2))
! {
! loop->header = NULL;
! loop->latch = NULL;
! }
! }
/* Record each unique threaded destination into a hash table for
efficient lookups. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
! e2 = e->aux;
!
! if (!e2
! /* If NOLOOP_ONLY is true, we only allow threading through the
! header of a loop to exit edges. */
! || (noloop_only
! && current_loops
! && bb == bb->loop_father->header
! && !loop_exit_edge_p (bb->loop_father, e2)))
{
all = false;
+ continue;
}
!
! update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
! e->count, e->aux);
!
! /* Insert the outgoing edge into the hash table if it is not
! already in the hash table. */
! lookup_redirection_data (e2, e, INSERT);
}
/* If we are going to thread all incoming edges to an outgoing edge, then
*************** thread_block (basic_block bb)
*** 821,826 ****
--- 613,951 ----
return local_info.jumps_threaded;
}
+ /* Threads edge E through E->dest to the edge E->aux. Returns the copy
+ of E->dest created during threading, or E->dest if it was not necessary
+ to copy it (E is its single predecessor). */
+
+ static basic_block
+ thread_single_edge (edge e)
+ {
+ basic_block bb = e->dest;
+ edge eto = e->aux;
+ struct redirection_data rd;
+ struct local_info local_info;
+
+ e->aux = NULL;
+
+ thread_stats.num_threaded_edges++;
+
+ if (single_pred_p (bb))
+ {
+ /* If BB has just a single predecessor, we should only remove the
+ control statements at its end, and successors except for ETO. */
+ remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
+ return bb;
+ }
+
+ /* Otherwise, we need to create a copy. */
+ update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
+
+ local_info.bb = bb;
+ rd.outgoing_edge = eto;
+
+ create_block_for_threading (bb, &rd);
+ create_edge_and_update_destination_phis (&rd);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
+ e->src->index, e->dest->index, rd.dup_block->index);
+
+ rd.dup_block->count = e->count;
+ rd.dup_block->frequency = EDGE_FREQUENCY (e);
+ single_succ_edge (rd.dup_block)->count = e->count;
+ redirect_edge_and_branch (e, rd.dup_block);
+ flush_pending_stmts (e);
+
+ return rd.dup_block;
+ }
+
+ /* Callback for dfs_enumerate_from. Returns true if BB is different
+ from STOP and DBDS_CE_STOP. */
+
+ static basic_block dbds_ce_stop;
+ static bool
+ dbds_continue_enumeration_p (basic_block bb, void *stop)
+ {
+ return (bb != (basic_block) stop
+ && bb != dbds_ce_stop);
+ }
+
+ /* Evaluates the dominance relationship between BB and the latch of LOOP,
+ and returns the state. */
+
+ enum bb_dom_status
+ {
+ /* BB does not dominate latch of the LOOP. */
+ DOMST_NONDOMINATING,
+ /* The LOOP is broken (there is no path from the header to its latch). */
+ DOMST_LOOP_BROKEN,
+ /* BB dominates the latch of the LOOP. */
+ DOMST_DOMINATING
+ };
+
+ static enum bb_dom_status
+ determine_bb_domination_status (struct loop *loop, basic_block bb)
+ {
+ basic_block *bblocks;
+ unsigned nblocks, i;
+ bool bb_reachable = false;
+ edge_iterator ei;
+ edge e;
+
+ #ifdef ENABLE_CHECKING
+ /* This function assumes BB is a successor of LOOP->header. */
+ {
+ bool ok = false;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ if (e->src == loop->header)
+ {
+ ok = true;
+ break;
+ }
+ }
+
+ gcc_assert (ok);
+ }
+ #endif
+
+ if (bb == loop->latch)
+ return DOMST_DOMINATING;
+
+ /* Check that BB dominates LOOP->latch, and that it is back-reachable
+ from it. */
+
+ bblocks = XCNEWVEC (basic_block, loop->num_nodes);
+ dbds_ce_stop = loop->header;
+ nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
+ bblocks, loop->num_nodes, bb);
+ for (i = 0; i < nblocks; i++)
+ FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
+ {
+ if (e->src == loop->header)
+ {
+ free (bblocks);
+ return DOMST_NONDOMINATING;
+ }
+ if (e->src == bb)
+ bb_reachable = true;
+ }
+
+ free (bblocks);
+ return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
+ }
+
+ /* Thread jumps through the header of LOOP. Returns true if cfg changes.
+ If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
+ to the inside of the loop. */
+
+ static bool
+ thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
+ {
+ basic_block header = loop->header;
+ edge e, tgt_edge, latch = loop_latch_edge (loop);
+ edge_iterator ei;
+ basic_block tgt_bb, atgt_bb;
+ enum bb_dom_status domst;
+
+ /* We have already threaded through headers to exits, so all the threading
+ requests now are to the inside of the loop. We need to avoid creating
+ irreducible regions (i.e., loops with more than one entry block), and
+ also loops with several latch edges, or new subloops of the loop (although
+ there are cases where it might be appropriate, it is difficult to decide,
+ and doing it wrongly may confuse other optimizers).
+
+ We could handle more general cases here. However, the intention is to
+ preserve some information about the loop, which is impossible if its
+ structure changes significantly, in a way that is not well understood.
+ Thus we only handle few important special cases, in which also updating
+ of the loop-carried information should be feasible:
+
+ 1) Propagation of latch edge to a block that dominates the latch block
+ of a loop. This aims to handle the following idiom:
+
+ first = 1;
+ while (1)
+ {
+ if (first)
+ initialize;
+ first = 0;
+ body;
+ }
+
+ After threading the latch edge, this becomes
+
+ first = 1;
+ if (first)
+ initialize;
+ while (1)
+ {
+ first = 0;
+ body;
+ }
+
+ The original header of the loop is moved out of it, and we may thread
+ the remaining edges through it without further constraints.
+
+ 2) All entry edges are propagated to a single basic block that dominates
+ the latch block of the loop. This aims to handle the following idiom
+ (normally created for "for" loops):
+
+ i = 0;
+ while (1)
+ {
+ if (i >= 100)
+ break;
+ body;
+ i++;
+ }
+
+ This becomes
+
+ i = 0;
+ while (1)
+ {
+ body;
+ i++;
+ if (i >= 100)
+ break;
+ }
+ */
+
+ /* Threading through the header won't improve the code if the header has just
+ one successor. */
+ if (single_succ_p (header))
+ goto fail;
+
+ if (latch->aux)
+ {
+ tgt_edge = latch->aux;
+ tgt_bb = tgt_edge->dest;
+ }
+ else if (!may_peel_loop_headers
+ && !redirection_block_p (loop->header))
+ goto fail;
+ else
+ {
+ tgt_bb = NULL;
+ tgt_edge = NULL;
+ FOR_EACH_EDGE (e, ei, header->preds)
+ {
+ if (!e->aux)
+ {
+ if (e == latch)
+ continue;
+
+ /* If latch is not threaded, and there is a header
+ edge that is not threaded, we would create loop
+ with multiple entries. */
+ goto fail;
+ }
+
+ tgt_edge = e->aux;
+ atgt_bb = tgt_edge->dest;
+ if (!tgt_bb)
+ tgt_bb = atgt_bb;
+ /* Two targets of threading would make us create loop
+ with multiple entries. */
+ else if (tgt_bb != atgt_bb)
+ goto fail;
+ }
+
+ if (!tgt_bb)
+ {
+ /* There are no threading requests. */
+ return false;
+ }
+
+ /* Redirecting to empty loop latch is useless. */
+ if (tgt_bb == loop->latch
+ && empty_block_p (loop->latch))
+ goto fail;
+ }
+
+ /* The target block must dominate the loop latch, otherwise we would be
+ creating a subloop. */
+ domst = determine_bb_domination_status (loop, tgt_bb);
+ if (domst == DOMST_NONDOMINATING)
+ goto fail;
+ if (domst == DOMST_LOOP_BROKEN)
+ {
+ /* If the loop ceased to exist, mark it as such, and thread through its
+ original header. */
+ loop->header = NULL;
+ loop->latch = NULL;
+ return thread_block (header, false);
+ }
+
+ if (tgt_bb->loop_father->header == tgt_bb)
+ {
+ /* If the target of the threading is a header of a subloop, we need
+ to create a preheader for it, so that the headers of the two loops
+ do not merge. */
+ if (EDGE_COUNT (tgt_bb->preds) > 2)
+ {
+ tgt_bb = create_preheader (tgt_bb->loop_father, 0);
+ gcc_assert (tgt_bb != NULL);
+ }
+ else
+ tgt_bb = split_edge (tgt_edge);
+ }
+
+ if (latch->aux)
+ {
+ /* First handle the case latch edge is redirected. */
+ loop->latch = thread_single_edge (latch);
+ gcc_assert (single_succ (loop->latch) == tgt_bb);
+ loop->header = tgt_bb;
+
+ /* Thread the remaining edges through the former header. */
+ thread_block (header, false);
+ }
+ else
+ {
+ basic_block new_preheader;
+
+ /* Now consider the case entry edges are redirected to the new entry
+ block. Remember one entry edge, so that we can find the new
+ preheader (its destination after threading). */
+ FOR_EACH_EDGE (e, ei, header->preds)
+ {
+ if (e->aux)
+ break;
+ }
+
+ /* The duplicate of the header is the new preheader of the loop. Ensure
+ that it is placed correctly in the loop hierarchy. */
+ loop->copy = loop->outer;
+
+ thread_block (header, false);
+ loop->copy = NULL;
+ new_preheader = e->dest;
+
+ /* Create the new latch block. This is always necessary, as the latch
+ must have only a single successor, but the original header had at
+ least two successors. */
+ loop->latch = NULL;
+ mfb_kj_edge = single_succ_edge (new_preheader);
+ loop->header = mfb_kj_edge->dest;
+ latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
+ loop->header = latch->dest;
+ loop->latch = latch->src;
+ }
+
+ return true;
+
+ fail:
+ /* We failed to thread anything. Cancel the requests. */
+ FOR_EACH_EDGE (e, ei, header->preds)
+ {
+ e->aux = NULL;
+ }
+ return false;
+ }
+
/* Walk through the registered jump threads and convert them into a
form convenient for this pass.
*************** static void
*** 838,843 ****
--- 963,973 ----
mark_threaded_blocks (bitmap threaded_blocks)
{
unsigned int i;
+ bitmap_iterator bi;
+ bitmap tmp = BITMAP_ALLOC (NULL);
+ basic_block bb;
+ edge e;
+ edge_iterator ei;
for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
{
*************** mark_threaded_blocks (bitmap threaded_bl
*** 845,852 ****
edge e2 = VEC_index (edge, threaded_edges, i + 1);
e->aux = e2;
! bitmap_set_bit (threaded_blocks, e->dest->index);
}
}
--- 975,1004 ----
edge e2 = VEC_index (edge, threaded_edges, i + 1);
e->aux = e2;
! bitmap_set_bit (tmp, e->dest->index);
}
+
+ /* If optimizing for size, only thread through block if we don't have
+ to duplicate it or it's an otherwise empty redirection block. */
+ if (optimize_size)
+ {
+ EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
+ {
+ bb = BASIC_BLOCK (i);
+ if (EDGE_COUNT (bb->preds) > 1
+ && !redirection_block_p (bb))
+ {
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ e->aux = NULL;
+ }
+ else
+ bitmap_set_bit (threaded_blocks, i);
+ }
+ }
+ else
+ bitmap_copy (threaded_blocks, tmp);
+
+ BITMAP_FREE(tmp);
}
*************** mark_threaded_blocks (bitmap threaded_bl
*** 856,870 ****
It is the caller's responsibility to fix the dominance information
and rewrite duplicated SSA_NAMEs back into SSA form.
Returns true if one or more edges were threaded, false otherwise. */
bool
! thread_through_all_blocks (void)
{
bool retval = false;
unsigned int i;
bitmap_iterator bi;
bitmap threaded_blocks;
if (threaded_edges == NULL)
return false;
--- 1008,1027 ----
It is the caller's responsibility to fix the dominance information
and rewrite duplicated SSA_NAMEs back into SSA form.
+ If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
+ loop headers if it does not simplify the loop.
+
Returns true if one or more edges were threaded, false otherwise. */
bool
! thread_through_all_blocks (bool may_peel_loop_headers)
{
bool retval = false;
unsigned int i;
bitmap_iterator bi;
bitmap threaded_blocks;
+ struct loop *loop;
+ loop_iterator li;
if (threaded_edges == NULL)
return false;
*************** thread_through_all_blocks (void)
*** 874,887 ****
mark_threaded_blocks (threaded_blocks);
EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
{
basic_block bb = BASIC_BLOCK (i);
if (EDGE_COUNT (bb->preds) > 0)
! retval |= thread_block (bb);
}
if (dump_file && (dump_flags & TDF_STATS))
fprintf (dump_file, "\nJumps threaded: %lu\n",
thread_stats.num_threaded_edges);
--- 1031,1068 ----
mark_threaded_blocks (threaded_blocks);
+ if (current_loops)
+ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+ loop->copy = NULL;
+
+ /* First perform the threading requests that do not affect
+ loop structure. */
EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
{
basic_block bb = BASIC_BLOCK (i);
if (EDGE_COUNT (bb->preds) > 0)
! retval |= thread_block (bb, true);
}
+ /* Then perform the threading through loop headers. We start with the
+ innermost loop, so that the changes in cfg we perform won't affect
+ further threading. */
+ if (current_loops)
+ {
+ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+ {
+ if (!loop->header
+ || !bitmap_bit_p (threaded_blocks, loop->header->index))
+ continue;
+
+ retval |= thread_through_loop_header (loop, may_peel_loop_headers);
+ }
+ }
+
+ if (retval)
+ free_dominance_info (CDI_DOMINATORS);
+
if (dump_file && (dump_flags & TDF_STATS))
fprintf (dump_file, "\nJumps threaded: %lu\n",
thread_stats.num_threaded_edges);
*************** thread_through_all_blocks (void)
*** 890,895 ****
--- 1071,1077 ----
threaded_blocks = NULL;
VEC_free (edge, heap, threaded_edges);
threaded_edges = NULL;
+
return retval;
}
Index: cfgloopmanip.c
===================================================================
*** cfgloopmanip.c (revision 123968)
--- cfgloopmanip.c (working copy)
*************** static int find_path (edge, basic_block
*** 41,47 ****
static void fix_loop_placements (struct loop *, bool *);
static bool fix_bb_placement (basic_block);
static void fix_bb_placements (basic_block, bool *);
- static basic_block create_preheader (struct loop *, int);
static void unloop (struct loop *, bool *);
#define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
--- 41,46 ----
*************** duplicate_loop_to_header_edge (struct lo
*** 1085,1092 ****
MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
whether to redirect it. */
! static edge mfb_kj_edge;
! static bool
mfb_keep_just (edge e)
{
return e != mfb_kj_edge;
--- 1084,1091 ----
MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
whether to redirect it. */
! edge mfb_kj_edge;
! bool
mfb_keep_just (edge e)
{
return e != mfb_kj_edge;
*************** mfb_keep_just (edge e)
*** 1097,1103 ****
entry; otherwise we also force preheader block to have only one successor.
The function also updates dominators. */
! static basic_block
create_preheader (struct loop *loop, int flags)
{
edge e, fallthru;
--- 1096,1102 ----
entry; otherwise we also force preheader block to have only one successor.
The function also updates dominators. */
! basic_block
create_preheader (struct loop *loop, int flags)
{
edge e, fallthru;
Index: tree-pass.h
===================================================================
*** tree-pass.h (revision 123968)
--- tree-pass.h (working copy)
*************** struct dump_file_info
*** 215,220 ****
--- 215,223 ----
for the passes that are handed to register_dump_files. */
#define TODO_set_props (1 << 15)
+ /* Internally used for the first instance of a pass. */
+ #define TODO_mark_first_instance (1 << 16)
+
#define TODO_update_ssa_any \
(TODO_update_ssa \
| TODO_update_ssa_no_phi \
*************** extern struct tree_opt_pass *all_passes,
*** 415,418 ****
--- 418,425 ----
extern void execute_pass_list (struct tree_opt_pass *);
extern void execute_ipa_pass_list (struct tree_opt_pass *);
+ /* Set to true if the pass is called the first time during compilation of the
+ current function. */
+ extern bool first_pass_instance;
+
#endif /* GCC_TREE_PASS_H */
Index: cfghooks.c
===================================================================
*** cfghooks.c (revision 123968)
--- cfghooks.c (working copy)
*************** duplicate_block (basic_block bb, edge e,
*** 923,931 ****
set_bb_original (new_bb, bb);
set_bb_copy (bb, new_bb);
! /* Add the new block to the prescribed loop. */
if (current_loops != NULL)
! add_bb_to_loop (new_bb, bb->loop_father->copy);
return new_bb;
}
--- 923,937 ----
set_bb_original (new_bb, bb);
set_bb_copy (bb, new_bb);
! /* Add the new block to the copy of the loop of BB, or directly to the loop
! of BB if the loop is not being copied. */
if (current_loops != NULL)
! {
! struct loop *cloop = bb->loop_father;
! if (cloop->copy)
! cloop = cloop->copy;
! add_bb_to_loop (new_bb, cloop);
! }
return new_bb;
}
Index: testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2.c (revision 0)
--- testsuite/gcc.dg/tree-ssa/ssa-dom-thread-2.c (revision 0)
***************
*** 0 ****
--- 1,119 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-vrp1-stats -fdump-tree-dom1-stats" } */
+
+ void foo();
+ void bla();
+ void bar();
+
+ /* In the following two cases, we should be able to thread edge through
+ the loop header. */
+
+ void thread_entry_through_header (void)
+ {
+ int i;
+
+ for (i = 0; i < 170; i++)
+ bla ();
+ }
+
+ void thread_latch_through_header (void)
+ {
+ int i = 0;
+ int first = 1;
+
+ do
+ {
+ if (first)
+ foo ();
+
+ first = 0;
+ bla ();
+ } while (i++ < 100);
+ }
+
+ /* This is a TODO -- it is correct to thread both entry and latch edge through
+ the header, but we do not handle this case yet. */
+
+ void dont_thread_1 (void)
+ {
+ int i = 0;
+ int first = 1;
+
+ do
+ {
+ if (first)
+ foo ();
+ else
+ bar ();
+
+ first = 0;
+ bla ();
+ } while (i++ < 100);
+ }
+
+ /* Avoid threading in the following two cases, to prevent creating subloops. */
+
+ void dont_thread_2 (int first)
+ {
+ int i = 0;
+
+ do
+ {
+ if (first)
+ foo ();
+ else
+ bar ();
+
+ first = 0;
+ bla ();
+ } while (i++ < 100);
+ }
+
+ void dont_thread_3 (int nfirst)
+ {
+ int i = 0;
+ int first = 0;
+
+ do
+ {
+ if (first)
+ foo ();
+ else
+ bar ();
+
+ first = nfirst;
+ bla ();
+ } while (i++ < 100);
+ }
+
+ /* Avoid threading in this case, in order to avoid creating loop with
+ multiple entries. */
+
+ void dont_thread_4 (int a, int nfirst)
+ {
+ int i = 0;
+ int first;
+
+ if (a)
+ first = 0;
+ else
+ first = 1;
+
+ do
+ {
+ if (first)
+ foo ();
+ else
+ bar ();
+
+ first = nfirst;
+ bla ();
+ } while (i++ < 100);
+ }
+
+ /* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 1 "vrp1"} } */
+ /* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 0 "vrp1"} } */
+ /* { dg-final { scan-tree-dump-times "Jumps threaded: 1" 0 "dom1"} } */
+ /* { dg-final { scan-tree-dump-times "Jumps threaded: 2" 1 "dom1"} } */
+ /* { dg-final { cleanup-tree-dump "dom1" } } */
+ /* { dg-final { cleanup-tree-dump "vrp1" } } */
Index: testsuite/gcc.dg/vect/vect-102.c
===================================================================
*** testsuite/gcc.dg/vect/vect-102.c (revision 123968)
--- testsuite/gcc.dg/vect/vect-102.c (working copy)
*************** int main1 (int x, int y) {
*** 23,29 ****
for (i = 0; i < N; i++)
{
p->a[i] = a[i];
! if (x == 135)
abort (); /* to avoid vectorization */
}
--- 23,29 ----
for (i = 0; i < N; i++)
{
p->a[i] = a[i];
! if (x == 135 || x == 137)
abort (); /* to avoid vectorization */
}
Index: testsuite/gcc.dg/vect/vect-103.c
===================================================================
*** testsuite/gcc.dg/vect/vect-103.c (revision 123968)
--- testsuite/gcc.dg/vect/vect-103.c (working copy)
*************** int main1 (int x, int y) {
*** 25,31 ****
{
p->a[i] = a[i];
p->b[i] = b[i];
! if (x == 135)
abort (); /* to avoid vectorization */
}
--- 25,31 ----
{
p->a[i] = a[i];
p->b[i] = b[i];
! if (x == 135 || x == 137)
abort (); /* to avoid vectorization */
}
Index: tree-ssa-dom.c
===================================================================
*** tree-ssa-dom.c (revision 123968)
--- tree-ssa-dom.c (working copy)
*************** tree_ssa_dominator_optimize (void)
*** 276,300 ****
calculate_dominance_info (CDI_DOMINATORS);
! /* We need to know which edges exit loops so that we can
! aggressively thread through loop headers to an exit
! edge. */
! loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
! if (current_loops)
! {
! mark_loop_exit_edges ();
! loop_optimizer_finalize ();
! }
!
! /* Clean up the CFG so that any forwarder blocks created by loop
! canonicalization are removed. */
! cleanup_tree_cfg ();
! calculate_dominance_info (CDI_DOMINATORS);
/* We need accurate information regarding back edges in the CFG
! for jump threading. */
mark_dfs_back_edges ();
!
/* Recursively walk the dominator tree optimizing statements. */
walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
--- 276,292 ----
calculate_dominance_info (CDI_DOMINATORS);
! /* We need to know loop structures in order to avoid destroying them
! in jump threading. Note that we still can e.g. thread through loop
! headers to an exit edge, or through loop header to the loop body, assuming
! that we update the loop info. */
! loop_optimizer_init (LOOPS_HAVE_SIMPLE_LATCHES);
/* We need accurate information regarding back edges in the CFG
! for jump threading; this may include back edges that are not part of
! a simple loop. */
mark_dfs_back_edges ();
!
/* Recursively walk the dominator tree optimizing statements. */
walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
*************** tree_ssa_dominator_optimize (void)
*** 318,324 ****
free_all_edge_infos ();
/* Thread jumps, creating duplicate blocks as needed. */
! cfg_altered |= thread_through_all_blocks ();
/* Removal of statements may make some EH edges dead. Purge
such edges from the CFG as needed. */
--- 310,316 ----
free_all_edge_infos ();
/* Thread jumps, creating duplicate blocks as needed. */
! cfg_altered |= thread_through_all_blocks (first_pass_instance);
/* Removal of statements may make some EH edges dead. Purge
such edges from the CFG as needed. */
*************** tree_ssa_dominator_optimize (void)
*** 328,336 ****
bitmap_zero (need_eh_cleanup);
}
- if (cfg_altered)
- free_dominance_info (CDI_DOMINATORS);
-
/* Finally, remove everything except invariants in SSA_NAME_VALUE.
Long term we will be able to let everything in SSA_NAME_VALUE
--- 320,325 ----
*************** tree_ssa_dominator_optimize (void)
*** 352,357 ****
--- 341,348 ----
if (dump_file && (dump_flags & TDF_STATS))
dump_dominator_optimization_stats (dump_file);
+ loop_optimizer_finalize ();
+
/* Delete our main hashtable. */
htab_delete (avail_exprs);
Index: cfgloop.h
===================================================================
*** cfgloop.h (revision 123968)
--- cfgloop.h (working copy)
*************** enum
*** 256,261 ****
--- 256,262 ----
CP_SIMPLE_PREHEADERS = 1
};
+ basic_block create_preheader (struct loop *, int);
extern void create_preheaders (int);
extern void force_single_succ_latches (void);
Index: tree-flow.h
===================================================================
*** tree-flow.h (revision 123968)
--- tree-flow.h (working copy)
*************** bool multiplier_allowed_in_address_p (HO
*** 1110,1116 ****
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode);
/* In tree-ssa-threadupdate.c. */
! extern bool thread_through_all_blocks (void);
extern void register_jump_thread (edge, edge);
/* In gimplify.c */
--- 1110,1116 ----
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode);
/* In tree-ssa-threadupdate.c. */
! extern bool thread_through_all_blocks (bool);
extern void register_jump_thread (edge, edge);
/* In gimplify.c */
Index: basic-block.h
===================================================================
*** basic-block.h (revision 123968)
--- basic-block.h (working copy)
*************** static inline bool bb_has_eh_pred (basic
*** 1171,1174 ****
--- 1171,1178 ----
return false;
}
+ /* In cfgloopmanip.c. */
+ extern edge mfb_kj_edge;
+ bool mfb_keep_just (edge);
+
#endif /* GCC_BASIC_BLOCK_H */
Index: passes.c
===================================================================
*** passes.c (revision 123968)
--- passes.c (working copy)
*************** Software Foundation, 51 Franklin Street,
*** 105,110 ****
--- 105,111 ----
/* Global variables used to communicate with passes. */
int dump_flags;
bool in_gimple_form;
+ bool first_pass_instance;
/* This is called from various places for FUNCTION_DECL, VAR_DECL,
*************** next_pass_1 (struct tree_opt_pass **list
*** 392,397 ****
--- 393,400 ----
memcpy (new, pass, sizeof (*new));
new->next = NULL;
+ new->todo_flags_start &= ~TODO_mark_first_instance;
+
/* Indicate to register_dump_files that this pass has duplicates,
and so it should rename the dump file. The first instance will
be -1, and be number of duplicates = -static_pass_number - 1.
*************** next_pass_1 (struct tree_opt_pass **list
*** 406,411 ****
--- 409,415 ----
}
else
{
+ pass->todo_flags_start |= TODO_mark_first_instance;
pass->static_pass_number = -1;
*list = pass;
}
*************** execute_todo (unsigned int flags)
*** 933,938 ****
--- 937,945 ----
gcc_assert (flags & TODO_update_ssa_any);
#endif
+ /* Inform the pass whether it is the first time it is run. */
+ first_pass_instance = (flags & TODO_mark_first_instance) != 0;
+
do_per_function (execute_function_todo, (void *)(size_t) flags);
/* Always remove functions just as before inlining: IPA passes might be