* basic-block.h (rediscover_loops_after_threading): Declare. * tree-ssa-dom.c: Include cfgloop.h. (tree_ssa_dominator_optimize): Discover loops and some basic properties. Remove forwarder blocks recreated by loop header canonicalization. Also mark backedges in the CFG. * tree-ssa-threadupdate.c: Include cfgloop.h (rediscover_loops_after_threading): Define. (struct local_info): New field, JUMP_THREADED. (prune_undesirable_thread_requests): New function. (redirect_edges): Clear EDGE_ABNORMAL. If edges were threaded then record that fact for the callers of redirct_edges. (thread_block): If BB has incoming backedges, then call prune_undesirable_thraed_requests. Note when we are going to have to rediscover loop information. Return a boolean indicating if any jumps were threaded. (thread_through_all_blocks): Bubble up boolean indicating if any jumps were threaded. * Makefile.in (tree-ssa-dom.o): Depend on cfgloop.h (tree-ssa-threadupdate.o): Similarly. Index: basic-block.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/basic-block.h,v retrieving revision 1.238 diff -c -p -r1.238 basic-block.h *** basic-block.h 15 Feb 2005 07:18:22 -0000 1.238 --- basic-block.h 4 Mar 2005 20:54:59 -0000 *************** extern int last_basic_block; *** 352,357 **** --- 352,361 ---- extern int n_edges; + /* TRUE if we should re-run loop discovery after threading jumps, FALSE + otherwise. */ + extern bool rediscover_loops_after_threading; + /* Signalize the status of profile information in the CFG. */ extern enum profile_status { Index: tree-ssa-dom.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-ssa-dom.c,v retrieving revision 2.97 diff -c -p -r2.97 tree-ssa-dom.c *** tree-ssa-dom.c 2 Mar 2005 19:12:55 -0000 2.97 --- tree-ssa-dom.c 4 Mar 2005 20:55:00 -0000 *************** Boston, MA 02111-1307, USA. */ *** 29,34 **** --- 29,35 ---- #include "tm_p.h" #include "ggc.h" #include "basic-block.h" + #include "cfgloop.h" #include "output.h" #include "errors.h" #include "expr.h" *************** tree_ssa_dominator_optimize (void) *** 368,373 **** --- 369,375 ---- { struct dom_walk_data walk_data; unsigned int i; + struct loops loops_info; memset (&opt_stats, 0, sizeof (opt_stats)); *************** tree_ssa_dominator_optimize (void) *** 407,412 **** --- 409,425 ---- calculate_dominance_info (CDI_DOMINATORS); + /* We need to know which edges exit loops so that we can + aggressively thread through loop headers to an exit + edge. */ + flow_loops_find (&loops_info); + mark_loop_exit_edges (&loops_info); + flow_loops_free (&loops_info); + + /* Clean up the CFG so that any forwarder blocks created by loop + canonicalization are removed. */ + cleanup_tree_cfg (); + /* If we prove certain blocks are unreachable, then we want to repeat the dominator optimization process as PHI nodes may have turned into copies which allows better propagation of *************** tree_ssa_dominator_optimize (void) *** 417,422 **** --- 430,439 ---- /* Optimize the dominator tree. */ cfg_altered = false; + /* We need accurate information regarding back edges in the CFG + for jump threading. */ + mark_dfs_back_edges (); + /* Recursively walk the dominator tree optimizing statements. */ walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); *************** tree_ssa_dominator_optimize (void) *** 445,452 **** } if (cfg_altered) ! free_dominance_info (CDI_DOMINATORS); cfg_altered |= cleanup_tree_cfg (); calculate_dominance_info (CDI_DOMINATORS); rewrite_ssa_into_ssa (); --- 462,485 ---- } if (cfg_altered) ! free_dominance_info (CDI_DOMINATORS); ! cfg_altered |= cleanup_tree_cfg (); + + if (rediscover_loops_after_threading) + { + /* Rerun basic loop analysis to discover any newly + created loops and update the set of exit edges. */ + rediscover_loops_after_threading = false; + flow_loops_find (&loops_info); + mark_loop_exit_edges (&loops_info); + flow_loops_free (&loops_info); + + /* Remove any forwarder blocks inserted by loop + header canonicalization. */ + cleanup_tree_cfg (); + } + calculate_dominance_info (CDI_DOMINATORS); rewrite_ssa_into_ssa (); Index: tree-ssa-threadupdate.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-ssa-threadupdate.c,v retrieving revision 2.20 diff -c -p -r2.20 tree-ssa-threadupdate.c *** tree-ssa-threadupdate.c 27 Jan 2005 18:22:22 -0000 2.20 --- tree-ssa-threadupdate.c 4 Mar 2005 20:55:00 -0000 *************** Boston, MA 02111-1307, USA. */ *** 36,41 **** --- 36,42 ---- #include "tree-flow.h" #include "tree-dump.h" #include "tree-pass.h" + #include "cfgloop.h" /* Given a block B, update the CFG and SSA graph to reflect redirecting one or more in-edges to B to instead reach the destination of an *************** struct redirection_data *** 131,136 **** --- 132,139 ---- /* Main data structure to hold information for duplicates of BB. */ static htab_t redirection_data; + bool rediscover_loops_after_threading; + /* Data structure of information to pass to hash table traversal routines. */ struct local_info { *************** struct local_info *** 140,145 **** --- 143,151 ---- /* A template copy of BB with no outgoing edges or control statement that we use for creating copies. */ basic_block template_block; + + /* TRUE if we thread one or more jumps, FALSE otherwise. */ + bool jumps_threaded; }; /* Remove the last statement in block BB if it is a control statement *************** fixup_template_block (void **slot, void *** 361,366 **** --- 367,565 ---- return 1; } + /* Not all jump threading requests are useful. In particular some + jump threading requests can create irreducible regions which are + undesirable. + + This routine will examine the BB's incoming edges for jump threading + requests which, if acted upon, would create irreducible regions. Any + such jump threading requests found will be pruned away. */ + + static void + prune_undesirable_thread_requests (basic_block bb) + { + edge e; + edge_iterator ei; + bool may_create_irreducible_region = false; + unsigned int num_outgoing_edges_into_loop = 0; + + /* For the heuristics below, we need to know if BB has more than + one outgoing edge into a loop. */ + FOR_EACH_EDGE (e, ei, bb->succs) + num_outgoing_edges_into_loop += ((e->flags & EDGE_LOOP_EXIT) == 0); + + if (num_outgoing_edges_into_loop > 1) + { + edge backedge = NULL; + + /* Consider the effect of threading the edge (0, 1) to 2 on the left + CFG to produce the right CFG: + + + 0 0 + | | + 1<--+ 2<--------+ + / \ | | | + 2 3 | 4<----+ | + \ / | / \ | | + 4---+ E 1-- | --+ + | | | + E 3---+ + + + Threading the (0, 1) edge to 2 effectively creates two loops + (2, 4, 1) and (4, 1, 3) which are neither disjoint nor nested. + This is not good. + + However, we do need to be able to thread (0, 1) to 2 or 3 + in the left CFG below (which creates the middle and right + CFGs with nested loops). + + 0 0 0 + | | | + 1<--+ 2<----+ 3<-+<-+ + /| | | | | | | + 2 | | 3<-+ | 1--+ | + \| | | | | | | + 3---+ 1--+--+ 2-----+ + + + A safe heuristic appears to be to only allow threading if BB + has a single incoming backedge from one of its direct successors. */ + + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (e->flags & EDGE_DFS_BACK) + { + if (backedge) + { + backedge = NULL; + break; + } + else + { + backedge = e; + } + } + } + + if (backedge && find_edge (bb, backedge->src)) + ; + else + may_create_irreducible_region = true; + } + else + { + edge dest = NULL; + + /* If we thread across the loop entry block (BB) into the + loop and BB is still reached from outside the loop, then + we would create an irreducible CFG. Consider the effect + of threading the edge (1, 4) to 5 on the left CFG to produce + the right CFG + + 0 0 + / \ / \ + 1 2 1 2 + \ / | | + 4<----+ 5<->4 + / \ | | + E 5---+ E + + + Threading the (1, 4) edge to 5 creates two entry points + into the loop (4, 5) (one from block 1, the other from + block 2). A classic irreducible region. + + So look at all of BB's incoming edges which are not + backedges and which are not threaded to the loop exit. + If that subset of incoming edges do not all thread + to the same block, then threading any of them will create + an irreducible region. */ + + FOR_EACH_EDGE (e, ei, bb->preds) + { + edge e2; + + /* We ignore back edges for now. This may need refinement + as threading a backedge creates an inner loop which + we would need to verify has a single entry point. + + If all backedges thread to new locations, then this + block will no longer have incoming backedges and we + need not worry about creating irreducible regions + by threading through BB. I don't think this happens + enough in practice to worry about it. */ + if (e->flags & EDGE_DFS_BACK) + continue; + + /* If the incoming edge threads to the loop exit, then it + is clearly safe. */ + e2 = e->aux; + if (e2 && (e2->flags & EDGE_LOOP_EXIT)) + continue; + + /* E enters the loop header and is not threaded. We can + not allow any other incoming edges to thread into + the loop as that would create an irreducible region. */ + if (!e2) + { + may_create_irreducible_region = true; + break; + } + + /* We know that this incoming edge threads to a block inside + the loop. This edge must thread to the same target in + the loop as any previously seen threaded edges. Otherwise + we will create an irreducible region. */ + if (!dest) + dest = e2; + else if (e2 != dest) + { + may_create_irreducible_region = true; + break; + } + } + } + + /* If we might create an irreducible region, then cancel any of + the jump threading requests for incoming edges which are + not backedges and which do not thread to the exit block. */ + if (may_create_irreducible_region) + { + FOR_EACH_EDGE (e, ei, bb->preds) + { + edge e2; + + /* Ignore back edges. */ + if (e->flags & EDGE_DFS_BACK) + continue; + + e2 = e->aux; + + /* If this incoming edge was not threaded, then there is + nothing to do. */ + if (!e2) + continue; + + /* If this incoming edge threaded to the loop exit, + then it can be ignored as it is safe. */ + if (e2->flags & EDGE_LOOP_EXIT) + continue; + + if (e2) + { + /* This edge threaded into the loop and the jump thread + request must be cancelled. */ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Not threading jump %d --> %d to %d\n", + e->src->index, e->dest->index, e2->dest->index); + e->aux = NULL; + } + } + } + } + /* Hash table traversal callback to redirect each incoming edge associated with this hash table element to its new destination. */ *************** redirect_edges (void **slot, void *data) *** 417,426 **** /* And fixup the flags on the single remaining edge. */ EDGE_SUCC (local_info->bb, 0)->flags ! &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); EDGE_SUCC (local_info->bb, 0)->flags |= EDGE_FALLTHRU; } } return 1; } --- 616,630 ---- /* And fixup the flags on the single remaining edge. */ EDGE_SUCC (local_info->bb, 0)->flags ! &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL); EDGE_SUCC (local_info->bb, 0)->flags |= EDGE_FALLTHRU; } } + + /* Indicate that we actually threaded one or more jumps. */ + if (rd->incoming_edges) + local_info->jumps_threaded = true; + return 1; } *************** redirect_edges (void **slot, void *data) *** 453,459 **** per block with incoming threaded edges, which can lower the cost of the true incremental update algorithm. */ ! static void thread_block (basic_block bb) { /* E is an incoming edge into BB that we may or may not want to --- 657,663 ---- per block with incoming threaded edges, which can lower the cost of the true incremental update algorithm. */ ! static bool thread_block (basic_block bb) { /* E is an incoming edge into BB that we may or may not want to *************** thread_block (basic_block bb) *** 462,467 **** --- 666,676 ---- edge_iterator ei; struct local_info local_info; + /* FOUND_BACKEDGE indicates that we found an incoming backedge + into BB, in which case we may ignore certain jump threads + to avoid creating irreducible regions. */ + bool found_backedge = false; + /* ALL indicates whether or not all incoming edges into BB should be threaded to a duplicate of BB. */ bool all = true; *************** thread_block (basic_block bb) *** 475,480 **** --- 684,700 ---- redirection_data_eq, free); + FOR_EACH_EDGE (e, ei, bb->preds) + found_backedge |= ((e->flags & EDGE_DFS_BACK) != 0); + + /* If BB has incoming backedges, then threading across BB might + introduce an irreducible region, which would be undesirable + as that inhibits various optimizations later. Prune away + any jump threading requests which we know will result in + an irreducible region. */ + if (found_backedge) + prune_undesirable_thread_requests (bb); + /* Record each unique threaded destination into a hash table for efficient lookups. */ FOR_EACH_EDGE (e, ei, bb->preds) *************** thread_block (basic_block bb) *** 487,492 **** --- 707,736 ---- { edge e2 = e->aux; + /* If we thread to a loop exit edge, then we will need to + rediscover the loop exit edges. While it may seem that + the new edge is a loop exit edge, that is not the case. + Consider threading the edge (5,6) to E in the CFG on the + left which creates the CFG on the right: + + + 0<--+ 0<---+ + / \ | / \ | + 1 2 | 1 2 | + / \ | | / \ | | + 3 4 | | 3 4 6--+ + \ / | | \ / + 5 | | 5 + \ / | | + 6---+ E + | + E + + After threading, the edge (0, 1) is the loop exit edge and + the nodes 0, 2, 6 are the only nodes in the loop. */ + if (e2->flags & EDGE_LOOP_EXIT) + rediscover_loops_after_threading = true; + /* Insert the outgoing edge into the hash table if it is not already in the hash table. */ lookup_redirection_data (e2, e, true); *************** thread_block (basic_block bb) *** 514,519 **** --- 758,764 ---- the rest of the duplicates. */ local_info.template_block = NULL; local_info.bb = bb; + local_info.jumps_threaded = false; htab_traverse (redirection_data, create_duplicates, &local_info); /* The template does not have an outgoing edge. Create that outgoing *************** thread_block (basic_block bb) *** 532,537 **** --- 777,785 ---- /* Done with this block. Clear REDIRECTION_DATA. */ htab_delete (redirection_data); redirection_data = NULL; + + /* Indicate to our caller whether or not any jumps were threaded. */ + return local_info.jumps_threaded; } /* Walk through all blocks and thread incoming edges to the block's *************** thread_through_all_blocks (void) *** 558,571 **** basic_block bb; bool retval = false; FOR_EACH_BB (bb) { if (bb_ann (bb)->incoming_edge_threaded) { ! thread_block (bb); ! retval = true; bb_ann (bb)->incoming_edge_threaded = false; } } return retval; } --- 806,822 ---- basic_block bb; bool retval = false; + rediscover_loops_after_threading = false; + FOR_EACH_BB (bb) { if (bb_ann (bb)->incoming_edge_threaded) { ! retval |= thread_block (bb); bb_ann (bb)->incoming_edge_threaded = false; + } } + return retval; } Index: Makefile.in =================================================================== RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v retrieving revision 1.1450 diff -c -p -r1.1450 Makefile.in *** Makefile.in 1 Mar 2005 17:58:59 -0000 1.1450 --- Makefile.in 4 Mar 2005 21:03:24 -0000 *************** tree-ssa-dom.o : tree-ssa-dom.c $(TREE_F *** 1638,1648 **** $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h diagnostic.h \ errors.h function.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ $(BASIC_BLOCK_H) domwalk.h real.h tree-pass.h $(FLAGS_H) langhooks.h \ ! tree-ssa-propagate.h tree-ssa-threadupdate.o : tree-ssa-threadupdate.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h \ diagnostic.h errors.h function.h $(TM_H) coretypes.h $(TREE_DUMP_H) \ ! $(BASIC_BLOCK_H) $(FLAGS_H) tree-pass.h tree-ssanames.o : tree-ssanames.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) varray.h $(GGC_H) gt-tree-ssanames.h $(TREE_FLOW_H) tree-phinodes.o : tree-phinodes.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ --- 1638,1648 ---- $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h diagnostic.h \ errors.h function.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ $(BASIC_BLOCK_H) domwalk.h real.h tree-pass.h $(FLAGS_H) langhooks.h \ ! tree-ssa-propagate.h cfgloop.h tree-ssa-threadupdate.o : tree-ssa-threadupdate.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) output.h \ diagnostic.h errors.h function.h $(TM_H) coretypes.h $(TREE_DUMP_H) \ ! $(BASIC_BLOCK_H) $(FLAGS_H) tree-pass.h cfgloop.h tree-ssanames.o : tree-ssanames.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(TREE_H) varray.h $(GGC_H) gt-tree-ssanames.h $(TREE_FLOW_H) tree-phinodes.o : tree-phinodes.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \