[CFG] Loop unswitching
Zdenek Dvorak
rakdver@atrey.karlin.mff.cuni.cz
Fri Feb 22 13:35:00 GMT 2002
Hello.
This patch implements loop unswitching and brings in most of the machinery
also used in loop peeling/unrolling. Bootstrapped on i386.
Zdenek Dvorak
Changelog:
* loop-new.c (redirect_edge_with_update, loop_delete_branch_edge,
split_loop_bb): New, cfg manipulation.
(duplicate_loop, duplicate_subloops, copy_loops_to,
alp_enum_p, add_loop, place_new_loop): New, loop
structure duplication.
(copy_bbs, remove_exit_edges, loopify, can_duplicate_loop_p,
duplicate_loop_to_header_edge): New, loop body copying.
(rpe_enum_p, remove_path, unswitch_loop, unswitch_single_loop,
may_unswitch_on_p, unswitch_loops): New, loop unswitching.
* toplev.c (flag_unswitch_loops): New.
(rest_of_compilation): Add loop unswitching.
Index: loop-new.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/loop-new.c,v
retrieving revision 1.1.2.4
diff -c -3 -p -r1.1.2.4 loop-new.c
*** loop-new.c 2002/02/21 16:16:54 1.1.2.4
--- loop-new.c 2002/02/22 19:24:10
*************** Software Foundation, 59 Temple Place - S
*** 41,46 ****
--- 41,57 ----
#include "cfglayout.h"
#include "loop.h"
+ /* This controls how big loops are optimized, and by how much we allow
+ them to grow. */
+
+ #ifndef MAX_UNSWITCH_INSNS
+ #define MAX_UNSWITCH_INSNS 200
+ #endif
+
+ #ifndef MAX_NUM_UNSWITCH
+ #define MAX_NUM_UNSWITCH 8
+ #endif
+
/* Stupid definitions of dominator manipulation. */
basic_block
*************** iterate_fix_dominators (doms, bbs, n, lo
*** 230,235 ****
--- 241,270 ----
sbitmap_free (affected);
}
+ static struct loop *duplicate_loop PARAMS ((struct loops *, struct loop *, struct loop *));
+ static void duplicate_subloops PARAMS ((struct loops *, struct loop *, struct loop *));
+ static void copy_loops_to PARAMS ((struct loops *, struct loop **, int, struct loop *));
+ static int duplicate_loop_to_header_edge PARAMS ((struct loop *, edge, struct loops *, int, int, int, int));
+ static void redirect_edge_with_update PARAMS ((edge, basic_block));
+ static void loop_delete_branch_edge PARAMS ((edge));
+ static void copy_bbs PARAMS ((basic_block *, int, edge, edge, basic_block **, struct loops *, edge *, edge *));
+ static void remove_exit_edges PARAMS ((basic_block *, int, basic_block));
+ static struct loop *unswitch_loop PARAMS ((struct loops *, struct loop *, basic_block));
+ static bool rpe_enum_p PARAMS ((basic_block, void *));
+ static void remove_path PARAMS ((edge, struct loop *, sbitmap *, int));
+ static struct loop *loopify PARAMS ((struct loops *, edge, edge, basic_block));
+ static bool alp_enum_p PARAMS ((basic_block, void *));
+ static void add_loop PARAMS ((struct loops *, struct loop *));
+ static void place_new_loop PARAMS ((struct loops *, struct loop *));
+ static void unswitch_single_loop PARAMS ((struct loops *, struct loop *, rtx, int *));
+ static bool can_duplicate_loop_p PARAMS ((struct loop *loop));
+ static bool may_unswitch_on_p PARAMS ((struct loops *, basic_block, struct loop *, basic_block *));
+ static edge split_loop_bb PARAMS ((struct loops *, basic_block, rtx));
+ static rtx reversed_condition PARAMS ((rtx));
+
+ static rtx always_true_cond;
+ static rtx always_false_cond;
+
/* Initialize loop optimizer. */
struct loops *
*************** loop_optimizer_init (dumpfile)
*** 237,242 ****
--- 272,282 ----
FILE *dumpfile;
{
struct loops *loops = xcalloc (1, sizeof (struct loops));
+ rtx reg;
+
+ reg = gen_reg_rtx (SImode);
+ always_true_cond = gen_rtx_fmt_ee (EQ, SImode, reg, reg);
+ always_false_cond = gen_rtx_fmt_ee (NE, SImode, reg, reg);
/* Find the loops. */
*************** loop_optimizer_finalize (loops, dumpfile
*** 303,307 ****
--- 343,1497 ----
#ifdef ENABLE_CHECKING
verify_flow_info ();
#endif
+ }
+
+ /* Unswitch LOOPS. */
+ void
+ unswitch_loops (loops)
+ struct loops *loops;
+ {
+ int i;
+ /* Scan the loops, last ones first, since this means inner ones are done
+ before outer ones. */
+ for (i = loops->num - 1; i > 0; i--)
+ {
+ struct loop *loop = loops->parray[i];
+ int tot = 0;
+ unswitch_single_loop (loops, loop, NULL_RTX, &tot);
+ #ifdef ENABLE_CHECKING
+ verify_dominators ();
+ verify_loop_structure (loops, VLS_FOR_LOOP_NEW);
+ #endif
+ }
+ }
+
+ /* Splits basic block BB after INSN, returns created edge. Updates loops
+ and dominators. */
+ static edge
+ split_loop_bb (loops, bb, insn)
+ struct loops *loops;
+ basic_block bb;
+ rtx insn;
+ {
+ edge e;
+ basic_block *dom_bbs;
+ int n_dom_bbs, i;
+
+ /* Split the block. */
+ e = split_block (bb, insn);
+
+ /* Add dest to loop. */
+ add_bb_to_loop (e->dest, e->src->loop_father);
+
+ /* Fix dominators. */
+ n_dom_bbs = get_dominated_by (loops->cfg.dom, e->src, &dom_bbs);
+ for (i = 0; i < n_dom_bbs; i++)
+ set_immediate_dominator (loops->cfg.dom, dom_bbs[i], e->dest);
+ free (dom_bbs);
+ set_immediate_dominator (loops->cfg.dom, e->dest, e->src);
+
+ /* Take care of RBI. */
+ alloc_aux_for_block (e->dest, sizeof (struct reorder_block_def));
+
+ return e;
+ }
+
+ /* Checks whether we can unswitch LOOP on basic block BB. LOOP BODY
+ is provided to save time. */
+ static bool
+ may_unswitch_on_p (loops, bb, loop, body)
+ struct loops *loops;
+ basic_block bb;
+ struct loop *loop;
+ basic_block *body;
+ {
+ rtx set, test;
+ int i;
+
+ /* It must be a simple conditional jump. */
+ if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
+ return false;
+ if (!any_condjump_p (bb->end))
+ return false;
+
+ /* Both branches must be inside loop. */
+ if (!flow_bb_inside_loop_p (loop, bb->succ->dest))
+ return false;
+ if (!flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
+ return false;
+
+ /* Latch must be dominated by it (ugly technical restriction,
+ we should remove this later). */
+ if (!dominated_by_p (loops->cfg.dom, loop->latch, bb))
+ return false;
+
+ /* Condition must be invariant. */
+ set = pc_set (bb->end);
+ if (!set)
+ abort ();
+ test = XEXP (SET_SRC (set), 0);
+
+ /* Test must be non-trivial (we use trivial ones to disable unreachable
+ branches). */
+ if (rtx_equal_p (test, always_true_cond)
+ || rtx_equal_p (test, always_false_cond))
+ return false;
+
+ for (i = 0; i < loop->num_nodes; i++)
+ if (modified_between_p (test, body[i]->head, NEXT_INSN (body[i]->end)))
+ return false;
+
+ return true;
+ }
+
+ /* Reverses CONDition; returns NULL if we cannot. */
+ static rtx
+ reversed_condition (cond)
+ rtx cond;
+ {
+ enum rtx_code reversed;
+ reversed = reversed_comparison_code (cond, NULL);
+ if (reversed == UNKNOWN)
+ return NULL_RTX;
+ else
+ return gen_rtx_fmt_ee (reversed,
+ GET_MODE (cond), XEXP (cond, 0),
+ XEXP (cond, 1));
+ }
+
+ /* Unswitch single LOOP. COND_CHECKED holds list of conditions we already
+ unswitched on and are true in our branch. NUM is number of unswitchings
+ done; do not allow it to grow too much, it is too easy to create an example
+ where the code would grow exponentially. */
+ static void
+ unswitch_single_loop (loops, loop, cond_checked, num)
+ struct loops *loops;
+ struct loop *loop;
+ rtx cond_checked;
+ int *num;
+ {
+ basic_block *bbs, bb;
+ struct loop *nloop;
+ int i, true_first;
+ rtx cond, rcond, conds, rconds, acond, set;
+
+ /* Do not unswitch too much. */
+ if (*num > MAX_NUM_UNSWITCH)
+ return;
+
+ /* We only unswitch innermost loops (at least for now). */
+ if (loop->inner)
+ return;
+
+ /* And we must be able to duplicate loop body. */
+ if (!can_duplicate_loop_p (loop))
+ return;
+
+ /* Check the size of loop. */
+ if (num_loop_insns (loop) > MAX_UNSWITCH_INSNS)
+ return;
+
+ /* Find a bb to unswitch on. We use just a stupid test of invariantness
+ of the condition: all used regs must not be modified inside loop body. */
+ bbs = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ if (may_unswitch_on_p (loops, bbs[i], loop, bbs))
+ {
+ int always_true = 0;
+ int always_false = 0;
+
+ if (!(cond = get_condition (bbs[i]->end, NULL)))
+ abort ();
+ rcond = reversed_condition (cond);
+
+ /* Check whether the result can be predicted. */
+ for (acond = cond_checked; acond; acond = XEXP (acond, 1))
+ {
+ if (rtx_equal_p (cond, XEXP (acond, 0)))
+ {
+ /* True. */
+ always_true = 1;
+ break;
+ }
+ if (rtx_equal_p (rcond, XEXP (acond, 0)))
+ {
+ /* False. */
+ always_false = 1;
+ break;
+ }
+ }
+
+ /* In case we are able to predict result, we replace condition by
+ corresponding trivial one. It is not best way -- removing the
+ unreachable branch would be better as it could make more
+ opportunities for unswitching/ unrolling. There are two problems
+ with this. The first is handling of dominators -- we would have to
+ recount dominators for big area. The second and more important one
+ is that we could create unreachable blocks; we would have to remove
+ them too, having big trouble with keeping loop structure up-to-date.
+ */
+ set = pc_set (bbs[i]->end);
+ if (!set)
+ abort ();
+ if (always_true)
+ {
+ if (!validate_change (bbs[i]->end, &XEXP (SET_SRC (set), 0),
+ copy_rtx (always_true_cond), 0))
+ break;
+ }
+ else if (always_false)
+ {
+ if (!validate_change (bbs[i]->end, &XEXP (SET_SRC (set), 0),
+ copy_rtx (always_false_cond), 0))
+ break;
+ }
+ else
+ break;
+ }
+
+ if (i == loop->num_nodes)
+ {
+ free (bbs);
+ return;
+ }
+
+ conds = alloc_EXPR_LIST (0, cond, cond_checked);
+ if (rcond)
+ rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
+ else
+ rconds = cond_checked;
+
+ /* Separate condition. */
+ bb = split_loop_bb (loops, bbs[i], PREV_INSN (bbs[i]->end))->dest;
+ free (bbs);
+ true_first = !(bb->succ->flags & EDGE_FALLTHRU);
+
+ /* Unswitch the loop. */
+ nloop = unswitch_loop (loops, loop, bb);
+ if (!nloop)
+ abort ();
+
+ /* Count number of unswitchings. */
+ (*num)++;
+
+ /* Invoke itself on modified loops. */
+ unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num);
+ unswitch_single_loop (loops, loop, true_first ? rconds : conds, num);
+ free_EXPR_LIST_node (conds);
+ if (rcond)
+ free_EXPR_LIST_node (rconds);
+ }
+
+ /* Checks whether BB is inside RPE_LOOP and is dominated by RPE_DOM. */
+ struct rpe_data
+ {
+ struct loop *loop;
+ basic_block dom;
+ sbitmap *doms;
+ };
+
+ static bool
+ rpe_enum_p (bb, data)
+ basic_block bb;
+ void *data;
+ {
+ struct rpe_data *rpe = data;
+ return flow_bb_inside_loop_p (rpe->loop, bb)
+ && dominated_by_p (rpe->doms, bb, rpe->dom);
+ }
+
+ /* Remove given edge E and all bbs inside LOOP that are dominated by target
+ of E. Fix dominators by redirecting to copy if FIX is set. */
+ static void
+ remove_path (e, loop, doms, fix)
+ edge e;
+ struct loop *loop;
+ sbitmap *doms;
+ int fix;
+ {
+ int nrem, i, j;
+ basic_block *rem_bbs, bb, nbb, *dom_bbs, dom_bb, ndom;
+ int n_dom_bbs;
+ edge ae;
+ struct rpe_data rpe;
+
+ /* Check for case when we are able to get to e->dest by other ways. */
+ if (e->dest->pred->pred_next)
+ {
+ for (ae = e->dest->pred; ae; ae = ae->pred_next)
+ if (ae != e && !dominated_by_p (doms, ae->src, e->dest))
+ break;
+ if (ae)
+ {
+ /* We just want to cancel edge. */
+ loop_delete_branch_edge (e);
+ return;
+ }
+ }
+
+ /* Find bbs we are interested in. */
+ rpe.loop = loop;
+ rpe.dom = e->dest;
+ rpe.doms = doms;
+ rem_bbs = xcalloc (loop->num_nodes, sizeof (basic_block));
+ nrem = dfs_enumerate_from (e->dest, 0, rpe_enum_p, rem_bbs,
+ loop->num_nodes, &rpe);
+
+ if (fix)
+ {
+ /* Fix dominators. */
+ for (i = 0; i < nrem; i++)
+ {
+ bb = rem_bbs[i];
+ ndom = RBI (bb)->copy;
+ n_dom_bbs = get_dominated_by (doms, bb, &dom_bbs);
+ for (j = 0; j < n_dom_bbs; j++)
+ {
+ /* This may also be bbs inside loop; but it does not matter,
+ as they will be deleted anyway. */
+ dom_bb = dom_bbs[j];
+ set_immediate_dominator (doms, dom_bb, ndom);
+ }
+ free (dom_bbs);
+ }
+ /* Fix frequencies. */
+ for (i = 0; i < nrem; i++)
+ {
+ bb = rem_bbs[i];
+ nbb = RBI (bb)->copy;
+ nbb->frequency += bb->frequency;
+ }
+ }
+ else
+ {
+ /* Fix frequencies. */
+ for (i = 0; i < nrem; i++)
+ {
+ bb = rem_bbs[i];
+ nbb = RBI (bb)->original;
+ nbb->frequency += bb->frequency;
+ }
+ }
+
+ /* Remove the jump and edge. */
+ loop_delete_branch_edge (e);
+
+ /* Remove the blocks. */
+ for (i = 0; i < nrem; i++)
+ {
+ edge next_ae;
+ for (ae = rem_bbs[i]->succ; ae; ae = next_ae)
+ {
+ bb = ae->dest;
+ next_ae = ae->succ_next;
+ remove_edge (ae);
+ }
+ remove_bb_from_loops (rem_bbs[i]);
+ flow_delete_block (rem_bbs[i]);
+ }
+ free (rem_bbs);
+ }
+
+ /* Predicate for enumeration in add_loop. */
+ static bool
+ alp_enum_p (bb, alp_header)
+ basic_block bb;
+ void *alp_header;
+ {
+ return bb != (basic_block) alp_header;
+ }
+
+ /* Compute loop from header and latch info filled in LOOP and add it to
+ LOOPS. */
+ static void
+ add_loop (loops, loop)
+ struct loops *loops;
+ struct loop *loop;
+ {
+ basic_block *bbs;
+ int i, n;
+
+ /* Add it to loop structure. */
+ place_new_loop (loops, loop);
+ loop->level = 1;
+
+ /* Find its nodes. */
+ bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
+ n = dfs_enumerate_from (loop->latch, 1, alp_enum_p, bbs, n_basic_blocks, loop->header);
+
+ /* Add those nodes. */
+ for (i = 0; i < n; i++)
+ add_bb_to_loop (bbs[i], loop);
+ add_bb_to_loop (loop->header, loop);
+
+ free (bbs);
+ }
+
+ /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
+ latch to header. Everything between them plus LATCH_EDGE destination
+ must be dominated by HEADER_EDGE destination. Add SWITCH_BB to original
+ entry edge. Returns newly created loop. Dominators outside the area
+ are intentionally left wrong. */
+ static struct loop *
+ loopify (loops, latch_edge, header_edge, switch_bb)
+ struct loops *loops;
+ edge latch_edge;
+ edge header_edge;
+ basic_block switch_bb;
+ {
+ basic_block succ_bb = latch_edge->dest;
+ basic_block pred_bb = header_edge->src;
+ struct loop *loop = xcalloc (1, sizeof (struct loop));
+ struct loop *outer = succ_bb->loop_father->outer;
+ int freq, prob, tot_prob, i;
+ basic_block *bbs;
+
+ loop->header = header_edge->dest;
+ loop->latch = latch_edge->src;
+
+ freq = EDGE_FREQUENCY (header_edge);
+ prob = switch_bb->succ->probability;
+ tot_prob = prob + switch_bb->succ->succ_next->probability;
+ if (tot_prob == 0)
+ tot_prob = 1;
+
+ /* Redirect edges. */
+ redirect_edge_with_update (latch_edge, loop->header);
+ redirect_edge_with_update (header_edge, switch_bb);
+ redirect_edge_with_update (switch_bb->succ->succ_next, loop->header);
+ redirect_edge_with_update (switch_bb->succ, succ_bb);
+
+ /* And update dominators. */
+ set_immediate_dominator (loops->cfg.dom, switch_bb, pred_bb);
+ set_immediate_dominator (loops->cfg.dom, loop->header, switch_bb);
+ set_immediate_dominator (loops->cfg.dom, succ_bb, switch_bb);
+
+ /* Compute new loop. */
+ add_loop (loops, loop);
+ flow_loop_tree_node_add (outer, loop);
+
+ /* And add switch_bb to appropriate loop. */
+ add_bb_to_loop (switch_bb, outer);
+
+ /* Now fix frequencies. */
+ switch_bb->frequency = freq;
+ bbs = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ bbs[i]->frequency = (bbs[i]->frequency * prob / tot_prob);
+ free (bbs);
+ bbs = get_loop_body (succ_bb->loop_father);
+ for (i = 0; i < succ_bb->loop_father->num_nodes; i++)
+ bbs[i]->frequency = (bbs[i]->frequency * (tot_prob - prob) / tot_prob);
+ free (bbs);
+
+ return loop;
+ }
+
+ /* Unswitch a LOOP w.r.t. a given basic block. We only support unswitching
+ of innermost loops (this is not a fundamental restriction, I'm just too lazy
+ to handle subloops). UNSWITCH_ON must be executed in every iteration,
+ i.e. it must dominate LOOP latch. Returns NULL if impossible, new
+ loop otherwise. */
+ static struct loop *
+ unswitch_loop (loops, loop, unswitch_on)
+ struct loops *loops;
+ struct loop *loop;
+ basic_block unswitch_on;
+ {
+ edge entry, e, latch_edge;
+ basic_block switch_bb, unswitch_on_alt, *bbs, *dom_bbs, dom_bb;
+ struct loop *nloop;
+ int n_dom_bbs, i, j;
+ int just_one_edge;
+
+ /* Some sanity checking. */
+ if (!flow_bb_inside_loop_p (loop, unswitch_on))
+ abort ();
+ if (!unswitch_on->succ || !unswitch_on->succ->succ_next ||
+ unswitch_on->succ->succ_next->succ_next)
+ abort ();
+ if (!dominated_by_p (loops->cfg.dom, loop->latch, unswitch_on))
+ abort ();
+ if (loop->inner)
+ abort ();
+ if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->dest))
+ abort ();
+ if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest))
+ abort ();
+
+ /* Will we be able to perform redirection? */
+ if (!any_condjump_p (unswitch_on->end))
+ return NULL;
+ if (!cfg_layout_can_duplicate_bb_p (unswitch_on))
+ return NULL;
+
+ for (entry = loop->header->pred;
+ entry->src == loop->latch;
+ entry = entry->pred_next);
+
+ /* Make a copy. */
+ if (!duplicate_loop_to_header_edge (loop, entry, loops, 1, 0, 0, 0))
+ return NULL;
+
+ /* Record switch block. */
+ unswitch_on_alt = RBI (unswitch_on)->copy;
+
+ /* Make a copy of unswitched block. */
+ switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL);
+ RBI (unswitch_on)->copy = unswitch_on_alt;
+
+ /* Loopify the copy. */
+ for (latch_edge = RBI (loop->latch)->copy->succ;
+ latch_edge->dest != loop->header;
+ latch_edge = latch_edge->succ_next);
+ nloop = loopify (loops, latch_edge,
+ RBI (loop->header)->copy->pred, switch_bb);
+
+ /* Remove paths from loop copies and update dominators.
+ We rely on the fact that cfg_layout_duplicate_bb reverses
+ list of edges here. */
+ for (e = unswitch_on->succ->succ_next->dest->pred; e; e = e->pred_next)
+ if (e->src != unswitch_on &&
+ !dominated_by_p (loops->cfg.dom, e->src, e->dest))
+ break;
+ just_one_edge = (e != NULL);
+ remove_path (unswitch_on->succ, loop, loops->cfg.dom, 1);
+ remove_path (unswitch_on_alt->succ, nloop, loops->cfg.dom, 0);
+
+ /* Now fix dominators of outside blocks. */
+ bbs = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ if (!just_one_edge &&
+ dominated_by_p (loops->cfg.dom, bbs[i], unswitch_on->succ->dest))
+ continue;
+ n_dom_bbs = get_dominated_by (loops->cfg.dom, bbs[i], &dom_bbs);
+ for (j = 0; j < n_dom_bbs; j++)
+ {
+ dom_bb = dom_bbs[j];
+ if (flow_bb_inside_loop_p (loop, dom_bb))
+ continue;
+ set_immediate_dominator (loops->cfg.dom, dom_bb, switch_bb);
+ }
+ free (dom_bbs);
+ }
+ free (bbs);
+
+ /* Now the hard case. Dominators of blocks inside loop immediately
+ dominated by unswitch_on may behave really weird. */
+ n_dom_bbs = get_dominated_by (loops->cfg.dom, unswitch_on, &dom_bbs);
+ j = 0;
+ for (i = 0; i < n_dom_bbs; i++)
+ if (flow_bb_inside_loop_p (loop, dom_bbs[i]))
+ dom_bbs[j++] = dom_bbs [i];
+ iterate_fix_dominators (loops->cfg.dom, dom_bbs, j, 1);
+ free (dom_bbs);
+
+ n_dom_bbs = get_dominated_by (loops->cfg.dom, unswitch_on_alt, &dom_bbs);
+ j = 0;
+ for (i = 0; i < n_dom_bbs; i++)
+ if (flow_bb_inside_loop_p (nloop, dom_bbs[i]))
+ dom_bbs[j++] = dom_bbs [i];
+ iterate_fix_dominators (loops->cfg.dom, dom_bbs, j, 1);
+ free (dom_bbs);
+
+ return nloop;
+ }
+
+
+ /* Creates place for a new LOOP. */
+ static void
+ place_new_loop (loops, loop)
+ struct loops *loops;
+ struct loop *loop;
+ {
+ loops->parray =
+ xrealloc (loops->parray, (loops->num + 1) * sizeof (struct loop *));
+ loops->parray[loops->num] = loop;
+
+ loop->num = loops->num++;
+ }
+
+ /* Copies structure of LOOP into TARGET. */
+ static struct loop *
+ duplicate_loop (loops, loop, target)
+ struct loops *loops;
+ struct loop *loop;
+ struct loop *target;
+ {
+ struct loop *cloop;
+ cloop = xcalloc (1, sizeof (struct loop));
+ place_new_loop (loops, cloop);
+
+ /* Initialize copied loop. */
+ cloop->level = loop->level;
+
+ /* Set it as copy of loop. */
+ loop->copy = cloop;
+
+ /* Add it to target. */
+ flow_loop_tree_node_add (target, cloop);
+
+ return cloop;
+ }
+
+ /* Copies structure of subloops of LOOP into TARGET. */
+ static void
+ duplicate_subloops (loops, loop, target)
+ struct loops *loops;
+ struct loop *loop;
+ struct loop *target;
+ {
+ struct loop *aloop, *cloop;
+
+ for (aloop = loop->inner; aloop; aloop = aloop->next)
+ {
+ cloop = duplicate_loop (loops, aloop, target);
+ duplicate_subloops (loops, aloop, cloop);
+ }
+ }
+
+ /* Copies structure of COPIED_LOOPS into TARGET. */
+ static void
+ copy_loops_to (loops, copied_loops, n, target)
+ struct loops *loops;
+ struct loop **copied_loops;
+ int n;
+ struct loop *target;
+ {
+ struct loop *aloop;
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ aloop = duplicate_loop (loops, copied_loops[i], target);
+ duplicate_subloops (loops, copied_loops[i], aloop);
+ }
+ }
+
+ /* Redirects edge E to DEST. */
+ static void
+ redirect_edge_with_update (e, dest)
+ edge e;
+ basic_block dest;
+ {
+ int dest_index = dest->index;
+ if (e->dest == dest)
+ return;
+
+ if (!e->src->succ->succ_next)
+ {
+ /* Code of cfglayout should be able to handle this, and redirection
+ is happier. */
+ e->flags |= EDGE_FALLTHRU;
+ }
+
+ /* Cfglayout says: */
+ /* Avoid redirect_edge_and_branch from overactive optimizing. */
+ dest->index = n_basic_blocks + 1;
+ if (e->flags & EDGE_FALLTHRU)
+ redirect_edge_succ (e, dest);
+ else
+ {
+ if (!redirect_edge_and_branch (e, dest))
+ abort ();
+ }
+ dest->index = dest_index;
+ }
+
+ /* Deletes edge if possible. */
+ static void
+ loop_delete_branch_edge (e)
+ edge e;
+ {
+ basic_block src = e->src;
+
+ if (src->succ->succ_next)
+ {
+ int tot_pr, tot_rem;
+ edge ae;
+
+ /* Cannot handle more than two exit edges. */
+ if (src->succ->succ_next->succ_next)
+ return;
+ /* Neither this. */
+ if (!any_condjump_p (src->end))
+ return;
+
+ /* Try to fix probabilities. This is probably as wrong as to leave
+ them as they are. */
+ tot_pr = 0;
+ for (ae = src->succ; ae; ae = ae->succ_next)
+ tot_pr += ae->probability;
+ tot_rem = tot_pr - e->probability;
+ if (tot_rem > 0)
+ {
+ for (ae = src->succ; ae; ae = ae->succ_next)
+ ae->probability = (ae->probability * tot_pr) / tot_rem;
+ }
+
+ /* I do not know how correct this is. */
+ delete_insn (src->end);
+
+ remove_edge (e);
+ src->succ->flags |= EDGE_FALLTHRU;
+ }
+ else
+ {
+ /* Cannot happen -- we are duplicating loop! */
+ abort ();
+ }
+ }
+
+ /* Duplicates BBS. Newly created bbs are placed into NEW_BBS, edges to
+ header (target of ENTRY) and copy of header are returned, edge ENTRY
+ is redirected to header copy. Assigns bbs into loops, updates
+ dominators. */
+ static void
+ copy_bbs (bbs, n, entry, latch_edge, new_bbs, loops, header_edge, copy_header_edge)
+ basic_block *bbs;
+ int n;
+ edge entry;
+ edge latch_edge;
+ basic_block **new_bbs;
+ struct loops *loops;
+ edge *header_edge;
+ edge *copy_header_edge;
+ {
+ int i;
+ basic_block bb, new_bb, header = entry->dest, dom_bb;
+ edge e;
+
+ /* Duplicate bbs, update dominators, assign bbs to loops. */
+ (*new_bbs) = xcalloc (n, sizeof (basic_block));
+ for (i = 0; i < n; i++)
+ {
+ /* Duplicate. */
+ bb = bbs[i];
+ new_bb = (*new_bbs)[i] = cfg_layout_duplicate_bb (bb, NULL);
+ RBI (new_bb)->duplicated = 1;
+ /* Add to loop. */
+ add_bb_to_loop (new_bb, bb->loop_father->copy);
+ /* Possibly set header. */
+ if (bb->loop_father->header == bb && bb != header)
+ new_bb->loop_father->header = new_bb;
+ /* Or latch. */
+ if (bb->loop_father->latch == bb &&
+ bb->loop_father != header->loop_father)
+ new_bb->loop_father->latch = new_bb;
+ }
+
+ /* Set dominators. */
+ for (i = 0; i < n; i++)
+ {
+ bb = bbs[i];
+ new_bb = (*new_bbs)[i];
+ if (bb != header)
+ {
+ /* For anything than loop header, just copy it. */
+ dom_bb = get_immediate_dominator (loops->cfg.dom, bb);
+ dom_bb = RBI (dom_bb)->copy;
+ }
+ else
+ {
+ /* Copy of header is dominated by entry source. */
+ dom_bb = entry->src;
+ }
+ if (!dom_bb)
+ abort ();
+ set_immediate_dominator (loops->cfg.dom, new_bb, dom_bb);
+ }
+
+ /* Redirect edges. */
+ for (i = 0; i < n; i++)
+ {
+ edge e_pred;
+ new_bb = (*new_bbs)[i];
+ bb = bbs[i];
+ for (e = bb->pred; e; e = e_pred)
+ {
+ basic_block src = e->src;
+
+ e_pred = e->pred_next;
+
+ /* Does this edge interest us? */
+ if (!RBI (src)->duplicated)
+ continue;
+
+ /* So it interests us; redirect it. */
+ if (bb != header)
+ redirect_edge_with_update (e, new_bb);
+ }
+ }
+
+ /* Redirect header edge. */
+ bb = RBI (latch_edge->src)->copy;
+ for (e = bb->succ; e->dest != latch_edge->dest; e = e->succ_next);
+ *header_edge = e;
+ redirect_edge_with_update (*header_edge, header);
+
+ /* Redirect entry to copy of header. */
+ redirect_edge_with_update (entry, RBI (header)->copy);
+ *copy_header_edge = entry;
+
+ /* Cancel duplicated flags. */
+ for (i = 0; i < n; i++)
+ RBI ((*new_bbs)[i])->duplicated = 0;
+ }
+
+ /* Remove edges going out from BBS, except edge to HEADER. */
+ static void
+ remove_exit_edges (bbs, n, header)
+ basic_block *bbs;
+ int n;
+ basic_block header;
+ {
+ basic_block bb;
+ edge e;
+ int i;
+
+ /* duplicated is not supposed to be used for this, but I need
+ something to mark blocks. */
+ for (i = 0 ; i < n; i++)
+ {
+ bb = bbs[i];
+ RBI (bb)->duplicated = 1;
+ }
+ RBI (header)->duplicated = 1;
+
+ for (i = 0 ; i < n; i++)
+ {
+ edge succ_e;
+ bb = bbs[i];
+ for (e = bb->succ; e; e = succ_e)
+ {
+ succ_e = e->succ_next;
+ if (!RBI (e->dest)->duplicated)
+ loop_delete_branch_edge (e);
+ }
+ }
+
+ for (i = 0 ; i < n; i++)
+ {
+ bb = bbs[i];
+ RBI (bb)->duplicated = 0;
+ }
+ RBI (header)->duplicated = 0;
+ }
+
+ /* Check whether LOOP's body can be duplicated. */
+ static bool
+ can_duplicate_loop_p (loop)
+ struct loop *loop;
+ {
+ basic_block *bbs;
+ int i;
+
+ bbs = get_loop_body (loop);
+
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ if (!cfg_layout_can_duplicate_bb_p (bbs[i]))
+ {
+ free (bbs);
+ return false;
+ }
+ }
+ free (bbs);
+
+ return true;
+ }
+
+ /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of
+ updating LOOPS structure. E's destination must be LOOP header for this to
+ work. Remove exit edges from copies corresponding to set bits in WONT_EXIT
+ (bit 0 corresponds to original LOOP body). If UPDATE_DOMINATORS is set,
+ also take care of updating dominators outside LOOP. If UPDATE_FREQ is set,
+ also update frequencies, otherwise just copy them. Returns false if
+ duplication is impossible. */
+ /* Grrr... this function is getting far too complicated for me to like it.
+ I should make something with it. */
+ static int
+ duplicate_loop_to_header_edge (loop, e, loops, ndupl, wont_exit,
+ update_dominators, update_freq)
+ struct loop *loop;
+ edge e;
+ struct loops *loops;
+ int ndupl;
+ int wont_exit;
+ int update_dominators;
+ int update_freq;
+ {
+ struct loop *target, *aloop;
+ struct loop **orig_loops;
+ int n_orig_loops;
+ basic_block header = loop->header, latch = loop->latch;
+ basic_block *new_bbs, *bbs, *first_active;
+ basic_block new_bb, bb, first_active_latch = NULL;
+ edge ae, latch_edge, he;
+ int i, j, n, more_active = 0;
+ int is_latch = (latch == e->src);
+ int k0, k, kk, freq_in, freq_e, freq_le;
+ int loop_made_infinite;
+
+ if (e->dest != loop->header)
+ abort ();
+ if (ndupl <= 0)
+ abort ();
+
+ bbs = get_loop_body (loop);
+
+ /* Check whether duplication is possible. */
+
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ if (!cfg_layout_can_duplicate_bb_p (bbs[i]))
+ {
+ free (bbs);
+ return false;
+ }
+ }
+
+ /* Find edge from latch. */
+ for (latch_edge = header->pred;
+ latch_edge->src != latch;
+ latch_edge = latch_edge->pred_next);
+
+ /* For updating frequencies. */
+ freq_e = EDGE_FREQUENCY (e);
+ freq_in = header->frequency;
+ freq_le = EDGE_FREQUENCY (latch_edge);
+
+ if (is_latch)
+ {
+ /* Should work well unless something inside depends on parity of
+ iteration counter. */
+ if (freq_in > freq_e)
+ {
+ k0 = REG_BR_PROB_BASE;
+ for (i = 0; i < ndupl + 1; i++)
+ k0 = (k0 * freq_e) / freq_in;
+ k0 = REG_BR_PROB_BASE - k0;
+ k0 = (((REG_BR_PROB_BASE * REG_BR_PROB_BASE) / freq_in) *
+ (freq_in - freq_e)) / k0;
+ k = (REG_BR_PROB_BASE * freq_e) / freq_in;
+ }
+ else
+ {
+ k0 = REG_BR_PROB_BASE / (ndupl + 1);
+ k = REG_BR_PROB_BASE;
+ }
+ kk = k0;
+ }
+ else
+ {
+ /* Count probability that we will get to latch from header.
+ This is wrong; first iteration of cycle is certainly somewhat
+ special. But I cannot do anything with it. */
+
+ /* Strange things may happen to frequencies. :-( */
+ if (freq_le == 0)
+ freq_le = 1;
+ if (freq_in <= freq_le)
+ freq_in = freq_le + 1;
+ if (freq_in < freq_le + freq_e)
+ freq_e = freq_in - freq_le;
+
+ k = (REG_BR_PROB_BASE * freq_le) / freq_in;
+ kk = (REG_BR_PROB_BASE * freq_e) / freq_le;
+ k0 = kk;
+ for (i = 0; i < ndupl; i++)
+ k0 = (k0 * k) / REG_BR_PROB_BASE;
+ k0 = (k0 * freq_le) / REG_BR_PROB_BASE + freq_in - freq_le - freq_e;
+ k0 = (REG_BR_PROB_BASE * k0) / (freq_in - freq_le);
+ if (k0 < 0)
+ k0 = 0;
+ }
+ if (k0 < 0 || k0 > REG_BR_PROB_BASE ||
+ k < 0 || k > REG_BR_PROB_BASE ||
+ kk < 0 || kk * k > REG_BR_PROB_BASE * REG_BR_PROB_BASE)
+ {
+ /* Something is wrong. */
+ abort ();
+ }
+
+ /* Check whether we will create an infinite loop. */
+ if (is_latch)
+ {
+ loop_made_infinite = 1;
+ for (i = 0; i <= ndupl; i++)
+ if (!(wont_exit & (1 << i)))
+ {
+ loop_made_infinite = 0;
+ break;
+ }
+ }
+ else
+ loop_made_infinite = (wont_exit & 1);
+ /* We cannot handle infinite loops yet. It is relatively hard to do,
+ as outer loops might become infinite too. */
+ if (loop_made_infinite)
+ abort ();
+
+ /* Loop to that new bbs will belong. */
+ target = find_common_loop (e->src->loop_father, e->dest->loop_father);
+
+ /* Original loops. */
+ n_orig_loops = 0;
+ for (aloop = loop->inner; aloop; aloop = aloop->next)
+ n_orig_loops++;
+ orig_loops = xcalloc (n_orig_loops, sizeof (struct loop *));
+ for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
+ orig_loops[i] = aloop;
+
+ loop->copy = target;
+
+ /* Original basic blocks. */
+ n = loop->num_nodes;
+
+ first_active = xcalloc(n, sizeof (basic_block));
+ if (is_latch && !(wont_exit & 1))
+ {
+ memcpy (first_active, bbs, n * sizeof (basic_block));
+ first_active_latch = latch;
+ }
+
+ for (j = 0; j < ndupl; j++)
+ {
+ /* Copy loops. */
+ copy_loops_to (loops, orig_loops, n_orig_loops, target);
+
+ /* Copy bbs. */
+ copy_bbs (bbs, n, e, latch_edge, &new_bbs, loops, &e, &he);
+ if (is_latch)
+ loop->latch = RBI (latch)->copy;
+
+ /* Set counts and frequencies. */
+ kk = (kk * k) / REG_BR_PROB_BASE;
+ for (i = 0; i < n; i++)
+ {
+ new_bb = new_bbs[i];
+ bb = bbs[i];
+ if (update_freq)
+ {
+ new_bb->count = (kk * bb->count +
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ new_bb->frequency = (bb->frequency * kk +
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ }
+ else
+ {
+ new_bb->count = bb->count;
+ new_bb->frequency = bb->frequency;
+ }
+ }
+
+ /* Remove exit edges if needed. */
+ if (wont_exit & (1 << (j + 1)))
+ remove_exit_edges (new_bbs, n, header);
+ else if (!first_active_latch)
+ {
+ memcpy (first_active, new_bbs, n * sizeof (basic_block));
+ first_active_latch = RBI (latch)->copy;
+ }
+ else
+ more_active = 1;
+
+ /* Update edge counts. */
+ for (i = 0; i < n; i++)
+ {
+ bb = new_bbs[i];
+ for (ae = bb->succ; ae; ae = ae->succ_next)
+ ae->count = (bb->count * ae->probability +
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ }
+
+ /* Update loops info. */
+ for (i = 0; i < n_orig_loops; i++)
+ flow_loop_scan (loops, orig_loops[i]->copy, 0);
+
+ free (new_bbs);
+
+ /* Original loop header is dominated by latch copy
+ if we duplicated on its only entry edge. */
+ if (!is_latch && !header->pred->pred_next->pred_next)
+ set_immediate_dominator (loops->cfg.dom, header, RBI (latch)->copy);
+ if (is_latch && j == 0)
+ {
+ /* Update edge from latch. */
+ for (latch_edge = RBI (header)->copy->pred;
+ latch_edge->src != latch;
+ latch_edge = latch_edge->pred_next);
+ }
+ }
+ free (orig_loops);
+
+ /* Now handle original loop. */
+
+ /* Remove exit edges if needed. */
+ if (wont_exit & 1)
+ remove_exit_edges (bbs, n, latch_edge->dest);
+
+ /* Update edge counts. */
+ if (update_freq)
+ {
+ for (i = 0; i < n; i++)
+ {
+ bb = bbs[i];
+ bb->count = (k0 * bb->count +
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ bb->frequency = (bb->frequency * k0 +
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ for (ae = bb->succ; ae; ae = ae->succ_next)
+ ae->count = (bb->count * ae->probability +
+ REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ }
+ }
+
+ if (!first_active_latch)
+ {
+ memcpy (first_active, bbs, n * sizeof (basic_block));
+ first_active_latch = latch;
+ }
+ else
+ more_active = 1;
+
+ /* Update dominators of other blocks if affected. */
+ if (update_dominators)
+ {
+ for (i = 0; i < n; i++)
+ {
+ basic_block dominated, dom_bb, *dom_bbs;
+ int n_dom_bbs,j;
+
+ bb = bbs[i];
+ n_dom_bbs = get_dominated_by (loops->cfg.dom, bb, &dom_bbs);
+ for (j = 0; j < n_dom_bbs; j++)
+ {
+ dominated = dom_bbs[j];
+ if (flow_bb_inside_loop_p (loop, dominated))
+ continue;
+ if (more_active)
+ {
+ dom_bb = nearest_common_dominator (
+ loops->cfg.dom, first_active[i], first_active_latch);
+ }
+ else
+ dom_bb = first_active[i];
+ set_immediate_dominator (loops->cfg.dom, dominated, dom_bb);
+ }
+ free (dom_bbs);
+ }
+ }
+ free (first_active);
+
+ free (bbs);
+
+ /* Fill other info for father loops. */
+ for (aloop = target; aloop->depth > 0; aloop = aloop->outer)
+ flow_loop_scan (loops, aloop, 0);
+
+ return true;
+
}
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.537.2.42
diff -c -3 -p -r1.537.2.42 toplev.c
*** toplev.c 2002/02/22 13:25:56 1.537.2.42
--- toplev.c 2002/02/22 19:24:26
*************** int flag_unroll_loops;
*** 554,559 ****
--- 554,562 ----
int flag_unroll_all_loops;
+ /* Nonzero enables loop unswitching. */
+ int flag_unswitch_loops;
+
/* Nonzero enables prefetch optimizations for arrays in loops. */
int flag_prefetch_loop_arrays;
*************** static const lang_independent_options f_
*** 1024,1029 ****
--- 1027,1034 ----
N_("Perform loop unrolling when iteration count is known") },
{"unroll-all-loops", &flag_unroll_all_loops, 1,
N_("Perform loop unrolling for all loops") },
+ {"unswitch-loops", &flag_unswitch_loops, 1,
+ N_("Perform loop unswitching") },
{"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1,
N_("Generate prefetch instructions, if available, for arrays in loops") },
{"move-all-movables", &flag_move_all_movables, 1,
*************** rest_of_compilation (decl)
*** 3049,3054 ****
--- 3054,3061 ----
if (loops)
{
/* Here will go optimalizations. */
+ if (flag_unswitch_loops)
+ unswitch_loops (loops);
loop_optimizer_finalize (loops, rtl_dump_file);
}
More information about the Gcc-patches
mailing list