[CFG] Loop unswitching

Zdenek Dvorak rakdver@atrey.karlin.mff.cuni.cz
Fri Feb 22 13:35:00 GMT 2002


Hello.

This patch implements loop unswitching and brings in most of the machinery
also used in loop peeling/unrolling. Bootstrapped on i386.

Zdenek Dvorak

Changelog:
	* loop-new.c (redirect_edge_with_update, loop_delete_branch_edge,
	split_loop_bb): New, cfg manipulation.
	(duplicate_loop, duplicate_subloops, copy_loops_to,
	alp_enum_p, add_loop, place_new_loop): New, loop
	structure duplication.
	(copy_bbs, remove_exit_edges, loopify, can_duplicate_loop_p,
	duplicate_loop_to_header_edge): New, loop body copying.
	(rpe_enum_p, remove_path, unswitch_loop, unswitch_single_loop,
	may_unswitch_on_p, unswitch_loops): New, loop unswitching.
	* toplev.c (flag_unswitch_loops): New.
	(rest_of_compilation): Add loop unswitching.

Index: loop-new.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/loop-new.c,v
retrieving revision 1.1.2.4
diff -c -3 -p -r1.1.2.4 loop-new.c
*** loop-new.c	2002/02/21 16:16:54	1.1.2.4
--- loop-new.c	2002/02/22 19:24:10
*************** Software Foundation, 59 Temple Place - S
*** 41,46 ****
--- 41,57 ----
  #include "cfglayout.h"
  #include "loop.h"
  
+ /* This controls how big loops are optimized, and by how much we allow
+    them to grow.  */
+   
+ #ifndef MAX_UNSWITCH_INSNS
+ #define MAX_UNSWITCH_INSNS 200
+ #endif
+ 
+ #ifndef MAX_NUM_UNSWITCH
+ #define MAX_NUM_UNSWITCH 8
+ #endif
+ 
  /* Stupid definitions of dominator manipulation.  */
  
  basic_block
*************** iterate_fix_dominators (doms, bbs, n, lo
*** 230,235 ****
--- 241,270 ----
      sbitmap_free (affected);
  }
  
+ static struct loop *duplicate_loop PARAMS ((struct loops *, struct loop *, struct loop *));
+ static void duplicate_subloops PARAMS ((struct loops *, struct loop *, struct loop *));
+ static void copy_loops_to PARAMS ((struct loops *, struct loop **, int, struct loop *));
+ static int duplicate_loop_to_header_edge PARAMS ((struct loop *, edge, struct loops *, int, int, int, int));
+ static void redirect_edge_with_update PARAMS ((edge, basic_block));
+ static void loop_delete_branch_edge PARAMS ((edge));
+ static void copy_bbs PARAMS ((basic_block *, int, edge, edge, basic_block **, struct loops *, edge *, edge *));
+ static void remove_exit_edges PARAMS ((basic_block *, int, basic_block));
+ static struct loop *unswitch_loop PARAMS ((struct loops *, struct loop *, basic_block));
+ static bool rpe_enum_p PARAMS ((basic_block, void *));
+ static void remove_path PARAMS ((edge, struct loop *, sbitmap *, int));
+ static struct loop *loopify PARAMS ((struct loops *, edge, edge, basic_block));
+ static bool alp_enum_p PARAMS ((basic_block, void *));
+ static void add_loop PARAMS ((struct loops *, struct loop *));
+ static void place_new_loop PARAMS ((struct loops *, struct loop *));
+ static void unswitch_single_loop PARAMS ((struct loops *, struct loop *, rtx, int *));
+ static bool can_duplicate_loop_p PARAMS ((struct loop *loop));
+ static bool may_unswitch_on_p PARAMS ((struct loops *, basic_block, struct loop *, basic_block *));
+ static edge split_loop_bb PARAMS ((struct loops *, basic_block, rtx));
+ static rtx reversed_condition PARAMS ((rtx));
+  
+ static rtx always_true_cond;
+ static rtx always_false_cond;
+  
  /* Initialize loop optimizer.  */
  
  struct loops *
*************** loop_optimizer_init (dumpfile)
*** 237,242 ****
--- 272,282 ----
       FILE *dumpfile;
  {
    struct loops *loops = xcalloc (1, sizeof (struct loops));
+   rtx reg;
+ 
+   reg = gen_reg_rtx (SImode);
+   always_true_cond = gen_rtx_fmt_ee (EQ, SImode, reg, reg);
+   always_false_cond = gen_rtx_fmt_ee (NE, SImode, reg, reg);
  
    /* Find the loops.  */
  
*************** loop_optimizer_finalize (loops, dumpfile
*** 303,307 ****
--- 343,1497 ----
  #ifdef ENABLE_CHECKING
    verify_flow_info ();
  #endif
+ }
+ 
+ /* Unswitch LOOPS.  */
+ void
+ unswitch_loops (loops)
+      struct loops *loops;
+ {
+   int i;
+   /* Scan the loops, last ones first, since this means inner ones are done
+      before outer ones.  */
+   for (i = loops->num - 1; i > 0; i--)
+     {
+       struct loop *loop = loops->parray[i];
+       int tot = 0;
+       unswitch_single_loop (loops, loop, NULL_RTX, &tot);
+ #ifdef ENABLE_CHECKING
+       verify_dominators ();
+       verify_loop_structure (loops, VLS_FOR_LOOP_NEW);
+ #endif
+     }
+ }
+ 
+ /* Splits basic block BB after INSN, returns created edge.  Updates loops
+    and dominators.  */
+ static edge
+ split_loop_bb (loops, bb, insn)
+      struct loops *loops;
+      basic_block bb;
+      rtx insn;
+ {
+   edge e;
+   basic_block *dom_bbs;
+   int n_dom_bbs, i;
+ 
+   /* Split the block.  */
+   e = split_block (bb, insn);
+ 
+   /* Add dest to loop.  */
+   add_bb_to_loop (e->dest, e->src->loop_father);
+ 
+   /* Fix dominators.  */
+   n_dom_bbs = get_dominated_by (loops->cfg.dom, e->src, &dom_bbs);
+   for (i = 0; i < n_dom_bbs; i++)
+     set_immediate_dominator (loops->cfg.dom, dom_bbs[i], e->dest);
+   free (dom_bbs);
+   set_immediate_dominator (loops->cfg.dom, e->dest, e->src);
+ 
+   /* Take care of RBI.  */
+   alloc_aux_for_block (e->dest, sizeof (struct reorder_block_def));
+ 
+   return e;
+ }
+ 
+ /* Checks whether we can unswitch LOOP on basic block BB.  LOOP BODY
+    is provided to save time.  */
+ static bool
+ may_unswitch_on_p (loops, bb, loop, body)
+      struct loops *loops;
+      basic_block bb;
+      struct loop *loop;
+      basic_block *body;
+ {
+   rtx set, test;
+   int i;
+ 
+   /* It must be a simple conditional jump.  */
+   if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
+     return false;
+   if (!any_condjump_p (bb->end))
+     return false;
+ 
+   /* Both branches must be inside loop.  */
+   if (!flow_bb_inside_loop_p (loop, bb->succ->dest))
+     return false;
+   if (!flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
+     return false;
+ 
+   /* Latch must be dominated by it (ugly technical restriction,
+      we should remove this later).  */
+   if (!dominated_by_p (loops->cfg.dom, loop->latch, bb))
+     return false;
+ 
+   /* Condition must be invariant.  */
+   set = pc_set (bb->end);
+   if (!set)
+     abort ();
+   test = XEXP (SET_SRC (set), 0);
+ 
+ /* Test must be non-trivial (we use trivial ones to disable unreachable
+    branches).  */
+   if (rtx_equal_p (test, always_true_cond)
+       || rtx_equal_p (test, always_false_cond))
+     return false;
+ 
+   for (i = 0; i < loop->num_nodes; i++)
+     if (modified_between_p (test, body[i]->head, NEXT_INSN (body[i]->end)))
+       return false;
+ 
+   return true;
+ }
+ 
+ /* Reverses CONDition; returns NULL if we cannot.  */
+ static rtx
+ reversed_condition (cond)
+      rtx cond;
+ {
+   enum rtx_code reversed;
+   reversed = reversed_comparison_code (cond, NULL);
+   if (reversed == UNKNOWN)
+     return NULL_RTX;
+   else
+     return gen_rtx_fmt_ee (reversed,
+ 	 		     GET_MODE (cond), XEXP (cond, 0),
+ 			     XEXP (cond, 1));
+ }
+ 
+ /* Unswitch single LOOP.  COND_CHECKED holds the list of conditions we have
+    already unswitched on and that are true in our branch.  NUM is the number
+    of unswitchings done so far; do not allow it to grow too much, as it is
+    easy to construct an example on which the code would grow exponentially.  */
+ static void
+ unswitch_single_loop (loops, loop, cond_checked, num)
+      struct loops *loops;
+      struct loop *loop;
+      rtx cond_checked;
+      int *num;
+ {
+   basic_block *bbs, bb;
+   struct loop *nloop;
+   int i, true_first;
+   rtx cond, rcond, conds, rconds, acond, set;
+ 
+   /* Do not unswitch too much.  */
+   if (*num > MAX_NUM_UNSWITCH)
+     return;
+ 
+   /* We only unswitch innermost loops (at least for now).  */
+   if (loop->inner)
+     return;
+ 
+   /* And we must be able to duplicate loop body.  */
+   if (!can_duplicate_loop_p (loop))
+     return;
+ 
+   /* Check the size of loop.  */
+   if (num_loop_insns (loop) > MAX_UNSWITCH_INSNS)
+     return;
+ 
+   /* Find a bb to unswitch on.  We use just a stupid test of invariantness
+      of the condition: all used regs must not be modified inside loop body.  */
+   bbs = get_loop_body (loop);
+   for (i = 0; i < loop->num_nodes; i++)
+     if (may_unswitch_on_p (loops, bbs[i], loop, bbs))
+       {
+         int always_true = 0;
+         int always_false = 0;
+ 
+ 	if (!(cond = get_condition (bbs[i]->end, NULL)))
+ 	  abort ();
+ 	rcond = reversed_condition (cond);
+ 
+ 	/* Check whether the result can be predicted.  */
+ 	for (acond = cond_checked; acond; acond = XEXP (acond, 1))
+ 	  {
+ 	    if (rtx_equal_p (cond, XEXP (acond, 0)))
+ 	      {
+ 		/* True.  */
+ 		always_true = 1;
+ 		break;
+ 	      }
+ 	    if (rtx_equal_p (rcond, XEXP (acond, 0)))
+ 	      {
+ 		/* False.  */
+ 		always_false = 1;
+ 		break;
+ 	      }
+ 	  }
+ 
+ 	/* In case we are able to predict result, we replace condition by
+ 	   corresponding trivial one.  It is not best way -- removing the
+ 	   unreachable branch would be better as it could make more
+ 	   opportunities for unswitching/ unrolling.  There are two problems
+ 	   with this.  The first is handling of dominators -- we would have to
+ 	   recount dominators for big area.  The second and more important one
+ 	   is that we could create unreachable blocks; we would have to remove
+ 	   them too, having big trouble with keeping loop structure up-to-date.
+ 	   */
+ 	set = pc_set (bbs[i]->end);
+ 	if (!set)
+ 	  abort ();
+ 	if (always_true)
+ 	  {
+ 	    if (!validate_change (bbs[i]->end, &XEXP (SET_SRC (set), 0),
+ 				  copy_rtx (always_true_cond), 0))
+ 	      break;
+ 	  }
+ 	else if (always_false)
+ 	  {
+ 	    if (!validate_change (bbs[i]->end, &XEXP (SET_SRC (set), 0),
+ 				  copy_rtx (always_false_cond), 0))
+ 	      break;
+ 	  }
+ 	else
+ 	  break;
+       }
+  
+   if (i == loop->num_nodes)
+     {
+       free (bbs);
+       return;
+     }
+ 
+   conds = alloc_EXPR_LIST (0, cond, cond_checked);
+   if (rcond)
+     rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
+   else
+     rconds = cond_checked;
+ 
+   /* Separate condition.  */
+   bb = split_loop_bb (loops, bbs[i], PREV_INSN (bbs[i]->end))->dest;
+   free (bbs);
+   true_first = !(bb->succ->flags & EDGE_FALLTHRU);
+ 
+   /* Unswitch the loop.  */
+   nloop = unswitch_loop (loops, loop, bb);
+   if (!nloop)
+     abort ();
+ 
+   /* Count number of unswitchings.  */
+   (*num)++;
+ 
+   /* Invoke itself on modified loops.  */
+   unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num);
+   unswitch_single_loop (loops, loop, true_first ? rconds : conds, num);
+   free_EXPR_LIST_node (conds);
+   if (rcond)
+     free_EXPR_LIST_node (rconds);
+ }
+ 
+ /* Checks whether BB is inside RPE_LOOP and is dominated by RPE_DOM.  */
+ struct rpe_data
+  {
+    struct loop *loop;
+    basic_block dom;
+    sbitmap *doms;
+  };
+ 
+ static bool
+ rpe_enum_p (bb, data)
+      basic_block bb;
+      void *data;
+ {
+   struct rpe_data *rpe = data;
+   return flow_bb_inside_loop_p (rpe->loop, bb)
+ 	 && dominated_by_p (rpe->doms, bb, rpe->dom);
+ }
+ 
+ /* Remove given edge E and all bbs inside LOOP that are dominated by target 
+    of E.  Fix dominators by redirecting to copy if FIX is set.  */
+ static void
+ remove_path (e, loop, doms, fix)
+      edge e;
+      struct loop *loop;
+      sbitmap *doms;
+      int fix;
+ {
+   int nrem, i, j;
+   basic_block *rem_bbs, bb, nbb, *dom_bbs, dom_bb, ndom;
+   int n_dom_bbs;
+   edge ae;
+   struct rpe_data rpe;
+  
+   /* Check for the case when we can reach e->dest some other way.  */
+   if (e->dest->pred->pred_next)
+     {
+       for (ae = e->dest->pred; ae; ae = ae->pred_next)
+ 	if (ae != e && !dominated_by_p (doms, ae->src, e->dest))
+ 	  break;
+       if (ae)
+ 	{
+ 	  /* We just want to cancel edge.  */
+ 	  loop_delete_branch_edge (e);
+ 	  return;
+ 	}
+     }
+   
+   /* Find bbs we are interested in.  */
+   rpe.loop = loop;
+   rpe.dom = e->dest;
+   rpe.doms = doms;
+   rem_bbs = xcalloc (loop->num_nodes, sizeof (basic_block));
+   nrem = dfs_enumerate_from (e->dest, 0, rpe_enum_p, rem_bbs,
+ 			     loop->num_nodes, &rpe);
+   
+   if (fix)
+     {
+       /* Fix dominators.  */
+       for (i = 0; i < nrem; i++)
+ 	{
+ 	  bb = rem_bbs[i];
+ 	  ndom = RBI (bb)->copy;
+ 	  n_dom_bbs = get_dominated_by (doms, bb, &dom_bbs);
+ 	  for (j = 0; j < n_dom_bbs; j++)
+ 	    {
+ 	      /* This may also be bbs inside loop; but it does not matter,
+ 		 as they will be deleted anyway.  */
+ 	      dom_bb = dom_bbs[j];
+ 	      set_immediate_dominator (doms, dom_bb, ndom);
+ 	    }
+ 	  free (dom_bbs);
+ 	}
+       /* Fix frequencies.  */
+       for (i = 0; i < nrem; i++)
+ 	{
+ 	  bb = rem_bbs[i];
+ 	  nbb = RBI (bb)->copy;
+ 	  nbb->frequency += bb->frequency;
+ 	}
+     }
+   else
+     {
+       /* Fix frequencies.  */
+       for (i = 0; i < nrem; i++)
+ 	{
+ 	  bb = rem_bbs[i];
+ 	  nbb = RBI (bb)->original;
+ 	  nbb->frequency += bb->frequency;
+ 	}
+      }
+ 
+   /* Remove the jump and edge.  */
+   loop_delete_branch_edge (e);
+ 
+   /* Remove the blocks.  */
+   for (i = 0; i < nrem; i++)
+     {
+       edge next_ae;
+       for (ae = rem_bbs[i]->succ; ae; ae = next_ae)
+ 	{
+ 	  bb = ae->dest;
+ 	  next_ae = ae->succ_next;
+ 	  remove_edge (ae);
+ 	}
+       remove_bb_from_loops (rem_bbs[i]);
+       flow_delete_block (rem_bbs[i]);
+     }
+   free (rem_bbs);
+ }
+ 
+ /* Predicate for enumeration in add_loop.  */
+ static bool
+ alp_enum_p (bb, alp_header)
+      basic_block bb;
+      void *alp_header;
+ {
+   return bb != (basic_block) alp_header;
+ }
+ 
+ /* Compute loop from header and latch info filled in LOOP and add it to
+    LOOPS.  */
+ static void
+ add_loop (loops, loop)
+      struct loops *loops;
+      struct loop *loop;
+ {
+   basic_block *bbs;
+   int i, n;
+   
+   /* Add it to loop structure.  */
+   place_new_loop (loops, loop);
+   loop->level = 1;
+ 
+   /* Find its nodes.  */
+   bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
+   n = dfs_enumerate_from (loop->latch, 1, alp_enum_p, bbs, n_basic_blocks, loop->header);
+ 
+   /* Add those nodes.  */
+   for (i = 0; i < n; i++)
+     add_bb_to_loop (bbs[i], loop);
+   add_bb_to_loop (loop->header, loop);
+ 
+   free (bbs);
+ }
+ 
+ /* Make the area between HEADER_EDGE and LATCH_EDGE a loop by connecting
+    latch to header.  Everything between them plus the LATCH_EDGE destination
+    must be dominated by the HEADER_EDGE destination.  Add SWITCH_BB to the
+    original entry edge.  Returns the newly created loop.  Dominators outside
+    the area are intentionally left wrong.  */
+ static struct loop *
+ loopify (loops, latch_edge, header_edge, switch_bb)
+      struct loops *loops;
+      edge latch_edge;
+      edge header_edge;
+      basic_block switch_bb;
+ {
+   basic_block succ_bb = latch_edge->dest;
+   basic_block pred_bb = header_edge->src;
+   struct loop *loop = xcalloc (1, sizeof (struct loop));
+   struct loop *outer = succ_bb->loop_father->outer;
+   int freq, prob, tot_prob, i;
+   basic_block *bbs;
+ 
+   loop->header = header_edge->dest;
+   loop->latch = latch_edge->src;
+ 
+   freq = EDGE_FREQUENCY (header_edge);
+   prob = switch_bb->succ->probability;
+   tot_prob = prob + switch_bb->succ->succ_next->probability;
+   if (tot_prob == 0)
+     tot_prob = 1;
+ 
+   /* Redirect edges.  */
+   redirect_edge_with_update (latch_edge, loop->header);
+   redirect_edge_with_update (header_edge, switch_bb);
+   redirect_edge_with_update (switch_bb->succ->succ_next, loop->header);
+   redirect_edge_with_update (switch_bb->succ, succ_bb);
+ 
+   /* And update dominators.  */
+   set_immediate_dominator (loops->cfg.dom, switch_bb, pred_bb);
+   set_immediate_dominator (loops->cfg.dom, loop->header, switch_bb);
+   set_immediate_dominator (loops->cfg.dom, succ_bb, switch_bb);
+ 
+   /* Compute new loop.  */
+   add_loop (loops, loop);
+   flow_loop_tree_node_add (outer, loop);
+ 
+   /* And add switch_bb to appropriate loop.  */
+   add_bb_to_loop (switch_bb, outer);
+ 
+   /* Now fix frequencies.  */
+   switch_bb->frequency = freq;
+   bbs = get_loop_body (loop);
+   for (i = 0; i < loop->num_nodes; i++)
+     bbs[i]->frequency = (bbs[i]->frequency * prob / tot_prob);
+   free (bbs);
+   bbs = get_loop_body (succ_bb->loop_father);
+   for (i = 0; i < succ_bb->loop_father->num_nodes; i++)
+     bbs[i]->frequency = (bbs[i]->frequency * (tot_prob - prob) / tot_prob);
+   free (bbs);
+ 
+   return loop;
+ }
+ 
+ /* Unswitch a LOOP with respect to the given basic block.  We only support
+    unswitching of innermost loops (this is not a fundamental restriction;
+    handling subloops is simply not implemented yet).  UNSWITCH_ON must be
+    executed in every iteration, i.e. it must dominate the LOOP latch.
+    Returns NULL if impossible, the new loop otherwise.  */
+ static struct loop *
+ unswitch_loop (loops, loop, unswitch_on)
+      struct loops *loops;
+      struct loop *loop;
+      basic_block unswitch_on;
+ {
+   edge entry, e, latch_edge;
+   basic_block switch_bb, unswitch_on_alt, *bbs, *dom_bbs, dom_bb;
+   struct loop *nloop;
+   int n_dom_bbs, i, j;
+   int just_one_edge;
+ 
+   /* Some sanity checking.  */
+   if (!flow_bb_inside_loop_p (loop, unswitch_on))
+     abort ();
+   if (!unswitch_on->succ || !unswitch_on->succ->succ_next ||
+       unswitch_on->succ->succ_next->succ_next)
+     abort ();
+   if (!dominated_by_p (loops->cfg.dom, loop->latch, unswitch_on))
+     abort ();
+   if (loop->inner)
+     abort ();
+   if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->dest))
+     abort ();
+   if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest))
+     abort ();
+   
+   /* Will we be able to perform redirection?  */
+   if (!any_condjump_p (unswitch_on->end))
+     return NULL;
+   if (!cfg_layout_can_duplicate_bb_p (unswitch_on))
+     return NULL;
+   
+   for (entry = loop->header->pred;
+        entry->src == loop->latch;
+        entry = entry->pred_next);
+   
+   /* Make a copy.  */
+   if (!duplicate_loop_to_header_edge (loop, entry, loops, 1, 0, 0, 0))
+     return NULL;
+ 
+   /* Record switch block.  */
+   unswitch_on_alt = RBI (unswitch_on)->copy;
+ 
+   /* Make a copy of unswitched block.  */
+   switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL);
+   RBI (unswitch_on)->copy = unswitch_on_alt;
+ 
+   /* Loopify the copy.  */
+   for (latch_edge = RBI (loop->latch)->copy->succ;
+        latch_edge->dest != loop->header;
+        latch_edge = latch_edge->succ_next);
+   nloop = loopify (loops, latch_edge,
+ 		   RBI (loop->header)->copy->pred, switch_bb);
+   
+   /* Remove paths from loop copies and update dominators.
+      We rely on the fact that cfg_layout_duplicate_bb reverses
+      list of edges here.  */
+   for (e = unswitch_on->succ->succ_next->dest->pred; e; e = e->pred_next)
+     if (e->src != unswitch_on &&
+ 	!dominated_by_p (loops->cfg.dom, e->src, e->dest))
+       break;
+   just_one_edge = (e != NULL);
+   remove_path (unswitch_on->succ, loop, loops->cfg.dom, 1);
+   remove_path (unswitch_on_alt->succ, nloop, loops->cfg.dom, 0);
+ 
+   /* Now fix dominators of outside blocks.  */
+   bbs = get_loop_body (loop);
+   for (i = 0; i < loop->num_nodes; i++)
+     {
+       if (!just_one_edge &&
+ 	  dominated_by_p (loops->cfg.dom, bbs[i], unswitch_on->succ->dest))
+ 	continue;
+       n_dom_bbs = get_dominated_by (loops->cfg.dom, bbs[i], &dom_bbs);
+       for (j = 0; j < n_dom_bbs; j++)
+ 	{
+ 	  dom_bb = dom_bbs[j];
+ 	  if (flow_bb_inside_loop_p (loop, dom_bb))
+ 	    continue;
+ 	  set_immediate_dominator (loops->cfg.dom, dom_bb, switch_bb);
+ 	}
+       free (dom_bbs);
+     }
+   free (bbs);
+ 
+   /* Now the hard case.  Dominators of blocks inside the loop that are
+      immediately dominated by unswitch_on may behave really weirdly.  */
+   n_dom_bbs = get_dominated_by (loops->cfg.dom, unswitch_on, &dom_bbs);
+   j = 0;
+   for (i = 0; i < n_dom_bbs; i++)
+     if (flow_bb_inside_loop_p (loop, dom_bbs[i]))
+       dom_bbs[j++] = dom_bbs [i];
+   iterate_fix_dominators (loops->cfg.dom, dom_bbs, j, 1);
+   free (dom_bbs);
+ 
+   n_dom_bbs = get_dominated_by (loops->cfg.dom, unswitch_on_alt, &dom_bbs);
+   j = 0;
+   for (i = 0; i < n_dom_bbs; i++)
+     if (flow_bb_inside_loop_p (nloop, dom_bbs[i]))
+       dom_bbs[j++] = dom_bbs [i];
+   iterate_fix_dominators (loops->cfg.dom, dom_bbs, j, 1);
+   free (dom_bbs);
+   
+   return nloop;
+ }
+ 
+ 
+ /* Creates place for a new LOOP.  */
+ static void
+ place_new_loop (loops, loop)
+      struct loops *loops;
+      struct loop *loop;
+ {
+   loops->parray =
+     xrealloc (loops->parray, (loops->num + 1) * sizeof (struct loop *));
+   loops->parray[loops->num] = loop;
+ 
+   loop->num = loops->num++;
+ }
+ 
+ /* Copies structure of LOOP into TARGET.  */
+ static struct loop *
+ duplicate_loop (loops, loop, target)
+      struct loops *loops;
+      struct loop *loop;
+      struct loop *target;
+ {
+   struct loop *cloop;
+   cloop = xcalloc (1, sizeof (struct loop));
+   place_new_loop (loops, cloop);
+ 
+   /* Initialize copied loop.  */
+   cloop->level = loop->level;
+ 
+   /* Set it as copy of loop.  */
+   loop->copy = cloop;
+ 
+   /* Add it to target.  */
+   flow_loop_tree_node_add (target, cloop);
+ 
+   return cloop;
+ }
+ 
+ /* Copies structure of subloops of LOOP into TARGET.  */
+ static void 
+ duplicate_subloops (loops, loop, target)
+      struct loops *loops;
+      struct loop *loop;
+      struct loop *target;
+ {
+   struct loop *aloop, *cloop;
+ 
+   for (aloop = loop->inner; aloop; aloop = aloop->next)
+     {
+       cloop = duplicate_loop (loops, aloop, target);
+       duplicate_subloops (loops, aloop, cloop);
+     }
+ }
+ 
+ /*  Copies structure of COPIED_LOOPS into TARGET.  */
+ static void 
+ copy_loops_to (loops, copied_loops, n, target)
+      struct loops *loops;
+      struct loop **copied_loops;
+      int n;
+      struct loop *target;
+ {
+   struct loop *aloop;
+   int i;
+ 
+   for (i = 0; i < n; i++)
+     {
+       aloop = duplicate_loop (loops, copied_loops[i], target);
+       duplicate_subloops (loops, copied_loops[i], aloop);
+     }
+ }
+ 
+ /* Redirects edge E to DEST.  */
+ static void
+ redirect_edge_with_update (e, dest)
+      edge e;
+      basic_block dest;
+ {
+   int dest_index = dest->index;
+   if (e->dest == dest)
+     return;
+ 
+   if (!e->src->succ->succ_next)
+     {
+       /* Code of cfglayout should be able to handle this, and redirection
+ 	 is happier.  */
+       e->flags |= EDGE_FALLTHRU;
+     }
+ 
+   /* Cfglayout says: */
+   /* Avoid redirect_edge_and_branch from overactive optimizing.  */
+   dest->index = n_basic_blocks + 1;
+   if (e->flags & EDGE_FALLTHRU)
+     redirect_edge_succ (e, dest);
+   else
+     {
+       if (!redirect_edge_and_branch (e, dest))
+ 	abort ();
+     }
+   dest->index = dest_index;
+   }
+ 
+ /* Deletes edge if possible.  */
+ static void
+ loop_delete_branch_edge (e)
+      edge e;
+ {
+   basic_block src = e->src;
+ 
+   if (src->succ->succ_next)
+     {
+       int tot_pr, tot_rem;
+       edge ae;
+ 
+       /* Cannot handle more than two exit edges.  */
+       if (src->succ->succ_next->succ_next)
+ 	return;
+       /* Neither this.  */
+       if (!any_condjump_p (src->end))
+ 	return;
+ 
+       /* Try to fix probabilities.  This is probably as wrong as to leave
+ 	 them as they are.  */
+       tot_pr = 0;
+       for (ae = src->succ; ae; ae = ae->succ_next)
+ 	tot_pr += ae->probability;
+       tot_rem = tot_pr - e->probability;
+       if (tot_rem > 0)
+ 	{
+ 	  for (ae = src->succ; ae; ae = ae->succ_next)
+ 	    ae->probability = (ae->probability * tot_pr) / tot_rem;
+ 	}
+ 
+       /* It is unclear how correct this is.  */
+       delete_insn (src->end);
+       
+       remove_edge (e);
+       src->succ->flags |= EDGE_FALLTHRU;
+     }
+   else
+     {
+       /* Cannot happen -- we are duplicating loop! */
+       abort ();
+     }
+ }
+ 
+ /* Duplicates BBS. Newly created bbs are placed into NEW_BBS, edges to
+    header (target of ENTRY) and copy of header are returned, edge ENTRY
+    is redirected to header copy.  Assigns bbs into loops, updates
+    dominators.  */
+ static void
+ copy_bbs (bbs, n, entry, latch_edge, new_bbs, loops, header_edge, copy_header_edge)
+      basic_block *bbs;
+      int n;
+      edge entry;
+      edge latch_edge;
+      basic_block **new_bbs;
+      struct loops *loops;
+      edge *header_edge;
+      edge *copy_header_edge;
+ {
+   int i;
+   basic_block bb, new_bb, header = entry->dest, dom_bb;
+   edge e;
+ 
+   /* Duplicate bbs, update dominators, assign bbs to loops.  */
+   (*new_bbs) = xcalloc (n, sizeof (basic_block));
+   for (i = 0; i < n; i++)
+     {
+       /* Duplicate.  */
+       bb = bbs[i];
+       new_bb = (*new_bbs)[i] = cfg_layout_duplicate_bb (bb, NULL);
+       RBI (new_bb)->duplicated = 1;
+       /* Add to loop.  */
+       add_bb_to_loop (new_bb, bb->loop_father->copy);
+       /* Possibly set header.  */
+       if (bb->loop_father->header == bb && bb != header)
+ 	new_bb->loop_father->header = new_bb;
+       /* Or latch.  */
+       if (bb->loop_father->latch == bb &&
+ 	  bb->loop_father != header->loop_father)
+ 	new_bb->loop_father->latch = new_bb;
+     }
+ 
+   /* Set dominators.  */
+   for (i = 0; i < n; i++)
+     {
+       bb = bbs[i];
+       new_bb = (*new_bbs)[i];
+       if (bb != header)
+ 	{
+ 	  /* For anything than loop header, just copy it.  */
+ 	  dom_bb = get_immediate_dominator (loops->cfg.dom, bb);
+ 	  dom_bb = RBI (dom_bb)->copy;
+ 	}
+       else
+ 	{
+ 	  /* Copy of header is dominated by entry source.  */
+ 	  dom_bb = entry->src;
+ 	}
+       if (!dom_bb)
+ 	abort ();
+       set_immediate_dominator (loops->cfg.dom, new_bb, dom_bb);
+     }
+ 
+   /* Redirect edges.  */
+   for (i = 0; i < n; i++)
+     {
+       edge e_pred;
+       new_bb = (*new_bbs)[i];
+       bb = bbs[i];
+       for (e = bb->pred; e; e = e_pred)
+ 	{
+ 	  basic_block src = e->src;
+ 
+ 	  e_pred = e->pred_next;
+ 	  
+ 	  /* Does this edge interest us? */
+ 	  if (!RBI (src)->duplicated)
+ 	    continue;
+ 
+ 	  /* So it interests us; redirect it.  */
+ 	  if (bb != header)
+ 	    redirect_edge_with_update (e, new_bb);
+ 	}
+     }
+ 
+   /* Redirect header edge.  */
+   bb = RBI (latch_edge->src)->copy;
+   for (e = bb->succ; e->dest != latch_edge->dest; e = e->succ_next);
+   *header_edge = e;
+   redirect_edge_with_update (*header_edge, header);
+ 
+   /* Redirect entry to copy of header.  */
+   redirect_edge_with_update (entry, RBI (header)->copy);
+   *copy_header_edge = entry;
+ 
+   /* Cancel duplicated flags.  */
+   for (i = 0; i < n; i++)
+    RBI ((*new_bbs)[i])->duplicated = 0;
+ }
+ 
+ /* Remove edges going out from BBS, except edge to HEADER.  */
+ static void
+ remove_exit_edges (bbs, n, header)
+      basic_block *bbs;
+      int n;
+      basic_block header;
+ {
+   basic_block bb;
+   edge e;
+   int i;
+ 
+   /* duplicated is not supposed to be used for this, but I need
+      something to mark blocks.  */
+   for (i = 0 ; i < n; i++)
+     {
+       bb = bbs[i];
+       RBI (bb)->duplicated = 1;
+     }
+   RBI (header)->duplicated = 1;
+ 
+   for (i = 0 ; i < n; i++)
+     {
+       edge succ_e;
+       bb = bbs[i];
+       for (e = bb->succ; e; e = succ_e)
+ 	{
+ 	  succ_e = e->succ_next;
+ 	  if (!RBI (e->dest)->duplicated)
+ 	    loop_delete_branch_edge (e);
+ 	}
+     }
+ 
+   for (i = 0 ; i < n; i++)
+     {
+       bb = bbs[i];
+       RBI (bb)->duplicated = 0;
+     }
+   RBI (header)->duplicated = 0;
+ }
+ 
+ /* Check whether LOOP's body can be duplicated.  */
+ static bool
+ can_duplicate_loop_p (loop)
+      struct loop *loop;
+ {
+   basic_block *bbs;
+   int i;
+ 
+   bbs = get_loop_body (loop);
+ 
+   for (i = 0; i < loop->num_nodes; i++)
+     {
+       if (!cfg_layout_can_duplicate_bb_p (bbs[i]))
+ 	{
+ 	  free (bbs);
+ 	  return false;
+ 	}
+     }
+   free (bbs);
+ 
+   return true;
+ }
+ 
+ /* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of
+    updating LOOPS structure.  E's destination must be LOOP header for this to
+    work.  Remove exit edges from copies corresponding to set bits in WONT_EXIT
+    (bit 0 corresponds to original LOOP body).  If UPDATE_DOMINATORS is set,
+    also take care of updating dominators outside LOOP.  If UPDATE_FREQ is set,
+    also update frequencies, otherwise just copy them.  Returns false if
+    duplication is impossible.  */
+ /* Grrr... this function is getting far too complicated for me to like it.
+    I should do something about it.  */
+ static int
+ duplicate_loop_to_header_edge (loop, e, loops, ndupl, wont_exit,
+ 			       update_dominators, update_freq)
+      struct loop *loop;
+      edge e;
+      struct loops *loops;
+      int ndupl;
+      int wont_exit;
+      int update_dominators;
+      int update_freq;
+ {
+   struct loop *target, *aloop;
+   struct loop **orig_loops;
+   int n_orig_loops;
+   basic_block header = loop->header, latch = loop->latch;
+   basic_block *new_bbs, *bbs, *first_active;
+   basic_block new_bb, bb, first_active_latch = NULL;
+   edge ae, latch_edge, he;
+   int i, j, n, more_active = 0;
+   int is_latch = (latch == e->src);
+   int k0, k, kk, freq_in, freq_e, freq_le;
+   int loop_made_infinite;
+ 
+   if (e->dest != loop->header)
+     abort ();
+   if (ndupl <= 0)
+     abort ();
+ 
+   bbs = get_loop_body (loop);
+ 
+   /* Check whether duplication is possible.  */
+ 
+   for (i = 0; i < loop->num_nodes; i++)
+     {
+       if (!cfg_layout_can_duplicate_bb_p (bbs[i]))
+ 	{
+ 	  free (bbs);
+ 	  return false;
+ 	}
+     }
+ 
+   /* Find edge from latch.  */
+   for (latch_edge = header->pred;
+        latch_edge->src != latch;
+        latch_edge = latch_edge->pred_next);
+ 
+   /* For updating frequencies.  */
+   freq_e = EDGE_FREQUENCY (e);
+   freq_in = header->frequency;
+   freq_le = EDGE_FREQUENCY (latch_edge);
+ 
+   if (is_latch)
+     {
+       /* Should work well unless something inside depends on parity of
+ 	 iteration counter.  */
+       if (freq_in > freq_e)
+ 	{
+ 	  k0 = REG_BR_PROB_BASE;
+ 	  for (i = 0; i < ndupl + 1; i++)
+ 	    k0 = (k0 * freq_e) / freq_in;
+ 	  k0 = REG_BR_PROB_BASE - k0;
+ 	  k0 = (((REG_BR_PROB_BASE * REG_BR_PROB_BASE) / freq_in) * 
+ 		(freq_in - freq_e)) / k0;
+ 	  k = (REG_BR_PROB_BASE * freq_e) / freq_in;
+ 	}
+       else
+ 	{
+ 	  k0 = REG_BR_PROB_BASE / (ndupl + 1);
+ 	  k = REG_BR_PROB_BASE;
+ 	}
+       kk = k0;
+     }
+   else
+     {
+       /* Count probability that we will get to latch from header.
+ 	 This is wrong; first iteration of cycle is certainly somewhat
+ 	 special.  But I cannot do anything with it. */
+ 
+       /* Strange things may happen to frequencies.  :-(  */
+       if (freq_le == 0)
+ 	freq_le = 1;
+       if (freq_in <= freq_le)
+ 	freq_in = freq_le + 1;
+       if (freq_in < freq_le + freq_e)
+ 	freq_e = freq_in - freq_le;
+ 
+       k = (REG_BR_PROB_BASE * freq_le) / freq_in;
+       kk = (REG_BR_PROB_BASE * freq_e) / freq_le;
+       k0 = kk;
+       for (i = 0; i < ndupl; i++)
+ 	k0 = (k0 * k) / REG_BR_PROB_BASE;
+       k0 = (k0 * freq_le) / REG_BR_PROB_BASE + freq_in - freq_le - freq_e;
+       k0 = (REG_BR_PROB_BASE * k0) / (freq_in - freq_le);
+       if (k0 < 0)
+ 	k0 = 0;
+     }
+   if (k0 < 0 || k0 > REG_BR_PROB_BASE ||
+       k < 0  || k > REG_BR_PROB_BASE ||
+       kk < 0 || kk * k > REG_BR_PROB_BASE * REG_BR_PROB_BASE)
+     {
+       /* Something is wrong.  */
+       abort ();
+     }
+ 
+   /* Check whether we will create an infinite loop.  */
+   if (is_latch)
+     {
+       loop_made_infinite = 1;
+       for (i = 0; i <= ndupl; i++)
+ 	if (!(wont_exit & (1 << i)))
+ 	  {
+ 	    loop_made_infinite = 0;
+ 	    break;
+ 	  }
+     }
+   else
+     loop_made_infinite = (wont_exit & 1);
+   /* We cannot handle infinite loops yet.  It is relatively hard to do,
+      as outer loops might become infinite too.  */
+   if (loop_made_infinite)
+     abort ();
+ 
+   /* Loop to which the new bbs will belong.  */
+   target = find_common_loop (e->src->loop_father, e->dest->loop_father);
+ 
+   /* Original loops.  */
+   n_orig_loops = 0;
+   for (aloop = loop->inner; aloop; aloop = aloop->next)
+     n_orig_loops++;
+   orig_loops = xcalloc (n_orig_loops, sizeof (struct loop *));
+   for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
+     orig_loops[i] = aloop;
+ 
+   loop->copy = target;
+   
+   /* Original basic blocks.  */
+   n = loop->num_nodes;
+ 
+   first_active = xcalloc(n, sizeof (basic_block));
+   if (is_latch && !(wont_exit & 1))
+     {
+       memcpy (first_active, bbs, n * sizeof (basic_block));
+       first_active_latch = latch;
+     }
+   
+   for (j = 0; j < ndupl; j++)
+     {
+       /* Copy loops.  */
+       copy_loops_to (loops, orig_loops, n_orig_loops, target);
+ 
+       /* Copy bbs.  */
+       copy_bbs (bbs, n, e, latch_edge, &new_bbs, loops, &e, &he);
+       if (is_latch)
+ 	loop->latch = RBI (latch)->copy;
+ 
+       /* Set counts and frequencies.  */
+       kk = (kk * k) / REG_BR_PROB_BASE;
+       for (i = 0; i < n; i++)
+ 	{
+ 	  new_bb = new_bbs[i];
+ 	  bb = bbs[i];
+ 	  if (update_freq)
+ 	    {
+ 	      new_bb->count = (kk * bb->count +
+ 			       REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ 	      new_bb->frequency = (bb->frequency * kk +
+ 				   REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ 	    }
+ 	  else
+ 	    {
+ 	      new_bb->count = bb->count;
+ 	      new_bb->frequency = bb->frequency;
+ 	    }
+ 	}
+ 
+       /* Remove exit edges if needed.  */
+       if (wont_exit & (1 << (j + 1)))
+ 	remove_exit_edges (new_bbs, n, header);
+       else if (!first_active_latch)
+ 	{
+ 	  memcpy (first_active, new_bbs, n * sizeof (basic_block));
+ 	  first_active_latch = RBI (latch)->copy;
+ 	}
+       else
+ 	more_active = 1;
+       
+       /* Update edge counts.  */
+       for (i = 0; i < n; i++)
+ 	{
+ 	  bb = new_bbs[i];
+ 	  for (ae = bb->succ; ae; ae = ae->succ_next)
+ 	    ae->count = (bb->count * ae->probability +
+ 			 REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ 	}
+ 
+       /* Update loops info.  */
+       for (i = 0; i < n_orig_loops; i++)
+ 	flow_loop_scan (loops, orig_loops[i]->copy, 0);
+ 
+       free (new_bbs);
+ 
+       /* The original loop header is dominated by the latch copy
+ 	 if we duplicated along its only entry edge.  */
+       if (!is_latch && !header->pred->pred_next->pred_next)
+ 	set_immediate_dominator (loops->cfg.dom, header, RBI (latch)->copy);
+       if (is_latch && j == 0)
+ 	{
+ 	  /* Update edge from latch.  */
+ 	  for (latch_edge = RBI (header)->copy->pred;
+ 	       latch_edge->src != latch;
+ 	       latch_edge = latch_edge->pred_next);
+ 	}
+     }
+   free (orig_loops);
+ 
+   /* Now handle original loop.  */
+   
+   /* Remove exit edges if needed.  */
+   if (wont_exit & 1)
+     remove_exit_edges (bbs, n, latch_edge->dest);
+  
+   /* Update edge counts.  */
+   if (update_freq)
+     {
+       for (i = 0; i < n; i++)
+ 	{
+ 	  bb = bbs[i];
+ 	  bb->count = (k0 * bb->count +
+ 		       REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ 	  bb->frequency = (bb->frequency * k0 +
+ 			   REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ 	  for (ae = bb->succ; ae; ae = ae->succ_next)
+ 	    ae->count = (bb->count * ae->probability +
+ 			 REG_BR_PROB_BASE / 2) / REG_BR_PROB_BASE;
+ 	}
+     }
+ 
+   if (!first_active_latch)
+     {
+       memcpy (first_active, bbs, n * sizeof (basic_block));
+       first_active_latch = latch;
+     }
+   else
+     more_active = 1;
+ 
+   /* Update dominators of other blocks if affected.  */
+   if (update_dominators)
+     {
+       for (i = 0; i < n; i++)
+ 	{
+ 	  basic_block dominated, dom_bb, *dom_bbs;
+ 	  int n_dom_bbs,j;
+ 
+ 	  bb = bbs[i];
+ 	  n_dom_bbs = get_dominated_by (loops->cfg.dom, bb, &dom_bbs);
+ 	  for (j = 0; j < n_dom_bbs; j++)
+ 	    {
+ 	      dominated = dom_bbs[j];
+ 	      if (flow_bb_inside_loop_p (loop, dominated))
+ 		continue;
+ 	      if (more_active)
+ 		{
+ 		  dom_bb = nearest_common_dominator (
+ 		    loops->cfg.dom, first_active[i], first_active_latch);
+ 		}
+ 	      else
+ 		dom_bb = first_active[i];
+ 	      set_immediate_dominator (loops->cfg.dom, dominated, dom_bb);
+ 	    }
+ 	  free (dom_bbs);
+ 	}
+     }
+   free (first_active);
+ 
+   free (bbs);
+ 
+   /* Fill other info for father loops.  */
+   for (aloop = target; aloop->depth > 0; aloop = aloop->outer)
+     flow_loop_scan (loops, aloop, 0);
+ 
+   return true;
+ 
  }
  
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.537.2.42
diff -c -3 -p -r1.537.2.42 toplev.c
*** toplev.c	2002/02/22 13:25:56	1.537.2.42
--- toplev.c	2002/02/22 19:24:26
*************** int flag_unroll_loops;
*** 554,559 ****
--- 554,562 ----
  
  int flag_unroll_all_loops;
  
+ /* Nonzero enables loop unswitching.  */
+ int flag_unswitch_loops;
+ 
  /* Nonzero enables prefetch optimizations for arrays in loops.  */
  
  int flag_prefetch_loop_arrays;
*************** static const lang_independent_options f_
*** 1024,1029 ****
--- 1027,1034 ----
     N_("Perform loop unrolling when iteration count is known") },
    {"unroll-all-loops", &flag_unroll_all_loops, 1,
     N_("Perform loop unrolling for all loops") },
+   {"unswitch-loops", &flag_unswitch_loops, 1,
+    N_("Perform loop unswitching") },
    {"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1,
     N_("Generate prefetch instructions, if available, for arrays in loops") },
    {"move-all-movables", &flag_move_all_movables, 1,
*************** rest_of_compilation (decl)
*** 3049,3054 ****
--- 3054,3061 ----
        if (loops)
  	{
  	  /* Here will go optimalizations.  */
+ 	  if (flag_unswitch_loops)
+ 	    unswitch_loops (loops);
  
  	  loop_optimizer_finalize (loops, rtl_dump_file);
  	}



More information about the Gcc-patches mailing list