[PATCH] rtlopt branch merge part 5 -- loop unswitching

Zdenek Dvorak rakdver@atrey.karlin.mff.cuni.cz
Thu Jan 30 23:59:00 GMT 2003


Hello,

this patch brings in loop unswitching pass.

Zdenek

	* cfgloop.h (loop_optimizer_init, loop_optimizer_finalize,
	unswitch_loops): Declare.
	* loop-init.c: New file.
	* loop-unswitch.c: New file.
	* Makefile.in (loop-init.o, loop-unswitch.o): New.
	* params.def (PARAM_MAX_UNSWITCH_INSNS, PARAM_MAX_UNSWITCH_LEVEL): New.
	* toplev.c (DFI_loop2): New dump.
	(flag_unswitch_loops): New.
	(lang_independent_options): Add it.
	(rest_of_compilation): Call new loop optimizer.
	(parse_options_and_default_flags): Turn flag_unswitch_loops on with -O3.
	* doc/invoke.texi (-funswitch-loops, max-unswitch-insns,
	max-unswitch-level): Document.
	* doc/passes.texi (cfgloopanal.c, cfgloopmanip.c, loop-init.c,
	loop-unswitch.c): Document.

diff -c3pN /home/rakdver/gcc/gcc_base/gcc/gcc/cfgloop.h ./cfgloop.h
*** /home/rakdver/gcc/gcc_base/gcc/gcc/cfgloop.h	2003-01-26 16:51:49.000000000 +0100
--- ./cfgloop.h	2003-01-30 23:33:27.000000000 +0100
*************** extern bool remove_path			PARAMS ((struc
*** 330,332 ****
--- 330,339 ----
  extern edge split_loop_bb		PARAMS ((struct loops *, basic_block,
  						rtx));
  
+ /* Loop optimizer initialization.  */
+ extern struct loops *loop_optimizer_init PARAMS ((FILE *));
+ extern void loop_optimizer_finalize	PARAMS ((struct loops *, FILE *));
+ 
+ /* Optimization passes.  */
+ extern void unswitch_loops		PARAMS ((struct loops *));
+ 
diff -c3pN /home/rakdver/gcc/gcc_base/gcc/gcc/loop-init.c ./loop-init.c
*** /home/rakdver/gcc/gcc_base/gcc/gcc/loop-init.c	1970-01-01 01:00:00.000000000 +0100
--- ./loop-init.c	2003-01-30 23:15:12.000000000 +0100
***************
*** 0 ****
--- 1,116 ----
+ /* Loop optimizer initialization routines.
+    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ 
+ This file is part of GCC.
+ 
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+ 
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.  */
+ 
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+ #include "tm.h"
+ #include "rtl.h"
+ #include "hard-reg-set.h"
+ #include "basic-block.h"
+ #include "cfgloop.h"
+ #include "cfglayout.h"
+ #include "gcov-io.h"
+ #include "profile.h"
+ 
+ /* Initialize loop optimizer.  */
+ 
+ struct loops *
+ loop_optimizer_init (dumpfile)
+      FILE *dumpfile;
+ {
+   struct loops *loops = xcalloc (1, sizeof (struct loops));
+   edge e;
+ 
+   /* Avoid annoying special cases of edges going to exit
+      block.  */
+   for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
+     if ((e->flags & EDGE_FALLTHRU) && e->src->succ->succ_next)
+       split_edge (e);
+ 
+   /* Find the loops.  */
+ 
+   if (flow_loops_find (loops, LOOP_TREE) <= 1)
+     {
+       /* No loops.  */
+       flow_loops_free (loops);
+       free (loops);
+       return NULL;
+     }
+ 
+   /* Not going to update these.  */
+   free (loops->cfg.rc_order);
+   loops->cfg.rc_order = NULL;
+   free (loops->cfg.dfs_order);
+   loops->cfg.dfs_order = NULL;
+ 
+   /* Initialize structures for layout changes.  */
+   cfg_layout_initialize (loops);
+ 
+   /* Create pre-headers.  */
+   create_preheaders (loops, CP_SIMPLE_PREHEADERS | CP_INSIDE_CFGLAYOUT);
+ 
+   /* Force all latches to have only single successor.  */
+   force_single_succ_latches (loops);
+ 
+   /* Mark irreducible loops.  */
+   mark_irreducible_loops (loops);
+ 
+   /* Dump loops.  */
+   flow_loops_dump (loops, dumpfile, NULL, 1);
+ 
+ #ifdef ENABLE_CHECKING
+   verify_dominators (loops->cfg.dom);
+   verify_loop_structure (loops);
+ #endif
+ 
+   return loops;
+ }
+ 
+ /* Finalize loop optimizer.  */
+ void
+ loop_optimizer_finalize (loops, dumpfile)
+      struct loops *loops;
+      FILE *dumpfile;
+ {
+   basic_block bb;
+ 
+   /* Finalize layout changes.  */
+   /* Make chain.  */
+   FOR_EACH_BB (bb)
+     if (bb->next_bb != EXIT_BLOCK_PTR)
+       RBI (bb)->next = bb->next_bb;
+ 
+   /* Another dump.  */
+   flow_loops_dump (loops, dumpfile, NULL, 1);
+ 
+   /* Clean up.  */
+   flow_loops_free (loops);
+   free (loops);
+  
+   /* Finalize changes.  */
+   cfg_layout_finalize ();
+ 
+   /* Checking.  */
+ #ifdef ENABLE_CHECKING
+   verify_flow_info ();
+ #endif
+ }
+ 
diff -c3pN /home/rakdver/gcc/gcc_base/gcc/gcc/loop-unswitch.c ./loop-unswitch.c
*** /home/rakdver/gcc/gcc_base/gcc/gcc/loop-unswitch.c	1970-01-01 01:00:00.000000000 +0100
--- ./loop-unswitch.c	2003-01-30 23:17:31.000000000 +0100
***************
*** 0 ****
--- 1,363 ----
+ /* Loop unswitching for GNU compiler.
+    Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ 
+ This file is part of GCC.
+ 
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+ 
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.  */
+ 
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+ #include "tm.h"
+ #include "rtl.h"
+ #include "hard-reg-set.h"
+ #include "basic-block.h"
+ #include "cfgloop.h"
+ #include "cfglayout.h"
+ #include "params.h"
+ #include "output.h"
+ #include "expr.h"
+ 
+ static struct loop *unswitch_loop	PARAMS ((struct loops *,
+ 						struct loop *, basic_block));
+ static void unswitch_single_loop	PARAMS ((struct loops *, struct loop *,
+ 						rtx, int));
+ static bool may_unswitch_on_p		PARAMS ((struct loops *, basic_block,
+ 						struct loop *, basic_block *));
+ static rtx reversed_condition		PARAMS ((rtx));
+ 
+ /* Unswitch LOOPS.  */
+ void
+ unswitch_loops (loops)
+      struct loops *loops;
+ {
+   int i, num;
+   struct loop *loop;
+ 
+   /* Go through inner loops (only original ones).  */
+   num = loops->num;
+   
+   for (i = 1; i < num; i++)
+     {
+       /* Removed loop?  */
+       loop = loops->parray[i];
+       if (!loop)
+ 	continue;
+       /* Not innermost?  */
+       if (loop->inner)
+ 	continue;
+ 
+       unswitch_single_loop (loops, loop, NULL_RTX, 0);
+ #ifdef ENABLE_CHECKING
+       verify_dominators (loops->cfg.dom);
+       verify_loop_structure (loops);
+ #endif
+     }
+ }
+ 
+ /* Checks whether we can unswitch LOOP on basic block BB.  LOOP BODY
+    is provided to save time.  */
+ static bool
+ may_unswitch_on_p (loops, bb, loop, body)
+      struct loops *loops;
+      basic_block bb;
+      struct loop *loop;
+      basic_block *body;
+ {
+   rtx test;
+   unsigned i;
+ 
+   /* It must be a simple conditional jump.  */
+   if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
+     return false;
+   if (!any_condjump_p (bb->end))
+     return false;
+ 
+   /* Are both branches inside loop?  */
+   if (!flow_bb_inside_loop_p (loop, bb->succ->dest)
+       || !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
+     return false;
+ 
+   /* It must be executed just once each iteration (because otherwise we
+      are unable to update dominator/irreducible loop information correctly).  */
+   if (!just_once_each_iteration_p (loops, loop, bb))
+     return false;
+ 
+   /* Condition must be invariant.  */
+   test = get_condition (bb->end, NULL);
+   if (!test)
+     return false;
+ 
+   for (i = 0; i < loop->num_nodes; i++)
+     if (modified_between_p (test, body[i]->head, NEXT_INSN (body[i]->end)))
+       return false;
+ 
+   return true;
+ }
+ 
+ /* Reverses CONDition; returns NULL if we cannot.  */
+ static rtx
+ reversed_condition (cond)
+      rtx cond;
+ {
+   enum rtx_code reversed;
+   reversed = reversed_comparison_code (cond, NULL);
+   if (reversed == UNKNOWN)
+     return NULL_RTX;
+   else
+     return gen_rtx_fmt_ee (reversed,
+ 			   GET_MODE (cond), XEXP (cond, 0),
+ 			   XEXP (cond, 1));
+ }
+ 
+ /* Unswitch single LOOP.  COND_CHECKED holds list of conditions we already
+    unswitched on and are true in our branch.  NUM is number of unswitchings
+    done; do not allow it to grow too much, it is too easy to create example
+    on that the code would grow exponentially.  */
+ static void
+ unswitch_single_loop (loops, loop, cond_checked, num)
+      struct loops *loops;
+      struct loop *loop;
+      rtx cond_checked;
+      int num;
+ {
+   basic_block *bbs, bb;
+   struct loop *nloop;
+   unsigned i;
+   int true_first;
+   rtx cond, rcond, conds, rconds, acond, split_before;
+   int always_true;
+   int always_false;
+   int repeat;
+   edge e;
+ 
+   /* Do not unswitch too much.  */
+   if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, ";; Not unswitching anymore, hit max level\n");
+       return;
+     }
+ 
+   /* We only unswitch innermost loops (at least for now).  */
+   if (loop->inner)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, ";; Not unswitching, not innermost loop\n");
+       return;
+     }
+   
+   /* And we must be able to duplicate loop body.  */
+   if (!can_duplicate_loop_p (loop))
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, ";; Not unswitching, can't duplicate loop\n");
+       return;
+     }
+ 
+   /* Check the size of loop.  */
+   if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, ";; Not unswitching, loop too big\n");
+       return;
+     }
+   
+   /* Do not unswitch in cold areas.  */
+   if (!maybe_hot_bb_p (loop->header))
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, ";; Not unswitching, not hot area\n");
+       return;
+     }
+   
+   /* Nor if it usually do not pass.  */
+   if (expected_loop_iterations (loop) < 1)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, ";; Not unswitching, loop iterations < 1\n");
+       return;
+     }
+ 
+   do
+     {
+       repeat = 0;
+     
+       /* Find a bb to unswitch on.  We use just a stupid test of invariantness
+ 	 of the condition: all used regs must not be modified inside loop body.  */
+       bbs = get_loop_body (loop);
+       for (i = 0; i < loop->num_nodes; i++)
+ 	if (may_unswitch_on_p (loops, bbs[i], loop, bbs))
+ 	  break;
+ 
+       if (i == loop->num_nodes)
+ 	{
+ 	  free (bbs);
+ 	  return;
+ 	}
+ 
+       if (!(cond = get_condition (bbs[i]->end, &split_before)))
+ 	abort ();
+       rcond = reversed_condition (cond);
+       
+       /* Check whether the result can be predicted.  */
+       always_true = 0;
+       always_false = 0;
+       for (acond = cond_checked; acond; acond = XEXP (acond, 1))
+ 	{
+ 	  if (rtx_equal_p (cond, XEXP (acond, 0)))
+ 	    {
+ 	      /* True.  */
+ 	      always_true = 1;
+ 	      break;
+ 	    }
+ 	  if (rtx_equal_p (rcond, XEXP (acond, 0)))
+ 	    {
+ 	      /* False.  */
+ 	      always_false = 1;
+ 	      break;
+ 	    }
+ 	}
+ 
+       if (always_true)
+ 	{
+ 	  /* Remove false path.  */
+  	  for (e = bbs[i]->succ; !(e->flags & EDGE_FALLTHRU); e = e->succ_next);
+ 	  remove_path (loops, e);
+ 	  free (bbs);
+ 	  repeat = 1;
+ 	}
+       else if (always_false)
+ 	{
+ 	  /* Remove true path.  */
+ 	  for (e = bbs[i]->succ; e->flags & EDGE_FALLTHRU; e = e->succ_next);
+ 	  remove_path (loops, e);
+ 	  free (bbs);
+ 	  repeat = 1;
+ 	}
+     } while (repeat);
+  
+   /* We found the condition we can unswitch on.  */
+   conds = alloc_EXPR_LIST (0, cond, cond_checked);
+   if (rcond)
+     rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
+   else
+     rconds = cond_checked;
+ 
+   /* Separate condition.  */
+   bb = split_loop_bb (loops, bbs[i], PREV_INSN (split_before))->dest;
+   free (bbs);
+   true_first = !(bb->succ->flags & EDGE_FALLTHRU);
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file, ";; Unswitching loop\n");
+   /* Unswitch the loop.  */
+   nloop = unswitch_loop (loops, loop, bb);
+   if (!nloop)
+   abort ();
+ 
+   /* Invoke itself on modified loops.  */
+   unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num + 1);
+   unswitch_single_loop (loops, loop, true_first ? rconds : conds, num + 1);
+ 
+   free_EXPR_LIST_node (conds);
+   if (rcond)
+     free_EXPR_LIST_node (rconds);
+ }
+ 
+ /* Unswitch a LOOP w.r. to given basic block.  We only support unswitching
+    of innermost loops (this is not a principial restriction, I'm just lazy
+    to handle subloops).  UNSWITCH_ON must be executed in every iteration,
+    i.e. it must dominate LOOP latch.  Returns NULL if impossible, new
+    loop otherwise.  */
+ static struct loop *
+ unswitch_loop (loops, loop, unswitch_on)
+      struct loops *loops;
+      struct loop *loop;
+      basic_block unswitch_on;
+ {
+   edge entry, e, latch_edge;
+   basic_block switch_bb, unswitch_on_alt, src;
+   struct loop *nloop;
+   sbitmap zero_bitmap;
+   int irred_flag;
+ 
+   /* Some sanity checking.  */
+   if (!flow_bb_inside_loop_p (loop, unswitch_on))
+     abort ();
+   if (!unswitch_on->succ || !unswitch_on->succ->succ_next ||
+       unswitch_on->succ->succ_next->succ_next)
+     abort ();
+   if (!just_once_each_iteration_p (loops, loop, unswitch_on))
+     abort ();
+   if (loop->inner)
+     abort ();
+   if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->dest))
+     abort ();
+   if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest))
+     abort ();
+   
+   /* Will we be able to perform redirection?  */
+   if (!any_condjump_p (unswitch_on->end))
+     return NULL;
+   if (!cfg_layout_can_duplicate_bb_p (unswitch_on))
+     return NULL;
+ 
+   entry = loop_preheader_edge (loop);
+   
+   /* Make a copy.  */
+   src = entry->src;
+   irred_flag = src->flags & BB_IRREDUCIBLE_LOOP;
+   src->flags &= ~BB_IRREDUCIBLE_LOOP;
+   zero_bitmap = sbitmap_alloc (2);
+   sbitmap_zero (zero_bitmap);
+   if (!duplicate_loop_to_header_edge (loop, entry, loops, 1,
+ 	zero_bitmap, NULL, NULL, NULL, 0))
+     return NULL;
+   free (zero_bitmap);
+   src->flags |= irred_flag;
+ 
+   /* Record switch block.  */
+   unswitch_on_alt = RBI (unswitch_on)->copy;
+ 
+   /* Make a copy of unswitched block.  */
+   switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL);
+   switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP;
+   switch_bb->flags |= irred_flag;
+   add_to_dominance_info (loops->cfg.dom, switch_bb);
+   RBI (unswitch_on)->copy = unswitch_on_alt;
+ 
+   /* Loopify the copy.  */
+   for (latch_edge = RBI (loop->latch)->copy->succ;
+        latch_edge->dest != loop->header;
+        latch_edge = latch_edge->succ_next);
+   nloop = loopify (loops, latch_edge,
+ 		   RBI (loop->header)->copy->pred, switch_bb);
+ 
+   /* Remove paths from loop copies.  We rely on the fact that
+      cfg_layout_duplicate_bb reverses list of edges here.  */
+   for (e = unswitch_on->succ->succ_next->dest->pred; e; e = e->pred_next)
+     if (e->src != unswitch_on &&
+ 	!dominated_by_p (loops->cfg.dom, e->src, e->dest))
+       break;
+   remove_path (loops, unswitch_on->succ);
+   remove_path (loops, unswitch_on_alt->succ);
+ 
+   /* One of created loops do not have to be subloop of the outer loop now.  */
+   fix_loop_placement (loop);
+   fix_loop_placement (nloop);
+ 
+   return nloop;
+ }
diff -c3pN /home/rakdver/gcc/gcc_base/gcc/gcc/Makefile.in ./Makefile.in
*** /home/rakdver/gcc/gcc_base/gcc/gcc/Makefile.in	2003-01-30 22:29:34.000000000 +0100
--- ./Makefile.in	2003-01-30 23:52:47.000000000 +0100
*************** C_OBJS = c-parse.o c-lang.o c-pretty-pri
*** 770,776 ****
  
  OBJS = alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	   \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
!  cfgloopanal.o cfgloopmanip.o 						   \
   cfgrtl.o combine.o conflict.o convert.o cse.o cselib.o dbxout.o	   \
   debug.o df.o diagnostic.o doloop.o dominance.o		                   \
   dwarf2asm.o dwarf2out.o dwarfout.o emit-rtl.o except.o explow.o	   \
--- 770,776 ----
  
  OBJS = alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	   \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
!  cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o		   \
   cfgrtl.o combine.o conflict.o convert.o cse.o cselib.o dbxout.o	   \
   debug.o df.o diagnostic.o doloop.o dominance.o		                   \
   dwarf2asm.o dwarf2out.o dwarfout.o emit-rtl.o except.o explow.o	   \
*************** cfgloopanal.o : cfgloopanal.c $(CONFIG_H
*** 1609,1614 ****
--- 1609,1620 ----
     $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
  cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H)
+ loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) gcov-io.h \
+    gcov-iov.h $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h profile.h \
+    coretypes.h $(TM_H)
+ loop-unswitch.o : loop-unswitch.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TM_H) \
+    $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h params.h \
+    output.h $(EXPR_H) coretypes.h $(TM_H)
  dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
     hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
  et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h
diff -c3pN /home/rakdver/gcc/gcc_base/gcc/gcc/params.def ./params.def
*** /home/rakdver/gcc/gcc_base/gcc/gcc/params.def	2003-01-04 20:37:34.000000000 +0100
--- ./params.def	2003-01-30 23:37:48.000000000 +0100
*************** DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
*** 151,156 ****
--- 151,167 ----
  	 "The maximum number of instructions to consider to unroll in a loop",
  	 100)
  
+ /* The maximum number of insns of an unswitched loop.  */
+ DEFPARAM(PARAM_MAX_UNSWITCH_INSNS,
+ 	"max-unswitch-insns",
+ 	"The maximum number of insns of an unswitched loop",
+ 	50)
+ /* The maximum level of recursion in unswitch_single_loop.  */
+ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
+ 	"max-unswitch-level",
+ 	"The maximum number of unswitchings in a single loop",
+ 	3)
+ 
  DEFPARAM(HOT_BB_COUNT_FRACTION,
  	 "hot-bb-count-fraction",
  	 "Select fraction of the maximal count of repetitions of basic block in \
diff -c3pN /home/rakdver/gcc/gcc_base/gcc/gcc/toplev.c ./toplev.c
*** /home/rakdver/gcc/gcc_base/gcc/gcc/toplev.c	2003-01-26 16:38:44.000000000 +0100
--- ./toplev.c	2003-01-30 23:51:57.000000000 +0100
*************** enum dump_file_index
*** 238,243 ****
--- 238,244 ----
    DFI_bp,
    DFI_ce1,
    DFI_tracer,
+   DFI_loop2,
    DFI_cse2,
    DFI_life,
    DFI_combine,
*************** static struct dump_file_info dump_file[D
*** 288,293 ****
--- 289,295 ----
    { "bp",	'b', 1, 0, 0 },
    { "ce1",	'C', 1, 0, 0 },
    { "tracer",	'T', 1, 0, 0 },
+   { "loop2",	'L', 1, 0, 0 },
    { "cse2",	't', 1, 0, 0 },
    { "life",	'f', 1, 0, 0 },	/* Yes, duplicate enable switch.  */
    { "combine",	'c', 1, 0, 0 },
*************** int flag_unroll_loops;
*** 518,523 ****
--- 520,528 ----
  
  int flag_unroll_all_loops;
  
+ /* Nonzero enables loop unswitching.  */
+ int flag_unswitch_loops;
+ 
  /* Nonzero enables prefetch optimizations for arrays in loops.  */
  
  int flag_prefetch_loop_arrays;
*************** static const lang_independent_options f_
*** 1014,1019 ****
--- 1019,1026 ----
     N_("Perform loop unrolling when iteration count is known") },
    {"unroll-all-loops", &flag_unroll_all_loops, 1,
     N_("Perform loop unrolling for all loops") },
+   {"unswitch-loops", &flag_unswitch_loops, 1,
+    N_("Perform loop unswitching") },
    {"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1,
     N_("Generate prefetch instructions, if available, for arrays in loops") },
    {"move-all-movables", &flag_move_all_movables, 1,
*************** rest_of_compilation (decl)
*** 3074,3079 ****
--- 3081,3117 ----
        timevar_pop (TV_TRACER);
      }
  
+   /* Perform loop optimalizations.  */
+   if (optimize > 0
+       && flag_unswitch_loops)
+     {
+       struct loops *loops;
+       timevar_push (TV_LOOP);
+       open_dump_file (DFI_loop2, decl);
+       if (rtl_dump_file)
+ 	dump_flow_info (rtl_dump_file);
+ 
+       loops = loop_optimizer_init (rtl_dump_file);
+ 
+       if (loops)
+ 	{
+ 	  /* The optimalizations:  */
+ 	  if (flag_unswitch_loops)
+ 	    unswitch_loops (loops);
+ 
+ 	  loop_optimizer_finalize (loops, rtl_dump_file);
+ 	}
+ 
+       cleanup_cfg (CLEANUP_EXPENSIVE);
+       delete_trivially_dead_insns (insns, max_reg_num ());
+       reg_scan (insns, max_reg_num (), 0);
+       if (rtl_dump_file)
+ 	dump_flow_info (rtl_dump_file);
+       close_dump_file (DFI_loop2, print_rtl_with_bb, get_insns ());
+       timevar_pop (TV_LOOP);
+       ggc_collect ();
+     }
+ 
    if (flag_rerun_cse_after_loop)
      {
        timevar_push (TV_CSE2);
*************** parse_options_and_default_flags (argc, a
*** 4898,4903 ****
--- 4936,4942 ----
      {
        flag_inline_functions = 1;
        flag_rename_registers = 1;
+       flag_unswitch_loops = 1;
      }
  
    if (optimize < 2 || optimize_size)
diff -c3pN /home/rakdver/gcc/gcc_base/gcc/gcc/doc/invoke.texi ./doc/invoke.texi
*** /home/rakdver/gcc/gcc_base/gcc/gcc/doc/invoke.texi	2003-01-30 23:54:27.000000000 +0100
--- ./doc/invoke.texi	2003-01-30 23:49:22.000000000 +0100
*************** in the following sections.
*** 290,296 ****
  -fsched-spec-load-dangerous  -fsignaling-nans @gol
  -fsingle-precision-constant  -fssa -fssa-ccp -fssa-dce @gol
  -fstrength-reduce  -fstrict-aliasing  -ftracer -fthread-jumps @gol
! -funroll-all-loops  -funroll-loops  @gol
  --param @var{name}=@var{value}
  -O  -O0  -O1  -O2  -O3  -Os}
  
--- 290,296 ----
  -fsched-spec-load-dangerous  -fsignaling-nans @gol
  -fsingle-precision-constant  -fssa -fssa-ccp -fssa-dce @gol
  -fstrength-reduce  -fstrict-aliasing  -ftracer -fthread-jumps @gol
! -funroll-all-loops  -funroll-loops  -funswitch-loops @gol
  --param @var{name}=@var{value}
  -O  -O0  -O1  -O2  -O3  -Os}
  
*************** Dump after conversion from registers to 
*** 3172,3178 ****
  Dump after local register allocation, to @file{@var{file}.23.lreg}.
  @item L
  @opindex dL
! Dump after loop optimization, to @file{@var{file}.12.loop}.
  @item M
  @opindex dM
  Dump after performing the machine dependent reorganization pass, to
--- 3172,3179 ----
  Dump after local register allocation, to @file{@var{file}.23.lreg}.
  @item L
  @opindex dL
! Dump after loop optimization passes, to @file{@var{file}.12.loop} and
! @file{@var{file}.18.loop2}.
  @item M
  @opindex dM
  Dump after performing the machine dependent reorganization pass, to
*************** the loop is entered.  This usually makes
*** 4271,4276 ****
--- 4272,4282 ----
  @option{-funroll-all-loops} implies the same options as
  @option{-funroll-loops},
  
+ @item -funswitch-loops
+ @opindex funswitch-loops
+ Move branches with loop invariant conditions out of the loop, with duplicates
+ of the loop on both branches (modified according to result of the condition).
+ 
  @item -fprefetch-loop-arrays
  @opindex fprefetch-loop-arrays
  If supported by the target machine, generate instructions to prefetch
*************** The maximum number of instructions that 
*** 4373,4378 ****
--- 4379,4390 ----
  is unrolled, and if the loop is unrolled, it determines how many times
  the loop code is unrolled.
  
+ @item max-unswitch-insns
+ The maximum number of insns of an unswitched loop.
+ 
+ @item max-unswitch-level
+ The maximum number of branches unswitched in a single loop.
+ 
  @item hot-bb-count-fraction
  Select fraction of the maximal count of repetitions of basic block in program
  given basic block needs to have to be considered hot.
diff -c3pN /home/rakdver/gcc/gcc_base/gcc/gcc/doc/passes.texi ./doc/passes.texi
*** /home/rakdver/gcc/gcc_base/gcc/gcc/doc/passes.texi	2003-01-30 23:54:27.000000000 +0100
--- ./doc/passes.texi	2003-01-30 23:48:50.000000000 +0100
*************** Its source files are @file{loop.c} and @
*** 333,342 ****
  some functions in @file{integrate.c} and the header @file{integrate.h}.
  Loop dependency analysis routines are contained in @file{dependence.c}.
  
  @opindex dL
  The option @option{-dL} causes a debugging dump of the RTL code after
! this pass.  This dump file's name is made by appending @samp{.loop} to
! the input file name.
  
  @cindex jump bypassing
  @item
--- 333,348 ----
  some functions in @file{integrate.c} and the header @file{integrate.h}.
  Loop dependency analysis routines are contained in @file{dependence.c}.
  
+ Second loop optimization pass takes care of basic block level optimalizations --
+ unswitching loops. The source files are
+ @file{cfgloopanal.c} and @file{cfgloopmanip.c} containing generic loop
+ analysis and manipulation code, @file{loop-init.c} with initialization and
+ finalization code, @file{loop-unswitch.c} for loop unswitching.
+ 
  @opindex dL
  The option @option{-dL} causes a debugging dump of the RTL code after
! these passes.  The dump file names are made by appending @samp{.loop} and
! @samp{.loop2} to the input file name.
  
  @cindex jump bypassing
  @item



More information about the Gcc-patches mailing list