This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[lno] New doloop optimizer


Hello,

this patch is the first piece of the RTL-level part of the loop
optimization.  It adds a more robust (although still fairly simplistic)
induction variable analysis that can be run after unrolling, and
a rewrite of the doloop optimization pass using it.

Bootstrapped & regtested (with -floop-optimize2) on i686 (with
additional -march=k6) and on ia64.

Zdenek

Index: ChangeLog.lno
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/ChangeLog.lno,v
retrieving revision 1.1.2.38
diff -c -3 -p -r1.1.2.38 ChangeLog.lno
*** ChangeLog.lno	26 Jan 2004 17:59:15 -0000	1.1.2.38
--- ChangeLog.lno	28 Jan 2004 21:06:05 -0000
***************
*** 1,3 ****
--- 1,42 ----
+ 2004-01-28  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>
+ 
+ 	* loop-iv.c: New.
+ 	* loop-doloop.c: New.
+ 	* Makefile.in (loop-doloop.o, loop-iv.o): Add.
+ 	* alias.c (init_alias_analysis): Test flag_unroll_loops instead of
+ 	flag_old_unroll_loops.
+ 	* cfgloop.h (struct rtx_iv, struct niter_desc): New.
+ 	(get_loop_level, iv_analysis_loop_init, iv_get_reaching_def,
+ 	iv_analyse, find_simple_exit, iv_number_of_iterations,
+ 	iv_analysis_done, doloop_optimize_loops): Declare.
+ 	* cfgloopanal.c (get_loop_level): New.
+ 	* common.opt (floop-optimize2): New.
+ 	(fold-unroll-loops, fold-unroll-all-loops): Remove.
+ 	* doloop.c (doloop_condition_get): Export.
+ 	* flags.h (flag_old_unroll_loops, flag_old_unroll_all_loops):
+ 	Declaration removed.
+ 	* loop-unswitch.c (reversed_condition): Export.
+ 	* loop.c (loop_invariant_p): Use flag_unroll_loops instead
+ 	of flag_old_unroll_loops.
+ 	* opts.c (common_handle_option): Handle -floop-optimize2,
+ 	do not handle -fold-unroll-loops and -fold-unroll-all-loops.
+ 	* params.def (PARAM_MAX_DOLOOP_INSNS): New.
+ 	* rtl.h (get_mode_bounds, doloop_condition_get,
+ 	reversed_condition): Declare.
+ 	* stor-layout.c (get_mode_bounds): New function.
+ 	* toplev.c (flag_old_unroll_loops, flag_old_unroll_all_loops):
+ 	Remove.
+ 	(flag_loop_optimize2): New.
+ 	(rest_of_handle_loop_optimize): Use flag_unroll_loops instead
+ 	of flag_old_unroll_loops.
+ 	(rest_of_handle_loop2): Call doloop_optimize_loops.
+ 	(rest_of_compilation): Use flag_loop_optimize2.
+ 	(process_options): Remove flag_old_unroll_loops handling, add
+ 	flag_loop_optimize2 handling.
+ 	* toplev.h (flag_loop_optimize2): Declare.
+ 	* unroll.c (unroll_loop): Use flag_unroll_all_loops instead of
+ 	flag_old_unroll_all_loops.
+ 
  2004-01-26  Dorit Naishlos <dorit@il.ibm.com>
  
  	* Makefile.in: (tree-vectorizer.o): Remove dependency on real.h.
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.903.2.158.2.8
diff -c -3 -p -r1.903.2.158.2.8 Makefile.in
*** Makefile.in	26 Jan 2004 17:59:15 -0000	1.903.2.158.2.8
--- Makefile.in	28 Jan 2004 21:06:05 -0000
*************** XCFLAGS =
*** 141,147 ****
  TCFLAGS =
  CFLAGS = -g
  STAGE1_CFLAGS = -g @stage1_cflags@
! BOOT_CFLAGS = -g -O2 
  
  # Flags to determine code coverage. When coverage is disabled, this will
  # contain the optimization flags, as you normally want code coverage
--- 141,147 ----
  TCFLAGS =
  CFLAGS = -g
  STAGE1_CFLAGS = -g @stage1_cflags@
! BOOT_CFLAGS = -g -O2
  
  # Flags to determine code coverage. When coverage is disabled, this will
  # contain the optimization flags, as you normally want code coverage
*************** OBJS-common = \
*** 878,884 ****
   tree-ssa-loop-ivopts.o							   \
   alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	  	   \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
!  cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
   cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o	   \
   dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o	   \
   dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o	                   \
--- 878,885 ----
   tree-ssa-loop-ivopts.o							   \
   alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	  	   \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
!  cfgloopanal.o cfgloopmanip.o loop-iv.o loop-init.o loop-unswitch.o	   \
!  loop-unroll.o loop-doloop.o						   \
   cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o	   \
   dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o	   \
   dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o	                   \
*************** loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) 
*** 1844,1849 ****
--- 1845,1853 ----
  doloop.o : doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h \
     $(LOOP_H) $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) toplev.h \
     cfgloop.h
+ loop-doloop.o : loop-doloop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+    $(RTL_H) flags.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) $(TM_P_H) \
+    toplev.h cfgloop.h output.h $(PARAMS_H)
  unroll.o : unroll.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) insn-config.h \
     function.h $(INTEGRATE_H) $(REGS_H) $(RECOG_H) flags.h $(EXPR_H) $(LOOP_H) toplev.h \
     hard-reg-set.h varray.h $(BASIC_BLOCK_H) $(TM_P_H) $(PREDICT_H) $(PARAMS_H) \
*************** cfgcleanup.o : cfgcleanup.c $(CONFIG_H) 
*** 1872,1877 ****
--- 1876,1883 ----
  cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h flags.h
  cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
+    $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
+ loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
  cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H)
Index: alias.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/alias.c,v
retrieving revision 1.176.2.17.2.1
diff -c -3 -p -r1.176.2.17.2.1 alias.c
*** alias.c	21 Jan 2004 01:10:03 -0000	1.176.2.17.2.1
--- alias.c	28 Jan 2004 21:06:05 -0000
*************** init_alias_analysis (void)
*** 2733,2739 ****
  
    new_reg_base_value = xmalloc (reg_base_value_size * sizeof (rtx));
    reg_seen = xmalloc (reg_base_value_size);
!   if (! reload_completed && flag_old_unroll_loops)
      {
        /* ??? Why are we realloc'ing if we're just going to zero it?  */
        alias_invariant = xrealloc (alias_invariant,
--- 2733,2739 ----
  
    new_reg_base_value = xmalloc (reg_base_value_size * sizeof (rtx));
    reg_seen = xmalloc (reg_base_value_size);
!   if (! reload_completed && flag_unroll_loops)
      {
        /* ??? Why are we realloc'ing if we're just going to zero it?  */
        alias_invariant = xrealloc (alias_invariant,
Index: cfgloop.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cfgloop.h,v
retrieving revision 1.2.4.9.2.5
diff -c -3 -p -r1.2.4.9.2.5 cfgloop.h
*** cfgloop.h	21 Jan 2004 01:10:10 -0000	1.2.4.9.2.5
--- cfgloop.h	28 Jan 2004 21:06:05 -0000
*************** extern struct loop * find_common_loop (s
*** 281,286 ****
--- 281,287 ----
  struct loop *superloop_at_depth (struct loop *, unsigned);
  extern int num_loop_insns (struct loop *);
  extern int average_num_loop_insns (struct loop *);
+ extern unsigned get_loop_level (const struct loop *);
  
  /* Loops & cfg manipulation.  */
  extern basic_block *get_loop_body (const struct loop *);
*************** extern void unloop (struct loops *, stru
*** 329,334 ****
--- 330,398 ----
  extern bool remove_path (struct loops *, edge);
  extern edge split_loop_bb (basic_block, rtx);
  
+ /* Induction variable analysis.  */
+ 
+ struct rtx_iv
+ {
+   bool analysed;
+ 
+   enum machine_mode mode;
+   rtx base, step;
+ };
+ 
+ /* This should replace struct loop_desc.  We need to handle this in unrolling,
+    so leave it this way for now.  */
+ 
+ struct niter_desc
+ {
+   /* The edge out of the loop.  */
+   edge out_edge;
+ 
+   /* The other edge leading from the condition.  */
+   edge in_edge;
+ 
+   /* True if we are able to say anything about number of iterations of the
+      loop.  */
+   bool simple_p;
+ 
+   /* True if the loop iterates the constant number of times.  */
+   bool const_iter;
+ 
+   /* Number of iterations if constant.  */
+   unsigned HOST_WIDEST_INT niter;
+ 
+   /* Upper bound on the number of iterations.  */
+   unsigned HOST_WIDEST_INT niter_max;
+ 
+   /* Assumptions under which the rest of the information is valid.  */
+   rtx assumptions;
+ 
+   /* Assumptions under which the loop ends before reaching the latch,
+      even if the value of niter_expr says otherwise.  */
+ 
+   rtx noloop_assumptions;
+ 
+   /* Condition under which the loop is infinite.  */
+   rtx infinite;
+ 
+   /* Whether the comparison is signed.  */
+   bool signed_p;
+ 
+   /* The mode in which niter_expr should be computed.  */
+   enum machine_mode mode;
+ 
+   /* The number of iterations of the loop.  */
+   rtx niter_expr;
+ };
+ 
+ extern void iv_analysis_loop_init (struct loop *);
+ extern rtx iv_get_reaching_def (rtx, rtx);
+ extern bool iv_analyse (rtx, rtx, struct rtx_iv *);
+ extern void find_simple_exit (struct loop *, struct niter_desc *);
+ extern void iv_number_of_iterations (struct loop *, rtx, rtx,
+ 				     struct niter_desc *);
+ extern void iv_analysis_done (void);
+ 
  /* Loop optimizer initialization.  */
  extern struct loops *loop_optimizer_init (FILE *);
  extern void loop_optimizer_finalize (struct loops *, FILE *);
*************** enum
*** 344,349 ****
--- 408,414 ----
  };
  
  extern void unroll_and_peel_loops (struct loops *, int);
+ extern void doloop_optimize_loops (struct loops *);
  
  static inline struct loop *loop_from_num (struct loops *, unsigned);
  static inline struct loop *outer_loop (struct loop *);
Index: cfgloopanal.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cfgloopanal.c,v
retrieving revision 1.2.4.9.2.3
diff -c -3 -p -r1.2.4.9.2.3 cfgloopanal.c
*** cfgloopanal.c	22 Jan 2004 01:10:45 -0000	1.2.4.9.2.3
--- cfgloopanal.c	28 Jan 2004 21:06:06 -0000
*************** expected_loop_iterations (const struct l
*** 1519,1521 ****
--- 1519,1538 ----
        return (freq_latch + freq_in - 1) / freq_in;
      }
  }
+ 
+ /* Returns the maximum level of nesting of subloops of LOOP.  */
+ 
+ unsigned
+ get_loop_level (const struct loop *loop)
+ {
+   const struct loop *ploop;
+   unsigned mx = 0, l;
+ 
+   for (ploop = loop->inner; ploop; ploop = ploop->next)
+     {
+       l = get_loop_level (ploop);
+       if (l >= mx)
+ 	mx = l + 1;
+     }
+   return mx;
+ }
Index: common.opt
===================================================================
RCS file: /cvs/gcc/gcc/gcc/common.opt,v
retrieving revision 1.14.2.13.2.4
diff -c -3 -p -r1.14.2.13.2.4 common.opt
*** common.opt	21 Jan 2004 01:10:12 -0000	1.14.2.13.2.4
--- common.opt	28 Jan 2004 21:06:06 -0000
*************** floop-optimize
*** 433,438 ****
--- 433,442 ----
  Common
  Perform loop optimizations
  
+ floop-optimize2
+ Common
+ Perform loop optimizations, new passes
+ 
  fmath-errno
  Common
  Set errno after built-in math functions
*************** Use graph-coloring register allocation
*** 468,481 ****
  fnon-call-exceptions
  Common
  Support synchronous non-call exceptions
- 
- fold-unroll-loops
- Common
- Perform loop unrolling when iteration count is known
- 
- fold-unroll-all-loops
- Common
- Perform loop unrolling for all loops
  
  fomit-frame-pointer
  Common
--- 472,477 ----
Index: doloop.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doloop.c,v
retrieving revision 1.19.2.9.4.2
diff -c -3 -p -r1.19.2.9.4.2 doloop.c
*** doloop.c	24 Jan 2004 23:59:29 -0000	1.19.2.9.4.2
--- doloop.c	28 Jan 2004 21:06:06 -0000
*************** Software Foundation, 59 Temple Place - S
*** 60,66 ****
  
  #ifdef HAVE_doloop_end
  
- static rtx doloop_condition_get (rtx);
  static unsigned HOST_WIDE_INT doloop_iterations_max (const struct loop_info *,
  						     enum machine_mode, int);
  static int doloop_valid_p (const struct loop *, rtx);
--- 60,65 ----
*************** static int doloop_modify_runtime (const 
*** 71,77 ****
  
  /* Return the loop termination condition for PATTERN or zero
     if it is not a decrement and branch jump insn.  */
! static rtx
  doloop_condition_get (rtx pattern)
  {
    rtx cmp;
--- 70,77 ----
  
  /* Return the loop termination condition for PATTERN or zero
     if it is not a decrement and branch jump insn.  */
! 
! rtx
  doloop_condition_get (rtx pattern)
  {
    rtx cmp;
Index: flags.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/flags.h,v
retrieving revision 1.86.2.41.2.3
diff -c -3 -p -r1.86.2.41.2.3 flags.h
*** flags.h	21 Jan 2004 01:10:28 -0000	1.86.2.41.2.3
--- flags.h	28 Jan 2004 21:06:06 -0000
*************** extern int flag_float_store;
*** 277,294 ****
  
  extern int flag_strength_reduce;
  
- /* Nonzero enables loop unrolling in unroll.c.  Only loops for which the
-    number of iterations can be calculated at compile-time (UNROLL_COMPLETELY,
-    UNROLL_MODULO) or at run-time (preconditioned to be UNROLL_MODULO) are
-    unrolled.  */
- 
- extern int flag_old_unroll_loops;
- 
- /* Nonzero enables loop unrolling in unroll.c.  All loops are unrolled.
-    This is generally not a win.  */
- 
- extern int flag_old_unroll_all_loops;
- 
  /* Nonzero forces all invariant computations in loops to be moved
     outside the loop.  */
  
--- 277,282 ----
Index: loop-doloop.c
===================================================================
RCS file: loop-doloop.c
diff -N loop-doloop.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- loop-doloop.c	28 Jan 2004 21:06:06 -0000
***************
*** 0 ****
--- 1,493 ----
+ /* Perform doloop optimizations
+    Copyright (C) 2004 Free Software Foundation, Inc.
+    Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
+ 
+ This file is part of GCC.
+ 
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+ 
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.  */
+ 
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+ #include "tm.h"
+ #include "rtl.h"
+ #include "flags.h"
+ #include "expr.h"
+ #include "hard-reg-set.h"
+ #include "basic-block.h"
+ #include "toplev.h"
+ #include "tm_p.h"
+ #include "cfgloop.h"
+ #include "output.h"
+ #include "params.h"
+ 
+ /* This module is used to modify loops with a determinable number of
+    iterations to use special low-overhead looping instructions.
+ 
+    It first validates whether the loop is well behaved and has a
+    determinable number of iterations (either at compile or run-time).
+    It then modifies the loop to use a low-overhead looping pattern as
+    follows:
+ 
+    1. A pseudo register is allocated as the loop iteration counter.
+ 
+    2. The number of loop iterations is calculated and is stored
+       in the loop counter.
+ 
+    3. At the end of the loop, the jump insn is replaced by the
+       doloop_end pattern.  The compare must remain because it might be
+       used elsewhere.  If the loop-variable or condition register are
+       used elsewhere, they will be eliminated by flow.
+ 
+    4. An optional doloop_begin pattern is inserted at the top of the
+       loop.
+ 
+    TODO: The optimization should only be performed when either the biv used
+    for the exit condition is not used at all except for the exit test, or if
+    we do not have to change its value, since otherwise we have to add a new
+    induction variable, which usually will not pay off (unless the cost of the
+    doloop pattern is somehow much lower than the cost of compare & jump, or
+    unless the bct register cannot be used for anything else but doloop -- ???
+    detect these cases).  */
+ 
+ #ifdef HAVE_doloop_end
+ 
+ /* Return nonzero if the loop specified by LOOP is suitable for
+    the use of special low-overhead looping instructions.  DESC
+    describes the number of iterations of the loop.  */
+ 
+ static bool
+ doloop_valid_p (struct loop *loop, struct niter_desc *desc)
+ {
+   basic_block *body = get_loop_body (loop), bb;
+   rtx insn;
+   unsigned i;
+ 
+   /* Check for loops that may not terminate under special conditions.  */
+   if (!desc->simple_p
+       || desc->assumptions
+       || desc->infinite != const0_rtx)
+     {
+       /* There are some cases that would require a special attention.
+ 	 For example if the comparison is LEU and the comparison value
+ 	 is UINT_MAX then the loop will not terminate.  Similarly, if the
+ 	 comparison code is GEU and the comparison value is 0, the
+ 	 loop will not terminate.
+ 
+ 	 If the absolute increment is not 1, the loop can be infinite
+ 	 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
+ 
+ 	 Note that with LE and GE, the loop behavior is undefined
+ 	 (C++ standard section 5 clause 5) if an overflow occurs, say
+ 	 between INT_MAX and INT_MAX + 1.  We thus don't have to worry
+ 	 about these two cases.
+ 
+ 	 ??? We could compute these conditions at run-time and have an
+ 	 additional jump around the loop to ensure an infinite loop.
+ 	 However, it is very unlikely that this is the intended
+ 	 behavior of the loop and checking for these rare boundary
+ 	 conditions would pessimize all other code.
+ 
+ 	 If the loop is executed only a few times an extra check to
+ 	 restart the loop could use up most of the benefits of using a
+ 	 count register loop.  Note however, that normally, this
+ 	 restart branch would never execute, so it could be predicted
+ 	 well by the CPU.  We should generate the pessimistic code by
+ 	 default, and have an option, e.g. -funsafe-loops that would
+ 	 enable count-register loops in this case.  */
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file,
+ 		 "Doloop: Possible infinite iteration case.\n");
+       return false;
+     }
+ 
+   for (i = 0; i < loop->num_nodes; i++)
+     {
+       bb = body[i];
+ 
+       for (insn = BB_HEAD (bb);
+ 	   insn != NEXT_INSN (BB_END (bb));
+ 	   insn = NEXT_INSN (insn))
+ 	{
+ 	  /* A called function may clobber any special registers required for
+ 	     low-overhead looping.  */
+ 	  if (GET_CODE (insn) == CALL_INSN)
+ 	    {
+ 	      if (rtl_dump_file)
+ 		fprintf (rtl_dump_file,
+ 			 "Doloop: Function call in loop.\n");
+ 	      return false;
+ 	    }
+ 
+ 	  /* Some targets (eg, PPC) use the count register for branch on table
+ 	     instructions.  ??? This should be a target specific check.  */
+ 	  if (GET_CODE (insn) == JUMP_INSN
+ 	      && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+ 		  || GET_CODE (PATTERN (insn)) == ADDR_VEC))
+ 	    {
+ 	      if (rtl_dump_file)
+ 		fprintf (rtl_dump_file,
+ 			 "Doloop: Computed branch in the loop.\n");
+ 	      return false;
+ 	    }
+ 	}
+     }
+   free (body);
+ 
+   return true;
+ }
+ 
+ /* Adds test of COND jumping to DEST to the end of BB.  */
+ 
+ static void
+ add_test (rtx cond, basic_block bb, basic_block dest)
+ {
+   rtx seq, jump, label;
+   enum machine_mode mode;
+   rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
+   enum rtx_code code = GET_CODE (cond);
+ 
+   mode = GET_MODE (XEXP (cond, 0));
+   if (mode == VOIDmode)
+     mode = GET_MODE (XEXP (cond, 1));
+ 
+   start_sequence ();
+   op0 = force_operand (op0, NULL_RTX);
+   op1 = force_operand (op1, NULL_RTX);
+   label = block_label (dest);
+   do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label);
+ 
+   jump = get_last_insn ();
+   JUMP_LABEL (jump) = label;
+ 
+   /* The jump is supposed to handle an unlikely special case.  */
+   REG_NOTES (jump)
+ 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ 			       GEN_INT (0), REG_NOTES (jump));
+ 
+   LABEL_NUSES (label)++;
+ 
+   seq = get_insns ();
+   end_sequence ();
+   emit_insn_after (seq, BB_END (bb));
+ }
+ 
+ /* Modify the loop to use the low-overhead looping insn where LOOP
+    describes the loop, DESC describes the number of iterations of the
+    loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
+    end of the loop.  CONDITION is the condition separated from the
+    DOLOOP_SEQ.  */
+ 
+ static void
+ doloop_modify (struct loop *loop, struct niter_desc *desc,
+ 	       rtx doloop_seq, rtx condition)
+ {
+   rtx counter_reg;
+   rtx count, tmp, noloop = NULL_RTX;
+   rtx sequence;
+   rtx jump_insn;
+   rtx jump_label;
+   int nonneg = 0, irr;
+   bool increment_count;
+   basic_block loop_end = desc->out_edge->src;
+ 
+   jump_insn = BB_END (loop_end);
+ 
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "Doloop: Inserting doloop pattern (");
+       if (desc->const_iter)
+ 	fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
+       else
+ 	fputs ("runtime", rtl_dump_file);
+       fputs (" iterations).\n", rtl_dump_file);
+     }
+ 
+   /* Discard original jump to continue loop.  The original compare
+      result may still be live, so it cannot be discarded explicitly.  */
+   delete_insn (jump_insn);
+ 
+   counter_reg = XEXP (condition, 0);
+   if (GET_CODE (counter_reg) == PLUS)
+     counter_reg = XEXP (counter_reg, 0);
+ 
+   count = desc->niter_expr;
+   increment_count = false;
+   switch (GET_CODE (condition))
+     {
+     case NE:
+       /* Currently only NE tests against zero and one are supported.  */
+       if (XEXP (condition, 1) == const1_rtx)
+ 	{
+ 	  increment_count = true;
+ 	  noloop = const1_rtx;
+ 	}
+       else if (XEXP (condition, 1) == const0_rtx)
+        	noloop = const0_rtx;
+       else
+ 	abort ();
+       break;
+ 
+     case GE:
+       /* Currently only GE tests against zero are supported.  */
+       if (XEXP (condition, 1) != const0_rtx)
+ 	abort ();
+ 
+       noloop = constm1_rtx;
+ 
+       /* The iteration count does not need incrementing for a GE test.  */
+       increment_count = false;
+ 
+       /* Determine if the iteration counter will be non-negative.
+ 	 Note that the maximum value loaded is iterations_max - 1.  */
+       if (desc->niter_max
+ 	  <= ((unsigned HOST_WIDEST_INT) 1
+ 	      << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
+ 	nonneg = 1;
+       break;
+ 
+       /* Abort if an invalid doloop pattern has been generated.  */
+     default:
+       abort ();
+     }
+ 
+   if (increment_count)
+     count = simplify_gen_binary (PLUS, desc->mode, count, const1_rtx);
+ 
+   /* Insert initialization of the count register into the loop header.  */
+   start_sequence ();
+   tmp = force_operand (count, counter_reg);
+   convert_move (counter_reg, tmp, 1);
+   sequence = get_insns ();
+   end_sequence ();
+   emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
+ 
+   if (desc->noloop_assumptions)
+     {
+       rtx ass = desc->noloop_assumptions;
+       basic_block preheader = loop_preheader_edge (loop)->src;
+       basic_block set_zero
+ 	      = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
+       basic_block new_preheader
+ 	      = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
+       basic_block bb;
+       edge te;
+       gcov_type cnt;
+ 
+       /* Expand the condition testing the assumptions and if it does not pass,
+ 	 reset the count register to 0.  */
+       add_test (XEXP (ass, 0), preheader, set_zero);
+       preheader->succ->flags &= ~EDGE_FALLTHRU;
+       cnt = preheader->succ->count;
+       preheader->succ->probability = 0;
+       preheader->succ->count = 0;
+       irr = preheader->succ->flags & EDGE_IRREDUCIBLE_LOOP;
+       te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr);
+       te->probability = REG_BR_PROB_BASE;
+       te->count = cnt;
+       set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
+ 
+       set_zero->count = 0;
+       set_zero->frequency = 0;
+ 
+       for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1))
+ 	{
+ 	  bb = loop_split_edge_with (te, NULL_RTX);
+ 	  te = bb->succ;
+ 	  add_test (XEXP (ass, 0), bb, set_zero);
+ 	  make_edge (bb, set_zero, irr);
+ 	}
+   
+       start_sequence ();
+       convert_move (counter_reg, noloop, 0);
+       sequence = get_insns ();
+       end_sequence ();
+       emit_insn_after (sequence, BB_END (set_zero));
+     }
+ 
+   /* Some targets (eg, C4x) need to initialize special looping
+      registers.  */
+ #ifdef HAVE_doloop_begin
+   {
+     rtx init;
+     unsigned level = get_loop_level (loop) + 1;
+     init = gen_doloop_begin (counter_reg,
+ 			     desc->const_iter ? desc->niter_expr : const0_rtx,
+ 			     desc->niter_max,
+ 			     GEN_INT (level));
+     if (init)
+       {
+ 	start_sequence ();
+ 	emit_insn (init);
+ 	sequence = get_insns ();
+ 	end_sequence ();
+ 	emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
+       }
+   }
+ #endif
+ 
+   /* Insert the new low-overhead looping insn.  */
+   emit_jump_insn_after (doloop_seq, BB_END (loop_end));
+   jump_insn = BB_END (loop_end);
+   jump_label = block_label (desc->in_edge->dest);
+   JUMP_LABEL (jump_insn) = jump_label;
+   LABEL_NUSES (jump_label)++;
+ 
+   /* Ensure the right fallthru edge is marked, for case we have reversed
+      the condition.  */
+   desc->in_edge->flags &= ~EDGE_FALLTHRU;
+   desc->out_edge->flags |= EDGE_FALLTHRU;
+ 
+   /* Add a REG_NONNEG note if the actual or estimated maximum number
+      of iterations is non-negative.  */
+   if (nonneg)
+     {
+       REG_NOTES (jump_insn)
+ 	= gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
+     }
+ }
+ 
+ /* Process loop described by LOOP validating that the loop is suitable for
+    conversion to use a low overhead looping instruction, replacing the jump
+    insn where suitable.  Returns true if the loop was successfully
+    modified.  */
+ 
+ static bool
+ doloop_optimize (struct loop *loop)
+ {
+   enum machine_mode mode;
+   rtx doloop_seq, doloop_pat, doloop_reg;
+   rtx iterations;
+   rtx iterations_max;
+   rtx start_label;
+   rtx condition;
+   unsigned level;
+   struct niter_desc desc;
+ 
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file, "Doloop: Processing loop %d.\n", loop->num);
+ 
+   /* Ignore large loops.  */
+   if (loop->ninsns > (unsigned) PARAM_VALUE (PARAM_MAX_DOLOOP_INSNS))
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file,
+ 		 "Doloop: The loop is too large.\n");
+       return false;
+     }
+ 
+   iv_analysis_loop_init (loop);
+ 
+   /* Find the simple exit of a LOOP.  */
+   find_simple_exit (loop, &desc);
+ 
+   /* Check that loop is a candidate for a low-overhead looping insn.  */
+   if (!doloop_valid_p (loop, &desc))
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file,
+ 		 "Doloop: The loop is not suitable.\n");
+       return false;
+     }
+   mode = desc.mode;
+ 
+   if (desc.const_iter && desc.niter < 3)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file,
+ 		 "Doloop: Too few iterations (%ld) to be profitable.\n",
+ 		 (long int) desc.niter);
+       return false;
+     }
+ 
+   iterations = desc.const_iter ? desc.niter_expr : const0_rtx;
+   iterations_max = GEN_INT (desc.niter_max);
+   level = get_loop_level (loop) + 1;
+ 
+   /* Generate looping insn.  If the pattern FAILs then give up trying
+      to modify the loop since there is some aspect the back-end does
+      not like.  */
+   start_label = block_label (desc.in_edge->dest);
+   doloop_reg = gen_reg_rtx (mode);
+   doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
+ 			       GEN_INT (level), start_label);
+   if (! doloop_seq && mode != word_mode)
+     {
+       PUT_MODE (doloop_reg, word_mode);
+       doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
+ 				   GEN_INT (level), start_label);
+     }
+   if (! doloop_seq)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file,
+ 		 "Doloop: Target unwilling to use doloop pattern!\n");
+       return false;
+     }
+ 
+   /* If multiple instructions were created, the last must be the
+      jump instruction.  Also, a raw define_insn may yield a plain
+      pattern.  */
+   doloop_pat = doloop_seq;
+   if (INSN_P (doloop_pat))
+     {
+       while (NEXT_INSN (doloop_pat) != NULL_RTX)
+ 	doloop_pat = NEXT_INSN (doloop_pat);
+       if (GET_CODE (doloop_pat) == JUMP_INSN)
+ 	doloop_pat = PATTERN (doloop_pat);
+       else
+ 	doloop_pat = NULL_RTX;
+     }
+ 
+   if (! doloop_pat
+       || ! (condition = doloop_condition_get (doloop_pat)))
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file,
+ 		 "Doloop: Unrecognizable doloop pattern!\n");
+       return false;
+     }
+ 
+   doloop_modify (loop, &desc, doloop_seq, condition);
+   return true;
+ }
+ 
+ /* This is the main entry point.  Process all LOOPS using doloop_optimize.  */
+ 
+ void
+ doloop_optimize_loops (struct loops *loops)
+ {
+   unsigned i;
+   struct loop *loop;
+ 
+   for (i = 1; i < loops->num; i++)
+     {
+       loop = loops->parray[i];
+       if (!loop)
+ 	continue;
+ 
+       doloop_optimize (loop);
+     }
+ 
+   iv_analysis_done ();
+ 
+ #ifdef ENABLE_CHECKING
+   verify_dominators (CDI_DOMINATORS);
+   verify_loop_structure (loops);
+ #endif
+ }
+ #endif /* HAVE_doloop_end */
+ 
Index: loop-iv.c
===================================================================
RCS file: loop-iv.c
diff -N loop-iv.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- loop-iv.c	28 Jan 2004 21:06:06 -0000
***************
*** 0 ****
--- 1,1765 ----
+ /* Rtl-level induction variable analysis.
+    Copyright (C) 2004 Free Software Foundation, Inc.
+    
+ This file is part of GCC.
+    
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+    
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+    
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.  */
+ 
+ /* This is just a very simplistic analysis of induction variables of the loop.
+    The major use is for determining the number of iterations of a loop for
+    loop unrolling, doloop optimization and branch prediction.  For this we
+    are only interested in bivs and a fairly limited set of givs that are
+    needed in the exit condition.  We also only compute the iv information on
+    demand.
+ 
+    The interesting registers are determined.  A register is interesting if
+ 
+    -- it is set only in the blocks that dominate the latch of the current loop
+    -- all its sets are simple -- i.e. in the form we understand
+ 
+    We also number the insns sequentially in each basic block.  For a use of the
+    interesting reg, it is now easy to find a reaching definition (there may be
+    only one).
+ 
+    Induction variable is then simply analysed by walking the use-def
+    chains.
+    
+    Usage:
+ 
+    iv_analysis_loop_init (loop);
+    insn = iv_get_reaching_def (where, reg);
+    if (iv_analyse (insn, reg, &iv))
+      {
+        ...
+      }
+    iv_analysis_done (); */
+ 
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+ #include "tm.h"
+ #include "rtl.h"
+ #include "hard-reg-set.h"
+ #include "basic-block.h"
+ #include "cfgloop.h"
+ #include "expr.h"
+ #include "output.h"
+ 
+ /* Per-insn bookkeeping for the induction variable analysis, indexed by
+    INSN_UID.  */
+ 
+ struct insn_info
+ {
+   /* Id of the insn; sequential within its basic block (see assign_luids).  */
+   unsigned luid;
+ 
+   /* The previous definition of the register defined by the single
+      set in the insn.  */
+   rtx prev_def;
+ 
+   /* The description of the iv.  */
+   struct rtx_iv iv;
+ };
+ 
+ static struct insn_info *insn_info;
+ 
+ /* The last definition of each register, indexed by register number.
+    NULL means there is no definition inside the loop; const0_rtx is a
+    sentinel marking a register whose sets are not simple (see kill_sets).  */
+ 
+ static rtx *last_def;
+ 
+ /* Cached descriptions of the bivs, indexed by register number.  */
+ 
+ static struct rtx_iv *bivs;
+ 
+ /* Maximal insn number for which there is room in the insn_info array.  */
+ 
+ static unsigned max_insn_no;
+ 
+ /* Maximal register number for which there is room in the bivs and last_def
+    arrays.  */
+ 
+ static unsigned max_reg_no;
+ 
+ /* Dumps information about the induction variable IV to FILE.  */
+ 
+ /* Declared extern immediately before the definition to provide a
+    prototype; presumably also meant to be callable from the debugger --
+    TODO confirm and move to a header if it gains other users.  */
+ extern void dump_iv_info (FILE *, struct rtx_iv *);
+ void
+ dump_iv_info (FILE *file, struct rtx_iv *iv)
+ {
+   /* A NULL base means the analysis failed.  */
+   if (!iv->base)
+     {
+       fprintf (file, "not simple");
+       return;
+     }
+ 
+   /* Zero step means the value is loop invariant.  */
+   if (iv->step == const0_rtx)
+     {
+       fprintf (file, "invariant ");
+       print_rtl (file, iv->base);
+       return;
+     }
+ 
+   print_rtl (file, iv->base);
+   fprintf (file, " + ");
+   print_rtl (file, iv->step);
+   fprintf (file, " * iteration");
+ }
+ 
+ /* Assigns luids (sequential numbers within the block) to insns in basic
+    block BB, and clears the per-insn data cached from any previous
+    analysis (prev_def chain and iv description).  */
+ 
+ static void
+ assign_luids (basic_block bb)
+ {
+   unsigned i, uid;
+   rtx insn;
+ 
+   for (i = 0, insn = BB_HEAD (bb);
+        insn != NEXT_INSN (BB_END (bb));
+        insn = NEXT_INSN (insn), i++)
+     {
+       uid = INSN_UID (insn);
+       insn_info[uid].luid = i;
+       insn_info[uid].prev_def = NULL_RTX;
+       insn_info[uid].iv.analysed = false;
+     }
+ }
+ 
+ /* Checks whether REG is a well-behaved register that we are able to
+    analyse: a pseudo register of integer mode that has not been marked
+    "not simple" (the const0_rtx sentinel in last_def) by kill_sets.  */
+ 
+ static bool
+ simple_reg_p (rtx reg)
+ {
+   unsigned r;
+ 
+   if (!REG_P (reg))
+     return false;
+ 
+   r = REGNO (reg);
+   if (HARD_REGISTER_NUM_P (r))
+     return false;
+ 
+   if (GET_MODE_CLASS (GET_MODE (reg)) != MODE_INT)
+     return false;
+ 
+   /* Marked as having a non-simple set somewhere in the loop.  */
+   if (last_def[r] == const0_rtx)
+     return false;
+ 
+   return true;
+ }
+ 
+ /* Checks whether assignment LHS = RHS is simple enough for us to process:
+    LHS a simple register and RHS a constant, a register copy, a negation,
+    or a PLUS/MINUS/MULT of simple registers and constants (MULT must have
+    at least one constant operand, i.e. be linear).  */
+ 
+ static bool
+ simple_set_p (rtx lhs, rtx rhs)
+ {
+   rtx op0, op1;
+ 
+   if (!simple_reg_p (lhs))
+     return false;
+ 
+   if (CONSTANT_P (rhs))
+     return true;
+ 
+   switch (GET_CODE (rhs))
+     {
+     case REG:
+       return simple_reg_p (rhs);
+ 
+     case NEG:
+       return simple_reg_p (XEXP (rhs, 0));
+ 
+     case PLUS:
+     case MINUS:
+     case MULT:
+       op0 = XEXP (rhs, 0);
+       op1 = XEXP (rhs, 1);
+ 
+       if (!simple_reg_p (op0)
+ 	  && !CONSTANT_P (op0))
+ 	return false;
+ 
+       if (!simple_reg_p (op1)
+ 	  && !CONSTANT_P (op1))
+ 	return false;
+ 
+       /* Reject reg * reg; only linear expressions are analysable.  */
+       if (GET_CODE (rhs) == MULT
+ 	  && !CONSTANT_P (op0)
+ 	  && !CONSTANT_P (op1))
+ 	return false;
+ 
+       return true;
+ 
+     default:
+       return false;
+     }
+ }
+ 
+ /* Mark the single SET in INSN: if it is simple, record INSN as the new
+    last definition of the set register (chaining the previous definition
+    through insn_info[].prev_def) and return the defined register;
+    otherwise return NULL_RTX so that the caller kills the register.  */
+ 
+ static rtx
+ mark_single_set (rtx insn, rtx set)
+ {
+   rtx def = SET_DEST (set), src;
+   unsigned regno, uid;
+ 
+   /* NOTE(review): find_reg_equal_equiv_note returns the note itself (an
+      EXPR_LIST), not the equivalent expression -- presumably XEXP (src, 0)
+      was intended.  As written, any insn carrying a REG_EQUAL/REG_EQUIV
+      note fails simple_set_p and is treated as not simple; confirm and fix
+      together with the matching code in iv_analyse/iv_analyse_biv.  */
+   src = find_reg_equal_equiv_note (insn);
+   if (!src)
+     src = SET_SRC (set);
+ 
+   if (!simple_set_p (SET_DEST (set), src))
+     return NULL_RTX;
+ 
+   regno = REGNO (def);
+   uid = INSN_UID (insn);
+ 
+   /* Any cached biv description for this register is stale now.  */
+   bivs[regno].analysed = false;
+   insn_info[uid].prev_def = last_def[regno];
+   last_def[regno] = insn;
+ 
+   return def;
+ }
+ 
+ /* note_stores callback: marks the stored register REG as not simple
+    (the const0_rtx sentinel in last_def), unless it is the register
+    EXCEPT whose set was already recorded by mark_single_set.  */
+ 
+ static void
+ kill_sets (rtx reg, rtx by ATTRIBUTE_UNUSED, void *except)
+ {
+   /* A store to a subreg clobbers the whole register.  */
+   if (GET_CODE (reg) == SUBREG)
+     reg = SUBREG_REG (reg);
+   if (!REG_P (reg))
+     return;
+   if (reg == except)
+     return;
+ 
+   last_def[REGNO (reg)] = const0_rtx;
+ }
+ 
+ /* Marks sets in basic block BB.  If DOM is true, BB dominates the loop
+    latch, so its simple single sets may be recorded as definitions;
+    all other sets merely kill the registers they touch.  */
+ 
+ static void
+ mark_sets (basic_block bb, bool dom)
+ {
+   rtx insn, set, def;
+ 
+   for (insn = BB_HEAD (bb);
+        insn != NEXT_INSN (BB_END (bb));
+        insn = NEXT_INSN (insn))
+     {
+       if (!INSN_P (insn))
+ 	continue;
+ 
+       if (dom
+ 	  && (set = single_set (insn)))
+ 	def = mark_single_set (insn, set);
+       else
+ 	def = NULL_RTX;
+ 
+       /* Kill every register set by the insn other than DEF.  */
+       note_stores (PATTERN (insn), kill_sets, def);
+     }
+ }
+ 
+ /* Prepare the data for an induction variable analysis of LOOP: grow the
+    insn_info, last_def and bivs arrays if needed, number the insns of the
+    loop body, and record the simple register definitions.  */
+ 
+ void
+ iv_analysis_loop_init (struct loop *loop)
+ {
+   basic_block *body = get_loop_body_in_dom_order (loop);
+   unsigned b;
+ 
+   if ((unsigned) get_max_uid () >= max_insn_no)
+     {
+       /* Add some reserve for insns and registers produced in optimizations.  */
+       max_insn_no = get_max_uid () + 100;
+       if (insn_info)
+ 	free (insn_info);
+       insn_info = xmalloc (max_insn_no * sizeof (struct insn_info));
+     }
+ 
+   if ((unsigned) max_reg_num () >= max_reg_no)
+     {
+       max_reg_no = max_reg_num () + 100;
+       if (last_def)
+ 	free (last_def);
+       last_def = xmalloc (max_reg_no * sizeof (rtx));
+       if (bivs)
+ 	free (bivs);
+       bivs = xmalloc (max_reg_no * sizeof (struct rtx_iv));
+     }
+ 
+   /* Clear only the portion in use; the arrays may be larger.  */
+   memset (last_def, 0, max_reg_num () * sizeof (rtx));
+ 
+   for (b = 0; b < loop->num_nodes; b++)
+     {
+       assign_luids (body[b]);
+       mark_sets (body[b], dominated_by_p (CDI_DOMINATORS, loop->latch,
+ 					  body[b]));
+     }
+ 
+   free (body);
+ }
+ 
+ /* Gets definition of REG reaching the INSN.  If REG is not simple, const0_rtx
+    is returned.  If INSN is before the first def in the loop, NULL_RTX is
+    returned.  */
+ 
+ rtx
+ iv_get_reaching_def (rtx insn, rtx reg)
+ {
+   unsigned regno, luid, auid;
+   rtx ainsn;
+   basic_block bb, abb;
+ 
+   if (!REG_P (reg))
+     return NULL_RTX;
+ 
+   regno = REGNO (reg);
+   /* NULL (no def in loop) and const0_rtx (not simple) are passed
+      through unchanged.  */
+   if (!last_def[regno]
+       || last_def[regno] == const0_rtx)
+     return last_def[regno];
+ 
+   bb = BLOCK_FOR_INSN (insn);
+   luid = insn_info[INSN_UID (insn)].luid;
+ 
+   /* Walk the chain of definitions backwards to the first one whose block
+      dominates the block of INSN; recorded defs all dominate the latch,
+      so there is a unique reaching definition.  */
+   ainsn = last_def[regno];
+   while (1)
+     {
+       abb = BLOCK_FOR_INSN (ainsn);
+ 
+       if (dominated_by_p (CDI_DOMINATORS, bb, abb))
+ 	break;
+ 
+       auid = INSN_UID (ainsn);
+       ainsn = insn_info[auid].prev_def;
+ 
+       if (!ainsn)
+ 	return NULL_RTX;
+     }
+ 
+   /* Within the block of INSN itself, additionally skip the defs that do
+      not precede INSN (compare luids); a def in a different (dominating)
+      block always reaches INSN.  */
+   while (1)
+     {
+       abb = BLOCK_FOR_INSN (ainsn);
+       if (abb != bb)
+ 	return ainsn;
+ 
+       auid = INSN_UID (ainsn);
+       if (luid > insn_info[auid].luid)
+ 	return ainsn;
+ 
+       ainsn = insn_info[auid].prev_def;
+       if (!ainsn)
+ 	return NULL_RTX;
+     }
+ }
+ 
+ /* Determines whether DEF is a biv and if so, stores its description
+    to *IV.  A biv is either a loop invariant (zero step) or a register
+    whose in-loop definitions all have the shape "reg = reg +/- constant";
+    the constant increments are accumulated into the step.  The result is
+    cached in the BIVS array.  */
+ 
+ static bool
+ iv_analyse_biv (rtx def, struct rtx_iv *iv)
+ {
+   unsigned regno;
+   rtx insn, set, rhs, adef, op0, op1, tmp;
+   enum rtx_code code;
+ 
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "Analysing ");
+       print_rtl (rtl_dump_file, def);
+       fprintf (rtl_dump_file, " for bivness.\n");
+     }
+ 
+   if (!REG_P (def))
+     {
+       if (!CONSTANT_P (def))
+ 	return false;
+ 
+       /* A constant is trivially an invariant.  */
+       iv->analysed = true;
+       iv->mode = VOIDmode;
+       iv->base = def;
+       iv->step = const0_rtx;
+       return true;
+     }
+ 
+   regno = REGNO (def);
+   if (last_def[regno] == const0_rtx)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "  not simple.\n");
+       return false;
+     }
+ 
+   if (last_def[regno] && bivs[regno].analysed)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "  already analysed.\n");
+ 
+       *iv = bivs[regno];
+       return iv->base != NULL_RTX;
+     }
+ 
+   iv->analysed = true;
+   iv->mode = GET_MODE (def);
+   iv->base = def;
+   iv->step = const0_rtx;
+ 
+   /* No definition inside the loop -- DEF is an invariant.  */
+   if (!last_def[regno])
+     goto end;
+ 
+   /* Walk the chain of reaching definitions, accumulating the constant
+      increments into iv->step.  */
+   insn = last_def[regno];
+   adef = def;
+   while (insn)
+     {
+       set = single_set (insn);
+       rhs = find_reg_equal_equiv_note (insn);
+       if (!rhs)
+ 	rhs = SET_SRC (set);
+ 
+       code = GET_CODE (rhs);
+       switch (code)
+ 	{
+ 	case REG:
+ 	  adef = rhs;
+ 	  break;
+ 
+ 	case PLUS:
+ 	case MINUS:
+ 	  op0 = XEXP (rhs, 0);
+ 	  op1 = XEXP (rhs, 1);
+ 
+ 	  /* Canonicalize (const + reg) to (reg + const).  */
+ 	  if (code == PLUS && CONSTANT_P (op0))
+ 	    {
+ 	      tmp = op0; op0 = op1; op1 = tmp;
+ 	    }
+ 
+ 	  if (!simple_reg_p (op0)
+ 	      || !CONSTANT_P (op1))
+ 	    {
+ 	      iv->base = NULL_RTX;
+ 	      goto end;
+ 	    }
+ 
+ 	  iv->step = simplify_gen_binary (code, GET_MODE (rhs), iv->step, op1);
+ 	  adef = op0;
+ 	  break;
+ 
+ 	default:
+ 	  iv->base = NULL_RTX;
+ 	  goto end;
+ 	}
+ 
+       insn = iv_get_reaching_def (insn, adef);
+       if (insn == const0_rtx)
+ 	{
+ 	  iv->base = NULL_RTX;
+ 	  goto end;
+ 	}
+     }
+ 
+   /* The chain must terminate at DEF itself, i.e. all increments apply
+      to the same register; otherwise DEF is not a biv.  */
+   if (adef != def)
+     iv->base = NULL_RTX;
+ 
+ end:
+   /* Dump the result unconditionally; dump_iv_info handles the failed
+      (NULL base) case by printing "not simple".  A stray "if (iv->base)"
+      guard here previously suppressed the dump for failed analyses.  */
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "  ");
+       dump_iv_info (rtl_dump_file, iv);
+       fprintf (rtl_dump_file, "\n");
+     }
+ 
+   bivs[regno] = *iv;
+ 
+   return iv->base != NULL_RTX;
+ }
+ 
+ /* Analyses operand OP of INSN and stores the result to *IV.  Constants
+    and registers with no definition in the loop are invariants; otherwise
+    the reaching definition of OP is analysed recursively.  */
+ 
+ static bool
+ iv_analyse_op (rtx insn, rtx op, struct rtx_iv *iv)
+ {
+   rtx def_insn;
+   unsigned regno;
+   bool inv = CONSTANT_P (op);
+ 
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "Analysing operand ");
+       print_rtl (rtl_dump_file, op);
+       fprintf (rtl_dump_file, " of insn ");
+       print_rtl_single (rtl_dump_file, insn);
+     }
+ 
+   if (!inv)
+     {
+       /* NOTE(review): assumes a non-constant OP is a REG (REGNO would be
+ 	 meaningless otherwise) -- appears guaranteed by the simple_set_p
+ 	 filtering in mark_single_set; confirm.  */
+       regno = REGNO (op);
+       if (!last_def[regno])
+ 	inv = true;
+       else if (last_def[regno] == const0_rtx)
+ 	{
+ 	  if (rtl_dump_file)
+ 	    fprintf (rtl_dump_file, "  not simple.\n");
+ 	  return false;
+ 	}
+     }
+ 
+   if (inv)
+     {
+       iv->mode = GET_MODE (op);
+       iv->base = op;
+       iv->step = const0_rtx;
+ 	  
+       if (rtl_dump_file)
+ 	{
+ 	  fprintf (rtl_dump_file, "  ");
+ 	  dump_iv_info (rtl_dump_file, iv);
+ 	  fprintf (rtl_dump_file, "\n");
+ 	}
+       return true;
+     }
+ 
+   def_insn = iv_get_reaching_def (insn, op);
+   if (def_insn == const0_rtx)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "  not simple.\n");
+       return false;
+     }
+ 
+   return iv_analyse (def_insn, op, iv);
+ }
+ 
+ /* Analyses iv DEF defined in INSN and stores the result to *IV.  INSN
+    may also be NULL_RTX (DEF is then analysed as a biv by iv_analyse_biv)
+    or const0_rtx (the analysis fails).  The result is cached in the
+    insn_info array, keyed by the insn's uid.  */
+ 
+ bool
+ iv_analyse (rtx insn, rtx def, struct rtx_iv *iv)
+ {
+   unsigned uid;
+   rtx set, rhs;
+   rtx op0 = NULL_RTX, op1 = NULL_RTX;
+   struct rtx_iv iv0, iv1;
+   enum machine_mode amode;
+   enum rtx_code code;
+ 
+   if (insn == const0_rtx)
+     return false;
+ 
+   if (!insn)
+     return iv_analyse_biv (def, iv);
+ 
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "Analysing def of ");
+       print_rtl (rtl_dump_file, def);
+       fprintf (rtl_dump_file, " in insn ");
+       print_rtl_single (rtl_dump_file, insn);
+     }
+ 
+   uid = INSN_UID (insn);
+   if (insn_info[uid].iv.analysed)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "  already analysed.\n");
+       *iv = insn_info[uid].iv;
+       return iv->base != NULL_RTX;
+     }
+ 
+   iv->mode = VOIDmode;
+   iv->base = NULL_RTX;
+   iv->step = NULL_RTX;
+ 
+   /* INSN passed mark_single_set, so it has a single set.  NOTE(review):
+      as in mark_single_set, the REG_EQUAL/REG_EQUIV note is used directly
+      instead of XEXP (note, 0); insns carrying such notes never reach here
+      under the current filtering, but confirm before relying on it.  */
+   set = single_set (insn);
+   rhs = find_reg_equal_equiv_note (insn);
+   if (!rhs)
+     rhs = SET_SRC (set);
+   code = GET_CODE (rhs);
+ 
+   if (CONSTANT_P (rhs))
+     {
+       op0 = rhs;
+       amode = GET_MODE (def);
+     }
+   else
+     {
+       switch (code)
+ 	{
+ 	case REG:
+ 	  op0 = rhs;
+ 	  break;
+ 
+ 	case NEG:
+ 	  op0 = XEXP (rhs, 0);
+ 	  break;
+ 
+ 	case PLUS:
+ 	case MINUS:
+ 	case MULT:
+ 	  op0 = XEXP (rhs, 0);
+ 	  op1 = XEXP (rhs, 1);
+ 	  break;
+ 
+ 	default:
+ 	  /* mark_single_set only lets the forms above through.  */
+ 	  abort ();
+ 	}
+ 
+       amode = GET_MODE (rhs);
+     }
+ 
+   if (op0)
+     {
+       if (!iv_analyse_op (insn, op0, &iv0))
+ 	goto end;
+ 	
+       /* A VOIDmode iv comes from a constant operand; adopt the mode of
+ 	 the expression.  */
+       if (iv0.mode == VOIDmode)
+ 	iv0.mode = amode;
+     }
+ 
+   if (op1)
+     {
+       if (!iv_analyse_op (insn, op1, &iv1))
+ 	goto end;
+ 
+       if (iv1.mode == VOIDmode)
+ 	iv1.mode = amode;
+     }
+ 
+   if (CONSTANT_P (rhs)
+       || code == REG)
+     {
+       *iv = iv0;
+       goto end;
+     }
+ 
+   /* Combine the operand ivs according to the operation.  */
+   switch (code)
+     {
+     case NEG:
+       iv->mode = iv0.mode;
+       iv->base = simplify_gen_unary (NEG, amode, iv0.base, amode);
+       iv->step = simplify_gen_unary (NEG, amode, iv0.step, amode);
+       break;
+ 
+     case PLUS:
+     case MINUS:
+       if (iv0.mode != iv1.mode)
+ 	abort ();
+ 
+       iv->mode = iv0.mode;
+       iv->base = simplify_gen_binary (code, amode, iv0.base, iv1.base);
+       iv->step = simplify_gen_binary (code, amode, iv0.step, iv1.step);
+       break;
+ 
+     case MULT:
+       if (iv0.mode != iv1.mode)
+ 	abort ();
+ 
+       iv->mode = iv0.mode;
+ 
+       /* One of the operands must be an invariant (zero step); simple_set_p
+ 	 guarantees one of them is a constant.  */
+       if (iv0.step == const0_rtx)
+ 	{
+ 	  iv->base = simplify_gen_binary (MULT, amode, iv0.base, iv1.base);
+ 	  iv->step = simplify_gen_binary (MULT, amode, iv0.base, iv1.step);
+ 	}
+       else if (iv1.step == const0_rtx)
+ 	{
+ 	  iv->base = simplify_gen_binary (MULT, amode, iv1.base, iv0.base);
+ 	  iv->step = simplify_gen_binary (MULT, amode, iv1.base, iv0.step);
+ 	}
+       else
+ 	abort ();
+       break;
+ 
+     default:
+       abort ();
+     }
+ 
+ end:
+   iv->analysed = true;
+   insn_info[uid].iv = *iv;
+ 
+   if (rtl_dump_file)
+     {
+       print_rtl (rtl_dump_file, def);
+       fprintf (rtl_dump_file, " in insn ");
+       print_rtl_single (rtl_dump_file, insn);
+       fprintf (rtl_dump_file, "  is ");
+       dump_iv_info (rtl_dump_file, iv);
+       fprintf (rtl_dump_file, "\n");
+     }
+ 
+   return iv->base != NULL_RTX;
+ }
+ 
+ /* Free the data for an induction variable analysis.  Resetting the size
+    counters forces iv_analysis_loop_init to reallocate next time.  */
+ 
+ void
+ iv_analysis_done (void)
+ {
+   max_insn_no = 0;
+   max_reg_no = 0;
+   if (insn_info)
+     {
+       free (insn_info);
+       insn_info = NULL;
+     }
+   if (last_def)
+     {
+       free (last_def);
+       last_def = NULL;
+     }
+   if (bivs)
+     {
+       free (bivs);
+       bivs = NULL;
+     }
+ }
+ 
+ /* Computes inverse to X modulo (1 << MOD).  X must be odd for the inverse
+    to exist.  By repeated squaring the loop computes X**(2**(MOD-1) - 1),
+    which equals X**-1 because the multiplicative order of any odd number
+    modulo 2**MOD divides 2**(MOD-1).  */
+ 
+ static unsigned HOST_WIDEST_INT
+ inverse (unsigned HOST_WIDEST_INT x, int mod)
+ {
+   /* The double shift avoids undefined behaviour when MOD equals the
+      bit width of the type.  */
+   unsigned HOST_WIDEST_INT mask =
+ 	  ((unsigned HOST_WIDEST_INT) 1 << (mod - 1) << 1) - 1;
+   unsigned HOST_WIDEST_INT rslt = 1;
+   int i;
+ 
+   for (i = 0; i < mod - 1; i++)
+     {
+       rslt = (rslt * x) & mask;
+       x = (x * x) & mask;
+     }
+ 
+   return rslt;
+ }
+ 
+ /* Tries to estimate the maximum number of iterations from the shape of
+    DESC->niter_expr; stores the bound to DESC->niter_max and returns it.  */
+ 
+ static unsigned HOST_WIDEST_INT
+ determine_max_iter (struct niter_desc *desc)
+ {
+   rtx niter = desc->niter_expr;
+   rtx mmin, mmax, left, right;
+   unsigned HOST_WIDEST_INT nmax, inc;
+ 
+   /* (AND C x) with C + 1 a power of two bounds the count by C.  */
+   if (GET_CODE (niter) == AND
+       && GET_CODE (XEXP (niter, 0)) == CONST_INT)
+     {
+       nmax = INTVAL (XEXP (niter, 0));
+       if (!(nmax & (nmax + 1)))
+ 	{
+ 	  desc->niter_max = nmax;
+ 	  return nmax;
+ 	}
+     }
+ 
+   /* Fall back to the full range of the mode.  */
+   get_mode_bounds (desc->mode, desc->signed_p, &mmin, &mmax);
+   nmax = INTVAL (mmax) - INTVAL (mmin);
+ 
+   if (GET_CODE (niter) == UDIV)
+     {
+       if (GET_CODE (XEXP (niter, 1)) != CONST_INT)
+ 	{
+ 	  desc->niter_max = nmax;
+ 	  return nmax;
+ 	}
+       inc = INTVAL (XEXP (niter, 1));
+       niter = XEXP (niter, 0);
+     }
+   else
+     inc = 1;
+ 
+   /* Split NITER into LEFT - RIGHT and tighten the bound by whichever of
+      the two is constant.  The original code read XEXP (niter, 0) for
+      both operands, ignoring the actual second operand.  */
+   if (GET_CODE (niter) == PLUS)
+     {
+       left = XEXP (niter, 0);
+       right = XEXP (niter, 1);
+ 
+       if (GET_CODE (right) == CONST_INT)
+ 	right = GEN_INT (-INTVAL (right));
+     }
+   else if (GET_CODE (niter) == MINUS)
+     {
+       left = XEXP (niter, 0);
+       right = XEXP (niter, 1);
+     }
+   else
+     {
+       left = niter;
+       right = mmin;
+     }
+ 
+   if (GET_CODE (left) == CONST_INT)
+     mmax = left;
+   if (GET_CODE (right) == CONST_INT)
+     mmin = right;
+   nmax = INTVAL (mmax) - INTVAL (mmin);
+ 
+   desc->niter_max = nmax / inc;
+   return nmax / inc;
+ }
+ 
+ /* Checks whether register *REG is in the regset ALT.  Callback for
+    for_each_rtx; a nonzero return stops the traversal, so the for_each_rtx
+    call yields nonzero iff the expression uses an altered register.  */
+ 
+ static int
+ altered_reg_used (rtx *reg, void *alt)
+ {
+   if (!REG_P (*reg))
+     return 0;
+ 
+   return REGNO_REG_SET_P (alt, REGNO (*reg));
+ }
+ 
+ /* note_stores callback: marks the register altered by the store EXPR in
+    the regset ALT.  */
+ 
+ static void
+ mark_altered (rtx expr, rtx by ATTRIBUTE_UNUSED, void *alt)
+ {
+   /* A store to a subreg alters the whole register.  */
+   if (GET_CODE (expr) == SUBREG)
+     expr = SUBREG_REG (expr);
+   if (!REG_P (expr))
+     return;
+ 
+   SET_REGNO_REG_SET (alt, REGNO (expr));
+ }
+ 
+ /* Checks whether RHS is simple enough to process: a constant, a register,
+    or a sum/difference of a register and a constant.  */
+ 
+ static bool
+ simple_rhs_p (rtx rhs)
+ {
+   rtx op0, op1;
+ 
+   if (CONSTANT_P (rhs)
+       || REG_P (rhs))
+     return true;
+ 
+   switch (GET_CODE (rhs))
+     {
+     case PLUS:
+     case MINUS:
+       op0 = XEXP (rhs, 0);
+       op1 = XEXP (rhs, 1);
+       /* Allow reg + const sets only.  */
+       if (REG_P (op0) && CONSTANT_P (op1))
+ 	return true;
+       if (REG_P (op1) && CONSTANT_P (op0))
+ 	return true;
+ 
+       return false;
+ 
+     default:
+       return false;
+     }
+ }
+ 
+ /* Simplifies *EXPR using the assignment in INSN, provided INSN is a
+    single set of a not-yet-altered register from a simple expression that
+    uses no altered registers.  ALTERED is the set of registers altered so
+    far; the registers INSN stores are always added to it, even when the
+    set itself cannot be used.  */
+ 
+ static void
+ simplify_using_assignment (rtx insn, rtx *expr, regset altered)
+ {
+   rtx set = single_set (insn);
+   rtx lhs, rhs;
+   bool ret = false;
+ 
+   if (set)
+     {
+       lhs = SET_DEST (set);
+       if (GET_CODE (lhs) != REG
+ 	  || altered_reg_used (&lhs, altered))
+ 	ret = true;
+     }
+   else
+     ret = true;
+ 
+   /* Record the altered registers before possibly bailing out.  */
+   note_stores (PATTERN (insn), mark_altered, altered);
+ 
+   if (ret)
+     return;
+ 
+   /* Prefer the REG_EQUAL/REG_EQUIV note over the raw source.  The note
+      is an EXPR_LIST; the equivalent expression is its first operand
+      (the original code used the note rtx itself, so simple_rhs_p always
+      rejected it and note-based simplification never happened).  */
+   rhs = find_reg_equal_equiv_note (insn);
+   if (rhs)
+     rhs = XEXP (rhs, 0);
+   else
+     rhs = SET_SRC (set);
+ 
+   if (!simple_rhs_p (rhs))
+     return;
+ 
+   /* The replacement must not use a register whose value has changed
+      since this insn executed.  */
+   if (for_each_rtx (&rhs, altered_reg_used, altered))
+     return;
+ 
+   *expr = simplify_replace_rtx (*expr, lhs, rhs);
+ }
+ 
+ /* Checks whether condition A implies condition B.  Only the case where A
+    is an equality is handled: substitute one side of the equality for the
+    other in B and see whether B folds to true.  Returning false merely
+    means the implication could not be proved.  */
+ 
+ static bool
+ implies_p (rtx a, rtx b)
+ {
+   rtx op0, op1, r;
+ 
+   if (GET_CODE (a) == EQ)
+     {
+       op0 = XEXP (a, 0);
+       op1 = XEXP (a, 1);
+ 
+       if (REG_P (op0))
+ 	{
+ 	  r = simplify_replace_rtx (b, op0, op1);
+ 	  if (r == const_true_rtx)
+ 	    return true;
+ 	}
+ 
+       if (REG_P (op1))
+ 	{
+ 	  r = simplify_replace_rtx (b, op1, op0);
+ 	  if (r == const_true_rtx)
+ 	    return true;
+ 	}
+     }
+ 
+   return false;
+ }
+ 
+ /* Canonicalizes COND so that
+ 
+    (1) If an operand is a constant, it will be the second operand.
+    (2) (LE x const) will be replaced with (LT x <const+1>) and similarly
+        for GE, GEU, and LEU.
+ 
+    The returned comparison is always constructed in SImode, so that two
+    canonicalized conditions can be compared with rtx_equal_p.  */
+ 
+ static rtx
+ canon_condition (rtx cond)
+ {
+   rtx tem;
+   rtx op0, op1;
+   enum rtx_code code;
+ 
+   code = GET_CODE (cond);
+   op0 = XEXP (cond, 0);
+   op1 = XEXP (cond, 1);
+ 
+   /* If constant is first, put it last.  */
+   if (CONSTANT_P (op0))
+     {
+       code = swap_condition (code);
+       tem = op0;
+       op0 = op1;
+       op1 = tem;
+     }
+ 
+   if (GET_CODE (op1) == CONST_INT
+       && GET_MODE (op0) != VOIDmode
+       && GET_MODE_BITSIZE (GET_MODE (op0)) <= HOST_BITS_PER_WIDE_INT)
+     {
+       HOST_WIDE_INT const_val = INTVAL (op1);
+       unsigned HOST_WIDE_INT uconst_val = const_val;
+       unsigned HOST_WIDE_INT max_val
+ 	= (unsigned HOST_WIDE_INT) GET_MODE_MASK (GET_MODE (op0));
+ 
+       /* Each case guards against wrapping past the extreme value of the
+ 	 mode, in which case the non-sharp form has no equivalent.  */
+       switch (code)
+ 	{
+ 	case LE:
+ 	  if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1)
+ 	    code = LT, op1 = gen_int_mode (const_val + 1, GET_MODE (op0));
+ 	  break;
+ 
+ 	/* When cross-compiling, const_val might be sign-extended from
+ 	   BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */
+ 	case GE:
+ 	  if ((HOST_WIDE_INT) (const_val & max_val)
+ 	      != (((HOST_WIDE_INT) 1
+ 		   << (GET_MODE_BITSIZE (GET_MODE (op0)) - 1))))
+ 	    code = GT, op1 = gen_int_mode (const_val - 1, GET_MODE (op0));
+ 	  break;
+ 
+ 	case LEU:
+ 	  if (uconst_val < max_val)
+ 	    code = LTU, op1 = gen_int_mode (uconst_val + 1, GET_MODE (op0));
+ 	  break;
+ 
+ 	case GEU:
+ 	  if (uconst_val != 0)
+ 	    code = GTU, op1 = gen_int_mode (uconst_val - 1, GET_MODE (op0));
+ 	  break;
+ 
+ 	default:
+ 	  break;
+ 	}
+     }
+ 
+   if (op0 != XEXP (cond, 0)
+       || op1 != XEXP (cond, 1)
+       || code != GET_CODE (cond)
+       || GET_MODE (cond) != SImode)
+     cond = gen_rtx_fmt_ee (code, SImode, op0, op1);
+ 
+   return cond;
+ }
+ 
+ /* Tries to use the fact that COND holds to simplify the comparison *EXPR
+    to const_true_rtx or const0_rtx.  ALTERED is the set of registers
+    altered since the point where COND was known to hold; if COND mentions
+    any of them, nothing can be concluded.  */
+ 
+ static void
+ simplify_using_condition (rtx cond, rtx *expr, regset altered)
+ {
+   rtx rev, reve, exp = *expr;
+ 
+   /* Only comparisons can be simplified this way (old-style rtx class
+      check).  */
+   if (GET_RTX_CLASS (GET_CODE (*expr)) != '<')
+     return;
+ 
+   /* If some register gets altered later, we do not really speak about its
+      value at the time of comparison.  */
+   if (for_each_rtx (&cond, altered_reg_used, altered))
+     return;
+ 
+   /* REV/REVE are the negations of COND/EXP; either may be NULL when the
+      condition cannot be reversed (e.g. floating point).  */
+   rev = reversed_condition (cond);
+   reve = reversed_condition (exp);
+ 
+   cond = canon_condition (cond);
+   exp = canon_condition (exp);
+   if (rev)
+     rev = canon_condition (rev);
+   if (reve)
+     reve = canon_condition (reve);
+ 
+   if (rtx_equal_p (exp, cond))
+     {
+       *expr = const_true_rtx;
+       return;
+     }
+ 
+ 
+   if (rev && rtx_equal_p (exp, rev))
+     {
+       *expr = const0_rtx;
+       return;
+     }
+ 
+   if (implies_p (cond, exp))
+     {
+       *expr = const_true_rtx;
+       return;
+     }
+   
+   if (reve && implies_p (cond, reve))
+     {
+       *expr = const0_rtx;
+       return;
+     }
+ 
+   /* A proof by contradiction.  If *EXPR implies (not cond), *EXPR must
+      be false.  */
+   if (rev && implies_p (exp, rev))
+     {
+       *expr = const0_rtx;
+       return;
+     }
+ 
+   /* Similarly, If (not *EXPR) implies (not cond), *EXPR must be true.  */
+   if (rev && reve && implies_p (reve, rev))
+     {
+       *expr = const_true_rtx;
+       return;
+     }
+ 
+   /* We would like to have some other tests here.  TODO.  */
+ 
+   return;
+ }
+ 
+ /* Use relationship between A and *B to eventually eliminate *B.
+    OP is the operation combining the conditions: under AND a condition
+    implied by A is redundant (replace by true), under IOR a condition
+    implying A is redundant (replace by false).  */
+ 
+ static void
+ eliminate_implied_condition (enum rtx_code op, rtx a, rtx *b)
+ {
+   if (op == AND)
+     {
+       /* If A implies *B, we may replace *B by true.  */
+       if (implies_p (a, *b))
+ 	*b = const_true_rtx;
+     }
+   else if (op == IOR)
+     {
+       /* If *B implies A, we may replace *B by false.  */
+       if (implies_p (*b, a))
+ 	*b = const0_rtx;
+     }
+   else
+     abort ();
+ }
+ 
+ /* Eliminates the conditions in the EXPR_LIST TAIL that are implied by
+    *HEAD, and then uses the (possibly simplified) TAIL elements to
+    eliminate *HEAD.  OP (AND or IOR) is the operation combining the list
+    elements.  */
+ 
+ static void
+ eliminate_implied_conditions (enum rtx_code op, rtx *head, rtx tail)
+ {
+   rtx elt;
+ 
+   for (elt = tail; elt; elt = XEXP (elt, 1))
+     eliminate_implied_condition (op, *head, &XEXP (elt, 0));
+   for (elt = tail; elt; elt = XEXP (elt, 1))
+     eliminate_implied_condition (op, XEXP (elt, 0), head);
+ }
+ 
+ /* Simplifies *EXPR using the initial values at the start of the LOOP.
+    If *EXPR is an EXPR_LIST, its elements are assumed to be combined
+    using OP (AND or IOR) and are simplified recursively; for a single
+    condition OP must be NIL.  The simplification walks the chain of
+    single-predecessor blocks above the loop preheader, using their exit
+    conditions and register assignments.  */
+ 
+ static void
+ simplify_using_initial_values (struct loop *loop, enum rtx_code op, rtx *expr)
+ {
+   rtx head, tail, insn;
+   rtx neutral, aggr;
+   regset altered;
+   regset_head altered_head;
+   edge e;
+ 
+   if (!*expr)
+     return;
+ 
+   if (CONSTANT_P (*expr))
+     return;
+ 
+   if (GET_CODE (*expr) == EXPR_LIST)
+     {
+       head = XEXP (*expr, 0);
+       tail = XEXP (*expr, 1);
+ 
+       eliminate_implied_conditions (op, &head, tail);
+ 
+       /* NEUTRAL is the identity of OP; AGGR absorbs the whole list.  */
+       if (op == AND)
+ 	{
+ 	  neutral = const_true_rtx;
+ 	  aggr = const0_rtx;
+ 	}
+       else if (op == IOR)
+ 	{
+ 	  neutral = const0_rtx;
+ 	  aggr = const_true_rtx;
+ 	}
+       else
+ 	abort ();
+ 
+       simplify_using_initial_values (loop, NIL, &head);
+       if (head == aggr)
+ 	{
+ 	  XEXP (*expr, 0) = aggr;
+ 	  XEXP (*expr, 1) = NULL_RTX;
+ 	  return;
+ 	}
+       else if (head == neutral)
+ 	{
+ 	  *expr = tail;
+ 	  simplify_using_initial_values (loop, op, expr);
+ 	  return;
+ 	}
+       simplify_using_initial_values (loop, op, &tail);
+ 
+       if (tail && XEXP (tail, 0) == aggr)
+ 	{
+ 	  *expr = tail;
+ 	  return;
+ 	}
+   
+       XEXP (*expr, 0) = head;
+       XEXP (*expr, 1) = tail;
+       return;
+     }
+ 
+     if (op != NIL)
+     abort ();
+ 
+   e = loop_preheader_edge (loop);
+   if (e->src == ENTRY_BLOCK_PTR)
+     return;
+ 
+   altered = INITIALIZE_REG_SET (altered_head);
+ 
+   while (1)
+     {
+       insn = BB_END (e->src);
+       if (any_condjump_p (insn))
+ 	{
+ 	  /* FIXME -- slightly wrong -- what if compared register
+ 	     gets altered between start of the condition and insn?  */
+ 	  rtx cond = get_condition (BB_END (e->src), NULL, true);
+       
+ 	  /* Entering over the fallthru edge means the branch was not
+ 	     taken, so the negation of the condition holds.  */
+ 	  if (cond && (e->flags & EDGE_FALLTHRU))
+ 	    cond = reversed_condition (cond);
+ 	  if (cond)
+ 	    {
+ 	      simplify_using_condition (cond, expr, altered);
+ 	      if (CONSTANT_P (*expr))
+ 		{
+ 		  FREE_REG_SET (altered);
+ 		  return;
+ 		}
+ 	    }
+ 	}
+ 
+       /* Walk the block backwards, using the assignments and recording
+ 	 which registers change.  */
+       for (; insn != PREV_INSN (BB_HEAD (e->src)); insn = PREV_INSN (insn))
+ 	{
+ 	  if (!INSN_P (insn))
+ 	    continue;
+ 	    
+ 	  simplify_using_assignment (insn, expr, altered);
+ 	  if (CONSTANT_P (*expr))
+ 	    {
+ 	      FREE_REG_SET (altered);
+ 	      return;
+ 	    }
+ 	}
+ 
+       /* Continue only while the block has a unique predecessor.  */
+       e = e->src->pred;
+       if (e->pred_next
+ 	  || e->src == ENTRY_BLOCK_PTR)
+ 	break;
+     }
+ 
+   FREE_REG_SET (altered);
+ }
+ 
+ /* Computes number of iterations of the CONDITION in INSN in LOOP and stores
+    the result into DESC.  Very similar to determine_number_of_iterations
+    (basically its rtl version).  */
+ 
+ void
+ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition,
+ 			 struct niter_desc *desc)
+ {
+   rtx op0, op1, delta, step, bound, may_xform, def_insn, tmp;
+   struct rtx_iv iv0, iv1, tmp_iv;
+   rtx assumption;
+   enum rtx_code cond;
+   enum machine_mode mode;
+   rtx mmin, mmax;
+   unsigned HOST_WIDEST_INT s, size, d;
+   HOST_WIDEST_INT up, down, inc;
+   int was_sharp = false;
+ 
+   /* The meaning of these assumptions is this:
+      if !assumptions
+        then the rest of information does not have to be valid
+      if noloop_assumptions then the loop does not roll
+      if infinite then this exit is never used
+      */
+   desc->assumptions = NULL_RTX;
+   desc->noloop_assumptions = NULL_RTX;
+   desc->infinite = NULL_RTX;
+   desc->simple_p = true;
+ 
+   desc->const_iter = false;
+   desc->niter_expr = NULL_RTX;
+   desc->niter_max = 0;
+ 
+   cond = GET_CODE (condition);
+   if (GET_RTX_CLASS (cond) != '<')
+     abort ();
+ 
+   mode = GET_MODE (XEXP (condition, 0));
+   if (mode == VOIDmode)
+     mode = GET_MODE (XEXP (condition, 1));
+   /* The constant comparisons should be folded.  */
+   if (mode == VOIDmode)
+     abort ();
+ 
+   /* We only handle integers or pointers.  */
+   if (GET_MODE_CLASS (mode) != MODE_INT
+       && GET_MODE_CLASS (mode) != MODE_PARTIAL_INT)
+     goto fail;
+ 
+   op0 = XEXP (condition, 0);
+   def_insn = iv_get_reaching_def (insn, op0);
+   if (!iv_analyse (def_insn, op0, &iv0))
+     goto fail;
+   
+   op1 = XEXP (condition, 1);
+   def_insn = iv_get_reaching_def (insn, op1);
+   if (!iv_analyse (def_insn, op1, &iv1))
+     goto fail;
+ 
+   desc->mode = mode;
+ 
+   /* Check condition and normalize it.  */
+ 
+   switch (cond)
+     {
+       case GE:
+       case GT:
+       case GEU:
+       case GTU:
+ 	tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
+ 	cond = swap_condition (cond);
+ 	break;
+       case NE:
+       case LE:
+       case LEU:
+       case LT:
+       case LTU:
+ 	break;
+       default:
+ 	goto fail;
+     }
+ 
+   size = GET_MODE_BITSIZE (mode);
+   get_mode_bounds (mode, (cond == LE || cond == LT), &mmin, &mmax);
+   desc->signed_p = (cond == LE || cond == LT);
+ 
+   if (GET_CODE (iv0.step) != CONST_INT || GET_CODE (iv1.step) != CONST_INT)
+     goto fail;
+ 
+   /* We can take care of the case of two induction variables chasing each other
+      if the test is NE. I have never seen a loop using it, but still it is
+      cool.  */
+   if (iv0.step != const0_rtx && iv1.step != const0_rtx)
+     {
+       if (cond != NE)
+ 	goto fail;
+ 
+       iv0.step = simplify_gen_binary (MINUS, mode, iv0.step, iv1.step);
+       iv1.step = const0_rtx;
+     }
+ 
+   /* This is either infinite loop or the one that ends immediately, depending
+      on initial values.  Unswitching should remove this kind of conditions.  */
+   if (iv0.step == const0_rtx && iv1.step == const0_rtx)
+     goto fail;
+ 
+   /* Ignore loops of while (i-- < 10) type.  */
+   if (cond != NE
+       && (INTVAL (iv0.step) < 0 || INTVAL (iv1.step) > 0))
+     goto fail;
+ 
+   /* Some more condition normalization.  We must record some assumptions
+      due to overflows.  */
+   switch (cond)
+     {
+       case LT:
+       case LTU:
+ 	/* We want to take care only of non-sharp relationals; this is easy,
+ 	   as in cases the overflow would make the transformation unsafe
+ 	   the loop does not roll.  Seemingly it would make more sense to want
+ 	   to take care of sharp relationals instead, as NE is more similar to
+ 	   them, but the problem is that here the transformation would be more
+ 	   difficult due to possibly infinite loops.  */
+ 	if (iv0.step == const0_rtx)
+ 	  {
+ 	    assumption = simplify_gen_relational (EQ, SImode, mode,
+ 						  iv0.base, mmax);
+ 	    if (assumption == const_true_rtx)
+ 	      goto zero_iter;
+ 	    iv0.base = simplify_gen_binary (PLUS, mode, iv0.base, const1_rtx);
+ 	  }
+ 	else
+ 	  {
+ 	    assumption = simplify_gen_relational (EQ, SImode, mode,
+ 						  iv1.base, mmin);
+ 	    if (assumption == const_true_rtx)
+ 	      goto zero_iter;
+ 	    iv1.base = simplify_gen_binary (PLUS, mode, iv1.base, constm1_rtx);
+ 	  }
+ 	if (assumption != const0_rtx)
+ 	  desc->noloop_assumptions =
+ 		  alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+ 	cond = (cond == LT) ? LE : LEU;
+ 
+ 	/* It will be useful to be able to tell the difference once more in
+ 	   LE -> NE reduction.  */
+ 	was_sharp = true;
+ 	break;
+       default: ;
+     }
+ 
+   /* Take care of trivially infinite loops.  */
+   if (cond != NE)
+     {
+       if (iv0.step == const0_rtx)
+ 	{
+ 	  if (iv0.base == mmin)
+ 	    {
+ 	      desc->infinite =
+ 		      alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX);
+ 	      goto fail;
+ 	    }
+ 	}
+       else
+ 	{
+ 	  if (iv1.base == mmax)
+ 	    {
+ 	      desc->infinite =
+ 		      alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX);
+ 	      goto fail;
+ 	    }
+ 	}
+     }
+ 
+   /* If we can we want to take care of NE conditions instead of size
+      comparisons, as they are much more friendly (most importantly
+      this takes care of special handling of loops with step 1).  We can
+      do it if we first check that upper bound is greater or equal to
+      lower bound, their difference is constant c modulo step and that
+      there is not an overflow.  */
+   if (cond != NE)
+     {
+       if (iv0.step == const0_rtx)
+ 	step = simplify_gen_unary (NEG, mode, iv1.step, mode);
+       else
+ 	step = iv0.step;
+       delta = simplify_gen_binary (MINUS, mode, copy_rtx (iv1.base), copy_rtx (iv0.base));
+       delta = simplify_gen_binary (UMOD, mode, delta, step);
+       may_xform = const0_rtx;
+ 
+       if (GET_CODE (delta) == CONST_INT)
+ 	{
+ 	  if (was_sharp && INTVAL (delta) == INTVAL (step) - 1)
+ 	    {
+ 	      /* A special case.  We have transformed condition of type
+ 		 for (i = 0; i < 4; i += 4)
+ 		 into
+ 		 for (i = 0; i <= 3; i += 4)
+ 		 obviously if the test for overflow during that transformation
+ 		 passed, we cannot overflow here.  Most importantly any
+ 		 loop with sharp end condition and step 1 falls into this
+ 		 category, so handling this case specially is definitely
+ 		 worth the trouble.  */
+ 	      may_xform = const_true_rtx;
+ 	    }
+ 	  else if (iv0.step == const0_rtx)
+ 	    {
+ 	      bound = simplify_gen_binary (PLUS, mode, mmin, step);
+ 	      bound = simplify_gen_binary (MINUS, mode, bound, delta);
+ 	      may_xform = simplify_gen_relational (cond, SImode, mode,
+ 						   bound, copy_rtx (iv0.base));
+ 	    }
+ 	  else
+ 	    {
+ 	      bound = simplify_gen_binary (MINUS, mode, mmax, step);
+ 	      bound = simplify_gen_binary (PLUS, mode, bound, delta);
+ 	      may_xform = simplify_gen_relational (cond, SImode, mode,
+ 						   copy_rtx (iv1.base), bound);
+ 	    }
+ 	}
+ 
+       if (may_xform != const0_rtx)
+ 	{
+ 	  /* We perform the transformation always provided that it is not
+ 	     completely senseless.  This is OK, as we would need this assumption
+ 	     to determine the number of iterations anyway.  */
+ 	  if (may_xform != const_true_rtx)
+ 	    desc->assumptions = alloc_EXPR_LIST (0, may_xform,
+ 						 desc->assumptions);
+ 
+ 	  /* We are going to lose some information about upper bound on
+ 	     number of iterations in this step, so record the information
+ 	     here.  */
+ 	  inc = INTVAL (iv0.step) - INTVAL (iv1.step);
+ 	  if (GET_CODE (iv1.base) == CONST_INT)
+ 	    up = INTVAL (iv1.base);
+ 	  else
+ 	    up = INTVAL (mmax) - inc;
+ 	  down = INTVAL (GET_CODE (iv0.base) == CONST_INT ? iv0.base : mmin);
+ 	  desc->niter_max = (up - down) / inc + 1;
+ 
+ 	  if (iv0.step == const0_rtx)
+ 	    {
+ 	      iv0.base = simplify_gen_binary (PLUS, mode, iv0.base, delta);
+ 	      iv0.base = simplify_gen_binary (MINUS, mode, iv0.base, step);
+ 	    }
+ 	  else
+ 	    {
+ 	      iv1.base = simplify_gen_binary (MINUS, mode, iv1.base, delta);
+ 	      iv1.base = simplify_gen_binary (PLUS, mode, iv1.base, step);
+ 	    }
+ 
+ 	  assumption = simplify_gen_relational (reverse_condition (cond),
+ 						SImode, mode,
+ 						copy_rtx (iv0.base),
+ 						copy_rtx (iv1.base));
+ 	  if (assumption == const_true_rtx)
+ 	    goto zero_iter;
+ 	  else if (assumption != const0_rtx)
+ 	    desc->noloop_assumptions =
+ 		    alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+ 	  cond = NE;
+ 	}
+     }
+ 
+   /* Count the number of iterations.  */
+   if (cond == NE)
+     {
+       /* Everything we do here is just arithmetics modulo size of mode.  This
+ 	 makes us able to do more involved computations of number of iterations
+ 	 than in other cases.  First transform the condition into shape
+ 	 s * i <> c, with s positive.  */
+       iv1.base = simplify_gen_binary (MINUS, mode, iv1.base, iv0.base);
+       iv0.base = const0_rtx;
+       iv0.step = simplify_gen_binary (MINUS, mode, iv0.step, iv1.step);
+       iv1.step = const0_rtx;
+       if (INTVAL (iv0.step) < 0)
+ 	{
+ 	  iv0.step = simplify_gen_unary (NEG, mode, iv0.step, mode);
+ 	  iv1.base = simplify_gen_unary (NEG, mode, iv1.base, mode);
+ 	}
+ 
+       /* Let nsd (s, size of mode) = d.  If d does not divide c, the loop
+ 	 is infinite.  Otherwise, the number of iterations is
+ 	 (inverse(s/d) * (c/d)) mod (size of mode/d).  */
+       s = INTVAL (iv0.step); d = 1;
+       while (s % 2 != 1)
+ 	{
+ 	  s /= 2;
+ 	  d *= 2;
+ 	  size--;
+ 	}
+       bound = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1 ) << 1) - 1);
+       tmp = simplify_gen_binary (UMOD, mode, copy_rtx (iv1.base),
+ 				 GEN_INT (d));
+       desc->infinite = simplify_gen_relational (NE, SImode, mode,
+ 						tmp, const0_rtx);
+       tmp = simplify_gen_binary (UDIV, mode, iv1.base, GEN_INT (d));
+       tmp = simplify_gen_binary (MULT, mode,
+ 				 tmp, GEN_INT (inverse (s, size)));
+       desc->niter_expr = simplify_gen_binary (AND, mode, tmp, bound);
+     }
+   else
+     {
+       if (iv1.step == const0_rtx)
+ 	/* Condition in shape a + s * i <= b
+ 	   We must know that b + s does not overflow and a <= b + s and then we
+ 	   can compute number of iterations as (b + s - a) / s.  (It might
+ 	   seem that we in fact could be more clever about testing the b + s
+ 	   overflow condition using some information about b - a mod s,
+ 	   but it was already taken into account during LE -> NE transform).  */
+ 	{
+ 	  bound = simplify_gen_binary (MINUS, mode, mmax, iv0.step);
+ 	  assumption = simplify_gen_relational (cond, SImode, mode,
+ 						copy_rtx (iv1.base), bound);
+ 	  desc->assumptions =
+ 		  alloc_EXPR_LIST (0, assumption, desc->assumptions);
+ 	  step = iv0.step;
+ 	  tmp = simplify_gen_binary (PLUS, mode, copy_rtx (iv1.base), iv0.step);
+ 	  assumption = simplify_gen_relational (reverse_condition (cond),
+ 						SImode,
+ 						mode,
+ 						copy_rtx (iv0.base),
+ 						tmp);
+ 	  delta = simplify_gen_binary (PLUS, mode, iv1.base, step);
+ 	  delta = simplify_gen_binary (MINUS, mode, delta, iv0.base);
+ 	}
+       else
+ 	{
+ 	  /* Condition in shape a <= b - s * i
+ 	     We must know that a - s does not overflow and a - s <= b and then
+ 	     we can again compute number of iterations as (b - (a - s)) / s.  */
+ 	  bound = simplify_gen_binary (MINUS, mode, mmin, iv1.step);
+ 	  assumption = simplify_gen_relational (cond, SImode, mode,
+ 						bound, copy_rtx (iv0.base));
+ 	  desc->assumptions =
+ 		  alloc_EXPR_LIST (0, assumption, desc->assumptions);
+ 	  step = simplify_gen_unary (NEG, mode, iv1.step, mode);
+ 	  tmp = simplify_gen_binary (PLUS, mode, copy_rtx (iv0.base), iv1.step);
+ 	  assumption = simplify_gen_relational (reverse_condition (cond),
+ 						SImode,
+ 						mode,
+ 						tmp,
+ 						copy_rtx (iv1.base));
+ 	  delta = simplify_gen_binary (MINUS, mode, iv0.base, step);
+ 	  delta = simplify_gen_binary (MINUS, mode, iv1.base, delta);
+ 	}
+       if (assumption == const_true_rtx)
+ 	goto zero_iter;
+       else if (assumption != const0_rtx)
+ 	desc->noloop_assumptions =
+ 		alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+       delta = simplify_gen_binary (UDIV, mode, delta, step);
+       desc->niter_expr = delta;
+     }
+ 
+   simplify_using_initial_values (loop, AND, &desc->assumptions);
+   if (desc->assumptions
+       && XEXP (desc->assumptions, 0) == const0_rtx)
+     goto fail;
+   simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
+   simplify_using_initial_values (loop, NIL, &desc->niter_expr);
+ 
+   /* Rerun the simplification.  Consider code (created by copying loop headers)
+ 
+      i = 0;
+ 
+      if (0 < n)
+        {
+          do
+ 	   {
+ 	     i++;
+ 	   } while (i < n);
+        }
+ 
+     The first pass determines that i = 0, the second pass uses it to eliminate
+     noloop assumption.  */
+ 
+   simplify_using_initial_values (loop, AND, &desc->assumptions);
+   if (desc->assumptions
+       && XEXP (desc->assumptions, 0) == const0_rtx)
+     goto fail;
+   simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
+   simplify_using_initial_values (loop, NIL, &desc->niter_expr);
+ 
+   if (GET_CODE (desc->niter_expr) == CONST_INT)
+     {
+       desc->const_iter = true;
+       desc->niter_max = desc->niter = INTVAL (desc->niter_expr);
+     }
+   else if (!desc->niter_max)
+     desc->niter_max = determine_max_iter (desc);
+ 
+   return;
+ 
+ fail:
+   desc->simple_p = false;
+   return;
+ 
+ zero_iter:
+   desc->const_iter = true;
+   desc->niter = 0;
+   desc->niter_max = 0;
+   desc->niter_expr = const0_rtx;
+   return;
+ }
+ 
+ /* Checks whether E is a simple exit from LOOP and stores its description
+    into DESC.  TODO Should replace cfgloopanal.c:simple_loop_exit_p.  */
+ 
+ static void
+ check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc)
+ {
+   basic_block exit_bb;
+   rtx condition, at;
+   edge ei;
+ 
+   exit_bb = e->src;
+   desc->simple_p = false;
+ 
+   /* It must belong directly to the loop.  */
+   if (exit_bb->loop_father != loop)
+     return;
+ 
+   /* It must be tested (at least) once during any iteration.  */
+   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb))
+     return;
+ 
+   /* It must end in a simple conditional jump.  */
+   if (!any_condjump_p (BB_END (exit_bb)))
+     return;
+ 
+   ei = exit_bb->succ;
+   if (ei == e)
+     ei = ei->succ_next;
+ 
+   desc->out_edge = e;
+   desc->in_edge = ei;
+ 
+   /* Test whether the condition is suitable.  */
+   if (!(condition = get_condition (BB_END (ei->src), &at, true)))
+     return;
+ 
+   if (ei->flags & EDGE_FALLTHRU)
+     {
+       condition = reversed_condition (condition);
+       if (!condition)
+ 	return;
+     }
+ 
+   /* Check that we are able to determine number of iterations and fill
+      in information about it.  */
+   iv_number_of_iterations (loop, at, condition, desc);
+ }
+ 
+ /* Finds a simple exit of LOOP and stores its description into DESC.
+    TODO Should replace cfgloopanal.c:simple_loop_p.  */
+ 
+ void
+ find_simple_exit (struct loop *loop, struct niter_desc *desc)
+ {
+   unsigned i;
+   basic_block *body;
+   edge e;
+   struct niter_desc act;
+   bool any = false;
+ 
+   desc->simple_p = false;
+   body = get_loop_body (loop);
+ 
+   for (i = 0; i < loop->num_nodes; i++)
+     {
+       for (e = body[i]->succ; e; e = e->succ_next)
+ 	{
+ 	  if (flow_bb_inside_loop_p (loop, e->dest))
+ 	    continue;
+ 	  
+ 	  check_simple_exit (loop, e, &act);
+ 	  if (!act.simple_p)
+ 	    continue;
+ 
+ 	  /* Prefer constant iterations; the less the better.  */
+ 	  if (!any)
+ 	    any = true;
+ 	  else if (!act.const_iter
+ 		   || (desc->const_iter && act.niter >= desc->niter))
+ 	    continue;
+ 	  *desc = act;
+ 	}
+     }
+ 
+   if (rtl_dump_file)
+     {
+       if (desc->simple_p)
+ 	{
+ 	  fprintf (rtl_dump_file, "Loop %d is simple:\n", loop->num);
+ 	  fprintf (rtl_dump_file, "  simple exit %d -> %d\n",
+ 		   desc->out_edge->src->index,
+ 		   desc->out_edge->dest->index);
+ 	  if (desc->assumptions)
+ 	    {
+ 	      fprintf (rtl_dump_file, "  assumptions: ");
+ 	      print_rtl (rtl_dump_file, desc->assumptions);
+ 	      fprintf (rtl_dump_file, "\n");
+ 	    }
+ 	  if (desc->noloop_assumptions)
+ 	    {
+ 	      fprintf (rtl_dump_file, "  does not roll if: ");
+ 	      print_rtl (rtl_dump_file, desc->noloop_assumptions);
+ 	      fprintf (rtl_dump_file, "\n");
+ 	    }
+ 	  if (desc->infinite != const0_rtx)
+ 	    {
+ 	      fprintf (rtl_dump_file, "  infinite if: ");
+ 	      print_rtl (rtl_dump_file, desc->infinite);
+ 	      fprintf (rtl_dump_file, "\n");
+ 	    }
+ 
+ 	  fprintf (rtl_dump_file, "  number of iterations: ");
+ 	  print_rtl (rtl_dump_file, desc->niter_expr);
+       	  fprintf (rtl_dump_file, "\n");
+ 
+ 	  fprintf (rtl_dump_file, "  upper bound: ");
+ 	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter_max);
+       	  fprintf (rtl_dump_file, "\n");
+ 	}
+       else
+ 	fprintf (rtl_dump_file, "Loop %d is not simple.\n", loop->num);
+     }
+ 
+   free (body);
+ }
Index: loop-unswitch.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop-unswitch.c,v
retrieving revision 1.2.2.6.2.3
diff -c -3 -p -r1.2.2.6.2.3 loop-unswitch.c
*** loop-unswitch.c	21 Jan 2004 09:24:58 -0000	1.2.2.6.2.3
--- loop-unswitch.c	28 Jan 2004 21:06:06 -0000
*************** static struct loop *unswitch_loop (struc
*** 83,89 ****
  static void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
  static bool may_unswitch_on_p (basic_block, struct loop *,
  			       basic_block *);
- static rtx reversed_condition (rtx);
  
  /* Main entry point.  Perform loop unswitching on all suitable LOOPS.  */
  void
--- 83,88 ----
*************** may_unswitch_on_p (basic_block bb, struc
*** 152,158 ****
  }
  
  /* Reverses CONDition; returns NULL if we cannot.  */
! static rtx
  reversed_condition (rtx cond)
  {
    enum rtx_code reversed;
--- 151,157 ----
  }
  
  /* Reverses CONDition; returns NULL if we cannot.  */
! rtx
  reversed_condition (rtx cond)
  {
    enum rtx_code reversed;
Index: loop.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop.c,v
retrieving revision 1.410.2.28.2.1
diff -c -3 -p -r1.410.2.28.2.1 loop.c
*** loop.c	21 Jan 2004 01:10:35 -0000	1.410.2.28.2.1
--- loop.c	28 Jan 2004 21:06:06 -0000
*************** loop_invariant_p (const struct loop *loo
*** 3308,3314 ****
  
  	 We don't know the loop bounds here though, so just fail for all
  	 labels.  */
!       if (flag_old_unroll_loops)
  	return 0;
        else
  	return 1;
--- 3308,3314 ----
  
  	 We don't know the loop bounds here though, so just fail for all
  	 labels.  */
!       if (flag_unroll_loops)
  	return 0;
        else
  	return 1;
Index: opts.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/opts.c,v
retrieving revision 1.31.2.22.2.3
diff -c -3 -p -r1.31.2.22.2.3 opts.c
*** opts.c	21 Jan 2004 01:10:36 -0000	1.31.2.22.2.3
--- opts.c	28 Jan 2004 21:06:06 -0000
*************** common_handle_option (size_t scode, cons
*** 1105,1110 ****
--- 1105,1114 ----
        flag_loop_optimize = value;
        break;
  
+     case OPT_floop_optimize2:
+       flag_loop_optimize2 = value;
+       break;
+ 
      case OPT_fmath_errno:
        flag_errno_math = value;
        break;
*************** common_handle_option (size_t scode, cons
*** 1139,1152 ****
  
      case OPT_fnon_call_exceptions:
        flag_non_call_exceptions = value;
-       break;
- 
-     case OPT_fold_unroll_all_loops:
-       flag_old_unroll_all_loops = value;
-       break;
- 
-     case OPT_fold_unroll_loops:
-       flag_old_unroll_loops = value;
        break;
  
      case OPT_fomit_frame_pointer:
--- 1143,1148 ----
Index: params.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/params.def,v
retrieving revision 1.15.2.15.2.2
diff -c -3 -p -r1.15.2.15.2.2 params.def
*** params.def	21 Jan 2004 01:10:38 -0000	1.15.2.15.2.2
--- params.def	28 Jan 2004 21:06:06 -0000
*************** DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
*** 206,211 ****
--- 206,219 ----
  	"The maximum number of unswitchings in a single loop",
  	3)
  
+ /* This parameter limits the size of loops for which we attempt to
+    do doloop optimization.  We set this quite high so that we do
+    not punish unrolled loops.  */
+ DEFPARAM(PARAM_MAX_DOLOOP_INSNS,
+ 	 "max-doloop-insns",
+ 	 "The maximum number of instructions of loop processed by doloop optimization",
+ 	 1000)
+ 
  DEFPARAM(HOT_BB_COUNT_FRACTION,
  	 "hot-bb-count-fraction",
  	 "Select fraction of the maximal count of repetitions of basic block in \
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/rtl.h,v
retrieving revision 1.362.2.30.2.2
diff -c -3 -p -r1.362.2.30.2.2 rtl.h
*** rtl.h	21 Jan 2004 01:10:41 -0000	1.362.2.30.2.2
--- rtl.h	28 Jan 2004 21:06:06 -0000
*************** extern bool expensive_function_p (int);
*** 2324,2327 ****
--- 2324,2336 ----
  /* In tracer.c */
  extern void tracer (void);
  
+ /* In stor-layout.c.  */
+ extern void get_mode_bounds (enum machine_mode, int, rtx *, rtx *);
+ 
+ /* In doloop.c.  */
+ extern rtx doloop_condition_get (rtx);
+ 
+ /* In loop-unswitch.c  */
+ extern rtx reversed_condition (rtx);
+ 
  #endif /* ! GCC_RTL_H */
Index: stor-layout.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/stor-layout.c,v
retrieving revision 1.126.2.26.2.1
diff -c -3 -p -r1.126.2.26.2.1 stor-layout.c
*** stor-layout.c	21 Jan 2004 01:10:45 -0000	1.126.2.26.2.1
--- stor-layout.c	28 Jan 2004 21:06:06 -0000
*************** get_best_mode (int bitsize, int bitpos, 
*** 2120,2123 ****
--- 2120,2141 ----
    return mode;
  }
  
+ /* Gets minimal and maximal values for MODE (signed or unsigned depending on
+    SIGN).  */
+ void
+ get_mode_bounds (enum machine_mode mode, int sign, rtx *mmin, rtx *mmax)
+ {
+   int size = GET_MODE_BITSIZE (mode);
+ 
+   if (sign)
+     {
+       *mmin = GEN_INT (-((unsigned HOST_WIDEST_INT) 1 << (size - 1)));
+       *mmax = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1)) - 1);
+     }
+   else
+     {
+       *mmin = const0_rtx;
+       *mmax = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1) << 1) - 1);
+     }
+ }
  #include "gt-stor-layout.h"
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.654.2.84.2.4
diff -c -3 -p -r1.654.2.84.2.4 toplev.c
*** toplev.c	21 Jan 2004 01:10:46 -0000	1.654.2.84.2.4
--- toplev.c	28 Jan 2004 21:06:06 -0000
*************** int flag_thread_jumps;
*** 528,549 ****
  
  int flag_strength_reduce = 0;
  
! /* Nonzero enables loop unrolling in unroll.c.  Only loops for which the
!    number of iterations can be calculated at compile-time (UNROLL_COMPLETELY,
!    UNROLL_MODULO) or at run-time (preconditioned to be UNROLL_MODULO) are
!    unrolled.  */
! 
! int flag_old_unroll_loops;
! 
! /* Nonzero enables loop unrolling in unroll.c.  All loops are unrolled.
!    This is generally not a win.  */
! 
! int flag_old_unroll_all_loops;
! 
! /* Enables unrolling of simple loops in loop-unroll.c.  */
  int flag_unroll_loops;
  
! /* Enables unrolling of all loops in loop-unroll.c.  */
  int flag_unroll_all_loops;
  
  /* Nonzero enables loop peeling.  */
--- 528,537 ----
  
  int flag_strength_reduce = 0;
  
! /* Enables unrolling of simple loops.  */
  int flag_unroll_loops;
  
! /* Enables unrolling of all loops.  */
  int flag_unroll_all_loops;
  
  /* Nonzero enables loop peeling.  */
*************** int flag_web;
*** 657,662 ****
--- 645,654 ----
  
  int flag_loop_optimize;
  
+ /* Nonzero means perform second pass of the loop optimizer.  */
+ 
+ int flag_loop_optimize2;
+ 
  /* Nonzero means perform crossjumping.  */
  
  int flag_crossjumping;
*************** static const lang_independent_options f_
*** 1073,1080 ****
    {"strength-reduce", &flag_strength_reduce, 1 },
    {"unroll-loops", &flag_unroll_loops, 1 },
    {"unroll-all-loops", &flag_unroll_all_loops, 1 },
-   {"old-unroll-loops", &flag_old_unroll_loops, 1 },
-   {"old-unroll-all-loops", &flag_old_unroll_all_loops, 1 },
    {"peel-loops", &flag_peel_loops, 1 },
    {"unswitch-loops", &flag_unswitch_loops, 1 },
    {"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1 },
--- 1065,1070 ----
*************** rest_of_handle_loop_optimize (tree decl,
*** 3016,3022 ****
    if (flag_unroll_loops)
      do_unroll = LOOP_AUTO_UNROLL;	/* Having two unrollers is useless.  */
    else
!     do_unroll = flag_old_unroll_loops ? LOOP_UNROLL : LOOP_AUTO_UNROLL;
    do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
  
    if (flag_rerun_loop_opt)
--- 3006,3012 ----
    if (flag_unroll_loops)
      do_unroll = LOOP_AUTO_UNROLL;	/* Having two unrollers is useless.  */
    else
!     do_unroll = flag_unroll_loops ? LOOP_UNROLL : LOOP_AUTO_UNROLL;
    do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
  
    if (flag_rerun_loop_opt)
*************** rest_of_handle_loop_optimize (tree decl,
*** 3052,3057 ****
--- 3042,3048 ----
  /* Perform loop optimizations.  It might be better to do them a bit
     sooner, but we want the profile feedback to work more
     efficiently.  */
+ 
  static void
  rest_of_handle_loop2 (tree decl, rtx insns)
  {
*************** rest_of_handle_loop2 (tree decl, rtx ins
*** 3080,3085 ****
--- 3071,3081 ----
  			       (flag_unroll_loops ? UAP_UNROLL : 0) |
  			       (flag_unroll_all_loops ? UAP_UNROLL_ALL : 0));
  
+ #ifdef HAVE_doloop_end
+       if (flag_branch_on_count_reg && HAVE_doloop_end)
+ 	doloop_optimize_loops (loops);
+ #endif /* HAVE_doloop_end */
+ 
        loop_optimizer_finalize (loops, rtl_dump_file);
      }
  
*************** rest_of_compilation (tree decl)
*** 3363,3371 ****
      rest_of_handle_tracer (decl, insns);
  
    if (optimize > 0
!       && (flag_unswitch_loops
! 	  || flag_peel_loops
! 	  || flag_unroll_loops))
      rest_of_handle_loop2 (decl, insns);
  
    if (flag_web)
--- 3359,3365 ----
      rest_of_handle_tracer (decl, insns);
  
    if (optimize > 0
!       && flag_loop_optimize2)
      rest_of_handle_loop2 (decl, insns);
  
    if (flag_web)
*************** process_options (void)
*** 4304,4328 ****
    if (flag_unroll_all_loops)
      flag_unroll_loops = 1;
  
!   if (flag_unroll_loops)
!     {
!       flag_old_unroll_loops = 0;
!       flag_old_unroll_all_loops = 0;
!     }
! 
!   if (flag_old_unroll_all_loops)
!     flag_old_unroll_loops = 1;
  
    /* Old loop unrolling requires that strength_reduction be on also.  Silently
       turn on strength reduction here if it isn't already on.  Also, the loop
       unrolling code assumes that cse will be run after loop, so that must
       be turned on also.  */
!   if (flag_old_unroll_loops)
      {
        flag_strength_reduce = 1;
        flag_rerun_cse_after_loop = 1;
      }
!   if (flag_unroll_loops || flag_peel_loops)
      flag_rerun_cse_after_loop = 1;
  
    if (flag_non_call_exceptions)
--- 4298,4316 ----
    if (flag_unroll_all_loops)
      flag_unroll_loops = 1;
  
!   if (flag_loop_optimize2)
!     flag_loop_optimize = 0;
  
    /* Old loop unrolling requires that strength_reduction be on also.  Silently
       turn on strength reduction here if it isn't already on.  Also, the loop
       unrolling code assumes that cse will be run after loop, so that must
       be turned on also.  */
!   if (flag_unroll_loops)
      {
        flag_strength_reduce = 1;
        flag_rerun_cse_after_loop = 1;
      }
!   if (flag_peel_loops)
      flag_rerun_cse_after_loop = 1;
  
    if (flag_non_call_exceptions)
Index: toplev.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.h,v
retrieving revision 1.87.2.12.2.1
diff -c -3 -p -r1.87.2.12.2.1 toplev.h
*** toplev.h	21 Jan 2004 01:10:48 -0000	1.87.2.12.2.1
--- toplev.h	28 Jan 2004 21:06:06 -0000
*************** extern int target_flags_explicit;
*** 106,111 ****
--- 106,112 ----
  
  /* See toplev.c.  */
  extern int flag_loop_optimize;
+ extern int flag_loop_optimize2;
  extern int flag_crossjumping;
  extern int flag_if_conversion;
  extern int flag_if_conversion2;
Index: unroll.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/unroll.c,v
retrieving revision 1.168.2.20.2.1
diff -c -3 -p -r1.168.2.20.2.1 unroll.c
*** unroll.c	21 Jan 2004 01:10:56 -0000	1.168.2.20.2.1
--- unroll.c	28 Jan 2004 21:06:07 -0000
*************** unroll_loop (struct loop *loop, int insn
*** 1120,1126 ****
  
    /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
       the loop unless all loops are being unrolled.  */
!   if (unroll_type == UNROLL_NAIVE && ! flag_old_unroll_all_loops)
      {
        if (loop_dump_stream)
  	fprintf (loop_dump_stream,
--- 1120,1126 ----
  
    /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
       the loop unless all loops are being unrolled.  */
!   if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
      {
        if (loop_dump_stream)
  	fprintf (loop_dump_stream,


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]