This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] lno branch merge -- new iv & simple loop analysis


Hello,

this patch merges the new rtl level induction variable & simple loop
analysis from lno-branch.

Improvements against current version:
-- wider range of simple loops detected, thus making it possible to
   work with more aggressively optimized loops
-- more precise handling of overflows & other special conditions; unlike
   the previous ad-hoc handling, we now record the assumptions whenever
   some nontrivial transformation needs to be done, thus making its
   correctness easier to verify
-- improvements in handling of subregs (especially on big-endian architectures)

The change is compile-time neutral.

Bootstrapped (with -funroll-loops -funswitch-loops) & regtested on i686,
x86_64, ia64 and ppc64.  On all these architectures I have checked on
examples that the simple loops are recognized correctly.  On i686 I have
also checked the new version against the old one, verifying that it
detects more simple loops (i.e. that all loops detected as simple by the
old version are also detected as simple by the new version).

The patch leaves the (unused) old version, so that the code removal
chunks do not clutter the diff; I will submit a separate patch for this.

Zdenek

	* loop-iv.c: New file.
	* Makefile.in (loop-iv.o): New.
	* basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros.
	* cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order,
	num_loop_branches): New functions.
	* cfgloop.h (get_loop_body_in_dom_order, num_loop_branches,
	iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value,
	find_simple_exit, iv_number_of_iterations, iv_analysis_done,
	get_simple_loop_desc, free_simple_loop_desc): Declare.
	(simple_loop_desc): New inline function.
	(struct rtx_iv, struct niter_desc): New.
	* cfgloopmanip.c (loopify): Specify semantics more precisely.
	* expr.c (force_operand): Handle subregs of expressions created by
	loop unroller.
	* loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move
	parts of the initialization to toplev.c
	* loop-unroll.c (loop_exit_at_end_p): New.
	(unroll_and_peel_loops): Call iv_analysis_done.
	(decide_peel_once_rolling, decide_peel_completely,
	decide_unroll_stupid, decide_unroll_constant_iterations,
	decide_unroll_runtime_iterations, decide_peel_simple,
	peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations,
	unroll_loop_runtime_iterations): Use new simple loop analysis.
	* loop-unswitch.c (compare_and_jump_seq): New.
	(may_unswitch_on_p): Renamed to ...
	(may_unswitch_on): Use new iv analysis.
	(reversed_condition): Export.
	(unswitch_single_loop, unswitch_loop): Use new iv analysis.
	* predict.c (estimate_probability): Use new simple loop analysis.
	* rtl.h (get_mode_bounds, reversed_condition,compare_and_jump_seq,
	canon_condition, simplify_using_condition): Declare.
	* stor-layout.c (get_mode_bounds): New.
	* toplev.c (rest_of_handle_loop2): Some parts of
	initialization/finalization moved here from loop-init.c.

Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1249
diff -c -3 -p -r1.1249 Makefile.in
*** Makefile.in	14 Feb 2004 15:33:21 -0000	1.1249
--- Makefile.in	15 Feb 2004 22:48:09 -0000
*************** OBJS-common = \
*** 848,854 ****
   cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
   cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o 	   \
   dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o	   \
!  dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o	                   \
   expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o		   \
   genrtl.o ggc-common.o global.o graph.o gtype-desc.o			   \
   haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o	   \
--- 848,854 ----
   cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o	   \
   cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o 	   \
   dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o	   \
!  dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o		   \
   expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o		   \
   genrtl.o ggc-common.o global.o graph.o gtype-desc.o			   \
   haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o	   \
*************** cfgcleanup.o : cfgcleanup.c $(CONFIG_H) 
*** 1718,1723 ****
--- 1718,1725 ----
  cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h flags.h
  cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
+    $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
+ loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
  cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
     $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H)
Index: basic-block.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/basic-block.h,v
retrieving revision 1.190
diff -c -3 -p -r1.190 basic-block.h
*** basic-block.h	29 Jan 2004 07:47:54 -0000	1.190
--- basic-block.h	15 Feb 2004 22:48:09 -0000
*************** extern varray_type basic_block_info;
*** 288,293 ****
--- 288,304 ----
  #define FOR_EACH_BB_REVERSE(BB) \
    FOR_BB_BETWEEN (BB, EXIT_BLOCK_PTR->prev_bb, ENTRY_BLOCK_PTR, prev_bb)
  
+ /* For iterating over insns in basic block.  */
+ #define FOR_BB_INSNS(BB, INSN)			\
+   for ((INSN) = BB_HEAD (BB);			\
+        (INSN) != NEXT_INSN (BB_END (BB));	\
+        (INSN) = NEXT_INSN (INSN))
+ 
+ #define FOR_BB_INSNS_REVERSE(BB, INSN)		\
+   for ((INSN) = BB_END (BB);			\
+        (INSN) != PREV_INSN (BB_HEAD (BB));	\
+        (INSN) = PREV_INSN (INSN))
+ 
  /* Cycles through _all_ basic blocks, even the fake ones (entry and
     exit block).  */
  
Index: cfgloop.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cfgloop.c,v
retrieving revision 1.30
diff -c -3 -p -r1.30 cfgloop.c
*** cfgloop.c	29 Jan 2004 07:47:54 -0000	1.30
--- cfgloop.c	15 Feb 2004 22:48:09 -0000
*************** get_loop_body (const struct loop *loop)
*** 959,964 ****
--- 959,1020 ----
    return tovisit;
  }
  
+ /* Fills dominance descendants inside LOOP of the basic block BB into
+    array TOVISIT from index *TV.  */
+ 
+ static void
+ fill_sons_in_loop (const struct loop *loop, basic_block bb,
+ 		   basic_block *tovisit, int *tv)
+ {
+   basic_block son, postpone = NULL;
+ 
+   tovisit[(*tv)++] = bb;
+   for (son = first_dom_son (CDI_DOMINATORS, bb);
+        son;
+        son = next_dom_son (CDI_DOMINATORS, son))
+     {
+       if (!flow_bb_inside_loop_p (loop, son))
+ 	continue;
+ 
+       if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
+ 	{
+ 	  postpone = son;
+ 	  continue;
+ 	}
+       fill_sons_in_loop (loop, son, tovisit, tv);
+     }
+ 
+   if (postpone)
+     fill_sons_in_loop (loop, postpone, tovisit, tv);
+ }
+ 
+ /* Gets body of a LOOP (that must be different from the outermost loop)
+    sorted by dominance relation.  Additionally, if a basic block s dominates
+    the latch, then only blocks dominated by s can be after it.  */
+ 
+ basic_block *
+ get_loop_body_in_dom_order (const struct loop *loop)
+ {
+   basic_block *tovisit;
+   int tv;
+ 
+   if (!loop->num_nodes)
+     abort ();
+ 
+   tovisit = xcalloc (loop->num_nodes, sizeof (basic_block));
+ 
+   if (loop->latch == EXIT_BLOCK_PTR)
+     abort ();
+ 
+   tv = 0;
+   fill_sons_in_loop (loop, loop->header, tovisit, &tv);
+ 
+   if (tv != (int) loop->num_nodes)
+     abort ();
+ 
+   return tovisit;
+ }
+ 
  /* Gets exit edges of a LOOP, returning their number in N_EDGES.  */
  edge *
  get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
*************** get_loop_exit_edges (const struct loop *
*** 986,991 ****
--- 1042,1068 ----
    free (body);
  
    return edges;
+ }
+ 
+ /* Counts the number of conditional branches inside LOOP.  */
+ 
+ unsigned
+ num_loop_branches (const struct loop *loop)
+ {
+   unsigned i, n;
+   basic_block * body;
+ 
+   if (loop->latch == EXIT_BLOCK_PTR)
+     abort ();
+ 
+   body = get_loop_body (loop);
+   n = 0;
+   for (i = 0; i < loop->num_nodes; i++)
+     if (body[i]->succ && body[i]->succ->succ_next)
+       n++;
+   free (body);
+ 
+   return n;
  }
  
  /* Adds basic block BB to LOOP.  */
Index: cfgloop.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cfgloop.h,v
retrieving revision 1.12
diff -c -3 -p -r1.12 cfgloop.h
*** cfgloop.h	30 Dec 2003 10:40:51 -0000	1.12
--- cfgloop.h	15 Feb 2004 22:48:09 -0000
*************** extern int average_num_loop_insns (struc
*** 278,284 ****
--- 278,286 ----
  
  /* Loops & cfg manipulation.  */
  extern basic_block *get_loop_body (const struct loop *);
+ extern basic_block *get_loop_body_in_dom_order (const struct loop *);
  extern edge *get_loop_exit_edges (const struct loop *, unsigned *);
+ extern unsigned num_loop_branches (const struct loop *);
  
  extern edge loop_preheader_edge (const struct loop *);
  extern edge loop_latch_edge (const struct loop *);
*************** extern struct loop *loopify (struct loop
*** 321,326 ****
--- 323,436 ----
  extern void unloop (struct loops *, struct loop *);
  extern bool remove_path (struct loops *, edge);
  extern edge split_loop_bb (basic_block, rtx);
+ 
+ /* Induction variable analysis.  */
+ 
+ /* The description of induction variable.  The things are a bit complicated
+    due to need to handle subregs and extends.  The value of the object described
+    by it can be obtained as follows (all computations are done in extend_mode):
+ 
+    Value in i-th iteration is
+      delta + mult * extend_{extend_mode} (subreg_{mode} (base + i * step)).
+ 
+    If first_special is true, the value in the first iteration is
+      delta + mult * base
+      
+    If extend = NIL, first_special must be false, delta 0, mult 1 and value is
+      subreg_{mode} (base + i * step)
+ 
+    The get_iv_value function can be used to obtain these expressions.
+ 
+    ??? Add a third mode field that would specify the mode in that inner
+    computation is done, which would enable it to be different from the
+    outer one?  */
+ 
+ struct rtx_iv
+ {
+   /* Whether we have already filled the remaining fields.  */
+   bool analysed;
+ 
+   /* The mode the variable iterates in.  */
+   enum machine_mode mode;
+ 
+   /* Its base and step (mode of base and step is supposed to be extend_mode,
+      see the description above).  */
+   rtx base, step;
+ 
+   /* Whether the first iteration needs to be handled specially.  */
+   bool first_special;
+ 
+   /* The type of extend applied to it (SIGN_EXTEND, ZERO_EXTEND or NIL).  */
+   enum rtx_code extend;
+ 
+   /* The mode it is extended to.  */
+   enum machine_mode extend_mode;
+ 
+   /* Operations applied in the extended mode.  */
+   rtx delta, mult;
+ };
+ 
+ /* This should replace struct loop_desc.  We keep this just so that we are
+    able to compare the results.  */
+ 
+ struct niter_desc
+ {
+   /* The edge out of the loop.  */
+   edge out_edge;
+ 
+   /* The other edge leading from the condition.  */
+   edge in_edge;
+ 
+   /* True if we are able to say anything about number of iterations of the
+      loop.  */
+   bool simple_p;
+ 
+   /* True if the loop iterates the constant number of times.  */
+   bool const_iter;
+ 
+   /* Number of iterations if constant.  */
+   unsigned HOST_WIDEST_INT niter;
+ 
+   /* Upper bound on the number of iterations.  */
+   unsigned HOST_WIDEST_INT niter_max;
+ 
+   /* Assumptions under which the rest of the information is valid.  */
+   rtx assumptions;
+ 
+   /* Assumptions under which the loop ends before reaching the latch,
+      even if the value of niter_expr says otherwise.  */
+   rtx noloop_assumptions;
+ 
+   /* Condition under which the loop is infinite.  */
+   rtx infinite;
+ 
+   /* Whether the comparison is signed.  */
+   bool signed_p;
+ 
+   /* The mode in which niter_expr should be computed.  */
+   enum machine_mode mode;
+ 
+   /* The number of iterations of the loop.  */
+   rtx niter_expr;
+ };
+ 
+ extern void iv_analysis_loop_init (struct loop *);
+ extern rtx iv_get_reaching_def (rtx, rtx);
+ extern bool iv_analyse (rtx, rtx, struct rtx_iv *);
+ extern rtx get_iv_value (struct rtx_iv *, rtx);
+ extern void find_simple_exit (struct loop *, struct niter_desc *);
+ extern void iv_number_of_iterations (struct loop *, rtx, rtx,
+ 				     struct niter_desc *);
+ extern void iv_analysis_done (void);
+ 
+ extern struct niter_desc *get_simple_loop_desc (struct loop *loop);
+ extern void free_simple_loop_desc (struct loop *loop);
+ 
+ static inline struct niter_desc *
+ simple_loop_desc (struct loop *loop)
+ {
+   return loop->aux;
+ }
  
  /* Loop optimizer initialization.  */
  extern struct loops *loop_optimizer_init (FILE *);
Index: cfgloopmanip.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cfgloopmanip.c,v
retrieving revision 1.21
diff -c -3 -p -r1.21 cfgloopmanip.c
*** cfgloopmanip.c	31 Jan 2004 02:06:45 -0000	1.21
--- cfgloopmanip.c	15 Feb 2004 22:48:09 -0000
*************** scale_loop_frequencies (struct loop *loo
*** 480,490 ****
     accordingly. Everything between them plus LATCH_EDGE destination must
     be dominated by HEADER_EDGE destination, and back-reachable from
     LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
!    SWITCH_BB->succ to original destination of LATCH_EDGE and
!    SWITCH_BB->succ->succ_next to original destination of HEADER_EDGE.
     Returns newly created loop.  */
  struct loop *
! loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block switch_bb)
  {
    basic_block succ_bb = latch_edge->dest;
    basic_block pred_bb = header_edge->src;
--- 480,492 ----
     accordingly. Everything between them plus LATCH_EDGE destination must
     be dominated by HEADER_EDGE destination, and back-reachable from
     LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
!    FALLTHRU_EDGE (SWITCH_BB) to original destination of HEADER_EDGE and
!    BRANCH_EDGE (SWITCH_BB) to original destination of LATCH_EDGE.
     Returns newly created loop.  */
+ 
  struct loop *
! loopify (struct loops *loops, edge latch_edge, edge header_edge, 
! 	 basic_block switch_bb)
  {
    basic_block succ_bb = latch_edge->dest;
    basic_block pred_bb = header_edge->src;
*************** loopify (struct loops *loops, edge latch
*** 509,521 ****
  
    /* Redirect edges.  */
    loop_redirect_edge (latch_edge, loop->header);
    loop_redirect_edge (header_edge, switch_bb);
!   loop_redirect_edge (switch_bb->succ->succ_next, loop->header);
!   loop_redirect_edge (switch_bb->succ, succ_bb);
  
    /* Update dominators.  */
    set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
    set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
    set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
  
    /* Compute new loop.  */
--- 511,525 ----
  
    /* Redirect edges.  */
    loop_redirect_edge (latch_edge, loop->header);
+   loop_redirect_edge (BRANCH_EDGE (switch_bb), succ_bb);
+ 
    loop_redirect_edge (header_edge, switch_bb);
!   loop_redirect_edge (FALLTHRU_EDGE (switch_bb), loop->header); 
  
    /* Update dominators.  */
    set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
    set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
+ 
    set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
  
    /* Compute new loop.  */
Index: expr.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expr.c,v
retrieving revision 1.623
diff -c -3 -p -r1.623 expr.c
*** expr.c	12 Feb 2004 18:25:09 -0000	1.623
--- expr.c	15 Feb 2004 22:48:09 -0000
*************** force_operand (rtx value, rtx target)
*** 5588,5593 ****
--- 5588,5607 ----
    rtx subtarget = get_subtarget (target);
    enum rtx_code code = GET_CODE (value);
  
+   /* Check for subreg applied to an expression produced by loop optimizer.  */
+   if (code == SUBREG
+       && GET_CODE (SUBREG_REG (value)) != REG
+       && GET_CODE (SUBREG_REG (value)) != MEM)
+     {
+       value = simplify_gen_subreg (GET_MODE (value),
+ 				   force_reg (GET_MODE (SUBREG_REG (value)),
+ 					      force_operand (SUBREG_REG (value),
+ 							     NULL_RTX)),
+ 				   GET_MODE (SUBREG_REG (value)),
+ 				   SUBREG_BYTE (value));
+       code = GET_CODE (value);
+     }
+ 
    /* Check for a PIC address load.  */
    if ((code == PLUS || code == MINUS)
        && XEXP (value, 0) == pic_offset_table_rtx
Index: loop-init.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop-init.c,v
retrieving revision 1.7
diff -c -3 -p -r1.7 loop-init.c
*** loop-init.c	30 Dec 2003 10:40:54 -0000	1.7
--- loop-init.c	15 Feb 2004 22:48:09 -0000
*************** loop_optimizer_init (FILE *dumpfile)
*** 36,44 ****
    struct loops *loops = xcalloc (1, sizeof (struct loops));
    edge e;
  
-   /* Initialize structures for layout changes.  */
-   cfg_layout_initialize ();
- 
    /* Avoid annoying special cases of edges going to exit
       block.  */
    for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
--- 36,41 ----
*************** loop_optimizer_init (FILE *dumpfile)
*** 49,66 ****
  
    if (flow_loops_find (loops, LOOP_TREE) <= 1)
      {
-       basic_block bb;
- 
        /* No loops.  */
        flow_loops_free (loops);
        free_dominance_info (CDI_DOMINATORS);
        free (loops);
  
-       /* Make chain.  */
-       FOR_EACH_BB (bb)
- 	if (bb->next_bb != EXIT_BLOCK_PTR)
- 	  bb->rbi->next = bb->next_bb;
- 	  cfg_layout_finalize ();
        return NULL;
      }
  
--- 46,56 ----
*************** loop_optimizer_init (FILE *dumpfile)
*** 94,106 ****
  void
  loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
  {
!   basic_block bb;
  
!   /* Finalize layout changes.  */
!   /* Make chain.  */
!   FOR_EACH_BB (bb)
!     if (bb->next_bb != EXIT_BLOCK_PTR)
!       bb->rbi->next = bb->next_bb;
  
    /* Another dump.  */
    flow_loops_dump (loops, dumpfile, NULL, 1);
--- 84,97 ----
  void
  loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
  {
!   unsigned i;
  
!   if (!loops)
!     return;
! 
!   for (i = 1; i < loops->num; i++)
!     if (loops->parray[i])
!       free_simple_loop_desc (loops->parray[i]);
  
    /* Another dump.  */
    flow_loops_dump (loops, dumpfile, NULL, 1);
*************** loop_optimizer_finalize (struct loops *l
*** 109,117 ****
    flow_loops_free (loops);
    free_dominance_info (CDI_DOMINATORS);
    free (loops);
- 
-   /* Finalize changes.  */
-   cfg_layout_finalize ();
  
    /* Checking.  */
  #ifdef ENABLE_CHECKING
--- 100,105 ----
Index: loop-iv.c
===================================================================
RCS file: loop-iv.c
diff -N loop-iv.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- loop-iv.c	15 Feb 2004 22:48:09 -0000
***************
*** 0 ****
--- 1,2492 ----
+ /* Rtl-level induction variable analysis.
+    Copyright (C) 2004 Free Software Foundation, Inc.
+    
+ This file is part of GCC.
+    
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+    
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+    
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA.  */
+ 
+ /* This is just a very simplistic analysis of induction variables of the loop.
+    The major use is for determining the number of iterations of a loop for
+    loop unrolling, doloop optimization and branch prediction.  For this we
+    are only interested in bivs and a fairly limited set of givs that are
+    needed in the exit condition.  We also only compute the iv information on
+    demand.
+ 
+    The interesting registers are determined.  A register is interesting if
+ 
+    -- it is set only in the blocks that dominate the latch of the current loop
+    -- all its sets are simple -- i.e. in the form we understand
+ 
+    We also number the insns sequentially in each basic block.  For a use of the
+    interesting reg, it is now easy to find a reaching definition (there may be
+    only one).
+ 
+    Induction variable is then simply analysed by walking the use-def
+    chains.
+    
+    Usage:
+ 
+    iv_analysis_loop_init (loop);
+    insn = iv_get_reaching_def (where, reg);
+    if (iv_analyse (insn, reg, &iv))
+      {
+        ...
+      }
+    iv_analysis_done (); */
+ 
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+ #include "tm.h"
+ #include "rtl.h"
+ #include "hard-reg-set.h"
+ #include "basic-block.h"
+ #include "cfgloop.h"
+ #include "expr.h"
+ #include "output.h"
+ 
+ /* The insn information.  */
+ 
+ struct insn_info
+ {
+   /* Id of the insn.  */
+   unsigned luid;
+ 
+   /* The previous definition of the register defined by the single
+      set in the insn.  */
+   rtx prev_def;
+ 
+   /* The description of the iv.  */
+   struct rtx_iv iv;
+ };
+ 
+ static struct insn_info *insn_info;
+ 
+ /* The last definition of register.  */
+ 
+ static rtx *last_def;
+ 
+ /* The bivs.  */
+ 
+ static struct rtx_iv *bivs;
+ 
+ /* Maximal insn number for which there is space in the insn_info array.  */
+ 
+ static unsigned max_insn_no;
+ 
+ /* Maximal register number for which there is space in the bivs and last_def
+    arrays.  */
+ 
+ static unsigned max_reg_no;
+ 
+ /* Dumps information about IV to FILE.  */
+ 
+ extern void dump_iv_info (FILE *, struct rtx_iv *);
+ void
+ dump_iv_info (FILE *file, struct rtx_iv *iv)
+ {
+   if (!iv->base)
+     {
+       fprintf (file, "not simple");
+       return;
+     }
+ 
+   if (iv->step == const0_rtx)
+     {
+       fprintf (file, "invariant ");
+       print_rtl (file, iv->base);
+       return;
+     }
+ 
+   print_rtl (file, iv->base);
+   fprintf (file, " + ");
+   print_rtl (file, iv->step);
+   fprintf (file, " * iteration");
+   fprintf (file, " (in %s)", GET_MODE_NAME (iv->mode));
+ 
+   if (iv->mode != iv->extend_mode)
+     fprintf (file, " %s to %s",
+ 	     rtx_name[iv->extend],
+ 	     GET_MODE_NAME (iv->extend_mode));
+ 
+   if (iv->mult != const1_rtx)
+     {
+       fprintf (file, " * ");
+       print_rtl (file, iv->mult);
+     }
+   if (iv->delta != const0_rtx)
+     {
+       fprintf (file, " + ");
+       print_rtl (file, iv->delta);
+     }
+   if (iv->first_special)
+     fprintf (file, " (first special)");
+ }
+ 
+ /* Assigns luids to insns in basic block BB.  */
+ 
+ static void
+ assign_luids (basic_block bb)
+ {
+   unsigned i = 0, uid;
+   rtx insn;
+ 
+   FOR_BB_INSNS (bb, insn)
+     {
+       uid = INSN_UID (insn);
+       insn_info[uid].luid = i++;
+       insn_info[uid].prev_def = NULL_RTX;
+       insn_info[uid].iv.analysed = false;
+     }
+ }
+ 
+ /* Returns the byte for taking the least significant OUTER_MODE subreg of
+    INNER_MODE.  */
+ 
+ static unsigned
+ lowpart_byte (enum machine_mode outer_mode, enum machine_mode inner_mode)
+ {
+   unsigned offset = 0;
+ 
+   if (WORDS_BIG_ENDIAN)
+     offset = GET_MODE_SIZE (inner_mode) - GET_MODE_SIZE (outer_mode);
+ 
+   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
+       && GET_MODE_SIZE (outer_mode) < UNITS_PER_WORD)
+     {
+       /* We must swap the positions inside the word.  */
+       offset = (offset + UNITS_PER_WORD - GET_MODE_SIZE (outer_mode)
+ 		- 2 * (offset % UNITS_PER_WORD));
+     }
+ 
+   return offset;
+ }
+ 
+ /* Checks whether subreg EXPR takes the least significant part of EXPR.  */
+ 
+ static bool
+ lowpart_subreg_p (rtx expr)
+ {
+   return SUBREG_BYTE (expr) == lowpart_byte (GET_MODE (expr),
+ 					     GET_MODE (SUBREG_REG (expr)));
+ }
+ 
+ /* Generates a subreg to get the least significant part of EXPR (in mode
+    INNER_MODE) to OUTER_MODE.  */
+ 
+ static rtx
+ lowpart_subreg (enum machine_mode outer_mode, rtx expr,
+ 		enum machine_mode inner_mode)
+ {
+   return simplify_gen_subreg (outer_mode, expr, inner_mode,
+ 			      lowpart_byte (outer_mode, inner_mode));
+ }
+ 
+ /* Checks whether REG is a well-behaved register.  */
+ 
+ static bool
+ simple_reg_p (rtx reg)
+ {
+   unsigned r;
+ 
+   if (GET_CODE (reg) == SUBREG)
+     {
+       if (!lowpart_subreg_p (reg))
+ 	return false;
+       reg = SUBREG_REG (reg);
+     }
+ 
+   if (!REG_P (reg))
+     return false;
+ 
+   r = REGNO (reg);
+   if (HARD_REGISTER_NUM_P (r))
+     return false;
+ 
+   if (GET_MODE_CLASS (GET_MODE (reg)) != MODE_INT)
+     return false;
+ 
+   if (last_def[r] == const0_rtx)
+     return false;
+ 
+   return true;
+ }
+ 
+ /* Checks whether assignment LHS = RHS is simple enough for us to process.  */
+ 
+ static bool
+ simple_set_p (rtx lhs, rtx rhs)
+ {
+   rtx op0, op1;
+ 
+   if (!REG_P (lhs)
+       || !simple_reg_p (lhs))
+     return false;
+ 
+   if (CONSTANT_P (rhs))
+     return true;
+ 
+   switch (GET_CODE (rhs))
+     {
+     case SUBREG:
+     case REG:
+       return simple_reg_p (rhs);
+ 
+     case SIGN_EXTEND:
+     case ZERO_EXTEND:
+     case NEG:
+       return simple_reg_p (XEXP (rhs, 0));
+ 
+     case PLUS:
+     case MINUS:
+     case MULT:
+       op0 = XEXP (rhs, 0);
+       op1 = XEXP (rhs, 1);
+ 
+       if (!simple_reg_p (op0)
+ 	  && !CONSTANT_P (op0))
+ 	return false;
+ 
+       if (!simple_reg_p (op1)
+ 	  && !CONSTANT_P (op1))
+ 	return false;
+ 
+       if (GET_CODE (rhs) == MULT
+ 	  && !CONSTANT_P (op0)
+ 	  && !CONSTANT_P (op1))
+ 	return false;
+ 
+       return true;
+ 
+     default:
+       return false;
+     }
+ }
+ 
+ /* Mark single SET in INSN.  */
+ 
+ static rtx
+ mark_single_set (rtx insn, rtx set)
+ {
+   rtx def = SET_DEST (set), src;
+   unsigned regno, uid;
+ 
+   src = find_reg_equal_equiv_note (insn);
+   if (!src)
+     src = SET_SRC (set);
+ 
+   if (!simple_set_p (SET_DEST (set), src))
+     return NULL_RTX;
+ 
+   regno = REGNO (def);
+   uid = INSN_UID (insn);
+ 
+   bivs[regno].analysed = false;
+   insn_info[uid].prev_def = last_def[regno];
+   last_def[regno] = insn;
+ 
+   return def;
+ }
+ 
+ /* Marks set register REG as wrong, unless it is equal to EXCEPT.  */
+ 
+ static void
+ kill_sets (rtx reg, rtx by ATTRIBUTE_UNUSED, void *except)
+ {
+   if (GET_CODE (reg) == SUBREG)
+     reg = SUBREG_REG (reg);
+   if (!REG_P (reg))
+     return;
+   if (reg == except)
+     return;
+ 
+   last_def[REGNO (reg)] = const0_rtx;
+ }
+ 
+ /* Marks sets in basic block BB.  If DOM is true, BB dominates the loop
+    latch.  */
+ 
+ static void
+ mark_sets (basic_block bb, bool dom)
+ {
+   rtx insn, set, def;
+ 
+   FOR_BB_INSNS (bb, insn)
+     {
+       if (!INSN_P (insn))
+ 	continue;
+ 
+       if (dom
+ 	  && (set = single_set (insn)))
+ 	def = mark_single_set (insn, set);
+       else
+ 	def = NULL_RTX;
+ 
+       note_stores (PATTERN (insn), kill_sets, def);
+     }
+ }
+ 
+ /* Prepare the data for an induction variable analysis of a LOOP.  */
+ 
+ void
+ iv_analysis_loop_init (struct loop *loop)
+ {
+   basic_block *body = get_loop_body_in_dom_order (loop);
+   unsigned b;
+ 
+   if ((unsigned) get_max_uid () >= max_insn_no)
+     {
+       /* Add some reserve for insns and registers produced in optimizations.  */
+       max_insn_no = get_max_uid () + 100;
+       if (insn_info)
+ 	free (insn_info);
+       insn_info = xmalloc (max_insn_no * sizeof (struct insn_info));
+     }
+ 
+   if ((unsigned) max_reg_num () >= max_reg_no)
+     {
+       max_reg_no = max_reg_num () + 100;
+       if (last_def)
+ 	free (last_def);
+       last_def = xmalloc (max_reg_no * sizeof (rtx));
+       if (bivs)
+ 	free (bivs);
+       bivs = xmalloc (max_reg_no * sizeof (struct rtx_iv));
+     }
+ 
+   memset (last_def, 0, max_reg_num () * sizeof (rtx));
+ 
+   for (b = 0; b < loop->num_nodes; b++)
+     {
+       assign_luids (body[b]);
+       mark_sets (body[b], just_once_each_iteration_p (loop, body[b]));
+     }
+ 
+   free (body);
+ }
+ 
+ /* Gets definition of REG reaching the INSN.  If REG is not simple, const0_rtx
+    is returned.  If INSN is before the first def in the loop, NULL_RTX is
+    returned.  */
+ 
+ rtx
+ iv_get_reaching_def (rtx insn, rtx reg)
+ {
+   unsigned regno, luid, auid;
+   rtx ainsn;
+   basic_block bb, abb;
+ 
+   if (GET_CODE (reg) == SUBREG)
+     {
+       if (!lowpart_subreg_p (reg))
+ 	return const0_rtx;
+       reg = SUBREG_REG (reg);
+     }
+   if (!REG_P (reg))
+     return NULL_RTX;
+ 
+   regno = REGNO (reg);
+   if (!last_def[regno]
+       || last_def[regno] == const0_rtx)
+     return last_def[regno];
+ 
+   bb = BLOCK_FOR_INSN (insn);
+   luid = insn_info[INSN_UID (insn)].luid;
+ 
+   ainsn = last_def[regno];
+   while (1)
+     {
+       abb = BLOCK_FOR_INSN (ainsn);
+ 
+       if (dominated_by_p (CDI_DOMINATORS, bb, abb))
+ 	break;
+ 
+       auid = INSN_UID (ainsn);
+       ainsn = insn_info[auid].prev_def;
+ 
+       if (!ainsn)
+ 	return NULL_RTX;
+     }
+ 
+   while (1)
+     {
+       abb = BLOCK_FOR_INSN (ainsn);
+       if (abb != bb)
+ 	return ainsn;
+ 
+       auid = INSN_UID (ainsn);
+       if (luid > insn_info[auid].luid)
+ 	return ainsn;
+ 
+       ainsn = insn_info[auid].prev_def;
+       if (!ainsn)
+ 	return NULL_RTX;
+     }
+ }
+ 
+ /* Sets IV to invariant CST in MODE.  Always returns true (just for
+    consistency with other iv manipulation functions that may fail).  */
+ 
+ static bool
+ iv_constant (struct rtx_iv *iv, rtx cst, enum machine_mode mode)
+ {
+   if (mode == VOIDmode)
+     mode = GET_MODE (cst);
+ 
+   iv->analysed = true;
+   iv->mode = mode;
+   iv->base = cst;
+   iv->step = const0_rtx;
+   iv->first_special = false;
+   iv->extend = NIL;
+   iv->extend_mode = iv->mode;
+   iv->delta = const0_rtx;
+   iv->mult = const1_rtx;
+ 
+   return true;
+ }
+ 
+ /* Evaluates application of subreg to MODE on IV.  */
+ 
+ static bool
+ iv_subreg (struct rtx_iv *iv, enum machine_mode mode)
+ {
+   if (iv->extend_mode == mode)
+     return true;
+ 
+   if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (iv->mode))
+     return false;
+ 
+   iv->extend = NIL;
+   iv->mode = mode;
+ 
+   iv->base = simplify_gen_binary (PLUS, iv->extend_mode, iv->delta,
+ 				  simplify_gen_binary (MULT, iv->extend_mode,
+ 						       iv->base, iv->mult));
+   iv->step = simplify_gen_binary (MULT, iv->extend_mode, iv->step, iv->mult);
+   iv->mult = const1_rtx;
+   iv->delta = const0_rtx;
+   iv->first_special = false;
+ 
+   return true;
+ }
+ 
+ /* Evaluates application of EXTEND to MODE on IV.  */
+ 
+ static bool
+ iv_extend (struct rtx_iv *iv, enum rtx_code extend, enum machine_mode mode)
+ {
+   if (mode != iv->extend_mode)
+     return false;
+ 
+   if (iv->extend != NIL
+       && iv->extend != extend)
+     return false;
+ 
+   iv->extend = extend;
+ 
+   return true;
+ }
+ 
+ /* Evaluates negation of IV.  Always returns true.  */
+ 
+ static bool
+ iv_neg (struct rtx_iv *iv)
+ {
+   if (iv->extend == NIL)
+     {
+       /* No extend recorded -- negate the affine form directly.  */
+       iv->base = simplify_gen_unary (NEG, iv->extend_mode,
+ 				     iv->base, iv->extend_mode);
+       iv->step = simplify_gen_unary (NEG, iv->extend_mode,
+ 				     iv->step, iv->extend_mode);
+     }
+   else
+     {
+       /* The iv is delta + mult * extend (...); negation distributes onto
+ 	 the outer delta and mult, leaving the extended part untouched.  */
+       iv->delta = simplify_gen_unary (NEG, iv->extend_mode,
+ 				      iv->delta, iv->extend_mode);
+       iv->mult = simplify_gen_unary (NEG, iv->extend_mode,
+ 				     iv->mult, iv->extend_mode);
+     }
+ 
+   return true;
+ }
+ 
+ /* Evaluates addition or subtraction (according to OP) of IV1 to IV0.
+    Returns false if the combination cannot be represented.  */
+ 
+ static bool
+ iv_add (struct rtx_iv *iv0, struct rtx_iv *iv1, enum rtx_code op)
+ {
+   enum machine_mode mode;
+   rtx arg;
+ 
+   /* Extend the constant to extend_mode of the other operand if necessary.
+      NOTE(review): ZERO_EXTEND is used unconditionally here even for a
+      signed comparison context -- presumably callers only reach this with
+      nonnegative constants; verify.  */
+   if (iv0->extend == NIL
+       && iv0->mode == iv0->extend_mode
+       && iv0->step == const0_rtx
+       && GET_MODE_SIZE (iv0->extend_mode) < GET_MODE_SIZE (iv1->extend_mode))
+     {
+       iv0->extend_mode = iv1->extend_mode;
+       iv0->base = simplify_gen_unary (ZERO_EXTEND, iv0->extend_mode,
+ 				      iv0->base, iv0->mode);
+     }
+   if (iv1->extend == NIL
+       && iv1->mode == iv1->extend_mode
+       && iv1->step == const0_rtx
+       && GET_MODE_SIZE (iv1->extend_mode) < GET_MODE_SIZE (iv0->extend_mode))
+     {
+       iv1->extend_mode = iv0->extend_mode;
+       iv1->base = simplify_gen_unary (ZERO_EXTEND, iv1->extend_mode,
+ 				      iv1->base, iv1->mode);
+     }
+ 
+   mode = iv0->extend_mode;
+   if (mode != iv1->extend_mode)
+     return false;
+ 
+   /* Neither iv is extended -- combine the affine forms componentwise.  */
+   if (iv0->extend == NIL && iv1->extend == NIL)
+     {
+       if (iv0->mode != iv1->mode)
+ 	return false;
+ 
+       iv0->base = simplify_gen_binary (op, mode, iv0->base, iv1->base);
+       iv0->step = simplify_gen_binary (op, mode, iv0->step, iv1->step);
+ 
+       return true;
+     }
+ 
+   /* Handle addition of constant.  */
+   if (iv1->extend == NIL
+       && iv1->mode == mode
+       && iv1->step == const0_rtx)
+     {
+       iv0->delta = simplify_gen_binary (op, mode, iv0->delta, iv1->base);
+       return true;
+     }
+ 
+   /* Symmetric case: IV0 is the invariant; fold it into IV1's delta,
+      negating IV1 first for subtraction.  */
+   if (iv0->extend == NIL
+       && iv0->mode == mode
+       && iv0->step == const0_rtx)
+     {
+       arg = iv0->base;
+       *iv0 = *iv1;
+       if (op == MINUS
+ 	  && !iv_neg (iv0))
+ 	return false;
+ 
+       iv0->delta = simplify_gen_binary (PLUS, mode, iv0->delta, arg);
+       return true;
+     }
+ 
+   return false;
+ }
+ 
+ /* Evaluates multiplication of IV by constant MBY.  Returns false if MBY's
+    mode conflicts with the iv's extend_mode.  */
+ 
+ static bool
+ iv_mult (struct rtx_iv *iv, rtx mby)
+ {
+   enum machine_mode mode = iv->extend_mode;
+ 
+   /* VOIDmode constants are mode-agnostic and always acceptable.  */
+   if (GET_MODE (mby) != VOIDmode
+       && GET_MODE (mby) != mode)
+     return false;
+ 
+   if (iv->extend == NIL)
+     {
+       iv->base = simplify_gen_binary (MULT, mode, iv->base, mby);
+       iv->step = simplify_gen_binary (MULT, mode, iv->step, mby);
+     }
+   else
+     {
+       /* With an extend present, multiplication applies to the outer
+ 	 delta + mult * extend (...) layer.  */
+       iv->delta = simplify_gen_binary (MULT, mode, iv->delta, mby);
+       iv->mult = simplify_gen_binary (MULT, mode, iv->mult, mby);
+     }
+ 
+   return true;
+ }
+ 
+ /* The recursive part of get_biv_step.  Gets the value of the single value
+    defined in INSN with respect to the initial value of REG inside the
+    loop, in the shape described at get_biv_step.  Returns false if the
+    chain of definitions does not fit that shape.  */
+ 
+ static bool
+ get_biv_step_1 (rtx insn, rtx reg,
+ 		rtx *inner_step, enum machine_mode *inner_mode,
+ 		enum rtx_code *extend, enum machine_mode outer_mode,
+ 		rtx *outer_step)
+ {
+   rtx set, lhs, rhs, op0 = NULL_RTX, op1 = NULL_RTX;
+   rtx next, nextr, def_insn, tmp;
+   enum rtx_code code;
+ 
+   /* Prefer the REG_EQUAL/REG_EQUIV value if present; otherwise use the
+      SET_SRC of the (single) set of INSN.  */
+   set = single_set (insn);
+   rhs = find_reg_equal_equiv_note (insn);
+   if (!rhs)
+     rhs = SET_SRC (set);
+   lhs = SET_DEST (set);
+ 
+   code = GET_CODE (rhs);
+   switch (code)
+     {
+     case SUBREG:
+     case REG:
+       next = rhs;
+       break;
+ 
+     case PLUS:
+     case MINUS:
+       op0 = XEXP (rhs, 0);
+       op1 = XEXP (rhs, 1);
+ 
+       /* Canonicalize so that the constant is the second operand.  */
+       if (code == PLUS && CONSTANT_P (op0))
+ 	{
+ 	  tmp = op0; op0 = op1; op1 = tmp;
+ 	}
+ 
+       if (!simple_reg_p (op0)
+ 	  || !CONSTANT_P (op1))
+ 	return false;
+ 
+       if (GET_MODE (rhs) != outer_mode)
+ 	{
+ 	  /* ppc64 uses expressions like
+ 
+ 	     (set x:SI (plus:SI (subreg:SI y:DI) 1)).
+ 
+ 	     this is equivalent to
+ 
+ 	     (set x':DI (plus:DI y:DI 1))
+ 	     (set x:SI (subreg:SI (x':DI)).  */
+ 	  if (GET_CODE (op0) != SUBREG)
+ 	    return false;
+ 	  if (GET_MODE (SUBREG_REG (op0)) != outer_mode)
+ 	    return false;
+ 	}
+ 
+       next = op0;
+       break;
+ 
+     case SIGN_EXTEND:
+     case ZERO_EXTEND:
+       if (GET_MODE (rhs) != outer_mode)
+ 	return false;
+ 
+       op0 = XEXP (rhs, 0);
+       if (!simple_reg_p (op0))
+ 	return false;
+ 
+       next = op0;
+       break;
+ 
+     default:
+       return false;
+     }
+ 
+   /* NEXT is the register (possibly wrapped in a lowpart subreg) whose
+      definition we follow next; NEXTR is the bare register.  */
+   if (GET_CODE (next) == SUBREG)
+     {
+       if (!lowpart_subreg_p (next))
+ 	return false;
+ 
+       nextr = SUBREG_REG (next);
+       if (GET_MODE (nextr) != outer_mode)
+ 	return false;
+     }
+   else
+     nextr = next;
+ 
+   /* const0_rtx marks a register with several reaching definitions.  */
+   def_insn = iv_get_reaching_def (insn, nextr);
+   if (def_insn == const0_rtx)
+     return false;
+ 
+   if (!def_insn)
+     {
+       /* Recursion bottom: the value comes from outside the loop; it must
+ 	 be the biv register itself.  */
+       if (!rtx_equal_p (nextr, reg))
+ 	return false;
+ 
+       *inner_step = const0_rtx;
+       *extend = NIL;
+       *inner_mode = outer_mode;
+       *outer_step = const0_rtx;
+     }
+   else if (!get_biv_step_1 (def_insn, reg,
+ 			    inner_step, inner_mode, extend, outer_mode,
+ 			    outer_step))
+     return false;
+ 
+   if (GET_CODE (next) == SUBREG)
+     {
+       enum machine_mode amode = GET_MODE (next);
+ 
+       if (GET_MODE_SIZE (amode) > GET_MODE_SIZE (*inner_mode))
+ 	return false;
+ 
+       /* A subreg restarts the inner computation: fold the outer step
+ 	 back into the inner one and clear the extend.  */
+       *inner_mode = amode;
+       *inner_step = simplify_gen_binary (PLUS, outer_mode,
+ 					 *inner_step, *outer_step);
+       *outer_step = const0_rtx;
+       *extend = NIL;
+     }
+ 
+   /* Now account for the operation performed by INSN itself.  */
+   switch (code)
+     {
+     case REG:
+     case SUBREG:
+       break;
+ 
+     case PLUS:
+     case MINUS:
+       if (*inner_mode == outer_mode
+ 	  /* See comment in previous switch.  */
+ 	  || GET_MODE (rhs) != outer_mode)
+ 	*inner_step = simplify_gen_binary (code, outer_mode,
+ 					   *inner_step, op1);
+       else
+ 	*outer_step = simplify_gen_binary (code, outer_mode,
+ 					   *outer_step, op1);
+       break;
+ 
+     case SIGN_EXTEND:
+     case ZERO_EXTEND:
+       if (GET_MODE (op0) != *inner_mode
+ 	  || *extend != NIL
+ 	  || *outer_step != const0_rtx)
+ 	abort ();
+ 
+       *extend = code;
+       break;
+ 
+     default:
+       abort ();
+     }
+ 
+   return true;
+ }
+ 
+ /* Gets the operation on register REG inside loop, in shape
+ 
+    OUTER_STEP + EXTEND_{OUTER_MODE} (SUBREG_{INNER_MODE} (REG + INNER_STEP))
+ 
+    If the operation cannot be described in this shape, return false.  */
+ 
+ static bool
+ get_biv_step (rtx reg, rtx *inner_step, enum machine_mode *inner_mode,
+ 	      enum rtx_code *extend, enum machine_mode *outer_mode,
+ 	      rtx *outer_step)
+ {
+   *outer_mode = GET_MODE (reg);
+ 
+   if (!get_biv_step_1 (last_def[REGNO (reg)], reg,
+ 		       inner_step, inner_mode, extend, *outer_mode,
+ 		       outer_step))
+     return false;
+ 
+   /* Consistency checks: an extend is present exactly when the inner and
+      outer modes differ, and the outer step is only meaningful then.  */
+   if (*inner_mode != *outer_mode
+       && *extend == NIL)
+     abort ();
+ 
+   if (*inner_mode == *outer_mode
+       && *extend != NIL)
+     abort ();
+ 
+   if (*inner_mode == *outer_mode
+       && *outer_step != const0_rtx)
+     abort ();
+ 
+   return true;
+ }
+ 
+ /* Determines whether DEF is a biv and if so, stores its description
+    to *IV.  Results are cached in the BIVS array, keyed by register
+    number; a cached entry with a NULL base means "not a biv".  */
+ 
+ static bool
+ iv_analyse_biv (rtx def, struct rtx_iv *iv)
+ {
+   unsigned regno;
+   rtx inner_step, outer_step;
+   enum machine_mode inner_mode, outer_mode;
+   enum rtx_code extend;
+ 
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "Analysing ");
+       print_rtl (rtl_dump_file, def);
+       fprintf (rtl_dump_file, " for bivness.\n");
+     }
+     
+   /* Non-register operands are bivs only if they are constants.  */
+   if (!REG_P (def))
+     {
+       if (!CONSTANT_P (def))
+ 	return false;
+ 
+       return iv_constant (iv, def, VOIDmode);
+     }
+ 
+   /* const0_rtx in last_def marks a register with multiple definitions,
+      which we do not handle.  */
+   regno = REGNO (def);
+   if (last_def[regno] == const0_rtx)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "  not simple.\n");
+       return false;
+     }
+ 
+   if (last_def[regno] && bivs[regno].analysed)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "  already analysed.\n");
+ 
+       *iv = bivs[regno];
+       return iv->base != NULL_RTX;
+     }
+ 
+   /* No definition inside the loop -- DEF is loop-invariant.  */
+   if (!last_def[regno])
+     {
+       iv_constant (iv, def, VOIDmode);
+       goto end;
+     }
+ 
+   iv->analysed = true;
+   if (!get_biv_step (def, &inner_step, &inner_mode, &extend,
+ 		     &outer_mode, &outer_step))
+     {
+       iv->base = NULL_RTX;
+       goto end;
+     }
+ 
+   /* Loop transforms base to es (base + inner_step) + outer_step,
+      where es means extend of subreg between inner_mode and outer_mode.
+      The corresponding induction variable is
+ 
+      es ((base - outer_step) + i * (inner_step + outer_step)) + outer_step  */
+ 
+   iv->base = simplify_gen_binary (MINUS, outer_mode, def, outer_step);
+   iv->step = simplify_gen_binary (PLUS, outer_mode, inner_step, outer_step);
+   iv->mode = inner_mode;
+   iv->extend_mode = outer_mode;
+   iv->extend = extend;
+   iv->mult = const1_rtx;
+   iv->delta = outer_step;
+   iv->first_special = inner_mode != outer_mode;
+ 
+ end:
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "  ");
+       dump_iv_info (rtl_dump_file, iv);
+       fprintf (rtl_dump_file, "\n");
+     }
+ 
+   bivs[regno] = *iv;
+ 
+   return iv->base != NULL_RTX;
+ }
+ 
+ /* Analyses operand OP of INSN and stores the result to *IV.  Returns
+    false if OP is not a simple induction variable or invariant.  */
+ 
+ static bool
+ iv_analyse_op (rtx insn, rtx op, struct rtx_iv *iv)
+ {
+   rtx def_insn;
+   unsigned regno;
+   bool inv = CONSTANT_P (op);
+ 
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "Analysing operand ");
+       print_rtl (rtl_dump_file, op);
+       fprintf (rtl_dump_file, " of insn ");
+       print_rtl_single (rtl_dump_file, insn);
+     }
+ 
+   /* For a lowpart subreg, analyse the inner register and then narrow
+      the resulting iv to the subreg's mode.  */
+   if (GET_CODE (op) == SUBREG)
+     {
+       if (!lowpart_subreg_p (op))
+ 	return false;
+ 
+       if (!iv_analyse_op (insn, SUBREG_REG (op), iv))
+ 	return false;
+ 
+       return iv_subreg (iv, GET_MODE (op));
+     }
+ 
+   if (!inv)
+     {
+       /* A register never defined inside the loop is invariant; one with
+ 	 several definitions (const0_rtx marker) is not simple.  */
+       regno = REGNO (op);
+       if (!last_def[regno])
+ 	inv = true;
+       else if (last_def[regno] == const0_rtx)
+ 	{
+ 	  if (rtl_dump_file)
+ 	    fprintf (rtl_dump_file, "  not simple.\n");
+ 	  return false;
+ 	}
+     }
+ 
+   if (inv)
+     {
+       iv_constant (iv, op, VOIDmode);
+ 
+       if (rtl_dump_file)
+ 	{
+ 	  fprintf (rtl_dump_file, "  ");
+ 	  dump_iv_info (rtl_dump_file, iv);
+ 	  fprintf (rtl_dump_file, "\n");
+ 	}
+       return true;
+     }
+ 
+   def_insn = iv_get_reaching_def (insn, op);
+   if (def_insn == const0_rtx)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "  not simple.\n");
+       return false;
+     }
+ 
+   return iv_analyse (def_insn, op, iv);
+ }
+ 
+ /* Analyses iv DEF defined in INSN and stores the result to *IV.  The
+    result is cached per-insn; an analysed iv with NULL base means the
+    value is not a simple induction variable.  */
+ 
+ bool
+ iv_analyse (rtx insn, rtx def, struct rtx_iv *iv)
+ {
+   unsigned uid;
+   rtx set, rhs, mby = NULL_RTX, tmp;
+   rtx op0 = NULL_RTX, op1 = NULL_RTX;
+   struct rtx_iv iv0, iv1;
+   enum machine_mode amode;
+   enum rtx_code code;
+ 
+   /* const0_rtx stands for "several reaching definitions" -- not simple.  */
+   if (insn == const0_rtx)
+     return false;
+ 
+   if (GET_CODE (def) == SUBREG)
+     {
+       if (!lowpart_subreg_p (def))
+ 	return false;
+ 
+       if (!iv_analyse (insn, SUBREG_REG (def), iv))
+ 	return false;
+ 
+       return iv_subreg (iv, GET_MODE (def));
+     }
+ 
+   /* A NULL insn means DEF is defined outside the loop -- treat it as a
+      candidate basic induction variable.  */
+   if (!insn)
+     return iv_analyse_biv (def, iv);
+ 
+   if (rtl_dump_file)
+     {
+       fprintf (rtl_dump_file, "Analysing def of ");
+       print_rtl (rtl_dump_file, def);
+       fprintf (rtl_dump_file, " in insn ");
+       print_rtl_single (rtl_dump_file, insn);
+     }
+ 
+   uid = INSN_UID (insn);
+   if (insn_info[uid].iv.analysed)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "  already analysed.\n");
+       *iv = insn_info[uid].iv;
+       return iv->base != NULL_RTX;
+     }
+ 
+   /* From here on, "goto end" with iv->base == NULL_RTX records failure
+      in the cache.  */
+   iv->mode = VOIDmode;
+   iv->base = NULL_RTX;
+   iv->step = NULL_RTX;
+ 
+   set = single_set (insn);
+   rhs = find_reg_equal_equiv_note (insn);
+   if (!rhs)
+     rhs = SET_SRC (set);
+   code = GET_CODE (rhs);
+ 
+   if (CONSTANT_P (rhs))
+     {
+       op0 = rhs;
+       amode = GET_MODE (def);
+     }
+   else
+     {
+       /* Decompose RHS into at most two iv operands (OP0, OP1) and an
+ 	 invariant multiplier (MBY).  */
+       switch (code)
+ 	{
+ 	case SUBREG:
+ 	  if (!lowpart_subreg_p (rhs))
+ 	    goto end;
+ 	  op0 = rhs;
+ 	  break;
+ 	  
+ 	case REG:
+ 	  op0 = rhs;
+ 	  break;
+ 
+ 	case SIGN_EXTEND:
+ 	case ZERO_EXTEND:
+ 	case NEG:
+ 	  op0 = XEXP (rhs, 0);
+ 	  break;
+ 
+ 	case PLUS:
+ 	case MINUS:
+ 	  op0 = XEXP (rhs, 0);
+ 	  op1 = XEXP (rhs, 1);
+ 	  break;
+ 
+ 	case MULT:
+ 	  /* Canonicalize so that the constant multiplier is in MBY.  */
+ 	  op0 = XEXP (rhs, 0);
+ 	  mby = XEXP (rhs, 1);
+ 	  if (!CONSTANT_P (mby))
+ 	    {
+ 	      if (!CONSTANT_P (op0))
+ 		abort ();
+ 	      tmp = op0;
+ 	      op0 = mby;
+ 	      mby = tmp;
+ 	    }
+ 	  break;
+ 	    
+ 	default:
+ 	  abort ();
+ 	}
+ 
+       amode = GET_MODE (rhs);
+     }
+ 
+   if (op0)
+     {
+       if (!iv_analyse_op (insn, op0, &iv0))
+ 	goto end;
+ 	
+       /* VOIDmode results come from mode-less constants; give them the
+ 	 mode of the expression.  */
+       if (iv0.mode == VOIDmode)
+ 	{
+ 	  iv0.mode = amode;
+ 	  iv0.extend_mode = amode;
+ 	}
+     }
+ 
+   if (op1)
+     {
+       if (!iv_analyse_op (insn, op1, &iv1))
+ 	goto end;
+ 
+       if (iv1.mode == VOIDmode)
+ 	{
+ 	  iv1.mode = amode;
+ 	  iv1.extend_mode = amode;
+ 	}
+     }
+ 
+   /* Combine the operand ivs according to the operation.  */
+   switch (code)
+     {
+     case SIGN_EXTEND:
+     case ZERO_EXTEND:
+       if (!iv_extend (&iv0, code, amode))
+ 	goto end;
+       break;
+ 
+     case NEG:
+       if (!iv_neg (&iv0))
+ 	goto end;
+       break;
+ 
+     case PLUS:
+     case MINUS:
+       if (!iv_add (&iv0, &iv1, code))
+ 	goto end;
+       break;
+ 
+     case MULT:
+       if (!iv_mult (&iv0, mby))
+ 	goto end;
+       break;
+ 
+     default:
+       break;
+     }
+ 
+   *iv = iv0;
+ 
+ end:
+   iv->analysed = true;
+   insn_info[uid].iv = *iv;
+ 
+   if (rtl_dump_file)
+     {
+       print_rtl (rtl_dump_file, def);
+       fprintf (rtl_dump_file, " in insn ");
+       print_rtl_single (rtl_dump_file, insn);
+       fprintf (rtl_dump_file, "  is ");
+       dump_iv_info (rtl_dump_file, iv);
+       fprintf (rtl_dump_file, "\n");
+     }
+ 
+   return iv->base != NULL_RTX;
+ }
+ 
+ /* Calculates value of IV at ITERATION-th iteration, i.e.
+    delta + mult * extend (lowpart (base + step * ITERATION)).  */
+ 
+ rtx
+ get_iv_value (struct rtx_iv *iv, rtx iteration)
+ {
+   rtx val;
+ 
+   /* We would need to generate some if_then_else patterns, and so far
+      it is not needed anywhere.  */
+   if (iv->first_special)
+     abort ();
+ 
+   if (iv->step != const0_rtx && iteration != const0_rtx)
+     val = simplify_gen_binary (PLUS, iv->extend_mode, iv->base,
+ 			       simplify_gen_binary (MULT, iv->extend_mode,
+ 						    iv->step, iteration));
+   else
+     val = iv->base;
+ 
+   /* No narrowing/extension involved -- VAL is the final value.  */
+   if (iv->extend_mode == iv->mode)
+     return val;
+ 
+   val = lowpart_subreg (iv->mode, val, iv->extend_mode);
+ 
+   if (iv->extend == NIL)
+     return val;
+ 
+   /* Apply the recorded extension and the outer affine layer.  */
+   val = simplify_gen_unary (iv->extend, iv->extend_mode, val, iv->mode);
+   val = simplify_gen_binary (PLUS, iv->extend_mode, iv->delta,
+ 			     simplify_gen_binary (MULT, iv->extend_mode,
+ 						  iv->mult, val));
+ 
+   return val;
+ }
+ 
+ /* Free the data for an induction variable analysis and reset the
+    module-level state so a subsequent analysis starts clean.  */
+ 
+ void
+ iv_analysis_done (void)
+ {
+   max_insn_no = 0;
+   max_reg_no = 0;
+ 
+   /* free (NULL) is a no-op, so no null checks are needed; clear the
+      pointers so stale values cannot be reused.  */
+   free (insn_info);
+   insn_info = NULL;
+   free (last_def);
+   last_def = NULL;
+   free (bivs);
+   bivs = NULL;
+ }
+ 
+ /* Computes inverse to X modulo (1 << MOD), by computing
+    X ** (2 ** (MOD - 1) - 1) via repeated squaring (Euler's theorem for
+    the group of units mod 2^MOD).  NOTE(review): only meaningful for odd
+    X -- even X has no inverse modulo a power of two; callers must
+    guarantee this.  */
+ 
+ static unsigned HOST_WIDEST_INT
+ inverse (unsigned HOST_WIDEST_INT x, int mod)
+ {
+   /* Shift in two steps so that MOD == HOST_BITS_PER_WIDEST_INT does not
+      invoke undefined behavior.  */
+   unsigned HOST_WIDEST_INT mask =
+ 	  ((unsigned HOST_WIDEST_INT) 1 << (mod - 1) << 1) - 1;
+   unsigned HOST_WIDEST_INT rslt = 1;
+   int i;
+ 
+   for (i = 0; i < mod - 1; i++)
+     {
+       rslt = (rslt * x) & mask;
+       x = (x * x) & mask;
+     }
+ 
+   return rslt;
+ }
+ 
+ /* Tries to estimate the maximum number of iterations described by DESC;
+    stores the bound into DESC->niter_max and returns it.  */
+ 
+ static unsigned HOST_WIDEST_INT
+ determine_max_iter (struct niter_desc *desc)
+ {
+   rtx niter = desc->niter_expr;
+   rtx mmin, mmax, left, right;
+   unsigned HOST_WIDEST_INT nmax, inc;
+ 
+   /* (and X C) with C == 2^k - 1 bounds the number of iterations by C.  */
+   if (GET_CODE (niter) == AND
+       && GET_CODE (XEXP (niter, 0)) == CONST_INT)
+     {
+       nmax = INTVAL (XEXP (niter, 0));
+       if (!(nmax & (nmax + 1)))
+ 	{
+ 	  desc->niter_max = nmax;
+ 	  return nmax;
+ 	}
+     }
+ 
+   get_mode_bounds (desc->mode, desc->signed_p, &mmin, &mmax);
+   nmax = INTVAL (mmax) - INTVAL (mmin);
+ 
+   if (GET_CODE (niter) == UDIV)
+     {
+       if (GET_CODE (XEXP (niter, 1)) != CONST_INT)
+ 	{
+ 	  desc->niter_max = nmax;
+ 	  return nmax;
+ 	}
+       inc = INTVAL (XEXP (niter, 1));
+       niter = XEXP (niter, 0);
+     }
+   else
+     inc = 1;
+ 
+   if (GET_CODE (niter) == PLUS)
+     {
+       /* Fixed: RIGHT is operand 1; the original read operand 0 for both
+ 	 LEFT and RIGHT, a copy-paste error.  */
+       left = XEXP (niter, 0);
+       right = XEXP (niter, 1);
+ 
+       if (GET_CODE (right) == CONST_INT)
+ 	right = GEN_INT (-INTVAL (right));
+     }
+   else if (GET_CODE (niter) == MINUS)
+     {
+       left = XEXP (niter, 0);
+       right = XEXP (niter, 1);
+     }
+   else
+     {
+       left = niter;
+       right = mmin;
+     }
+ 
+   /* Tighten the mode bounds with any constant operands found.  */
+   if (GET_CODE (left) == CONST_INT)
+     mmax = left;
+   if (GET_CODE (right) == CONST_INT)
+     mmin = right;
+   nmax = INTVAL (mmax) - INTVAL (mmin);
+ 
+   desc->niter_max = nmax / inc;
+   return nmax / inc;
+ }
+ 
+ /* Checks whether register *REG is in set ALT (a regset passed as void *).
+    Callback for for_each_rtx; returns nonzero to stop the traversal.  */
+ 
+ static int
+ altered_reg_used (rtx *reg, void *alt)
+ {
+   if (!REG_P (*reg))
+     return 0;
+ 
+   return REGNO_REG_SET_P (alt, REGNO (*reg));
+ }
+ 
+ /* Marks registers altered by EXPR in set ALT (a regset passed as
+    void *).  Callback for note_stores; BY is the setter and is unused.  */
+ 
+ static void
+ mark_altered (rtx expr, rtx by ATTRIBUTE_UNUSED, void *alt)
+ {
+   /* A store to a subreg alters the whole inner register.  */
+   if (GET_CODE (expr) == SUBREG)
+     expr = SUBREG_REG (expr);
+   if (!REG_P (expr))
+     return;
+ 
+   SET_REGNO_REG_SET (alt, REGNO (expr));
+ }
+ 
+ /* Checks whether RHS is simple enough to process: a constant, a
+    register, or a reg +- const expression.  */
+ 
+ static bool
+ simple_rhs_p (rtx rhs)
+ {
+   rtx op0, op1;
+ 
+   if (CONSTANT_P (rhs)
+       || REG_P (rhs))
+     return true;
+ 
+   switch (GET_CODE (rhs))
+     {
+     case PLUS:
+     case MINUS:
+       op0 = XEXP (rhs, 0);
+       op1 = XEXP (rhs, 1);
+       /* Allow reg + const sets only.  */
+       if (REG_P (op0) && CONSTANT_P (op1))
+ 	return true;
+       if (REG_P (op1) && CONSTANT_P (op0))
+ 	return true;
+ 
+       return false;
+ 
+     default:
+       return false;
+     }
+ }
+ 
+ /* Simplifies *EXPR using assignment in INSN.  ALTERED is the set of
+    registers altered so far; it is updated with the stores of INSN even
+    when INSN itself cannot be used for simplification.  */
+ 
+ static void
+ simplify_using_assignment (rtx insn, rtx *expr, regset altered)
+ {
+   rtx set = single_set (insn);
+   rtx lhs, rhs;
+   bool ret = false;
+ 
+   /* RET records that INSN is unusable for substitution: no single set,
+      a non-register destination, or a destination already altered.  */
+   if (set)
+     {
+       lhs = SET_DEST (set);
+       if (GET_CODE (lhs) != REG
+ 	  || altered_reg_used (&lhs, altered))
+ 	ret = true;
+     }
+   else
+     ret = true;
+ 
+   /* Record INSN's stores before possibly bailing out, so ALTERED stays
+      correct for the insns processed after this one.  */
+   note_stores (PATTERN (insn), mark_altered, altered);
+   if (GET_CODE (insn) == CALL_INSN)
+     {
+       int i;
+ 
+       /* Kill all call clobbered registers.  */
+       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ 	if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i))
+ 	  SET_REGNO_REG_SET (altered, i);
+     }
+ 
+   if (ret)
+     return;
+ 
+   rhs = find_reg_equal_equiv_note (insn);
+   if (!rhs)
+     rhs = SET_SRC (set);
+ 
+   if (!simple_rhs_p (rhs))
+     return;
+ 
+   /* Do not substitute a value that mentions altered registers -- it no
+      longer describes the register's content at this point.  */
+   if (for_each_rtx (&rhs, altered_reg_used, altered))
+     return;
+ 
+   *expr = simplify_replace_rtx (*expr, lhs, rhs);
+ }
+ 
+ /* Checks whether A implies B.  Only handles the case where A is an
+    equality: substituting one side for the other in B must fold B to
+    const_true_rtx.  Conservative -- false means "unknown".  */
+ 
+ static bool
+ implies_p (rtx a, rtx b)
+ {
+   rtx op0, op1, r;
+ 
+   if (GET_CODE (a) == EQ)
+     {
+       op0 = XEXP (a, 0);
+       op1 = XEXP (a, 1);
+ 
+       if (REG_P (op0))
+ 	{
+ 	  r = simplify_replace_rtx (b, op0, op1);
+ 	  if (r == const_true_rtx)
+ 	    return true;
+ 	}
+ 
+       if (REG_P (op1))
+ 	{
+ 	  r = simplify_replace_rtx (b, op1, op0);
+ 	  if (r == const_true_rtx)
+ 	    return true;
+ 	}
+     }
+ 
+   return false;
+ }
+ 
+ /* Canonicalizes COND so that
+ 
+    (1) If an operand is a constant, it will be the second operand.
+    (2) (LE x const) will be replaced with (LT x <const+1>) and similarly
+        for GE, GEU, and LEU.  */
+ 
+ rtx
+ canon_condition (rtx cond)
+ {
+   rtx tem;
+   rtx op0, op1;
+   enum rtx_code code;
+   enum machine_mode mode;
+ 
+   code = GET_CODE (cond);
+   op0 = XEXP (cond, 0);
+   op1 = XEXP (cond, 1);
+ 
+   /* If constant is first, put it last.  */
+   if (CONSTANT_P (op0))
+     {
+       code = swap_condition (code);
+       tem = op0;
+       op0 = op1;
+       op1 = tem;
+     }
+ 
+   mode = GET_MODE (op0);
+   if (mode == VOIDmode)
+     mode = GET_MODE (op1);
+   if (mode == VOIDmode)
+     abort ();
+ 
+   if (GET_CODE (op1) == CONST_INT
+       && GET_MODE_CLASS (mode) != MODE_CC
+       && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
+     {
+       HOST_WIDE_INT const_val = INTVAL (op1);
+       unsigned HOST_WIDE_INT uconst_val = const_val;
+       unsigned HOST_WIDE_INT max_val
+ 	= (unsigned HOST_WIDE_INT) GET_MODE_MASK (mode);
+ 
+       /* In each case below, skip the transformation when the constant is
+ 	 already at the extreme of its range (the +-1 would wrap).  OP1 is
+ 	 a VOIDmode CONST_INT here, so MODE == GET_MODE (op0); use MODE
+ 	 uniformly (the original mixed MODE and GET_MODE (op0)).  */
+       switch (code)
+ 	{
+ 	case LE:
+ 	  if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1)
+ 	    code = LT, op1 = gen_int_mode (const_val + 1, mode);
+ 	  break;
+ 
+ 	/* When cross-compiling, const_val might be sign-extended from
+ 	   BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */
+ 	case GE:
+ 	  if ((HOST_WIDE_INT) (const_val & max_val)
+ 	      != (((HOST_WIDE_INT) 1
+ 		   << (GET_MODE_BITSIZE (mode) - 1))))
+ 	    code = GT, op1 = gen_int_mode (const_val - 1, mode);
+ 	  break;
+ 
+ 	case LEU:
+ 	  if (uconst_val < max_val)
+ 	    code = LTU, op1 = gen_int_mode (uconst_val + 1, mode);
+ 	  break;
+ 
+ 	case GEU:
+ 	  if (uconst_val != 0)
+ 	    code = GTU, op1 = gen_int_mode (uconst_val - 1, mode);
+ 	  break;
+ 
+ 	default:
+ 	  break;
+ 	}
+     }
+ 
+   /* Rebuild the condition only if something actually changed.  */
+   if (op0 != XEXP (cond, 0)
+       || op1 != XEXP (cond, 1)
+       || code != GET_CODE (cond)
+       || GET_MODE (cond) != SImode)
+     cond = gen_rtx_fmt_ee (code, SImode, op0, op1);
+ 
+   return cond;
+ }
+ 
+ /* Tries to use the fact that COND holds to simplify EXPR.  ALTERED is
+    the set of altered regs; if any register of COND is in it, COND no
+    longer describes the current state and is ignored.  */
+ 
+ void
+ simplify_using_condition (rtx cond, rtx *expr, regset altered)
+ {
+   rtx rev, reve, exp = *expr;
+ 
+   /* Only comparison expressions can be simplified this way.  */
+   if (GET_RTX_CLASS (GET_CODE (*expr)) != '<')
+     return;
+ 
+   /* If some register gets altered later, we do not really speak about its
+      value at the time of comparison.  */
+   if (altered
+       && for_each_rtx (&cond, altered_reg_used, altered))
+     return;
+ 
+   /* REV/REVE may be NULL if the condition cannot be reversed (e.g. for
+      floating point comparisons).  */
+   rev = reversed_condition (cond);
+   reve = reversed_condition (exp);
+ 
+   cond = canon_condition (cond);
+   exp = canon_condition (exp);
+   if (rev)
+     rev = canon_condition (rev);
+   if (reve)
+     reve = canon_condition (reve);
+ 
+   if (rtx_equal_p (exp, cond))
+     {
+       *expr = const_true_rtx;
+       return;
+     }
+ 
+ 
+   if (rev && rtx_equal_p (exp, rev))
+     {
+       *expr = const0_rtx;
+       return;
+     }
+ 
+   if (implies_p (cond, exp))
+     {
+       *expr = const_true_rtx;
+       return;
+     }
+   
+   if (reve && implies_p (cond, reve))
+     {
+       *expr = const0_rtx;
+       return;
+     }
+ 
+   /* A proof by contradiction.  If *EXPR implies (not cond), *EXPR must
+      be false.  */
+   if (rev && implies_p (exp, rev))
+     {
+       *expr = const0_rtx;
+       return;
+     }
+ 
+   /* Similarly, If (not *EXPR) implies (not cond), *EXPR must be true.  */
+   if (rev && reve && implies_p (reve, rev))
+     {
+       *expr = const_true_rtx;
+       return;
+     }
+ 
+   /* We would like to have some other tests here.  TODO.  */
+ 
+   return;
+ }
+ 
+ /* Use relationship between A and *B to eventually eliminate *B.
+    OP is the operation we consider (AND or IOR); the replacement value is
+    the neutral element of that operation, so the surrounding list keeps
+    its value.  */
+ 
+ static void
+ eliminate_implied_condition (enum rtx_code op, rtx a, rtx *b)
+ {
+   if (op == AND)
+     {
+       /* If A implies *B, we may replace *B by true.  */
+       if (implies_p (a, *b))
+ 	*b = const_true_rtx;
+     }
+   else if (op == IOR)
+     {
+       /* If *B implies A, we may replace *B by false.  */
+       if (implies_p (*b, a))
+ 	*b = const0_rtx;
+     }
+   else
+     abort ();
+ }
+ 
+ /* Eliminates the conditions in EXPR_LIST TAIL that are implied by *HEAD,
+    and vice versa.  OP is the operation (AND or IOR) combining the list
+    elements.  */
+ 
+ static void
+ eliminate_implied_conditions (enum rtx_code op, rtx *head, rtx tail)
+ {
+   rtx elt;
+ 
+   /* First simplify the tail elements using *HEAD, then *HEAD using each
+      tail element.  */
+   for (elt = tail; elt; elt = XEXP (elt, 1))
+     eliminate_implied_condition (op, *head, &XEXP (elt, 0));
+   for (elt = tail; elt; elt = XEXP (elt, 1))
+     eliminate_implied_condition (op, XEXP (elt, 0), head);
+ }
+ 
+ /* Simplifies *EXPR using initial values at the start of the LOOP.  If
+    *EXPR is an EXPR_LIST, its elements are assumed to be combined using
+    OP (AND or IOR); otherwise OP must be NIL and a single condition is
+    simplified by walking single-predecessor blocks above the preheader.  */
+ 
+ static void
+ simplify_using_initial_values (struct loop *loop, enum rtx_code op, rtx *expr)
+ {
+   rtx head, tail, insn;
+   rtx neutral, aggr;
+   regset altered;
+   regset_head altered_head;
+   edge e;
+ 
+   if (!*expr)
+     return;
+ 
+   if (CONSTANT_P (*expr))
+     return;
+ 
+   if (GET_CODE (*expr) == EXPR_LIST)
+     {
+       head = XEXP (*expr, 0);
+       tail = XEXP (*expr, 1);
+ 
+       eliminate_implied_conditions (op, &head, tail);
+ 
+       /* NEUTRAL is the identity of OP; AGGR absorbs the whole list
+ 	 (false for AND, true for IOR).  */
+       if (op == AND)
+ 	{
+ 	  neutral = const_true_rtx;
+ 	  aggr = const0_rtx;
+ 	}
+       else if (op == IOR)
+ 	{
+ 	  neutral = const0_rtx;
+ 	  aggr = const_true_rtx;
+ 	}
+       else
+ 	abort ();
+ 
+       simplify_using_initial_values (loop, NIL, &head);
+       if (head == aggr)
+ 	{
+ 	  XEXP (*expr, 0) = aggr;
+ 	  XEXP (*expr, 1) = NULL_RTX;
+ 	  return;
+ 	}
+       else if (head == neutral)
+ 	{
+ 	  *expr = tail;
+ 	  simplify_using_initial_values (loop, op, expr);
+ 	  return;
+ 	}
+       simplify_using_initial_values (loop, op, &tail);
+ 
+       if (tail && XEXP (tail, 0) == aggr)
+ 	{
+ 	  *expr = tail;
+ 	  return;
+ 	}
+   
+       XEXP (*expr, 0) = head;
+       XEXP (*expr, 1) = tail;
+       return;
+     }
+ 
+   if (op != NIL)
+     abort ();
+ 
+   e = loop_preheader_edge (loop);
+   if (e->src == ENTRY_BLOCK_PTR)
+     return;
+ 
+   altered = INITIALIZE_REG_SET (altered_head);
+ 
+   /* Walk backwards through blocks dominating the preheader, using the
+      exit conditions and assignments found there to simplify *EXPR.  */
+   while (1)
+     {
+       insn = BB_END (e->src);
+       if (any_condjump_p (insn))
+ 	{
+ 	  /* FIXME -- slightly wrong -- what if compared register
+ 	     gets altered between start of the condition and insn?  */
+ 	  rtx cond = get_condition (BB_END (e->src), NULL, false);
+       
+ 	  /* Entering through the fallthru edge means the branch condition
+ 	     was false.  */
+ 	  if (cond && (e->flags & EDGE_FALLTHRU))
+ 	    cond = reversed_condition (cond);
+ 	  if (cond)
+ 	    {
+ 	      simplify_using_condition (cond, expr, altered);
+ 	      if (CONSTANT_P (*expr))
+ 		{
+ 		  FREE_REG_SET (altered);
+ 		  return;
+ 		}
+ 	    }
+ 	}
+ 
+       FOR_BB_INSNS_REVERSE (e->src, insn)
+ 	{
+ 	  if (!INSN_P (insn))
+ 	    continue;
+ 	    
+ 	  simplify_using_assignment (insn, expr, altered);
+ 	  if (CONSTANT_P (*expr))
+ 	    {
+ 	      FREE_REG_SET (altered);
+ 	      return;
+ 	    }
+ 	}
+ 
+       /* Stop at a block with several predecessors or at the entry.  */
+       e = e->src->pred;
+       if (e->pred_next
+ 	  || e->src == ENTRY_BLOCK_PTR)
+ 	break;
+     }
+ 
+   FREE_REG_SET (altered);
+ }
+ 
+ /* Transforms invariant IV into MODE.  Adds assumptions based on the fact
+    that IV occurs as left operand of comparison COND and its signedness
+    is SIGNED_P to DESC: values outside MODE's range either make the loop
+    infinite or prevent it from rolling at all, depending on COND.  */
+ 
+ static void
+ shorten_into_mode (struct rtx_iv *iv, enum machine_mode mode,
+ 		   enum rtx_code cond, bool signed_p, struct niter_desc *desc)
+ {
+   rtx mmin, mmax, cond_over, cond_under;
+ 
+   /* COND_UNDER / COND_OVER hold when IV's base lies below / above the
+      representable range of MODE.  */
+   get_mode_bounds (mode, signed_p, &mmin, &mmax);
+   cond_under = simplify_gen_relational (LT, SImode, iv->extend_mode,
+ 					iv->base, mmin);
+   cond_over = simplify_gen_relational (GT, SImode, iv->extend_mode,
+ 				       iv->base, mmax);
+ 
+   switch (cond)
+     {
+       case LE:
+       case LT:
+       case LEU:
+       case LTU:
+ 	if (cond_under != const0_rtx)
+ 	  desc->infinite =
+ 		  alloc_EXPR_LIST (0, cond_under, desc->infinite);
+ 	if (cond_over != const0_rtx)
+ 	  desc->noloop_assumptions =
+ 		  alloc_EXPR_LIST (0, cond_over, desc->noloop_assumptions);
+ 	break;
+ 
+       case GE:
+       case GT:
+       case GEU:
+       case GTU:
+ 	if (cond_over != const0_rtx)
+ 	  desc->infinite =
+ 		  alloc_EXPR_LIST (0, cond_over, desc->infinite);
+ 	if (cond_under != const0_rtx)
+ 	  desc->noloop_assumptions =
+ 		  alloc_EXPR_LIST (0, cond_under, desc->noloop_assumptions);
+ 	break;
+ 
+       case NE:
+ 	/* For NE, an out-of-range invariant can never be reached by the
+ 	   iterating value, so the exit is never taken.  */
+ 	if (cond_over != const0_rtx)
+ 	  desc->infinite =
+ 		  alloc_EXPR_LIST (0, cond_over, desc->infinite);
+ 	if (cond_under != const0_rtx)
+ 	  desc->infinite =
+ 		  alloc_EXPR_LIST (0, cond_under, desc->infinite);
+ 	break;
+ 
+       default:
+ 	abort ();
+     }
+ 
+   iv->mode = mode;
+   iv->extend = signed_p ? SIGN_EXTEND : ZERO_EXTEND;
+ }
+ 
+ /* Transforms IV0 and IV1 compared by COND so that they are both compared
+    as subregs of the same mode if possible (sometimes it is necessary to
+    add some assumptions to DESC).  On success sets DESC->mode and
+    DESC->signed_p and returns true.  */
+ 
+ static bool
+ canonicalize_iv_subregs (struct rtx_iv *iv0, struct rtx_iv *iv1,
+ 			 enum rtx_code cond, struct niter_desc *desc)
+ {
+   enum machine_mode comp_mode;
+   bool signed_p;
+ 
+   /* If the ivs behave specially in the first iteration, or are
+      added/multiplied after extending, we ignore them.  */
+   if (iv0->first_special || iv0->mult != const1_rtx || iv0->delta != const0_rtx)
+     return false;
+   if (iv1->first_special || iv1->mult != const1_rtx || iv1->delta != const0_rtx)
+     return false;
+ 
+   /* If there is some extend, it must match signedness of the comparison.  */
+   switch (cond)
+     {
+       case LE:
+       case LT:
+ 	if (iv0->extend == ZERO_EXTEND
+ 	    || iv1->extend == ZERO_EXTEND)
+ 	  return false;
+ 	signed_p = true;
+ 	break;
+ 
+       case LEU:
+       case LTU:
+ 	if (iv0->extend == SIGN_EXTEND
+ 	    || iv1->extend == SIGN_EXTEND)
+ 	  return false;
+ 	signed_p = false;
+ 	break;
+ 
+       case NE:
+ 	/* NE works with either signedness; just require the two extends
+ 	   not to contradict each other.  */
+ 	if (iv0->extend != NIL
+ 	    && iv1->extend != NIL
+ 	    && iv0->extend != iv1->extend)
+ 	  return false;
+ 
+ 	signed_p = false;
+ 	if (iv0->extend != NIL)
+ 	  signed_p = iv0->extend == SIGN_EXTEND;
+ 	if (iv1->extend != NIL)
+ 	  signed_p = iv1->extend == SIGN_EXTEND;
+ 	break;
+ 
+       default:
+ 	abort ();
+     }
+ 
+   /* Values of both variables should be computed in the same mode.  These
+      might indeed be different, if we have comparison like
+ 
+      (compare (subreg:SI (iv0)) (subreg:SI (iv1)))
+ 
+      and iv0 and iv1 are both ivs iterating in SI mode, but calculated
+      in different modes.  This does not seem impossible to handle, but
+      it hardly ever occurs in practice.
+      
+      The only exception is the case when one of operands is invariant.
+      For example pentium 3 generates comparisons like
+      (lt (subreg:HI (reg:SI)) 100).  Here we assign HImode to 100, but we
+      definitely do not want this prevent the optimization.  */
+   comp_mode = iv0->extend_mode;
+   if (GET_MODE_BITSIZE (comp_mode) < GET_MODE_BITSIZE (iv1->extend_mode))
+     comp_mode = iv1->extend_mode;
+ 
+   /* Widen the narrower iv into COMP_MODE; only invariants (zero step,
+      no pending subreg) can be widened safely.  */
+   if (iv0->extend_mode != comp_mode)
+     {
+       if (iv0->mode != iv0->extend_mode
+ 	  || iv0->step != const0_rtx)
+ 	return false;
+ 
+       iv0->base = simplify_gen_unary (signed_p ? SIGN_EXTEND : ZERO_EXTEND,
+ 				      comp_mode, iv0->base, iv0->mode);
+       iv0->extend_mode = comp_mode;
+     }
+ 
+   if (iv1->extend_mode != comp_mode)
+     {
+       if (iv1->mode != iv1->extend_mode
+ 	  || iv1->step != const0_rtx)
+ 	return false;
+ 
+       iv1->base = simplify_gen_unary (signed_p ? SIGN_EXTEND : ZERO_EXTEND,
+ 				      comp_mode, iv1->base, iv1->mode);
+       iv1->extend_mode = comp_mode;
+     }
+ 
+   /* Check that both ivs belong to a range of a single mode.  If one of the
+      operands is an invariant, we may need to shorten it into the common
+      mode.  */
+   if (iv0->mode == iv0->extend_mode
+       && iv0->step == const0_rtx
+       && iv0->mode != iv1->mode)
+     shorten_into_mode (iv0, iv1->mode, cond, signed_p, desc);
+ 
+   if (iv1->mode == iv1->extend_mode
+       && iv1->step == const0_rtx
+       && iv0->mode != iv1->mode)
+     shorten_into_mode (iv1, iv0->mode, swap_condition (cond), signed_p, desc);
+ 
+   if (iv0->mode != iv1->mode)
+     return false;
+ 
+   desc->mode = iv0->mode;
+   desc->signed_p = signed_p;
+ 
+   return true;
+ }
+ 
+ /* Computes number of iterations of the CONDITION in INSN in LOOP and stores
+    the result into DESC.  Very similar to determine_number_of_iterations
+    (basically its rtl version), complicated by things like subregs.  */
+ 
+ void
+ iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition,
+ 			 struct niter_desc *desc)
+ {
+   rtx op0, op1, delta, step, bound, may_xform, def_insn, tmp, tmp0, tmp1;
+   struct rtx_iv iv0, iv1, tmp_iv;
+   rtx assumption;
+   enum rtx_code cond;
+   enum machine_mode mode, comp_mode;
+   rtx mmin, mmax;
+   unsigned HOST_WIDEST_INT s, size, d;
+   HOST_WIDEST_INT up, down, inc;
+   int was_sharp = false;
+ 
+   /* The meaning of these assumptions is this:
+      if !assumptions
+        then the rest of information does not have to be valid
+      if noloop_assumptions then the loop does not roll
+      if infinite then this exit is never used */
+ 
+   desc->assumptions = NULL_RTX;
+   desc->noloop_assumptions = NULL_RTX;
+   desc->infinite = NULL_RTX;
+   desc->simple_p = true;
+ 
+   desc->const_iter = false;
+   desc->niter_expr = NULL_RTX;
+   desc->niter_max = 0;
+ 
+   cond = GET_CODE (condition);
+   if (GET_RTX_CLASS (cond) != '<')
+     abort ();
+ 
+   mode = GET_MODE (XEXP (condition, 0));
+   if (mode == VOIDmode)
+     mode = GET_MODE (XEXP (condition, 1));
+   /* The constant comparisons should be folded.  */
+   if (mode == VOIDmode)
+     abort ();
+ 
+   /* We only handle integers or pointers.  */
+   if (GET_MODE_CLASS (mode) != MODE_INT
+       && GET_MODE_CLASS (mode) != MODE_PARTIAL_INT)
+     goto fail;
+ 
+   /* Analyse both comparison operands as induction variables; an operand
+      whose analysis yields VOIDmode extend_mode is an invariant computed
+      directly in MODE.  */
+   op0 = XEXP (condition, 0);
+   def_insn = iv_get_reaching_def (insn, op0);
+   if (!iv_analyse (def_insn, op0, &iv0))
+     goto fail;
+   if (iv0.extend_mode == VOIDmode)
+     iv0.mode = iv0.extend_mode = mode;
+   
+   op1 = XEXP (condition, 1);
+   def_insn = iv_get_reaching_def (insn, op1);
+   if (!iv_analyse (def_insn, op1, &iv1))
+     goto fail;
+   if (iv1.extend_mode == VOIDmode)
+     iv1.mode = iv1.extend_mode = mode;
+ 
+   /* Check condition and normalize it.  */
+ 
+   switch (cond)
+     {
+       case GE:
+       case GT:
+       case GEU:
+       case GTU:
+ 	tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
+ 	cond = swap_condition (cond);
+ 	break;
+       case NE:
+       case LE:
+       case LEU:
+       case LT:
+       case LTU:
+ 	break;
+       default:
+ 	goto fail;
+     }
+ 
+   /* Handle extends.  This is relatively nontrivial, so we only try in some
+      easy cases, when we can canonicalize the ivs (possibly by adding some
+      assumptions) to shape subreg (base + i * step).  This function also fills
+      in desc->mode and desc->signed_p.  */
+ 
+   if (!canonicalize_iv_subregs (&iv0, &iv1, cond, desc))
+     goto fail;
+ 
+   comp_mode = iv0.extend_mode;
+   mode = iv0.mode;
+   size = GET_MODE_BITSIZE (mode);
+   /* After the normalization above, LE and LT are the signed comparisons;
+      request signed mode bounds for them, unsigned ones for LEU, LTU and
+      NE.  */
+   get_mode_bounds (mode, (cond == LE || cond == LT), &mmin, &mmax);
+ 
+   if (GET_CODE (iv0.step) != CONST_INT || GET_CODE (iv1.step) != CONST_INT)
+     goto fail;
+ 
+   /* We can take care of the case of two induction variables chasing each other
+      if the test is NE. I have never seen a loop using it, but still it is
+      cool.  */
+   if (iv0.step != const0_rtx && iv1.step != const0_rtx)
+     {
+       if (cond != NE)
+ 	goto fail;
+ 
+       iv0.step = simplify_gen_binary (MINUS, comp_mode, iv0.step, iv1.step);
+       iv1.step = const0_rtx;
+     }
+ 
+   /* This is either infinite loop or the one that ends immediately, depending
+      on initial values.  Unswitching should remove this kind of conditions.  */
+   if (iv0.step == const0_rtx && iv1.step == const0_rtx)
+     goto fail;
+ 
+   /* Ignore loops of while (i-- < 10) type.  */
+   if (cond != NE
+       && (INTVAL (iv0.step) < 0 || INTVAL (iv1.step) > 0))
+     goto fail;
+ 
+   /* Some more condition normalization.  We must record some assumptions
+      due to overflows.  */
+   switch (cond)
+     {
+       case LT:
+       case LTU:
+ 	/* We want to take care only of non-sharp relationals; this is easy,
+ 	   as in cases the overflow would make the transformation unsafe
+ 	   the loop does not roll.  Seemingly it would make more sense to want
+ 	   to take care of sharp relationals instead, as NE is more similar to
+ 	   them, but the problem is that here the transformation would be more
+ 	   difficult due to possibly infinite loops.  */
+ 	if (iv0.step == const0_rtx)
+ 	  {
+ 	    /* Invariant < iv: if the invariant equals the mode maximum,
+ 	       the condition can never hold (zero iterations); otherwise
+ 	       a < b is equivalent to a + 1 <= b.  */
+ 	    tmp = lowpart_subreg (mode, iv0.base, comp_mode);
+ 	    assumption = simplify_gen_relational (EQ, SImode, mode, tmp, mmax);
+ 	    if (assumption == const_true_rtx)
+ 	      goto zero_iter;
+ 	    iv0.base = simplify_gen_binary (PLUS, comp_mode,
+ 					    iv0.base, const1_rtx);
+ 	  }
+ 	else
+ 	  {
+ 	    /* iv < invariant: if the invariant equals the mode minimum,
+ 	       the condition can never hold (zero iterations); otherwise
+ 	       a < b is equivalent to a <= b - 1.  */
+ 	    tmp = lowpart_subreg (mode, iv1.base, comp_mode);
+ 	    assumption = simplify_gen_relational (EQ, SImode, mode, tmp, mmin);
+ 	    if (assumption == const_true_rtx)
+ 	      goto zero_iter;
+ 	    iv1.base = simplify_gen_binary (PLUS, comp_mode,
+ 					    iv1.base, constm1_rtx);
+ 	  }
+ 
+ 	if (assumption != const0_rtx)
+ 	  desc->noloop_assumptions =
+ 		  alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+ 	cond = (cond == LT) ? LE : LEU;
+ 
+ 	/* It will be useful to be able to tell the difference once more in
+ 	   LE -> NE reduction.  */
+ 	was_sharp = true;
+ 	break;
+       default: ;
+     }
+ 
+   /* Take care of trivially infinite loops.  */
+   if (cond != NE)
+     {
+       /* At this point the condition is a non-sharp inequality a <= b;
+ 	 if the invariant side already sits at the relevant mode bound,
+ 	 the condition holds on every iteration and the exit is never
+ 	 taken.  */
+       if (iv0.step == const0_rtx)
+ 	{
+ 	  tmp = lowpart_subreg (mode, iv0.base, comp_mode);
+ 	  if (rtx_equal_p (tmp, mmin))
+ 	    {
+ 	      desc->infinite =
+ 		      alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX);
+ 	      return;
+ 	    }
+ 	}
+       else
+ 	{
+ 	  tmp = lowpart_subreg (mode, iv1.base, comp_mode);
+ 	  if (rtx_equal_p (tmp, mmax))
+ 	    {
+ 	      desc->infinite =
+ 		      alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX);
+ 	      return;
+ 	    }
+ 	}
+     }
+ 
+   /* If we can we want to take care of NE conditions instead of size
+      comparisons, as they are much more friendly (most importantly
+      this takes care of special handling of loops with step 1).  We can
+      do it if we first check that upper bound is greater or equal to
+      lower bound, their difference is constant c modulo step and that
+      there is not an overflow.  */
+   if (cond != NE)
+     {
+       if (iv0.step == const0_rtx)
+ 	step = simplify_gen_unary (NEG, comp_mode, iv1.step, comp_mode);
+       else
+ 	step = iv0.step;
+       delta = simplify_gen_binary (MINUS, comp_mode, iv1.base, iv0.base);
+       delta = lowpart_subreg (mode, delta, comp_mode);
+       delta = simplify_gen_binary (UMOD, mode, delta, step);
+       may_xform = const0_rtx;
+ 
+       if (GET_CODE (delta) == CONST_INT)
+ 	{
+ 	  if (was_sharp && INTVAL (delta) == INTVAL (step) - 1)
+ 	    {
+ 	      /* A special case.  We have transformed condition of type
+ 		 for (i = 0; i < 4; i += 4)
+ 		 into
+ 		 for (i = 0; i <= 3; i += 4)
+ 		 obviously if the test for overflow during that transformation
+ 		 passed, we cannot overflow here.  Most importantly any
+ 		 loop with sharp end condition and step 1 falls into this
+ 		 category, so handling this case specially is definitely
+ 		 worth the troubles.  */
+ 	      may_xform = const_true_rtx;
+ 	    }
+ 	  else if (iv0.step == const0_rtx)
+ 	    {
+ 	      bound = simplify_gen_binary (PLUS, comp_mode, mmin, step);
+ 	      bound = simplify_gen_binary (MINUS, comp_mode, bound, delta);
+ 	      bound = lowpart_subreg (mode, bound, comp_mode);
+ 	      tmp = lowpart_subreg (mode, iv0.base, comp_mode);
+ 	      may_xform = simplify_gen_relational (cond, SImode, mode,
+ 						   bound, tmp);
+ 	    }
+ 	  else
+ 	    {
+ 	      bound = simplify_gen_binary (MINUS, comp_mode, mmax, step);
+ 	      bound = simplify_gen_binary (PLUS, comp_mode, bound, delta);
+ 	      bound = lowpart_subreg (mode, bound, comp_mode);
+ 	      tmp = lowpart_subreg (mode, iv1.base, comp_mode);
+ 	      may_xform = simplify_gen_relational (cond, SImode, mode,
+ 						   tmp, bound);
+ 	    }
+ 	}
+ 
+       if (may_xform != const0_rtx)
+ 	{
+ 	  /* We perform the transformation always provided that it is not
+ 	     completely senseless.  This is OK, as we would need this assumption
+ 	     to determine the number of iterations anyway.  */
+ 	  if (may_xform != const_true_rtx)
+ 	    desc->assumptions = alloc_EXPR_LIST (0, may_xform,
+ 						 desc->assumptions);
+ 
+ 	  /* We are going to lose some information about upper bound on
+ 	     number of iterations in this step, so record the information
+ 	     here.  */
+ 	  /* Exactly one of the steps is nonzero here (the two-iv case was
+ 	     already reduced to NE), so INC is positive.  */
+ 	  inc = INTVAL (iv0.step) - INTVAL (iv1.step);
+ 	  if (GET_CODE (iv1.base) == CONST_INT)
+ 	    up = INTVAL (iv1.base);
+ 	  else
+ 	    up = INTVAL (mmax) - inc;
+ 	  down = INTVAL (GET_CODE (iv0.base) == CONST_INT ? iv0.base : mmin);
+ 	  desc->niter_max = (up - down) / inc + 1;
+ 
+ 	  if (iv0.step == const0_rtx)
+ 	    {
+ 	      iv0.base = simplify_gen_binary (PLUS, comp_mode, iv0.base, delta);
+ 	      iv0.base = simplify_gen_binary (MINUS, comp_mode, iv0.base, step);
+ 	    }
+ 	  else
+ 	    {
+ 	      iv1.base = simplify_gen_binary (MINUS, comp_mode, iv1.base, delta);
+ 	      iv1.base = simplify_gen_binary (PLUS, comp_mode, iv1.base, step);
+ 	    }
+ 
+ 	  tmp0 = lowpart_subreg (mode, iv0.base, comp_mode);
+ 	  tmp1 = lowpart_subreg (mode, iv1.base, comp_mode);
+ 	  assumption = simplify_gen_relational (reverse_condition (cond),
+ 						SImode, mode, tmp0, tmp1);
+ 	  if (assumption == const_true_rtx)
+ 	    goto zero_iter;
+ 	  else if (assumption != const0_rtx)
+ 	    desc->noloop_assumptions =
+ 		    alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+ 	  cond = NE;
+ 	}
+     }
+ 
+   /* Count the number of iterations.  */
+   if (cond == NE)
+     {
+       /* Everything we do here is just arithmetics modulo size of mode.  This
+ 	 makes us able to do more involved computations of number of iterations
+ 	 than in other cases.  First transform the condition into shape
+ 	 s * i <> c, with s positive.  */
+       iv1.base = simplify_gen_binary (MINUS, comp_mode, iv1.base, iv0.base);
+       iv0.base = const0_rtx;
+       iv0.step = simplify_gen_binary (MINUS, comp_mode, iv0.step, iv1.step);
+       iv1.step = const0_rtx;
+       if (INTVAL (iv0.step) < 0)
+ 	{
+ 	  /* NOTE(review): the operand mode passed to simplify_gen_unary here
+ 	     is MODE, although IV0.STEP and IV1.BASE were built in COMP_MODE
+ 	     above -- verify this is intentional.  */
+ 	  iv0.step = simplify_gen_unary (NEG, comp_mode, iv0.step, mode);
+ 	  iv1.base = simplify_gen_unary (NEG, comp_mode, iv1.base, mode);
+ 	}
+       iv0.step = lowpart_subreg (mode, iv0.step, comp_mode);
+ 
+       /* Let nsd (s, size of mode) = d.  If d does not divide c, the loop
+ 	 is infinite.  Otherwise, the number of iterations is
+ 	 (inverse(s/d) * (c/d)) mod (size of mode/d).  */
+       /* Split the step into s * d with s odd and d a power of two, and
+ 	 shrink SIZE accordingly, so that 2^size below is the size of the
+ 	 mode divided by d.  */
+       s = INTVAL (iv0.step); d = 1;
+       while (s % 2 != 1)
+ 	{
+ 	  s /= 2;
+ 	  d *= 2;
+ 	  size--;
+ 	}
+       /* bound = 2^size - 1; the double shift avoids an (undefined) shift by
+ 	 the full word width when d == 1.  */
+       bound = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1 ) << 1) - 1);
+ 
+       tmp1 = lowpart_subreg (mode, iv1.base, comp_mode);
+       tmp = simplify_gen_binary (UMOD, mode, tmp1, GEN_INT (d));
+       assumption = simplify_gen_relational (NE, SImode, mode, tmp, const0_rtx);
+       desc->infinite = alloc_EXPR_LIST (0, assumption, desc->infinite);
+ 
+       tmp = simplify_gen_binary (UDIV, mode, tmp1, GEN_INT (d));
+       tmp = simplify_gen_binary (MULT, mode,
+ 				 tmp, GEN_INT (inverse (s, size)));
+       desc->niter_expr = simplify_gen_binary (AND, mode, tmp, bound);
+     }
+   else
+     {
+       if (iv1.step == const0_rtx)
+ 	/* Condition in shape a + s * i <= b
+ 	   We must know that b + s does not overflow and a <= b + s and then we
+ 	   can compute number of iterations as (b + s - a) / s.  (It might
+ 	   seem that we in fact could be more clever about testing the b + s
+ 	   overflow condition using some information about b - a mod s,
+ 	   but it was already taken into account during LE -> NE transform).  */
+ 	{
+ 	  step = iv0.step;
+ 	  tmp0 = lowpart_subreg (mode, iv0.base, comp_mode);
+ 	  tmp1 = lowpart_subreg (mode, iv1.base, comp_mode);
+ 
+ 	  bound = simplify_gen_binary (MINUS, mode, mmax, step);
+ 	  assumption = simplify_gen_relational (cond, SImode, mode,
+ 						tmp1, bound);
+ 	  desc->assumptions =
+ 		  alloc_EXPR_LIST (0, assumption, desc->assumptions);
+ 
+ 	  tmp = simplify_gen_binary (PLUS, comp_mode, iv1.base, iv0.step);
+ 	  tmp = lowpart_subreg (mode, tmp, comp_mode);
+ 	  assumption = simplify_gen_relational (reverse_condition (cond),
+ 						SImode, mode, tmp0, tmp);
+ 
+ 	  delta = simplify_gen_binary (PLUS, mode, tmp1, step);
+ 	  delta = simplify_gen_binary (MINUS, mode, delta, tmp0);
+ 	}
+       else
+ 	{
+ 	  /* Condition in shape a <= b - s * i
+ 	     We must know that a - s does not overflow and a - s <= b and then
+ 	     we can again compute number of iterations as (b - (a - s)) / s.  */
+ 	  step = simplify_gen_unary (NEG, mode, iv1.step, mode);
+ 	  tmp0 = lowpart_subreg (mode, iv0.base, comp_mode);
+ 	  tmp1 = lowpart_subreg (mode, iv1.base, comp_mode);
+ 
+ 	  bound = simplify_gen_binary (MINUS, mode, mmin, step);
+ 	  assumption = simplify_gen_relational (cond, SImode, mode,
+ 						bound, tmp0);
+ 	  desc->assumptions =
+ 		  alloc_EXPR_LIST (0, assumption, desc->assumptions);
+ 
+ 	  tmp = simplify_gen_binary (PLUS, comp_mode, iv0.base, iv1.step);
+ 	  tmp = lowpart_subreg (mode, tmp, comp_mode);
+ 	  assumption = simplify_gen_relational (reverse_condition (cond),
+ 						SImode, mode,
+ 						tmp, tmp1);
+ 	  delta = simplify_gen_binary (MINUS, mode, tmp0, step);
+ 	  delta = simplify_gen_binary (MINUS, mode, tmp1, delta);
+ 	}
+       if (assumption == const_true_rtx)
+ 	goto zero_iter;
+       else if (assumption != const0_rtx)
+ 	desc->noloop_assumptions =
+ 		alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+       delta = simplify_gen_binary (UDIV, mode, delta, step);
+       desc->niter_expr = delta;
+     }
+ 
+   simplify_using_initial_values (loop, AND, &desc->assumptions);
+   if (desc->assumptions
+       && XEXP (desc->assumptions, 0) == const0_rtx)
+     goto fail;
+   simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
+   simplify_using_initial_values (loop, IOR, &desc->infinite);
+   simplify_using_initial_values (loop, NIL, &desc->niter_expr);
+ 
+   /* Rerun the simplification.  Consider code (created by copying loop headers)
+ 
+      i = 0;
+ 
+      if (0 < n)
+        {
+          do
+ 	   {
+ 	     i++;
+ 	   } while (i < n);
+        }
+ 
+     The first pass determines that i = 0, the second pass uses it to eliminate
+     noloop assumption.  */
+ 
+   simplify_using_initial_values (loop, AND, &desc->assumptions);
+   if (desc->assumptions
+       && XEXP (desc->assumptions, 0) == const0_rtx)
+     goto fail;
+   simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
+   simplify_using_initial_values (loop, IOR, &desc->infinite);
+   simplify_using_initial_values (loop, NIL, &desc->niter_expr);
+ 
+   if (GET_CODE (desc->niter_expr) == CONST_INT)
+     {
+       unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr);
+ 
+       desc->const_iter = true;
+       desc->niter_max = desc->niter = val & GET_MODE_MASK (desc->mode);
+     }
+   else if (!desc->niter_max)
+     desc->niter_max = determine_max_iter (desc);
+ 
+   return;
+ 
+ /* We were not able to determine the number of iterations; mark the exit
+    as not simple.  */
+ fail:
+   desc->simple_p = false;
+   return;
+ 
+ /* The exit is known to be taken the first time its condition is tested,
+    i.e. the number of iterations is zero.  */
+ zero_iter:
+   desc->const_iter = true;
+   desc->niter = 0;
+   desc->niter_max = 0;
+   desc->niter_expr = const0_rtx;
+   return;
+ }
+ 
+ /* Checks whether E is a simple exit from LOOP and stores its description
+    into DESC.  TODO Should replace cfgloopanal.c:simple_loop_exit_p.  */
+ 
+ static void
+ check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc)
+ {
+   basic_block exit_bb;
+   rtx condition, at;
+   edge ei;
+ 
+   exit_bb = e->src;
+   desc->simple_p = false;
+ 
+   /* It must belong directly to the loop.  */
+   if (exit_bb->loop_father != loop)
+     return;
+ 
+   /* It must be tested (at least) once during any iteration.  */
+   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb))
+     return;
+ 
+   /* It must end in a simple conditional jump.  */
+   if (!any_condjump_p (BB_END (exit_bb)))
+     return;
+ 
+   /* A conditional jump has exactly two successors; EI becomes the one
+      other than E, i.e. the edge that stays inside the loop.  */
+   ei = exit_bb->succ;
+   if (ei == e)
+     ei = ei->succ_next;
+ 
+   desc->out_edge = e;
+   desc->in_edge = ei;
+ 
+   /* Test whether the condition is suitable.  */
+   if (!(condition = get_condition (BB_END (ei->src), &at, false)))
+     return;
+ 
+   /* get_condition returns the condition under which the jump is taken;
+      if the in-loop edge is the fallthru one, the jump exits the loop, so
+      reverse the condition to obtain the one under which the loop
+      continues.  */
+   if (ei->flags & EDGE_FALLTHRU)
+     {
+       condition = reversed_condition (condition);
+       if (!condition)
+ 	return;
+     }
+ 
+   /* Check that we are able to determine number of iterations and fill
+      in information about it.  */
+   iv_number_of_iterations (loop, at, condition, desc);
+ }
+ 
+ /* Finds a simple exit of LOOP and stores its description into DESC.
+    TODO Should replace cfgloopanal.c:simple_loop_p.  */
+ 
+ void
+ find_simple_exit (struct loop *loop, struct niter_desc *desc)
+ {
+   unsigned i;
+   basic_block *body;
+   edge e;
+   struct niter_desc act;
+   bool any = false;
+ 
+   desc->simple_p = false;
+   body = get_loop_body (loop);
+ 
+   /* Scan every edge leaving the loop and keep the most useful simple
+      exit found.  */
+   for (i = 0; i < loop->num_nodes; i++)
+     {
+       for (e = body[i]->succ; e; e = e->succ_next)
+ 	{
+ 	  /* Only edges whose destination is outside the loop are exits.  */
+ 	  if (flow_bb_inside_loop_p (loop, e->dest))
+ 	    continue;
+ 	  
+ 	  check_simple_exit (loop, e, &act);
+ 	  if (!act.simple_p)
+ 	    continue;
+ 
+ 	  /* Prefer constant iterations; the less the better.  */
+ 	  if (!any)
+ 	    any = true;
+ 	  else if (!act.const_iter
+ 		   || (desc->const_iter && act.niter >= desc->niter))
+ 	    continue;
+ 	  *desc = act;
+ 	}
+     }
+ 
+   /* Report the result into the dump file.  */
+   if (rtl_dump_file)
+     {
+       if (desc->simple_p)
+ 	{
+ 	  fprintf (rtl_dump_file, "Loop %d is simple:\n", loop->num);
+ 	  fprintf (rtl_dump_file, "  simple exit %d -> %d\n",
+ 		   desc->out_edge->src->index,
+ 		   desc->out_edge->dest->index);
+ 	  if (desc->assumptions)
+ 	    {
+ 	      fprintf (rtl_dump_file, "  assumptions: ");
+ 	      print_rtl (rtl_dump_file, desc->assumptions);
+ 	      fprintf (rtl_dump_file, "\n");
+ 	    }
+ 	  if (desc->noloop_assumptions)
+ 	    {
+ 	      fprintf (rtl_dump_file, "  does not roll if: ");
+ 	      print_rtl (rtl_dump_file, desc->noloop_assumptions);
+ 	      fprintf (rtl_dump_file, "\n");
+ 	    }
+ 	  if (desc->infinite)
+ 	    {
+ 	      fprintf (rtl_dump_file, "  infinite if: ");
+ 	      print_rtl (rtl_dump_file, desc->infinite);
+ 	      fprintf (rtl_dump_file, "\n");
+ 	    }
+ 
+ 	  fprintf (rtl_dump_file, "  number of iterations: ");
+ 	  print_rtl (rtl_dump_file, desc->niter_expr);
+       	  fprintf (rtl_dump_file, "\n");
+ 
+ 	  fprintf (rtl_dump_file, "  upper bound: ");
+ 	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter_max);
+       	  fprintf (rtl_dump_file, "\n");
+ 	}
+       else
+ 	fprintf (rtl_dump_file, "Loop %d is not simple.\n", loop->num);
+     }
+ 
+   free (body);
+ }
+ 
+ /* Creates a simple loop description of LOOP if it was not computed
+    already.  */
+ 
+ struct niter_desc *
+ get_simple_loop_desc (struct loop *loop)
+ {
+   struct niter_desc *desc = simple_loop_desc (loop);
+ 
+   /* The description is cached in loop->aux; reuse it if already there.  */
+   if (desc)
+     return desc;
+ 
+   /* Not yet computed; analyse the loop and cache the result.  The caller
+      releases it through free_simple_loop_desc.  */
+   desc = xmalloc (sizeof (struct niter_desc));
+   iv_analysis_loop_init (loop);
+   find_simple_exit (loop, desc);
+   loop->aux = desc;
+ 
+   return desc;
+ }
+ 
+ /* Releases simple loop description for LOOP.  */
+ 
+ void
+ free_simple_loop_desc (struct loop *loop)
+ {
+   struct niter_desc *desc = simple_loop_desc (loop);
+ 
+   if (!desc)
+     return;
+ 
+   /* Clear loop->aux as well, so that a later get_simple_loop_desc call
+      recomputes the description instead of using freed memory.  */
+   free (desc);
+   loop->aux = NULL;
+ }
Index: loop-unroll.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop-unroll.c,v
retrieving revision 1.13
diff -c -3 -p -r1.13 loop-unroll.c
*** loop-unroll.c	30 Dec 2003 10:40:54 -0000	1.13
--- loop-unroll.c	15 Feb 2004 22:48:09 -0000
*************** void
*** 85,91 ****
  unroll_and_peel_loops (struct loops *loops, int flags)
  {
    struct loop *loop, *next;
!   int check;
  
    /* First perform complete loop peeling (it is almost surely a win,
       and affects parameters for further decision a lot).  */
--- 85,91 ----
  unroll_and_peel_loops (struct loops *loops, int flags)
  {
    struct loop *loop, *next;
!   bool check;
  
    /* First perform complete loop peeling (it is almost surely a win,
       and affects parameters for further decision a lot).  */
*************** unroll_and_peel_loops (struct loops *loo
*** 110,116 ****
        else
  	next = loop->outer;
  
!       check = 1;
        /* And perform the appropriate transformations.  */
        switch (loop->lpt_decision.decision)
  	{
--- 110,116 ----
        else
  	next = loop->outer;
  
!       check = true;
        /* And perform the appropriate transformations.  */
        switch (loop->lpt_decision.decision)
  	{
*************** unroll_and_peel_loops (struct loops *loo
*** 130,136 ****
  	  unroll_loop_stupid (loops, loop);
  	  break;
  	case LPT_NONE:
! 	  check = 0;
  	  break;
  	default:
  	  abort ();
--- 130,136 ----
  	  unroll_loop_stupid (loops, loop);
  	  break;
  	case LPT_NONE:
! 	  check = false;
  	  break;
  	default:
  	  abort ();
*************** unroll_and_peel_loops (struct loops *loo
*** 144,149 ****
--- 144,172 ----
  	}
        loop = next;
      }
+ 
+   iv_analysis_done ();
+ }
+ 
+ /* Check whether exit of the LOOP is at the end of loop body.  */
+ 
+ static bool
+ loop_exit_at_end_p (struct loop *loop)
+ {
+   struct niter_desc *desc = get_simple_loop_desc (loop);
+   rtx insn;
+ 
+   /* The in-loop edge of the exit test must lead directly to the latch.  */
+   if (desc->in_edge->dest != loop->latch)
+     return false;
+ 
+   /* Check that the latch is empty.  */
+   FOR_BB_INSNS (loop->latch, insn)
+     {
+       if (INSN_P (insn))
+ 	return false;
+     }
+ 
+   return true;
  }
  
  /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
*************** peel_loops_completely (struct loops *loo
*** 168,177 ****
  	next = loop->outer;
  
        loop->lpt_decision.decision = LPT_NONE;
-       loop->has_desc = 0;
  
        if (rtl_dump_file)
! 	fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n",
  		 loop->num);
  
        loop->ninsns = num_loop_insns (loop);
--- 191,199 ----
  	next = loop->outer;
  
        loop->lpt_decision.decision = LPT_NONE;
  
        if (rtl_dump_file)
! 	fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n",
  		 loop->num);
  
        loop->ninsns = num_loop_insns (loop);
*************** decide_unrolling_and_peeling (struct loo
*** 216,222 ****
        loop->lpt_decision.decision = LPT_NONE;
  
        if (rtl_dump_file)
! 	fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num);
  
        /* Do not peel cold areas.  */
        if (!maybe_hot_bb_p (loop->header))
--- 238,244 ----
        loop->lpt_decision.decision = LPT_NONE;
  
        if (rtl_dump_file)
! 	fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
  
        /* Do not peel cold areas.  */
        if (!maybe_hot_bb_p (loop->header))
*************** decide_unrolling_and_peeling (struct loo
*** 269,276 ****
  static void
  decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
  {
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n");
  
    /* Is the loop small enough?  */
    if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
--- 291,300 ----
  static void
  decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
  {
+   struct niter_desc *desc;
+ 
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n");
  
    /* Is the loop small enough?  */
    if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
*************** decide_peel_once_rolling (struct loop *l
*** 281,291 ****
      }
  
    /* Check for simple loops.  */
!   loop->simple = simple_loop_p (loop, &loop->desc);
!   loop->has_desc = 1;
  
    /* Check number of iterations.  */
!   if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
--- 305,317 ----
      }
  
    /* Check for simple loops.  */
!   desc = get_simple_loop_desc (loop);
  
    /* Check number of iterations.  */
!   if (!desc->simple_p
!       || desc->assumptions
!       || !desc->const_iter
!       || desc->niter != 0)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
*************** static void
*** 303,311 ****
  decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
  {
    unsigned npeel;
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, ";; Considering peeling completely\n");
  
    /* Skip non-innermost loops.  */
    if (loop->inner)
--- 329,338 ----
  decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
  {
    unsigned npeel;
+   struct niter_desc *desc;
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, "\n;; Considering peeling completely\n");
  
    /* Skip non-innermost loops.  */
    if (loop->inner)
*************** decide_peel_completely (struct loop *loo
*** 346,371 ****
      }
  
    /* Check for simple loops.  */
!   if (!loop->has_desc)
!     {
!       loop->simple = simple_loop_p (loop, &loop->desc);
!       loop->has_desc = 1;
!     }
  
    /* Check number of iterations.  */
!   if (!loop->simple || !loop->desc.const_iter)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
        return;
      }
  
!   if (loop->desc.niter > npeel - 1)
      {
        if (rtl_dump_file)
  	{
  	  fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
! 	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter);
  	  fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
  	}
        return;
--- 373,396 ----
      }
  
    /* Check for simple loops.  */
!   desc = get_simple_loop_desc (loop);
  
    /* Check number of iterations.  */
!   if (!desc->simple_p
!       || desc->assumptions
!       || !desc->const_iter)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
        return;
      }
  
!   if (desc->niter > npeel - 1)
      {
        if (rtl_dump_file)
  	{
  	  fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
! 	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
  	  fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
  	}
        return;
*************** peel_loop_completely (struct loops *loop
*** 397,404 ****
    sbitmap wont_exit;
    unsigned HOST_WIDE_INT npeel;
    unsigned n_remove_edges, i;
!   edge *remove_edges;
!   struct loop_desc *desc = &loop->desc;
  
    npeel = desc->niter;
  
--- 422,429 ----
    sbitmap wont_exit;
    unsigned HOST_WIDE_INT npeel;
    unsigned n_remove_edges, i;
!   edge *remove_edges, ei;
!   struct niter_desc *desc = get_simple_loop_desc (loop);
  
    npeel = desc->niter;
  
*************** peel_loop_completely (struct loops *loop
*** 407,413 ****
        wont_exit = sbitmap_alloc (npeel + 1);
        sbitmap_ones (wont_exit);
        RESET_BIT (wont_exit, 0);
!       if (desc->may_be_zero)
  	RESET_BIT (wont_exit, 1);
  
        remove_edges = xcalloc (npeel, sizeof (edge));
--- 432,438 ----
        wont_exit = sbitmap_alloc (npeel + 1);
        sbitmap_ones (wont_exit);
        RESET_BIT (wont_exit, 0);
!       if (desc->noloop_assumptions)
  	RESET_BIT (wont_exit, 1);
  
        remove_edges = xcalloc (npeel, sizeof (edge));
*************** peel_loop_completely (struct loops *loop
*** 427,445 ****
        free (remove_edges);
      }
  
    /* Now remove the unreachable part of the last iteration and cancel
       the loop.  */
!   remove_path (loops, desc->in_edge);
  
    if (rtl_dump_file)
      fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
  }
  
  /* Decide whether to unroll LOOP iterating constant number of times and how much.  */
  static void
  decide_unroll_constant_iterations (struct loop *loop, int flags)
  {
!   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i;
  
    if (!(flags & UAP_UNROLL))
      {
--- 452,475 ----
        free (remove_edges);
      }
  
+   ei = desc->in_edge;
+   free_simple_loop_desc (loop);
+ 
    /* Now remove the unreachable part of the last iteration and cancel
       the loop.  */
!   remove_path (loops, ei);
  
    if (rtl_dump_file)
      fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
  }
  
  /* Decide whether to unroll LOOP iterating constant number of times and how much.  */
+ 
  static void
  decide_unroll_constant_iterations (struct loop *loop, int flags)
  {
!   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
!   struct niter_desc *desc;
  
    if (!(flags & UAP_UNROLL))
      {
*************** decide_unroll_constant_iterations (struc
*** 448,454 ****
      }
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n");
  
    /* nunroll = total number of copies of the original loop body in
       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
--- 478,485 ----
      }
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file,
! 	     "\n;; Considering unrolling loop with constant number of iterations\n");
  
    /* nunroll = total number of copies of the original loop body in
       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
*************** decide_unroll_constant_iterations (struc
*** 468,481 ****
      }
  
    /* Check for simple loops.  */
!   if (!loop->has_desc)
!     {
!       loop->simple = simple_loop_p (loop, &loop->desc);
!       loop->has_desc = 1;
!     }
  
    /* Check number of iterations.  */
!   if (!loop->simple || !loop->desc.const_iter)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
--- 499,508 ----
      }
  
    /* Check for simple loops.  */
!   desc = get_simple_loop_desc (loop);
  
    /* Check number of iterations.  */
!   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
*************** decide_unroll_constant_iterations (struc
*** 483,489 ****
      }
  
    /* Check whether the loop rolls enough to consider.  */
!   if (loop->desc.niter < 2 * nunroll)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
--- 510,516 ----
      }
  
    /* Check whether the loop rolls enough to consider.  */
!   if (desc->niter < 2 * nunroll)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
*************** decide_unroll_constant_iterations (struc
*** 497,512 ****
    best_copies = 2 * nunroll + 10;
  
    i = 2 * nunroll + 2;
!   if ((unsigned) i - 1 >= loop->desc.niter)
!     i = loop->desc.niter - 2;
  
    for (; i >= nunroll - 1; i--)
      {
!       unsigned exit_mod = loop->desc.niter % (i + 1);
  
!       if (loop->desc.postincr)
  	n_copies = exit_mod + i + 1;
!       else if (exit_mod != (unsigned) i || loop->desc.may_be_zero)
  	n_copies = exit_mod + i + 2;
        else
  	n_copies = i + 1;
--- 524,540 ----
    best_copies = 2 * nunroll + 10;
  
    i = 2 * nunroll + 2;
!   if (i - 1 >= desc->niter)
!     i = desc->niter - 2;
  
    for (; i >= nunroll - 1; i--)
      {
!       unsigned exit_mod = desc->niter % (i + 1);
  
!       if (!loop_exit_at_end_p (loop))
  	n_copies = exit_mod + i + 1;
!       else if (exit_mod != (unsigned) i
! 	       || desc->noloop_assumptions != NULL_RTX)
  	n_copies = exit_mod + i + 2;
        else
  	n_copies = i + 1;
*************** decide_unroll_constant_iterations (struc
*** 524,529 ****
--- 552,562 ----
  
    loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
    loop->lpt_decision.times = best_unroll;
+   
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file,
+ 	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
+ 	     loop->lpt_decision.times);
  }
  
  /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
*************** unroll_loop_constant_iterations (struct 
*** 554,564 ****
    unsigned n_remove_edges, i;
    edge *remove_edges;
    unsigned max_unroll = loop->lpt_decision.times;
!   struct loop_desc *desc = &loop->desc;
  
    niter = desc->niter;
  
!   if (niter <= (unsigned) max_unroll + 1)
      abort ();  /* Should not get here (such loop should be peeled instead).  */
  
    exit_mod = niter % (max_unroll + 1);
--- 587,598 ----
    unsigned n_remove_edges, i;
    edge *remove_edges;
    unsigned max_unroll = loop->lpt_decision.times;
!   struct niter_desc *desc = get_simple_loop_desc (loop);
!   bool exit_at_end = loop_exit_at_end_p (loop);
  
    niter = desc->niter;
  
!   if (niter <= max_unroll + 1)
      abort ();  /* Should not get here (such loop should be peeled instead).  */
  
    exit_mod = niter % (max_unroll + 1);
*************** unroll_loop_constant_iterations (struct 
*** 569,577 ****
    remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
    n_remove_edges = 0;
  
!   if (desc->postincr)
      {
!       /* Counter is incremented after the exit test; leave exit test
  	 in the first copy, so that the loops that start with test
  	 of exit condition have continuous body after unrolling.  */
  
--- 603,611 ----
    remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
    n_remove_edges = 0;
  
!   if (!exit_at_end)
      {
!       /* The exit is not at the end of the loop; leave exit test
  	 in the first copy, so that the loops that start with test
  	 of exit condition have continuous body after unrolling.  */
  
*************** unroll_loop_constant_iterations (struct 
*** 580,594 ****
  
        /* Peel exit_mod iterations.  */
        RESET_BIT (wont_exit, 0);
!       if (desc->may_be_zero)
  	RESET_BIT (wont_exit, 1);
  
!       if (exit_mod
! 	  && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
! 		loops, exit_mod,
! 		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
! 		DLTHE_FLAG_UPDATE_FREQ))
! 	abort ();
  
        SET_BIT (wont_exit, 1);
      }
--- 614,635 ----
  
        /* Peel exit_mod iterations.  */
        RESET_BIT (wont_exit, 0);
!       if (desc->noloop_assumptions)
  	RESET_BIT (wont_exit, 1);
  
!       if (exit_mod)
! 	{
! 	  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
! 					      loops, exit_mod,
! 					      wont_exit, desc->out_edge,
! 					      remove_edges, &n_remove_edges,
! 					      DLTHE_FLAG_UPDATE_FREQ))
! 	    abort ();
! 
! 	  desc->noloop_assumptions = NULL_RTX;
! 	  desc->niter -= exit_mod;
! 	  desc->niter_max -= exit_mod;
! 	}
  
        SET_BIT (wont_exit, 1);
      }
*************** unroll_loop_constant_iterations (struct 
*** 602,613 ****
  
        /* We know that niter >= max_unroll + 2; so we do not need to care of
  	 case when we would exit before reaching the loop.  So just peel
! 	 exit_mod + 1 iterations.
! 	 */
!       if (exit_mod != (unsigned) max_unroll || desc->may_be_zero)
  	{
  	  RESET_BIT (wont_exit, 0);
! 	  if (desc->may_be_zero)
  	    RESET_BIT (wont_exit, 1);
  
  	  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
--- 643,654 ----
  
        /* We know that niter >= max_unroll + 2; so we do not need to care of
  	 case when we would exit before reaching the loop.  So just peel
! 	 exit_mod + 1 iterations.  */
!       if (exit_mod != max_unroll
! 	  || desc->noloop_assumptions)
  	{
  	  RESET_BIT (wont_exit, 0);
! 	  if (desc->noloop_assumptions)
  	    RESET_BIT (wont_exit, 1);
  
  	  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
*************** unroll_loop_constant_iterations (struct 
*** 616,621 ****
--- 657,666 ----
  		DLTHE_FLAG_UPDATE_FREQ))
  	    abort ();
  
+ 	  desc->niter -= exit_mod + 1;
+ 	  desc->niter_max -= exit_mod + 1;
+ 	  desc->noloop_assumptions = NULL_RTX;
+ 
  	  SET_BIT (wont_exit, 0);
  	  SET_BIT (wont_exit, 1);
  	}
*************** unroll_loop_constant_iterations (struct 
*** 632,637 ****
--- 677,703 ----
  
    free (wont_exit);
  
+   if (exit_at_end)
+     {
+       basic_block exit_block = desc->in_edge->src->rbi->copy;
+       /* Find a new in and out edge; they are in the last copy we have made.  */
+       
+       if (exit_block->succ->dest == desc->out_edge->dest)
+ 	{
+ 	  desc->out_edge = exit_block->succ;
+ 	  desc->in_edge = exit_block->succ->succ_next;
+ 	}
+       else
+ 	{
+ 	  desc->out_edge = exit_block->succ->succ_next;
+ 	  desc->in_edge = exit_block->succ;
+ 	}
+     }
+ 
+   desc->niter /= max_unroll + 1;
+   desc->niter_max /= max_unroll + 1;
+   desc->niter_expr = GEN_INT (desc->niter);
+ 
    /* Remove the edges.  */
    for (i = 0; i < n_remove_edges; i++)
      remove_path (loops, remove_edges[i]);
*************** static void
*** 647,652 ****
--- 713,719 ----
  decide_unroll_runtime_iterations (struct loop *loop, int flags)
  {
    unsigned nunroll, nunroll_by_av, i;
+   struct niter_desc *desc;
  
    if (!(flags & UAP_UNROLL))
      {
*************** decide_unroll_runtime_iterations (struct
*** 655,661 ****
      }
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
  
    /* nunroll = total number of copies of the original loop body in
       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
--- 722,729 ----
      }
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file,
! 	     "\n;; Considering unrolling loop with runtime computable number of iterations\n");
  
    /* nunroll = total number of copies of the original loop body in
       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
*************** decide_unroll_runtime_iterations (struct
*** 675,695 ****
      }
  
    /* Check for simple loops.  */
!   if (!loop->has_desc)
!     {
!       loop->simple = simple_loop_p (loop, &loop->desc);
!       loop->has_desc = 1;
!     }
  
    /* Check simpleness.  */
!   if (!loop->simple)
      {
        if (rtl_dump_file)
! 	fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n");
        return;
      }
  
!   if (loop->desc.const_iter)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
--- 743,760 ----
      }
  
    /* Check for simple loops.  */
!   desc = get_simple_loop_desc (loop);
  
    /* Check simpleness.  */
!   if (!desc->simple_p || desc->assumptions)
      {
        if (rtl_dump_file)
! 	fprintf (rtl_dump_file,
! 		 ";; Unable to prove that the number of iterations can be counted in runtime\n");
        return;
      }
  
!   if (desc->const_iter)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
*************** decide_unroll_runtime_iterations (struct
*** 706,715 ****
  
    /* Success; now force nunroll to be power of 2, as we are unable to
       cope with overflows in computation of number of iterations.  */
!   for (i = 1; 2 * i <= nunroll; i *= 2);
  
    loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
    loop->lpt_decision.times = i - 1;
  }
  
  /* Unroll LOOP for that we are able to count number of iterations in runtime
--- 771,786 ----
  
    /* Success; now force nunroll to be power of 2, as we are unable to
       cope with overflows in computation of number of iterations.  */
!   for (i = 1; 2 * i <= nunroll; i *= 2)
!     continue;
  
    loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
    loop->lpt_decision.times = i - 1;
+   
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file,
+ 	     ";; Decided to unroll the runtime computable times rolling loop, %d times.\n",
+ 	     loop->lpt_decision.times);
  }
  
  /* Unroll LOOP for that we are able to count number of iterations in runtime
*************** decide_unroll_runtime_iterations (struct
*** 746,752 ****
  static void
  unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
  {
!   rtx niter, init_code, branch_code, jump, label;
    unsigned i, j, p;
    basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
    unsigned n_dom_bbs;
--- 817,823 ----
  static void
  unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
  {
!   rtx old_niter, niter, init_code, branch_code, tmp;
    unsigned i, j, p;
    basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
    unsigned n_dom_bbs;
*************** unroll_loop_runtime_iterations (struct l
*** 756,762 ****
    edge *remove_edges, e;
    bool extra_zero_check, last_may_exit;
    unsigned max_unroll = loop->lpt_decision.times;
!   struct loop_desc *desc = &loop->desc;
  
    /* Remember blocks whose dominators will have to be updated.  */
    dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
--- 827,834 ----
    edge *remove_edges, e;
    bool extra_zero_check, last_may_exit;
    unsigned max_unroll = loop->lpt_decision.times;
!   struct niter_desc *desc = get_simple_loop_desc (loop);
!   bool exit_at_end = loop_exit_at_end_p (loop);
  
    /* Remember blocks whose dominators will have to be updated.  */
    dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
*************** unroll_loop_runtime_iterations (struct l
*** 777,783 ****
      }
    free (body);
  
!   if (desc->postincr)
      {
        /* Leave exit in first copy (for explanation why see comment in
  	 unroll_loop_constant_iterations).  */
--- 849,855 ----
      }
    free (body);
  
!   if (!exit_at_end)
      {
        /* Leave exit in first copy (for explanation why see comment in
  	 unroll_loop_constant_iterations).  */
*************** unroll_loop_runtime_iterations (struct l
*** 798,812 ****
  
    /* Get expression for number of iterations.  */
    start_sequence ();
!   niter = count_loop_iterations (desc, NULL, NULL);
!   if (!niter)
!     abort ();
!   niter = force_operand (niter, NULL);
  
    /* Count modulo by ANDing it with max_unroll; we use the fact that
       the number of unrollings is a power of two, and thus this is correct
       even if there is overflow in the computation.  */
!   niter = expand_simple_binop (GET_MODE (desc->var), AND,
  			       niter,
  			       GEN_INT (max_unroll),
  			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
--- 870,884 ----
  
    /* Get expression for number of iterations.  */
    start_sequence ();
!   old_niter = niter = gen_reg_rtx (desc->mode);
!   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
!   if (tmp != niter)
!     emit_move_insn (niter, tmp);
  
    /* Count modulo by ANDing it with max_unroll; we use the fact that
       the number of unrollings is a power of two, and thus this is correct
       even if there is overflow in the computation.  */
!   niter = expand_simple_binop (desc->mode, AND,
  			       niter,
  			       GEN_INT (max_unroll),
  			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
*************** unroll_loop_runtime_iterations (struct l
*** 824,833 ****
  
    /* Peel the first copy of loop body (almost always we must leave exit test
       here; the only exception is when we have extra zero check and the number
!      of iterations is reliable (i.e. comes out of NE condition).  Also record
!      the place of (possible) extra zero check.  */
    sbitmap_zero (wont_exit);
!   if (extra_zero_check && desc->cond == NE)
      SET_BIT (wont_exit, 1);
    ezc_swtch = loop_preheader_edge (loop)->src;
    if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
--- 896,906 ----
  
    /* Peel the first copy of loop body (almost always we must leave exit test
       here; the only exception is when we have extra zero check and the number
!      of iterations is reliable.  Also record the place of (possible) extra
!      zero check.  */
    sbitmap_zero (wont_exit);
!   if (extra_zero_check
!       && !desc->noloop_assumptions)
      SET_BIT (wont_exit, 1);
    ezc_swtch = loop_preheader_edge (loop)->src;
    if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
*************** unroll_loop_runtime_iterations (struct l
*** 857,876 ****
        p = REG_BR_PROB_BASE / (i + 2);
  
        preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
!       label = block_label (preheader);
!       start_sequence ();
!       do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0,
! 			       GET_MODE (desc->var), NULL_RTX, NULL_RTX,
! 			       label);
!       jump = get_last_insn ();
!       JUMP_LABEL (jump) = label;
!       REG_NOTES (jump)
! 	      = gen_rtx_EXPR_LIST (REG_BR_PROB,
! 				   GEN_INT (p), REG_NOTES (jump));
! 
!       LABEL_NUSES (label)++;
!       branch_code = get_insns ();
!       end_sequence ();
  
        swtch = loop_split_edge_with (swtch->pred, branch_code);
        set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
--- 930,937 ----
        p = REG_BR_PROB_BASE / (i + 2);
  
        preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
!       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
! 					  block_label (preheader), p, NULL_RTX);
  
        swtch = loop_split_edge_with (swtch->pred, branch_code);
        set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
*************** unroll_loop_runtime_iterations (struct l
*** 886,905 ****
        p = REG_BR_PROB_BASE / (max_unroll + 1);
        swtch = ezc_swtch;
        preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
!       label = block_label (preheader);
!       start_sequence ();
!       do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0,
! 			       GET_MODE (desc->var), NULL_RTX, NULL_RTX,
! 			       label);
!       jump = get_last_insn ();
!       JUMP_LABEL (jump) = label;
!       REG_NOTES (jump)
! 	      = gen_rtx_EXPR_LIST (REG_BR_PROB,
! 				   GEN_INT (p), REG_NOTES (jump));
! 
!       LABEL_NUSES (label)++;
!       branch_code = get_insns ();
!       end_sequence ();
  
        swtch = loop_split_edge_with (swtch->succ, branch_code);
        set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
--- 947,954 ----
        p = REG_BR_PROB_BASE / (max_unroll + 1);
        swtch = ezc_swtch;
        preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
!       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
! 					  block_label (preheader), p, NULL_RTX);
  
        swtch = loop_split_edge_with (swtch->succ, branch_code);
        set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
*************** unroll_loop_runtime_iterations (struct l
*** 925,935 ****
--- 974,1018 ----
  
    free (wont_exit);
  
+   if (exit_at_end)
+     {
+       basic_block exit_block = desc->in_edge->src->rbi->copy;
+       /* Find a new in and out edge; they are in the last copy we have made.  */
+       
+       if (exit_block->succ->dest == desc->out_edge->dest)
+ 	{
+ 	  desc->out_edge = exit_block->succ;
+ 	  desc->in_edge = exit_block->succ->succ_next;
+ 	}
+       else
+ 	{
+ 	  desc->out_edge = exit_block->succ->succ_next;
+ 	  desc->in_edge = exit_block->succ;
+ 	}
+     }
+ 
    /* Remove the edges.  */
    for (i = 0; i < n_remove_edges; i++)
      remove_path (loops, remove_edges[i]);
    free (remove_edges);
  
+   /* We must be careful when updating the number of iterations due to
+      preconditioning and the fact that the value must be valid at entry
+      of the loop.  After passing through the above code, we see that
+      the correct new number of iterations is this:  */
+   if (desc->const_iter)
+     abort ();
+   desc->niter_expr =
+     simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
+   desc->niter_max /= max_unroll + 1;
+   if (exit_at_end)
+     {
+       desc->niter_expr =
+ 	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
+       desc->noloop_assumptions = NULL_RTX;
+       desc->niter_max--;
+     }
+ 
    if (rtl_dump_file)
      fprintf (rtl_dump_file,
  	     ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
*************** static void
*** 941,946 ****
--- 1024,1030 ----
  decide_peel_simple (struct loop *loop, int flags)
  {
    unsigned npeel;
+   struct niter_desc *desc;
  
    if (!(flags & UAP_PEEL))
      {
*************** decide_peel_simple (struct loop *loop, i
*** 949,955 ****
      }
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, ";; Considering simply peeling loop\n");
  
    /* npeel = number of iterations to peel.  */
    npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
--- 1033,1039 ----
      }
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n");
  
    /* npeel = number of iterations to peel.  */
    npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
*************** decide_peel_simple (struct loop *loop, i
*** 965,978 ****
      }
  
    /* Check for simple loops.  */
!   if (!loop->has_desc)
!     {
!       loop->simple = simple_loop_p (loop, &loop->desc);
!       loop->has_desc = 1;
!     }
  
    /* Check number of iterations.  */
!   if (loop->simple && loop->desc.const_iter)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
--- 1049,1058 ----
      }
  
    /* Check for simple loops.  */
!   desc = get_simple_loop_desc (loop);
  
    /* Check number of iterations.  */
!   if (desc->simple_p && !desc->assumptions && desc->const_iter)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
*************** decide_peel_simple (struct loop *loop, i
*** 981,987 ****
  
    /* Do not simply peel loops with branches inside -- it increases number
       of mispredicts.  */
!   if (loop->desc.n_branches > 1)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
--- 1061,1067 ----
  
    /* Do not simply peel loops with branches inside -- it increases number
       of mispredicts.  */
!   if (num_loop_branches (loop) > 1)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
*************** decide_peel_simple (struct loop *loop, i
*** 1016,1021 ****
--- 1096,1105 ----
    /* Success.  */
    loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
    loop->lpt_decision.times = npeel;
+       
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n",
+ 	     loop->lpt_decision.times);
  }
  
  /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
*************** peel_loop_simple (struct loops *loops, s
*** 1037,1042 ****
--- 1121,1127 ----
  {
    sbitmap wont_exit;
    unsigned npeel = loop->lpt_decision.times;
+   struct niter_desc *desc = get_simple_loop_desc (loop);
  
    wont_exit = sbitmap_alloc (npeel + 1);
    sbitmap_zero (wont_exit);
*************** peel_loop_simple (struct loops *loops, s
*** 1048,1053 ****
--- 1133,1155 ----
  
    free (wont_exit);
  
+   if (desc->simple_p)
+     {
+       if (desc->const_iter)
+ 	{
+ 	  desc->niter -= npeel;
+ 	  desc->niter_expr = GEN_INT (desc->niter);
+ 	  desc->noloop_assumptions = NULL_RTX;
+ 	}
+       else
+ 	{
+ 	  /* We cannot just update niter_expr, as its value might be clobbered
+ 	     inside loop.  We could handle this by counting the number into
+ 	     temporary just like we do in runtime unrolling, but it does not
+ 	     seem worthwhile.  */
+ 	  free_simple_loop_desc (loop);
+ 	}
+     }
    if (rtl_dump_file)
      fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
  }
*************** static void
*** 1057,1062 ****
--- 1159,1165 ----
  decide_unroll_stupid (struct loop *loop, int flags)
  {
    unsigned nunroll, nunroll_by_av, i;
+   struct niter_desc *desc;
  
    if (!(flags & UAP_UNROLL_ALL))
      {
*************** decide_unroll_stupid (struct loop *loop,
*** 1065,1071 ****
      }
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
  
    /* nunroll = total number of copies of the original loop body in
       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
--- 1168,1174 ----
      }
  
    if (rtl_dump_file)
!     fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n");
  
    /* nunroll = total number of copies of the original loop body in
       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
*************** decide_unroll_stupid (struct loop *loop,
*** 1085,1098 ****
      }
  
    /* Check for simple loops.  */
!   if (!loop->has_desc)
!     {
!       loop->simple = simple_loop_p (loop, &loop->desc);
!       loop->has_desc = 1;
!     }
  
    /* Check simpleness.  */
!   if (loop->simple)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; The loop is simple\n");
--- 1188,1197 ----
      }
  
    /* Check for simple loops.  */
!   desc = get_simple_loop_desc (loop);
  
    /* Check simpleness.  */
!   if (desc->simple_p && !desc->assumptions)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; The loop is simple\n");
*************** decide_unroll_stupid (struct loop *loop,
*** 1101,1107 ****
  
    /* Do not unroll loops with branches inside -- it increases number
       of mispredicts.  */
!   if (loop->desc.n_branches > 1)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
--- 1200,1206 ----
  
    /* Do not unroll loops with branches inside -- it increases number
       of mispredicts.  */
!   if (num_loop_branches (loop) > 1)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
*************** decide_unroll_stupid (struct loop *loop,
*** 1109,1115 ****
      }
  
    /* If we have profile feedback, check whether the loop rolls.  */
!   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
--- 1208,1215 ----
      }
  
    /* If we have profile feedback, check whether the loop rolls.  */
!   if (loop->header->count
!       && expected_loop_iterations (loop) < 2 * nunroll)
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
*************** decide_unroll_stupid (struct loop *loop,
*** 1119,1128 ****
    /* Success.  Now force nunroll to be power of 2, as it seems that this
       improves results (partially because of better alignments, partially
       because of some dark magic).  */
!   for (i = 1; 2 * i <= nunroll; i *= 2);
  
    loop->lpt_decision.decision = LPT_UNROLL_STUPID;
    loop->lpt_decision.times = i - 1;
  }
  
  /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
--- 1219,1234 ----
    /* Success.  Now force nunroll to be power of 2, as it seems that this
       improves results (partially because of better alignments, partially
       because of some dark magic).  */
!   for (i = 1; 2 * i <= nunroll; i *= 2)
!     continue;
  
    loop->lpt_decision.decision = LPT_UNROLL_STUPID;
    loop->lpt_decision.times = i - 1;
+       
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file,
+ 	     ";; Decided to unroll the loop stupidly, %d times.\n",
+ 	     loop->lpt_decision.times);
  }
  
  /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
*************** unroll_loop_stupid (struct loops *loops,
*** 1147,1152 ****
--- 1253,1259 ----
  {
    sbitmap wont_exit;
    unsigned nunroll = loop->lpt_decision.times;
+   struct niter_desc *desc = get_simple_loop_desc (loop);
  
    wont_exit = sbitmap_alloc (nunroll + 1);
    sbitmap_zero (wont_exit);
*************** unroll_loop_stupid (struct loops *loops,
*** 1157,1162 ****
--- 1264,1280 ----
      abort ();
  
    free (wont_exit);
+ 
+   if (desc->simple_p)
+     {
+       /* We indeed may get here provided that there are nontrivial assumptions
+ 	 for a loop to be really simple.  We could update the counts, but the
+ 	 problem is that we are unable to decide which exit will be taken
+ 	 (not really true in case the number of iterations is constant,
+ 	 but no one will do anything with this information, so we do not
+ 	 worry about it).  */
+       desc->simple_p = false;
+     }
  
    if (rtl_dump_file)
      fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
Index: loop-unswitch.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/loop-unswitch.c,v
retrieving revision 1.13
diff -c -3 -p -r1.13 loop-unswitch.c
*** loop-unswitch.c	31 Jan 2004 02:06:48 -0000	1.13
--- loop-unswitch.c	15 Feb 2004 22:48:09 -0000
*************** Software Foundation, 59 Temple Place - S
*** 79,89 ****
    with handling this case.  */
  
  static struct loop *unswitch_loop (struct loops *, struct loop *,
! 				   basic_block);
  static void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
! static bool may_unswitch_on_p (basic_block, struct loop *,
! 			       basic_block *);
! static rtx reversed_condition (rtx);
  
  /* Main entry point.  Perform loop unswitching on all suitable LOOPS.  */
  void
--- 79,141 ----
    with handling this case.  */
  
  static struct loop *unswitch_loop (struct loops *, struct loop *,
! 				   basic_block, rtx, rtx);
  static void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
! static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
! 
! /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
!    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
!    in order to create a jump.  */
! 
! rtx
! compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
! 		      rtx cinsn)
! {
!   rtx seq, jump, cond;
!   enum machine_mode mode;
! 
!   mode = GET_MODE (op0);
!   if (mode == VOIDmode)
!     mode = GET_MODE (op1);
! 
!   start_sequence ();
!   if (GET_MODE_CLASS (mode) == MODE_CC)
!     {
!       /* A hack -- there seems to be no easy generic way to make a
! 	 conditional jump from a ccmode comparison.  */
!       if (!cinsn)
! 	abort ();
!       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
!       if (GET_CODE (cond) != comp
! 	  || !rtx_equal_p (op0, XEXP (cond, 0))
! 	  || !rtx_equal_p (op1, XEXP (cond, 1)))
! 	abort ();
!       emit_jump_insn (copy_insn (PATTERN (cinsn)));
!       jump = get_last_insn ();
!       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
!       LABEL_NUSES (JUMP_LABEL (jump))++;
!       redirect_jump (jump, label, 0);
!     }
!   else
!     {
!       if (cinsn)
! 	abort ();
! 
!       op0 = force_operand (op0, NULL_RTX);
!       op1 = force_operand (op1, NULL_RTX);
!       do_compare_rtx_and_jump (op0, op1, comp, 0,
! 			       mode, NULL_RTX, NULL_RTX, label);
!       jump = get_last_insn ();
!       JUMP_LABEL (jump) = label;
!       LABEL_NUSES (label)++;
!     }
!   REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
! 					REG_NOTES (jump));
!   seq = get_insns ();
!   end_sequence ();
! 
!   return seq;
! }
  
  /* Main entry point.  Perform loop unswitching on all suitable LOOPS.  */
  void
*************** unswitch_loops (struct loops *loops)
*** 111,158 ****
        verify_loop_structure (loops);
  #endif
      }
  }
  
  /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
!    basic blocks (for what it means see comments below).  List of basic blocks
!    inside LOOP is provided in BODY to save time.  */
! static bool
! may_unswitch_on_p (basic_block bb, struct loop *loop, basic_block *body)
  {
!   rtx test;
    unsigned i;
  
    /* BB must end in a simple conditional jump.  */
    if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
!     return false;
    if (!any_condjump_p (BB_END (bb)))
!     return false;
  
    /* With branches inside loop.  */
    if (!flow_bb_inside_loop_p (loop, bb->succ->dest)
        || !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
!     return false;
  
    /* It must be executed just once each iteration (because otherwise we
       are unable to update dominator/irreducible loop information correctly).  */
    if (!just_once_each_iteration_p (loop, bb))
!     return false;
  
!   /* Condition must be invariant.  We use just a stupid test of invariantness
!      of the condition: all used regs must not be modified inside loop body.  */
!   test = get_condition (BB_END (bb), NULL, true);
    if (!test)
!     return false;
  
!   for (i = 0; i < loop->num_nodes; i++)
!     if (modified_between_p (test, BB_HEAD (body[i]), NEXT_INSN (BB_END (body[i]))))
!       return false;
  
!   return true;
  }
  
  /* Reverses CONDition; returns NULL if we cannot.  */
! static rtx
  reversed_condition (rtx cond)
  {
    enum rtx_code reversed;
--- 163,244 ----
        verify_loop_structure (loops);
  #endif
      }
+ 
+   iv_analysis_done ();
  }
  
  /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
!    basic blocks (for what it means see comments below).  In case condition
!    compares loop invariant cc mode register, return the jump in CINSN.  */
! 
! static rtx
! may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
  {
!   rtx test, at, insn, op[2];
!   struct rtx_iv iv;
    unsigned i;
+   enum machine_mode mode;
  
    /* BB must end in a simple conditional jump.  */
    if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
!     return NULL_RTX;
    if (!any_condjump_p (BB_END (bb)))
!     return NULL_RTX;
  
    /* With branches inside loop.  */
    if (!flow_bb_inside_loop_p (loop, bb->succ->dest)
        || !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
!     return NULL_RTX;
  
    /* It must be executed just once each iteration (because otherwise we
       are unable to update dominator/irreducible loop information correctly).  */
    if (!just_once_each_iteration_p (loop, bb))
!     return NULL_RTX;
  
!   /* Condition must be invariant.  */
!   test = get_condition (BB_END (bb), &at, true);
    if (!test)
!     return NULL_RTX;
! 
!   for (i = 0; i < 2; i++)
!     {
!       op[i] = XEXP (test, i);
! 
!       if (CONSTANT_P (op[i]))
! 	continue;
! 
!       insn = iv_get_reaching_def (at, op[i]);
!       if (!iv_analyse (insn, op[i], &iv))
! 	return NULL_RTX;
!       if (iv.step != const0_rtx
! 	  || iv.first_special)
! 	return NULL_RTX;
! 
!       op[i] = get_iv_value (&iv, const0_rtx);
!     }
! 
!   mode = GET_MODE (op[0]);
!   if (mode == VOIDmode)
!     mode = GET_MODE (op[1]);
!   if (GET_MODE_CLASS (mode) == MODE_CC)
!     {
!       if (at != BB_END (bb))
! 	return NULL_RTX;
  
!       *cinsn = BB_END (bb);
!       if (!rtx_equal_p (op[0], XEXP (test, 0))
! 	  || !rtx_equal_p (op[1], XEXP (test, 1)))
! 	return NULL_RTX;
  
!       return test;
!     }
! 
!   return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
! 					  op[0], op[1]));
  }
  
  /* Reverses CONDition; returns NULL if we cannot.  */
! rtx
  reversed_condition (rtx cond)
  {
    enum rtx_code reversed;
*************** static void
*** 173,185 ****
  unswitch_single_loop (struct loops *loops, struct loop *loop,
  		      rtx cond_checked, int num)
  {
!   basic_block *bbs, bb;
    struct loop *nloop;
    unsigned i;
!   int true_first;
!   rtx cond, rcond, conds, rconds, acond, split_before;
!   int always_true;
!   int always_false;
    int repeat;
    edge e;
  
--- 259,268 ----
  unswitch_single_loop (struct loops *loops, struct loop *loop,
  		      rtx cond_checked, int num)
  {
!   basic_block *bbs;
    struct loop *nloop;
    unsigned i;
!   rtx cond, rcond, conds, rconds, acond, cinsn = NULL_RTX;
    int repeat;
    edge e;
  
*************** unswitch_single_loop (struct loops *loop
*** 237,244 ****
  
        /* Find a bb to unswitch on.  */
        bbs = get_loop_body (loop);
        for (i = 0; i < loop->num_nodes; i++)
! 	if (may_unswitch_on_p (bbs[i], loop, bbs))
  	  break;
  
        if (i == loop->num_nodes)
--- 320,328 ----
  
        /* Find a bb to unswitch on.  */
        bbs = get_loop_body (loop);
+       iv_analysis_loop_init (loop);
        for (i = 0; i < loop->num_nodes; i++)
! 	if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
  	  break;
  
        if (i == loop->num_nodes)
*************** unswitch_single_loop (struct loops *loop
*** 247,285 ****
  	  return;
  	}
  
-       if (!(cond = get_condition (BB_END (bbs[i]), &split_before, true)))
- 	abort ();
        rcond = reversed_condition (cond);
  
        /* Check whether the result can be predicted.  */
-       always_true = 0;
-       always_false = 0;
        for (acond = cond_checked; acond; acond = XEXP (acond, 1))
! 	{
! 	  if (rtx_equal_p (cond, XEXP (acond, 0)))
! 	    {
! 	      always_true = 1;
! 	      break;
! 	    }
! 	  if (rtx_equal_p (rcond, XEXP (acond, 0)))
! 	    {
! 	      always_false = 1;
! 	      break;
! 	    }
! 	}
  
!       if (always_true)
  	{
  	  /* Remove false path.  */
! 	  for (e = bbs[i]->succ; !(e->flags & EDGE_FALLTHRU); e = e->succ_next);
  	  remove_path (loops, e);
  	  free (bbs);
  	  repeat = 1;
  	}
!       else if (always_false)
  	{
  	  /* Remove true path.  */
! 	  for (e = bbs[i]->succ; e->flags & EDGE_FALLTHRU; e = e->succ_next);
  	  remove_path (loops, e);
  	  free (bbs);
  	  repeat = 1;
--- 331,356 ----
  	  return;
  	}
  
        rcond = reversed_condition (cond);
+       if (rcond)
+ 	rcond = canon_condition (rcond);
  
        /* Check whether the result can be predicted.  */
        for (acond = cond_checked; acond; acond = XEXP (acond, 1))
! 	simplify_using_condition (XEXP (acond, 0), &cond, NULL);
  
!       if (cond == const_true_rtx)
  	{
  	  /* Remove false path.  */
! 	  e = FALLTHRU_EDGE (bbs[i]);
  	  remove_path (loops, e);
  	  free (bbs);
  	  repeat = 1;
  	}
!       else if (cond == const0_rtx)
  	{
  	  /* Remove true path.  */
! 	  e = BRANCH_EDGE (bbs[i]);
  	  remove_path (loops, e);
  	  free (bbs);
  	  repeat = 1;
*************** unswitch_single_loop (struct loops *loop
*** 293,313 ****
    else
      rconds = cond_checked;
  
-   /* Separate condition in a single basic block.  */
-   bb = split_loop_bb (bbs[i], PREV_INSN (split_before))->dest;
-   free (bbs);
-   true_first = !(bb->succ->flags & EDGE_FALLTHRU);
    if (rtl_dump_file)
      fprintf (rtl_dump_file, ";; Unswitching loop\n");
  
    /* Unswitch the loop on this condition.  */
!   nloop = unswitch_loop (loops, loop, bb);
    if (!nloop)
    abort ();
  
    /* Invoke itself on modified loops.  */
!   unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num + 1);
!   unswitch_single_loop (loops, loop, true_first ? rconds : conds, num + 1);
  
    free_EXPR_LIST_node (conds);
    if (rcond)
--- 364,380 ----
    else
      rconds = cond_checked;
  
    if (rtl_dump_file)
      fprintf (rtl_dump_file, ";; Unswitching loop\n");
  
    /* Unswitch the loop on this condition.  */
!   nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn);
    if (!nloop)
    abort ();
  
    /* Invoke itself on modified loops.  */
!   unswitch_single_loop (loops, nloop, rconds, num + 1);
!   unswitch_single_loop (loops, loop, conds, num + 1);
  
    free_EXPR_LIST_node (conds);
    if (rcond)
*************** unswitch_single_loop (struct loops *loop
*** 316,332 ****
  
  /* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON.  We only support
     unswitching of innermost loops.  UNSWITCH_ON must be executed in every
!    iteration, i.e. it must dominate LOOP latch, and should only contain code
!    for the condition we unswitch on.  Returns NULL if impossible, new
!    loop otherwise.  */
  static struct loop *
! unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
  {
!   edge entry, latch_edge;
    basic_block switch_bb, unswitch_on_alt, src;
    struct loop *nloop;
    sbitmap zero_bitmap;
!   int irred_flag;
  
    /* Some sanity checking.  */
    if (!flow_bb_inside_loop_p (loop, unswitch_on))
--- 383,403 ----
  
  /* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON.  We only support
     unswitching of innermost loops.  UNSWITCH_ON must be executed in every
!    iteration, i.e. it must dominate LOOP latch.  COND is the condition
!    determining which loop is entered.  Returns NULL if impossible, new loop
!    otherwise.  The new loop is entered if COND is true.  If CINSN is not
!    NULL, it is the insn in that COND is compared.  */
! 
  static struct loop *
! unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on,
! 	       rtx cond, rtx cinsn)
  {
!   edge entry, latch_edge, true_edge, false_edge, e;
    basic_block switch_bb, unswitch_on_alt, src;
    struct loop *nloop;
    sbitmap zero_bitmap;
!   int irred_flag, prob;
!   rtx seq;
  
    /* Some sanity checking.  */
    if (!flow_bb_inside_loop_p (loop, unswitch_on))
*************** unswitch_loop (struct loops *loops, stru
*** 343,354 ****
    if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest))
      abort ();
  
-   /* Will we be able to perform redirection?  */
-   if (!any_condjump_p (BB_END (unswitch_on)))
-     return NULL;
-   if (!cfg_layout_can_duplicate_bb_p (unswitch_on))
-     return NULL;
- 
    entry = loop_preheader_edge (loop);
  
    /* Make a copy.  */
--- 414,419 ----
*************** unswitch_loop (struct loops *loops, stru
*** 365,374 ****
  
    /* Record the block with condition we unswitch on.  */
    unswitch_on_alt = unswitch_on->rbi->copy;
  
-   /* Make a copy of the block containing the condition; we will use
-      it as switch to decide which loop we want to use.  */
-   switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL);
    if (irred_flag)
      {
        switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
--- 430,453 ----
  
    /* Record the block with condition we unswitch on.  */
    unswitch_on_alt = unswitch_on->rbi->copy;
+   true_edge = BRANCH_EDGE (unswitch_on_alt);
+   false_edge = FALLTHRU_EDGE (unswitch_on);
+   latch_edge = loop->latch->rbi->copy->succ;
+ 
+   /* Create a block with the condition.  */
+   prob = true_edge->probability;
+   switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
+   seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
+ 			      block_label (true_edge->dest),
+ 			      prob, cinsn);
+   emit_insn_after (seq, BB_END (switch_bb));
+   e = make_edge (switch_bb, true_edge->dest, 0);
+   e->probability = prob;
+   e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
+   e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
+   e->probability = false_edge->probability;
+   e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
  
    if (irred_flag)
      {
        switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
*************** unswitch_loop (struct loops *loops, stru
*** 381,399 ****
        switch_bb->succ->flags &= ~EDGE_IRREDUCIBLE_LOOP;
        switch_bb->succ->succ_next->flags &= ~EDGE_IRREDUCIBLE_LOOP;
      }
-   unswitch_on->rbi->copy = unswitch_on_alt;
  
    /* Loopify from the copy of LOOP body, constructing the new loop.  */
-   for (latch_edge = loop->latch->rbi->copy->succ;
-        latch_edge->dest != loop->header;
-        latch_edge = latch_edge->succ_next);
    nloop = loopify (loops, latch_edge,
  		   loop->header->rbi->copy->pred, switch_bb);
  
!   /* Remove branches that are now unreachable in new loops.  We rely on the
!      fact that cfg_layout_duplicate_bb reverses list of edges.  */
!   remove_path (loops, unswitch_on->succ);
!   remove_path (loops, unswitch_on_alt->succ);
  
    /* One of created loops do not have to be subloop of the outer loop now,
       so fix its placement in loop data structure.  */
--- 460,473 ----
        switch_bb->succ->flags &= ~EDGE_IRREDUCIBLE_LOOP;
        switch_bb->succ->succ_next->flags &= ~EDGE_IRREDUCIBLE_LOOP;
      }
  
    /* Loopify from the copy of LOOP body, constructing the new loop.  */
    nloop = loopify (loops, latch_edge,
  		   loop->header->rbi->copy->pred, switch_bb);
  
!   /* Remove branches that are now unreachable in new loops.  */
!   remove_path (loops, true_edge);
!   remove_path (loops, false_edge);
  
    /* One of created loops do not have to be subloop of the outer loop now,
       so fix its placement in loop data structure.  */
Index: predict.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/predict.c,v
retrieving revision 1.99
diff -c -3 -p -r1.99 predict.c
*** predict.c	27 Jan 2004 19:54:42 -0000	1.99
--- predict.c	15 Feb 2004 22:48:09 -0000
*************** estimate_probability (struct loops *loop
*** 406,418 ****
        unsigned j;
        int exits;
        struct loop *loop = loops_info->parray[i];
!       struct loop_desc desc;
        unsigned HOST_WIDE_INT niter;
  
        flow_loop_scan (loop, LOOP_EXIT_EDGES);
        exits = loop->num_exits;
  
!       if (simple_loop_p (loop, &desc) && desc.const_iter)
  	{
  	  int prob;
  	  niter = desc.niter + 1;
--- 406,421 ----
        unsigned j;
        int exits;
        struct loop *loop = loops_info->parray[i];
!       struct niter_desc desc;
        unsigned HOST_WIDE_INT niter;
  
        flow_loop_scan (loop, LOOP_EXIT_EDGES);
        exits = loop->num_exits;
  
!       iv_analysis_loop_init (loop);
!       find_simple_exit (loop, &desc);
! 
!       if (desc.simple_p && desc.const_iter)
  	{
  	  int prob;
  	  niter = desc.niter + 1;
*************** estimate_probability (struct loops *loop
*** 471,476 ****
--- 474,481 ----
        /* Free basic blocks from get_loop_body.  */
        free (bbs);
      }
+ 
+   iv_analysis_done ();
  
    /* Attempt to predict conditional jumps using a number of heuristics.  */
    FOR_EACH_BB (bb)
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/rtl.h,v
retrieving revision 1.456
diff -c -3 -p -r1.456 rtl.h
*** rtl.h	10 Feb 2004 11:38:12 -0000	1.456
--- rtl.h	15 Feb 2004 22:48:09 -0000
*************** extern void tracer (void);
*** 2361,2364 ****
--- 2361,2375 ----
  /* In var-tracking.c */
  extern void variable_tracking_main (void);
  
+ /* In stor-layout.c.  */
+ extern void get_mode_bounds (enum machine_mode, int, rtx *, rtx *);
+ 
+ /* In loop-unswitch.c  */
+ extern rtx reversed_condition (rtx);
+ extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx);
+ 
+ /* In loop-iv.c  */
+ extern rtx canon_condition (rtx);
+ extern void simplify_using_condition (rtx, rtx *, struct bitmap_head_def *);
+ 
  #endif /* ! GCC_RTL_H */
Index: stor-layout.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/stor-layout.c,v
retrieving revision 1.175
diff -c -3 -p -r1.175 stor-layout.c
*** stor-layout.c	30 Dec 2003 19:49:59 -0000	1.175
--- stor-layout.c	15 Feb 2004 22:48:09 -0000
*************** get_best_mode (int bitsize, int bitpos, 
*** 2118,2121 ****
--- 2118,2141 ----
    return mode;
  }
  
+ /* Gets minimal and maximal values for MODE (signed or unsigned depending on
+    SIGN).  */
+ 
+ void
+ get_mode_bounds (enum machine_mode mode, int sign, rtx *mmin, rtx *mmax)
+ {
+   int size = GET_MODE_BITSIZE (mode);
+ 
+   if (sign)
+     {
+       *mmin = GEN_INT (-((unsigned HOST_WIDEST_INT) 1 << (size - 1)));
+       *mmax = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1)) - 1);
+     }
+   else
+     {
+       *mmin = const0_rtx;
+       *mmax = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1) << 1) - 1);
+     }
+ }
+ 
  #include "gt-stor-layout.h"
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.876
diff -c -3 -p -r1.876 toplev.c
*** toplev.c	12 Feb 2004 21:42:26 -0000	1.876
--- toplev.c	15 Feb 2004 22:48:09 -0000
*************** static void
*** 3034,3044 ****
--- 3034,3049 ----
  rest_of_handle_loop2 (tree decl, rtx insns)
  {
    struct loops *loops;
+   basic_block bb;
+ 
    timevar_push (TV_LOOP);
    open_dump_file (DFI_loop2, decl);
    if (rtl_dump_file)
      dump_flow_info (rtl_dump_file);
  
+   /* Initialize structures for layout changes.  */
+   cfg_layout_initialize ();
+ 
    loops = loop_optimizer_init (rtl_dump_file);
  
    if (loops)
*************** rest_of_handle_loop2 (tree decl, rtx ins
*** 3055,3060 ****
--- 3060,3071 ----
  
        loop_optimizer_finalize (loops, rtl_dump_file);
      }
+ 
+   /* Finalize layout changes.  */
+   FOR_EACH_BB (bb)
+     if (bb->next_bb != EXIT_BLOCK_PTR)
+       bb->rbi->next = bb->next_bb;
+   cfg_layout_finalize ();
  
    cleanup_cfg (CLEANUP_EXPENSIVE);
    delete_trivially_dead_insns (insns, max_reg_num ());


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]