[lno] Speedups + trivial induction variable elimination

Tue Mar 2 00:33:00 GMT 2004

Hello,

this patch improves the speed of the induction variable optimizations,
especially on testcases of type '1000 loops in a single function'
(such things really exist, see PR12440).

It also adds a very primitive induction variable elimination.

Zdenek

Index: ChangeLog.lno
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/ChangeLog.lno,v
retrieving revision 1.1.2.65
diff -c -3 -p -r1.1.2.65 ChangeLog.lno
*** ChangeLog.lno	27 Feb 2004 23:07:31 -0000	1.1.2.65
--- ChangeLog.lno	2 Mar 2004 00:23:17 -0000
***************
*** 1,3 ****
--- 1,26 ----
+ 2004-03-01  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>
+ 
+ 	* Makefile.in (tree-ssa-loop-ivopts.o): Add HASHTAB_H dependency.
+ 	* tree-ssa-loop-ivopts.c: Include hashtab.h.
+ 	(old_highest_ssa_version): Rename to version_info_size.
+ 	(struct tree_niter_desc): Split from ...
+ 	(struct loop_data): ... here.
+ 	(relevant): New variable.
+ 	(tree_ssa_iv_optimize_init, set_iv, find_induction_variables,
+ 	record_invariant, find_interesting_uses, add_old_ivs_candidates,
+ 	determine_set_costs, free_loop_data, tree_ssa_iv_optimize_finalize):
+ 	Use bitmap of relevant ssa names.
+ 	(var_at_use): New function.
+ 	(get_computation): Use it.
+ 	(multiply_by_cost): Cache all results.
+ 	(mbc_entry_hash, mbc_entry_eq): New functions.
+ 	(number_of_iterations_cond): Split from ...
+ 	(determine_number_of_iterations): ... here.
+ 
+ 	(cand_value_at, may_eliminate_iv): New functions.
+ 	(determine_use_iv_cost_condition, rewrite_use_compare): Implement iv
+ 	elimination.
+ 
  2004-02-27  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>

  	* tree-cfg.c (cleanup_control_expr_graph): Prevent probability from
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.903.2.158.2.14
diff -c -3 -p -r1.903.2.158.2.14 Makefile.in
*** Makefile.in	21 Feb 2004 23:09:25 -0000	1.903.2.158.2.14
--- Makefile.in	2 Mar 2004 00:23:18 -0000
*************** tree-ssa-loop-im.o : tree-ssa-loop-im.c 
*** 1619,1625 ****
  tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) cfgloop.h varray.h $(EXPR_H) \
     output.h diagnostic.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
!    tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h
  tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) cfgloop.h \
     output.h diagnostic.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
--- 1619,1625 ----
  tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) cfgloop.h varray.h $(EXPR_H) \
     output.h diagnostic.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
!    tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H)
  tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) cfgloop.h \
     output.h diagnostic.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
Index: tree-ssa-loop-ivopts.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-ssa-loop-ivopts.c,v
retrieving revision 1.1.2.11
diff -c -3 -p -r1.1.2.11 tree-ssa-loop-ivopts.c
*** tree-ssa-loop-ivopts.c	27 Feb 2004 23:07:32 -0000	1.1.2.11
--- tree-ssa-loop-ivopts.c	2 Mar 2004 00:23:18 -0000
*************** Software Foundation, 59 Temple Place - S
*** 83,88 ****
--- 83,89 ----
  #include "ggc.h"
  #include "insn-config.h"
  #include "recog.h"
+ #include "hashtab.h"

  /* The infinite cost.  */
  #define INFTY 10000000
*************** struct iv
*** 106,113 ****
     if needed?  */
  static struct loop *current_loop;

! /* The highest ssa name before optimizations.  */
! static unsigned old_highest_ssa_version;

  /* Per-ssa version information (induction variable descriptions, etc.).  */
  struct version_info
--- 107,114 ----
     if needed?  */
  static struct loop *current_loop;

! /* The size of version_info array allocated.  */
! static unsigned version_info_size;

  /* Per-ssa version information (induction variable descriptions, etc.).  */
  struct version_info
*************** struct version_info
*** 124,142 ****
  /* The array of this information indexed by the ssa name version.  */
  static struct version_info *version_info;

  /* The maximum invariant id.  */
  static unsigned max_inv_id;

  /* Information attached to loop.  */
  struct loop_data
  {
    unsigned n_exits;	/* Number of exit edges.  */
    edge single_exit;	/* The exit edge in case there is exactly one and
  			   its source dominates the loops latch.  */
!   tree assumptions;	/* Assumptions for the number of iterations be valid.  */
!   tree may_be_zero;	/* Condition under that the loop exits in the first
! 			   iteration.  */
!   tree niter;		/* Number of iterations.  */

    unsigned regs_used;	/* Number of registers used.  */
  };
--- 125,153 ----
  /* The array of this information indexed by the ssa name version.  */
  static struct version_info *version_info;

+ /* The bitmap of indices in version_info whose value was changed.  */
+ static bitmap relevant;
+ 
  /* The maximum invariant id.  */
  static unsigned max_inv_id;

+ /* Description of number of iterations of a loop.  */
+ struct tree_niter_desc
+ {
+   tree assumptions;	/* Assumptions for the number of iterations be valid.  */
+   tree may_be_zero;	/* Condition under that the loop exits in the first
+ 			   iteration.  */
+   tree niter;		/* Number of iterations.  */
+ };
+ 
  /* Information attached to loop.  */
  struct loop_data
  {
    unsigned n_exits;	/* Number of exit edges.  */
    edge single_exit;	/* The exit edge in case there is exactly one and
  			   its source dominates the loops latch.  */
!   struct tree_niter_desc niter;
! 			/* Number of iterations.  */

    unsigned regs_used;	/* Number of registers used.  */
  };
*************** tree_ssa_iv_optimize_init (struct loops 
*** 893,900 ****
  {
    unsigned i;

!   old_highest_ssa_version = highest_ssa_version;
!   version_info = xcalloc (highest_ssa_version, sizeof (struct version_info));

    for (i = 1; i < loops->num; i++)
      if (loops->parray[i])
--- 904,912 ----
  {
    unsigned i;

!   version_info_size = 2 * highest_ssa_version;
!   version_info = xcalloc (version_info_size, sizeof (struct version_info));
!   relevant = BITMAP_XMALLOC ();

    for (i = 1; i < loops->num; i++)
      if (loops->parray[i])
*************** set_iv (tree iv, tree base, tree step)
*** 938,943 ****
--- 950,956 ----
    if (info->iv)
      abort ();

+   bitmap_set_bit (relevant, SSA_NAME_VERSION (iv));
    info->iv = alloc_iv (base, step);
    info->iv->ssa_name = iv;
  }
*************** inverse (tree x, tree mask)
*** 1293,1304 ****
    return rslt;
  }

! /* Determine the number of iterations.  */

  static void
! determine_number_of_iterations (void)
  {
!   tree stmt, cond, type, op0, op1;
    enum tree_code code;
    tree base0, step0, base1, step1, step, delta, mmin, mmax;
    tree may_xform, bound, s, d, tmp;
--- 1306,1318 ----
    return rslt;
  }

! /* Determine the number of iterations according to condition COND (for staying
!    inside loop).  Store the results to NITER.  */

  static void
! number_of_iterations_cond (tree cond, struct tree_niter_desc *niter)
  {
!   tree type, op0, op1;
    enum tree_code code;
    tree base0, step0, base1, step1, step, delta, mmin, mmax;
    tree may_xform, bound, s, d, tmp;
*************** determine_number_of_iterations (void)
*** 1316,1333 ****
         if !noloop_assumptions, then the loop does not end before the computed
         number of iterations)  */

-   if (!LOOP_DATA (current_loop)->single_exit)
-     return;
- 
-   stmt = last_stmt (LOOP_DATA (current_loop)->single_exit->src);
-   if (!stmt || TREE_CODE (stmt) != COND_EXPR)
-     return;
- 
-   /* We want the condition for staying inside loop.  */
-   cond = COND_EXPR_COND (stmt);
-   if (LOOP_DATA (current_loop)->single_exit->flags & EDGE_TRUE_VALUE)
-     cond = invert_truthvalue (cond);
- 
    code = TREE_CODE (cond);
    switch (code)
      {
--- 1330,1335 ----
*************** determine_number_of_iterations (void)
*** 1415,1427 ****
  	  else
  	    assumption = boolean_true_node;
  	  if (integer_nonzerop (assumption))
! 	    {
! 	      /* We exit immediatelly.  */
! 	      LOOP_DATA (current_loop)->assumptions = boolean_true_node;
! 	      LOOP_DATA (current_loop)->may_be_zero = boolean_true_node;
! 	      LOOP_DATA (current_loop)->niter = convert (type, integer_zero_node);
! 	      return;
! 	    }
  	  base0 = fold (build (PLUS_EXPR, type, base0, integer_one_node));
  	}
        else
--- 1417,1423 ----
  	  else
  	    assumption = boolean_true_node;
  	  if (integer_nonzerop (assumption))
! 	    goto zero_iter;
  	  base0 = fold (build (PLUS_EXPR, type, base0, integer_one_node));
  	}
        else
*************** determine_number_of_iterations (void)
*** 1431,1442 ****
  	  else
  	    assumption = boolean_true_node;
  	  if (integer_nonzerop (assumption))
! 	    {
! 	      LOOP_DATA (current_loop)->assumptions = boolean_true_node;
! 	      LOOP_DATA (current_loop)->may_be_zero = boolean_true_node;
! 	      LOOP_DATA (current_loop)->niter = convert (type, integer_zero_node);
! 	      return;
! 	    }
  	  base1 = fold (build (MINUS_EXPR, type, base1, integer_one_node));
  	}
        noloop_assumptions = assumption;
--- 1427,1433 ----
  	  else
  	    assumption = boolean_true_node;
  	  if (integer_nonzerop (assumption))
! 	    goto zero_iter;
  	  base1 = fold (build (MINUS_EXPR, type, base1, integer_one_node));
  	}
        noloop_assumptions = assumption;
*************** determine_number_of_iterations (void)
*** 1578,1590 ****

  	  s = EXEC_BINARY (RSHIFT_EXPR, type, s, integer_one_node);
  	  d = EXEC_BINARY (LSHIFT_EXPR, type, d, integer_one_node);
! 	  bound = EXEC_BINARY (RSHIFT_EXPR, type, d, integer_one_node);
  	}

        tmp = fold (build (EXACT_DIV_EXPR, type, base1, d));
        tmp = fold (build (MULT_EXPR, type, tmp, inverse (s, bound)));
!       LOOP_DATA (current_loop)->niter = fold (build (BIT_AND_EXPR, type,
! 						     tmp, bound));
      }
    else
      {
--- 1569,1580 ----

  	  s = EXEC_BINARY (RSHIFT_EXPR, type, s, integer_one_node);
  	  d = EXEC_BINARY (LSHIFT_EXPR, type, d, integer_one_node);
! 	  bound = EXEC_BINARY (RSHIFT_EXPR, type, bound, integer_one_node);
  	}

        tmp = fold (build (EXACT_DIV_EXPR, type, base1, d));
        tmp = fold (build (MULT_EXPR, type, tmp, inverse (s, bound)));
!       niter->niter = fold (build (BIT_AND_EXPR, type, tmp, bound));
      }
    else
      {
*************** determine_number_of_iterations (void)
*** 1632,1642 ****
        noloop_assumptions = fold (build (TRUTH_OR_EXPR, boolean_type_node,
  					noloop_assumptions, assumption));
        delta = fold (build (FLOOR_DIV_EXPR, type, delta, step));
!       LOOP_DATA (current_loop)->niter = delta;
      }

!   LOOP_DATA (current_loop)->assumptions = assumptions;
!   LOOP_DATA (current_loop)->may_be_zero = noloop_assumptions;
  }

  /* For each ssa name defined in LOOP determines whether it is an induction
--- 1622,1661 ----
        noloop_assumptions = fold (build (TRUTH_OR_EXPR, boolean_type_node,
  					noloop_assumptions, assumption));
        delta = fold (build (FLOOR_DIV_EXPR, type, delta, step));
!       niter->niter = delta;
      }

!   niter->assumptions = assumptions;
!   niter->may_be_zero = noloop_assumptions;
!   return;
! 
! zero_iter:
!   niter->assumptions = boolean_true_node;
!   niter->may_be_zero = boolean_true_node;
!   niter->niter = convert (type, integer_zero_node);
!   return;
! }
! 
! /* Determine the number of iterations of the current loop.  */
! 
! static void
! determine_number_of_iterations (void)
! {
!   tree stmt, cond;
! 
!   if (!LOOP_DATA (current_loop)->single_exit)
!     return;
! 
!   stmt = last_stmt (LOOP_DATA (current_loop)->single_exit->src);
!   if (!stmt || TREE_CODE (stmt) != COND_EXPR)
!     return;
! 
!   /* We want the condition for staying inside loop.  */
!   cond = COND_EXPR_COND (stmt);
!   if (LOOP_DATA (current_loop)->single_exit->flags & EDGE_TRUE_VALUE)
!     cond = invert_truthvalue (cond);
! 
!   number_of_iterations_cond (cond, &LOOP_DATA (current_loop)->niter);
  }

  /* For each ssa name defined in LOOP determines whether it is an induction
*************** find_induction_variables (void)
*** 1659,1678 ****

    if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
      {
!       if (LOOP_DATA (current_loop)->niter)
  	{
  	  fprintf (tree_dump_file, "  number of iterations ");
! 	  print_generic_expr (tree_dump_file, LOOP_DATA (current_loop)->niter,
  			      TDF_SLIM);
  	  fprintf (tree_dump_file, "\n");

      	  fprintf (tree_dump_file, "  may be zero if ");
!     	  print_generic_expr (tree_dump_file, LOOP_DATA (current_loop)->may_be_zero,
      			      TDF_SLIM);
      	  fprintf (tree_dump_file, "\n");

      	  fprintf (tree_dump_file, "  bogus unless ");
!     	  print_generic_expr (tree_dump_file, LOOP_DATA (current_loop)->assumptions,
      			      TDF_SLIM);
      	  fprintf (tree_dump_file, "\n");
      	  fprintf (tree_dump_file, "\n");
--- 1678,1700 ----

    if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
      {
!       if (LOOP_DATA (current_loop)->niter.niter)
  	{
  	  fprintf (tree_dump_file, "  number of iterations ");
! 	  print_generic_expr (tree_dump_file,
! 			      LOOP_DATA (current_loop)->niter.niter,
  			      TDF_SLIM);
  	  fprintf (tree_dump_file, "\n");

      	  fprintf (tree_dump_file, "  may be zero if ");
!     	  print_generic_expr (tree_dump_file,
! 			      LOOP_DATA (current_loop)->niter.may_be_zero,
      			      TDF_SLIM);
      	  fprintf (tree_dump_file, "\n");

      	  fprintf (tree_dump_file, "  bogus unless ");
!     	  print_generic_expr (tree_dump_file,
! 			      LOOP_DATA (current_loop)->niter.assumptions,
      			      TDF_SLIM);
      	  fprintf (tree_dump_file, "\n");
      	  fprintf (tree_dump_file, "\n");
*************** find_induction_variables (void)
*** 1680,1688 ****

        fprintf (tree_dump_file, "Induction variables:\n\n");

!       for (i = 0; i < highest_ssa_version; i++)
! 	if (ver_info (i)->iv)
! 	  dump_iv (tree_dump_file, ver_info (i)->iv);
      }

    return true;
--- 1702,1712 ----

        fprintf (tree_dump_file, "Induction variables:\n\n");

!       EXECUTE_IF_SET_IN_BITMAP (relevant, 0, i,
! 	{
! 	  if (ver_info (i)->iv)
! 	    dump_iv (tree_dump_file, ver_info (i)->iv);
! 	});
      }

    return true;
*************** record_invariant (tree op, bool nonlinea
*** 1731,1736 ****
--- 1755,1761 ----
    info->has_nonlin_use |= nonlinear_use;
    if (!info->inv_id)
      info->inv_id = ++max_inv_id;
+   bitmap_set_bit (relevant, SSA_NAME_VERSION (op));
  }

  /* Checks whether the use *OP_P is interesting and if so, records it.  */
*************** find_interesting_uses (void)
*** 2079,2095 ****
      {
        fprintf (tree_dump_file, "\n");

!       for (i = 0; i < old_highest_ssa_version ; i++)
  	{
  	  info = ver_info (i);
! 	  if (!info->inv_id)
! 	    continue;
! 
! 	  fprintf (tree_dump_file, "  ");
! 	  print_generic_expr (tree_dump_file, info->name, TDF_SLIM);
! 	  fprintf (tree_dump_file, " is invariant (%d)%s\n",
! 		   info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
! 	}

        fprintf (tree_dump_file, "\n");
      }
--- 2104,2120 ----
      {
        fprintf (tree_dump_file, "\n");

!       EXECUTE_IF_SET_IN_BITMAP (relevant, 0, i,
  	{
  	  info = ver_info (i);
! 	  if (info->inv_id)
! 	    {
! 	      fprintf (tree_dump_file, "  ");
! 	      print_generic_expr (tree_dump_file, info->name, TDF_SLIM);
! 	      fprintf (tree_dump_file, " is invariant (%d)%s\n",
! 		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
! 	    }
! 	});

        fprintf (tree_dump_file, "\n");
      }
*************** add_old_ivs_candidates (void)
*** 2214,2227 ****
    unsigned i;
    struct iv *iv;

!   for (i = 0; i < highest_ssa_version; i++)
      {
        iv = ver_info (i)->iv;
!       if (!iv || !iv->biv_p || zero_p (iv->step))
! 	continue;
! 
!       add_old_iv_candidates (iv);
!     }
  }

  /* Adds candidates based on the value of the induction variable IV and USE.  */
--- 2239,2250 ----
    unsigned i;
    struct iv *iv;

!   EXECUTE_IF_SET_IN_BITMAP (relevant, 0, i,
      {
        iv = ver_info (i)->iv;
!       if (iv && iv->biv_p && !zero_p (iv->step))
! 	add_old_iv_candidates (iv);
!     });
  }

  /* Adds candidates based on the value of the induction variable IV and USE.  */
*************** computation_cost (tree expr)
*** 2511,2552 ****
    return cost;
  }

! /* Determines the expression by that USE is expressed from induction variable
!    CAND.  */

  static tree
! get_computation (struct iv_use *use, struct iv_cand *cand)
  {
-   tree ubase = use->iv->base, ustep = use->iv->step;
-   tree cbase = cand->iv->base, cstep = cand->iv->step;
-   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
-   tree expr, delta;
-   tree ratio;
-   unsigned HOST_WIDE_INT ustepi, cstepi;
-   HOST_WIDE_INT ratioi;
- 
    switch (cand->pos)
      {
  #if DISABLE_IP_START
      case IP_START:
!       expr = cand->var_after;
!       break;
  #endif

      case IP_NORMAL:
        if (stmt_after_ip_normal_pos (use->stmt))
! 	expr = cand->var_after;
        else
! 	expr = cand->var_before;
!       break;

      case IP_END:
!       expr = cand->var_before;
!       break;

      default:
        abort ();
      }

    if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
      {
--- 2534,2581 ----
    return cost;
  }

! /* Returns variable containing the value of candidate CAND at position
!    of USE.  */

  static tree
! var_at_use (struct iv_use *use, struct iv_cand *cand)
  {
    switch (cand->pos)
      {
  #if DISABLE_IP_START
      case IP_START:
!       return cand->var_after;
  #endif

      case IP_NORMAL:
        if (stmt_after_ip_normal_pos (use->stmt))
! 	return cand->var_after;
        else
! 	return cand->var_before;

      case IP_END:
!       return cand->var_before;

      default:
        abort ();
      }
+ }
+ 
+ /* Determines the expression by that USE is expressed from induction variable
+    CAND.  */
+ 
+ static tree
+ get_computation (struct iv_use *use, struct iv_cand *cand)
+ {
+   tree ubase = use->iv->base, ustep = use->iv->step;
+   tree cbase = cand->iv->base, cstep = cand->iv->step;
+   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
+   tree expr, delta;
+   tree ratio;
+   unsigned HOST_WIDE_INT ustepi, cstepi;
+   HOST_WIDE_INT ratioi;
+ 
+   expr = var_at_use (use, cand);

    if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
      {
*************** add_cost (enum machine_mode mode)
*** 2695,2723 ****
    return cost;
  }

  /* Returns cost of multiplication by constant CST in MODE.  */

- #define MAX_CACHED_VALUE 128
  static unsigned
  multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
  {
!   static unsigned costs[TImode - QImode + 1][2 * MAX_CACHED_VALUE + 1];
    rtx seq;
!   unsigned cost, *cadd = NULL;
!   bool cached;
!   int idx;
! 
!   cached = (-MAX_CACHED_VALUE <= cst && cst <= MAX_CACHED_VALUE);

!   idx = mode - QImode;
!   cached = cached && 0 <= idx && idx <= TImode - QImode;

!   if (cached)
!     {
!       cadd = &costs[idx][cst + MAX_CACHED_VALUE];
!       if (*cadd)
! 	return *cadd;
!     }

    start_sequence ();
    expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
--- 2724,2781 ----
    return cost;
  }

+ /* Entry in a hashtable of already known costs for multiplication.  */
+ struct mbc_entry
+ {
+   HOST_WIDE_INT cst;		/* The constant to multiply by.  */
+   enum machine_mode mode;	/* In mode.  */
+   unsigned cost;		/* The cost.  */
+ };
+ 
+ /* Counts hash value for the ENTRY.  */
+ 
+ static hashval_t
+ mbc_entry_hash (const void *entry)
+ {
+   const struct mbc_entry *e = entry;
+ 
+   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
+ }
+ 
+ /* Compares the hash table entries ENTRY1 and ENTRY2.  */
+ 
+ static int
+ mbc_entry_eq (const void *entry1, const void *entry2)
+ {
+   const struct mbc_entry *e1 = entry1;
+   const struct mbc_entry *e2 = entry2;
+ 
+   return (e1->mode == e2->mode
+ 	  && e1->cst == e2->cst);
+ }
+ 
  /* Returns cost of multiplication by constant CST in MODE.  */

  static unsigned
  multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
  {
!   static htab_t costs;
!   struct mbc_entry **cached, act;
    rtx seq;
!   unsigned cost;

!   if (!costs)
!     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

!   act.mode = mode;
!   act.cst = cst;
!   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
!   if (*cached)
!     return (*cached)->cost;
! 
!   *cached = xmalloc (sizeof (struct mbc_entry));
!   (*cached)->mode = mode;
!   (*cached)->cst = cst;

    start_sequence ();
    expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
*************** multiply_by_cost (HOST_WIDE_INT cst, enu
*** 2726,2740 ****
    end_sequence ();

    cost = seq_cost (seq);
-   if (!cost)
-     cost = 1;

    if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
      fprintf (tree_dump_file, "Multiplication by %d in %s costs %d\n",
  	     (int) cst, GET_MODE_NAME (mode), cost);

!   if (cached)
!     *cadd = cost;
    return cost;
  }

--- 2784,2796 ----
    end_sequence ();

    cost = seq_cost (seq);

    if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
      fprintf (tree_dump_file, "Multiplication by %d in %s costs %d\n",
  	     (int) cst, GET_MODE_NAME (mode), cost);

!   (*cached)->cost = cost;
! 
    return cost;
  }

*************** determine_use_iv_cost_address (struct iv
*** 3291,3303 ****
    set_use_iv_cost (use, cand, cost, depends_on);
  }

  /* Determines cost of basing replacement of USE on CAND in a condition.  */

  static void
  determine_use_iv_cost_condition (struct iv_use *use, struct iv_cand *cand)
  {
!   /* TODO implement induction variable elimination.  */

    if (TREE_CODE (*use->op_p) == SSA_NAME)
      record_invariant (*use->op_p, true);
    else
--- 3347,3425 ----
    set_use_iv_cost (use, cand, cost, depends_on);
  }

+ /* Computes value of candidate CAND at position USE in iteration NITER.  */
+ 
+ static tree
+ cand_value_at (struct iv_cand *cand, struct iv_use *use, tree niter)
+ {
+   tree val;
+   tree type = TREE_TYPE (niter);
+ 
+   if (cand->pos == IP_NORMAL
+       && stmt_after_ip_normal_pos (use->stmt))
+     niter = fold (build (PLUS_EXPR, type, niter, integer_one_node));
+ 
+   val = fold (build (MULT_EXPR, type, cand->iv->step, niter));
+ 
+   return fold (build (PLUS_EXPR, type, cand->iv->base, val));
+ }
+ 
+ /* Check whether it is possible to express the condition in USE by comparison
+    of candidate CAND.  If so, store the comparison code to COMPARE and the
+    value compared with to BOUND.  */
+ 
+ static bool
+ may_eliminate_iv (struct iv_use *use, struct iv_cand *cand,
+ 		  enum tree_code *compare, tree *bound)
+ {
+   edge exit;
+   struct tree_niter_desc *niter;
+ 
+   /* For now just very primitive -- we work just for the single exit condition,
+      and are quite conservative about the possible overflows.  TODO -- both of
+      these can be improved.  */
+   exit = LOOP_DATA (current_loop)->single_exit;
+   if (!exit)
+     return false;
+   if (use->stmt != last_stmt (exit->src))
+     return false;
+ 
+   niter = &LOOP_DATA (current_loop)->niter;
+   if (!niter->niter
+       || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
+       || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
+     return false;
+ 
+   if (exit->flags & EDGE_TRUE_VALUE)
+     *compare = EQ_EXPR;
+   else
+     *compare = NE_EXPR;
+ 
+   *bound = cand_value_at (cand, use, niter->niter);
+ 
+   return true;
+ }
+ 
  /* Determines cost of basing replacement of USE on CAND in a condition.  */

  static void
  determine_use_iv_cost_condition (struct iv_use *use, struct iv_cand *cand)
  {
!   tree bound;
!   enum tree_code compare;

+   if (may_eliminate_iv (use, cand, &compare, &bound))
+     {
+       bitmap depends_on = BITMAP_XMALLOC ();
+       unsigned cost = force_var_cost (bound, &depends_on);
+ 
+       set_use_iv_cost (use, cand, cost, depends_on);
+       return;
+     }
+ 
+   /* The induction variable elimination failed; just express the original
+      giv.  If it is compared with an invariant, note that we cannot get
+      rid of it.  */
    if (TREE_CODE (*use->op_p) == SSA_NAME)
      record_invariant (*use->op_p, true);
    else
*************** determine_set_costs (void)
*** 3590,3602 ****
        n++;
      }

!   for (j = 0; j < old_highest_ssa_version; j++)
      {
        struct version_info *info = ver_info (j);

        if (info->inv_id && info->has_nonlin_use)
  	n++;
!     }

    LOOP_DATA (current_loop)->regs_used = n;
    if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
--- 3712,3724 ----
        n++;
      }

!   EXECUTE_IF_SET_IN_BITMAP (relevant, 0, j,
      {
        struct version_info *info = ver_info (j);

        if (info->inv_id && info->has_nonlin_use)
  	n++;
!     });

    LOOP_DATA (current_loop)->regs_used = n;
    if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
*************** rewrite_use_address (struct iv_use *use,
*** 4054,4064 ****
  static void
  rewrite_use_compare (struct iv_use *use, struct iv_cand *cand)
  {
!   tree comp = unshare_expr (get_computation (use, cand));
!   tree *op_p, cond, op, stmts;
    block_stmt_iterator bsi = stmt_bsi (use->stmt);

!   /* TODO -- induction variable elimination.  */

    cond = *use->op_p;
    op_p = &TREE_OPERAND (cond, 0);
--- 4176,4203 ----
  static void
  rewrite_use_compare (struct iv_use *use, struct iv_cand *cand)
  {
!   tree comp;
!   tree *op_p, cond, op, stmts, bound;
    block_stmt_iterator bsi = stmt_bsi (use->stmt);
+   enum tree_code compare;
+   
+   if (may_eliminate_iv (use, cand, &compare, &bound))
+     {
+       op = force_gimple_operand (unshare_expr (bound), &stmts,
+ 				 NULL_TREE, false);

!       if (stmts)
! 	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
! 
!       *use->op_p = build (compare, boolean_type_node,
! 			  var_at_use (use, cand), op);
!       modify_stmt (use->stmt);
!       return;
!     }
! 
!   /* The induction variable elimination failed; just express the original
!      giv.  */
!   comp = unshare_expr (get_computation (use, cand));

    cond = *use->op_p;
    op_p = &TREE_OPERAND (cond, 0);
*************** free_loop_data (void)
*** 4122,4128 ****
  {
    unsigned i, j;

!   for (i = 0; i < old_highest_ssa_version; i++)
      {
        struct version_info *info;

--- 4261,4267 ----
  {
    unsigned i, j;

!   EXECUTE_IF_SET_IN_BITMAP (relevant, 0, i,
      {
        struct version_info *info;

*************** free_loop_data (void)
*** 4132,4138 ****
        info->iv = NULL;
        info->has_nonlin_use = false;
        info->inv_id = 0;
!     }

    for (i = 0; i < VARRAY_ACTIVE_SIZE (iv_uses); i++)
      {
--- 4271,4278 ----
        info->iv = NULL;
        info->has_nonlin_use = false;
        info->inv_id = 0;
!     });
!   bitmap_clear (relevant);

    for (i = 0; i < VARRAY_ACTIVE_SIZE (iv_uses); i++)
      {
*************** free_loop_data (void)
*** 4157,4169 ****
      }
    VARRAY_POP_ALL (iv_candidates);

!   if (old_highest_ssa_version != highest_ssa_version)
!     version_info = xrealloc (version_info,
! 			     (sizeof (struct version_info)
! 			      * highest_ssa_version));
!   memset (version_info + old_highest_ssa_version, 0,
! 	  ((highest_ssa_version - old_highest_ssa_version)
! 	   * sizeof (struct version_info)));
    max_inv_id = 0;

    for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
--- 4297,4309 ----
      }
    VARRAY_POP_ALL (iv_candidates);

!   if (version_info_size < highest_ssa_version)
!     {
!       version_info_size = 2 * highest_ssa_version;
!       free (version_info);
!       version_info = xcalloc (version_info_size, sizeof (struct version_info));
!     }
! 
    max_inv_id = 0;

    for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
*************** free_loop_data (void)
*** 4173,4180 ****
        SET_DECL_RTL (obj, NULL_RTX);
      }
    VARRAY_POP_ALL (decl_rtl_to_reset);
-   
-   old_highest_ssa_version = highest_ssa_version;
  }

  /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
--- 4313,4318 ----
*************** tree_ssa_iv_optimize_finalize (struct lo
*** 4194,4199 ****
--- 4332,4338 ----

    free_loop_data ();
    free (version_info);
+   BITMAP_XFREE (relevant);

    VARRAY_FREE (decl_rtl_to_reset);
    VARRAY_FREE (iv_uses);