This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[killloop] Ivopts cleanups


Hello,

this patch splits the functions dealing with affine decompositions of
trees to a separate file, and also teaches the utilities to handle
integers that do not fit into HOST_WIDE_INT.

It also rewrites the cost computation of ivopts in a much less magic way --
just compute the affine decomposition of the expression and evaluate
its cost (this was not possible in trees, because using fold would
not catch many cases, and also it would be insanely memory expensive --
unlike fold, utilities for handling affine decompositions work in-place,
without using any gc memory).  Still, this is not ideal -- I would very
much like to get ivopts to avoid computing the costs (almost)
completely, by rewriting the decision heuristics to something less
clever (but hopefully sufficient in most cases and much less expensive);
but this will have to wait till more pressing tasks are finished.

Zdenek

Index: ChangeLog.killloop
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/ChangeLog.killloop,v
retrieving revision 1.1.2.9
diff -c -3 -p -r1.1.2.9 ChangeLog.killloop
*** ChangeLog.killloop	2 Sep 2005 16:40:37 -0000	1.1.2.9
--- ChangeLog.killloop	9 Sep 2005 16:50:35 -0000
***************
*** 1,3 ****
--- 1,39 ----
+ 2005-09-09  Zdenek Dvorak  <dvorakz@suse.cz>
+ 
+ 	* tree-pretty-print.c (dump_bb_header): Fix merge conflict.
+ 
+ 	* fold-const.c (try_move_mult_to_index): Handle negative multipliers.
+ 
+ 	* Makefile.in (tree-affine.o): Add.
+ 	(tree-ssa-address.o, tree-ssa-loop-ivopts.o): Add tree-affine.h
+ 	dependency.
+ 	* tree-affine.c: New file.
+ 	* tree-affine.h: New file.
+ 	* tree-flow.h (struct affine_tree_combination): Moved to
+ 	tree-affine.h.
+ 	* tree-ssa-address.c: Include tree-affine.h.
+ 	(fixed_address_object_p): Export.
+ 	(add_to_parts): Do not take coef argument.
+ 	(most_expensive_mult_to_index, addr_to_parts, create_mem_ref):
+ 	Use tree-affine.h utilities.
+ 	* tree-ssa-loop-ivopts.c: Include tree-affine.h.
+ 	(struct iv_use, struct iv_cand): Add base_aff field.
+ 	(record_use, add_candidate_1): Initialize base_aff field.
+ 	(determine_base_object): Handle MULT_EXPR.
+ 	(aff_combination_const, aff_combination_elt, aff_combination_scale,
+ 	aff_combination_add_elt, aff_combination_add,
+ 	tree_to_aff_combination, add_elt_to_tree, unshare_aff_combination,
+ 	aff_combination_to_tree): Moved to tree-affine.c.
+ 	(get_computation_aff): Use tree-affine.h utilities.  Never compute
+ 	result in trees directly.  Return suggested multiplier for an
+ 	addressing mode.
+ 	(get_computation_at, rewrite_use_address): Changed to reflect
+ 	get_computation_aff change.
+ 	(split_address_cost, ptr_difference_cost, difference_cost): Removed.
+ 	(aff_combination_cost, aff_combination_cost_address): New.
+ 	(get_computation_cost_at): Use them.
+ 	* tree.h (fixed_address_object_p): Declare.
+ 
  2005-09-02  Zdenek Dvorak  <dvorakz@suse.cz>
  
  	Merge from mainline (killloop-merge-20050902).
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1530.2.4
diff -c -3 -p -r1.1530.2.4 Makefile.in
*** Makefile.in	2 Sep 2005 16:40:39 -0000	1.1530.2.4
--- Makefile.in	9 Sep 2005 16:50:35 -0000
*************** OBJS-common = \
*** 943,949 ****
   tree-ssa-loop-manip.o tree-ssa-threadupdate.o				   \
   tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o		   \
   tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o	   \
!  tree-ssa-math-opts.o tree-ssa-loop-endcond.o				   \
   tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o		   \
   alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	  	   \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
--- 943,949 ----
   tree-ssa-loop-manip.o tree-ssa-threadupdate.o				   \
   tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o		   \
   tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o	   \
!  tree-ssa-math-opts.o tree-ssa-loop-endcond.o tree-affine.o		   \
   tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o		   \
   alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o	  	   \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o		   \
*************** tree-ssa-address.o : tree-ssa-address.c 
*** 1877,1883 ****
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) \
     output.h diagnostic.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
     tree-pass.h $(FLAGS_H) tree-inline.h $(RECOG_H) insn-config.h $(EXPR_H) \
!    gt-tree-ssa-address.h $(GGC_H)
  tree-ssa-loop-niter.o : tree-ssa-loop-niter.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(PARAMS_H) \
     tree-inline.h output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
--- 1877,1883 ----
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) \
     output.h diagnostic.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
     tree-pass.h $(FLAGS_H) tree-inline.h $(RECOG_H) insn-config.h $(EXPR_H) \
!    gt-tree-ssa-address.h $(GGC_H) tree-affine.h
  tree-ssa-loop-niter.o : tree-ssa-loop-niter.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(PARAMS_H) \
     tree-inline.h output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
*************** tree-ssa-loop-ivopts.o : tree-ssa-loop-i
*** 1897,1909 ****
     output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
     tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
     $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
!    tree-chrec.h $(VARRAY_H)
  tree-ssa-loop-endcond.o : tree-ssa-loop-endcond.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
     output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
     tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
     $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
     tree-chrec.h $(VARRAY_H)
  tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
     output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
--- 1897,1912 ----
     output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
     tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
     $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
!    tree-chrec.h $(VARRAY_H) tree-affine.h
  tree-ssa-loop-endcond.o : tree-ssa-loop-endcond.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
     output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
     tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
     $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
     tree-chrec.h $(VARRAY_H)
+ tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) \
+    $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) \
+    output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H)
  tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
     $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
     output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
Index: fold-const.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fold-const.c,v
retrieving revision 1.611.2.1
diff -c -3 -p -r1.611.2.1 fold-const.c
*** fold-const.c	2 Sep 2005 16:42:01 -0000	1.611.2.1
--- fold-const.c	9 Sep 2005 16:50:35 -0000
*************** fold_sign_changed_comparison (enum tree_
*** 6372,6378 ****
     being an integer constant (and thus already folded).
     ADDR is the address. MULT is the multiplicative expression.
     If the function succeeds, the new address expression is returned.  Otherwise
!    NULL_TREE is returned.  */
  
  static tree
  try_move_mult_to_index (enum tree_code code, tree addr, tree op1)
--- 6372,6378 ----
     being an integer constant (and thus already folded).
     ADDR is the address. MULT is the multiplicative expression.
     If the function succeeds, the new address expression is returned.  Otherwise
!    NULL_TREE is returned.  CODE must be either PLUS_EXPR or MINUS_EXPR.  */
  
  static tree
  try_move_mult_to_index (enum tree_code code, tree addr, tree op1)
*************** try_move_mult_to_index (enum tree_code c
*** 6382,6387 ****
--- 6382,6389 ----
    tree ret, pos;
    tree itype;
  
+   gcc_assert (code == PLUS_EXPR || code == MINUS_EXPR);
+ 
    /* Canonicalize op1 into a possibly non-constant delta
       and an INTEGER_CST s.  */
    if (TREE_CODE (op1) == MULT_EXPR)
*************** try_move_mult_to_index (enum tree_code c
*** 6403,6408 ****
--- 6405,6418 ----
          }
        else
          return NULL_TREE;
+ 
+       /* If the step is negative, negate it and the operation, since the step
+ 	 of an array is always positive.  */
+       if (tree_int_cst_sign_bit (s))
+ 	{
+ 	  s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
+ 	  code = (code == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
+ 	}
      }
    else if (TREE_CODE (op1) == INTEGER_CST)
      {
Index: tree-affine.c
===================================================================
RCS file: tree-affine.c
diff -N tree-affine.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- tree-affine.c	9 Sep 2005 16:50:36 -0000
***************
*** 0 ****
--- 1,528 ----
+ /* Operations with affine combinations of trees.
+    Copyright (C) 2005 Free Software Foundation, Inc.
+    
+ This file is part of GCC.
+    
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+    
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+    
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.  */
+ 
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+ #include "tm.h"
+ #include "tree.h"
+ #include "rtl.h"
+ #include "tm_p.h"
+ #include "hard-reg-set.h"
+ #include "output.h"
+ #include "diagnostic.h"
+ #include "tree-dump.h"
+ #include "tree-affine.h"
+ 
+ /* Restricts constant CST according to precision of combination COMB.  */
+ 
+ static inline double_int
+ restrict_cst_to_precision (aff_tree *comb, double_int cst)
+ {
+   double_int r;
+   
+   r.low = cst.low & comb->mask.low;
+   r.high = cst.high & comb->mask.high;
+ 
+   return r;
+ }
+ 
+ /* Returns true if X fits in HOST_WIDE_INT, with respect to precision
+    of COMB.  */
+ 
+ bool
+ double_int_fits_in_hwi_p (aff_tree *comb, double_int x)
+ {
+   return x.high == comb->mask.high;
+ }
+ 
+ /* Returns true if SCALE is negative in precision of COMB.  */
+ 
+ bool
+ double_int_negative_p (aff_tree *comb, double_int scale)
+ {
+   if (comb->mask.high)
+     return (scale.high & ~(comb->mask.high >> 1)) != 0;
+   else
+     return (scale.low & ~(comb->mask.low >> 1)) != 0;
+ }
+ 
+ /* Returns value of X with respect to precision of COMB.  */
+ 
+ HOST_WIDE_INT
+ double_int_to_hwi (aff_tree *comb, double_int x)
+ {
+   if (comb->mask.high || !double_int_negative_p (comb, x))
+     return x.low;
+   else
+     return x.low | ~comb->mask.low;
+ }
+ 
+ /* Returns A * B, truncated so that it fits COMB.  */
+ 
+ double_int
+ double_int_mul (aff_tree *comb, double_int a, double_int b)
+ {
+   unsigned HOST_WIDE_INT lo;
+   HOST_WIDE_INT hi;
+   double_int ret;
+ 
+   mul_double (a.low, a.high, b.low, b.high, &lo, &hi);
+   ret.low = lo;
+   ret.high = hi;
+ 
+   return restrict_cst_to_precision (comb, ret);
+ }
+ 
+ /* Returns A + B, truncated so that it fits COMB.  */
+ 
+ double_int
+ double_int_add (aff_tree *comb, double_int a, double_int b)
+ {
+   unsigned HOST_WIDE_INT lo;
+   HOST_WIDE_INT hi;
+   double_int ret;
+ 
+   add_double (a.low, a.high, b.low, b.high, &lo, &hi);
+   ret.low = lo;
+   ret.high = hi;
+ 
+   return restrict_cst_to_precision (comb, ret);
+ }
+ 
+ /* Returns -A, truncated so that it fits COMB.  */
+ 
+ double_int
+ double_int_negate (aff_tree *comb, double_int a)
+ {
+   unsigned HOST_WIDE_INT lo;
+   HOST_WIDE_INT hi;
+   double_int ret;
+ 
+   neg_double (a.low, a.high, &lo, &hi);
+   ret.low = lo;
+   ret.high = hi;
+ 
+   return restrict_cst_to_precision (comb, ret);
+ }
+ 
+ /* Constructs tree in type TYPE from double_int CST.  */
+ 
+ tree
+ double_int_to_tree (tree type, double_int cst)
+ {
+   unsigned HOST_WIDE_INT lo = cst.low;
+   HOST_WIDE_INT hi = cst.high;
+   unsigned prec = TYPE_PRECISION (type);
+   bool negative;
+ 
+   if (prec > HOST_BITS_PER_WIDE_INT)
+     {
+       prec -= HOST_BITS_PER_WIDE_INT;
+       negative = (hi >> (prec - 1)) & 1;
+       if (negative)
+ 	hi |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1) << 1;
+     }
+   else
+     {
+       negative = (cst.low >> (prec - 1)) & 1;
+       if (negative)
+ 	{
+ 	  lo |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1) << 1;
+ 	  hi = ~(unsigned HOST_WIDE_INT) 0;
+ 	}
+     }
+ 
+   return build_int_cst_wide (type, lo, hi);
+ }
+ 
+ /* Initializes mask of COMB according to its type.  */
+ 
+ static void
+ affine_combination_set_mask (aff_tree *comb)
+ {
+   unsigned prec = TYPE_PRECISION (comb->type);
+ 
+   if (prec > HOST_BITS_PER_WIDE_INT)
+     {
+       prec -= HOST_BITS_PER_WIDE_INT;
+       comb->mask.high = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
+       comb->mask.low = ~(unsigned HOST_WIDE_INT) 0;
+     }
+   else
+     {
+       comb->mask.high = 0;
+       comb->mask.low = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
+     }
+ }
+ 
+ /* Initializes affine combination COMB so that its value is zero in TYPE.  */
+ 
+ static void
+ aff_combination_zero (aff_tree *comb, tree type)
+ {
+   comb->type = type;
+   affine_combination_set_mask (comb);
+ 
+   comb->offset.low = 0;
+   comb->offset.high = 0;
+   comb->n = 0;
+   comb->rest = NULL_TREE;
+ }
+ 
+ /* Sets COMB to CST.  */
+ 
+ void
+ aff_combination_const (aff_tree *comb, tree type, double_int cst)
+ {
+   aff_combination_zero (comb, type);
+   comb->offset = restrict_cst_to_precision (comb, cst);
+ }
+ 
+ /* Sets COMB to single element ELT.  */
+ 
+ void
+ aff_combination_elt (aff_tree *comb, tree type, tree elt)
+ {
+   aff_combination_zero (comb, type);
+ 
+   comb->n = 1;
+   comb->elts[0] = elt;
+   comb->coefs[0] = hwi_to_double_int (1);
+ }
+ 
+ /* Scales COMB by SCALE.  */
+ 
+ void
+ aff_combination_scale (aff_tree *comb, double_int scale)
+ {
+   unsigned i, j;
+ 
+   scale = restrict_cst_to_precision (comb, scale);
+   if (double_int_one_p (scale))
+     return;
+ 
+   if (double_int_zero_p (scale))
+     {
+       aff_combination_zero (comb, comb->type);
+       return;
+     }
+ 
+   comb->offset = double_int_mul (comb, scale, comb->offset);
+   for (i = 0, j = 0; i < comb->n; i++)
+     {
+       comb->coefs[j] = double_int_mul (comb, scale, comb->coefs[i]);
+       comb->elts[j] = comb->elts[i];
+       /* A coefficient may become zero due to overflow.  Remove the zero
+ 	 elements.  */
+       if (!double_int_zero_p (comb->coefs[j]))
+ 	j++;
+     }
+   comb->n = j;
+ 
+   if (comb->rest)
+     {
+       if (comb->n < MAX_AFF_ELTS)
+ 	{
+ 	  comb->coefs[comb->n] = scale;
+ 	  comb->elts[comb->n] = comb->rest;
+ 	  comb->rest = NULL_TREE;
+ 	  comb->n++;
+ 	}
+       else
+ 	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest, 
+ 				  double_int_to_tree (comb->type, scale));
+     }
+ }
+ 
+ /* Adds ELT * SCALE to COMB.  */
+ 
+ void
+ aff_combination_add_elt (aff_tree *comb, tree elt, double_int scale)
+ {
+   unsigned i;
+ 
+   if (double_int_zero_p (scale))
+     return;
+ 
+   for (i = 0; i < comb->n; i++)
+     if (operand_equal_p (comb->elts[i], elt, 0))
+       {
+ 	comb->coefs[i] = double_int_add (comb, comb->coefs[i], scale);
+ 	if (!double_int_zero_p (comb->coefs[i]))
+ 	  return;
+ 
+ 	comb->n--;
+ 	comb->coefs[i] = comb->coefs[comb->n];
+ 	comb->elts[i] = comb->elts[comb->n];
+ 	return;
+       }
+   if (comb->n < MAX_AFF_ELTS)
+     {
+       comb->coefs[comb->n] = scale;
+       comb->elts[comb->n] = elt;
+       comb->n++;
+       return;
+     }
+ 
+   if (double_int_one_p (scale))
+     elt = fold_convert (comb->type, elt);
+   else
+     elt = fold_build2 (MULT_EXPR, comb->type,
+ 		       fold_convert (comb->type, elt),
+ 		       double_int_to_tree (comb->type, scale)); 
+ 
+   if (comb->rest)
+     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
+   else
+     comb->rest = elt;
+ }
+ 
+ /* Adds COMB2 to COMB1.  */
+ 
+ void
+ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
+ {
+   unsigned i;
+ 
+   comb1->offset = double_int_add (comb1, comb1->offset, comb2->offset);
+   for (i = 0; i < comb2->n; i++)
+     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
+   if (comb2->rest)
+     aff_combination_add_elt (comb1, comb2->rest, hwi_to_double_int (1));
+ }
+ 
+ /* Converts affine combination COMB to TYPE.  */
+ 
+ void
+ aff_combination_convert (aff_tree *comb, tree type)
+ {
+   unsigned i, j;
+   tree comb_type = comb->type;
+ 
+   gcc_assert (TYPE_PRECISION (type) <= TYPE_PRECISION (comb_type));
+   comb->type = type;
+   if (comb->rest)
+     comb->rest = fold_convert (type, comb->rest);
+ 
+   if (TYPE_PRECISION (type) == TYPE_PRECISION (comb_type))
+     return;
+   affine_combination_set_mask (comb);
+ 
+   comb->offset = restrict_cst_to_precision (comb, comb->offset);
+   for (i = j = 0; i < comb->n; i++)
+     {
+       comb->coefs[j] = restrict_cst_to_precision (comb, comb->coefs[i]);
+       if (double_int_zero_p (comb->coefs[j]))
+ 	continue;
+       comb->elts[j] = fold_convert (type, comb->elts[i]);
+       j++;
+     }
+ 
+   comb->n = j;
+   if (comb->n < MAX_AFF_ELTS && comb->rest)
+     {
+       comb->coefs[comb->n] = hwi_to_double_int (1);
+       comb->elts[comb->n] = comb->rest;
+       comb->rest = NULL_TREE;
+       comb->n++;
+     }
+ }
+ 
+ /* Splits EXPR into an affine combination of parts.  */
+ 
+ void
+ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+ {
+   aff_tree tmp;
+   enum tree_code code;
+   tree cst, core, toffset;
+   HOST_WIDE_INT bitpos, bitsize;
+   enum machine_mode mode;
+   int unsignedp, volatilep;
+ 
+   STRIP_NOPS (expr);
+ 
+   code = TREE_CODE (expr);
+   switch (code)
+     {
+     case INTEGER_CST:
+       aff_combination_const (comb, type, tree_to_double_int (expr));
+       return;
+ 
+     case PLUS_EXPR:
+     case MINUS_EXPR:
+       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
+       if (code == MINUS_EXPR)
+ 	aff_combination_scale (&tmp, hwi_to_double_int (-1));
+       aff_combination_add (comb, &tmp);
+       return;
+ 
+     case MULT_EXPR:
+       cst = TREE_OPERAND (expr, 1);
+       if (TREE_CODE (cst) != INTEGER_CST)
+ 	break;
+       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+       aff_combination_scale (comb, tree_to_double_int (cst));
+       return;
+ 
+     case NEGATE_EXPR:
+       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+       aff_combination_scale (comb, hwi_to_double_int (-1));
+       return;
+ 
+     case ADDR_EXPR:
+       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
+ 				  &toffset, &mode, &unsignedp, &volatilep,
+ 				  false);
+       if (bitpos % BITS_PER_UNIT != 0)
+ 	break;
+       aff_combination_const (comb, type,
+ 			     hwi_to_double_int (bitpos / BITS_PER_UNIT));
+       core = build_fold_addr_expr (core);
+       if (TREE_CODE (core) == ADDR_EXPR)
+ 	aff_combination_add_elt (comb, core, hwi_to_double_int (1));
+       else
+ 	{
+ 	  tree_to_aff_combination (core, type, &tmp);
+ 	  aff_combination_add (comb, &tmp);
+ 	}
+       if (toffset)
+ 	{
+ 	  tree_to_aff_combination (toffset, type, &tmp);
+ 	  aff_combination_add (comb, &tmp);
+ 	}
+       return;
+ 
+     default:
+       break;
+     }
+ 
+   aff_combination_elt (comb, type, expr);
+ }
+ 
+ /* Creates EXPR + ELT * SCALE in TYPE.  EXPR is taken from affine
+    combination COMB.  */
+ 
+ static tree
+ add_elt_to_tree (tree expr, tree type, tree elt, double_int scale,
+ 		 aff_tree *comb)
+ {
+   enum tree_code code;
+ 
+   scale = restrict_cst_to_precision (comb, scale);
+   elt = fold_convert (type, elt);
+ 
+   if (double_int_one_p (scale))
+     {
+       if (!expr)
+ 	return elt;
+ 
+       return fold_build2 (PLUS_EXPR, type, expr, elt);
+     }
+ 
+   if (double_int_minus_one_p (comb, scale))
+     {
+       if (!expr)
+ 	return fold_build1 (NEGATE_EXPR, type, elt);
+ 
+       return fold_build2 (MINUS_EXPR, type, expr, elt);
+     }
+ 
+   if (!expr)
+     return fold_build2 (MULT_EXPR, type, elt,
+ 			double_int_to_tree (type, scale));
+ 
+   if (double_int_negative_p (comb, scale))
+     {
+       code = MINUS_EXPR;
+       scale = double_int_negate (comb, scale);
+     }
+   else
+     code = PLUS_EXPR;
+ 
+   elt = fold_build2 (MULT_EXPR, type, elt,
+ 		     double_int_to_tree (type, scale));
+   return fold_build2 (code, type, expr, elt);
+ }
+ 
+ /* Makes tree from the affine combination COMB.  */
+ 
+ tree
+ aff_combination_to_tree (aff_tree *comb)
+ {
+   tree type = comb->type;
+   tree expr = comb->rest;
+   unsigned i;
+   double_int off, sgn;
+ 
+   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
+ 
+   for (i = 0; i < comb->n; i++)
+     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i], comb);
+ 
+   if (double_int_negative_p (comb, comb->offset))
+     {
+       /* Offset is negative.  */
+       off = double_int_negate (comb, comb->offset);
+       sgn = comb->mask;
+     }
+   else
+     {
+       off = comb->offset;
+       sgn = hwi_to_double_int (1);
+     }
+   return add_elt_to_tree (expr, type, double_int_to_tree (type, off), sgn,
+ 			  comb);
+ }
+ 
+ /* Copies the tree elements of COMB to ensure that they are not shared.  */
+ 
+ void
+ unshare_aff_combination (aff_tree *comb)
+ {
+   unsigned i;
+ 
+   for (i = 0; i < comb->n; i++)
+     comb->elts[i] = unshare_expr (comb->elts[i]);
+   if (comb->rest)
+     comb->rest = unshare_expr (comb->rest);
+ }
+ 
+ /* Remove M-th element from COMB.  */
+ 
+ void
+ aff_combination_remove_elt (aff_tree *comb, unsigned m)
+ {
+   comb->n--;
+   if (m <= comb->n)
+     {
+       comb->coefs[m] = comb->coefs[comb->n];
+       comb->elts[m] = comb->elts[comb->n];
+     }
+   if (comb->rest)
+     {
+       comb->coefs[comb->n] = hwi_to_double_int (1);
+       comb->elts[comb->n] = comb->rest;
+       comb->rest = NULL_TREE;
+       comb->n++;
+     }
+ }
Index: tree-affine.h
===================================================================
RCS file: tree-affine.h
diff -N tree-affine.h
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- tree-affine.h	9 Sep 2005 16:50:36 -0000
***************
*** 0 ****
--- 1,133 ----
+ /* Operations with affine combinations of trees.
+    Copyright (C) 2005 Free Software Foundation, Inc.
+    
+ This file is part of GCC.
+    
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+    
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ for more details.
+    
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING.  If not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.  */
+ 
+ /* A structure containing a large integer.  */
+ 
+ typedef struct
+ {
+   unsigned HOST_WIDE_INT low, high;
+ } double_int;
+ 
+ /* Affine combination of trees.  We keep track of at most MAX_AFF_ELTS elements
+    to make things simpler; this is sufficient in most cases.  */
+ 
+ #define MAX_AFF_ELTS 8
+ 
+ typedef struct affine_tree_combination
+ {
+   /* Type of the result of the combination.  */
+   tree type;
+ 
+   /* Mask modulo that the operations are performed.  */
+   double_int mask;
+ 
+   /* Constant offset.  */
+   double_int offset;
+ 
+   /* Number of elements of the combination.  */
+   unsigned n;
+ 
+   /* Elements and their coefficients.  Type of elements may be different from
+      TYPE, but their sizes must be the same (STRIP_NOPS is applied to the
+      elements).  */
+   tree elts[MAX_AFF_ELTS];
+   double_int coefs[MAX_AFF_ELTS];
+ 
+   /* Remainder of the expression.  Usually NULL, used only if there are more
+      than MAX_AFF_ELTS elements.  Type of REST must be TYPE.  */
+   tree rest;
+ } aff_tree;
+ 
+ void aff_combination_const (aff_tree *, tree, double_int);
+ void aff_combination_elt (aff_tree *, tree, tree);
+ void aff_combination_scale (aff_tree *, double_int);
+ void aff_combination_add (aff_tree *, aff_tree *);
+ void aff_combination_add_elt (aff_tree *, tree, double_int);
+ void aff_combination_remove_elt (aff_tree *, unsigned);
+ void aff_combination_convert (aff_tree *, tree);
+ void tree_to_aff_combination (tree, tree, aff_tree *);
+ tree aff_combination_to_tree (aff_tree *);
+ void unshare_aff_combination (aff_tree *);
+ tree double_int_to_tree (tree, double_int);
+ bool double_int_fits_in_hwi_p (aff_tree *, double_int);
+ HOST_WIDE_INT double_int_to_hwi (aff_tree *, double_int);
+ double_int double_int_mul (aff_tree *, double_int, double_int);
+ double_int double_int_add (aff_tree *, double_int, double_int);
+ double_int double_int_negate (aff_tree *, double_int);
+ bool double_int_negative_p (aff_tree *, double_int);
+ 
+ /* Constructs double_int from tree CST.  */
+ 
+ static inline double_int
+ tree_to_double_int (tree cst)
+ {
+   double_int r;
+   
+   r.low = TREE_INT_CST_LOW (cst);
+   r.high = TREE_INT_CST_HIGH (cst);
+ 
+   return r;
+ }
+ 
+ /* Constructs double_int from integer CST.  */
+ 
+ static inline double_int
+ hwi_to_double_int (HOST_WIDE_INT cst)
+ {
+   double_int r;
+   
+   r.low = cst;
+   r.high = cst < 0 ? ~(unsigned HOST_WIDE_INT) 0 : 0;
+ 
+   return r;
+ }
+ 
+ /* Returns true if CST is zero.  */
+ 
+ static inline bool
+ double_int_zero_p (double_int cst)
+ {
+   return cst.low == 0 && cst.high == 0;
+ }
+ 
+ /* Returns true if CST is one.  */
+ 
+ static inline bool
+ double_int_one_p (double_int cst)
+ {
+   return cst.low == 1 && cst.high == 0;
+ }
+ 
+ /* Returns true if CST is minus one in precision of COMB.  */
+ 
+ static inline bool
+ double_int_minus_one_p (aff_tree *comb, double_int cst)
+ {
+   return cst.low == comb->mask.low && cst.high == comb->mask.high;
+ }
+ 
+ /* Returns true if CST1 == CST2.  */
+ 
+ static inline bool
+ double_int_equal_p (double_int cst1, double_int cst2)
+ {
+   return cst1.low == cst2.low && cst1.high == cst2.high;
+ }
+ 
Index: tree-flow.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-flow.h,v
retrieving revision 2.130.2.2
diff -c -3 -p -r2.130.2.2 tree-flow.h
*** tree-flow.h	2 Sep 2005 16:42:56 -0000	2.130.2.2
--- tree-flow.h	9 Sep 2005 16:50:36 -0000
*************** bool find_what_p_points_to (tree);
*** 832,864 ****
  
  /* In tree-ssa-address.c  */
  
- /* Affine combination of trees.  We keep track of at most MAX_AFF_ELTS elements
-    to make things simpler; this is sufficient in most cases.  */
- 
- #define MAX_AFF_ELTS 8
- 
- struct affine_tree_combination
- {
-   /* Type of the result of the combination.  */
-   tree type;
- 
-   /* Mask modulo that the operations are performed.  */
-   unsigned HOST_WIDE_INT mask;
- 
-   /* Constant offset.  */
-   unsigned HOST_WIDE_INT offset;
- 
-   /* Number of elements of the combination.  */
-   unsigned n;
- 
-   /* Elements and their coefficients.  */
-   tree elts[MAX_AFF_ELTS];
-   unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
- 
-   /* Remainder of the expression.  */
-   tree rest;
- };
- 
  /* Description of a memory address.  */
  
  struct mem_address
--- 832,837 ----
*************** struct mem_address
*** 866,871 ****
--- 839,845 ----
    tree symbol, base, index, step, offset;
  };
  
+ struct affine_tree_combination;
  tree create_mem_ref (block_stmt_iterator *, tree, 
  		     struct affine_tree_combination *);
  rtx addr_for_mem_ref (struct mem_address *, bool);
Index: tree-pretty-print.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-pretty-print.c,v
retrieving revision 2.69.2.2
diff -c -3 -p -r2.69.2.2 tree-pretty-print.c
*** tree-pretty-print.c	2 Sep 2005 16:43:04 -0000	2.69.2.2
--- tree-pretty-print.c	9 Sep 2005 16:50:36 -0000
*************** dump_bb_header (pretty_printer *buffer, 
*** 2270,2277 ****
  		break;
  	      }
  	}
-       pp_string (buffer, ", frequency ");
-       pp_decimal_int (buffer, bb->frequency);
        newline_and_indent (buffer, indent);
  
        pp_string (buffer, "# PRED:");
--- 2270,2275 ----
Index: tree-ssa-address.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-address.c,v
retrieving revision 2.3
diff -c -3 -p -r2.3 tree-ssa-address.c
*** tree-ssa-address.c	25 Jun 2005 02:01:32 -0000	2.3
--- tree-ssa-address.c	9 Sep 2005 16:50:36 -0000
*************** Software Foundation, 51 Franklin Street,
*** 42,47 ****
--- 42,48 ----
  #include "recog.h"
  #include "expr.h"
  #include "ggc.h"
+ #include "tree-affine.h"
  
  /* TODO -- handling of symbols (according to Richard Hendersons
     comments, http://gcc.gnu.org/ml/gcc-patches/2005-04/msg00949.html):
*************** create_mem_ref_raw (tree type, struct me
*** 334,340 ****
  
  /* Returns true if OBJ is an object whose address is a link time constant.  */
  
! static bool
  fixed_address_object_p (tree obj)
  {
    return (TREE_CODE (obj) == VAR_DECL
--- 335,341 ----
  
  /* Returns true if OBJ is an object whose address is a link time constant.  */
  
! bool
  fixed_address_object_p (tree obj)
  {
    return (TREE_CODE (obj) == VAR_DECL
*************** fixed_address_object_p (tree obj)
*** 346,357 ****
     construct.  */
  
  static void
! add_to_parts (struct mem_address *parts, tree type, tree elt,
! 	      unsigned HOST_WIDE_INT coef)
  {
    /* Check if this is a symbol.  */
    if (!parts->symbol
-       && coef == 1
        && TREE_CODE (elt) == ADDR_EXPR
        && fixed_address_object_p (TREE_OPERAND (elt, 0)))
      {
--- 347,356 ----
     construct.  */
  
  static void
! add_to_parts (struct mem_address *parts, tree type, tree elt)
  {
    /* Check if this is a symbol.  */
    if (!parts->symbol
        && TREE_CODE (elt) == ADDR_EXPR
        && fixed_address_object_p (TREE_OPERAND (elt, 0)))
      {
*************** add_to_parts (struct mem_address *parts,
*** 359,370 ****
        return;
      }
  
-   if (coef != 1)
-     elt = fold_build2 (MULT_EXPR, type, fold_convert (type, elt),
- 		       build_int_cst_type (type, coef));
-   else
-     elt = fold_convert (type, elt);
- 
    if (!parts->base)
      {
        parts->base = elt;
--- 358,363 ----
*************** add_to_parts (struct mem_address *parts,
*** 388,407 ****
  
  static void
  most_expensive_mult_to_index (struct mem_address *parts, tree type,
! 			      struct affine_tree_combination *addr)
  {
!   unsigned HOST_WIDE_INT best_mult = 0;
    unsigned best_mult_cost = 0, acost;
    tree mult_elt = NULL_TREE, elt;
    unsigned i, j;
  
    for (i = 0; i < addr->n; i++)
      {
!       if (addr->coefs[i] == 1
! 	  || !multiplier_allowed_in_address_p (addr->coefs[i]))
  	continue;
        
!       acost = multiply_by_cost (addr->coefs[i], Pmode);
  
        if (acost > best_mult_cost)
  	{
--- 381,406 ----
  
  static void
  most_expensive_mult_to_index (struct mem_address *parts, tree type,
! 			      aff_tree *addr)
  {
!   HOST_WIDE_INT coef;
!   double_int best_mult = {0, 0}, amult, amult_neg;
    unsigned best_mult_cost = 0, acost;
    tree mult_elt = NULL_TREE, elt;
    unsigned i, j;
+   enum tree_code op_code;
  
    for (i = 0; i < addr->n; i++)
      {
!       if (!double_int_fits_in_hwi_p (addr, addr->coefs[i]))
! 	continue;
! 
!       coef = double_int_to_hwi (addr, addr->coefs[i]);
!       if (coef == 1
! 	  || !multiplier_allowed_in_address_p (coef))
  	continue;
        
!       acost = multiply_by_cost (coef, Pmode);
  
        if (acost > best_mult_cost)
  	{
*************** most_expensive_mult_to_index (struct mem
*** 410,421 ****
  	}
      }
  
!   if (!best_mult)
      return;
  
    for (i = j = 0; i < addr->n; i++)
      {
!       if (addr->coefs[i] != best_mult)
  	{
  	  addr->coefs[j] = addr->coefs[i];
  	  addr->elts[j] = addr->elts[i];
--- 409,428 ----
  	}
      }
  
!   if (!best_mult_cost)
      return;
  
+   /* Collect elements multiplied by best_mult.  */
    for (i = j = 0; i < addr->n; i++)
      {
!       amult = addr->coefs[i];
!       amult_neg = double_int_negate (addr, amult);
! 
!       if (double_int_equal_p (amult, best_mult))
! 	op_code = PLUS_EXPR;
!       else if (double_int_equal_p (amult_neg, best_mult))
! 	op_code = MINUS_EXPR;
!       else
  	{
  	  addr->coefs[j] = addr->coefs[i];
  	  addr->elts[j] = addr->elts[i];
*************** most_expensive_mult_to_index (struct mem
*** 424,438 ****
  	}
  
        elt = fold_convert (type, addr->elts[i]);
!       if (!mult_elt)
! 	mult_elt = elt;
        else
! 	mult_elt = fold_build2 (PLUS_EXPR, type, mult_elt, elt);
      }
    addr->n = j;
  
    parts->index = mult_elt;
!   parts->step = build_int_cst_type (type, best_mult);
  }
  
  /* Splits address ADDR into PARTS.
--- 431,447 ----
  	}
  
        elt = fold_convert (type, addr->elts[i]);
!       if (mult_elt)
! 	mult_elt = fold_build2 (op_code, type, mult_elt, elt);
!       else if (op_code == PLUS_EXPR)
!       	mult_elt = elt;
        else
! 	mult_elt = fold_build1 (NEGATE_EXPR, type, elt);
      }
    addr->n = j;
  
    parts->index = mult_elt;
!   parts->step = double_int_to_tree (type, best_mult);
  }
  
  /* Splits address ADDR into PARTS.
*************** most_expensive_mult_to_index (struct mem
*** 445,453 ****
     for complicated addressing modes is useless.  */
  
  static void
! addr_to_parts (struct affine_tree_combination *addr, tree type,
! 	       struct mem_address *parts)
  {
    unsigned i;
  
    parts->symbol = NULL_TREE;
--- 454,462 ----
     for complicated addressing modes is useless.  */
  
  static void
! addr_to_parts (aff_tree *addr, tree type, struct mem_address *parts)
  {
+   tree part;
    unsigned i;
  
    parts->symbol = NULL_TREE;
*************** addr_to_parts (struct affine_tree_combin
*** 455,462 ****
    parts->index = NULL_TREE;
    parts->step = NULL_TREE;
  
!   if (addr->offset)
!     parts->offset = build_int_cst_type (type, addr->offset);
    else
      parts->offset = NULL_TREE;
  
--- 464,471 ----
    parts->index = NULL_TREE;
    parts->step = NULL_TREE;
  
!   if (!double_int_zero_p (addr->offset))
!     parts->offset = double_int_to_tree (type, addr->offset);
    else
      parts->offset = NULL_TREE;
  
*************** addr_to_parts (struct affine_tree_combin
*** 466,474 ****
  
    /* Then try to process the remaining elements.  */
    for (i = 0; i < addr->n; i++)
!     add_to_parts (parts, type, addr->elts[i], addr->coefs[i]);
    if (addr->rest)
!     add_to_parts (parts, type, addr->rest, 1);
  }
  
  /* Force the PARTS to register.  */
--- 475,489 ----
  
    /* Then try to process the remaining elements.  */
    for (i = 0; i < addr->n; i++)
!     {
!       part = fold_convert (type, addr->elts[i]);
!       if (!double_int_one_p (addr->coefs[i]))
! 	part = fold_build2 (MULT_EXPR, type, part,
! 			    double_int_to_tree (type, addr->coefs[i]));
!       add_to_parts (parts, type, part);
!     }
    if (addr->rest)
!     add_to_parts (parts, type, addr->rest);
  }
  
  /* Force the PARTS to register.  */
*************** gimplify_mem_ref_parts (block_stmt_itera
*** 489,496 ****
     of created memory reference.  */
  
  tree
! create_mem_ref (block_stmt_iterator *bsi, tree type,
! 		struct affine_tree_combination *addr)
  {
    tree mem_ref, tmp;
    tree addr_type = build_pointer_type (type);
--- 504,510 ----
     of created memory reference.  */
  
  tree
! create_mem_ref (block_stmt_iterator *bsi, tree type, aff_tree *addr)
  {
    tree mem_ref, tmp;
    tree addr_type = build_pointer_type (type);
Index: tree-ssa-loop-ivopts.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-ivopts.c,v
retrieving revision 2.85.2.3
diff -c -3 -p -r2.85.2.3 tree-ssa-loop-ivopts.c
*** tree-ssa-loop-ivopts.c	2 Sep 2005 16:43:14 -0000	2.85.2.3
--- tree-ssa-loop-ivopts.c	9 Sep 2005 16:50:36 -0000
*************** Software Foundation, 51 Franklin Street,
*** 89,94 ****
--- 89,95 ----
  #include "cfgloop.h"
  #include "params.h"
  #include "langhooks.h"
+ #include "tree-affine.h"
  
  /* The infinite cost.  */
  #define INFTY 10000000
*************** struct iv_use
*** 160,165 ****
--- 161,168 ----
    unsigned id;		/* The id of the use.  */
    enum use_type type;	/* Type of the use.  */
    struct iv *iv;	/* The induction variable it is based on.  */
+   aff_tree base_aff;	/* Base of the induction variable, as an affine
+ 			   combination.  */
    tree stmt;		/* Statement in that it occurs.  */
    tree *op_p;		/* The place where it occurs.  */
    bitmap related_cands;	/* The set of "related" iv candidates, plus the common
*************** struct iv_cand
*** 196,201 ****
--- 199,206 ----
  			   "pseudocandidate" used to indicate the possibility
  			   to replace the final value of an iv by direct
  			   computation of the value.  */
+   aff_tree base_aff;	/* Base of the induction variable, as an affine
+ 			   combination.  */
    unsigned cost;	/* Cost of the candidate.  */
    bitmap depends_on;	/* The list of invariants that are used in step of the
  			   biv.  */
*************** determine_base_object (tree expr)
*** 786,791 ****
--- 791,797 ----
    switch (code)
      {
      case INTEGER_CST:
+     case MULT_EXPR:
        return NULL_TREE;
  
      case ADDR_EXPR:
*************** record_use (struct ivopts_data *data, tr
*** 1174,1179 ****
--- 1180,1186 ----
    use->id = n_iv_uses (data);
    use->type = use_type;
    use->iv = iv;
+   tree_to_aff_combination (iv->base, TREE_TYPE (iv->base), &use->base_aff);
    use->stmt = stmt;
    use->op_p = use_p;
    use->related_cands = BITMAP_ALLOC (NULL);
*************** add_candidate_1 (struct ivopts_data *dat
*** 2019,2024 ****
--- 2026,2034 ----
    unsigned i;
    struct iv_cand *cand = NULL;
    tree type, orig_type;
+ 
+   /* Important candidate should not be specific to a single use.  */
+   gcc_assert (!use || !important);
    
    if (base)
      {
*************** add_candidate_1 (struct ivopts_data *dat
*** 2076,2082 ****
        if (!base && !step)
  	cand->iv = NULL;
        else
! 	cand->iv = alloc_iv (base, step);
  
        cand->pos = pos;
        if (pos != IP_ORIGINAL && cand->iv)
--- 2086,2095 ----
        if (!base && !step)
  	cand->iv = NULL;
        else
! 	{
! 	  cand->iv = alloc_iv (base, step);
! 	  tree_to_aff_combination (base, TREE_TYPE (base), &cand->base_aff);
! 	}
  
        cand->pos = pos;
        if (pos != IP_ORIGINAL && cand->iv)
*************** add_iv_value_candidates (struct ivopts_d
*** 2203,2213 ****
        base = fold_build2 (MULT_EXPR, type,
  			  fold_convert (type, niter->niter),
  			  fold_build1 (NEGATE_EXPR, type, iv->step));
!       add_candidate (data, base, iv->step, true, use);
      }
    else
      add_candidate (data, build_int_cst (type, 0),
! 		   iv->step, true, use);
  
    /* Third, try removing the constant offset.  */
    base = strip_offset (iv->base, &offset);
--- 2216,2226 ----
        base = fold_build2 (MULT_EXPR, type,
  			  fold_convert (type, niter->niter),
  			  fold_build1 (NEGATE_EXPR, type, iv->step));
!       add_candidate (data, base, iv->step, true, NULL);
      }
    else
      add_candidate (data, build_int_cst (type, 0),
! 		   iv->step, true, NULL);
  
    /* Third, try removing the constant offset.  */
    base = strip_offset (iv->base, &offset);
*************** constant_multiple_of (tree type, tree to
*** 2766,3080 ****
      }
  }
  
- /* Sets COMB to CST.  */
- 
- static void
- aff_combination_const (struct affine_tree_combination *comb, tree type,
- 		       unsigned HOST_WIDE_INT cst)
- {
-   unsigned prec = TYPE_PRECISION (type);
- 
-   comb->type = type;
-   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
- 
-   comb->n = 0;
-   comb->rest = NULL_TREE;
-   comb->offset = cst & comb->mask;
- }
- 
- /* Sets COMB to single element ELT.  */
- 
- static void
- aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
- {
-   unsigned prec = TYPE_PRECISION (type);
- 
-   comb->type = type;
-   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
- 
-   comb->n = 1;
-   comb->elts[0] = elt;
-   comb->coefs[0] = 1;
-   comb->rest = NULL_TREE;
-   comb->offset = 0;
- }
- 
- /* Scales COMB by SCALE.  */
- 
- static void
- aff_combination_scale (struct affine_tree_combination *comb,
- 		       unsigned HOST_WIDE_INT scale)
- {
-   unsigned i, j;
- 
-   if (scale == 1)
-     return;
- 
-   if (scale == 0)
-     {
-       aff_combination_const (comb, comb->type, 0);
-       return;
-     }
- 
-   comb->offset = (scale * comb->offset) & comb->mask;
-   for (i = 0, j = 0; i < comb->n; i++)
-     {
-       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
-       comb->elts[j] = comb->elts[i];
-       if (comb->coefs[j] != 0)
- 	j++;
-     }
-   comb->n = j;
- 
-   if (comb->rest)
-     {
-       if (comb->n < MAX_AFF_ELTS)
- 	{
- 	  comb->coefs[comb->n] = scale;
- 	  comb->elts[comb->n] = comb->rest;
- 	  comb->rest = NULL_TREE;
- 	  comb->n++;
- 	}
-       else
- 	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
- 				  build_int_cst_type (comb->type, scale));
-     }
- }
- 
- /* Adds ELT * SCALE to COMB.  */
- 
- static void
- aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
- 			 unsigned HOST_WIDE_INT scale)
- {
-   unsigned i;
- 
-   if (scale == 0)
-     return;
- 
-   for (i = 0; i < comb->n; i++)
-     if (operand_equal_p (comb->elts[i], elt, 0))
-       {
- 	comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
- 	if (comb->coefs[i])
- 	  return;
- 
- 	comb->n--;
- 	comb->coefs[i] = comb->coefs[comb->n];
- 	comb->elts[i] = comb->elts[comb->n];
- 	return;
-       }
-   if (comb->n < MAX_AFF_ELTS)
-     {
-       comb->coefs[comb->n] = scale;
-       comb->elts[comb->n] = elt;
-       comb->n++;
-       return;
-     }
- 
-   if (scale == 1)
-     elt = fold_convert (comb->type, elt);
-   else
-     elt = fold_build2 (MULT_EXPR, comb->type,
- 		       fold_convert (comb->type, elt),
- 		       build_int_cst_type (comb->type, scale)); 
- 
-   if (comb->rest)
-     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
-   else
-     comb->rest = elt;
- }
- 
- /* Adds COMB2 to COMB1.  */
- 
- static void
- aff_combination_add (struct affine_tree_combination *comb1,
- 		     struct affine_tree_combination *comb2)
- {
-   unsigned i;
- 
-   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
-   for (i = 0; i < comb2-> n; i++)
-     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
-   if (comb2->rest)
-     aff_combination_add_elt (comb1, comb2->rest, 1);
- }
- 
- /* Splits EXPR into an affine combination of parts.  */
- 
- static void
- tree_to_aff_combination (tree expr, tree type,
- 			 struct affine_tree_combination *comb)
- {
-   struct affine_tree_combination tmp;
-   enum tree_code code;
-   tree cst, core, toffset;
-   HOST_WIDE_INT bitpos, bitsize;
-   enum machine_mode mode;
-   int unsignedp, volatilep;
- 
-   STRIP_NOPS (expr);
- 
-   code = TREE_CODE (expr);
-   switch (code)
-     {
-     case INTEGER_CST:
-       aff_combination_const (comb, type, int_cst_value (expr));
-       return;
- 
-     case PLUS_EXPR:
-     case MINUS_EXPR:
-       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
-       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
-       if (code == MINUS_EXPR)
- 	aff_combination_scale (&tmp, -1);
-       aff_combination_add (comb, &tmp);
-       return;
- 
-     case MULT_EXPR:
-       cst = TREE_OPERAND (expr, 1);
-       if (TREE_CODE (cst) != INTEGER_CST)
- 	break;
-       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
-       aff_combination_scale (comb, int_cst_value (cst));
-       return;
- 
-     case NEGATE_EXPR:
-       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
-       aff_combination_scale (comb, -1);
-       return;
- 
-     case ADDR_EXPR:
-       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
- 				  &toffset, &mode, &unsignedp, &volatilep,
- 				  false);
-       if (bitpos % BITS_PER_UNIT != 0)
- 	break;
-       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
-       core = build_fold_addr_expr (core);
-       if (TREE_CODE (core) == ADDR_EXPR)
- 	aff_combination_add_elt (comb, core, 1);
-       else
- 	{
- 	  tree_to_aff_combination (core, type, &tmp);
- 	  aff_combination_add (comb, &tmp);
- 	}
-       if (toffset)
- 	{
- 	  tree_to_aff_combination (toffset, type, &tmp);
- 	  aff_combination_add (comb, &tmp);
- 	}
-       return;
- 
-     default:
-       break;
-     }
- 
-   aff_combination_elt (comb, type, expr);
- }
- 
- /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
- 
- static tree
- add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
- 		 unsigned HOST_WIDE_INT mask)
- {
-   enum tree_code code;
- 
-   scale &= mask;
-   elt = fold_convert (type, elt);
- 
-   if (scale == 1)
-     {
-       if (!expr)
- 	return elt;
- 
-       return fold_build2 (PLUS_EXPR, type, expr, elt);
-     }
- 
-   if (scale == mask)
-     {
-       if (!expr)
- 	return fold_build1 (NEGATE_EXPR, type, elt);
- 
-       return fold_build2 (MINUS_EXPR, type, expr, elt);
-     }
- 
-   if (!expr)
-     return fold_build2 (MULT_EXPR, type, elt,
- 			build_int_cst_type (type, scale));
- 
-   if ((scale | (mask >> 1)) == mask)
-     {
-       /* Scale is negative.  */
-       code = MINUS_EXPR;
-       scale = (-scale) & mask;
-     }
-   else
-     code = PLUS_EXPR;
- 
-   elt = fold_build2 (MULT_EXPR, type, elt,
- 		     build_int_cst_type (type, scale));
-   return fold_build2 (code, type, expr, elt);
- }
- 
- /* Copies the tree elements of COMB to ensure that they are not shared.  */
- 
- static void
- unshare_aff_combination (struct affine_tree_combination *comb)
- {
-   unsigned i;
- 
-   for (i = 0; i < comb->n; i++)
-     comb->elts[i] = unshare_expr (comb->elts[i]);
-   if (comb->rest)
-     comb->rest = unshare_expr (comb->rest);
- }
- 
- /* Makes tree from the affine combination COMB.  */
- 
- static tree
- aff_combination_to_tree (struct affine_tree_combination *comb)
- {
-   tree type = comb->type;
-   tree expr = comb->rest;
-   unsigned i;
-   unsigned HOST_WIDE_INT off, sgn;
- 
-   /* Handle the special case produced by get_computation_aff when
-      the type does not fit in HOST_WIDE_INT.  */
-   if (comb->n == 0 && comb->offset == 0)
-     return fold_convert (type, expr);
- 
-   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
- 
-   for (i = 0; i < comb->n; i++)
-     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
- 			    comb->mask);
- 
-   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
-     {
-       /* Offset is negative.  */
-       off = (-comb->offset) & comb->mask;
-       sgn = comb->mask;
-     }
-   else
-     {
-       off = comb->offset;
-       sgn = 1;
-     }
-   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
- 			  comb->mask);
- }
- 
  /* Determines the expression by that USE is expressed from induction variable
     CAND at statement AT in LOOP.  The expression is stored in a decomposed
!    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
  
  static bool
  get_computation_aff (struct loop *loop,
  		     struct iv_use *use, struct iv_cand *cand, tree at,
! 		     struct affine_tree_combination *aff)
  {
    tree ubase = use->iv->base;
    tree ustep = use->iv->step;
--- 2779,2793 ----
      }
  }
  
  /* Determines the expression by that USE is expressed from induction variable
     CAND at statement AT in LOOP.  The expression is stored in a decomposed
!    form into AFF.  Returns false if USE cannot be expressed using CAND.
!    The ratio by that the candidate is multiplied is returned in MUL_RAT.  */
  
  static bool
  get_computation_aff (struct loop *loop,
  		     struct iv_use *use, struct iv_cand *cand, tree at,
! 		     aff_tree *aff, double_int *mul_rat)
  {
    tree ubase = use->iv->base;
    tree ustep = use->iv->step;
*************** get_computation_aff (struct loop *loop,
*** 3082,3093 ****
    tree cstep = cand->iv->step;
    tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
    tree uutype;
!   tree expr, delta;
!   tree ratio;
    unsigned HOST_WIDE_INT ustepi, cstepi;
!   HOST_WIDE_INT ratioi;
!   struct affine_tree_combination cbase_aff, expr_aff;
!   tree cstep_orig = cstep, ustep_orig = ustep;
  
    if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
      {
--- 2795,2803 ----
    tree cstep = cand->iv->step;
    tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
    tree uutype;
!   double_int rat;
    unsigned HOST_WIDE_INT ustepi, cstepi;
!   aff_tree cbase_aff, expr_aff;
  
    if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
      {
*************** get_computation_aff (struct loop *loop,
*** 3095,3169 ****
        return false;
      }
  
!   expr = var_at_stmt (loop, cand, at);
! 
!   if (TREE_TYPE (expr) != ctype)
!     {
!       /* This may happen with the original ivs.  */
!       expr = fold_convert (ctype, expr);
!     }
! 
!   if (TYPE_UNSIGNED (utype))
!     uutype = utype;
!   else
!     {
!       uutype = unsigned_type_for (utype);
!       ubase = fold_convert (uutype, ubase);
!       ustep = fold_convert (uutype, ustep);
!     }
! 
!   if (uutype != ctype)
!     {
!       expr = fold_convert (uutype, expr);
!       cbase = fold_convert (uutype, cbase);
!       cstep = fold_convert (uutype, cstep);
  
!       /* If the conversion is not noop, we must take it into account when
! 	 considering the value of the step.  */
!       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
! 	cstep_orig = cstep;
!     }
  
!   if (cst_and_fits_in_hwi (cstep_orig)
!       && cst_and_fits_in_hwi (ustep_orig))
      {
!       ustepi = int_cst_value (ustep_orig);
!       cstepi = int_cst_value (cstep_orig);
  
        if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
! 	{
! 	  /* TODO maybe consider case when ustep divides cstep and the ratio is
! 	     a power of 2 (so that the division is fast to execute)?  We would
! 	     need to be much more careful with overflows etc. then.  */
! 	  return false;
! 	}
  
!       ratio = build_int_cst_type (uutype, ratioi);
      }
    else
      {
!       ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
        if (!ratio)
  	return false;
! 
!       /* Ratioi is only used to detect special cases when the multiplicative
! 	 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
! 	 we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
! 	 to integer_onep/integer_all_onesp, since the former ignores
! 	 TREE_OVERFLOW.  */
!       if (cst_and_fits_in_hwi (ratio))
! 	ratioi = int_cst_value (ratio);
!       else if (integer_onep (ratio))
! 	ratioi = 1;
!       else if (integer_all_onesp (ratio))
! 	ratioi = -1;
!       else
! 	ratioi = 0;
      }
  
!   /* We may need to shift the value if we are after the increment.  */
    if (stmt_after_increment (loop, cand, at))
!     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
  
    /* use = ubase - ratio * cbase + ratio * var.
  
--- 2805,2852 ----
        return false;
      }
  
!   uutype = unsigned_type_for (utype);
  
!   /* If the conversion is not noop, we must take it into account when
!      considering the value of the step.  */
!   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
!     cstep = fold_convert (uutype, cstep);
  
!   if (cst_and_fits_in_hwi (cstep)
!       && cst_and_fits_in_hwi (ustep))
      {
!       HOST_WIDE_INT ratioi;
!       ustepi = int_cst_value (ustep);
!       cstepi = int_cst_value (cstep);
  
        if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
! 	return false;
  
!       rat = hwi_to_double_int (ratioi);
      }
    else
      {
!       tree ratio = constant_multiple_of (uutype, ustep, cstep);
        if (!ratio)
  	return false;
!       rat = tree_to_double_int (ratio);
      }
  
!   *aff = use->base_aff;
!   cbase_aff = cand->base_aff;
!   tree_to_aff_combination (var_at_stmt (loop, cand, at), ctype, &expr_aff);
!   aff_combination_convert (aff, uutype);
!   aff_combination_convert (&cbase_aff, uutype);
!   aff_combination_convert (&expr_aff, uutype);
! 
!   /* We need to shift the value if we are after the increment.  */
    if (stmt_after_increment (loop, cand, at))
!     {
!       aff_tree cstep_aff;
! 
!       tree_to_aff_combination (cstep, uutype, &cstep_aff);
!       aff_combination_add (&cbase_aff, &cstep_aff);
!     }
  
    /* use = ubase - ratio * cbase + ratio * var.
  
*************** get_computation_aff (struct loop *loop,
*** 3173,3222 ****
       happen, fold is able to apply the distributive law to obtain this form
       anyway.  */
  
!   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
!     {
!       /* Let's compute in trees and just return the result in AFF.  This case
! 	 should not be very common, and fold itself is not that bad either,
! 	 so making the aff. functions more complicated to handle this case
! 	 is not that urgent.  */
!       if (ratioi == 1)
! 	{
! 	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
! 	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
! 	}
!       else if (ratioi == -1)
! 	{
! 	  delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
! 	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
! 	}
!       else
! 	{
! 	  delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
! 	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
! 	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
! 	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
! 	}
! 
!       aff->type = uutype;
!       aff->n = 0;
!       aff->offset = 0;
!       aff->mask = 0;
!       aff->rest = expr;
!       return true;
!     }
! 
!   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
!      possible to compute ratioi.  */
!   gcc_assert (ratioi);
! 
!   tree_to_aff_combination (ubase, uutype, aff);
!   tree_to_aff_combination (cbase, uutype, &cbase_aff);
!   tree_to_aff_combination (expr, uutype, &expr_aff);
!   aff_combination_scale (&cbase_aff, -ratioi);
!   aff_combination_scale (&expr_aff, ratioi);
    aff_combination_add (aff, &cbase_aff);
    aff_combination_add (aff, &expr_aff);
  
    return true;
  }
  
--- 2856,2869 ----
       happen, fold is able to apply the distributive law to obtain this form
       anyway.  */
  
!   aff_combination_scale (&cbase_aff, double_int_negate (&cbase_aff, rat));
!   aff_combination_scale (&expr_aff, rat);
    aff_combination_add (aff, &cbase_aff);
    aff_combination_add (aff, &expr_aff);
  
+   if (mul_rat)
+     *mul_rat = rat;
+ 
    return true;
  }
  
*************** static tree
*** 3227,3236 ****
  get_computation_at (struct loop *loop,
  		    struct iv_use *use, struct iv_cand *cand, tree at)
  {
!   struct affine_tree_combination aff;
    tree type = TREE_TYPE (use->iv->base);
  
!   if (!get_computation_aff (loop, use, cand, at, &aff))
      return NULL_TREE;
    unshare_aff_combination (&aff);
    return fold_convert (type, aff_combination_to_tree (&aff));
--- 2874,2883 ----
  get_computation_at (struct loop *loop,
  		    struct iv_use *use, struct iv_cand *cand, tree at)
  {
!   aff_tree aff;
    tree type = TREE_TYPE (use->iv->base);
  
!   if (!get_computation_aff (loop, use, cand, at, &aff, NULL))
      return NULL_TREE;
    unshare_aff_combination (&aff);
    return fold_convert (type, aff_combination_to_tree (&aff));
*************** force_var_cost (struct ivopts_data *data
*** 3670,3807 ****
    return force_expr_to_var_cost (expr);
  }
  
! /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
!    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
!    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
!    invariants the computation depends on.  */
  
  static unsigned
! split_address_cost (struct ivopts_data *data,
! 		    tree addr, bool *symbol_present, bool *var_present,
! 		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
  {
!   tree core;
!   HOST_WIDE_INT bitsize;
!   HOST_WIDE_INT bitpos;
!   tree toffset;
!   enum machine_mode mode;
!   int unsignedp, volatilep;
    
!   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
! 			      &unsignedp, &volatilep, false);
! 
!   if (toffset != 0
!       || bitpos % BITS_PER_UNIT != 0
!       || TREE_CODE (core) != VAR_DECL)
      {
!       *symbol_present = false;
!       *var_present = true;
!       fd_ivopts_data = data;
!       walk_tree (&addr, find_depends, depends_on, NULL);
!       return target_spill_cost;
      }
  
!   *offset += bitpos / BITS_PER_UNIT;
!   if (TREE_STATIC (core)
!       || DECL_EXTERNAL (core))
!     {
!       *symbol_present = true;
!       *var_present = false;
!       return 0;
      }
-       
-   *symbol_present = false;
-   *var_present = true;
-   return 0;
- }
  
! /* Estimates cost of expressing difference of addresses E1 - E2 as
!    var + symbol + offset.  The value of offset is added to OFFSET,
!    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
!    part is missing.  DEPENDS_ON is a set of the invariants the computation
!    depends on.  */
  
! static unsigned
! ptr_difference_cost (struct ivopts_data *data,
! 		     tree e1, tree e2, bool *symbol_present, bool *var_present,
! 		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
! {
!   HOST_WIDE_INT diff = 0;
!   unsigned cost;
  
!   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
  
!   if (ptr_difference_const (e1, e2, &diff))
!     {
!       *offset += diff;
!       *symbol_present = false;
!       *var_present = false;
!       return 0;
!     }
  
!   if (e2 == integer_zero_node)
!     return split_address_cost (data, TREE_OPERAND (e1, 0),
! 			       symbol_present, var_present, offset, depends_on);
  
!   *symbol_present = false;
!   *var_present = true;
!   
!   cost = force_var_cost (data, e1, depends_on);
!   cost += force_var_cost (data, e2, depends_on);
!   cost += add_cost (Pmode);
  
!   return cost;
  }
  
! /* Estimates cost of expressing difference E1 - E2 as
!    var + symbol + offset.  The value of offset is added to OFFSET,
!    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
!    part is missing.  DEPENDS_ON is a set of the invariants the computation
!    depends on.  */
  
  static unsigned
! difference_cost (struct ivopts_data *data,
! 		 tree e1, tree e2, bool *symbol_present, bool *var_present,
! 		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
  {
!   unsigned cost;
!   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
!   unsigned HOST_WIDE_INT off1, off2;
  
!   e1 = strip_offset (e1, &off1);
!   e2 = strip_offset (e2, &off2);
!   *offset += off1 - off2;
  
!   STRIP_NOPS (e1);
!   STRIP_NOPS (e2);
  
!   if (TREE_CODE (e1) == ADDR_EXPR)
!     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
! 				depends_on);
!   *symbol_present = false;
  
!   if (operand_equal_p (e1, e2, 0))
      {
!       *var_present = false;
!       return 0;
      }
!   *var_present = true;
!   if (zero_p (e2))
!     return force_var_cost (data, e1, depends_on);
  
!   if (zero_p (e1))
!     {
!       cost = force_var_cost (data, e2, depends_on);
!       cost += multiply_by_cost (-1, mode);
  
!       return cost;
      }
  
!   cost = force_var_cost (data, e1, depends_on);
!   cost += force_var_cost (data, e2, depends_on);
!   cost += add_cost (mode);
! 
!   return cost;
  }
  
  /* Determines the cost of the computation by that USE is expressed
--- 3317,3483 ----
    return force_expr_to_var_cost (expr);
  }
  
! /* Determines the cost of the computation COMP.  A set of invariants
!    the expression depends on is stored in DEPENDS_ON.  COMP is modified
!    in the process.  */
  
  static unsigned
! aff_combination_cost (struct ivopts_data *data, aff_tree *comp,
! 		      bitmap *depends_on)
  {
!   int n_adds = comp->n - 1;
!   unsigned i, j, cost = 0;
!   bool all_negated = true;
!   enum machine_mode mode = TYPE_MODE (comp->type);
    
!   if (comp->n == 0 && !comp->rest)
      {
!       /* Computation is an integer constant.  */
!       return force_expr_to_var_cost (double_int_to_tree (comp->type,
! 							 comp->offset));
      }
  
!   if (comp->rest)
!     {
!       n_adds++;
!       cost += force_var_cost (data, comp->rest, depends_on);
!       all_negated = false;
!     }
!   if (!double_int_zero_p (comp->offset))
!     {
!       n_adds++;
!       all_negated = false;
      }
  
!   /* Compute the cost of multiplications.  Using distributivity, each
!      coefficient needs to be considered only once.  Furthermore, we
!      do not need to consider both constant and its negation.
  
!      If all elements are multiplied by negative number, we must perform one
!      multiplication by a negative constant (and use MINUS_EXPR for the rest).
!      Otherwise, it is possible to always multiply by a positive constant.  */
!   for (i = 0; i < comp->n; i++)
!     {
!       cost += force_var_cost (data, comp->elts[i], depends_on);
!       if (double_int_negative_p (comp, comp->coefs[i]))
! 	comp->coefs[i] = double_int_negate (comp, comp->coefs[i]);
!       else
! 	all_negated = false;
  
!       if (double_int_one_p (comp->coefs[i]))
! 	continue;
  
!       for (j = 0; j < i; j++)
! 	if (double_int_equal_p (comp->coefs[i], comp->coefs[j]))
! 	  break;
!       if (j < i)
! 	continue;
  
!       if (double_int_fits_in_hwi_p (comp, comp->coefs[i]))
! 	{
! 	  /* comp->coefs[i] almost always fits into HOST_WIDE_INT, so do not
! 	     worry about this case.  */
! 	  cost += target_spill_cost;
! 	}
!       else
! 	cost += multiply_by_cost (double_int_to_hwi (comp, comp->coefs[i]),
! 				  mode);
!     }
  
!   if (all_negated)
!     {
!       /* This is not really precise -- multiplying by a negative constant could
! 	 be cheaper than multiplying by its negation and negating.  */
!       cost += multiply_by_cost (-1, mode);
!     }
  
!   return cost + n_adds * add_cost (mode);
  }
  
! /* Determines the cost of the computation COMP used as a memory address.
!    A set of invariants the expression depends on is stored in DEPENDS_ON.
!    COMP is modified in the process.  RATIO is the ratio by which the iv is
!    multiplied, as a good candidate for step in an addressing mode.  */
  
  static unsigned
! aff_combination_cost_address (struct ivopts_data *data, aff_tree *comp,
! 			      bitmap *depends_on, double_int ratio)
  {
!   HOST_WIDE_INT rat = 1, off = 0;
!   unsigned cost = 0, i, nelts;
!   bool symbol_present = false, var_present = false;
!   enum machine_mode mode = TYPE_MODE (comp->type);
! 
!   /* Try finding a symbol.  */
!   for (i = 0; i < comp->n; i++)
!     {
!       tree elt = comp->elts[i];
!       if (double_int_one_p (comp->coefs[i])
! 	  && TREE_CODE (elt) == ADDR_EXPR
! 	  && fixed_address_object_p (TREE_OPERAND (elt, 0)))
! 	{
! 	  symbol_present = true;
! 	  aff_combination_remove_elt (comp, i);
! 	  break;
! 	}
!     }
  
!   if (double_int_fits_in_hwi_p (comp, ratio)
!       && !double_int_one_p (ratio))
!     {
!       /* Try separating index from comp.  */
!       for (i = 0; i < comp->n; i++)
! 	if (double_int_equal_p (ratio, comp->coefs[i]))
! 	  break;
! 
!       if (i < comp->n)
! 	{
! 	  double_int ratio_neg = double_int_negate (comp, ratio);
! 	  int n_add = -1;
! 	  rat = double_int_to_hwi (comp, ratio);
  
! 	  for (; i < comp->n; )
! 	    {
! 	      if (!double_int_equal_p (ratio, comp->coefs[i])
! 		  && !double_int_equal_p (ratio_neg, comp->coefs[i]))
! 		{
! 		  i++;
! 		  continue;
! 		}
! 	      n_add++;
! 	      cost += force_var_cost (data, comp->elts[i], depends_on);
! 	      aff_combination_remove_elt (comp, i);
! 	    }
! 	  cost += n_add * add_cost (mode);
! 	}
!     }
  
!   nelts = comp->n;
  
!   if (!double_int_zero_p (comp->offset))
      {
!       if (double_int_fits_in_hwi_p (comp, comp->offset))
! 	{
! 	  off = double_int_to_hwi (comp, comp->offset);
! 	  comp->offset = hwi_to_double_int (0);
! 	}
!       else
! 	nelts++;
      }
!   if (comp->rest)
!     nelts++;
  
!   cost += aff_combination_cost (data, comp, depends_on);
  
!   /* If there is more than one element, we can save one addition by using
!      BASE + INDEX addressing mode, if present.  */
!   if (nelts > 1)
!     {
!       var_present = true;
!       cost -= add_cost (mode);
      }
  
!   return cost + get_address_cost (symbol_present, var_present, off, rat);
  }
  
  /* Determines the cost of the computation by that USE is expressed
*************** get_computation_cost_at (struct ivopts_d
*** 3815,3827 ****
  			 struct iv_use *use, struct iv_cand *cand,
  			 bool address_p, bitmap *depends_on, tree at)
  {
!   tree ubase = use->iv->base, ustep = use->iv->step;
!   tree cbase, cstep;
    tree utype = TREE_TYPE (ubase), ctype;
!   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
!   HOST_WIDE_INT ratio, aratio;
!   bool var_present, symbol_present;
!   unsigned cost = 0, n_sums;
  
    *depends_on = NULL;
  
--- 3491,3502 ----
  			 struct iv_use *use, struct iv_cand *cand,
  			 bool address_p, bitmap *depends_on, tree at)
  {
!   tree ubase = use->iv->base;
!   tree cbase;
    tree utype = TREE_TYPE (ubase), ctype;
!   aff_tree comp;
!   double_int rat;
!   unsigned cost;
  
    *depends_on = NULL;
  
*************** get_computation_cost_at (struct ivopts_d
*** 3830,3836 ****
      return INFTY;
  
    cbase = cand->iv->base;
-   cstep = cand->iv->step;
    ctype = TREE_TYPE (cbase);
  
    if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
--- 3505,3510 ----
*************** get_computation_cost_at (struct ivopts_d
*** 3852,3978 ****
  	return INFTY;
      }
  
!   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
!     {
!       /* TODO -- add direct handling of this case.  */
!       goto fallback;
!     }
! 
!   /* CSTEPI is removed from the offset in case statement is after the
!      increment.  If the step is not constant, we use zero instead.
!      This is a bit imprecise (there is the extra addition), but
!      redundancy elimination is likely to transform the code so that
!      it uses value of the variable before increment anyway,
!      so it is not that much unrealistic.  */
!   if (cst_and_fits_in_hwi (cstep))
!     cstepi = int_cst_value (cstep);
!   else
!     cstepi = 0;
! 
!   if (cst_and_fits_in_hwi (ustep)
!       && cst_and_fits_in_hwi (cstep))
!     {
!       ustepi = int_cst_value (ustep);
! 
!       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
! 	return INFTY;
!     }
!   else
!     {
!       tree rat;
!       
!       rat = constant_multiple_of (utype, ustep, cstep);
!     
!       if (!rat)
! 	return INFTY;
! 
!       if (cst_and_fits_in_hwi (rat))
! 	ratio = int_cst_value (rat);
!       else if (integer_onep (rat))
! 	ratio = 1;
!       else if (integer_all_onesp (rat))
! 	ratio = -1;
!       else
! 	return INFTY;
!     }
! 
!   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
!      or ratio == 1, it is better to handle this like
!      
!      ubase - ratio * cbase + ratio * var
!      
!      (also holds in the case ratio == -1, TODO.  */
  
-   if (cst_and_fits_in_hwi (cbase))
-     {
-       offset = - ratio * int_cst_value (cbase); 
-       cost += difference_cost (data,
- 			       ubase, integer_zero_node,
- 			       &symbol_present, &var_present, &offset,
- 			       depends_on);
-     }
-   else if (ratio == 1)
-     {
-       cost += difference_cost (data,
- 			       ubase, cbase,
- 			       &symbol_present, &var_present, &offset,
- 			       depends_on);
-     }
-   else
-     {
-       cost += force_var_cost (data, cbase, depends_on);
-       cost += add_cost (TYPE_MODE (ctype));
-       cost += difference_cost (data,
- 			       ubase, integer_zero_node,
- 			       &symbol_present, &var_present, &offset,
- 			       depends_on);
-     }
- 
-   /* If we are after the increment, the value of the candidate is higher by
-      one iteration.  */
-   if (stmt_after_increment (data->current_loop, cand, at))
-     offset -= ratio * cstepi;
- 
-   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
-      (symbol/var/const parts may be omitted).  If we are looking for an address,
-      find the cost of addressing this.  */
    if (address_p)
!     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
! 
!   /* Otherwise estimate the costs for computing the expression.  */
!   aratio = ratio > 0 ? ratio : -ratio;
!   if (!symbol_present && !var_present && !offset)
!     {
!       if (ratio != 1)
! 	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
! 
!       return cost;
!     }
! 
!   if (aratio != 1)
!     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
! 
!   n_sums = 1;
!   if (var_present
!       /* Symbol + offset should be compile-time computable.  */
!       && (symbol_present || offset))
!     n_sums++;
! 
!   return cost + n_sums * add_cost (TYPE_MODE (ctype));
! 
! fallback:
!   {
!     /* Just get the expression, expand it and measure the cost.  */
!     tree comp = get_computation_at (data->current_loop, use, cand, at);
! 
!     if (!comp)
!       return INFTY;
! 
!     if (address_p)
!       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
  
!     return computation_cost (comp);
!   }
  }
  
  /* Determines the cost of the computation by that USE is expressed
--- 3526,3541 ----
  	return INFTY;
      }
  
!   if (!get_computation_aff (data->current_loop, use, cand, at, &comp, &rat))
!     return INFTY;
  
    if (address_p)
!     cost = aff_combination_cost_address (data, &comp, depends_on, rat);
!   else
!     cost = aff_combination_cost (data, &comp, depends_on);
  
!   gcc_assert ((signed) cost >= 0);
!   return cost;
  }
  
  /* Determines the cost of the computation by that USE is expressed
*************** static void
*** 5683,5693 ****
  rewrite_use_address (struct ivopts_data *data,
  		     struct iv_use *use, struct iv_cand *cand)
  {
!   struct affine_tree_combination aff;
    block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
    tree ref;
  
!   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
    unshare_aff_combination (&aff);
  
    ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
--- 5246,5256 ----
  rewrite_use_address (struct ivopts_data *data,
  		     struct iv_use *use, struct iv_cand *cand)
  {
!   aff_tree aff;
    block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
    tree ref;
  
!   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff, NULL);
    unshare_aff_combination (&aff);
  
    ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
Index: tree.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.h,v
retrieving revision 1.750.2.2
diff -c -3 -p -r1.750.2.2 tree.h
*** tree.h	2 Sep 2005 16:43:25 -0000	1.750.2.2
--- tree.h	9 Sep 2005 16:50:36 -0000
*************** extern int tree_map_eq (const void *, co
*** 4182,4187 ****
--- 4182,4188 ----
  /* In tree-ssa-address.c.  */
  extern tree tree_mem_ref_addr (tree, tree);
  extern void copy_mem_ref_info (tree, tree);
+ extern bool fixed_address_object_p (tree);
  
  /* In tree-object-size.c.  */
  extern void init_object_sizes (void);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]