This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] 4.1 ivopts bug


The attached patch backports a couple of ivopts changes from mainline to 4.1.
While they were nominally to fix missed optimization opportunities, it appears 
they also fix a a wrong-code bug.

When compiling the code below with -O1 the .t75.ivcanon dump contains:
  (set_nb_iterations_in_loop = 357913943B)
And the resulting code is incorrect.

The problem is buried in number_of_iterations_cond. This is called with two 
IV, both of which have a base of &num[1]. In the process of converting 
the "<" condition into "!=" it does (base1 - 1) - base0. This overflows and 
gives a delta of 0xffffffffu.

The code on mainline has been pretty much rewritten. I tried and failed to 
find a small fix, so I'm proposing backpointing the changes attached.

Tested with cross to arm-none-eabi.
Ok for 4.1?

Paul

2006-03-31  Paul Brook  <paul@codesourcery.com>

	Backport form mainline.
gcc/
	2006-01-14  Zdenek Dvorak <dvorakz@suse.cz>
	* tree-ssa-loop-niter.c (number_of_iterations_cond): Split into several
	functions.
	(number_of_iterations_ne, number_of_iterations_lt_to_ne,
	assert_no_overflow_lt, assert_loop_rolls_lt, number_of_iterations_lt,
	number_of_iterations_le): New functions.
	(number_of_iterations_special): Removed.
	(number_of_iterations_exit): Do not use number_of_iterations_special.
	* tree.c (unsigned_type_for): Always return integer type.

	2005-01-06  Zdenek Dvorak <dvorakz@suse.cz>
	PR tree-optimization/18527
	* tree-ssa-loop-niter.c (number_of_iterations_cond,
	number_of_iterations_special, number_of_iterations_exit):
	Move base and step of an iv to a single structure.  Add
	no_overflow flag, and use it in # of iterations analysis.
	* tree-scalar-evolution.c (analyze_scalar_evolution_in_loop): Add
	folded_casts argument.
	(simple_iv): Pass base and step in a structure.  Set no_overflow
	flag.
	(scev_const_prop): Add argument to analyze_scalar_evolution_in_loop.
	Evaluate expensiveness of computing # of iterations instead of
	the final expression.
	* tree-scalar-evolution.h (affine_iv): New structure.
	(simple_iv): Declaration changed.
	* tree-chrec.c (chrec_apply): Handle chrecs containing symbols.
	* tree-ssa-loop-ivopts.c (determine_biv_step, find_givs_in_stmt_scev,
	find_givs_in_stmt): Changed due to simple_iv change.

gcc/testsuite/
	2005-01-06  Zdenek Dvorak <dvorakz@suse.cz>
	* gcc.dg/tree-ssa/loop-15.c: New test.

	2005-01-14  Zdenek Dvorak <dvorakz@suse.cz>
	* gcc.dg/tree-ssa/pr19210-1.c: Update outcome.  Add new test loop.
	* gcc.dg/tree-ssa/pr19210-2.c: Ditto.



extern void *gen_rtx(int, int, int);

struct f
{
    int initial_offset;
    int can_eliminate;
    int can_eliminate_prev;
};


struct f num[1] = {33, 14};
extern int x;

void foo(void)
{
    struct f *p;
    for (p = num; p < &num[1]; p++)
        {
            x += p->can_eliminate;
        }
}
Index: gcc/tree-ssa-loop-niter.c
===================================================================
--- gcc/tree-ssa-loop-niter.c	(revision 112546)
+++ gcc/tree-ssa-loop-niter.c	(working copy)
@@ -126,469 +126,504 @@ inverse (tree x, tree mask)
   return rslt;
 }
 
-/* Determine the number of iterations according to condition (for staying
-   inside loop) which compares two induction variables using comparison
-   operator CODE.  The induction variable on left side of the comparison
-   has base BASE0 and step STEP0. the right-hand side one has base
-   BASE1 and step STEP1.  Both induction variables must have type TYPE,
-   which must be an integer or pointer type.  STEP0 and STEP1 must be
-   constants (or NULL_TREE, which is interpreted as constant zero).
-   
-   The results (number of iterations and assumptions as described in
-   comments at struct tree_niter_desc in tree-flow.h) are stored to NITER.
-   In case we are unable to determine number of iterations, contents of
-   this structure is unchanged.  */
+/* Determines number of iterations of loop whose ending condition
+   is IV <> FINAL.  TYPE is the type of the iv.  The number of
+   iterations is stored to NITER.  NEVER_INFINITE is true if
+   we know that the loop cannot be infinite (we derived this
+   earlier, and possibly set NITER->assumptions to make sure this
+   is the case.  */
 
-static void
-number_of_iterations_cond (tree type, tree base0, tree step0,
-			   enum tree_code code, tree base1, tree step1,
-			   struct tree_niter_desc *niter)
-{
-  tree step, delta, mmin, mmax;
-  tree may_xform, bound, s, d, tmp;
-  bool was_sharp = false;
-  tree assumption;
-  tree assumptions = boolean_true_node;
-  tree noloop_assumptions = boolean_false_node;
-  tree niter_type, signed_niter_type;
-  tree bits;
+static bool
+number_of_iterations_ne (tree type, affine_iv *iv, tree final,
+			 struct tree_niter_desc *niter, bool never_infinite)
+{
+  tree niter_type = unsigned_type_for (type);
+  tree s, c, d, bits, assumption, tmp, bound;
 
-  /* The meaning of these assumptions is this:
-     if !assumptions
-       then the rest of information does not have to be valid
-     if noloop_assumptions then the loop does not have to roll
-       (but it is only conservative approximation, i.e. it only says that
-       if !noloop_assumptions, then the loop does not end before the computed
-       number of iterations)  */
-
-  /* Make < comparison from > ones.  */
-  if (code == GE_EXPR
-      || code == GT_EXPR)
+  /* Rearrange the terms so that we get inequality s * i <> c, with s
+     positive.  Also cast everything to the unsigned type.  */
+  if (tree_int_cst_sign_bit (iv->step))
+    {
+      s = fold_convert (niter_type,
+			fold_build1 (NEGATE_EXPR, type, iv->step));
+      c = fold_build2 (MINUS_EXPR, niter_type,
+		       fold_convert (niter_type, iv->base),
+		       fold_convert (niter_type, final));
+    }
+  else
     {
-      SWAP (base0, base1);
-      SWAP (step0, step1);
-      code = swap_tree_comparison (code);
+      s = fold_convert (niter_type, iv->step);
+      c = fold_build2 (MINUS_EXPR, niter_type,
+		       fold_convert (niter_type, final),
+		       fold_convert (niter_type, iv->base));
     }
 
-  /* We can handle the case when neither of the sides of the comparison is
-     invariant, provided that the test is NE_EXPR.  This rarely occurs in
-     practice, but it is simple enough to manage.  */
-  if (!zero_p (step0) && !zero_p (step1))
+  /* First the trivial cases -- when the step is 1.  */
+  if (integer_onep (s))
     {
-      if (code != NE_EXPR)
-	return;
-
-      step0 = fold_binary_to_constant (MINUS_EXPR, type, step0, step1);
-      step1 = NULL_TREE;
+      niter->niter = c;
+      return true;
     }
 
-  /* If the result is a constant,  the loop is weird.  More precise handling
-     would be possible, but the situation is not common enough to waste time
-     on it.  */
-  if (zero_p (step0) && zero_p (step1))
-    return;
+  /* Let nsd (step, size of mode) = d.  If d does not divide c, the loop
+     is infinite.  Otherwise, the number of iterations is
+     (inverse(s/d) * (c/d)) mod (size of mode/d).  */
+  bits = num_ending_zeros (s);
+  bound = build_low_bits_mask (niter_type,
+			       (TYPE_PRECISION (niter_type)
+				- tree_low_cst (bits, 1)));
+
+  d = fold_binary_to_constant (LSHIFT_EXPR, niter_type,
+			       build_int_cst_type (niter_type, 1), bits);
+  s = fold_binary_to_constant (RSHIFT_EXPR, niter_type, s, bits);
+
+  if (!never_infinite)
+    {
+      /* If we cannot assume that the loop is not infinite, record the
+	 assumptions for divisibility of c.  */
+      assumption = fold_build2 (FLOOR_MOD_EXPR, niter_type, c, d);
+      assumption = fold_build2 (EQ_EXPR, boolean_type_node,
+				assumption, build_int_cst (niter_type, 0));
+      if (!nonzero_p (assumption))
+	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+					  niter->assumptions, assumption);
+    }
+      
+  c = fold_build2 (EXACT_DIV_EXPR, niter_type, c, d);
+  tmp = fold_build2 (MULT_EXPR, niter_type, c, inverse (s, bound));
+  niter->niter = fold_build2 (BIT_AND_EXPR, niter_type, tmp, bound);
+  return true;
+}
 
-  /* Ignore loops of while (i-- < 10) type.  */
-  if (code != NE_EXPR)
-    {
-      if (step0 && tree_int_cst_sign_bit (step0))
-	return;
+/* Checks whether we can determine the final value of the control variable
+   of the loop with ending condition IV0 < IV1 (computed in TYPE).
+   DELTA is the difference IV1->base - IV0->base, STEP is the absolute value
+   of the step.  The assumptions necessary to ensure that the computation
+   of the final value does not overflow are recorded in NITER.  If we
+   find the final value, we adjust DELTA and return TRUE.  Otherwise
+   we return false.  */
 
-      if (!zero_p (step1) && !tree_int_cst_sign_bit (step1))
-	return;
-    }
+static bool
+number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1,
+			       struct tree_niter_desc *niter,
+			       tree *delta, tree step)
+{
+  tree niter_type = TREE_TYPE (step);
+  tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step);
+  tree tmod;
+  tree assumption = boolean_true_node, bound, noloop;
 
-  if (POINTER_TYPE_P (type))
-    {
-      /* We assume pointer arithmetic never overflows.  */
-      mmin = mmax = NULL_TREE;
+  if (TREE_CODE (mod) != INTEGER_CST)
+    return false;
+  if (nonzero_p (mod))
+    mod = fold_build2 (MINUS_EXPR, niter_type, step, mod);
+  tmod = fold_convert (type, mod);
+
+  if (nonzero_p (iv0->step))
+    {
+      /* The final value of the iv is iv1->base + MOD, assuming that this
+	 computation does not overflow, and that
+	 iv0->base <= iv1->base + MOD.  */
+      if (!iv1->no_overflow && !zero_p (mod))
+	{
+	  bound = fold_build2 (MINUS_EXPR, type,
+			       TYPE_MAX_VALUE (type), tmod);
+	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
+				    iv1->base, bound);
+	  if (zero_p (assumption))
+	    return false;
+	}
+      noloop = fold_build2 (GT_EXPR, boolean_type_node,
+			    iv0->base,
+			    fold_build2 (PLUS_EXPR, type,
+					 iv1->base, tmod));
     }
   else
     {
-      mmin = TYPE_MIN_VALUE (type);
-      mmax = TYPE_MAX_VALUE (type);
+      /* The final value of the iv is iv0->base - MOD, assuming that this
+	 computation does not overflow, and that
+	 iv0->base - MOD <= iv1->base. */
+      if (!iv0->no_overflow && !zero_p (mod))
+	{
+	  bound = fold_build2 (PLUS_EXPR, type,
+			       TYPE_MIN_VALUE (type), tmod);
+	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
+				    iv0->base, bound);
+	  if (zero_p (assumption))
+	    return false;
+	}
+      noloop = fold_build2 (GT_EXPR, boolean_type_node,
+			    fold_build2 (MINUS_EXPR, type,
+					 iv0->base, tmod),
+			    iv1->base);
     }
 
-  /* Some more condition normalization.  We must record some assumptions
-     due to overflows.  */
+  if (!nonzero_p (assumption))
+    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+				      niter->assumptions,
+				      assumption);
+  if (!zero_p (noloop))
+    niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+				      niter->may_be_zero,
+				      noloop);
+  *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod);
+  return true;
+}
+
+/* Add assertions to NITER that ensure that the control variable of the loop
+   with ending condition IV0 < IV1 does not overflow.  Types of IV0 and IV1
+   are TYPE.  Returns false if we can prove that there is an overflow, true
+   otherwise.  STEP is the absolute value of the step.  */
 
-  if (code == LT_EXPR)
+static bool
+assert_no_overflow_lt (tree type, affine_iv *iv0, affine_iv *iv1,
+		       struct tree_niter_desc *niter, tree step)
+{
+  tree bound, d, assumption, diff;
+  tree niter_type = TREE_TYPE (step);
+
+  if (nonzero_p (iv0->step))
     {
-      /* We want to take care only of <=; this is easy,
-	 as in cases the overflow would make the transformation unsafe the loop
-	 does not roll.  Seemingly it would make more sense to want to take
-	 care of <, as NE is more similar to it, but the problem is that here
-	 the transformation would be more difficult due to possibly infinite
-	 loops.  */
-      if (zero_p (step0))
+      /* for (i = iv0->base; i < iv1->base; i += iv0->step) */
+      if (iv0->no_overflow)
+	return true;
+
+      /* If iv0->base is a constant, we can determine the last value before
+	 overflow precisely; otherwise we conservatively assume
+	 MAX - STEP + 1.  */
+
+      if (TREE_CODE (iv0->base) == INTEGER_CST)
 	{
-	  if (mmax)
-	    assumption = fold_build2 (EQ_EXPR, boolean_type_node, base0, mmax);
-	  else
-	    assumption = boolean_false_node;
-	  if (nonzero_p (assumption))
-	    goto zero_iter;
-	  base0 = fold_build2 (PLUS_EXPR, type, base0,
-			       build_int_cst_type (type, 1));
+	  d = fold_build2 (MINUS_EXPR, niter_type,
+			   fold_convert (niter_type, TYPE_MAX_VALUE (type)),
+			   fold_convert (niter_type, iv0->base));
+	  diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
 	}
       else
-	{
-	  if (mmin)
-	    assumption = fold_build2 (EQ_EXPR, boolean_type_node, base1, mmin);
-	  else
-	    assumption = boolean_false_node;
-	  if (nonzero_p (assumption))
-	    goto zero_iter;
-	  base1 = fold_build2 (MINUS_EXPR, type, base1,
-			       build_int_cst_type (type, 1));
+	diff = fold_build2 (MINUS_EXPR, niter_type, step,
+			    build_int_cst_type (niter_type, 1));
+      bound = fold_build2 (MINUS_EXPR, type,
+			   TYPE_MAX_VALUE (type), fold_convert (type, diff));
+      assumption = fold_build2 (LE_EXPR, boolean_type_node,
+				iv1->base, bound);
+    }
+  else
+    {
+      /* for (i = iv1->base; i > iv0->base; i += iv1->step) */
+      if (iv1->no_overflow)
+	return true;
+
+      if (TREE_CODE (iv1->base) == INTEGER_CST)
+	{
+	  d = fold_build2 (MINUS_EXPR, niter_type,
+			   fold_convert (niter_type, iv1->base),
+			   fold_convert (niter_type, TYPE_MIN_VALUE (type)));
+	  diff = fold_build2 (FLOOR_MOD_EXPR, niter_type, d, step);
 	}
-      noloop_assumptions = assumption;
-      code = LE_EXPR;
-
-      /* It will be useful to be able to tell the difference once more in
-	 <= -> != reduction.  */
-      was_sharp = true;
+      else
+	diff = fold_build2 (MINUS_EXPR, niter_type, step,
+			    build_int_cst_type (niter_type, 1));
+      bound = fold_build2 (PLUS_EXPR, type,
+			   TYPE_MIN_VALUE (type), fold_convert (type, diff));
+      assumption = fold_build2 (GE_EXPR, boolean_type_node,
+				iv0->base, bound);
     }
 
-  /* Take care of trivially infinite loops.  */
-  if (code != NE_EXPR)
-    {
-      if (zero_p (step0)
-	  && mmin
-	  && operand_equal_p (base0, mmin, 0))
-	return;
-      if (zero_p (step1)
-	  && mmax
-	  && operand_equal_p (base1, mmax, 0))
-	return;
-    }
-
-  /* If we can we want to take care of NE conditions instead of size
-     comparisons, as they are much more friendly (most importantly
-     this takes care of special handling of loops with step 1).  We can
-     do it if we first check that upper bound is greater or equal to
-     lower bound, their difference is constant c modulo step and that
-     there is not an overflow.  */
-  if (code != NE_EXPR)
+  if (zero_p (assumption))
+    return false;
+  if (!nonzero_p (assumption))
+    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+				      niter->assumptions, assumption);
+    
+  iv0->no_overflow = true;
+  iv1->no_overflow = true;
+  return true;
+}
+
+/* Add an assumption to NITER that a loop whose ending condition
+   is IV0 < IV1 rolls.  TYPE is the type of the control iv.  */
+
+static void
+assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
+		      struct tree_niter_desc *niter)
+{
+  tree assumption = boolean_true_node, bound, diff;
+  tree mbz, mbzl, mbzr;
+
+  if (nonzero_p (iv0->step))
     {
-      if (zero_p (step0))
-	step = fold_unary_to_constant (NEGATE_EXPR, type, step1);
-      else
-	step = step0;
-      delta = fold_build2 (MINUS_EXPR, type, base1, base0);
-      delta = fold_build2 (FLOOR_MOD_EXPR, type, delta, step);
-      may_xform = boolean_false_node;
-
-      if (TREE_CODE (delta) == INTEGER_CST)
-	{
-	  tmp = fold_binary_to_constant (MINUS_EXPR, type, step,
-					 build_int_cst_type (type, 1));
-	  if (was_sharp
-	      && operand_equal_p (delta, tmp, 0))
-	    {
-	      /* A special case.  We have transformed condition of type
-		 for (i = 0; i < 4; i += 4)
-		 into
-		 for (i = 0; i <= 3; i += 4)
-		 obviously if the test for overflow during that transformation
-		 passed, we cannot overflow here.  Most importantly any
-		 loop with sharp end condition and step 1 falls into this
-		 category, so handling this case specially is definitely
-		 worth the troubles.  */
-	      may_xform = boolean_true_node;
-	    }
-	  else if (zero_p (step0))
-	    {
-	      if (!mmin)
-		may_xform = boolean_true_node;
-	      else
-		{
-		  bound = fold_binary_to_constant (PLUS_EXPR, type,
-						   mmin, step);
-		  bound = fold_binary_to_constant (MINUS_EXPR, type,
-						   bound, delta);
-		  may_xform = fold_build2 (LE_EXPR, boolean_type_node,
-					   bound, base0);
-		}
-	    }
-	  else
-	    {
-	      if (!mmax)
-		may_xform = boolean_true_node;
-	      else
-		{
-		  bound = fold_binary_to_constant (MINUS_EXPR, type,
-						   mmax, step);
-		  bound = fold_binary_to_constant (PLUS_EXPR, type,
-						   bound, delta);
-		  may_xform = fold_build2 (LE_EXPR, boolean_type_node,
-					   base1, bound);
-		}
-	    }
-	}
+      diff = fold_build2 (MINUS_EXPR, type,
+			  iv0->step, build_int_cst_type (type, 1));
 
-      if (!zero_p (may_xform))
+      /* We need to know that iv0->base >= MIN + iv0->step - 1.  Since
+	 0 address never belongs to any object, we can assume this for
+	 pointers.  */
+      if (!POINTER_TYPE_P (type))
 	{
-	  /* We perform the transformation always provided that it is not
-	     completely senseless.  This is OK, as we would need this assumption
-	     to determine the number of iterations anyway.  */
-	  if (!nonzero_p (may_xform))
-	    assumptions = may_xform;
+	  bound = fold_build2 (PLUS_EXPR, type,
+			       TYPE_MIN_VALUE (type), diff);
+	  assumption = fold_build2 (GE_EXPR, boolean_type_node,
+				    iv0->base, bound);
+	}
 
-	  if (zero_p (step0))
-	    {
-	      base0 = fold_build2 (PLUS_EXPR, type, base0, delta);
-	      base0 = fold_build2 (MINUS_EXPR, type, base0, step);
-	    }
-	  else
-	    {
-	      base1 = fold_build2 (MINUS_EXPR, type, base1, delta);
-	      base1 = fold_build2 (PLUS_EXPR, type, base1, step);
-	    }
+      /* And then we can compute iv0->base - diff, and compare it with
+	 iv1->base.  */      
+      mbzl = fold_build2 (MINUS_EXPR, type, iv0->base, diff);
+      mbzr = iv1->base;
+    }
+  else
+    {
+      diff = fold_build2 (PLUS_EXPR, type,
+			  iv1->step, build_int_cst_type (type, 1));
 
-	  assumption = fold_build2 (GT_EXPR, boolean_type_node, base0, base1);
-	  noloop_assumptions = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-					    noloop_assumptions, assumption);
-	  code = NE_EXPR;
+      if (!POINTER_TYPE_P (type))
+	{
+	  bound = fold_build2 (PLUS_EXPR, type,
+			       TYPE_MAX_VALUE (type), diff);
+	  assumption = fold_build2 (LE_EXPR, boolean_type_node,
+				    iv1->base, bound);
 	}
+
+      mbzl = iv0->base;
+      mbzr = fold_build2 (MINUS_EXPR, type, iv1->base, diff);
     }
 
-  /* Count the number of iterations.  */
-  niter_type = unsigned_type_for (type);
-  signed_niter_type = signed_type_for (type);
-
-  if (code == NE_EXPR)
-    {
-      /* Everything we do here is just arithmetics modulo size of mode.  This
-	 makes us able to do more involved computations of number of iterations
-	 than in other cases.  First transform the condition into shape
-	 s * i <> c, with s positive.  */
-      base1 = fold_build2 (MINUS_EXPR, type, base1, base0);
-      base0 = NULL_TREE;
-      if (!zero_p (step1))
-  	step0 = fold_unary_to_constant (NEGATE_EXPR, type, step1);
-      step1 = NULL_TREE;
-      if (tree_int_cst_sign_bit (fold_convert (signed_niter_type, step0)))
-	{
-	  step0 = fold_unary_to_constant (NEGATE_EXPR, type, step0);
-	  base1 = fold_build1 (NEGATE_EXPR, type, base1);
-	}
-
-      base1 = fold_convert (niter_type, base1);
-      step0 = fold_convert (niter_type, step0);
-
-      /* Let nsd (step, size of mode) = d.  If d does not divide c, the loop
-	 is infinite.  Otherwise, the number of iterations is
-	 (inverse(s/d) * (c/d)) mod (size of mode/d).  */
-      bits = num_ending_zeros (step0);
-      d = fold_binary_to_constant (LSHIFT_EXPR, niter_type,
-				   build_int_cst_type (niter_type, 1), bits);
-      s = fold_binary_to_constant (RSHIFT_EXPR, niter_type, step0, bits);
-
-      bound = build_low_bits_mask (niter_type,
-				   (TYPE_PRECISION (niter_type)
-				    - tree_low_cst (bits, 1)));
+  mbz = fold_build2 (GT_EXPR, boolean_type_node, mbzl, mbzr);
 
-      assumption = fold_build2 (FLOOR_MOD_EXPR, niter_type, base1, d);
-      assumption = fold_build2 (EQ_EXPR, boolean_type_node,
-				assumption,
-				build_int_cst (niter_type, 0));
-      assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
-				 assumptions, assumption);
-
-      tmp = fold_build2 (EXACT_DIV_EXPR, niter_type, base1, d);
-      tmp = fold_build2 (MULT_EXPR, niter_type, tmp, inverse (s, bound));
-      niter->niter = fold_build2 (BIT_AND_EXPR, niter_type, tmp, bound);
+  if (!nonzero_p (assumption))
+    niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+				      niter->assumptions, assumption);
+  if (!zero_p (mbz))
+    niter->may_be_zero = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+				      niter->may_be_zero, mbz);
+}
+
+/* Determines number of iterations of loop whose ending condition
+   is IV0 < IV1.  TYPE is the type of the iv.  The number of
+   iterations is stored to NITER.  */
+
+static bool
+number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
+			 struct tree_niter_desc *niter,
+			 bool never_infinite ATTRIBUTE_UNUSED)
+{
+  tree niter_type = unsigned_type_for (type);
+  tree delta, step, s;
+
+  delta = fold_build2 (MINUS_EXPR, niter_type,
+		       fold_convert (niter_type, iv1->base),
+		       fold_convert (niter_type, iv0->base));
+
+  /* First handle the special case that the step is +-1.  */
+  if ((iv0->step && integer_onep (iv0->step)
+       && zero_p (iv1->step))
+      || (iv1->step && integer_all_onesp (iv1->step)
+	  && zero_p (iv0->step)))
+    {
+      /* for (i = iv0->base; i < iv1->base; i++)
+
+	 or
+
+	 for (i = iv1->base; i > iv0->base; i--).
+	     
+	 In both cases # of iterations is iv1->base - iv0->base, assuming that
+	 iv1->base >= iv0->base.  */
+      niter->may_be_zero = fold_build2 (LT_EXPR, boolean_type_node,
+					iv1->base, iv0->base);
+      niter->niter = delta;
+      return true;
     }
+
+  if (nonzero_p (iv0->step))
+    step = fold_convert (niter_type, iv0->step);
   else
+    step = fold_convert (niter_type,
+			 fold_build1 (NEGATE_EXPR, type, iv1->step));
+
+  /* If we can determine the final value of the control iv exactly, we can
+     transform the condition to != comparison.  In particular, this will be
+     the case if DELTA is constant.  */
+  if (number_of_iterations_lt_to_ne (type, iv0, iv1, niter, &delta, step))
     {
-      if (zero_p (step1))
-	/* Condition in shape a + s * i <= b
-	   We must know that b + s does not overflow and a <= b + s and then we
-	   can compute number of iterations as (b + s - a) / s.  (It might
-	   seem that we in fact could be more clever about testing the b + s
-	   overflow condition using some information about b - a mod s,
-	   but it was already taken into account during LE -> NE transform).  */
-	{
-	  if (mmax)
-	    {
-	      bound = fold_binary_to_constant (MINUS_EXPR, type, mmax, step0);
-	      assumption = fold_build2 (LE_EXPR, boolean_type_node,
-					base1, bound);
-	      assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
-					 assumptions, assumption);
-	    }
+      affine_iv zps;
 
-	  step = step0;
-	  tmp = fold_build2 (PLUS_EXPR, type, base1, step0);
-	  assumption = fold_build2 (GT_EXPR, boolean_type_node, base0, tmp);
-	  delta = fold_build2 (PLUS_EXPR, type, base1, step);
-	  delta = fold_build2 (MINUS_EXPR, type, delta, base0);
-	  delta = fold_convert (niter_type, delta);
-	}
-      else
-	{
-	  /* Condition in shape a <= b - s * i
-	     We must know that a - s does not overflow and a - s <= b and then
-	     we can again compute number of iterations as (b - (a - s)) / s.  */
-	  if (mmin)
-	    {
-	      bound = fold_binary_to_constant (MINUS_EXPR, type, mmin, step1);
-	      assumption = fold_build2 (LE_EXPR, boolean_type_node,
-					bound, base0);
-	      assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
-					 assumptions, assumption);
-	    }
-	  step = fold_build1 (NEGATE_EXPR, type, step1);
-	  tmp = fold_build2 (PLUS_EXPR, type, base0, step1);
-	  assumption = fold_build2 (GT_EXPR, boolean_type_node, tmp, base1);
-	  delta = fold_build2 (MINUS_EXPR, type, base0, step);
-	  delta = fold_build2 (MINUS_EXPR, type, base1, delta);
-	  delta = fold_convert (niter_type, delta);
-	}
-      noloop_assumptions = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-					noloop_assumptions, assumption);
-      delta = fold_build2 (FLOOR_DIV_EXPR, niter_type, delta,
-			   fold_convert (niter_type, step));
-      niter->niter = delta;
+      zps.base = build_int_cst_type (niter_type, 0);
+      zps.step = step;
+      /* number_of_iterations_lt_to_ne will add assumptions that ensure that
+	 zps does not overflow.  */
+      zps.no_overflow = true;
+
+      return number_of_iterations_ne (type, &zps, delta, niter, true);
     }
 
-  niter->assumptions = assumptions;
-  niter->may_be_zero = noloop_assumptions;
-  return;
+  /* Make sure that the control iv does not overflow.  */
+  if (!assert_no_overflow_lt (type, iv0, iv1, niter, step))
+    return false;
 
-zero_iter:
-  niter->assumptions = boolean_true_node;
-  niter->may_be_zero = boolean_true_node;
-  niter->niter = build_int_cst_type (type, 0);
-  return;
+  /* We determine the number of iterations as (delta + step - 1) / step.  For
+     this to work, we must know that iv1->base >= iv0->base - step + 1,
+     otherwise the loop does not roll.  */
+  assert_loop_rolls_lt (type, iv0, iv1, niter);
+
+  s = fold_build2 (MINUS_EXPR, niter_type,
+		   step, build_int_cst_type (niter_type, 1));
+  delta = fold_build2 (PLUS_EXPR, niter_type, delta, s);
+  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, delta, step);
+  return true;
 }
 
+/* Determines number of iterations of loop whose ending condition
+   is IV0 <= IV1.  TYPE is the type of the iv.  The number of
+   iterations is stored to NITER.  NEVER_INFINITE is true if
+   we know that the loop cannot be infinite (we derived this
+   earlier, and possibly set NITER->assumptions to make sure this
+   is the case.  */
+
+static bool
+number_of_iterations_le (tree type, affine_iv *iv0, affine_iv *iv1,
+			 struct tree_niter_desc *niter, bool never_infinite)
+{
+  tree assumption;
+
+  /* Say that IV0 is the control variable.  Then IV0 <= IV1 iff
+     IV0 < IV1 + 1, assuming that IV1 is not equal to the greatest
+     value of the type.  This we must know anyway, since if it is
+     equal to this value, the loop rolls forever.  */
+
+  if (!never_infinite)
+    {
+      if (nonzero_p (iv0->step))
+	assumption = fold_build2 (NE_EXPR, boolean_type_node,
+				  iv1->base, TYPE_MAX_VALUE (type));
+      else
+	assumption = fold_build2 (NE_EXPR, boolean_type_node,
+				  iv0->base, TYPE_MIN_VALUE (type));
+
+      if (zero_p (assumption))
+	return false;
+      if (!nonzero_p (assumption))
+	niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+					  niter->assumptions, assumption);
+    }
 
-/* Similar to number_of_iterations_cond, but only handles the special
-   case of loops with step 1 or -1.  The meaning of the arguments
-   is the same as in number_of_iterations_cond.  The function
-   returns true if the special case was recognized, false otherwise.  */
+  if (nonzero_p (iv0->step))
+    iv1->base = fold_build2 (PLUS_EXPR, type,
+			     iv1->base, build_int_cst_type (type, 1));
+  else
+    iv0->base = fold_build2 (MINUS_EXPR, type,
+			     iv0->base, build_int_cst_type (type, 1));
+  return number_of_iterations_lt (type, iv0, iv1, niter, never_infinite);
+}
+
+/* Determine the number of iterations according to condition (for staying
+   inside loop) which compares two induction variables using comparison
+   operator CODE.  The induction variable on left side of the comparison
+   is IV0, the right-hand side is IV1.  Both induction variables must have
+   type TYPE, which must be an integer or pointer type.  The steps of the
+   ivs must be constants (or NULL_TREE, which is interpreted as constant zero).
+   
+   The results (number of iterations and assumptions as described in
+   comments at struct tree_niter_desc in tree-flow.h) are stored to NITER.
+   Returns false if it fails to determine number of iterations, true if it
+   was determined (possibly with some assumptions).  */
 
 static bool
-number_of_iterations_special (tree type, tree base0, tree step0,
-			      enum tree_code code, tree base1, tree step1,
-			      struct tree_niter_desc *niter)
-{
-  tree niter_type = unsigned_type_for (type), mmax, mmin;
-
-  /* Make < comparison from > ones.  */
-  if (code == GE_EXPR
-      || code == GT_EXPR)
+number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code,
+			   affine_iv *iv1, struct tree_niter_desc *niter)
+{
+  bool never_infinite;
+
+  /* The meaning of these assumptions is this:
+     if !assumptions
+       then the rest of information does not have to be valid
+     if may_be_zero then the loop does not roll, even if
+       niter != 0.  */
+  niter->assumptions = boolean_true_node;
+  niter->may_be_zero = boolean_false_node;
+  niter->niter = NULL_TREE;
+  niter->additional_info = boolean_true_node;
+
+  /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
+     the control variable is on lhs.  */
+  if (code == GE_EXPR || code == GT_EXPR
+      || (code == NE_EXPR && zero_p (iv0->step)))
     {
-      SWAP (base0, base1);
-      SWAP (step0, step1);
+      SWAP (iv0, iv1);
       code = swap_tree_comparison (code);
     }
 
-  switch (code)
+  if (POINTER_TYPE_P (type))
     {
-    case NE_EXPR:
-      if (zero_p (step0))
-	{
-	  if (zero_p (step1))
-	    return false;
-    	  SWAP (base0, base1);
-	  SWAP (step0, step1);
-	}
-      else if (!zero_p (step1))
-	return false;
+      /* Comparison of pointers is undefined unless both iv0 and iv1 point
+	 to the same object.  If they do, the control variable cannot wrap
+	 (as wrap around the bounds of memory will never return a pointer
+	 that would be guaranteed to point to the same object, even if we
+	 avoid undefined behavior by casting to size_t and back).  */
+      iv0->no_overflow = true;
+      iv1->no_overflow = true;
+    }
 
-      if (integer_onep (step0))
-	{
-	  /* for (i = base0; i != base1; i++)  */
-	  niter->assumptions = boolean_true_node;
-	  niter->may_be_zero = boolean_false_node;
-	  niter->niter = fold_build2 (MINUS_EXPR, type, base1, base0);
-	  niter->additional_info = boolean_true_node;
-	}
-      else if (integer_all_onesp (step0))
-	{
-	  /* for (i = base0; i != base1; i--)  */
-	  niter->assumptions = boolean_true_node;
-	  niter->may_be_zero = boolean_false_node;
-	  niter->niter = fold_build2 (MINUS_EXPR, type, base0, base1);
-	}
-      else
-	return false;
+  /* If the control induction variable does not overflow, the loop obviously
+     cannot be infinite.  */
+  if (!zero_p (iv0->step) && iv0->no_overflow)
+    never_infinite = true;
+  else if (!zero_p (iv1->step) && iv1->no_overflow)
+    never_infinite = true;
+  else
+    never_infinite = false;
 
-      break;
+  /* We can handle the case when neither of the sides of the comparison is
+     invariant, provided that the test is NE_EXPR.  This rarely occurs in
+     practice, but it is simple enough to manage.  */
+  if (!zero_p (iv0->step) && !zero_p (iv1->step))
+    {
+      if (code != NE_EXPR)
+	return false;
 
-    case LT_EXPR:
-      if ((step0 && integer_onep (step0) && zero_p (step1))
-	  || (step1 && integer_all_onesp (step1) && zero_p (step0)))
-	{
-	  /* for (i = base0; i < base1; i++)
-	     
-	     or
+      iv0->step = fold_binary_to_constant (MINUS_EXPR, type,
+					   iv0->step, iv1->step);
+      iv0->no_overflow = false;
+      iv1->step = NULL_TREE;
+      iv1->no_overflow = true;
+    }
 
-	     for (i = base1; i > base0; i--).
-	     
-	     In both cases # of iterations is base1 - base0.  */
+  /* If the result of the comparison is a constant,  the loop is weird.  More
+     precise handling would be possible, but the situation is not common enough
+     to waste time on it.  */
+  if (zero_p (iv0->step) && zero_p (iv1->step))
+    return false;
 
-	  niter->assumptions = boolean_true_node;
-	  niter->may_be_zero = fold_build2 (GT_EXPR, boolean_type_node,
-					    base0, base1);
-	  niter->niter = fold_build2 (MINUS_EXPR, type, base1, base0);
-	}
-      else
+  /* Ignore loops of while (i-- < 10) type.  */
+  if (code != NE_EXPR)
+    {
+      if (iv0->step && tree_int_cst_sign_bit (iv0->step))
 	return false;
-      break;
 
-    case LE_EXPR:
-      if (POINTER_TYPE_P (type))
-	{
-	  /* We assume pointer arithmetic never overflows.  */
-	  mmin = mmax = NULL_TREE;
-	}
-      else
-	{
-	  mmin = TYPE_MIN_VALUE (type);
-	  mmax = TYPE_MAX_VALUE (type);
-	}
-
-      if (step0 && integer_onep (step0) && zero_p (step1))
-	{
-	  /* for (i = base0; i <= base1; i++)  */
-	  if (mmax)
-	    niter->assumptions = fold_build2 (NE_EXPR, boolean_type_node,
-					      base1, mmax);
-	  else
-	    niter->assumptions = boolean_true_node;
-	  base1 = fold_build2 (PLUS_EXPR, type, base1,
-			       build_int_cst_type (type, 1));
-	}
-      else if (step1 && integer_all_onesp (step1) && zero_p (step0))
-	{
-	  /* for (i = base1; i >= base0; i--)  */
-	  if (mmin)
-	    niter->assumptions = fold_build2 (NE_EXPR, boolean_type_node,
-					      base0, mmin);
-	  else
-	    niter->assumptions = boolean_true_node;
-	  base0 = fold_build2 (MINUS_EXPR, type, base0,
-			       build_int_cst_type (type, 1));
-	}
-      else
+      if (!zero_p (iv1->step) && !tree_int_cst_sign_bit (iv1->step))
 	return false;
+    }
 
-      niter->may_be_zero = fold_build2 (GT_EXPR, boolean_type_node,
-					base0, base1);
-      niter->niter = fold_build2 (MINUS_EXPR, type, base1, base0);
-      break;
+  /* If the loop exits immediatelly, there is nothing to do.  */
+  if (zero_p (fold_build2 (code, boolean_type_node, iv0->base, iv1->base)))
+    {
+      niter->niter = build_int_cst_type (unsigned_type_for (type), 0);
+      return true;
+    }
 
+  /* OK, now we know we have a senseful loop.  Handle several cases, depending
+     on what comparison operator is used.  */
+  switch (code)
+    {
+    case NE_EXPR:
+      gcc_assert (zero_p (iv1->step));
+      return number_of_iterations_ne (type, iv0, iv1->base, niter, never_infinite);
+    case LT_EXPR:
+      return number_of_iterations_lt (type, iv0, iv1, niter, never_infinite);
+    case LE_EXPR:
+      return number_of_iterations_le (type, iv0, iv1, niter, never_infinite);
     default:
       gcc_unreachable ();
     }
-
-  niter->niter = fold_convert (niter_type, niter->niter);
-  niter->additional_info = boolean_true_node;
-  return true;
 }
 
 /* Substitute NEW for OLD in EXPR and fold the result.  */
@@ -922,9 +957,9 @@ number_of_iterations_exit (struct loop *
 			   bool warn)
 {
   tree stmt, cond, type;
-  tree op0, base0, step0;
-  tree op1, base1, step1;
+  tree op0, op1;
   enum tree_code code;
+  affine_iv iv0, iv1;
 
   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src))
     return false;
@@ -961,25 +996,15 @@ number_of_iterations_exit (struct loop *
       && !POINTER_TYPE_P (type))
     return false;
      
-  if (!simple_iv (loop, stmt, op0, &base0, &step0, false))
+  if (!simple_iv (loop, stmt, op0, &iv0, false))
     return false;
-  if (!simple_iv (loop, stmt, op1, &base1, &step1, false))
+  if (!simple_iv (loop, stmt, op1, &iv1, false))
     return false;
 
-  niter->niter = NULL_TREE;
-
-  /* Handle common special cases first, so that we do not need to use
-     generic (and slow) analysis very often.  */
-  if (!number_of_iterations_special (type, base0, step0, code, base1, step1,
-				     niter))
-    {
-
-      number_of_iterations_cond (type, base0, step0, code, base1, step1,
-				 niter);
-
-      if (!niter->niter)
-	return false;
-    }
+  iv0.base = expand_simple_operations (iv0.base);
+  iv1.base = expand_simple_operations (iv1.base);
+  if (!number_of_iterations_cond (type, &iv0, code, &iv1, niter))
+    return false;
 
   if (optimize >= 3)
     {
@@ -1019,8 +1044,11 @@ number_of_iterations_exit (struct loop *
   
       /* We can provide a more specific warning if one of the operator is
 	 constant and the other advances by +1 or -1.  */
-      if (step1 ? !step0 && (integer_onep (step1) || integer_all_onesp (step1))
-	  	: step0 && (integer_onep (step0) || integer_all_onesp (step0)))
+      if (!zero_p (iv1.step)
+	  ? (zero_p (iv0.step)
+	     && (integer_onep (iv1.step) || integer_all_onesp (iv1.step)))
+	  : (iv0.step
+	     && (integer_onep (iv0.step) || integer_all_onesp (iv0.step))))
         wording =
           flag_unsafe_loop_optimizations
           ? N_("assuming that the loop is not infinite")
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	(revision 112546)
+++ gcc/tree.c	(working copy)
@@ -6812,6 +6812,8 @@ tree_fold_gcd (tree a, tree b)
 tree
 unsigned_type_for (tree type)
 {
+  if (POINTER_TYPE_P (type))
+    return size_type_node;
   return lang_hooks.types.unsigned_type (type);
 }
 
Index: gcc/tree-scalar-evolution.c
===================================================================
--- gcc/tree-scalar-evolution.c	(revision 112546)
+++ gcc/tree-scalar-evolution.c	(working copy)
@@ -1831,19 +1831,28 @@ analyze_scalar_evolution (struct loop *l
 
 /* Analyze scalar evolution of use of VERSION in USE_LOOP with respect to
    WRTO_LOOP (which should be a superloop of both USE_LOOP and definition
-   of VERSION).  */
+   of VERSION).
+
+   FOLDED_CASTS is set to true if resolve_mixers used
+   chrec_convert_aggressive (TODO -- not really, we are way too conservative
+   at the moment in order to keep things simple).  */
 
 static tree
 analyze_scalar_evolution_in_loop (struct loop *wrto_loop, struct loop *use_loop,
-				  tree version)
+				  tree version, bool *folded_casts)
 {
   bool val = false;
-  tree ev = version;
+  tree ev = version, tmp;
 
+  if (folded_casts)
+    *folded_casts = false;
   while (1)
     {
-      ev = analyze_scalar_evolution (use_loop, ev);
-      ev = resolve_mixers (use_loop, ev);
+      tmp = analyze_scalar_evolution (use_loop, ev);
+      ev = resolve_mixers (use_loop, tmp);
+
+      if (folded_casts && tmp != ev)
+	*folded_casts = true;
 
       if (use_loop == wrto_loop)
 	return ev;
@@ -2562,33 +2571,38 @@ scev_reset (void)
 }
 
 /* Checks whether OP behaves as a simple affine iv of LOOP in STMT and returns
-   its BASE and STEP if possible.  If ALLOW_NONCONSTANT_STEP is true, we
-   want STEP to be invariant in LOOP.  Otherwise we require it to be an
-   integer constant.  */
+   its base and step in IV if possible.  If ALLOW_NONCONSTANT_STEP is true, we
+   want step to be invariant in LOOP.  Otherwise we require it to be an
+   integer constant.  IV->no_overflow is set to true if we are sure the iv cannot
+   overflow (e.g.  because it is computed in signed arithmetics).  */
 
 bool
-simple_iv (struct loop *loop, tree stmt, tree op, tree *base, tree *step,
+simple_iv (struct loop *loop, tree stmt, tree op, affine_iv *iv,
 	   bool allow_nonconstant_step)
 {
   basic_block bb = bb_for_stmt (stmt);
   tree type, ev;
+  bool folded_casts;
 
-  *base = NULL_TREE;
-  *step = NULL_TREE;
+  iv->base = NULL_TREE;
+  iv->step = NULL_TREE;
+  iv->no_overflow = false;
 
   type = TREE_TYPE (op);
   if (TREE_CODE (type) != INTEGER_TYPE
       && TREE_CODE (type) != POINTER_TYPE)
     return false;
 
-  ev = analyze_scalar_evolution_in_loop (loop, bb->loop_father, op);
+  ev = analyze_scalar_evolution_in_loop (loop, bb->loop_father, op,
+					 &folded_casts);
   if (chrec_contains_undetermined (ev))
     return false;
 
   if (tree_does_not_contain_chrecs (ev)
       && !chrec_contains_symbols_defined_in_loop (ev, loop->num))
     {
-      *base = ev;
+      iv->base = ev;
+      iv->no_overflow = true;
       return true;
     }
 
@@ -2596,21 +2610,24 @@ simple_iv (struct loop *loop, tree stmt,
       || CHREC_VARIABLE (ev) != (unsigned) loop->num)
     return false;
 
-  *step = CHREC_RIGHT (ev);
+  iv->step = CHREC_RIGHT (ev);
   if (allow_nonconstant_step)
     {
-      if (tree_contains_chrecs (*step, NULL)
-	  || chrec_contains_symbols_defined_in_loop (*step, loop->num))
+      if (tree_contains_chrecs (iv->step, NULL)
+	  || chrec_contains_symbols_defined_in_loop (iv->step, loop->num))
 	return false;
     }
-  else if (TREE_CODE (*step) != INTEGER_CST)
+  else if (TREE_CODE (iv->step) != INTEGER_CST)
     return false;
 
-  *base = CHREC_LEFT (ev);
-  if (tree_contains_chrecs (*base, NULL)
-      || chrec_contains_symbols_defined_in_loop (*base, loop->num))
+  iv->base = CHREC_LEFT (ev);
+  if (tree_contains_chrecs (iv->base, NULL)
+      || chrec_contains_symbols_defined_in_loop (iv->base, loop->num))
     return false;
 
+  iv->no_overflow = (!folded_casts
+		     && !flag_wrapv
+		     && !TYPE_UNSIGNED (type));
   return true;
 }
 
@@ -2723,7 +2740,7 @@ scev_const_prop (void)
   for (i = current_loops->num - 1; i > 0; i--)
     {
       edge exit;
-      tree def, rslt, ass;
+      tree def, rslt, ass, niter;
       block_stmt_iterator bsi;
 
       loop = current_loops->parray[i];
@@ -2733,8 +2750,14 @@ scev_const_prop (void)
       /* If we do not know exact number of iterations of the loop, we cannot
 	 replace the final value.  */
       exit = loop->single_exit;
-      if (!exit
-	  || number_of_iterations_in_loop (loop) == chrec_dont_know)
+      if (!exit)
+	continue;
+
+      niter = number_of_iterations_in_loop (loop);
+      if (niter == chrec_dont_know
+	  /* If computing the number of iterations is expensive, it may be
+	     better not to introduce computations involving it.  */
+	  || expression_expensive_p (niter))
 	continue;
 
       /* Ensure that it is possible to insert new statements somewhere.  */
@@ -2757,17 +2780,12 @@ scev_const_prop (void)
 	      && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
 	    continue;
 
-	  def = analyze_scalar_evolution_in_loop (ex_loop, loop, def);
+	  def = analyze_scalar_evolution_in_loop (ex_loop, loop, def, NULL);
 	  def = compute_overall_effect_of_inner_loop (ex_loop, def);
 	  if (!tree_does_not_contain_chrecs (def)
 	      || chrec_contains_symbols_defined_in_loop (def, ex_loop->num))
 	    continue;
 
-	  /* If computing the expression is expensive, let it remain in the
-	     loop.  */
-	  if (expression_expensive_p (def))
-	    continue;
-
 	  /* Eliminate the phi node and replace it by a computation outside
 	     the loop.  */
 	  def = unshare_expr (def);
Index: gcc/tree-scalar-evolution.h
===================================================================
--- gcc/tree-scalar-evolution.h	(revision 112546)
+++ gcc/tree-scalar-evolution.h	(working copy)
@@ -32,7 +32,19 @@ extern tree analyze_scalar_evolution (st
 extern tree instantiate_parameters (struct loop *, tree);
 extern void gather_stats_on_scev_database (void);
 extern void scev_analysis (void);
-extern bool simple_iv (struct loop *, tree, tree, tree *, tree *, bool);
 void scev_const_prop (void);
 
+/* Affine iv.  */
+
+typedef struct
+{
+  /* Iv = BASE + STEP * i.  */
+  tree base, step;
+
+  /* True if this iv does not overflow.  */
+  bool no_overflow;
+} affine_iv;
+
+extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
+
 #endif  /* GCC_TREE_SCALAR_EVOLUTION_H  */
Index: gcc/tree-chrec.c
===================================================================
--- gcc/tree-chrec.c	(revision 112546)
+++ gcc/tree-chrec.c	(working copy)
@@ -533,10 +533,9 @@ chrec_apply (unsigned var,
       /* When the symbols are defined in an outer loop, it is possible
 	 to symbolically compute the apply, since the symbols are
 	 constants with respect to the varying loop.  */
-      || chrec_contains_symbols_defined_in_loop (chrec, var)
-      || chrec_contains_symbols (x))
+      || chrec_contains_symbols_defined_in_loop (chrec, var))
     return chrec_dont_know;
-  
+ 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "(chrec_apply \n");
 
@@ -546,14 +545,10 @@ chrec_apply (unsigned var,
   if (evolution_function_is_affine_p (chrec))
     {
       /* "{a, +, b} (x)"  ->  "a + b*x".  */
-      if (TREE_CODE (CHREC_LEFT (chrec)) == INTEGER_CST
-	  && integer_zerop (CHREC_LEFT (chrec)))
-	res = chrec_fold_multiply (type, CHREC_RIGHT (chrec), x);
-      
-      else
-	res = chrec_fold_plus (type, CHREC_LEFT (chrec), 
-			       chrec_fold_multiply (type, 
-						    CHREC_RIGHT (chrec), x));
+      x = chrec_convert (type, x, NULL_TREE);
+      res = chrec_fold_multiply (type, CHREC_RIGHT (chrec), x);
+      if (!integer_zerop (CHREC_LEFT (chrec)))
+	res = chrec_fold_plus (type, CHREC_LEFT (chrec), res);
     }
   
   else if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
@@ -563,7 +558,6 @@ chrec_apply (unsigned var,
 	   && tree_int_cst_sgn (x) == 1)
     /* testsuite/.../ssa-chrec-38.c.  */
     res = chrec_evaluate (var, chrec, x, 0);
-
   else
     res = chrec_dont_know;
   
Index: gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c	(revision 112546)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr19210-1.c	(working copy)
@@ -12,9 +12,18 @@ f (unsigned n)
   for(k = 0;k <= n;k += 4) /* { dg-warning "cannot optimize.*overflow" } */
     g();
 
-  for(k = 5;k <= n;k += 5) /* { dg-warning "cannot optimize.*overflow" } */
+  /* We used to get warning for this loop.  However, since then # of iterations
+     analysis improved, and we can now prove that this loop does not verflow.
+     This is because the only case when it would overflow is if n = ~0 (since
+     ~0 is divisible by 5), and this cannot be the case, since when we got
+     here, the previous loop exited, thus there exists k > n.  */
+  for(k = 5;k <= n;k += 5)
     g();
 
+  /* So we need the following loop, instead.  */
+  for(k = 4;k <= n;k += 5) /* { dg-warning "cannot optimize.*overflow" } */
+    g();
+  
   for(k = 15;k >= n;k--) /* { dg-warning "cannot optimize.*infinite" } */
     g();
 }
Index: gcc/testsuite/gcc.dg/tree-ssa/pr19210-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr19210-2.c	(revision 112546)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr19210-2.c	(working copy)
@@ -12,7 +12,15 @@ f (unsigned n)
   for(k = 5;k <= n;k += 4) /* { dg-warning "assuming.*not overflow" } */
     g();
 
-  for(k = 5;k <= n;k += 5) /* { dg-warning "assuming.*not overflow" } */
+  /* We used to get warning for this loop.  However, since then # of iterations
+     analysis improved, and we can now prove that this loop does not verflow.
+     This is because the only case when it would overflow is if n = ~0 (since
+     ~0 is divisible by 5), and this cannot be the case, since when we got
+     here, the previous loop exited, thus there exists k > n.  */
+  for(k = 5;k <= n;k += 5)
+    g();
+
+  for(k = 4;k <= n;k += 5) /* { dg-warning "assuming.*not overflow" } */
     g();
 
   for(k = 15;k >= n;k--) /* { dg-warning "assuming.*not infinite" } */
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c	(revision 112546)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -881,18 +881,16 @@ static tree
 determine_biv_step (tree phi)
 {
   struct loop *loop = bb_for_stmt (phi)->loop_father;
-  tree name = PHI_RESULT (phi), base, step;
+  tree name = PHI_RESULT (phi);
+  affine_iv iv;
 
   if (!is_gimple_reg (name))
     return NULL_TREE;
 
-  if (!simple_iv (loop, phi, name, &base, &step, true))
+  if (!simple_iv (loop, phi, name, &iv, true))
     return NULL_TREE;
 
-  if (zero_p (step))
-    return NULL_TREE;
-
-  return step;
+  return (zero_p (iv.step) ? NULL_TREE : iv.step);
 }
 
 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
@@ -1044,17 +1042,16 @@ mark_bivs (struct ivopts_data *data)
 }
 
 /* Checks whether STMT defines a linear induction variable and stores its
-   parameters to BASE and STEP.  */
+   parameters to IV.  */
 
 static bool
-find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
-			tree *base, tree *step)
+find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 {
   tree lhs;
   struct loop *loop = data->current_loop;
 
-  *base = NULL_TREE;
-  *step = NULL_TREE;
+  iv->base = NULL_TREE;
+  iv->step = NULL_TREE;
 
   if (TREE_CODE (stmt) != MODIFY_EXPR)
     return false;
@@ -1063,12 +1060,12 @@ find_givs_in_stmt_scev (struct ivopts_da
   if (TREE_CODE (lhs) != SSA_NAME)
     return false;
 
-  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
+  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
     return false;
-  *base = expand_simple_operations (*base);
+  iv->base = expand_simple_operations (iv->base);
 
-  if (contains_abnormal_ssa_name_p (*base)
-      || contains_abnormal_ssa_name_p (*step))
+  if (contains_abnormal_ssa_name_p (iv->base)
+      || contains_abnormal_ssa_name_p (iv->step))
     return false;
 
   return true;
@@ -1079,12 +1076,12 @@ find_givs_in_stmt_scev (struct ivopts_da
 static void
 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 {
-  tree base, step;
+  affine_iv iv;
 
-  if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
+  if (!find_givs_in_stmt_scev (data, stmt, &iv))
     return;
 
-  set_iv (data, TREE_OPERAND (stmt, 0), base, step);
+  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
 }
 
 /* Finds general ivs in basic block BB.  */
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-15.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-15.c	(revision 0)
@@ -0,0 +1,27 @@
+/* A test for # of iterations analysis (signed counter cannot wrap) and final
+   value replacement.  */
+
+/* { dg-options "-O2 -fdump-tree-vars" } */
+
+int foo(void);
+
+int bla(void)
+{
+  int i, n = foo (), j;
+
+  j = 0;
+  /* The loop should be removed completely.  */
+  for (i = 1; i <= n; i++)
+    j += n;
+
+  /* Should be replaced with return n * n;  */
+  return j;
+}
+
+/* Since the loop is removed, there should be no addition.  */
+/* { dg-final { scan-tree-dump-times "\\+" 0 "vars" } } */
+/* { dg-final { scan-tree-dump-times "n \\* n" 1 "vars" } } */
+
+/* The if from the loop header copying remains in the code.  */
+/* { dg-final { scan-tree-dump-times "if " 1 "vars" } } */
+/* { dg-final { cleanup-tree-dump "vars" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]