This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: Serious performance regression -- some tree optimizer questions

From: Zdenek Dvorak <rakdver at atrey dot karlin dot mff dot cuni dot cz>
To: Ulrich Weigand <Ulrich dot Weigand at de dot ibm dot com>
Cc: Zdenek Dvorak <dvorakz at suse dot de>, gcc at gcc dot gnu dot org,Michael Matz <matz at suse dot de>
Date: Mon, 27 Dec 2004 02:38:15 +0100
Subject: Re: Serious performance regression -- some tree optimizer questions
References: <20041221162711.GA5647@atrey.karlin.mff.cuni.cz> <OF7960A3ED.3BEA3C29-ON41256F71.00815667-41256F71.008163B7@de.ibm.com>
Hello,

> > > It would appear to be correct to set the CHREC_NO_OVERFLOW flag
> > > on the result of chrec_fold_plus, say, if the inputs are either
> > > invariant or themselves cannot overflow, and the current data type
> > > is also such that the operation to be folded cannot overflow.
> >
> > yes, I plan to do this now.

here is the patch.

Zdenek

Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1437
diff -c -3 -p -r1.1437 Makefile.in
*** Makefile.in	22 Dec 2004 17:22:59 -0000	1.1437
--- Makefile.in	27 Dec 2004 01:35:10 -0000
*************** tree-browser.o : tree-browser.c tree-bro
*** 1757,1763 ****
     $(TREE_H) errors.h tree-inline.h diagnostic.h $(HASHTAB_H) \
     $(TM_H) coretypes.h
  tree-chrec.o: tree-chrec.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
!    errors.h $(GGC_H) $(TREE_H) tree-chrec.h tree-pass.h
  tree-scalar-evolution.o: tree-scalar-evolution.c $(CONFIG_H) $(SYSTEM_H) \
     coretypes.h $(TM_H) errors.h $(GGC_H) $(TREE_H) $(RTL_H) \
     $(BASIC_BLOCK_H) diagnostic.h $(TREE_FLOW_H) $(TREE_DUMP_H) \
--- 1757,1763 ----
     $(TREE_H) errors.h tree-inline.h diagnostic.h $(HASHTAB_H) \
     $(TM_H) coretypes.h
  tree-chrec.o: tree-chrec.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
!    errors.h $(GGC_H) $(TREE_H) tree-chrec.h tree-pass.h $(FLAGS_H)
  tree-scalar-evolution.o: tree-scalar-evolution.c $(CONFIG_H) $(SYSTEM_H) \
     coretypes.h $(TM_H) errors.h $(GGC_H) $(TREE_H) $(RTL_H) \
     $(BASIC_BLOCK_H) diagnostic.h $(TREE_FLOW_H) $(TREE_DUMP_H) \
Index: cfgloop.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cfgloop.h,v
retrieving revision 1.40
diff -c -3 -p -r1.40 cfgloop.h
*** cfgloop.h	15 Dec 2004 21:18:42 -0000	1.40
--- cfgloop.h	27 Dec 2004 01:35:10 -0000
*************** struct nb_iter_bound
*** 50,58 ****
    tree bound;		/* The expression whose value is an upper bound on the
  			   number of executions of anything after ...  */
    tree at_stmt;		/* ... this statement during one execution of loop.  */
-   tree additional;	/* A conjunction of conditions the operands of BOUND
- 			   satisfy.  The additional information about the value
- 			   of the bound may be derived from it.  */
    struct nb_iter_bound *next;
  			/* The next bound in a list.  */
  };
--- 50,55 ----
*************** enum
*** 478,484 ****
  extern void unroll_and_peel_loops (struct loops *, int);
  extern void doloop_optimize_loops (struct loops *);
  extern void move_loop_invariants (struct loops *);
! extern void record_estimate (struct loop *, tree, tree, tree);
  
  /* Old loop optimizer interface.  */
  
--- 475,481 ----
  extern void unroll_and_peel_loops (struct loops *, int);
  extern void doloop_optimize_loops (struct loops *);
  extern void move_loop_invariants (struct loops *);
! extern void record_estimate (struct loop *, tree, tree);
  
  /* Old loop optimizer interface.  */
  
Index: tree-chrec.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-chrec.c,v
retrieving revision 2.12
diff -c -3 -p -r2.12 tree-chrec.c
*** tree-chrec.c	9 Dec 2004 16:17:00 -0000	2.12
--- tree-chrec.c	27 Dec 2004 01:35:10 -0000
*************** Software Foundation, 59 Temple Place - S
*** 35,40 ****
--- 35,41 ----
  #include "varray.h"
  #include "tree-chrec.h"
  #include "tree-pass.h"
+ #include "flags.h"
  
  
  
*************** chrec_fold_poly_cst (enum tree_code code
*** 86,161 ****
      }
  }
  
- /* Fold the addition of two polynomial functions.  */
- 
- static inline tree 
- chrec_fold_plus_poly_poly (enum tree_code code, 
- 			   tree type, 
- 			   tree poly0, 
- 			   tree poly1)
- {
-   tree left, right;
- 
-   gcc_assert (poly0);
-   gcc_assert (poly1);
-   gcc_assert (TREE_CODE (poly0) == POLYNOMIAL_CHREC);
-   gcc_assert (TREE_CODE (poly1) == POLYNOMIAL_CHREC);
-   
-   /*
-     {a, +, b}_1 + {c, +, d}_2  ->  {{a, +, b}_1 + c, +, d}_2,
-     {a, +, b}_2 + {c, +, d}_1  ->  {{c, +, d}_1 + a, +, b}_2,
-     {a, +, b}_x + {c, +, d}_x  ->  {a+c, +, b+d}_x.  */
-   if (CHREC_VARIABLE (poly0) < CHREC_VARIABLE (poly1))
-     {
-       if (code == PLUS_EXPR)
- 	return build_polynomial_chrec 
- 	  (CHREC_VARIABLE (poly1), 
- 	   chrec_fold_plus (type, poly0, CHREC_LEFT (poly1)),
- 	   CHREC_RIGHT (poly1));
-       else
- 	return build_polynomial_chrec 
- 	  (CHREC_VARIABLE (poly1), 
- 	   chrec_fold_minus (type, poly0, CHREC_LEFT (poly1)),
- 	   chrec_fold_multiply (type, CHREC_RIGHT (poly1), 
- 				build_int_cst_type (type, -1)));
-     }
-   
-   if (CHREC_VARIABLE (poly0) > CHREC_VARIABLE (poly1))
-     {
-       if (code == PLUS_EXPR)
- 	return build_polynomial_chrec 
- 	  (CHREC_VARIABLE (poly0), 
- 	   chrec_fold_plus (type, CHREC_LEFT (poly0), poly1),
- 	   CHREC_RIGHT (poly0));
-       else
- 	return build_polynomial_chrec 
- 	  (CHREC_VARIABLE (poly0), 
- 	   chrec_fold_minus (type, CHREC_LEFT (poly0), poly1),
- 	   CHREC_RIGHT (poly0));
-     }
-   
-   if (code == PLUS_EXPR)
-     {
-       left = chrec_fold_plus 
- 	(type, CHREC_LEFT (poly0), CHREC_LEFT (poly1));
-       right = chrec_fold_plus 
- 	(type, CHREC_RIGHT (poly0), CHREC_RIGHT (poly1));
-     }
-   else
-     {
-       left = chrec_fold_minus 
- 	(type, CHREC_LEFT (poly0), CHREC_LEFT (poly1));
-       right = chrec_fold_minus 
- 	(type, CHREC_RIGHT (poly0), CHREC_RIGHT (poly1));
-     }
- 
-   if (chrec_zerop (right))
-     return left;
-   else
-     return build_polynomial_chrec 
-       (CHREC_VARIABLE (poly0), left, right); 
- }
- 
  
  
  /* Fold the multiplication of two polynomial functions.  */
--- 87,92 ----
*************** chrec_fold_plus_1 (enum tree_code code, 
*** 244,298 ****
  		   tree op0,
  		   tree op1)
  {
    if (automatically_generated_chrec_p (op0)
        || automatically_generated_chrec_p (op1))
      return chrec_fold_automatically_generated_operands (op0, op1);
!   
!   switch (TREE_CODE (op0))
      {
!     case POLYNOMIAL_CHREC:
!       switch (TREE_CODE (op1))
  	{
! 	case POLYNOMIAL_CHREC:
! 	  return chrec_fold_plus_poly_poly (code, type, op0, op1);
! 
! 	default:
! 	  if (code == PLUS_EXPR)
! 	    return build_polynomial_chrec 
! 	      (CHREC_VARIABLE (op0), 
! 	       chrec_fold_plus (type, CHREC_LEFT (op0), op1),
! 	       CHREC_RIGHT (op0));
! 	  else
! 	    return build_polynomial_chrec 
! 	      (CHREC_VARIABLE (op0), 
! 	       chrec_fold_minus (type, CHREC_LEFT (op0), op1),
! 	       CHREC_RIGHT (op0));
  	}
! 
!     default:
!       switch (TREE_CODE (op1))
  	{
! 	case POLYNOMIAL_CHREC:
! 	  if (code == PLUS_EXPR)
! 	    return build_polynomial_chrec 
! 	      (CHREC_VARIABLE (op1), 
! 	       chrec_fold_plus (type, op0, CHREC_LEFT (op1)),
! 	       CHREC_RIGHT (op1));
! 	  else
! 	    return build_polynomial_chrec 
! 	      (CHREC_VARIABLE (op1), 
! 	       chrec_fold_minus (type, op0, CHREC_LEFT (op1)),
! 	       chrec_fold_multiply (type, CHREC_RIGHT (op1),
! 				    build_int_cst_type (type, -1)));
  
! 	default:
! 	  if (tree_contains_chrecs (op0)
! 	      || tree_contains_chrecs (op1))
! 	    return build (code, type, op0, op1);
! 	  else
! 	    return fold (build (code, type, op0, op1));
  	}
      }
  }
  
  /* Fold the addition of two chrecs.  */
--- 175,266 ----
  		   tree op0,
  		   tree op1)
  {
+   bool no_overflow;
+   int loop0, loop1;
+   tree chrec, left, right;
+ 
    if (automatically_generated_chrec_p (op0)
        || automatically_generated_chrec_p (op1))
      return chrec_fold_automatically_generated_operands (op0, op1);
!  
!   if (TREE_CODE (op0) != POLYNOMIAL_CHREC
!       && TREE_CODE (op1) != POLYNOMIAL_CHREC)
!     return fold (build (code, type, op0, op1));
! 
!   if (TREE_CODE (op0) == POLYNOMIAL_CHREC)
!     loop0 = CHREC_VARIABLE (op0);
!   else
!     loop0 = -1;
! 
!   if (TREE_CODE (op1) == POLYNOMIAL_CHREC)
!     loop1 = CHREC_VARIABLE (op1);
!   else
!     loop1 = -1;
! 
!   if (loop0 == loop1)
      {
!       /* Both chrecs belong to the same loop?  Then just sum the
! 	 coefficients.  */
!       no_overflow = (TYPE_ARITH_NO_OVERFLOW (type)
! 		     && CHREC_NO_OVERFLOW (op0)
! 		     && CHREC_NO_OVERFLOW (op1));
!   
!       if (code == PLUS_EXPR)
  	{
! 	  left = chrec_fold_plus 
! 		  (type, CHREC_LEFT (op0), CHREC_LEFT (op1));
! 	  right = chrec_fold_plus 
! 		  (type, CHREC_RIGHT (op0), CHREC_RIGHT (op1));
  	}
!       else
  	{
! 	  left = chrec_fold_minus 
! 		  (type, CHREC_LEFT (op0), CHREC_LEFT (op1));
! 	  right = chrec_fold_minus 
! 		  (type, CHREC_RIGHT (op0), CHREC_RIGHT (op1));
! 	}
!     }
!   else if (loop0 < loop1)
!     {
!       no_overflow = (TYPE_ARITH_NO_OVERFLOW (type)
! 		     && CHREC_NO_OVERFLOW (op1));
  
!       if (code == PLUS_EXPR)
! 	{
! 	  left = chrec_fold_plus (type, op0, CHREC_LEFT (op1));
! 	  right = CHREC_RIGHT (op1);
! 	}
!       else
! 	{
! 	  left = chrec_fold_minus (type, op0, CHREC_LEFT (op1));
! 	  right = chrec_fold_multiply (type, CHREC_RIGHT (op1),
! 				       build_int_cst_type (type, -1));
  	}
      }
+   else
+     {
+       /* loop0 > loop1 */
+       no_overflow = (TYPE_ARITH_NO_OVERFLOW (type)
+ 		     && CHREC_NO_OVERFLOW (op0));
+ 
+       if (code == PLUS_EXPR)
+ 	left = chrec_fold_plus (type, CHREC_LEFT (op0), op1);
+       else
+ 	left = chrec_fold_minus (type, CHREC_LEFT (op0), op1);
+ 
+       right = CHREC_RIGHT (op0);
+     }
+ 
+ 
+   if (chrec_zerop (right))
+     return left;
+ 
+   chrec = build_polynomial_chrec (CHREC_VARIABLE (op0), left, right); 
+ 
+   if (no_overflow)
+     CHREC_NO_OVERFLOW (chrec) = true;
+ 
+   return chrec;
  }
  
  /* Fold the addition of two chrecs.  */
*************** chrec_fold_multiply (tree type, 
*** 330,335 ****
--- 298,306 ----
  		     tree op0,
  		     tree op1)
  {
+   tree chrec;
+   bool no_overflow;
+ 
    if (automatically_generated_chrec_p (op0)
        || automatically_generated_chrec_p (op1))
      return chrec_fold_automatically_generated_operands (op0, op1);
*************** chrec_fold_multiply (tree type, 
*** 348,357 ****
  	  if (integer_zerop (op1))
  	    return build_int_cst_type (type, 0);
  	  
! 	  return build_polynomial_chrec 
! 	    (CHREC_VARIABLE (op0), 
! 	     chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
! 	     chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
  	}
        
      default:
--- 319,334 ----
  	  if (integer_zerop (op1))
  	    return build_int_cst_type (type, 0);
  	  
! 	  no_overflow = (TYPE_ARITH_NO_OVERFLOW (type)
! 			 && CHREC_NO_OVERFLOW (op0));
! 
! 	  chrec = build_polynomial_chrec 
! 		  (CHREC_VARIABLE (op0), 
! 		   chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
! 		   chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
! 	  CHREC_NO_OVERFLOW (chrec) = true;
! 
! 	  return chrec;
  	}
        
      default:
*************** chrec_fold_multiply (tree type, 
*** 364,373 ****
        switch (TREE_CODE (op1))
  	{
  	case POLYNOMIAL_CHREC:
! 	  return build_polynomial_chrec 
! 	    (CHREC_VARIABLE (op1), 
! 	     chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
! 	     chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
  	  
  	default:
  	  if (integer_onep (op1))
--- 341,356 ----
        switch (TREE_CODE (op1))
  	{
  	case POLYNOMIAL_CHREC:
! 	  no_overflow = (TYPE_ARITH_NO_OVERFLOW (type)
! 			 && CHREC_NO_OVERFLOW (op1));
! 
! 	  chrec = build_polynomial_chrec 
! 		  (CHREC_VARIABLE (op1), 
! 		   chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
! 		   chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
! 	  CHREC_NO_OVERFLOW (chrec) = true;
! 
! 	  return chrec;
  	  
  	default:
  	  if (integer_onep (op1))
Index: tree-data-ref.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-data-ref.c,v
retrieving revision 2.18
diff -c -3 -p -r2.18 tree-data-ref.c
*** tree-data-ref.c	10 Dec 2004 17:51:43 -0000	2.18
--- tree-data-ref.c	27 Dec 2004 01:35:10 -0000
*************** estimate_niter_from_size_of_data (struct
*** 529,535 ****
  				 fold (build2 (MINUS_EXPR, integer_type_node, 
  					       data_size, init)), step));
  
!       record_estimate (loop, estimation, boolean_true_node, stmt);
      }
  }
  
--- 529,535 ----
  				 fold (build2 (MINUS_EXPR, integer_type_node, 
  					       data_size, init)), step));
  
!       record_estimate (loop, estimation, stmt);
      }
  }
  
Index: tree-flow.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-flow.h,v
retrieving revision 2.75
diff -c -3 -p -r2.75 tree-flow.h
*** tree-flow.h	10 Dec 2004 21:54:41 -0000	2.75
--- tree-flow.h	27 Dec 2004 01:35:10 -0000
*************** struct tree_niter_desc
*** 630,647 ****
  			   a loop (provided that assumptions == true and
  			   may_be_zero == false), more precisely the number
  			   of executions of the latch of the loop.  */
-   tree additional_info;	/* The boolean expression.  Sometimes we use additional
- 			   knowledge to simplify the other expressions
- 			   contained in this structure (for example the
- 			   knowledge about value ranges of operands on entry to
- 			   the loop).  If this is a case, conjunction of such
- 			   condition is stored in this field, so that we do not
- 			   lose the information: for example if may_be_zero
- 			   is (n <= 0) and niter is (unsigned) n, we know
- 			   that the number of iterations is at most
- 			   MAX_SIGNED_INT.  However if the (n <= 0) assumption
- 			   is eliminated (by looking at the guard on entry of
- 			   the loop), then the information would be lost.  */
  };
  
  /* In tree-vectorizer.c */
--- 630,635 ----
*************** void canonicalize_induction_variables (s
*** 658,671 ****
  void tree_unroll_loops_completely (struct loops *);
  void tree_ssa_iv_optimize (struct loops *);
  
! void number_of_iterations_cond (tree, tree, tree, enum tree_code, tree, tree,
  				struct tree_niter_desc *);
  bool number_of_iterations_exit (struct loop *, edge,
  				struct tree_niter_desc *niter);
  tree loop_niter_by_eval (struct loop *, edge);
  tree find_loop_niter_by_eval (struct loop *, edge *);
  void estimate_numbers_of_iterations (struct loops *);
! tree can_count_iv_in_wider_type (struct loop *, tree, tree, tree, tree);
  void free_numbers_of_iterations_estimates (struct loops *);
  void rewrite_into_loop_closed_ssa (void);
  void verify_loop_closed_ssa (void);
--- 646,660 ----
  void tree_unroll_loops_completely (struct loops *);
  void tree_ssa_iv_optimize (struct loops *);
  
! void number_of_iterations_cond (tree, tree, tree, bool,
! 				enum tree_code, tree, tree, bool,
  				struct tree_niter_desc *);
  bool number_of_iterations_exit (struct loop *, edge,
  				struct tree_niter_desc *niter);
  tree loop_niter_by_eval (struct loop *, edge);
  tree find_loop_niter_by_eval (struct loop *, edge *);
  void estimate_numbers_of_iterations (struct loops *);
! tree can_count_iv_in_wider_type (struct loop *, tree, tree, tree, tree, bool);
  void free_numbers_of_iterations_estimates (struct loops *);
  void rewrite_into_loop_closed_ssa (void);
  void verify_loop_closed_ssa (void);
Index: tree-scalar-evolution.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-scalar-evolution.c,v
retrieving revision 2.13
diff -c -3 -p -r2.13 tree-scalar-evolution.c
*** tree-scalar-evolution.c	12 Nov 2004 13:28:16 -0000	2.13
--- tree-scalar-evolution.c	27 Dec 2004 01:35:10 -0000
*************** find_var_scev_info (tree var)
*** 355,364 ****
  tree
  count_ev_in_wider_type (tree type, tree chrec)
  {
!   tree base, step;
    struct loop *loop;
  
!   if (!evolution_function_is_affine_p (chrec))
      return fold_convert (type, chrec);
  
    base = CHREC_LEFT (chrec);
--- 355,364 ----
  tree
  count_ev_in_wider_type (tree type, tree chrec)
  {
!   tree base, step, new_chrec;
    struct loop *loop;
  
!   if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
      return fold_convert (type, chrec);
  
    base = CHREC_LEFT (chrec);
*************** count_ev_in_wider_type (tree type, tree 
*** 368,380 ****
    /* TODO -- if we knew the statement at that the conversion occurs,
       we could pass it to can_count_iv_in_wider_type and get a better
       result.  */
!   step = can_count_iv_in_wider_type (loop, type, base, step, NULL_TREE);
    if (!step)
      return fold_convert (type, chrec);
    base = chrec_convert (type, base);
  
!   return build_polynomial_chrec (CHREC_VARIABLE (chrec),
! 				 base, step);
  }
  
  /* Return true when CHREC contains symbolic names defined in
--- 368,386 ----
    /* TODO -- if we knew the statement at that the conversion occurs,
       we could pass it to can_count_iv_in_wider_type and get a better
       result.  */
!   step = can_count_iv_in_wider_type (loop, type, base, step, NULL_TREE,
! 				     CHREC_NO_OVERFLOW (chrec));
    if (!step)
      return fold_convert (type, chrec);
+ 
    base = chrec_convert (type, base);
  
!   new_chrec = build_polynomial_chrec (CHREC_VARIABLE (chrec),
! 				      base, step);
!   /* If we got this far, we also know that the chrec cannot overflow.  */
!   CHREC_NO_OVERFLOW (new_chrec) = true;
! 
!   return new_chrec;
  }
  
  /* Return true when CHREC contains symbolic names defined in
*************** get_scalar_evolution (tree scalar)
*** 682,694 ****
     
     When CHREC_BEFORE has an evolution part in LOOP_NB, add to this
     evolution the expression TO_ADD, otherwise construct an evolution
!    part for this loop.  */
  
  static tree
  add_to_evolution_1 (unsigned loop_nb, 
  		    tree chrec_before, 
! 		    tree to_add)
  {
    switch (TREE_CODE (chrec_before))
      {
      case POLYNOMIAL_CHREC:
--- 688,706 ----
     
     When CHREC_BEFORE has an evolution part in LOOP_NB, add to this
     evolution the expression TO_ADD, otherwise construct an evolution
!    part for this loop.
!    
!    CANNOT_OVERFLOW is true if we are sure that the operation cannot
!    overflow.  */
  
  static tree
  add_to_evolution_1 (unsigned loop_nb, 
  		    tree chrec_before, 
! 		    tree to_add,
! 		    bool cannot_overflow)
  {
+   tree chrec;
+ 
    switch (TREE_CODE (chrec_before))
      {
      case POLYNOMIAL_CHREC:
*************** add_to_evolution_1 (unsigned loop_nb, 
*** 704,709 ****
--- 716,722 ----
  	      var = loop_nb;
  	      left = chrec_before;
  	      right = build_int_cst (type, 0);
+ 	      chrec_before = NULL;
  	    }
  	  else
  	    {
*************** add_to_evolution_1 (unsigned loop_nb, 
*** 712,733 ****
  	      right = CHREC_RIGHT (chrec_before);
  	    }
  
! 	  return build_polynomial_chrec 
! 	    (var, left, chrec_fold_plus (type, right, to_add));
  	}
        else
  	/* Search the evolution in LOOP_NB.  */
  	return build_polynomial_chrec 
  	  (CHREC_VARIABLE (chrec_before),
! 	   add_to_evolution_1 (loop_nb, CHREC_LEFT (chrec_before), to_add),
  	   CHREC_RIGHT (chrec_before));
        
      default:
        /* These nodes do not depend on a loop.  */
        if (chrec_before == chrec_dont_know)
  	return chrec_dont_know;
!       return build_polynomial_chrec (loop_nb, chrec_before, to_add);
      }
  }
  
  /* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension
--- 725,762 ----
  	      right = CHREC_RIGHT (chrec_before);
  	    }
  
! 	  chrec = build_polynomial_chrec (var, left,
! 					  chrec_fold_plus (type,
! 							   right, to_add));
  	}
        else
  	/* Search the evolution in LOOP_NB.  */
  	return build_polynomial_chrec 
  	  (CHREC_VARIABLE (chrec_before),
! 	   add_to_evolution_1 (loop_nb, CHREC_LEFT (chrec_before), to_add,
! 			       cannot_overflow),
  	   CHREC_RIGHT (chrec_before));
        
      default:
        /* These nodes do not depend on a loop.  */
        if (chrec_before == chrec_dont_know)
  	return chrec_dont_know;
!       chrec = build_polynomial_chrec (loop_nb, chrec_before, to_add);
!       chrec_before = NULL;
!       break;
      }
+ 
+   if (!cannot_overflow)
+     return chrec;
+ 
+   /* If we know that the current operation cannot overflow, and that the chrec
+      either could not overflow in other operations, or there were no other
+      operations before, we know that the chrec cannot overflow now.  */
+   if (!chrec_before
+       || CHREC_NO_OVERFLOW (chrec_before))
+     CHREC_NO_OVERFLOW (chrec) = true;
+ 
+   return chrec;
  }
  
  /* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension
*************** static tree 
*** 868,874 ****
  add_to_evolution (unsigned loop_nb, 
  		  tree chrec_before,
  		  enum tree_code code,
! 		  tree to_add)
  {
    tree type = chrec_type (to_add);
    tree res = NULL_TREE;
--- 897,904 ----
  add_to_evolution (unsigned loop_nb, 
  		  tree chrec_before,
  		  enum tree_code code,
! 		  tree to_add,
! 		  bool cannot_overflow)
  {
    tree type = chrec_type (to_add);
    tree res = NULL_TREE;
*************** add_to_evolution (unsigned loop_nb, 
*** 897,903 ****
      to_add = chrec_fold_multiply (type, to_add, 
  				  build_int_cst_type (type, -1));
  
!   res = add_to_evolution_1 (loop_nb, chrec_before, to_add);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
--- 927,933 ----
      to_add = chrec_fold_multiply (type, to_add, 
  				  build_int_cst_type (type, -1));
  
!   res = add_to_evolution_1 (loop_nb, chrec_before, to_add, cannot_overflow);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1059,1064 ****
--- 1089,1095 ----
    bool res = false;
    tree rhs0, rhs1;
    tree type_rhs = TREE_TYPE (rhs);
+   bool cannot_overflow = false;
    
    /* The RHS is one of the following cases:
       - an SSA_NAME, 
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1088,1093 ****
--- 1119,1127 ----
        break;
        
      case PLUS_EXPR:
+       if (TYPE_ARITH_NO_OVERFLOW (type_rhs))
+ 	cannot_overflow = true;
+ 
        /* This case is under the form "rhs0 + rhs1".  */
        rhs0 = TREE_OPERAND (rhs, 0);
        rhs1 = TREE_OPERAND (rhs, 1);
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1108,1114 ****
  		*evolution_of_loop = add_to_evolution 
  		  (loop->num, 
  		   chrec_convert (type_rhs, *evolution_of_loop), 
! 		   PLUS_EXPR, rhs1);
  	      
  	      else
  		{
--- 1142,1148 ----
  		*evolution_of_loop = add_to_evolution 
  		  (loop->num, 
  		   chrec_convert (type_rhs, *evolution_of_loop), 
! 		   PLUS_EXPR, rhs1, cannot_overflow);
  	      
  	      else
  		{
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1120,1126 ****
  		    *evolution_of_loop = add_to_evolution 
  		      (loop->num, 
  		       chrec_convert (type_rhs, *evolution_of_loop), 
! 		       PLUS_EXPR, rhs0);
  		}
  	    }
  	  
--- 1154,1160 ----
  		    *evolution_of_loop = add_to_evolution 
  		      (loop->num, 
  		       chrec_convert (type_rhs, *evolution_of_loop), 
! 		       PLUS_EXPR, rhs0, cannot_overflow);
  		}
  	    }
  	  
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1134,1140 ****
  	      if (res)
  		*evolution_of_loop = add_to_evolution 
  		  (loop->num, chrec_convert (type_rhs, *evolution_of_loop), 
! 		   PLUS_EXPR, rhs1);
  	    }
  	}
        
--- 1168,1174 ----
  	      if (res)
  		*evolution_of_loop = add_to_evolution 
  		  (loop->num, chrec_convert (type_rhs, *evolution_of_loop), 
! 		   PLUS_EXPR, rhs1, cannot_overflow);
  	    }
  	}
        
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1148,1154 ****
  	  if (res)
  	    *evolution_of_loop = add_to_evolution 
  	      (loop->num, chrec_convert (type_rhs, *evolution_of_loop), 
! 	       PLUS_EXPR, rhs0);
  	}
  
        else
--- 1182,1188 ----
  	  if (res)
  	    *evolution_of_loop = add_to_evolution 
  	      (loop->num, chrec_convert (type_rhs, *evolution_of_loop), 
! 	       PLUS_EXPR, rhs0, cannot_overflow);
  	}
  
        else
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1160,1165 ****
--- 1194,1202 ----
        break;
        
      case MINUS_EXPR:
+       if (TYPE_ARITH_NO_OVERFLOW (type_rhs))
+ 	cannot_overflow = true;
+ 
        /* This case is under the form "opnd0 = rhs0 - rhs1".  */
        rhs0 = TREE_OPERAND (rhs, 0);
        rhs1 = TREE_OPERAND (rhs, 1);
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1175,1181 ****
  	  if (res)
  	    *evolution_of_loop = add_to_evolution 
  		    (loop->num, chrec_convert (type_rhs, *evolution_of_loop), 
! 		     MINUS_EXPR, rhs1);
  	}
        else
  	/* Otherwise, match an assignment under the form: 
--- 1212,1218 ----
  	  if (res)
  	    *evolution_of_loop = add_to_evolution 
  		    (loop->num, chrec_convert (type_rhs, *evolution_of_loop), 
! 		     MINUS_EXPR, rhs1, cannot_overflow);
  	}
        else
  	/* Otherwise, match an assignment under the form: 
*************** instantiate_parameters_1 (struct loop *l
*** 1955,1982 ****
  				      allow_superloop_chrecs);
        op1 = instantiate_parameters_1 (loop, CHREC_RIGHT (chrec),
  				      allow_superloop_chrecs);
!       return build_polynomial_chrec (CHREC_VARIABLE (chrec), op0, op1);
  
      case PLUS_EXPR:
        op0 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 0),
  				      allow_superloop_chrecs);
        op1 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 1),
  				      allow_superloop_chrecs);
!       return chrec_fold_plus (TREE_TYPE (chrec), op0, op1);
  
      case MINUS_EXPR:
        op0 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 0),
  				      allow_superloop_chrecs);
        op1 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 1),
  				      allow_superloop_chrecs);
!       return chrec_fold_minus (TREE_TYPE (chrec), op0, op1);
  
      case MULT_EXPR:
        op0 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 0),
  				      allow_superloop_chrecs);
        op1 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 1),
  				      allow_superloop_chrecs);
!       return chrec_fold_multiply (TREE_TYPE (chrec), op0, op1);
  
      case NOP_EXPR:
      case CONVERT_EXPR:
--- 1992,2031 ----
  				      allow_superloop_chrecs);
        op1 = instantiate_parameters_1 (loop, CHREC_RIGHT (chrec),
  				      allow_superloop_chrecs);
!       if (CHREC_LEFT (chrec) != op0
! 	  || CHREC_RIGHT (chrec) != op1)
! 	chrec = build_polynomial_chrec (CHREC_VARIABLE (chrec), op0, op1);
!       return chrec;
  
      case PLUS_EXPR:
        op0 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 0),
  				      allow_superloop_chrecs);
        op1 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 1),
  				      allow_superloop_chrecs);
!       if (TREE_OPERAND (chrec, 0) != op0
! 	  || TREE_OPERAND (chrec, 1) != op1)
!       	chrec = chrec_fold_plus (TREE_TYPE (chrec), op0, op1);
!       return chrec;
  
      case MINUS_EXPR:
        op0 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 0),
  				      allow_superloop_chrecs);
        op1 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 1),
  				      allow_superloop_chrecs);
!       if (TREE_OPERAND (chrec, 0) != op0
! 	  || TREE_OPERAND (chrec, 1) != op1)
!         chrec = chrec_fold_minus (TREE_TYPE (chrec), op0, op1);
!       return chrec;
  
      case MULT_EXPR:
        op0 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 0),
  				      allow_superloop_chrecs);
        op1 = instantiate_parameters_1 (loop, TREE_OPERAND (chrec, 1),
  				      allow_superloop_chrecs);
!       if (TREE_OPERAND (chrec, 0) != op0
! 	  || TREE_OPERAND (chrec, 1) != op1)
! 	chrec = chrec_fold_multiply (TREE_TYPE (chrec), op0, op1);
!       return chrec;
  
      case NOP_EXPR:
      case CONVERT_EXPR:
*************** instantiate_parameters_1 (struct loop *l
*** 1986,1991 ****
--- 2035,2043 ----
        if (op0 == chrec_dont_know)
          return chrec_dont_know;
  
+       if (op0 == TREE_OPERAND (chrec, 0))
+ 	return chrec;
+ 
        return chrec_convert (TREE_TYPE (chrec), op0);
  
      case SCEV_NOT_KNOWN:
*************** instantiate_parameters_1 (struct loop *l
*** 2011,2016 ****
--- 2063,2074 ----
  	  || op1 == chrec_dont_know
  	  || op2 == chrec_dont_know)
          return chrec_dont_know;
+ 
+       if (op0 == TREE_OPERAND (chrec, 0)
+ 	  && op1 == TREE_OPERAND (chrec, 1)
+ 	  && op2 == TREE_OPERAND (chrec, 2))
+ 	return chrec;
+ 
        return fold (build (TREE_CODE (chrec),
  			  TREE_TYPE (chrec), op0, op1, op2));
  
*************** instantiate_parameters_1 (struct loop *l
*** 2022,2027 ****
--- 2080,2089 ----
        if (op0 == chrec_dont_know
  	  || op1 == chrec_dont_know)
          return chrec_dont_know;
+ 
+       if (op0 == TREE_OPERAND (chrec, 0)
+ 	  && op1 == TREE_OPERAND (chrec, 1))
+ 	return chrec;
        return fold (build (TREE_CODE (chrec), TREE_TYPE (chrec), op0, op1));
  	    
      case 1:
*************** instantiate_parameters_1 (struct loop *l
*** 2029,2034 ****
--- 2091,2098 ----
  				      allow_superloop_chrecs);
        if (op0 == chrec_dont_know)
          return chrec_dont_know;
+       if (op0 == TREE_OPERAND (chrec, 0))
+ 	return chrec;
        return fold (build1 (TREE_CODE (chrec), TREE_TYPE (chrec), op0));
  
      case 0:
*************** scev_reset (void)
*** 2416,2431 ****
  }
  
  /* Checks whether OP behaves as a simple affine iv of LOOP in STMT and returns
!    its BASE and STEP if possible.  */
  
  bool
! simple_iv (struct loop *loop, tree stmt, tree op, tree *base, tree *step)
  {
    basic_block bb = bb_for_stmt (stmt);
    tree type, ev;
  
    *base = NULL_TREE;
    *step = NULL_TREE;
  
    type = TREE_TYPE (op);
    if (TREE_CODE (type) != INTEGER_TYPE
--- 2480,2498 ----
  }
  
  /* Checks whether OP behaves as a simple affine iv of LOOP in STMT and returns
!    its BASE and STEP if possible.  NO_OVERFLOW is set to true if we know that
!    the induction variable cannot overflow.  */
  
  bool
! simple_iv (struct loop *loop, tree stmt, tree op, tree *base, tree *step,
! 	   bool *no_overflow)
  {
    basic_block bb = bb_for_stmt (stmt);
    tree type, ev;
  
    *base = NULL_TREE;
    *step = NULL_TREE;
+   *no_overflow = false;
  
    type = TREE_TYPE (op);
    if (TREE_CODE (type) != INTEGER_TYPE
*************** simple_iv (struct loop *loop, tree stmt,
*** 2440,2445 ****
--- 2507,2513 ----
        && !chrec_contains_symbols_defined_in_loop (ev, loop->num))
      {
        *base = ev;
+       *no_overflow = true;
        return true;
      }
  
*************** simple_iv (struct loop *loop, tree stmt,
*** 2455,2460 ****
--- 2523,2529 ----
        || chrec_contains_symbols_defined_in_loop (*base, loop->num))
      return false;
  
+   *no_overflow = CHREC_NO_OVERFLOW (ev);
    return true;
  }
  
Index: tree-scalar-evolution.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-scalar-evolution.h,v
retrieving revision 2.3
diff -c -3 -p -r2.3 tree-scalar-evolution.h
*** tree-scalar-evolution.h	17 Nov 2004 22:06:00 -0000	2.3
--- tree-scalar-evolution.h	27 Dec 2004 01:35:10 -0000
*************** extern tree analyze_scalar_evolution (st
*** 32,37 ****
  extern tree instantiate_parameters (struct loop *, tree);
  extern void gather_stats_on_scev_database (void);
  extern void scev_analysis (void);
! extern bool simple_iv (struct loop *, tree, tree, tree *, tree *);
  
  #endif  /* GCC_TREE_SCALAR_EVOLUTION_H  */
--- 32,37 ----
  extern tree instantiate_parameters (struct loop *, tree);
  extern void gather_stats_on_scev_database (void);
  extern void scev_analysis (void);
! extern bool simple_iv (struct loop *, tree, tree, tree *, tree *, bool *);
  
  #endif  /* GCC_TREE_SCALAR_EVOLUTION_H  */
Index: tree-ssa-loop-ivopts.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-ivopts.c,v
retrieving revision 2.39
diff -c -3 -p -r2.39 tree-ssa-loop-ivopts.c
*** tree-ssa-loop-ivopts.c	25 Dec 2004 22:53:54 -0000	2.39
--- tree-ssa-loop-ivopts.c	27 Dec 2004 01:35:10 -0000
*************** struct iv
*** 106,111 ****
--- 106,113 ----
    tree ssa_name;	/* The ssa name with the value.  */
    bool biv_p;		/* Is it a biv?  */
    bool have_use_for;	/* Do we already have a use for it?  */
+   bool cannot_overflow;	/* True if we are sure that this iv does not
+ 			   overflow.  */
    unsigned use_id;	/* The identifier in the use if it is the case.  */
  };
  
*************** alloc_iv (tree base, tree step)
*** 728,743 ****
    iv->step = step;
    iv->biv_p = false;
    iv->have_use_for = false;
    iv->use_id = 0;
    iv->ssa_name = NULL_TREE;
  
    return iv;
  }
  
! /* Sets STEP and BASE for induction variable IV.  */
  
  static void
! set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
  {
    struct version_info *info = name_info (data, iv);
  
--- 730,748 ----
    iv->step = step;
    iv->biv_p = false;
    iv->have_use_for = false;
+   iv->cannot_overflow = false;
    iv->use_id = 0;
    iv->ssa_name = NULL_TREE;
  
    return iv;
  }
  
! /* Sets STEP and BASE for induction variable IV.  CANNOT_OVERFLOW is true
!    if the induction variable cannot overflow.  */
  
  static void
! set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
! 	bool cannot_overflow)
  {
    struct version_info *info = name_info (data, iv);
  
*************** set_iv (struct ivopts_data *data, tree i
*** 746,751 ****
--- 751,757 ----
    bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
    info->iv = alloc_iv (base, step);
    info->iv->ssa_name = iv;
+   info->iv->cannot_overflow = cannot_overflow;
  }
  
  /* Finds induction variable declaration for VAR.  */
*************** get_iv (struct ivopts_data *data, tree v
*** 761,776 ****
  
        if (!bb
  	  || !flow_bb_inside_loop_p (data->current_loop, bb))
! 	set_iv (data, var, var, NULL_TREE);
      }
  
    return name_info (data, var)->iv;
  }
  
! /* Determines the step of a biv defined in PHI.  */
  
  static tree
! determine_biv_step (tree phi)
  {
    struct loop *loop = bb_for_stmt (phi)->loop_father;
    tree name = PHI_RESULT (phi), base, step;
--- 767,783 ----
  
        if (!bb
  	  || !flow_bb_inside_loop_p (data->current_loop, bb))
! 	set_iv (data, var, var, NULL_TREE, true);
      }
  
    return name_info (data, var)->iv;
  }
  
! /* Determines the step of a biv defined in PHI.  CANNOT_OVERFLOW is set to
!    true if we know that the biv cannot overflow.  */
  
  static tree
! determine_biv_step (tree phi, bool *cannot_overflow)
  {
    struct loop *loop = bb_for_stmt (phi)->loop_father;
    tree name = PHI_RESULT (phi), base, step;
*************** determine_biv_step (tree phi)
*** 779,785 ****
    if (!is_gimple_reg (name))
      return NULL_TREE;
  
!   if (!simple_iv (loop, phi, name, &base, &step))
      return NULL_TREE;
  
    if (!step)
--- 786,792 ----
    if (!is_gimple_reg (name))
      return NULL_TREE;
  
!   if (!simple_iv (loop, phi, name, &base, &step, cannot_overflow))
      return NULL_TREE;
  
    if (!step)
*************** find_bivs (struct ivopts_data *data)
*** 870,882 ****
    tree phi, step, type, base;
    bool found = false;
    struct loop *loop = data->current_loop;
  
    for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
  	continue;
  
!       step = determine_biv_step (phi);
  
        if (!step)
  	continue;
--- 877,890 ----
    tree phi, step, type, base;
    bool found = false;
    struct loop *loop = data->current_loop;
+   bool cannot_overflow;
  
    for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
  	continue;
  
!       step = determine_biv_step (phi, &cannot_overflow);
  
        if (!step)
  	continue;
*************** find_bivs (struct ivopts_data *data)
*** 897,903 ****
        if (!cst_and_fits_in_hwi (step))
  	continue;
  
!       set_iv (data, PHI_RESULT (phi), base, step);
        found = true;
      }
  
--- 905,911 ----
        if (!cst_and_fits_in_hwi (step))
  	continue;
  
!       set_iv (data, PHI_RESULT (phi), base, step, cannot_overflow);
        found = true;
      }
  
*************** mark_bivs (struct ivopts_data *data)
*** 937,947 ****
  }
  
  /* Checks whether STMT defines a linear induction variable and stores its
!    parameters to BASE and STEP.  */
  
  static bool
  find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
! 			tree *base, tree *step)
  {
    tree lhs;
    struct loop *loop = data->current_loop;
--- 945,956 ----
  }
  
  /* Checks whether STMT defines a linear induction variable and stores its
!    parameters to BASE and STEP.  CANNOT_OVERFLOW is set to true if we know
!    that the induction variable cannot overflow.  */
  
  static bool
  find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
! 			tree *base, tree *step, bool *cannot_overflow)
  {
    tree lhs;
    struct loop *loop = data->current_loop;
*************** find_givs_in_stmt_scev (struct ivopts_da
*** 956,962 ****
    if (TREE_CODE (lhs) != SSA_NAME)
      return false;
  
!   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
      return false;
  
    /* FIXME: We do not handle induction variables whose step does
--- 965,972 ----
    if (TREE_CODE (lhs) != SSA_NAME)
      return false;
  
!   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step,
! 		  cannot_overflow))
      return false;
  
    /* FIXME: We do not handle induction variables whose step does
*************** static void
*** 977,987 ****
  find_givs_in_stmt (struct ivopts_data *data, tree stmt)
  {
    tree base, step;
  
!   if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
      return;
  
!   set_iv (data, TREE_OPERAND (stmt, 0), base, step);
  }
  
  /* Finds general ivs in basic block BB.  */
--- 987,998 ----
  find_givs_in_stmt (struct ivopts_data *data, tree stmt)
  {
    tree base, step;
+   bool cannot_overflow;
  
!   if (!find_givs_in_stmt_scev (data, stmt, &base, &step, &cannot_overflow))
      return;
  
!   set_iv (data, TREE_OPERAND (stmt, 0), base, step, cannot_overflow);
  }
  
  /* Finds general ivs in basic block BB.  */
*************** idx_find_step (tree base, tree *idx, voi
*** 1363,1369 ****
  
    if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
      iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
! 					  type, iv->base, iv->step, dta->stmt);
    else
      iv_step = fold_convert (iv_type, iv->step);
  
--- 1374,1381 ----
  
    if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
      iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
! 					  type, iv->base, iv->step, dta->stmt,
! 					  iv->cannot_overflow);
    else
      iv_step = fold_convert (iv_type, iv->step);
  
*************** may_eliminate_iv (struct loop *loop,
*** 3231,3238 ****
      base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
  
    new_niter.niter = NULL_TREE;
!   number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
! 			     cand->iv->step, NE_EXPR, *bound, NULL_TREE,
  			     &new_niter);
    if (!new_niter.niter
        || !integer_nonzerop (new_niter.assumptions)
--- 3243,3251 ----
      base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
  
    new_niter.niter = NULL_TREE;
!   number_of_iterations_cond (TREE_TYPE (cand->iv->base),
! 			     base, cand->iv->step, false, NE_EXPR,
! 			     *bound, NULL_TREE, false,
  			     &new_niter);
    if (!new_niter.niter
        || !integer_nonzerop (new_niter.assumptions)
Index: tree-ssa-loop-niter.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-niter.c,v
retrieving revision 2.18
diff -c -3 -p -r2.18 tree-ssa-loop-niter.c
*** tree-ssa-loop-niter.c	23 Nov 2004 01:27:42 -0000	2.18
--- tree-ssa-loop-niter.c	27 Dec 2004 01:35:10 -0000
*************** inverse (tree x, tree mask)
*** 173,193 ****
     The results (number of iterations and assumptions as described in
     comments at struct tree_niter_desc in tree-flow.h) are stored to NITER.
     In case we are unable to determine number of iterations, contents of
!    this structure is unchanged.  */
  
  void
! number_of_iterations_cond (tree type, tree base0, tree step0,
! 			   enum tree_code code, tree base1, tree step1,
  			   struct tree_niter_desc *niter)
  {
    tree step, delta, mmin, mmax;
    tree may_xform, bound, s, d, tmp;
!   bool was_sharp = false;
    tree assumption;
    tree assumptions = boolean_true_node;
    tree noloop_assumptions = boolean_false_node;
    tree niter_type, signed_niter_type;
    tree bits;
  
    /* The meaning of these assumptions is this:
       if !assumptions
--- 173,198 ----
     The results (number of iterations and assumptions as described in
     comments at struct tree_niter_desc in tree-flow.h) are stored to NITER.
     In case we are unable to determine number of iterations, contents of
!    this structure is unchanged.
!    
!    NOOVFL0 and NOOVFL1 are true if we know that the lhs and the rhs iv cannot
!    overflow, respectively.  */
  
  void
! number_of_iterations_cond (tree type, tree base0, tree step0, bool noovfl0,
! 			   enum tree_code code,
! 			   tree base1, tree step1, bool noovfl1,
  			   struct tree_niter_desc *niter)
  {
    tree step, delta, mmin, mmax;
    tree may_xform, bound, s, d, tmp;
!   bool was_sharp = false, noovfl;
    tree assumption;
    tree assumptions = boolean_true_node;
    tree noloop_assumptions = boolean_false_node;
    tree niter_type, signed_niter_type;
    tree bits;
+   bool never_infinite;
  
    /* The meaning of these assumptions is this:
       if !assumptions
*************** number_of_iterations_cond (tree type, tr
*** 204,211 ****
--- 209,229 ----
        SWAP (base0, base1);
        SWAP (step0, step1);
        code = swap_tree_comparison (code);
+ 
+       noovfl = noovfl1;
+       noovfl1 = noovfl0;
+       noovfl0 = noovfl;
      }
  
+   /* If the control induction variable does not overflow, the loop obviously
+      cannot be infinite.  */
+   if (!zero_p (step0) && noovfl0)
+     never_infinite = true;
+   else if (!zero_p (step1) && noovfl1)
+     never_infinite = true;
+   else
+     never_infinite = false;
+ 
    /* We can handle the case when neither of the sides of the comparison is
       invariant, provided that the test is NE_EXPR.  This rarely occurs in
       practice, but it is simple enough to manage.  */
*************** number_of_iterations_cond (tree type, tr
*** 216,221 ****
--- 234,240 ----
  
        step0 = fold_binary_to_constant (MINUS_EXPR, type, step0, step1);
        step1 = NULL_TREE;
+       noovfl0 = noovfl1 = false;
      }
  
    /* If the result is a constant,  the loop is weird.  More precise handling
*************** number_of_iterations_cond (tree type, tr
*** 335,341 ****
  	    }
  	  else if (zero_p (step0))
  	    {
! 	      if (!mmin)
  		may_xform = boolean_true_node;
  	      else
  		{
--- 354,360 ----
  	    }
  	  else if (zero_p (step0))
  	    {
! 	      if (!mmin || noovfl1)
  		may_xform = boolean_true_node;
  	      else
  		{
*************** number_of_iterations_cond (tree type, tr
*** 349,355 ****
  	    }
  	  else
  	    {
! 	      if (!mmax)
  		may_xform = boolean_true_node;
  	      else
  		{
--- 368,374 ----
  	    }
  	  else
  	    {
! 	      if (!mmax || noovfl0)
  		may_xform = boolean_true_node;
  	      else
  		{
*************** number_of_iterations_cond (tree type, tr
*** 384,390 ****
  
  	  assumption = fold (build2 (GT_EXPR, boolean_type_node, base0, base1));
  	  noloop_assumptions = fold (build2 (TRUTH_OR_EXPR, boolean_type_node,
! 					    noloop_assumptions, assumption));
  	  code = NE_EXPR;
  	}
      }
--- 403,409 ----
  
  	  assumption = fold (build2 (GT_EXPR, boolean_type_node, base0, base1));
  	  noloop_assumptions = fold (build2 (TRUTH_OR_EXPR, boolean_type_node,
! 				 	     noloop_assumptions, assumption));
  	  code = NE_EXPR;
  	}
      }
*************** number_of_iterations_cond (tree type, tr
*** 425,436 ****
  				   (TYPE_PRECISION (niter_type)
  				    - tree_low_cst (bits, 1)));
  
!       assumption = fold (build2 (FLOOR_MOD_EXPR, niter_type, base1, d));
!       assumption = fold (build2 (EQ_EXPR, boolean_type_node,
! 				 assumption,
! 				 build_int_cst (niter_type, 0)));
!       assumptions = fold (build2 (TRUTH_AND_EXPR, boolean_type_node,
! 				  assumptions, assumption));
  
        tmp = fold (build2 (EXACT_DIV_EXPR, niter_type, base1, d));
        tmp = fold (build2 (MULT_EXPR, niter_type, tmp, inverse (s, bound)));
--- 444,458 ----
  				   (TYPE_PRECISION (niter_type)
  				    - tree_low_cst (bits, 1)));
  
!       if (!never_infinite)
! 	{
! 	  assumption = fold (build2 (FLOOR_MOD_EXPR, niter_type, base1, d));
! 	  assumption = fold (build2 (EQ_EXPR, boolean_type_node,
! 				     assumption,
! 				     build_int_cst (niter_type, 0)));
! 	  assumptions = fold (build2 (TRUTH_AND_EXPR, boolean_type_node,
! 				      assumptions, assumption));
! 	}
  
        tmp = fold (build2 (EXACT_DIV_EXPR, niter_type, base1, d));
        tmp = fold (build2 (MULT_EXPR, niter_type, tmp, inverse (s, bound)));
*************** number_of_iterations_cond (tree type, tr
*** 440,452 ****
      {
        if (zero_p (step1))
  	/* Condition in shape a + s * i <= b
! 	   We must know that b + s does not overflow and a <= b + s and then we
! 	   can compute number of iterations as (b + s - a) / s.  (It might
! 	   seem that we in fact could be more clever about testing the b + s
  	   overflow condition using some information about b - a mod s,
  	   but it was already taken into account during LE -> NE transform).  */
  	{
! 	  if (mmax)
  	    {
  	      bound = fold_binary_to_constant (MINUS_EXPR, type, mmax, step0);
  	      assumption = fold (build2 (LE_EXPR, boolean_type_node,
--- 462,474 ----
      {
        if (zero_p (step1))
  	/* Condition in shape a + s * i <= b
! 	   We must know that there is no overflow and and a <= b and then we
! 	   can compute number of iterations as (b - a) / s + 1.  (It might
! 	   seem that we in fact could be more clever about testing the
  	   overflow condition using some information about b - a mod s,
  	   but it was already taken into account during LE -> NE transform).  */
  	{
! 	  if (mmax && !noovfl0)
  	    {
  	      bound = fold_binary_to_constant (MINUS_EXPR, type, mmax, step0);
  	      assumption = fold (build2 (LE_EXPR, boolean_type_node,
*************** number_of_iterations_cond (tree type, tr
*** 456,473 ****
  	    }
  
  	  step = step0;
- 	  tmp = fold (build2 (PLUS_EXPR, type, base1, step0));
- 	  assumption = fold (build2 (GT_EXPR, boolean_type_node, base0, tmp));
- 	  delta = fold (build2 (PLUS_EXPR, type, base1, step));
- 	  delta = fold (build2 (MINUS_EXPR, type, delta, base0));
- 	  delta = fold_convert (niter_type, delta);
  	}
        else
  	{
  	  /* Condition in shape a <= b - s * i
! 	     We must know that a - s does not overflow and a - s <= b and then
! 	     we can again compute number of iterations as (b - (a - s)) / s.  */
! 	  if (mmin)
  	    {
  	      bound = fold_binary_to_constant (MINUS_EXPR, type, mmin, step1);
  	      assumption = fold (build2 (LE_EXPR, boolean_type_node,
--- 478,490 ----
  	    }
  
  	  step = step0;
  	}
        else
  	{
  	  /* Condition in shape a <= b - s * i
! 	     We must know that there is no overflow and a <= b and then
! 	     we can again compute number of iterations as (b - a) / s.  */
! 	  if (mmin && !noovfl1)
  	    {
  	      bound = fold_binary_to_constant (MINUS_EXPR, type, mmin, step1);
  	      assumption = fold (build2 (LE_EXPR, boolean_type_node,
*************** number_of_iterations_cond (tree type, tr
*** 476,492 ****
  					 assumptions, assumption));
  	    }
  	  step = fold (build1 (NEGATE_EXPR, type, step1));
- 	  tmp = fold (build2 (PLUS_EXPR, type, base0, step1));
- 	  assumption = fold (build2 (GT_EXPR, boolean_type_node, tmp, base1));
- 	  delta = fold (build2 (MINUS_EXPR, type, base0, step));
- 	  delta = fold (build2 (MINUS_EXPR, type, base1, delta));
- 	  delta = fold_convert (niter_type, delta);
  	}
        noloop_assumptions = fold (build2 (TRUTH_OR_EXPR, boolean_type_node,
  					noloop_assumptions, assumption));
        delta = fold (build2 (FLOOR_DIV_EXPR, niter_type, delta,
  			    fold_convert (niter_type, step)));
!       niter->niter = delta;
      }
  
    niter->assumptions = assumptions;
--- 493,510 ----
  					 assumptions, assumption));
  	    }
  	  step = fold (build1 (NEGATE_EXPR, type, step1));
  	}
+ 
+       assumption = fold (build2 (GT_EXPR, boolean_type_node, base0, base1));
        noloop_assumptions = fold (build2 (TRUTH_OR_EXPR, boolean_type_node,
  					noloop_assumptions, assumption));
+ 
+       delta = fold (build2 (MINUS_EXPR, type, base1, base0));
+       delta = fold_convert (niter_type, delta);
        delta = fold (build2 (FLOOR_DIV_EXPR, niter_type, delta,
  			    fold_convert (niter_type, step)));
!       niter->niter = fold (build2 (PLUS_EXPR, niter_type, delta,
! 				   build_int_cst_type (niter_type, 1)));
      }
  
    niter->assumptions = assumptions;
*************** tree_simplify_using_condition (tree cond
*** 701,717 ****
  }
  
  /* Tries to simplify EXPR using the conditions on entry to LOOP.
-    Record the conditions used for simplification to CONDS_USED.
     Returns the simplified expression (or EXPR unchanged, if no
     simplification was possible).*/
  
  static tree
! simplify_using_initial_conditions (struct loop *loop, tree expr,
! 				   tree *conds_used)
  {
    edge e;
    basic_block bb;
!   tree exp, cond;
  
    if (TREE_CODE (expr) == INTEGER_CST)
      return expr;
--- 719,733 ----
  }
  
  /* Tries to simplify EXPR using the conditions on entry to LOOP.
     Returns the simplified expression (or EXPR unchanged, if no
     simplification was possible).*/
  
  static tree
! simplify_using_initial_conditions (struct loop *loop, tree expr)
  {
    edge e;
    basic_block bb;
!   tree cond;
  
    if (TREE_CODE (expr) == INTEGER_CST)
      return expr;
*************** simplify_using_initial_conditions (struc
*** 730,744 ****
        cond = COND_EXPR_COND (last_stmt (e->src));
        if (e->flags & EDGE_FALSE_VALUE)
  	cond = invert_truthvalue (cond);
!       exp = tree_simplify_using_condition (cond, expr);
! 
!       if (exp != expr)
! 	*conds_used = fold (build2 (TRUTH_AND_EXPR,
! 				   boolean_type_node,
! 				   *conds_used,
! 				   cond));
! 
!       expr = exp;
      }
  
    return expr;
--- 746,752 ----
        cond = COND_EXPR_COND (last_stmt (e->src));
        if (e->flags & EDGE_FALSE_VALUE)
  	cond = invert_truthvalue (cond);
!       expr = tree_simplify_using_condition (cond, expr);
      }
  
    return expr;
*************** number_of_iterations_exit (struct loop *
*** 758,763 ****
--- 766,772 ----
    tree op0, base0, step0;
    tree op1, base1, step1;
    enum tree_code code;
+   bool op0_no_overflow, op1_no_overflow;
  
    if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src))
      return false;
*************** number_of_iterations_exit (struct loop *
*** 794,807 ****
        && !POINTER_TYPE_P (type))
      return false;
       
!   if (!simple_iv (loop, stmt, op0, &base0, &step0))
      return false;
!   if (!simple_iv (loop, stmt, op1, &base1, &step1))
      return false;
  
    niter->niter = NULL_TREE;
!   number_of_iterations_cond (type, base0, step0, code, base1, step1,
! 			     niter);
    if (!niter->niter)
      return false;
  
--- 803,816 ----
        && !POINTER_TYPE_P (type))
      return false;
       
!   if (!simple_iv (loop, stmt, op0, &base0, &step0, &op0_no_overflow))
      return false;
!   if (!simple_iv (loop, stmt, op1, &base1, &step1, &op1_no_overflow))
      return false;
  
    niter->niter = NULL_TREE;
!   number_of_iterations_cond (type, base0, step0, op0_no_overflow, code,
! 			     base1, step1, op1_no_overflow, niter);
    if (!niter->niter)
      return false;
  
*************** number_of_iterations_exit (struct loop *
*** 811,825 ****
  							niter->may_be_zero);
    niter->niter = simplify_using_outer_evolutions (loop, niter->niter);
  
-   niter->additional_info = boolean_true_node;
    niter->assumptions
  	  = simplify_using_initial_conditions (loop,
! 					       niter->assumptions,
! 					       &niter->additional_info);
    niter->may_be_zero
  	  = simplify_using_initial_conditions (loop,
! 					       niter->may_be_zero,
! 					       &niter->additional_info);
    return integer_onep (niter->assumptions);
  }
  
--- 820,831 ----
  							niter->may_be_zero);
    niter->niter = simplify_using_outer_evolutions (loop, niter->niter);
  
    niter->assumptions
  	  = simplify_using_initial_conditions (loop,
! 					       niter->assumptions);
    niter->may_be_zero
  	  = simplify_using_initial_conditions (loop,
! 					       niter->may_be_zero);
    return integer_onep (niter->assumptions);
  }
  
*************** find_loop_niter_by_eval (struct loop *lo
*** 1078,1090 ****
  
  */
  
! /* Records that AT_STMT is executed at most BOUND times in LOOP.  The
!    additional condition ADDITIONAL is recorded with the bound.  */
  
  void
! record_estimate (struct loop *loop, tree bound, tree additional, tree at_stmt)
  {
    struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound));
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
--- 1084,1107 ----
  
  */
  
! /* Records that AT_STMT is executed at most BOUND times in LOOP.  */
  
  void
! record_estimate (struct loop *loop, tree bound, tree at_stmt)
  {
    struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound));
+   tree bound_type = TREE_TYPE (bound);
+ 
+   /* Ensure that the bound is unsigned.  Signed values make no sense, and we
+      use the fact that the bound is unsigned later; of course it would be
+      possible to cast the value to unsigned type at the places that need it,
+      but it would happen repeatedly and would therefore waste memory.  */
+ 
+   if (!TYPE_UNSIGNED (bound_type))
+     {
+       bound_type = unsigned_type_for (bound_type);
+       bound = fold_convert (bound_type, bound);
+     }
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
*************** record_estimate (struct loop *loop, tree
*** 1097,1103 ****
  
    elt->bound = bound;
    elt->at_stmt = at_stmt;
-   elt->additional = additional;
    elt->next = loop->bounds;
    loop->bounds = elt;
  }
--- 1114,1119 ----
*************** estimate_numbers_of_iterations_loop (str
*** 1125,1133 ****
  	niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
  			build_int_cst_type (type, 0),
  			niter);
!       record_estimate (loop, niter,
! 		       niter_desc.additional_info,
! 		       last_stmt (exits[i]->src));
      }
    free (exits);
    
--- 1141,1147 ----
  	niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
  			build_int_cst_type (type, 0),
  			niter);
!       record_estimate (loop, niter, last_stmt (exits[i]->src));
      }
    free (exits);
    
*************** estimate_numbers_of_iterations (struct l
*** 1157,1189 ****
      }
  }
  
- /* If A > B, returns -1.  If A == B, returns 0.  If A < B, returns 1.
-    If neither of these relations can be proved, returns 2.  */
- 
- static int
- compare_trees (tree a, tree b)
- {
-   tree typea = TREE_TYPE (a), typeb = TREE_TYPE (b);
-   tree type;
- 
-   if (TYPE_PRECISION (typea) > TYPE_PRECISION (typeb))
-     type = typea;
-   else
-     type = typeb;
- 
-   a = fold_convert (type, a);
-   b = fold_convert (type, b);
- 
-   if (nonzero_p (fold (build2 (EQ_EXPR, boolean_type_node, a, b))))
-     return 0;
-   if (nonzero_p (fold (build2 (LT_EXPR, boolean_type_node, a, b))))
-     return 1;
-   if (nonzero_p (fold (build2 (GT_EXPR, boolean_type_node, a, b))))
-     return -1;
- 
-   return 2;
- }
- 
  /* Returns true if statement S1 dominates statement S2.  */
  
  static bool
--- 1171,1176 ----
*************** stmt_dominates_stmt_p (tree s1, tree s2)
*** 1209,1334 ****
    return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
  }
  
! /* Checks whether it is correct to count the induction variable BASE + STEP * I
!    at AT_STMT in wider TYPE, using the fact that statement OF is executed at
!    most BOUND times in the loop.  If it is possible, return the value of step
!    of the induction variable in the TYPE, otherwise return NULL_TREE.
!    
!    ADDITIONAL is the additional condition recorded for operands of the bound.
!    This is useful in the following case, created by loop header copying:
  
!    i = 0;
!    if (n > 0)
!      do
!        {
!          something;
!        } while (++i < n)
! 
!    If the n > 0 condition is taken into account, the number of iterations of the
!    loop can be expressed as n - 1.  If the type of n is signed, the ADDITIONAL
!    assumption "n > 0" says us that the value of the number of iterations is at
!    most MAX_TYPE - 1 (without this assumption, it might overflow).  */
  
! static tree
! can_count_iv_in_wider_type_bound (tree type, tree base, tree step,
! 				  tree at_stmt,
! 				  tree bound,
! 				  tree additional,
! 				  tree of)
  {
!   tree inner_type = TREE_TYPE (base), b, bplusstep, new_step, new_step_abs;
!   tree valid_niter, extreme, unsigned_type, delta, bound_type;
!   tree cond;
  
!   b = fold_convert (type, base);
!   bplusstep = fold_convert (type,
! 			    fold (build2 (PLUS_EXPR, inner_type, base, step)));
!   new_step = fold (build2 (MINUS_EXPR, type, bplusstep, b));
!   if (TREE_CODE (new_step) != INTEGER_CST)
!     return NULL_TREE;
  
!   switch (compare_trees (bplusstep, b))
      {
!     case -1:
!       extreme = upper_bound_in_type (type, inner_type);
!       delta = fold (build2 (MINUS_EXPR, type, extreme, b));
!       new_step_abs = new_step;
!       break;
  
!     case 1:
!       extreme = lower_bound_in_type (type, inner_type);
!       new_step_abs = fold (build1 (NEGATE_EXPR, type, new_step));
!       delta = fold (build2 (MINUS_EXPR, type, b, extreme));
        break;
  
!     case 0:
!       return new_step;
  
      default:
!       return NULL_TREE;
      }
  
!   unsigned_type = unsigned_type_for (type);
!   delta = fold_convert (unsigned_type, delta);
!   new_step_abs = fold_convert (unsigned_type, new_step_abs);
!   valid_niter = fold (build2 (FLOOR_DIV_EXPR, unsigned_type,
! 			     delta, new_step_abs));
! 
!   bound_type = TREE_TYPE (bound);
!   if (TYPE_PRECISION (type) > TYPE_PRECISION (bound_type))
!     bound = fold_convert (unsigned_type, bound);
!   else
!     valid_niter = fold_convert (bound_type, valid_niter);
!     
!   if (at_stmt && stmt_dominates_stmt_p (of, at_stmt))
!     {
!       /* After the statement OF we know that anything is executed at most
! 	 BOUND times.  */
!       cond = build2 (GE_EXPR, boolean_type_node, valid_niter, bound);
      }
    else
      {
!       /* Before the statement OF we know that anything is executed at most
! 	 BOUND + 1 times.  */
!       cond = build2 (GT_EXPR, boolean_type_node, valid_niter, bound);
!     }
! 
!   cond = fold (cond);
!   if (nonzero_p (cond))
!     return new_step;
! 
!   /* Try taking additional conditions into account.  */
!   cond = build2 (TRUTH_OR_EXPR, boolean_type_node,
! 		invert_truthvalue (additional),
! 		cond);
!   cond = fold (cond);
!   if (nonzero_p (cond))
!     return new_step;
  
!   return NULL_TREE;
  }
  
  /* Checks whether it is correct to count the induction variable BASE + STEP * I
     at AT_STMT in wider TYPE, using the bounds on numbers of iterations of a
     LOOP.  If it is possible, return the value of step of the induction variable
!    in the TYPE, otherwise return NULL_TREE.  */
  
  tree
  can_count_iv_in_wider_type (struct loop *loop, tree type, tree base, tree step,
! 			    tree at_stmt)
  {
    struct nb_iter_bound *bound;
!   tree new_step;
  
    for (bound = loop->bounds; bound; bound = bound->next)
      {
!       new_step = can_count_iv_in_wider_type_bound (type, base, step,
! 						   at_stmt,
! 						   bound->bound,
! 						   bound->additional,
! 						   bound->at_stmt);
  
!       if (new_step)
  	return new_step;
      }
  
--- 1196,1816 ----
    return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
  }
  
! /* Returns true if X > Y, counted in arithmetics in width given by MASK
!    and signedness given by SIGNED_P.  */
  
! static bool
! greater_number_p (unsigned HOST_WIDE_INT x, unsigned HOST_WIDE_INT y,
! 		  bool signed_p, unsigned HOST_WIDE_INT mask)
! {
!   bool x_neg, y_neg;
  
!   if (signed_p)
!     {
!       x_neg = (x & ~(mask >> 1)) != 0;
!       y_neg = (y & ~(mask >> 1)) != 0;
! 
!       if (x_neg && !y_neg)
! 	return false;
! 
!       if (!x_neg && y_neg)
! 	return true;
!     }
! 
!   return x > y;
! }
! 
! /* Returns mask for computing modulo by 2 ^ S by anding.  */
! 
! static unsigned HOST_WIDE_INT
! mask_for_size (unsigned s)
  {
!   gcc_assert (s <= HOST_BITS_PER_WIDE_INT);
  
!   return ((~(unsigned HOST_WIDE_INT) 0) >> (HOST_BITS_PER_WIDE_INT - s));
! }
  
! /* Determines the minimum and maximum value of expression VAR at the begining
!    of LOOP.  Minimum is stored to MIN, maximum to MAX.  Returns false if we
!    determine that loop is never reached.  */
! 
! static bool
! determine_range (struct loop *loop, tree var,
! 		 unsigned HOST_WIDE_INT *min,
! 		 unsigned HOST_WIDE_INT *max)
! {
!   tree type = TREE_TYPE (var);
!   unsigned type_bits = TYPE_PRECISION (type);
!   unsigned HOST_WIDE_INT mask, val = 0, minv, maxv;
!   basic_block bb;
!   enum tree_code code;
!   tree op, cond;
!   bool signed_p;
!   edge e;
! 
!   /* Quite simplistic.  Lots of space to improve.  Having value range info
!      would be the best.  */
! 
!   mask = mask_for_size (type_bits);
!   if (TREE_CODE (var) != SSA_NAME)
      {
!       *min = 0;
!       *max = mask;
!       return true;
!     }
! 
!   signed_p = !TYPE_UNSIGNED (type);
!   if (signed_p)
!     {
!       *min = minv = (mask >> 1) + 1;
!       *max = maxv = (mask >> 1);
!     }
!   else
!     {
!       *min = minv = 0;
!       *max = maxv = mask;
!     }
! 
!   for (bb = loop->header;
!        bb != ENTRY_BLOCK_PTR;
!        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
!     {
!       e = EDGE_PRED (bb, 0);
!       if (EDGE_COUNT (bb->preds) > 1)
! 	continue;
! 
!       if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
! 	continue;
! 
!       cond = COND_EXPR_COND (last_stmt (e->src));
!       if (e->flags & EDGE_FALSE_VALUE)
! 	cond = invert_truthvalue (cond);
! 
!       if (!COMPARISON_CLASS_P (cond))
! 	continue;
! 
!       code = TREE_CODE (cond);
!       if (TREE_OPERAND (cond, 0) == var)
! 	op = TREE_OPERAND (cond, 1);
!       else if (TREE_OPERAND (cond, 1) == var)
! 	{
! 	  op = TREE_OPERAND (cond, 0);
! 	  code = swap_tree_comparison (code);
! 	}
!       else
! 	continue;
! 
!       if (cst_and_fits_in_hwi (op))
! 	val = int_cst_value (op);
!       else if (code == GT_EXPR)
! 	val = minv;
!       else if (code == LT_EXPR)
! 	val = maxv;
!       else
! 	continue;
! 
!       switch (code)
! 	{
! 	case EQ_EXPR:
! 	  *min = *max = val;
! 	  return true;
! 
! 	case NE_EXPR:
! 	  if (val == *min)
! 	    {
! 	      if (*min == *max)
! 		{
! 		  /* The code is never reached.  */
! 		  return false;
! 		}
! 	      *min = (*min + 1) & mask;
! 	    }
! 
! 	  if (val == *max)
! 	    *max = (*max - 1) & mask;
! 	  break;
! 
! 	case GT_EXPR:
! 	  if (val == maxv)
! 	    return false;
! 
! 	  val = (val + 1) & mask;
! 	  /* Fallthru.  */
! 
! 	case GE_EXPR:
! 	  if (greater_number_p (val, *min, signed_p, mask))
! 	    *min = val;
! 	  break;
! 
! 	case LT_EXPR:
! 	  if (val == minv)
! 	    return false;
! 	  val = (val - 1) & mask;
! 	  /* Fallthru.  */
! 
! 	case LE_EXPR:
! 	  if (greater_number_p (*max, val, signed_p, mask))
! 	    *max = val;
! 	  break;
! 
! 	default:
! 	  break;
! 	}
!     }
! 
!   return true;
! }
! 
! /* For expression EXPR of form (cast) (m * v + a), store variable (or
!    subexpression) v to VAR, constant m to MULT, constant a to ADD
!    and the width of the type to *BITS.  */
! 
! static void
! split_affine_expression (tree expr, tree *var,
! 			 unsigned HOST_WIDE_INT *mult,
! 			 unsigned HOST_WIDE_INT *add,
! 			 unsigned *bits)
! {
!   tree type = TREE_TYPE (expr), op0 = NULL_TREE, op1 = NULL_TREE, op_type;
!   unsigned type_bits = TYPE_PRECISION (type);
!   unsigned HOST_WIDE_INT mask, delta = 0, mby = 1;
!   enum tree_code code = TREE_CODE (expr);
! 
!   if (TREE_CODE_LENGTH (code) > 0)
!     op0 = TREE_OPERAND (expr, 0);
!   if (TREE_CODE_LENGTH (code) > 1)
!     op1 = TREE_OPERAND (expr, 1);
! 
!   /* Default if something fails.  */
!   *var = expr;
!   *mult = 1;
!   *add = 0;
!   *bits = type_bits;
! 
!   if (type_bits > HOST_BITS_PER_WIDE_INT)
!     return;
! 
!   mask = mask_for_size (type_bits);
! 
!   if (code == NOP_EXPR
!       || code == CONVERT_EXPR)
!     {
!       op_type = TREE_TYPE (op0);
! 
!       /* If it is a sign extend, fail.  */
!       if (!TYPE_UNSIGNED (type)
! 	  && !TYPE_UNSIGNED (op_type)
! 	  && type_bits > TYPE_PRECISION (op_type))
! 	return;
! 
!       split_affine_expression (op0, var, mult, add, bits);
!       if (*bits > type_bits)
! 	{
! 	  *bits = type_bits;
! 	  *mult &= mask;
! 	  *add &= mask;
! 	}
!       return;
!     }
! 
!   switch (code)
!     {
!     case PLUS_EXPR:
!       if (cst_and_fits_in_hwi (op1))
! 	{
! 	  delta = int_cst_value (op1);
! 	  break;
! 	}
!       return;
! 
!     case MINUS_EXPR:
!       if (cst_and_fits_in_hwi (op0))
! 	{
! 	  delta = int_cst_value (op0);
! 	  mby = mask;
! 	  op0 = op1;
! 	  break;
! 	}
! 
!       if (cst_and_fits_in_hwi (op1))
! 	{
! 	  delta = (-int_cst_value (op1)) & mask;
! 	  break;
! 	}
!       return;
  
!     case NEGATE_EXPR:
!       mby = mask;
        break;
  
!     case MULT_EXPR:
!       if (cst_and_fits_in_hwi (op1))
! 	{
! 	  mby = int_cst_value (op1);
! 	  break;
! 	}
!       return;
  
      default:
!       return;
      }
  
!   split_affine_expression (op0, var, mult, add, bits);
!   if (type_bits == *bits)
!     {
!       *mult *= mby;
!       *add *= mby;
!       *add += delta;
!       *mult &= mask;
!       *add &= mask;
!       return;
!     }
! 
!   /* If there is some narrowing/extending below, consider the first operand
!      to be the variable.  */
!   *var = op0;
!   *mult = mby;
!   *add = delta;
!   *bits = type_bits;
!   return;
! }
! 
! /* Returns true if X COMP Y, where COMP is a comparison operator.  */
! 
! static bool
! compare_p (unsigned HOST_WIDE_INT x, enum tree_code comp,
! 	   unsigned HOST_WIDE_INT y)
! {
!   switch (comp)
!     {
!     case LT_EXPR:
!       return x < y;
! 
!     case LE_EXPR:
!       return x <= y;
! 
!     case GT_EXPR:
!       return x > y;
! 
!     case GE_EXPR:
!       return x >= y;
! 
!     default:
!       gcc_unreachable ();
!     }
! }
! 
! /* Returns the smallest value Y such that MAX >= Y >= X and expression
!    M * v + A wraps in interval 0 .. MASK at Y + 1, or MAX if no such
!    Y exists.  */
! 
! static unsigned HOST_WIDE_INT
! one_before_wrap (unsigned HOST_WIDE_INT m, unsigned HOST_WIDE_INT a,
! 		 unsigned HOST_WIDE_INT x, unsigned HOST_WIDE_INT mask,
! 		 unsigned HOST_WIDE_INT max)
! {
!   unsigned HOST_WIDE_INT diff, mabs, val;
! 
!   if (!m)
!     return max;
! 
!   /* If M has just the msb 1, M * v + A has just two values (A and A + M).
!      Could be handled in a better way, but this case is unlikely to occur
!      in practice, so just make it safe.  */
!   if (m == (mask >> 1) + 1)
!     return x;
! 
!   val = (m * x + a) & mask;
! 
!   if (m & ~(mask >> 1))
!     {
!       /* M is negative.  */
!       diff = val;
!       mabs = (-m) & mask;
      }
    else
      {
!       /* M is positive.  */
!       diff = mask - val;
!       mabs = m;
!     }
  
!   x = x + diff / mabs;
! 
!   return (x > max ? max : x);
! }
! 
! /* Returns true if we can prove that
! 
!    (M1 * x + A1) mod (2 ^ BITS1) COMP (M2 * x + A2) mod (2 ^ BITS2)
! 
!    where COMP is a comparison operator, for all x between
!    MIN and MAX.  The interval is taken as cyclic, i.e. wraps
!    over 2 ^ BITS.  */
! 
! static bool
! prove_affine_ineq (unsigned HOST_WIDE_INT m1, unsigned HOST_WIDE_INT a1,
! 		   unsigned bits1, enum tree_code comp,
! 		   unsigned HOST_WIDE_INT m2, unsigned HOST_WIDE_INT a2,
! 		   unsigned bits2,
! 		   unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
! 		   unsigned bits)
! {
!   unsigned HOST_WIDE_INT mask1, mask2, mask, top, w;
!   unsigned nwraps = 0;
! 
!   gcc_assert (comp == LT_EXPR
! 	      || comp == LE_EXPR
! 	      || comp == GT_EXPR
! 	      || comp == GE_EXPR);
!   mask = mask_for_size (bits);
! 
!   if (min > max)
!     {
!       /* For simplicity split the wrapped interval into two.  */
!       return (prove_affine_ineq (m1, a1, bits1, comp, m2, a2, bits2,
! 				 min, mask, bits)
! 	      && prove_affine_ineq (m1, a1, bits1, comp, m2, a2, bits2,
! 				    0, max, bits));
!     }
! 
!   gcc_assert (max <= mask);
! 
!   mask1 = mask_for_size (bits1);
!   mask2 = mask_for_size (bits2);
! 
!   gcc_assert (m1 <= mask1);
!   gcc_assert (a1 <= mask1);
!   gcc_assert (m2 <= mask2);
!   gcc_assert (a2 <= mask2);
! 
!   if (!compare_p ((m1 * min + a1) & mask1, comp, (m2 * min + a2) & mask2))
!     return false;
! 
!   do
!     {
!       /* Usually once there is a wrap, the condition becomes violated.  There
! 	 of course are exceptions -- for example if the underlying arithmetics
! 	 is signed, it is quite normal to get a wrap at 0.  Let us limit the
! 	 number of wraps to avoid compile time problems on weird tests.
! 	 The value here is probably a bit higher than strictly necessary.  */
!       if (nwraps++ > 10)
! 	return false;
! 
!       top = max;
! 
!       w = one_before_wrap (m1, a1, min, mask1, mask);
!       if (w < top)
! 	top = w;
!       w = one_before_wrap (m2, a2, min, mask2, mask);
!       if (w < top)
! 	top = w;
! 
!       /* The inequality is valid in the interval on that no wrap occurs iff
! 	 it is valid for both endpoints of the interval.  */
!       if (!compare_p ((m1 * top + a1) & mask1, comp, (m2 * top + a2) & mask2))
! 	return false;
! 
!       min = top + 1;
!     } while (top != max);
! 
!   return true;
! }
! 
! /* Returns true if it is able to prove X COMP Y, where COMP is some
!    comparison operator, inside LOOP.  TYPE is type of X and Y; it is
!    assumed to be unsigned.  */
! 
! static bool
! prove_ineq (struct loop *loop, tree type, tree x, enum tree_code comp, tree y)
! {
!   tree expr, varx, vary, var;
!   unsigned HOST_WIDE_INT multx, multy, addx, addy, minv, maxv;
!   unsigned bitsx, bitsy;
!   
!   gcc_assert (TYPE_UNSIGNED (type));
! 
!   expr = fold (build2 (comp, boolean_type_node, x, y));
!   if (TREE_CODE (expr) == INTEGER_CST)
!     return !zero_p (expr);
! 
!   /* Try proving the inequality using the entry conditions.  */
!   expr = simplify_using_initial_conditions (loop, expr);
!   if (TREE_CODE (expr) == INTEGER_CST)
!     return !zero_p (expr);
! 
!   /* Failed.  Try formulating the inequality as a modulo inequality of affine
!      expressions.  */
!   split_affine_expression (x, &varx, &multx, &addx, &bitsx);
!   split_affine_expression (y, &vary, &multy, &addy, &bitsy);
! 
!   var = varx;
!   if (!var)
!     {
!       var = vary;
!       if (!var)
! 	return false;
!     }
!   else if (vary && !operand_equal_p (var, vary, 0))
!     return false;
! 
!   if (bitsx > HOST_BITS_PER_WIDE_INT
!       || bitsy > HOST_BITS_PER_WIDE_INT
!       || TYPE_PRECISION (TREE_TYPE (var)) > HOST_BITS_PER_WIDE_INT)
!     return false;
! 
!   determine_range (loop, var, &minv, &maxv);
! 
!   return prove_affine_ineq (multx, addx, bitsx, comp,
! 			    multy, addy, bitsy,
! 			    minv, maxv, TYPE_PRECISION (TREE_TYPE (var)));
  }
  
  /* Checks whether it is correct to count the induction variable BASE + STEP * I
     at AT_STMT in wider TYPE, using the bounds on numbers of iterations of a
     LOOP.  If it is possible, return the value of step of the induction variable
!    in the TYPE, otherwise return NULL_TREE.  NO_OVERFLOW is true if we know
!    that the induction variable does not overflow in the inner type.  */
  
  tree
  can_count_iv_in_wider_type (struct loop *loop, tree type, tree base, tree step,
! 			    tree at_stmt, bool no_overflow)
  {
+   tree inner_type = TREE_TYPE (base), b, new_step, new_step_abs;
+   tree extreme, unsigned_type, delta, bound_type, bound_val, btype, tmp;
    struct nb_iter_bound *bound;
!   enum tree_code comp;
! 
!   gcc_assert (TYPE_PRECISION (type) > TYPE_PRECISION (inner_type));
! 
!   if (TREE_CODE (step) != INTEGER_CST)
!     return NULL_TREE;
! 
!   if (zero_p (step))
!     return build_int_cst_type (type, 0);
! 
!   unsigned_type = unsigned_type_for (inner_type);
!   b = fold_convert (unsigned_type, base);
! 
!   /* To get the new step, sign extend (regardless of signedness of the
!      target type) the old one.  */
! 
!   if (tree_int_cst_sign_bit (step))
!     {
!       tmp = fold (build1 (NEGATE_EXPR, inner_type, step));
! 
!       if (tree_int_cst_sign_bit (tmp))
! 	{
! 	  /* This happens only for the number that has just the msb 1.
! 	     Not worth handling.  */
! 	  return NULL_TREE;
! 	}
  
+       new_step_abs = fold_convert (unsigned_type, tmp);
+       new_step = fold (build1 (NEGATE_EXPR, type,
+ 			       fold_convert (type, tmp)));
+       extreme = fold_convert (unsigned_type,
+ 			      lower_bound_in_type (type, inner_type));
+       delta = fold (build2 (MINUS_EXPR, unsigned_type, b, extreme));
+     }
+   else
+     {
+       new_step = fold_convert (type, step);
+       new_step_abs = fold_convert (unsigned_type, step);
+       extreme = fold_convert (unsigned_type,
+ 			      upper_bound_in_type (type, inner_type));
+       delta = fold (build2 (MINUS_EXPR, unsigned_type, extreme, b));
+     }
+ 
+   if (no_overflow)
+     {
+       /* We know that the induction variable cannot overflow.  However this
+ 	 does not necessarily mean that it will not overflow in the target
+ 	 type.  Concretely the problems occur if we cast a signed value to
+ 	 unsigned and
+ 	 -- initial value is nonnegative and the step is negative, or
+ 	 -- initial value is negative and the step is positive.
+ 
+ 	 In both of these cases the overflow through zero would occur in the
+ 	 target type.
+ 	 
+ 	 There are no other problems.  If signedness is the same, everything is
+ 	 obviously OK.  When casting unsigned to signed, everything works since
+ 	 signed type is wider and therefore all values of the unsigned one fit
+ 	 in.  */
+       if (TYPE_UNSIGNED (inner_type)
+ 	  || !TYPE_UNSIGNED (type))
+ 	return new_step;
+ 
+       /* If the initial value has the same signedness as the step, we can still
+ 	 proceed.  */
+       if (tree_int_cst_sign_bit (step))
+ 	comp = LT_EXPR;
+       else
+ 	comp = GE_EXPR;
+ 
+       if (prove_ineq (loop, inner_type, base, comp,
+ 		      build_int_cst_type (inner_type, 0)))
+ 	return new_step;
+     }
+ 
+   if (!loop->bounds)
+     return NULL_TREE;
+ 
+   /* We have simplified the situation a bit.  We have an induction variable
+      with base 0 and step NEW_STEP_ABS, and we want to know whether it
+      may exceed DELTA during the iterations of the loop.  */
+      
    for (bound = loop->bounds; bound; bound = bound->next)
      {
!       bound_val = bound->bound;
!       bound_type = TREE_TYPE (bound_val);
! 
!       if (at_stmt
! 	  && stmt_dominates_stmt_p (bound->at_stmt, at_stmt))
! 	{
! 	  /* After the statement OF we know that anything is executed at most
! 	     BOUND times.  So we may decrease the value of the bound by one.
! 
! 	     This would fail in one case -- if the BOUND == 0.  This
! 	     means that this loop should have been removed somewhere (in jump
! 	     threading, or as last instance in ivcanon).  It is possible that
! 	     this failed for some reason (the condition being too complicated
! 	     for jump threading, ivcanon not being run, ...), but since the
! 	     loop runs exactly once, claiming that the iv may be extended
! 	     is OK, and claiming that it cannot will not cause any problems.
! 	     So do not bother with checking any conditions.  */
! 	  bound_val = fold (build2 (MINUS_EXPR, bound_type,
! 				    bound_val,
! 				    build_int_cst_type (bound_type, 1)));
! 	}
! 
!       /* First check whether it makes sense to consider the bound at all.
! 	 Unless we are able to prove that BOUND_VAL * NEW_STEP_ABS fits in
! 	 UNSIGNED_TYPE, we would not be able to prove anything useful.  */
!       if (TYPE_PRECISION (bound_type) >= TYPE_PRECISION (unsigned_type))
! 	{
! 	  btype = bound_type;
! 	  b = bound_val;
! 	}
!       else
! 	{
! 	  btype = unsigned_type;
! 	  b = fold_convert (btype, bound_val);
! 	}
!       tmp = upper_bound_in_type (btype, unsigned_type);
!       if (!integer_onep (new_step_abs))
! 	tmp = fold (build2 (TRUNC_DIV_EXPR, btype, tmp,
! 			    fold_convert (btype, new_step_abs)));
!       if (!prove_ineq (loop, btype, b, LE_EXPR, tmp))
! 	continue;
! 
!       /* Passed.  Now compare with DELTA to finish the verification.  */
!       bound_val = fold_convert (unsigned_type, bound_val);
!       bound_val = fold (build2 (MULT_EXPR, unsigned_type,
! 				bound_val, new_step_abs));
  
!       if (prove_ineq (loop, unsigned_type, delta, GE_EXPR, bound_val))
  	return new_step;
      }
  
Index: tree.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.c,v
retrieving revision 1.458
diff -c -3 -p -r1.458 tree.c
*** tree.c	23 Dec 2004 16:21:31 -0000	1.458
--- tree.c	27 Dec 2004 01:35:10 -0000
*************** tree_int_cst_sgn (tree t)
*** 3851,3856 ****
--- 3851,3876 ----
      return 1;
  }
  
+ /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
+    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
+ 
+ int
+ tree_int_cst_sign_bit (tree t)
+ {
+   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
+   unsigned HOST_WIDE_INT w;
+ 
+   if (bitno < HOST_BITS_PER_WIDE_INT)
+     w = TREE_INT_CST_LOW (t);
+   else
+     {
+       w = TREE_INT_CST_HIGH (t);
+       bitno -= HOST_BITS_PER_WIDE_INT;
+     }
+ 
+   return (w >> bitno) & 1;
+ }
+ 
  /* Compare two constructor-element-type constants.  Return 1 if the lists
     are known to be equal; otherwise return 0.  */
  
Index: tree.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.h,v
retrieving revision 1.675
diff -c -3 -p -r1.675 tree.h
*** tree.h	24 Dec 2004 05:23:08 -0000	1.675
--- tree.h	27 Dec 2004 01:35:10 -0000
*************** struct tree_common GTY(())
*** 379,384 ****
--- 379,386 ----
             all decls
         BIT_FIELD_REF_UNSIGNED in
             BIT_FIELD_REF
+        CHREC_NO_OVERFLOW in
+ 	   PLYNOMIAL_CHREC
  
     asm_written_flag:
  
*************** extern void tree_operand_check_failed (i
*** 946,957 ****
--- 948,974 ----
  #define BIT_FIELD_REF_UNSIGNED(NODE) \
    (BIT_FIELD_REF_CHECK (NODE)->common.unsigned_flag)
  
+ /* In a POLYNOMIAL_CHREC, means that the chrec cannot overflow.  */
+ #define CHREC_NO_OVERFLOW(NODE) \
+   (POLYNOMIAL_CHREC_CHECK (NODE)->common.unsigned_flag)
+ 
  /* In integral and pointer types, means an unsigned type.  */
  #define TYPE_UNSIGNED(NODE) (TYPE_CHECK (NODE)->common.unsigned_flag)
  
  #define TYPE_TRAP_SIGNED(NODE) \
    (flag_trapv && ! TYPE_UNSIGNED (NODE))
  
+ /* True if the arithmetics in TYPE does not overflow or the behavior on
+    overflow is undefined.
+    
+    This is true for pointer arithmetics and for signed arithmetics with
+    -fno-wrapv.  */
+ #define TYPE_ARITH_NO_OVERFLOW(TYPE) \
+   (POINTER_TYPE_P (TYPE) \
+    || (INTEGRAL_TYPE_P (TYPE) \
+        && !TYPE_UNSIGNED (TYPE) \
+        && !flag_wrapv))
+      
  /* Nonzero in a VAR_DECL means assembler code has been written.
     Nonzero in a FUNCTION_DECL means that the function has been compiled.
     This is interesting in an inline function, since it might not need
*************** extern int tree_int_cst_lt (tree, tree);
*** 2895,2900 ****
--- 2912,2918 ----
  extern int tree_int_cst_compare (tree, tree);
  extern int host_integerp (tree, int);
  extern HOST_WIDE_INT tree_low_cst (tree, int);
+ extern int tree_int_cst_sign_bit (tree);
  extern int tree_int_cst_msb (tree);
  extern int tree_int_cst_sgn (tree);
  extern int tree_expr_nonnegative_p (tree);
Follow-Ups:
- Re: Serious performance regression -- some tree optimizer questions
  - From: Sebastian Pop
- Re: Serious performance regression -- some tree optimizer questions
  - From: Sebastian Pop
- Re: Serious performance regression -- some tree optimizer questions
  - From: Ulrich Weigand
References:
- Re: Serious performance regression -- some tree optimizer questions
  - From: Zdenek Dvorak
- Re: Serious performance regression -- some tree optimizer questions
  - From: Ulrich Weigand
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]