This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [PATCH][4.1] Fix PRs 27795, 27639, 26719 - wrong code with scev/loop

From: Zdenek Dvorak <rakdver at atrey dot karlin dot mff dot cuni dot cz>
To: Richard Guenther <rguenther at suse dot de>
Cc: gcc-patches at gcc dot gnu dot org
Date: Fri, 21 Jul 2006 16:07:54 +0200
Subject: Re: [PATCH][4.1] Fix PRs 27795, 27639, 26719 - wrong code with scev/loop
References: <Pine.LNX.4.64.0607211557100.8635@nyjnma.fhfr.qr>
Hello,

> This is a backport of a patch from mainline that has been sitting there
> since May 24.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu for all langs + ada.

if this patch is approved for 4.1, http://gcc.gnu.org/ml/gcc-patches/2006-07/msg00229.html
should probably go there as well.

Zdenek

> Ok for the branch?
> 
> Thanks,
> Richard.
> 
> 
> :ADDPATCH middle-end:
> 
> 2006-07-21  Zdenek Dvorak  <dvorakz@suse.cz>
> 	Richard Guenther  <rguenther@suse.de>
> 
> 	PR tree-optimization/27795
> 	PR tree-optimization/27639
> 	PR tree-optimization/26719
> 	* tree-vrp.c (adjust_range_with_scev): Use scev_direction and adjust
> 	call to scev_probably_wraps_p.
> 	* tree-ssa-loop-niter.c (compare_trees, convert_step_widening,
> 	used_in_pointer_arithmetic_p, convert_step): Removed.
> 	(nowrap_type_p): New function.
> 	(scev_probably_wraps_p): Rewritten.
> 	* tree-scalar-evolution.c (instantiate_parameters_1): Do not call
> 	chrec_convert if chrec_convert_aggressive might have been used.
> 	* tree-chrec.c (convert_affine_scev, chrec_convert_1,
> 	scev_direction): New functions.
> 	(chrec_convert): Changed to a wrapper over chrec_convert_1.
> 	* tree-ssa-loop-ivopts.c (idx_find_step): Use convert_affine_scev
> 	instead of convert_step.
> 	* tree-flow.h (scev_probably_wraps_p): Declaration changed.
> 	(convert_step): Declaration removed.
> 	(convert_affine_scev, nowrap_type_p, scev_direction): Declare.
> 
> 	* gcc.dg/pr27639.c: New test.
> 	* gcc.dg/pr26719.c: New test.
> 	* gcc.dg/tree-ssa/scev-cast.c: New test.
> 
> 
> Index: tree-vrp.c
> ===================================================================
> *** tree-vrp.c	(revision 115643)
> --- tree-vrp.c	(working copy)
> *************** adjust_range_with_scev (value_range_t *v
> *** 1760,1766 ****
>   			tree var)
>   {
>     tree init, step, chrec;
> !   bool init_is_max, unknown_max;
>   
>     /* TODO.  Don't adjust anti-ranges.  An anti-range may provide
>        better opportunities than a regular range, but I'm not sure.  */
> --- 1760,1766 ----
>   			tree var)
>   {
>     tree init, step, chrec;
> !   enum ev_direction dir;
>   
>     /* TODO.  Don't adjust anti-ranges.  An anti-range may provide
>        better opportunities than a regular range, but I'm not sure.  */
> *************** adjust_range_with_scev (value_range_t *v
> *** 1780,1790 ****
>         || !is_gimple_min_invariant (step))
>       return;
>   
> !   /* Do not adjust ranges when chrec may wrap.  */
> !   if (scev_probably_wraps_p (chrec_type (chrec), init, step, stmt,
> ! 			     cfg_loops->parray[CHREC_VARIABLE (chrec)],
> ! 			     &init_is_max, &unknown_max)
> !       || unknown_max)
>       return;
>   
>     if (!POINTER_TYPE_P (TREE_TYPE (init))
> --- 1780,1793 ----
>         || !is_gimple_min_invariant (step))
>       return;
>   
> !   dir = scev_direction (chrec);
> !   if (/* Do not adjust ranges if we do not know whether the iv increases
> ! 	 or decreases,  ... */
> !       dir == EV_DIR_UNKNOWN
> !       /* ... or if it may wrap.  */
> !       || scev_probably_wraps_p (init, step, stmt,
> ! 				cfg_loops->parray[CHREC_VARIABLE (chrec)],
> ! 				true))
>       return;
>   
>     if (!POINTER_TYPE_P (TREE_TYPE (init))
> *************** adjust_range_with_scev (value_range_t *v
> *** 1792,1798 ****
>       {
>         /* For VARYING or UNDEFINED ranges, just about anything we get
>   	 from scalar evolutions should be better.  */
> !       if (init_is_max)
>   	set_value_range (vr, VR_RANGE, TYPE_MIN_VALUE (TREE_TYPE (init)),
>   	                 init, vr->equiv);
>         else
> --- 1795,1801 ----
>       {
>         /* For VARYING or UNDEFINED ranges, just about anything we get
>   	 from scalar evolutions should be better.  */
> !       if (dir == EV_DIR_DECREASES)
>   	set_value_range (vr, VR_RANGE, TYPE_MIN_VALUE (TREE_TYPE (init)),
>   	                 init, vr->equiv);
>         else
> *************** adjust_range_with_scev (value_range_t *v
> *** 1804,1810 ****
>         tree min = vr->min;
>         tree max = vr->max;
>   
> !       if (init_is_max)
>   	{
>   	  /* INIT is the maximum value.  If INIT is lower than VR->MAX
>   	     but no smaller than VR->MIN, set VR->MAX to INIT.  */
> --- 1807,1813 ----
>         tree min = vr->min;
>         tree max = vr->max;
>   
> !       if (dir == EV_DIR_DECREASES)
>   	{
>   	  /* INIT is the maximum value.  If INIT is lower than VR->MAX
>   	     but no smaller than VR->MIN, set VR->MAX to INIT.  */
> Index: tree-ssa-loop-niter.c
> ===================================================================
> *** tree-ssa-loop-niter.c	(revision 115643)
> --- tree-ssa-loop-niter.c	(working copy)
> *************** estimate_numbers_of_iterations (struct l
> *** 1664,1696 ****
>       }
>   }
>   
> - /* If A > B, returns -1.  If A == B, returns 0.  If A < B, returns 1.
> -    If neither of these relations can be proved, returns 2.  */
> - 
> - static int
> - compare_trees (tree a, tree b)
> - {
> -   tree typea = TREE_TYPE (a), typeb = TREE_TYPE (b);
> -   tree type;
> - 
> -   if (TYPE_PRECISION (typea) > TYPE_PRECISION (typeb))
> -     type = typea;
> -   else
> -     type = typeb;
> - 
> -   a = fold_convert (type, a);
> -   b = fold_convert (type, b);
> - 
> -   if (nonzero_p (fold_binary (EQ_EXPR, boolean_type_node, a, b)))
> -     return 0;
> -   if (nonzero_p (fold_binary (LT_EXPR, boolean_type_node, a, b)))
> -     return 1;
> -   if (nonzero_p (fold_binary (GT_EXPR, boolean_type_node, a, b)))
> -     return -1;
> - 
> -   return 2;
> - }
> - 
>   /* Returns true if statement S1 dominates statement S2.  */
>   
>   static bool
> --- 1664,1669 ----
> *************** proved_non_wrapping_p (tree at_stmt,
> *** 1785,1907 ****
>     return false;
>   }
>   
> ! /* Checks whether it is correct to count the induction variable BASE +
> !    STEP * I at AT_STMT in a wider type NEW_TYPE, using the bounds on
> !    numbers of iterations of a LOOP.  If it is possible, return the
> !    value of step of the induction variable in the NEW_TYPE, otherwise
> !    return NULL_TREE.  */
> ! 
> ! static tree
> ! convert_step_widening (struct loop *loop, tree new_type, tree base, tree step,
> ! 		       tree at_stmt)
> ! {
> !   struct nb_iter_bound *bound;
> !   tree base_in_new_type, base_plus_step_in_new_type, step_in_new_type;
> !   tree delta, step_abs;
> !   tree unsigned_type, valid_niter;
> ! 
> !   /* Compute the new step.  For example, {(uchar) 100, +, (uchar) 240}
> !      is converted to {(uint) 100, +, (uint) 0xfffffff0} in order to
> !      keep the values of the induction variable unchanged: 100, 84, 68,
> !      ...
> ! 
> !      Another example is: (uint) {(uchar)100, +, (uchar)3} is converted
> !      to {(uint)100, +, (uint)3}.  
> ! 
> !      Before returning the new step, verify that the number of
> !      iterations is less than DELTA / STEP_ABS (i.e. in the previous
> !      example (256 - 100) / 3) such that the iv does not wrap (in which
> !      case the operations are too difficult to be represented and
> !      handled: the values of the iv should be taken modulo 256 in the
> !      wider type; this is not implemented).  */
> !   base_in_new_type = fold_convert (new_type, base);
> !   base_plus_step_in_new_type = 
> !     fold_convert (new_type,
> ! 		  fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step));
> !   step_in_new_type = fold_build2 (MINUS_EXPR, new_type,
> ! 				  base_plus_step_in_new_type,
> ! 				  base_in_new_type);
> ! 
> !   if (TREE_CODE (step_in_new_type) != INTEGER_CST)
> !     return NULL_TREE;
> ! 
> !   switch (compare_trees (base_plus_step_in_new_type, base_in_new_type))
> !     {
> !     case -1:
> !       {
> ! 	tree extreme = upper_bound_in_type (new_type, TREE_TYPE (base));
> ! 	delta = fold_build2 (MINUS_EXPR, new_type, extreme,
> ! 			     base_in_new_type);
> ! 	step_abs = step_in_new_type;
> ! 	break;
> !       }
> ! 
> !     case 1:
> !       {
> ! 	tree extreme = lower_bound_in_type (new_type, TREE_TYPE (base));
> ! 	delta = fold_build2 (MINUS_EXPR, new_type, base_in_new_type,
> ! 			     extreme);
> ! 	step_abs = fold_build1 (NEGATE_EXPR, new_type, step_in_new_type);
> ! 	break;
> !       }
>   
> !     case 0:
> !       return step_in_new_type;
> ! 
> !     default:
> !       return NULL_TREE;
> !     }
> ! 
> !   unsigned_type = unsigned_type_for (new_type);
> !   delta = fold_convert (unsigned_type, delta);
> !   step_abs = fold_convert (unsigned_type, step_abs);
> !   valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type,
> ! 			     delta, step_abs);
> ! 
> !   estimate_numbers_of_iterations_loop (loop);
> !   for (bound = loop->bounds; bound; bound = bound->next)
> !     if (proved_non_wrapping_p (at_stmt, bound, new_type, valid_niter))
> !       return step_in_new_type;
> ! 
> !   /* Fail when the loop has no bound estimations, or when no bound can
> !      be used for verifying the conversion.  */
> !   return NULL_TREE;
> ! }
> ! 
> ! /* Returns true when VAR is used in pointer arithmetics.  DEPTH is
> !    used for limiting the search.  */
> ! 
> ! static bool
> ! used_in_pointer_arithmetic_p (tree var, int depth)
>   {
> !   use_operand_p use_p;
> !   imm_use_iterator iter;
> ! 
> !   if (depth == 0
> !       || TREE_CODE (var) != SSA_NAME
> !       || !has_single_use (var))
> !     return false;
>   
> !   FOR_EACH_IMM_USE_FAST (use_p, iter, var)
> !     {
> !       tree stmt = USE_STMT (use_p);
>   
> -       if (stmt && TREE_CODE (stmt) == MODIFY_EXPR)
> - 	{
> - 	  tree rhs = TREE_OPERAND (stmt, 1);
> - 
> - 	  if (TREE_CODE (rhs) == NOP_EXPR
> - 	      || TREE_CODE (rhs) == CONVERT_EXPR)
> - 	    {
> - 	      if (POINTER_TYPE_P (TREE_TYPE (rhs)))
> - 		return true;
> - 	      return false;
> - 	    }
> - 	  else
> - 	    return used_in_pointer_arithmetic_p (TREE_OPERAND (stmt, 0),
> - 						 depth - 1);
> - 	}
> -     }
>     return false;
>   }
>   
> --- 1758,1776 ----
>     return false;
>   }
>   
> ! /* Returns true if the arithmetics in TYPE can be assumed not to wrap.  */
>   
> ! bool
> ! nowrap_type_p (tree type)
>   {
> !   if (!flag_wrapv
> !       && INTEGRAL_TYPE_P (type)
> !       && !TYPE_UNSIGNED (type))
> !     return true;
>   
> !   if (POINTER_TYPE_P (type))
> !     return true;
>   
>     return false;
>   }
>   
> *************** used_in_pointer_arithmetic_p (tree var, 
> *** 1910,2065 ****
>      enough with respect to the step and initial condition in order to
>      keep the evolution confined in TYPEs bounds.  Return true when the
>      iv is known to overflow or when the property is not computable.
> ! 
> !    Initialize INIT_IS_MAX to true when the evolution goes from
> !    INIT_IS_MAX to LOWER_BOUND_IN_TYPE, false in the contrary case.
> !    When this property cannot be determined, UNKNOWN_MAX is set to
> !    true.  */
>   
>   bool
> ! scev_probably_wraps_p (tree type, tree base, tree step, 
>   		       tree at_stmt, struct loop *loop,
> ! 		       bool *init_is_max, bool *unknown_max)
>   {
>     struct nb_iter_bound *bound;
>     tree delta, step_abs;
>     tree unsigned_type, valid_niter;
> !   tree base_plus_step, bpsps;
> !   int cps, cpsps;
>   
> !   /* FIXME: The following code will not be used anymore once
> !      http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html is
> !      committed.
> ! 
> !      If AT_STMT is a cast to unsigned that is later used for
> !      referencing a memory location, it is followed by a pointer
> !      conversion just after.  Because pointers do not wrap, the
> !      sequences that reference the memory do not wrap either.  In the
> !      following example, sequences corresponding to D_13 and to D_14
> !      can be proved to not wrap because they are used for computing a
> !      memory access:
>   	 
>          D.1621_13 = (long unsigned intD.4) D.1620_12;
>          D.1622_14 = D.1621_13 * 8;
>          D.1623_15 = (doubleD.29 *) D.1622_14;
> !   */
> !   if (at_stmt && TREE_CODE (at_stmt) == MODIFY_EXPR)
> !     {
> !       tree op0 = TREE_OPERAND (at_stmt, 0);
> !       tree op1 = TREE_OPERAND (at_stmt, 1);
> !       tree type_op1 = TREE_TYPE (op1);
> ! 
> !       if ((TYPE_UNSIGNED (type_op1)
> ! 	   && used_in_pointer_arithmetic_p (op0, 2))
> ! 	  || POINTER_TYPE_P (type_op1))
> ! 	{
> ! 	  *unknown_max = true;
> ! 	  return false;
> ! 	}
> !     }
>   
>     if (chrec_contains_undetermined (base)
>         || chrec_contains_undetermined (step)
> !       || TREE_CODE (base) == REAL_CST
> !       || TREE_CODE (step) == REAL_CST)
> !     {
> !       *unknown_max = true;
> !       return true;
> !     }
> ! 
> !   *unknown_max = false;
> !   base_plus_step = fold_build2 (PLUS_EXPR, type, base, step);
> !   bpsps = fold_build2 (PLUS_EXPR, type, base_plus_step, step);
> !   cps = compare_trees (base_plus_step, base);
> !   cpsps = compare_trees (bpsps, base_plus_step);
> ! 
> !   /* Check that the sequence is not wrapping in the first step: it
> !      should have the same monotonicity for the first two steps.  See
> !      PR23410.  */
> !   if (cps != cpsps)
>       return true;
>   
> !   switch (cps)
> !     {
> !     case -1:
> !       {
> ! 	tree extreme = upper_bound_in_type (type, TREE_TYPE (base));
> ! 	delta = fold_build2 (MINUS_EXPR, type, extreme, base);
> ! 	step_abs = step;
> ! 	*init_is_max = false;
> ! 	break;
> !       }
> ! 
> !     case 1:
> !       {
> ! 	tree extreme = lower_bound_in_type (type, TREE_TYPE (base));
> ! 	delta = fold_build2 (MINUS_EXPR, type, base, extreme);
> ! 	step_abs = fold_build1 (NEGATE_EXPR, type, step);
> ! 	*init_is_max = true;
> ! 	break;
> !       }
> ! 
> !     case 0:
> !       /* This means step is equal to 0.  This should not happen.  It
> ! 	 could happen in convert step, but not here.  Safely answer
> ! 	 don't know as in the default case.  */
> ! 
> !     default:
> !       *unknown_max = true;
> !       return true;
> !     }
>   
> !   /* If AT_STMT represents a cast operation, we may not be able to
> !      take advantage of the undefinedness of signed type evolutions.
>   
> !      implement-c.texi states: "For conversion to a type of width
> !      N, the value is reduced modulo 2^N to be within range of the
> !      type;"
> ! 
> !      See PR 21959 for a test case.  Essentially, given a cast
> !      operation
> !      		unsigned char uc;
> ! 		signed char sc;
> ! 		...
> !      		sc = (signed char) uc;
> ! 		if (sc < 0)
> ! 		  ...
> ! 
> !      where uc and sc have the scev {0, +, 1}, we would consider uc to
> !      wrap around, but not sc, because it is of a signed type.  This
> !      causes VRP to erroneously fold the predicate above because it
> !      thinks that sc cannot be negative.  */
> !   if (at_stmt && TREE_CODE (at_stmt) == MODIFY_EXPR)
> !     {
> !       tree rhs = TREE_OPERAND (at_stmt, 1);
> !       tree outer_t = TREE_TYPE (rhs);
>   
> !       if (!TYPE_UNSIGNED (outer_t)
> ! 	  && (TREE_CODE (rhs) == NOP_EXPR || TREE_CODE (rhs) == CONVERT_EXPR))
> ! 	{
> ! 	  tree inner_t = TREE_TYPE (TREE_OPERAND (rhs, 0));
> ! 
> ! 	  /* If the inner type is unsigned and its size and/or
> ! 	     precision are smaller to that of the outer type, then the
> ! 	     expression may wrap around.  */
> ! 	  if (TYPE_UNSIGNED (inner_t)
> ! 	      && (TYPE_SIZE (inner_t) <= TYPE_SIZE (outer_t)
> ! 		  || TYPE_PRECISION (inner_t) <= TYPE_PRECISION (outer_t)))
> ! 	    {
> ! 	      *unknown_max = true;
> ! 	      return true;
> ! 	    }
> ! 	}
>       }
>   
> -   /* After having set INIT_IS_MAX, we can return false: when not using
> -      wrapping arithmetic, signed types don't wrap.  */
> -   if (!flag_wrapv && !TYPE_UNSIGNED (type))
> -     return false;
> - 
> -   unsigned_type = unsigned_type_for (type);
> -   delta = fold_convert (unsigned_type, delta);
> -   step_abs = fold_convert (unsigned_type, step_abs);
>     valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
>   
>     estimate_numbers_of_iterations_loop (loop);
> --- 1779,1850 ----
>      enough with respect to the step and initial condition in order to
>      keep the evolution confined in TYPEs bounds.  Return true when the
>      iv is known to overflow or when the property is not computable.
> !  
> !    USE_OVERFLOW_SEMANTICS is true if this function should assume that
> !    the rules for overflow of the given language apply (e.g., that signed
> !    arithmetics in C does not overflow).  */
>   
>   bool
> ! scev_probably_wraps_p (tree base, tree step, 
>   		       tree at_stmt, struct loop *loop,
> ! 		       bool use_oveflow_semantics)
>   {
>     struct nb_iter_bound *bound;
>     tree delta, step_abs;
>     tree unsigned_type, valid_niter;
> !   tree type = TREE_TYPE (step);
> ! 
> !   /* FIXME: We really need something like
> !      http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
>   
> !      We used to test for the following situation that frequently appears
> !      during address arithmetics:
>   	 
>          D.1621_13 = (long unsigned intD.4) D.1620_12;
>          D.1622_14 = D.1621_13 * 8;
>          D.1623_15 = (doubleD.29 *) D.1622_14;
> ! 
> !      And derived that the sequence corresponding to D_14
> !      can be proved to not wrap because it is used for computing a
> !      memory access; however, this is not really the case -- for example,
> !      if D_12 = (unsigned char) [254,+,1], then D_14 has values
> !      2032, 2040, 0, 8, ..., but the code is still legal.  */
>   
>     if (chrec_contains_undetermined (base)
>         || chrec_contains_undetermined (step)
> !       || TREE_CODE (step) != INTEGER_CST)
>       return true;
>   
> !   if (zero_p (step))
> !     return false;
>   
> !   /* If we can use the fact that signed and pointer arithmetics does not
> !      wrap, we are done.  */
> !   if (use_oveflow_semantics && nowrap_type_p (type))
> !     return false;
>   
> !   /* Otherwise, compute the number of iterations before we reach the
> !      bound of the type, and verify that the loop is exited before this
> !      occurs.  */
> !   unsigned_type = unsigned_type_for (type);
> !   base = fold_convert (unsigned_type, base);
>   
> !   if (tree_int_cst_sign_bit (step))
> !     {
> !       tree extreme = fold_convert (unsigned_type,
> ! 				   lower_bound_in_type (type, type));
> !       delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
> !       step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
> ! 			      fold_convert (unsigned_type, step));
> !     }
> !   else
> !     {
> !       tree extreme = fold_convert (unsigned_type,
> ! 				   upper_bound_in_type (type, type));
> !       delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
> !       step_abs = fold_convert (unsigned_type, step);
>       }
>   
>     valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
>   
>     estimate_numbers_of_iterations_loop (loop);
> *************** scev_probably_wraps_p (tree type, tree b
> *** 2069,2104 ****
>   
>     /* At this point we still don't have a proof that the iv does not
>        overflow: give up.  */
> -   *unknown_max = true;
>     return true;
>   }
>   
> - /* Return the conversion to NEW_TYPE of the STEP of an induction
> -    variable BASE + STEP * I at AT_STMT.  When it fails, return
> -    NULL_TREE.  */
> - 
> - tree
> - convert_step (struct loop *loop, tree new_type, tree base, tree step,
> - 	      tree at_stmt)
> - {
> -   tree base_type;
> - 
> -   if (chrec_contains_undetermined (base)
> -       || chrec_contains_undetermined (step))
> -     return NULL_TREE;
> - 
> -   base_type = TREE_TYPE (base);
> - 
> -   /* When not using wrapping arithmetic, signed types don't wrap.  */
> -   if (!flag_wrapv && !TYPE_UNSIGNED (base_type))
> -     return fold_convert (new_type, step);
> - 
> -   if (TYPE_PRECISION (new_type) > TYPE_PRECISION (base_type))
> -     return convert_step_widening (loop, new_type, base, step, at_stmt);
> - 
> -   return fold_convert (new_type, step);
> - }
> - 
>   /* Frees the information on upper bounds on numbers of iterations of LOOP.  */
>   
>   void
> --- 1854,1862 ----
> Index: tree-scalar-evolution.c
> ===================================================================
> *** tree-scalar-evolution.c	(revision 115643)
> --- tree-scalar-evolution.c	(working copy)
> *************** instantiate_parameters_1 (struct loop *l
> *** 2113,2118 ****
> --- 2113,2124 ----
>         if (op0 == TREE_OPERAND (chrec, 0))
>   	return chrec;
>   
> +       /* If we used chrec_convert_aggressive, we can no longer assume that
> + 	 signed chrecs do not overflow, as chrec_convert does, so avoid
> +          calling it in that case.  */
> +       if (flags & FOLD_CONVERSIONS)
> + 	return fold_convert (TREE_TYPE (chrec), op0);
> + 
>         return chrec_convert (TREE_TYPE (chrec), op0, NULL_TREE);
>   
>       case SCEV_NOT_KNOWN:
> Index: tree-chrec.c
> ===================================================================
> *** tree-chrec.c	(revision 115643)
> --- tree-chrec.c	(working copy)
> *************** nb_vars_in_chrec (tree chrec)
> *** 1082,1087 ****
> --- 1082,1184 ----
>       }
>   }
>   
> + static tree chrec_convert_1 (tree, tree, tree, bool);
> + 
> + /* Converts BASE and STEP of affine scev to TYPE.  LOOP is the loop whose iv
> +    the scev corresponds to.  AT_STMT is the statement at that the scev is
> +    evaluated.  USE_OVERFLOW_SEMANTICS is true if this function should assume that
> +    the rules for overflow of the given language apply (e.g., that signed
> +    arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary
> +    tests, but also to enforce that the result follows them.  Returns true if the
> +    conversion succeeded, false otherwise.  */
> + 
> + bool
> + convert_affine_scev (struct loop *loop, tree type,
> + 		     tree *base, tree *step, tree at_stmt,
> + 		     bool use_overflow_semantics)
> + {
> +   tree ct = TREE_TYPE (*step);
> +   bool enforce_overflow_semantics;
> +   bool must_check_src_overflow, must_check_rslt_overflow;
> +   tree new_base, new_step;
> + 
> +   /* In general,
> +      (TYPE) (BASE + STEP * i) = (TYPE) BASE + (TYPE -- sign extend) STEP * i,
> +      but we must check some assumptions.
> +      
> +      1) If [BASE, +, STEP] wraps, the equation is not valid when precision
> +         of CT is smaller than the precision of TYPE.  For example, when we
> + 	cast unsigned char [254, +, 1] to unsigned, the values on left side
> + 	are 254, 255, 0, 1, ..., but those on the right side are
> + 	254, 255, 256, 257, ...
> +      2) In case that we must also preserve the fact that signed ivs do not
> +         overflow, we must additionally check that the new iv does not wrap.
> + 	For example, unsigned char [125, +, 1] casted to signed char could
> + 	become a wrapping variable with values 125, 126, 127, -128, -127, ...,
> + 	which would confuse optimizers that assume that this does not
> + 	happen.  */
> +   must_check_src_overflow = TYPE_PRECISION (ct) < TYPE_PRECISION (type);
> + 
> +   enforce_overflow_semantics = (use_overflow_semantics
> + 				&& nowrap_type_p (type));
> +   if (enforce_overflow_semantics)
> +     {
> +       /* We can avoid checking whether the result overflows in the following
> + 	 cases:
> + 
> + 	 -- must_check_src_overflow is true, and the range of TYPE is superset
> + 	    of the range of CT -- i.e., in all cases except if CT signed and
> + 	    TYPE unsigned.
> +          -- both CT and TYPE have the same precision and signedness.  */
> +       if (must_check_src_overflow)
> + 	{
> + 	  if (TYPE_UNSIGNED (type) && !TYPE_UNSIGNED (ct))
> + 	    must_check_rslt_overflow = true;
> + 	  else
> + 	    must_check_rslt_overflow = false;
> + 	}
> +       else if (TYPE_UNSIGNED (ct) == TYPE_UNSIGNED (type)
> + 	       && TYPE_PRECISION (ct) == TYPE_PRECISION (type))
> + 	must_check_rslt_overflow = false;
> +       else
> + 	must_check_rslt_overflow = true;
> +     }
> +   else
> +     must_check_rslt_overflow = false;
> + 
> +   if (must_check_src_overflow
> +       && scev_probably_wraps_p (*base, *step, at_stmt, loop,
> + 				use_overflow_semantics))
> +     return false;
> + 
> +   new_base = chrec_convert_1 (type, *base, at_stmt,
> + 			      use_overflow_semantics);
> +   /* The step must be sign extended, regardless of the signedness
> +      of CT and TYPE.  This only needs to be handled specially when
> +      CT is unsigned -- to avoid e.g. unsigned char [100, +, 255]
> +      (with values 100, 99, 98, ...) from becoming signed or unsigned
> +      [100, +, 255] with values 100, 355, ...; the sign-extension is 
> +      performed by default when CT is signed.  */
> +   new_step = *step;
> +   if (TYPE_PRECISION (type) > TYPE_PRECISION (ct) && TYPE_UNSIGNED (ct))
> +     new_step = chrec_convert_1 (signed_type_for (ct), new_step, at_stmt,
> + 				use_overflow_semantics);
> +   new_step = chrec_convert_1 (type, new_step, at_stmt, use_overflow_semantics);
> + 
> +   if (automatically_generated_chrec_p (new_base)
> +       || automatically_generated_chrec_p (new_step))
> +     return false;
> + 
> +   if (must_check_rslt_overflow
> +       /* Note that in this case we cannot use the fact that signed variables
> + 	 do not overflow, as this is what we are verifying for the new iv.  */
> +       && scev_probably_wraps_p (new_base, new_step, at_stmt, loop, false))
> +     return false;
> + 
> +   *base = new_base;
> +   *step = new_step;
> +   return true;
> + }
>   
>   
>   /* Convert CHREC to TYPE.  When the analyzer knows the context in
> *************** nb_vars_in_chrec (tree chrec)
> *** 1111,1117 ****
> --- 1208,1235 ----
>   tree 
>   chrec_convert (tree type, tree chrec, tree at_stmt)
>   {
> +   return chrec_convert_1 (type, chrec, at_stmt, true);
> + }
> + 
> + /* Convert CHREC to TYPE.  When the analyzer knows the context in
> +    which the CHREC is built, it sets AT_STMT to the statement that
> +    contains the definition of the analyzed variable, otherwise the
> +    conversion is less accurate: the information is used for
> +    determining a more accurate estimation of the number of iterations.
> +    By default AT_STMT could be safely set to NULL_TREE.
> +  
> +    USE_OVERFLOW_SEMANTICS is true if this function should assume that
> +    the rules for overflow of the given language apply (e.g., that signed
> +    arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary
> +    tests, but also to enforce that the result follows them.  */
> + 
> + static tree 
> + chrec_convert_1 (tree type, tree chrec, tree at_stmt,
> + 		 bool use_overflow_semantics)
> + {
>     tree ct, res;
> +   tree base, step;
> +   struct loop *loop;
>   
>     if (automatically_generated_chrec_p (chrec))
>       return chrec;
> *************** chrec_convert (tree type, tree chrec, tr
> *** 1120,1175 ****
>     if (ct == type)
>       return chrec;
>   
> !   if (evolution_function_is_affine_p (chrec))
> !     {
> !       tree base, step;
> !       bool dummy;
> !       struct loop *loop = current_loops->parray[CHREC_VARIABLE (chrec)];
> ! 
> !       base = instantiate_parameters (loop, CHREC_LEFT (chrec));
> !       step = instantiate_parameters (loop, CHREC_RIGHT (chrec));
> ! 
> !       /* Avoid conversion of (signed char) {(uchar)1, +, (uchar)1}_x
> ! 	 when it is not possible to prove that the scev does not wrap.
> ! 	 See PR22236, where a sequence 1, 2, ..., 255 has to be
> ! 	 converted to signed char, but this would wrap: 
> ! 	 1, 2, ..., 127, -128, ...  The result should not be
> ! 	 {(schar)1, +, (schar)1}_x, but instead, we should keep the
> ! 	 conversion: (schar) {(uchar)1, +, (uchar)1}_x.  */
> !       if (scev_probably_wraps_p (type, base, step, at_stmt, loop,
> ! 				 &dummy, &dummy))
> ! 	goto failed_to_convert;
> ! 
> !       step = convert_step (loop, type, base, step, at_stmt);
> !       if (!step)
> !  	{
> ! 	failed_to_convert:;
> ! 	  if (dump_file && (dump_flags & TDF_DETAILS))
> ! 	    {
> ! 	      fprintf (dump_file, "(failed conversion:");
> ! 	      fprintf (dump_file, "\n  type: ");
> ! 	      print_generic_expr (dump_file, type, 0);
> ! 	      fprintf (dump_file, "\n  base: ");
> ! 	      print_generic_expr (dump_file, base, 0);
> ! 	      fprintf (dump_file, "\n  step: ");
> ! 	      print_generic_expr (dump_file, step, 0);
> ! 	      fprintf (dump_file, "\n  estimated_nb_iterations: ");
> ! 	      print_generic_expr (dump_file, loop->estimated_nb_iterations, 0);
> ! 	      fprintf (dump_file, "\n)\n");
> ! 	    }
> ! 
> ! 	  return fold_convert (type, chrec);
> ! 	}
> ! 
> !       return build_polynomial_chrec (CHREC_VARIABLE (chrec),
> !  				     chrec_convert (type, CHREC_LEFT (chrec),
> !  						    at_stmt),
> !  				     step);
> !     }
>   
> !   if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
> !     return chrec_dont_know;
>   
>     res = fold_convert (type, chrec);
>   
>     /* Don't propagate overflows.  */
> --- 1238,1256 ----
>     if (ct == type)
>       return chrec;
>   
> !   if (!evolution_function_is_affine_p (chrec))
> !     goto keep_cast;
>   
> !   loop = current_loops->parray[CHREC_VARIABLE (chrec)];
> !   base = CHREC_LEFT (chrec);
> !   step = CHREC_RIGHT (chrec);
> ! 
> !   if (convert_affine_scev (loop, type, &base, &step, at_stmt,
> ! 			   use_overflow_semantics))
> !     return build_polynomial_chrec (loop->num, base, step);
>   
> +   /* If we cannot propagate the cast inside the chrec, just keep the cast.  */
> + keep_cast:
>     res = fold_convert (type, chrec);
>   
>     /* Don't propagate overflows.  */
> *************** chrec_type (tree chrec)
> *** 1232,1234 ****
> --- 1313,1337 ----
>     
>     return TREE_TYPE (chrec);
>   }
> + 
> + /* Returns EV_GROWS if CHREC grows (assuming that it does not overflow),
> +    EV_DECREASES if it decreases, and EV_UNKNOWN if we cannot determine
> +    which of these cases happens.  */
> + 
> + enum ev_direction
> + scev_direction (tree chrec)
> + {
> +   tree step;
> + 
> +   if (!evolution_function_is_affine_p (chrec))
> +     return EV_DIR_UNKNOWN;
> + 
> +   step = CHREC_RIGHT (chrec);
> +   if (TREE_CODE (step) != INTEGER_CST)
> +     return EV_DIR_UNKNOWN;
> + 
> +   if (tree_int_cst_sign_bit (step))
> +     return EV_DIR_DECREASES;
> +   else
> +     return EV_DIR_GROWS;
> + }
> Index: tree-ssa-loop-ivopts.c
> ===================================================================
> *** tree-ssa-loop-ivopts.c	(revision 115643)
> --- tree-ssa-loop-ivopts.c	(working copy)
> *************** idx_find_step (tree base, tree *idx, voi
> *** 1385,1391 ****
>   {
>     struct ifs_ivopts_data *dta = data;
>     struct iv *iv;
> !   tree step, iv_step, lbound, off;
>     struct loop *loop = dta->ivopts_data->current_loop;
>   
>     if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
> --- 1385,1391 ----
>   {
>     struct ifs_ivopts_data *dta = data;
>     struct iv *iv;
> !   tree step, iv_base, iv_step, lbound, off;
>     struct loop *loop = dta->ivopts_data->current_loop;
>   
>     if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
> *************** idx_find_step (tree base, tree *idx, voi
> *** 1438,1449 ****
>       /* The step for pointer arithmetics already is 1 byte.  */
>       step = build_int_cst (sizetype, 1);
>   
> !   /* FIXME: convert_step should not be used outside chrec_convert: fix
> !      this by calling chrec_convert.  */
> !   iv_step = convert_step (dta->ivopts_data->current_loop,
> ! 			  sizetype, iv->base, iv->step, dta->stmt);
> ! 
> !   if (!iv_step)
>       {
>         /* The index might wrap.  */
>         return false;
> --- 1438,1448 ----
>       /* The step for pointer arithmetics already is 1 byte.  */
>       step = build_int_cst (sizetype, 1);
>   
> !   iv_base = iv->base;
> !   iv_step = iv->step;
> !   if (!convert_affine_scev (dta->ivopts_data->current_loop,
> ! 			    sizetype, &iv_base, &iv_step, dta->stmt,
> ! 			    false))
>       {
>         /* The index might wrap.  */
>         return false;
> Index: tree-flow.h
> ===================================================================
> *** tree-flow.h	(revision 115643)
> --- tree-flow.h	(working copy)
> *************** tree find_loop_niter (struct loop *, edg
> *** 735,743 ****
>   tree loop_niter_by_eval (struct loop *, edge);
>   tree find_loop_niter_by_eval (struct loop *, edge *);
>   void estimate_numbers_of_iterations (struct loops *);
> ! bool scev_probably_wraps_p (tree, tree, tree, tree, struct loop *, bool *,
> ! 			    bool *);
> ! tree convert_step (struct loop *, tree, tree, tree, tree);
>   void free_numbers_of_iterations_estimates (struct loops *);
>   void free_numbers_of_iterations_estimates_loop (struct loop *);
>   void rewrite_into_loop_closed_ssa (bitmap, unsigned);
> --- 735,747 ----
>   tree loop_niter_by_eval (struct loop *, edge);
>   tree find_loop_niter_by_eval (struct loop *, edge *);
>   void estimate_numbers_of_iterations (struct loops *);
> ! bool scev_probably_wraps_p (tree, tree, tree, struct loop *, bool);
> ! bool convert_affine_scev (struct loop *, tree, tree *, tree *, tree, bool);
> ! 
> ! bool nowrap_type_p (tree);
> ! enum ev_direction {EV_DIR_GROWS, EV_DIR_DECREASES, EV_DIR_UNKNOWN};
> ! enum ev_direction scev_direction (tree);
> ! 
>   void free_numbers_of_iterations_estimates (struct loops *);
>   void free_numbers_of_iterations_estimates_loop (struct loop *);
>   void rewrite_into_loop_closed_ssa (bitmap, unsigned);
Follow-Ups:
- Re: [PATCH][4.1] Fix PRs 27795, 27639, 26719 - wrong code with scev/loop
  - From: Mark Mitchell
References:
- [PATCH][4.1] Fix PRs 27795, 27639, 26719 - wrong code with scev/loop
  - From: Richard Guenther
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]