This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][4.1] Fix PRs 27795, 27639, 26719 - wrong code with scev/loop
- From: Zdenek Dvorak <rakdver at atrey dot karlin dot mff dot cuni dot cz>
- To: Richard Guenther <rguenther at suse dot de>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Fri, 21 Jul 2006 16:07:54 +0200
- Subject: Re: [PATCH][4.1] Fix PRs 27795, 27639, 26719 - wrong code with scev/loop
- References: <Pine.LNX.4.64.0607211557100.8635@nyjnma.fhfr.qr>
Hello,
> This is a backport of a patch from mainline that has been sitting there
> since May 24.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu for all langs + ada.
if this patch is approved for 4.1, http://gcc.gnu.org/ml/gcc-patches/2006-07/msg00229.html
should probably go there as well.
Zdenek
> Ok for the branch?
>
> Thanks,
> Richard.
>
>
> :ADDPATCH middle-end:
>
> 2006-07-21 Zdenek Dvorak <dvorakz@suse.cz>
> Richard Guenther <rguenther@suse.de>
>
> PR tree-optimization/27795
> PR tree-optimization/27639
> PR tree-optimization/26719
> * tree-vrp.c (adjust_range_with_scev): Use scev_direction and adjust
> call to scev_probably_wraps_p.
> * tree-ssa-loop-niter.c (compare_trees, convert_step_widening,
> used_in_pointer_arithmetic_p, convert_step): Removed.
> (nowrap_type_p): New function.
> (scev_probably_wraps_p): Rewritten.
> * tree-scalar-evolution.c (instantiate_parameters_1): Do not call
> chrec_convert if chrec_convert_aggressive might have been used.
> * tree-chrec.c (convert_affine_scev, chrec_convert_1,
> scev_direction): New functions.
> (chrec_convert): Changed to a wrapper over chrec_convert_1.
> * tree-ssa-loop-ivopts.c (idx_find_step): Use convert_affine_scev
> instead of convert_step.
> * tree-flow.h (scev_probably_wraps_p): Declaration changed.
> (convert_step): Declaration removed.
> (convert_affine_scev, nowrap_type_p, scev_direction): Declare.
>
> * gcc.dg/pr27639.c: New test.
> * gcc.dg/pr26719.c: New test.
> * gcc.dg/tree-ssa/scev-cast.c: New test.
>
>
> Index: tree-vrp.c
> ===================================================================
> *** tree-vrp.c (revision 115643)
> --- tree-vrp.c (working copy)
> *************** adjust_range_with_scev (value_range_t *v
> *** 1760,1766 ****
> tree var)
> {
> tree init, step, chrec;
> ! bool init_is_max, unknown_max;
>
> /* TODO. Don't adjust anti-ranges. An anti-range may provide
> better opportunities than a regular range, but I'm not sure. */
> --- 1760,1766 ----
> tree var)
> {
> tree init, step, chrec;
> ! enum ev_direction dir;
>
> /* TODO. Don't adjust anti-ranges. An anti-range may provide
> better opportunities than a regular range, but I'm not sure. */
> *************** adjust_range_with_scev (value_range_t *v
> *** 1780,1790 ****
> || !is_gimple_min_invariant (step))
> return;
>
> ! /* Do not adjust ranges when chrec may wrap. */
> ! if (scev_probably_wraps_p (chrec_type (chrec), init, step, stmt,
> ! cfg_loops->parray[CHREC_VARIABLE (chrec)],
> ! &init_is_max, &unknown_max)
> ! || unknown_max)
> return;
>
> if (!POINTER_TYPE_P (TREE_TYPE (init))
> --- 1780,1793 ----
> || !is_gimple_min_invariant (step))
> return;
>
> ! dir = scev_direction (chrec);
> ! if (/* Do not adjust ranges if we do not know whether the iv increases
> ! or decreases, ... */
> ! dir == EV_DIR_UNKNOWN
> ! /* ... or if it may wrap. */
> ! || scev_probably_wraps_p (init, step, stmt,
> ! cfg_loops->parray[CHREC_VARIABLE (chrec)],
> ! true))
> return;
>
> if (!POINTER_TYPE_P (TREE_TYPE (init))
> *************** adjust_range_with_scev (value_range_t *v
> *** 1792,1798 ****
> {
> /* For VARYING or UNDEFINED ranges, just about anything we get
> from scalar evolutions should be better. */
> ! if (init_is_max)
> set_value_range (vr, VR_RANGE, TYPE_MIN_VALUE (TREE_TYPE (init)),
> init, vr->equiv);
> else
> --- 1795,1801 ----
> {
> /* For VARYING or UNDEFINED ranges, just about anything we get
> from scalar evolutions should be better. */
> ! if (dir == EV_DIR_DECREASES)
> set_value_range (vr, VR_RANGE, TYPE_MIN_VALUE (TREE_TYPE (init)),
> init, vr->equiv);
> else
> *************** adjust_range_with_scev (value_range_t *v
> *** 1804,1810 ****
> tree min = vr->min;
> tree max = vr->max;
>
> ! if (init_is_max)
> {
> /* INIT is the maximum value. If INIT is lower than VR->MAX
> but no smaller than VR->MIN, set VR->MAX to INIT. */
> --- 1807,1813 ----
> tree min = vr->min;
> tree max = vr->max;
>
> ! if (dir == EV_DIR_DECREASES)
> {
> /* INIT is the maximum value. If INIT is lower than VR->MAX
> but no smaller than VR->MIN, set VR->MAX to INIT. */
> Index: tree-ssa-loop-niter.c
> ===================================================================
> *** tree-ssa-loop-niter.c (revision 115643)
> --- tree-ssa-loop-niter.c (working copy)
> *************** estimate_numbers_of_iterations (struct l
> *** 1664,1696 ****
> }
> }
>
> - /* If A > B, returns -1. If A == B, returns 0. If A < B, returns 1.
> - If neither of these relations can be proved, returns 2. */
> -
> - static int
> - compare_trees (tree a, tree b)
> - {
> - tree typea = TREE_TYPE (a), typeb = TREE_TYPE (b);
> - tree type;
> -
> - if (TYPE_PRECISION (typea) > TYPE_PRECISION (typeb))
> - type = typea;
> - else
> - type = typeb;
> -
> - a = fold_convert (type, a);
> - b = fold_convert (type, b);
> -
> - if (nonzero_p (fold_binary (EQ_EXPR, boolean_type_node, a, b)))
> - return 0;
> - if (nonzero_p (fold_binary (LT_EXPR, boolean_type_node, a, b)))
> - return 1;
> - if (nonzero_p (fold_binary (GT_EXPR, boolean_type_node, a, b)))
> - return -1;
> -
> - return 2;
> - }
> -
> /* Returns true if statement S1 dominates statement S2. */
>
> static bool
> --- 1664,1669 ----
> *************** proved_non_wrapping_p (tree at_stmt,
> *** 1785,1907 ****
> return false;
> }
>
> ! /* Checks whether it is correct to count the induction variable BASE +
> ! STEP * I at AT_STMT in a wider type NEW_TYPE, using the bounds on
> ! numbers of iterations of a LOOP. If it is possible, return the
> ! value of step of the induction variable in the NEW_TYPE, otherwise
> ! return NULL_TREE. */
> !
> ! static tree
> ! convert_step_widening (struct loop *loop, tree new_type, tree base, tree step,
> ! tree at_stmt)
> ! {
> ! struct nb_iter_bound *bound;
> ! tree base_in_new_type, base_plus_step_in_new_type, step_in_new_type;
> ! tree delta, step_abs;
> ! tree unsigned_type, valid_niter;
> !
> ! /* Compute the new step. For example, {(uchar) 100, +, (uchar) 240}
> ! is converted to {(uint) 100, +, (uint) 0xfffffff0} in order to
> ! keep the values of the induction variable unchanged: 100, 84, 68,
> ! ...
> !
> ! Another example is: (uint) {(uchar)100, +, (uchar)3} is converted
> ! to {(uint)100, +, (uint)3}.
> !
> ! Before returning the new step, verify that the number of
> ! iterations is less than DELTA / STEP_ABS (i.e. in the previous
> ! example (256 - 100) / 3) such that the iv does not wrap (in which
> ! case the operations are too difficult to be represented and
> ! handled: the values of the iv should be taken modulo 256 in the
> ! wider type; this is not implemented). */
> ! base_in_new_type = fold_convert (new_type, base);
> ! base_plus_step_in_new_type =
> ! fold_convert (new_type,
> ! fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step));
> ! step_in_new_type = fold_build2 (MINUS_EXPR, new_type,
> ! base_plus_step_in_new_type,
> ! base_in_new_type);
> !
> ! if (TREE_CODE (step_in_new_type) != INTEGER_CST)
> ! return NULL_TREE;
> !
> ! switch (compare_trees (base_plus_step_in_new_type, base_in_new_type))
> ! {
> ! case -1:
> ! {
> ! tree extreme = upper_bound_in_type (new_type, TREE_TYPE (base));
> ! delta = fold_build2 (MINUS_EXPR, new_type, extreme,
> ! base_in_new_type);
> ! step_abs = step_in_new_type;
> ! break;
> ! }
> !
> ! case 1:
> ! {
> ! tree extreme = lower_bound_in_type (new_type, TREE_TYPE (base));
> ! delta = fold_build2 (MINUS_EXPR, new_type, base_in_new_type,
> ! extreme);
> ! step_abs = fold_build1 (NEGATE_EXPR, new_type, step_in_new_type);
> ! break;
> ! }
>
> ! case 0:
> ! return step_in_new_type;
> !
> ! default:
> ! return NULL_TREE;
> ! }
> !
> ! unsigned_type = unsigned_type_for (new_type);
> ! delta = fold_convert (unsigned_type, delta);
> ! step_abs = fold_convert (unsigned_type, step_abs);
> ! valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type,
> ! delta, step_abs);
> !
> ! estimate_numbers_of_iterations_loop (loop);
> ! for (bound = loop->bounds; bound; bound = bound->next)
> ! if (proved_non_wrapping_p (at_stmt, bound, new_type, valid_niter))
> ! return step_in_new_type;
> !
> ! /* Fail when the loop has no bound estimations, or when no bound can
> ! be used for verifying the conversion. */
> ! return NULL_TREE;
> ! }
> !
> ! /* Returns true when VAR is used in pointer arithmetics. DEPTH is
> ! used for limiting the search. */
> !
> ! static bool
> ! used_in_pointer_arithmetic_p (tree var, int depth)
> {
> ! use_operand_p use_p;
> ! imm_use_iterator iter;
> !
> ! if (depth == 0
> ! || TREE_CODE (var) != SSA_NAME
> ! || !has_single_use (var))
> ! return false;
>
> ! FOR_EACH_IMM_USE_FAST (use_p, iter, var)
> ! {
> ! tree stmt = USE_STMT (use_p);
>
> - if (stmt && TREE_CODE (stmt) == MODIFY_EXPR)
> - {
> - tree rhs = TREE_OPERAND (stmt, 1);
> -
> - if (TREE_CODE (rhs) == NOP_EXPR
> - || TREE_CODE (rhs) == CONVERT_EXPR)
> - {
> - if (POINTER_TYPE_P (TREE_TYPE (rhs)))
> - return true;
> - return false;
> - }
> - else
> - return used_in_pointer_arithmetic_p (TREE_OPERAND (stmt, 0),
> - depth - 1);
> - }
> - }
> return false;
> }
>
> --- 1758,1776 ----
> return false;
> }
>
> ! /* Returns true if the arithmetics in TYPE can be assumed not to wrap. */
>
> ! bool
> ! nowrap_type_p (tree type)
> {
> ! if (!flag_wrapv
> ! && INTEGRAL_TYPE_P (type)
> ! && !TYPE_UNSIGNED (type))
> ! return true;
>
> ! if (POINTER_TYPE_P (type))
> ! return true;
>
> return false;
> }
>
> *************** used_in_pointer_arithmetic_p (tree var,
> *** 1910,2065 ****
> enough with respect to the step and initial condition in order to
> keep the evolution confined in TYPEs bounds. Return true when the
> iv is known to overflow or when the property is not computable.
> !
> ! Initialize INIT_IS_MAX to true when the evolution goes from
> ! INIT_IS_MAX to LOWER_BOUND_IN_TYPE, false in the contrary case.
> ! When this property cannot be determined, UNKNOWN_MAX is set to
> ! true. */
>
> bool
> ! scev_probably_wraps_p (tree type, tree base, tree step,
> tree at_stmt, struct loop *loop,
> ! bool *init_is_max, bool *unknown_max)
> {
> struct nb_iter_bound *bound;
> tree delta, step_abs;
> tree unsigned_type, valid_niter;
> ! tree base_plus_step, bpsps;
> ! int cps, cpsps;
>
> ! /* FIXME: The following code will not be used anymore once
> ! http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html is
> ! committed.
> !
> ! If AT_STMT is a cast to unsigned that is later used for
> ! referencing a memory location, it is followed by a pointer
> ! conversion just after. Because pointers do not wrap, the
> ! sequences that reference the memory do not wrap either. In the
> ! following example, sequences corresponding to D_13 and to D_14
> ! can be proved to not wrap because they are used for computing a
> ! memory access:
>
> D.1621_13 = (long unsigned intD.4) D.1620_12;
> D.1622_14 = D.1621_13 * 8;
> D.1623_15 = (doubleD.29 *) D.1622_14;
> ! */
> ! if (at_stmt && TREE_CODE (at_stmt) == MODIFY_EXPR)
> ! {
> ! tree op0 = TREE_OPERAND (at_stmt, 0);
> ! tree op1 = TREE_OPERAND (at_stmt, 1);
> ! tree type_op1 = TREE_TYPE (op1);
> !
> ! if ((TYPE_UNSIGNED (type_op1)
> ! && used_in_pointer_arithmetic_p (op0, 2))
> ! || POINTER_TYPE_P (type_op1))
> ! {
> ! *unknown_max = true;
> ! return false;
> ! }
> ! }
>
> if (chrec_contains_undetermined (base)
> || chrec_contains_undetermined (step)
> ! || TREE_CODE (base) == REAL_CST
> ! || TREE_CODE (step) == REAL_CST)
> ! {
> ! *unknown_max = true;
> ! return true;
> ! }
> !
> ! *unknown_max = false;
> ! base_plus_step = fold_build2 (PLUS_EXPR, type, base, step);
> ! bpsps = fold_build2 (PLUS_EXPR, type, base_plus_step, step);
> ! cps = compare_trees (base_plus_step, base);
> ! cpsps = compare_trees (bpsps, base_plus_step);
> !
> ! /* Check that the sequence is not wrapping in the first step: it
> ! should have the same monotonicity for the first two steps. See
> ! PR23410. */
> ! if (cps != cpsps)
> return true;
>
> ! switch (cps)
> ! {
> ! case -1:
> ! {
> ! tree extreme = upper_bound_in_type (type, TREE_TYPE (base));
> ! delta = fold_build2 (MINUS_EXPR, type, extreme, base);
> ! step_abs = step;
> ! *init_is_max = false;
> ! break;
> ! }
> !
> ! case 1:
> ! {
> ! tree extreme = lower_bound_in_type (type, TREE_TYPE (base));
> ! delta = fold_build2 (MINUS_EXPR, type, base, extreme);
> ! step_abs = fold_build1 (NEGATE_EXPR, type, step);
> ! *init_is_max = true;
> ! break;
> ! }
> !
> ! case 0:
> ! /* This means step is equal to 0. This should not happen. It
> ! could happen in convert step, but not here. Safely answer
> ! don't know as in the default case. */
> !
> ! default:
> ! *unknown_max = true;
> ! return true;
> ! }
>
> ! /* If AT_STMT represents a cast operation, we may not be able to
> ! take advantage of the undefinedness of signed type evolutions.
>
> ! implement-c.texi states: "For conversion to a type of width
> ! N, the value is reduced modulo 2^N to be within range of the
> ! type;"
> !
> ! See PR 21959 for a test case. Essentially, given a cast
> ! operation
> ! unsigned char uc;
> ! signed char sc;
> ! ...
> ! sc = (signed char) uc;
> ! if (sc < 0)
> ! ...
> !
> ! where uc and sc have the scev {0, +, 1}, we would consider uc to
> ! wrap around, but not sc, because it is of a signed type. This
> ! causes VRP to erroneously fold the predicate above because it
> ! thinks that sc cannot be negative. */
> ! if (at_stmt && TREE_CODE (at_stmt) == MODIFY_EXPR)
> ! {
> ! tree rhs = TREE_OPERAND (at_stmt, 1);
> ! tree outer_t = TREE_TYPE (rhs);
>
> ! if (!TYPE_UNSIGNED (outer_t)
> ! && (TREE_CODE (rhs) == NOP_EXPR || TREE_CODE (rhs) == CONVERT_EXPR))
> ! {
> ! tree inner_t = TREE_TYPE (TREE_OPERAND (rhs, 0));
> !
> ! /* If the inner type is unsigned and its size and/or
> ! precision are smaller to that of the outer type, then the
> ! expression may wrap around. */
> ! if (TYPE_UNSIGNED (inner_t)
> ! && (TYPE_SIZE (inner_t) <= TYPE_SIZE (outer_t)
> ! || TYPE_PRECISION (inner_t) <= TYPE_PRECISION (outer_t)))
> ! {
> ! *unknown_max = true;
> ! return true;
> ! }
> ! }
> }
>
> - /* After having set INIT_IS_MAX, we can return false: when not using
> - wrapping arithmetic, signed types don't wrap. */
> - if (!flag_wrapv && !TYPE_UNSIGNED (type))
> - return false;
> -
> - unsigned_type = unsigned_type_for (type);
> - delta = fold_convert (unsigned_type, delta);
> - step_abs = fold_convert (unsigned_type, step_abs);
> valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
>
> estimate_numbers_of_iterations_loop (loop);
> --- 1779,1850 ----
> enough with respect to the step and initial condition in order to
> keep the evolution confined in TYPEs bounds. Return true when the
> iv is known to overflow or when the property is not computable.
> !
> ! USE_OVERFLOW_SEMANTICS is true if this function should assume that
> ! the rules for overflow of the given language apply (e.g., that signed
> ! arithmetics in C does not overflow). */
>
> bool
> ! scev_probably_wraps_p (tree base, tree step,
> tree at_stmt, struct loop *loop,
> ! bool use_oveflow_semantics)
> {
> struct nb_iter_bound *bound;
> tree delta, step_abs;
> tree unsigned_type, valid_niter;
> ! tree type = TREE_TYPE (step);
> !
> ! /* FIXME: We really need something like
> ! http://gcc.gnu.org/ml/gcc-patches/2005-06/msg02025.html.
>
> ! We used to test for the following situation that frequently appears
> ! during address arithmetics:
>
> D.1621_13 = (long unsigned intD.4) D.1620_12;
> D.1622_14 = D.1621_13 * 8;
> D.1623_15 = (doubleD.29 *) D.1622_14;
> !
> ! And derived that the sequence corresponding to D_14
> ! can be proved to not wrap because it is used for computing a
> ! memory access; however, this is not really the case -- for example,
> ! if D_12 = (unsigned char) [254,+,1], then D_14 has values
> ! 2032, 2040, 0, 8, ..., but the code is still legal. */
>
> if (chrec_contains_undetermined (base)
> || chrec_contains_undetermined (step)
> ! || TREE_CODE (step) != INTEGER_CST)
> return true;
>
> ! if (zero_p (step))
> ! return false;
>
> ! /* If we can use the fact that signed and pointer arithmetics does not
> ! wrap, we are done. */
> ! if (use_oveflow_semantics && nowrap_type_p (type))
> ! return false;
>
> ! /* Otherwise, compute the number of iterations before we reach the
> ! bound of the type, and verify that the loop is exited before this
> ! occurs. */
> ! unsigned_type = unsigned_type_for (type);
> ! base = fold_convert (unsigned_type, base);
>
> ! if (tree_int_cst_sign_bit (step))
> ! {
> ! tree extreme = fold_convert (unsigned_type,
> ! lower_bound_in_type (type, type));
> ! delta = fold_build2 (MINUS_EXPR, unsigned_type, base, extreme);
> ! step_abs = fold_build1 (NEGATE_EXPR, unsigned_type,
> ! fold_convert (unsigned_type, step));
> ! }
> ! else
> ! {
> ! tree extreme = fold_convert (unsigned_type,
> ! upper_bound_in_type (type, type));
> ! delta = fold_build2 (MINUS_EXPR, unsigned_type, extreme, base);
> ! step_abs = fold_convert (unsigned_type, step);
> }
>
> valid_niter = fold_build2 (FLOOR_DIV_EXPR, unsigned_type, delta, step_abs);
>
> estimate_numbers_of_iterations_loop (loop);
> *************** scev_probably_wraps_p (tree type, tree b
> *** 2069,2104 ****
>
> /* At this point we still don't have a proof that the iv does not
> overflow: give up. */
> - *unknown_max = true;
> return true;
> }
>
> - /* Return the conversion to NEW_TYPE of the STEP of an induction
> - variable BASE + STEP * I at AT_STMT. When it fails, return
> - NULL_TREE. */
> -
> - tree
> - convert_step (struct loop *loop, tree new_type, tree base, tree step,
> - tree at_stmt)
> - {
> - tree base_type;
> -
> - if (chrec_contains_undetermined (base)
> - || chrec_contains_undetermined (step))
> - return NULL_TREE;
> -
> - base_type = TREE_TYPE (base);
> -
> - /* When not using wrapping arithmetic, signed types don't wrap. */
> - if (!flag_wrapv && !TYPE_UNSIGNED (base_type))
> - return fold_convert (new_type, step);
> -
> - if (TYPE_PRECISION (new_type) > TYPE_PRECISION (base_type))
> - return convert_step_widening (loop, new_type, base, step, at_stmt);
> -
> - return fold_convert (new_type, step);
> - }
> -
> /* Frees the information on upper bounds on numbers of iterations of LOOP. */
>
> void
> --- 1854,1862 ----
> Index: tree-scalar-evolution.c
> ===================================================================
> *** tree-scalar-evolution.c (revision 115643)
> --- tree-scalar-evolution.c (working copy)
> *************** instantiate_parameters_1 (struct loop *l
> *** 2113,2118 ****
> --- 2113,2124 ----
> if (op0 == TREE_OPERAND (chrec, 0))
> return chrec;
>
> + /* If we used chrec_convert_aggressive, we can no longer assume that
> + signed chrecs do not overflow, as chrec_convert does, so avoid
> + calling it in that case. */
> + if (flags & FOLD_CONVERSIONS)
> + return fold_convert (TREE_TYPE (chrec), op0);
> +
> return chrec_convert (TREE_TYPE (chrec), op0, NULL_TREE);
>
> case SCEV_NOT_KNOWN:
> Index: tree-chrec.c
> ===================================================================
> *** tree-chrec.c (revision 115643)
> --- tree-chrec.c (working copy)
> *************** nb_vars_in_chrec (tree chrec)
> *** 1082,1087 ****
> --- 1082,1184 ----
> }
> }
>
> + static tree chrec_convert_1 (tree, tree, tree, bool);
> +
> + /* Converts BASE and STEP of affine scev to TYPE. LOOP is the loop whose iv
> + the scev corresponds to. AT_STMT is the statement at that the scev is
> + evaluated. USE_OVERFLOW_SEMANTICS is true if this function should assume that
> + the rules for overflow of the given language apply (e.g., that signed
> + arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary
> + tests, but also to enforce that the result follows them. Returns true if the
> + conversion succeeded, false otherwise. */
> +
> + bool
> + convert_affine_scev (struct loop *loop, tree type,
> + tree *base, tree *step, tree at_stmt,
> + bool use_overflow_semantics)
> + {
> + tree ct = TREE_TYPE (*step);
> + bool enforce_overflow_semantics;
> + bool must_check_src_overflow, must_check_rslt_overflow;
> + tree new_base, new_step;
> +
> + /* In general,
> + (TYPE) (BASE + STEP * i) = (TYPE) BASE + (TYPE -- sign extend) STEP * i,
> + but we must check some assumptions.
> +
> + 1) If [BASE, +, STEP] wraps, the equation is not valid when precision
> + of CT is smaller than the precision of TYPE. For example, when we
> + cast unsigned char [254, +, 1] to unsigned, the values on left side
> + are 254, 255, 0, 1, ..., but those on the right side are
> + 254, 255, 256, 257, ...
> + 2) In case that we must also preserve the fact that signed ivs do not
> + overflow, we must additionally check that the new iv does not wrap.
> + For example, unsigned char [125, +, 1] casted to signed char could
> + become a wrapping variable with values 125, 126, 127, -128, -127, ...,
> + which would confuse optimizers that assume that this does not
> + happen. */
> + must_check_src_overflow = TYPE_PRECISION (ct) < TYPE_PRECISION (type);
> +
> + enforce_overflow_semantics = (use_overflow_semantics
> + && nowrap_type_p (type));
> + if (enforce_overflow_semantics)
> + {
> + /* We can avoid checking whether the result overflows in the following
> + cases:
> +
> + -- must_check_src_overflow is true, and the range of TYPE is superset
> + of the range of CT -- i.e., in all cases except if CT signed and
> + TYPE unsigned.
> + -- both CT and TYPE have the same precision and signedness. */
> + if (must_check_src_overflow)
> + {
> + if (TYPE_UNSIGNED (type) && !TYPE_UNSIGNED (ct))
> + must_check_rslt_overflow = true;
> + else
> + must_check_rslt_overflow = false;
> + }
> + else if (TYPE_UNSIGNED (ct) == TYPE_UNSIGNED (type)
> + && TYPE_PRECISION (ct) == TYPE_PRECISION (type))
> + must_check_rslt_overflow = false;
> + else
> + must_check_rslt_overflow = true;
> + }
> + else
> + must_check_rslt_overflow = false;
> +
> + if (must_check_src_overflow
> + && scev_probably_wraps_p (*base, *step, at_stmt, loop,
> + use_overflow_semantics))
> + return false;
> +
> + new_base = chrec_convert_1 (type, *base, at_stmt,
> + use_overflow_semantics);
> + /* The step must be sign extended, regardless of the signedness
> + of CT and TYPE. This only needs to be handled specially when
> + CT is unsigned -- to avoid e.g. unsigned char [100, +, 255]
> + (with values 100, 99, 98, ...) from becoming signed or unsigned
> + [100, +, 255] with values 100, 355, ...; the sign-extension is
> + performed by default when CT is signed. */
> + new_step = *step;
> + if (TYPE_PRECISION (type) > TYPE_PRECISION (ct) && TYPE_UNSIGNED (ct))
> + new_step = chrec_convert_1 (signed_type_for (ct), new_step, at_stmt,
> + use_overflow_semantics);
> + new_step = chrec_convert_1 (type, new_step, at_stmt, use_overflow_semantics);
> +
> + if (automatically_generated_chrec_p (new_base)
> + || automatically_generated_chrec_p (new_step))
> + return false;
> +
> + if (must_check_rslt_overflow
> + /* Note that in this case we cannot use the fact that signed variables
> + do not overflow, as this is what we are verifying for the new iv. */
> + && scev_probably_wraps_p (new_base, new_step, at_stmt, loop, false))
> + return false;
> +
> + *base = new_base;
> + *step = new_step;
> + return true;
> + }
>
>
> /* Convert CHREC to TYPE. When the analyzer knows the context in
> *************** nb_vars_in_chrec (tree chrec)
> *** 1111,1117 ****
> --- 1208,1235 ----
> tree
> chrec_convert (tree type, tree chrec, tree at_stmt)
> {
> + return chrec_convert_1 (type, chrec, at_stmt, true);
> + }
> +
> + /* Convert CHREC to TYPE. When the analyzer knows the context in
> + which the CHREC is built, it sets AT_STMT to the statement that
> + contains the definition of the analyzed variable, otherwise the
> + conversion is less accurate: the information is used for
> + determining a more accurate estimation of the number of iterations.
> + By default AT_STMT could be safely set to NULL_TREE.
> +
> + USE_OVERFLOW_SEMANTICS is true if this function should assume that
> + the rules for overflow of the given language apply (e.g., that signed
> + arithmetics in C does not overflow) -- i.e., to use them to avoid unnecessary
> + tests, but also to enforce that the result follows them. */
> +
> + static tree
> + chrec_convert_1 (tree type, tree chrec, tree at_stmt,
> + bool use_overflow_semantics)
> + {
> tree ct, res;
> + tree base, step;
> + struct loop *loop;
>
> if (automatically_generated_chrec_p (chrec))
> return chrec;
> *************** chrec_convert (tree type, tree chrec, tr
> *** 1120,1175 ****
> if (ct == type)
> return chrec;
>
> ! if (evolution_function_is_affine_p (chrec))
> ! {
> ! tree base, step;
> ! bool dummy;
> ! struct loop *loop = current_loops->parray[CHREC_VARIABLE (chrec)];
> !
> ! base = instantiate_parameters (loop, CHREC_LEFT (chrec));
> ! step = instantiate_parameters (loop, CHREC_RIGHT (chrec));
> !
> ! /* Avoid conversion of (signed char) {(uchar)1, +, (uchar)1}_x
> ! when it is not possible to prove that the scev does not wrap.
> ! See PR22236, where a sequence 1, 2, ..., 255 has to be
> ! converted to signed char, but this would wrap:
> ! 1, 2, ..., 127, -128, ... The result should not be
> ! {(schar)1, +, (schar)1}_x, but instead, we should keep the
> ! conversion: (schar) {(uchar)1, +, (uchar)1}_x. */
> ! if (scev_probably_wraps_p (type, base, step, at_stmt, loop,
> ! &dummy, &dummy))
> ! goto failed_to_convert;
> !
> ! step = convert_step (loop, type, base, step, at_stmt);
> ! if (!step)
> ! {
> ! failed_to_convert:;
> ! if (dump_file && (dump_flags & TDF_DETAILS))
> ! {
> ! fprintf (dump_file, "(failed conversion:");
> ! fprintf (dump_file, "\n type: ");
> ! print_generic_expr (dump_file, type, 0);
> ! fprintf (dump_file, "\n base: ");
> ! print_generic_expr (dump_file, base, 0);
> ! fprintf (dump_file, "\n step: ");
> ! print_generic_expr (dump_file, step, 0);
> ! fprintf (dump_file, "\n estimated_nb_iterations: ");
> ! print_generic_expr (dump_file, loop->estimated_nb_iterations, 0);
> ! fprintf (dump_file, "\n)\n");
> ! }
> !
> ! return fold_convert (type, chrec);
> ! }
> !
> ! return build_polynomial_chrec (CHREC_VARIABLE (chrec),
> ! chrec_convert (type, CHREC_LEFT (chrec),
> ! at_stmt),
> ! step);
> ! }
>
> ! if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
> ! return chrec_dont_know;
>
> res = fold_convert (type, chrec);
>
> /* Don't propagate overflows. */
> --- 1238,1256 ----
> if (ct == type)
> return chrec;
>
> ! if (!evolution_function_is_affine_p (chrec))
> ! goto keep_cast;
>
> ! loop = current_loops->parray[CHREC_VARIABLE (chrec)];
> ! base = CHREC_LEFT (chrec);
> ! step = CHREC_RIGHT (chrec);
> !
> ! if (convert_affine_scev (loop, type, &base, &step, at_stmt,
> ! use_overflow_semantics))
> ! return build_polynomial_chrec (loop->num, base, step);
>
> + /* If we cannot propagate the cast inside the chrec, just keep the cast. */
> + keep_cast:
> res = fold_convert (type, chrec);
>
> /* Don't propagate overflows. */
> *************** chrec_type (tree chrec)
> *** 1232,1234 ****
> --- 1313,1337 ----
>
> return TREE_TYPE (chrec);
> }
> +
> + /* Returns EV_GROWS if CHREC grows (assuming that it does not overflow),
> + EV_DECREASES if it decreases, and EV_UNKNOWN if we cannot determine
> + which of these cases happens. */
> +
> + enum ev_direction
> + scev_direction (tree chrec)
> + {
> + tree step;
> +
> + if (!evolution_function_is_affine_p (chrec))
> + return EV_DIR_UNKNOWN;
> +
> + step = CHREC_RIGHT (chrec);
> + if (TREE_CODE (step) != INTEGER_CST)
> + return EV_DIR_UNKNOWN;
> +
> + if (tree_int_cst_sign_bit (step))
> + return EV_DIR_DECREASES;
> + else
> + return EV_DIR_GROWS;
> + }
> Index: tree-ssa-loop-ivopts.c
> ===================================================================
> *** tree-ssa-loop-ivopts.c (revision 115643)
> --- tree-ssa-loop-ivopts.c (working copy)
> *************** idx_find_step (tree base, tree *idx, voi
> *** 1385,1391 ****
> {
> struct ifs_ivopts_data *dta = data;
> struct iv *iv;
> ! tree step, iv_step, lbound, off;
> struct loop *loop = dta->ivopts_data->current_loop;
>
> if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
> --- 1385,1391 ----
> {
> struct ifs_ivopts_data *dta = data;
> struct iv *iv;
> ! tree step, iv_base, iv_step, lbound, off;
> struct loop *loop = dta->ivopts_data->current_loop;
>
> if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
> *************** idx_find_step (tree base, tree *idx, voi
> *** 1438,1449 ****
> /* The step for pointer arithmetics already is 1 byte. */
> step = build_int_cst (sizetype, 1);
>
> ! /* FIXME: convert_step should not be used outside chrec_convert: fix
> ! this by calling chrec_convert. */
> ! iv_step = convert_step (dta->ivopts_data->current_loop,
> ! sizetype, iv->base, iv->step, dta->stmt);
> !
> ! if (!iv_step)
> {
> /* The index might wrap. */
> return false;
> --- 1438,1448 ----
> /* The step for pointer arithmetics already is 1 byte. */
> step = build_int_cst (sizetype, 1);
>
> ! iv_base = iv->base;
> ! iv_step = iv->step;
> ! if (!convert_affine_scev (dta->ivopts_data->current_loop,
> ! sizetype, &iv_base, &iv_step, dta->stmt,
> ! false))
> {
> /* The index might wrap. */
> return false;
> Index: tree-flow.h
> ===================================================================
> *** tree-flow.h (revision 115643)
> --- tree-flow.h (working copy)
> *************** tree find_loop_niter (struct loop *, edg
> *** 735,743 ****
> tree loop_niter_by_eval (struct loop *, edge);
> tree find_loop_niter_by_eval (struct loop *, edge *);
> void estimate_numbers_of_iterations (struct loops *);
> ! bool scev_probably_wraps_p (tree, tree, tree, tree, struct loop *, bool *,
> ! bool *);
> ! tree convert_step (struct loop *, tree, tree, tree, tree);
> void free_numbers_of_iterations_estimates (struct loops *);
> void free_numbers_of_iterations_estimates_loop (struct loop *);
> void rewrite_into_loop_closed_ssa (bitmap, unsigned);
> --- 735,747 ----
> tree loop_niter_by_eval (struct loop *, edge);
> tree find_loop_niter_by_eval (struct loop *, edge *);
> void estimate_numbers_of_iterations (struct loops *);
> ! bool scev_probably_wraps_p (tree, tree, tree, struct loop *, bool);
> ! bool convert_affine_scev (struct loop *, tree, tree *, tree *, tree, bool);
> !
> ! bool nowrap_type_p (tree);
> ! enum ev_direction {EV_DIR_GROWS, EV_DIR_DECREASES, EV_DIR_UNKNOWN};
> ! enum ev_direction scev_direction (tree);
> !
> void free_numbers_of_iterations_estimates (struct loops *);
> void free_numbers_of_iterations_estimates_loop (struct loop *);
> void rewrite_into_loop_closed_ssa (bitmap, unsigned);