This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Serious performance regression -- some tree optimizer questions
- From: Zdenek Dvorak <rakdver at atrey dot karlin dot mff dot cuni dot cz>
- To: Ulrich Weigand <Ulrich dot Weigand at de dot ibm dot com>
- Cc: Michael Matz <matz at suse dot de>, Zdenek Dvorak <dvorakz at suse dot de>,gcc at gcc dot gnu dot org
- Date: Mon, 20 Dec 2004 23:52:07 +0100
- Subject: Re: Serious performance regression -- some tree optimizer questions
- References: <Pine.LNX.4.58.0412190118120.29201@wotan.suse.de> <OF1E640B28.65399D23-ON41256F6F.007CD0B0-41256F6F.007E3497@de.ibm.com>
Hello,
> > Perhaps you can try the attached patch. It is against a slightly older
> > version of HEAD, but fixes some ugly regression with ivopts on ppc64, and
> > in particular also helps a bit with the 32-bit index arithmetic. Zdenek
> > is in the process of splitting it up and submitting it for HEAD.
>
> It helps a bit, but unfortunately not significantly (for the mgrid resid
> hot spot, at least). It does recognize now that in the
> DO I1=2,N-1
> loop both I1 and I1-1 cannot overflow. However it does not recognize that
> I1+1 cannot overflow (and hence it doesn't combine the induction variables
> as it would be necessary).
>
> The problem is that this is not just an inadequacy of the current code,
> it is a fundamental problem: you *cannot* deduce I1+1 doesn't overflow
> because it is simply not *true* that it is a logical consequence of all
> known facts. In theory, I1+1 could overflow for the initial value of
> N == INT_MIN; and at the level of the GENERIC tree passed to the
> mid-end, there is nothing that allows to conclude that this case
> cannot happen ...
>
> This is why I would suggest that we finally do make use of the fact that
> signed overflow represents undefined behaviour in some languages. We
> couldn't at the RTL level because there is no reliable way to distinguish
> between signed and unsigned arithmetic there. But at the tree level,
> if the induction step is performed using arithmetic on a signed data
> type, and neither -fwrapv nor -ftrapv is in effect, we could simply
> allow the scalar evolution mechanism to conclude that this variable
> does not overflow.
>
> What do you think?
here is some basic implementation of the idea; I don't know whether it
will help in your case, from what you say it seems to me that it needs
to be a bit improved first.
Zdenek
Index: cfgloop.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cfgloop.h,v
retrieving revision 1.40
diff -c -3 -p -r1.40 cfgloop.h
*** cfgloop.h 15 Dec 2004 21:18:42 -0000 1.40
--- cfgloop.h 20 Dec 2004 22:47:29 -0000
*************** struct nb_iter_bound
*** 50,58 ****
tree bound; /* The expression whose value is an upper bound on the
number of executions of anything after ... */
tree at_stmt; /* ... this statement during one execution of loop. */
- tree additional; /* A conjunction of conditions the operands of BOUND
- satisfy. The additional information about the value
- of the bound may be derived from it. */
struct nb_iter_bound *next;
/* The next bound in a list. */
};
--- 50,55 ----
*************** enum
*** 478,484 ****
extern void unroll_and_peel_loops (struct loops *, int);
extern void doloop_optimize_loops (struct loops *);
extern void move_loop_invariants (struct loops *);
! extern void record_estimate (struct loop *, tree, tree, tree);
/* Old loop optimizer interface. */
--- 475,481 ----
extern void unroll_and_peel_loops (struct loops *, int);
extern void doloop_optimize_loops (struct loops *);
extern void move_loop_invariants (struct loops *);
! extern void record_estimate (struct loop *, tree, tree);
/* Old loop optimizer interface. */
Index: tree-data-ref.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-data-ref.c,v
retrieving revision 2.18
diff -c -3 -p -r2.18 tree-data-ref.c
*** tree-data-ref.c 10 Dec 2004 17:51:43 -0000 2.18
--- tree-data-ref.c 20 Dec 2004 22:47:29 -0000
*************** estimate_niter_from_size_of_data (struct
*** 529,535 ****
fold (build2 (MINUS_EXPR, integer_type_node,
data_size, init)), step));
! record_estimate (loop, estimation, boolean_true_node, stmt);
}
}
--- 529,535 ----
fold (build2 (MINUS_EXPR, integer_type_node,
data_size, init)), step));
! record_estimate (loop, estimation, stmt);
}
}
Index: tree-flow.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-flow.h,v
retrieving revision 2.75
diff -c -3 -p -r2.75 tree-flow.h
*** tree-flow.h 10 Dec 2004 21:54:41 -0000 2.75
--- tree-flow.h 20 Dec 2004 22:47:29 -0000
*************** struct tree_niter_desc
*** 630,647 ****
a loop (provided that assumptions == true and
may_be_zero == false), more precisely the number
of executions of the latch of the loop. */
- tree additional_info; /* The boolean expression. Sometimes we use additional
- knowledge to simplify the other expressions
- contained in this structure (for example the
- knowledge about value ranges of operands on entry to
- the loop). If this is a case, conjunction of such
- condition is stored in this field, so that we do not
- lose the information: for example if may_be_zero
- is (n <= 0) and niter is (unsigned) n, we know
- that the number of iterations is at most
- MAX_SIGNED_INT. However if the (n <= 0) assumption
- is eliminated (by looking at the guard on entry of
- the loop), then the information would be lost. */
};
/* In tree-vectorizer.c */
--- 630,635 ----
Index: tree-scalar-evolution.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-scalar-evolution.c,v
retrieving revision 2.13
diff -c -3 -p -r2.13 tree-scalar-evolution.c
*** tree-scalar-evolution.c 12 Nov 2004 13:28:16 -0000 2.13
--- tree-scalar-evolution.c 20 Dec 2004 22:47:29 -0000
*************** find_var_scev_info (tree var)
*** 355,376 ****
tree
count_ev_in_wider_type (tree type, tree chrec)
{
! tree base, step;
struct loop *loop;
! if (!evolution_function_is_affine_p (chrec))
return fold_convert (type, chrec);
base = CHREC_LEFT (chrec);
step = CHREC_RIGHT (chrec);
loop = current_loops->parray[CHREC_VARIABLE (chrec)];
! /* TODO -- if we knew the statement at that the conversion occurs,
! we could pass it to can_count_iv_in_wider_type and get a better
! result. */
! step = can_count_iv_in_wider_type (loop, type, base, step, NULL_TREE);
! if (!step)
! return fold_convert (type, chrec);
base = chrec_convert (type, base);
return build_polynomial_chrec (CHREC_VARIABLE (chrec),
--- 355,404 ----
tree
count_ev_in_wider_type (tree type, tree chrec)
{
! tree base, step, otype, tmp;
struct loop *loop;
! if (TREE_CODE (chrec) != POLYNOMIAL_CHREC)
return fold_convert (type, chrec);
+ otype = TREE_TYPE (chrec);
base = CHREC_LEFT (chrec);
step = CHREC_RIGHT (chrec);
loop = current_loops->parray[CHREC_VARIABLE (chrec)];
! if (CHREC_NO_OVERFLOW (chrec)
! && TREE_CODE (step) == INTEGER_CST
! /* We know that the chrec cannot overflow. However if we cast signed
! value to unsigned and the step is negative, it still does not mean
! that it would not overflow in the target type. This is the only
! problem -- if the signedness is the same, everything is obviously OK.
! When casting unsigned to signed, everything works since signed type
! is wider and therefore all values of the unsigned one fit in.
! If we cast signed to unsigned and the step is nonnegative, we
! also obviously fit in. */
! && (TYPE_UNSIGNED (otype)
! || !TYPE_UNSIGNED (type)
! || !tree_int_cst_sign_bit (step)))
! {
! if (tree_int_cst_sign_bit (step))
! {
! tmp = fold (build1 (NEGATE_EXPR, otype, step));
! step = fold (build1 (NEGATE_EXPR, type,
! fold_convert (type, tmp)));
! }
! else
! step = fold_convert (type, step);
! }
! else
! {
! /* TODO -- if we knew the statement at that the conversion occurs,
! we could pass it to can_count_iv_in_wider_type and get a better
! result. */
! step = can_count_iv_in_wider_type (loop, type, base, step, NULL_TREE);
! if (!step)
! return fold_convert (type, chrec);
! }
!
base = chrec_convert (type, base);
return build_polynomial_chrec (CHREC_VARIABLE (chrec),
*************** get_scalar_evolution (tree scalar)
*** 682,694 ****
When CHREC_BEFORE has an evolution part in LOOP_NB, add to this
evolution the expression TO_ADD, otherwise construct an evolution
! part for this loop. */
static tree
add_to_evolution_1 (unsigned loop_nb,
tree chrec_before,
! tree to_add)
{
switch (TREE_CODE (chrec_before))
{
case POLYNOMIAL_CHREC:
--- 710,728 ----
When CHREC_BEFORE has an evolution part in LOOP_NB, add to this
evolution the expression TO_ADD, otherwise construct an evolution
! part for this loop.
!
! CANNOT_OVERFLOW is true if we are sure that the operation cannot
! overflow. */
static tree
add_to_evolution_1 (unsigned loop_nb,
tree chrec_before,
! tree to_add,
! bool cannot_overflow)
{
+ tree chrec;
+
switch (TREE_CODE (chrec_before))
{
case POLYNOMIAL_CHREC:
*************** add_to_evolution_1 (unsigned loop_nb,
*** 704,709 ****
--- 738,744 ----
var = loop_nb;
left = chrec_before;
right = build_int_cst (type, 0);
+ chrec_before = NULL;
}
else
{
*************** add_to_evolution_1 (unsigned loop_nb,
*** 712,733 ****
right = CHREC_RIGHT (chrec_before);
}
! return build_polynomial_chrec
! (var, left, chrec_fold_plus (type, right, to_add));
}
else
/* Search the evolution in LOOP_NB. */
return build_polynomial_chrec
(CHREC_VARIABLE (chrec_before),
! add_to_evolution_1 (loop_nb, CHREC_LEFT (chrec_before), to_add),
CHREC_RIGHT (chrec_before));
default:
/* These nodes do not depend on a loop. */
if (chrec_before == chrec_dont_know)
return chrec_dont_know;
! return build_polynomial_chrec (loop_nb, chrec_before, to_add);
}
}
/* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension
--- 747,784 ----
right = CHREC_RIGHT (chrec_before);
}
! chrec = build_polynomial_chrec (var, left,
! chrec_fold_plus (type,
! right, to_add));
}
else
/* Search the evolution in LOOP_NB. */
return build_polynomial_chrec
(CHREC_VARIABLE (chrec_before),
! add_to_evolution_1 (loop_nb, CHREC_LEFT (chrec_before), to_add,
! cannot_overflow),
CHREC_RIGHT (chrec_before));
default:
/* These nodes do not depend on a loop. */
if (chrec_before == chrec_dont_know)
return chrec_dont_know;
! chrec = build_polynomial_chrec (loop_nb, chrec_before, to_add);
! chrec_before = NULL;
! break;
}
+
+ if (!cannot_overflow)
+ return chrec;
+
+ /* If we know that the current operation cannot overflow, and that the chrec
+ either could not overflow in other operations, or there were no other
+ operations before, we know that the chrec cannot overflow now. */
+ if (!chrec_before
+ || CHREC_NO_OVERFLOW (chrec_before))
+ CHREC_NO_OVERFLOW (chrec) = true;
+
+ return chrec;
}
/* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension
*************** static tree
*** 868,874 ****
add_to_evolution (unsigned loop_nb,
tree chrec_before,
enum tree_code code,
! tree to_add)
{
tree type = chrec_type (to_add);
tree res = NULL_TREE;
--- 919,926 ----
add_to_evolution (unsigned loop_nb,
tree chrec_before,
enum tree_code code,
! tree to_add,
! bool cannot_overflow)
{
tree type = chrec_type (to_add);
tree res = NULL_TREE;
*************** add_to_evolution (unsigned loop_nb,
*** 897,903 ****
to_add = chrec_fold_multiply (type, to_add,
build_int_cst_type (type, -1));
! res = add_to_evolution_1 (loop_nb, chrec_before, to_add);
if (dump_file && (dump_flags & TDF_DETAILS))
{
--- 949,955 ----
to_add = chrec_fold_multiply (type, to_add,
build_int_cst_type (type, -1));
! res = add_to_evolution_1 (loop_nb, chrec_before, to_add, cannot_overflow);
if (dump_file && (dump_flags & TDF_DETAILS))
{
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1059,1064 ****
--- 1111,1117 ----
bool res = false;
tree rhs0, rhs1;
tree type_rhs = TREE_TYPE (rhs);
+ bool cannot_overflow = false;
/* The RHS is one of the following cases:
- an SSA_NAME,
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1088,1093 ****
--- 1141,1151 ----
break;
case PLUS_EXPR:
+ /* Signed arithmetics does not wrap unless -fwrapv. */
+ if (!TYPE_UNSIGNED (type_rhs)
+ && !flag_wrapv)
+ cannot_overflow = true;
+
/* This case is under the form "rhs0 + rhs1". */
rhs0 = TREE_OPERAND (rhs, 0);
rhs1 = TREE_OPERAND (rhs, 1);
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1108,1114 ****
*evolution_of_loop = add_to_evolution
(loop->num,
chrec_convert (type_rhs, *evolution_of_loop),
! PLUS_EXPR, rhs1);
else
{
--- 1166,1172 ----
*evolution_of_loop = add_to_evolution
(loop->num,
chrec_convert (type_rhs, *evolution_of_loop),
! PLUS_EXPR, rhs1, cannot_overflow);
else
{
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1120,1126 ****
*evolution_of_loop = add_to_evolution
(loop->num,
chrec_convert (type_rhs, *evolution_of_loop),
! PLUS_EXPR, rhs0);
}
}
--- 1178,1184 ----
*evolution_of_loop = add_to_evolution
(loop->num,
chrec_convert (type_rhs, *evolution_of_loop),
! PLUS_EXPR, rhs0, cannot_overflow);
}
}
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1134,1140 ****
if (res)
*evolution_of_loop = add_to_evolution
(loop->num, chrec_convert (type_rhs, *evolution_of_loop),
! PLUS_EXPR, rhs1);
}
}
--- 1192,1198 ----
if (res)
*evolution_of_loop = add_to_evolution
(loop->num, chrec_convert (type_rhs, *evolution_of_loop),
! PLUS_EXPR, rhs1, cannot_overflow);
}
}
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1148,1154 ****
if (res)
*evolution_of_loop = add_to_evolution
(loop->num, chrec_convert (type_rhs, *evolution_of_loop),
! PLUS_EXPR, rhs0);
}
else
--- 1206,1212 ----
if (res)
*evolution_of_loop = add_to_evolution
(loop->num, chrec_convert (type_rhs, *evolution_of_loop),
! PLUS_EXPR, rhs0, cannot_overflow);
}
else
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1160,1165 ****
--- 1218,1228 ----
break;
case MINUS_EXPR:
+ /* Signed arithmetics does not wrap unless -fwrapv. */
+ if (!TYPE_UNSIGNED (type_rhs)
+ && !flag_wrapv)
+ cannot_overflow = true;
+
/* This case is under the form "opnd0 = rhs0 - rhs1". */
rhs0 = TREE_OPERAND (rhs, 0);
rhs1 = TREE_OPERAND (rhs, 1);
*************** follow_ssa_edge_in_rhs (struct loop *loo
*** 1175,1181 ****
if (res)
*evolution_of_loop = add_to_evolution
(loop->num, chrec_convert (type_rhs, *evolution_of_loop),
! MINUS_EXPR, rhs1);
}
else
/* Otherwise, match an assignment under the form:
--- 1238,1244 ----
if (res)
*evolution_of_loop = add_to_evolution
(loop->num, chrec_convert (type_rhs, *evolution_of_loop),
! MINUS_EXPR, rhs1, cannot_overflow);
}
else
/* Otherwise, match an assignment under the form:
Index: tree-ssa-loop-niter.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-niter.c,v
retrieving revision 2.18
diff -c -3 -p -r2.18 tree-ssa-loop-niter.c
*** tree-ssa-loop-niter.c 23 Nov 2004 01:27:42 -0000 2.18
--- tree-ssa-loop-niter.c 20 Dec 2004 22:47:29 -0000
*************** tree_simplify_using_condition (tree cond
*** 701,717 ****
}
/* Tries to simplify EXPR using the conditions on entry to LOOP.
- Record the conditions used for simplification to CONDS_USED.
Returns the simplified expression (or EXPR unchanged, if no
simplification was possible).*/
static tree
! simplify_using_initial_conditions (struct loop *loop, tree expr,
! tree *conds_used)
{
edge e;
basic_block bb;
! tree exp, cond;
if (TREE_CODE (expr) == INTEGER_CST)
return expr;
--- 701,715 ----
}
/* Tries to simplify EXPR using the conditions on entry to LOOP.
Returns the simplified expression (or EXPR unchanged, if no
simplification was possible).*/
static tree
! simplify_using_initial_conditions (struct loop *loop, tree expr)
{
edge e;
basic_block bb;
! tree cond;
if (TREE_CODE (expr) == INTEGER_CST)
return expr;
*************** simplify_using_initial_conditions (struc
*** 730,744 ****
cond = COND_EXPR_COND (last_stmt (e->src));
if (e->flags & EDGE_FALSE_VALUE)
cond = invert_truthvalue (cond);
! exp = tree_simplify_using_condition (cond, expr);
!
! if (exp != expr)
! *conds_used = fold (build2 (TRUTH_AND_EXPR,
! boolean_type_node,
! *conds_used,
! cond));
!
! expr = exp;
}
return expr;
--- 728,734 ----
cond = COND_EXPR_COND (last_stmt (e->src));
if (e->flags & EDGE_FALSE_VALUE)
cond = invert_truthvalue (cond);
! expr = tree_simplify_using_condition (cond, expr);
}
return expr;
*************** number_of_iterations_exit (struct loop *
*** 811,825 ****
niter->may_be_zero);
niter->niter = simplify_using_outer_evolutions (loop, niter->niter);
- niter->additional_info = boolean_true_node;
niter->assumptions
= simplify_using_initial_conditions (loop,
! niter->assumptions,
! &niter->additional_info);
niter->may_be_zero
= simplify_using_initial_conditions (loop,
! niter->may_be_zero,
! &niter->additional_info);
return integer_onep (niter->assumptions);
}
--- 801,812 ----
niter->may_be_zero);
niter->niter = simplify_using_outer_evolutions (loop, niter->niter);
niter->assumptions
= simplify_using_initial_conditions (loop,
! niter->assumptions);
niter->may_be_zero
= simplify_using_initial_conditions (loop,
! niter->may_be_zero);
return integer_onep (niter->assumptions);
}
*************** find_loop_niter_by_eval (struct loop *lo
*** 1078,1090 ****
*/
! /* Records that AT_STMT is executed at most BOUND times in LOOP. The
! additional condition ADDITIONAL is recorded with the bound. */
void
! record_estimate (struct loop *loop, tree bound, tree additional, tree at_stmt)
{
struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound));
if (dump_file && (dump_flags & TDF_DETAILS))
{
--- 1065,1088 ----
*/
! /* Records that AT_STMT is executed at most BOUND times in LOOP. */
void
! record_estimate (struct loop *loop, tree bound, tree at_stmt)
{
struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound));
+ tree bound_type = TREE_TYPE (bound);
+
+ /* Ensure that the bound is unsigned. Signed values make no sense, and we
+ use the fact that the bound is unsigned later; of course it would be
+ possible to cast the value to unsigned type at the places that need it,
+ but it would happen repeatedly and would therefore waste memory. */
+
+ if (!TYPE_UNSIGNED (bound_type))
+ {
+ bound_type = unsigned_type_for (bound_type);
+ bound = fold_convert (bound_type, bound);
+ }
if (dump_file && (dump_flags & TDF_DETAILS))
{
*************** record_estimate (struct loop *loop, tree
*** 1097,1103 ****
elt->bound = bound;
elt->at_stmt = at_stmt;
- elt->additional = additional;
elt->next = loop->bounds;
loop->bounds = elt;
}
--- 1095,1100 ----
*************** estimate_numbers_of_iterations_loop (str
*** 1125,1133 ****
niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
build_int_cst_type (type, 0),
niter);
! record_estimate (loop, niter,
! niter_desc.additional_info,
! last_stmt (exits[i]->src));
}
free (exits);
--- 1122,1128 ----
niter = build3 (COND_EXPR, type, niter_desc.may_be_zero,
build_int_cst_type (type, 0),
niter);
! record_estimate (loop, niter, last_stmt (exits[i]->src));
}
free (exits);
*************** estimate_numbers_of_iterations (struct l
*** 1157,1189 ****
}
}
- /* If A > B, returns -1. If A == B, returns 0. If A < B, returns 1.
- If neither of these relations can be proved, returns 2. */
-
- static int
- compare_trees (tree a, tree b)
- {
- tree typea = TREE_TYPE (a), typeb = TREE_TYPE (b);
- tree type;
-
- if (TYPE_PRECISION (typea) > TYPE_PRECISION (typeb))
- type = typea;
- else
- type = typeb;
-
- a = fold_convert (type, a);
- b = fold_convert (type, b);
-
- if (nonzero_p (fold (build2 (EQ_EXPR, boolean_type_node, a, b))))
- return 0;
- if (nonzero_p (fold (build2 (LT_EXPR, boolean_type_node, a, b))))
- return 1;
- if (nonzero_p (fold (build2 (GT_EXPR, boolean_type_node, a, b))))
- return -1;
-
- return 2;
- }
-
/* Returns true if statement S1 dominates statement S2. */
static bool
--- 1152,1157 ----
*************** stmt_dominates_stmt_p (tree s1, tree s2)
*** 1209,1311 ****
return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
! /* Checks whether it is correct to count the induction variable BASE + STEP * I
! at AT_STMT in wider TYPE, using the fact that statement OF is executed at
! most BOUND times in the loop. If it is possible, return the value of step
! of the induction variable in the TYPE, otherwise return NULL_TREE.
!
! ADDITIONAL is the additional condition recorded for operands of the bound.
! This is useful in the following case, created by loop header copying:
! i = 0;
! if (n > 0)
! do
! {
! something;
! } while (++i < n)
!
! If the n > 0 condition is taken into account, the number of iterations of the
! loop can be expressed as n - 1. If the type of n is signed, the ADDITIONAL
! assumption "n > 0" says us that the value of the number of iterations is at
! most MAX_TYPE - 1 (without this assumption, it might overflow). */
! static tree
! can_count_iv_in_wider_type_bound (tree type, tree base, tree step,
! tree at_stmt,
! tree bound,
! tree additional,
! tree of)
{
! tree inner_type = TREE_TYPE (base), b, bplusstep, new_step, new_step_abs;
! tree valid_niter, extreme, unsigned_type, delta, bound_type;
! tree cond;
! b = fold_convert (type, base);
! bplusstep = fold_convert (type,
! fold (build2 (PLUS_EXPR, inner_type, base, step)));
! new_step = fold (build2 (MINUS_EXPR, type, bplusstep, b));
! if (TREE_CODE (new_step) != INTEGER_CST)
! return NULL_TREE;
! switch (compare_trees (bplusstep, b))
! {
! case -1:
! extreme = upper_bound_in_type (type, inner_type);
! delta = fold (build2 (MINUS_EXPR, type, extreme, b));
! new_step_abs = new_step;
! break;
! case 1:
! extreme = lower_bound_in_type (type, inner_type);
! new_step_abs = fold (build1 (NEGATE_EXPR, type, new_step));
! delta = fold (build2 (MINUS_EXPR, type, b, extreme));
break;
! case 0:
! return new_step;
default:
! return NULL_TREE;
}
! unsigned_type = unsigned_type_for (type);
! delta = fold_convert (unsigned_type, delta);
! new_step_abs = fold_convert (unsigned_type, new_step_abs);
! valid_niter = fold (build2 (FLOOR_DIV_EXPR, unsigned_type,
! delta, new_step_abs));
!
! bound_type = TREE_TYPE (bound);
! if (TYPE_PRECISION (type) > TYPE_PRECISION (bound_type))
! bound = fold_convert (unsigned_type, bound);
! else
! valid_niter = fold_convert (bound_type, valid_niter);
!
! if (at_stmt && stmt_dominates_stmt_p (of, at_stmt))
! {
! /* After the statement OF we know that anything is executed at most
! BOUND times. */
! cond = build2 (GE_EXPR, boolean_type_node, valid_niter, bound);
}
else
{
! /* Before the statement OF we know that anything is executed at most
! BOUND + 1 times. */
! cond = build2 (GT_EXPR, boolean_type_node, valid_niter, bound);
! }
!
! cond = fold (cond);
! if (nonzero_p (cond))
! return new_step;
!
! /* Try taking additional conditions into account. */
! cond = build2 (TRUTH_OR_EXPR, boolean_type_node,
! invert_truthvalue (additional),
! cond);
! cond = fold (cond);
! if (nonzero_p (cond))
! return new_step;
! return NULL_TREE;
}
/* Checks whether it is correct to count the induction variable BASE + STEP * I
--- 1177,1650 ----
return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
! /* Returns true if X > Y, counted in arithmetics in width given by MASK
! and signedness given by SIGNED_P. */
! static bool
! greater_number_p (unsigned HOST_WIDE_INT x, unsigned HOST_WIDE_INT y,
! bool signed_p, unsigned HOST_WIDE_INT mask)
! {
! bool x_neg, y_neg;
! if (signed_p)
! {
! x_neg = (x & ~(mask >> 1)) != 0;
! y_neg = (y & ~(mask >> 1)) != 0;
!
! if (x_neg && !y_neg)
! return false;
!
! if (!x_neg && y_neg)
! return true;
! }
!
! return x > y;
! }
!
! /* Returns mask for computing modulo by 2 ^ S by anding. */
!
! static unsigned HOST_WIDE_INT
! mask_for_size (unsigned s)
{
! gcc_assert (s <= HOST_BITS_PER_WIDE_INT);
! return ((~(unsigned HOST_WIDE_INT) 0) >> (HOST_BITS_PER_WIDE_INT - s));
! }
! /* Determines the minimum and maximum value of expression VAR at the begining
! of LOOP. Minimum is stored to MIN, maximum to MAX. Returns false if we
! determine that loop is never reached. */
!
! static bool
! determine_range (struct loop *loop, tree var,
! unsigned HOST_WIDE_INT *min,
! unsigned HOST_WIDE_INT *max)
! {
! tree type = TREE_TYPE (var);
! unsigned type_bits = TYPE_PRECISION (type);
! unsigned HOST_WIDE_INT mask, val = 0, minv, maxv;
! basic_block bb;
! enum tree_code code;
! tree op, cond;
! bool signed_p;
! edge e;
!
! /* Quite simplistic. Lots of space to improve. Having value range info
! would be the best. */
!
! mask = mask_for_size (type_bits);
! if (TREE_CODE (var) != SSA_NAME)
! {
! *min = 0;
! *max = mask;
! return true;
! }
!
! signed_p = !TYPE_UNSIGNED (type);
! if (signed_p)
! {
! *min = minv = (mask >> 1) + 1;
! *max = maxv = (mask >> 1);
! }
! else
! {
! *min = minv = 0;
! *max = maxv = mask;
! }
!
! for (bb = loop->header;
! bb != ENTRY_BLOCK_PTR;
! bb = get_immediate_dominator (CDI_DOMINATORS, bb))
! {
! e = EDGE_PRED (bb, 0);
! if (EDGE_COUNT (bb->preds) > 1)
! continue;
!
! if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
! continue;
!
! cond = COND_EXPR_COND (last_stmt (e->src));
! if (e->flags & EDGE_FALSE_VALUE)
! cond = invert_truthvalue (cond);
!
! if (!COMPARISON_CLASS_P (cond))
! continue;
!
! code = TREE_CODE (cond);
! if (TREE_OPERAND (cond, 0) == var)
! op = TREE_OPERAND (cond, 1);
! else if (TREE_OPERAND (cond, 1) == var)
! {
! op = TREE_OPERAND (cond, 0);
! code = swap_tree_comparison (code);
! }
! else
! continue;
!
! if (cst_and_fits_in_hwi (op))
! val = int_cst_value (op);
! else if (code == GT_EXPR)
! val = minv;
! else if (code == LT_EXPR)
! val = maxv;
! else
! continue;
!
! switch (code)
! {
! case EQ_EXPR:
! *min = *max = val;
! return true;
!
! case NE_EXPR:
! if (val == *min)
! {
! if (*min == *max)
! {
! /* The code is never reached. */
! return false;
! }
! *min = (*min + 1) & mask;
! }
!
! if (val == *max)
! *max = (*max - 1) & mask;
! break;
!
! case GT_EXPR:
! if (val == maxv)
! return false;
!
! val = (val + 1) & mask;
! /* Fallthru. */
!
! case GE_EXPR:
! if (greater_number_p (val, *min, signed_p, mask))
! *min = val;
! break;
!
! case LT_EXPR:
! if (val == minv)
! return false;
! val = (val - 1) & mask;
! /* Fallthru. */
!
! case LE_EXPR:
! if (greater_number_p (*max, val, signed_p, mask))
! *max = val;
! break;
!
! default:
! break;
! }
! }
!
! return true;
! }
!
! /* For expression EXPR of form (cast) (m * v + a), store variable (or
! subexpression) v to VAR, constant m to MULT, constant a to ADD
! and the width of the type to *BITS. */
!
! static void
! split_affine_expression (tree expr, tree *var,
! unsigned HOST_WIDE_INT *mult,
! unsigned HOST_WIDE_INT *add,
! unsigned *bits)
! {
! tree type = TREE_TYPE (expr), op0 = NULL_TREE, op1 = NULL_TREE, op_type;
! unsigned type_bits = TYPE_PRECISION (type);
! unsigned HOST_WIDE_INT mask, delta = 0, mby = 1;
! enum tree_code code = TREE_CODE (expr);
!
! if (TREE_CODE_LENGTH (code) > 0)
! op0 = TREE_OPERAND (expr, 0);
! if (TREE_CODE_LENGTH (code) > 1)
! op1 = TREE_OPERAND (expr, 1);
!
! /* Default if something fails. */
! *var = expr;
! *mult = 1;
! *add = 0;
! *bits = type_bits;
!
! if (type_bits > HOST_BITS_PER_WIDE_INT)
! return;
!
! mask = mask_for_size (type_bits);
!
! if (code == NOP_EXPR
! || code == CONVERT_EXPR)
! {
! op_type = TREE_TYPE (op0);
!
! /* If it is a sign extend, fail. */
! if (!TYPE_UNSIGNED (type)
! && !TYPE_UNSIGNED (op_type)
! && type_bits > TYPE_PRECISION (op_type))
! return;
!
! split_affine_expression (op0, var, mult, add, bits);
! if (*bits > type_bits)
! {
! *bits = type_bits;
! *mult &= mask;
! *add &= mask;
! }
! return;
! }
!
! switch (code)
! {
! case PLUS_EXPR:
! if (cst_and_fits_in_hwi (op1))
! {
! delta = int_cst_value (op1);
! break;
! }
! return;
!
! case MINUS_EXPR:
! if (cst_and_fits_in_hwi (op0))
! {
! delta = int_cst_value (op0);
! mby = mask;
! op0 = op1;
! break;
! }
!
! if (cst_and_fits_in_hwi (op1))
! {
! delta = (-int_cst_value (op1)) & mask;
! break;
! }
! return;
! case NEGATE_EXPR:
! mby = mask;
break;
! case MULT_EXPR:
! if (cst_and_fits_in_hwi (op1))
! {
! mby = int_cst_value (op1);
! break;
! }
! return;
default:
! return;
}
! split_affine_expression (op0, var, mult, add, bits);
! if (type_bits == *bits)
! {
! *mult *= mby;
! *add *= mby;
! *add += delta;
! *mult &= mask;
! *add &= mask;
! return;
! }
!
! /* If there is some narrowing/extending below, consider the first operand
! to be the variable. */
! *var = op0;
! *mult = mby;
! *add = delta;
! *bits = type_bits;
! return;
! }
!
! /* Returns true if X COMP Y, where COMP is a comparison operator. */
!
! static bool
! compare_p (unsigned HOST_WIDE_INT x, enum tree_code comp,
! unsigned HOST_WIDE_INT y)
! {
! switch (comp)
! {
! case LT_EXPR:
! return x < y;
!
! case LE_EXPR:
! return x <= y;
!
! case GT_EXPR:
! return x > y;
!
! case GE_EXPR:
! return x >= y;
!
! default:
! gcc_unreachable ();
! }
! }
!
! /* Returns the smallest value Y such that MAX >= Y >= X and expression
! M * v + A wraps in interval 0 .. MASK at Y + 1, or MAX if no such
! Y exists. */
!
! static unsigned HOST_WIDE_INT
! one_before_wrap (unsigned HOST_WIDE_INT m, unsigned HOST_WIDE_INT a,
! unsigned HOST_WIDE_INT x, unsigned HOST_WIDE_INT mask,
! unsigned HOST_WIDE_INT max)
! {
! unsigned HOST_WIDE_INT diff, mabs, val;
!
! if (!m)
! return max;
!
! /* If M has just the msb 1, M * v + A has just two values (A and A + M).
! Could be handled in a better way, but this case is unlikely to occur
! in practice, so just make it safe. */
! if (m == (mask >> 1) + 1)
! return x;
!
! val = (m * x + a) & mask;
!
! if (m & ~(mask >> 1))
! {
! /* M is negative. */
! diff = val;
! mabs = (-m) & mask;
}
else
{
! /* M is positive. */
! diff = mask - val;
! mabs = m;
! }
! x = x + diff / mabs;
!
! return (x > max ? max : x);
! }
!
! /* Returns true if we can prove that
!
! (M1 * x + A1) mod (2 ^ BITS1) COMP (M2 * x + A2) mod (2 ^ BITS2)
!
! where COMP is a comparison operator, for all x between
! MIN and MAX. The interval is taken as cyclic, i.e. wraps
! over 2 ^ BITS. */
!
! static bool
! prove_affine_ineq (unsigned HOST_WIDE_INT m1, unsigned HOST_WIDE_INT a1,
! unsigned bits1, enum tree_code comp,
! unsigned HOST_WIDE_INT m2, unsigned HOST_WIDE_INT a2,
! unsigned bits2,
! unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
! unsigned bits)
! {
! unsigned HOST_WIDE_INT mask1, mask2, mask, top, w;
! unsigned nwraps = 0;
!
! gcc_assert (comp == LT_EXPR
! || comp == LE_EXPR
! || comp == GT_EXPR
! || comp == GE_EXPR);
! mask = mask_for_size (bits);
!
! if (min > max)
! {
! /* For simplicity split the wrapped interval into two. */
! return (prove_affine_ineq (m1, a1, bits1, comp, m2, a2, bits2,
! min, mask, bits)
! && prove_affine_ineq (m1, a1, bits1, comp, m2, a2, bits2,
! 0, max, bits));
! }
!
! gcc_assert (max <= mask);
!
! mask1 = mask_for_size (bits1);
! mask2 = mask_for_size (bits2);
!
! gcc_assert (m1 <= mask1);
! gcc_assert (a1 <= mask1);
! gcc_assert (m2 <= mask2);
! gcc_assert (a2 <= mask2);
!
! if (!compare_p ((m1 * min + a1) & mask1, comp, (m2 * min + a2) & mask2))
! return false;
!
! do
! {
! /* Usually once there is a wrap, the condition becomes violated. There
! of course are exceptions -- for example if the underlying arithmetics
! is signed, it is quite normal to get a wrap at 0. Let us limit the
! number of wraps to avoid compile time problems on weird tests.
! The value here is probably a bit higher than strictly necessary. */
! if (nwraps++ > 10)
! return false;
!
! top = max;
!
! w = one_before_wrap (m1, a1, min, mask1, mask);
! if (w < top)
! top = w;
! w = one_before_wrap (m2, a2, min, mask2, mask);
! if (w < top)
! top = w;
!
! /* The inequality is valid in the interval on that no wrap occurs iff
! it is valid for both endpoints of the interval. */
! if (!compare_p ((m1 * top + a1) & mask1, comp, (m2 * top + a2) & mask2))
! return false;
!
! min = top + 1;
! } while (top != max);
!
! return true;
! }
!
! /* Returns true if it is able to prove X COMP Y, where COMP is some
! comparison operator, inside LOOP. TYPE is type of X and Y; it is
! assumed to be unsigned. */
!
! static bool
! prove_ineq (struct loop *loop, tree type, tree x, enum tree_code comp, tree y)
! {
! tree expr, varx, vary, var;
! unsigned HOST_WIDE_INT multx, multy, addx, addy, minv, maxv;
! unsigned bitsx, bitsy;
!
! gcc_assert (TYPE_UNSIGNED (type));
!
! expr = fold (build2 (comp, boolean_type_node, x, y));
! if (TREE_CODE (expr) == INTEGER_CST)
! return !zero_p (expr);
!
! /* Try proving the inequality using the entry conditions. */
! expr = simplify_using_initial_conditions (loop, expr);
! if (TREE_CODE (expr) == INTEGER_CST)
! return !zero_p (expr);
!
! /* Failed. Try formulating the inequality as a modulo inequality of affine
! expressions. */
! split_affine_expression (x, &varx, &multx, &addx, &bitsx);
! split_affine_expression (y, &vary, &multy, &addy, &bitsy);
!
! var = varx;
! if (!var)
! {
! var = vary;
! if (!var)
! return false;
! }
! else if (vary && !operand_equal_p (var, vary, 0))
! return false;
!
! if (bitsx > HOST_BITS_PER_WIDE_INT
! || bitsy > HOST_BITS_PER_WIDE_INT
! || TYPE_PRECISION (TREE_TYPE (var)) > HOST_BITS_PER_WIDE_INT)
! return false;
!
! determine_range (loop, var, &minv, &maxv);
!
! return prove_affine_ineq (multx, addx, bitsx, comp,
! multy, addy, bitsy,
! minv, maxv, TYPE_PRECISION (TREE_TYPE (var)));
}
/* Checks whether it is correct to count the induction variable BASE + STEP * I
*************** tree
*** 1317,1334 ****
can_count_iv_in_wider_type (struct loop *loop, tree type, tree base, tree step,
tree at_stmt)
{
struct nb_iter_bound *bound;
- tree new_step;
for (bound = loop->bounds; bound; bound = bound->next)
{
! new_step = can_count_iv_in_wider_type_bound (type, base, step,
! at_stmt,
! bound->bound,
! bound->additional,
! bound->at_stmt);
! if (new_step)
return new_step;
}
--- 1656,1763 ----
can_count_iv_in_wider_type (struct loop *loop, tree type, tree base, tree step,
tree at_stmt)
{
+ tree inner_type = TREE_TYPE (base), b, new_step, new_step_abs;
+ tree extreme, unsigned_type, delta, bound_type, bound_val, btype, tmp;
struct nb_iter_bound *bound;
+ gcc_assert (TYPE_PRECISION (type) > TYPE_PRECISION (inner_type));
+
+ if (TREE_CODE (step) != INTEGER_CST)
+ return NULL_TREE;
+
+ if (zero_p (step))
+ return build_int_cst_type (type, 0);
+
+ if (!loop->bounds)
+ return NULL_TREE;
+
+ unsigned_type = unsigned_type_for (inner_type);
+ b = fold_convert (unsigned_type, base);
+
+ /* To get the new step, sign extend (regardless of signedness of the
+ target type) the old one. */
+
+ if (tree_int_cst_sign_bit (step))
+ {
+ tmp = fold (build1 (NEGATE_EXPR, inner_type, step));
+
+ if (tree_int_cst_sign_bit (tmp))
+ {
+ /* This happens only for the number that has just the msb 1.
+ Not worth handling. */
+ return NULL_TREE;
+ }
+
+ new_step_abs = fold_convert (unsigned_type, tmp);
+ new_step = fold (build1 (NEGATE_EXPR, type,
+ fold_convert (type, tmp)));
+ extreme = fold_convert (unsigned_type,
+ lower_bound_in_type (type, inner_type));
+ delta = fold (build2 (MINUS_EXPR, unsigned_type, b, extreme));
+ }
+ else
+ {
+ new_step = fold_convert (type, step);
+ new_step_abs = fold_convert (unsigned_type, step);
+ extreme = fold_convert (unsigned_type,
+ upper_bound_in_type (type, inner_type));
+ delta = fold (build2 (MINUS_EXPR, unsigned_type, extreme, b));
+ }
+
+ /* We have simplified the situation a bit. We have an induction variable
+ with base 0 and step NEW_STEP_ABS, and we want to know whether it
+ may exceed DELTA during the iterations of the loop. */
+
for (bound = loop->bounds; bound; bound = bound->next)
{
! bound_val = bound->bound;
! bound_type = TREE_TYPE (bound_val);
!
! if (at_stmt
! && stmt_dominates_stmt_p (bound->at_stmt, at_stmt))
! {
! /* After the statement OF we know that anything is executed at most
! BOUND times. So we may decrease the value of the bound by one.
!
! This would fail in one case -- if the BOUND == 0. This
! means that this loop should have been removed somewhere (in jump
! threading, or as last instance in ivcanon). It is possible that
! this failed for some reason (the condition being too complicated
! for jump threading, ivcanon not being run, ...), but since the
! loop runs exactly once, claiming that the iv may be extended
! is OK, and claiming that it cannot will not cause any problems.
! So do not bother with checking any conditions. */
! bound_val = fold (build2 (MINUS_EXPR, bound_type,
! bound_val,
! build_int_cst_type (bound_type, 1)));
! }
!
! /* First check whether it makes sense to consider the bound at all.
! Unless we are able to prove that BOUND_VAL * NEW_STEP_ABS fits in
! UNSIGNED_TYPE, we would not be able to prove anything useful. */
! if (TYPE_PRECISION (bound_type) >= TYPE_PRECISION (unsigned_type))
! {
! btype = bound_type;
! b = bound_val;
! }
! else
! {
! btype = unsigned_type;
! b = fold_convert (btype, bound_val);
! }
! tmp = upper_bound_in_type (btype, unsigned_type);
! if (!integer_onep (new_step_abs))
! tmp = fold (build2 (TRUNC_DIV_EXPR, btype, tmp,
! fold_convert (btype, new_step_abs)));
! if (!prove_ineq (loop, btype, b, LE_EXPR, tmp))
! continue;
!
! /* Passed. Now compare with DELTA to finish the verification. */
! bound_val = fold_convert (unsigned_type, bound_val);
! bound_val = fold (build2 (MULT_EXPR, unsigned_type,
! bound_val, new_step_abs));
! if (prove_ineq (loop, unsigned_type, delta, GE_EXPR, bound_val))
return new_step;
}
Index: tree.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.c,v
retrieving revision 1.457
diff -c -3 -p -r1.457 tree.c
*** tree.c 17 Dec 2004 08:17:01 -0000 1.457
--- tree.c 20 Dec 2004 22:47:30 -0000
*************** tree_int_cst_sgn (tree t)
*** 3851,3856 ****
--- 3851,3876 ----
return 1;
}
+ /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
+ but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
+
+ int
+ tree_int_cst_sign_bit (tree t)
+ {
+ unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
+ unsigned HOST_WIDE_INT w;
+
+ if (bitno < HOST_BITS_PER_WIDE_INT)
+ w = TREE_INT_CST_LOW (t);
+ else
+ {
+ w = TREE_INT_CST_HIGH (t);
+ bitno -= HOST_BITS_PER_WIDE_INT;
+ }
+
+ return (w >> bitno) & 1;
+ }
+
/* Compare two constructor-element-type constants. Return 1 if the lists
are known to be equal; otherwise return 0. */
Index: tree.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.h,v
retrieving revision 1.668
diff -c -3 -p -r1.668 tree.h
*** tree.h 20 Dec 2004 11:26:28 -0000 1.668
--- tree.h 20 Dec 2004 22:47:30 -0000
*************** struct tree_common GTY(())
*** 375,380 ****
--- 375,382 ----
all decls
BIT_FIELD_REF_UNSIGNED in
BIT_FIELD_REF
+ CHREC_NO_OVERFLOW in
+ PLYNOMIAL_CHREC
asm_written_flag:
*************** extern void tree_operand_check_failed (i
*** 931,936 ****
--- 933,942 ----
#define BIT_FIELD_REF_UNSIGNED(NODE) \
(BIT_FIELD_REF_CHECK (NODE)->common.unsigned_flag)
+ /* In a POLYNOMIAL_CHREC, means that the chrec cannot overflow. */
+ #define CHREC_NO_OVERFLOW(NODE) \
+ (POLYNOMIAL_CHREC_CHECK (NODE)->common.unsigned_flag)
+
/* In integral and pointer types, means an unsigned type. */
#define TYPE_UNSIGNED(NODE) (TYPE_CHECK (NODE)->common.unsigned_flag)
*************** extern int tree_int_cst_lt (tree, tree);
*** 2889,2894 ****
--- 2895,2901 ----
extern int tree_int_cst_compare (tree, tree);
extern int host_integerp (tree, int);
extern HOST_WIDE_INT tree_low_cst (tree, int);
+ extern int tree_int_cst_sign_bit (tree);
extern int tree_int_cst_msb (tree);
extern int tree_int_cst_sgn (tree);
extern int tree_expr_nonnegative_p (tree);