[patch] Fix some pessimizations due to fixes for PRs 27144 and 27639
Zdenek Dvorak
rakdver@atrey.karlin.mff.cuni.cz
Tue Jun 13 20:48:00 GMT 2006
Hello,
with the fix for PR 27639, in some cases where we previously believed
folding a cast of the chrec is possible, we now know that this is not
the case. One important case is this:
void foo(unsigned *p, unsigned n)
{
unsigned i;
for (i = 0; something (); i++)
p[i] = bar ();
}
On x86_64, without the fix, we assumed that after casting i (32-bit) to
the type of the pointer (64-bit), its evolution is [0,+,1] (based on a
mistaken argument about undefined overflow of address arithmetic). This
is not the case, since i may overflow, and the evolution is
(pointer) [0,+,1].
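Just as an illustration of the problem (not part of the patch, and assuming
a 64-bit unsigned long as on x86_64): once the 32-bit i wraps around, the
value of (unsigned long) i drops back to zero instead of advancing by one,
so the 64-bit offset does not follow a [0,+,1] evolution:

#include <stdio.h>

int
main (void)
{
  unsigned i;
  int step;

  /* Start two steps before the 32-bit wraparound.  */
  i = 0xfffffffeu;

  for (step = 0; step < 3; step++)
    {
      i++;
      /* When i wraps from 0xffffffff to 0, the extended value jumps back
         to 0 instead of advancing to 0x100000000.  */
      printf ("i = 0x%08x, (unsigned long) i = %lu\n", i, (unsigned long) i);
    }

  return 0;
}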
In general, there is nothing we can do about this; in some special cases,
though, e.g.
void foo(unsigned *p, unsigned n)
{
unsigned i;
for (i = 0; i < n; i++)
p[i] = bar ();
}
we know that i does not overflow. However, with the fixes for PR 27144
and the earlier PR 23434, we are not able to prove this, since we record
only a very rough estimate of the number of iterations.
This patch fixes this by being a bit more clever when recording this
upper bound: in this particular case, the bound is n - 1, and since we
know that n > 0 when the loop is entered (because its header is copied),
the number of iterations is at most MAX_UNSIGNED_INT - 1, which is
sufficient to derive that the index into p does not overflow.
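As a rough, self-contained sketch of that reasoning (a simplified model
using plain 64-bit arithmetic, not the actual GCC code, which works on
trees and double_ints):

#include <stdio.h>
#include <limits.h>

/* Simplified model of the bound derivation for "n - 1": BND_OP0 is an
   upper bound on n, CST is the subtracted constant, and the loop-entry
   condition n > 0 guarantees that the subtraction does not wrap.  */
static unsigned long
bound_of_minus (unsigned long bnd_op0, unsigned long cst)
{
  if (bnd_op0 < cst)
    return (unsigned long) UINT_MAX;  /* Cannot beat the type's maximum.  */
  return bnd_op0 - cst;
}

int
main (void)
{
  /* The best bound on n is UINT_MAX, so the bound recorded for n - 1 is
     UINT_MAX - 1; hence i stays below UINT_MAX and never wraps.  */
  printf ("bound = %lu\n", bound_of_minus ((unsigned long) UINT_MAX, 1));
  return 0;
}

The sketch prints 4294967294, i.e. UINT_MAX - 1, one less than the maximum
of the index type.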
Bootstrapped & regtested on i686, x86_64 and ppc. The SPEC benchmark
results from x86_64 are below (they turn out to be more or less neutral).
Zdenek
(columns: reference time, run time, SPEC ratio, for each of the two runs)
164.gzip 1400 166 842 * 1400 169 831 *
175.vpr 1400 176 795 * 1400 174 805 *
176.gcc 1100 102 1083 * 1100 102 1077 *
181.mcf 1800 429 420 * 1800 426 422 *
186.crafty X X
197.parser 1800 279 646 * 1800 279 646 *
252.eon 1300 91.9 1415 * 1300 91.9 1415 *
253.perlbmk X X
254.gap 1100 129 850 * 1100 130 849 *
255.vortex X X
256.bzip2 1500 177 846 * 1500 177 847 *
300.twolf 3000 324 925 * 3000 325 924 *
Est. SPECint_base2000 829
Est. SPECint2000 829
168.wupwise 1600 164 974 * 1600 165 972 *
171.swim 3100 489 634 * 3100 488 635 *
172.mgrid 1800 264 683 * 1800 264 683 *
173.applu 2100 337 623 * 2100 336 625 *
177.mesa 1400 117 1196 * 1400 116 1206 *
178.galgel X X
179.art 2600 352 739 * 2600 348 748 *
183.equake 1300 163 798 * 1300 163 798 *
187.facerec 1900 259 733 * 1900 259 732 *
188.ammp 2200 240 917 * 2200 240 916 *
189.lucas 2000 238 840 * 2000 238 839 *
191.fma3d 2100 296 710 * 2100 295 711 *
200.sixtrack 1100 254 432 * 1100 254 432 *
301.apsi 2600 311 837 * 2600 311 836 *
Est. SPECfp_base2000 757
Est. SPECfp2000 758
* tree-ssa-loop-niter.c (implies_nonnegative_p): New function.
(derive_constant_upper_bound): Derive more precise upper bound in
common cases. Return type changed to double_int.
(record_estimate): Reflect the changed return type of
derive_constant_upper_bound.
* double-int.c (double_int_zext, double_int_sext): Fix.
* gcc.dg/tree-ssa/loop-18.c: New test.
Index: tree-ssa-loop-niter.c
===================================================================
*** tree-ssa-loop-niter.c (revision 114506)
--- tree-ssa-loop-niter.c (working copy)
*************** find_loop_niter_by_eval (struct loop *lo
*** 1470,1491 ****
*/
! /* Returns a constant upper bound on the value of expression VAL. The
! condition ADDITIONAL must be satisfied (for example, if VAL is
"(unsigned) n" and ADDITIONAL is "n > 0", then we can derive that
! VAL is at most (unsigned) MAX_INT).
! TODO -- actually do something nontrivial here. */
!
! static tree
! derive_constant_upper_bound (tree val, tree additional ATTRIBUTE_UNUSED)
{
tree type = TREE_TYPE (val);
! tree unsigned_type = unsigned_type_for (type);
! if (TREE_CODE (val) != INTEGER_CST)
! val = upper_bound_in_type (type, type);
! return fold_convert (unsigned_type, val);
}
/* Records that AT_STMT is executed at most BOUND times in LOOP. The
--- 1470,1606 ----
*/
! /* Returns true if we can prove that COND ==> VAL >= 0. */
!
! static bool
! implies_nonnegative_p (tree cond, tree val)
! {
! tree type = TREE_TYPE (val);
! tree compare;
!
! if (tree_expr_nonnegative_p (val))
! return true;
!
! if (nonzero_p (cond))
! return false;
!
! compare = fold_build2 (GE_EXPR,
! boolean_type_node, val, build_int_cst (type, 0));
! compare = tree_simplify_using_condition_1 (cond, compare);
!
! return nonzero_p (compare);
! }
!
! /* Returns a constant upper bound on the value of expression VAL. VAL
! is considered to be unsigned. If its type is signed, its value must
! be nonnegative.
!
! The condition ADDITIONAL must be satisfied (for example, if VAL is
"(unsigned) n" and ADDITIONAL is "n > 0", then we can derive that
! VAL is at most (unsigned) MAX_INT). */
! static double_int
! derive_constant_upper_bound (tree val, tree additional)
{
tree type = TREE_TYPE (val);
! tree op0, op1, subtype, maxt;
! double_int bnd, max, mmax, cst;
!
! if (INTEGRAL_TYPE_P (type))
! maxt = TYPE_MAX_VALUE (type);
! else
! maxt = upper_bound_in_type (type, type);
! max = tree_to_double_int (maxt);
!
! switch (TREE_CODE (val))
! {
! case INTEGER_CST:
! return tree_to_double_int (val);
!
! case NOP_EXPR:
! case CONVERT_EXPR:
! op0 = TREE_OPERAND (val, 0);
! subtype = TREE_TYPE (op0);
! if (!TYPE_UNSIGNED (subtype)
! /* If TYPE is also signed, the fact that VAL is nonnegative implies
! that OP0 is nonnegative. */
! && TYPE_UNSIGNED (type)
! && !implies_nonnegative_p (additional, op0))
! {
! /* If we cannot prove that the cast expression is nonnegative,
! we cannot establish a more useful upper bound than the precision
! of the type gives us. */
! return max;
! }
!
! /* We now know that op0 is a nonnegative value. Try deriving an upper
! bound for it. */
! bnd = derive_constant_upper_bound (op0, additional);
!
! /* If the bound does not fit in TYPE, max. value of TYPE could be
! attained. */
! if (double_int_ucmp (max, bnd) < 0)
! return max;
!
! return bnd;
!
! case PLUS_EXPR:
! case MINUS_EXPR:
! op0 = TREE_OPERAND (val, 0);
! op1 = TREE_OPERAND (val, 1);
!
! if (TREE_CODE (op1) != INTEGER_CST
! || !implies_nonnegative_p (additional, op0))
! return max;
!
! /* Canonicalize to OP0 - CST. */
! cst = tree_to_double_int (op1);
! if (TREE_CODE (val) == PLUS_EXPR)
! cst = double_int_neg (cst);
!
! bnd = derive_constant_upper_bound (op0, additional);
!
! if (double_int_negative_p (cst))
! {
! cst = double_int_neg (cst);
! /* Avoid CST == 0x80000... */
! if (double_int_negative_p (cst))
! return max;
!
! /* Case OP0 + CST. We need to check that
! BND <= MAX (type) - CST. */
!
! mmax = double_int_add (max, double_int_neg (cst));
! if (double_int_ucmp (bnd, mmax) > 0)
! return max;
!
! return double_int_add (bnd, cst);
! }
! else
! {
! if (double_int_ucmp (bnd, cst) < 0)
! return max;
!
! bnd = double_int_add (bnd, double_int_neg (cst));
! }
!
! return bnd;
!
! case FLOOR_DIV_EXPR:
! case EXACT_DIV_EXPR:
! op0 = TREE_OPERAND (val, 0);
! op1 = TREE_OPERAND (val, 1);
! if (TREE_CODE (op1) != INTEGER_CST
! || tree_int_cst_sign_bit (op1))
! return max;
!
! bnd = derive_constant_upper_bound (op0, additional);
! return double_int_udiv (bnd, tree_to_double_int (op1), FLOOR_DIV_EXPR);
!
! default:
! return max;
! }
}
/* Records that AT_STMT is executed at most BOUND times in LOOP. The
*************** void
*** 1495,1501 ****
record_estimate (struct loop *loop, tree bound, tree additional, tree at_stmt)
{
struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound));
! tree c_bound = derive_constant_upper_bound (bound, additional);
if (dump_file && (dump_flags & TDF_DETAILS))
{
--- 1610,1618 ----
record_estimate (struct loop *loop, tree bound, tree additional, tree at_stmt)
{
struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound));
! double_int i_bound = derive_constant_upper_bound (bound, additional);
! tree c_bound = double_int_to_tree (unsigned_type_for (TREE_TYPE (bound)),
! i_bound);
if (dump_file && (dump_flags & TDF_DETAILS))
{
Index: testsuite/gcc.dg/tree-ssa/loop-18.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/loop-18.c (revision 0)
--- testsuite/gcc.dg/tree-ssa/loop-18.c (revision 0)
***************
*** 0 ****
--- 1,24 ----
+ /* A test for # of iterations estimation. We know that I does not overflow,
+ thus we can perform strength reduction (even though the 32-bit variable
+ i is first extended to 64-bit type). */
+
+ /* { dg-options "-O2 -fdump-tree-optimized" } */
+ /* { dg-do compile { target x86_64-*-* } } */
+
+ unsigned bar(void);
+
+ void foo(unsigned *p, unsigned n)
+ {
+ unsigned i;
+
+ for (i = 0; i < n; i++)
+ p[i] = bar ();
+ }
+
+ /* Check that the memory reference was replaced with MEM, and that there is no
+ multiplication. */
+
+ /* { dg-final { scan-tree-dump-times "MEM" 1 "optimized" } } */
+ /* { dg-final { scan-tree-dump-times "\[^\\n\\r\]*= \\* " 0 "optimized" } } */
+
+ /* { dg-final { cleanup-tree-dump "optimized" } } */
Index: double-int.c
===================================================================
*** double-int.c (revision 114506)
--- double-int.c (working copy)
*************** double_int_zext (double_int cst, unsigne
*** 72,79 ****
double_int mask = double_int_mask (prec);
double_int r;
! r.low = cst.low & ~mask.low;
! r.high = cst.high & ~mask.high;
return r;
}
--- 72,79 ----
double_int mask = double_int_mask (prec);
double_int r;
! r.low = cst.low & mask.low;
! r.high = cst.high & mask.high;
return r;
}
*************** double_int_sext (double_int cst, unsigne
*** 96,108 ****
}
if (((snum >> (prec - 1)) & 1) == 1)
{
! r.low = cst.low | mask.low;
! r.high = cst.high | mask.high;
}
else
{
! r.low = cst.low & ~mask.low;
! r.high = cst.high & ~mask.high;
}
return r;
--- 96,108 ----
}
if (((snum >> (prec - 1)) & 1) == 1)
{
! r.low = cst.low | ~mask.low;
! r.high = cst.high | ~mask.high;
}
else
{
! r.low = cst.low & mask.low;
! r.high = cst.high & mask.high;
}
return r;