This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Fix some pessimizations due to fixes for PRs 27144 and 27639


Hello,

with the fix for PR 27639, in some cases where we previously believed
folding a cast of the chrec is possible, we now know that this is not
the case.  One important case is this:

void foo(unsigned *p, unsigned n)
{
  unsigned i;

  for (i = 0; something (); i++)
    p[i] = bar ();
}

on x86_64, without the fix, we assumed that after casting of i (32-bit) to
the type of the pointer (64-bit), its evolution is [0,+,1] (using a
mistaken argumentation regarding undefined overflow of address
arithmetics).  This is not the case, since i may overflow and the
evolution is (pointer) [0,+,1].

In general, there is nothing we can do about this; in some special cases,
though, e.g.

void foo(unsigned *p, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
    p[i] = bar ();
}

we know that i does not overflow.  However, with the fixes for PRs 27144
and earlier PR 23434, we are not able to prove this, since we record
only a very rough estimate on number of iterations.

This patch fixes this by making us a bit more clever when recording
this upper bound -- in this particular case, the bound is n - 1, and
since we know that n > 0 when the loop is entered (because its header
is copied), we know that the number of iterations is at most
MAX_UNSIGNED_INT - 1, which is sufficient to derive that the index of
p does not overflow.

Bootstrapped & regtested on i686, x86_64 and ppc.  The spec benchmark
result from x86_64 are below (they turn out to be more or less neutral).

Zdenek

   164.gzip          1400  166         842   *     1400  169         831   *
   175.vpr           1400  176         795   *     1400  174         805   *
   176.gcc           1100  102        1083   *     1100  102        1077   *
   181.mcf           1800  429         420   *     1800  426         422   *
   186.crafty                                X                             X
   197.parser        1800  279         646   *     1800  279         646   *
   252.eon           1300   91.9      1415   *     1300   91.9      1415   *
   253.perlbmk                               X                             X
   254.gap           1100  129         850   *     1100  130         849   *
   255.vortex                                X                             X
   256.bzip2         1500  177         846   *     1500  177         847   *
   300.twolf         3000  324         925   *     3000  325         924   *
   Est. SPECint_base2000               829   
   Est. SPECint2000                                                  829   

   168.wupwise       1600   164       974    *     1600   165       972    *
   171.swim          3100   489       634    *     3100   488       635    *
   172.mgrid         1800   264       683    *     1800   264       683    *
   173.applu         2100   337       623    *     2100   336       625    *
   177.mesa          1400   117      1196    *     1400   116      1206    *
   178.galgel                                X                             X
   179.art           2600   352       739    *     2600   348       748    *
   183.equake        1300   163       798    *     1300   163       798    *
   187.facerec       1900   259       733    *     1900   259       732    *
   188.ammp          2200   240       917    *     2200   240       916    *
   189.lucas         2000   238       840    *     2000   238       839    *
   191.fma3d         2100   296       710    *     2100   295       711    *
   200.sixtrack      1100   254       432    *     1100   254       432    *
   301.apsi          2600   311       837    *     2600   311       836    *
   Est. SPECfp_base2000               757    
   Est. SPECfp2000                                                  758    

	* tree-ssa-loop-niter.c (implies_nonnegative_p): New function.
	(derive_constant_upper_bound): Derive more precise upper bound in
	common cases.  Return type changed to double_int.
	(record_estimate): Reflect the changed return type of
	derive_constant_upper_bound.
	* double-int.c (double_int_zext, double_int_sext): Fix.

	* gcc.dg/tree-ssa/loop-18.c: New test.

Index: tree-ssa-loop-niter.c
===================================================================
*** tree-ssa-loop-niter.c	(revision 114506)
--- tree-ssa-loop-niter.c	(working copy)
*************** find_loop_niter_by_eval (struct loop *lo
*** 1470,1491 ****
  
  */
  
! /* Returns a constant upper bound on the value of expression VAL.  The
!    condition ADDITIONAL must be satisfied (for example, if VAL is
     "(unsigned) n" and ADDITIONAL is "n > 0", then we can derive that
!    VAL is at most (unsigned) MAX_INT).
   
!    TODO -- actually do something nontrivial here.  */
! 
! static tree
! derive_constant_upper_bound (tree val, tree additional ATTRIBUTE_UNUSED)
  {
    tree type = TREE_TYPE (val);
!   tree unsigned_type = unsigned_type_for (type);
  
!   if (TREE_CODE (val) != INTEGER_CST)
!     val = upper_bound_in_type (type, type);
!   return fold_convert (unsigned_type, val);
  }
  
  /* Records that AT_STMT is executed at most BOUND times in LOOP.  The
--- 1470,1606 ----
  
  */
  
! /* Returns true if we can prove that COND ==> VAL >= 0.  */
! 
! static bool
! implies_nonnegative_p (tree cond, tree val)
! {
!   tree type = TREE_TYPE (val);
!   tree compare;
! 
!   if (tree_expr_nonnegative_p (val))
!     return true;
! 
!   if (nonzero_p (cond))
!     return false;
! 
!   compare = fold_build2 (GE_EXPR,
! 			 boolean_type_node, val, build_int_cst (type, 0));
!   compare = tree_simplify_using_condition_1 (cond, compare);
! 
!   return nonzero_p (compare);
! }
! 
! /* Returns a constant upper bound on the value of expression VAL.  VAL
!    is considered to be unsigned.  If its type is signed, its value must
!    be nonnegative.
!    
!    The condition ADDITIONAL must be satisfied (for example, if VAL is
     "(unsigned) n" and ADDITIONAL is "n > 0", then we can derive that
!    VAL is at most (unsigned) MAX_INT).  */
   
! static double_int
! derive_constant_upper_bound (tree val, tree additional)
  {
    tree type = TREE_TYPE (val);
!   tree op0, op1, subtype, maxt;
!   double_int bnd, max, mmax, cst;
! 
!   if (INTEGRAL_TYPE_P (type))
!     maxt = TYPE_MAX_VALUE (type);
!   else
!     maxt = upper_bound_in_type (type, type);
  
!   max = tree_to_double_int (maxt);
! 
!   switch (TREE_CODE (val))
!     {
!     case INTEGER_CST:
!       return tree_to_double_int (val);
! 
!     case NOP_EXPR:
!     case CONVERT_EXPR:
!       op0 = TREE_OPERAND (val, 0);
!       subtype = TREE_TYPE (op0);
!       if (!TYPE_UNSIGNED (subtype)
! 	  /* If TYPE is also signed, the fact that VAL is nonnegative implies
! 	     that OP0 is nonnegative.  */
! 	  && TYPE_UNSIGNED (type)
! 	  && !implies_nonnegative_p (additional, op0))
! 	{
! 	  /* If we cannot prove that the casted expression is nonnegative,
! 	     we cannot establish more useful upper bound than the precision
! 	     of the type gives us.  */
! 	  return max;
! 	}
! 
!       /* We now know that op0 is an nonnegative value.  Try deriving an upper
! 	 bound for it.  */
!       bnd = derive_constant_upper_bound (op0, additional);
! 
!       /* If the bound does not fit in TYPE, max. value of TYPE could be
! 	 attained.  */
!       if (double_int_ucmp (max, bnd) < 0)
! 	return max;
! 
!       return bnd;
! 
!     case PLUS_EXPR:
!     case MINUS_EXPR:
!       op0 = TREE_OPERAND (val, 0);
!       op1 = TREE_OPERAND (val, 1);
! 
!       if (TREE_CODE (op1) != INTEGER_CST
! 	  || !implies_nonnegative_p (additional, op0))
! 	return max;
! 
!       /* Canonicalize to OP0 - CST.  */
!       cst = tree_to_double_int (op1);
!       if (TREE_CODE (val) == PLUS_EXPR)
! 	cst = double_int_neg (cst);
! 
!       bnd = derive_constant_upper_bound (op0, additional);
! 
!       if (double_int_negative_p (cst))
! 	{
! 	  cst = double_int_neg (cst);
! 	  /* Avoid CST == 0x80000...  */
! 	  if (double_int_negative_p (cst))
! 	    return max;;
! 
! 	  /* Case OP0 + CST.  We need to check that
! 	     BND <= MAX (type) - CST.  */
! 
! 	  mmax = double_int_add (max, double_int_neg (cst));
! 	  if (double_int_ucmp (bnd, mmax) > 0)
! 	    return max;
! 
! 	  return double_int_add (bnd, cst);
! 	}
!       else
! 	{
! 	  if (double_int_ucmp (bnd, cst) < 0)
! 	    return max;
! 
! 	  bnd = double_int_add (bnd, double_int_neg (cst));
! 	}
! 
!       return bnd;
! 
!     case FLOOR_DIV_EXPR:
!     case EXACT_DIV_EXPR:
!       op0 = TREE_OPERAND (val, 0);
!       op1 = TREE_OPERAND (val, 1);
!       if (TREE_CODE (op1) != INTEGER_CST
! 	  || tree_int_cst_sign_bit (op1))
! 	return max;
! 
!       bnd = derive_constant_upper_bound (op0, additional);
!       return double_int_udiv (bnd, tree_to_double_int (op1), FLOOR_DIV_EXPR);
! 
!     default: 
!       return max;
!     }
  }
  
  /* Records that AT_STMT is executed at most BOUND times in LOOP.  The
*************** void
*** 1495,1501 ****
  record_estimate (struct loop *loop, tree bound, tree additional, tree at_stmt)
  {
    struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound));
!   tree c_bound = derive_constant_upper_bound (bound, additional);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
--- 1610,1618 ----
  record_estimate (struct loop *loop, tree bound, tree additional, tree at_stmt)
  {
    struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound));
!   double_int i_bound = derive_constant_upper_bound (bound, additional);
!   tree c_bound = double_int_to_tree (unsigned_type_for (TREE_TYPE (bound)),
! 				     i_bound);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
Index: testsuite/gcc.dg/tree-ssa/loop-18.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/loop-18.c	(revision 0)
--- testsuite/gcc.dg/tree-ssa/loop-18.c	(revision 0)
***************
*** 0 ****
--- 1,24 ----
+ /* A test for # of iterations estimation.  We know that I does not overflow,
+    thus we can perform strength reduction (even though the 32-bit variable
+    i is first extended to 64-bit type).  */
+ 
+ /* { dg-options "-O2 -fdump-tree-optimized" } */
+ /* { dg-do compile { target x86_64-*-* } } */
+ 
+ unsigned bar(void);
+ 
+ void foo(unsigned *p, unsigned n)
+ {
+   unsigned i;
+ 
+   for (i = 0; i < n; i++)
+     p[i] = bar ();
+ }
+ 
+ /* Check that the memory reference was replaced with MEM, and that there is no
+    multiplication.  */
+ 
+ /* { dg-final { scan-tree-dump-times "MEM" 1 "optimized" } } */
+ /* { dg-final { scan-tree-dump-times "\[^\\n\\r\]*= \\* " 0 "optimized" } } */
+ 
+ /* { dg-final { cleanup-tree-dump "optimized" } } */
Index: double-int.c
===================================================================
*** double-int.c	(revision 114506)
--- double-int.c	(working copy)
*************** double_int_zext (double_int cst, unsigne
*** 72,79 ****
    double_int mask = double_int_mask (prec);
    double_int r;
  
!   r.low = cst.low & ~mask.low;
!   r.high = cst.high & ~mask.high;
  
    return r;
  }
--- 72,79 ----
    double_int mask = double_int_mask (prec);
    double_int r;
  
!   r.low = cst.low & mask.low;
!   r.high = cst.high & mask.high;
  
    return r;
  }
*************** double_int_sext (double_int cst, unsigne
*** 96,108 ****
      }
    if (((snum >> (prec - 1)) & 1) == 1)
      {
!       r.low = cst.low | mask.low;
!       r.high = cst.high | mask.high;
      }
    else
      {
!       r.low = cst.low & ~mask.low;
!       r.high = cst.high & ~mask.high;
      } 
  
    return r;
--- 96,108 ----
      }
    if (((snum >> (prec - 1)) & 1) == 1)
      {
!       r.low = cst.low | ~mask.low;
!       r.high = cst.high | ~mask.high;
      }
    else
      {
!       r.low = cst.low & mask.low;
!       r.high = cst.high & mask.high;
      } 
  
    return r;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]