[patch] PR 32044

Zdenek Dvorak rakdver@kam.mff.cuni.cz
Fri Dec 12 20:41:00 GMT 2008


Hi,

we do not want to transform

while (n >= 45) n -= 45;
return n;

to n % 45, to avoid introducing potentially more expensive division.
For more discussion, see PR32044 in the bugzilla.  The patch also fixes
a similar problem in ivopts; with final value replacement disabled, we
would be turning

i = 0;
while (n >= 45) {n -= 45; i++;}
return i;

into rather curious

k = n/45;
for (i = 0; i < k; i++)
  continue;
return i;

Bootstrapped & regtested on i686, commited.

Zdenek

	PR tree-optimization/32044
	* tree-scalar-evolution.h (expression_expensive_p): Declare.
	* tree-scalar-evolution.c (expression_expensive_p): New function.
	(scev_const_prop): Avoid introducing expensive expressions.
	* tree-ssa-loop-ivopts.c (may_eliminate_iv): Ditto.

	* gcc.dg/pr34027-1.c: Change outcome.
	* gcc.dg/tree-ssa/pr32044.c: New test.

Index: tree-scalar-evolution.c
===================================================================
*** tree-scalar-evolution.c	(revision 142718)
--- tree-scalar-evolution.c	(working copy)
*************** scev_finalize (void)
*** 2805,2810 ****
--- 2805,2854 ----
    scalar_evolution_info = NULL;
  }
  
+ /* Returns true if the expression EXPR is considered to be too expensive
+    for scev_const_prop.  */
+ 
+ bool
+ expression_expensive_p (tree expr)
+ {
+   enum tree_code code;
+ 
+   if (is_gimple_val (expr))
+     return false;
+ 
+   code = TREE_CODE (expr);
+   if (code == TRUNC_DIV_EXPR
+       || code == CEIL_DIV_EXPR
+       || code == FLOOR_DIV_EXPR
+       || code == ROUND_DIV_EXPR
+       || code == TRUNC_MOD_EXPR
+       || code == CEIL_MOD_EXPR
+       || code == FLOOR_MOD_EXPR
+       || code == ROUND_MOD_EXPR
+       || code == EXACT_DIV_EXPR)
+     {
+       /* Division by power of two is usually cheap, so we allow it.
+ 	 Forbid anything else.  */
+       if (!integer_pow2p (TREE_OPERAND (expr, 1)))
+ 	return true;
+     }
+ 
+   switch (TREE_CODE_CLASS (code))
+     {
+     case tcc_binary:
+     case tcc_comparison:
+       if (expression_expensive_p (TREE_OPERAND (expr, 1)))
+ 	return true;
+ 
+       /* Fallthru.  */
+     case tcc_unary:
+       return expression_expensive_p (TREE_OPERAND (expr, 0));
+ 
+     default:
+       return true;
+     }
+ }
+ 
  /* Replace ssa names for that scev can prove they are constant by the
     appropriate constants.  Also perform final value replacement in loops,
     in case the replacement expressions are cheap.
*************** scev_const_prop (void)
*** 2896,2907 ****
  	continue;
  
        niter = number_of_latch_executions (loop);
-       /* We used to check here whether the computation of NITER is expensive,
- 	 and avoided final value elimination if that is the case.  The problem
- 	 is that it is hard to evaluate whether the expression is too
- 	 expensive, as we do not know what optimization opportunities the
- 	 elimination of the final value may reveal.  Therefore, we now
- 	 eliminate the final values of induction variables unconditionally.  */
        if (niter == chrec_dont_know)
  	continue;
  
--- 2940,2945 ----
*************** scev_const_prop (void)
*** 2938,2944 ****
  	      /* Moving the computation from the loop may prolong life range
  		 of some ssa names, which may cause problems if they appear
  		 on abnormal edges.  */
! 	      || contains_abnormal_ssa_name_p (def))
  	    {
  	      gsi_next (&psi);
  	      continue;
--- 2976,2990 ----
  	      /* Moving the computation from the loop may prolong life range
  		 of some ssa names, which may cause problems if they appear
  		 on abnormal edges.  */
! 	      || contains_abnormal_ssa_name_p (def)
! 	      /* Do not emit expensive expressions.  The rationale is that
! 		 when someone writes a code like
! 
! 		 while (n > 45) n -= 45;
! 
! 		 he probably knows that n is not large, and does not want it
! 		 to be turned into n %= 45.  */
! 	      || expression_expensive_p (def))
  	    {
  	      gsi_next (&psi);
  	      continue;
Index: tree-scalar-evolution.h
===================================================================
*** tree-scalar-evolution.h	(revision 142718)
--- tree-scalar-evolution.h	(working copy)
*************** extern void gather_stats_on_scev_databas
*** 35,40 ****
--- 35,41 ----
  extern void scev_analysis (void);
  unsigned int scev_const_prop (void);
  
+ bool expression_expensive_p (tree);
  extern bool simple_iv (struct loop *, gimple, tree, affine_iv *, bool);
  
  /* Returns the basic block preceding LOOP or ENTRY_BLOCK_PTR when the
Index: testsuite/gcc.dg/pr34027-1.c
===================================================================
*** testsuite/gcc.dg/pr34027-1.c	(revision 142718)
--- testsuite/gcc.dg/pr34027-1.c	(working copy)
*************** unsigned long foobar(unsigned long ns)
*** 8,12 ****
    return ns;
  }
  
! /* { dg-final { scan-tree-dump "ns % 10000" "optimized" } } */
  /* { dg-final { cleanup-tree-dump "optimized" } } */
--- 8,16 ----
    return ns;
  }
  
! /* This test was originally introduced to test that we transform
!    to ns % 10000.  See the discussion of PR 32044 why we do not do
!    that anymore.  */
! /* { dg-final { scan-tree-dump-times "%" 0 "optimized" } } */
! /* { dg-final { scan-tree-dump-times "/" 0 "optimized" } } */
  /* { dg-final { cleanup-tree-dump "optimized" } } */
Index: testsuite/gcc.dg/tree-ssa/pr32044.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/pr32044.c	(revision 0)
--- testsuite/gcc.dg/tree-ssa/pr32044.c	(revision 0)
***************
*** 0 ****
--- 1,55 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-empty -fdump-tree-final_cleanup" } */
+ 
+ int foo (int n)
+ {
+   while (n >= 45)
+     n -= 45;
+ 
+   return n;
+ }
+ 
+ int bar (int n)
+ {
+   while (n >= 64)
+     n -= 64;
+ 
+   return n;
+ }
+ 
+ int bla (int n)
+ {
+   int i = 0;
+ 
+   while (n >= 45)
+     {
+       i++;
+       n -= 45;
+     }
+ 
+   return i;
+ }
+ 
+ int baz (int n)
+ {
+   int i = 0;
+ 
+   while (n >= 64)
+     {
+       i++;
+       n -= 64;
+     }
+ 
+   return i;
+ }
+ 
+ /* The loops computing division/modulo by 64 should be eliminated.  */
+ /* { dg-final { scan-tree-dump-times "Removing empty loop" 2 "empty" } } */
+ 
+ /* There should be no division/modulo in the final dump (division and modulo
+    by 64 are done using bit operations).  */
+ /* { dg-final { scan-tree-dump-times "/" 0 "final_cleanup" } } */
+ /* { dg-final { scan-tree-dump-times "%" 0 "final_cleanup" } } */
+ 
+ /* { dg-final { cleanup-tree-dump "empty" } } */
+ /* { dg-final { cleanup-tree-dump "final_cleanup" } } */
Index: tree-ssa-loop-ivopts.c
===================================================================
*** tree-ssa-loop-ivopts.c	(revision 142718)
--- tree-ssa-loop-ivopts.c	(working copy)
*************** may_eliminate_iv (struct ivopts_data *da
*** 3844,3850 ****
--- 3844,3855 ----
      return false;
  
    cand_value_at (loop, cand, use->stmt, nit, &bnd);
+ 
    *bound = aff_combination_to_tree (&bnd);
+   /* It is unlikely that computing the number of iterations using division
+      would be more profitable than keeping the original induction variable.  */
+   if (expression_expensive_p (*bound))
+     return false;
    return true;
  }
  



More information about the Gcc-patches mailing list