This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: IVOPT improvement patch


On Fri, May 28, 2010 at 1:50 AM, Zdenek Dvorak <rakdver@kam.mff.cuni.cz> wrote:
> Hi,
>
>> > I don't think nb_iterations_upper_bound is necessary. ?You already have the
>> > bound in elt->bound (the bound that you record is increased by one, since the
>> > statements before the exit can be executed one more time than the loop latch,
>> > but the code in may_eliminate_iv expects the number that is stored in
>> > elt->bound).
>>
>> may_eliminate_iv actually checks loop->nb_iterations_upper_bound which
>> is the merge of 'elt->bound + 1' -- that is why I introduced the new
>> member -- but I think this is more conservative than needed -- so
>> using elt->bound should be good.
>
> let's not guess about this -- we should have a testcase for the boundary values
> (e.g., one iteration before overflow, overflow, one iteration after overflow).

elt->bound computes the number of iterations where the exit condition
evaluates to true which means if exit test is bottom test, elt->bound
is one less than the number of iterations.  In fact the overflow check
in none of the revisions (including the original) is correct.  What
matters is that 'cand_value_at' won't overflow using cand -- this
means the max_niter should match 'niter''s semantics, and the overflow
check should match the computation in cand_value_at. The latest
revision attached cleans this up (hopefully). (BTW, it is not easy to
create off by one test cases)


>
> Also, note that for loops with one exit, your change actually makes the test weaker.
> For instance, before your change, we could deduce that
>
> int a[100];
> for (i = 0; i < n; i++)
> ?a[i] = i;
>
> iterates at most 100 times.

Fixed and added two test cases.

(Note -- one more bug in the original code was found and fixed -- the
period computation is wrong when step is not power of 2).

Thanks,

David


>
> Zdenek
>
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c	(revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
+#define TYPE char*
+
+/* Testing that only one induction variable is selected after IVOPT on
+   the given target instead of 3.  */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+      int x;
+       for( x = 0; x < i_width; x++ )
+       {
+           dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
+       }
+}
+
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c	(revision 0)
@@ -0,0 +1,17 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
+
+#define TYPE char*
+
+/* Testing on the given target, only one iv candidate instead of 3.  */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+      int x;
+       for( x = 0; x < i_width; x++ )
+       {
+           *dst++ = ( *src1++ + *src2++ + 1 ) >> 1;
+       }
+}
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c	(revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
+
+#define TYPE char*
+
+/* Make sure only 1 iv candidate is selected after IVOPT.  */
+void foo (int i_width, char* dst, char* src1, char* src2)
+{
+      int x;
+       for( x = 0; x < i_width; x++ )
+       {
+           *((TYPE)dst) = ( *((TYPE)src1) + *((TYPE)src2) + 1 ) >> 1;
+	   dst+=sizeof(TYPE);
+	   src1+=sizeof(TYPE);
+	   src2+=sizeof(TYPE);
+       }
+} 
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c	(revision 0)
@@ -0,0 +1,25 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+#ifndef TYPE
+#define TYPE char*
+#endif
+
+int a[400];
+
+/* Testing inferred loop iteration from array -- exit test can be replaced.  */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+      TYPE dstn= dst + i_width;
+      TYPE dst0 = dst;
+      unsigned long long i = 0;
+       for( ; dst <= dstn; )
+       {
+           dst0[i] = ( src1[i] + src2[i] + 1 +a[i]) >> 1;
+           dst++;
+	   i += 7;
+       }
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c	(revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
+
+#ifndef TYPE
+#define TYPE char*
+#endif
+
+/* Make sure only 1 iv candidate is selected.  */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+      TYPE dstn= dst + i_width;
+       for( ; dst < dstn; )
+       {
+           *dst++ = ( *src1++ + *src2++ + 1 ) >> 1;
+       }
+}
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c	(revision 0)
@@ -0,0 +1,25 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+#ifndef TYPE
+#define TYPE char*
+#endif
+
+extern int a[];
+
+/* Can not infer loop iteration from array -- exit test can not be replaced.  */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+      TYPE dstn= dst + i_width;
+      TYPE dst0 = dst;
+      unsigned long long i = 0;
+       for( ; dst <= dstn; )
+       {
+           dst0[i] = ( src1[i] + src2[i] + 1 +a[i]) >> 1;
+           dst++;
+	   i += 7;
+       }
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c	(revision 0)
@@ -0,0 +1,24 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* The test 'if (p2 > p_limit2)' can be replaced, so iv p2 can be
+ * eliminated.  */
+long foo(long* p, long* p2, int N1, int N2)
+{
+  int i = 0;
+  long* p_limit = p + N1;
+  long* p_limit2 = p2 + N2;
+  long s = 0;
+  while (p  <= p_limit)
+    {
+      p++;
+      p2++;
+      if (p2 > p_limit2)
+        break;
+      s += (*p);
+    }
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c	(revision 0)
@@ -0,0 +1,25 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* Exit tests 'i < N1' and 'p2 > p_limit2' can be replaced, so
+ * two ivs i and p2 can be eliminate.  */
+long foo(long* p, long* p2, int N1, int N2)
+{
+  int i = 0;
+  long* p_limit2 = p2 + N2;
+  long s = 0;
+  while (i < N1)
+    {
+       p++;
+       p2++;
+       i++;
+       if (p2 > p_limit2)
+         break;
+       s += (*p);
+    }
+
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c	(revision 0)
@@ -0,0 +1,22 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* iv p2 can be eliminated.  */
+long foo(long* p, long* p2, int N1, int N2)
+{
+  unsigned long  i = 0;
+  long* p_limit2 = p2 + N2;
+  long s = 0;
+  while (i < N1)
+    {
+      p2++;
+      i++;
+      if (p2 > p_limit2)
+        break;
+      s += p[i];
+    }
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c	(revision 0)
@@ -0,0 +1,25 @@
+
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* iv i's step 16 so its period is smaller than the max iterations
+ * i.e. replacing if (p2 > p_limit2) with testing of i may result in
+ * overflow.  */
+long foo(long* p, long* p2, int N1, int N2)
+{
+  unsigned long  i = 0;
+  long* p_limit2 = p2 + N2;
+  long s = 0;
+  while (i < N1)
+    {
+      p2++;
+      i += 16;
+      if (p2 > p_limit2)
+        break;
+     s += p[i];
+  }
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c	(revision 159362)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -91,14 +91,26 @@ along with GCC; see the file COPYING3.  
 #include "langhooks.h"
 #include "tree-affine.h"
 #include "target.h"
+#include "tree-inline.h"
 
 /* The infinite cost.  */
 #define INFTY 10000000
 
-/* The expected number of loop iterations.  TODO -- use profiling instead of
-   this.  */
 #define AVG_LOOP_NITER(LOOP) 5
 
+/* Returns the expected number of loop iterations for LOOP.
+   The average trip count is computed from profile data if it
+   exists. */
+
+static inline HOST_WIDE_INT
+avg_loop_niter (struct loop *loop)
+{
+  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
+  if (niter == -1)
+    return AVG_LOOP_NITER (loop);
+
+  return niter;
+}
 
 /* Representation of the induction variable.  */
 struct iv
@@ -513,6 +525,19 @@ dump_cand (FILE *file, struct iv_cand *c
       return;
     }
 
+  if (cand->var_before)
+    {
+      fprintf (file, "  var_before ");
+      print_generic_expr (file, cand->var_before, TDF_SLIM);
+      fprintf (file, "\n");
+    }
+  if (cand->var_after)
+    {
+      fprintf (file, "  var_after ");
+      print_generic_expr (file, cand->var_after, TDF_SLIM);
+      fprintf (file, "\n");
+    }
+
   switch (cand->pos)
     {
     case IP_NORMAL:
@@ -706,9 +731,10 @@ contains_abnormal_ssa_name_p (tree expr)
     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 
 static tree
-niter_for_exit (struct ivopts_data *data, edge exit)
+niter_for_exit (struct ivopts_data *data, edge exit,
+                struct tree_niter_desc **desc_p)
 {
-  struct tree_niter_desc desc;
+  struct tree_niter_desc* desc = NULL;
   tree niter;
   void **slot;
 
@@ -727,19 +753,24 @@ niter_for_exit (struct ivopts_data *data
 	 being zero).  Also, we cannot safely work with ssa names that
 	 appear in phi nodes on abnormal edges, so that we do not create
 	 overlapping life ranges for them (PR 27283).  */
+      desc = XNEW (struct tree_niter_desc);
       if (number_of_iterations_exit (data->current_loop,
-				     exit, &desc, true)
-	  && integer_zerop (desc.may_be_zero)
-     	  && !contains_abnormal_ssa_name_p (desc.niter))
-	niter = desc.niter;
+				     exit, desc, true)
+	  && integer_zerop (desc->may_be_zero)
+     	  && !contains_abnormal_ssa_name_p (desc->niter))
+	niter = desc->niter;
       else
 	niter = NULL_TREE;
 
-      *pointer_map_insert (data->niters, exit) = niter;
+      desc->niter = niter;
+      slot = pointer_map_insert (data->niters, exit);
+      *slot = desc;
     }
   else
-    niter = (tree) *slot;
+    niter = ((struct tree_niter_desc *) *slot)->niter;
 
+  if (desc_p)
+    *desc_p = (struct tree_niter_desc *) *slot;
   return niter;
 }
 
@@ -755,7 +786,7 @@ niter_for_single_dom_exit (struct ivopts
   if (!exit)
     return NULL;
 
-  return niter_for_exit (data, exit);
+  return niter_for_exit (data, exit, NULL);
 }
 
 /* Initializes data structures used by the iv optimization pass, stored
@@ -1822,7 +1853,7 @@ find_interesting_uses_outside (struct iv
       phi = gsi_stmt (psi);
       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
       if (is_gimple_reg (def))
-	find_interesting_uses_op (data, def);
+        find_interesting_uses_op (data, def);
     }
 }
 
@@ -2138,7 +2169,9 @@ add_candidate_1 (struct ivopts_data *dat
 	continue;
 
       if (operand_equal_p (base, cand->iv->base, 0)
-	  && operand_equal_p (step, cand->iv->step, 0))
+	  && operand_equal_p (step, cand->iv->step, 0)
+          && (TYPE_PRECISION (TREE_TYPE (base))
+              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
 	break;
     }
 
@@ -3779,6 +3812,7 @@ get_computation_cost_at (struct ivopts_d
 			      ubase, build_int_cst (utype, 0),
 			      &symbol_present, &var_present, &offset,
 			      depends_on);
+      cost.cost /= avg_loop_niter (data->current_loop);
     }
   else if (ratio == 1)
     {
@@ -3786,6 +3820,7 @@ get_computation_cost_at (struct ivopts_d
 			      ubase, cbase,
 			      &symbol_present, &var_present, &offset,
 			      depends_on);
+      cost.cost /= avg_loop_niter (data->current_loop);
     }
   else if (address_p
 	   && !POINTER_TYPE_P (ctype)
@@ -3799,16 +3834,18 @@ get_computation_cost_at (struct ivopts_d
 			      ubase, cbase,
 			      &symbol_present, &var_present, &offset,
 			      depends_on);
+      cost.cost /= avg_loop_niter (data->current_loop);
     }
   else
     {
       cost = force_var_cost (data, cbase, depends_on);
-      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
       cost = add_costs (cost,
 			difference_cost (data,
 					 ubase, build_int_cst (utype, 0),
 					 &symbol_present, &var_present,
 					 &offset, depends_on));
+      cost.cost /= avg_loop_niter (data->current_loop);
+      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
     }
 
   /* If we are after the increment, the value of the candidate is higher by
@@ -3841,7 +3878,7 @@ get_computation_cost_at (struct ivopts_d
       are added once to the variable, if present.  */
   if (var_present && (symbol_present || offset))
     cost.cost += add_cost (TYPE_MODE (ctype), speed)
-		 / AVG_LOOP_NITER (data->current_loop);
+		 / avg_loop_niter (data->current_loop);
 
   /* Having offset does not affect runtime cost in case it is added to
      symbol, but it increases complexity.  */
@@ -3911,6 +3948,7 @@ determine_use_iv_cost_generic (struct iv
     }
 
   cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
+
   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
 
   return !infinite_cost_p (cost);
@@ -3977,15 +4015,27 @@ iv_period (struct iv *iv)
 
   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
 
+  type = unsigned_type_for (TREE_TYPE (step));
   /* Period of the iv is gcd (step, type range).  Since type range is power
      of two, it suffices to determine the maximum power of two that divides
      step.  */
-  pow2div = num_ending_zeros (step);
-  type = unsigned_type_for (TREE_TYPE (step));
+  if (integer_pow2p (step))
+    {
+      pow2div = num_ending_zeros (step);
 
-  period = build_low_bits_mask (type,
-				(TYPE_PRECISION (type)
-				 - tree_low_cst (pow2div, 1)));
+      period = build_low_bits_mask (type,
+                                    (TYPE_PRECISION (type)
+                                     - tree_low_cst (pow2div, 1)));
+    }
+  else
+    {
+      double_int type_val_range, step_val, period_val;
+
+      type_val_range = tree_to_double_int (TYPE_MAX_VALUE (type));
+      step_val = tree_to_double_int (step);
+      period_val = double_int_udiv (type_val_range, step_val, FLOOR_DIV_EXPR);
+      period = double_int_to_tree (type, period_val);
+    }
 
   return period;
 }
@@ -4019,6 +4069,7 @@ may_eliminate_iv (struct ivopts_data *da
   tree nit, period;
   struct loop *loop = data->current_loop;
   aff_tree bnd;
+  struct tree_niter_desc *desc = NULL;
 
   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
     return false;
@@ -4037,7 +4088,7 @@ may_eliminate_iv (struct ivopts_data *da
   if (flow_bb_inside_loop_p (loop, exit->dest))
     return false;
 
-  nit = niter_for_exit (data, exit);
+  nit = niter_for_exit (data, exit, &desc);
   if (!nit)
     return false;
 
@@ -4049,27 +4100,46 @@ may_eliminate_iv (struct ivopts_data *da
   /* If the number of iterations is constant, compare against it directly.  */
   if (TREE_CODE (nit) == INTEGER_CST)
     {
-      if (!tree_int_cst_lt (nit, period))
-	return false;
+      /* See cand_value_at.  */
+      if (stmt_after_increment (loop, cand, use->stmt))
+        {
+          if (!tree_int_cst_lt (nit, period))
+            return false;
+        }
+      else
+        {
+          if (tree_int_cst_lt (period, nit))
+            return false;
+        }
     }
 
   /* If not, and if this is the only possible exit of the loop, see whether
      we can get a conservative estimate on the number of iterations of the
      entire loop and compare against that instead.  */
-  else if (loop_only_exit_p (loop, exit))
+  else
     {
       double_int period_value, max_niter;
-      if (!estimated_loop_iterations (loop, true, &max_niter))
-	return false;
+
+      max_niter = desc->max;
+      if (stmt_after_increment (loop, cand, use->stmt))
+        max_niter = double_int_add (max_niter, double_int_one);
       period_value = tree_to_double_int (period);
-      if (double_int_ucmp (max_niter, period_value) >= 0)
-	return false;
+      if (double_int_ucmp (max_niter, period_value) > 0)
+        {
+          /* See if we can take advantage of infered loop bound information.  */
+          if (loop_only_exit_p (loop, exit))
+            {
+              if (!estimated_loop_iterations (loop, true, &max_niter))
+                return false;
+              /* The loop bound is already adjusted by adding 1.  */
+              if (double_int_ucmp (max_niter, period_value) > 0)
+                return false;
+            }
+          else
+            return false;
+        }
     }
 
-  /* Otherwise, punt.  */
-  else
-    return false;
-
   cand_value_at (loop, cand, use->stmt, nit, &bnd);
 
   *bound = aff_combination_to_tree (&bnd);
@@ -4106,7 +4176,7 @@ determine_use_iv_cost_condition (struct 
       elim_cost = force_var_cost (data, bound, &depends_on_elim);
       /* The bound is a loop invariant, so it will be only computed
 	 once.  */
-      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
+      elim_cost.cost /= avg_loop_niter (data->current_loop);
     }
   else
     elim_cost = infinite_cost;
@@ -4353,7 +4423,7 @@ determine_iv_cost (struct ivopts_data *d
   cost_base = force_var_cost (data, base, NULL);
   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
 
-  cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
+  cost = cost_step + cost_base.cost / avg_loop_niter (data->current_loop);
 
   /* Prefer the original ivs unless we may gain something by replacing it.
      The reason is to make debugging simpler; so this is not relevant for
@@ -4541,7 +4611,7 @@ iv_ca_set_remove_invariants (struct iv_c
     {
       ivs->n_invariant_uses[iid]--;
       if (ivs->n_invariant_uses[iid] == 0)
-	ivs->n_regs--;
+        ivs->n_regs--;
     }
 }
 
@@ -4596,7 +4666,7 @@ iv_ca_set_add_invariants (struct iv_ca *
     {
       ivs->n_invariant_uses[iid]++;
       if (ivs->n_invariant_uses[iid] == 1)
-	ivs->n_regs++;
+        ivs->n_regs++;
     }
 }
 
@@ -4871,8 +4941,21 @@ iv_ca_dump (struct ivopts_data *data, FI
   unsigned i;
   comp_cost cost = iv_ca_cost (ivs);
 
-  fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
-  bitmap_print (file, ivs->cands, "  candidates ","\n");
+  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
+  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
+           ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
+  bitmap_print (file, ivs->cands, "  candidates: ","\n");
+
+   for (i = 0; i < ivs->upto; i++)
+    {
+      struct iv_use *use = iv_use (data, i);
+      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
+      if (cp)
+        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
+                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
+      else
+        fprintf (file, "   use:%d --> ??\n", use->id);
+    }
 
   for (i = 1; i <= data->max_inv_id; i++)
     if (ivs->n_invariant_uses[i])
@@ -4880,17 +4963,18 @@ iv_ca_dump (struct ivopts_data *data, FI
 	fprintf (file, "%s%d", pref, i);
 	pref = ", ";
       }
-  fprintf (file, "\n");
+  fprintf (file, "\n\n");
 }
 
 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
    new set, and store differences in DELTA.  Number of induction variables
-   in the new set is stored to N_IVS.  */
+   in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
+   the function will try to find a solution with mimimal iv candidates.  */
 
 static comp_cost
 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
 	      struct iv_cand *cand, struct iv_ca_delta **delta,
-	      unsigned *n_ivs)
+	      unsigned *n_ivs, bool min_ncand)
 {
   unsigned i;
   comp_cost cost;
@@ -4914,8 +4998,8 @@ iv_ca_extend (struct ivopts_data *data, 
       if (!iv_ca_has_deps (ivs, new_cp))
 	continue;
 
-      if (!cheaper_cost_pair (new_cp, old_cp))
-	continue;
+      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
+        continue;
 
       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
     }
@@ -5110,7 +5194,8 @@ try_add_cand_for (struct ivopts_data *da
 	continue;
 
       iv_ca_set_cp (data, ivs, use, cp);
-      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
+      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
+                               true);
       iv_ca_set_no_cp (data, ivs, use);
       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
 
@@ -5143,7 +5228,7 @@ try_add_cand_for (struct ivopts_data *da
 
 	  act_delta = NULL;
 	  iv_ca_set_cp (data, ivs, use, cp);
-	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
+	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
 	  iv_ca_set_no_cp (data, ivs, use);
 	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
 				       cp, act_delta);
@@ -5203,7 +5288,7 @@ try_improve_iv_set (struct ivopts_data *
       if (iv_ca_cand_used_p (ivs, cand))
 	continue;
 
-      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
+      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
       if (!act_delta)
 	continue;
 
@@ -5330,7 +5415,6 @@ create_new_iv (struct ivopts_data *data,
 
       /* Rewrite the increment so that it uses var_before directly.  */
       find_interesting_uses_op (data, cand->var_after)->selected = cand;
-
       return;
     }
 
@@ -5358,8 +5442,18 @@ create_new_ivs (struct ivopts_data *data
       cand = iv_cand (data, i);
       create_new_iv (data, cand);
     }
-}
 
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "\nSelected IV set: \n");
+      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
+        {
+          cand = iv_cand (data, i);
+          dump_cand (dump_file, cand);
+        }
+      fprintf (dump_file, "\n");
+    }
+}
 
 /* Rewrites USE (definition of iv used in a nonlinear expression)
    using candidate CAND.  */
@@ -5582,6 +5676,11 @@ rewrite_use_compare (struct ivopts_data 
       tree var_type = TREE_TYPE (var);
       gimple_seq stmts;
 
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        {
+          fprintf (dump_file, "Replacing exit test: ");
+          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
+        }
       compare = iv_elimination_compare (data, use);
       bound = unshare_expr (fold_convert (var_type, bound));
       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
@@ -5683,6 +5782,20 @@ remove_unused_ivs (struct ivopts_data *d
   BITMAP_FREE (toremove);
 }
 
+/* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
+   for pointer_map_traverse.  */
+
+static
+bool
+free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
+                      void *data ATTRIBUTE_UNUSED)
+{
+  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
+
+  free (niter);
+  return true;
+}
+
 /* Frees data allocated by the optimization of a single loop.  */
 
 static void
@@ -5694,6 +5807,7 @@ free_loop_data (struct ivopts_data *data
 
   if (data->niters)
     {
+      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
       pointer_map_destroy (data->niters);
       data->niters = NULL;
     }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]