This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Cleanup and speedup inliner after conversion of heap to sreals


Hi,
conversion to sreal makes it possible to compute badness in more streamlined
manner.  Together with the sreal::normalize change this patch finally makes
fibheap badness calcualtion to be out of radar in profiling and I hope it
makes it more maintainable by eliminating the issues from roundoff errors and
overflows that was quite painful.

Incrementally it is also possible to turn time computation into sreals.

Bootstrapped/regtested x86_64-linux, will wait with the commit for the
preivous changes to show up in the benchmark testers.

Thanks for Martin and Trevor for working on sreals&fibheap changes.

Honza

	* ipa-inline.c (cgraph_freq_base_rec, percent_rec): New functions.
	(compute_uninlined_call_time): Return sreal.
	(compute_inlined_call_time): Return sreal.
	(big_speedup_p): Update to be computed with sreals.
	(relative_time_benefit): Turn to sreal computation; return
	value as numerator/denominator to save division.
	(edge_badness): Rewrite to sreals; remove overflow checks
	and cleanup.
	(ipa_inline): Initialize cgraph_freq_base_rec and percent_rec.
	(inline_small_functions): Update dumping; speedup fibheap maintenance.
	(update_edge_key): Update dumping.
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 218730)
+++ ipa-inline.c	(working copy)
@@ -148,6 +148,10 @@ static gcov_type max_count;
 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 static gcov_type spec_rem;
 
+/* sreal constants representing 1/CGRAPH_FREQ_BASE and
+   1/100.  */
+static sreal cgraph_freq_base_rec, percent_rec;
+
 /* Return false when inlining edge E would lead to violating
    limits on function unit growth or stack usage growth.  
 
@@ -517,37 +521,34 @@ want_early_inline_function_p (struct cgr
 /* Compute time of the edge->caller + edge->callee execution when inlining
    does not happen.  */
 
-inline gcov_type
+inline sreal
 compute_uninlined_call_time (struct inline_summary *callee_info,
 			     struct cgraph_edge *edge)
 {
-  gcov_type uninlined_call_time =
-    RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
-	  CGRAPH_FREQ_BASE);
-  gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
-				          ? edge->caller->global.inlined_to
-				          : edge->caller)->time;
+  sreal uninlined_call_time = (sreal)callee_info->time
+			      * (sreal)MAX (edge->frequency, 1)
+			      * cgraph_freq_base_rec;
+  int caller_time = inline_summary (edge->caller->global.inlined_to
+				    ? edge->caller->global.inlined_to
+				    : edge->caller)->time;
   return uninlined_call_time + caller_time;
 }
 
 /* Same as compute_uinlined_call_time but compute time when inlining
    does happen.  */
 
-inline gcov_type
+inline sreal
 compute_inlined_call_time (struct cgraph_edge *edge,
 			   int edge_time)
 {
-  gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
-					  ? edge->caller->global.inlined_to
-					  : edge->caller)->time;
-  gcov_type time = (caller_time
-		    + RDIV (((gcov_type) edge_time
-			     - inline_edge_summary (edge)->call_stmt_time)
-		    * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
-  /* Possible one roundoff error, but watch for overflows.  */
-  gcc_checking_assert (time >= INT_MIN / 2);
-  if (time < 0)
-    time = 0;
+  int caller_time = inline_summary (edge->caller->global.inlined_to
+				    ? edge->caller->global.inlined_to
+				    : edge->caller)->time;
+  sreal time = (sreal)caller_time
+	       + ((sreal)(edge_time - inline_edge_summary (edge)->call_stmt_time)
+	          * (sreal)MAX (edge->frequency, 1)
+	          * cgraph_freq_base_rec);
+  gcc_checking_assert (time >= 0);
   return time;
 }
 
@@ -557,12 +558,10 @@ compute_inlined_call_time (struct cgraph
 static bool
 big_speedup_p (struct cgraph_edge *e)
 {
-  gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
-					  	e);
-  gcov_type inlined_time = compute_inlined_call_time (e,
-					              estimate_edge_time (e));
+  sreal time = compute_uninlined_call_time (inline_summary (e->callee), e);
+  sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
   if (time - inlined_time
-      > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
+      > (sreal)time * (sreal)PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP) * percent_rec)
     return true;
   return false;
 }
@@ -860,42 +859,45 @@ want_inline_function_to_all_callers_p (s
 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 
 /* Return relative time improvement for inlining EDGE in range
-   1...RELATIVE_TIME_BENEFIT_RANGE  */
+   as value NUMERATOR/DENOMINATOR.  */
 
-static inline int
+static inline void
 relative_time_benefit (struct inline_summary *callee_info,
 		       struct cgraph_edge *edge,
-		       int edge_time)
+		       int edge_time,
+		       sreal *numerator,
+		       sreal *denominator)
 {
-  gcov_type relbenefit;
-  gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
-  gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
-
   /* Inlining into extern inline function is not a win.  */
   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 		     ? edge->caller->global.inlined_to->decl
 		     : edge->caller->decl))
-    return 1;
+    {
+      *numerator = (sreal)1;
+      *denominator = (sreal)1024;
+      return;
+    }
 
-  /* Watch overflows.  */
-  gcc_checking_assert (uninlined_call_time >= 0);
-  gcc_checking_assert (inlined_call_time >= 0);
-  gcc_checking_assert (uninlined_call_time >= inlined_call_time);
+  sreal uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
+  sreal inlined_call_time = compute_inlined_call_time (edge, edge_time);
 
   /* Compute relative time benefit, i.e. how much the call becomes faster.
      ??? perhaps computing how much the caller+calle together become faster
      would lead to more realistic results.  */
-  if (!uninlined_call_time)
+  if (uninlined_call_time == (sreal)0)
     uninlined_call_time = 1;
-  relbenefit =
-    RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
-	  uninlined_call_time);
-  relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
-  gcc_checking_assert (relbenefit >= 0);
-  relbenefit = MAX (relbenefit, 1);
-  return relbenefit;
-}
 
+  /* Avoid zeros, these are not useful later in calculations.  */
+  if (uninlined_call_time == inlined_call_time)
+    *numerator = ((sreal)1)>>8;
+  else
+    *numerator = uninlined_call_time - inlined_call_time;
+  *denominator = uninlined_call_time;
+#ifdef ENABLE_CHECKING
+  gcc_checking_assert (*numerator >= 0);
+  gcc_checking_assert (*denominator >= 0);
+#endif
+}
 
 /* A cost model driving the inlining heuristics in a way so the edges with
    smallest badness are inlined first.  After each inlining is performed
@@ -943,54 +945,39 @@ edge_badness (struct cgraph_edge *edge,
     {
       badness = INT_MIN / 2 + growth;
       if (dump)
-	fprintf (dump_file, "      %"PRId64": Growth %d <= 0\n", badness.to_int (),
+	fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
 		 growth);
     }
 
   /* When profiling is available, compute badness as:
 
-	        relative_edge_count * relative_time_benefit
+	        edge_count * relative_time_benefit
      goodness = -------------------------------------------
-		growth_f_caller
-     badness = -goodness  
+		growth_of_caller
+     badness = - goodness 
 
     The fraction is upside down, because on edge counts and time beneits
     the bounds are known. Edge growth is essentially unlimited.  */
 
   else if (max_count)
     {
-      int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
-      /* Capping edge->count to max_count. edge->count can be larger than
-	 max_count if an inline adds new edges which increase max_count
-	 after max_count is computed.  */
-      gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
-
-      sreal relbenefit_real (relbenefit, 0);
-      sreal growth_real (growth, 0);
-
-      /* relative_edge_count.  */
-      sreal tmp (edge_count, 0);
-      tmp /= max_count_real;
-
-      /* relative_time_benefit.  */
-      tmp *= relbenefit_real;
-      tmp /= max_relbenefit_real;
-
-      /* growth_f_caller.  */
-      tmp *= half_int_min_real;
-      tmp /=  growth_real;
+      sreal numerator, denumerator;
+      relative_time_benefit (callee_info, edge, edge_time, &numerator, &denumerator);
+
+      numerator *= edge->count;
+      denumerator *= growth;
 
-      badness = -1 * tmp.to_int ();
+      badness = - numerator/denumerator;
  
       if (dump)
 	{
+	  sreal num,den;
+          relative_time_benefit (callee_info, edge, edge_time, &num, &den);
 	  fprintf (dump_file,
-		   "      %"PRId64" (relative %f): profile info. Relative count %f%s"
-		   " * Relative benefit %f\n",
-		   badness.to_int (), (double) badness.to_int () / INT_MIN,
-		   (double) edge_count / max_count,
-		   edge->count > max_count ? " (capped to max_count)" : "",
-		   relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
+		   "      %f: profile info. count %"PRId64
+		   " * Relative benefit %f / growth %i\n",
+		   badness.to_double (), (int64_t)edge->count,
+		   (num / den * 100).to_double (), growth);
 	}
     }
 
@@ -1008,36 +995,26 @@ edge_badness (struct cgraph_edge *edge,
      and some not.  */
   else if (flag_guess_branch_prob)
     {
-      badness = (relative_time_benefit (callee_info, edge, edge_time)
-		 * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
-      badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
-      gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
-      if ((hints & (INLINE_HINT_indirect_call
-		    | INLINE_HINT_loop_iterations
-	            | INLINE_HINT_array_index
-		    | INLINE_HINT_loop_stride))
-	  || callee_info->growth <= 0)
-	badness *= 8;
-      if (hints & (INLINE_HINT_same_scc))
-	badness /= 16;
-      else if (hints & (INLINE_HINT_in_scc))
-	badness /= 8;
-      else if (hints & (INLINE_HINT_cross_module))
-	badness /= 2;
-      gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
-      if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
-	badness *= 16;
+      sreal numerator, denumerator;
+      relative_time_benefit (callee_info, edge, edge_time, &numerator, &denumerator);
+      denumerator *= growth;
+      if (callee_info->growth > 0)
+	denumerator *= callee_info->growth;
+
+      badness = - numerator / denumerator;
+
       if (dump)
 	{
+	  sreal num,den;
+          relative_time_benefit (callee_info, edge, edge_time, &num, &den);
 	  fprintf (dump_file,
-		   "      %"PRId64": guessed profile. frequency %f,"
-		   " benefit %f%%, time w/o inlining %i, time w inlining %i"
+		   "      %f: guessed profile. frequency %f,"
+		   " benefit %f%%, time w/o inlining %f, time w inlining %f"
 		   " overall growth %i (current) %i (original)\n",
-		   badness.to_int (), (double)edge->frequency / CGRAPH_FREQ_BASE,
-		   relative_time_benefit (callee_info, edge, edge_time) * 100.0
-		   / RELATIVE_TIME_BENEFIT_RANGE, 
-		   (int)compute_uninlined_call_time (callee_info, edge),
-		   (int)compute_inlined_call_time (edge, edge_time),
+		   badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE,
+		   (num/den).to_double () * 100, 
+		   compute_uninlined_call_time (callee_info, edge).to_double (),
+		   compute_inlined_call_time (edge, edge_time).to_double (),
 		   estimate_growth (callee),
 		   callee_info->growth);
 	}
@@ -1049,28 +1026,38 @@ edge_badness (struct cgraph_edge *edge,
   else
     {
       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
-      badness = growth * 256;
+      badness = growth;
 
       /* Decrease badness if call is nested.  */
       if (badness > 0)
 	badness = badness >> nest;
       else
-	{
-	  badness = badness << nest;
-	}
+	badness = badness << nest;
       if (dump)
-	fprintf (dump_file, "      %"PRId64": no profile. nest %i\n", badness.to_int (),
+	fprintf (dump_file, "      %f: no profile. nest %i\n", badness.to_double (),
 		 nest);
     }
+  gcc_checking_assert (badness != 0);
 
-  /* Ensure that we did not overflow in all the fixed point math above.  */
-  gcc_assert (badness >= INT_MIN);
-  gcc_assert (badness <= INT_MAX - 1);
-  /* Make recursive inlining happen always after other inlining is done.  */
   if (edge->recursive_p ())
-    return badness + 1;
-  else
-    return badness;
+    badness = badness.shift (badness > 0 ? 4 : -4);
+  if ((hints & (INLINE_HINT_indirect_call
+		| INLINE_HINT_loop_iterations
+		| INLINE_HINT_array_index
+		| INLINE_HINT_loop_stride))
+      || callee_info->growth <= 0)
+    badness = badness.shift (badness > 0 ? -2 : 2);
+  if (hints & (INLINE_HINT_same_scc))
+    badness = badness.shift (badness > 0 ? 3 : -3);
+  else if (hints & (INLINE_HINT_in_scc))
+    badness = badness.shift (badness > 0 ? 2 : -2);
+  else if (hints & (INLINE_HINT_cross_module))
+    badness = badness.shift (badness > 0 ? 1 : -1);
+  if ((hints & INLINE_HINT_declared_inline))
+    badness = badness.shift (badness > 0 ? -3 : 3);
+  if (dump)
+    fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
+  return badness;
 }
 
 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
@@ -1083,26 +1070,26 @@ update_edge_key (edge_heap_t *heap, stru
       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
       gcc_checking_assert (n->get_data () == edge);
 
-      /* fibonacci_heap::replace_key only decrease the keys.
-	 When we increase the key we do not update heap
-	 and instead re-insert the element once it becomes
-	 a minimum of heap.  */
+      /* fibonacci_heap::replace_key does busy updating of the
+	 heap that is unnecesarily expensive.
+	 We do lazy increases: after extracting minimum if the key
+	 turns out to be out of date, it is re-inserted into heap
+	 with correct value.  */
       if (badness < n->get_key ())
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    {
 	      fprintf (dump_file,
-		       "  decreasing badness %s/%i -> %s/%i, %"PRId64
-		       " to %"PRId64"\n",
+		       "  decreasing badness %s/%i -> %s/%i, %f"
+		       " to %f\n",
 		       xstrdup_for_dump (edge->caller->name ()),
 		       edge->caller->order,
 		       xstrdup_for_dump (edge->callee->name ()),
 		       edge->callee->order,
-		       n->get_key ().to_int (),
-		       badness.to_int ());
+		       n->get_key ().to_double (),
+		       badness.to_double ());
 	    }
 	  heap->decrease_key (n, badness);
-	  gcc_checking_assert (n->get_key () == badness);
 	}
     }
   else
@@ -1110,12 +1097,12 @@ update_edge_key (edge_heap_t *heap, stru
        if (dump_file && (dump_flags & TDF_DETAILS))
 	 {
 	   fprintf (dump_file,
-		    "  enqueuing call %s/%i -> %s/%i, badness %"PRId64"\n",
+		    "  enqueuing call %s/%i -> %s/%i, badness %f\n",
 		    xstrdup_for_dump (edge->caller->name ()),
 		    edge->caller->order,
 		    xstrdup_for_dump (edge->callee->name ()),
 		    edge->callee->order,
-		    badness.to_int ());
+		    badness.to_double ());
 	 }
       edge->aux = heap->insert (badness, edge);
     }
@@ -1685,7 +1672,6 @@ inline_small_functions (void)
       struct cgraph_node *where, *callee;
       sreal badness = edge_heap.min_key ();
       sreal current_badness;
-      sreal cached_badness;
       int growth;
 
       edge = edge_heap.extract_min ();
@@ -1694,10 +1680,9 @@ inline_small_functions (void)
       if (!edge->inline_failed || !edge->callee->analyzed)
 	continue;
 
-      /* Be sure that caches are maintained consistent.  
-         We can not make this ENABLE_CHECKING only because it cause different
-         updates of the fibheap queue.  */
-      cached_badness = edge_badness (edge, false);
+#ifdef ENABLE_CHECKING
+      /* Be sure that caches are maintained consistent.  */
+      sreal cached_badness = edge_badness (edge, false);
       reset_edge_growth_cache (edge);
       reset_node_growth_cache (edge->callee);
 
@@ -1707,10 +1692,18 @@ inline_small_functions (void)
       current_badness = edge_badness (edge, false);
       gcc_assert (cached_badness == current_badness);
       gcc_assert (current_badness >= badness);
+#else
+      current_badness = edge_badness (edge, false);
+#endif
       if (current_badness != badness)
 	{
-	  edge->aux = edge_heap.insert (current_badness, edge);
-	  continue;
+	  if (edge_heap. min() && badness > edge_heap.min_key ())
+	    {
+	      edge->aux = edge_heap.insert (current_badness, edge);
+	      continue;
+	    }
+	  else
+	    badness = current_badness;
 	}
 
       if (!can_inline_edge_p (edge, true))
@@ -1729,13 +1722,13 @@ inline_small_functions (void)
 		   inline_summary (callee)->size);
 	  fprintf (dump_file,
 		   " to be inlined into %s/%i in %s:%i\n"
-		   " Estimated badness is %"PRId64", frequency %.2f.\n",
+		   " Estimated badness is %f, frequency %.2f.\n",
 		   edge->caller->name (), edge->caller->order,
-		   edge->call_stmt ? "unknown"
+		   !edge->call_stmt ? "unknown"
 		   : gimple_filename ((const_gimple) edge->call_stmt),
-		   edge->call_stmt ? -1
+		   !edge->call_stmt ? -1
 		   : gimple_lineno ((const_gimple) edge->call_stmt),
-		   badness.to_int (),
+		   badness.to_double (),
 		   edge->frequency / (double)CGRAPH_FREQ_BASE);
 	  if (edge->count)
 	    fprintf (dump_file," Called %"PRId64"x\n",
@@ -2147,6 +2140,9 @@ ipa_inline (void)
   if (!optimize)
     return 0;
 
+  cgraph_freq_base_rec = (sreal) 1 / (sreal) CGRAPH_FREQ_BASE;
+  percent_rec = (sreal) 1 / (sreal) 100;
+
   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
 
   if (in_lto_p && optimize)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]