This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Optimize handling of inline summaries


Hi,
this patch turns edge growth cache into fast summary and fixes
some fallout.  I have noticed that we still use get_create in many
places to access inline edge summary and turning them into gets
uncovered further problems.

I have added a sanity check to estimate_calls_size_and_time that
inlined edges have no summaries, which turned out to fail because
symbol-summaries for some reason have flag m_initialize_when_cloning
which is set to true for call summaries (and is non-existent for
function summaries).  When this flag is set at the time of
edge duplication we create empty summary for source edge (if
non-existent) and then copy it via hook to new edge.

Martin, do you know why this flag was introduced?

I have disabled it and noticed that cgraph_node::create_version_clone
calls node duplication hooks for no good reason: newly produced function
will get body and will eventually be registered as new function to
callgraph. Additionally inliner sometimes attempts to update overall
summary of already inlined functions and also functions with -O0.
Both are doomed to fail since we do not have any analysis data on them.

lto-bootstrapped/regtested x86_64-linux, committed.

	* cgraphclones.c (cgraph_node::create_version_clone): Do not
	duplicate summaries.
	* ipa-fnsummary.c (ipa_fn_summary_alloc): Allocate size summary
	first.
	(ipa_fn_summary_t::duplicate): Use get instead of get_create to
	access call summaries.
	(dump_ipa_call_summary): Be ready for missing edge summaries.
	(analyze_function_body): Use get instead of get_create to access
	edge summary.
	(estimate_calls_size_and_time): Do not access summaries of
	inlined edges; sanity check they are missing.
	(ipa_call_context::estimate_size_and_time): Use get instead
	of get_create to access node summary.
	(inline_update_callee_summaries): Do not update depth of
	inlined edge.
	(ipa_merge_fn_summary_after_inlining): Remove inline edge from
	growth caches.
	(ipa_update_overall_fn_summary): Use get instead
	of get_create.
	* ipa-fnsummary.h (ipa_remove_from_growth_caches): Declare.
	* ipa-inline-analysis.c (edge_growth_cache): Turn to
	fast summary.
	(initialize_growth_caches): Update.
	(do_estimate_edge_time): Remove redundant copy of context.
	(ipa_remove_from_growth_caches): New function.
	* ipa-inline.c (flatten_function): Update overall summary
	only when optimizing.
	(inline_to_all_callers): Update overall summary of function
	inlined to.
	* ipa-inline.h (edge_growth_cache): Turn to fast summary.
	* symbol-summary.h (call_summary_base): Set m_initialize_when_cloning
	to false.
Index: cgraphclones.c
===================================================================
--- cgraphclones.c	(revision 277758)
+++ cgraphclones.c	(working copy)
@@ -892,8 +892,6 @@ cgraph_node::create_version_clone (tree
        e->redirect_callee (new_version);
      }
 
-   symtab->call_cgraph_duplication_hooks (this, new_version);
-
    dump_callgraph_transformation (this, new_version, suffix);
 
    return new_version;
Index: ipa-fnsummary.c
===================================================================
--- ipa-fnsummary.c	(revision 277759)
+++ ipa-fnsummary.c	(working copy)
@@ -553,9 +553,9 @@ static void
 ipa_fn_summary_alloc (void)
 {
   gcc_checking_assert (!ipa_fn_summaries);
-  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
   ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap>
 							 (symtab);
+  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
   ipa_call_summaries = new ipa_call_summary_t (symtab);
 }
 
@@ -688,7 +688,7 @@ ipa_fn_summary_t::duplicate (cgraph_node
       for (edge = dst->callees; edge; edge = next)
 	{
 	  predicate new_predicate;
-	  class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
+	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
 	  next = edge->next_callee;
 
 	  if (!edge->inline_failed)
@@ -707,7 +707,7 @@ ipa_fn_summary_t::duplicate (cgraph_node
       for (edge = dst->indirect_calls; edge; edge = next)
 	{
 	  predicate new_predicate;
-	  class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
+	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
 	  next = edge->next_callee;
 
 	  gcc_checking_assert (edge->inline_failed);
@@ -787,12 +787,15 @@ dump_ipa_call_summary (FILE *f, int inde
       int i;
 
       fprintf (f,
-	       "%*s%s/%i %s\n%*s  loop depth:%2i freq:%4.2f size:%2i time: %2i",
+	       "%*s%s/%i %s\n%*s  freq:%4.2f",
 	       indent, "", callee->name (), callee->order,
 	       !edge->inline_failed
 	       ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
-	       indent, "", es->loop_depth, edge->sreal_frequency ().to_double (),
-	       es->call_stmt_size, es->call_stmt_time);
+	       indent, "", edge->sreal_frequency ().to_double ());
+
+      if (es)
+	fprintf (f, " loop depth:%2i size:%2i time: %2i",
+		 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
 
       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
       ipa_size_summary *ss = ipa_size_summaries->get (callee);
@@ -801,14 +804,14 @@ dump_ipa_call_summary (FILE *f, int inde
 		 (int) (ss->size / ipa_fn_summary::size_scale),
 		 (int) s->estimated_stack_size);
 
-      if (es->predicate)
+      if (es && es->predicate)
 	{
 	  fprintf (f, " predicate: ");
 	  es->predicate->dump (f, info->conds);
 	}
       else
 	fprintf (f, "\n");
-      if (es->param.exists ())
+      if (es && es->param.exists ())
 	for (i = 0; i < (int) es->param.length (); i++)
 	  {
 	    int prob = es->param[i].change_prob;
@@ -2480,7 +2483,7 @@ analyze_function_body (struct cgraph_nod
 		  edge->speculative_call_info (direct, indirect, ref);
 		  gcc_assert (direct == edge);
 	          ipa_call_summary *es2
-			 = ipa_call_summaries->get_create (indirect);
+			 = ipa_call_summaries->get (indirect);
 		  ipa_call_summaries->duplicate (edge, indirect,
 						 es, es2);
 		}
@@ -2924,10 +2927,20 @@ estimate_calls_size_and_time (struct cgr
   struct cgraph_edge *e;
   for (e = node->callees; e; e = e->next_callee)
     {
-      class ipa_call_summary *es = ipa_call_summaries->get_create (e);
+      if (!e->inline_failed)
+	{
+	  gcc_checking_assert (!ipa_call_summaries->get (e));
+	  estimate_calls_size_and_time (e->callee, size, min_size, time,
+					hints,
+					possible_truths,
+					known_vals, known_contexts,
+					known_aggs);
+	  continue;
+	}
+      class ipa_call_summary *es = ipa_call_summaries->get (e);
 
       /* Do not care about zero sized builtins.  */
-      if (e->inline_failed && !es->call_stmt_size)
+      if (!es->call_stmt_size)
 	{
 	  gcc_checking_assert (!es->call_stmt_time);
 	  continue;
@@ -2935,27 +2948,18 @@ estimate_calls_size_and_time (struct cgr
       if (!es->predicate
 	  || es->predicate->evaluate (possible_truths))
 	{
-	  if (e->inline_failed)
-	    {
-	      /* Predicates of calls shall not use NOT_CHANGED codes,
-	         sowe do not need to compute probabilities.  */
-	      estimate_edge_size_and_time (e, size,
-					   es->predicate ? NULL : min_size,
-					   time, REG_BR_PROB_BASE,
-					   known_vals, known_contexts,
-					   known_aggs, hints);
-	    }
-	  else
-	    estimate_calls_size_and_time (e->callee, size, min_size, time,
-					  hints,
-					  possible_truths,
-					  known_vals, known_contexts,
-					  known_aggs);
+	  /* Predicates of calls shall not use NOT_CHANGED codes,
+	     sowe do not need to compute probabilities.  */
+	  estimate_edge_size_and_time (e, size,
+				       es->predicate ? NULL : min_size,
+				       time, REG_BR_PROB_BASE,
+				       known_vals, known_contexts,
+				       known_aggs, hints);
 	}
     }
   for (e = node->indirect_calls; e; e = e->next_callee)
     {
-      class ipa_call_summary *es = ipa_call_summaries->get_create (e);
+      class ipa_call_summary *es = ipa_call_summaries->get (e);
       if (!es->predicate
 	  || es->predicate->evaluate (possible_truths))
 	estimate_edge_size_and_time (e, size,
@@ -3204,7 +3208,7 @@ ipa_call_context::estimate_size_and_time
 					  sreal *ret_nonspecialized_time,
 					  ipa_hints *ret_hints)
 {
-  class ipa_fn_summary *info = ipa_fn_summaries->get_create (m_node);
+  class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
   size_time_entry *e;
   int size = 0;
   sreal time = 0;
@@ -3382,7 +3386,8 @@ inline_update_callee_summaries (struct c
     {
       if (!e->inline_failed)
 	inline_update_callee_summaries (e->callee, depth);
-      ipa_call_summaries->get (e)->loop_depth += depth;
+      else
+	ipa_call_summaries->get (e)->loop_depth += depth;
     }
   for (e = node->indirect_calls; e; e = e->next_callee)
     ipa_call_summaries->get (e)->loop_depth += depth;
@@ -3649,6 +3654,7 @@ ipa_merge_fn_summary_after_inlining (str
   /* Free summaries that are not maintained for inline clones/edges.  */
   ipa_call_summaries->remove (edge);
   ipa_fn_summaries->remove (edge->callee);
+  ipa_remove_from_growth_caches (edge);
 }
 
 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
@@ -3657,8 +3663,8 @@ ipa_merge_fn_summary_after_inlining (str
 void
 ipa_update_overall_fn_summary (struct cgraph_node *node)
 {
-  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
-  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
+  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
+  class ipa_size_summary *size_info = ipa_size_summaries->get (node);
   size_time_entry *e;
   int i;
 
Index: ipa-fnsummary.h
===================================================================
--- ipa-fnsummary.h	(revision 277758)
+++ ipa-fnsummary.h	(working copy)
@@ -364,5 +364,6 @@ void evaluate_properties_for_edge (struc
 
 void ipa_fnsummary_c_finalize (void);
 HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node);
+void ipa_remove_from_growth_caches (struct cgraph_edge *edge);
 
 #endif /* GCC_IPA_FNSUMMARY_H */
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c	(revision 277758)
+++ ipa-inline-analysis.c	(working copy)
@@ -51,7 +51,7 @@ along with GCC; see the file COPYING3.
 #include "gimplify.h"
 
 /* Cached node/edge growths.  */
-call_summary<edge_growth_cache_entry *> *edge_growth_cache = NULL;
+fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache = NULL;
 
 /* The context cache remembers estimated time/size and hints for given
    ipa_call_context of a call.  */
@@ -125,7 +125,7 @@ void
 initialize_growth_caches ()
 {
   edge_growth_cache
-    = new call_summary<edge_growth_cache_entry *> (symtab, false);
+    = new fast_call_summary<edge_growth_cache_entry *, va_heap> (symtab);
   node_context_cache
     = new fast_function_summary<node_context_summary *, va_heap> (symtab);
 }
@@ -219,7 +219,6 @@ do_estimate_edge_time (struct cgraph_edg
 	  else
 	    node_context_cache_clear++;
 	  e->entry.ctx.release (true);
-	  e->entry.ctx = ctx;
 	  ctx.estimate_size_and_time (&size, &min_size,
 				      &time, &nonspec_time, &hints);
 	  e->entry.size = size;
@@ -275,6 +274,16 @@ reset_node_cache (struct cgraph_node *no
     node_context_cache->remove (node);
 }
 
+/* Remove EDGE from caches once it was inlined.  */
+void
+ipa_remove_from_growth_caches (struct cgraph_edge *edge)
+{
+  if (node_context_cache)
+    node_context_cache->remove (edge->callee);
+  if (edge_growth_cache)
+    edge_growth_cache->remove (edge);
+}
+
 /* Return estimated callee growth after inlining EDGE.
    Only to be called via estimate_edge_size.  */
 
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 277758)
+++ ipa-inline.c	(working copy)
@@ -2290,9 +2290,9 @@ flatten_function (struct cgraph_node *no
     }
 
   node->aux = NULL;
-  if (update)
-    ipa_update_overall_fn_summary (node->inlined_to
-				   ? node->inlined_to : node);
+  cgraph_node *where = node->inlined_to ? node->inlined_to : node;
+  if (update && opt_for_fn (where->decl, optimize))
+    ipa_update_overall_fn_summary (where);
 }
 
 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
@@ -2367,7 +2367,7 @@ inline_to_all_callers (struct cgraph_nod
      we have a lot of calls to the same function.  */
   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
        i != callers.end (); ++i)
-    ipa_update_overall_fn_summary (*i);
+    ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
   return res;
 }
 
Index: ipa-inline.h
===================================================================
--- ipa-inline.h	(revision 277758)
+++ ipa-inline.h	(working copy)
@@ -39,7 +39,7 @@ public:
       hints (hints) {}
 };
 
-extern call_summary<edge_growth_cache_entry *> *edge_growth_cache;
+extern fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache;
 
 /* In ipa-inline-analysis.c  */
 int estimate_size_after_inlining (struct cgraph_node *, struct cgraph_edge *);
Index: symbol-summary.h
===================================================================
--- symbol-summary.h	(revision 277758)
+++ symbol-summary.h	(working copy)
@@ -532,7 +532,7 @@ class call_summary_base
 public:
   /* Default construction takes SYMTAB as an argument.  */
   call_summary_base (symbol_table *symtab): m_symtab (symtab),
-  m_initialize_when_cloning (true)
+  m_initialize_when_cloning (false)
   {}
 
   /* Basic implementation of removal operation.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]