This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Optimize handling of inline summaries
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: gcc-patches at gcc dot gnu dot org, mliska at suse dot cz
- Date: Mon, 4 Nov 2019 15:12:55 +0100
- Subject: Optimize handling of inline summaries
Hi,
this patch turns the edge growth cache into a fast summary and fixes
some fallout. I have noticed that we still use get_create in many
places to access the inline edge summary, and turning them into gets
uncovered further problems.
I have added a sanity check to estimate_calls_size_and_time that
inlined edges have no summaries, which turned out to fail because
symbol summaries for some reason have a flag m_initialize_when_cloning
which is set to true for call summaries (and is non-existent for
function summaries). When this flag is set at the time of
edge duplication, we create an empty summary for the source edge (if
non-existent) and then copy it via the hook to the new edge.
Martin, do you know why this flag was introduced?
I have disabled it and noticed that cgraph_node::create_version_clone
calls node duplication hooks for no good reason: newly produced function
will get body and will eventually be registered as new function to
callgraph. Additionally, the inliner sometimes attempts to update the overall
summary of already inlined functions and also of functions compiled with -O0.
Both are bound to fail since we do not have any analysis data on them.
lto-bootstrapped/regtested x86_64-linux, committed.
* cgraphclones.c (cgraph_node::create_version_clone): Do not
duplicate summaries.
* ipa-fnsummary.c (ipa_fn_summary_alloc): Allocate size summary
first.
(ipa_fn_summary_t::duplicate): Use get instead of get_create to
access call summaries.
(dump_ipa_call_summary): Be ready for missing edge summaries.
(analyze_function_body): Use get instead of get_create to access
edge summary.
(estimate_calls_size_and_time): Do not access summaries of
inlined edges; sanity check they are missing.
(ipa_call_context::estimate_size_and_time): Use get instead
of get_create to access node summary.
(inline_update_callee_summaries): Do not update depth of
inlined edge.
(ipa_merge_fn_summary_after_inlining): Remove inline edge from
growth caches.
(ipa_update_overall_fn_summary): Use get instead
of get_create.
* ipa-fnsummary.h (ipa_remove_from_growth_caches): Declare.
* ipa-inline-analysis.c (edge_growth_cache): Turn to
fast summary.
(initialize_growth_caches): Update.
(do_estimate_edge_time): Remove redundant copy of context.
(ipa_remove_from_growth_caches): New function.
* ipa-inline.c (flatten_function): Update overall summary
only when optimizing.
(inline_to_all_callers): Update overall summary of function
inlined to.
* ipa-inline.h (edge_growth_cache): Turn to fast summary.
* symbol-summary.h (call_summary_base): Set m_initialize_when_cloning
to false.
Index: cgraphclones.c
===================================================================
--- cgraphclones.c (revision 277758)
+++ cgraphclones.c (working copy)
@@ -892,8 +892,6 @@ cgraph_node::create_version_clone (tree
e->redirect_callee (new_version);
}
- symtab->call_cgraph_duplication_hooks (this, new_version);
-
dump_callgraph_transformation (this, new_version, suffix);
return new_version;
Index: ipa-fnsummary.c
===================================================================
--- ipa-fnsummary.c (revision 277759)
+++ ipa-fnsummary.c (working copy)
@@ -553,9 +553,9 @@ static void
ipa_fn_summary_alloc (void)
{
gcc_checking_assert (!ipa_fn_summaries);
- ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap>
(symtab);
+ ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
ipa_call_summaries = new ipa_call_summary_t (symtab);
}
@@ -688,7 +688,7 @@ ipa_fn_summary_t::duplicate (cgraph_node
for (edge = dst->callees; edge; edge = next)
{
predicate new_predicate;
- class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
+ class ipa_call_summary *es = ipa_call_summaries->get (edge);
next = edge->next_callee;
if (!edge->inline_failed)
@@ -707,7 +707,7 @@ ipa_fn_summary_t::duplicate (cgraph_node
for (edge = dst->indirect_calls; edge; edge = next)
{
predicate new_predicate;
- class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
+ class ipa_call_summary *es = ipa_call_summaries->get (edge);
next = edge->next_callee;
gcc_checking_assert (edge->inline_failed);
@@ -787,12 +787,15 @@ dump_ipa_call_summary (FILE *f, int inde
int i;
fprintf (f,
- "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i",
+ "%*s%s/%i %s\n%*s freq:%4.2f",
indent, "", callee->name (), callee->order,
!edge->inline_failed
? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
- indent, "", es->loop_depth, edge->sreal_frequency ().to_double (),
- es->call_stmt_size, es->call_stmt_time);
+ indent, "", edge->sreal_frequency ().to_double ());
+
+ if (es)
+ fprintf (f, " loop depth:%2i size:%2i time: %2i",
+ es->loop_depth, es->call_stmt_size, es->call_stmt_time);
ipa_fn_summary *s = ipa_fn_summaries->get (callee);
ipa_size_summary *ss = ipa_size_summaries->get (callee);
@@ -801,14 +804,14 @@ dump_ipa_call_summary (FILE *f, int inde
(int) (ss->size / ipa_fn_summary::size_scale),
(int) s->estimated_stack_size);
- if (es->predicate)
+ if (es && es->predicate)
{
fprintf (f, " predicate: ");
es->predicate->dump (f, info->conds);
}
else
fprintf (f, "\n");
- if (es->param.exists ())
+ if (es && es->param.exists ())
for (i = 0; i < (int) es->param.length (); i++)
{
int prob = es->param[i].change_prob;
@@ -2480,7 +2483,7 @@ analyze_function_body (struct cgraph_nod
edge->speculative_call_info (direct, indirect, ref);
gcc_assert (direct == edge);
ipa_call_summary *es2
- = ipa_call_summaries->get_create (indirect);
+ = ipa_call_summaries->get (indirect);
ipa_call_summaries->duplicate (edge, indirect,
es, es2);
}
@@ -2924,10 +2927,20 @@ estimate_calls_size_and_time (struct cgr
struct cgraph_edge *e;
for (e = node->callees; e; e = e->next_callee)
{
- class ipa_call_summary *es = ipa_call_summaries->get_create (e);
+ if (!e->inline_failed)
+ {
+ gcc_checking_assert (!ipa_call_summaries->get (e));
+ estimate_calls_size_and_time (e->callee, size, min_size, time,
+ hints,
+ possible_truths,
+ known_vals, known_contexts,
+ known_aggs);
+ continue;
+ }
+ class ipa_call_summary *es = ipa_call_summaries->get (e);
/* Do not care about zero sized builtins. */
- if (e->inline_failed && !es->call_stmt_size)
+ if (!es->call_stmt_size)
{
gcc_checking_assert (!es->call_stmt_time);
continue;
@@ -2935,27 +2948,18 @@ estimate_calls_size_and_time (struct cgr
if (!es->predicate
|| es->predicate->evaluate (possible_truths))
{
- if (e->inline_failed)
- {
- /* Predicates of calls shall not use NOT_CHANGED codes,
- sowe do not need to compute probabilities. */
- estimate_edge_size_and_time (e, size,
- es->predicate ? NULL : min_size,
- time, REG_BR_PROB_BASE,
- known_vals, known_contexts,
- known_aggs, hints);
- }
- else
- estimate_calls_size_and_time (e->callee, size, min_size, time,
- hints,
- possible_truths,
- known_vals, known_contexts,
- known_aggs);
+ /* Predicates of calls shall not use NOT_CHANGED codes,
+ sowe do not need to compute probabilities. */
+ estimate_edge_size_and_time (e, size,
+ es->predicate ? NULL : min_size,
+ time, REG_BR_PROB_BASE,
+ known_vals, known_contexts,
+ known_aggs, hints);
}
}
for (e = node->indirect_calls; e; e = e->next_callee)
{
- class ipa_call_summary *es = ipa_call_summaries->get_create (e);
+ class ipa_call_summary *es = ipa_call_summaries->get (e);
if (!es->predicate
|| es->predicate->evaluate (possible_truths))
estimate_edge_size_and_time (e, size,
@@ -3204,7 +3208,7 @@ ipa_call_context::estimate_size_and_time
sreal *ret_nonspecialized_time,
ipa_hints *ret_hints)
{
- class ipa_fn_summary *info = ipa_fn_summaries->get_create (m_node);
+ class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
size_time_entry *e;
int size = 0;
sreal time = 0;
@@ -3382,7 +3386,8 @@ inline_update_callee_summaries (struct c
{
if (!e->inline_failed)
inline_update_callee_summaries (e->callee, depth);
- ipa_call_summaries->get (e)->loop_depth += depth;
+ else
+ ipa_call_summaries->get (e)->loop_depth += depth;
}
for (e = node->indirect_calls; e; e = e->next_callee)
ipa_call_summaries->get (e)->loop_depth += depth;
@@ -3649,6 +3654,7 @@ ipa_merge_fn_summary_after_inlining (str
/* Free summaries that are not maintained for inline clones/edges. */
ipa_call_summaries->remove (edge);
ipa_fn_summaries->remove (edge->callee);
+ ipa_remove_from_growth_caches (edge);
}
/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
@@ -3657,8 +3663,8 @@ ipa_merge_fn_summary_after_inlining (str
void
ipa_update_overall_fn_summary (struct cgraph_node *node)
{
- class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
- class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
+ class ipa_fn_summary *info = ipa_fn_summaries->get (node);
+ class ipa_size_summary *size_info = ipa_size_summaries->get (node);
size_time_entry *e;
int i;
Index: ipa-fnsummary.h
===================================================================
--- ipa-fnsummary.h (revision 277758)
+++ ipa-fnsummary.h (working copy)
@@ -364,5 +364,6 @@ void evaluate_properties_for_edge (struc
void ipa_fnsummary_c_finalize (void);
HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node);
+void ipa_remove_from_growth_caches (struct cgraph_edge *edge);
#endif /* GCC_IPA_FNSUMMARY_H */
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c (revision 277758)
+++ ipa-inline-analysis.c (working copy)
@@ -51,7 +51,7 @@ along with GCC; see the file COPYING3.
#include "gimplify.h"
/* Cached node/edge growths. */
-call_summary<edge_growth_cache_entry *> *edge_growth_cache = NULL;
+fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache = NULL;
/* The context cache remembers estimated time/size and hints for given
ipa_call_context of a call. */
@@ -125,7 +125,7 @@ void
initialize_growth_caches ()
{
edge_growth_cache
- = new call_summary<edge_growth_cache_entry *> (symtab, false);
+ = new fast_call_summary<edge_growth_cache_entry *, va_heap> (symtab);
node_context_cache
= new fast_function_summary<node_context_summary *, va_heap> (symtab);
}
@@ -219,7 +219,6 @@ do_estimate_edge_time (struct cgraph_edg
else
node_context_cache_clear++;
e->entry.ctx.release (true);
- e->entry.ctx = ctx;
ctx.estimate_size_and_time (&size, &min_size,
&time, &nonspec_time, &hints);
e->entry.size = size;
@@ -275,6 +274,16 @@ reset_node_cache (struct cgraph_node *no
node_context_cache->remove (node);
}
+/* Remove EDGE from caches once it was inlined. */
+void
+ipa_remove_from_growth_caches (struct cgraph_edge *edge)
+{
+ if (node_context_cache)
+ node_context_cache->remove (edge->callee);
+ if (edge_growth_cache)
+ edge_growth_cache->remove (edge);
+}
+
/* Return estimated callee growth after inlining EDGE.
Only to be called via estimate_edge_size. */
Index: ipa-inline.c
===================================================================
--- ipa-inline.c (revision 277758)
+++ ipa-inline.c (working copy)
@@ -2290,9 +2290,9 @@ flatten_function (struct cgraph_node *no
}
node->aux = NULL;
- if (update)
- ipa_update_overall_fn_summary (node->inlined_to
- ? node->inlined_to : node);
+ cgraph_node *where = node->inlined_to ? node->inlined_to : node;
+ if (update && opt_for_fn (where->decl, optimize))
+ ipa_update_overall_fn_summary (where);
}
/* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
@@ -2367,7 +2367,7 @@ inline_to_all_callers (struct cgraph_nod
we have a lot of calls to the same function. */
for (hash_set<cgraph_node *>::iterator i = callers.begin ();
i != callers.end (); ++i)
- ipa_update_overall_fn_summary (*i);
+ ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
return res;
}
Index: ipa-inline.h
===================================================================
--- ipa-inline.h (revision 277758)
+++ ipa-inline.h (working copy)
@@ -39,7 +39,7 @@ public:
hints (hints) {}
};
-extern call_summary<edge_growth_cache_entry *> *edge_growth_cache;
+extern fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache;
/* In ipa-inline-analysis.c */
int estimate_size_after_inlining (struct cgraph_node *, struct cgraph_edge *);
Index: symbol-summary.h
===================================================================
--- symbol-summary.h (revision 277758)
+++ symbol-summary.h (working copy)
@@ -532,7 +532,7 @@ class call_summary_base
public:
/* Default construction takes SYMTAB as an argument. */
call_summary_base (symbol_table *symtab): m_symtab (symtab),
- m_initialize_when_cloning (true)
+ m_initialize_when_cloning (false)
{}
/* Basic implementation of removal operation. */