This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix crossmodule ipa-inline hint


Hi,
while looking into Firefox perofmrance I noticed that inline hint
marking crossmodule calls is off most of time.  It is based on comparing
lto_file_data which is often released because ipa-icf or profile merging
has read the function body before inlining.

Moreover this logic is broken for incremental inlining, producing local
clones, extern inline functions etc.  This patch adds tracking of merged
extern inlines similar way as we already track merged comdats.

Bootstrapped/regtested x86_64-linux, comitted.

Honza

	* cgraph.c (cgraph_node::dump): Dump unit_id and merged_extern_inline.
	* cgraph.h (cgraph_node): Add unit_id and
	merged_extern_inline.
	(symbol_table): Add max_unit.
	(symbol_table::symbol_table): Initialize it.
	* cgraphclones.c (duplicate_thunk_for_node): Copy unit_id.
	merged_comdat, merged_extern_inline.
	(cgraph_node::create_clone): Likewise.
	(cgraph_node::create_version_clone): Likewise.
	* ipa-fnsummary.c (dump_ipa_call_summary): Dump info about cross module
	calls.
	* ipa-fnsummary.h (cross_module_call_p): New inline function.
	* ipa-inline-analyssi.c (simple_edge_hints): Use it.
	* ipa-inline.c (inline_small_functions): Likewise.
	* lto-symtab.c (lto_cgraph_replace_node): Record merged_extern_inline;
	copy merged_comdat and merged_extern_inline.
	* lto-cgraph.c (lto_output_node): Stream out merged_comdat,
	merged_extern_inline and unit_id.
	(input_overwrite_node): Stream in these.
	(input_cgraph_1): Set unit_base.
	* lto-streamer.h (lto_file_decl_data): Add unit_base.
	* symtab.c (symtab_node::make_decl_local): Record former_comdat.

	* g++.dg/lto/inline-crossmodule-1.h: New testcase.
	* g++.dg/lto/inline-crossmodule-1_0.C: New testcase.
	* g++.dg/lto/inline-crossmodule-1_1.C: New testcase.
Index: cgraph.c
===================================================================
--- cgraph.c	(revision 278835)
+++ cgraph.c	(working copy)
@@ -1923,6 +1923,9 @@ cgraph_node::dump (FILE *f)
   if (profile_id)
     fprintf (f, "  Profile id: %i\n",
 	     profile_id);
+  if (unit_id)
+    fprintf (f, "  Unit id: %i\n",
+	     unit_id);
   cgraph_function_version_info *vi = function_version ();
   if (vi != NULL)
     {
@@ -1973,6 +1976,8 @@ cgraph_node::dump (FILE *f)
     fprintf (f, " icf_merged");
   if (merged_comdat)
     fprintf (f, " merged_comdat");
+  if (merged_extern_inline)
+    fprintf (f, " merged_extern_inline");
   if (split_part)
     fprintf (f, " split_part");
   if (indirect_call_target)
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 278834)
+++ cgraph.h	(working copy)
@@ -1433,6 +1433,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cg
   int count_materialization_scale;
   /* ID assigned by the profiling.  */
   unsigned int profile_id;
+  /* ID of the translation unit.  */
+  int unit_id;
   /* Time profiler: first run of function.  */
   int tp_first_run;
 
@@ -1469,6 +1471,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cg
   unsigned nonfreeing_fn : 1;
   /* True if there was multiple COMDAT bodies merged by lto-symtab.  */
   unsigned merged_comdat : 1;
+  /* True if this def was merged with extern inlines.  */
+  unsigned merged_extern_inline : 1;
   /* True if function was created to be executed in parallel.  */
   unsigned parallelized_function : 1;
   /* True if function is part split out by ipa-split.  */
@@ -2090,7 +2094,7 @@ public:
   edges_count (0), edges_max_uid (1), edges_max_summary_id (0),
   cgraph_released_summary_ids (), edge_released_summary_ids (),
   nodes (NULL), asmnodes (NULL), asm_last_node (NULL),
-  order (0), global_info_ready (false), state (PARSING),
+  order (0), max_unit (0), global_info_ready (false), state (PARSING),
   function_flags_ready (false), cpp_implicit_aliases_done (false),
   section_hash (NULL), assembler_name_hash (NULL), init_priority_hash (NULL),
   dump_file (NULL), ipa_clones_dump_file (NULL), cloned_nodes (),
@@ -2355,6 +2359,9 @@ public:
      them, to support -fno-toplevel-reorder.  */
   int order;
 
+  /* Maximal unit ID used.  */
+  int max_unit;
+
   /* Set when whole unit has been analyzed so we can access global info.  */
   bool global_info_ready;
   /* What state callgraph is in right now.  */
Index: cgraphclones.c
===================================================================
--- cgraphclones.c	(revision 278834)
+++ cgraphclones.c	(working copy)
@@ -229,6 +229,9 @@ duplicate_thunk_for_node (cgraph_node *t
   new_thunk->unique_name = in_lto_p;
   new_thunk->former_clone_of = thunk->decl;
   new_thunk->clone.param_adjustments = node->clone.param_adjustments;
+  new_thunk->unit_id = thunk->unit_id;
+  new_thunk->merged_comdat = thunk->merged_comdat;
+  new_thunk->merged_extern_inline = thunk->merged_extern_inline;
 
   cgraph_edge *e = new_thunk->create_edge (node, NULL, new_thunk->count);
   symtab->call_edge_duplication_hooks (thunk->callees, e);
@@ -376,6 +379,9 @@ cgraph_node::create_clone (tree new_decl
   new_node->icf_merged = icf_merged;
   new_node->merged_comdat = merged_comdat;
   new_node->thunk = thunk;
+  new_node->unit_id = unit_id;
+  new_node->merged_comdat = merged_comdat;
+  new_node->merged_extern_inline = merged_extern_inline;
 
   if (param_adjustments)
     new_node->clone.param_adjustments = param_adjustments;
@@ -881,6 +887,9 @@ cgraph_node::create_version_clone (tree
    new_version->inlined_to = inlined_to;
    new_version->rtl = rtl;
    new_version->count = count;
+   new_version->unit_id = unit_id;
+   new_version->merged_comdat = merged_comdat;
+   new_version->merged_extern_inline = merged_extern_inline;
 
    for (e = callees; e; e=e->next_callee)
      if (!bbs_to_copy
Index: ipa-fnsummary.c
===================================================================
--- ipa-fnsummary.c	(revision 278834)
+++ ipa-fnsummary.c	(working copy)
@@ -913,6 +913,9 @@ dump_ipa_call_summary (FILE *f, int inde
 	       ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
 	       indent, "", edge->sreal_frequency ().to_double ());
 
+      if (cross_module_call_p (edge))
+	fprintf (f, " cross module");
+
       if (es)
 	fprintf (f, " loop depth:%2i size:%2i time: %2i",
 		 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
Index: ipa-fnsummary.h
===================================================================
--- ipa-fnsummary.h	(revision 278834)
+++ ipa-fnsummary.h	(working copy)
@@ -375,4 +375,21 @@ void ipa_fnsummary_c_finalize (void);
 HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node);
 void ipa_remove_from_growth_caches (struct cgraph_edge *edge);
 
+/* Return true if EDGE is a cross module call.  */
+
+static inline bool
+cross_module_call_p (struct cgraph_edge *edge)
+{
+  /* Here we do not want to walk to alias target becuase ICF may create
+     cross-unit aliases.  */
+  if (edge->caller->unit_id == edge->callee->unit_id)
+    return false;
+  /* If the call is to a (former) comdat function or s symbol with mutiple
+     extern inline definitions then treat is as in-module call.  */
+  if (edge->callee->merged_extern_inline || edge->callee->merged_comdat
+      || DECL_COMDAT (edge->callee->decl))
+    return false;
+  return true;
+}
+
 #endif /* GCC_IPA_FNSUMMARY_H */
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c	(revision 278834)
+++ ipa-inline-analysis.c	(working copy)
@@ -163,9 +163,7 @@ simple_edge_hints (struct cgraph_edge *e
   if (to_scc_no && to_scc_no  == callee_scc_no && !edge->recursive_p ())
     hints |= INLINE_HINT_same_scc;
 
-  if (callee->lto_file_data && edge->caller->lto_file_data
-      && edge->caller->lto_file_data != callee->lto_file_data
-      && !callee->merged_comdat && !callee->icf_merged)
+  if (cross_module_call_p (edge))
     hints |= INLINE_HINT_cross_module;
 
   return hints;
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 278834)
+++ ipa-inline.c	(working copy)
@@ -2257,11 +2257,12 @@ inline_small_functions (void)
 
 	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
 			   " Inlined %C into %C which now has time %f and "
-			   "size %i, net change of %s.\n",
+			   "size %i, net change of %s%s.\n",
 			   edge->callee, edge->caller,
 			   s->time.to_double (),
 			   ipa_size_summaries->get (edge->caller)->size,
-			   buf_net_change);
+			   buf_net_change,
+			   cross_module_call_p (edge) ? " (cross module)":"");
 	}
       if (min_size > overall_size)
 	{
Index: lto/lto-symtab.c
===================================================================
--- lto/lto-symtab.c	(revision 278834)
+++ lto/lto-symtab.c	(working copy)
@@ -69,6 +69,13 @@ lto_cgraph_replace_node (struct cgraph_n
   if (node->definition && prevailing_node->definition
       && DECL_COMDAT (node->decl) && DECL_COMDAT (prevailing_node->decl))
     prevailing_node->merged_comdat = true;
+  else if ((node->definition || node->body_removed)
+	   && DECL_DECLARED_INLINE_P (node->decl)
+	   && DECL_EXTERNAL (node->decl)
+	   && prevailing_node->definition)
+    prevailing_node->merged_extern_inline = true;
+  prevailing_node->merged_comdat |= node->merged_comdat;
+  prevailing_node->merged_extern_inline |= node->merged_extern_inline;
 
   /* Redirect all incoming edges.  */
   compatible_p
Index: lto-cgraph.c
===================================================================
--- lto-cgraph.c	(revision 278834)
+++ lto-cgraph.c	(working copy)
@@ -533,6 +533,8 @@ lto_output_node (struct lto_simple_outpu
   bp_pack_value (&bp, node->calls_comdat_local, 1);
   bp_pack_value (&bp, node->icf_merged, 1);
   bp_pack_value (&bp, node->nonfreeing_fn, 1);
+  bp_pack_value (&bp, node->merged_comdat, 1);
+  bp_pack_value (&bp, node->merged_extern_inline, 1);
   bp_pack_value (&bp, node->thunk.thunk_p, 1);
   bp_pack_value (&bp, node->parallelized_function, 1);
   bp_pack_enum (&bp, ld_plugin_symbol_resolution,
@@ -559,6 +561,7 @@ lto_output_node (struct lto_simple_outpu
       streamer_write_uhwi_stream (ob->main_stream, node->thunk.indirect_offset);
     }
   streamer_write_hwi_stream (ob->main_stream, node->profile_id);
+  streamer_write_hwi_stream (ob->main_stream, node->unit_id);
   if (DECL_STATIC_CONSTRUCTOR (node->decl))
     streamer_write_hwi_stream (ob->main_stream, node->get_init_priority ());
   if (DECL_STATIC_DESTRUCTOR (node->decl))
@@ -1177,6 +1180,8 @@ input_overwrite_node (struct lto_file_de
   node->calls_comdat_local = bp_unpack_value (bp, 1);
   node->icf_merged = bp_unpack_value (bp, 1);
   node->nonfreeing_fn = bp_unpack_value (bp, 1);
+  node->merged_comdat = bp_unpack_value (bp, 1);
+  node->merged_extern_inline = bp_unpack_value (bp, 1);
   node->thunk.thunk_p = bp_unpack_value (bp, 1);
   node->parallelized_function = bp_unpack_value (bp, 1);
   node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution,
@@ -1310,6 +1315,9 @@ input_node (struct lto_file_decl_data *f
   if (node->alias && !node->analyzed && node->weakref)
     node->alias_target = get_alias_symbol (node->decl);
   node->profile_id = streamer_read_hwi (ib);
+  node->unit_id = streamer_read_hwi (ib) + file_data->unit_base;
+  if (symtab->max_unit < node->unit_id)
+    symtab->max_unit = node->unit_id;
   if (DECL_STATIC_CONSTRUCTOR (node->decl))
     node->set_init_priority (streamer_read_hwi (ib));
   if (DECL_STATIC_DESTRUCTOR (node->decl))
@@ -1502,6 +1510,7 @@ input_cgraph_1 (struct lto_file_decl_dat
 
   tag = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
   file_data->order_base = symtab->order;
+  file_data->unit_base = symtab->max_unit + 1;
   while (tag)
     {
       if (tag == LTO_symtab_edge)
Index: lto-streamer.h
===================================================================
--- lto-streamer.h	(revision 278834)
+++ lto-streamer.h	(working copy)
@@ -626,6 +626,8 @@ struct GTY(()) lto_file_decl_data
   lto_section lto_section_header;
 
   int order_base;
+
+  int unit_base;
 };
 
 typedef struct lto_file_decl_data *lto_file_decl_data_ptr;
Index: symtab.c
===================================================================
--- symtab.c	(revision 278834)
+++ symtab.c	(working copy)
@@ -1863,6 +1863,13 @@ symtab_node::noninterposable_alias (void
       DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
       DECL_STATIC_DESTRUCTOR (new_decl) = 0;
       new_node = cgraph_node::create_alias (new_decl, node->decl);
+
+      cgraph_node *new_cnode = dyn_cast <cgraph_node *> (new_node),
+		   *cnode = dyn_cast <cgraph_node *> (node);
+
+      new_cnode->unit_id = cnode->unit_id;
+      new_cnode->merged_comdat = cnode->merged_comdat;
+      new_cnode->merged_extern_inline = cnode->merged_extern_inline;
     }
   else
     {
Index: testsuite/g++.dg/lto/inline-crossmodule-1.h
===================================================================
--- testsuite/g++.dg/lto/inline-crossmodule-1.h	(nonexistent)
+++ testsuite/g++.dg/lto/inline-crossmodule-1.h	(working copy)
@@ -0,0 +1,15 @@
+struct a
+{
+  int ret1 ()
+  {
+    return 1;
+  }
+  int key ();
+};
+struct b
+{
+  int ret2 ()
+  {
+    return 2;
+  }
+};
Index: testsuite/g++.dg/lto/inline-crossmodule-1_0.C
===================================================================
--- testsuite/g++.dg/lto/inline-crossmodule-1_0.C	(nonexistent)
+++ testsuite/g++.dg/lto/inline-crossmodule-1_0.C	(working copy)
@@ -0,0 +1,11 @@
+// { dg-lto-do link }
+/* { dg-lto-options { "-O2 -fno-early-inlining -flto -fdump-ipa-inline" } } */
+#include "inline-crossmodule-1.h"
+int a::key ()
+{
+  return 0;
+}
+/* { dg-final { scan-wpa-ipa-times "Inlined ret1" 1 "inlined"  } } */
+/* { dg-final { scan-wpa-ipa-times "Inlined ret2" 1 "inlined"  } } */
+/* { dg-final { scan-wpa-ipa-times "Inlined key\[^\\n\]*(cross module)" 1 "inlined"  } } */
+/* { dg-final { scan-wpa-ipa-times "(cross module)" 1 "inlined"  } } */
Index: testsuite/g++.dg/lto/inline-crossmodule-1_1.C
===================================================================
--- testsuite/g++.dg/lto/inline-crossmodule-1_1.C	(nonexistent)
+++ testsuite/g++.dg/lto/inline-crossmodule-1_1.C	(working copy)
@@ -0,0 +1,8 @@
+#include "inline-crossmodule-1.h"
+int
+main()
+{
+  struct a a;
+  struct b b;
+  return a.key () + a.ret1 () + b.ret2() - 3;
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]