Re: Optimize streaming in of non-prevailing ipa-parm summaries


On December 15, 2018 10:17:43 PM GMT+01:00, Jan Hubicka <hubicka@ucw.cz> wrote:
>Hi,
>this patch is motivated by the finding that Firefox ipa summary stream-in
>is fully dominated by streaming edge summaries that are not necessary
>because they lead to external calls or come from comdat functions that
>are unused.
>
>Reading them, populating hashes and later removing them once streaming
>is finished increases memory use by about 1GB.  This patch simply adds
>logic to notice such unnecessary summaries early and not put them into
>memory.  This includes ADDR_EXPRs, which seem to be the most common
>constant values and are easy to rebuild.

Don't you want to go the full way and decompose it into sym + offset?

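For what it's worth, here is a self-contained toy of what that decomposition
could look like on the streamer side: stream only an index of the base symbol
plus a byte offset instead of a whole ADDR_EXPR tree, and rebuild the address
on the reader side.  All names below are made up for the sketch and are not
the GCC streamer API.

#include <cstdint>
#include <cstdio>
#include <vector>

/* A decomposed address constant: index of the base symbol in the symbol
   table plus a byte offset.  */
struct addr_const
{
  uint32_t sym_index;
  int64_t offset;
};

/* Writer side: two integers per constant go into the flat word stream.  */
static void
write_addr (std::vector<int64_t> &words, addr_const c)
{
  words.push_back (c.sym_index);
  words.push_back (c.offset);
}

/* Reader side: rebuilding the constant needs no tree allocation; a reader
   that does not care about the value can simply advance POS by two.  */
static addr_const
read_addr (const std::vector<int64_t> &words, size_t &pos)
{
  addr_const c;
  c.sym_index = (uint32_t) words[pos++];
  c.offset = words[pos++];
  return c;
}

int
main ()
{
  std::vector<int64_t> words;
  write_addr (words, {7, 16});   /* roughly "&sym7 + 16 bytes"  */

  size_t pos = 0;
  addr_const c = read_addr (words, pos);
  std::printf ("sym %u, offset %lld\n", (unsigned) c.sym_index,
               (long long) c.offset);
  return 0;
}
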
>Our streams are not organized in a way that would make it easy to skip
>something, so I opted to simply "fast forward", reading the dead data but
>not producing data structures.  We may think of a more systematic solution
>next stage1, but there are only two places where this matters - ipa-prop
>and ipa-fnsummary, which I will treat next.
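
A minimal self-contained sketch of that "fast forward" pattern: every record
is decoded so the stream position stays in sync, but it is only materialized
when the node prevails.  Toy types and a flat word vector stand in for the
real LTO streamer here.

#include <cstdint>
#include <cstdio>
#include <vector>

struct summary
{
  uint32_t param;
  int64_t value;
};

/* Decode one record from WORDS at POS.  The record is always consumed so
   the reader stays in sync; it is stored only when PREVAILS is true.  */
static void
read_summary (const std::vector<int64_t> &words, size_t &pos,
              std::vector<summary> *out, bool prevails)
{
  summary s;
  s.param = (uint32_t) words[pos++];
  s.value = words[pos++];
  if (prevails)
    out->push_back (s);
  /* Otherwise the decoded record is simply dropped.  */
}

int
main ()
{
  std::vector<int64_t> words = { 0, 42, 1, -7 };   /* two records */
  std::vector<summary> kept;

  size_t pos = 0;
  read_summary (words, pos, &kept, true);    /* prevailing node */
  read_summary (words, pos, &kept, false);   /* non-prevailing: skipped */

  std::printf ("kept %zu of 2 records\n", kept.size ());
  return 0;
}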
>
>Bootstrapped/regtested x86_64-linux, will commit it shortly.
>
>Honza
>
>	* cgraph.h (cgraph_node): Add predicate prevailing_p.
>	(cgraph_edge): Add predicate possibly_call_in_translation_unit_p.
>	* ipa-prop.c (ipa_write_jump_function): Optimize streaming of
>	ADDR_EXPR.
>	(ipa_read_jump_function): Add prevails parameter; optimize streaming.
>	(ipa_read_edge_info): Break out from ...
>	(ipa_read_node_info): ... here; optimize streaming.
>	* cgraph.c (cgraph_edge::possibly_call_in_translation_unit_p): New
>	predicate.
>
>Index: cgraph.h
>===================================================================
>--- cgraph.h	(revision 267122)
>+++ cgraph.h	(working copy)
>@@ -308,6 +308,10 @@ public:
>   /* Return availability of NODE when referenced from REF.  */
>   enum availability get_availability (symtab_node *ref = NULL);
> 
>+  /* During LTO stream-in this predicate can be used to check whether node
>+     in question prevails in the linking to save some memory usage.  */
>+  bool prevailing_p (void);
>+
> /* Return true if NODE binds to current definition in final executable
>    when referenced from REF.  If REF is NULL return conservative value
>      for any reference.  */
>@@ -1730,6 +1734,10 @@ struct GTY((chain_next ("%h.next_caller"
>      after passes that don't update the cgraph.  */
>   static void rebuild_references (void);
> 
>+  /* During LTO stream-in this can be used to check whether call can possibly
>+     be internal to the current translation unit.  */
>+  bool possibly_call_in_translation_unit_p (void);
>+
>   /* Expected number of executions: calculated in profile.c.  */
>   profile_count count;
>   cgraph_node *caller;
>@@ -3357,6 +3365,15 @@ xstrdup_for_dump (const char *transient_
>   return ggc_strdup (transient_str);
> }
> 
>+/* During LTO stream-in this predicate can be used to check whether node
>+   in question prevails in the linking to save some memory usage.  */
>+inline bool
>+symtab_node::prevailing_p (void)
>+{
>+  return definition && ((!TREE_PUBLIC (decl) && !DECL_EXTERNAL (decl))
>+			 || previous_sharing_asm_name == NULL);
>+}
>+
> extern GTY(()) symbol_table *saved_symtab;
> 
> #if CHECKING_P
>Index: ipa-prop.c
>===================================================================
>--- ipa-prop.c	(revision 267122)
>+++ ipa-prop.c	(working copy)
>@@ -4053,8 +4053,15 @@ ipa_write_jump_function (struct output_b
>   struct ipa_agg_jf_item *item;
>   struct bitpack_d bp;
>   int i, count;
>+  int flag = 0;
> 
>-  streamer_write_uhwi (ob, jump_func->type);
>+  /* ADDR_EXPRs are very common IP invariants; save some streamer data
>+     as well as WPA memory by handling them specially.  */
>+  if (jump_func->type == IPA_JF_CONST
>+      && TREE_CODE (jump_func->value.constant.value) == ADDR_EXPR)
>+    flag = 1;
>+
>+  streamer_write_uhwi (ob, jump_func->type * 2 + flag);
>   switch (jump_func->type)
>     {
>     case IPA_JF_UNKNOWN:
>@@ -4062,7 +4069,10 @@ ipa_write_jump_function (struct output_b
>     case IPA_JF_CONST:
>       gcc_assert (
>	  EXPR_LOCATION (jump_func->value.constant.value) == UNKNOWN_LOCATION);
>-      stream_write_tree (ob, jump_func->value.constant.value, true);
>+      stream_write_tree (ob,
>+			 flag
>+			 ? TREE_OPERAND (jump_func->value.constant.value, 0)
>+			 : jump_func->value.constant.value, true);
>       break;
>     case IPA_JF_PASS_THROUGH:
>     streamer_write_uhwi (ob, jump_func->value.pass_through.operation);
>@@ -4131,20 +4141,28 @@ static void
> ipa_read_jump_function (struct lto_input_block *ib,
> 			struct ipa_jump_func *jump_func,
> 			struct cgraph_edge *cs,
>-			struct data_in *data_in)
>+			struct data_in *data_in,
>+			bool prevails)
> {
>   enum jump_func_type jftype;
>   enum tree_code operation;
>   int i, count;
>+  int val = streamer_read_uhwi (ib);
>+  bool flag = val & 1;
> 
>-  jftype = (enum jump_func_type) streamer_read_uhwi (ib);
>+  jftype = (enum jump_func_type) (val / 2);
>   switch (jftype)
>     {
>     case IPA_JF_UNKNOWN:
>       ipa_set_jf_unknown (jump_func);
>       break;
>     case IPA_JF_CONST:
>-      ipa_set_jf_constant (jump_func, stream_read_tree (ib, data_in), cs);
>+      {
>+	tree t = stream_read_tree (ib, data_in);
>+	if (flag && prevails)
>+	  t = build_fold_addr_expr (t);
>+	ipa_set_jf_constant (jump_func, t, cs);
>+      }
>       break;
>     case IPA_JF_PASS_THROUGH:
>       operation = (enum tree_code) streamer_read_uhwi (ib);
>@@ -4177,10 +4195,13 @@ ipa_read_jump_function (struct lto_input
> 	ipa_set_ancestor_jf (jump_func, offset, formal_id, agg_preserved);
> 	break;
>       }
>+    default:
>+      fatal_error (UNKNOWN_LOCATION, "invalid jump function in LTO stream");
>     }
> 
>   count = streamer_read_uhwi (ib);
>-  vec_alloc (jump_func->agg.items, count);
>+  if (prevails)
>+    vec_alloc (jump_func->agg.items, count);
>   if (count)
>     {
>       struct bitpack_d bp = streamer_read_bitpack (ib);
>@@ -4191,7 +4212,8 @@ ipa_read_jump_function (struct lto_input
>       struct ipa_agg_jf_item item;
>       item.offset = streamer_read_uhwi (ib);
>       item.value = stream_read_tree (ib, data_in);
>-      jump_func->agg.items->quick_push (item);
>+      if (prevails)
>+        jump_func->agg.items->quick_push (item);
>     }
> 
>   struct bitpack_d bp = streamer_read_bitpack (ib);
>@@ -4200,7 +4222,8 @@ ipa_read_jump_function (struct lto_input
>     {
>       widest_int value = streamer_read_widest_int (ib);
>       widest_int mask = streamer_read_widest_int (ib);
>-      ipa_set_jfunc_bits (jump_func, value, mask);
>+      if (prevails)
>+        ipa_set_jfunc_bits (jump_func, value, mask);
>     }
>   else
>     jump_func->bits = NULL;
>@@ -4213,7 +4236,8 @@ ipa_read_jump_function (struct lto_input
> 						       VR_LAST);
>       tree min = stream_read_tree (ib, data_in);
>       tree max = stream_read_tree (ib, data_in);
>-      ipa_set_jfunc_vr (jump_func, type, min, max);
>+      if (prevails)
>+        ipa_set_jfunc_vr (jump_func, type, min, max);
>     }
>   else
>     jump_func->m_vr = NULL;
>@@ -4345,24 +4369,48 @@ ipa_write_node_info (struct output_block
>     }
> }
> 
>-/* If jump functions points to node we possibly can propagate into.
>-   At this moment symbol table is still not merged, but the prevailing
>-   symbol is always first in the list.  */
>+/* Stream in edge E from IB.  */
> 
>-static bool
>-jump_function_useful_p (symtab_node *node)
>+static void
>+ipa_read_edge_info (struct lto_input_block *ib,
>+		    struct data_in *data_in,
>+		    struct cgraph_edge *e, bool prevails)
> {
>-  /* While incremental linking we may end up getting function body later.  */
>-  if (flag_incremental_link == INCREMENTAL_LINK_LTO)
>-    return true;
>-  if (!TREE_PUBLIC (node->decl) && !DECL_EXTERNAL (node->decl))
>-    return true;
>-  for (int n = 10; node->previous_sharing_asm_name && n ; n--)
>-    node = node->previous_sharing_asm_name;
>-  if (node->previous_sharing_asm_name)
>-    node = symtab_node::get_for_asmname (DECL_ASSEMBLER_NAME (node->decl));
>-  gcc_assert (TREE_PUBLIC (node->decl));
>-  return node->definition;
>+  int count = streamer_read_uhwi (ib);
>+  bool contexts_computed = count & 1;
>+
>+  count /= 2;
>+  if (!count)
>+    return;
>+  if (prevails && e->possibly_call_in_translation_unit_p ())
>+    {
>+      struct ipa_edge_args *args = IPA_EDGE_REF (e);
>+      vec_safe_grow_cleared (args->jump_functions, count);
>+      if (contexts_computed)
>+	vec_safe_grow_cleared (args->polymorphic_call_contexts, count);
>+      for (int k = 0; k < count; k++)
>+	{
>+	  ipa_read_jump_function (ib, ipa_get_ith_jump_func (args, k), e,
>+				  data_in, prevails);
>+	  if (contexts_computed)
>+	    ipa_get_ith_polymorhic_call_context (args, k)->stream_in
>+							     (ib, data_in);
>+	}
>+    }
>+  else
>+    {
>+      for (int k = 0; k < count; k++)
>+	{
>+	  struct ipa_jump_func dummy;
>+	  ipa_read_jump_function (ib, &dummy, e,
>+				  data_in, prevails);
>+	  if (contexts_computed)
>+	    {
>+	      struct ipa_polymorphic_call_context ctx;
>+	      ctx.stream_in (ib, data_in);
>+	    }
>+	}
>+    }
> }
> 
> /* Stream in NODE info from IB.  */
>@@ -4371,82 +4419,50 @@ static void
> ipa_read_node_info (struct lto_input_block *ib, struct cgraph_node *node,
> 		    struct data_in *data_in)
> {
>-  struct ipa_node_params *info = IPA_NODE_REF (node);
>   int k;
>   struct cgraph_edge *e;
>   struct bitpack_d bp;
>+  bool prevails = node->prevailing_p ();
>+  struct ipa_node_params *info = prevails ? IPA_NODE_REF (node) : NULL;
> 
>-  ipa_alloc_node_params (node, streamer_read_uhwi (ib));
>-
>-  for (k = 0; k < ipa_get_param_count (info); k++)
>-    (*info->descriptors)[k].move_cost = streamer_read_uhwi (ib);
>+  int param_count = streamer_read_uhwi (ib);
>+  if (prevails)
>+    {
>+      ipa_alloc_node_params (node, param_count);
>+      for (k = 0; k < param_count; k++)
>+        (*info->descriptors)[k].move_cost = streamer_read_uhwi (ib);
>+      if (ipa_get_param_count (info) != 0)
>+	info->analysis_done = true;
>+      info->node_enqueued = false;
>+    }
>+  else
>+    for (k = 0; k < param_count; k++)
>+      streamer_read_uhwi (ib);
> 
>   bp = streamer_read_bitpack (ib);
>-  if (ipa_get_param_count (info) != 0)
>-    info->analysis_done = true;
>-  info->node_enqueued = false;
>-  for (k = 0; k < ipa_get_param_count (info); k++)
>-    ipa_set_param_used (info, k, bp_unpack_value (&bp, 1));
>-  for (k = 0; k < ipa_get_param_count (info); k++)
>+  for (k = 0; k < param_count; k++)
>     {
>-      ipa_set_controlled_uses (info, k, streamer_read_hwi (ib));
>-      (*info->descriptors)[k].decl_or_type = stream_read_tree (ib, data_in);
>+      bool used = bp_unpack_value (&bp, 1);
>+
>+      if (prevails)
>+        ipa_set_param_used (info, k, used);
>     }
>-  for (e = node->callees; e; e = e->next_callee)
>+  for (k = 0; k < param_count; k++)
>     {
>-      struct ipa_edge_args *args = IPA_EDGE_REF (e);
>-      int count = streamer_read_uhwi (ib);
>-      bool contexts_computed = count & 1;
>-      count /= 2;
>-
>-      if (!count)
>-	continue;
>-      if (!jump_function_useful_p (e->callee))
>-	{
>-          for (k = 0; k < count; k++)
>-	    {
>-	      struct ipa_jump_func dummy;
>-	      ipa_read_jump_function (ib, &dummy, e, data_in);
>-	      if (contexts_computed)
>-		{
>-		  struct ipa_polymorphic_call_context ctx;
>-		  ctx.stream_in (ib, data_in);
>-		}
>-	    }
>-	  continue;
>-	}
>-      vec_safe_grow_cleared (args->jump_functions, count);
>-      if (contexts_computed)
>-	vec_safe_grow_cleared (args->polymorphic_call_contexts, count);
>+      int nuses = streamer_read_hwi (ib);
>+      tree type = stream_read_tree (ib, data_in);
> 
>-      for (k = 0; k < ipa_get_cs_argument_count (args); k++)
>+      if (prevails)
> 	{
>-	  ipa_read_jump_function (ib, ipa_get_ith_jump_func (args, k), e,
>-				  data_in);
>-	  if (contexts_computed)
>-	    ipa_get_ith_polymorhic_call_context (args, k)->stream_in (ib, data_in);
>+	  ipa_set_controlled_uses (info, k, nuses);
>+	  (*info->descriptors)[k].decl_or_type = type;
> 	}
>     }
>+  for (e = node->callees; e; e = e->next_callee)
>+    ipa_read_edge_info (ib, data_in, e, prevails);
>   for (e = node->indirect_calls; e; e = e->next_callee)
>     {
>-      struct ipa_edge_args *args = IPA_EDGE_REF (e);
>-      int count = streamer_read_uhwi (ib);
>-      bool contexts_computed = count & 1;
>-      count /= 2;
>-
>-      if (count)
>-	{
>-	  vec_safe_grow_cleared (args->jump_functions, count);
>-	  if (contexts_computed)
>-	    vec_safe_grow_cleared (args->polymorphic_call_contexts, count);
>-          for (k = 0; k < ipa_get_cs_argument_count (args); k++)
>-	    {
>-	      ipa_read_jump_function (ib, ipa_get_ith_jump_func (args, k), e,
>-				      data_in);
>-	      if (contexts_computed)
>-		ipa_get_ith_polymorhic_call_context (args, k)->stream_in (ib, data_in);
>-	    }
>-	}
>+      ipa_read_edge_info (ib, data_in, e, prevails);
>       ipa_read_indirect_edge_info (ib, data_in, e);
>     }
> }
>Index: cgraph.c
>===================================================================
>--- cgraph.c	(revision 267122)
>+++ cgraph.c	(working copy)
>@@ -3766,6 +3780,41 @@ cgraph_edge::sreal_frequency ()
> 			       : caller->count);
> }
> 
>+
>+/* During LTO stream-in this can be used to check whether call can possibly
>+   be internal to the current translation unit.  */
>+
>+bool
>+cgraph_edge::possibly_call_in_translation_unit_p (void)
>+{
>+  gcc_checking_assert (in_lto_p && caller->prevailing_p ());
>+
>+  /* While incremental linking we may end up getting function body later.  */
>+  if (flag_incremental_link == INCREMENTAL_LINK_LTO)
>+    return true;
>+
>+  /* We may be smarter here and avoid streaming in indirect calls we can't
>+     track, but that would require arranging to stream the indirect call
>+     summary first.  */
>+  if (!callee)
>+    return true;
>+
>+  /* If callee is local to the original translation unit, it will be
>+     defined.  */
>+  if (!TREE_PUBLIC (callee->decl) && !DECL_EXTERNAL (callee->decl))
>+    return true;
>+
>+  /* Otherwise we need to lookup prevailing symbol (symbol table is not merged,
>+     yet) and see if it is a definition.  In fact we may also resolve aliases,
>+     but that is probably not too important.  */
>+  symtab_node *node = callee;
>+  for (int n = 10; node->previous_sharing_asm_name && n ; n--)
>+    node = node->previous_sharing_asm_name;
>+  if (node->previous_sharing_asm_name)
>+    node = symtab_node::get_for_asmname (DECL_ASSEMBLER_NAME (callee->decl));
>+  gcc_assert (TREE_PUBLIC (node->decl));
>+  return node->get_availability () >= AVAIL_AVAILABLE;
>+}
>+
> /* A stashed copy of "symtab" for use by selftest::symbol_table_test.
>    This needs to be a global so that it can be a GC root, and thus
>   prevent the stashed copy from being garbage-collected if the GC runs
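
For readers of the ipa_write_jump_function/ipa_read_jump_function hunks above,
the encoding trick is simply packing one extra bit into the type word, so
marking the ADDR_EXPR case costs no additional streamer words.  A
self-contained toy of that packing, with made-up names rather than the real
streamer API:

#include <cassert>

enum jf_type { JF_UNKNOWN, JF_CONST, JF_PASS_THROUGH };

/* Encode the jump function type together with an "operand is the ADDR_EXPR
   operand" flag in one unsigned value; the flag lives in bit 0.  */
static unsigned
encode_tag (jf_type type, bool addr_flag)
{
  return (unsigned) type * 2 + (addr_flag ? 1 : 0);
}

/* Decode the pair back; mirrors the low-bit split done in the patch.  */
static void
decode_tag (unsigned val, jf_type *type, bool *addr_flag)
{
  *addr_flag = (val & 1) != 0;
  *type = (jf_type) (val / 2);
}

int
main ()
{
  jf_type t;
  bool flag;

  decode_tag (encode_tag (JF_CONST, true), &t, &flag);
  assert (t == JF_CONST && flag);
  return 0;
}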

