Avid ggc_alloc and push_cfun during LTO streaming

Jan Hubicka hubicka@ucw.cz
Fri Oct 11 09:05:00 GMT 2019


Hi,
this patch prevents tree creation druing WPA stream out (to avoid
touching pages and triggering COW).  It fixes the following
 - gimple streamer produces MEM_REF wrappings for global decls.
   This is to preserve the type of access and is not necessary for
   WPA->LTRANS streaming when decls ar eno longer going to be merged.
 - we renumber stmt uids during streaming WPA summaries
 - loop optimizer is initialized in output_function.
After testing the patch I noticed that output_function does one extra
renumbering of stmts. This seems quite broken and I will fix it
incrementally.

Bootstrapped/regtested x86_64-linux, comitted.

	* gimple-streamer-out.c (output_gimple_stmt): Add explicit function
	parameter.
	* lto-streamer-out.c: Include tree-dfa.h.
	(output_cfg): Do not use cfun.
	(lto_prepare_function_for_streaming): New.
	(output_function): Do not push cfun; do not initialize loop optimizer.
	* lto-streamer.h (lto_prepare_function_for_streaming): Declare.
	* passes.c (ipa_write_summaries): Use it.
	(ipa_write_optimization_summaries): Do not modify bodies.
	* tree-dfa.c (renumber_gimple_stmt_uids): Add function parameter.
	* tree.dfa.h (renumber_gimple_stmt_uids): Update prototype.
	* tree-ssa-dse.c (pass_dse::execute): Update use of
	renumber_gimple_stmt_uids.
	* tree-ssa-math-opts.c (pass_optimize_widening_mul::execute): Likewise.

	* lto.c (lto_wpa_write_files): Prepare all bodies for streaming.
Index: gimple-streamer-out.c
===================================================================
--- gimple-streamer-out.c	(revision 276850)
+++ gimple-streamer-out.c	(working copy)
@@ -57,7 +57,7 @@ output_phi (struct output_block *ob, gph
 /* Emit statement STMT on the main stream of output block OB.  */
 
 static void
-output_gimple_stmt (struct output_block *ob, gimple *stmt)
+output_gimple_stmt (struct output_block *ob, struct function *fn, gimple *stmt)
 {
   unsigned i;
   enum gimple_code code;
@@ -80,7 +80,7 @@ output_gimple_stmt (struct output_block
 		     as_a <gassign *> (stmt)),
 		   1);
   bp_pack_value (&bp, gimple_has_volatile_ops (stmt), 1);
-  hist = gimple_histogram_value (cfun, stmt);
+  hist = gimple_histogram_value (fn, stmt);
   bp_pack_value (&bp, hist != NULL, 1);
   bp_pack_var_len_unsigned (&bp, stmt->subcode);
 
@@ -139,7 +139,7 @@ output_gimple_stmt (struct output_block
 	     so that we do not have to deal with type mismatches on
 	     merged symbols during IL read in.  The first operand
 	     of GIMPLE_DEBUG must be a decl, not MEM_REF, though.  */
-	  if (op && (i || !is_gimple_debug (stmt)))
+	  if (!flag_wpa && op && (i || !is_gimple_debug (stmt)))
 	    {
 	      basep = &op;
 	      if (TREE_CODE (*basep) == ADDR_EXPR)
@@ -147,7 +147,7 @@ output_gimple_stmt (struct output_block
 	      while (handled_component_p (*basep))
 		basep = &TREE_OPERAND (*basep, 0);
 	      if (VAR_P (*basep)
-		  && !auto_var_in_fn_p (*basep, current_function_decl)
+		  && !auto_var_in_fn_p (*basep, fn->decl)
 		  && !DECL_REGISTER (*basep))
 		{
 		  bool volatilep = TREE_THIS_VOLATILE (*basep);
@@ -228,7 +228,7 @@ output_bb (struct output_block *ob, basi
 	      print_gimple_stmt (streamer_dump_file, stmt, 0, TDF_SLIM);
 	    }
 
-	  output_gimple_stmt (ob, stmt);
+	  output_gimple_stmt (ob, fn, stmt);
 
 	  /* Emit the EH region holding STMT.  */
 	  region = lookup_stmt_eh_lp_fn (fn, stmt);
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 276850)
+++ lto/lto.c	(working copy)
@@ -304,6 +304,13 @@ lto_wpa_write_files (void)
 
   timevar_push (TV_WHOPR_WPA_IO);
 
+  cgraph_node *node;
+  /* Do body modifications needed for streaming before we fork out
+     worker processes.  */
+  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+    if (gimple_has_body_p (node->decl))
+      lto_prepare_function_for_streaming (node);
+
   /* Generate a prefix for the LTRANS unit files.  */
   blen = strlen (ltrans_output_list);
   temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 276850)
+++ lto-streamer-out.c	(working copy)
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.
 #include "debug.h"
 #include "omp-offload.h"
 #include "print-tree.h"
+#include "tree-dfa.h"
 
 
 static void lto_write_tree (struct output_block*, tree, bool);
@@ -1893,7 +1894,7 @@ output_cfg (struct output_block *ob, str
 
   streamer_write_hwi (ob, -1);
 
-  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
+  bb = ENTRY_BLOCK_PTR_FOR_FN (fn);
   while (bb->next_bb)
     {
       streamer_write_hwi (ob, bb->next_bb->index);
@@ -1902,9 +1903,6 @@ output_cfg (struct output_block *ob, str
 
   streamer_write_hwi (ob, -1);
 
-  /* ???  The cfgloop interface is tied to cfun.  */
-  gcc_assert (cfun == fn);
-
   /* Output the number of loops.  */
   streamer_write_uhwi (ob, number_of_loops (fn));
 
@@ -2062,6 +2060,22 @@ collect_block_tree_leafs (tree root, vec
       collect_block_tree_leafs (BLOCK_SUBBLOCKS (root), leafs);
 }
 
+/* This performs function body modifications that are needed for streaming
+   to work.  */
+
+void
+lto_prepare_function_for_streaming (struct cgraph_node *node)
+{
+  if (number_of_loops (DECL_STRUCT_FUNCTION (node->decl)))
+    {
+      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+      loop_optimizer_finalize ();
+      pop_cfun ();
+    }
+  renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (node->decl));
+}
+
 /* Output the body of function NODE->DECL.  */
 
 static void
@@ -2085,9 +2099,6 @@ output_function (struct cgraph_node *nod
 
   gcc_assert (current_function_decl == NULL_TREE && cfun == NULL);
 
-  /* Set current_function_decl and cfun.  */
-  push_cfun (fn);
-
   /* Make string 0 be a NULL string.  */
   streamer_write_char_stream (ob->string_stream, 0);
 
@@ -2124,9 +2135,6 @@ output_function (struct cgraph_node *nod
      debug info.  */
   if (gimple_has_body_p (function))
     {
-      /* Fixup loops if required to match discovery done in the reader.  */
-      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
-
       streamer_write_uhwi (ob, 1);
       output_struct_function_base (ob, fn);
 
@@ -2143,8 +2151,8 @@ output_function (struct cgraph_node *nod
 	 statement numbers.  We do not assign UIDs to PHIs here because
 	 virtual PHIs get re-computed on-the-fly which would make numbers
 	 inconsistent.  */
-      set_gimple_stmt_max_uid (cfun, 0);
-      FOR_ALL_BB_FN (bb, cfun)
+      set_gimple_stmt_max_uid (fn, 0);
+      FOR_ALL_BB_FN (bb, fn)
 	{
 	  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 	       gsi_next (&gsi))
@@ -2153,25 +2161,25 @@ output_function (struct cgraph_node *nod
 
 	      /* Virtual PHIs are not going to be streamed.  */
 	      if (!virtual_operand_p (gimple_phi_result (stmt)))
-	        gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	        gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
 	    }
 	  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
 	       gsi_next (&gsi))
 	    {
 	      gimple *stmt = gsi_stmt (gsi);
-	      gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	      gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
 	    }
 	}
       /* To avoid keeping duplicate gimple IDs in the statements, renumber
 	 virtual phis now.  */
-      FOR_ALL_BB_FN (bb, cfun)
+      FOR_ALL_BB_FN (bb, fn)
 	{
 	  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 	       gsi_next (&gsi))
 	    {
 	      gphi *stmt = gsi.phi ();
 	      if (virtual_operand_p (gimple_phi_result (stmt)))
-	        gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	        gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
 	    }
 	}
 
@@ -2183,9 +2191,6 @@ output_function (struct cgraph_node *nod
       streamer_write_record_start (ob, LTO_null);
 
       output_cfg (ob, fn);
-
-      loop_optimizer_finalize ();
-      pop_cfun ();
    }
   else
     streamer_write_uhwi (ob, 0);
Index: lto-streamer.h
===================================================================
--- lto-streamer.h	(revision 276850)
+++ lto-streamer.h	(working copy)
@@ -909,6 +909,7 @@ void lto_output_decl_state_refs (struct
 			         struct lto_out_decl_state *);
 void lto_output_location (struct output_block *, struct bitpack_d *, location_t);
 void lto_output_init_mode_table (void);
+void lto_prepare_function_for_streaming (cgraph_node *);
 
 
 /* In lto-cgraph.c  */
Index: passes.c
===================================================================
--- passes.c	(revision 276850)
+++ passes.c	(working copy)
@@ -2705,20 +2705,12 @@ ipa_write_summaries (void)
     {
       struct cgraph_node *node = order[i];
 
-      if (gimple_has_body_p (node->decl))
+      if (node->definition && node->need_lto_streaming)
 	{
-	  /* When streaming out references to statements as part of some IPA
-	     pass summary, the statements need to have uids assigned and the
-	     following does that for all the IPA passes here. Naturally, this
-	     ordering then matches the one IPA-passes get in their stmt_fixup
-	     hooks.  */
-
-	  push_cfun (DECL_STRUCT_FUNCTION (node->decl));
-	  renumber_gimple_stmt_uids ();
-	  pop_cfun ();
+	  if (gimple_has_body_p (node->decl))
+	    lto_prepare_function_for_streaming (node);
+	  lto_set_symtab_encoder_in_partition (encoder, node);
 	}
-      if (node->definition && node->need_lto_streaming)
-        lto_set_symtab_encoder_in_partition (encoder, node);
     }
 
   FOR_EACH_DEFINED_FUNCTION (node)
@@ -2786,28 +2778,13 @@ void
 ipa_write_optimization_summaries (lto_symtab_encoder_t encoder)
 {
   struct lto_out_decl_state *state = lto_new_out_decl_state ();
-  lto_symtab_encoder_iterator lsei;
   state->symtab_node_encoder = encoder;
 
   lto_output_init_mode_table ();
   lto_push_out_decl_state (state);
-  for (lsei = lsei_start_function_in_partition (encoder);
-       !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei))
-    {
-      struct cgraph_node *node = lsei_cgraph_node (lsei);
-      /* When streaming out references to statements as part of some IPA
-	 pass summary, the statements need to have uids assigned.
-
-	 For functions newly born at WPA stage we need to initialize
-	 the uids here.  */
-      if (node->definition
-	  && gimple_has_body_p (node->decl))
-	{
-	  push_cfun (DECL_STRUCT_FUNCTION (node->decl));
-	  renumber_gimple_stmt_uids ();
-	  pop_cfun ();
-	}
-    }
+
+  /* Be sure that we did not forget to renumber stmt uids.  */
+  gcc_checking_assert (flag_wpa);
 
   gcc_assert (flag_wpa);
   pass_manager *passes = g->get_passes ();
Index: tree-dfa.c
===================================================================
--- tree-dfa.c	(revision 276850)
+++ tree-dfa.c	(working copy)
@@ -61,23 +61,23 @@ static void collect_dfa_stats (struct df
 /* Renumber all of the gimple stmt uids.  */
 
 void
-renumber_gimple_stmt_uids (void)
+renumber_gimple_stmt_uids (struct function *fun)
 {
   basic_block bb;
 
-  set_gimple_stmt_max_uid (cfun, 0);
-  FOR_ALL_BB_FN (bb, cfun)
+  set_gimple_stmt_max_uid (fun, 0);
+  FOR_ALL_BB_FN (bb, fun)
     {
       gimple_stmt_iterator bsi;
       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
 	{
 	  gimple *stmt = gsi_stmt (bsi);
-	  gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	  gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
 	}
       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
 	{
 	  gimple *stmt = gsi_stmt (bsi);
-	  gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
+	  gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
 	}
     }
 }
Index: tree-dfa.h
===================================================================
--- tree-dfa.h	(revision 276850)
+++ tree-dfa.h	(working copy)
@@ -20,7 +20,7 @@ along with GCC; see the file COPYING3.
 #ifndef GCC_TREE_DFA_H
 #define GCC_TREE_DFA_H
 
-extern void renumber_gimple_stmt_uids (void);
+extern void renumber_gimple_stmt_uids (struct function *);
 extern void renumber_gimple_stmt_uids_in_blocks (basic_block *, int);
 extern void dump_variable (FILE *, tree);
 extern void debug_variable (tree);
Index: tree-ssa-dse.c
===================================================================
--- tree-ssa-dse.c	(revision 276850)
+++ tree-ssa-dse.c	(working copy)
@@ -1113,7 +1113,7 @@ pass_dse::execute (function *fun)
 {
   need_eh_cleanup = BITMAP_ALLOC (NULL);
 
-  renumber_gimple_stmt_uids ();
+  renumber_gimple_stmt_uids (cfun);
 
   /* We might consider making this a property of each pass so that it
      can be [re]computed on an as-needed basis.  Particularly since
Index: tree-ssa-math-opts.c
===================================================================
--- tree-ssa-math-opts.c	(revision 276850)
+++ tree-ssa-math-opts.c	(working copy)
@@ -3850,7 +3850,7 @@ pass_optimize_widening_mul::execute (fun
 
   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
   calculate_dominance_info (CDI_DOMINATORS);
-  renumber_gimple_stmt_uids ();
+  renumber_gimple_stmt_uids (cfun);
 
   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
 



More information about the Gcc-patches mailing list