Avid ggc_alloc and push_cfun during LTO streaming
Richard Biener
richard.guenther@gmail.com
Fri Oct 11 11:41:00 GMT 2019
On Fri, Oct 11, 2019 at 11:03 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> Hi,
> this patch prevents tree creation druing WPA stream out (to avoid
> touching pages and triggering COW). It fixes the following
> - gimple streamer produces MEM_REF wrappings for global decls.
> This is to preserve the type of access and is not necessary for
> WPA->LTRANS streaming when decls ar eno longer going to be merged.
> - we renumber stmt uids during streaming WPA summaries
> - loop optimizer is initialized in output_function.
> After testing the patch I noticed that output_function does one extra
> renumbering of stmts. This seems quite broken and I will fix it
> incrementally.
>
> Bootstrapped/regtested x86_64-linux, comitted.
Huh. Why do we stream function bodies at WPA time at all?
We should already have input sections we can copy/remap?
That is, why does gcc_assert (!flag_wpa) in output_function trip?
Richard.
>
> * gimple-streamer-out.c (output_gimple_stmt): Add explicit function
> parameter.
> * lto-streamer-out.c: Include tree-dfa.h.
> (output_cfg): Do not use cfun.
> (lto_prepare_function_for_streaming): New.
> (output_function): Do not push cfun; do not initialize loop optimizer.
> * lto-streamer.h (lto_prepare_function_for_streaming): Declare.
> * passes.c (ipa_write_summaries): Use it.
> (ipa_write_optimization_summaries): Do not modify bodies.
> * tree-dfa.c (renumber_gimple_stmt_uids): Add function parameter.
> * tree.dfa.h (renumber_gimple_stmt_uids): Update prototype.
> * tree-ssa-dse.c (pass_dse::execute): Update use of
> renumber_gimple_stmt_uids.
> * tree-ssa-math-opts.c (pass_optimize_widening_mul::execute): Likewise.
>
> * lto.c (lto_wpa_write_files): Prepare all bodies for streaming.
> Index: gimple-streamer-out.c
> ===================================================================
> --- gimple-streamer-out.c (revision 276850)
> +++ gimple-streamer-out.c (working copy)
> @@ -57,7 +57,7 @@ output_phi (struct output_block *ob, gph
> /* Emit statement STMT on the main stream of output block OB. */
>
> static void
> -output_gimple_stmt (struct output_block *ob, gimple *stmt)
> +output_gimple_stmt (struct output_block *ob, struct function *fn, gimple *stmt)
> {
> unsigned i;
> enum gimple_code code;
> @@ -80,7 +80,7 @@ output_gimple_stmt (struct output_block
> as_a <gassign *> (stmt)),
> 1);
> bp_pack_value (&bp, gimple_has_volatile_ops (stmt), 1);
> - hist = gimple_histogram_value (cfun, stmt);
> + hist = gimple_histogram_value (fn, stmt);
> bp_pack_value (&bp, hist != NULL, 1);
> bp_pack_var_len_unsigned (&bp, stmt->subcode);
>
> @@ -139,7 +139,7 @@ output_gimple_stmt (struct output_block
> so that we do not have to deal with type mismatches on
> merged symbols during IL read in. The first operand
> of GIMPLE_DEBUG must be a decl, not MEM_REF, though. */
> - if (op && (i || !is_gimple_debug (stmt)))
> + if (!flag_wpa && op && (i || !is_gimple_debug (stmt)))
> {
> basep = &op;
> if (TREE_CODE (*basep) == ADDR_EXPR)
> @@ -147,7 +147,7 @@ output_gimple_stmt (struct output_block
> while (handled_component_p (*basep))
> basep = &TREE_OPERAND (*basep, 0);
> if (VAR_P (*basep)
> - && !auto_var_in_fn_p (*basep, current_function_decl)
> + && !auto_var_in_fn_p (*basep, fn->decl)
> && !DECL_REGISTER (*basep))
> {
> bool volatilep = TREE_THIS_VOLATILE (*basep);
> @@ -228,7 +228,7 @@ output_bb (struct output_block *ob, basi
> print_gimple_stmt (streamer_dump_file, stmt, 0, TDF_SLIM);
> }
>
> - output_gimple_stmt (ob, stmt);
> + output_gimple_stmt (ob, fn, stmt);
>
> /* Emit the EH region holding STMT. */
> region = lookup_stmt_eh_lp_fn (fn, stmt);
> Index: lto/lto.c
> ===================================================================
> --- lto/lto.c (revision 276850)
> +++ lto/lto.c (working copy)
> @@ -304,6 +304,13 @@ lto_wpa_write_files (void)
>
> timevar_push (TV_WHOPR_WPA_IO);
>
> + cgraph_node *node;
> + /* Do body modifications needed for streaming before we fork out
> + worker processes. */
> + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
> + if (gimple_has_body_p (node->decl))
> + lto_prepare_function_for_streaming (node);
> +
> /* Generate a prefix for the LTRANS unit files. */
> blen = strlen (ltrans_output_list);
> temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
> Index: lto-streamer-out.c
> ===================================================================
> --- lto-streamer-out.c (revision 276850)
> +++ lto-streamer-out.c (working copy)
> @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.
> #include "debug.h"
> #include "omp-offload.h"
> #include "print-tree.h"
> +#include "tree-dfa.h"
>
>
> static void lto_write_tree (struct output_block*, tree, bool);
> @@ -1893,7 +1894,7 @@ output_cfg (struct output_block *ob, str
>
> streamer_write_hwi (ob, -1);
>
> - bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
> + bb = ENTRY_BLOCK_PTR_FOR_FN (fn);
> while (bb->next_bb)
> {
> streamer_write_hwi (ob, bb->next_bb->index);
> @@ -1902,9 +1903,6 @@ output_cfg (struct output_block *ob, str
>
> streamer_write_hwi (ob, -1);
>
> - /* ??? The cfgloop interface is tied to cfun. */
> - gcc_assert (cfun == fn);
> -
> /* Output the number of loops. */
> streamer_write_uhwi (ob, number_of_loops (fn));
>
> @@ -2062,6 +2060,22 @@ collect_block_tree_leafs (tree root, vec
> collect_block_tree_leafs (BLOCK_SUBBLOCKS (root), leafs);
> }
>
> +/* This performs function body modifications that are needed for streaming
> + to work. */
> +
> +void
> +lto_prepare_function_for_streaming (struct cgraph_node *node)
> +{
> + if (number_of_loops (DECL_STRUCT_FUNCTION (node->decl)))
> + {
> + push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> + loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
> + loop_optimizer_finalize ();
> + pop_cfun ();
> + }
> + renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (node->decl));
> +}
> +
> /* Output the body of function NODE->DECL. */
>
> static void
> @@ -2085,9 +2099,6 @@ output_function (struct cgraph_node *nod
>
> gcc_assert (current_function_decl == NULL_TREE && cfun == NULL);
>
> - /* Set current_function_decl and cfun. */
> - push_cfun (fn);
> -
> /* Make string 0 be a NULL string. */
> streamer_write_char_stream (ob->string_stream, 0);
>
> @@ -2124,9 +2135,6 @@ output_function (struct cgraph_node *nod
> debug info. */
> if (gimple_has_body_p (function))
> {
> - /* Fixup loops if required to match discovery done in the reader. */
> - loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
> -
> streamer_write_uhwi (ob, 1);
> output_struct_function_base (ob, fn);
>
> @@ -2143,8 +2151,8 @@ output_function (struct cgraph_node *nod
> statement numbers. We do not assign UIDs to PHIs here because
> virtual PHIs get re-computed on-the-fly which would make numbers
> inconsistent. */
> - set_gimple_stmt_max_uid (cfun, 0);
> - FOR_ALL_BB_FN (bb, cfun)
> + set_gimple_stmt_max_uid (fn, 0);
> + FOR_ALL_BB_FN (bb, fn)
> {
> for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
> gsi_next (&gsi))
> @@ -2153,25 +2161,25 @@ output_function (struct cgraph_node *nod
>
> /* Virtual PHIs are not going to be streamed. */
> if (!virtual_operand_p (gimple_phi_result (stmt)))
> - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
> }
> for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
> gsi_next (&gsi))
> {
> gimple *stmt = gsi_stmt (gsi);
> - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
> }
> }
> /* To avoid keeping duplicate gimple IDs in the statements, renumber
> virtual phis now. */
> - FOR_ALL_BB_FN (bb, cfun)
> + FOR_ALL_BB_FN (bb, fn)
> {
> for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
> gsi_next (&gsi))
> {
> gphi *stmt = gsi.phi ();
> if (virtual_operand_p (gimple_phi_result (stmt)))
> - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
> }
> }
>
> @@ -2183,9 +2191,6 @@ output_function (struct cgraph_node *nod
> streamer_write_record_start (ob, LTO_null);
>
> output_cfg (ob, fn);
> -
> - loop_optimizer_finalize ();
> - pop_cfun ();
> }
> else
> streamer_write_uhwi (ob, 0);
> Index: lto-streamer.h
> ===================================================================
> --- lto-streamer.h (revision 276850)
> +++ lto-streamer.h (working copy)
> @@ -909,6 +909,7 @@ void lto_output_decl_state_refs (struct
> struct lto_out_decl_state *);
> void lto_output_location (struct output_block *, struct bitpack_d *, location_t);
> void lto_output_init_mode_table (void);
> +void lto_prepare_function_for_streaming (cgraph_node *);
>
>
> /* In lto-cgraph.c */
> Index: passes.c
> ===================================================================
> --- passes.c (revision 276850)
> +++ passes.c (working copy)
> @@ -2705,20 +2705,12 @@ ipa_write_summaries (void)
> {
> struct cgraph_node *node = order[i];
>
> - if (gimple_has_body_p (node->decl))
> + if (node->definition && node->need_lto_streaming)
> {
> - /* When streaming out references to statements as part of some IPA
> - pass summary, the statements need to have uids assigned and the
> - following does that for all the IPA passes here. Naturally, this
> - ordering then matches the one IPA-passes get in their stmt_fixup
> - hooks. */
> -
> - push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> - renumber_gimple_stmt_uids ();
> - pop_cfun ();
> + if (gimple_has_body_p (node->decl))
> + lto_prepare_function_for_streaming (node);
> + lto_set_symtab_encoder_in_partition (encoder, node);
> }
> - if (node->definition && node->need_lto_streaming)
> - lto_set_symtab_encoder_in_partition (encoder, node);
> }
>
> FOR_EACH_DEFINED_FUNCTION (node)
> @@ -2786,28 +2778,13 @@ void
> ipa_write_optimization_summaries (lto_symtab_encoder_t encoder)
> {
> struct lto_out_decl_state *state = lto_new_out_decl_state ();
> - lto_symtab_encoder_iterator lsei;
> state->symtab_node_encoder = encoder;
>
> lto_output_init_mode_table ();
> lto_push_out_decl_state (state);
> - for (lsei = lsei_start_function_in_partition (encoder);
> - !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei))
> - {
> - struct cgraph_node *node = lsei_cgraph_node (lsei);
> - /* When streaming out references to statements as part of some IPA
> - pass summary, the statements need to have uids assigned.
> -
> - For functions newly born at WPA stage we need to initialize
> - the uids here. */
> - if (node->definition
> - && gimple_has_body_p (node->decl))
> - {
> - push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> - renumber_gimple_stmt_uids ();
> - pop_cfun ();
> - }
> - }
> +
> + /* Be sure that we did not forget to renumber stmt uids. */
> + gcc_checking_assert (flag_wpa);
>
> gcc_assert (flag_wpa);
> pass_manager *passes = g->get_passes ();
> Index: tree-dfa.c
> ===================================================================
> --- tree-dfa.c (revision 276850)
> +++ tree-dfa.c (working copy)
> @@ -61,23 +61,23 @@ static void collect_dfa_stats (struct df
> /* Renumber all of the gimple stmt uids. */
>
> void
> -renumber_gimple_stmt_uids (void)
> +renumber_gimple_stmt_uids (struct function *fun)
> {
> basic_block bb;
>
> - set_gimple_stmt_max_uid (cfun, 0);
> - FOR_ALL_BB_FN (bb, cfun)
> + set_gimple_stmt_max_uid (fun, 0);
> + FOR_ALL_BB_FN (bb, fun)
> {
> gimple_stmt_iterator bsi;
> for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
> {
> gimple *stmt = gsi_stmt (bsi);
> - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
> }
> for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
> {
> gimple *stmt = gsi_stmt (bsi);
> - gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> + gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
> }
> }
> }
> Index: tree-dfa.h
> ===================================================================
> --- tree-dfa.h (revision 276850)
> +++ tree-dfa.h (working copy)
> @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3.
> #ifndef GCC_TREE_DFA_H
> #define GCC_TREE_DFA_H
>
> -extern void renumber_gimple_stmt_uids (void);
> +extern void renumber_gimple_stmt_uids (struct function *);
> extern void renumber_gimple_stmt_uids_in_blocks (basic_block *, int);
> extern void dump_variable (FILE *, tree);
> extern void debug_variable (tree);
> Index: tree-ssa-dse.c
> ===================================================================
> --- tree-ssa-dse.c (revision 276850)
> +++ tree-ssa-dse.c (working copy)
> @@ -1113,7 +1113,7 @@ pass_dse::execute (function *fun)
> {
> need_eh_cleanup = BITMAP_ALLOC (NULL);
>
> - renumber_gimple_stmt_uids ();
> + renumber_gimple_stmt_uids (cfun);
>
> /* We might consider making this a property of each pass so that it
> can be [re]computed on an as-needed basis. Particularly since
> Index: tree-ssa-math-opts.c
> ===================================================================
> --- tree-ssa-math-opts.c (revision 276850)
> +++ tree-ssa-math-opts.c (working copy)
> @@ -3850,7 +3850,7 @@ pass_optimize_widening_mul::execute (fun
>
> memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
> calculate_dominance_info (CDI_DOMINATORS);
> - renumber_gimple_stmt_uids ();
> + renumber_gimple_stmt_uids (cfun);
>
> math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
>
More information about the Gcc-patches
mailing list