Avid ggc_alloc and push_cfun during LTO streaming

Richard Biener richard.guenther@gmail.com
Fri Oct 11 11:41:00 GMT 2019


On Fri, Oct 11, 2019 at 11:03 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> Hi,
> this patch prevents tree creation druing WPA stream out (to avoid
> touching pages and triggering COW).  It fixes the following
>  - gimple streamer produces MEM_REF wrappings for global decls.
>    This is to preserve the type of access and is not necessary for
>    WPA->LTRANS streaming when decls ar eno longer going to be merged.
>  - we renumber stmt uids during streaming WPA summaries
>  - loop optimizer is initialized in output_function.
> After testing the patch I noticed that output_function does one extra
> renumbering of stmts. This seems quite broken and I will fix it
> incrementally.
>
> Bootstrapped/regtested x86_64-linux, comitted.

Huh.  Why do we stream function bodies at WPA time at all?
We should already have input sections we can copy/remap?

That is, why does gcc_assert (!flag_wpa) in output_function trip?

Richard.

>
>         * gimple-streamer-out.c (output_gimple_stmt): Add explicit function
>         parameter.
>         * lto-streamer-out.c: Include tree-dfa.h.
>         (output_cfg): Do not use cfun.
>         (lto_prepare_function_for_streaming): New.
>         (output_function): Do not push cfun; do not initialize loop optimizer.
>         * lto-streamer.h (lto_prepare_function_for_streaming): Declare.
>         * passes.c (ipa_write_summaries): Use it.
>         (ipa_write_optimization_summaries): Do not modify bodies.
>         * tree-dfa.c (renumber_gimple_stmt_uids): Add function parameter.
>         * tree.dfa.h (renumber_gimple_stmt_uids): Update prototype.
>         * tree-ssa-dse.c (pass_dse::execute): Update use of
>         renumber_gimple_stmt_uids.
>         * tree-ssa-math-opts.c (pass_optimize_widening_mul::execute): Likewise.
>
>         * lto.c (lto_wpa_write_files): Prepare all bodies for streaming.
> Index: gimple-streamer-out.c
> ===================================================================
> --- gimple-streamer-out.c       (revision 276850)
> +++ gimple-streamer-out.c       (working copy)
> @@ -57,7 +57,7 @@ output_phi (struct output_block *ob, gph
>  /* Emit statement STMT on the main stream of output block OB.  */
>
>  static void
> -output_gimple_stmt (struct output_block *ob, gimple *stmt)
> +output_gimple_stmt (struct output_block *ob, struct function *fn, gimple *stmt)
>  {
>    unsigned i;
>    enum gimple_code code;
> @@ -80,7 +80,7 @@ output_gimple_stmt (struct output_block
>                      as_a <gassign *> (stmt)),
>                    1);
>    bp_pack_value (&bp, gimple_has_volatile_ops (stmt), 1);
> -  hist = gimple_histogram_value (cfun, stmt);
> +  hist = gimple_histogram_value (fn, stmt);
>    bp_pack_value (&bp, hist != NULL, 1);
>    bp_pack_var_len_unsigned (&bp, stmt->subcode);
>
> @@ -139,7 +139,7 @@ output_gimple_stmt (struct output_block
>              so that we do not have to deal with type mismatches on
>              merged symbols during IL read in.  The first operand
>              of GIMPLE_DEBUG must be a decl, not MEM_REF, though.  */
> -         if (op && (i || !is_gimple_debug (stmt)))
> +         if (!flag_wpa && op && (i || !is_gimple_debug (stmt)))
>             {
>               basep = &op;
>               if (TREE_CODE (*basep) == ADDR_EXPR)
> @@ -147,7 +147,7 @@ output_gimple_stmt (struct output_block
>               while (handled_component_p (*basep))
>                 basep = &TREE_OPERAND (*basep, 0);
>               if (VAR_P (*basep)
> -                 && !auto_var_in_fn_p (*basep, current_function_decl)
> +                 && !auto_var_in_fn_p (*basep, fn->decl)
>                   && !DECL_REGISTER (*basep))
>                 {
>                   bool volatilep = TREE_THIS_VOLATILE (*basep);
> @@ -228,7 +228,7 @@ output_bb (struct output_block *ob, basi
>               print_gimple_stmt (streamer_dump_file, stmt, 0, TDF_SLIM);
>             }
>
> -         output_gimple_stmt (ob, stmt);
> +         output_gimple_stmt (ob, fn, stmt);
>
>           /* Emit the EH region holding STMT.  */
>           region = lookup_stmt_eh_lp_fn (fn, stmt);
> Index: lto/lto.c
> ===================================================================
> --- lto/lto.c   (revision 276850)
> +++ lto/lto.c   (working copy)
> @@ -304,6 +304,13 @@ lto_wpa_write_files (void)
>
>    timevar_push (TV_WHOPR_WPA_IO);
>
> +  cgraph_node *node;
> +  /* Do body modifications needed for streaming before we fork out
> +     worker processes.  */
> +  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
> +    if (gimple_has_body_p (node->decl))
> +      lto_prepare_function_for_streaming (node);
> +
>    /* Generate a prefix for the LTRANS unit files.  */
>    blen = strlen (ltrans_output_list);
>    temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
> Index: lto-streamer-out.c
> ===================================================================
> --- lto-streamer-out.c  (revision 276850)
> +++ lto-streamer-out.c  (working copy)
> @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.
>  #include "debug.h"
>  #include "omp-offload.h"
>  #include "print-tree.h"
> +#include "tree-dfa.h"
>
>
>  static void lto_write_tree (struct output_block*, tree, bool);
> @@ -1893,7 +1894,7 @@ output_cfg (struct output_block *ob, str
>
>    streamer_write_hwi (ob, -1);
>
> -  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
> +  bb = ENTRY_BLOCK_PTR_FOR_FN (fn);
>    while (bb->next_bb)
>      {
>        streamer_write_hwi (ob, bb->next_bb->index);
> @@ -1902,9 +1903,6 @@ output_cfg (struct output_block *ob, str
>
>    streamer_write_hwi (ob, -1);
>
> -  /* ???  The cfgloop interface is tied to cfun.  */
> -  gcc_assert (cfun == fn);
> -
>    /* Output the number of loops.  */
>    streamer_write_uhwi (ob, number_of_loops (fn));
>
> @@ -2062,6 +2060,22 @@ collect_block_tree_leafs (tree root, vec
>        collect_block_tree_leafs (BLOCK_SUBBLOCKS (root), leafs);
>  }
>
> +/* This performs function body modifications that are needed for streaming
> +   to work.  */
> +
> +void
> +lto_prepare_function_for_streaming (struct cgraph_node *node)
> +{
> +  if (number_of_loops (DECL_STRUCT_FUNCTION (node->decl)))
> +    {
> +      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> +      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
> +      loop_optimizer_finalize ();
> +      pop_cfun ();
> +    }
> +  renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (node->decl));
> +}
> +
>  /* Output the body of function NODE->DECL.  */
>
>  static void
> @@ -2085,9 +2099,6 @@ output_function (struct cgraph_node *nod
>
>    gcc_assert (current_function_decl == NULL_TREE && cfun == NULL);
>
> -  /* Set current_function_decl and cfun.  */
> -  push_cfun (fn);
> -
>    /* Make string 0 be a NULL string.  */
>    streamer_write_char_stream (ob->string_stream, 0);
>
> @@ -2124,9 +2135,6 @@ output_function (struct cgraph_node *nod
>       debug info.  */
>    if (gimple_has_body_p (function))
>      {
> -      /* Fixup loops if required to match discovery done in the reader.  */
> -      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
> -
>        streamer_write_uhwi (ob, 1);
>        output_struct_function_base (ob, fn);
>
> @@ -2143,8 +2151,8 @@ output_function (struct cgraph_node *nod
>          statement numbers.  We do not assign UIDs to PHIs here because
>          virtual PHIs get re-computed on-the-fly which would make numbers
>          inconsistent.  */
> -      set_gimple_stmt_max_uid (cfun, 0);
> -      FOR_ALL_BB_FN (bb, cfun)
> +      set_gimple_stmt_max_uid (fn, 0);
> +      FOR_ALL_BB_FN (bb, fn)
>         {
>           for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
>                gsi_next (&gsi))
> @@ -2153,25 +2161,25 @@ output_function (struct cgraph_node *nod
>
>               /* Virtual PHIs are not going to be streamed.  */
>               if (!virtual_operand_p (gimple_phi_result (stmt)))
> -               gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +               gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
>             }
>           for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
>                gsi_next (&gsi))
>             {
>               gimple *stmt = gsi_stmt (gsi);
> -             gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +             gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
>             }
>         }
>        /* To avoid keeping duplicate gimple IDs in the statements, renumber
>          virtual phis now.  */
> -      FOR_ALL_BB_FN (bb, cfun)
> +      FOR_ALL_BB_FN (bb, fn)
>         {
>           for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
>                gsi_next (&gsi))
>             {
>               gphi *stmt = gsi.phi ();
>               if (virtual_operand_p (gimple_phi_result (stmt)))
> -               gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +               gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fn));
>             }
>         }
>
> @@ -2183,9 +2191,6 @@ output_function (struct cgraph_node *nod
>        streamer_write_record_start (ob, LTO_null);
>
>        output_cfg (ob, fn);
> -
> -      loop_optimizer_finalize ();
> -      pop_cfun ();
>     }
>    else
>      streamer_write_uhwi (ob, 0);
> Index: lto-streamer.h
> ===================================================================
> --- lto-streamer.h      (revision 276850)
> +++ lto-streamer.h      (working copy)
> @@ -909,6 +909,7 @@ void lto_output_decl_state_refs (struct
>                                  struct lto_out_decl_state *);
>  void lto_output_location (struct output_block *, struct bitpack_d *, location_t);
>  void lto_output_init_mode_table (void);
> +void lto_prepare_function_for_streaming (cgraph_node *);
>
>
>  /* In lto-cgraph.c  */
> Index: passes.c
> ===================================================================
> --- passes.c    (revision 276850)
> +++ passes.c    (working copy)
> @@ -2705,20 +2705,12 @@ ipa_write_summaries (void)
>      {
>        struct cgraph_node *node = order[i];
>
> -      if (gimple_has_body_p (node->decl))
> +      if (node->definition && node->need_lto_streaming)
>         {
> -         /* When streaming out references to statements as part of some IPA
> -            pass summary, the statements need to have uids assigned and the
> -            following does that for all the IPA passes here. Naturally, this
> -            ordering then matches the one IPA-passes get in their stmt_fixup
> -            hooks.  */
> -
> -         push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> -         renumber_gimple_stmt_uids ();
> -         pop_cfun ();
> +         if (gimple_has_body_p (node->decl))
> +           lto_prepare_function_for_streaming (node);
> +         lto_set_symtab_encoder_in_partition (encoder, node);
>         }
> -      if (node->definition && node->need_lto_streaming)
> -        lto_set_symtab_encoder_in_partition (encoder, node);
>      }
>
>    FOR_EACH_DEFINED_FUNCTION (node)
> @@ -2786,28 +2778,13 @@ void
>  ipa_write_optimization_summaries (lto_symtab_encoder_t encoder)
>  {
>    struct lto_out_decl_state *state = lto_new_out_decl_state ();
> -  lto_symtab_encoder_iterator lsei;
>    state->symtab_node_encoder = encoder;
>
>    lto_output_init_mode_table ();
>    lto_push_out_decl_state (state);
> -  for (lsei = lsei_start_function_in_partition (encoder);
> -       !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei))
> -    {
> -      struct cgraph_node *node = lsei_cgraph_node (lsei);
> -      /* When streaming out references to statements as part of some IPA
> -        pass summary, the statements need to have uids assigned.
> -
> -        For functions newly born at WPA stage we need to initialize
> -        the uids here.  */
> -      if (node->definition
> -         && gimple_has_body_p (node->decl))
> -       {
> -         push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> -         renumber_gimple_stmt_uids ();
> -         pop_cfun ();
> -       }
> -    }
> +
> +  /* Be sure that we did not forget to renumber stmt uids.  */
> +  gcc_checking_assert (flag_wpa);
>
>    gcc_assert (flag_wpa);
>    pass_manager *passes = g->get_passes ();
> Index: tree-dfa.c
> ===================================================================
> --- tree-dfa.c  (revision 276850)
> +++ tree-dfa.c  (working copy)
> @@ -61,23 +61,23 @@ static void collect_dfa_stats (struct df
>  /* Renumber all of the gimple stmt uids.  */
>
>  void
> -renumber_gimple_stmt_uids (void)
> +renumber_gimple_stmt_uids (struct function *fun)
>  {
>    basic_block bb;
>
> -  set_gimple_stmt_max_uid (cfun, 0);
> -  FOR_ALL_BB_FN (bb, cfun)
> +  set_gimple_stmt_max_uid (fun, 0);
> +  FOR_ALL_BB_FN (bb, fun)
>      {
>        gimple_stmt_iterator bsi;
>        for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
>         {
>           gimple *stmt = gsi_stmt (bsi);
> -         gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +         gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
>         }
>        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
>         {
>           gimple *stmt = gsi_stmt (bsi);
> -         gimple_set_uid (stmt, inc_gimple_stmt_max_uid (cfun));
> +         gimple_set_uid (stmt, inc_gimple_stmt_max_uid (fun));
>         }
>      }
>  }
> Index: tree-dfa.h
> ===================================================================
> --- tree-dfa.h  (revision 276850)
> +++ tree-dfa.h  (working copy)
> @@ -20,7 +20,7 @@ along with GCC; see the file COPYING3.
>  #ifndef GCC_TREE_DFA_H
>  #define GCC_TREE_DFA_H
>
> -extern void renumber_gimple_stmt_uids (void);
> +extern void renumber_gimple_stmt_uids (struct function *);
>  extern void renumber_gimple_stmt_uids_in_blocks (basic_block *, int);
>  extern void dump_variable (FILE *, tree);
>  extern void debug_variable (tree);
> Index: tree-ssa-dse.c
> ===================================================================
> --- tree-ssa-dse.c      (revision 276850)
> +++ tree-ssa-dse.c      (working copy)
> @@ -1113,7 +1113,7 @@ pass_dse::execute (function *fun)
>  {
>    need_eh_cleanup = BITMAP_ALLOC (NULL);
>
> -  renumber_gimple_stmt_uids ();
> +  renumber_gimple_stmt_uids (cfun);
>
>    /* We might consider making this a property of each pass so that it
>       can be [re]computed on an as-needed basis.  Particularly since
> Index: tree-ssa-math-opts.c
> ===================================================================
> --- tree-ssa-math-opts.c        (revision 276850)
> +++ tree-ssa-math-opts.c        (working copy)
> @@ -3850,7 +3850,7 @@ pass_optimize_widening_mul::execute (fun
>
>    memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
>    calculate_dominance_info (CDI_DOMINATORS);
> -  renumber_gimple_stmt_uids ();
> +  renumber_gimple_stmt_uids (cfun);
>
>    math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
>



More information about the Gcc-patches mailing list