Free decl_in_states when not needed
Richard Biener
rguenther@suse.de
Wed Aug 28 11:11:00 GMT 2013
On Wed, 28 Aug 2013, Jan Hubicka wrote:
> Hi,
> this patch saves almost 1Gb of GGC reachable memory for firefox.
> Currently we point to all streamed in trees from global decl in state and from
> function specific states. The states are big and they dangle pointers.
> This patch gets rid of states we do not need.
>
> We run GGC collect just once during usual WPA compilation. Currently
> it is just after tree loading that introduce very little garbage. This
> patch moves it after unreachable code removal when freeing happen.
>
> Still we do not suceed in much of reuse in between GGC and heap, so peak memory
> use in TOP reduces by only 300MB. It may be interesting to allow random access
> into per-function decl in states and read them only after decl merging (and
> possibly unreachable function removal).
>
> Bootstrapped/regtested x86_64-linux, OK?
>
> * lto-symtab.c (lto_cgraph_replace_node): Free decl_in_state.
> * cgraph.c (cgraph_release_function_body): Free decl_in_state.
> * lto-section-in.c (lto_free_function_in_decl_state): New function.
> (lto_free_function_in_decl_state_for_node): New function.
>
> * lto.c (read_cgraph_and_symbols): Remove ggc_collect;
> clear section node; add comment why we do not collect.
> Index: lto-symtab.c
> ===================================================================
> --- lto-symtab.c (revision 202047)
> +++ lto-symtab.c (working copy)
> @@ -80,6 +80,8 @@ lto_cgraph_replace_node (struct cgraph_n
> /* Redirect incomming references. */
> ipa_clone_referring ((symtab_node)prevailing_node, &node->symbol.ref_list);
>
> + lto_free_function_in_decl_state_for_node ((symtab_node)node);
> +
> if (node->symbol.decl != prevailing_node->symbol.decl)
> cgraph_release_function_body (node);
>
> Index: cgraph.c
> ===================================================================
> --- cgraph.c (revision 202047)
> +++ cgraph.c (working copy)
> @@ -1663,6 +1663,8 @@ cgraph_release_function_body (struct cgr
> if (!node->used_as_abstract_origin && DECL_INITIAL (node->symbol.decl))
> DECL_INITIAL (node->symbol.decl) = error_mark_node;
> release_function_body (node->symbol.decl);
> + if (node->symbol.lto_file_data)
> + lto_free_function_in_decl_state_for_node ((symtab_node) node);
> }
>
> /* Remove the node from cgraph. */
> Index: lto-section-in.c
> ===================================================================
> --- lto-section-in.c (revision 202047)
> +++ lto-section-in.c (working copy)
> @@ -414,6 +414,41 @@ lto_get_function_in_decl_state (struct l
> return slot? ((struct lto_in_decl_state*) *slot) : NULL;
> }
>
> +/* Free decl_states. */
> +
> +void
> +lto_free_function_in_decl_state (struct lto_in_decl_state *state)
> +{
> + int i;
> + for (i = 0; i < LTO_N_DECL_STREAMS; i++)
> + ggc_free (state->streams[i].trees);
We likely also waste a lot of memory by means of GC overhead here.
Moving this array out of GC space would be better (we know the
exact size upfront) - should be doable with making lto_tree_ref_able
GTY((user)) and a custom marker.
> + ggc_free (state);
> +}
> +
> +/* Free decl_states associated with NODE. This makes it possible to furhter
> + release trees needed by the NODE's body. */
> +
> +void
> +lto_free_function_in_decl_state_for_node (symtab_node node)
> +{
> + struct lto_in_decl_state temp;
> + void **slot;
> +
> + if (!node->symbol.lto_file_data)
> + return;
> +
> + temp.fn_decl = node->symbol.decl;
> + slot = htab_find_slot (node->symbol.lto_file_data->function_decl_states,
> + &temp, NO_INSERT);
> + if (slot && *slot)
> + {
> + lto_free_function_in_decl_state ((struct lto_in_decl_state*) *slot);
> + htab_clear_slot (node->symbol.lto_file_data->function_decl_states,
> + slot);
> + }
> + node->symbol.lto_file_data = NULL;
> +}
> +
>
> /* Report read pass end of the section. */
>
> Index: ipa.c
> ===================================================================
> --- ipa.c (revision 202047)
> +++ ipa.c (working copy)
> @@ -873,6 +873,17 @@ function_and_variable_visibility (bool w
> segfault though. */
> symtab_dissolve_same_comdat_group_list ((symtab_node) node);
> }
> + if (node->symbol.externally_visible
> + && DECL_COMDAT (node->symbol.decl)
> + && comdat_can_be_unshared_p ((symtab_node) node))
> + {
> + if (dump_file
> + && DECL_VISIBILITY (node->symbol.decl) != VISIBILITY_HIDDEN)
> + fprintf (dump_file, "Promoting visibility to hidden: %s/%i\n",
> + cgraph_node_name (node), node->symbol.order);
> + DECL_VISIBILITY (node->symbol.decl) = VISIBILITY_HIDDEN;
> + DECL_VISIBILITY_SPECIFIED (node->symbol.decl) = true;
> + }
>
> if (node->thunk.thunk_p
> && TREE_PUBLIC (node->symbol.decl))
> @@ -980,6 +991,17 @@ function_and_variable_visibility (bool w
> symtab_dissolve_same_comdat_group_list ((symtab_node) vnode);
> vnode->symbol.resolution = LDPR_PREVAILING_DEF_IRONLY;
> }
> + if (vnode->symbol.externally_visible
> + && DECL_COMDAT (vnode->symbol.decl)
> + && comdat_can_be_unshared_p ((symtab_node) vnode))
> + {
> + if (dump_file
> + && DECL_VISIBILITY (vnode->symbol.decl) == VISIBILITY_HIDDEN)
> + fprintf (dump_file, "Promoting visibility to hidden: %s/%i\n",
> + varpool_node_name (vnode), vnode->symbol.order);
> + DECL_VISIBILITY (vnode->symbol.decl) = VISIBILITY_HIDDEN;
> + DECL_VISIBILITY_SPECIFIED (vnode->symbol.decl) = true;
> + }
> }
>
> if (dump_file)
Stray changes?
> Index: lto/lto.c
> ===================================================================
> --- lto/lto.c (revision 202047)
> +++ lto/lto.c (working copy)
> @@ -3429,7 +3429,6 @@ read_cgraph_and_symbols (unsigned nfiles
> tree_scc_hash.dispose ();
> obstack_free (&tree_scc_hash_obstack, NULL);
> free_gimple_type_tables ();
> - ggc_collect ();
>
> /* Set the hooks so that all of the ipa passes can read in their data. */
> lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data);
> @@ -3484,7 +3483,6 @@ read_cgraph_and_symbols (unsigned nfiles
> }
> htab_delete (tree_with_vars);
> tree_with_vars = NULL;
> - ggc_collect ();
Just leave the ggc_collect ()s there, they are free.
> timevar_pop (TV_IPA_LTO_DECL_MERGE);
> /* Each pass will set the appropriate timer. */
> @@ -3503,6 +3501,9 @@ read_cgraph_and_symbols (unsigned nfiles
> gcc_assert (all_file_decl_data[i]->symtab_node_encoder);
> lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder);
> all_file_decl_data[i]->symtab_node_encoder = NULL;
> + lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state);
> + all_file_decl_data[i]->global_decl_state = NULL;
> + all_file_decl_data[i]->current_decl_state = NULL;
> }
>
> /* Finally merge the cgraph according to the decl merging decisions. */
> @@ -3513,7 +3514,12 @@ read_cgraph_and_symbols (unsigned nfiles
> dump_symtab (cgraph_dump_file);
> }
> lto_symtab_merge_symbols ();
> - ggc_collect ();
> +
> + /* Do not GGC collect here; streaming in should not produce garbage.
> + Be sure we first collect after merging symbols, setting up visibilities
> + and removing unreachable nodes. This will happen after whole program
> + visibility pass. This should release more memory back to the system
> + and possibly allow us to re-use it for heap. */
But it's not wrong to collect here, no? See above, it should be
mostly free.
Thanks,
Richard.
More information about the Gcc-patches
mailing list