Avoid applying inline plan for all functions ahead of late compilation
Richard Biener
richard.guenther@gmail.com
Sun Nov 16 09:36:00 GMT 2014
On November 16, 2014 8:15:37 AM CET, Jan Hubicka <hubicka@ucw.cz> wrote:
>Hi,
>late in GCC 4.9 development we broke the feature that ltrans stages do
>not read all
>functions in ahead. This is because of late IPA passes that do not
>like to see functions
>without IPA transformations applied. I was originally OK with the
>solution based
>on the fact that we have only IPA-PTA as a late IPA pass that is disabled by
>default and
>eventually probably should become part of WPA in some form.
>SIMD streaming was however added and this causes us to stream in all
>function bodies
>and apply all inlining decisions at the very beginning of the optimization
>queue.
>
>Fixed by this patch. get_body is now responsible for applying
>transformations
>on demand and late IPA passes need to call get_body on functions that
>they
>are interested in + are advised to not be interested in every single
>function in
>the program.
>
>The patch also hits a bug in i386's ix86_set_current_function. It is
>responsible
>for initializing backend and it does so lazily remembering the previous
>options
>backend was initialized for. Pragma parsing however clears the cache,
>which leads
>to wrong settings being used for subsequent functions.
>
>Bootstrapped/regtested x86_64-linux, will commit it tomorrow after a bit
>more testing.
But, for example, for IPA PTA this means we apply all IPA transforms without any garbage collection run?
Richard.
>Index: gcc/cgraphclones.c
>===================================================================
>--- gcc/cgraphclones.c (revision 217612)
>+++ gcc/cgraphclones.c (working copy)
>@@ -307,7 +307,7 @@ duplicate_thunk_for_node (cgraph_node *t
> node = duplicate_thunk_for_node (thunk_of, node);
>
> if (!DECL_ARGUMENTS (thunk->decl))
>- thunk->get_body ();
>+ thunk->get_untransformed_body ();
>
> cgraph_edge *cs;
> for (cs = node->callers; cs; cs = cs->next_caller)
>@@ -1067,7 +1067,7 @@ symbol_table::materialize_all_clones (vo
> && !gimple_has_body_p (node->decl))
> {
> if (!node->clone_of->clone_of)
>- node->clone_of->get_body ();
>+ node->clone_of->get_untransformed_body ();
> if (gimple_has_body_p (node->clone_of->decl))
> {
> if (symtab->dump_file)
>Index: gcc/ipa-icf.c
>===================================================================
>--- gcc/ipa-icf.c (revision 217612)
>+++ gcc/ipa-icf.c (working copy)
>@@ -706,7 +706,7 @@ void
> sem_function::init (void)
> {
> if (in_lto_p)
>- get_node ()->get_body ();
>+ get_node ()->get_untransformed_body ();
>
> tree fndecl = node->decl;
> function *func = DECL_STRUCT_FUNCTION (fndecl);
>Index: gcc/passes.c
>===================================================================
>--- gcc/passes.c (revision 217612)
>+++ gcc/passes.c (working copy)
>@@ -2214,36 +2214,6 @@ execute_one_pass (opt_pass *pass)
> executed. */
> invoke_plugin_callbacks (PLUGIN_PASS_EXECUTION, pass);
>
>- /* SIPLE IPA passes do not handle callgraphs with IPA transforms in
>it.
>- Apply all trnasforms first. */
>- if (pass->type == SIMPLE_IPA_PASS)
>- {
>- struct cgraph_node *node;
>- bool applied = false;
>- FOR_EACH_DEFINED_FUNCTION (node)
>- if (node->analyzed
>- && node->has_gimple_body_p ()
>- && (!node->clone_of || node->decl != node->clone_of->decl))
>- {
>- if (!node->global.inlined_to
>- && node->ipa_transforms_to_apply.exists ())
>- {
>- node->get_body ();
>- push_cfun (DECL_STRUCT_FUNCTION (node->decl));
>- execute_all_ipa_transforms ();
>- cgraph_edge::rebuild_edges ();
>- free_dominance_info (CDI_DOMINATORS);
>- free_dominance_info (CDI_POST_DOMINATORS);
>- pop_cfun ();
>- applied = true;
>- }
>- }
>- if (applied)
>- symtab->remove_unreachable_nodes (false, dump_file);
>- /* Restore current_pass. */
>- current_pass = pass;
>- }
>-
> if (!quiet_flag && !cfun)
> fprintf (stderr, " <%s>", pass->name ? pass->name : "");
>
>Index: gcc/cgraphunit.c
>===================================================================
>--- gcc/cgraphunit.c (revision 217612)
>+++ gcc/cgraphunit.c (working copy)
>@@ -197,7 +197,6 @@ along with GCC; see the file COPYING3.
> #include "target.h"
> #include "diagnostic.h"
> #include "params.h"
>-#include "fibheap.h"
> #include "intl.h"
> #include "hash-map.h"
> #include "plugin-api.h"
>@@ -1469,7 +1468,7 @@ cgraph_node::expand_thunk (bool output_a
> }
>
> if (in_lto_p)
>- get_body ();
>+ get_untransformed_body ();
> a = DECL_ARGUMENTS (thunk_fndecl);
>
> current_function_decl = thunk_fndecl;
>@@ -1522,7 +1521,7 @@ cgraph_node::expand_thunk (bool output_a
> gimple ret;
>
> if (in_lto_p)
>- get_body ();
>+ get_untransformed_body ();
> a = DECL_ARGUMENTS (thunk_fndecl);
>
> current_function_decl = thunk_fndecl;
>@@ -1744,7 +1743,7 @@ cgraph_node::expand (void)
> announce_function (decl);
> process = 0;
> gcc_assert (lowered);
>- get_body ();
>+ get_untransformed_body ();
>
> /* Generate RTL for the body of DECL. */
>
>Index: gcc/cgraph.c
>===================================================================
>--- gcc/cgraph.c (revision 217612)
>+++ gcc/cgraph.c (working copy)
>@@ -1664,29 +1664,33 @@ release_function_body (tree decl)
> {
> if (DECL_STRUCT_FUNCTION (decl))
> {
>- push_cfun (DECL_STRUCT_FUNCTION (decl));
>- if (cfun->cfg
>- && current_loops)
>- {
>- cfun->curr_properties &= ~PROP_loops;
>- loop_optimizer_finalize ();
>- }
>- if (cfun->gimple_df)
>+ if (DECL_STRUCT_FUNCTION (decl)->cfg
>+ || DECL_STRUCT_FUNCTION (decl)->gimple_df)
> {
>- delete_tree_ssa ();
>- delete_tree_cfg_annotations ();
>- cfun->eh = NULL;
>- }
>- if (cfun->cfg)
>- {
>- gcc_assert (!dom_info_available_p (CDI_DOMINATORS));
>- gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS));
>- clear_edges ();
>- cfun->cfg = NULL;
>+ push_cfun (DECL_STRUCT_FUNCTION (decl));
>+ if (cfun->cfg
>+ && current_loops)
>+ {
>+ cfun->curr_properties &= ~PROP_loops;
>+ loop_optimizer_finalize ();
>+ }
>+ if (cfun->gimple_df)
>+ {
>+ delete_tree_ssa ();
>+ delete_tree_cfg_annotations ();
>+ cfun->eh = NULL;
>+ }
>+ if (cfun->cfg)
>+ {
>+ gcc_assert (!dom_info_available_p (CDI_DOMINATORS));
>+ gcc_assert (!dom_info_available_p (CDI_POST_DOMINATORS));
>+ clear_edges ();
>+ cfun->cfg = NULL;
>+ }
>+ if (cfun->value_histograms)
>+ free_histograms ();
>+ pop_cfun ();
> }
>- if (cfun->value_histograms)
>- free_histograms ();
>- pop_cfun ();
> gimple_set_body (decl, NULL);
> /* Struct function hangs a lot of data that would leak if we didn't
> removed all pointers to it. */
>@@ -3138,7 +3142,7 @@ cgraph_node::function_symbol (enum avail
> present. */
>
> bool
>-cgraph_node::get_body (void)
>+cgraph_node::get_untransformed_body (void)
> {
> lto_file_decl_data *file_data;
> const char *data, *name;
>@@ -3178,6 +3182,44 @@ cgraph_node::get_body (void)
> return true;
> }
>
>+/* Prepare function body. When doing LTO, read cgraph_node's body
>from disk
>+ if it is not already present. When some IPA transformations are
>scheduled,
>+ apply them. */
>+
>+bool
>+cgraph_node::get_body (void)
>+{
>+ bool updated;
>+
>+ updated = get_untransformed_body ();
>+
>+ /* Getting transformed body makes no sense for inline clones;
>+ we should never use this on real clones becuase they are
>materialized
>+ early.
>+ TODO: Materializing clones here will likely lead to smaller
>LTRANS
>+ footprint. */
>+ gcc_assert (!global.inlined_to && !clone_of);
>+ if (ipa_transforms_to_apply.exists ())
>+ {
>+ opt_pass *saved_current_pass = current_pass;
>+ FILE *saved_dump_file = dump_file;
>+ int saved_dump_flags = dump_flags;
>+
>+ push_cfun (DECL_STRUCT_FUNCTION (decl));
>+ execute_all_ipa_transforms ();
>+ cgraph_edge::rebuild_edges ();
>+ free_dominance_info (CDI_DOMINATORS);
>+ free_dominance_info (CDI_POST_DOMINATORS);
>+ pop_cfun ();
>+ updated = true;
>+
>+ current_pass = saved_current_pass;
>+ dump_file = saved_dump_file;
>+ dump_flags = saved_dump_flags;
>+ }
>+ return updated;
>+}
>+
> /* Return the DECL_STRUCT_FUNCTION of the function. */
>
> struct function *
>Index: gcc/cgraph.h
>===================================================================
>--- gcc/cgraph.h (revision 217612)
>+++ gcc/cgraph.h (working copy)
>@@ -933,6 +933,11 @@ public:
>
>/* When doing LTO, read cgraph_node's body from disk if it is not
>already
> present. */
>+ bool get_untransformed_body (void);
>+
>+ /* Prepare function body. When doing LTO, read cgraph_node's body
>from disk
>+ if it is not already present. When some IPA transformations are
>scheduled,
>+ apply them. */
> bool get_body (void);
>
> /* Release memory used to represent body of function.
>Index: gcc/config/i386/i386.c
>===================================================================
>--- gcc/config/i386/i386.c (revision 217612)
>+++ gcc/config/i386/i386.c (working copy)
>@@ -5029,10 +5029,35 @@ ix86_can_inline_p (tree caller, tree cal
> /* Remember the last target of ix86_set_current_function. */
> static GTY(()) tree ix86_previous_fndecl;
>
>+/* Set target globals to default. */
>+
>+static void
>+ix86_reset_to_default_globals (void)
>+{
>+ tree old_tree = (ix86_previous_fndecl
>+ ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
>+ : NULL_TREE);
>+
>+ if (old_tree)
>+ {
>+ tree new_tree = target_option_current_node;
>+ cl_target_option_restore (&global_options,
>+ TREE_TARGET_OPTION (new_tree));
>+ if (TREE_TARGET_GLOBALS (new_tree))
>+ restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
>+ else if (new_tree == target_option_default_node)
>+ restore_target_globals (&default_target_globals);
>+ else
>+ TREE_TARGET_GLOBALS (new_tree)
>+ = save_target_globals_default_opts ();
>+ }
>+}
>+
> /* Invalidate ix86_previous_fndecl cache. */
> void
> ix86_reset_previous_fndecl (void)
> {
>+ ix86_reset_to_default_globals ();
> ix86_previous_fndecl = NULL_TREE;
> }
>
>@@ -5071,18 +5096,7 @@ ix86_set_current_function (tree fndecl)
> }
>
> else if (old_tree)
>- {
>- new_tree = target_option_current_node;
>- cl_target_option_restore (&global_options,
>- TREE_TARGET_OPTION (new_tree));
>- if (TREE_TARGET_GLOBALS (new_tree))
>- restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
>- else if (new_tree == target_option_default_node)
>- restore_target_globals (&default_target_globals);
>- else
>- TREE_TARGET_GLOBALS (new_tree)
>- = save_target_globals_default_opts ();
>- }
>+ ix86_reset_to_default_globals ();
> }
> }
>
>@@ -50972,7 +50986,7 @@ ix86_simd_clone_adjust (struct cgraph_no
> bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
> gcc_assert (ok);
> pop_cfun ();
>- ix86_previous_fndecl = NULL_TREE;
>+ ix86_reset_previous_fndecl ();
> ix86_set_current_function (node->decl);
> }
>
>Index: gcc/tree-inline.c
>===================================================================
>--- gcc/tree-inline.c (revision 217612)
>+++ gcc/tree-inline.c (working copy)
>@@ -4338,7 +4338,7 @@ expand_call_inline (basic_block bb, gimp
> goto egress;
> }
> fn = cg_edge->callee->decl;
>- cg_edge->callee->get_body ();
>+ cg_edge->callee->get_untransformed_body ();
>
> #ifdef ENABLE_CHECKING
> if (cg_edge->callee->decl != id->dst_node->decl)
>Index: gcc/tree-ssa-structalias.c
>===================================================================
>--- gcc/tree-ssa-structalias.c (revision 217612)
>+++ gcc/tree-ssa-structalias.c (working copy)
>@@ -7086,7 +7086,7 @@ ipa_pta_execute (void)
> /* Nodes without a body are not interesting. Especially do not
> visit clones at this point for now - we get duplicate decls
> there for inline clones at least. */
>- if (!node->has_gimple_body_p () || node->clone_of)
>+ if (!node->has_gimple_body_p () || node->global.inlined_to)
> continue;
> node->get_body ();
>
More information about the Gcc-patches
mailing list