This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Whole program optimization and functions-only-called-once.
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: Toon Moene <toon at moene dot org>
- Cc: Jan Hubicka <hubicka at ucw dot cz>, Richard Guenther <richard dot guenther at gmail dot com>, Jan Hubicka <jh at suse dot cz>, gcc-patches at gcc dot gnu dot org
- Date: Mon, 16 Nov 2009 10:54:35 +0100
- Subject: Re: Whole program optimization and functions-only-called-once.
- References: <4AF1D3C2.3000001@moene.org> <84fc9c000911041126x14ce9226w9dfb781ea284de6b@mail.gmail.com> <20091112161638.GC11341@atrey.karlin.mff.cuni.cz> <4AFEA884.9030003@moene.org>
> Jan Hubicka wrote:
>
>> -fno-ipa-cp should work around your problem for time being.
>
> Indeed it did. Some figures:
Thanks for the confirmation!
> Considering invlo4 size 1462.
> Called once from lowpass 2293 insns.
> Not inlined because --param large-function-growth limit reached.
>
> Considering invlo2 size 933.
> Called once from lowpass 2293 insns.
> Not inlined because --param large-function-growth limit reached.
>
> where the largest callee *does* get inlined, while two smaller ones
> don't (I agree with Jan that this would have been solved by training the
> inliner with profiling data, because only invlo4 gets called).
Using profiling data does not really make the inliner bypass the
large-function-growth limit. We can experiment with tweaking
large-function-growth. So far I haven't seen any testcase where this limit
results in a runtime regression.
This is the patch I intend to commit after re-testing on x86_64-linux,
following some last-minute changes.
* cgraph.c (cgraph_release_function_body): Update use of
ipa_transforms_to_apply.
(cgraph_remove_node): Remove ipa_transforms_to_apply.
* cgraph.h (struct cgraph_node): Add ipa_transforms_to_apply.
* cgraphunit.c (save_inline_function_body): Clear ipa_transforms for
copied body.
(cgraph_materialize_clone): Remove original if dead.
* lto-streamer-in.c (lto_read_body): Remove FIXME and
ipa_transforms_to_apply hack.
* function.h (struct function): Add ipa_transforms_to_apply.
* ipa.c (cgraph_remove_unreachable_nodes): Handle dead clone originals.
* tree-inline.c (copy_bb): Update sanity check.
(initialize_cfun): Do not copy ipa_transforms_to_apply.
(expand_call_inline): Remove dead clone originals.
(tree_function_versioning): Merge transformation queues.
* passes.c (add_ipa_transform_pass): Remove.
(execute_one_ipa_transform_pass): Update ipa_transforms_to_apply
tracking.
(execute_all_ipa_transforms): Update.
(execute_one_pass): Update.
* lto.c (read_cgraph_and_symbols): Set also ipa_transforms_to_apply.
Index: cgraph.c
===================================================================
*** cgraph.c (revision 154198)
--- cgraph.c (working copy)
*************** cgraph_release_function_body (struct cgr
*** 1132,1138 ****
pop_cfun();
gimple_set_body (node->decl, NULL);
VEC_free (ipa_opt_pass, heap,
! DECL_STRUCT_FUNCTION (node->decl)->ipa_transforms_to_apply);
/* Struct function hangs a lot of data that would leak if we didn't
removed all pointers to it. */
ggc_free (DECL_STRUCT_FUNCTION (node->decl));
--- 1132,1138 ----
pop_cfun();
gimple_set_body (node->decl, NULL);
VEC_free (ipa_opt_pass, heap,
! node->ipa_transforms_to_apply);
/* Struct function hangs a lot of data that would leak if we didn't
removed all pointers to it. */
ggc_free (DECL_STRUCT_FUNCTION (node->decl));
*************** cgraph_remove_node (struct cgraph_node *
*** 1159,1164 ****
--- 1159,1166 ----
cgraph_call_node_removal_hooks (node);
cgraph_node_remove_callers (node);
cgraph_node_remove_callees (node);
+ VEC_free (ipa_opt_pass, heap,
+ node->ipa_transforms_to_apply);
/* Incremental inlining access removed nodes stored in the postorder list.
*/
Index: cgraph.h
===================================================================
*** cgraph.h (revision 154198)
--- cgraph.h (working copy)
*************** struct GTY((chain_next ("%h.next"), chai
*** 190,195 ****
--- 190,200 ----
PTR GTY ((skip)) aux;
+ /* Interprocedural passes scheduled to have their transform functions
+ applied next time we execute local pass on them. We maintain it
+ per-function in order to allow IPA passes to introduce new functions. */
+ VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply;
+
struct cgraph_local_info local;
struct cgraph_global_info global;
struct cgraph_rtl_info rtl;
*************** struct GTY((chain_next ("%h.next"), chai
*** 206,221 ****
number of cfg nodes with -fprofile-generate and -fprofile-use */
int pid;
! /* Set when function must be output - it is externally visible
! or its address is taken. */
unsigned needed : 1;
! /* Set when function has address taken. */
unsigned address_taken : 1;
/* Set when decl is an abstract function pointed to by the
ABSTRACT_DECL_ORIGIN of a reachable function. */
unsigned abstract_and_needed : 1;
/* Set when function is reachable by call from other function
! that is either reachable or needed. */
unsigned reachable : 1;
/* Set once the function is lowered (i.e. its CFG is built). */
unsigned lowered : 1;
--- 211,234 ----
number of cfg nodes with -fprofile-generate and -fprofile-use */
int pid;
! /* Set when function must be output for some reason. The primary
! use of this flag is to mark functions needed to be output for
! non-standard reason. Functions that are externally visible
! or reachable from functions needed to be output are marked
! by specialized flags. */
unsigned needed : 1;
! /* Set when function has address taken.
! In current implementation it imply needed flag. */
unsigned address_taken : 1;
/* Set when decl is an abstract function pointed to by the
ABSTRACT_DECL_ORIGIN of a reachable function. */
unsigned abstract_and_needed : 1;
/* Set when function is reachable by call from other function
! that is either reachable or needed.
! This flag is computed at original cgraph construction and then
! updated in cgraph_remove_unreachable_nodes. Note that after
! cgraph_remove_unreachable_nodes cgraph still can contain unreachable
! nodes when they are needed for virtual clone instantiation. */
unsigned reachable : 1;
/* Set once the function is lowered (i.e. its CFG is built). */
unsigned lowered : 1;
Index: cgraphunit.c
===================================================================
*** cgraphunit.c (revision 154198)
--- cgraphunit.c (working copy)
*************** save_inline_function_body (struct cgraph
*** 1777,1784 ****
TREE_PUBLIC (first_clone->decl) = 0;
DECL_COMDAT (first_clone->decl) = 0;
VEC_free (ipa_opt_pass, heap,
! DECL_STRUCT_FUNCTION (first_clone->decl)->ipa_transforms_to_apply);
! DECL_STRUCT_FUNCTION (first_clone->decl)->ipa_transforms_to_apply = NULL;
#ifdef ENABLE_CHECKING
verify_cgraph_node (first_clone);
--- 1777,1784 ----
TREE_PUBLIC (first_clone->decl) = 0;
DECL_COMDAT (first_clone->decl) = 0;
VEC_free (ipa_opt_pass, heap,
! first_clone->ipa_transforms_to_apply);
! first_clone->ipa_transforms_to_apply = NULL;
#ifdef ENABLE_CHECKING
verify_cgraph_node (first_clone);
*************** cgraph_materialize_clone (struct cgraph_
*** 1810,1815 ****
--- 1810,1817 ----
node->clone_of->clones = node->next_sibling_clone;
node->next_sibling_clone = NULL;
node->prev_sibling_clone = NULL;
+ if (!node->clone_of->analyzed && !node->clone_of->clones)
+ cgraph_remove_node (node->clone_of);
node->clone_of = NULL;
bitmap_obstack_release (NULL);
}
Index: lto-streamer-in.c
===================================================================
*** lto-streamer-in.c (revision 154198)
--- lto-streamer-in.c (working copy)
*************** lto_read_body (struct lto_file_decl_data
*** 1476,1490 ****
/* Restore decl state */
file_data->current_decl_state = file_data->global_decl_state;
- /* FIXME: ipa_transforms_to_apply holds list of passes that have optimization
- summaries computed and needs to apply changes. At the moment WHOPR only
- supports inlining, so we can push it here by hand. In future we need to stream
- this field into ltrans compilation. This will also need to move the field
- from struct function into cgraph node where it belongs. */
- if (flag_ltrans && !cgraph_node (fn_decl)->global.inlined_to)
- VEC_safe_push (ipa_opt_pass, heap,
- cfun->ipa_transforms_to_apply,
- (ipa_opt_pass)&pass_ipa_inline);
pop_cfun ();
}
else
--- 1476,1481 ----
Index: function.h
===================================================================
*** function.h (revision 154198)
--- function.h (working copy)
*************** struct GTY(()) function {
*** 522,532 ****
unsigned int curr_properties;
unsigned int last_verified;
- /* Interprocedural passes scheduled to have their transform functions
- applied next time we execute local pass on them. We maintain it
- per-function in order to allow IPA passes to introduce new functions. */
- VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply;
-
/* Non-null if the function does something that would prevent it from
being copied; this applies to both versioning and inlining. Set to
a string describing the reason for failure. */
--- 522,527 ----
Index: ipa.c
===================================================================
*** ipa.c (revision 154198)
--- ipa.c (working copy)
*************** bool
*** 121,126 ****
--- 121,127 ----
cgraph_remove_unreachable_nodes (bool before_inlining_p, FILE *file)
{
struct cgraph_node *first = (struct cgraph_node *) (void *) 1;
+ struct cgraph_node *processed = (struct cgraph_node *) (void *) 2;
struct cgraph_node *node, *next;
bool changed = false;
*************** cgraph_remove_unreachable_nodes (bool be
*** 142,150 ****
gcc_assert (!node->global.inlined_to);
node->aux = first;
first = node;
}
else
! gcc_assert (!node->aux);
/* Perform reachability analysis. As a special case do not consider
extern inline functions not inlined as live because we won't output
--- 143,155 ----
gcc_assert (!node->global.inlined_to);
node->aux = first;
first = node;
+ node->reachable = true;
}
else
! {
! gcc_assert (!node->aux);
! node->reachable = false;
! }
/* Perform reachability analysis. As a special case do not consider
extern inline functions not inlined as live because we won't output
*************** cgraph_remove_unreachable_nodes (bool be
*** 154,170 ****
struct cgraph_edge *e;
node = first;
first = (struct cgraph_node *) first->aux;
! for (e = node->callees; e; e = e->next_callee)
! if (!e->callee->aux
! && node->analyzed
! && (!e->inline_failed || !e->callee->analyzed
! || (!DECL_EXTERNAL (e->callee->decl))
! || before_inlining_p))
! {
! e->callee->aux = first;
! first = e->callee;
! }
while (node->clone_of && !node->clone_of->aux && !gimple_has_body_p (node->decl))
{
node = node->clone_of;
--- 159,184 ----
struct cgraph_edge *e;
node = first;
first = (struct cgraph_node *) first->aux;
+ node->aux = processed;
! if (node->reachable)
! for (e = node->callees; e; e = e->next_callee)
! if (!e->callee->reachable
! && node->analyzed
! && (!e->inline_failed || !e->callee->analyzed
! || (!DECL_EXTERNAL (e->callee->decl))
! || before_inlining_p))
! {
! bool prev_reachable = e->callee->reachable;
! e->callee->reachable |= node->reachable;
! if (!e->callee->aux
! || (e->callee->aux == processed
! && prev_reachable != e->callee->reachable))
! {
! e->callee->aux = first;
! first = e->callee;
! }
! }
while (node->clone_of && !node->clone_of->aux && !gimple_has_body_p (node->decl))
{
node = node->clone_of;
*************** cgraph_remove_unreachable_nodes (bool be
*** 184,196 ****
for (node = cgraph_nodes; node; node = next)
{
next = node->next;
if (!node->aux)
{
node->global.inlined_to = NULL;
if (file)
fprintf (file, " %s", cgraph_node_name (node));
! if (!node->analyzed || !DECL_EXTERNAL (node->decl)
! || before_inlining_p)
cgraph_remove_node (node);
else
{
--- 198,215 ----
for (node = cgraph_nodes; node; node = next)
{
next = node->next;
+ if (node->aux && !node->reachable)
+ {
+ cgraph_node_remove_callees (node);
+ node->analyzed = false;
+ node->local.inlinable = false;
+ }
if (!node->aux)
{
node->global.inlined_to = NULL;
if (file)
fprintf (file, " %s", cgraph_node_name (node));
! if (!node->analyzed || !DECL_EXTERNAL (node->decl) || before_inlining_p)
cgraph_remove_node (node);
else
{
*************** cgraph_remove_unreachable_nodes (bool be
*** 219,224 ****
--- 238,249 ----
node->analyzed = false;
node->local.inlinable = false;
}
+ if (node->prev_sibling_clone)
+ node->prev_sibling_clone->next_sibling_clone = node->next_sibling_clone;
+ else if (node->clone_of)
+ node->clone_of->clones = node->next_sibling_clone;
+ if (node->next_sibling_clone)
+ node->next_sibling_clone->prev_sibling_clone = node->prev_sibling_clone;
}
else
cgraph_remove_node (node);
Index: lto/lto.c
===================================================================
*** lto/lto.c (revision 154198)
--- lto/lto.c (working copy)
*************** read_cgraph_and_symbols (unsigned nfiles
*** 1826,1834 ****
phase. */
if (flag_ltrans)
for (node = cgraph_nodes; node; node = node->next)
! if (!node->global.inlined_to
! && cgraph_decide_is_function_needed (node, node->decl))
! cgraph_mark_needed_node (node);
timevar_push (TV_IPA_LTO_DECL_IO);
--- 1826,1844 ----
phase. */
if (flag_ltrans)
for (node = cgraph_nodes; node; node = node->next)
! {
! if (!node->global.inlined_to
! && cgraph_decide_is_function_needed (node, node->decl))
! cgraph_mark_needed_node (node);
! /* FIXME: ipa_transforms_to_apply holds list of passes that have optimization
! summaries computed and needs to apply changes. At the moment WHOPR only
! supports inlining, so we can push it here by hand. In future we need to stream
! this field into ltrans compilation. */
! if (node->analyzed)
! VEC_safe_push (ipa_opt_pass, heap,
! node->ipa_transforms_to_apply,
! (ipa_opt_pass)&pass_ipa_inline);
! }
timevar_push (TV_IPA_LTO_DECL_IO);
Index: tree-inline.c
===================================================================
*** tree-inline.c (revision 154198)
--- tree-inline.c (working copy)
*************** copy_bb (copy_body_data *id, basic_block
*** 1665,1674 ****
/* We have missing edge in the callgraph. This can happen
when previous inlining turned an indirect call into a
! direct call by constant propagating arguments. In all
other cases we hit a bug (incorrect node sharing is the
most common reason for missing edges). */
! gcc_assert (dest->needed || !dest->analyzed);
if (id->transform_call_graph_edges == CB_CGE_MOVE_CLONES)
cgraph_create_edge_including_clones
(id->dst_node, dest, stmt, bb->count,
--- 1665,1676 ----
/* We have missing edge in the callgraph. This can happen
when previous inlining turned an indirect call into a
! direct call by constant propagating arguments or we are
! producing dead clone (for further clonning). In all
other cases we hit a bug (incorrect node sharing is the
most common reason for missing edges). */
! gcc_assert (dest->needed || !dest->analyzed
! || !id->src_node->analyzed);
if (id->transform_call_graph_edges == CB_CGE_MOVE_CLONES)
cgraph_create_edge_including_clones
(id->dst_node, dest, stmt, bb->count,
*************** initialize_cfun (tree new_fndecl, tree c
*** 1983,1991 ****
cfun->function_end_locus = src_cfun->function_end_locus;
cfun->curr_properties = src_cfun->curr_properties;
cfun->last_verified = src_cfun->last_verified;
- if (src_cfun->ipa_transforms_to_apply)
- cfun->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap,
- src_cfun->ipa_transforms_to_apply);
cfun->va_list_gpr_size = src_cfun->va_list_gpr_size;
cfun->va_list_fpr_size = src_cfun->va_list_fpr_size;
cfun->function_frequency = src_cfun->function_frequency;
--- 1985,1990 ----
*************** expand_call_inline (basic_block bb, gimp
*** 3822,3827 ****
--- 3821,3830 ----
(*debug_hooks->outlining_inline_function) (cg_edge->callee->decl);
/* Update callgraph if needed. */
+ if (cg_edge->callee->clone_of
+ && !cg_edge->callee->clone_of->next_sibling_clone
+ && !cg_edge->callee->analyzed)
+ cgraph_remove_node (cg_edge->callee);
cgraph_remove_node (cg_edge->callee);
id->block = NULL_TREE;
*************** tree_function_versioning (tree old_decl,
*** 4848,4853 ****
--- 4851,4869 ----
id.src_node = old_version_node;
id.dst_node = new_version_node;
id.src_cfun = DECL_STRUCT_FUNCTION (old_decl);
+ if (id.src_node->ipa_transforms_to_apply)
+ {
+ VEC(ipa_opt_pass,heap) * old_transforms_to_apply = id.dst_node->ipa_transforms_to_apply;
+ unsigned int i;
+
+ id.dst_node->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap,
+ id.src_node->ipa_transforms_to_apply);
+ for (i = 0; i < VEC_length (ipa_opt_pass, old_transforms_to_apply); i++)
+ VEC_safe_push (ipa_opt_pass, heap, id.dst_node->ipa_transforms_to_apply,
+ VEC_index (ipa_opt_pass,
+ old_transforms_to_apply,
+ i));
+ }
id.copy_decl = copy_decl_no_change;
id.transform_call_graph_edges
Index: passes.c
===================================================================
*** passes.c (revision 154198)
--- passes.c (working copy)
*************** update_properties_after_pass (void *data
*** 1376,1390 ****
& ~pass->properties_destroyed;
}
- /* Schedule IPA transform pass DATA for CFUN. */
-
- static void
- add_ipa_transform_pass (void *data)
- {
- struct ipa_opt_pass_d *ipa_pass = (struct ipa_opt_pass_d *) data;
- VEC_safe_push (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply, ipa_pass);
- }
-
/* Execute summary generation for all of the passes in IPA_PASS. */
void
--- 1376,1381 ----
*************** execute_one_ipa_transform_pass (struct c
*** 1464,1482 ****
void
execute_all_ipa_transforms (void)
{
! if (cfun && cfun->ipa_transforms_to_apply)
{
unsigned int i;
- struct cgraph_node *node = cgraph_node (current_function_decl);
! for (i = 0; i < VEC_length (ipa_opt_pass, cfun->ipa_transforms_to_apply);
i++)
execute_one_ipa_transform_pass (node,
VEC_index (ipa_opt_pass,
! cfun->ipa_transforms_to_apply,
i));
! VEC_free (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply);
! cfun->ipa_transforms_to_apply = NULL;
}
}
--- 1455,1476 ----
void
execute_all_ipa_transforms (void)
{
! struct cgraph_node *node;
! if (!cfun)
! return;
! node = cgraph_node (current_function_decl);
! if (node->ipa_transforms_to_apply)
{
unsigned int i;
! for (i = 0; i < VEC_length (ipa_opt_pass, node->ipa_transforms_to_apply);
i++)
execute_one_ipa_transform_pass (node,
VEC_index (ipa_opt_pass,
! node->ipa_transforms_to_apply,
i));
! VEC_free (ipa_opt_pass, heap, node->ipa_transforms_to_apply);
! node->ipa_transforms_to_apply = NULL;
}
}
*************** execute_one_pass (struct opt_pass *pass)
*** 1551,1557 ****
execute_todo (todo_after | pass->todo_flags_finish);
verify_interpass_invariants ();
if (pass->type == IPA_PASS)
! do_per_function (add_ipa_transform_pass, pass);
if (!current_function_decl)
cgraph_process_new_functions ();
--- 1545,1557 ----
execute_todo (todo_after | pass->todo_flags_finish);
verify_interpass_invariants ();
if (pass->type == IPA_PASS)
! {
! struct cgraph_node *node;
! for (node = cgraph_nodes; node; node = node->next)
! if (node->analyzed)
! VEC_safe_push (ipa_opt_pass, heap, node->ipa_transforms_to_apply,
! (struct ipa_opt_pass_d *)pass);
! }
if (!current_function_decl)
cgraph_process_new_functions ();