This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Simple cost model for ipcp
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sun, 24 Aug 2008 11:56:30 +0200
- Subject: Simple cost model for ipcp
Hi
this patch adds simple cost model into ipcp. It makes ipcp to realize
what constant propagation is for free and what cause code size growth
and code size growth is limited to 10%.
Because we lack any analysis on how much function will simplify, I
simply optimize for number of calls to constant propagated function for
start. We can add better analysis easilly on top of this.
Bootstrapped/regtested i686-linux, will commit it tonight if there
are no complains.
Honza
* doc/invoke.texi (ipcp-unit-growth): Document.
* ipa-cp: Include fibheap and params.h
(ipcp_need_redirect_p): Work for non-clones too.
(max_count, dead_nodes): New static variables.
(ipcp_need_original_clone_p, ipcp_estimate_clonning_cost,
ipcp_const_param_count): New functions.
(ipcp_insert_stage): Rewrite to be cost model based.
* testsuite/gcc.dg/ipa/ipa-7.c: Fix template.
* params.def (PARAM_IPCP_UNIT_GROWTH): New parameter.
Index: doc/invoke.texi
===================================================================
*** doc/invoke.texi (revision 139525)
--- doc/invoke.texi (working copy)
*************** Specifies maximal overall growth of the
*** 6953,6958 ****
--- 6953,6963 ----
The default value is 30 which limits unit growth to 1.3 times the original
size.
+ @item ipcp-unit-growth
+ Specifies maximal overall growth of the compilation unit caused by
+ interprocedural constant propagation. The default value is 10 which limits
+ unit growth to 1.1 times the original size.
+
@item large-stack-frame
The limit specifying large stack frames. While inlining the algorithm is trying
to not grow past this limit too much. Default value is 256 bytes.
Index: ipa-cp.c
===================================================================
*** ipa-cp.c (revision 139525)
--- ipa-cp.c (working copy)
*************** along with GCC; see the file COPYING3.
*** 132,137 ****
--- 132,139 ----
#include "diagnostic.h"
#include "tree-dump.h"
#include "tree-inline.h"
+ #include "fibheap.h"
+ #include "params.h"
/* Get the original node field of ipa_node_params associated with node NODE. */
static inline struct cgraph_node *
*************** ipcp_need_redirect_p (struct cgraph_edge
*** 751,758 ****
struct ipa_node_params *orig_callee_info;
int i, count;
struct ipa_jump_func *jump_func;
! orig_callee_info = IPA_NODE_REF (ipcp_get_orig_node (cs->callee));
count = ipa_get_param_count (orig_callee_info);
for (i = 0; i < count; i++)
{
--- 753,766 ----
struct ipa_node_params *orig_callee_info;
int i, count;
struct ipa_jump_func *jump_func;
+ struct cgraph_node *node = cs->callee, *orig;
! if ((orig = ipcp_get_orig_node (node)) != NULL)
! node = orig;
! if (ipcp_get_orig_node (cs->caller))
! return false;
!
! orig_callee_info = IPA_NODE_REF (node);
count = ipa_get_param_count (orig_callee_info);
for (i = 0; i < count; i++)
{
*************** ipcp_update_profiling (void)
*** 848,870 ****
}
}
/* Propagate the constant parameters found by ipcp_iterate_stage()
to the function's code. */
static void
ipcp_insert_stage (void)
{
struct cgraph_node *node, *node1 = NULL;
! int i, const_param;
VEC (cgraph_edge_p, heap) * redirect_callers;
varray_type replace_trees;
struct cgraph_edge *cs;
int node_callers, count;
tree parm_tree;
struct ipa_replace_map *replace_param;
ipa_check_create_node_params ();
ipa_check_create_edge_args ();
for (node = cgraph_nodes; node; node = node->next)
{
struct ipa_node_params *info;
--- 856,974 ----
}
}
+ /* Maximal count found in program. */
+ static gcov_type max_count;
+ bitmap dead_nodes;
+
+ /* Return true if original clone needs to be preserved. */
+ static bool
+ ipcp_need_original_clone_p (struct cgraph_node *node)
+ {
+ struct cgraph_edge *e;
+
+ if (node->needed)
+ return true;
+ for (e = node->callers; e; e = e->next_caller)
+ if (!bitmap_bit_p (dead_nodes, e->caller->uid)
+ && ipcp_need_redirect_p (e))
+ return true;
+
+ return false;
+ }
+
+ /* Estimate cost of clonning NODE. */
+ static long
+ ipcp_estimate_clonning_cost (struct cgraph_node *node)
+ {
+ int freq_sum = 1;
+ gcov_type count_sum = 1;
+ struct cgraph_edge *e;
+ int cost;
+
+ /* When we don't need original clone; we should always propagate. */
+ if (!ipcp_need_original_clone_p (node))
+ return 0;
+
+ for (e = node->callers; e; e = e->next_caller)
+ if (!bitmap_bit_p (dead_nodes, e->caller->uid)
+ && !ipcp_need_redirect_p (e))
+ {
+ count_sum += e->count;
+ freq_sum += e->frequency + 1;
+ }
+
+ cost = node->local.inline_summary.self_insns * 1000;
+ if (max_count)
+ cost /= count_sum * 1000 / max_count + 1;
+ else
+ cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
+ if (dump_file)
+ fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
+ cgraph_node_name (node), cost, node->local.inline_summary.self_insns,
+ freq_sum);
+ return false;
+ }
+
+ /* Return number of live constant parameters. */
+ static int
+ ipcp_const_param_count (struct cgraph_node *node)
+ {
+ int const_param = 0;
+ struct ipa_node_params *info = IPA_NODE_REF (node);
+ int count = ipa_get_param_count (info);
+ int i;
+
+ for (i = 0; i < count; i++)
+ {
+ struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i);
+ tree parm_tree = ipa_get_ith_param (info, i);
+ if (ipcp_lat_is_insertable (lat)
+ /* Do not count obviously unused arguments. */
+ && (!is_gimple_reg (parm_tree)
+ || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
+ parm_tree)))
+ const_param++;
+ }
+ return const_param;
+ }
+
/* Propagate the constant parameters found by ipcp_iterate_stage()
to the function's code. */
static void
ipcp_insert_stage (void)
{
struct cgraph_node *node, *node1 = NULL;
! int i;
VEC (cgraph_edge_p, heap) * redirect_callers;
varray_type replace_trees;
struct cgraph_edge *cs;
int node_callers, count;
tree parm_tree;
struct ipa_replace_map *replace_param;
+ fibheap_t heap;
+ long overall_insns = 0, new_insns = 0;
+ long max_new_insns;
ipa_check_create_node_params ();
ipa_check_create_edge_args ();
+ dead_nodes = BITMAP_ALLOC (NULL);
+
+ for (node = cgraph_nodes; node; node = node->next)
+ if (node->analyzed)
+ {
+ if (node->count > max_count)
+ max_count = node->count;
+ overall_insns += node->local.inline_summary.self_insns;
+ }
+
+ max_new_insns = overall_insns;
+ if (max_new_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
+ max_new_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
+ max_new_insns = max_new_insns * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
+
+ /* First collect all functions we proved to have constant arguments to heap. */
+ heap = fibheap_new ();
for (node = cgraph_nodes; node; node = node->next)
{
struct ipa_node_params *info;
*************** ipcp_insert_stage (void)
*** 874,905 ****
info = IPA_NODE_REF (node);
if (ipa_is_called_with_var_arguments (info))
continue;
! const_param = 0;
count = ipa_get_param_count (info);
for (i = 0; i < count; i++)
{
struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i);
! tree parm_tree = ipa_get_ith_param (info, i);
! if (ipcp_lat_is_insertable (lat)
/* Do not count obviously unused arguments. */
&& (!is_gimple_reg (parm_tree)
! || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl), parm_tree)))
! const_param++;
! }
! if (const_param == 0)
! continue;
! VARRAY_GENERIC_PTR_INIT (replace_trees, const_param, "replace_trees");
! for (i = 0; i < count; i++)
! {
! struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i);
! if (lat->type == IPA_CONST_VALUE)
{
- parm_tree = ipa_get_ith_param (info, i);
replace_param =
ipcp_create_replace_map (parm_tree, lat);
VARRAY_PUSH_GENERIC_PTR (replace_trees, replace_param);
}
}
/* Compute how many callers node has. */
node_callers = 0;
for (cs = node->callers; cs != NULL; cs = cs->next_caller)
--- 978,1041 ----
info = IPA_NODE_REF (node);
if (ipa_is_called_with_var_arguments (info))
continue;
! if (ipcp_const_param_count (node))
! node->aux = fibheap_insert (heap, ipcp_estimate_clonning_cost (node), node);
! }
!
! /* Now clone in priority order until code size growth limits are met or
! heap is emptied. */
! while (!fibheap_empty (heap))
! {
! struct ipa_node_params *info;
! int growth = 0;
!
! node = (struct cgraph_node *)fibheap_extract_min (heap);
! node->aux = NULL;
! if (dump_file)
! fprintf (dump_file, "considering function %s\n",
! cgraph_node_name (node));
!
! if (ipcp_need_original_clone_p (node))
! growth = node->local.inline_summary.self_insns;
! else
! bitmap_set_bit (dead_nodes, node->uid);
!
! if (new_insns + growth > max_new_insns)
! break;
! if (growth
! && (optimize_size
! || (DECL_STRUCT_FUNCTION (node->decl)
! ->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)))
! {
! if (dump_file)
! fprintf (dump_file, "Not versioning, cold code would grow");
! continue;
! }
!
! new_insns += growth;
!
! info = IPA_NODE_REF (node);
count = ipa_get_param_count (info);
+
+ VARRAY_GENERIC_PTR_INIT (replace_trees, ipcp_const_param_count (node),
+ "replace_trees");
for (i = 0; i < count; i++)
{
struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i);
! parm_tree = ipa_get_ith_param (info, i);
!
! if (lat->type == IPA_CONST_VALUE
/* Do not count obviously unused arguments. */
&& (!is_gimple_reg (parm_tree)
! || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
! parm_tree)))
{
replace_param =
ipcp_create_replace_map (parm_tree, lat);
VARRAY_PUSH_GENERIC_PTR (replace_trees, replace_param);
}
}
+
/* Compute how many callers node has. */
node_callers = 0;
for (cs = node->callers; cs != NULL; cs = cs->next_caller)
*************** ipcp_insert_stage (void)
*** 907,912 ****
--- 1043,1049 ----
redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers);
for (cs = node->callers; cs != NULL; cs = cs->next_caller)
VEC_quick_push (cgraph_edge_p, redirect_callers, cs);
+
/* Redirecting all the callers of the node to the
new versioned node. */
node1 =
*************** ipcp_insert_stage (void)
*** 916,930 ****
if (node1 == NULL)
continue;
if (dump_file)
! fprintf (dump_file, "versioned function %s\n",
! cgraph_node_name (node));
ipcp_init_cloned_node (node, node1);
/* We've possibly introduced direct calls. */
ipcp_update_cloned_node (node1);
if (dump_file)
dump_function_to_file (node1->decl, dump_file, dump_flags);
}
ipcp_update_callgraph ();
ipcp_update_profiling ();
}
--- 1053,1088 ----
if (node1 == NULL)
continue;
if (dump_file)
! fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
! cgraph_node_name (node), (int)growth, (int)new_insns);
ipcp_init_cloned_node (node, node1);
+
/* We've possibly introduced direct calls. */
ipcp_update_cloned_node (node1);
if (dump_file)
dump_function_to_file (node1->decl, dump_file, dump_flags);
+
+ for (cs = node->callees; cs; cs = cs->next_callee)
+ if (cs->callee->aux)
+ {
+ fibheap_delete_node (heap, (fibnode_t) cs->callee->aux);
+ cs->callee->aux = fibheap_insert (heap,
+ ipcp_estimate_clonning_cost (cs->callee),
+ cs->callee);
+ }
+ }
+
+ while (!fibheap_empty (heap))
+ {
+ if (dump_file)
+ fprintf (dump_file, "skipping function %s\n",
+ cgraph_node_name (node));
+ node = (struct cgraph_node *) fibheap_extract_min (heap);
+ node->aux = NULL;
}
+ fibheap_delete (heap);
+ BITMAP_FREE (dead_nodes);
ipcp_update_callgraph ();
ipcp_update_profiling ();
}
Index: testsuite/gcc.dg/ipa/ipa-7.c
===================================================================
*** testsuite/gcc.dg/ipa/ipa-7.c (revision 139525)
--- testsuite/gcc.dg/ipa/ipa-7.c (working copy)
*************** int main ()
*** 25,31 ****
/* { dg-final { scan-ipa-dump-times "versioned function" 1 "cp" } } */
! /* { dg-final { scan-ipa-dump-times "propagating const" 1 "cp" } } */
/* { dg-final { cleanup-ipa-dump "cp" } } */
--- 25,31 ----
/* { dg-final { scan-ipa-dump-times "versioned function" 1 "cp" } } */
! /* { dg-final { scan-ipa-dump-times "replacing param with const" 1 "cp" } } */
/* { dg-final { cleanup-ipa-dump "cp" } } */
Index: params.def
===================================================================
*** params.def (revision 139525)
--- params.def (working copy)
*************** DEFPARAM(PARAM_INLINE_UNIT_GROWTH,
*** 193,198 ****
--- 193,202 ----
"inline-unit-growth",
"how much can given compilation unit grow because of the inlining (in percent)",
30, 0, 0)
+ DEFPARAM(PARAM_IPCP_UNIT_GROWTH,
+ "ipcp-unit-growth",
+ "how much can given compilation unit grow because of the interprocedural constant propagation (in percent)",
+ 10, 0, 0)
DEFPARAM(PARAM_INLINE_CALL_COST,
"inline-call-cost",
"expense of call operation relative to ordinary arithmetic operations",