This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Cost model for indirect call speculation
- From: Xinliang David Li <davidxl at google dot com>
- To: Jan Hubicka <hubicka at ucw dot cz>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Martin Liška <marxin dot liska at gmail dot com>
- Date: Sun, 11 Aug 2013 21:52:38 -0700
- Subject: Re: Cost model for indirect call speculation
- References: <20130811231123 dot GB9197 at kam dot mff dot cuni dot cz>
I like the approach in general -- in the past, indirect call promotion
and function inlining heuristics were disconnected -- which could lead to
either missing promotions or useless ones. This approach solves the
problem.
On Sun, Aug 11, 2013 at 4:11 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patch adds simple cost model into indirect call speculation. First we do not
> turn calls into speculative calls when it seems a bad idea (i.e. call is cold)
> and during inlining we remove speculations that do not seem beneficial.
> On modern chip speculative call sequence without inlining is not really going
> to fare better than indirect call because of indirect call predictor.
> So we keep them only if the call was inlined or if the callee is turned to clone
> or CONST/PURE flags are propagated to them.
>
> We may want to add target hook specifying if target supports indirect call predictor,
> but I am not sure how important this is in practice.
It might also be useful to introduce a parameter to control the
behavior so that people can do better experiments. The capability of
indirect branch prediction varies a lot depending on the target.
I only noticed a couple of indentation problems.
thanks,
David
>
> To enable cost model everywhere, the old unit-local transform code now does nothing
> but does sanity checking and debug output dumping.
> /* Speculative call consist of three components:
> *************** cgraph_speculative_call_info (struct cgr
> *** 1107,1113 ****
> if (e2->call_stmt)
> {
indentation?
> e = cgraph_edge (e->caller, e2->call_stmt);
> ! gcc_assert (!e->speculative && !e->indirect_unknown_callee);
> }
> else
> for (e = e->caller->callees;
> --- 1110,1116 ----
> if (e2->call_stmt)
> {
> e = cgraph_edge (e->caller, e2->call_stmt);
> ! gcc_assert (e->speculative && !e->indirect_unknown_callee);
> }
> else
> for (e = e->caller->callees;
> *************** cgraph_redirect_edge_callee (struct cgra
> *** 1147,1153 ****
> Remove the speculative call sequence and return edge representing the call.
> It is up to caller to redirect the call as appropriate. */
>
> ! static struct cgraph_edge *
> cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl)
> {
> struct cgraph_edge *e2;
> --- 1150,1156 ----
> Remove the speculative call sequence and return edge representing the call.
> It is up to caller to redirect the call as appropriate. */
>
> ! struct cgraph_edge *
> cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl)
> {
> struct cgraph_edge *e2;
> *************** cgraph_resolve_speculation (struct cgrap
> *** 1159,1170 ****
> {
> if (dump_file)
> {
> ! fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has "
> ! "turned out to have contradicitng known target ",
> ! xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> ! xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> ! print_generic_expr (dump_file, callee_decl, 0);
> ! fprintf (dump_file, "\n");
> }
> }
> else
> --- 1162,1182 ----
> {
> if (dump_file)
> {
> ! if (callee_decl)
> ! {
> ! fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has "
> ! "turned out to have contradicitng known target ",
> ! xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> ! xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> ! print_generic_expr (dump_file, callee_decl, 0);
> ! fprintf (dump_file, "\n");
> ! }
> ! else
> ! {
> ! fprintf (dump_file, "Removing speculative call %s/%i => %s/%i\n",
> ! xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> ! xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> ! }
> }
> }
> else
> *************** cgraph_redirect_edge_call_stmt_to_callee
> *** 1264,1275 ****
> cgraph_speculative_call_info (e, e, e2, ref);
> if (gimple_call_fndecl (e->call_stmt))
> e = cgraph_resolve_speculation (e, gimple_call_fndecl (e->call_stmt));
> ! else
> {
> if (dump_file)
> ! fprintf (dump_file, "Expanding speculative call of %s/%i -> %s/%i\n",
> xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
> xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order);
> gcc_assert (e2->speculative);
> push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
> new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred),
> --- 1276,1299 ----
> cgraph_speculative_call_info (e, e, e2, ref);
> if (gimple_call_fndecl (e->call_stmt))
> e = cgraph_resolve_speculation (e, gimple_call_fndecl (e->call_stmt));
> ! if (!gimple_check_call_matching_types (e->call_stmt, e->callee->symbol.decl,
> ! true))
> {
> + e = cgraph_resolve_speculation (e, NULL);
> if (dump_file)
> ! fprintf (dump_file, "Not expanding speculative call of %s/%i -> %s/%i\n"
> ! "Type mismatch.\n",
> xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
> xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order);
> + }
> + else
> + {
> + if (dump_file)
> + fprintf (dump_file, "Expanding speculative call of %s/%i -> %s/%i count:"
> + HOST_WIDEST_INT_PRINT_DEC"\n",
> + xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
> + xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order,
> + (HOST_WIDEST_INT)e->count);
> gcc_assert (e2->speculative);
> push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
> new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred),
> Index: cgraph.h
> ===================================================================
> *** cgraph.h (revision 201640)
> --- cgraph.h (working copy)
> *************** bool cgraph_propagate_frequency (struct
> *** 726,732 ****
> struct cgraph_node * cgraph_function_node (struct cgraph_node *,
> enum availability *avail = NULL);
> bool cgraph_get_body (struct cgraph_node *node);
> ! void
> cgraph_turn_edge_to_speculative (struct cgraph_edge *,
> struct cgraph_node *,
> gcov_type, int);
> --- 726,732 ----
> struct cgraph_node * cgraph_function_node (struct cgraph_node *,
> enum availability *avail = NULL);
> bool cgraph_get_body (struct cgraph_node *node);
> ! struct cgraph_edge *
> cgraph_turn_edge_to_speculative (struct cgraph_edge *,
> struct cgraph_node *,
> gcov_type, int);
> *************** struct cgraph_node *cgraph_function_vers
> *** 783,788 ****
> --- 783,789 ----
> basic_block, const char *);
> void tree_function_versioning (tree, tree, vec<ipa_replace_map_p, va_gc> *,
> bool, bitmap, bool, bitmap, basic_block);
> + struct cgraph_edge *cgraph_resolve_speculation (struct cgraph_edge *, tree);
>
> /* In cgraphbuild.c */
> unsigned int rebuild_cgraph_edges (void);
> *************** symtab_real_symbol_p (symtab_node node)
> *** 1398,1401 ****
> --- 1399,1414 ----
> return false;
> return true;
> }
> +
> + /* Return true if NODE can be discarded by linker from the binary. */
> +
> + static inline bool
> + symtab_can_be_discarded (symtab_node node)
> + {
> + return (DECL_EXTERNAL (node->symbol.decl)
> + || (DECL_ONE_ONLY (node->symbol.decl)
> + && node->symbol.resolution != LDPR_PREVAILING_DEF
> + && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY
> + && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY_EXP));
> + }
> #endif /* GCC_CGRAPH_H */
> Index: value-prof.c
> ===================================================================
> *** value-prof.c (revision 201640)
> --- value-prof.c (working copy)
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1431,1438 ****
> gimple stmt = gsi_stmt (*gsi);
> histogram_value histogram;
> gcov_type val, count, all, bb_all;
> - gcov_type prob;
> - gimple modify;
> struct cgraph_node *direct_call;
>
> if (gimple_code (stmt) != GIMPLE_CALL)
> --- 1431,1436 ----
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1452,1463 ****
> count = histogram->hvalue.counters [1];
> all = histogram->hvalue.counters [2];
>
> - if (4 * count <= 3 * all)
> - {
> - gimple_remove_histogram_value (cfun, stmt, histogram);
> - return false;
> - }
> -
> bb_all = gimple_bb (stmt)->count;
> /* The order of CHECK_COUNTER calls is important -
> since check_counter can correct the third parameter
> --- 1450,1455 ----
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1469,1478 ****
> return false;
> }
>
> ! if (all > 0)
> ! prob = GCOV_COMPUTE_SCALE (count, all);
> ! else
> ! prob = 0;
> direct_call = find_func_by_profile_id ((int)val);
>
> if (direct_call == NULL)
> --- 1461,1469 ----
> return false;
> }
>
> ! if (4 * count <= 3 * all)
> ! return false;
> !
> direct_call = find_func_by_profile_id ((int)val);
>
> if (direct_call == NULL)
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1488,1499 ****
> }
> return false;
> }
> - gimple_remove_histogram_value (cfun, stmt, histogram);
>
> if (!check_ic_target (stmt, direct_call))
> ! return false;
> !
> ! modify = gimple_ic (stmt, direct_call, prob, count, all);
>
> if (dump_file)
> {
> --- 1479,1499 ----
> }
> return false;
> }
>
> if (!check_ic_target (stmt, direct_call))
> ! {
> ! if (dump_file)
> ! {
> ! fprintf (dump_file, "Indirect call -> direct call ");
> ! print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
> ! fprintf (dump_file, "=> ");
> ! print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> ! fprintf (dump_file, " transformation skipped because of type mismatch");
> ! print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> ! }
> ! gimple_remove_histogram_value (cfun, stmt, histogram);
> ! return false;
> ! }
>
> if (dump_file)
> {
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1501,1510 ****
> print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
> fprintf (dump_file, "=> ");
> print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> ! fprintf (dump_file, " transformation on insn ");
> print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> - fprintf (dump_file, " to ");
> - print_gimple_stmt (dump_file, modify, 0, TDF_SLIM);
> fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
> " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count, all);
> }
> --- 1501,1508 ----
> print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
> fprintf (dump_file, "=> ");
> print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> ! fprintf (dump_file, " transformation on insn postponned to ipa-profile");
> print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
> " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count, all);
> }
> Index: ipa-inline-transform.c
> ===================================================================
> *** ipa-inline-transform.c (revision 201640)
> --- ipa-inline-transform.c (working copy)
> *************** along with GCC; see the file COPYING3.
> *** 46,51 ****
> --- 46,52 ----
>
> int ncalls_inlined;
> int nfunctions_inlined;
> + bool speculation_removed;
>
> /* Scale frequency of NODE edges by FREQ_SCALE. */
>
> *************** clone_inlined_nodes (struct cgraph_edge
> *** 134,139 ****
> --- 135,141 ----
> bool update_original, int *overall_size)
> {
> struct cgraph_node *inlining_into;
> + struct cgraph_edge *next;
>
> if (e->caller->global.inlined_to)
> inlining_into = e->caller->global.inlined_to;
> *************** clone_inlined_nodes (struct cgraph_edge
> *** 186,194 ****
> e->callee->global.inlined_to = inlining_into;
>
> /* Recursively clone all bodies. */
> ! for (e = e->callee->callees; e; e = e->next_callee)
> ! if (!e->inline_failed)
> ! clone_inlined_nodes (e, duplicate, update_original, overall_size);
> }
>
>
> --- 188,204 ----
> e->callee->global.inlined_to = inlining_into;
>
> /* Recursively clone all bodies. */
> ! for (e = e->callee->callees; e; e = next)
> ! {
> ! next = e->next_callee;
> ! if (!e->inline_failed)
> ! clone_inlined_nodes (e, duplicate, update_original, overall_size);
> ! if (e->speculative && !speculation_useful_p (e, true))
> ! {
> ! cgraph_resolve_speculation (e, NULL);
> ! speculation_removed = true;
> ! }
> ! }
> }
>
>
> *************** inline_call (struct cgraph_edge *e, bool
> *** 218,223 ****
> --- 228,234 ----
> bool predicated = inline_edge_summary (e)->predicate != NULL;
> #endif
>
> + speculation_removed = false;
> /* Don't inline inlined edges. */
> gcc_assert (e->inline_failed);
> /* Don't even think of inlining inline clone. */
> *************** inline_call (struct cgraph_edge *e, bool
> *** 267,272 ****
> --- 278,284 ----
> error due to INLINE_SIZE_SCALE roudoff errors. */
> gcc_assert (!update_overall_summary || !overall_size || new_edges_found
> || abs (estimated_growth - (new_size - old_size)) <= 1
> + || speculation_removed
> /* FIXME: a hack. Edges with false predicate are accounted
> wrong, we should remove them from callgraph. */
> || predicated);
> Index: ipa-inline.c
> ===================================================================
> *** ipa-inline.c (revision 201640)
> --- ipa-inline.c (working copy)
> *************** report_inline_failed_reason (struct cgra
> *** 229,238 ****
> We check whether inlining is possible at all and whether
> caller growth limits allow doing so.
>
> ! if REPORT is true, output reason to the dump file. */
>
> static bool
> ! can_inline_edge_p (struct cgraph_edge *e, bool report)
> {
> bool inlinable = true;
> enum availability avail;
> --- 229,241 ----
> We check whether inlining is possible at all and whether
> caller growth limits allow doing so.
>
> ! if REPORT is true, output reason to the dump file.
> !
> ! if DISREGARD_LIMITES is true, ignore size limits.*/
>
> static bool
> ! can_inline_edge_p (struct cgraph_edge *e, bool report,
> ! bool disregard_limits = false)
> {
> bool inlinable = true;
> enum availability avail;
> *************** can_inline_edge_p (struct cgraph_edge *e
> *** 309,314 ****
> --- 312,318 ----
> }
> /* Check if caller growth allows the inlining. */
> else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)
> + && !disregard_limits
> && !lookup_attribute ("flatten",
> DECL_ATTRIBUTES
> (e->caller->global.inlined_to
> *************** heap_edge_removal_hook (struct cgraph_ed
> *** 1400,1405 ****
> --- 1404,1482 ----
> }
> }
>
> + /* Return true if speculation of edge E seems useful.
> + If ANTICIPATE_INLINING is true, be conservative and hope that E
> + may get inlined. */
> +
> + bool
> + speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
> + {
> + enum availability avail;
> + struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
> + struct cgraph_edge *direct, *indirect;
> + struct ipa_ref *ref;
> +
> + gcc_assert (e->speculative && !e->indirect_unknown_callee);
> +
> + if (!cgraph_maybe_hot_edge_p (e))
> + return false;
> +
> + /* See if IP optimizations found something potentially useful about the
> + function. For now we look only for CONST/PURE flags. Almost everything
> + else we propagate is useless. */
> + if (avail >= AVAIL_AVAILABLE)
> + {
> + int ecf_flags = flags_from_decl_or_type (target->symbol.decl);
> + if (ecf_flags & ECF_CONST)
> + {
> + cgraph_speculative_call_info (e, direct, indirect, ref);
> + if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
> + return true;
> + }
> + else if (ecf_flags & ECF_PURE)
> + {
> + cgraph_speculative_call_info (e, direct, indirect, ref);
> + if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
> + return true;
> + }
> + }
> + /* If we did not managed to inline the function nor redirect
> + to an ipa-cp clone (that are seen by having local flag set),
> + it is probably pointless to inline it unless hardware is missing
> + indirect call predictor. */
> + if (!anticipate_inlining && e->inline_failed && !target->local.local)
> + return false;
> + /* For overwritable targets there is not much to do. */
> + if (e->inline_failed && !can_inline_edge_p (e, false, true))
> + return false;
> + /* OK, speculation seems interesting. */
> + return true;
> + }
> +
> + /* We know that EDGE is not going to be inlined.
> + See if we can remove speculation. */
> +
> + static void
> + resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
> + {
> + if (edge->speculative && !speculation_useful_p (edge, false))
> + {
> + struct cgraph_node *node = edge->caller;
> + struct cgraph_node *where = node->global.inlined_to
> + ? node->global.inlined_to : node;
> + bitmap updated_nodes = BITMAP_ALLOC (NULL);
> +
> + cgraph_resolve_speculation (edge, NULL);
> + reset_node_growth_cache (where);
> + reset_edge_caches (where);
> + inline_update_overall_summary (where);
> + update_caller_keys (edge_heap, where,
> + updated_nodes, NULL);
> + reset_node_growth_cache (where);
> + BITMAP_FREE (updated_nodes);
> + }
> + }
> +
> /* We use greedy algorithm for inlining of small functions:
> All inline candidates are put into prioritized heap ordered in
> increasing badness.
> *************** inline_small_functions (void)
> *** 1478,1491 ****
> /* Populate the heeap with all edges we might inline. */
>
> FOR_EACH_DEFINED_FUNCTION (node)
> ! if (!node->global.inlined_to)
> ! {
> ! if (dump_file)
> ! fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
> ! cgraph_node_name (node), node->symbol.order);
>
> ! for (edge = node->callers; edge; edge = edge->next_caller)
> if (edge->inline_failed
> && can_inline_edge_p (edge, true)
> && want_inline_small_function_p (edge, true)
> && edge->inline_failed)
> --- 1555,1573 ----
> /* Populate the heeap with all edges we might inline. */
>
> FOR_EACH_DEFINED_FUNCTION (node)
> ! {
> ! bool update = false;
> ! struct cgraph_edge *next;
>
> ! if (dump_file)
> ! fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
> ! cgraph_node_name (node), node->symbol.order);
> !
> ! for (edge = node->callees; edge; edge = next)
> ! {
> ! next = edge->next_callee;
> if (edge->inline_failed
> + && !edge->aux
> && can_inline_edge_p (edge, true)
> && want_inline_small_function_p (edge, true)
> && edge->inline_failed)
> *************** inline_small_functions (void)
> *** 1493,1499 ****
> gcc_assert (!edge->aux);
> update_edge_key (edge_heap, edge);
> }
> ! }
>
> gcc_assert (in_lto_p
> || !max_count
> --- 1575,1598 ----
> gcc_assert (!edge->aux);
> update_edge_key (edge_heap, edge);
> }
> ! if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
> ! {
> ! cgraph_resolve_speculation (edge, NULL);
> ! update = true;
> ! }
> ! }
> ! if (update)
> ! {
> ! struct cgraph_node *where = node->global.inlined_to
> ! ? node->global.inlined_to : node;
> ! inline_update_overall_summary (where);
> ! reset_node_growth_cache (where);
> ! reset_edge_caches (where);
> ! update_caller_keys (edge_heap, where,
> ! updated_nodes, NULL);
> ! bitmap_clear (updated_nodes);
> ! }
> ! }
>
> gcc_assert (in_lto_p
> || !max_count
> *************** inline_small_functions (void)
> *** 1534,1540 ****
> }
>
> if (!can_inline_edge_p (edge, true))
> ! continue;
>
> callee = cgraph_function_or_thunk_node (edge->callee, NULL);
> growth = estimate_edge_growth (edge);
> --- 1633,1642 ----
> }
>
> if (!can_inline_edge_p (edge, true))
> ! {
> ! resolve_noninline_speculation (edge_heap, edge);
> ! continue;
> ! }
>
> callee = cgraph_function_or_thunk_node (edge->callee, NULL);
> growth = estimate_edge_growth (edge);
> *************** inline_small_functions (void)
> *** 1568,1578 ****
> {
> edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
> report_inline_failed_reason (edge);
> continue;
> }
>
> if (!want_inline_small_function_p (edge, true))
> ! continue;
>
> /* Heuristics for inlining small functions works poorly for
> recursive calls where we do efect similar to loop unrolling.
> --- 1670,1684 ----
> {
> edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
> report_inline_failed_reason (edge);
> + resolve_noninline_speculation (edge_heap, edge);
> continue;
> }
>
> if (!want_inline_small_function_p (edge, true))
> ! {
> ! resolve_noninline_speculation (edge_heap, edge);
> ! continue;
> ! }
>
> /* Heuristics for inlining small functions works poorly for
> recursive calls where we do efect similar to loop unrolling.
> *************** inline_small_functions (void)
> *** 1588,1593 ****
> --- 1694,1700 ----
> ? &new_indirect_edges : NULL))
> {
> edge->inline_failed = CIF_RECURSIVE_INLINING;
> + resolve_noninline_speculation (edge_heap, edge);
> continue;
> }
> reset_edge_caches (where);
> *************** inline_small_functions (void)
> *** 1596,1601 ****
> --- 1703,1709 ----
> if (flag_indirect_inlining)
> add_new_edges_to_heap (edge_heap, new_indirect_edges);
> update_callee_keys (edge_heap, where, updated_nodes);
> + bitmap_clear (updated_nodes);
> }
> else
> {
> *************** inline_small_functions (void)
> *** 1621,1626 ****
> --- 1729,1735 ----
> edge->inline_failed
> = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl)
> ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
> + resolve_noninline_speculation (edge_heap, edge);
> continue;
> }
> else if (depth && dump_file)
> *************** ipa_inline (void)
> *** 1773,1778 ****
> --- 1882,1888 ----
> struct cgraph_node **order =
> XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
> int i;
> + int cold;
>
> if (in_lto_p && optimize)
> ipa_update_after_lto_read ();
> *************** ipa_inline (void)
> *** 1820,1885 ****
> code size will shrink because the out-of-line copy is eliminated.
> We do this regardless on the callee size as long as function growth limits
> are met. */
> ! if (flag_inline_functions_called_once)
> ! {
> ! int cold;
> ! if (dump_file)
> ! fprintf (dump_file,
> ! "\nDeciding on functions to be inlined into all callers:\n");
>
> ! /* Inlining one function called once has good chance of preventing
> ! inlining other function into the same callee. Ideally we should
> ! work in priority order, but probably inlining hot functions first
> ! is good cut without the extra pain of maintaining the queue.
> !
> ! ??? this is not really fitting the bill perfectly: inlining function
> ! into callee often leads to better optimization of callee due to
> ! increased context for optimization.
> ! For example if main() function calls a function that outputs help
> ! and then function that does the main optmization, we should inline
> ! the second with priority even if both calls are cold by themselves.
> !
> ! We probably want to implement new predicate replacing our use of
> ! maybe_hot_edge interpreted as maybe_hot_edge || callee is known
> ! to be hot. */
> ! for (cold = 0; cold <= 1; cold ++)
> {
> ! FOR_EACH_DEFINED_FUNCTION (node)
> {
> ! if (want_inline_function_to_all_callers_p (node, cold))
> {
> ! int num_calls = 0;
> ! struct cgraph_edge *e;
> ! for (e = node->callers; e; e = e->next_caller)
> ! num_calls++;
> ! while (node->callers && !node->global.inlined_to)
> ! {
> ! struct cgraph_node *caller = node->callers->caller;
>
> ! if (dump_file)
> ! {
> ! fprintf (dump_file,
> ! "\nInlining %s size %i.\n",
> ! cgraph_node_name (node),
> ! inline_summary (node)->size);
> ! fprintf (dump_file,
> ! " Called once from %s %i insns.\n",
> ! cgraph_node_name (node->callers->caller),
> ! inline_summary (node->callers->caller)->size);
> ! }
>
> ! inline_call (node->callers, true, NULL, NULL, true);
> if (dump_file)
> ! fprintf (dump_file,
> ! " Inlined into %s which now has %i size\n",
> ! cgraph_node_name (caller),
> ! inline_summary (caller)->size);
> ! if (!num_calls--)
> ! {
> ! if (dump_file)
> ! fprintf (dump_file, "New calls found; giving up.\n");
> ! break;
> ! }
> }
> }
> }
> --- 1930,2012 ----
> code size will shrink because the out-of-line copy is eliminated.
> We do this regardless on the callee size as long as function growth limits
> are met. */
> ! if (dump_file)
> ! fprintf (dump_file,
> ! "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
>
> ! /* Inlining one function called once has good chance of preventing
> ! inlining other function into the same callee. Ideally we should
> ! work in priority order, but probably inlining hot functions first
> ! is good cut without the extra pain of maintaining the queue.
> !
> ! ??? this is not really fitting the bill perfectly: inlining function
> ! into callee often leads to better optimization of callee due to
> ! increased context for optimization.
> ! For example if main() function calls a function that outputs help
> ! and then function that does the main optmization, we should inline
> ! the second with priority even if both calls are cold by themselves.
> !
> ! We probably want to implement new predicate replacing our use of
> ! maybe_hot_edge interpreted as maybe_hot_edge || callee is known
> ! to be hot. */
> ! for (cold = 0; cold <= 1; cold ++)
> ! {
> ! FOR_EACH_DEFINED_FUNCTION (node)
> {
> ! struct cgraph_edge *edge, *next;
> ! bool update=false;
> !
> ! for (edge = node->callees; edge; edge = next)
> {
> ! next = edge->next_callee;
> ! if (edge->speculative && !speculation_useful_p (edge, false))
> {
> ! cgraph_resolve_speculation (edge, NULL);
> ! update = true;
> ! }
> ! }
> ! if (update)
> ! {
> ! struct cgraph_node *where = node->global.inlined_to
> ! ? node->global.inlined_to : node;
> ! reset_node_growth_cache (where);
> ! reset_edge_caches (where);
> ! inline_update_overall_summary (where);
> ! }
> ! if (flag_inline_functions_called_once
> ! && want_inline_function_to_all_callers_p (node, cold))
> ! {
> ! int num_calls = 0;
> ! struct cgraph_edge *e;
> ! for (e = node->callers; e; e = e->next_caller)
> ! num_calls++;
> ! while (node->callers && !node->global.inlined_to)
> ! {
> ! struct cgraph_node *caller = node->callers->caller;
>
> ! if (dump_file)
> ! {
> ! fprintf (dump_file,
> ! "\nInlining %s size %i.\n",
> ! cgraph_node_name (node),
> ! inline_summary (node)->size);
> ! fprintf (dump_file,
> ! " Called once from %s %i insns.\n",
> ! cgraph_node_name (node->callers->caller),
> ! inline_summary (node->callers->caller)->size);
> ! }
>
> ! inline_call (node->callers, true, NULL, NULL, true);
> ! if (dump_file)
> ! fprintf (dump_file,
> ! " Inlined into %s which now has %i size\n",
> ! cgraph_node_name (caller),
> ! inline_summary (caller)->size);
> ! if (!num_calls--)
> ! {
> if (dump_file)
> ! fprintf (dump_file, "New calls found; giving up.\n");
> ! break;
> }
> }
> }
> Index: ipa-inline.h
> ===================================================================
> *** ipa-inline.h (revision 201640)
> --- ipa-inline.h (working copy)
> *************** inline_hints do_estimate_edge_hints (str
> *** 226,231 ****
> --- 226,232 ----
> void initialize_growth_caches (void);
> void free_growth_caches (void);
> void compute_inline_parameters (struct cgraph_node *, bool);
> + bool speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining);
>
> /* In ipa-inline-transform.c */
> bool inline_call (struct cgraph_edge *, bool, vec<cgraph_edge_p> *, int *, bool);
> Index: ipa.c
> ===================================================================
> *** ipa.c (revision 201640)
> --- ipa.c (working copy)
> *************** bool
> *** 768,778 ****
> can_replace_by_local_alias (symtab_node node)
> {
> return (symtab_node_availability (node) > AVAIL_OVERWRITABLE
> ! && !DECL_EXTERNAL (node->symbol.decl)
> ! && (!DECL_ONE_ONLY (node->symbol.decl)
> ! || node->symbol.resolution == LDPR_PREVAILING_DEF
> ! || node->symbol.resolution == LDPR_PREVAILING_DEF_IRONLY
> ! || node->symbol.resolution == LDPR_PREVAILING_DEF_IRONLY_EXP));
> }
>
> /* Mark visibility of all functions.
> --- 768,774 ----
> can_replace_by_local_alias (symtab_node node)
> {
> return (symtab_node_availability (node) > AVAIL_OVERWRITABLE
> ! && !symtab_can_be_discarded (node));
> }
>
> /* Mark visibility of all functions.
> *************** ipa_profile (void)
> *** 1407,1459 ****
> bool something_changed = false;
> int i;
> gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
> !
> ! /* Produce speculative calls: we saved common traget from porfiling into
> ! e->common_target_id. Now, at link time, we can look up corresponding
> ! function node and produce speculative call. */
> ! if (in_lto_p)
> ! {
> ! struct cgraph_edge *e;
> ! struct cgraph_node *n,*n2;
> !
> ! init_node_map (false);
> ! FOR_EACH_DEFINED_FUNCTION (n)
> ! {
> ! bool update = false;
> !
> ! for (e = n->indirect_calls; e; e = e->next_callee)
> ! if (e->indirect_info->common_target_id)
> ! {
> ! n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
> ! if (n2)
> ! {
> ! if (dump_file)
> ! {
> ! fprintf (dump_file, "Indirect call -> direct call from"
> ! " other module %s/%i => %s/%i, prob %3.2f\n",
> ! xstrdup (cgraph_node_name (n)), n->symbol.order,
> ! xstrdup (cgraph_node_name (n2)), n2->symbol.order,
> ! e->indirect_info->common_target_probability
> ! / (float)REG_BR_PROB_BASE);
> ! }
> ! cgraph_turn_edge_to_speculative
> ! (e, n2,
> ! apply_scale (e->count,
> ! e->indirect_info->common_target_probability),
> ! apply_scale (e->frequency,
> ! e->indirect_info->common_target_probability));
> ! update = true;
> ! }
> ! else
> ! if (dump_file)
> ! fprintf (dump_file, "Function with profile-id %i not found.\n",
> ! e->indirect_info->common_target_id);
> ! }
> ! if (update)
> ! inline_update_overall_summary (n);
> ! }
> ! del_node_map ();
> ! }
>
> if (dump_file)
> dump_histogram (dump_file, histogram);
> --- 1403,1411 ----
> bool something_changed = false;
> int i;
> gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
> ! struct cgraph_node *n,*n2;
> ! int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
> ! bool node_map_initialized = false;
>
> if (dump_file)
> dump_histogram (dump_file, histogram);
> *************** ipa_profile (void)
> *** 1523,1528 ****
> --- 1475,1580 ----
> histogram.release();
> free_alloc_pool (histogram_pool);
>
> + /* Produce speculative calls: we saved common traget from porfiling into
> + e->common_target_id. Now, at link time, we can look up corresponding
> + function node and produce speculative call. */
> +
> + FOR_EACH_DEFINED_FUNCTION (n)
> + {
> + bool update = false;
> +
> + for (e = n->indirect_calls; e; e = e->next_callee)
> + {
> + if (n->count)
> + nindirect++;
> + if (e->indirect_info->common_target_id)
> + {
> + if (!node_map_initialized)
> + init_node_map (false);
> + node_map_initialized = true;
> + ncommon++;
> + n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
> + if (n2)
> + {
> + if (dump_file)
> + {
> + fprintf (dump_file, "Indirect call -> direct call from"
> + " other module %s/%i => %s/%i, prob %3.2f\n",
> + xstrdup (cgraph_node_name (n)), n->symbol.order,
> + xstrdup (cgraph_node_name (n2)), n2->symbol.order,
> + e->indirect_info->common_target_probability
> + / (float)REG_BR_PROB_BASE);
> + }
> + if (e->indirect_info->common_target_probability
> + < REG_BR_PROB_BASE / 2)
> + {
> + nuseless++;
> + if (dump_file)
> + fprintf (dump_file,
> + "Not speculating: probability is too low.\n");
> + }
> + else if (!cgraph_maybe_hot_edge_p (e))
> + {
> + nuseless++;
> + if (dump_file)
> + fprintf (dump_file,
> + "Not speculating: call is cold.\n");
> + }
> + else if (cgraph_function_body_availability (n2)
> + <= AVAIL_OVERWRITABLE
> + && symtab_can_be_discarded ((symtab_node) n2))
> + {
> + nuseless++;
> + if (dump_file)
> + fprintf (dump_file,
> + "Not speculating: target is overwritable "
> + "and can be discarded.\n");
> + }
> + else
> + {
> + /* Target may be overwritable, but profile says that
> + control flow goes to this particular implementation
> + of N2. Speculate on the local alias to allow inlining.
> + */
> + if (!symtab_can_be_discarded ((symtab_node) n2))
> + n2 = cgraph (symtab_nonoverwritable_alias ((symtab_node)n2));
> + nconverted++;
> + cgraph_turn_edge_to_speculative
> + (e, n2,
> + apply_scale (e->count,
> + e->indirect_info->common_target_probability),
> + apply_scale (e->frequency,
> + e->indirect_info->common_target_probability));
> + update = true;
> + }
> + }
> + else
> + {
> + if (dump_file)
> + fprintf (dump_file, "Function with profile-id %i not found.\n",
> + e->indirect_info->common_target_id);
> + nunknown++;
> + }
> + }
> + }
> + if (update)
> + inline_update_overall_summary (n);
> + }
> + if (node_map_initialized)
> + del_node_map ();
> + if (dump_file && nindirect)
> + fprintf (dump_file,
> + "%i indirect calls trained.\n"
> + "%i (%3.2f%%) have common target.\n"
> + "%i (%3.2f%%) targets was not found.\n"
> + "%i (%3.2f%%) speculations seems useless.\n"
> + "%i (%3.2f%%) speculations produced.\n",
> + nindirect,
> + ncommon, ncommon * 100.0 / nindirect,
> + nunknown, nunknown * 100.0 / nindirect,
> + nuseless, nuseless * 100.0 / nindirect,
> + nconverted, nconverted * 100.0 / nindirect);
> +
> order_pos = ipa_reverse_postorder (order);
> for (i = order_pos - 1; i >= 0; i--)
> {