Inliner heuristics TLC 3/n
Richard Biener
richard.guenther@gmail.com
Tue Apr 22 08:24:00 GMT 2014
On Fri, Apr 18, 2014 at 9:45 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patch makes the FDO inliner more aggressive about inlining function
> calls that are considered hot. This is based on the observation that
> INLINE_INSNS_AUTO is the most common reason for inlining not happening
> (20.5% for Firefox, where 63.2% of calls are not inlinable because the body
> is not available, and 66% for GCC).
>
> With this patch the INLINE_HINT_known_hot hint is added to edges that were
> determined to be hot by the profile and for which there is, moreover, at
> least a 50% chance that the caller will invoke the call during its execution.
>
> With this hint we now ignore both limits (INLINE_INSNS_AUTO and
> INLINE_INSNS_SINGLE) - this is because the greedy algorithm driven by the
> speed/size_cost metric should work pretty well here, but we may want to
> revisit it (i.e. add INLINE_INSNS_FDO or so). I am erring on the aggressive
> side so that we can collect some data on when the profile is a win or a loss.
Just remove those artificial limits and replace them with a factor on the
estimated size/time benefit (cold-vs-hot and inline-declared-vs-not). At
least don't introduce yet another set of size params.
Richard.
> Bootstrapped/regtested x86_64-linux, committed.
>
> Honza
>
> * ipa-inline.h (INLINE_HINT_known_hot): New hint.
> * ipa-inline-analysis.c (dump_inline_hints): Dump it.
> (do_estimate_edge_time): Compute it.
> * ipa-inline.c (want_inline_small_function_p): Bypass
> INLINE_INSNS_AUTO/SINGLE limits for calls that are known
> to be hot.
> Index: ipa-inline.h
> ===================================================================
> --- ipa-inline.h (revision 209489)
> +++ ipa-inline.h (working copy)
> @@ -68,7 +68,9 @@ enum inline_hints_vals {
> INLINE_HINT_cross_module = 64,
> /* If array indexes of loads/stores become known there may be room for
> further optimization. */
> - INLINE_HINT_array_index = 128
> + INLINE_HINT_array_index = 128,
> + /* We know that the callee is hot by profile. */
> + INLINE_HINT_known_hot = 256
> };
> typedef int inline_hints;
>
> Index: ipa-inline-analysis.c
> ===================================================================
> --- ipa-inline-analysis.c (revision 209489)
> +++ ipa-inline-analysis.c (working copy)
> @@ -671,6 +671,11 @@ dump_inline_hints (FILE *f, inline_hints
> hints &= ~INLINE_HINT_array_index;
> fprintf (f, " array_index");
> }
> + if (hints & INLINE_HINT_known_hot)
> + {
> + hints &= ~INLINE_HINT_known_hot;
> + fprintf (f, " known_hot");
> + }
> gcc_assert (!hints);
> }
>
> @@ -3666,6 +3671,17 @@ do_estimate_edge_time (struct cgraph_edg
> &known_aggs);
> estimate_node_size_and_time (callee, clause, known_vals, known_binfos,
> known_aggs, &size, &min_size, &time, &hints, es->param);
> +
> + /* When we have profile feedback, we can quite safely identify hot
> + edges and for those we disable size limits. Don't do that when
> + probability that caller will call the callee is low however, since it
> + may hurt optimization of the caller's hot path. */
> + if (edge->count && cgraph_maybe_hot_edge_p (edge)
> + && (edge->count * 2
> + > (edge->caller->global.inlined_to
> + ? edge->caller->global.inlined_to->count : edge->caller->count)))
> + hints |= INLINE_HINT_known_hot;
> +
> known_vals.release ();
> known_binfos.release ();
> known_aggs.release ();
> Index: ipa-inline.c
> ===================================================================
> --- ipa-inline.c (revision 209522)
> +++ ipa-inline.c (working copy)
> @@ -578,18 +578,21 @@ want_inline_small_function_p (struct cgr
> inline cnadidate. At themoment we allow inline hints to
> promote non-inline function to inline and we increase
> MAX_INLINE_INSNS_SINGLE 16fold for inline functions. */
> - else if (!DECL_DECLARED_INLINE_P (callee->decl)
> + else if ((!DECL_DECLARED_INLINE_P (callee->decl)
> + && (!e->count || !cgraph_maybe_hot_edge_p (e)))
> && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
> > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
> {
> e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
> want_inline = false;
> }
> - else if (DECL_DECLARED_INLINE_P (callee->decl)
> + else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
> && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
> > 16 * MAX_INLINE_INSNS_SINGLE)
> {
> - e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
> + e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
> + ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
> + : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
> want_inline = false;
> }
> else
> @@ -606,6 +609,7 @@ want_inline_small_function_p (struct cgr
> && growth >= MAX_INLINE_INSNS_SINGLE
> && ((!big_speedup
> && !(hints & (INLINE_HINT_indirect_call
> + | INLINE_HINT_known_hot
> | INLINE_HINT_loop_iterations
> | INLINE_HINT_array_index
> | INLINE_HINT_loop_stride)))
> @@ -630,6 +634,7 @@ want_inline_small_function_p (struct cgr
> inlining given function is very profitable. */
> else if (!DECL_DECLARED_INLINE_P (callee->decl)
> && !big_speedup
> + && !(hints & INLINE_HINT_known_hot)
> && growth >= ((hints & (INLINE_HINT_indirect_call
> | INLINE_HINT_loop_iterations
> | INLINE_HINT_array_index
More information about the Gcc-patches
mailing list