Inliner heuristics TLC 3/n

Richard Biener richard.guenther@gmail.com
Tue Apr 22 08:24:00 GMT 2014


On Fri, Apr 18, 2014 at 9:45 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patch makes the FDO inliner more aggressive about inlining function
> calls that are considered hot.  This is based on the observation that
> INLINE_INSNS_AUTO is the most common reason for inlining not happening
> (20.5% for Firefox, where 63.2% of calls are not inlinable because the body
> is not available, and 66% for GCC).
>
> With this patch the INLINE_HINT_known_hot hint is added to edges that were
> determined to be hot by the profile and for which, moreover, there is at
> least a 50% chance that the caller will invoke the call during its execution.
>
> With this hint we now ignore both limits (INLINE_INSNS_AUTO and
> INLINE_INSNS_SINGLE) - this is because the greedy algorithm driven by the
> speed/size_cost metric should work pretty well here, but we may want to
> revisit it (i.e. add INLINE_INSNS_FDO or so).  I am erring on the aggressive
> side so that we collect some data on when the profile is a win or a loss.

Just remove those artificial limits and replace them with a factor on the
estimated size/time benefit (cold-vs-hot and inline-declared-vs-not).  At
least don't introduce yet another set of size params.

Richard.

> Bootstrapped/regtested x86_64-linux, committed.
>
> Honza
>
>         * ipa-inline.h (INLINE_HINT_known_hot): New hint.
>         * ipa-inline-analysis.c (dump_inline_hints): Dump it.
>         (do_estimate_edge_time): Compute it.
>         * ipa-inline.c (want_inline_small_function_p): Bypass
>         INLINE_INSNS_AUTO/SINGLE limits for calls that are known
>         to be hot.
> Index: ipa-inline.h
> ===================================================================
> --- ipa-inline.h        (revision 209489)
> +++ ipa-inline.h        (working copy)
> @@ -68,7 +68,9 @@ enum inline_hints_vals {
>    INLINE_HINT_cross_module = 64,
>    /* If array indexes of loads/stores become known there may be room for
>       further optimization.  */
> -  INLINE_HINT_array_index = 128
> +  INLINE_HINT_array_index = 128,
> +  /* We know that the callee is hot by profile.  */
> +  INLINE_HINT_known_hot = 256
>  };
>  typedef int inline_hints;
>
> Index: ipa-inline-analysis.c
> ===================================================================
> --- ipa-inline-analysis.c       (revision 209489)
> +++ ipa-inline-analysis.c       (working copy)
> @@ -671,6 +671,11 @@ dump_inline_hints (FILE *f, inline_hints
>        hints &= ~INLINE_HINT_array_index;
>        fprintf (f, " array_index");
>      }
> +  if (hints & INLINE_HINT_known_hot)
> +    {
> +      hints &= ~INLINE_HINT_known_hot;
> +      fprintf (f, " known_hot");
> +    }
>    gcc_assert (!hints);
>  }
>
> @@ -3666,6 +3671,17 @@ do_estimate_edge_time (struct cgraph_edg
>                                 &known_aggs);
>    estimate_node_size_and_time (callee, clause, known_vals, known_binfos,
>                                known_aggs, &size, &min_size, &time, &hints, es->param);
> +
> +  /* When we have profile feedback, we can quite safely identify hot
> +     edges and for those we disable size limits.  Don't do that when
> +     probability that caller will call the callee is low however, since it
> +     may hurt optimization of the caller's hot path.  */
> +  if (edge->count && cgraph_maybe_hot_edge_p (edge)
> +      && (edge->count * 2
> +          > (edge->caller->global.inlined_to
> +            ? edge->caller->global.inlined_to->count : edge->caller->count)))
> +    hints |= INLINE_HINT_known_hot;
> +
>    known_vals.release ();
>    known_binfos.release ();
>    known_aggs.release ();
> Index: ipa-inline.c
> ===================================================================
> --- ipa-inline.c        (revision 209522)
> +++ ipa-inline.c        (working copy)
> @@ -578,18 +578,21 @@ want_inline_small_function_p (struct cgr
>       inline cnadidate.  At themoment we allow inline hints to
>       promote non-inline function to inline and we increase
>       MAX_INLINE_INSNS_SINGLE 16fold for inline functions.  */
> -  else if (!DECL_DECLARED_INLINE_P (callee->decl)
> +  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
> +          && (!e->count || !cgraph_maybe_hot_edge_p (e)))
>            && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
>               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
>      {
>        e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
>        want_inline = false;
>      }
> -  else if (DECL_DECLARED_INLINE_P (callee->decl)
> +  else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
>            && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
>               > 16 * MAX_INLINE_INSNS_SINGLE)
>      {
> -      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
> +      e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
> +                         ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
> +                         : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
>        want_inline = false;
>      }
>    else
> @@ -606,6 +609,7 @@ want_inline_small_function_p (struct cgr
>                && growth >= MAX_INLINE_INSNS_SINGLE
>                && ((!big_speedup
>                     && !(hints & (INLINE_HINT_indirect_call
> +                                 | INLINE_HINT_known_hot
>                                   | INLINE_HINT_loop_iterations
>                                   | INLINE_HINT_array_index
>                                   | INLINE_HINT_loop_stride)))
> @@ -630,6 +634,7 @@ want_inline_small_function_p (struct cgr
>          inlining given function is very profitable.  */
>        else if (!DECL_DECLARED_INLINE_P (callee->decl)
>                && !big_speedup
> +              && !(hints & INLINE_HINT_known_hot)
>                && growth >= ((hints & (INLINE_HINT_indirect_call
>                                        | INLINE_HINT_loop_iterations
>                                        | INLINE_HINT_array_index



More information about the Gcc-patches mailing list