Index: gcc/params.def =================================================================== --- gcc/params.def (revision 201891) +++ gcc/params.def (working copy) @@ -108,9 +108,26 @@ DEFPARAM (PARAM_INLINE_FUNCTION_OVERHEAD_SIZE, "Size estimate of function overhead (prologue/epilogue) for inlining purposes", 7, 0, 0) +/* Knob to control hot-caller heuristic. 0 means it is turned off, 1 means + it is always applied, and 2 means it is applied only if the footprint is + smaller than PARAM_HOT_CALLER_CODESIZE_THRESHOLD. */ DEFPARAM (PARAM_INLINE_HOT_CALLER, "inline-hot-caller", "Consider cold callsites for inlining if caller contains hot code", + 2, 0, 2) + +/* The maximum code size estimate under which hot caller heuristic is + applied. */ +DEFPARAM(PARAM_HOT_CALLER_CODESIZE_THRESHOLD, + "hot-caller-codesize-threshold", + "Maximum profile-based code size footprint estimate for " + "hot caller heuristic ", + 10000, 0, 0) + +DEFPARAM (PARAM_INLINE_USEFUL_COLD_CALLEE, + "inline-useful-cold-callee", + "Only consider cold callsites for inlining if analysis estimates " + "downstream optimization opportunities", 1, 0, 1) /* Limit of iterations of early inliner. This basically bounds number of Index: gcc/ipa-inline.c =================================================================== --- gcc/ipa-inline.c (revision 201891) +++ gcc/ipa-inline.c (working copy) @@ -528,12 +528,76 @@ big_speedup_p (struct cgraph_edge *e) return false; } +/* Returns true if callee of edge E is considered useful to inline + even if it is cold. A callee is considered useful if there is at + least one argument of pointer type with IPA_JF_KNOWN_TYPE or + IPA_JF_UNKNOWN as the jump function. The reasoniong here is that + it is often beneficial to inline bar into foo in the following + code even if the callsite is cold: + void foo () { + A a; + bar (&a); + ... + } + + This exposes accesses to the 'a' object. The jump function of &a + is either IPA_JF_KNOWN_TYPE or IPA_JF_UNKNOWN (depending on + intervening code). */ + +static inline bool +useful_cold_callee (struct cgraph_edge *e) +{ + gimple call = e->call_stmt; + int n, arg_num = gimple_call_num_args (call); + struct ipa_edge_args *args = IPA_EDGE_REF (e); + + if (ipa_node_params_vector.exists ()) + { + for (n = 0; n < arg_num; n++) + { + tree arg = gimple_call_arg (call, n); + if (POINTER_TYPE_P (TREE_TYPE (arg))) + { + struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, n); + if (jfunc->type == IPA_JF_KNOWN_TYPE + || jfunc->type == IPA_JF_UNKNOWN) + return true; + } + } + } + return false; +} + +/* Returns true if hot caller heuristic should be used. */ + +static inline bool +enable_hot_caller_heuristic (void) +{ + + gcov_working_set_t *ws = NULL; + int size_threshold = PARAM_VALUE (PARAM_HOT_CALLER_CODESIZE_THRESHOLD); + int num_counters = 0; + int param_inline_hot_caller = PARAM_VALUE (PARAM_INLINE_HOT_CALLER); + + if (param_inline_hot_caller == 0) + return false; + else if (param_inline_hot_caller == 1) + return true; + + ws = find_working_set(PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE)); + if (!ws) + return false; + num_counters = ws->num_counters; + return num_counters <= size_threshold; + +} /* Returns true if an edge or its caller are hot enough to be considered for inlining. */ static bool edge_hot_enough_p (struct cgraph_edge *edge) { + static bool use_hot_caller_heuristic = enable_hot_caller_heuristic (); if (cgraph_maybe_hot_edge_p (edge)) return true; @@ -543,9 +607,17 @@ edge_hot_enough_p (struct cgraph_edge *edge) if (flag_auto_profile && edge->callee->count == 0 && edge->callee->max_bb_count > 0) return false; - if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER) - && maybe_hot_count_p (NULL, edge->caller->max_bb_count)) - return true; + if (use_hot_caller_heuristic) + { + struct cgraph_node *where = edge->caller; + if (maybe_hot_count_p (NULL, where->max_bb_count)) + { + if (PARAM_VALUE (PARAM_INLINE_USEFUL_COLD_CALLEE)) + return useful_cold_callee (edge); + else + return true; + } + } return false; }