This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Cost model for indirect call speculation
- From: Xinliang David Li <davidxl at google dot com>
- To: Jan Hubicka <hubicka at ucw dot cz>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Martin Liška <marxin dot liska at gmail dot com>
- Date: Sun, 11 Aug 2013 21:52:38 -0700
- Subject: Re: Cost model for indirect call speculation
- References: <20130811231123 dot GB9197 at kam dot mff dot cuni dot cz>
I like the approach in general -- in the past, indirect call promotion
and function inlining heuristics were disconnected -- which could lead to
either missing promotions or useless ones. This approach solves the
problem.
On Sun, Aug 11, 2013 at 4:11 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patch adds simple cost model into indirect call speculation. First we do not
> turn calls into speculative calls when it seems a bad idea (i.e. call is cold)
> and during inlining we remove speculations that do not seem beneficial.
> On modern chip speculative call sequence without inlining is not really going
> to fare better than indirect call because of indirect call predictor.
> So we keep them only if the call was inlined or if the callee is turned to clone
> or CONST/PURE flags are propagated to them.
>
> We may want to add target hook specifying if target supports indirect call predictor,
> but I am not sure how important this is in practice.
It might also be useful to introduce a parameter to control the
behavior so that people can do better experiments. The capability of
indirect branch prediction varies a lot depending on the target.
I only noticed a couple of indentation problems.
thanks,
David
>
> To enable cost model everywhere, the old unit-local transform code now does nothing
> but does sanity checking and debug output dumping.
> /* Speculative call consist of three components:
> *************** cgraph_speculative_call_info (struct cgr
> *** 1107,1113 ****
> if (e2->call_stmt)
> {
indentation?
> e = cgraph_edge (e->caller, e2->call_stmt);
> ! gcc_assert (!e->speculative && !e->indirect_unknown_callee);
> }
> else
> for (e = e->caller->callees;
> --- 1110,1116 ----
> if (e2->call_stmt)
> {
> e = cgraph_edge (e->caller, e2->call_stmt);
> ! gcc_assert (e->speculative && !e->indirect_unknown_callee);
> }
> else
> for (e = e->caller->callees;
> *************** cgraph_redirect_edge_callee (struct cgra
> *** 1147,1153 ****
> Remove the speculative call sequence and return edge representing the call.
> It is up to caller to redirect the call as appropriate. */
>
> ! static struct cgraph_edge *
> cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl)
> {
> struct cgraph_edge *e2;
> --- 1150,1156 ----
> Remove the speculative call sequence and return edge representing the call.
> It is up to caller to redirect the call as appropriate. */
>
> ! struct cgraph_edge *
> cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl)
> {
> struct cgraph_edge *e2;
> *************** cgraph_resolve_speculation (struct cgrap
> *** 1159,1170 ****
> {
> if (dump_file)
> {
> ! fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has "
> ! "turned out to have contradicitng known target ",
> ! xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> ! xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> ! print_generic_expr (dump_file, callee_decl, 0);
> ! fprintf (dump_file, "\n");
> }
> }
> else
> --- 1162,1182 ----
> {
> if (dump_file)
> {
> ! if (callee_decl)
> ! {
> ! fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has "
> ! "turned out to have contradicitng known target ",
> ! xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> ! xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> ! print_generic_expr (dump_file, callee_decl, 0);
> ! fprintf (dump_file, "\n");
> ! }
> ! else
> ! {
> ! fprintf (dump_file, "Removing speculative call %s/%i => %s/%i\n",
> ! xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
> ! xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
> ! }
> }
> }
> else
> *************** cgraph_redirect_edge_call_stmt_to_callee
> *** 1264,1275 ****
> cgraph_speculative_call_info (e, e, e2, ref);
> if (gimple_call_fndecl (e->call_stmt))
> e = cgraph_resolve_speculation (e, gimple_call_fndecl (e->call_stmt));
> ! else
> {
> if (dump_file)
> ! fprintf (dump_file, "Expanding speculative call of %s/%i -> %s/%i\n",
> xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
> xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order);
> gcc_assert (e2->speculative);
> push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
> new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred),
> --- 1276,1299 ----
> cgraph_speculative_call_info (e, e, e2, ref);
> if (gimple_call_fndecl (e->call_stmt))
> e = cgraph_resolve_speculation (e, gimple_call_fndecl (e->call_stmt));
> ! if (!gimple_check_call_matching_types (e->call_stmt, e->callee->symbol.decl,
> ! true))
> {
> + e = cgraph_resolve_speculation (e, NULL);
> if (dump_file)
> ! fprintf (dump_file, "Not expanding speculative call of %s/%i -> %s/%i\n"
> ! "Type mismatch.\n",
> xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
> xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order);
> + }
> + else
> + {
> + if (dump_file)
> + fprintf (dump_file, "Expanding speculative call of %s/%i -> %s/%i count:"
> + HOST_WIDEST_INT_PRINT_DEC"\n",
> + xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
> + xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order,
> + (HOST_WIDEST_INT)e->count);
> gcc_assert (e2->speculative);
> push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
> new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred),
> Index: cgraph.h
> ===================================================================
> *** cgraph.h (revision 201640)
> --- cgraph.h (working copy)
> *************** bool cgraph_propagate_frequency (struct
> *** 726,732 ****
> struct cgraph_node * cgraph_function_node (struct cgraph_node *,
> enum availability *avail = NULL);
> bool cgraph_get_body (struct cgraph_node *node);
> ! void
> cgraph_turn_edge_to_speculative (struct cgraph_edge *,
> struct cgraph_node *,
> gcov_type, int);
> --- 726,732 ----
> struct cgraph_node * cgraph_function_node (struct cgraph_node *,
> enum availability *avail = NULL);
> bool cgraph_get_body (struct cgraph_node *node);
> ! struct cgraph_edge *
> cgraph_turn_edge_to_speculative (struct cgraph_edge *,
> struct cgraph_node *,
> gcov_type, int);
> *************** struct cgraph_node *cgraph_function_vers
> *** 783,788 ****
> --- 783,789 ----
> basic_block, const char *);
> void tree_function_versioning (tree, tree, vec<ipa_replace_map_p, va_gc> *,
> bool, bitmap, bool, bitmap, basic_block);
> + struct cgraph_edge *cgraph_resolve_speculation (struct cgraph_edge *, tree);
>
> /* In cgraphbuild.c */
> unsigned int rebuild_cgraph_edges (void);
> *************** symtab_real_symbol_p (symtab_node node)
> *** 1398,1401 ****
> --- 1399,1414 ----
> return false;
> return true;
> }
> +
> + /* Return true if NODE can be discarded by linker from the binary. */
> +
> + static inline bool
> + symtab_can_be_discarded (symtab_node node)
> + {
> + return (DECL_EXTERNAL (node->symbol.decl)
> + || (DECL_ONE_ONLY (node->symbol.decl)
> + && node->symbol.resolution != LDPR_PREVAILING_DEF
> + && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY
> + && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY_EXP));
> + }
> #endif /* GCC_CGRAPH_H */
> Index: value-prof.c
> ===================================================================
> *** value-prof.c (revision 201640)
> --- value-prof.c (working copy)
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1431,1438 ****
> gimple stmt = gsi_stmt (*gsi);
> histogram_value histogram;
> gcov_type val, count, all, bb_all;
> - gcov_type prob;
> - gimple modify;
> struct cgraph_node *direct_call;
>
> if (gimple_code (stmt) != GIMPLE_CALL)
> --- 1431,1436 ----
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1452,1463 ****
> count = histogram->hvalue.counters [1];
> all = histogram->hvalue.counters [2];
>
> - if (4 * count <= 3 * all)
> - {
> - gimple_remove_histogram_value (cfun, stmt, histogram);
> - return false;
> - }
> -
> bb_all = gimple_bb (stmt)->count;
> /* The order of CHECK_COUNTER calls is important -
> since check_counter can correct the third parameter
> --- 1450,1455 ----
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1469,1478 ****
> return false;
> }
>
> ! if (all > 0)
> ! prob = GCOV_COMPUTE_SCALE (count, all);
> ! else
> ! prob = 0;
> direct_call = find_func_by_profile_id ((int)val);
>
> if (direct_call == NULL)
> --- 1461,1469 ----
> return false;
> }
>
> ! if (4 * count <= 3 * all)
> ! return false;
> !
> direct_call = find_func_by_profile_id ((int)val);
>
> if (direct_call == NULL)
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1488,1499 ****
> }
> return false;
> }
> - gimple_remove_histogram_value (cfun, stmt, histogram);
>
> if (!check_ic_target (stmt, direct_call))
> ! return false;
> !
> ! modify = gimple_ic (stmt, direct_call, prob, count, all);
>
> if (dump_file)
> {
> --- 1479,1499 ----
> }
> return false;
> }
>
> if (!check_ic_target (stmt, direct_call))
> ! {
> ! if (dump_file)
> ! {
> ! fprintf (dump_file, "Indirect call -> direct call ");
> ! print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
> ! fprintf (dump_file, "=> ");
> ! print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> ! fprintf (dump_file, " transformation skipped because of type mismatch");
> ! print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> ! }
> ! gimple_remove_histogram_value (cfun, stmt, histogram);
> ! return false;
> ! }
>
> if (dump_file)
> {
> *************** gimple_ic_transform (gimple_stmt_iterato
> *** 1501,1510 ****
> print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
> fprintf (dump_file, "=> ");
> print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> ! fprintf (dump_file, " transformation on insn ");
> print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> - fprintf (dump_file, " to ");
> - print_gimple_stmt (dump_file, modify, 0, TDF_SLIM);
> fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
> " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count, all);
> }
> --- 1501,1508 ----
> print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
> fprintf (dump_file, "=> ");
> print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
> ! fprintf (dump_file, " transformation on insn postponned to ipa-profile");
> print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
> fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
> " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count, all);
> }
> Index: ipa-inline-transform.c
> ===================================================================
> *** ipa-inline-transform.c (revision 201640)
> --- ipa-inline-transform.c (working copy)
> *************** along with GCC; see the file COPYING3.
> *** 46,51 ****
> --- 46,52 ----
>
> int ncalls_inlined;
> int nfunctions_inlined;
> + bool speculation_removed;
>
> /* Scale frequency of NODE edges by FREQ_SCALE. */
>
> *************** clone_inlined_nodes (struct cgraph_edge
> *** 134,139 ****
> --- 135,141 ----
> bool update_original, int *overall_size)
> {
> struct cgraph_node *inlining_into;
> + struct cgraph_edge *next;
>
> if (e->caller->global.inlined_to)
> inlining_into = e->caller->global.inlined_to;
> *************** clone_inlined_nodes (struct cgraph_edge
> *** 186,194 ****
> e->callee->global.inlined_to = inlining_into;
>
> /* Recursively clone all bodies. */
> ! for (e = e->callee->callees; e; e = e->next_callee)
> ! if (!e->inline_failed)
> ! clone_inlined_nodes (e, duplicate, update_original, overall_size);
> }
>
>
> --- 188,204 ----
> e->callee->global.inlined_to = inlining_into;
>
> /* Recursively clone all bodies. */
> ! for (e = e->callee->callees; e; e = next)
> ! {
> ! next = e->next_callee;
> ! if (!e->inline_failed)
> ! clone_inlined_nodes (e, duplicate, update_original, overall_size);
> ! if (e->speculative && !speculation_useful_p (e, true))
> ! {
> ! cgraph_resolve_speculation (e, NULL);
> ! speculation_removed = true;
> ! }
> ! }
> }
>
>
> *************** inline_call (struct cgraph_edge *e, bool
> *** 218,223 ****
> --- 228,234 ----
> bool predicated = inline_edge_summary (e)->predicate != NULL;
> #endif
>
> + speculation_removed = false;
> /* Don't inline inlined edges. */
> gcc_assert (e->inline_failed);
> /* Don't even think of inlining inline clone. */
> *************** inline_call (struct cgraph_edge *e, bool
> *** 267,272 ****
> --- 278,284 ----
> error due to INLINE_SIZE_SCALE roudoff errors. */
> gcc_assert (!update_overall_summary || !overall_size || new_edges_found
> || abs (estimated_growth - (new_size - old_size)) <= 1
> + || speculation_removed
> /* FIXME: a hack. Edges with false predicate are accounted
> wrong, we should remove them from callgraph. */
> || predicated);
> Index: ipa-inline.c
> ===================================================================
> *** ipa-inline.c (revision 201640)
> --- ipa-inline.c (working copy)
> *************** report_inline_failed_reason (struct cgra
> *** 229,238 ****
> We check whether inlining is possible at all and whether
> caller growth limits allow doing so.
>
> ! if REPORT is true, output reason to the dump file. */
>
> static bool
> ! can_inline_edge_p (struct cgraph_edge *e, bool report)
> {
> bool inlinable = true;
> enum availability avail;
> --- 229,241 ----
> We check whether inlining is possible at all and whether
> caller growth limits allow doing so.
>
> ! if REPORT is true, output reason to the dump file.
> !
> ! if DISREGARD_LIMITES is true, ignore size limits.*/
>
> static bool
> ! can_inline_edge_p (struct cgraph_edge *e, bool report,
> ! bool disregard_limits = false)
> {
> bool inlinable = true;
> enum availability avail;
> *************** can_inline_edge_p (struct cgraph_edge *e
> *** 309,314 ****
> --- 312,318 ----
> }
> /* Check if caller growth allows the inlining. */
> else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)
> + && !disregard_limits
> && !lookup_attribute ("flatten",
> DECL_ATTRIBUTES
> (e->caller->global.inlined_to
> *************** heap_edge_removal_hook (struct cgraph_ed
> *** 1400,1405 ****
> --- 1404,1482 ----
> }
> }
>
> + /* Return true if speculation of edge E seems useful.
> + If ANTICIPATE_INLINING is true, be conservative and hope that E
> + may get inlined. */
> +
> + bool
> + speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
> + {
> + enum availability avail;
> + struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
> + struct cgraph_edge *direct, *indirect;
> + struct ipa_ref *ref;
> +
> + gcc_assert (e->speculative && !e->indirect_unknown_callee);
> +
> + if (!cgraph_maybe_hot_edge_p (e))
> + return false;
> +
> + /* See if IP optimizations found something potentially useful about the
> + function. For now we look only for CONST/PURE flags. Almost everything
> + else we propagate is useless. */
> + if (avail >= AVAIL_AVAILABLE)
> + {
> + int ecf_flags = flags_from_decl_or_type (target->symbol.decl);
> + if (ecf_flags & ECF_CONST)
> + {
> + cgraph_speculative_call_info (e, direct, indirect, ref);
> + if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
> + return true;
> + }
> + else if (ecf_flags & ECF_PURE)
> + {
> + cgraph_speculative_call_info (e, direct, indirect, ref);
> + if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
> + return true;
> + }
> + }
> + /* If we did not managed to inline the function nor redirect
> + to an ipa-cp clone (that are seen by having local flag set),
> + it is probably pointless to inline it unless hardware is missing
> + indirect call predictor. */
> + if (!anticipate_inlining && e->inline_failed && !target->local.local)
> + return false;
> + /* For overwritable targets there is not much to do. */
> + if (e->inline_failed && !can_inline_edge_p (e, false, true))
> + return false;
> + /* OK, speculation seems interesting. */
> + return true;
> + }
> +
> + /* We know that EDGE is not going to be inlined.
> + See if we can remove speculation. */
> +
> + static void
> + resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
> + {
> + if (edge->speculative && !speculation_useful_p (edge, false))
> + {
> + struct cgraph_node *node = edge->caller;
> + struct cgraph_node *where = node->global.inlined_to
> + ? node->global.inlined_to : node;
> + bitmap updated_nodes = BITMAP_ALLOC (NULL);
> +
> + cgraph_resolve_speculation (edge, NULL);
> + reset_node_growth_cache (where);
> + reset_edge_caches (where);
> + inline_update_overall_summary (where);
> + update_caller_keys (edge_heap, where,
> + updated_nodes, NULL);
> + reset_node_growth_cache (where);
> + BITMAP_FREE (updated_nodes);
> + }
> + }
> +
> /* We use greedy algorithm for inlining of small functions:
> All inline candidates are put into prioritized heap ordered in
> increasing badness.
> *************** inline_small_functions (void)
> *** 1478,1491 ****
> /* Populate the heeap with all edges we might inline. */
>
> FOR_EACH_DEFINED_FUNCTION (node)
> ! if (!node->global.inlined_to)
> ! {
> ! if (dump_file)
> ! fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
> ! cgraph_node_name (node), node->symbol.order);
>
> ! for (edge = node->callers; edge; edge = edge->next_caller)
> if (edge->inline_failed
> && can_inline_edge_p (edge, true)
> && want_inline_small_function_p (edge, true)
> && edge->inline_failed)
> --- 1555,1573 ----
> /* Populate the heeap with all edges we might inline. */
>
> FOR_EACH_DEFINED_FUNCTION (node)
> ! {
> ! bool update = false;
> ! struct cgraph_edge *next;
>
> ! if (dump_file)
> ! fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
> ! cgraph_node_name (node), node->symbol.order);
> !
> ! for (edge = node->callees; edge; edge = next)
> ! {
> ! next = edge->next_callee;
> if (edge->inline_failed
> + && !edge->aux
> && can_inline_edge_p (edge, true)
> && want_inline_small_function_p (edge, true)
> && edge->inline_failed)
> *************** inline_small_functions (void)
> *** 1493,1499 ****
> gcc_assert (!edge->aux);
> update_edge_key (edge_heap, edge);
> }
> ! }
>
> gcc_assert (in_lto_p
> || !max_count
> --- 1575,1598 ----
> gcc_assert (!edge->aux);
> update_edge_key (edge_heap, edge);
> }
> ! if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
> ! {
> ! cgraph_resolve_speculation (edge, NULL);
> ! update = true;
> ! }
> ! }
> ! if (update)
> ! {
> ! struct cgraph_node *where = node->global.inlined_to
> ! ? node->global.inlined_to : node;
> ! inline_update_overall_summary (where);
> ! reset_node_growth_cache (where);
> ! reset_edge_caches (where);
> ! update_caller_keys (edge_heap, where,
> ! updated_nodes, NULL);
> ! bitmap_clear (updated_nodes);
> ! }
> ! }
>
> gcc_assert (in_lto_p
> || !max_count
> *************** inline_small_functions (void)
> *** 1534,1540 ****
> }
>
> if (!can_inline_edge_p (edge, true))
> ! continue;
>
> callee = cgraph_function_or_thunk_node (edge->callee, NULL);
> growth = estimate_edge_growth (edge);
> --- 1633,1642 ----
> }
>
> if (!can_inline_edge_p (edge, true))
> ! {
> ! resolve_noninline_speculation (edge_heap, edge);
> ! continue;
> ! }
>
> callee = cgraph_function_or_thunk_node (edge->callee, NULL);
> growth = estimate_edge_growth (edge);
> *************** inline_small_functions (void)
> *** 1568,1578 ****
> {
> edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
> report_inline_failed_reason (edge);
> continue;
> }
>
> if (!want_inline_small_function_p (edge, true))
> ! continue;
>
> /* Heuristics for inlining small functions works poorly for
> recursive calls where we do efect similar to loop unrolling.
> --- 1670,1684 ----
> {
> edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
> report_inline_failed_reason (edge);
> + resolve_noninline_speculation (edge_heap, edge);
> continue;
> }
>
> if (!want_inline_small_function_p (edge, true))
> ! {
> ! resolve_noninline_speculation (edge_heap, edge);
> ! continue;
> ! }
>
> /* Heuristics for inlining small functions works poorly for
> recursive calls where we do efect similar to loop unrolling.
> *************** inline_small_functions (void)
> *** 1588,1593 ****
> --- 1694,1700 ----
> ? &new_indirect_edges : NULL))
> {
> edge->inline_failed = CIF_RECURSIVE_INLINING;
> + resolve_noninline_speculation (edge_heap, edge);
> continue;
> }
> reset_edge_caches (where);
> *************** inline_small_functions (void)
> *** 1596,1601 ****
> --- 1703,1709 ----
> if (flag_indirect_inlining)
> add_new_edges_to_heap (edge_heap, new_indirect_edges);
> update_callee_keys (edge_heap, where, updated_nodes);
> + bitmap_clear (updated_nodes);
> }
> else
> {
> *************** inline_small_functions (void)
> *** 1621,1626 ****
> --- 1729,1735 ----
> edge->inline_failed
> = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl)
> ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
> + resolve_noninline_speculation (edge_heap, edge);
> continue;
> }
> else if (depth && dump_file)
> *************** ipa_inline (void)
> *** 1773,1778 ****
> --- 1882,1888 ----
> struct cgraph_node **order =
> XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
> int i;
> + int cold;
>
> if (in_lto_p && optimize)
> ipa_update_after_lto_read ();
> *************** ipa_inline (void)
> *** 1820,1885 ****
> code size will shrink because the out-of-line copy is eliminated.
> We do this regardless on the callee size as long as function growth limits
> are met. */
> ! if (flag_inline_functions_called_once)
> ! {
> ! int cold;
> ! if (dump_file)
> ! fprintf (dump_file,
> ! "\nDeciding on functions to be inlined into all callers:\n");
>
> ! /* Inlining one function called once has good chance of preventing
> ! inlining other function into the same callee. Ideally we should
> ! work in priority order, but probably inlining hot functions first
> ! is good cut without the extra pain of maintaining the queue.
> !
> ! ??? this is not really fitting the bill perfectly: inlining function
> ! into callee often leads to better optimization of callee due to
> ! increased context for optimization.
> ! For example if main() function calls a function that outputs help
> ! and then function that does the main optmization, we should inline
> ! the second with priority even if both calls are cold by themselves.
> !
> ! We probably want to implement new predicate replacing our use of
> ! maybe_hot_edge interpreted as maybe_hot_edge || callee is known
> ! to be hot. */
> ! for (cold = 0; cold <= 1; cold ++)
> {
> ! FOR_EACH_DEFINED_FUNCTION (node)
> {
> ! if (want_inline_function_to_all_callers_p (node, cold))
> {
> ! int num_calls = 0;
> ! struct cgraph_edge *e;
> ! for (e = node->callers; e; e = e->next_caller)
> ! num_calls++;
> ! while (node->callers && !node->global.inlined_to)
> ! {
> ! struct cgraph_node *caller = node->callers->caller;
>
> ! if (dump_file)
> ! {
> ! fprintf (dump_file,
> ! "\nInlining %s size %i.\n",
> ! cgraph_node_name (node),
> ! inline_summary (node)->size);
> ! fprintf (dump_file,
> ! " Called once from %s %i insns.\n",
> ! cgraph_node_name (node->callers->caller),
> ! inline_summary (node->callers->caller)->size);
> ! }
>
> ! inline_call (node->callers, true, NULL, NULL, true);
> if (dump_file)
> ! fprintf (dump_file,
> ! " Inlined into %s which now has %i size\n",
> ! cgraph_node_name (caller),
> ! inline_summary (caller)->size);
> ! if (!num_calls--)
> ! {
> ! if (dump_file)
> ! fprintf (dump_file, "New calls found; giving up.\n");
> ! break;
> ! }
> }
> }
> }
> --- 1930,2012 ----
> code size will shrink because the out-of-line copy is eliminated.
> We do this regardless on the callee size as long as function growth limits
> are met. */
> ! if (dump_file)
> ! fprintf (dump_file,
> ! "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
>
> ! /* Inlining one function called once has good chance of preventing
> ! inlining other function into the same callee. Ideally we should
> ! work in priority order, but probably inlining hot functions first
> ! is good cut without the extra pain of maintaining the queue.
> !
> ! ??? this is not really fitting the bill perfectly: inlining function
> ! into callee often leads to better optimization of callee due to
> ! increased context for optimization.
> ! For example if main() function calls a function that outputs help
> ! and then function that does the main optmization, we should inline
> ! the second with priority even if both calls are cold by themselves.
> !
> ! We probably want to implement new predicate replacing our use of
> ! maybe_hot_edge interpreted as maybe_hot_edge || callee is known
> ! to be hot. */
> ! for (cold = 0; cold <= 1; cold ++)
> ! {
> ! FOR_EACH_DEFINED_FUNCTION (node)
> {
> ! struct cgraph_edge *edge, *next;
> ! bool update=false;
> !
> ! for (edge = node->callees; edge; edge = next)
> {
> ! next = edge->next_callee;
> ! if (edge->speculative && !speculation_useful_p (edge, false))
> {
> ! cgraph_resolve_speculation (edge, NULL);
> ! update = true;
> ! }
> ! }
> ! if (update)
> ! {
> ! struct cgraph_node *where = node->global.inlined_to
> ! ? node->global.inlined_to : node;
> ! reset_node_growth_cache (where);
> ! reset_edge_caches (where);
> ! inline_update_overall_summary (where);
> ! }
> ! if (flag_inline_functions_called_once
> ! && want_inline_function_to_all_callers_p (node, cold))
> ! {
> ! int num_calls = 0;
> ! struct cgraph_edge *e;
> ! for (e = node->callers; e; e = e->next_caller)
> ! num_calls++;
> ! while (node->callers && !node->global.inlined_to)
> ! {
> ! struct cgraph_node *caller = node->callers->caller;
>
> ! if (dump_file)
> ! {
> ! fprintf (dump_file,
> ! "\nInlining %s size %i.\n",
> ! cgraph_node_name (node),
> ! inline_summary (node)->size);
> ! fprintf (dump_file,
> ! " Called once from %s %i insns.\n",
> ! cgraph_node_name (node->callers->caller),
> ! inline_summary (node->callers->caller)->size);
> ! }
>
> ! inline_call (node->callers, true, NULL, NULL, true);
> ! if (dump_file)
> ! fprintf (dump_file,
> ! " Inlined into %s which now has %i size\n",
> ! cgraph_node_name (caller),
> ! inline_summary (caller)->size);
> ! if (!num_calls--)
> ! {
> if (dump_file)
> ! fprintf (dump_file, "New calls found; giving up.\n");
> ! break;
> }
> }
> }
> Index: ipa-inline.h
> ===================================================================
> *** ipa-inline.h (revision 201640)
> --- ipa-inline.h (working copy)
> *************** inline_hints do_estimate_edge_hints (str
> *** 226,231 ****
> --- 226,232 ----
> void initialize_growth_caches (void);
> void free_growth_caches (void);
> void compute_inline_parameters (struct cgraph_node *, bool);
> + bool speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining);
>
> /* In ipa-inline-transform.c */
> bool inline_call (struct cgraph_edge *, bool, vec<cgraph_edge_p> *, int *, bool);
> Index: ipa.c
> ===================================================================
> *** ipa.c (revision 201640)
> --- ipa.c (working copy)
> *************** bool
> *** 768,778 ****
> can_replace_by_local_alias (symtab_node node)
> {
> return (symtab_node_availability (node) > AVAIL_OVERWRITABLE
> ! && !DECL_EXTERNAL (node->symbol.decl)
> ! && (!DECL_ONE_ONLY (node->symbol.decl)
> ! || node->symbol.resolution == LDPR_PREVAILING_DEF
> ! || node->symbol.resolution == LDPR_PREVAILING_DEF_IRONLY
> ! || node->symbol.resolution == LDPR_PREVAILING_DEF_IRONLY_EXP));
> }
>
> /* Mark visibility of all functions.
> --- 768,774 ----
> can_replace_by_local_alias (symtab_node node)
> {
> return (symtab_node_availability (node) > AVAIL_OVERWRITABLE
> ! && !symtab_can_be_discarded (node));
> }
>
> /* Mark visibility of all functions.
> *************** ipa_profile (void)
> *** 1407,1459 ****
> bool something_changed = false;
> int i;
> gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
> !
> ! /* Produce speculative calls: we saved common traget from porfiling into
> ! e->common_target_id. Now, at link time, we can look up corresponding
> ! function node and produce speculative call. */
> ! if (in_lto_p)
> ! {
> ! struct cgraph_edge *e;
> ! struct cgraph_node *n,*n2;
> !
> ! init_node_map (false);
> ! FOR_EACH_DEFINED_FUNCTION (n)
> ! {
> ! bool update = false;
> !
> ! for (e = n->indirect_calls; e; e = e->next_callee)
> ! if (e->indirect_info->common_target_id)
> ! {
> ! n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
> ! if (n2)
> ! {
> ! if (dump_file)
> ! {
> ! fprintf (dump_file, "Indirect call -> direct call from"
> ! " other module %s/%i => %s/%i, prob %3.2f\n",
> ! xstrdup (cgraph_node_name (n)), n->symbol.order,
> ! xstrdup (cgraph_node_name (n2)), n2->symbol.order,
> ! e->indirect_info->common_target_probability
> ! / (float)REG_BR_PROB_BASE);
> ! }
> ! cgraph_turn_edge_to_speculative
> ! (e, n2,
> ! apply_scale (e->count,
> ! e->indirect_info->common_target_probability),
> ! apply_scale (e->frequency,
> ! e->indirect_info->common_target_probability));
> ! update = true;
> ! }
> ! else
> ! if (dump_file)
> ! fprintf (dump_file, "Function with profile-id %i not found.\n",
> ! e->indirect_info->common_target_id);
> ! }
> ! if (update)
> ! inline_update_overall_summary (n);
> ! }
> ! del_node_map ();
> ! }
>
> if (dump_file)
> dump_histogram (dump_file, histogram);
> --- 1403,1411 ----
> bool something_changed = false;
> int i;
> gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
> ! struct cgraph_node *n,*n2;
> ! int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
> ! bool node_map_initialized = false;
>
> if (dump_file)
> dump_histogram (dump_file, histogram);
> *************** ipa_profile (void)
> *** 1523,1528 ****
> --- 1475,1580 ----
> histogram.release();
> free_alloc_pool (histogram_pool);
>
> + /* Produce speculative calls: we saved common traget from porfiling into
> + e->common_target_id. Now, at link time, we can look up corresponding
> + function node and produce speculative call. */
> +
> + FOR_EACH_DEFINED_FUNCTION (n)
> + {
> + bool update = false;
> +
> + for (e = n->indirect_calls; e; e = e->next_callee)
> + {
> + if (n->count)
> + nindirect++;
> + if (e->indirect_info->common_target_id)
> + {
> + if (!node_map_initialized)
> + init_node_map (false);
> + node_map_initialized = true;
> + ncommon++;
> + n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
> + if (n2)
> + {
> + if (dump_file)
> + {
> + fprintf (dump_file, "Indirect call -> direct call from"
> + " other module %s/%i => %s/%i, prob %3.2f\n",
> + xstrdup (cgraph_node_name (n)), n->symbol.order,
> + xstrdup (cgraph_node_name (n2)), n2->symbol.order,
> + e->indirect_info->common_target_probability
> + / (float)REG_BR_PROB_BASE);
> + }
> + if (e->indirect_info->common_target_probability
> + < REG_BR_PROB_BASE / 2)
> + {
> + nuseless++;
> + if (dump_file)
> + fprintf (dump_file,
> + "Not speculating: probability is too low.\n");
> + }
> + else if (!cgraph_maybe_hot_edge_p (e))
> + {
> + nuseless++;
> + if (dump_file)
> + fprintf (dump_file,
> + "Not speculating: call is cold.\n");
> + }
> + else if (cgraph_function_body_availability (n2)
> + <= AVAIL_OVERWRITABLE
> + && symtab_can_be_discarded ((symtab_node) n2))
> + {
> + nuseless++;
> + if (dump_file)
> + fprintf (dump_file,
> + "Not speculating: target is overwritable "
> + "and can be discarded.\n");
> + }
> + else
> + {
> + /* Target may be overwritable, but profile says that
> + control flow goes to this particular implementation
> + of N2. Speculate on the local alias to allow inlining.
> + */
> + if (!symtab_can_be_discarded ((symtab_node) n2))
> + n2 = cgraph (symtab_nonoverwritable_alias ((symtab_node)n2));
> + nconverted++;
> + cgraph_turn_edge_to_speculative
> + (e, n2,
> + apply_scale (e->count,
> + e->indirect_info->common_target_probability),
> + apply_scale (e->frequency,
> + e->indirect_info->common_target_probability));
> + update = true;
> + }
> + }
> + else
> + {
> + if (dump_file)
> + fprintf (dump_file, "Function with profile-id %i not found.\n",
> + e->indirect_info->common_target_id);
> + nunknown++;
> + }
> + }
> + }
> + if (update)
> + inline_update_overall_summary (n);
> + }
> + if (node_map_initialized)
> + del_node_map ();
> + if (dump_file && nindirect)
> + fprintf (dump_file,
> + "%i indirect calls trained.\n"
> + "%i (%3.2f%%) have common target.\n"
> + "%i (%3.2f%%) targets was not found.\n"
> + "%i (%3.2f%%) speculations seems useless.\n"
> + "%i (%3.2f%%) speculations produced.\n",
> + nindirect,
> + ncommon, ncommon * 100.0 / nindirect,
> + nunknown, nunknown * 100.0 / nindirect,
> + nuseless, nuseless * 100.0 / nindirect,
> + nconverted, nconverted * 100.0 / nindirect);
> +
> order_pos = ipa_reverse_postorder (order);
> for (i = order_pos - 1; i >= 0; i--)
> {