Cost model for indirect call speculation

Jan Hubicka hubicka@ucw.cz
Sun Aug 11 23:11:00 GMT 2013


Hi,
this patch adds a simple cost model to indirect call speculation.  First, we do not
turn calls into speculative calls when it seems a bad idea (i.e. the call is cold),
and during inlining we remove speculations that do not seem beneficial.
On modern chips a speculative call sequence without inlining is not really going
to fare better than an indirect call because of the indirect call predictor.
So we keep them only if the call was inlined, or if the callee is turned into a clone,
or CONST/PURE flags are propagated to them.

We may want to add a target hook specifying whether the target supports an indirect
call predictor, but I am not sure how important this is in practice.

To enable the cost model everywhere, the old unit-local transform code now does
nothing but sanity checking and debug output dumping.

On GCC there are 2600 indirect calls executed during training. For 1600 we find
histograms (I have no clue what happens to the others - will debug it tomorrow)
and out of them 500 are not cold and thus turned into speculative calls.  After
inlining, about 60 speculative calls survive into the final binary (as opposed to
2000 before).

Bootstrapped/regtested x86_64-linux; will commit it after the testers pick up the
previous change.

Honza

	* cgraph.c (cgraph_turn_edge_to_speculative): Return newly
	introduced edge; fix typo in sanity check.
	(cgraph_resolve_speculation): Export; improve diagnostic.
	(cgraph_redirect_edge_call_stmt_to_callee): Better diagnostic; cancel
	speculation at type mismatch.
	* cgraph.h (cgraph_turn_edge_to_speculative): Update.
	(cgraph_resolve_speculation): Declare.
	(symtab_can_be_discarded): New function.
	* value-prof.c (gimple_ic_transform): Remove actual transform code.
	* ipa-inline-transform.c (speculation_removed): New global var.
	(clone_inlined_nodes): See if speculation can be removed.
	(inline_call): If speculation was removed, the growths may not match.
	* ipa-inline.c (can_inline_edge_p): Add DISREGARD_LIMITS parameter.
	(speculation_useful_p): New function.
	(resolve_noninline_speculation): New function.
	(inline_small_functions): Resolve useless speculations.
	* ipa-inline.h (speculation_useful_p): Declare.
	* ipa.c (can_replace_by_local_alias): Simplify.
	(ipa_profile): Produce speculative calls in non-lto, too;
	add simple cost model; produce local aliases.

Index: cgraph.c
===================================================================
*** cgraph.c	(revision 201646)
--- cgraph.c	(working copy)
*************** cgraph_set_edge_callee (struct cgraph_ed
*** 1040,1048 ****
  
     At this time the function just creates the direct call,
     the referencd representing the if conditional and attaches
!    them all to the orginal indirect call statement.  */
  
! void
  cgraph_turn_edge_to_speculative (struct cgraph_edge *e,
  				 struct cgraph_node *n2,
  				 gcov_type direct_count,
--- 1040,1050 ----
  
     At this time the function just creates the direct call,
     the referencd representing the if conditional and attaches
!    them all to the orginal indirect call statement.  
  
!    Return direct edge created.  */
! 
! struct cgraph_edge *
  cgraph_turn_edge_to_speculative (struct cgraph_edge *e,
  				 struct cgraph_node *n2,
  				 gcov_type direct_count,
*************** cgraph_turn_edge_to_speculative (struct
*** 1073,1078 ****
--- 1075,1081 ----
  			      IPA_REF_ADDR, e->call_stmt);
    ref->lto_stmt_uid = e->lto_stmt_uid;
    ref->speculative = e->speculative;
+   return e2;
  }
  
  /* Speculative call consist of three components:
*************** cgraph_speculative_call_info (struct cgr
*** 1107,1113 ****
        if (e2->call_stmt)
  	{
  	  e = cgraph_edge (e->caller, e2->call_stmt);
! 	  gcc_assert (!e->speculative && !e->indirect_unknown_callee);
  	}
        else
  	for (e = e->caller->callees; 
--- 1110,1116 ----
        if (e2->call_stmt)
  	{
  	  e = cgraph_edge (e->caller, e2->call_stmt);
! 	  gcc_assert (e->speculative && !e->indirect_unknown_callee);
  	}
        else
  	for (e = e->caller->callees; 
*************** cgraph_redirect_edge_callee (struct cgra
*** 1147,1153 ****
     Remove the speculative call sequence and return edge representing the call.
     It is up to caller to redirect the call as appropriate. */
  
! static struct cgraph_edge *
  cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl)
  {
    struct cgraph_edge *e2;
--- 1150,1156 ----
     Remove the speculative call sequence and return edge representing the call.
     It is up to caller to redirect the call as appropriate. */
  
! struct cgraph_edge *
  cgraph_resolve_speculation (struct cgraph_edge *edge, tree callee_decl)
  {
    struct cgraph_edge *e2;
*************** cgraph_resolve_speculation (struct cgrap
*** 1159,1170 ****
      {
        if (dump_file)
  	{
! 	  fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has "
! 		   "turned out to have contradicitng known target ",
! 		   xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
! 		   xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
! 	  print_generic_expr (dump_file, callee_decl, 0);
!           fprintf (dump_file, "\n");
  	}
      }
    else
--- 1162,1182 ----
      {
        if (dump_file)
  	{
! 	  if (callee_decl)
! 	    {
! 	      fprintf (dump_file, "Speculative indirect call %s/%i => %s/%i has "
! 		       "turned out to have contradicitng known target ",
! 		       xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
! 		       xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
! 	      print_generic_expr (dump_file, callee_decl, 0);
! 	      fprintf (dump_file, "\n");
! 	    }
! 	  else
! 	    {
! 	      fprintf (dump_file, "Removing speculative call %s/%i => %s/%i\n",
! 		       xstrdup (cgraph_node_name (edge->caller)), edge->caller->symbol.order,
! 		       xstrdup (cgraph_node_name (e2->callee)), e2->callee->symbol.order);
! 	    }
  	}
      }
    else
*************** cgraph_redirect_edge_call_stmt_to_callee
*** 1264,1275 ****
        cgraph_speculative_call_info (e, e, e2, ref);
        if (gimple_call_fndecl (e->call_stmt))
  	e = cgraph_resolve_speculation (e, gimple_call_fndecl (e->call_stmt));
!       else
  	{
  	  if (dump_file)
! 	    fprintf (dump_file, "Expanding speculative call of %s/%i -> %s/%i\n",
  		     xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
  		     xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order);
  	  gcc_assert (e2->speculative);
  	  push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
  	  new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred),
--- 1276,1299 ----
        cgraph_speculative_call_info (e, e, e2, ref);
        if (gimple_call_fndecl (e->call_stmt))
  	e = cgraph_resolve_speculation (e, gimple_call_fndecl (e->call_stmt));
!       if (!gimple_check_call_matching_types (e->call_stmt, e->callee->symbol.decl,
! 					     true))
  	{
+ 	  e = cgraph_resolve_speculation (e, NULL);
  	  if (dump_file)
! 	    fprintf (dump_file, "Not expanding speculative call of %s/%i -> %s/%i\n"
! 		     "Type mismatch.\n",
  		     xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
  		     xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order);
+ 	}
+       else
+ 	{
+ 	  if (dump_file)
+ 	    fprintf (dump_file, "Expanding speculative call of %s/%i -> %s/%i count:"
+ 		     HOST_WIDEST_INT_PRINT_DEC"\n",
+ 		     xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
+ 		     xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order,
+ 		     (HOST_WIDEST_INT)e->count);
  	  gcc_assert (e2->speculative);
  	  push_cfun (DECL_STRUCT_FUNCTION (e->caller->symbol.decl));
  	  new_stmt = gimple_ic (e->call_stmt, cgraph (ref->referred),
Index: cgraph.h
===================================================================
*** cgraph.h	(revision 201640)
--- cgraph.h	(working copy)
*************** bool cgraph_propagate_frequency (struct
*** 726,732 ****
  struct cgraph_node * cgraph_function_node (struct cgraph_node *,
  					   enum availability *avail = NULL);
  bool cgraph_get_body (struct cgraph_node *node);
! void
  cgraph_turn_edge_to_speculative (struct cgraph_edge *,
  				 struct cgraph_node *,
  				 gcov_type, int);
--- 726,732 ----
  struct cgraph_node * cgraph_function_node (struct cgraph_node *,
  					   enum availability *avail = NULL);
  bool cgraph_get_body (struct cgraph_node *node);
! struct cgraph_edge *
  cgraph_turn_edge_to_speculative (struct cgraph_edge *,
  				 struct cgraph_node *,
  				 gcov_type, int);
*************** struct cgraph_node *cgraph_function_vers
*** 783,788 ****
--- 783,789 ----
  						basic_block, const char *);
  void tree_function_versioning (tree, tree, vec<ipa_replace_map_p, va_gc> *,
  			       bool, bitmap, bool, bitmap, basic_block);
+ struct cgraph_edge *cgraph_resolve_speculation (struct cgraph_edge *, tree);
  
  /* In cgraphbuild.c  */
  unsigned int rebuild_cgraph_edges (void);
*************** symtab_real_symbol_p (symtab_node node)
*** 1398,1401 ****
--- 1399,1414 ----
      return false;
    return true;
  }
+ 
+ /* Return true if NODE can be discarded by linker from the binary.  */
+ 
+ static inline bool
+ symtab_can_be_discarded (symtab_node node)
+ {
+   return (DECL_EXTERNAL (node->symbol.decl)
+ 	  || (DECL_ONE_ONLY (node->symbol.decl)
+ 	      && node->symbol.resolution != LDPR_PREVAILING_DEF
+ 	      && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY
+ 	      && node->symbol.resolution != LDPR_PREVAILING_DEF_IRONLY_EXP));
+ }
  #endif  /* GCC_CGRAPH_H  */
Index: value-prof.c
===================================================================
*** value-prof.c	(revision 201640)
--- value-prof.c	(working copy)
*************** gimple_ic_transform (gimple_stmt_iterato
*** 1431,1438 ****
    gimple stmt = gsi_stmt (*gsi);
    histogram_value histogram;
    gcov_type val, count, all, bb_all;
-   gcov_type prob;
-   gimple modify;
    struct cgraph_node *direct_call;
  
    if (gimple_code (stmt) != GIMPLE_CALL)
--- 1431,1436 ----
*************** gimple_ic_transform (gimple_stmt_iterato
*** 1452,1463 ****
    count = histogram->hvalue.counters [1];
    all = histogram->hvalue.counters [2];
  
-   if (4 * count <= 3 * all)
-     {
-       gimple_remove_histogram_value (cfun, stmt, histogram);
-       return false;
-     }
- 
    bb_all = gimple_bb (stmt)->count;
    /* The order of CHECK_COUNTER calls is important -
       since check_counter can correct the third parameter
--- 1450,1455 ----
*************** gimple_ic_transform (gimple_stmt_iterato
*** 1469,1478 ****
        return false;
      }
  
!   if (all > 0)
!     prob = GCOV_COMPUTE_SCALE (count, all);
!   else
!     prob = 0;
    direct_call = find_func_by_profile_id ((int)val);
  
    if (direct_call == NULL)
--- 1461,1469 ----
        return false;
      }
  
!   if (4 * count <= 3 * all)
!     return false;
! 
    direct_call = find_func_by_profile_id ((int)val);
  
    if (direct_call == NULL)
*************** gimple_ic_transform (gimple_stmt_iterato
*** 1488,1499 ****
  	}
        return false;
      }
-   gimple_remove_histogram_value (cfun, stmt, histogram);
  
    if (!check_ic_target (stmt, direct_call))
!     return false;
! 
!   modify = gimple_ic (stmt, direct_call, prob, count, all);
  
    if (dump_file)
      {
--- 1479,1499 ----
  	}
        return false;
      }
  
    if (!check_ic_target (stmt, direct_call))
!     {
!       if (dump_file)
! 	{
! 	  fprintf (dump_file, "Indirect call -> direct call ");
! 	  print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
! 	  fprintf (dump_file, "=> ");
! 	  print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
! 	  fprintf (dump_file, " transformation skipped because of type mismatch");
! 	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
! 	}
!       gimple_remove_histogram_value (cfun, stmt, histogram);
!       return false;
!     }
  
    if (dump_file)
      {
*************** gimple_ic_transform (gimple_stmt_iterato
*** 1501,1510 ****
        print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
        fprintf (dump_file, "=> ");
        print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
!       fprintf (dump_file, " transformation on insn ");
        print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
-       fprintf (dump_file, " to ");
-       print_gimple_stmt (dump_file, modify, 0, TDF_SLIM);
        fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
  	       " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count, all);
      }
--- 1501,1508 ----
        print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
        fprintf (dump_file, "=> ");
        print_generic_expr (dump_file, direct_call->symbol.decl, TDF_SLIM);
!       fprintf (dump_file, " transformation on insn postponned to ipa-profile");
        print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
        fprintf (dump_file, "hist->count "HOST_WIDEST_INT_PRINT_DEC
  	       " hist->all "HOST_WIDEST_INT_PRINT_DEC"\n", count, all);
      }
Index: ipa-inline-transform.c
===================================================================
*** ipa-inline-transform.c	(revision 201640)
--- ipa-inline-transform.c	(working copy)
*************** along with GCC; see the file COPYING3.
*** 46,51 ****
--- 46,52 ----
  
  int ncalls_inlined;
  int nfunctions_inlined;
+ bool speculation_removed;
  
  /* Scale frequency of NODE edges by FREQ_SCALE.  */
  
*************** clone_inlined_nodes (struct cgraph_edge
*** 134,139 ****
--- 135,141 ----
  		     bool update_original, int *overall_size)
  {
    struct cgraph_node *inlining_into;
+   struct cgraph_edge *next;
  
    if (e->caller->global.inlined_to)
      inlining_into = e->caller->global.inlined_to;
*************** clone_inlined_nodes (struct cgraph_edge
*** 186,194 ****
    e->callee->global.inlined_to = inlining_into;
  
    /* Recursively clone all bodies.  */
!   for (e = e->callee->callees; e; e = e->next_callee)
!     if (!e->inline_failed)
!       clone_inlined_nodes (e, duplicate, update_original, overall_size);
  }
  
  
--- 188,204 ----
    e->callee->global.inlined_to = inlining_into;
  
    /* Recursively clone all bodies.  */
!   for (e = e->callee->callees; e; e = next)
!     {
!       next = e->next_callee;
!       if (!e->inline_failed)
!         clone_inlined_nodes (e, duplicate, update_original, overall_size);
!       if (e->speculative && !speculation_useful_p (e, true))
! 	{
! 	  cgraph_resolve_speculation (e, NULL);
! 	  speculation_removed = true;
! 	}
!     }
  }
  
  
*************** inline_call (struct cgraph_edge *e, bool
*** 218,223 ****
--- 228,234 ----
    bool predicated = inline_edge_summary (e)->predicate != NULL;
  #endif
  
+   speculation_removed = false;
    /* Don't inline inlined edges.  */
    gcc_assert (e->inline_failed);
    /* Don't even think of inlining inline clone.  */
*************** inline_call (struct cgraph_edge *e, bool
*** 267,272 ****
--- 278,284 ----
       error due to INLINE_SIZE_SCALE roudoff errors.  */
    gcc_assert (!update_overall_summary || !overall_size || new_edges_found
  	      || abs (estimated_growth - (new_size - old_size)) <= 1
+ 	      || speculation_removed
  	      /* FIXME: a hack.  Edges with false predicate are accounted
  		 wrong, we should remove them from callgraph.  */
  	      || predicated);
Index: ipa-inline.c
===================================================================
*** ipa-inline.c	(revision 201640)
--- ipa-inline.c	(working copy)
*************** report_inline_failed_reason (struct cgra
*** 229,238 ****
     We check whether inlining is possible at all and whether
     caller growth limits allow doing so.  
  
!    if REPORT is true, output reason to the dump file.  */
  
  static bool
! can_inline_edge_p (struct cgraph_edge *e, bool report)
  {
    bool inlinable = true;
    enum availability avail;
--- 229,241 ----
     We check whether inlining is possible at all and whether
     caller growth limits allow doing so.  
  
!    if REPORT is true, output reason to the dump file.  
! 
!    if DISREGARD_LIMITES is true, ignore size limits.*/
  
  static bool
! can_inline_edge_p (struct cgraph_edge *e, bool report,
! 		   bool disregard_limits = false)
  {
    bool inlinable = true;
    enum availability avail;
*************** can_inline_edge_p (struct cgraph_edge *e
*** 309,314 ****
--- 312,318 ----
      }
    /* Check if caller growth allows the inlining.  */
    else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)
+ 	   && !disregard_limits
  	   && !lookup_attribute ("flatten",
  				 DECL_ATTRIBUTES
  				   (e->caller->global.inlined_to
*************** heap_edge_removal_hook (struct cgraph_ed
*** 1400,1405 ****
--- 1404,1482 ----
      }
  }
  
+ /* Return true if speculation of edge E seems useful.
+    If ANTICIPATE_INLINING is true, be conservative and hope that E
+    may get inlined.  */
+ 
+ bool
+ speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
+ {
+   enum availability avail;
+   struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
+   struct cgraph_edge *direct, *indirect;
+   struct ipa_ref *ref;
+ 
+   gcc_assert (e->speculative && !e->indirect_unknown_callee);
+ 
+   if (!cgraph_maybe_hot_edge_p (e))
+     return false;
+ 
+   /* See if IP optimizations found something potentially useful about the
+      function.  For now we look only for CONST/PURE flags.  Almost everything
+      else we propagate is useless.  */
+   if (avail >= AVAIL_AVAILABLE)
+     {
+       int ecf_flags = flags_from_decl_or_type (target->symbol.decl);
+       if (ecf_flags & ECF_CONST)
+         {
+           cgraph_speculative_call_info (e, direct, indirect, ref);
+ 	  if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
+ 	    return true;
+         }
+       else if (ecf_flags & ECF_PURE)
+         {
+           cgraph_speculative_call_info (e, direct, indirect, ref);
+ 	  if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
+ 	    return true;
+         }
+     }
+   /* If we did not managed to inline the function nor redirect
+      to an ipa-cp clone (that are seen by having local flag set),
+      it is probably pointless to inline it unless hardware is missing
+      indirect call predictor.  */
+   if (!anticipate_inlining && e->inline_failed && !target->local.local)
+     return false;
+   /* For overwritable targets there is not much to do.  */
+   if (e->inline_failed && !can_inline_edge_p (e, false, true))
+     return false;
+   /* OK, speculation seems interesting.  */
+   return true;
+ }
+ 
+ /* We know that EDGE is not going to be inlined.
+    See if we can remove speculation.  */
+ 
+ static void
+ resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
+ {
+   if (edge->speculative && !speculation_useful_p (edge, false))
+     {
+       struct cgraph_node *node = edge->caller;
+       struct cgraph_node *where = node->global.inlined_to
+ 				  ? node->global.inlined_to : node;
+       bitmap updated_nodes = BITMAP_ALLOC (NULL);
+ 
+       cgraph_resolve_speculation (edge, NULL);
+       reset_node_growth_cache (where);
+       reset_edge_caches (where);
+       inline_update_overall_summary (where);
+       update_caller_keys (edge_heap, where,
+ 			  updated_nodes, NULL);
+       reset_node_growth_cache (where);
+       BITMAP_FREE (updated_nodes);
+     }
+ }
+ 
  /* We use greedy algorithm for inlining of small functions:
     All inline candidates are put into prioritized heap ordered in
     increasing badness.
*************** inline_small_functions (void)
*** 1478,1491 ****
    /* Populate the heeap with all edges we might inline.  */
  
    FOR_EACH_DEFINED_FUNCTION (node)
!     if (!node->global.inlined_to)
!       {
! 	if (dump_file)
! 	  fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
! 		   cgraph_node_name (node), node->symbol.order);
  
! 	for (edge = node->callers; edge; edge = edge->next_caller)
  	  if (edge->inline_failed
  	      && can_inline_edge_p (edge, true)
  	      && want_inline_small_function_p (edge, true)
  	      && edge->inline_failed)
--- 1555,1573 ----
    /* Populate the heeap with all edges we might inline.  */
  
    FOR_EACH_DEFINED_FUNCTION (node)
!     {
!       bool update = false;
!       struct cgraph_edge *next;
  
!       if (dump_file)
! 	fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
! 		 cgraph_node_name (node), node->symbol.order);
! 
!       for (edge = node->callees; edge; edge = next)
! 	{
! 	  next = edge->next_callee;
  	  if (edge->inline_failed
+ 	      && !edge->aux
  	      && can_inline_edge_p (edge, true)
  	      && want_inline_small_function_p (edge, true)
  	      && edge->inline_failed)
*************** inline_small_functions (void)
*** 1493,1499 ****
  	      gcc_assert (!edge->aux);
  	      update_edge_key (edge_heap, edge);
  	    }
!       }
  
    gcc_assert (in_lto_p
  	      || !max_count
--- 1575,1598 ----
  	      gcc_assert (!edge->aux);
  	      update_edge_key (edge_heap, edge);
  	    }
! 	  if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
! 	    {
! 	      cgraph_resolve_speculation (edge, NULL);
! 	      update = true;
! 	    }
! 	}
!       if (update)
! 	{
! 	  struct cgraph_node *where = node->global.inlined_to
! 				      ? node->global.inlined_to : node;
! 	  inline_update_overall_summary (where);
!           reset_node_growth_cache (where);
! 	  reset_edge_caches (where);
!           update_caller_keys (edge_heap, where,
! 			      updated_nodes, NULL);
!           bitmap_clear (updated_nodes);
! 	}
!     }
  
    gcc_assert (in_lto_p
  	      || !max_count
*************** inline_small_functions (void)
*** 1534,1540 ****
  	}
  
        if (!can_inline_edge_p (edge, true))
! 	continue;
        
        callee = cgraph_function_or_thunk_node (edge->callee, NULL);
        growth = estimate_edge_growth (edge);
--- 1633,1642 ----
  	}
  
        if (!can_inline_edge_p (edge, true))
! 	{
! 	  resolve_noninline_speculation (edge_heap, edge);
! 	  continue;
! 	}
        
        callee = cgraph_function_or_thunk_node (edge->callee, NULL);
        growth = estimate_edge_growth (edge);
*************** inline_small_functions (void)
*** 1568,1578 ****
  	{
  	  edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
  	  report_inline_failed_reason (edge);
  	  continue;
  	}
  
        if (!want_inline_small_function_p (edge, true))
! 	continue;
  
        /* Heuristics for inlining small functions works poorly for
  	 recursive calls where we do efect similar to loop unrolling.
--- 1670,1684 ----
  	{
  	  edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
  	  report_inline_failed_reason (edge);
+ 	  resolve_noninline_speculation (edge_heap, edge);
  	  continue;
  	}
  
        if (!want_inline_small_function_p (edge, true))
! 	{
! 	  resolve_noninline_speculation (edge_heap, edge);
! 	  continue;
! 	}
  
        /* Heuristics for inlining small functions works poorly for
  	 recursive calls where we do efect similar to loop unrolling.
*************** inline_small_functions (void)
*** 1588,1593 ****
--- 1694,1700 ----
  				   ? &new_indirect_edges : NULL))
  	    {
  	      edge->inline_failed = CIF_RECURSIVE_INLINING;
+ 	      resolve_noninline_speculation (edge_heap, edge);
  	      continue;
  	    }
  	  reset_edge_caches (where);
*************** inline_small_functions (void)
*** 1596,1601 ****
--- 1703,1709 ----
  	  if (flag_indirect_inlining)
  	    add_new_edges_to_heap (edge_heap, new_indirect_edges);
            update_callee_keys (edge_heap, where, updated_nodes);
+ 	  bitmap_clear (updated_nodes);
  	}
        else
  	{
*************** inline_small_functions (void)
*** 1621,1626 ****
--- 1729,1735 ----
  	      edge->inline_failed
  		= (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl)
  		   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
+ 	      resolve_noninline_speculation (edge_heap, edge);
  	      continue;
  	    }
  	  else if (depth && dump_file)
*************** ipa_inline (void)
*** 1773,1778 ****
--- 1882,1888 ----
    struct cgraph_node **order =
      XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
    int i;
+   int cold;
  
    if (in_lto_p && optimize)
      ipa_update_after_lto_read ();
*************** ipa_inline (void)
*** 1820,1885 ****
       code size will shrink because the out-of-line copy is eliminated. 
       We do this regardless on the callee size as long as function growth limits
       are met.  */
!   if (flag_inline_functions_called_once)
!     {
!       int cold;
!       if (dump_file)
! 	fprintf (dump_file,
! 		 "\nDeciding on functions to be inlined into all callers:\n");
  
!       /* Inlining one function called once has good chance of preventing
! 	 inlining other function into the same callee.  Ideally we should
! 	 work in priority order, but probably inlining hot functions first
! 	 is good cut without the extra pain of maintaining the queue.
! 
! 	 ??? this is not really fitting the bill perfectly: inlining function
! 	 into callee often leads to better optimization of callee due to
! 	 increased context for optimization.
! 	 For example if main() function calls a function that outputs help
! 	 and then function that does the main optmization, we should inline
! 	 the second with priority even if both calls are cold by themselves.
! 
! 	 We probably want to implement new predicate replacing our use of
! 	 maybe_hot_edge interpreted as maybe_hot_edge || callee is known
! 	 to be hot.  */
!       for (cold = 0; cold <= 1; cold ++)
  	{
! 	  FOR_EACH_DEFINED_FUNCTION (node)
  	    {
! 	      if (want_inline_function_to_all_callers_p (node, cold))
  		{
! 		  int num_calls = 0;
! 		  struct cgraph_edge *e;
! 		  for (e = node->callers; e; e = e->next_caller)
! 		    num_calls++;
! 		  while (node->callers && !node->global.inlined_to)
! 		    {
! 		      struct cgraph_node *caller = node->callers->caller;
  
! 		      if (dump_file)
! 			{
! 			  fprintf (dump_file,
! 				   "\nInlining %s size %i.\n",
! 				   cgraph_node_name (node),
! 				   inline_summary (node)->size);
! 			  fprintf (dump_file,
! 				   " Called once from %s %i insns.\n",
! 				   cgraph_node_name (node->callers->caller),
! 				   inline_summary (node->callers->caller)->size);
! 			}
  
! 		      inline_call (node->callers, true, NULL, NULL, true);
  		      if (dump_file)
! 			fprintf (dump_file,
! 				 " Inlined into %s which now has %i size\n",
! 				 cgraph_node_name (caller),
! 				 inline_summary (caller)->size);
! 		      if (!num_calls--)
! 		        {
! 			  if (dump_file)
! 			    fprintf (dump_file, "New calls found; giving up.\n");
! 			  break;
! 		        }
  		    }
  		}
  	    }
--- 1930,2012 ----
       code size will shrink because the out-of-line copy is eliminated. 
       We do this regardless on the callee size as long as function growth limits
       are met.  */
!   if (dump_file)
!     fprintf (dump_file,
! 	     "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
  
!   /* Inlining one function called once has good chance of preventing
!      inlining other function into the same callee.  Ideally we should
!      work in priority order, but probably inlining hot functions first
!      is good cut without the extra pain of maintaining the queue.
! 
!      ??? this is not really fitting the bill perfectly: inlining function
!      into callee often leads to better optimization of callee due to
!      increased context for optimization.
!      For example if main() function calls a function that outputs help
!      and then function that does the main optmization, we should inline
!      the second with priority even if both calls are cold by themselves.
! 
!      We probably want to implement new predicate replacing our use of
!      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
!      to be hot.  */
!   for (cold = 0; cold <= 1; cold ++)
!     {
!       FOR_EACH_DEFINED_FUNCTION (node)
  	{
! 	  struct cgraph_edge *edge, *next;
! 	  bool update=false;
! 
! 	  for (edge = node->callees; edge; edge = next)
  	    {
! 	      next = edge->next_callee;
! 	      if (edge->speculative && !speculation_useful_p (edge, false))
  		{
! 		  cgraph_resolve_speculation (edge, NULL);
! 		  update = true;
! 		}
! 	    }
! 	  if (update)
! 	    {
! 	      struct cgraph_node *where = node->global.inlined_to
! 					  ? node->global.inlined_to : node;
!               reset_node_growth_cache (where);
! 	      reset_edge_caches (where);
! 	      inline_update_overall_summary (where);
! 	    }
! 	  if (flag_inline_functions_called_once
! 	      && want_inline_function_to_all_callers_p (node, cold))
! 	    {
! 	      int num_calls = 0;
! 	      struct cgraph_edge *e;
! 	      for (e = node->callers; e; e = e->next_caller)
! 		num_calls++;
! 	      while (node->callers && !node->global.inlined_to)
! 		{
! 		  struct cgraph_node *caller = node->callers->caller;
  
! 		  if (dump_file)
! 		    {
! 		      fprintf (dump_file,
! 			       "\nInlining %s size %i.\n",
! 			       cgraph_node_name (node),
! 			       inline_summary (node)->size);
! 		      fprintf (dump_file,
! 			       " Called once from %s %i insns.\n",
! 			       cgraph_node_name (node->callers->caller),
! 			       inline_summary (node->callers->caller)->size);
! 		    }
  
! 		  inline_call (node->callers, true, NULL, NULL, true);
! 		  if (dump_file)
! 		    fprintf (dump_file,
! 			     " Inlined into %s which now has %i size\n",
! 			     cgraph_node_name (caller),
! 			     inline_summary (caller)->size);
! 		  if (!num_calls--)
! 		    {
  		      if (dump_file)
! 			fprintf (dump_file, "New calls found; giving up.\n");
! 		      break;
  		    }
  		}
  	    }
Index: ipa-inline.h
===================================================================
*** ipa-inline.h	(revision 201640)
--- ipa-inline.h	(working copy)
*************** inline_hints do_estimate_edge_hints (str
*** 226,231 ****
--- 226,232 ----
  void initialize_growth_caches (void);
  void free_growth_caches (void);
  void compute_inline_parameters (struct cgraph_node *, bool);
+ bool speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining);
  
  /* In ipa-inline-transform.c  */
  bool inline_call (struct cgraph_edge *, bool, vec<cgraph_edge_p> *, int *, bool);
Index: ipa.c
===================================================================
*** ipa.c	(revision 201640)
--- ipa.c	(working copy)
*************** bool
*** 768,778 ****
  can_replace_by_local_alias (symtab_node node)
  {
    return (symtab_node_availability (node) > AVAIL_OVERWRITABLE
! 	  && !DECL_EXTERNAL (node->symbol.decl)
! 	  && (!DECL_ONE_ONLY (node->symbol.decl)
! 	      || node->symbol.resolution == LDPR_PREVAILING_DEF
! 	      || node->symbol.resolution == LDPR_PREVAILING_DEF_IRONLY
! 	      || node->symbol.resolution == LDPR_PREVAILING_DEF_IRONLY_EXP));
  }
  
  /* Mark visibility of all functions.
--- 768,774 ----
  can_replace_by_local_alias (symtab_node node)
  {
    return (symtab_node_availability (node) > AVAIL_OVERWRITABLE
! 	  && !symtab_can_be_discarded (node));
  }
  
  /* Mark visibility of all functions.
*************** ipa_profile (void)
*** 1407,1459 ****
    bool something_changed = false;
    int i;
    gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
! 
!   /* Produce speculative calls: we saved common traget from porfiling into
!      e->common_target_id.  Now, at link time, we can look up corresponding
!      function node and produce speculative call.  */
!   if (in_lto_p)
!     {
!       struct cgraph_edge *e;
!       struct cgraph_node *n,*n2;
! 
!       init_node_map (false);
!       FOR_EACH_DEFINED_FUNCTION (n)
! 	{
! 	  bool update = false;
! 
! 	  for (e = n->indirect_calls; e; e = e->next_callee)
! 	    if (e->indirect_info->common_target_id)
! 	      {
! 		n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
! 		if (n2)
! 		  {
! 		    if (dump_file)
! 		      {
! 			fprintf (dump_file, "Indirect call -> direct call from"
! 				 " other module %s/%i => %s/%i, prob %3.2f\n",
! 				 xstrdup (cgraph_node_name (n)), n->symbol.order,
! 				 xstrdup (cgraph_node_name (n2)), n2->symbol.order,
! 				 e->indirect_info->common_target_probability
! 				 / (float)REG_BR_PROB_BASE);
! 		      }
! 		    cgraph_turn_edge_to_speculative
! 		      (e, n2,
! 		       apply_scale (e->count,
! 				    e->indirect_info->common_target_probability),
! 		       apply_scale (e->frequency,
! 				    e->indirect_info->common_target_probability));
! 		    update = true;
! 		  }
! 		else
! 		  if (dump_file)
! 		    fprintf (dump_file, "Function with profile-id %i not found.\n",
! 			     e->indirect_info->common_target_id);
! 	       }
! 	     if (update)
! 	       inline_update_overall_summary (n);
! 	   }
! 	del_node_map ();
!     }
  
    if (dump_file)
      dump_histogram (dump_file, histogram);
--- 1403,1411 ----
    bool something_changed = false;
    int i;
    gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
!   struct cgraph_node *n,*n2;
!   int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
!   bool node_map_initialized = false;
  
    if (dump_file)
      dump_histogram (dump_file, histogram);
*************** ipa_profile (void)
*** 1523,1528 ****
--- 1475,1580 ----
    histogram.release();
    free_alloc_pool (histogram_pool);
  
+   /* Produce speculative calls: we saved common target from profiling into
+      e->common_target_id.  Now, at link time, we can look up corresponding
+      function node and produce speculative call.  */
+ 
+   FOR_EACH_DEFINED_FUNCTION (n)
+     {
+       bool update = false;
+ 
+       for (e = n->indirect_calls; e; e = e->next_callee)
+ 	{
+ 	  if (n->count)
+ 	    nindirect++;
+ 	  if (e->indirect_info->common_target_id)
+ 	    {
+ 	      if (!node_map_initialized)
+ 	        init_node_map (false);
+ 	      node_map_initialized = true;
+ 	      ncommon++;
+ 	      n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
+ 	      if (n2)
+ 		{
+ 		  if (dump_file)
+ 		    {
+ 		      fprintf (dump_file, "Indirect call -> direct call from"
+ 			       " other module %s/%i => %s/%i, prob %3.2f\n",
+ 			       xstrdup (cgraph_node_name (n)), n->symbol.order,
+ 			       xstrdup (cgraph_node_name (n2)), n2->symbol.order,
+ 			       e->indirect_info->common_target_probability
+ 			       / (float)REG_BR_PROB_BASE);
+ 		    }
+ 		  if (e->indirect_info->common_target_probability
+ 		      < REG_BR_PROB_BASE / 2)
+ 		    {
+ 		      nuseless++;
+ 		      if (dump_file)
+ 			fprintf (dump_file,
+ 				 "Not speculating: probability is too low.\n");
+ 		    }
+ 		  else if (!cgraph_maybe_hot_edge_p (e))
+ 		    {
+ 		      nuseless++;
+ 		      if (dump_file)
+ 			fprintf (dump_file,
+ 				 "Not speculating: call is cold.\n");
+ 		    }
+ 		  else if (cgraph_function_body_availability (n2)
+ 			   <= AVAIL_OVERWRITABLE
+ 			   && symtab_can_be_discarded ((symtab_node) n2))
+ 		    {
+ 		      nuseless++;
+ 		      if (dump_file)
+ 			fprintf (dump_file,
+ 				 "Not speculating: target is overwritable "
+ 				 "and can be discarded.\n");
+ 		    }
+ 		  else
+ 		    {
+ 		      /* Target may be overwritable, but profile says that
+ 			 control flow goes to this particular implementation
+ 			 of N2.  Speculate on the local alias to allow inlining.
+ 		       */
+ 		      if (!symtab_can_be_discarded ((symtab_node) n2))
+ 			n2 = cgraph (symtab_nonoverwritable_alias ((symtab_node)n2));
+ 		      nconverted++;
+ 		      cgraph_turn_edge_to_speculative
+ 			(e, n2,
+ 			 apply_scale (e->count,
+ 				      e->indirect_info->common_target_probability),
+ 			 apply_scale (e->frequency,
+ 				      e->indirect_info->common_target_probability));
+ 		      update = true;
+ 		    }
+ 		}
+ 	      else
+ 		{
+ 		  if (dump_file)
+ 		    fprintf (dump_file, "Function with profile-id %i not found.\n",
+ 			     e->indirect_info->common_target_id);
+ 		  nunknown++;
+ 		}
+ 	    }
+ 	 }
+        if (update)
+ 	 inline_update_overall_summary (n);
+      }
+   if (node_map_initialized)
+     del_node_map ();
+   if (dump_file && nindirect)
+     fprintf (dump_file,
+ 	     "%i indirect calls trained.\n"
+ 	     "%i (%3.2f%%) have common target.\n"
+ 	     "%i (%3.2f%%) targets were not found.\n"
+ 	     "%i (%3.2f%%) speculations seem useless.\n"
+ 	     "%i (%3.2f%%) speculations produced.\n",
+ 	     nindirect,
+ 	     ncommon, ncommon * 100.0 / nindirect,
+ 	     nunknown, nunknown * 100.0 / nindirect,
+ 	     nuseless, nuseless * 100.0 / nindirect,
+ 	     nconverted, nconverted * 100.0 / nindirect);
+ 
    order_pos = ipa_reverse_postorder (order);
    for (i = order_pos - 1; i >= 0; i--)
      {



More information about the Gcc-patches mailing list