Use estimated profile for inlining

Jan Hubicka jh@suse.cz
Fri Feb 9 20:36:00 GMT 2007


Hi,
this patch updates the cgraph infrastructure to track function frequencies.
Frequencies are represented as integers in the range 0...CGRAPH_FREQ_MAX
(100000), where CGRAPH_FREQ_BASE (1000) means one expected invocation
per function call.

This is different from function-local profiles in that all functions
are normalized so frequencies across functions use the same base.

The heuristic is very simple at the moment, just using
expected_growth_per_inline/frequency as the badness of the call and inlining
in that order, but I've tested that it allows reducing the unit growth argument
from 60% to 30% without any significant regression on the C++ benchmark
tester.  This causes almost 30% compile time speedups on many C++ units
and also on -O3 IMA compilation of GCC modules/many of the SPEC benchmarks,
and I've previously also verified that SPEC overall doesn't push the unit
growth argument up.

C++ benchmarks showed, after an additional patch to reduce inline growth to
30%, only an execution regression in the botan benchmark, but all the
regressions were within the "random noise" factor (even though the overall
slight degradation is fairly certain).  I will look into this further, but I
don't think it is a strong enough argument to hold back the patch that helps
compile time and code size on all other testcases.

Bootstrapped/regtested i686-linux, I am re-testing after some last
minute changes and intend to commit it afterwards if there are no
complaints.

Honza

PS: I've just noticed I forgot to appropriately renumber SSA TODOs to
keep them in sequence; I will take care of that too.

	* cgraphbuild.c (build_cgraph_edges): Compute frequencies.
	(rebuild_cgraph_edges): Likewise.
	* cgraph.c (cgraph_set_call_stmt): Add new argument frequency.
	(dump_cgraph_node): Dump frequencies.
	(cgraph_clone_edge): Add frequency scales.
	(cgraph_clone_node): Add frequency.
	* cgraph.h (cgraph_edge): Add frequency argument.
	(CGRAPH_FREQ_BASE, CGRAPH_FREQ_MAX): New constants.
	(cgraph_create_edge, cgraph_clone_edge, cgraph_clone_node): Update.
	* tree-pass.h (TODO_rebuild_frequencies): New constant.
	* cgraphunit.c (verify_cgraph_node): Verify frequencies.
	(cgraph_copy_node_for_versioning): Update call of cgraph_clone_edge.
	(save_inline_function_body): Likewise.
	* ipa-inline.c: Include rtl.h.
	(cgraph_clone_inlined_nodes): Update call of cgraph_clone_node.
	(cgraph_edge_badness): Use frequencies.
	(cgraph_decide_recursive_inlining): Update cloning.
	(cgraph_decide_inlining_of_small_function): Dump frequency.
	* predict.c (estimate_bb_frequencies): Export.
	* predict.h (estimate_bb_frequencies): Declare.
	* tree-inline.c (copy_bb): Watch overflows.
	(expand_call_inline): Update call of cgraph_create_edge.
	(optimize_inline_calls): Use TODO flags to update frequencies.
	* passes.h: Include predict.h
	(init_optimization_passes): Move profile ahead.
	(execute_function_todo): Handle TODO_rebuild_frequencies.
Index: cgraphbuild.c
===================================================================
*** cgraphbuild.c	(revision 121767)
--- cgraphbuild.c	(working copy)
*************** build_cgraph_edges (void)
*** 115,120 ****
--- 115,124 ----
    struct pointer_set_t *visited_nodes = pointer_set_create ();
    block_stmt_iterator bsi;
    tree step;
+   int entry_freq = ENTRY_BLOCK_PTR->frequency;
+ 
+   if (!entry_freq)
+     entry_freq = 1;
  
    /* Create the callgraph edges and record the nodes referenced by the function.
       body.  */
*************** build_cgraph_edges (void)
*** 127,134 ****
  
  	if (call && (decl = get_callee_fndecl (call)))
  	  {
  	    cgraph_create_edge (node, cgraph_node (decl), stmt,
! 				bb->count,
  				bb->loop_depth);
  	    walk_tree (&TREE_OPERAND (call, 1),
  		       record_reference, node, visited_nodes);
--- 131,142 ----
  
  	if (call && (decl = get_callee_fndecl (call)))
  	  {
+ 	    int freq = (!bb->frequency && !entry_freq ? CGRAPH_FREQ_BASE
+ 			: bb->frequency * CGRAPH_FREQ_BASE / entry_freq);
+ 	    if (freq > CGRAPH_FREQ_MAX)
+ 	      freq = CGRAPH_FREQ_MAX;
  	    cgraph_create_edge (node, cgraph_node (decl), stmt,
! 				bb->count, freq,
  				bb->loop_depth);
  	    walk_tree (&TREE_OPERAND (call, 1),
  		       record_reference, node, visited_nodes);
*************** rebuild_cgraph_edges (void)
*** 196,201 ****
--- 204,213 ----
    basic_block bb;
    struct cgraph_node *node = cgraph_node (current_function_decl);
    block_stmt_iterator bsi;
+   int entry_freq = ENTRY_BLOCK_PTR->frequency;
+ 
+   if (!entry_freq)
+     entry_freq = 1;
  
    cgraph_node_remove_callees (node);
  
*************** rebuild_cgraph_edges (void)
*** 209,217 ****
  	tree decl;
  
  	if (call && (decl = get_callee_fndecl (call)))
! 	  cgraph_create_edge (node, cgraph_node (decl), stmt,
! 			      bb->count,
! 			      bb->loop_depth);
        }
    initialize_inline_failed (node);
    gcc_assert (!node->global.inlined_to);
--- 221,234 ----
  	tree decl;
  
  	if (call && (decl = get_callee_fndecl (call)))
! 	  {
! 	    int freq = (!bb->frequency && !entry_freq ? CGRAPH_FREQ_BASE
! 			: bb->frequency * CGRAPH_FREQ_BASE / entry_freq);
! 	    if (freq > CGRAPH_FREQ_MAX)
! 	      freq = CGRAPH_FREQ_MAX;
! 	    cgraph_create_edge (node, cgraph_node (decl), stmt,
! 				bb->count, freq, bb->loop_depth);
! 	   }
        }
    initialize_inline_failed (node);
    gcc_assert (!node->global.inlined_to);
Index: cgraph.c
===================================================================
*** cgraph.c	(revision 121767)
--- cgraph.c	(working copy)
*************** cgraph_set_call_stmt (struct cgraph_edge
*** 324,330 ****
  
  struct cgraph_edge *
  cgraph_create_edge (struct cgraph_node *caller, struct cgraph_node *callee,
! 		    tree call_stmt, gcov_type count, int nest)
  {
    struct cgraph_edge *edge = GGC_NEW (struct cgraph_edge);
  #ifdef ENABLE_CHECKING
--- 324,330 ----
  
  struct cgraph_edge *
  cgraph_create_edge (struct cgraph_node *caller, struct cgraph_node *callee,
! 		    tree call_stmt, gcov_type count, int freq, int nest)
  {
    struct cgraph_edge *edge = GGC_NEW (struct cgraph_edge);
  #ifdef ENABLE_CHECKING
*************** cgraph_create_edge (struct cgraph_node *
*** 362,367 ****
--- 362,371 ----
    caller->callees = edge;
    callee->callers = edge;
    edge->count = count;
+   gcc_assert (count >= 0);
+   edge->frequency = freq;
+   gcc_assert (freq >= 0);
+   gcc_assert (freq <= CGRAPH_FREQ_MAX);
    edge->loop_nest = nest;
    if (caller->call_site_hash)
      {
*************** dump_cgraph_node (FILE *f, struct cgraph
*** 713,718 ****
--- 717,725 ----
        if (edge->count)
  	fprintf (f, "("HOST_WIDEST_INT_PRINT_DEC"x) ",
  		 (HOST_WIDEST_INT)edge->count);
+       if (edge->frequency)
+ 	fprintf (f, "(%.2f per call) ",
+ 		 edge->frequency / (double)CGRAPH_FREQ_BASE);
        if (!edge->inline_failed)
  	fprintf(f, "(inlined) ");
      }
*************** dump_cgraph_node (FILE *f, struct cgraph
*** 727,732 ****
--- 734,742 ----
        if (edge->count)
  	fprintf (f, "("HOST_WIDEST_INT_PRINT_DEC"x) ",
  		 (HOST_WIDEST_INT)edge->count);
+       if (edge->frequency)
+ 	fprintf (f, "(%.2f per call) ",
+ 		 edge->frequency / (double)CGRAPH_FREQ_BASE);
        if (edge->loop_nest)
  	fprintf (f, "(nested in %i loops) ", edge->loop_nest);
      }
*************** cgraph_function_possibly_inlined_p (tree
*** 795,807 ****
  /* Create clone of E in the node N represented by CALL_EXPR the callgraph.  */
  struct cgraph_edge *
  cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
! 		   tree call_stmt, gcov_type count_scale, int loop_nest,
! 		   bool update_original)
  {
    struct cgraph_edge *new;
  
!   new = cgraph_create_edge (n, e->callee, call_stmt,
! 			    e->count * count_scale / REG_BR_PROB_BASE,
  			    e->loop_nest + loop_nest);
  
    new->inline_failed = e->inline_failed;
--- 805,820 ----
  /* Create clone of E in the node N represented by CALL_EXPR the callgraph.  */
  struct cgraph_edge *
  cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
! 		   tree call_stmt, gcov_type count_scale, int freq_scale,
! 		   int loop_nest, bool update_original)
  {
    struct cgraph_edge *new;
+   gcov_type count = e->count * count_scale / REG_BR_PROB_BASE;
+   gcov_type freq = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
  
!   if (freq > CGRAPH_FREQ_MAX)
!     freq = CGRAPH_FREQ_MAX;
!   new = cgraph_create_edge (n, e->callee, call_stmt, count, freq,
  			    e->loop_nest + loop_nest);
  
    new->inline_failed = e->inline_failed;
*************** cgraph_clone_edge (struct cgraph_edge *e
*** 821,827 ****
     function's profile to reflect the fact that part of execution is handled
     by node.  */
  struct cgraph_node *
! cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest,
  		   bool update_original)
  {
    struct cgraph_node *new = cgraph_create_node ();
--- 834,840 ----
     function's profile to reflect the fact that part of execution is handled
     by node.  */
  struct cgraph_node *
! cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq, int loop_nest,
  		   bool update_original)
  {
    struct cgraph_node *new = cgraph_create_node ();
*************** cgraph_clone_node (struct cgraph_node *n
*** 853,859 ****
      }
  
    for (e = n->callees;e; e=e->next_callee)
!     cgraph_clone_edge (e, new, e->call_stmt, count_scale, loop_nest,
  		       update_original);
  
    new->next_clone = n->next_clone;
--- 866,872 ----
      }
  
    for (e = n->callees;e; e=e->next_callee)
!     cgraph_clone_edge (e, new, e->call_stmt, count_scale, freq, loop_nest,
  		       update_original);
  
    new->next_clone = n->next_clone;
Index: cgraph.h
===================================================================
*** cgraph.h	(revision 121767)
--- cgraph.h	(working copy)
*************** struct cgraph_edge GTY((chain_next ("%h.
*** 201,210 ****
--- 201,217 ----
    const char *inline_failed;
    /* Expected number of executions: calculated in profile.c.  */
    gcov_type count;
+   /* Expected frequency of executions within the function. 
+      When set to CGRAPH_FREQ_BASE, the edge is expected to be called once
+      per function call.  The range is 0 to CGRAPH_FREQ_MAX.  */
+   int frequency;
    /* Depth of loop nest, 1 means no loop nest.  */
    int loop_nest;
  };
  
+ #define CGRAPH_FREQ_BASE 1000
+ #define CGRAPH_FREQ_MAX 100000
+ 
  typedef struct cgraph_edge *cgraph_edge_p;
  
  DEF_VEC_P(cgraph_edge_p);
*************** void cgraph_release_function_body (struc
*** 290,296 ****
  void cgraph_node_remove_callees (struct cgraph_node *node);
  struct cgraph_edge *cgraph_create_edge (struct cgraph_node *,
  					struct cgraph_node *,
! 					tree, gcov_type, int);
  struct cgraph_node *cgraph_node (tree);
  struct cgraph_node *cgraph_node_for_asm (tree asmname);
  struct cgraph_edge *cgraph_edge (struct cgraph_node *, tree);
--- 297,303 ----
  void cgraph_node_remove_callees (struct cgraph_node *node);
  struct cgraph_edge *cgraph_create_edge (struct cgraph_node *,
  					struct cgraph_node *,
! 					tree, gcov_type, int, int);
  struct cgraph_node *cgraph_node (tree);
  struct cgraph_node *cgraph_node_for_asm (tree asmname);
  struct cgraph_edge *cgraph_edge (struct cgraph_node *, tree);
*************** struct cgraph_rtl_info *cgraph_rtl_info 
*** 301,308 ****
  const char * cgraph_node_name (struct cgraph_node *);
  struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *,
  					struct cgraph_node *,
! 					tree, gcov_type, int, bool);
! struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type,
  					int, bool);
  
  void cgraph_redirect_edge_callee (struct cgraph_edge *, struct cgraph_node *);
--- 308,315 ----
  const char * cgraph_node_name (struct cgraph_node *);
  struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *,
  					struct cgraph_node *,
! 					tree, gcov_type, int, int, bool);
! struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type, int,
  					int, bool);
  
  void cgraph_redirect_edge_callee (struct cgraph_edge *, struct cgraph_node *);
Index: tree-pass.h
===================================================================
*** tree-pass.h	(revision 121767)
--- tree-pass.h	(working copy)
*************** struct dump_file_info
*** 167,172 ****
--- 167,173 ----
  #define TODO_verify_loops		(1 << 6)
  #define TODO_dump_cgraph		(1 << 7)
  #define TODO_remove_functions		(1 << 8)
+ #define TODO_rebuild_frequencies	(1 << 16)
  
  /* To-do flags for calls to update_ssa.  */
  
Index: cgraphunit.c
===================================================================
*** cgraphunit.c	(revision 121767)
--- cgraphunit.c	(working copy)
*************** verify_cgraph_node (struct cgraph_node *
*** 519,524 ****
--- 519,534 ----
  	  error ("caller edge count is negative");
  	  error_found = true;
  	}
+       if (e->frequency < 0)
+ 	{
+ 	  error ("caller edge frequency is negative");
+ 	  error_found = true;
+ 	}
+       if (e->frequency > CGRAPH_FREQ_MAX)
+ 	{
+ 	  error ("caller edge frequency is too large");
+ 	  error_found = true;
+ 	}
        if (!e->inline_failed)
  	{
  	  if (node->global.inlined_to
*************** cgraph_copy_node_for_versioning (struct 
*** 1412,1418 ****
        also cloned.  */
     for (e = old_version->callees;e; e=e->next_callee)
       {
!        new_e = cgraph_clone_edge (e, new_version, e->call_stmt, 0, e->loop_nest, true);
         new_e->count = e->count;
       }
     /* Fix recursive calls.
--- 1422,1429 ----
        also cloned.  */
     for (e = old_version->callees;e; e=e->next_callee)
       {
!        new_e = cgraph_clone_edge (e, new_version, e->call_stmt, 0, e->frequency,
! 				  e->loop_nest, true);
         new_e->count = e->count;
       }
     /* Fix recursive calls.
*************** save_inline_function_body (struct cgraph
*** 1511,1517 ****
      {
        struct cgraph_edge *e;
  
!       first_clone = cgraph_clone_node (node, node->count, 0, false);
        first_clone->needed = 0;
        first_clone->reachable = 1;
        /* Recursively clone all bodies.  */
--- 1522,1529 ----
      {
        struct cgraph_edge *e;
  
!       first_clone = cgraph_clone_node (node, node->count, 0, CGRAPH_FREQ_BASE,
! 				       false);
        first_clone->needed = 0;
        first_clone->reachable = 1;
        /* Recursively clone all bodies.  */
Index: ipa-inline.c
===================================================================
*** ipa-inline.c	(revision 121767)
--- ipa-inline.c	(working copy)
*************** Software Foundation, 51 Franklin Street,
*** 139,144 ****
--- 139,145 ----
  #include "coverage.h"
  #include "ggc.h"
  #include "tree-flow.h"
+ #include "rtl.h"
  
  /* Mode incremental inliner operate on:
  
*************** cgraph_clone_inlined_nodes (struct cgrap
*** 215,221 ****
        else
  	{
  	  struct cgraph_node *n;
! 	  n = cgraph_clone_node (e->callee, e->count, e->loop_nest, 
  				 update_original);
  	  cgraph_redirect_edge_callee (e, n);
  	}
--- 216,222 ----
        else
  	{
  	  struct cgraph_node *n;
! 	  n = cgraph_clone_node (e->callee, e->count, e->frequency, e->loop_nest, 
  				 update_original);
  	  cgraph_redirect_edge_callee (e, n);
  	}
*************** cgraph_maybe_hot_edge_p (struct cgraph_e
*** 478,521 ****
     smallest badness are inlined first.  After each inlining is performed
     the costs of all caller edges of nodes affected are recomputed so the
     metrics may accurately depend on values such as number of inlinable callers
!    of the function or function body size.
! 
!    With profiling we use number of executions of each edge to drive the cost.
!    We also should distinguish hot and cold calls where the cold calls are
!    inlined into only when code size is overall improved.  
!    */
  
  static int
  cgraph_edge_badness (struct cgraph_edge *edge)
  {
    if (max_count)
      {
        int growth =
  	cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
        growth -= edge->caller->global.insns;
  
!       /* Always prefer inlining saving code size.  */
!       if (growth <= 0)
! 	return INT_MIN - growth;
!       return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
      }
    else
!   {
!     int nest = MIN (edge->loop_nest, 8);
!     int badness = cgraph_estimate_growth (edge->callee) * 256;
! 
!     /* Decrease badness if call is nested.  */
!     if (badness > 0)    
!       badness >>= nest;
!     else
!       badness <<= nest;
! 
!     /* Make recursive inlining happen always after other inlining is done.  */
!     if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
!       return badness + 1;
!     else
!       return badness;
!   }
  }
  
  /* Recompute heap nodes for each of caller edge.  */
--- 479,553 ----
     smallest badness are inlined first.  After each inlining is performed
     the costs of all caller edges of nodes affected are recomputed so the
     metrics may accurately depend on values such as number of inlinable callers
!    of the function or function body size.  */
  
  static int
  cgraph_edge_badness (struct cgraph_edge *edge)
  {
+   int badness;
+   int growth =
+     cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
+ 
+   growth -= edge->caller->global.insns;
+ 
+   /* Always prefer inlining saving code size.  */
+   if (growth <= 0)
+     badness = INT_MIN - growth;
+ 
+   /* When profiling is available, base priorities -(#calls / growth).
+      So we optimize for overall number of "executed" inlined calls.  */
    if (max_count)
+     badness = ((int)((double)edge->count * INT_MIN / max_count)) / growth;
+ 
+   /* When function local profile is available, base priorities on
+      growth / frequency, so we optimize for overall frequency of inlined
+      calls.  This is not too accurate since while the call might be frequent
+      within function, the function itself is infrequent.
+ 
+      Other objective to optimize for is number of different calls inlined.
+      We add the estimated growth after inlining all functions to biass the
+      priorities slightly in this direction (so fewer times called functions
+      of the same size gets priority).  */
+   else if (flag_guess_branch_prob)
      {
+       int div = edge->frequency * 100 / CGRAPH_FREQ_BASE;
        int growth =
  	cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
        growth -= edge->caller->global.insns;
+       badness = growth * 256;
+ 
+       /* Decrease badness if call is nested.  */
+       /* Compress the range so we don't overflow.  */
+       if (div > 256)
+ 	div = 256 + ceil_log2 (div) - 8;
+       if (div < 1)
+ 	div = 1;
+       if (badness > 0)
+ 	badness /= div;
+       badness += cgraph_estimate_growth (edge->callee);
+     }
+   /* When function local profile is not available or it does not give
+      useful information (ie frequency is zero), base the cost on
+      loop nest and overall size growth, so we optimize for overall number
+      of functions fully inlined in program.  */
+   else
+     {
+       int nest = MIN (edge->loop_nest, 8);
+       badness = cgraph_estimate_growth (edge->callee) * 256;
  
!       /* Decrease badness if call is nested.  */
!       if (badness > 0)    
! 	badness >>= nest;
!       else
!         {
! 	  badness <<= nest;
!         }
      }
+   /* Make recursive inlining happen always after other inlining is done.  */
+   if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
+     return badness + 1;
    else
!     return badness;
  }
  
  /* Recompute heap nodes for each of caller edge.  */
*************** cgraph_decide_recursive_inlining (struct
*** 651,657 ****
  	     cgraph_node_name (node));
  
    /* We need original clone to copy around.  */
!   master_clone = cgraph_clone_node (node, node->count, 1, false);
    master_clone->needed = true;
    for (e = master_clone->callees; e; e = e->next_callee)
      if (!e->inline_failed)
--- 683,689 ----
  	     cgraph_node_name (node));
  
    /* We need original clone to copy around.  */
!   master_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE, 1, false);
    master_clone->needed = true;
    for (e = master_clone->callees; e; e = e->next_callee)
      if (!e->inline_failed)
*************** cgraph_decide_inlining_of_small_function
*** 831,840 ****
  	  fprintf (dump_file, 
  		   " to be inlined into %s\n"
  		   " Estimated growth after inlined into all callees is %+i insns.\n"
! 		   " Estimated badness is %i.\n",
  		   cgraph_node_name (edge->caller),
  		   cgraph_estimate_growth (edge->callee),
! 		   cgraph_edge_badness (edge));
  	  if (edge->count)
  	    fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
  	}
--- 863,873 ----
  	  fprintf (dump_file, 
  		   " to be inlined into %s\n"
  		   " Estimated growth after inlined into all callees is %+i insns.\n"
! 		   " Estimated badness is %i, frequency %.2f.\n",
  		   cgraph_node_name (edge->caller),
  		   cgraph_estimate_growth (edge->callee),
! 		   cgraph_edge_badness (edge),
! 		   edge->frequency / (double)CGRAPH_FREQ_BASE);
  	  if (edge->count)
  	    fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
  	}
Index: predict.c
===================================================================
*** predict.c	(revision 121767)
--- predict.c	(working copy)
*************** static bool last_basic_block_p (basic_bl
*** 79,85 ****
  static void compute_function_frequency (void);
  static void choose_function_section (void);
  static bool can_predict_insn_p (rtx);
- static void estimate_bb_frequencies (void);
  
  /* Information we hold about each branch predictor.
     Filled using information from predict.def.  */
--- 79,84 ----
*************** expensive_function_p (int threshold)
*** 1685,1691 ****
  
  /* Estimate basic blocks frequency by given branch probabilities.  */
  
! static void
  estimate_bb_frequencies (void)
  {
    basic_block bb;
--- 1684,1690 ----
  
  /* Estimate basic blocks frequency by given branch probabilities.  */
  
! void
  estimate_bb_frequencies (void)
  {
    basic_block bb;
Index: predict.h
===================================================================
*** predict.h	(revision 121767)
--- predict.h	(working copy)
*************** enum prediction
*** 38,42 ****
--- 38,43 ----
  
  extern void predict_insn_def (rtx, enum br_predictor, enum prediction);
  extern int counts_to_freqs (void);
+ extern void estimate_bb_frequencies (void);
  
  #endif  /* GCC_PREDICT_H */
Index: tree-inline.c
===================================================================
*** tree-inline.c	(revision 121767)
--- tree-inline.c	(working copy)
*************** copy_bb (copy_body_data *id, basic_block
*** 777,784 ****
    copy_basic_block = create_basic_block (NULL, (void *) 0,
                                           (basic_block) bb->prev_bb->aux);
    copy_basic_block->count = bb->count * count_scale / REG_BR_PROB_BASE;
!   copy_basic_block->frequency = (bb->frequency
  				     * frequency_scale / REG_BR_PROB_BASE);
    copy_bsi = bsi_start (copy_basic_block);
  
    for (bsi = bsi_start (bb);
--- 777,789 ----
    copy_basic_block = create_basic_block (NULL, (void *) 0,
                                           (basic_block) bb->prev_bb->aux);
    copy_basic_block->count = bb->count * count_scale / REG_BR_PROB_BASE;
! 
!   /* We are going to rebuild frequencies from scratch.  These values have just
!      small importance to drive canonicalize_loop_headers.  */
!   copy_basic_block->frequency = ((gcov_type)bb->frequency
  				     * frequency_scale / REG_BR_PROB_BASE);
+   if (copy_basic_block->frequency > BB_FREQ_MAX)
+     copy_basic_block->frequency = BB_FREQ_MAX;
    copy_bsi = bsi_start (copy_basic_block);
  
    for (bsi = bsi_start (bb);
*************** copy_bb (copy_body_data *id, basic_block
*** 839,845 ****
  		      edge = cgraph_edge (id->src_node, orig_stmt);
  		      if (edge)
  			cgraph_clone_edge (edge, id->dst_node, stmt,
! 					   REG_BR_PROB_BASE, 1, true);
  		      break;
  
  		    case CB_CGE_MOVE_CLONES:
--- 844,850 ----
  		      edge = cgraph_edge (id->src_node, orig_stmt);
  		      if (edge)
  			cgraph_clone_edge (edge, id->dst_node, stmt,
! 					   REG_BR_PROB_BASE, 1, edge->frequency, true);
  		      break;
  
  		    case CB_CGE_MOVE_CLONES:
*************** expand_call_inline (basic_block bb, tree
*** 2400,2407 ****
           (incorrect node sharing is most common reason for missing edges.  */
        gcc_assert (dest->needed || !flag_unit_at_a_time);
        cgraph_create_edge (id->dst_node, dest, stmt,
! 			  bb->count, bb->loop_depth)->inline_failed
  	= N_("originally indirect function call not considered for inlining");
        goto egress;
      }
  
--- 2405,2418 ----
           (incorrect node sharing is most common reason for missing edges.  */
        gcc_assert (dest->needed || !flag_unit_at_a_time);
        cgraph_create_edge (id->dst_node, dest, stmt,
! 			  bb->count, CGRAPH_FREQ_BASE,
! 			  bb->loop_depth)->inline_failed
  	= N_("originally indirect function call not considered for inlining");
+       if (dump_file)
+ 	{
+ 	   fprintf (dump_file, "Created new direct edge to %s",
+ 		    cgraph_node_name (dest));
+ 	}
        goto egress;
      }
  
*************** optimize_inline_calls (tree fn)
*** 2808,2817 ****
  	gcc_assert (e->inline_failed);
      }
  #endif
-   /* We need to rescale frequencies again to peak at REG_BR_PROB_BASE
-      as inlining loops might increase the maximum.  */
-   if (ENTRY_BLOCK_PTR->count)
-     counts_to_freqs ();
  
    /* We are not going to maintain the cgraph edges up to date.
       Kill it so it won't confuse us.  */
--- 2819,2824 ----
*************** optimize_inline_calls (tree fn)
*** 2830,2836 ****
       throw and they don't care to proactively update local EH info.  This is
       done later in fixup_cfg pass that also execute the verification.  */
    return (TODO_update_ssa | TODO_cleanup_cfg
! 	  | (gimple_in_ssa_p (cfun) ? TODO_remove_unused_locals : 0));
  }
  
  /* FN is a function that has a complete body, and CLONE is a function whose
--- 2837,2844 ----
       throw and they don't care to proactively update local EH info.  This is
       done later in fixup_cfg pass that also execute the verification.  */
    return (TODO_update_ssa | TODO_cleanup_cfg
! 	  | (gimple_in_ssa_p (cfun) ? TODO_remove_unused_locals : 0)
! 	  | (profile_status != PROFILE_ABSENT ? TODO_rebuild_frequencies : 0));
  }
  
  /* FN is a function that has a complete body, and CLONE is a function whose
Index: passes.c
===================================================================
*** passes.c	(revision 121767)
--- passes.c	(working copy)
*************** Software Foundation, 51 Franklin Street,
*** 83,88 ****
--- 83,89 ----
  #include "tree-flow.h"
  #include "tree-pass.h"
  #include "tree-dump.h"
+ #include "predict.h"
  
  #if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO)
  #include "dwarf2out.h"
*************** init_optimization_passes (void)
*** 493,498 ****
--- 494,500 ----
  	  NEXT_PASS (pass_merge_phi);
  	  NEXT_PASS (pass_dce);
  	  NEXT_PASS (pass_tail_recursion);
+           NEXT_PASS (pass_profile);
  	  NEXT_PASS (pass_release_ssa_names);
  	}
        NEXT_PASS (pass_rebuild_cgraph_edges);
*************** init_optimization_passes (void)
*** 540,546 ****
        NEXT_PASS (pass_phiopt);
        NEXT_PASS (pass_may_alias);
        NEXT_PASS (pass_tail_recursion);
-       NEXT_PASS (pass_profile);
        NEXT_PASS (pass_ch);
        NEXT_PASS (pass_stdarg);
        NEXT_PASS (pass_lower_complex);
--- 542,547 ----
*************** execute_function_todo (void *data)
*** 886,891 ****
--- 887,910 ----
        fflush (dump_file);
      }
  
+   if (flags & TODO_rebuild_frequencies)
+     {
+       if (profile_status == PROFILE_GUESSED)
+ 	{
+ 	  loop_optimizer_init (0);
+ 	  add_noreturn_fake_exit_edges ();
+ 	  mark_irreducible_loops ();
+ 	  connect_infinite_loops_to_exit ();
+ 	  estimate_bb_frequencies ();
+ 	  remove_fake_exit_edges ();
+ 	  loop_optimizer_finalize ();
+ 	}
+       else if (profile_status == PROFILE_READ)
+ 	counts_to_freqs ();
+       else
+ 	gcc_unreachable ();
+     }
+ 
  #if defined ENABLE_CHECKING
    if (flags & TODO_verify_ssa)
      verify_ssa (true);



More information about the Gcc-patches mailing list