This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix profile updating in partial inlining


Hi,
Jakub noticed funny behaviour on the testcase attached where ipa-split splits
correctly the unlikely path leading to abort, but inliner inlines it back
first and then proceeds by inlining foo.

This is because the BB containing abort has frequency of 0 that leads to problems
with division by 0 in inline heruistics that funilly conclude that time spent
in the split part is 0 and inlining it saves time of 11 that makes the function very
good candidate for inliing.

I guess this is quite common behaviour on sanity checking code (that is pretty
common target for splitting)

This patch fixes way entry block frequency is computed and also makes repropagation
or rescaling of frequencies after partial inlining is done.  The second is needed
to get sane profiles in such a split parts of frequency of 0.

Bootstrapped/regtested x86_64-linux, comitted.

Honza

	* gcc.dg/tree-ssa/ipa-split-3.c: New testcase.
	* predict.c (propagate_freq): Clear EXIT_BLOCK_PTR frequency if it is
	unreachable.
	(rebuild_frequencies): New function.
	* predict.h (rebuild_frequencies): Declare.
	* tree-inline.c (copy_cfg_body): Compute properly count & frequency of
	entry block and edge reaching new_entry.
	(tree_function_versioning): When doing partial cloning, rebuild frequencies
	when done.
	* passes.c (execute_function_todo): Use rebild_frequencies.

Index: testsuite/gcc.dg/tree-ssa/ipa-split-3.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ipa-split-3.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/ipa-split-3.c	(revision 0)
@@ -0,0 +1,21 @@
+int baz (void);
+static int
+foo (int x)
+{
+  if (__builtin_expect (x <= 0, 0))
+    {
+      __builtin_printf ("foo\n");
+      __builtin_printf ("foo\n");
+      __builtin_printf ("foo\n");
+      __builtin_abort ();
+    }
+  return 6;
+}
+
+int a,b,c;
+
+int
+bar (int x)
+{
+  return foo (a) + foo (b) + foo (c);
+}
Index: predict.c
===================================================================
--- predict.c	(revision 161494)
+++ predict.c	(working copy)
@@ -1855,9 +1855,6 @@ propagate_freq (basic_block head, bitmap
       edge_iterator ei;
       int count = 0;
 
-       /* The outermost "loop" includes the exit block, which we can not
-	  look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
-	  directly.  Do the same for the entry block.  */
       bb = BASIC_BLOCK (i);
 
       FOR_EACH_EDGE (e, ei, bb->preds)
@@ -1872,6 +1869,9 @@ propagate_freq (basic_block head, bitmap
 		     e->src->index, bb->index);
 	}
       BLOCK_INFO (bb)->npredecessors = count;
+      /* When function never returns, we will never process exit block.  */
+      if (!count && bb == EXIT_BLOCK_PTR)
+	bb->count = bb->frequency = 0;
     }
 
   memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
@@ -2282,3 +2282,27 @@ struct gimple_opt_pass pass_strip_predic
   TODO_ggc_collect | TODO_verify_ssa			/* todo_flags_finish */
  }
 };
+
+/* Rebuild function frequencies.  Passes are in general expected to
+   maintain profile by hand, however in some cases this is not possible:
+   for example when inlining several functions with loops freuqencies might run
+   out of scale and thus needs to be recomputed.  */
+
+void
+rebuild_frequencies (void)
+{
+  if (profile_status == PROFILE_GUESSED)
+    {
+      loop_optimizer_init (0);
+      add_noreturn_fake_exit_edges ();
+      mark_irreducible_loops ();
+      connect_infinite_loops_to_exit ();
+      estimate_bb_frequencies ();
+      remove_fake_exit_edges ();
+      loop_optimizer_finalize ();
+    }
+  else if (profile_status == PROFILE_READ)
+    counts_to_freqs ();
+  else
+    gcc_unreachable ();
+}
Index: predict.h
===================================================================
--- predict.h	(revision 161494)
+++ predict.h	(working copy)
@@ -42,5 +42,6 @@ extern const char *predictor_name (enum 
 extern tree build_predict_expr (enum br_predictor, enum prediction);
 extern void tree_estimate_probability (void);
 extern void compute_function_frequency (void);
+extern void rebuild_frequencies (void);
 
 #endif  /* GCC_PREDICT_H */
Index: tree-inline.c
===================================================================
--- tree-inline.c	(revision 161494)
+++ tree-inline.c	(working copy)
@@ -2159,6 +2159,8 @@ copy_cfg_body (copy_body_data * id, gcov
   bool need_debug_cleanup = false;
   gcov_type count_scale;
   int last;
+  int incomming_frequency = 0;
+  gcov_type incomming_count = 0;
 
   if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count)
     count_scale = (REG_BR_PROB_BASE * count
@@ -2169,6 +2171,28 @@ copy_cfg_body (copy_body_data * id, gcov
   /* Register specific tree functions.  */
   gimple_register_cfg_hooks ();
 
+  /* If we are inlining just region of the function, make sure to connect new entry
+     to ENTRY_BLOCK_PTR.  Since new entry can be part of loop, we must compute
+     frequency and probability of ENTRY_BLOCK_PTR based on the frequencies and
+     probabilities of edges incomming from nonduplicated region.  */
+  if (new_entry)
+    {
+      edge e;
+      edge_iterator ei;
+
+      FOR_EACH_EDGE (e, ei, new_entry->preds)
+	if (!e->src->aux)
+	  {
+	    incomming_frequency += EDGE_FREQUENCY (e);
+	    incomming_count += e->count;
+	  }
+      incomming_count = incomming_count * count_scale / REG_BR_PROB_BASE;
+      incomming_frequency
+	= incomming_frequency * frequency_scale / REG_BR_PROB_BASE;
+      ENTRY_BLOCK_PTR->count = incomming_count;
+      ENTRY_BLOCK_PTR->frequency = incomming_frequency;
+    }
+
   /* Must have a CFG here at this point.  */
   gcc_assert (ENTRY_BLOCK_PTR_FOR_FUNCTION
 	      (DECL_STRUCT_FUNCTION (callee_fndecl)));
@@ -2204,10 +2228,9 @@ copy_cfg_body (copy_body_data * id, gcov
 
   if (new_entry)
     {
-      edge e;
-      e = make_edge (entry_block_map, (basic_block)new_entry->aux, EDGE_FALLTHRU);
+      edge e = make_edge (entry_block_map, (basic_block)new_entry->aux, EDGE_FALLTHRU);
       e->probability = REG_BR_PROB_BASE;
-      e->count = entry_block_map->count;
+      e->count = incomming_count;
     }
 
   if (gimple_in_ssa_p (cfun))
@@ -5180,6 +5203,23 @@ tree_function_versioning (tree old_decl,
   if (id.dst_node->analyzed)
     cgraph_rebuild_references ();
   update_ssa (TODO_update_ssa);
+
+  /* After partial cloning we need to rescale frequencies, so they are
+     within proper range in the cloned function.  */
+  if (new_entry)
+    {
+      struct cgraph_edge *e;
+      rebuild_frequencies ();
+
+      new_version_node->count = ENTRY_BLOCK_PTR->count;
+      for (e = new_version_node->callees; e; e = e->next_callee)
+	{
+	  basic_block bb = gimple_bb (e->call_stmt);
+	  e->frequency = compute_call_stmt_bb_frequency (current_function_decl, bb);
+	  e->count = bb->count;
+	}
+    }
+
   free_dominance_info (CDI_DOMINATORS);
   free_dominance_info (CDI_POST_DOMINATORS);
 
Index: passes.c
===================================================================
--- passes.c	(revision 161494)
+++ passes.c	(working copy)
@@ -1243,22 +1243,7 @@ execute_function_todo (void *data)
     }
 
   if (flags & TODO_rebuild_frequencies)
-    {
-      if (profile_status == PROFILE_GUESSED)
-	{
-	  loop_optimizer_init (0);
-	  add_noreturn_fake_exit_edges ();
-	  mark_irreducible_loops ();
-	  connect_infinite_loops_to_exit ();
-	  estimate_bb_frequencies ();
-	  remove_fake_exit_edges ();
-	  loop_optimizer_finalize ();
-	}
-      else if (profile_status == PROFILE_READ)
-	counts_to_freqs ();
-      else
-	gcc_unreachable ();
-    }
+    rebuild_frequencies ();
 
 #if defined ENABLE_CHECKING
   if (flags & TODO_verify_ssa


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]