This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Fix profile updating bug in ipa-split
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 13 Nov 2017 18:58:59 +0100
- Subject: Fix profile updating bug in ipa-split
- Authentication-results: sourceware.org; auth=none
Hi,
this patch fixes bug I noticed while looking for profile mismatches in tramp3d.
When split part starts with the loop, the frequency of call to split function
is not the same as frequency of this block. This also affects the heuristics
which should not count the loopback edges.
Patch aslo gets rid of frequencies and uses counts consistently.
Bootstrapped/regtested x86_64-linux, will commit it shortly.
* ipa-split.c (struct split_point): Add count.
(consider_split): Do not compute incoming frequency; compute incoming
count and store it to split_point.
(split_function): Set count of the call to split part correctly.
* testsuite/gcc.dg/tree-ssa/fnsplit-2.c: New testcase.
Index: ipa-split.c
===================================================================
--- ipa-split.c (revision 254700)
+++ ipa-split.c (working copy)
@@ -129,6 +129,10 @@ struct split_point
/* Basic block where we split (that will become entry point of new function. */
basic_block entry_bb;
+ /* Count for entering the split part.
+ This is not count of the entry_bb because it may be in loop. */
+ profile_count count;
+
/* Basic blocks we are splitting away. */
bitmap split_bbs;
@@ -426,7 +430,6 @@ consider_split (struct split_point *curr
edge_iterator ei;
gphi_iterator bsi;
unsigned int i;
- int incoming_freq = 0;
tree retval;
tree retbnd;
bool back_edge = false;
@@ -434,18 +437,21 @@ consider_split (struct split_point *curr
if (dump_file && (dump_flags & TDF_DETAILS))
dump_split_point (dump_file, current);
+ current->count = profile_count::zero ();
FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
{
if (e->flags & EDGE_DFS_BACK)
back_edge = true;
if (!bitmap_bit_p (current->split_bbs, e->src->index))
- incoming_freq += EDGE_FREQUENCY (e);
+ current->count += e->count ();
}
- /* Do not split when we would end up calling function anyway. */
- if (incoming_freq
- >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)
- * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
+ /* Do not split when we would end up calling function anyway.
+ Compares are three state, use !(...<...) to also give up when outcome
+ is unknown. */
+ if (!(current->count
+ < (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.apply_scale
+ (PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY), 100))))
{
/* When profile is guessed, we can not expect it to give us
realistic estimate on likelyness of function taking the
@@ -454,14 +460,17 @@ consider_split (struct split_point *curr
is likely noticeable win. */
if (back_edge
&& profile_status_for_fn (cfun) != PROFILE_READ
- && incoming_freq
- < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun))
+ && current->count
+ < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- " Split before loop, accepting despite low frequencies %i %i.\n",
- incoming_freq,
- ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun));
+ {
+ fprintf (dump_file,
+ " Split before loop, accepting despite low counts");
+ current->count.dump (dump_file);
+ fprintf (dump_file, " ");
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.dump (dump_file);
+ }
}
else
{
@@ -711,14 +720,13 @@ consider_split (struct split_point *curr
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Accepted!\n");
- /* At the moment chose split point with lowest frequency and that leaves
+ /* At the moment chose split point with lowest count and that leaves
out smallest size of header.
In future we might re-consider this heuristics. */
if (!best_split_point.split_bbs
- || best_split_point.entry_bb->count.to_frequency (cfun)
- > current->entry_bb->count.to_frequency (cfun)
- || (best_split_point.entry_bb->count.to_frequency (cfun)
- == current->entry_bb->count.to_frequency (cfun)
+ || best_split_point.count
+ > current->count
+ || (best_split_point.count == current->count
&& best_split_point.split_size < current->split_size))
{
@@ -1446,6 +1454,7 @@ split_function (basic_block return_bb, s
}
else
break;
+ call_bb->count = split_point->count;
e = split_block (split_point->entry_bb, last_stmt);
remove_edge (e);
Index: testsuite/gcc.dg/tree-ssa/fnsplit-2.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/fnsplit-2.c (nonexistent)
+++ testsuite/gcc.dg/tree-ssa/fnsplit-2.c (working copy)
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fnsplit-blocks-details" } */
+int b;
+void test (void);
+void
+split_me (int *a)
+{
+ if (__builtin_expect (a==0, 0))
+ do
+ {
+ test();
+ test();
+ test();
+ test();
+ test();
+ }
+ while (b);
+ else
+ q();
+}
+
+int
+main(void)
+{
+ int i;
+ for (i = 0; i < 1000; i++)
+ split_me(&i);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Splitting function at:" 1 "fnsplit"} } */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 0 "fnsplit"} } */