This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PR80218: Call CDCE fails to update the block profile


tree-call-cdce.c was updating the edge probabilities and counts but
it wasn't updating the corresponding block information.  Among other
things, this tricked the register allocator into thinking that the
libm call was relatively hot and that it wasn't worth assigning
call-clobbered registers to values that were live across the call.
With correct frequency information, the RA instead keeps x in the
first argument register and spills it only around the call.

Although the problem has been around for a long time, it became more
acute (and would only trigger for the first function in the testcase)
after r230488.  Until that patch, the code was specific to calls that
had no lhs, but that we still had to keep for their effect on errno.
After the patch we also used the code for calls with an lhs, provided
that the hardware could calculate the lhs directly.

This showed up as a significant performance drop in an internal
benchmark from GCC 5 to GCC 6.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK for trunk
and gcc-6-branch?

Thanks,
Richard


gcc/
	PR tree-optimization/80218
	* tree-call-cdce.c (shrink_wrap_one_built_in_call_with_conds):
	Update block frequencies and counts.

gcc/testsuite/
	PR tree-optimization/80218
	* gcc.dg/pr80218.c: New test.

diff --git a/gcc/testsuite/gcc.dg/pr80218.c b/gcc/testsuite/gcc.dg/pr80218.c
new file mode 100644
index 0000000..8669ee1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr80218.c
@@ -0,0 +1,28 @@
+/* { dg-options "-O2 -fdump-rtl-ira-details-blocks" } */
+/* { dg-require-effective-target c99_runtime } */
+
+#include <math.h>
+
+void foo (float *);
+
+void
+f1 (float *x)
+{
+  x[0] = sqrtf (x[0]);
+}
+
+void
+f2 (float *x)
+{
+  sqrtf (x[0]);
+  foo (x);
+}
+
+void
+f3 (float *x)
+{
+  acosf (x[0]);
+  foo (x);
+}
+
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "ira" } } */
diff --git a/gcc/tree-call-cdce.c b/gcc/tree-call-cdce.c
index 10d2c47..7bd242c 100644
--- a/gcc/tree-call-cdce.c
+++ b/gcc/tree-call-cdce.c
@@ -847,10 +847,12 @@ shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
       gsi_insert_before (&bi_call_bsi, c, GSI_SAME_STMT);
       cond_expr = c;
     }
-  nconds--;
   ci++;
   gcc_assert (cond_expr && gimple_code (cond_expr) == GIMPLE_COND);
 
+  typedef std::pair<edge, edge> edge_pair;
+  auto_vec<edge_pair, 8> edges;
+
   bi_call_in_edge0 = split_block (bi_call_bb, cond_expr);
   bi_call_in_edge0->flags &= ~EDGE_FALLTHRU;
   bi_call_in_edge0->flags |= EDGE_FALSE_VALUE;
@@ -859,17 +861,11 @@ shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
   join_tgt_in_edge_fall_thru = make_edge (guard_bb, join_tgt_bb,
                                           EDGE_TRUE_VALUE);
 
-  bi_call_in_edge0->probability = REG_BR_PROB_BASE * ERR_PROB;
-  bi_call_in_edge0->count =
-      apply_probability (guard_bb->count,
-			 bi_call_in_edge0->probability);
-  join_tgt_in_edge_fall_thru->probability =
-      inverse_probability (bi_call_in_edge0->probability);
-  join_tgt_in_edge_fall_thru->count =
-      guard_bb->count - bi_call_in_edge0->count;
+  edges.reserve (nconds);
+  edges.quick_push (edge_pair (bi_call_in_edge0, join_tgt_in_edge_fall_thru));
 
   /* Code generation for the rest of the conditions  */
-  while (nconds > 0)
+  for (unsigned int i = 1; i < nconds; ++i)
     {
       unsigned ci0;
       edge bi_call_in_edge;
@@ -885,7 +881,6 @@ shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
           gsi_insert_before (&guard_bsi, c, GSI_SAME_STMT);
           cond_expr = c;
         }
-      nconds--;
       ci++;
       gcc_assert (cond_expr && gimple_code (cond_expr) == GIMPLE_COND);
       guard_bb_in_edge = split_block (guard_bb, cond_expr);
@@ -893,14 +888,51 @@ shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
       guard_bb_in_edge->flags |= EDGE_TRUE_VALUE;
 
       bi_call_in_edge = make_edge (guard_bb, bi_call_bb, EDGE_FALSE_VALUE);
+      edges.quick_push (edge_pair (bi_call_in_edge, guard_bb_in_edge));
+    }
+
+  /* Now update the probability and profile information, processing the
+     guards in order of execution.
+
+     There are two approaches we could take here.  On the one hand we
+     could assign a probability of X to the call block and distribute
+     that probability among its incoming edges.  On the other hand we
+     could assign a probability of X to each individual call edge.
+
+     The choice only affects calls that have more than one condition.
+     In those cases, the second approach would give the call block
+     a greater probability than the first.  However, the difference
+     is only small, and our chosen X is a pure guess anyway.
+
+     Here we take the second approach because it's slightly simpler
+     and because it's easy to see that it doesn't lose profile counts.  */
+  bi_call_bb->count = 0;
+  bi_call_bb->frequency = 0;
+  while (!edges.is_empty ())
+    {
+      edge_pair e = edges.pop ();
+      edge call_edge = e.first;
+      edge nocall_edge = e.second;
+      basic_block src_bb = call_edge->src;
+      gcc_assert (src_bb == nocall_edge->src);
+
+      call_edge->probability = REG_BR_PROB_BASE * ERR_PROB;
+      call_edge->count = apply_probability (src_bb->count,
+					    call_edge->probability);
+      nocall_edge->probability = inverse_probability (call_edge->probability);
+      nocall_edge->count = src_bb->count - call_edge->count;
+
+      unsigned int call_frequency = apply_probability (src_bb->frequency,
+						       call_edge->probability);
 
-      bi_call_in_edge->probability = REG_BR_PROB_BASE * ERR_PROB;
-      bi_call_in_edge->count =
-	  apply_probability (guard_bb->count,
-			     bi_call_in_edge->probability);
-      guard_bb_in_edge->probability =
-          inverse_probability (bi_call_in_edge->probability);
-      guard_bb_in_edge->count = guard_bb->count - bi_call_in_edge->count;
+      bi_call_bb->count += call_edge->count;
+      bi_call_bb->frequency += call_frequency;
+
+      if (nocall_edge->dest != join_tgt_bb)
+	{
+	  nocall_edge->dest->count = nocall_edge->count;
+	  nocall_edge->dest->frequency = src_bb->frequency - call_frequency;
+	}
     }
 
   if (dom_info_available_p (CDI_DOMINATORS))


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]