This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch] Handle profile counts truncated to 0 in coldness test


This patch handles the fact that small profile count values sometimes
get truncated down to 0 during updates due to integer arithmetic. This
causes sub-optimal function splitting under
-freorder-blocks-and-partition.

The first part fixes the logic in probably_never_executed that looks
at the bb frequency when the bb count is zero. It now correctly
compares the count computed from the frequency to the number of
profile runs instead of to REG_BR_PROB_BASE. The minimum ratio of
profile counts to training runs required for a block to not be
considered cold is now encoded in a parameter, with the default value
of 4 to match the existing code.

The second part ensures that frequencies are correctly updated after
inlining. The problem is that after inlining the frequencies were
being recomputed directly from the corresponding bb counts in
rebuild_frequencies. If the counts had been truncated to 0, then the
recomputed frequencies would become 0 as well (often leading to
inconsistencies in the frequencies between blocks). Then the above
change to probably_never_executed would not help identify these blocks
as non-cold from the frequencies. The fix was to use the existing
logic used during static profile rebuilding to also
recompute/propagate the frequencies from the branch probabilities in
the profile feedback case. I also renamed a number of estimate_*
routines to compute_* and updated the comments to reflect the fact
that these routines are not doing estimation (from a static profile),
but in fact recomputing/propagating frequencies from the existing
(either guessed or profile-feedback-based) probabilities.

Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?
(One more round of regression testing in progress after making some
slight cleanup changes.)

Thanks,
Teresa

2013-10-03  Teresa Johnson  <tejohnson@google.com>

        * params.def (PARAM_MIN_HOT_RUN_RATIO): New parameter.
        * predict.c (probably_never_executed): Compare frequency-based
        count to number of training runs.
        (tree_estimate_probability): Update function call to new name.
        compute_bb_frequencies and pass new parameter.
        (compute_loops_at_level): Renamed.
        (compute_loops): Ditto.
        (compute_bb_frequencies): Renamed, and new parameter to
        force recomputing frequencies.
        (rebuild_frequencies): Recompute bb frequencies from the
        probabilities instead of from counts due to truncation issues.
        * predict.h (compute_bb_frequencies): Update declaration.

Index: params.def
===================================================================
--- params.def  (revision 203152)
+++ params.def  (working copy)
@@ -373,6 +373,12 @@ DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
         "Select fraction of the maximal frequency of executions of
basic block in function given basic block needs to have to be
considered hot",
         1000, 0, 0)

+DEFPARAM(PARAM_MIN_HOT_RUN_RATIO,
+        "min-hot-run-ratio",
+         "The minimum ratio of profile runs to basic block execution count "
+         "for the block to be considered hot",
+        4, 0, 10000)
+
 DEFPARAM (PARAM_ALIGN_THRESHOLD,
          "align-threshold",
          "Select fraction of the maximal frequency of executions of
basic block in function given basic block get alignment",
Index: predict.c
===================================================================
--- predict.c   (revision 203152)
+++ predict.c   (working copy)
@@ -237,17 +237,31 @@ probably_never_executed (struct function *fun,
   gcc_checking_assert (fun);
   if (profile_status_for_function (fun) == PROFILE_READ)
     {
-      if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0)
+      int min_run_ratio = PARAM_VALUE (PARAM_MIN_HOT_RUN_RATIO);
+      if (RDIV (count * min_run_ratio, profile_info->runs) > 0)
        return false;
       if (!frequency)
        return true;
       if (!ENTRY_BLOCK_PTR->frequency)
        return false;
-      if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE)
+      if (ENTRY_BLOCK_PTR->count)
        {
-         return (RDIV (frequency * ENTRY_BLOCK_PTR->count,
-                       ENTRY_BLOCK_PTR->frequency)
-                 < REG_BR_PROB_BASE / 4);
+          gcov_type scaled_count
+              = frequency * ENTRY_BLOCK_PTR->count * min_run_ratio;
+          gcov_type computed_count;
+          /* Check for overflow, in which case ENTRY_BLOCK_PTR->count should
+             be large enough to do the division first without losing much
+             precision.  */
+          if (scaled_count/frequency/min_run_ratio != ENTRY_BLOCK_PTR->count)
+            {
+              computed_count = RDIV (ENTRY_BLOCK_PTR->count,
+                                     ENTRY_BLOCK_PTR->frequency);
+              computed_count *= frequency * min_run_ratio;
+            }
+          else
+            computed_count = RDIV (scaled_count, ENTRY_BLOCK_PTR->frequency);
+          if (RDIV (computed_count * min_run_ratio, profile_info->runs) > 0)
+            return false;
        }
       return true;
     }
@@ -2388,7 +2402,7 @@ tree_estimate_probability (void)
   pointer_map_destroy (bb_predictions);
   bb_predictions = NULL;

-  estimate_bb_frequencies ();
+  compute_bb_frequencies (false);
   free_dominance_info (CDI_POST_DOMINATORS);
   remove_fake_exit_edges ();
 }
@@ -2551,7 +2565,7 @@ typedef struct edge_info_def
 #define BLOCK_INFO(B)  ((block_info) (B)->aux)
 #define EDGE_INFO(E)   ((edge_info) (E)->aux)

-/* Helper function for estimate_bb_frequencies.
+/* Helper function for compute_bb_frequencies.
    Propagate the frequencies in blocks marked in
    TOVISIT, starting in HEAD.  */

@@ -2691,10 +2705,10 @@ propagate_freq (basic_block head, bitmap tovisit)
     }
 }

-/* Estimate probabilities of loopback edges in loops at same nest level.  */
+/* Compute frequencies in loops at same nest level.  */

 static void
-estimate_loops_at_level (struct loop *first_loop)
+compute_loops_at_level (struct loop *first_loop)
 {
   struct loop *loop;

@@ -2705,7 +2719,7 @@ static void
       unsigned i;
       bitmap tovisit = BITMAP_ALLOC (NULL);

-      estimate_loops_at_level (loop->inner);
+      compute_loops_at_level (loop->inner);

       /* Find current loop back edge and mark it.  */
       e = loop_latch_edge (loop);
@@ -2723,14 +2737,14 @@ static void
 /* Propagates frequencies through structure of loops.  */

 static void
-estimate_loops (void)
+compute_loops (void)
 {
   bitmap tovisit = BITMAP_ALLOC (NULL);
   basic_block bb;

-  /* Start by estimating the frequencies in the loops.  */
+  /* Start by computing the frequencies in the loops.  */
   if (number_of_loops (cfun) > 1)
-    estimate_loops_at_level (current_loops->tree_root->inner);
+    compute_loops_at_level (current_loops->tree_root->inner);

   /* Now propagate the frequencies through all the blocks.  */
   FOR_ALL_BB (bb)
@@ -2800,15 +2814,16 @@ expensive_function_p (int threshold)
   return false;
 }

-/* Estimate basic blocks frequency by given branch probabilities.  */
+/* Compute and propagate basic block frequencies using the given branch
+   probabilities.  */

 void
-estimate_bb_frequencies (void)
+compute_bb_frequencies (bool rebuild)
 {
   basic_block bb;
   sreal freq_max;

-  if (profile_status != PROFILE_READ || !counts_to_freqs ())
+  if (rebuild || profile_status != PROFILE_READ || !counts_to_freqs ())
     {
       static int real_values_initialized = 0;

@@ -2845,9 +2860,9 @@ void
            }
        }

-      /* First compute probabilities locally for each loop from innermost
+      /* First compute frequencies locally for each loop from innermost
          to outermost to examine probabilities for back edges.  */
-      estimate_loops ();
+      compute_loops ();

       memcpy (&freq_max, &real_zero, sizeof (real_zero));
       FOR_EACH_BB (bb)
@@ -3027,18 +3042,16 @@ void
 rebuild_frequencies (void)
 {
   timevar_push (TV_REBUILD_FREQUENCIES);
-  if (profile_status == PROFILE_GUESSED)
+  if (profile_status == PROFILE_GUESSED || profile_status == PROFILE_READ)
     {
       loop_optimizer_init (0);
       add_noreturn_fake_exit_edges ();
       mark_irreducible_loops ();
       connect_infinite_loops_to_exit ();
-      estimate_bb_frequencies ();
+      compute_bb_frequencies (true);
       remove_fake_exit_edges ();
       loop_optimizer_finalize ();
     }
-  else if (profile_status == PROFILE_READ)
-    counts_to_freqs ();
   else
     gcc_unreachable ();
   timevar_pop (TV_REBUILD_FREQUENCIES);
Index: predict.h
===================================================================
--- predict.h   (revision 203152)
+++ predict.h   (working copy)
@@ -37,7 +37,7 @@ enum prediction

 extern void predict_insn_def (rtx, enum br_predictor, enum prediction);
 extern int counts_to_freqs (void);
-extern void estimate_bb_frequencies (void);
+extern void compute_bb_frequencies (bool rebuild);
 extern const char *predictor_name (enum br_predictor);
 extern tree build_predict_expr (enum br_predictor, enum prediction);
 extern void tree_estimate_probability (void);


-- 
Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]