This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix inaccurate estimation of probabilities in the scheduler


Hello,

This patch fixes inaccurate calculation of basic block execution probabilities in the scheduler. The old code assumed the probability of leaving a loop as 10%, and distributed the rest evenly among all other outgoing edges. The patch fixes this by using frequency field of basic block structure, either when branch guessing is working or profile information is available.

The patch also introduces two parameters instead of hard-coded scheduler values. These are maximal latency of the instruction, which is to be considered for interblock code motion, and minimal relative basic block probability. The default values are 3 and 40%, respectively.

The patch also sets these parameters to 2 and 20% for IA-64 platform. These settings were chosen as the best one during SPEC testing done back in March. That time, the change was practically neutral with exception of bzip2, which had 2,7% speedup at -O2. I was going to redo the tests with the mainline, but unfortunately, gcc and galgel do not work on ia64 as of June 29 mainline. I can either submit these values or redo the tests and send the new values as a followup.

Bootstrapped and regtested on i686-linux and ia64-linux. Bootstrap times practically haven't changed.

OK for mainline?

Thanks, Andrey
diff -upr orig/gcc/config/ia64/ia64-protos.h gcc/config/ia64/ia64-protos.h
--- orig/gcc/config/ia64/ia64-protos.h	2005-07-04 18:24:43.000000000 +0400
+++ gcc/config/ia64/ia64-protos.h	2005-07-04 18:27:40.000000000 +0400
@@ -94,6 +94,7 @@ extern int ia64_eh_uses (int);
 extern void emit_safe_across_calls (void);
 extern void ia64_init_builtins (void);
 extern void ia64_override_options (void);
+extern void ia64_optimization_options (int, int);
 extern int ia64_dbx_register_number (int);
 
 extern rtx ia64_return_addr_rtx (HOST_WIDE_INT, rtx);
diff -upr orig/gcc/config/ia64/ia64.c gcc/config/ia64/ia64.c
--- orig/gcc/config/ia64/ia64.c	2005-07-04 18:24:43.000000000 +0400
+++ gcc/config/ia64/ia64.c	2005-07-04 18:27:40.000000000 +0400
@@ -38,6 +38,7 @@ Boston, MA 02110-1301, USA.  */
 #include "recog.h"
 #include "expr.h"
 #include "optabs.h"
+#include "params.h"
 #include "except.h"
 #include "function.h"
 #include "ggc.h"
@@ -4866,6 +4867,28 @@ ia64_asm_output_external (FILE *file, tr
       TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
     }
 }
+
+/* Implement OPTIMIZATION_OPTIONS.  */
+void
+ia64_optimization_options (int level, int size ATTRIBUTE_UNUSED)
+{
+
+  if (level >= 2)
+    {  
+      /* We'd like to have more insns for interblock motions. Thus we allow 
+         bigger latencies for such insns. We are interested in the motions of 
+         floating-point loads, so we use the latency of ldf (6) as a cutoff.  */
+      set_param_value ("max-sched-insn-conflict-delay", 6);
+  
+      /* We lower the limit of bb probability to get more bbs involved in 
+         interblock motions. SPEC testing shows that this is beneficial when 
+         profile information is not available.  */
+      if (!flag_branch_probabilities)
+        {
+          set_param_value ("min-sched-bb-probability", 20);
+        }
+    }
+}
 
 /* Parse the -mfixed-range= option string.  */
 
diff -upr orig/gcc/config/ia64/ia64.h gcc/config/ia64/ia64.h
--- orig/gcc/config/ia64/ia64.h	2005-07-04 18:24:43.000000000 +0400
+++ gcc/config/ia64/ia64.h	2005-07-04 18:27:40.000000000 +0400
@@ -128,7 +128,7 @@ extern enum processor_type ia64_tune;
    command options have been parsed.  Values set in this macro are used as the
    default values for the other command line options.  */
 
-/* #define OPTIMIZATION_OPTIONS(LEVEL,SIZE) */
+#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) ia64_optimization_options ((LEVEL), (SIZE))
 
 /* Driver configuration */
 
diff -upr orig/gcc/doc/invoke.texi gcc/doc/invoke.texi
--- orig/gcc/doc/invoke.texi	2005-07-04 18:24:47.000000000 +0400
+++ gcc/doc/invoke.texi	2005-07-04 18:27:40.000000000 +0400
@@ -5964,6 +5964,14 @@ The maximum size measured as number of R
 in combiner for a pseudo register as last known value of that register.  The default
 is 10000.
 
+@item max-sched-insn-conflict-delay
+The maximum conflict delay of an insn making it available for interblock
+motion. The default value is 3.
+
+@item min-sched-bb-probability
+The minimum basic block probability making its insns available for interblock
+motions, measured in percents. The default value is 40%.  
+
 @item integer-share-limit
 Small integer constants can use a shared data structure, reducing the
 compiler's memory usage and increasing its speed.  This sets the maximum
diff -upr orig/gcc/params.def gcc/params.def
--- orig/gcc/params.def	2005-07-04 18:24:39.000000000 +0400
+++ gcc/params.def	2005-07-04 18:27:40.000000000 +0400
@@ -435,6 +435,16 @@ DEFPARAM(PARAM_MAX_LAST_VALUE_RTL,
 	 "The maximum number of RTL nodes that can be recorded as combiner's last value",
 	 10000, 0, 0)
 
+DEFPARAM(PARAM_MAX_SCHED_INSN_CONFLICT_DELAY,
+         "max-sched-insn-conflict-delay",
+         "The maximum conflict delay for an insn to be considered for interblock motion",
+         3, 1, 10)
+
+DEFPARAM(PARAM_MIN_SCHED_BB_PROBABILITY,
+         "min-sched-bb-probability",
+         "The minimum bb probability making its insns to be considered for interblock motion",
+         40, 10, 100)
+
 /* INTEGER_CST nodes are shared for values [{-1,0} .. N) for
    {signed,unsigned} integral types.  This determines N.
    Experimentation shows 256 to be a good value.  */
diff -upr orig/gcc/sched-rgn.c gcc/sched-rgn.c
--- orig/gcc/sched-rgn.c	2005-07-04 18:24:40.000000000 +0400
+++ gcc/sched-rgn.c	2005-07-04 18:27:40.000000000 +0400
@@ -213,6 +213,19 @@ static sbitmap *dom;
    of bb i relative to the region entry.  */
 static float *prob;
 
+/* Estimation of basic block probabilities in compute_dom_prob_ps() is 
+   not accurate because of historic reasons. When at -O2 or higher, 
+   GCC probability analysis (with -fguess-branch-prob) is used instead. Profile 
+   information is also used, if available.  */  
+
+/* The frequency of region entry block.  */
+static int rgn_entry_frequency;
+
+/* When 1, the frequency information is available for use. It happens when 
+   either -fguess-branch-prob or -fbranch-probabilities is specified.  */
+ 
+static int can_use_bb_freqs;
+    
 /* The probability of bb_src, relative to bb_trg.  Note, that while the
    'prob[bb]' is a float in [0, 1], this macro returns an integer
    in [0, 100].  */
@@ -251,7 +264,7 @@ static void compute_dom_prob_ps (int);
 
 /* Parameters affecting the decision of rank_for_schedule().
    ??? Nope.  But MIN_PROBABILITY is used in compute_trg_info.  */
-#define MIN_PROBABILITY 40
+#define MIN_PROBABILITY PARAM_VALUE (PARAM_MIN_SCHED_BB_PROBABILITY)
 
 /* Speculative scheduling functions.  */
 static int check_live_1 (int, rtx);
@@ -910,6 +923,11 @@ compute_dom_prob_ps (int bb)
     {
       SET_BIT (dom[bb], 0);
       prob[bb] = 1.0;
+     
+      /* Set the frequency of region entry block to use in prob calculation.  */
+      if (can_use_bb_freqs)
+        rgn_entry_frequency = BASIC_BLOCK (BB_TO_BLOCK (bb))->frequency;  
+    
       return;
     }
 
@@ -946,22 +964,31 @@ compute_dom_prob_ps (int bb)
 	  SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
 	}
 
-      /* Now nr_rgn_out_edges is the number of region-exit edges from
-         pred, and nr_out_edges will be the number of pred out edges
-         not leaving the region.  */
-      nr_out_edges -= nr_rgn_out_edges;
-      if (nr_rgn_out_edges > 0)
-	prob[bb] += 0.9 * prob[pred_bb] / nr_out_edges;
+      if (can_use_bb_freqs && rgn_entry_frequency)
+	{
+	  prob[bb] = (BASIC_BLOCK (BB_TO_BLOCK (bb))->frequency 
+                      / (float) rgn_entry_frequency);  
+	}
       else
-	prob[bb] += prob[pred_bb] / nr_out_edges;
+	{      
+	  /* Now nr_rgn_out_edges is the number of region-exit edges from
+	     pred, and nr_out_edges will be the number of pred out edges
+	     not leaving the region.  */
+	  nr_out_edges -= nr_rgn_out_edges;
+	  if (nr_rgn_out_edges > 0)
+	    prob[bb] += 0.9 * prob[pred_bb] / nr_out_edges;
+	  else
+	    prob[bb] += prob[pred_bb] / nr_out_edges;
+	}  
     }
 
   SET_BIT (dom[bb], bb);
   sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
 
   if (sched_verbose >= 2)
-    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
-	     (int) (100.0 * prob[bb]));
+    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d (with %s)\n", bb, 
+             BB_TO_BLOCK (bb), (int) (100.0 * prob[bb]), can_use_bb_freqs 
+             ? "frequencies" : "historic evaluation");
 }
 
 /* Functions for target info.  */
@@ -1643,7 +1670,8 @@ init_ready_list (struct ready_list *read
 		&& (!IS_SPECULATIVE_INSN (insn)
 		    || ((recog_memoized (insn) < 0
 			 || min_insn_conflict_delay (curr_state,
-						     insn, insn) <= 3)
+						     insn, insn) 
+                         <= PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
 			&& check_live (insn, bb_src)
 			&& is_exception_free (insn, bb_src, target_bb))))
 	      if (INSN_DEP_COUNT (insn) == 0)
@@ -1735,7 +1763,8 @@ new_ready (rtx next)
 	  || CANT_MOVE (next)
 	  || (IS_SPECULATIVE_INSN (next)
 	      && ((recog_memoized (next) >= 0
-		   && min_insn_conflict_delay (curr_state, next, next) > 3)
+		   && min_insn_conflict_delay (curr_state, next, next) 
+                   > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
 		  || !check_live (next, INSN_BB (next))
 		  || !is_exception_free (next, INSN_BB (next), target_bb)))))
     return 0;
@@ -2289,8 +2318,18 @@ schedule_region (int rgn)
       sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
 
       /* Compute probabilities, dominators, split_edges.  */
+      can_use_bb_freqs = flag_branch_probabilities || flag_guess_branch_prob;
       for (bb = 0; bb < current_nr_blocks; bb++)
 	compute_dom_prob_ps (bb);
+
+      /* Output debug info of interblock scheduling parameters.  */
+      if (sched_verbose && flag_schedule_interblock) 
+	{
+	  fprintf (sched_dump, "Maximal conflict delay = %d\n", 
+		   PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY));
+	  fprintf (sched_dump, "Minimal probability cutoff = %d\n",
+		   PARAM_VALUE (PARAM_MIN_SCHED_BB_PROBABILITY));
+	}
     }
 
   /* Now we can schedule all blocks.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]