This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Profile housekeeping 6/n (-fprofile-consistency-report)


> > Index: passes.c
> > +/* Hold statistic about profile consistency.  */
> ...
> 
> I don't see why this should live in passes.c, can you please put it in
> a more logical place (profile.c, perhaps)?

Hmm, the problem here is that the code is using passmanager's dumping bits
to order the passes and assign them names.  So moving it elsewhere requires
exporting it that is not nice.  passes.c does similar stuff already, so I decided
to keep it there.

Here is patch I comitted that prints more detailed report.  It reports also changes
in overall unit time/size estimates and when pass did nothing it reports it as
uneffective.  For tramp3d it looks this way:

Pass name                        |mismatch in |mismated out|Overall
                                 |freq count  |freq count  |size       time
cfg                  (after TODO)|            |            |  -1.5474%
ssa                              |            |            |  -2.0148%
inline_param         ------------|            |            |
einline                          |            |            |  -0.4991%
einline              (after TODO)|            |            |  -0.0129%
early_optimizations  ------------|            |            |
copyrename           ------------|            |            |
ccp                              |            |            |  -0.2273%
forwprop                         |            |            |  -0.0688%
ealias               ------------|            |            |
esra                             |            |            |  -0.1892%
fre                              |            |            |  -7.9369%
copyprop             (after TODO)|            |            |  -0.0187%
mergephi             ------------|            |            |
cddce                            |            |            |  -0.1655%
eipa_sra                         |            |            |  -0.0237%
tailr                            |            |            |  -0.5305%
switchconv                       |            |            |  +0.0190%
profile_estimate                 |    +1      |            |
local-pure-const     ------------|            |            |
fnsplit                          |   +20      |            |  +0.2333%  +0.8267%
fnsplit              (after TODO)|   -20      |            |  -1.8146%  -1.3300%
release_ssa          ------------|            |            |
inline_param         ------------|            |            |
inline                           |  +229      |            | +14.5138% -10.6020%
inline               (after TODO)|  -225      |            |  -0.2662% -10.2195%
copyrename                       |            |            |  -2.7026%  -2.9448%
cunrolli             ------------|            |            |
ccp                              |            |            |  -0.0174%  -0.0142%
ccp                  (after TODO)|    +8      |            |  -0.3571%  -0.1213%
forwprop                         |            |            |  -0.1442%  -0.5343%
alias                ------------|            |            |
retslot              ------------|            |            |
phiprop              ------------|            |            |
fre                              |            |            |  -0.2801%  -0.2814%
fre                  (after TODO)|            |            |  -0.2897%  -0.0362%
copyprop                         |            |            |  -0.0264%  -0.0251%
mergephi             ------------|            |            |
vrp                              |   +80      |            |  +0.1233%  -1.0271%
vrp                  (after TODO)|    +3      |            |  -0.8618%  -0.2421%
dce                              |            |            |  -0.0089%  -0.0021%
dce                  (after TODO)|    -7      |            |  -0.0089%  -0.0347%
cdce                 ------------|            |            |
cselim                           |            |            |  -0.0177%  +0.0000%
ifcombine                        |            |            |  +0.0044%  +0.0001%
ifcombine            (after TODO)|            |            |  -0.0089%  +0.0000%
phiopt                           |            |            |  -0.1730%  -0.1203%
tailr                ------------|            |            |
ch                               |    +2      |            |  +2.0535%  +0.0000%
ch                   (after TODO)|            |            |            +0.0002%
cplxlower            ------------|            |            |
sra                              |            |            |  +0.0087%
copyrename           ------------|            |            |
dom                              |   +86      |            |  +1.1802%  -0.1366%
dom                  (after TODO)|    +5      |            |  -0.1894%  -0.3042%
phicprop                         |            |            |  -0.0043%
phicprop             (after TODO)|    -6      |            |            +0.2117%
dse                              |            |            |  -0.0086%  -0.0009%
reassoc                          |            |            |  +0.0302%  +0.0116%
dce                              |            |            |  -0.6640%  -0.1408%
dce                  (after TODO)|    -2      |            |
forwprop                         |            |            |  -0.1129%  -0.1826%
phiopt               ------------|            |            |
objsz                ------------|            |            |
strlen               ------------|            |            |
ccp                  (after TODO)|            |            |  -0.0087%
copyprop             ------------|            |            |
sincos               ------------|            |            |
bswap                ------------|            |            |
crited               ------------|            |            |
pre                              |    +1      |            |  +1.2168%  -1.3703%
pre                  (after TODO)|    -2      |            |  -0.0215%  +0.0764%
sink                             |            |            |            -0.0256%
loop                 ------------|            |            |
loopinit             ------------|            |            |
lim                              |            |            |  +0.0086%  -0.1529%
copyprop             (after TODO)|   -11      |            |            +0.1483%
dceloop                          |            |            |  -0.0172%  -0.0028%
unswitch                         |    +7      |            |  +1.0265%  +0.0404%
unswitch             (after TODO)|   +21      |            |  -0.5569%  -0.7164%
sccp                 ------------|            |            |
ldist                ------------|            |            |
copyprop             ------------|            |            |
ivcanon                          |            |            |  +0.0214%  +0.0661%
ifcvt                ------------|            |            |
vect                 ------------|            |            |
dceloop                          |            |            |  -0.0085%  -0.0009%
pcom                             |            |            |            -0.0002%
cunroll                          |    -2      |            |  +0.8891%  -0.0781%
slp                  ------------|            |            |
ivopts                           |            |            |  +0.1822%  +0.0837%
lim                  ------------|            |            |
loopdone             ------------|            |            |
veclower2            ------------|            |            |
reassoc                          |            |            |  -0.0042%  -0.0169%
vrp                              |   +17      |            |  -0.1015%  -0.2523%
vrp                  (after TODO)|    +1      |            |  -0.2540%  -0.3505%
slsr                             |            |            |  +0.1952%  +0.6254%
dom                              |   +16      |            |  -0.0254%  -0.6641%
dom                  (after TODO)|    +5      |            |  -0.0424%  -0.0243%
phicprop                         |            |            |  -0.0170%
phicprop             (after TODO)|    +2      |            |            +0.0141%
cddce                            |            |            |  -0.1229%  -0.0515%
cddce                (after TODO)|    -1      |            |  -0.0085%
dse                  ------------|            |            |
forwprop                         |            |            |  -0.0382%  -0.0254%
phiopt                           |            |            |  -0.0255%  -0.0462%
fab                  ------------|            |            |
widening_mul         ------------|            |            |
tailc                ------------|            |            |
copyrename           ------------|            |            |
uncprop              ------------|            |            |
local-pure-const     ------------|            |            |
nrv                  ------------|            |            |
optimized            ------------|            |            |
expand                           |  +430      |            |----------
vregs                ------------|            |            |
into_cfglayout                   |            |            |  -4.4596%  -3.2103%
jump                             |    +6      |            |  -0.5852%  -0.5775%
subreg1                          |            |            |  -0.0589%
dfinit               ------------|            |            |
cse1                             |            |            |  -0.0966%  -0.0620%
fwprop1                          |            |            |  -2.5152%  -3.0170%
cprop                            |    -8      |            |  -0.4209%  -0.4256%
rtl pre                          |            |            |  +0.8649%  +0.7079%
hoist                ------------|            |            |
cprop                            |    -7      |            |  -0.9099%  -1.0555%
cse_local                        |            |            |  -0.1787%  -0.2454%
ce1                              |    -1      |            |  +0.0913%  +0.3673%
reginfo              ------------|            |            |
loop2                ------------|            |            |
loop2_init           ------------|            |            |
loop2_invariant                  |            |            |  +0.0523%  +0.1014%
loop2_unswitch       ------------|            |            |
loop2_done           ------------|            |            |
cprop                            |            |            |  -0.3951%  -0.2580%
cse2                             |            |            |  -0.3698%  -0.3355%
dse1                             |            |            |  -0.0025%  -0.0011%
fwprop2                          |            |            |  -0.0012%  -0.0013%
init-regs            ------------|            |            |
ud_dce                           |            |            |  -0.0784%  -0.0126%
combine                          |            |            |  -3.1886%  -3.4658%
ce2                              |            |            |            +0.0001%
regmove                          |            |            |  +0.0025%  +0.0000%
outof_cfglayout                  |            |            |  +3.9667%  +1.8602%
split1                           |            |            |  +0.0828%  +0.0878%
subreg2              ------------|            |            |
mode_sw              ------------|            |            |
asmcons              ------------|            |            |
ira                              |            |            |  +3.0210%  +0.5602%
reload                           |    -4      |            |  -2.3169%  -3.8620%
postreload                       |            |            |  -0.3193%  -0.0548%
gcse2                            |            |            |  -0.0170%  -0.0051%
split2                           |            |            |  +0.2646%  +0.3206%
ree                              |            |            |  +0.0012%  +0.0175%
pro_and_epilogue                 |    +4      |            |  +3.8163% +18.4507%
dse2                 ------------|            |            |
csa                  ------------|            |            |
jump2                            |            |            |  -4.4781%  -0.0612%
peephole2                        |            |            |  +0.0073%  -0.0539%
ce3                              |            |            |  -0.0049%  +0.0092%
cprop_hardreg        ------------|            |            |
rtl_dce                          |            |            |  -0.0903%  -0.0736%
bbro                             |    -7      |            |  +0.4776%  -1.0224%
split4               ------------|            |            |
sched2               ------------|            |            |
stack                ------------|            |            |
alignments           ------------|            |            |
compgotos            ------------|            |            |

Honza

	* doc/invoke.texi (-fprofile-report): Document.
	* common.opt (-fprofile-report): New option.
	* toplev.c (finalize): Call dump_profile_report.
	* toplev.h (profile_report): Declare.
	* passes.c (profile_record): New static var.
	(check_profile_consistency): New function.
	(dump_profile_record): New function.
	(execute_one_ipa_transform_pass): Call check_profile_consistency.
	(execute_one_pass): Likewise.
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 192116)
+++ doc/invoke.texi	(working copy)
@@ -388,7 +388,7 @@ Objective-C and Objective-C++ Dialects}.
 -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
 -fomit-frame-pointer -foptimize-register-move -foptimize-sibling-calls @gol
 -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
--fprefetch-loop-arrays @gol
+-fprefetch-loop-arrays -fprofile-report @gol
 -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
 -fprofile-generate=@var{path} @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
@@ -5153,6 +5153,11 @@ allocation for the WPA phase only.
 Makes the compiler print some statistics about permanent memory
 allocation before or after interprocedural optimization.
 
+@item -fprofile-report
+@opindex fprofile-report
+Makes the compiler print some statistics about consistency of the
+(estimated) profile and effect of individual passes.
+
 @item -fstack-usage
 @opindex fstack-usage
 Makes the compiler output stack usage information for the program, on a
Index: common.opt
===================================================================
--- common.opt	(revision 192116)
+++ common.opt	(working copy)
@@ -1654,6 +1654,10 @@ fprofile-values
 Common Report Var(flag_profile_values)
 Insert code to profile values of expressions
 
+fprofile-report
+Common Report Var(profile_report)
+Report on consistency of profile
+
 frandom-seed
 Common Var(common_deferred_options) Defer
 
Index: toplev.c
===================================================================
--- toplev.c	(revision 192116)
+++ toplev.c	(working copy)
@@ -1815,6 +1815,9 @@ finalize (bool no_backend)
   if (mem_report)
     dump_memory_report (true);
 
+  if (dump_profile_report)
+    dump_profile_report ();
+
   /* Language-specific end of compilation actions.  */
   lang_hooks.finish ();
 }
Index: toplev.h
===================================================================
--- toplev.h	(revision 192116)
+++ toplev.h	(working copy)
@@ -49,6 +49,7 @@ extern void emit_debug_global_declaratio
 extern void write_global_declarations (void);
 
 extern void dump_memory_report (bool);
+extern void dump_profile_report (void);
 
 extern void target_reinit (void);
 
Index: passes.c
===================================================================
--- passes.c	(revision 192116)
+++ passes.c	(working copy)
@@ -1778,6 +1780,209 @@ execute_function_dump (void *data ATTRIB
     }
 }
 
+/* Make statistic about profile consistency.  */
+
+struct profile_record
+{
+  int num_mismatched_freq_in[2];
+  int num_mismatched_freq_out[2];
+  int num_mismatched_count_in[2];
+  int num_mismatched_count_out[2];
+  bool run;
+  gcov_type time[2];
+  int size[2];
+};
+
+static struct profile_record *profile_record;
+
+static void
+check_profile_consistency (int index, int subpass, bool run)
+{
+  basic_block bb;
+  edge_iterator ei;
+  edge e;
+  int sum;
+  gcov_type lsum;
+
+  if (index == -1)
+    return;
+  if (!profile_record)
+    profile_record = XCNEWVEC (struct profile_record,
+			       passes_by_id_size);
+  gcc_assert (index < passes_by_id_size && index >= 0);
+  gcc_assert (subpass < 2);
+  profile_record[index].run |= run;
+
+  FOR_ALL_BB (bb)
+   {
+      if (bb != EXIT_BLOCK_PTR_FOR_FUNCTION (cfun)
+	  && profile_status != PROFILE_ABSENT)
+	{
+	  sum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    sum += e->probability;
+	  if (EDGE_COUNT (bb->succs) && abs (sum - REG_BR_PROB_BASE) > 100)
+	    profile_record[index].num_mismatched_freq_out[subpass]++;
+	  lsum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    lsum += e->count;
+	  if (EDGE_COUNT (bb->succs)
+	      && (lsum - bb->count > 100 || lsum - bb->count < -100))
+	    profile_record[index].num_mismatched_count_out[subpass]++;
+	}
+      if (bb != ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
+	  && profile_status != PROFILE_ABSENT)
+	{
+	  sum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    sum += EDGE_FREQUENCY (e);
+	  if (abs (sum - bb->frequency) > 100
+	      || (MAX (sum, bb->frequency) > 10
+		  && abs ((sum - bb->frequency) * 100 / (MAX (sum, bb->frequency) + 1)) > 10))
+	    profile_record[index].num_mismatched_freq_in[subpass]++;
+	  lsum = 0;
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    lsum += e->count;
+	  if (lsum - bb->count > 100 || lsum - bb->count < -100)
+	    profile_record[index].num_mismatched_count_in[subpass]++;
+	}
+      if (bb == ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun)
+	  || bb == EXIT_BLOCK_PTR_FOR_FUNCTION (cfun))
+	continue;
+      if ((cfun && (cfun->curr_properties & PROP_trees)))
+	{
+	  gimple_stmt_iterator i;
+
+	  for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
+	    {
+	      profile_record[index].size[subpass]
+		 += estimate_num_insns (gsi_stmt (i), &eni_size_weights);
+	      if (profile_status == PROFILE_READ)
+		profile_record[index].time[subpass]
+		   += estimate_num_insns (gsi_stmt (i),
+					  &eni_time_weights) * bb->count;
+	      else if (profile_status == PROFILE_GUESSED)
+		profile_record[index].time[subpass]
+		   += estimate_num_insns (gsi_stmt (i),
+					  &eni_time_weights) * bb->frequency;
+	    }
+	}
+      else if (cfun && (cfun->curr_properties & PROP_rtl))
+	{
+	  rtx insn;
+	  for (insn = NEXT_INSN (BB_HEAD (bb)); insn && insn != NEXT_INSN (BB_END (bb));
+	       insn = NEXT_INSN (insn))
+	    if (INSN_P (insn))
+	      {
+		profile_record[index].size[subpass]
+		   += insn_rtx_cost (PATTERN (insn), false);
+		if (profile_status == PROFILE_READ)
+		  profile_record[index].time[subpass]
+		     += insn_rtx_cost (PATTERN (insn), true) * bb->count;
+		else if (profile_status == PROFILE_GUESSED)
+		  profile_record[index].time[subpass]
+		     += insn_rtx_cost (PATTERN (insn), true) * bb->frequency;
+	      }
+	}
+   }
+}
+
+/* Output profile consistency.  */
+
+void
+dump_profile_report (void)
+{
+  int i, j;
+  int last_freq_in = 0, last_count_in = 0, last_freq_out = 0, last_count_out = 0;
+  gcov_type last_time, last_size;
+  double rel_time_change, rel_size_change;
+  int last_reported;
+
+  if (!profile_record)
+    return;
+  fprintf (stderr, "\nProfile consistency report:\n\n");
+  fprintf (stderr, "Pass name                        |mismatch in |mismated out|Overall\n");
+  fprintf (stderr, "                                 |freq count  |freq count  |size   time\n");
+	   
+  for (i = 0; i < passes_by_id_size; i++)
+    for (j = 0 ; j < 2; j++)
+      if (profile_record[i].run)
+	{
+	  if (last_time)
+	    rel_time_change = (profile_record[i].time[j]
+			       - (double)last_time) * 100 / (double)last_time;
+	  else
+	    rel_time_change = 0;
+	  if (last_size)
+	    rel_size_change = (profile_record[i].size[j]
+			       - (double)last_size) * 100 / (double)last_size;
+	  else
+	    rel_size_change = 0;
+
+	  if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in
+	      || profile_record[i].num_mismatched_freq_out[j] != last_freq_out
+	      || profile_record[i].num_mismatched_count_in[j] != last_count_in
+	      || profile_record[i].num_mismatched_count_out[j] != last_count_out
+	      || rel_time_change || rel_size_change)
+	    {
+	      last_reported = i;
+              fprintf (stderr, "%-20s %s",
+		       passes_by_id [i]->name,
+		       j ? "(after TODO)" : "            ");
+	      if (profile_record[i].num_mismatched_freq_in[j] != last_freq_in)
+		fprintf (stderr, "| %+5i",
+		         profile_record[i].num_mismatched_freq_in[j]
+			  - last_freq_in);
+	      else
+		fprintf (stderr, "|      ");
+	      if (profile_record[i].num_mismatched_count_in[j] != last_count_in)
+		fprintf (stderr, " %+5i",
+		         profile_record[i].num_mismatched_count_in[j]
+			  - last_count_in);
+	      else
+		fprintf (stderr, "      ");
+	      if (profile_record[i].num_mismatched_freq_out[j] != last_freq_out)
+		fprintf (stderr, "| %+5i",
+		         profile_record[i].num_mismatched_freq_out[j]
+			  - last_freq_out);
+	      else
+		fprintf (stderr, "|      ");
+	      if (profile_record[i].num_mismatched_count_out[j] != last_count_out)
+		fprintf (stderr, " %+5i",
+		         profile_record[i].num_mismatched_count_out[j]
+			  - last_count_out);
+	      else
+		fprintf (stderr, "      ");
+
+	      /* Size/time units change across gimple and RTL.  */
+	      if (i == pass_expand.pass.static_pass_number)
+		fprintf (stderr, "|----------");
+	      else
+		{
+		  if (rel_size_change)
+		    fprintf (stderr, "| %+8.4f%%", rel_size_change);
+		  else
+		    fprintf (stderr, "|          ");
+		  if (rel_time_change)
+		    fprintf (stderr, " %+8.4f%%", rel_time_change);
+		}
+	      fprintf (stderr, "\n");
+	      last_freq_in = profile_record[i].num_mismatched_freq_in[j];
+	      last_freq_out = profile_record[i].num_mismatched_freq_out[j];
+	      last_count_in = profile_record[i].num_mismatched_count_in[j];
+	      last_count_out = profile_record[i].num_mismatched_count_out[j];
+	    }
+	  else if (j && last_reported != i)
+	    {
+	      last_reported = i;
+              fprintf (stderr, "%-20s ------------|            |            |\n",
+		       passes_by_id [i]->name);
+	    }
+	  last_time = profile_record[i].time[j];
+	  last_size = profile_record[i].size[j];
+	}
+}
+
 /* Perform all TODO actions that ought to be done on each function.  */
 
 static void
@@ -2042,9 +2247,14 @@ execute_one_ipa_transform_pass (struct c
   if (pass->tv_id != TV_NONE)
     timevar_pop (pass->tv_id);
 
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+    check_profile_consistency (pass->static_pass_number, 0, true);
+
   /* Run post-pass cleanup and verification.  */
   execute_todo (todo_after);
   verify_interpass_invariants ();
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+    check_profile_consistency (pass->static_pass_number, 1, true);
 
   do_per_function (execute_function_dump, NULL);
   pass_fini_dump_file (pass);
@@ -2144,6 +2354,13 @@ execute_one_pass (struct opt_pass *pass)
 
   if (!gate_status)
     {
+      /* Run so passes selectively disabling themselves on a given function
+	 are not miscounted.  */
+      if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+	{
+          check_profile_consistency (pass->static_pass_number, 0, false);
+          check_profile_consistency (pass->static_pass_number, 1, false);
+	}
       current_pass = NULL;
       return false;
     }
@@ -2210,8 +2427,14 @@ execute_one_pass (struct opt_pass *pass)
       clean_graph_dump_file (dump_file_name);
     }
 
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+    check_profile_consistency (pass->static_pass_number, 0, true);
+
   /* Run post-pass cleanup and verification.  */
   execute_todo (todo_after | pass->todo_flags_finish);
+  if (profile_report && cfun && (cfun->curr_properties & PROP_cfg))
+    check_profile_consistency (pass->static_pass_number, 1, true);
+
   verify_interpass_invariants ();
   do_per_function (execute_function_dump, NULL);
   if (pass->type == IPA_PASS)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]