This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[google]Add support for sampled profile collection (issue4438083)


This patch from Silvius Rus  adds support for sampled edge profile collection to reduce instrumentation run overhead. Bootstraps and no test regressions. Ok for google/main?

2011-04-28  Silvius Rus  <silvius.rus@gmail.com>

	* doc/invoke.texi: Document -fprofile-generate-sampling option.
	* gcov-io.h (__gcov_set_sampling_rate): New declaration.
	* profile.c (branch_prob): Add support for sampled profile
	collection.
	* profile.h (add_sampling_to_edge_counters): New declaration.
	* common.opt (fprofile-generate-sampling): New option.
	* tree-profile: Include header files; define EDGE_COUNTER_STMT_COUNT.
	(instrumentation_to_be_sampled, gcov_sample_counter_decl)
	(gcov_sampling_rate_decl): New globals.
	(insert_if_then, add_sampling_wrapper, is_instrumentation_to_be_sampled)
	(add_sampling_to_edge_counters, gimple_init_instrumentation_sampling):
	New functions.
	(gimple_init_edge_profiler): Call gimple_init_instrumentation_sampling.
	(gimple_gen_edge_profiler): Mark start of instrumentation block.
	* libgcov.c (__gcov_sampling_rate): New extern declaration.
	(gcov_sampling_rate_initialized, __gcov_sample_counter): New globals.
	(gcov_exit): Set sampling rate; minor coding style fixes.
	* params.def (PARAM_PROFILE_GENERATE_SAMPLING_RATE): New parameter.

Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 173136)
+++ gcc/doc/invoke.texi	(working copy)
@@ -375,7 +375,7 @@ Objective-C and Objective-C++ Dialects}.
 -fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
 -fprefetch-loop-arrays @gol
 -fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
--fprofile-generate=@var{path} @gol
+-fprofile-generate=@var{path} -fprofile-generate-sampling @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
 -freciprocal-math -fregmove -frename-registers -freorder-blocks @gol
 -freorder-blocks-and-partition -freorder-functions @gol
@@ -7923,6 +7923,20 @@ The following options are enabled: @code{-fprofile
 If @var{path} is specified, GCC will look at the @var{path} to find
 the profile feedback data files. See @option{-fprofile-dir}.
 
+@item -fprofile-generate-sampling
+@opindex -fprofile-generate-sampling
+
+Enable sampling for instrumented binaries.  Instead of recording every event,
+record only every N-th event, where N (the sampling rate) can be set either
+at compile time using
+@option{--param profile-generate-sampling-rate=@var{value}}, or
+at execution start time through environment variable @samp{GCOV_SAMPLING_RATE}.
+
+At this time sampling applies only to branch counters.  A sampling rate of 100
+decreases instrumentated binary slowdown from up to 20x for heavily threaded
+applications down to around 2x.  @option{-fprofile-correction} is always
+needed with sampling.
+
 @item -fprofile-use
 @itemx -fprofile-use=@var{path}
 @opindex fprofile-use
@@ -9138,6 +9152,9 @@ recognize.
 If you want to pass an option that takes an argument, you must use
 @option{-Xassembler} twice, once for the option and once for the argument.
 
+@item profile-generate-sampling-rate
+Set the sampling rate with @option{-fprofile-generate-sampling}.
+
 @end table
 
 @node Link Options
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h	(revision 173136)
+++ gcc/gcov-io.h	(working copy)
@@ -544,6 +544,9 @@ struct dyn_imp_mod
 /* Register a new object file module.  */
 extern void __gcov_init (struct gcov_info *) ATTRIBUTE_HIDDEN;
 
+/* Set sampling rate to RATE.  */
+extern void __gcov_set_sampling_rate (unsigned int rate);
+
 /* Called before fork, to avoid double counting.  */
 extern void __gcov_flush (void) ATTRIBUTE_HIDDEN;
 
Index: gcc/profile.c
===================================================================
--- gcc/profile.c	(revision 173136)
+++ gcc/profile.c	(working copy)
@@ -1210,6 +1210,9 @@ branch_prob (void)
 
       /* Commit changes done by instrumentation.  */
       gsi_commit_edge_inserts ();
+
+      if (flag_profile_generate_sampling)
+        add_sampling_to_edge_counters ();
     }
 
   free_aux_for_edges ();
Index: gcc/profile.h
===================================================================
--- gcc/profile.h	(revision 173136)
+++ gcc/profile.h	(working copy)
@@ -47,4 +47,10 @@ extern gcov_type sum_edge_counts (VEC (edge, gc) *
 extern void init_node_map (void);
 extern void del_node_map (void);
 
+/* Implement sampling to avoid writing to edge counters very often.
+   Many concurrent writes to the same counters, or to counters that share
+   the same cache line leads to up to 30x slowdown on an application running
+   on 8 CPUs.  With sampling, the slowdown reduced to 2x.  */
+extern void add_sampling_to_edge_counters (void);
+
 #endif /* PROFILE_H */
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 173136)
+++ gcc/common.opt	(working copy)
@@ -1605,6 +1605,10 @@ fprofile-generate=
 Common Joined RejectNegative
 Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir=
 
+fprofile-generate-sampling
+Common Var(flag_profile_generate_sampling)
+Turn on instrumentation sampling with -fprofile-generate with rate set by --param profile-generate-sampling-rate or environment variable GCOV_SAMPLING_RATE
+
 fprofile-use
 Common Var(flag_profile_use)
 Enable common options for performing profile feedback directed optimizations
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c	(revision 173136)
+++ gcc/tree-profile.c	(working copy)
@@ -31,6 +31,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "flags.h"
+#include "target.h"
+#include "output.h"
 #include "regs.h"
 #include "function.h"
 #include "basic-block.h"
@@ -44,9 +46,14 @@ along with GCC; see the file COPYING3.  If not see
 #include "value-prof.h"
 #include "cgraph.h"
 #include "output.h"
+#include "params.h"
+#include "profile.h"
 #include "l-ipo.h"
 #include "profile.h"
 
+/* Number of statements inserted for each edge counter increment.  */
+#define EDGE_COUNTER_STMT_COUNT 3
+
 static GTY(()) tree gcov_type_node;
 static GTY(()) tree gcov_type_tmp_var;
 static GTY(()) tree tree_interval_profiler_fn;
@@ -136,7 +143,179 @@ init_ic_make_global_vars (void)
     }
 }
 
+/* A set of the first statement in each block of statements that need to
+   be applied a sampling wrapper.  */
+static htab_t instrumentation_to_be_sampled = NULL;
+
+/* extern __thread gcov_unsigned_t __gcov_sample_counter  */
+static tree gcov_sample_counter_decl = NULL_TREE;
+
+/* extern gcov_unsigned_t __gcov_sampling_rate  */
+static tree gcov_sampling_rate_decl = NULL_TREE;
+
+/* Insert STMT_IF around given sequence of consecutive statements in the
+   same basic block starting with STMT_START, ending with STMT_END.  */
+
+static void
+insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb_original, bb_before_if, bb_after_if;
+  edge e_if_taken, e_then_join;
+
+  gsi = gsi_for_stmt (stmt_start);
+  gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT);
+  bb_original = gsi_bb (gsi);
+  e_if_taken = split_block (bb_original, stmt_if);
+  e_if_taken->flags &= ~EDGE_FALLTHRU;
+  e_if_taken->flags |= EDGE_TRUE_VALUE;
+  e_then_join = split_block (e_if_taken->dest, stmt_end);
+  bb_before_if = e_if_taken->src;
+  bb_after_if = e_then_join->dest;
+  make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE);
+}
+
+/* Transform:
+
+   ORIGINAL CODE
+
+   Into:
+
+   __gcov_sample_counter++;
+   if (__gcov_sample_counter >= __gcov_sampling_rate)
+     {
+       __gcov_sample_counter = 0;
+       ORIGINAL CODE
+     }
+
+   The original code block starts with STMT_START, is made of STMT_COUNT
+   consecutive statements in the same basic block.  */
+
+static void
+add_sampling_wrapper (gimple stmt_start, int stmt_count)
+{
+  int i;
+  tree zero, one, tmp_var, tmp1, tmp2, tmp3;
+  gimple stmt_block_end;
+  gimple stmt_inc_counter1, stmt_inc_counter2, stmt_inc_counter3;
+  gimple stmt_reset_counter, stmt_assign_rate, stmt_if;
+  gimple_stmt_iterator gsi;
+
+  tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter");
+  tmp1 = make_ssa_name (tmp_var, NULL);
+  tmp2 = make_ssa_name (tmp_var, NULL);
+
+  /* Create all the new statements needed.  */
+  stmt_inc_counter1 = gimple_build_assign (tmp1, gcov_sample_counter_decl);
+  one = build_int_cst (get_gcov_unsigned_t (), 1);
+  stmt_inc_counter2 = gimple_build_assign_with_ops (
+      PLUS_EXPR, tmp2, tmp1, one);
+  stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp2);
+  zero = build_int_cst (get_gcov_unsigned_t (), 0);
+  stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero);
+  tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter");
+  tmp3 = make_ssa_name (tmp_var, NULL);
+  stmt_assign_rate = gimple_build_assign (tmp3, gcov_sampling_rate_decl);
+  stmt_if = gimple_build_cond (GE_EXPR, tmp2, tmp3, NULL_TREE, NULL_TREE);
+
+  /* Insert them for now in the original basic block.  */
+  gsi = gsi_for_stmt (stmt_start);
+  gsi_insert_before (&gsi, stmt_inc_counter1, GSI_SAME_STMT);
+  gsi_insert_before (&gsi, stmt_inc_counter2, GSI_SAME_STMT);
+  gsi_insert_before (&gsi, stmt_inc_counter3, GSI_SAME_STMT);
+  gsi_insert_before (&gsi, stmt_assign_rate, GSI_SAME_STMT);
+  gsi_insert_before (&gsi, stmt_reset_counter, GSI_SAME_STMT);
+
+  /* Move to last statement.  */
+  for (i = 0; i < stmt_count - 1; i++)
+    gsi_next (&gsi);
+
+  stmt_block_end = gsi_stmt (gsi);
+  gcc_assert (stmt_block_end);
+
+  /* Insert IF block.  */
+  insert_if_then (stmt_reset_counter, stmt_block_end, stmt_if);
+}
+
+/* Return whether STMT is the beginning of an instrumentation block to be
+   applied sampling.  */
+
+static bool
+is_instrumentation_to_be_sampled (gimple stmt)
+{
+  return (htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt,
+                                    htab_hash_pointer (stmt), NO_INSERT)
+          != NULL);
+}
+
+/* Add sampling wrappers around edge counter code in current function.  */
+
 void
+add_sampling_to_edge_counters (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+
+  FOR_EACH_BB_REVERSE (bb)
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+        gimple stmt = gsi_stmt (gsi);
+        if (is_instrumentation_to_be_sampled (stmt))
+          {
+            add_sampling_wrapper (stmt, EDGE_COUNTER_STMT_COUNT);
+            break;
+          }
+      }
+
+  /* Empty the set of statements performing the edge counter increment.  */
+  if (instrumentation_to_be_sampled)
+    htab_empty (instrumentation_to_be_sampled);
+}
+
+static void
+gimple_init_instrumentation_sampling (void)
+{
+  if (!gcov_sampling_rate_decl)
+    {
+      /* Define __gcov_sampling_rate regardless of -fprofile-generate-sampling.
+         Otherwise the extern reference to it from libgcov becomes unmatched.
+      */
+      gcov_sampling_rate_decl = build_decl (
+          UNKNOWN_LOCATION,
+          VAR_DECL,
+          get_identifier ("__gcov_sampling_rate"),
+          get_gcov_unsigned_t ());
+      TREE_PUBLIC (gcov_sampling_rate_decl) = 1;
+      DECL_ARTIFICIAL (gcov_sampling_rate_decl) = 1;
+      DECL_COMDAT_GROUP (gcov_sampling_rate_decl)
+          = DECL_ASSEMBLER_NAME (gcov_sampling_rate_decl);
+      TREE_STATIC (gcov_sampling_rate_decl) = 1;
+      DECL_INITIAL (gcov_sampling_rate_decl) = build_int_cst (
+          get_gcov_unsigned_t (),
+          PARAM_VALUE (PARAM_PROFILE_GENERATE_SAMPLING_RATE));
+      assemble_variable (gcov_sampling_rate_decl, 0, 0, 0);
+    }
+
+  if (flag_profile_generate_sampling && !instrumentation_to_be_sampled)
+    {
+      instrumentation_to_be_sampled = htab_create (100, htab_hash_pointer,
+                                                   htab_eq_pointer, NULL);
+      gcov_sample_counter_decl = build_decl (
+          UNKNOWN_LOCATION,
+          VAR_DECL,
+          get_identifier ("__gcov_sample_counter"),
+          get_gcov_unsigned_t ());
+      TREE_PUBLIC (gcov_sample_counter_decl) = 1;
+      DECL_EXTERNAL (gcov_sample_counter_decl) = 1;
+      DECL_ARTIFICIAL (gcov_sample_counter_decl) = 1;
+      if (targetm.have_tls)
+        DECL_TLS_MODEL (gcov_sample_counter_decl) =
+            decl_default_tls_model (gcov_sample_counter_decl);
+      assemble_variable (gcov_sample_counter_decl, 0, 0, 0);
+    }
+}
+
+void
 gimple_init_edge_profiler (void)
 {
   tree interval_profiler_fn_type;
@@ -148,6 +327,8 @@ gimple_init_edge_profiler (void)
   tree dc_profiler_fn_type;
   tree average_profiler_fn_type;
 
+  gimple_init_instrumentation_sampling ();
+
   if (!gcov_type_node)
     {
       char name_buf[32];
@@ -277,6 +458,7 @@ gimple_init_edge_profiler (void)
 void
 gimple_gen_edge_profiler (int edgeno, edge e)
 {
+  void** slot;
   tree ref, one;
   gimple stmt1, stmt2, stmt3;
 
@@ -292,6 +474,15 @@ gimple_gen_edge_profiler (int edgeno, edge e)
 					gimple_assign_lhs (stmt1), one);
   gimple_assign_set_lhs (stmt2, make_ssa_name (gcov_type_tmp_var, stmt2));
   stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs (stmt2));
+
+  if (flag_profile_generate_sampling)
+    {
+      slot = htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt1,
+                                       htab_hash_pointer (stmt1), INSERT);
+      gcc_assert (!*slot);
+      *slot = stmt1;
+    }
+
   gsi_insert_on_edge (e, stmt1);
   gsi_insert_on_edge (e, stmt2);
   gsi_insert_on_edge (e, stmt3);
Index: gcc/libgcov.c
===================================================================
--- gcc/libgcov.c	(revision 173136)
+++ gcc/libgcov.c	(working copy)
@@ -83,6 +83,20 @@ void __gcov_merge_delta (gcov_type *counters  __at
 #ifdef L_gcov
 #include "gcov-io.c"
 
+/* Sampling rate.  */
+extern gcov_unsigned_t __gcov_sampling_rate;
+static int gcov_sampling_rate_initialized = 0;
+
+/* Set sampling rate to RATE.  */
+
+void __gcov_set_sampling_rate (unsigned int rate)
+{
+  __gcov_sampling_rate = rate;
+}
+
+/* Per thread sample counter.  */
+THREAD_PREFIX gcov_unsigned_t __gcov_sample_counter = 0;
+
 /* Chain of per-object gcov structures.  */
 extern struct gcov_info *__gcov_list;
 
@@ -365,7 +379,7 @@ gcov_exit (void)
 
   {
     /* Check if the level of dirs to strip off specified. */
-    char *tmp = getenv("GCOV_PREFIX_STRIP");
+    char *tmp = getenv ("GCOV_PREFIX_STRIP");
     if (tmp)
       {
 	gcov_prefix_strip = atoi (tmp);
@@ -375,7 +389,7 @@ gcov_exit (void)
       }
   }
   /* Get file name relocation prefix.  Non-absolute values are ignored. */
-  gcov_prefix = getenv("GCOV_PREFIX");
+  gcov_prefix = getenv ("GCOV_PREFIX");
   if (gcov_prefix)
     {
       prefix_length = strlen(gcov_prefix);
@@ -757,6 +771,17 @@ gcov_exit (void)
 void
 __gcov_init (struct gcov_info *info)
 {
+  if (!gcov_sampling_rate_initialized)
+    {
+      const char* env_value_str = getenv ("GCOV_SAMPLING_RATE");
+      if (env_value_str)
+        {
+          int env_value_int = atoi(env_value_str);
+          if (env_value_int >= 1)
+            __gcov_sampling_rate = env_value_int;
+        }
+      gcov_sampling_rate_initialized = 1;
+    }
   if (!info->version)
     return;
   if (gcov_version (info, info->version, 0))
Index: gcc/params.def
===================================================================
--- gcc/params.def	(revision 173136)
+++ gcc/params.def	(working copy)
@@ -929,6 +929,11 @@ DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP,
 	  "name lookup fails",
 	  1000, 0, 0)
 
+DEFPARAM (PARAM_PROFILE_GENERATE_SAMPLING_RATE,
+	  "profile-generate-sampling-rate",
+	  "sampling rate with -fprofile-generate-sampling",
+	  100, 0, 2000000000)
+
 /*
 Local variables:
 mode:c

--
This patch is available for review at http://codereview.appspot.com/4438083


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]