[google]Add support for sampled profile collection (issue4438083)
Easwaran Raman
eraman@google.com
Fri Apr 29 00:03:00 GMT 2011
This patch from Silvius Rus adds support for sampled edge profile collection to reduce instrumentation run overhead. Bootstraps and no test regressions. Ok for google/main?
2011-04-28 Silvius Rus <silvius.rus@gmail.com>
* doc/invoke.texi: Document -fprofile-generate-sampling option.
* gcov-io.h (__gcov_set_sampling_rate): New declaration.
* profile.c (branch_prob): Add support for sampled profile
collection.
* profile.h (add_sampling_to_edge_counters): New declaration.
* common.opt (fprofile-generate-sampling): New option.
* tree-profile: Include header files; define EDGE_COUNTER_STMT_COUNT.
(instrumentation_to_be_sampled, gcov_sample_counter_decl)
(gcov_sampling_rate_decl): New globals.
(insert_if_then, add_sampling_wrapper, is_instrumentation_to_be_sampled)
(add_sampling_to_edge_counters, gimple_init_instrumentation_sampling):
New functions.
(gimple_init_edge_profiler): Call gimple_init_instrumentation_sampling.
(gimple_gen_edge_profiler): Mark start of instrumentation block.
* libgcov.c (__gcov_sampling_rate): New extern declaration.
(gcov_sampling_rate_initialized, __gcov_sample_counter): New globals.
(gcov_exit): Set sampling rate; minor coding style fixes.
* params.def (PARAM_PROFILE_GENERATE_SAMPLING_RATE): New parameter.
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (revision 173136)
+++ gcc/doc/invoke.texi (working copy)
@@ -375,7 +375,7 @@ Objective-C and Objective-C++ Dialects}.
-fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
-fprefetch-loop-arrays @gol
-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
--fprofile-generate=@var{path} @gol
+-fprofile-generate=@var{path} -fprofile-generate-sampling @gol
-fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
-freciprocal-math -fregmove -frename-registers -freorder-blocks @gol
-freorder-blocks-and-partition -freorder-functions @gol
@@ -7923,6 +7923,20 @@ The following options are enabled: @code{-fprofile
If @var{path} is specified, GCC will look at the @var{path} to find
the profile feedback data files. See @option{-fprofile-dir}.
+@item -fprofile-generate-sampling
+@opindex -fprofile-generate-sampling
+
+Enable sampling for instrumented binaries. Instead of recording every event,
+record only every N-th event, where N (the sampling rate) can be set either
+at compile time using
+@option{--param profile-generate-sampling-rate=@var{value}}, or
+at execution start time through environment variable @samp{GCOV_SAMPLING_RATE}.
+
+At this time sampling applies only to branch counters. A sampling rate of 100
+decreases instrumentated binary slowdown from up to 20x for heavily threaded
+applications down to around 2x. @option{-fprofile-correction} is always
+needed with sampling.
+
@item -fprofile-use
@itemx -fprofile-use=@var{path}
@opindex fprofile-use
@@ -9138,6 +9152,9 @@ recognize.
If you want to pass an option that takes an argument, you must use
@option{-Xassembler} twice, once for the option and once for the argument.
+@item profile-generate-sampling-rate
+Set the sampling rate with @option{-fprofile-generate-sampling}.
+
@end table
@node Link Options
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h (revision 173136)
+++ gcc/gcov-io.h (working copy)
@@ -544,6 +544,9 @@ struct dyn_imp_mod
/* Register a new object file module. */
extern void __gcov_init (struct gcov_info *) ATTRIBUTE_HIDDEN;
+/* Set sampling rate to RATE. */
+extern void __gcov_set_sampling_rate (unsigned int rate);
+
/* Called before fork, to avoid double counting. */
extern void __gcov_flush (void) ATTRIBUTE_HIDDEN;
Index: gcc/profile.c
===================================================================
--- gcc/profile.c (revision 173136)
+++ gcc/profile.c (working copy)
@@ -1210,6 +1210,9 @@ branch_prob (void)
/* Commit changes done by instrumentation. */
gsi_commit_edge_inserts ();
+
+ if (flag_profile_generate_sampling)
+ add_sampling_to_edge_counters ();
}
free_aux_for_edges ();
Index: gcc/profile.h
===================================================================
--- gcc/profile.h (revision 173136)
+++ gcc/profile.h (working copy)
@@ -47,4 +47,10 @@ extern gcov_type sum_edge_counts (VEC (edge, gc) *
extern void init_node_map (void);
extern void del_node_map (void);
+/* Implement sampling to avoid writing to edge counters very often.
+ Many concurrent writes to the same counters, or to counters that share
+ the same cache line leads to up to 30x slowdown on an application running
+ on 8 CPUs. With sampling, the slowdown reduced to 2x. */
+extern void add_sampling_to_edge_counters (void);
+
#endif /* PROFILE_H */
Index: gcc/common.opt
===================================================================
--- gcc/common.opt (revision 173136)
+++ gcc/common.opt (working copy)
@@ -1605,6 +1605,10 @@ fprofile-generate=
Common Joined RejectNegative
Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir=
+fprofile-generate-sampling
+Common Var(flag_profile_generate_sampling)
+Turn on instrumentation sampling with -fprofile-generate with rate set by --param profile-generate-sampling-rate or environment variable GCOV_SAMPLING_RATE
+
fprofile-use
Common Var(flag_profile_use)
Enable common options for performing profile feedback directed optimizations
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c (revision 173136)
+++ gcc/tree-profile.c (working copy)
@@ -31,6 +31,8 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "tm.h"
#include "flags.h"
+#include "target.h"
+#include "output.h"
#include "regs.h"
#include "function.h"
#include "basic-block.h"
@@ -44,9 +46,14 @@ along with GCC; see the file COPYING3. If not see
#include "value-prof.h"
#include "cgraph.h"
#include "output.h"
+#include "params.h"
+#include "profile.h"
#include "l-ipo.h"
#include "profile.h"
+/* Number of statements inserted for each edge counter increment. */
+#define EDGE_COUNTER_STMT_COUNT 3
+
static GTY(()) tree gcov_type_node;
static GTY(()) tree gcov_type_tmp_var;
static GTY(()) tree tree_interval_profiler_fn;
@@ -136,7 +143,179 @@ init_ic_make_global_vars (void)
}
}
+/* A set of the first statement in each block of statements that need to
+ be applied a sampling wrapper. */
+static htab_t instrumentation_to_be_sampled = NULL;
+
+/* extern __thread gcov_unsigned_t __gcov_sample_counter */
+static tree gcov_sample_counter_decl = NULL_TREE;
+
+/* extern gcov_unsigned_t __gcov_sampling_rate */
+static tree gcov_sampling_rate_decl = NULL_TREE;
+
+/* Insert STMT_IF around given sequence of consecutive statements in the
+ same basic block starting with STMT_START, ending with STMT_END. */
+
+static void
+insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if)
+{
+ gimple_stmt_iterator gsi;
+ basic_block bb_original, bb_before_if, bb_after_if;
+ edge e_if_taken, e_then_join;
+
+ gsi = gsi_for_stmt (stmt_start);
+ gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT);
+ bb_original = gsi_bb (gsi);
+ e_if_taken = split_block (bb_original, stmt_if);
+ e_if_taken->flags &= ~EDGE_FALLTHRU;
+ e_if_taken->flags |= EDGE_TRUE_VALUE;
+ e_then_join = split_block (e_if_taken->dest, stmt_end);
+ bb_before_if = e_if_taken->src;
+ bb_after_if = e_then_join->dest;
+ make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE);
+}
+
+/* Transform:
+
+ ORIGINAL CODE
+
+ Into:
+
+ __gcov_sample_counter++;
+ if (__gcov_sample_counter >= __gcov_sampling_rate)
+ {
+ __gcov_sample_counter = 0;
+ ORIGINAL CODE
+ }
+
+ The original code block starts with STMT_START, is made of STMT_COUNT
+ consecutive statements in the same basic block. */
+
+static void
+add_sampling_wrapper (gimple stmt_start, int stmt_count)
+{
+ int i;
+ tree zero, one, tmp_var, tmp1, tmp2, tmp3;
+ gimple stmt_block_end;
+ gimple stmt_inc_counter1, stmt_inc_counter2, stmt_inc_counter3;
+ gimple stmt_reset_counter, stmt_assign_rate, stmt_if;
+ gimple_stmt_iterator gsi;
+
+ tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter");
+ tmp1 = make_ssa_name (tmp_var, NULL);
+ tmp2 = make_ssa_name (tmp_var, NULL);
+
+ /* Create all the new statements needed. */
+ stmt_inc_counter1 = gimple_build_assign (tmp1, gcov_sample_counter_decl);
+ one = build_int_cst (get_gcov_unsigned_t (), 1);
+ stmt_inc_counter2 = gimple_build_assign_with_ops (
+ PLUS_EXPR, tmp2, tmp1, one);
+ stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp2);
+ zero = build_int_cst (get_gcov_unsigned_t (), 0);
+ stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero);
+ tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter");
+ tmp3 = make_ssa_name (tmp_var, NULL);
+ stmt_assign_rate = gimple_build_assign (tmp3, gcov_sampling_rate_decl);
+ stmt_if = gimple_build_cond (GE_EXPR, tmp2, tmp3, NULL_TREE, NULL_TREE);
+
+ /* Insert them for now in the original basic block. */
+ gsi = gsi_for_stmt (stmt_start);
+ gsi_insert_before (&gsi, stmt_inc_counter1, GSI_SAME_STMT);
+ gsi_insert_before (&gsi, stmt_inc_counter2, GSI_SAME_STMT);
+ gsi_insert_before (&gsi, stmt_inc_counter3, GSI_SAME_STMT);
+ gsi_insert_before (&gsi, stmt_assign_rate, GSI_SAME_STMT);
+ gsi_insert_before (&gsi, stmt_reset_counter, GSI_SAME_STMT);
+
+ /* Move to last statement. */
+ for (i = 0; i < stmt_count - 1; i++)
+ gsi_next (&gsi);
+
+ stmt_block_end = gsi_stmt (gsi);
+ gcc_assert (stmt_block_end);
+
+ /* Insert IF block. */
+ insert_if_then (stmt_reset_counter, stmt_block_end, stmt_if);
+}
+
+/* Return whether STMT is the beginning of an instrumentation block to be
+ applied sampling. */
+
+static bool
+is_instrumentation_to_be_sampled (gimple stmt)
+{
+ return (htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt,
+ htab_hash_pointer (stmt), NO_INSERT)
+ != NULL);
+}
+
+/* Add sampling wrappers around edge counter code in current function. */
+
void
+add_sampling_to_edge_counters (void)
+{
+ gimple_stmt_iterator gsi;
+ basic_block bb;
+
+ FOR_EACH_BB_REVERSE (bb)
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (is_instrumentation_to_be_sampled (stmt))
+ {
+ add_sampling_wrapper (stmt, EDGE_COUNTER_STMT_COUNT);
+ break;
+ }
+ }
+
+ /* Empty the set of statements performing the edge counter increment. */
+ if (instrumentation_to_be_sampled)
+ htab_empty (instrumentation_to_be_sampled);
+}
+
+static void
+gimple_init_instrumentation_sampling (void)
+{
+ if (!gcov_sampling_rate_decl)
+ {
+ /* Define __gcov_sampling_rate regardless of -fprofile-generate-sampling.
+ Otherwise the extern reference to it from libgcov becomes unmatched.
+ */
+ gcov_sampling_rate_decl = build_decl (
+ UNKNOWN_LOCATION,
+ VAR_DECL,
+ get_identifier ("__gcov_sampling_rate"),
+ get_gcov_unsigned_t ());
+ TREE_PUBLIC (gcov_sampling_rate_decl) = 1;
+ DECL_ARTIFICIAL (gcov_sampling_rate_decl) = 1;
+ DECL_COMDAT_GROUP (gcov_sampling_rate_decl)
+ = DECL_ASSEMBLER_NAME (gcov_sampling_rate_decl);
+ TREE_STATIC (gcov_sampling_rate_decl) = 1;
+ DECL_INITIAL (gcov_sampling_rate_decl) = build_int_cst (
+ get_gcov_unsigned_t (),
+ PARAM_VALUE (PARAM_PROFILE_GENERATE_SAMPLING_RATE));
+ assemble_variable (gcov_sampling_rate_decl, 0, 0, 0);
+ }
+
+ if (flag_profile_generate_sampling && !instrumentation_to_be_sampled)
+ {
+ instrumentation_to_be_sampled = htab_create (100, htab_hash_pointer,
+ htab_eq_pointer, NULL);
+ gcov_sample_counter_decl = build_decl (
+ UNKNOWN_LOCATION,
+ VAR_DECL,
+ get_identifier ("__gcov_sample_counter"),
+ get_gcov_unsigned_t ());
+ TREE_PUBLIC (gcov_sample_counter_decl) = 1;
+ DECL_EXTERNAL (gcov_sample_counter_decl) = 1;
+ DECL_ARTIFICIAL (gcov_sample_counter_decl) = 1;
+ if (targetm.have_tls)
+ DECL_TLS_MODEL (gcov_sample_counter_decl) =
+ decl_default_tls_model (gcov_sample_counter_decl);
+ assemble_variable (gcov_sample_counter_decl, 0, 0, 0);
+ }
+}
+
+void
gimple_init_edge_profiler (void)
{
tree interval_profiler_fn_type;
@@ -148,6 +327,8 @@ gimple_init_edge_profiler (void)
tree dc_profiler_fn_type;
tree average_profiler_fn_type;
+ gimple_init_instrumentation_sampling ();
+
if (!gcov_type_node)
{
char name_buf[32];
@@ -277,6 +458,7 @@ gimple_init_edge_profiler (void)
void
gimple_gen_edge_profiler (int edgeno, edge e)
{
+ void** slot;
tree ref, one;
gimple stmt1, stmt2, stmt3;
@@ -292,6 +474,15 @@ gimple_gen_edge_profiler (int edgeno, edge e)
gimple_assign_lhs (stmt1), one);
gimple_assign_set_lhs (stmt2, make_ssa_name (gcov_type_tmp_var, stmt2));
stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs (stmt2));
+
+ if (flag_profile_generate_sampling)
+ {
+ slot = htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt1,
+ htab_hash_pointer (stmt1), INSERT);
+ gcc_assert (!*slot);
+ *slot = stmt1;
+ }
+
gsi_insert_on_edge (e, stmt1);
gsi_insert_on_edge (e, stmt2);
gsi_insert_on_edge (e, stmt3);
Index: gcc/libgcov.c
===================================================================
--- gcc/libgcov.c (revision 173136)
+++ gcc/libgcov.c (working copy)
@@ -83,6 +83,20 @@ void __gcov_merge_delta (gcov_type *counters __at
#ifdef L_gcov
#include "gcov-io.c"
+/* Sampling rate. */
+extern gcov_unsigned_t __gcov_sampling_rate;
+static int gcov_sampling_rate_initialized = 0;
+
+/* Set sampling rate to RATE. */
+
+void __gcov_set_sampling_rate (unsigned int rate)
+{
+ __gcov_sampling_rate = rate;
+}
+
+/* Per thread sample counter. */
+THREAD_PREFIX gcov_unsigned_t __gcov_sample_counter = 0;
+
/* Chain of per-object gcov structures. */
extern struct gcov_info *__gcov_list;
@@ -365,7 +379,7 @@ gcov_exit (void)
{
/* Check if the level of dirs to strip off specified. */
- char *tmp = getenv("GCOV_PREFIX_STRIP");
+ char *tmp = getenv ("GCOV_PREFIX_STRIP");
if (tmp)
{
gcov_prefix_strip = atoi (tmp);
@@ -375,7 +389,7 @@ gcov_exit (void)
}
}
/* Get file name relocation prefix. Non-absolute values are ignored. */
- gcov_prefix = getenv("GCOV_PREFIX");
+ gcov_prefix = getenv ("GCOV_PREFIX");
if (gcov_prefix)
{
prefix_length = strlen(gcov_prefix);
@@ -757,6 +771,17 @@ gcov_exit (void)
void
__gcov_init (struct gcov_info *info)
{
+ if (!gcov_sampling_rate_initialized)
+ {
+ const char* env_value_str = getenv ("GCOV_SAMPLING_RATE");
+ if (env_value_str)
+ {
+ int env_value_int = atoi(env_value_str);
+ if (env_value_int >= 1)
+ __gcov_sampling_rate = env_value_int;
+ }
+ gcov_sampling_rate_initialized = 1;
+ }
if (!info->version)
return;
if (gcov_version (info, info->version, 0))
Index: gcc/params.def
===================================================================
--- gcc/params.def (revision 173136)
+++ gcc/params.def (working copy)
@@ -929,6 +929,11 @@ DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP,
"name lookup fails",
1000, 0, 0)
+DEFPARAM (PARAM_PROFILE_GENERATE_SAMPLING_RATE,
+ "profile-generate-sampling-rate",
+ "sampling rate with -fprofile-generate-sampling",
+ 100, 0, 2000000000)
+
/*
Local variables:
mode:c
--
This patch is available for review at http://codereview.appspot.com/4438083
More information about the Gcc-patches
mailing list