This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [google]Add support for sampled profile collection (issue4438083)


Committed the attached patch to google/main. Will send a patch for trunk soon.


On Thu, Apr 28, 2011 at 10:03 PM, Xinliang David Li <davidxl@google.com> wrote:
> Please add regression test cases for the feature. Address the comments
> when available. Ok for google/main.
>
> Thanks,
>
> David
>
> On Thu, Apr 28, 2011 at 4:42 PM, Easwaran Raman <eraman@google.com> wrote:
>> This patch from Silvius Rus adds support for sampled edge profile collection to reduce instrumentation run overhead. Bootstraps and no test regressions. Ok for google/main?
>>
>> 2011-04-28  Silvius Rus  <silvius.rus@gmail.com>
>>
>>         * doc/invoke.texi: Document -fprofile-generate-sampling option.
>>         * gcov-io.h (__gcov_set_sampling_rate): New declaration.
>>         * profile.c (branch_prob): Add support for sampled profile
>>         collection.
>>         * profile.h (add_sampling_to_edge_counters): New declaration.
>>         * common.opt (fprofile-generate-sampling): New option.
>>         * tree-profile.c: Include header files; define EDGE_COUNTER_STMT_COUNT.
>>         (instrumentation_to_be_sampled, gcov_sample_counter_decl)
>>         (gcov_sampling_rate_decl): New globals.
>>         (insert_if_then, add_sampling_wrapper, is_instrumentation_to_be_sampled)
>>         (add_sampling_to_edge_counters, gimple_init_instrumentation_sampling):
>>         New functions.
>>         (gimple_init_edge_profiler): Call gimple_init_instrumentation_sampling.
>>         (gimple_gen_edge_profiler): Mark start of instrumentation block.
>>         * libgcov.c (__gcov_sampling_rate): New extern declaration.
>>         (gcov_sampling_rate_initialized, __gcov_sample_counter): New globals.
>>         (__gcov_set_sampling_rate): New function.
>>         (gcov_exit): Minor coding style fixes.
>>         (__gcov_init): Set sampling rate.
>>         * params.def (PARAM_PROFILE_GENERATE_SAMPLING_RATE): New parameter.
>>
>> Index: gcc/doc/invoke.texi
>> ===================================================================
>> --- gcc/doc/invoke.texi (revision 173136)
>> +++ gcc/doc/invoke.texi (working copy)
>> @@ -375,7 +375,7 @@ Objective-C and Objective-C++ Dialects}.
>> ?-fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
>> ?-fprefetch-loop-arrays @gol
>> ?-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
>> --fprofile-generate=@var{path} @gol
>> +-fprofile-generate=@var{path} -fprofile-generate-sampling @gol
>> ?-fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
>> ?-freciprocal-math -fregmove -frename-registers -freorder-blocks @gol
>> ?-freorder-blocks-and-partition -freorder-functions @gol
>> @@ -7923,6 +7923,20 @@ The following options are enabled: @code{-fprofile
>> ?If @var{path} is specified, GCC will look at the @var{path} to find
>> ?the profile feedback data files. See @option{-fprofile-dir}.
>>
>> +@item -fprofile-generate-sampling
>> +@opindex fprofile-generate-sampling
>> +
>> +Enable sampling for instrumented binaries.  Instead of recording every event,
>> +record only every N-th event, where N (the sampling rate) can be set either
>> +at compile time using
>> +@option{--param profile-generate-sampling-rate=@var{value}}, or
>> +at execution start time through environment variable @samp{GCOV_SAMPLING_RATE}.
>> +
>> +At this time sampling applies only to branch counters.  A sampling rate of 100
>> +decreases instrumented binary slowdown from up to 20x for heavily threaded
>> +applications down to around 2x.  @option{-fprofile-correction} is always
>> +needed with sampling.
>> +
>> ?@item -fprofile-use
>> ?@itemx -fprofile-use=@var{path}
>> ?@opindex fprofile-use
>> @@ -9138,6 +9152,9 @@ recognize.
>> ?If you want to pass an option that takes an argument, you must use
>> ?@option{-Xassembler} twice, once for the option and once for the argument.
>>
>> +@item profile-generate-sampling-rate
>> +Set the sampling rate with @option{-fprofile-generate-sampling}.
>> +
>> ?@end table
>>
>> ?@node Link Options
>> Index: gcc/gcov-io.h
>> ===================================================================
>> --- gcc/gcov-io.h ? ? ? (revision 173136)
>> +++ gcc/gcov-io.h ? ? ? (working copy)
>> @@ -544,6 +544,9 @@ struct dyn_imp_mod
>> ?/* Register a new object file module. ?*/
>> ?extern void __gcov_init (struct gcov_info *) ATTRIBUTE_HIDDEN;
>>
>> +/* Set sampling rate to RATE. ?*/
>> +extern void __gcov_set_sampling_rate (unsigned int rate);
>> +
>> ?/* Called before fork, to avoid double counting. ?*/
>> ?extern void __gcov_flush (void) ATTRIBUTE_HIDDEN;
>>
>> Index: gcc/profile.c
>> ===================================================================
>> --- gcc/profile.c ? ? ? (revision 173136)
>> +++ gcc/profile.c ? ? ? (working copy)
>> @@ -1210,6 +1210,9 @@ branch_prob (void)
>>
>> ? ? ? /* Commit changes done by instrumentation. ?*/
>> ? ? ? gsi_commit_edge_inserts ();
>> +
>> + ? ? ?if (flag_profile_generate_sampling)
>> + ? ? ? ?add_sampling_to_edge_counters ();
>> ? ? }
>>
>> ? free_aux_for_edges ();
>> Index: gcc/profile.h
>> ===================================================================
>> --- gcc/profile.h ? ? ? (revision 173136)
>> +++ gcc/profile.h ? ? ? (working copy)
>> @@ -47,4 +47,10 @@ extern gcov_type sum_edge_counts (VEC (edge, gc) *
>> ?extern void init_node_map (void);
>> ?extern void del_node_map (void);
>>
>> +/* Implement sampling to avoid writing to edge counters very often.
>> +   Many concurrent writes to the same counters, or to counters that share
>> +   the same cache line, lead to up to 30x slowdown on an application running
>> +   on 8 CPUs.  With sampling, the slowdown is reduced to 2x.  */
>> +extern void add_sampling_to_edge_counters (void);
>> +
>> ?#endif /* PROFILE_H */
>> Index: gcc/common.opt
>> ===================================================================
>> --- gcc/common.opt ? ? ?(revision 173136)
>> +++ gcc/common.opt ? ? ?(working copy)
>> @@ -1605,6 +1605,10 @@ fprofile-generate=
>> ?Common Joined RejectNegative
>> ?Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir=
>>
>> +fprofile-generate-sampling
>> +Common Var(flag_profile_generate_sampling)
>> +Turn on instrumentation sampling with -fprofile-generate with rate set by --param profile-generate-sampling-rate or environment variable GCOV_SAMPLING_RATE
>> +
>> ?fprofile-use
>> ?Common Var(flag_profile_use)
>> ?Enable common options for performing profile feedback directed optimizations
>> Index: gcc/tree-profile.c
>> ===================================================================
>> --- gcc/tree-profile.c ?(revision 173136)
>> +++ gcc/tree-profile.c ?(working copy)
>> @@ -31,6 +31,8 @@ along with GCC; see the file COPYING3. ?If not see
>> ?#include "coretypes.h"
>> ?#include "tm.h"
>> ?#include "flags.h"
>> +#include "target.h"
>> +#include "output.h"
>> ?#include "regs.h"
>> ?#include "function.h"
>> ?#include "basic-block.h"
>> @@ -44,9 +46,14 @@ along with GCC; see the file COPYING3. ?If not see
>> ?#include "value-prof.h"
>> ?#include "cgraph.h"
>> ?#include "output.h"
>> +#include "params.h"
>> +#include "profile.h"
>> ?#include "l-ipo.h"
>> ?#include "profile.h"
>>
>> +/* Number of statements inserted for each edge counter increment. ?*/
>> +#define EDGE_COUNTER_STMT_COUNT 3
>> +
>> ?static GTY(()) tree gcov_type_node;
>> ?static GTY(()) tree gcov_type_tmp_var;
>> ?static GTY(()) tree tree_interval_profiler_fn;
>> @@ -136,7 +143,179 @@ init_ic_make_global_vars (void)
>> ? ? }
>> ?}
>>
>> +/* The set containing the first statement of each block of statements
>> +   around which a sampling wrapper must be added.  */
>> +static htab_t instrumentation_to_be_sampled = NULL;
>> +
>> +/* extern __thread gcov_unsigned_t __gcov_sample_counter ?*/
>> +static tree gcov_sample_counter_decl = NULL_TREE;
>> +
>> +/* extern gcov_unsigned_t __gcov_sampling_rate ?*/
>> +static tree gcov_sampling_rate_decl = NULL_TREE;
>> +
>> +/* Insert STMT_IF around given sequence of consecutive statements in the
>> + ? same basic block starting with STMT_START, ending with STMT_END. ?*/
>> +
>> +static void
>> +insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if)
>> +{
>> + ?gimple_stmt_iterator gsi;
>> + ?basic_block bb_original, bb_before_if, bb_after_if;
>> + ?edge e_if_taken, e_then_join;
>> +
>> + ?gsi = gsi_for_stmt (stmt_start);
>> + ?gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT);
>> + ?bb_original = gsi_bb (gsi);
>> + ?e_if_taken = split_block (bb_original, stmt_if);
>> + ?e_if_taken->flags &= ~EDGE_FALLTHRU;
>> + ?e_if_taken->flags |= EDGE_TRUE_VALUE;
>> + ?e_then_join = split_block (e_if_taken->dest, stmt_end);
>> + ?bb_before_if = e_if_taken->src;
>> + ?bb_after_if = e_then_join->dest;
>> + ?make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE);
>> +}
>> +
>> +/* Transform:
>> +
>> + ? ORIGINAL CODE
>> +
>> + ? Into:
>> +
>> + ? __gcov_sample_counter++;
>> + ? if (__gcov_sample_counter >= __gcov_sampling_rate)
>> + ? ? {
>> + ? ? ? __gcov_sample_counter = 0;
>> + ? ? ? ORIGINAL CODE
>> + ? ? }
>> +
>> + ? The original code block starts with STMT_START, is made of STMT_COUNT
>> + ? consecutive statements in the same basic block. ?*/
>> +
>> +static void
>> +add_sampling_wrapper (gimple stmt_start, int stmt_count)
>> +{
>> + ?int i;
>> + ?tree zero, one, tmp_var, tmp1, tmp2, tmp3;
>> + ?gimple stmt_block_end;
>> + ?gimple stmt_inc_counter1, stmt_inc_counter2, stmt_inc_counter3;
>> + ?gimple stmt_reset_counter, stmt_assign_rate, stmt_if;
>> + ?gimple_stmt_iterator gsi;
>> +
>> + ?tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter");
>> + ?tmp1 = make_ssa_name (tmp_var, NULL);
>> + ?tmp2 = make_ssa_name (tmp_var, NULL);
>> +
>> + ?/* Create all the new statements needed. ?*/
>> + ?stmt_inc_counter1 = gimple_build_assign (tmp1, gcov_sample_counter_decl);
>> + ?one = build_int_cst (get_gcov_unsigned_t (), 1);
>> + ?stmt_inc_counter2 = gimple_build_assign_with_ops (
>> + ? ? ?PLUS_EXPR, tmp2, tmp1, one);
>> + ?stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp2);
>> + ?zero = build_int_cst (get_gcov_unsigned_t (), 0);
>> + ?stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero);
>> + ?tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter");
>> + ?tmp3 = make_ssa_name (tmp_var, NULL);
>> + ?stmt_assign_rate = gimple_build_assign (tmp3, gcov_sampling_rate_decl);
>> + ?stmt_if = gimple_build_cond (GE_EXPR, tmp2, tmp3, NULL_TREE, NULL_TREE);
>> +
>> + ?/* Insert them for now in the original basic block. ?*/
>> + ?gsi = gsi_for_stmt (stmt_start);
>> + ?gsi_insert_before (&gsi, stmt_inc_counter1, GSI_SAME_STMT);
>> + ?gsi_insert_before (&gsi, stmt_inc_counter2, GSI_SAME_STMT);
>> + ?gsi_insert_before (&gsi, stmt_inc_counter3, GSI_SAME_STMT);
>> + ?gsi_insert_before (&gsi, stmt_assign_rate, GSI_SAME_STMT);
>> + ?gsi_insert_before (&gsi, stmt_reset_counter, GSI_SAME_STMT);
>> +
>> + ?/* Move to last statement. ?*/
>> + ?for (i = 0; i < stmt_count - 1; i++)
>> + ? ?gsi_next (&gsi);
>> +
>> + ?stmt_block_end = gsi_stmt (gsi);
>> + ?gcc_assert (stmt_block_end);
>> +
>> + ?/* Insert IF block. ?*/
>> + ?insert_if_then (stmt_reset_counter, stmt_block_end, stmt_if);
>> +}
>> +
>> +/* Return whether STMT is the beginning of an instrumentation block to
>> +   which sampling must be applied.  */
>> +
>> +static bool
>> +is_instrumentation_to_be_sampled (gimple stmt)
>> +{
>> + ?return (htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?htab_hash_pointer (stmt), NO_INSERT)
>> + ? ? ? ? ?!= NULL);
>> +}
>> +
>> +/* Add sampling wrappers around edge counter code in current function. ?*/
>> +
>> ?void
>> +add_sampling_to_edge_counters (void)
>> +{
>> + ?gimple_stmt_iterator gsi;
>> + ?basic_block bb;
>> +
>> + ?FOR_EACH_BB_REVERSE (bb)
>> + ? ?for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> + ? ? ?{
>> + ? ? ? ?gimple stmt = gsi_stmt (gsi);
>> + ? ? ? ?if (is_instrumentation_to_be_sampled (stmt))
>> + ? ? ? ? ?{
>> + ? ? ? ? ? ?add_sampling_wrapper (stmt, EDGE_COUNTER_STMT_COUNT);
>> + ? ? ? ? ? ?break;
>> + ? ? ? ? ?}
>> + ? ? ?}
>> +
>> + ?/* Empty the set of statements performing the edge counter increment. ?*/
>> + ?if (instrumentation_to_be_sampled)
>> + ? ?htab_empty (instrumentation_to_be_sampled);
>> +}
>> +
>> +static void
>> +gimple_init_instrumentation_sampling (void)
>> +{
>> + ?if (!gcov_sampling_rate_decl)
>> + ? ?{
>> + ? ? ?/* Define __gcov_sampling_rate regardless of -fprofile-generate-sampling.
>> + ? ? ? ? Otherwise the extern reference to it from libgcov becomes unmatched.
>> + ? ? ?*/
>> + ? ? ?gcov_sampling_rate_decl = build_decl (
>> + ? ? ? ? ?UNKNOWN_LOCATION,
>> + ? ? ? ? ?VAR_DECL,
>> + ? ? ? ? ?get_identifier ("__gcov_sampling_rate"),
>> + ? ? ? ? ?get_gcov_unsigned_t ());
>> + ? ? ?TREE_PUBLIC (gcov_sampling_rate_decl) = 1;
>> + ? ? ?DECL_ARTIFICIAL (gcov_sampling_rate_decl) = 1;
>> + ? ? ?DECL_COMDAT_GROUP (gcov_sampling_rate_decl)
>> + ? ? ? ? ?= DECL_ASSEMBLER_NAME (gcov_sampling_rate_decl);
>> + ? ? ?TREE_STATIC (gcov_sampling_rate_decl) = 1;
>> + ? ? ?DECL_INITIAL (gcov_sampling_rate_decl) = build_int_cst (
>> + ? ? ? ? ?get_gcov_unsigned_t (),
>> + ? ? ? ? ?PARAM_VALUE (PARAM_PROFILE_GENERATE_SAMPLING_RATE));
>> + ? ? ?assemble_variable (gcov_sampling_rate_decl, 0, 0, 0);
>> + ? ?}
>> +
>> + ?if (flag_profile_generate_sampling && !instrumentation_to_be_sampled)
>> + ? ?{
>> + ? ? ?instrumentation_to_be_sampled = htab_create (100, htab_hash_pointer,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? htab_eq_pointer, NULL);
>> + ? ? ?gcov_sample_counter_decl = build_decl (
>> + ? ? ? ? ?UNKNOWN_LOCATION,
>> + ? ? ? ? ?VAR_DECL,
>> + ? ? ? ? ?get_identifier ("__gcov_sample_counter"),
>> + ? ? ? ? ?get_gcov_unsigned_t ());
>> + ? ? ?TREE_PUBLIC (gcov_sample_counter_decl) = 1;
>> + ? ? ?DECL_EXTERNAL (gcov_sample_counter_decl) = 1;
>> + ? ? ?DECL_ARTIFICIAL (gcov_sample_counter_decl) = 1;
>> + ? ? ?if (targetm.have_tls)
>> + ? ? ? ?DECL_TLS_MODEL (gcov_sample_counter_decl) =
>> + ? ? ? ? ? ?decl_default_tls_model (gcov_sample_counter_decl);
>> + ? ? ?assemble_variable (gcov_sample_counter_decl, 0, 0, 0);
>> + ? ?}
>> +}
>> +
>> +void
>> ?gimple_init_edge_profiler (void)
>> ?{
>> ? tree interval_profiler_fn_type;
>> @@ -148,6 +327,8 @@ gimple_init_edge_profiler (void)
>> ? tree dc_profiler_fn_type;
>> ? tree average_profiler_fn_type;
>>
>> + ?gimple_init_instrumentation_sampling ();
>> +
>> ? if (!gcov_type_node)
>> ? ? {
>> ? ? ? char name_buf[32];
>> @@ -277,6 +458,7 @@ gimple_init_edge_profiler (void)
>> ?void
>> ?gimple_gen_edge_profiler (int edgeno, edge e)
>> ?{
>> + ?void** slot;
>> ? tree ref, one;
>> ? gimple stmt1, stmt2, stmt3;
>>
>> @@ -292,6 +474,15 @@ gimple_gen_edge_profiler (int edgeno, edge e)
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?gimple_assign_lhs (stmt1), one);
>> ? gimple_assign_set_lhs (stmt2, make_ssa_name (gcov_type_tmp_var, stmt2));
>> ? stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs (stmt2));
>> +
>> + ?if (flag_profile_generate_sampling)
>> + ? ?{
>> + ? ? ?slot = htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt1,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? htab_hash_pointer (stmt1), INSERT);
>> + ? ? ?gcc_assert (!*slot);
>> + ? ? ?*slot = stmt1;
>> + ? ?}
>> +
>> ? gsi_insert_on_edge (e, stmt1);
>> ? gsi_insert_on_edge (e, stmt2);
>> ? gsi_insert_on_edge (e, stmt3);
>> Index: gcc/libgcov.c
>> ===================================================================
>> --- gcc/libgcov.c ? ? ? (revision 173136)
>> +++ gcc/libgcov.c ? ? ? (working copy)
>> @@ -83,6 +83,20 @@ void __gcov_merge_delta (gcov_type *counters ?__at
>> ?#ifdef L_gcov
>> ?#include "gcov-io.c"
>>
>> +/* Sampling rate. ?*/
>> +extern gcov_unsigned_t __gcov_sampling_rate;
>> +static int gcov_sampling_rate_initialized = 0;
>> +
>> +/* Set sampling rate to RATE. ?*/
>> +
>> +void __gcov_set_sampling_rate (unsigned int rate)
>> +{
>> + ?__gcov_sampling_rate = rate;
>> +}
>> +
>> +/* Per thread sample counter. ?*/
>> +THREAD_PREFIX gcov_unsigned_t __gcov_sample_counter = 0;
>> +
>> ?/* Chain of per-object gcov structures. ?*/
>> ?extern struct gcov_info *__gcov_list;
>>
>> @@ -365,7 +379,7 @@ gcov_exit (void)
>>
>> ? {
>> ? ? /* Check if the level of dirs to strip off specified. */
>> - ? ?char *tmp = getenv("GCOV_PREFIX_STRIP");
>> + ? ?char *tmp = getenv ("GCOV_PREFIX_STRIP");
>> ? ? if (tmp)
>> ? ? ? {
>> ? ? ? ?gcov_prefix_strip = atoi (tmp);
>> @@ -375,7 +389,7 @@ gcov_exit (void)
>> ? ? ? }
>> ? }
>> ? /* Get file name relocation prefix. ?Non-absolute values are ignored. */
>> - ?gcov_prefix = getenv("GCOV_PREFIX");
>> + ?gcov_prefix = getenv ("GCOV_PREFIX");
>> ? if (gcov_prefix)
>> ? ? {
>> ? ? ? prefix_length = strlen(gcov_prefix);
>> @@ -757,6 +771,17 @@ gcov_exit (void)
>> ?void
>> ?__gcov_init (struct gcov_info *info)
>> ?{
>> + ?if (!gcov_sampling_rate_initialized)
>> + ? ?{
>> + ? ? ?const char* env_value_str = getenv ("GCOV_SAMPLING_RATE");
>> + ? ? ?if (env_value_str)
>> + ? ? ? ?{
>> + ? ? ? ? ?int env_value_int = atoi(env_value_str);
>> + ? ? ? ? ?if (env_value_int >= 1)
>> + ? ? ? ? ? ?__gcov_sampling_rate = env_value_int;
>> + ? ? ? ?}
>> + ? ? ?gcov_sampling_rate_initialized = 1;
>> + ? ?}
>> ? if (!info->version)
>> ? ? return;
>> ? if (gcov_version (info, info->version, 0))
>> Index: gcc/params.def
>> ===================================================================
>> --- gcc/params.def ? ? ?(revision 173136)
>> +++ gcc/params.def ? ? ?(working copy)
>> @@ -929,6 +929,11 @@ DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP,
>> ? ? ? ? ?"name lookup fails",
>> ? ? ? ? ?1000, 0, 0)
>>
>> +DEFPARAM (PARAM_PROFILE_GENERATE_SAMPLING_RATE,
>> + ? ? ? ? "profile-generate-sampling-rate",
>> + ? ? ? ? "sampling rate with -fprofile-generate-sampling",
>> + ? ? ? ? 100, 0, 2000000000)
>> +
>> ?/*
>> ?Local variables:
>> ?mode:c
>>
>> --
>> This patch is available for review at http://codereview.appspot.com/4438083
>>
>

Attachment: fdopatch.txt
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]