This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [google] Added new dump flag -pmu to display pmu data in pass summaries (issue6489092)
- From: Teresa Johnson <tejohnson at google dot com>
- To: Chris Manghane <cmang at google dot com>
- Cc: reply at codereview dot appspotmail dot com, davidxl at google dot com, gcc-patches at gcc dot gnu dot org
- Date: Thu, 6 Sep 2012 17:08:26 -0700
- Subject: Re: [google] Added new dump flag -pmu to display pmu data in pass summaries (issue6489092)
- References: <20120906214903.031941417E7@rainbowponydeluxe.mtv.corp.google.com>
On Thu, Sep 6, 2012 at 2:49 PM, Chris Manghane <cmang@google.com> wrote:
> This patch adds a new dump flag that dumps PMU profile information using
> the -pmu dump option.
>
> This patch should be applied to google/main.
>
> Tested with crosstools.
>
> 2012-09-06 Chris Manghane <cmang@google.com>
>
> * gcc/doc/invoke.texi: Modified pmu-profile-use option.
> * gcc/tree-dump.c: Added new dump flag.
> * gcc/tree-pretty-print.c
> (dump_load_latency_details): New function.
> (dump_pmu): New function.
> (dump_generic_node): Added support for new dump flag.
> * gcc/tree-pretty-print.h: Added new function to global header.
> * gcc/tree-pass.h (enum tree_dump_index): Added new dump flag.
> * gcc/gcov.c:
> (process_pmu_profile): Fixed assertion conditions.
> * gcc/gcov-io.h (struct gcov_pmu_summary): Added new struct.
> * gcc/opts.c (common_handle_option): Added support for modified option.
> * gcc/gimple-pretty-print.c
> (dump_gimple_phi): Added support for new dump flag.
> (dump_gimple_stmt): Ditto.
> * gcc/coverage.c
> (htab_counts_entry_hash): Added new hash table for PMU info.
> (htab_pmu_entry_hash): Ditto.
> (htab_counts_entry_eq): Ditto.
> (htab_pmu_entry_eq): Ditto.
> (htab_counts_entry_del): Ditto.
> (htab_pmu_entry_del): Ditto.
> (read_counts_file): Ditto.
> (read_pmu_file): Ditto.
> (get_coverage_pmu_latency): Ditto.
> (get_coverage_pmu_branch_mispredict): Ditto.
> (pmu_data_present): Added new function.
> (coverage_init): Added pmu file reading support.
> * gcc/coverage.h: Added pmu functions to global header.
> * gcc/common.opt: Modified pmu-profile-use option.
>
> Index: gcc/doc/invoke.texi
> ===================================================================
> --- gcc/doc/invoke.texi (revision 190817)
> +++ gcc/doc/invoke.texi (working copy)
> @@ -399,7 +399,7 @@ Objective-C and Objective-C++ Dialects}.
> -fprofile-generate=@var{path} -fprofile-generate-sampling @gol
> -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
> -fpmu-profile-generate=@var{pmuoption} @gol
> --fpmu-profile-use=@var{pmuoption} @gol
> +-fpmu-profile-use=@var{pmudata} @gol
> -freciprocal-math -free -fregmove -frename-registers -freorder-blocks @gol
> -frecord-gcc-switches-in-elf@gol
> -freorder-blocks-and-partition -freorder-functions @gol
> @@ -8381,12 +8381,11 @@ displayed using coverage tool gcov. The params var
> "pmu_profile_n_addresses" can be used to restrict PMU data collection
> to only this many addresses.
>
> -@item -fpmu-profile-use=@var{pmuoption}
> +@item -fpmu-profile-use=@var{pmudata}
> @opindex fpmu-profile-use
>
> -Enable performance monitoring unit (PMU) profiling based
> -optimizations. Currently only @var{load-latency} and
> -@var{branch-mispredict} are supported.
> +If @var{pmudata} is specified, GCC will read PMU data from @var{pmudata}. If
> +unspecified, PMU data will be read from 'pmuprofile.gcda'.
>
> @item -fprofile-strip=@var{base_suffix}
> @opindex fprofile-strip
> Index: gcc/tree-dump.c
> ===================================================================
> --- gcc/tree-dump.c (revision 190817)
> +++ gcc/tree-dump.c (working copy)
> @@ -824,9 +824,11 @@ static const struct dump_option_value_info dump_op
> {"nouid", TDF_NOUID},
> {"enumerate_locals", TDF_ENUMERATE_LOCALS},
> {"scev", TDF_SCEV},
> + {"pmu", TDF_PMU},
> {"all", ~(TDF_RAW | TDF_SLIM | TDF_LINENO | TDF_TREE | TDF_RTL | TDF_IPA
> | TDF_STMTADDR | TDF_GRAPH | TDF_DIAGNOSTIC | TDF_VERBOSE
> - | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS | TDF_SCEV)},
> + | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS | TDF_SCEV
> + | TDF_PMU)},
> {NULL, 0}
> };
>
> Index: gcc/tree-pretty-print.c
> ===================================================================
> --- gcc/tree-pretty-print.c (revision 190817)
> +++ gcc/tree-pretty-print.c (working copy)
> @@ -25,6 +25,9 @@ along with GCC; see the file COPYING3. If not see
> #include "tm.h"
> #include "tree.h"
> #include "output.h"
> +#include "basic-block.h"
> +#include "gcov-io.h"
> +#include "coverage.h"
> #include "tree-pretty-print.h"
> #include "hashtab.h"
> #include "tree-flow.h"
> @@ -51,6 +54,7 @@ static void do_niy (pretty_printer *, const_tree);
>
> static pretty_printer buffer;
> static int initialized = 0;
> +static char *file_prefix = NULL;
>
> /* Try to print something for an unknown tree code. */
>
> @@ -461,7 +465,32 @@ dump_omp_clauses (pretty_printer *buffer, tree cla
> }
> }
>
> +/* Dump detailed information about pmu load latency events */
>
> +void
> +dump_load_latency_details (pretty_printer *buffer, gcov_pmu_ll_info_t *ll_info)
> +{
> + if (ll_info == NULL)
> + return;
> +
> + pp_string (buffer, "\n[load latency contribution: ");
> + pp_scalar (buffer, "%.2f%%\n", ll_info->self / 100.f);
> + pp_string (buffer, "average cycle distribution:\n");
> + pp_scalar (buffer, "%.2f%% <= 10 cycles\n",
> + ll_info->lt_10 / 100.f);
> + pp_scalar (buffer, "%.2f%% <= 32 cycles\n",
> + ll_info->lt_32 / 100.f);
> + pp_scalar (buffer, "%.2f%% <= 64 cycles\n",
> + ll_info->lt_64 / 100.f);
> + pp_scalar (buffer, "%.2f%% <= 256 cycles\n",
> + ll_info->lt_256 / 100.f);
> + pp_scalar (buffer, "%.2f%% <= 1024 cycles\n",
> + ll_info->lt_1024 / 100.f);
> + pp_scalar (buffer, "%.2f%% > 1024 cycles\n",
> + ll_info->gt_1024 / 100.f);
> + pp_string (buffer, "] ");
> +}
> +
> /* Dump location LOC to BUFFER. */
>
> static void
> @@ -485,7 +514,51 @@ dump_location (pretty_printer *buffer, location_t
> pp_string (buffer, "] ");
> }
>
> +/* Dump PMU info about LOC to BUFFER. */
>
> +static void
> +dump_pmu (pretty_printer *buffer, location_t loc)
> +{
> + expanded_location xloc = expand_location (loc);
> + gcov_pmu_ll_info_t *ll_info;
> + gcov_pmu_brm_info_t *brm_info;
> + char *src;
> + uint64_t src_size;
> +
> + if (!xloc.file)
> + return;
> +
> + if (!file_prefix)
> + file_prefix = getpwd();
Missing space before "(".
> +
> + if (!IS_ABSOLUTE_PATH (xloc.file))
> + {
> + src_size = strlen (xloc.file) + strlen (file_prefix) + 1;
> + src = XCNEWVEC (char, src_size + 1);
> + strcpy (src, file_prefix);
> + strcat (src, "/");
> + strcat (src, xloc.file);
> + }
> + else
> + src = xstrdup (xloc.file);
> +
> + ll_info = get_coverage_pmu_latency (src, xloc.line);
> + brm_info =
> + get_coverage_pmu_branch_mispredict (src, xloc.line);
> +
> + if (ll_info)
> + dump_load_latency_details (buffer, ll_info);
> +
> + if (brm_info)
> + {
> + pp_string (buffer, "[branch misprediction contribution: ");
> + pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
> + pp_string (buffer, "] ");
> + }
> +
> + XDELETE (src);
> +}
> +
> /* Dump lexical block BLOCK. BUFFER, SPC and FLAGS are as in
> dump_generic_node. */
>
> @@ -622,6 +695,9 @@ dump_generic_node (pretty_printer *buffer, tree no
> if ((flags & TDF_LINENO) && EXPR_HAS_LOCATION (node))
> dump_location (buffer, EXPR_LOCATION (node));
>
> + if ((flags & TDF_PMU) && pmu_data_present () && EXPR_HAS_LOCATION (node))
> + dump_pmu (buffer, EXPR_LOCATION (node));
> +
> switch (TREE_CODE (node))
> {
> case ERROR_MARK:
> Index: gcc/tree-pretty-print.h
> ===================================================================
> --- gcc/tree-pretty-print.h (revision 190817)
> +++ gcc/tree-pretty-print.h (working copy)
> @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see
> #define GCC_TREE_PRETTY_PRINT_H
>
> #include "pretty-print.h"
> +#include "basic-block.h"
> +#include "gcov-io.h"
>
> #define pp_tree_identifier(PP, T) \
> pp_base_tree_identifier (pp_base (PP), T)
> @@ -45,6 +47,7 @@ extern void print_generic_expr (FILE *, tree, int)
> extern void print_generic_decl (FILE *, tree, int);
> extern void debug_c_tree (tree);
> extern void dump_omp_clauses (pretty_printer *, tree, int, int);
> +extern void dump_load_latency_details (pretty_printer *, gcov_pmu_ll_info_t *);
> extern void print_call_name (pretty_printer *, tree, int);
> extern void debug_generic_expr (tree);
> extern void debug_generic_stmt (tree);
> Index: gcc/tree-pass.h
> ===================================================================
> --- gcc/tree-pass.h (revision 190817)
> +++ gcc/tree-pass.h (working copy)
> @@ -84,8 +84,8 @@ enum tree_dump_index
> #define TDF_ENUMERATE_LOCALS (1 << 22) /* Enumerate locals by uid. */
> #define TDF_CSELIB (1 << 23) /* Dump cselib details. */
> #define TDF_SCEV (1 << 24) /* Dump SCEV details. */
> +#define TDF_PMU (1 << 25) /* Dump PMU Profiling details */
>
> -
> /* In tree-dump.c */
>
> extern char *get_dump_file_name (int);
> Index: gcc/gcov.c
> ===================================================================
> --- gcc/gcov.c (revision 190817)
> +++ gcc/gcov.c (working copy)
> @@ -2350,6 +2350,7 @@ filter_pmu_data_lines (source_t *src)
> }
> }
>
> +
> /* Sort the load latency data according to the line numbers because
> we later iterate over sources in line number order. Normally we
> expect the PMU tool to provide sorted data, but a few entries can
> @@ -3022,9 +3023,9 @@ static void process_pmu_profile (void)
> {
> gcov_pmu_st_entry_t *st_entry = XCNEW (gcov_pmu_st_entry_t);
> gcov_read_pmu_string_table_entry (st_entry, length);
> + string_table->st_count++;
> /* Verify that we read string table entries in the right order */
> gcc_assert (st_entry->index == string_table->st_count);
> - string_table->st_count++;
> if (string_table->st_count >= string_table->alloc_st_count)
> {
> string_table->alloc_st_count *= 2;
> Index: gcc/gcov-io.h
> ===================================================================
> --- gcc/gcov-io.h (revision 190817)
> +++ gcc/gcov-io.h (working copy)
> @@ -702,6 +702,14 @@ typedef struct string_table
> gcov_pmu_tool_header_t *pmu_tool_header;
> } string_table_t;
>
> +/* Cumulative pmu data */
> +struct gcov_pmu_summary
> +{
> + ll_infos_t ll_infos; /* load latency infos. */
> + brm_infos_t brm_infos; /* branch misprediction infos */
> + string_table_t string_table; /* string table entries */
> +};
> +
> /* Structures embedded in coveraged program. The structures generated
> by write_profile must match these. */
>
> Index: gcc/opts.c
> ===================================================================
> --- gcc/opts.c (revision 190817)
> +++ gcc/opts.c (working copy)
> @@ -1645,6 +1645,11 @@ common_handle_option (struct gcc_options *opts,
> opts->x_flag_gcse_after_reload = value;
> break;
>
> + case OPT_fpmu_profile_use_:
> + opts->x_pmu_profile_data = xstrdup (arg);
> + value = true;
> + break;
> +
> case OPT_fprofile_generate_:
> opts->x_profile_data_prefix = xstrdup (arg);
> value = true;
> Index: gcc/gimple-pretty-print.c
> ===================================================================
> --- gcc/gimple-pretty-print.c (revision 190817)
> +++ gcc/gimple-pretty-print.c (working copy)
> @@ -26,8 +26,11 @@ along with GCC; see the file COPYING3. If not see
> #include "tm.h"
> #include "tree.h"
> #include "diagnostic.h"
> +#include "basic-block.h"
> #include "tree-pretty-print.h"
> #include "gimple-pretty-print.h"
> +#include "gcov-io.h"
> +#include "coverage.h"
> #include "hashtab.h"
> #include "tree-flow.h"
> #include "tree-pass.h"
> @@ -40,6 +43,7 @@ along with GCC; see the file COPYING3. If not see
>
> static pretty_printer buffer;
> static bool initialized = false;
> +static char *file_prefix = NULL;
>
> #define GIMPLE_NIY do_niy (buffer,gs)
>
> @@ -1629,6 +1633,51 @@ dump_gimple_phi (pretty_printer *buffer, gimple ph
> pp_decimal_int (buffer, xloc.column);
> pp_string (buffer, "] ");
> }
> + if ((flags & TDF_PMU) && pmu_data_present ()
> + && (gimple_phi_arg_location (phi, i)))
Combine this block and the near-identical one in dump_gimple_stmt below into a shared helper function.
> + {
> + expanded_location xloc;
> + gcov_pmu_ll_info_t *ll_info;
> + gcov_pmu_brm_info_t *brm_info;
> + char *src;
> + uint64_t src_size;
> +
> + xloc = expand_location (gimple_phi_arg_location (phi, i));
> + if (xloc.file)
> + {
> + if (!file_prefix)
> + file_prefix = getpwd();
Missing space before "(".
> +
> + if (!IS_ABSOLUTE_PATH (xloc.file))
> + {
> + src_size = strlen (xloc.file) + strlen (file_prefix) + 1;
> + src = XCNEWVEC (char, src_size + 1);
> + strcpy (src, file_prefix);
> + strcat (src, "/");
> + strcat (src, xloc.file);
> + }
> + else
> + src = xstrdup (xloc.file);
> +
> + ll_info = get_coverage_pmu_latency (src, xloc.line);
> + brm_info =
> + get_coverage_pmu_branch_mispredict (src, xloc.line);
> +
> + if (ll_info)
> + dump_load_latency_details (buffer, ll_info);
> +
> + if (brm_info)
> + {
> + pp_string (buffer, "\n[branch misprediction contribution: ");
> + pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
> + pp_string (buffer, "] ");
> + }
> +
> + XDELETE (src);
> + }
> +
> + }
> +
> dump_generic_node (buffer, gimple_phi_arg_def (phi, i), spc, flags,
> false);
> pp_character (buffer, '(');
> @@ -1875,6 +1924,50 @@ dump_gimple_stmt (pretty_printer *buffer, gimple g
> pp_string (buffer, "] ");
> }
>
> + if ((flags & TDF_PMU) && pmu_data_present () && gimple_has_location (gs))
> + {
> + expanded_location xloc;
> + gcov_pmu_ll_info_t *ll_info;
> + gcov_pmu_brm_info_t *brm_info;
> + char *src;
> + uint64_t src_size;
> +
> + xloc = expand_location (gimple_location (gs));
> + if (xloc.file)
> + {
> + if (!file_prefix)
> + file_prefix = getpwd();
Missing space before "(".
> +
> + if (!IS_ABSOLUTE_PATH (xloc.file))
> + {
> + src_size = strlen (xloc.file) + strlen (file_prefix) + 1;
> + src = XCNEWVEC (char, src_size + 1);
> + strcpy (src, file_prefix);
> + strcat (src, "/");
> + strcat (src, xloc.file);
> + }
> + else
> + src = xstrdup (xloc.file);
> +
> + ll_info = get_coverage_pmu_latency (src, xloc.line);
> + brm_info =
> + get_coverage_pmu_branch_mispredict (src, xloc.line);
> +
> + if (ll_info)
> + dump_load_latency_details (buffer, ll_info);
> +
> + if (brm_info)
> + {
> + pp_string (buffer, "\n[branch misprediction contribution: ");
> + pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
> + pp_string (buffer, "] ");
> + }
> +
> + XDELETE (src);
> + }
> + }
> +
> +
> if (flags & TDF_EH)
> {
> int lp_nr = lookup_stmt_eh_lp (gs);
> Index: gcc/coverage.c
> ===================================================================
> --- gcc/coverage.c (revision 190817)
> +++ gcc/coverage.c (working copy)
> @@ -96,6 +96,17 @@ typedef struct counts_entry
> struct gcov_ctr_summary summary;
> } counts_entry_t;
>
> +typedef struct pmu_entry
> +{
> + /* We hash by */
> + gcov_unsigned_t lineno;
> + char *filename;
> +
> + /* Store */
> + gcov_pmu_ll_info_t *ll_info;
> + gcov_pmu_brm_info_t *brm_info;
> +} pmu_entry_t;
> +
> static GTY(()) struct coverage_data *functions_head = 0;
> static struct coverage_data **functions_tail = &functions_head;
> static unsigned no_coverage = 0;
> @@ -129,6 +140,9 @@ static char pmu_profile_filename[] = "pmuprofile";
> /* Hash table of count data. */
> static htab_t counts_hash = NULL;
>
> +/* Hash table of pmu data. */
> +static htab_t pmu_hash = NULL;
> +
> /* The names of merge functions for counters. */
> static const char *const ctr_merge_functions[GCOV_COUNTERS] = GCOV_MERGE_FUNCTIONS;
> static const char *const ctr_names[GCOV_COUNTERS] = GCOV_COUNTER_NAMES;
> @@ -159,11 +173,17 @@ static tree gcov_pmu_top_n_address_decl = NULL_TRE
> /* To ensure that the above variables are initialized only once. */
> static int pmu_profiling_initialized = 0;
>
> +struct gcov_pmu_summary pmu_global_summary;
> +
> /* Forward declarations. */
> static hashval_t htab_counts_entry_hash (const void *);
> +static hashval_t htab_pmu_entry_hash (const void *);
> static int htab_counts_entry_eq (const void *, const void *);
> +static int htab_pmu_entry_eq (const void *, const void *);
> static void htab_counts_entry_del (void *);
> +static void htab_pmu_entry_del (void *);
> static void read_counts_file (const char *, unsigned);
> +static void read_pmu_file (const char*);
> static tree build_var (tree, tree, int);
> static void build_fn_info_type (tree, unsigned, tree);
> static void build_info_type (tree, tree);
> @@ -211,6 +231,14 @@ htab_counts_entry_hash (const void *of)
> return entry->ident * GCOV_COUNTERS + entry->ctr;
> }
>
> +static hashval_t
> +htab_pmu_entry_hash (const void *of)
> +{
> + const pmu_entry_t *const entry = (const pmu_entry_t *) of;
> +
> + return htab_hash_string (entry->filename) + entry->lineno;
> +}
> +
> static int
> htab_counts_entry_eq (const void *of1, const void *of2)
> {
> @@ -220,6 +248,16 @@ htab_counts_entry_eq (const void *of1, const void
> return entry1->ident == entry2->ident && entry1->ctr == entry2->ctr;
> }
>
> +static int
> +htab_pmu_entry_eq (const void *of1, const void *of2)
> +{
> + const pmu_entry_t *const entry1 = (const pmu_entry_t *) of1;
> + const pmu_entry_t *const entry2 = (const pmu_entry_t *) of2;
> +
> + return strcmp (entry1->filename, entry2->filename) == 0 &&
> + entry1->lineno == entry2->lineno;
> +}
> +
> static void
> htab_counts_entry_del (void *of)
> {
> @@ -233,6 +271,17 @@ htab_counts_entry_del (void *of)
> }
> }
>
> +static void
> +htab_pmu_entry_del (void *of)
> +{
> + pmu_entry_t *const entry = (pmu_entry_t *) of;
> +
> + free (entry->filename);
> + free (entry->ll_info);
> + free (entry->brm_info);
> + free (entry);
> +}
> +
> /* Returns true if MOD_ID is the id of the last source module. */
>
> int
> @@ -722,6 +771,247 @@ read_counts_file (const char *da_file_name, unsign
> gcov_close ();
> }
>
> +/* Read in the pmu profiling file, if available. DA_FILE_NAME is the
> + name of the gcda file. */
> +
> +static void read_pmu_file (const char* da_file_name)
> +{
> + gcov_unsigned_t tag;
> + ll_infos_t* ll_infos = &pmu_global_summary.ll_infos;
> + brm_infos_t* brm_infos = &pmu_global_summary.brm_infos;
> + string_table_t* string_table = &pmu_global_summary.string_table;
> + int is_error = 0;
> + unsigned i;
> + pmu_entry_t **slot, *entry, elt;
> + gcov_pmu_ll_info_t *ll_info;
> + gcov_pmu_brm_info_t *brm_info;
> + gcov_pmu_st_entry_t *st_entry;
> +
> +
> + if (!gcov_open (da_file_name, 1))
> + {
> + if (PARAM_VALUE (PARAM_GCOV_DEBUG))
> + {
> + /* Try to find .gcda file in the current working dir. */
> + da_file_name = lbasename (da_file_name);
> + if (!gcov_open (da_file_name, 1))
> + return;
> + }
> + else
> + return;
> + }
> +
> + if (!gcov_magic (gcov_read_unsigned (), GCOV_DATA_MAGIC))
> + {
> + warning (0, "%qs is not a gcov data file", da_file_name);
> + gcov_close ();
> + return;
> + }
> + else if ((tag = gcov_read_unsigned ()) != GCOV_VERSION)
> + {
> + char v[4], e[4];
> +
> + GCOV_UNSIGNED2STRING (v, tag);
> + GCOV_UNSIGNED2STRING (e, GCOV_VERSION);
> +
> + warning (0, "%qs is version %q.*s, expected version %q.*s",
> + da_file_name, 4, v, 4, e);
> + gcov_close ();
> + return;
> + }
> +
> + /* Read and discard the version. */
> + tag = gcov_read_unsigned ();
> +
> + /* Read and discard the stamp. */
> + tag = gcov_read_unsigned ();
> +
> + /* Initialize PMU data fields. */
> + ll_infos->ll_count = 0;
> + ll_infos->alloc_ll_count = 64;
> + ll_infos->ll_array = XCNEWVEC (gcov_pmu_ll_info_t *, ll_infos->alloc_ll_count);
> +
> + brm_infos->brm_count = 0;
> + brm_infos->alloc_brm_count = 64;
> + brm_infos->brm_array = XCNEWVEC (gcov_pmu_brm_info_t *,
> + brm_infos->alloc_brm_count);
> +
> + string_table->st_count = 0;
> + string_table->alloc_st_count = 64;
> + string_table->st_array = XCNEWVEC (gcov_pmu_st_entry_t *,
> + string_table->alloc_st_count);
> +
> + while ((tag = gcov_read_unsigned ()))
> + {
> + unsigned length = gcov_read_unsigned ();
> + unsigned long base = gcov_position ();
> +
> + if (tag == GCOV_TAG_PMU_LOAD_LATENCY_INFO)
> + {
> + gcov_pmu_ll_info_t *ll_info = XCNEW (gcov_pmu_ll_info_t);
> + gcov_read_pmu_load_latency_info (ll_info, length);
> + ll_infos->ll_count++;
> + if (ll_infos->ll_count >= ll_infos->alloc_ll_count)
> + {
> + /* need to realloc */
> + ll_infos->ll_array = (gcov_pmu_ll_info_t **)
> + xrealloc (ll_infos->ll_array, 2 * ll_infos->alloc_ll_count);
> + }
> + ll_infos->ll_array[ll_infos->ll_count - 1] = ll_info;
> + }
> + else if (tag == GCOV_TAG_PMU_BRANCH_MISPREDICT_INFO)
> + {
> + gcov_pmu_brm_info_t *brm_info = XCNEW (gcov_pmu_brm_info_t);
> + gcov_read_pmu_branch_mispredict_info (brm_info, length);
> + brm_infos->brm_count++;
> + if (brm_infos->brm_count >= brm_infos->alloc_brm_count)
> + {
> + /* need to realloc */
> + brm_infos->brm_array = (gcov_pmu_brm_info_t **)
> + xrealloc (brm_infos->brm_array, 2 * brm_infos->alloc_brm_count);
> + }
> + brm_infos->brm_array[brm_infos->brm_count - 1] = brm_info;
> + }
> + else if (tag == GCOV_TAG_PMU_TOOL_HEADER)
> + {
> + gcov_pmu_tool_header_t *tool_header = XCNEW (gcov_pmu_tool_header_t);
> + gcov_read_pmu_tool_header (tool_header, length);
> + ll_infos->pmu_tool_header = tool_header;
> + brm_infos->pmu_tool_header = tool_header;
> + }
> + else if (tag == GCOV_TAG_PMU_STRING_TABLE_ENTRY)
> + {
> + gcov_pmu_st_entry_t *st_entry = XCNEW (gcov_pmu_st_entry_t);
> + gcov_read_pmu_string_table_entry(st_entry, length);
Missing space before "(".
> + string_table->st_count++;
> + if (string_table->st_count >= string_table->alloc_st_count)
> + {
> + string_table->alloc_st_count *= 2;
> + string_table->st_array = (gcov_pmu_st_entry_t **)
> + xrealloc (string_table->st_array,
> + string_table->alloc_st_count);
> + }
> +
> + string_table->st_array[string_table->st_count - 1] = st_entry;
> + }
> +
> + gcov_sync (base, length);
> + if ((is_error = gcov_is_error ()))
> + {
> + error (is_error < 0 ? "%qs has overflowed" : "%qs is corrupted",
> + da_file_name);
> + gcov_close();
Missing space before "(".
> + break;
> + }
> + }
> +
> + gcov_close();
Missing space before "(".
> +
> + /* Construct hash table with information from gcda file. Entry keys are a
> + unique combination of the filename and the line number for easy access */
> + if (!pmu_hash)
> + pmu_hash = htab_create (10,
> + htab_pmu_entry_hash, htab_pmu_entry_eq,
> + htab_pmu_entry_del);
> +
> + gcc_assert (pmu_hash != NULL);
> + gcc_assert (ll_infos->ll_count > 0);
> + gcc_assert (brm_infos->brm_count > 0);
> +
> + for (i = 0; i < ll_infos->ll_count; ++i)
> + {
> + ll_info = ll_infos->ll_array[i];
> + st_entry = string_table->st_array[ll_info->filetag - 1];
> + elt.lineno = ll_info->line;
> + elt.filename = xstrdup (st_entry->str);
> +
> + slot = (pmu_entry_t **) htab_find_slot
> + (pmu_hash, &elt, INSERT);
> + entry = *slot;
> + XDELETE (elt.filename);
> + if (!entry)
> + {
> + *slot = entry = XCNEW (pmu_entry_t);
> + entry->lineno = elt.lineno;
> + entry->filename = xstrdup (st_entry->str);
> + entry->ll_info = ll_info;
> + }
> + /* No need to check for existing entries because
> + there should only be one entry per filename and line number */
> + }
> +
> + for (i = 0; i < brm_infos->brm_count; ++i)
> + {
> + brm_info = brm_infos->brm_array[i];
> + st_entry = string_table->st_array[brm_info->filetag - 1];
> + elt.lineno = brm_info->line;
> + elt.filename = xstrdup (st_entry->str);
> + slot = (pmu_entry_t **) htab_find_slot
> + (pmu_hash, &elt, INSERT);
> + entry = *slot;
> + XDELETE (elt.filename);
> + if (!entry)
> + {
> + *slot = entry = XCNEW (pmu_entry_t);
> + entry->lineno = elt.lineno;
> + entry->filename = xstrdup(st_entry->str);
Missing space before "(".
> + entry->brm_info = brm_info;
> + }
> + else
> + {
> + /* There already exists a pmu_entry_t that is partially filled
> + with load latency info */
> + entry->brm_info = brm_info;
> + }
> + }
> +}
> +
> +/* Returns the load latency info for line number LINENO of source file
> + FILENAME. */
> +
> +gcov_pmu_ll_info_t *
> +get_coverage_pmu_latency (const char* filename, gcov_unsigned_t lineno)
> +{
> + pmu_entry_t *entry, elt;
> +
> + /* No hash table, no pmu data */
> + if (pmu_hash == NULL)
> + return NULL;
> +
> + elt.filename = xstrdup (filename);
> + elt.lineno = lineno;
> +
> + entry = (pmu_entry_t *) htab_find(pmu_hash, &elt);
Missing space before "(".
> + XDELETE (elt.filename);
> + if (entry)
> + return entry->ll_info;
> +
> + return NULL;
> +}
> +
> +/* Returns the branch misprediction info for line number LINENO of source file
> + FILENAME. */
> +
> +gcov_pmu_brm_info_t *
> +get_coverage_pmu_branch_mispredict (const char* filename, gcov_unsigned_t lineno)
> +{
> + pmu_entry_t *entry, elt;
> +
> + /* No hash table, no pmu data */
> + if (pmu_hash == NULL)
> + return NULL;
> +
> + elt.filename = xstrdup(filename);
Missing space before "(".
> + elt.lineno = lineno;
> +
> + entry = (pmu_entry_t *) htab_find(pmu_hash, &elt);
Missing space before "(".
> + XDELETE (elt.filename);
> + if (entry)
> + return entry->brm_info;
> +
> + return NULL;
> +}
> +
> /* Returns the coverage data entry for counter type COUNTER of function
> FUNC. EXPECTED is the number of expected counter entries. */
>
> @@ -1125,6 +1415,14 @@ coverage_function_present (unsigned fn_ident)
> return item != NULL;
> }
>
> +/* True if there is PMU data present in this compilation */
> +
> +bool
> +pmu_data_present (void)
> +{
> + return (pmu_hash != NULL);
> +}
> +
> /* Update function and program direct-call coverage counts. */
>
> void
> @@ -2271,6 +2569,10 @@ coverage_init (const char *filename, const char* s
> if (flag_branch_probabilities)
> read_counts_file (da_file_name, 0);
>
> + /* Reads at most one auxiliary GCDA file since we don't support merging */
> + if (pmu_profile_data != 0 && TDF_PMU)
> + read_pmu_file (pmu_profile_data);
> +
> /* Rebuild counts_hash and read the auxiliary GCDA files. */
> if (flag_profile_use && L_IPO_COMP_MODE)
> {
> Index: gcc/coverage.h
> ===================================================================
> --- gcc/coverage.h (revision 190817)
> +++ gcc/coverage.h (working copy)
> @@ -45,7 +45,12 @@ extern int coverage_counter_alloc (unsigned /*coun
> extern tree tree_coverage_counter_ref (unsigned /*counter*/, unsigned/*num*/);
> /* Use a counter address from the most recent allocation. */
> extern tree tree_coverage_counter_addr (unsigned /*counter*/, unsigned/*num*/);
> -
> +/* Get the load latency info for the current file and line */
> +extern gcov_pmu_ll_info_t *get_coverage_pmu_latency (const char*,
> + gcov_unsigned_t);
> +/* Get the branch misprediction info for the current file and line */
> +extern gcov_pmu_brm_info_t *
> +get_coverage_pmu_branch_mispredict (const char*, gcov_unsigned_t);
> /* Get all the counters for the current function. */
> extern gcov_type *get_coverage_counts (unsigned /*counter*/,
> unsigned /*expected*/,
> @@ -70,6 +75,9 @@ extern void coverage_dc_end_function (void);
> is present in the coverage internal data structures. */
> extern bool coverage_function_present (unsigned fn_ident);
>
> +/* True if there is PMU data present in this compilation. */
> +extern bool pmu_data_present (void);
> +
> extern tree get_gcov_type (void);
> extern tree get_gcov_unsigned_t (void);
>
> Index: gcc/common.opt
> ===================================================================
> --- gcc/common.opt (revision 190817)
> +++ gcc/common.opt (working copy)
> @@ -1684,8 +1684,8 @@ Common Joined RejectNegative Var(flag_pmu_profile_
> -fpmu-profile-generate=[load-latency] Generate pmu profile for cache misses. Currently only pfmon based load latency profiling is supported on Intel/PEBS and AMD/IBS platforms.
>
> fpmu-profile-use=
> -Common Joined RejectNegative Var(flag_pmu_profile_use)
> --fpmu-profile-use=[load-latency] Use pmu profile data while optimizing. Currently only perfmon based load latency profiling is supported on Intel/PEBS and AMD/IBS platforms.
> +Common Joined RejectNegative Var(pmu_profile_data)
> +-fpmu-profile-use=[pmuprofile.gcda] The pmu profile data file to use for pmu feedback.
>
> fpredictive-commoning
> Common Report Var(flag_predictive_commoning) Optimization
>
> --
> This patch is available for review at http://codereview.appspot.com/6489092
--
Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413