This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [google] Add new dump flag -pmu to display PMU data in dumps (issue6551072)


Sorry to reply late, missed this mail again... not sure why.

LGTM, okay for google branches.

Dehao

On Mon, Sep 24, 2012 at 1:20 PM, Teresa Johnson <tejohnson@google.com> wrote:
> Revised patch to add a new dump flag that dumps PMU profile information using
> the -pmu dump option. (Was issue 6489092, creating new issue since I don't own
> that one.)
>
> Ok for google/main?
>
> Passes bootstrap and regression tests.
>
> Teresa
>
> 2012-09-24  Teresa Johnson  <tejohnson@google.com>
>             Chris Manghane  <cmang@google.com>
>
>         * doc/invoke.texi: Update -fpmu-profile-use option.
>         * tree-dump.c: Add new dump flag.
>         * tree-pretty-print.c (dump_load_latency_details): New function.
>         (dump_pmu): Ditto.
>         (dump_generic_node): Add support for new dump flag.
>         * tree-pretty-print.h (dump_pmu): Declare.
>         * tree-pass.h (enum tree_dump_index): Add new dump flag.
>         * gcov.c (process_pmu_profile): Fix string table count assert.
>         * opts.c (OPT_fpmu_profile_use_): Add support for -fpmu-profile-use.
>         * gimple-pretty-print.c (dump_gimple_phi): Add support for new dump
>         flag.
>         (dump_gimple_stmt): Ditto.
>         * coverage.c (struct pmu_entry): New structure.
>         (struct gcov_pmu_summary): Ditto.
>         (htab_pmu_entry_hash): New function.
>         (htab_pmu_entry_eq): Ditto.
>         (htab_pmu_entry_del): Ditto.
>         (read_pmu_file): Ditto.
>         (get_pmu_hash_entry): Ditto.
>         (process_pmu_data): Ditto.
>         (get_coverage_pmu_latency): Ditto.
>         (get_coverage_pmu_branch_mispredict): Ditto.
>         (pmu_data_present): Ditto.
>         (coverage_init): Add pmu file read support.
>         * coverage.h (get_coverage_pmu_latency): Declare.
>         (get_coverage_pmu_branch_mispredict): Ditto.
>         * common.opt: Update -fpmu-profile-use option.
>
> Index: doc/invoke.texi
> ===================================================================
> --- doc/invoke.texi     (revision 191138)
> +++ doc/invoke.texi     (working copy)
> @@ -399,7 +399,7 @@ Objective-C and Objective-C++ Dialects}.
>  -fprofile-generate=@var{path} -fprofile-generate-sampling @gol
>  -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
>  -fpmu-profile-generate=@var{pmuoption} @gol
> --fpmu-profile-use=@var{pmuoption} @gol
> +-fpmu-profile-use=@var{pmudata} @gol
>  -freciprocal-math -free -fregmove -frename-registers -freorder-blocks @gol
>  -frecord-gcc-switches-in-elf@gol
>  -freorder-blocks-and-partition -freorder-functions @gol
> @@ -8381,12 +8381,11 @@ displayed using coverage tool gcov. The params var
>  "pmu_profile_n_addresses" can be used to restrict PMU data collection
>  to only this many addresses.
>
> -@item -fpmu-profile-use=@var{pmuoption}
> +@item -fpmu-profile-use=@var{pmudata}
>  @opindex fpmu-profile-use
>
> -Enable performance monitoring unit (PMU) profiling based
> -optimizations.  Currently only @var{load-latency} and
> -@var{branch-mispredict} are supported.
> +If @var{pmudata} is specified, GCC will read PMU data from @var{pmudata}. If
> +unspecified, PMU data will be read from 'pmuprofile.gcda'.
>
>  @item -fprofile-strip=@var{base_suffix}
>  @opindex fprofile-strip
> Index: tree-dump.c
> ===================================================================
> --- tree-dump.c (revision 191138)
> +++ tree-dump.c (working copy)
> @@ -824,9 +824,11 @@ static const struct dump_option_value_info dump_op
>    {"nouid", TDF_NOUID},
>    {"enumerate_locals", TDF_ENUMERATE_LOCALS},
>    {"scev", TDF_SCEV},
> +  {"pmu", TDF_PMU},
>    {"all", ~(TDF_RAW | TDF_SLIM | TDF_LINENO | TDF_TREE | TDF_RTL | TDF_IPA
>             | TDF_STMTADDR | TDF_GRAPH | TDF_DIAGNOSTIC | TDF_VERBOSE
> -           | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS | TDF_SCEV)},
> +           | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS | TDF_SCEV
> +            | TDF_PMU)},
>    {NULL, 0}
>  };
>
> Index: tree-pretty-print.c
> ===================================================================
> --- tree-pretty-print.c (revision 191138)
> +++ tree-pretty-print.c (working copy)
> @@ -25,6 +25,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tm.h"
>  #include "tree.h"
>  #include "output.h"
> +#include "basic-block.h"
> +#include "gcov-io.h"
> +#include "coverage.h"
>  #include "tree-pretty-print.h"
>  #include "hashtab.h"
>  #include "tree-flow.h"
> @@ -51,6 +54,7 @@ static void do_niy (pretty_printer *, const_tree);
>
>  static pretty_printer buffer;
>  static int initialized = 0;
> +static char *file_prefix = NULL;
>
>  /* Try to print something for an unknown tree code.  */
>
> @@ -461,7 +465,32 @@ dump_omp_clauses (pretty_printer *buffer, tree cla
>      }
>  }
>
> +/* Dump detailed information about pmu load latency events */
>
> +static void
> +dump_load_latency_details (pretty_printer *buffer, gcov_pmu_ll_info_t *ll_info)
> +{
> +  if (ll_info == NULL)
> +    return;
> +
> +  pp_string (buffer, "\n[load latency contribution: ");
> +  pp_scalar (buffer, "%.2f%%\n", ll_info->self / 100.f);
> +  pp_string (buffer, "average cycle distribution:\n");
> +  pp_scalar (buffer, "%.2f%% <= 10 cycles\n",
> +             ll_info->lt_10 / 100.f);
> +  pp_scalar (buffer, "%.2f%% <= 32 cycles\n",
> +             ll_info->lt_32 / 100.f);
> +  pp_scalar (buffer, "%.2f%% <= 64 cycles\n",
> +             ll_info->lt_64 / 100.f);
> +  pp_scalar (buffer, "%.2f%% <= 256 cycles\n",
> +             ll_info->lt_256 / 100.f);
> +  pp_scalar (buffer, "%.2f%% <= 1024 cycles\n",
> +             ll_info->lt_1024 / 100.f);
> +  pp_scalar (buffer, "%.2f%% > 1024 cycles\n",
> +             ll_info->gt_1024 / 100.f);
> +  pp_string (buffer, "] ");
> +}
> +
>  /* Dump location LOC to BUFFER.  */
>
>  static void
> @@ -485,7 +514,51 @@ dump_location (pretty_printer *buffer, location_t
>    pp_string (buffer, "] ");
>  }
>
> +/* Dump PMU info about LOC to BUFFER.  */
>
> +void
> +dump_pmu (pretty_printer *buffer, location_t loc)
> +{
> +  expanded_location xloc = expand_location (loc);
> +  gcov_pmu_ll_info_t *ll_info;
> +  gcov_pmu_brm_info_t *brm_info;
> +  char *src;
> +  uint64_t src_size;
> +
> +  if (!xloc.file)
> +    return;
> +
> +  if (!file_prefix)
> +    file_prefix = getpwd ();
> +
> +  if (!IS_ABSOLUTE_PATH (xloc.file))
> +    {
> +      src_size = strlen (xloc.file) + strlen (file_prefix) + 1;
> +      src = XCNEWVEC (char, src_size + 1);
> +      strcpy (src, file_prefix);
> +      strcat (src, "/");
> +      strcat (src, xloc.file);
> +    }
> +  else
> +    src = xstrdup (xloc.file);
> +
> +  ll_info = get_coverage_pmu_latency (src, xloc.line);
> +  brm_info =
> +      get_coverage_pmu_branch_mispredict (src, xloc.line);
> +
> +  if (ll_info)
> +    dump_load_latency_details (buffer, ll_info);
> +
> +  if (brm_info)
> +    {
> +      pp_string (buffer, "[branch misprediction contribution: ");
> +      pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
> +      pp_string (buffer, "] ");
> +    }
> +
> +  XDELETE (src);
> +}
> +
>  /* Dump lexical block BLOCK.  BUFFER, SPC and FLAGS are as in
>     dump_generic_node.  */
>
> @@ -622,6 +695,9 @@ dump_generic_node (pretty_printer *buffer, tree no
>    if ((flags & TDF_LINENO) && EXPR_HAS_LOCATION (node))
>      dump_location (buffer, EXPR_LOCATION (node));
>
> +  if ((flags & TDF_PMU) && pmu_data_present () && EXPR_HAS_LOCATION (node))
> +    dump_pmu (buffer, EXPR_LOCATION (node));
> +
>    switch (TREE_CODE (node))
>      {
>      case ERROR_MARK:
> Index: tree-pretty-print.h
> ===================================================================
> --- tree-pretty-print.h (revision 191138)
> +++ tree-pretty-print.h (working copy)
> @@ -45,6 +45,7 @@ extern void print_generic_expr (FILE *, tree, int)
>  extern void print_generic_decl (FILE *, tree, int);
>  extern void debug_c_tree (tree);
>  extern void dump_omp_clauses (pretty_printer *, tree, int, int);
> +extern void dump_pmu (pretty_printer *, location_t);
>  extern void print_call_name (pretty_printer *, tree, int);
>  extern void debug_generic_expr (tree);
>  extern void debug_generic_stmt (tree);
> Index: tree-pass.h
> ===================================================================
> --- tree-pass.h (revision 191138)
> +++ tree-pass.h (working copy)
> @@ -84,6 +84,7 @@ enum tree_dump_index
>  #define TDF_ENUMERATE_LOCALS (1 << 22) /* Enumerate locals by uid.  */
>  #define TDF_CSELIB     (1 << 23)       /* Dump cselib details.  */
>  #define TDF_SCEV       (1 << 24)       /* Dump SCEV details.  */
> +#define TDF_PMU         (1 << 25)       /* Dump PMU Profiling details  */
>
>
>  /* In tree-dump.c */
> Index: gcov.c
> ===================================================================
> --- gcov.c      (revision 191138)
> +++ gcov.c      (working copy)
> @@ -3022,9 +3022,9 @@ static void process_pmu_profile (void)
>          {
>            gcov_pmu_st_entry_t *st_entry = XCNEW (gcov_pmu_st_entry_t);
>            gcov_read_pmu_string_table_entry (st_entry, length);
> +          string_table->st_count++;
>            /* Verify that we read string table entries in the right order */
>            gcc_assert (st_entry->index == string_table->st_count);
> -          string_table->st_count++;
>            if (string_table->st_count >= string_table->alloc_st_count)
>              {
>                string_table->alloc_st_count *= 2;
> Index: opts.c
> ===================================================================
> --- opts.c      (revision 191138)
> +++ opts.c      (working copy)
> @@ -1645,6 +1645,12 @@ common_handle_option (struct gcc_options *opts,
>         opts->x_flag_gcse_after_reload = value;
>        break;
>
> +    case OPT_fpmu_profile_use_:
> +      opts->x_pmu_profile_data = xstrdup (arg);
> +      opts->x_flag_pmu_profile_use = true;
> +      value = true;
> +      break;
> +
>      case OPT_fprofile_generate_:
>        opts->x_profile_data_prefix = xstrdup (arg);
>        value = true;
> Index: gimple-pretty-print.c
> ===================================================================
> --- gimple-pretty-print.c       (revision 191138)
> +++ gimple-pretty-print.c       (working copy)
> @@ -26,8 +26,10 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tm.h"
>  #include "tree.h"
>  #include "diagnostic.h"
> +#include "basic-block.h"
>  #include "tree-pretty-print.h"
>  #include "gimple-pretty-print.h"
> +#include "coverage.h"
>  #include "hashtab.h"
>  #include "tree-flow.h"
>  #include "tree-pass.h"
> @@ -1629,6 +1631,10 @@ dump_gimple_phi (pretty_printer *buffer, gimple ph
>           pp_decimal_int (buffer, xloc.column);
>           pp_string (buffer, "] ");
>         }
> +      if ((flags & TDF_PMU) && pmu_data_present ()
> +          && (gimple_phi_arg_location (phi, i)))
> +        dump_pmu (buffer, gimple_phi_arg_location (phi, i));
> +
>        dump_generic_node (buffer, gimple_phi_arg_def (phi, i), spc, flags,
>                          false);
>        pp_character (buffer, '(');
> @@ -1875,6 +1881,9 @@ dump_gimple_stmt (pretty_printer *buffer, gimple g
>        pp_string (buffer, "] ");
>      }
>
> +  if ((flags & TDF_PMU) && pmu_data_present () && gimple_has_location (gs))
> +    dump_pmu (buffer, gimple_location (gs));
> +
>    if (flags & TDF_EH)
>      {
>        int lp_nr = lookup_stmt_eh_lp (gs);
> Index: coverage.c
> ===================================================================
> --- coverage.c  (revision 191138)
> +++ coverage.c  (working copy)
> @@ -96,6 +96,17 @@ typedef struct counts_entry
>    struct gcov_ctr_summary summary;
>  } counts_entry_t;
>
> +typedef struct pmu_entry
> +{
> +  /* We hash by source file name and line number.  */
> +  gcov_unsigned_t lineno;
> +  char *filename;
> +
> +  /* Store the load latency and branch misprediction info.  */
> +  gcov_pmu_ll_info_t *ll_info;
> +  gcov_pmu_brm_info_t *brm_info;
> +} pmu_entry_t;
> +
>  static GTY(()) struct coverage_data *functions_head = 0;
>  static struct coverage_data **functions_tail = &functions_head;
>  static unsigned no_coverage = 0;
> @@ -129,6 +140,9 @@ static char pmu_profile_filename[] = "pmuprofile";
>  /* Hash table of count data.  */
>  static htab_t counts_hash = NULL;
>
> +/* Hash table of pmu data.  */
> +static htab_t pmu_hash = NULL;
> +
>  /* The names of merge functions for counters.  */
>  static const char *const ctr_merge_functions[GCOV_COUNTERS] = GCOV_MERGE_FUNCTIONS;
>  static const char *const ctr_names[GCOV_COUNTERS] = GCOV_COUNTER_NAMES;
> @@ -159,11 +173,27 @@ static tree gcov_pmu_top_n_address_decl = NULL_TRE
>  /* To ensure that the above variables are initialized only once.  */
>  static int pmu_profiling_initialized = 0;
>
> +/* Cumulative pmu data */
> +struct gcov_pmu_summary
> +{
> +  ll_infos_t ll_infos;         /* load latency infos. */
> +  brm_infos_t brm_infos;       /* branch misprediction infos */
> +  string_table_t string_table; /* string table entries */
> +};
> +
> +struct gcov_pmu_summary pmu_global_summary;
> +
>  /* Forward declarations.  */
>  static hashval_t htab_counts_entry_hash (const void *);
> +static hashval_t htab_pmu_entry_hash (const void *);
>  static int htab_counts_entry_eq (const void *, const void *);
> +static int htab_pmu_entry_eq (const void *, const void *);
>  static void htab_counts_entry_del (void *);
> +static void htab_pmu_entry_del (void *);
>  static void read_counts_file (const char *, unsigned);
> +static void read_pmu_file (const char*);
> +static pmu_entry_t *get_pmu_hash_entry (gcov_unsigned_t, gcov_unsigned_t);
> +static void process_pmu_data (void);
>  static tree build_var (tree, tree, int);
>  static void build_fn_info_type (tree, unsigned, tree);
>  static void build_info_type (tree, tree);
> @@ -211,6 +241,14 @@ htab_counts_entry_hash (const void *of)
>    return entry->ident * GCOV_COUNTERS + entry->ctr;
>  }
>
> +static hashval_t
> +htab_pmu_entry_hash (const void *of)
> +{
> +  const pmu_entry_t *const entry = (const pmu_entry_t *) of;
> +
> +  return htab_hash_string (entry->filename) + entry->lineno;
> +}
> +
>  static int
>  htab_counts_entry_eq (const void *of1, const void *of2)
>  {
> @@ -220,6 +258,16 @@ htab_counts_entry_eq (const void *of1, const void
>    return entry1->ident == entry2->ident && entry1->ctr == entry2->ctr;
>  }
>
> +static int
> +htab_pmu_entry_eq (const void *of1, const void *of2)
> +{
> +  const pmu_entry_t *const entry1 = (const pmu_entry_t *) of1;
> +  const pmu_entry_t *const entry2 = (const pmu_entry_t *) of2;
> +
> +  return strcmp (entry1->filename, entry2->filename) == 0 &&
> +      entry1->lineno == entry2->lineno;
> +}
> +
>  static void
>  htab_counts_entry_del (void *of)
>  {
> @@ -233,6 +281,17 @@ htab_counts_entry_del (void *of)
>      }
>  }
>
> +static void
> +htab_pmu_entry_del (void *of)
> +{
> +  pmu_entry_t *const entry = (pmu_entry_t *) of;
> +
> +  free (entry->filename);
> +  free (entry->ll_info);
> +  free (entry->brm_info);
> +  free (entry);
> +}
> +
>  /* Returns true if MOD_ID is the id of the last source module.  */
>
>  int
> @@ -722,6 +781,252 @@ read_counts_file (const char *da_file_name, unsign
>    gcov_close ();
>  }
>
> +/* Read in the pmu profiling file, if available. DA_FILE_NAME is the
> +   name of the gcda file. */
> +
> +static void
> +read_pmu_file (const char* da_file_name)
> +{
> +  gcov_unsigned_t tag;
> +  ll_infos_t *ll_infos = &pmu_global_summary.ll_infos;
> +  brm_infos_t *brm_infos = &pmu_global_summary.brm_infos;
> +  string_table_t *string_table = &pmu_global_summary.string_table;
> +  int is_error = 0;
> +
> +  if (!gcov_open (da_file_name, 1))
> +    {
> +      if (PARAM_VALUE (PARAM_GCOV_DEBUG))
> +        {
> +          /* Try to find .gcda file in the current working dir.  */
> +          da_file_name = lbasename (da_file_name);
> +          if (!gcov_open (da_file_name, 1))
> +            return;
> +        }
> +      else
> +        return;
> +    }
> +
> +  if (!gcov_magic (gcov_read_unsigned (), GCOV_DATA_MAGIC))
> +    {
> +      warning (0, "%qs is not a gcov data file", da_file_name);
> +      gcov_close ();
> +      return;
> +    }
> +  else if ((tag = gcov_read_unsigned ()) != GCOV_VERSION)
> +    {
> +      char v[4], e[4];
> +
> +      GCOV_UNSIGNED2STRING (v, tag);
> +      GCOV_UNSIGNED2STRING (e, GCOV_VERSION);
> +
> +      warning (0, "%qs is version %q.*s, expected version %q.*s",
> +               da_file_name, 4, v, 4, e);
> +      gcov_close ();
> +      return;
> +    }
> +
> +  /* Read and discard the version. */
> +  tag = gcov_read_unsigned ();
> +
> +  /* Read and discard the stamp.  */
> +  tag = gcov_read_unsigned ();
> +
> +  /* Initialize PMU data fields. */
> +  ll_infos->ll_count = 0;
> +  ll_infos->alloc_ll_count = 64;
> +  ll_infos->ll_array = XCNEWVEC (gcov_pmu_ll_info_t *, ll_infos->alloc_ll_count);
> +
> +  brm_infos->brm_count = 0;
> +  brm_infos->alloc_brm_count = 64;
> +  brm_infos->brm_array = XCNEWVEC (gcov_pmu_brm_info_t *,
> +                                   brm_infos->alloc_brm_count);
> +
> +  string_table->st_count = 0;
> +  string_table->alloc_st_count = 64;
> +  string_table->st_array = XCNEWVEC (gcov_pmu_st_entry_t *,
> +                                     string_table->alloc_st_count);
> +
> +  while ((tag = gcov_read_unsigned ()))
> +    {
> +      gcov_unsigned_t length = gcov_read_unsigned ();
> +      gcov_position_t base = gcov_position ();
> +
> +      if (tag == GCOV_TAG_PMU_LOAD_LATENCY_INFO)
> +        {
> +          gcov_pmu_ll_info_t *ll_info = XCNEW (gcov_pmu_ll_info_t);
> +          gcov_read_pmu_load_latency_info (ll_info, length);
> +          ll_infos->ll_count++;
> +          if (ll_infos->ll_count >= ll_infos->alloc_ll_count)
> +            {
> +              /* need to realloc */
> +              ll_infos->ll_array = (gcov_pmu_ll_info_t **)
> +                xrealloc (ll_infos->ll_array, 2 * ll_infos->alloc_ll_count);
> +            }
> +          ll_infos->ll_array[ll_infos->ll_count - 1] = ll_info;
> +        }
> +      else if (tag == GCOV_TAG_PMU_BRANCH_MISPREDICT_INFO)
> +        {
> +          gcov_pmu_brm_info_t *brm_info = XCNEW (gcov_pmu_brm_info_t);
> +          gcov_read_pmu_branch_mispredict_info (brm_info, length);
> +          brm_infos->brm_count++;
> +          if (brm_infos->brm_count >= brm_infos->alloc_brm_count)
> +            {
> +              /* need to realloc */
> +              brm_infos->brm_array = (gcov_pmu_brm_info_t **)
> +                xrealloc (brm_infos->brm_array, 2 * brm_infos->alloc_brm_count);
> +            }
> +          brm_infos->brm_array[brm_infos->brm_count - 1] = brm_info;
> +        }
> +      else if (tag == GCOV_TAG_PMU_TOOL_HEADER)
> +        {
> +          gcov_pmu_tool_header_t *tool_header = XCNEW (gcov_pmu_tool_header_t);
> +          gcov_read_pmu_tool_header (tool_header, length);
> +          ll_infos->pmu_tool_header = tool_header;
> +          brm_infos->pmu_tool_header = tool_header;
> +        }
> +      else if (tag == GCOV_TAG_PMU_STRING_TABLE_ENTRY)
> +       {
> +         gcov_pmu_st_entry_t *st_entry = XCNEW (gcov_pmu_st_entry_t);
> +         gcov_read_pmu_string_table_entry (st_entry, length);
> +         string_table->st_count++;
> +         if (string_table->st_count >= string_table->alloc_st_count)
> +           {
> +             string_table->alloc_st_count *= 2;
> +             string_table->st_array = (gcov_pmu_st_entry_t **)
> +                 xrealloc (string_table->st_array,
> +                           string_table->alloc_st_count);
> +           }
> +
> +         string_table->st_array[string_table->st_count - 1] = st_entry;
> +       }
> +
> +      gcov_sync (base, length);
> +      if ((is_error = gcov_is_error ()))
> +       {
> +         error (is_error < 0 ? "%qs has overflowed" : "%qs is corrupted",
> +                da_file_name);
> +          gcov_close ();
> +         break;
> +       }
> +    }
> +
> +  gcov_close ();
> +
> +  /* Store pmu data in a global hash table keyed by source position.  */
> +  process_pmu_data ();
> +}
> +
> +/* Return a pmu hash table entry for the given FILETAG and LINE, creating
> +   a new entry if necessary.  */
> +
> +static pmu_entry_t *
> +get_pmu_hash_entry (gcov_unsigned_t filetag, gcov_unsigned_t line)
> +{
> +  string_table_t *string_table = &pmu_global_summary.string_table;
> +  gcov_pmu_st_entry_t *st_entry;
> +  pmu_entry_t **slot, *entry, elt;
> +
> +  st_entry = string_table->st_array[filetag - 1];
> +  elt.lineno = line;
> +  elt.filename = xstrdup (st_entry->str);
> +  slot = (pmu_entry_t **) htab_find_slot
> +      (pmu_hash, &elt, INSERT);
> +  entry = *slot;
> +  XDELETE (elt.filename);
> +  if (!entry)
> +    {
> +      *slot = entry = XCNEW (pmu_entry_t);
> +      entry->lineno = elt.lineno;
> +      entry->filename = xstrdup (st_entry->str);
> +    }
> +  return entry;
> +}
> +
> +/* Process the pmu profiling data, storing it in a global hash table
> +   keyed by source position.  */
> +
> +static void
> +process_pmu_data (void)
> +{
> +  ll_infos_t *ll_infos = &pmu_global_summary.ll_infos;
> +  brm_infos_t *brm_infos = &pmu_global_summary.brm_infos;
> +  unsigned i;
> +  pmu_entry_t *entry;
> +  gcov_pmu_ll_info_t *ll_info;
> +  gcov_pmu_brm_info_t *brm_info;
> +
> +  /* Construct hash table with information from gcda file. Entry keys are a
> +     unique combination of the filename and the line number for easy access */
> +  if (!pmu_hash)
> +    pmu_hash = htab_create (10,
> +                            htab_pmu_entry_hash, htab_pmu_entry_eq,
> +                            htab_pmu_entry_del);
> +
> +  gcc_assert (pmu_hash != NULL);
> +  gcc_assert (ll_infos->ll_count > 0);
> +  gcc_assert (brm_infos->brm_count > 0);
> +
> +  for (i = 0; i < ll_infos->ll_count; ++i)
> +    {
> +      ll_info = ll_infos->ll_array[i];
> +      entry = get_pmu_hash_entry (ll_info->filetag, ll_info->line);
> +      entry->ll_info = ll_info;
> +    }
> +
> +  for (i = 0; i < brm_infos->brm_count; ++i)
> +    {
> +      brm_info = brm_infos->brm_array[i];
> +      entry = get_pmu_hash_entry (brm_info->filetag, brm_info->line);
> +      entry->brm_info = brm_info;
> +    }
> +}
> +
> +/* Returns the load latency info for line number LINENO of source file
> +   FILENAME. */
> +
> +gcov_pmu_ll_info_t *
> +get_coverage_pmu_latency (const char* filename, gcov_unsigned_t lineno)
> +{
> +  pmu_entry_t *entry, elt;
> +
> +  /* No hash table, no pmu data */
> +  if (pmu_hash == NULL)
> +    return NULL;
> +
> +  elt.filename = xstrdup (filename);
> +  elt.lineno = lineno;
> +
> +  entry = (pmu_entry_t *) htab_find (pmu_hash, &elt);
> +  XDELETE (elt.filename);
> +  if (entry)
> +    return entry->ll_info;
> +
> +  return NULL;
> +}
> +
> +/* Returns the branch misprediction info for line number LINENO of source file
> +   FILENAME. */
> +
> +gcov_pmu_brm_info_t *
> +get_coverage_pmu_branch_mispredict (const char* filename, gcov_unsigned_t lineno)
> +{
> +  pmu_entry_t *entry, elt;
> +
> +  /* No hash table, no pmu data */
> +  if (pmu_hash == NULL)
> +    return NULL;
> +
> +  elt.filename = xstrdup (filename);
> +  elt.lineno = lineno;
> +
> +  entry = (pmu_entry_t *) htab_find (pmu_hash, &elt);
> +  XDELETE (elt.filename);
> +  if (entry)
> +    return entry->brm_info;
> +
> +  return NULL;
> +}
> +
>  /* Returns the coverage data entry for counter type COUNTER of function
>     FUNC. EXPECTED is the number of expected counter entries.  */
>
> @@ -1125,6 +1430,14 @@ coverage_function_present (unsigned fn_ident)
>    return item != NULL;
>  }
>
> +/* True if there is PMU data present in this compilation */
> +
> +bool
> +pmu_data_present (void)
> +{
> +  return (pmu_hash != NULL);
> +}
> +
>  /* Update function and program direct-call coverage counts.  */
>
>  void
> @@ -2271,6 +2584,11 @@ coverage_init (const char *filename, const char* s
>    if (flag_branch_probabilities)
>      read_counts_file (da_file_name, 0);
>
> +  /* Reads at most one auxiliary GCDA file since we don't support merging */
> +  if (flag_pmu_profile_use)
> +    read_pmu_file (pmu_profile_data ? pmu_profile_data
> +                   : get_da_file_name (pmu_profile_filename));
> +
>    /* Rebuild counts_hash and read the auxiliary GCDA files.  */
>    if (flag_profile_use && L_IPO_COMP_MODE)
>      {
> Index: coverage.h
> ===================================================================
> --- coverage.h  (revision 191138)
> +++ coverage.h  (working copy)
> @@ -45,7 +45,12 @@ extern int coverage_counter_alloc (unsigned /*coun
>  extern tree tree_coverage_counter_ref (unsigned /*counter*/, unsigned/*num*/);
>  /* Use a counter address from the most recent allocation.  */
>  extern tree tree_coverage_counter_addr (unsigned /*counter*/, unsigned/*num*/);
> -
> +/* Get the load latency info for the current file and line */
> +extern gcov_pmu_ll_info_t *get_coverage_pmu_latency (const char*,
> +                                                     gcov_unsigned_t);
> +/* Get the branch misprediction info for the current file and line */
> +extern gcov_pmu_brm_info_t *
> +get_coverage_pmu_branch_mispredict (const char*, gcov_unsigned_t);
>  /* Get all the counters for the current function.  */
>  extern gcov_type *get_coverage_counts (unsigned /*counter*/,
>                                        unsigned /*expected*/,
> @@ -70,6 +75,9 @@ extern void coverage_dc_end_function (void);
>     is present in the coverage internal data structures.  */
>  extern bool coverage_function_present (unsigned fn_ident);
>
> +/* True if there is PMU data present in this compilation. */
> +extern bool pmu_data_present (void);
> +
>  extern tree get_gcov_type (void);
>  extern tree get_gcov_unsigned_t (void);
>
> Index: common.opt
> ===================================================================
> --- common.opt  (revision 191138)
> +++ common.opt  (working copy)
> @@ -1683,9 +1683,13 @@ fpmu-profile-generate=
>  Common Joined RejectNegative Var(flag_pmu_profile_generate)
>  -fpmu-profile-generate=[load-latency]  Generate pmu profile for cache misses. Currently only pfmon based load latency profiling is supported on Intel/PEBS and AMD/IBS platforms.
>
> +fpmu-profile-use
> +Common Var(flag_pmu_profile_use)
> +-fpmu-profile-use=[pmuprofile.gcda]  The pmu profile data file to use for pmu feedback.
> +
>  fpmu-profile-use=
> -Common Joined RejectNegative Var(flag_pmu_profile_use)
> --fpmu-profile-use=[load-latency]  Use pmu profile data while optimizing.  Currently only perfmon based load latency profiling is supported on Intel/PEBS and AMD/IBS platforms.
> +Common Joined RejectNegative Var(pmu_profile_data)
> +-fpmu-profile-use=[pmuprofile.gcda]  The pmu profile data file to use for pmu feedback.
>
>  fpredictive-commoning
>  Common Report Var(flag_predictive_commoning) Optimization
>
> --
> This patch is available for review at http://codereview.appspot.com/6551072


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]