[PATCH] Make profile feedback more robust to source code changes.

Neil Vachharajani nvachhar@google.com
Tue Sep 22 23:22:00 GMT 2009


This patch makes GCC profiles more robust to source code changes.  In
particular, this patch changes the way profile data is looked up, and
it uses two checksums rather than one, to validate whether the profile
information corresponds to the code being compiled.  Previously,
profile data was identified by the field IDENT in COUNTS_ENTRY_T.
IDENT was based on the order of functions in the file.  So inserting a
function would invalidate profile data.  Now, profile data is
identified by function name.  The profile data was previously
validated by computing a checksum of the file name, line number, and
assembly name of the function.  In the new code, profile data is
validated by comparing a checksum of the control flow graphs.  This
makes the profile data robust to line number changes at the risk of
accidentally using the wrong profile data if the code changed but the
CFG did not.

Bootstrapped/regtested on x86_64-linux and i686-linux.  Verified that
FDO still works.  Ok for trunk?

2009-09-21  Neil Vachharajani  <nvachhar@google.com>

	* tree.c (crc32_byte): New function.
	(crc32_string): Use crc32_byte.
	(get_file_function_name): Update comment.
	* tree.h (crc32_byte): New function.
	* gcov.c: Update file comment.
	(struct function_info): Remove checksum.  Add lineno_checksum and
	cfg_checksum.
	(read_graph_file): Update to reflect changes in struct function_info.
	(read_count_file): Ditto.
	* gcov-io.c (gcov_string_length): New function.
	(gcov_write_words): Adjust since writes can be more than 8 bytes.
	(gcov_read_words): Ditto.
	* gcov-io.h: Update file comment to reflect .gcda/.gcno file
	format change.
	(gcov_write_string): Unpoison this if IN_LIBGOV is true.
	(gcov_read_string): Ditto.
	(gcov_string_length): New function.
	(GCOV_TAG_FUNCTION_LENGTH): Update to reflect new file format.
	(struct gcov_fn_info): Remove checksum.  Add lineno_checksum,
	cfg_checksum, and name.
	(struct gcov_var): Update since writes are no longer guaranteed to
	be 4 or 8 bytes when IN_LIBGCOV is true.
	* profile.c: Remove unneeded prototypes.
	(get_exec_counts): Pass cfg_checksum to called functions.
	(compute_branch_probabilities): Ditto.
	(compute_value_histograms): Ditto.	
	(branch_prob): Compute cfg_checksum and lineno_checksum.
	* coverage.c (struct function_list): Add lineno_checksum,
	cfg_checksum, name, and decl.
	(struct counts_entry): Add name, lineno_checksum, and cfg_checksum.
	(compute_checksum): Remove.
	(coverage_checksum_string): Ditto.
	(xtrdump_mask_random): New function.
	(get_const_string_type): Ditto.
	(htab_counts_entry_hash): Use new hash function.
	(htab_counts_entry_eq): Use new equality.
	(htab_counts_entry_del): Free memory for name.
	(read_counts_file): Update for new file format.
	(get_coverage_counts): Ditto.
	(coverage_compute_lineno_checksum): New function.
	(coverage_compute_cfg_checksum): Ditto.
	(coverage_begin_output): Update for new file format.
	(coverage_end_function): Ditto.
	(build_fn_info_type): Ditto.
	(build_fn_info_value): Ditto.
	(build_gcov_info): Ditto.
	* coverage.h (coverage_end_function): Update prototype.
	(coverage_begin_output): Ditto.
	(coverage_compute_cfg_checksum): New function.
	(coverage_compute_lineno_checksum): New function.
	(get_coverage_counts): Update prototype.
	* libgcov.c (gcov_exit): Update to reflect new profile format.
	* gcov-dump.c (tag_function): Update to reflect .gcda file
        format change.
	* testsuite/gcc.dg/tree-prof/prof-robust-1.c: Test to ensure
        profiles are still used even if the source code changes.
	* testsuite/g++.dg/tree-prof/prof-robust-1.C: Test for
        stripping multiple random prefixes due to anonymous
        namespaces.

--- gcc/tree.c.orig
+++ gcc/tree.c
@@ -8117,14 +8117,12 @@
 

 #define FILE_FUNCTION_FORMAT "_GLOBAL__%s_%s"

-/* Generate a crc32 of a string.  */
+/* Generate a crc32 of a byte.  */

 unsigned
-crc32_string (unsigned chksum, const char *string)
+crc32_byte (unsigned chksum, char byte)
 {
-  do
-    {
-      unsigned value = *string << 24;
+  unsigned value = (unsigned) byte << 24;
       unsigned ix;

       for (ix = 8; ix--; value <<= 1)
@@ -8135,6 +8133,18 @@
  	  chksum <<= 1;
  	  chksum ^= feedback;
   	}
+  return chksum;
+}
+
+
+/* Generate a crc32 of a string.  */
+
+unsigned
+crc32_string (unsigned chksum, const char *string)
+{
+  do
+    {
+      chksum = crc32_byte (chksum, *string);
     }
   while (*string++);
   return chksum;
@@ -8158,8 +8168,10 @@
       *p = '_';
 }

-/* Generate a name for a special-purpose function function.
+/* Generate a name for a special-purpose function.
    The generated name may need to be unique across the whole link.
+   Changes to this function may also require corresponding changes to
+   xstrdup_mask_random.
    TYPE is some string to identify the purpose of this function to the
    linker or collect2; it must start with an uppercase letter,
    one of:
--- gcc/tree.h.orig
+++ gcc/tree.h
@@ -4652,6 +4652,7 @@
 

 /* In tree.c */
 extern unsigned crc32_string (unsigned, const char *);
+extern unsigned crc32_byte (unsigned, char);
 extern void clean_symbol_name (char *);
 extern tree get_file_function_name (const char *);
 extern tree get_callee_fndecl (const_tree);
--- gcc/gcov.c.orig
+++ gcc/gcov.c
@@ -54,6 +54,13 @@
    some places we make use of the knowledge of how profile.c works to
    select particular algorithms here.  */

+/* The code validates that the profile information read in corresponds
+   to the code currently being compiled.  Rather than checking for
+   identical files, the code below computes a checksum on the CFG
+   (based on the order of basic blocks and the arcs in the CFG).  If
+   the CFG checksum in the gcda file match the CFG checksum for the
+   code currently being compiled, the profile data will be used.  */
+
 /* This is the size of the buffer used to read in source file lines.  */

 #define STRING_SIZE 200
@@ -161,7 +168,8 @@
   /* Name of function.  */
   char *name;
   unsigned ident;
-  unsigned checksum;
+  unsigned lineno_checksum;
+  unsigned cfg_checksum;

   /* Array of basic blocks.  */
   block_t *blocks;
@@ -791,12 +799,14 @@
       if (tag == GCOV_TAG_FUNCTION)
 	{
 	  char *function_name;
-	  unsigned ident, checksum, lineno;
+	  unsigned ident, lineno;
+	  unsigned lineno_checksum, cfg_checksum;
 	  source_t *src;
 	  function_t *probe, *prev;

 	  ident = gcov_read_unsigned ();
-	  checksum = gcov_read_unsigned ();
+	  lineno_checksum = gcov_read_unsigned ();
+	  cfg_checksum = gcov_read_unsigned ();
 	  function_name = xstrdup (gcov_read_string ());
 	  src = find_source (gcov_read_string ());
 	  lineno = gcov_read_unsigned ();
@@ -804,7 +814,8 @@
 	  fn = XCNEW (function_t);
 	  fn->name = function_name;
 	  fn->ident = ident;
-	  fn->checksum = checksum;
+	  fn->lineno_checksum = lineno_checksum;
+	  fn->cfg_checksum = cfg_checksum;
 	  fn->src = src;
 	  fn->line = lineno;

@@ -1065,6 +1076,7 @@
 	program_count++;
       else if (tag == GCOV_TAG_FUNCTION)
 	{
+          const char *fn_name;
 	  {
 	    unsigned ident = gcov_read_unsigned ();
 	    struct function_info *fn_n = functions;
@@ -1091,13 +1103,20 @@

 	  if (!fn)
 	    ;
-	  else if (gcov_read_unsigned () != fn->checksum)
+	  else if (gcov_read_unsigned () != fn->lineno_checksum
+		   || gcov_read_unsigned () != fn->cfg_checksum)
 	    {
 	    mismatch:;
 	      fnotice (stderr, "%s:profile mismatch for '%s'\n",
 		       da_file_name, fn->name);
 	      goto cleanup;
 	    }
+
+	  fn_name = gcov_read_string ();
+	  if (strcmp (fn_name, fn->name) != 0)
+	    fnotice (stderr, "%s:profile mismatch:"
+		     " expected function name '%s' but found '%s'\n",
+		     da_file_name, fn->name, fn_name);
 	}
       else if (tag == GCOV_TAG_FOR_COUNTER (GCOV_COUNTER_ARCS) && fn)
 	{
--- gcc/testsuite/gcc.dg/tree-prof/prof-robust-1.c.orig
+++ gcc/testsuite/gcc.dg/tree-prof/prof-robust-1.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -w" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef _PROFILE_USE
+int foo(int x) {
+  return 3 * x;
+}
+#endif
+
+int x = 1000;
+
+int main(int argc, char *argv[]) {
+  int i;
+  int sum = 0;
+  for (i = 0; i < x; i++)
+    sum += i;
+  printf("%d\n", sum);
+  return 0;
+}
--- gcc/testsuite/g++.dg/tree-prof/prof-robust-1.C.orig
+++ gcc/testsuite/g++.dg/tree-prof/prof-robust-1.C
@@ -0,0 +1,24 @@
+/* { dg-options "-O2 -fno-weak" } */
+
+#include <stdio.h>
+
+namespace {
+  namespace {
+
+    class MyClass {
+    public:
+      void foo() const;
+      ~MyClass() { foo(); }
+    };
+
+    void MyClass::foo() const { printf("Goodbye World\n"); }
+
+  }
+
+  static MyClass variable;
+
+}
+
+int main() {
+  return 0;
+}
--- gcc/gcov-io.c.orig
+++ gcc/gcov-io.c
@@ -212,7 +212,28 @@
   gcov_var.offset -= size;
 }

-/* Allocate space to write BYTES bytes to the gcov file. Return a
+#if IN_LIBGCOV
+
+/* These functions are guarded by #if to avoid compile time warning.  */
+
+/* Return the number of words STRING would need including the length
+   field in the output stream itself.  This should be identical to
+   "alloc" calculation in gcov_write_string().  */
+
+GCOV_LINKAGE gcov_unsigned_t
+gcov_string_length (const char *string)
+{
+  gcov_unsigned_t len = (string) ? strlen (string) : 0;
+  /* + 1 because of the length field. */
+  gcov_unsigned_t alloc = 1 + ((len + 4) >> 2);
+
+  /* Can not write a bigger than GCOV_BLOCK_SIZE string yet */
+  gcc_assert (alloc < GCOV_BLOCK_SIZE);
+  return alloc;
+}
+#endif
+
+/* Allocate space to write WORDS words to the gcov file. Return a
    pointer to those bytes, or NULL on failure.  */

 static gcov_unsigned_t *
@@ -222,13 +243,13 @@

   gcc_assert (gcov_var.mode < 0);
 #if IN_LIBGCOV
-  if (gcov_var.offset >= GCOV_BLOCK_SIZE)
+  gcc_assert (words < GCOV_BLOCK_SIZE);
+  if (gcov_var.offset + words >= GCOV_BLOCK_SIZE)
     {
-      gcov_write_block (GCOV_BLOCK_SIZE);
       if (gcov_var.offset)
 	{
-	  gcc_assert (gcov_var.offset == 1);
-	  memcpy (gcov_var.buffer, gcov_var.buffer + GCOV_BLOCK_SIZE, 4);
+	  gcc_assert (gcov_var.offset < GCOV_BLOCK_SIZE);
+	  gcov_write_block (gcov_var.offset);
 	}
     }
 #else
@@ -269,7 +290,6 @@
 }
 #endif /* IN_LIBGCOV */

-#if !IN_LIBGCOV
 /* Write STRING to coverage file.  Sets error flag on file
    error, overflow flag on overflow */

@@ -292,8 +312,8 @@
   buffer[alloc] = 0;
   memcpy (&buffer[1], string, length);
 }
-#endif

+
 #if !IN_LIBGCOV
 /* Write a tag TAG and reserve space for the record length. Return a
    value to be used for gcov_write_length.  */
@@ -380,14 +400,15 @@
   unsigned excess = gcov_var.length - gcov_var.offset;

   gcc_assert (gcov_var.mode > 0);
+  gcc_assert (words < GCOV_BLOCK_SIZE);
   if (excess < words)
     {
       gcov_var.start += gcov_var.offset;
 #if IN_LIBGCOV
       if (excess)
 	{
-	  gcc_assert (excess == 1);
-	  memcpy (gcov_var.buffer, gcov_var.buffer + gcov_var.offset, 4);
+	  gcc_assert (excess < GCOV_BLOCK_SIZE);
+	  memmove (gcov_var.buffer, gcov_var.buffer + gcov_var.offset, excess * 4);
 	}
 #else
       memmove (gcov_var.buffer, gcov_var.buffer + gcov_var.offset, excess * 4);
@@ -395,8 +416,7 @@
       gcov_var.offset = 0;
       gcov_var.length = excess;
 #if IN_LIBGCOV
-      gcc_assert (!gcov_var.length || gcov_var.length == 1);
-      excess = GCOV_BLOCK_SIZE;
+      excess = (sizeof (gcov_var.buffer) / sizeof
(gcov_var.buffer[0])) - gcov_var.length;
 #else
       if (gcov_var.length + words > gcov_var.alloc)
 	gcov_allocate (gcov_var.length + words);
@@ -456,7 +476,6 @@
    buffer, or NULL on empty string. You must copy the string before
    calling another gcov function.  */

-#if !IN_LIBGCOV
 GCOV_LINKAGE const char *
 gcov_read_string (void)
 {
@@ -467,8 +486,8 @@

   return (const char *) gcov_read_words (length);
 }
-#endif

+
 GCOV_LINKAGE void
 gcov_read_summary (struct gcov_summary *summary)
 {
--- gcc/gcov-io.h.orig
+++ gcc/gcov-io.h
@@ -103,7 +103,8 @@
    	note: unit function-graph*
 	unit: header int32:checksum string:source
 	function-graph: announce_function basic_blocks {arcs | lines}*
-	announce_function: header int32:ident int32:checksum
+	announce_function: header int32:ident
+		int32:lineno_checksum int32:cfg_checksum
 		string:name string:source int32:lineno
 	basic_block: header int32:flags*
 	arcs: header int32:block_no arc*
@@ -132,7 +133,9 @@
         data: {unit function-data* summary:object summary:program*}*
 	unit: header int32:checksum
         function-data:	announce_function arc_counts
-	announce_function: header int32:ident int32:checksum
+	announce_function: header int32:ident
+		int32:lineno_checksum int32:cfg_checksum
+		string:name
 	arc_counts: header int64:count*
 	summary: int32:checksum {count-summary}GCOV_COUNTERS
 	count-summary:	int32:num int32:runs int64:sum
@@ -243,13 +246,16 @@
 #define gcov_write_unsigned __gcov_write_unsigned
 #define gcov_write_counter __gcov_write_counter
 #define gcov_write_summary __gcov_write_summary
+#define gcov_write_string __gcov_write_string
+#define gcov_string_length __gcov_string_length
 #define gcov_read_unsigned __gcov_read_unsigned
 #define gcov_read_counter __gcov_read_counter
+#define gcov_read_string  __gcov_read_string
 #define gcov_read_summary __gcov_read_summary

 /* Poison these, so they don't accidentally slip in.  */
-#pragma GCC poison gcov_write_string gcov_write_tag gcov_write_length
-#pragma GCC poison gcov_read_string gcov_sync gcov_time gcov_magic
+#pragma GCC poison gcov_write_tag gcov_write_length
+#pragma GCC poison gcov_sync gcov_time gcov_magic

 #ifdef HAVE_GAS_HIDDEN
 #define ATTRIBUTE_HIDDEN  __attribute__ ((__visibility__ ("hidden")))
@@ -294,7 +300,7 @@
    file marker -- it is not required to be present.  */

 #define GCOV_TAG_FUNCTION	 ((gcov_unsigned_t)0x01000000)
-#define GCOV_TAG_FUNCTION_LENGTH (2)
+#define GCOV_TAG_FUNCTION_LENGTH (3)
 #define GCOV_TAG_BLOCKS		 ((gcov_unsigned_t)0x01410000)
 #define GCOV_TAG_BLOCKS_LENGTH(NUM) (NUM)
 #define GCOV_TAG_BLOCKS_NUM(LENGTH) (LENGTH)
@@ -411,11 +417,18 @@
 /* Information about a single function.  This uses the trailing array
    idiom. The number of counters is determined from the counter_mask
    in gcov_info.  We hold an array of function info, so have to
-   explicitly calculate the correct array stride.  */
+   explicitly calculate the correct array stride.
+
+   "ident" is no longer used in hash computation.  Additionally,
+   "name" is used in hash computation.  This makes the profile data
+   not compatible across function name changes.
+   */
 struct gcov_fn_info
 {
   gcov_unsigned_t ident;	/* unique ident of function */
-  gcov_unsigned_t checksum;	/* function checksum */
+  gcov_unsigned_t lineno_checksum;	/* function lineo_checksum */
+  gcov_unsigned_t cfg_checksum;	/* function cfg checksum */
+  const char     *name;         /* name of the function */
   unsigned n_ctrs[0];		/* instrumented counters */
 };

@@ -505,11 +518,10 @@
   int error;			/* < 0 overflow, > 0 disk error.  */
   int mode;	                /* < 0 writing, > 0 reading */
 #if IN_LIBGCOV
-  /* Holds one block plus 4 bytes, thus all coverage reads & writes
+  /* Holds one block, thus all coverage reads & writes
      fit within this buffer and we always can transfer GCOV_BLOCK_SIZE
-     to and from the disk. libgcov never backtracks and only writes 4
-     or 8 byte objects.  */
-  gcov_unsigned_t buffer[GCOV_BLOCK_SIZE + 1];
+     to and from the disk. libgcov never backtracks.  */
+  gcov_unsigned_t buffer[GCOV_BLOCK_SIZE];
 #else
   int endian;			/* Swap endianness.  */
   /* Holds a variable length block, as the compiler can write
@@ -543,6 +555,7 @@
 GCOV_LINKAGE gcov_unsigned_t gcov_read_unsigned (void) ATTRIBUTE_HIDDEN;
 GCOV_LINKAGE gcov_type gcov_read_counter (void) ATTRIBUTE_HIDDEN;
 GCOV_LINKAGE void gcov_read_summary (struct gcov_summary *) ATTRIBUTE_HIDDEN;
+GCOV_LINKAGE const char *gcov_read_string (void) ATTRIBUTE_HIDDEN;

 #if IN_LIBGCOV
 /* Available only in libgcov */
@@ -554,9 +567,9 @@
     ATTRIBUTE_HIDDEN;
 static void gcov_rewrite (void);
 GCOV_LINKAGE void gcov_seek (gcov_position_t /*position*/) ATTRIBUTE_HIDDEN;
+GCOV_LINKAGE gcov_unsigned_t gcov_string_length (const char *)
ATTRIBUTE_HIDDEN;
 #else
 /* Available outside libgcov */
-GCOV_LINKAGE const char *gcov_read_string (void);
 GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/,
 			     gcov_unsigned_t /*length */);
 #endif
@@ -564,11 +577,11 @@
 #if !IN_GCOV
 /* Available outside gcov */
 GCOV_LINKAGE void gcov_write_unsigned (gcov_unsigned_t) ATTRIBUTE_HIDDEN;
+GCOV_LINKAGE void gcov_write_string (const char *) ATTRIBUTE_HIDDEN;
 #endif

 #if !IN_GCOV && !IN_LIBGCOV
 /* Available only in compiler */
-GCOV_LINKAGE void gcov_write_string (const char *);
 GCOV_LINKAGE gcov_position_t gcov_write_tag (gcov_unsigned_t);
 GCOV_LINKAGE void gcov_write_length (gcov_position_t /*position*/);
 #endif
--- gcc/profile.c.orig
+++ gcc/profile.c
@@ -104,13 +104,6 @@

 /* Forward declarations.  */
 static void find_spanning_tree (struct edge_list *);
-static unsigned instrument_edges (struct edge_list *);
-static void instrument_values (histogram_values);
-static void compute_branch_probabilities (void);
-static void compute_value_histograms (histogram_values);
-static gcov_type * get_exec_counts (void);
-static basic_block find_group (basic_block);
-static void union_groups (basic_block, basic_block);

 /* Add edge instrumentation code to the entire insn chain.

@@ -235,10 +228,12 @@
 }
 


-/* Computes hybrid profile for all matching entries in da_file.  */
+/* Computes hybrid profile for all matching entries in da_file.
+
+   CFG_CHECKSUM is the precomputed checksum for the CFG.  */

 static gcov_type *
-get_exec_counts (void)
+get_exec_counts (unsigned cfg_checksum)
 {
   unsigned num_edges = 0;
   basic_block bb;
@@ -255,7 +250,8 @@
 	  num_edges++;
     }

-  counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, &profile_info);
+  counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, cfg_checksum,
+				&profile_info);
   if (!counts)
     return NULL;

@@ -435,10 +431,12 @@
 }

 /* Compute the branch probabilities for the various branches.
-   Annotate them accordingly.  */
+   Annotate them accordingly.

+   CFG_CHECKSUM is the precomputed checksum for the CFG.  */
+
 static void
-compute_branch_probabilities (void)
+compute_branch_probabilities (unsigned cfg_checksum)
 {
   basic_block bb;
   int i;
@@ -447,7 +445,7 @@
   int passes;
   int hist_br_prob[20];
   int num_branches;
-  gcov_type *exec_counts = get_exec_counts ();
+  gcov_type *exec_counts = get_exec_counts (cfg_checksum);
   int inconsistent = 0;

   /* Very simple sanity checks so we catch bugs in our profiling code.  */
@@ -765,10 +763,12 @@
 }

 /* Load value histograms values whose description is stored in VALUES array
-   from .gcda file.  */
+   from .gcda file.

+   CFG_CHECKSUM is the precomputed checksum for the CFG.  */
+
 static void
-compute_value_histograms (histogram_values values)
+compute_value_histograms (histogram_values values, unsigned cfg_checksum)
 {
   unsigned i, j, t, any;
   unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS];
@@ -796,7 +796,7 @@

       histogram_counts[t] =
 	get_coverage_counts (COUNTER_FOR_HIST_TYPE (t),
-			     n_histogram_counters[t], NULL);
+			     n_histogram_counters[t], cfg_checksum, NULL);
       if (histogram_counts[t])
 	any = 1;
       act_count[t] = histogram_counts[t];
@@ -899,6 +899,7 @@
   unsigned num_instrumented;
   struct edge_list *el;
   histogram_values values = NULL;
+  unsigned cfg_checksum, lineno_checksum;

   total_num_times_called++;

@@ -1050,11 +1051,19 @@
   if (dump_file)
     fprintf (dump_file, "%d ignored edges\n", ignored_edges);

+
+  /* Compute two different checksums. Note that we want to compute
+     the checksum in only once place, since it depends on the shape
+     of the control flow which can change during
+     various transformations.  */
+  cfg_checksum = coverage_compute_cfg_checksum ();
+  lineno_checksum = coverage_compute_lineno_checksum ();
+
   /* Write the data from which gcov can reconstruct the basic block
      graph.  */

   /* Basic block flags */
-  if (coverage_begin_output ())
+  if (coverage_begin_output (lineno_checksum, cfg_checksum))
     {
       gcov_position_t offset;

@@ -1071,7 +1080,7 @@
   EXIT_BLOCK_PTR->index = last_basic_block;

   /* Arcs */
-  if (coverage_begin_output ())
+  if (coverage_begin_output (lineno_checksum, cfg_checksum))
     {
       gcov_position_t offset;

@@ -1112,7 +1121,7 @@
     }

   /* Line numbers.  */
-  if (coverage_begin_output ())
+  if (coverage_begin_output (lineno_checksum, cfg_checksum))
     {
       gcov_position_t offset;

@@ -1171,9 +1180,9 @@

   if (flag_branch_probabilities)
     {
-      compute_branch_probabilities ();
+      compute_branch_probabilities (cfg_checksum);
       if (flag_profile_values)
-	compute_value_histograms (values);
+	compute_value_histograms (values, cfg_checksum);
     }

   remove_fake_edges ();
@@ -1201,7 +1210,7 @@

   VEC_free (histogram_value, heap, values);
   free_edge_list (el);
-  coverage_end_function ();
+  coverage_end_function (lineno_checksum, cfg_checksum);
 }
 

 /* Union find algorithm implementation for the basic blocks using
--- gcc/coverage.c.orig
+++ gcc/coverage.c
@@ -47,13 +47,17 @@
 #include "cgraph.h"
 #include "tree-pass.h"

+#include "gcov-io.h"
 #include "gcov-io.c"

 struct function_list
 {
   struct function_list *next;	 /* next function */
   unsigned ident;		 /* function ident */
-  unsigned checksum;	         /* function checksum */
+  unsigned lineno_checksum;	 /* function lineno checksum */
+  unsigned cfg_checksum;	 /* function cfg checksum */
+  const char *name;              /* function name */
+  tree decl;                     /* function decl */
   unsigned n_ctrs[GCOV_COUNTERS];/* number of counters.  */
 };

@@ -63,9 +67,11 @@
   /* We hash by  */
   unsigned ident;
   unsigned ctr;
+  char *name;

   /* Store  */
-  unsigned checksum;
+  unsigned lineno_checksum;
+  unsigned cfg_checksum;
   gcov_type *counts;
   struct gcov_ctr_summary summary;

@@ -114,14 +120,13 @@
 static int htab_counts_entry_eq (const void *, const void *);
 static void htab_counts_entry_del (void *);
 static void read_counts_file (void);
-static unsigned compute_checksum (void);
-static unsigned coverage_checksum_string (unsigned, const char *);
 static tree build_fn_info_type (unsigned);
 static tree build_fn_info_value (const struct function_list *, tree);
 static tree build_ctr_info_type (void);
 static tree build_ctr_info_value (unsigned, tree);
 static tree build_gcov_info (void);
 static void create_coverage (void);
+static char * xstrdup_mask_random (const char *);
 

 /* Return the type node for gcov_type.  */

@@ -139,12 +144,21 @@
   return lang_hooks.types.type_for_size (32, true);
 }
 

+/* Return the type node for const char *.  */
+
+static tree
+get_const_string_type (void)
+{
+  return build_pointer_type
+    (build_qualified_type (char_type_node, TYPE_QUAL_CONST));
+}
+

 static hashval_t
 htab_counts_entry_hash (const void *of)
 {
   const counts_entry_t *const entry = (const counts_entry_t *) of;

-  return entry->ident * GCOV_COUNTERS + entry->ctr;
+  return htab_hash_string (entry->name) + entry->ctr;
 }

 static int
@@ -153,7 +167,8 @@
   const counts_entry_t *const entry1 = (const counts_entry_t *) of1;
   const counts_entry_t *const entry2 = (const counts_entry_t *) of2;

-  return entry1->ident == entry2->ident && entry1->ctr == entry2->ctr;
+  return (entry1->ctr == entry2->ctr
+	  && strcmp (entry1->name, entry2->name) == 0);
 }

 static void
@@ -161,6 +176,7 @@
 {
   counts_entry_t *const entry = (counts_entry_t *) of;

+  free (entry->name);
   free (entry->counts);
   free (entry);
 }
@@ -171,11 +187,13 @@
 read_counts_file (void)
 {
   gcov_unsigned_t fn_ident = 0;
-  gcov_unsigned_t checksum = -1;
+  char *name = NULL;
   counts_entry_t *summaried = NULL;
   unsigned seen_summary = 0;
   gcov_unsigned_t tag;
   int is_error = 0;
+  unsigned lineno_checksum = 0;
+  unsigned cfg_checksum = 0;

   if (!gcov_open (da_file_name, 1))
     return;
@@ -214,8 +232,21 @@
       offset = gcov_position ();
       if (tag == GCOV_TAG_FUNCTION)
 	{
+          const char *nm;
 	  fn_ident = gcov_read_unsigned ();
-	  checksum = gcov_read_unsigned ();
+	  lineno_checksum = gcov_read_unsigned ();
+	  cfg_checksum = gcov_read_unsigned ();
+          nm = gcov_read_string();
+          if (!nm)
+            {
+              /* Error. Die! */
+              fatal_error ("corrupted profile - name of the function"
+                           " (ident = 0x%x lineno_checksum 0x%x "
+                           "ccfg_checksum 0x%x)"
+                           " is NULL.", fn_ident, lineno_checksum,
+                           cfg_checksum);
+            }
+          name = xstrdup (nm);
 	  if (seen_summary)
 	    {
 	      /* We have already seen a summary, this means that this
@@ -257,6 +288,7 @@
 	  unsigned ix;

 	  elt.ident = fn_ident;
+          elt.name = name;
 	  elt.ctr = GCOV_COUNTER_FOR_TAG (tag);

 	  slot = (counts_entry_t **) htab_find_slot
@@ -265,17 +297,22 @@
 	  if (!entry)
 	    {
 	      *slot = entry = XCNEW (counts_entry_t);
-	      entry->ident = elt.ident;
+	      entry->ident = fn_ident;
+	      entry->name = name;
 	      entry->ctr = elt.ctr;
-	      entry->checksum = checksum;
+	      entry->lineno_checksum = lineno_checksum;
+	      entry->cfg_checksum = cfg_checksum;
 	      entry->summary.num = n_counts;
 	      entry->counts = XCNEWVEC (gcov_type, n_counts);
 	    }
-	  else if (entry->checksum != checksum)
+	  else if (entry->lineno_checksum != lineno_checksum
+		   || entry->cfg_checksum != cfg_checksum)
 	    {
 	      error ("coverage mismatch for function %u while reading
execution counters",
 		     fn_ident);
-	      error ("checksum is %x instead of %x", entry->checksum, checksum);
+	      error ("checksum is (%x,%x) instead of (%x,%x)",
+		     entry->lineno_checksum, entry->cfg_checksum,
+		     lineno_checksum, cfg_checksum);
 	      htab_delete (counts_hash);
 	      break;
 	    }
@@ -323,11 +360,10 @@
 /* Returns the counters for a particular tag.  */

 gcov_type *
-get_coverage_counts (unsigned counter, unsigned expected,
+get_coverage_counts (unsigned counter, unsigned expected, unsigned
cfg_checksum,
 		     const struct gcov_ctr_summary **summary)
 {
   counts_entry_t *entry, elt;
-  gcov_unsigned_t checksum = -1;

   /* No hash table, no counts.  */
   if (!counts_hash)
@@ -343,6 +379,8 @@
     }

   elt.ident = current_function_funcdef_no + 1;
+  elt.name = xstrdup_mask_random
+         (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)));
   elt.ctr = counter;
   entry = (counts_entry_t *) htab_find (counts_hash, &elt);
   if (!entry)
@@ -352,8 +390,7 @@
       return NULL;
     }

-  checksum = compute_checksum ();
-  if (entry->checksum != checksum
+  if (entry->cfg_checksum != cfg_checksum
       || entry->summary.num != expected)
     {
       static int warned = 0;
@@ -368,8 +405,9 @@

       if (!inhibit_warnings)
 	{
-	  if (entry->checksum != checksum)
-	    inform (input_location, "checksum is %x instead of %x",
entry->checksum, checksum);
+	  if (entry->cfg_checksum != cfg_checksum)
+	    inform (input_location, "cfg_checksum is %x instead of %x",
+                    entry->cfg_checksum, cfg_checksum);
 	  else
 	    inform (input_location, "number of counters is %d instead of %d",
 		    entry->summary.num, expected);
@@ -379,7 +417,8 @@
 	  && !inhibit_warnings
 	  && !warned++)
 	{
-	  inform (input_location, "coverage mismatch ignored due to
-Wcoverage-mismatch");
+	  inform (input_location,
+                  "coverage mismatch ignored due to -Wcoverage-mismatch");
 	  inform (input_location, flag_guess_branch_prob
 		  ? "execution counts estimated"
 		  : "execution counts assumed to be zero");
@@ -468,82 +507,128 @@
 				       NULL, NULL));
 }
 

-/* Generate a checksum for a string.  CHKSUM is the current
-   checksum.  */
-
-static unsigned
-coverage_checksum_string (unsigned chksum, const char *string)
+/* Mask out the random part of the identifier name.
+   Returns a pointer to the newly allocated string
+   that contains random part masked out to 0.  */
+
+static char *
+xstrdup_mask_random (const char *string)
 {
-  int i;
-  char *dup = NULL;
+  char *dup = xstrdup (string);
+  char *ptr = dup;

   /* Look for everything that looks if it were produced by
      get_file_function_name and zero out the second part
-     that may result from flag_random_seed.  This is not critical
-     as the checksums are used only for sanity checking.  */
-  for (i = 0; string[i]; i++)
+     that may result from flag_random_seed.  This is critical
+     since we use the function name as the key for finding
+     the profile entry.  */
+#define GLOBAL_PREFIX "_GLOBAL__"
+#define TRAILING_N "N_"
+#define ISCAPXDIGIT(a) (((a) >= '0' && (a) <= '9') || ((a) >= 'A' &&
(a) <= 'F'))
+  /* Thanks to anonymous namespace, we can have a function name
+     with multiple GLOBAL_PREFIX.  */
+  while ((ptr = strstr (ptr, GLOBAL_PREFIX)))
     {
-      int offset = 0;
-      if (!strncmp (string + i, "_GLOBAL__N_", 11))
-      offset = 11;
-      if (!strncmp (string + i, "_GLOBAL__", 9))
-      offset = 9;
+      char *temp_ptr;
+      /* Skip _GLOBAL__. */
+      ptr += strlen (GLOBAL_PREFIX);

-      /* C++ namespaces do have scheme:
-         _GLOBAL__N_<filename>_<wrongmagicnumber>_<magicnumber>functionname
-       since filename might contain extra underscores there seems
-       to be no better chance then walk all possible offsets looking
-       for magicnumber.  */
-      if (offset)
-	{
-	  for (i = i + offset; string[i]; i++)
-	    if (string[i]=='_')
-	      {
-		int y;
+      /* Skip optional N_ (in case __GLOBAL_N__). */
+      if (!strncmp (ptr, TRAILING_N, strlen (TRAILING_N)))
+          ptr += strlen (TRAILING_N);
+      /* At this point, ptr should point after "_GLOBAL__N_" or "_GLOBAL__". */

-		for (y = 1; y < 9; y++)
-		  if (!(string[i + y] >= '0' && string[i + y] <= '9')
-		      && !(string[i + y] >= 'A' && string[i + y] <= 'F'))
-		    break;
-		if (y != 9 || string[i + 9] != '_')
-		  continue;
-		for (y = 10; y < 18; y++)
-		  if (!(string[i + y] >= '0' && string[i + y] <= '9')
-		      && !(string[i + y] >= 'A' && string[i + y] <= 'F'))
-		    break;
-		if (y != 18)
-		  continue;
-		if (!dup)
-		  string = dup = xstrdup (string);
-		for (y = 10; y < 18; y++)
-		  dup[i + y] = '0';
-	      }
-	  break;
-	}
+      while ((temp_ptr = strchr (ptr, '_')) != NULL)
+        {
+          int y;
+
+	  ptr = temp_ptr;
+          /* For every "_" in the rest of the string,
+             try the follwing pattern matching */
+
+          /* Skip over '_'. */
+          ptr++;
+#define NDIGITS (8)
+          /* Try matching the pattern:
+             <8-digit hex>_<8-digit hex>
+             The second number is randomly generated
+             so we want to mask it out before computing the checksum. */
+          for (y = 0; *ptr != 0 && y < NDIGITS; y++, ptr++)
+              if (!ISCAPXDIGIT (*ptr))
+                  break;
+          if (y != NDIGITS || *ptr != '_')
+              continue;
+          /* Skip over '_' again. */
+          ptr++;
+          for (y = 0; *ptr != 0 && y < NDIGITS; y++, ptr++)
+              if (!ISCAPXDIGIT (*ptr))
+                  break;
+
+          if (y == NDIGITS)
+            {
+              /* We have a match.
+                 Mask out the second 8-digit number. */
+              for(y = -NDIGITS - 1 ; y < 0; y++)
+                ptr[y] = '0';
+              break;
+            }
+        }
     }

-  chksum = crc32_string (chksum, string);
-  if (dup)
-    free (dup);
-
-  return chksum;
+  return dup;
 }

-/* Compute checksum for the current function.  We generate a CRC32.  */

-static unsigned
-compute_checksum (void)
+/* Compute checksum for the current function.  We generate a CRC32.
+   TODO: This checksum can be made more stringent by computing
+   crc32 over the filename/lineno output in gcno. */
+
+unsigned
+coverage_compute_lineno_checksum (void)
 {
   expanded_location xloc
     = expand_location (DECL_SOURCE_LOCATION (current_function_decl));
+  char *name = xstrdup_mask_random
+        (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)));
   unsigned chksum = xloc.line;

-  chksum = coverage_checksum_string (chksum, xloc.file);
-  chksum = coverage_checksum_string
-    (chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)));
+  chksum = crc32_string (chksum, xloc.file);
+  chksum = crc32_string (chksum, name);

+  free (name);
+
   return chksum;
 }
+
+
+/* Compute cfg checksum for the current function.
+   The checksum is calculated carefully so that
+   source code changes that doesn't affect the control flow graph
+   won't change the checksum.
+   This is to make the profile data useable across source code change.
+   The downside of this is that the compiler may use potentially
+   wrong profile data - that the source code change has non-trivial impact
+   on the validity of profile data (e.g. the reversed condition)
+   but the compiler won't detect the change and use the wrong profile data.  */
+
+unsigned
+coverage_compute_cfg_checksum (void)
+{
+  basic_block bb;
+  unsigned chksum = n_basic_blocks;
+
+  FOR_EACH_BB (bb)
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        {
+          chksum = crc32_byte (chksum, e->dest->index);
+        }
+    }
+
+  return chksum;
+}
 

 /* Begin output to the graph file for the current function.
    Opens the output file, if not already done. Writes the
@@ -551,7 +636,7 @@
    should be output.  */

 int
-coverage_begin_output (void)
+coverage_begin_output (unsigned lineno_checksum, unsigned cfg_checksum)
 {
   /* We don't need to output .gcno file unless we're under -ftest-coverage
      (e.g. -fprofile-arcs/generate/use don't need .gcno to work). */
@@ -563,6 +648,7 @@
       expanded_location xloc
 	= expand_location (DECL_SOURCE_LOCATION (current_function_decl));
       unsigned long offset;
+      char *name;

       if (!bbg_file_opened)
 	{
@@ -577,17 +663,22 @@
 	  bbg_file_opened = 1;
 	}

+      name = xstrdup_mask_random
+        (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)));
+
       /* Announce function */
       offset = gcov_write_tag (GCOV_TAG_FUNCTION);
       gcov_write_unsigned (current_function_funcdef_no + 1);
-      gcov_write_unsigned (compute_checksum ());
-      gcov_write_string (IDENTIFIER_POINTER
-			 (DECL_ASSEMBLER_NAME (current_function_decl)));
+      gcov_write_unsigned (lineno_checksum);
+      gcov_write_unsigned (cfg_checksum);
+      gcov_write_string (name);
       gcov_write_string (xloc.file);
       gcov_write_unsigned (xloc.line);
       gcov_write_length (offset);

       bbg_function_announced = 1;
+
+      free (name);
     }
   return !gcov_is_error ();
 }
@@ -596,7 +687,7 @@
    error has occurred.  Save function coverage counts.  */

 void
-coverage_end_function (void)
+coverage_end_function (unsigned lineno_checksum, unsigned cfg_checksum)
 {
   unsigned i;

@@ -609,15 +700,22 @@
   if (fn_ctr_mask)
     {
       struct function_list *item;
+      const char *name;

       item = XNEW (struct function_list);

       *functions_tail = item;
       functions_tail = &item->next;

+      name = xstrdup_mask_random
+        (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)));
+
       item->next = 0;
       item->ident = current_function_funcdef_no + 1;
-      item->checksum = compute_checksum ();
+      item->decl = current_function_decl;
+      item->name = name;
+      item->lineno_checksum = lineno_checksum;
+      item->cfg_checksum = cfg_checksum;
       for (i = 0; i != GCOV_COUNTERS; i++)
 	{
 	  item->n_ctrs[i] = fn_n_ctrs[i];
@@ -643,12 +741,24 @@
   fields = build_decl (BUILTINS_LOCATION,
 		       FIELD_DECL, NULL_TREE, get_gcov_unsigned_t ());

-  /* checksum */
+  /* lineno_checksum */
   field = build_decl (BUILTINS_LOCATION,
 		      FIELD_DECL, NULL_TREE, get_gcov_unsigned_t ());
   TREE_CHAIN (field) = fields;
   fields = field;

+  /* cfg_checksum */
+  field = build_decl (BUILTINS_LOCATION,
+		      FIELD_DECL, NULL_TREE, get_gcov_unsigned_t ());
+  TREE_CHAIN (field) = fields;
+  fields = field;
+
+  /* name */
+  field = build_decl (BUILTINS_LOCATION,
+		      FIELD_DECL, NULL_TREE, get_const_string_type ());
+  TREE_CHAIN (field) = fields;
+  fields = field;
+
   array_type = build_int_cst (NULL_TREE, counters - 1);
   array_type = build_index_type (array_type);
   array_type = build_array_type (get_gcov_unsigned_t (), array_type);
@@ -681,11 +791,23 @@
 					     function->ident), value);
   fields = TREE_CHAIN (fields);

-  /* checksum */
+  /* lineno_checksum */
   value = tree_cons (fields, build_int_cstu (get_gcov_unsigned_t (),
-					     function->checksum), value);
+					     function->lineno_checksum), value);
   fields = TREE_CHAIN (fields);

+  /* cfg checksum */
+  value = tree_cons (fields, build_int_cstu (get_gcov_unsigned_t (),
+					     function->cfg_checksum), value);
+  fields = TREE_CHAIN (fields);
+
+  /* name */
+  value = tree_cons (fields,
+                     build_string_literal (strlen (function->name) + 1,
+                                           function->name),
+                     value);
+  fields = TREE_CHAIN (fields);
+
   /* counters */
   for (ix = 0; ix != GCOV_COUNTERS; ix++)
     if (prg_ctr_mask & (1 << ix))
@@ -865,8 +987,8 @@
   da_file_name_len = strlen (da_file_name);
   filename_string = build_string (da_file_name_len + 1, da_file_name);
   TREE_TYPE (filename_string) = build_array_type
-    (char_type_node, build_index_type
-     (build_int_cst (NULL_TREE, da_file_name_len)));
+      (char_type_node, build_index_type
+       (build_int_cst (NULL_TREE, da_file_name_len)));
   value = tree_cons (field, build1 (ADDR_EXPR, string_type, filename_string),
 		     value);

@@ -874,10 +996,14 @@
   fn_info_type = build_fn_info_type (n_ctr_types);
   fn_info_ptr_type = build_pointer_type (build_qualified_type
 					 (fn_info_type, TYPE_QUAL_CONST));
-  for (fn = functions_head, n_fns = 0; fn; fn = fn->next, n_fns++)
-    fn_info_value = tree_cons (NULL_TREE,
-			       build_fn_info_value (fn, fn_info_type),
-			       fn_info_value);
+
+  for (fn = functions_head, n_fns = 0; fn; fn = fn->next)
+    {
+      n_fns++;
+      fn_info_value = tree_cons (NULL_TREE,
+                                 build_fn_info_value (fn, fn_info_type),
+                                 fn_info_value);
+    }
   if (n_fns)
     {
       tree array_type;
--- gcc/coverage.h.orig
+++ gcc/coverage.h
@@ -28,12 +28,18 @@

 /* Complete the coverage information for the current function. Once
    per function.  */
-extern void coverage_end_function (void);
+extern void coverage_end_function (unsigned, unsigned);

 /* Start outputting coverage information for the current
    function. Repeatable per function.  */
-extern int coverage_begin_output (void);
+extern int coverage_begin_output (unsigned, unsigned);

+/* Compute the control flow checksum for the current function.  */
+extern unsigned coverage_compute_cfg_checksum (void);
+
+/* Compute the line number checksum for the current function.  */
+extern unsigned coverage_compute_lineno_checksum (void);
+
 /* Allocate some counters. Repeatable per function.  */
 extern int coverage_counter_alloc (unsigned /*counter*/, unsigned/*num*/);
 /* Use a counter from the most recent allocation.  */
@@ -44,6 +50,7 @@
 /* Get all the counters for the current function.  */
 extern gcov_type *get_coverage_counts (unsigned /*counter*/,
 				       unsigned /*expected*/,
+				       unsigned /*checksum*/,
 				       const struct gcov_ctr_summary **);

 extern tree get_gcov_type (void);
--- gcc/libgcov.c.orig
+++ gcc/libgcov.c
@@ -342,9 +342,10 @@

 	      /* Check function.  */
 	      if (tag != GCOV_TAG_FUNCTION
-		  || length != GCOV_TAG_FUNCTION_LENGTH
 		  || gcov_read_unsigned () != fi_ptr->ident
-		  || gcov_read_unsigned () != fi_ptr->checksum)
+		  || gcov_read_unsigned () != fi_ptr->lineno_checksum
+		  || gcov_read_unsigned () != fi_ptr->cfg_checksum
+		  || strcmp (gcov_read_string (), fi_ptr->name))
 		{
 		read_mismatch:;
 		  fprintf (stderr, "profiling:%s:Merge mismatch for %s\n",
@@ -485,9 +486,13 @@
 		  ((const char *) gi_ptr->functions + f_ix * fi_stride);

 	  /* Announce function.  */
-	  gcov_write_tag_length (GCOV_TAG_FUNCTION, GCOV_TAG_FUNCTION_LENGTH);
+	  gcov_write_tag_length
+	    (GCOV_TAG_FUNCTION,
+	     GCOV_TAG_FUNCTION_LENGTH + gcov_string_length (fi_ptr->name));
 	  gcov_write_unsigned (fi_ptr->ident);
-	  gcov_write_unsigned (fi_ptr->checksum);
+	  gcov_write_unsigned (fi_ptr->lineno_checksum);
+	  gcov_write_unsigned (fi_ptr->cfg_checksum);
+	  gcov_write_string (fi_ptr->name);

 	  c_ix = 0;
 	  for (t_ix = 0; t_ix < GCOV_COUNTERS; t_ix++)
--- gcc/gcov-dump.c.orig
+++ gcc/gcov-dump.c
@@ -265,20 +265,17 @@
 	      unsigned tag ATTRIBUTE_UNUSED, unsigned length ATTRIBUTE_UNUSED)
 {
   unsigned long pos = gcov_position ();
-
+  const char *name;
+
   printf (" ident=%u", gcov_read_unsigned ());
-  printf (", checksum=0x%08x", gcov_read_unsigned ());
+  printf (", lineno_checksum=0x%08x", gcov_read_unsigned ());
+  printf (", cfg_checksum=0x%08x", gcov_read_unsigned ());
+
+  name = gcov_read_string ();
+  printf (" %s", name ? name : "NULL");

   if (gcov_position () - pos < length)
-    {
-      const char *name;
-
-      name = gcov_read_string ();
-      printf (", `%s'", name ? name : "NULL");
-      name = gcov_read_string ();
-      printf (" %s", name ? name : "NULL");
-      printf (":%u", gcov_read_unsigned ());
-    }
+    printf (":%u", gcov_read_unsigned ());
 }

 static void



More information about the Gcc-patches mailing list