[pph] Use table of contents for merging (issue5305056)

Diego Novillo dnovillo@google.com
Sat Oct 22 19:32:00 GMT 2011


This patch adds a table of contents to use when merging ASTs into the
current compilation contexts.  We consider a compilation context any
kind of tree chain or vector that holds symbols/types used by the
parser.  For intance, scope_chain->bindings->names
or scope_chain->bindings->namespaces.

The idea of the table of contents is to quickly determine whether an
object key read from a PPH file is a new object or if it has already
been instantiated (http://gcc.gnu.org/ml/gcc-patches/2011-10/msg01918.html)

As mergeable objects are read, they are added to the TOC and inserted
into the corresponding context.  If a match already existed, then
the existing symbol is used.

Note that since we read PPH images during tokenization, the parser
data structures are "empty".  They only contain pre-built symbols and
types.  This means that we do not need to pre-populate the TOC before
parsing.

Symbols are hashed based on various attributes, for now we are only
dealing with symbols, so we are using the symbol's name, and the
context it belongs to (htab_merge_key_hash).

Additionally, this patch avoids merging symbols that are in images on
the same #include path.  These symbols have already been merged when
the images were generated (using external references).  For example,
suppose we have this tree of #includes:

			tu.cc
		  +-------+-------+
		1.pph	2.pph	3.pph
			  |
			4.pph
			  |
			5.pph

A symbol defined in 5.pph and used in 2.pph is already emitted inside
2.pph as an external reference to 5.pph.  So, it needs no further
merging.  And, clearly, all the symbols defined in a single file do
not need to be merged with each other.  We only need to deal with
merging across different branches in the #include tree.

This patch implements this by computing a signature for the #include
path from a given image up to the root of the path.  In the case of
5.pph this would be 5 -> 4 -> 2.  If two symbols in that path have a
conflicting hash, we reject them when testing for equality
(htab_merge_key_eq).

With this patch, we no longer have an O(n^2) merge loop, so
c1limits-externalid.cc is back to being 20% faster in PPH mode.

Tested on x86_64.  Committed to branch.


Diego.

	* pph-streamer-in.c (pph_in_merge_key_chain): Add new argument
	IPATH_HASH.  Update all users.
	(pph_match_to_overload): Remove.
	(pph_match_to_function): Remove.
	(pph_match_to_link): Remove.
	(pph_search_in_chain): Remove.
	(merge_toc): Declare.
	(merge_toc_entry): Declare.
	(htab_merge_key_hash): New.
	(htab_merge_key_eq): New.
	(pph_toc_lookup): New.
	(pph_toc_add): New.
	(pph_prepend_to_chain): Add argument IPATH_HASH.  Update all users.
	Call pph_toc_lookup and pph_toc_add.
	(pph_in_merge_key_tree): Add argument IPATH_HASH.  Update all users.
	Call pph_in_string.
	(pph_get_include_path_hash): New.
	(pph_in_merge_keys): Call it.
	(pph_reader_init): New.
	(pph_reader_finish): New.
	* pph-streamer-out.c (pph_out_merge_key_tree): Do not call
	pph_out_location.
	* pph-streamer.c (pph_streamer_finish): Call pph_reader_finish.
	(pph_add_include): Update INCLUDE->PARENT.
	* pph-streamer.h (struct pph_stream): Add field PARENT.
	(pph_reader_init): Declare.
	(pph_reader_finish): Declare.
	* pph.c (pph_init): Call pph_reader_init.


testsuite/ChangeLog.pph

	* g++.dg/pph/c1limits-externalid.cc: Mark fixed.

diff --git a/gcc/cp/pph-streamer-in.c b/gcc/cp/pph-streamer-in.c
index ecb7182..6c8bec4 100644
--- a/gcc/cp/pph-streamer-in.c
+++ b/gcc/cp/pph-streamer-in.c
@@ -67,7 +67,7 @@ static VEC(char_p,heap) *string_tables = NULL;
 static int pph_loc_offset;
 
 /* Forward declarations to avoid circularity.  */
-static tree pph_in_merge_key_tree (pph_stream *, tree *);
+static tree pph_in_merge_key_tree (pph_stream *, tree *, hashval_t);
 
 /***************************************************** stream initialization */
 
@@ -690,17 +690,18 @@ pph_in_chain (pph_stream *stream)
 
 
 /* Read a chain of AST merge keys from STREAM.  Merge each tree
-   into *CHAIN.  */
+   into *CHAIN.  IPATH_HASH is the hash value of the include path
+   from STREAM to the root of the include tree.  */
 
 static void
-pph_in_merge_key_chain (pph_stream *stream, tree *chain)
+pph_in_merge_key_chain (pph_stream *stream, tree *chain, hashval_t ipath_hash)
 {
   unsigned i;
   HOST_WIDE_INT count;
 
   count = pph_in_hwi (stream);
   for (i = 0; i < count; i++)
-    pph_in_merge_key_tree (stream, chain);
+    pph_in_merge_key_tree (stream, chain, ipath_hash);
 }
 
 
@@ -718,106 +719,104 @@ pph_in_merge_body_chain (pph_stream *stream)
 }
 
 
-/* Match a new decl EXPR at location WHERE with identifier string IDSTR
-   against an overload set at the LINK of a chain.
-   The EXPR may be added to that set.  */
-
-static tree
-pph_match_to_overload (tree expr ATTRIBUTE_UNUSED,
-			location_t where ATTRIBUTE_UNUSED,
-			const char *idstr, tree *link ATTRIBUTE_UNUSED)
-{
-  /* FIXME pph: This is apparently not how overloading works.  */
-  if (flag_pph_debug >= 2)
-    fprintf (pph_logfile, "PPH: unhandled overload list for \"%s\"\n", idstr);
-  return NULL;
-}
+/* Merge table of contents.  This TOC is used to decide whether a
+   symbol has already been merged into a given compilation context.
+   Compilation contexts are usually tree chains (e.g.,
+   scope_chain->bindings->names), but they can be any stable memory
+   address.
 
+   This TOC is indexed by two values: the merge key read by
+   pph_in_merge_key_tree and the context in which we are doing this
+   merge.  */
+static htab_t merge_toc = NULL;
 
-/* Match a new decl EXPR at location WHERE with identifier string IDSTR
-   against a function at the LINK of a chain.
-   We may need to create an overload set if EXPR is not the same overload.  */
+/* Table of contents entry.  */
+typedef struct {
+  /* Tree being matched.  */
+  tree expr;
 
-static tree
-pph_match_to_function (tree expr ATTRIBUTE_UNUSED,
-			location_t where ATTRIBUTE_UNUSED,
-			const char *idstr ATTRIBUTE_UNUSED, tree *link)
-{
-  /* This function is called when strings match, which in the case
-     of functions are the mangled names.  The mangled names should be
-     distinct, so assume a match. */
+  /* Context where this tree should be inserted into.  */
+  tree *context;
 
-  /* FIXME pph: This routine may need to check overloading.
-     If it does, then we need to match on the identifier,
-     not the mangled name, as the identifier is the overload set.  */
+  /* Name of the tree (from pph_merge_name).  */
+  const char *name;
 
-  return *link;
-}
+  /* Hash value representing the include path starting at the image
+     where EXPR resides up to the root of the include tree.  Objects
+     found in any of these PPH images do not need to be merged.  They
+     were already emitted as external references and merged when
+     the PPH images were being generated.  */
+  hashval_t ipath_hash;
+} merge_toc_entry;
 
 
-/* Match a new decl EXPR at location WHERE with identifier string IDSTR
-   against an LINK of a chain. */
+/* Hash and equivalence functions for the merge TOC.  */
 
-static tree
-pph_match_to_link (tree expr, location_t where, const char *idstr, tree *link)
+static hashval_t
+htab_merge_key_hash (const void *p)
 {
-  enum tree_code link_code, expr_code;
-  tree idtree;
-  const char *idptr;
-
-  link_code = TREE_CODE (*link);
-  if (link_code == TREE_LIST)
-    /* FIXME pph: This is apparently not how overloading works.  */
-    return pph_match_to_overload (expr, where, idstr, link);
-
-  expr_code = TREE_CODE (expr);
-  if (link_code != expr_code)
-    return NULL;
+  const merge_toc_entry *key = (const merge_toc_entry *) p;
+  hashval_t context_val = htab_hash_pointer (key->context);
+  hashval_t name_val = htab_hash_string (key->name);
+  return iterative_hash_hashval_t (context_val, name_val);
+}
 
-  idtree = pph_merge_name (*link);
-  if (!idtree)
-    return NULL;
+static int
+htab_merge_key_eq (const void *p1, const void *p2)
+{
+  const merge_toc_entry *key1 = (const merge_toc_entry *) p1;
+  const merge_toc_entry *key2 = (const merge_toc_entry *) p2;
 
-  idptr = IDENTIFIER_POINTER (idtree);
-  if (!idptr)
-    return NULL;
+  /* Matches inside the same include path are not interesting.  These
+     symbols have already been merged.  */
+  if (key1->ipath_hash == key2->ipath_hash)
+    return false;
 
-  if (strcmp (idptr, idstr) != 0)
-    {
-      if (flag_pph_debug >= 4)
-        fprintf (pph_logfile, "PPH: link \"%s\" "
-			      "does not match mergeable \"%s\"\n",
-			      idptr, idstr);
-      return NULL;
-    }
+  if (key1->context != key2->context)
+    return false;
 
-  /* A name match!  */
+  if (TREE_CODE (key1->expr) != TREE_CODE (key2->expr))
+    return false;
 
-  if (expr_code == FUNCTION_DECL)
-    return pph_match_to_function (expr, where, idstr, link);
+  if (key1->name == NULL || key2->name == NULL)
+    return false;
 
-  /* A non-function match.  */
-  return *link;
+  return strcmp (key1->name, key2->name) == 0;
 }
 
 
-/* Possibly merge a new decl EXPR at location WHERE with identifier
-   string IDSTR into an the decl in the CHAIN. */
+/* Look in TOC for an existing tree matching KEY.  */
 
 static tree
-pph_search_in_chain (tree expr, location_t where, const char *idstr,
-		     tree *chain)
+pph_toc_lookup (htab_t toc, merge_toc_entry *key)
 {
-  /* FIXME pph: This could resultin O(POW(n,2)) compilation.  */
-  tree *link = chain;
-  while (*link != NULL)
+  void *slot = htab_find (toc, key);
+  tree expr = NULL;
+
+  if (slot)
     {
-      tree found = pph_match_to_link (expr, where, idstr, link);
-      if (found)
-        return found;
-      link = &DECL_CHAIN (*link);
+      merge_toc_entry *e = (merge_toc_entry *) slot;
+      expr = e->expr;
     }
-  return NULL;
+
+  return expr;
+}
+
+
+/* Insert KEY into TOC.  */
+
+static void
+pph_toc_add (htab_t toc, merge_toc_entry *key)
+{
+  void **slot;
+  merge_toc_entry *entry;
+
+  slot = htab_find_slot (toc, key, INSERT);
+  gcc_assert (*slot == NULL);
+
+  entry = XCNEW (merge_toc_entry);
+  memcpy (entry, key, sizeof (*key));
+  *slot = (void *) entry;
 }
 
 
@@ -831,34 +830,31 @@ pph_prepend_to_chain (tree expr, tree *chain)
   return expr;
 }
 
-/* Merge the just-read header for tree EXPR onto the CHAIN,
-   which may require reading more from the STREAM.  */
+
+/* Merge the just-read header for tree EXPR with NAME onto the CHAIN.
+   IPATH_HASH is the hash value of the include path from STREAM to the
+   root of the include tree.  */
 
 static tree
-pph_merge_into_chain (pph_stream *stream, tree expr, tree *chain)
+pph_merge_into_chain (tree expr, const char *name, tree *chain,
+		      hashval_t ipath_hash)
 {
-  location_t where;
-  const char *idstr;
-  tree found;
-
-  if (!DECL_P (expr))
-    return pph_prepend_to_chain (expr, chain);
-
-  where = pph_in_location (stream);
-  idstr = pph_in_string (stream);
-  if (!idstr)
-    return pph_prepend_to_chain (expr, chain);
-
-  found = pph_search_in_chain (expr, where, idstr, chain);
+  merge_toc_entry key = { expr, chain, name, ipath_hash };
+  tree found = pph_toc_lookup (merge_toc, &key);
   if (!found)
     {
+      pph_toc_add (merge_toc, &key);
+
       if (flag_pph_debug >= 3)
-        fprintf (pph_logfile, "PPH: %s NOT found on chain\n", idstr);
+        fprintf (pph_logfile, "PPH: %s NOT found on chain\n", name);
+
       return pph_prepend_to_chain (expr, chain);
     }
 
   if (flag_pph_debug >= 3)
-    fprintf (pph_logfile, "PPH: %s FOUND on chain\n", idstr);
+    fprintf (pph_logfile, "PPH: %s FOUND on chain\n", name);
+
+  gcc_assert (TREE_CODE (found) == TREE_CODE (expr));
 
   return found;
 }
@@ -1897,16 +1893,18 @@ pph_in_tree_header (pph_stream *stream, enum LTO_tags tag)
 
 /* Read a merge key from STREAM.  If the merge key read from
    STREAM is not found in *CHAIN, the newly allocated tree is added to
-   it.  */
+   it.  IPATH_HASH is the hash value of the include path from STREAM to
+   the root of the include tree.  */
 
 static tree
-pph_in_merge_key_tree (pph_stream *stream, tree *chain)
+pph_in_merge_key_tree (pph_stream *stream, tree *chain, hashval_t ipath_hash)
 {
   struct lto_input_block *ib = stream->encoder.r.ib;
   enum pph_record_marker marker;
   unsigned image_ix, ix;
   tree read_expr, expr;
   enum LTO_tags tag;
+  const char *name;
 
   marker = pph_in_start_record (stream, &image_ix, &ix, PPH_any_tree);
   gcc_assert (marker == PPH_RECORD_START_MERGE_KEY);
@@ -1915,11 +1913,12 @@ pph_in_merge_key_tree (pph_stream *stream, tree *chain)
   /* Materialize a new node from STREAM.  This will also read all the
      language-independent bitfields for the new tree.  */
   read_expr = pph_in_tree_header (stream, tag);
+  name = pph_in_string (stream);
 
   /* Look for a match in CHAIN to READ_EXPR's header.  If we found a
      match, EXPR will be the existing tree that matches READ_EXPR.
      Otherwise, EXPR is the newly allocated READ_EXPR.  */
-  expr = pph_merge_into_chain (stream, read_expr, chain);
+  expr = pph_merge_into_chain (read_expr, name, chain, ipath_hash);
 
   gcc_assert (expr != NULL);
 
@@ -2024,7 +2023,6 @@ pph_in_tree (pph_stream *stream)
   if (marker == PPH_RECORD_START_MERGE_BODY)
     TREE_CHAIN (expr) = saved_expr_chain;
 
-
   if (flag_pph_tracer)
     pph_trace_tree (expr, false, false);
 
@@ -2332,6 +2330,22 @@ pph_in_identifiers (pph_stream *stream, cpp_idents_used *identifiers)
 }
 
 
+/* Compute a hash value for all the images starting at STREAM up to the
+   root of the include hierarchy.  */
+
+static hashval_t
+pph_get_include_path_hash (pph_stream *stream)
+{
+  pph_stream *i;
+  hashval_t val;
+
+  for (val = 0, i = stream; i; i = i->parent)
+    val = iterative_hash_hashval_t (htab_hash_pointer (i), val);
+
+  return val;
+}
+
+
 /* Read all the merge keys from STREAM.  Merge into the corresponding
    contexts.  Return a VEC of all the merge keys read.  */
 
@@ -2339,12 +2353,19 @@ static void
 pph_in_merge_keys (pph_stream *stream)
 {
   cp_binding_level *bl = scope_chain->bindings;
+  hashval_t include_path_hash = 0;
+
+  /* Compute the signature for the include path from STREAM up to
+     the root of the inclusion tree.  Symbols found in any image in
+     the direct path from STREAM up to the root do not need to be
+     merged.  They were already merged when the images were generated.  */
+  include_path_hash = pph_get_include_path_hash (stream);
 
   /* First read all the merge keys and merge into the global bindings.  */
-  pph_in_merge_key_chain (stream, &bl->names);
-  pph_in_merge_key_chain (stream, &bl->namespaces);
-  pph_in_merge_key_chain (stream, &bl->usings);
-  pph_in_merge_key_chain (stream, &bl->using_directives);
+  pph_in_merge_key_chain (stream, &bl->names, include_path_hash);
+  pph_in_merge_key_chain (stream, &bl->namespaces, include_path_hash);
+  pph_in_merge_key_chain (stream, &bl->usings, include_path_hash);
+  pph_in_merge_key_chain (stream, &bl->using_directives, include_path_hash);
 
   /* Now read the bodies of all the trees merged above.  */
   pph_in_merge_body_chain (stream);
@@ -2485,3 +2506,21 @@ pph_read_file (const char *filename)
 
   return stream;
 }
+
+
+/* Initialize the reader.  */
+
+void
+pph_reader_init (void)
+{
+  merge_toc = htab_create (551, htab_merge_key_hash, htab_merge_key_eq, free);
+}
+
+
+/* Finalize the reader.  */
+
+void
+pph_reader_finish (void)
+{
+  htab_delete (merge_toc);
+}
diff --git a/gcc/cp/pph-streamer-out.c b/gcc/cp/pph-streamer-out.c
index 32cf96c..ddff891 100644
--- a/gcc/cp/pph-streamer-out.c
+++ b/gcc/cp/pph-streamer-out.c
@@ -1929,7 +1929,6 @@ pph_out_merge_key_tree (pph_stream *stream, tree expr)
      to re-allocate EXPR in the reader) and the merge key, used to
      lookup EXPR in the reader's context and merge if necessary.  */
   pph_out_tree_header (stream, expr);
-  pph_out_location (stream, DECL_SOURCE_LOCATION (expr));
   pph_out_merge_name (stream, expr);
 }
 
diff --git a/gcc/cp/pph-streamer.c b/gcc/cp/pph-streamer.c
index 8476717..8624c13 100644
--- a/gcc/cp/pph-streamer.c
+++ b/gcc/cp/pph-streamer.c
@@ -194,6 +194,9 @@ pph_streamer_finish (void)
   /* Finalize the writer.  */
   pph_writer_finish ();
 
+  /* Finalize the reader.  */
+  pph_reader_finish ();
+
   /* Close any images read during parsing.  */
   FOR_EACH_VEC_ELT (pph_stream_ptr, pph_read_images, i, image)
     pph_stream_close (image);
@@ -325,6 +328,7 @@ pph_add_include (pph_stream *stream, pph_stream *include)
   pph_stream *include_child;
   unsigned i;
 
+  include->parent = stream;
   VEC_safe_push (pph_stream_ptr, heap, stream->includes, include);
   FOR_EACH_VEC_ELT (pph_stream_ptr, include->includes, i, include_child)
     VEC_safe_push (pph_stream_ptr, heap, stream->includes, include_child);
diff --git a/gcc/cp/pph-streamer.h b/gcc/cp/pph-streamer.h
index 61af2e6..35791bb 100644
--- a/gcc/cp/pph-streamer.h
+++ b/gcc/cp/pph-streamer.h
@@ -203,10 +203,10 @@ struct pph_stream {
   unsigned int in_memory_p : 1;
 
   /* Symbol table.  This is collected as the compiler instantiates
-    symbols and functions.  Once we finish parsing the header file,
-    this array is written out to the PPH image.  This way, the reader
-    will be able to instantiate these symbols in the same order that
-    they were instantiated originally.  */
+     symbols and functions.  Once we finish parsing the header file,
+     this array is written out to the PPH image.  This way, the reader
+     will be able to instantiate these symbols in the same order that
+     they were instantiated originally.  */
   pph_symtab symtab;
 
   /* Transitive closure list of all the images included directly and
@@ -214,6 +214,10 @@ struct pph_stream {
      files, not regular text headers.  Regular text headers are embedded
      in this stream.  */
   VEC(pph_stream_ptr,heap) *includes;
+
+  /* Parent include file.  This points to the PPH stream for the file
+     that immediately includes this one.  */
+  pph_stream_ptr parent;
 };
 
 /* Filter values to avoid emitting certain objects to a PPH file.  */
@@ -264,6 +268,8 @@ void pph_init_read (pph_stream *);
 location_t pph_in_location (pph_stream *);
 pph_stream *pph_read_file (const char *);
 tree pph_in_tree (pph_stream *stream);
+void pph_reader_init (void);
+void pph_reader_finish (void);
 
 
 /* Inline functions.  */
diff --git a/gcc/cp/pph.c b/gcc/cp/pph.c
index 04df1c5..096a632 100644
--- a/gcc/cp/pph.c
+++ b/gcc/cp/pph.c
@@ -259,6 +259,8 @@ pph_init (void)
   /* If we are generating a PPH file, initialize the writer.  */
   if (pph_writer_enabled_p ())
     pph_writer_init ();
+
+  pph_reader_init ();
 }
 
 
diff --git a/gcc/testsuite/g++.dg/pph/c1limits-externalid.cc b/gcc/testsuite/g++.dg/pph/c1limits-externalid.cc
index 8dc6b8a..b10f1c1 100644
--- a/gcc/testsuite/g++.dg/pph/c1limits-externalid.cc
+++ b/gcc/testsuite/g++.dg/pph/c1limits-externalid.cc
@@ -1,8 +1 @@
-/* { dg-timeout 15 } */
-/* { dg-xfail-if "BOGUS MERGE HUGE SYMBOL LIST" { *-*-* } { "-fpph-map=pph.map" } } */
-/* FIXME pph - The following timeout may cause failures on slow targets.
-   In general it takes no longer than a couple of seconds to compile
-   this test, but the new merging code is having trouble with this.
-   Probably due to an O(n^2) merging algorithm.  */
-
 #include "c0limits-externalid.h"

--
This patch is available for review at http://codereview.appspot.com/5305056



More information about the Gcc-patches mailing list