[RFC] Teaching SCC merging about unit local trees

Jan Hubicka hubicka@ucw.cz
Thu Jun 12 08:47:00 GMT 2014


Richard,
as briefly discussed before, I would like to teach LTO type merging to not merge
types that was declared in anonymous namespaces and use C++ ODR type names 
(stored in DECL_ASSEMBLER_NAME of the TYPE_DECL) to break down canonical types
by their names.

First thing I need to arrange IMO is to not merge two anonymous types from
two different units.  While looking into it I noticed that the current code
in unify_scc that refuses to merge local decls produces conflicts and seems
useless excercise to do.

This patch introduces special hash code 1 that specify that given SCC is known
to be local and should bypass the merging logic. This is propagated down and
seems to quite noticeably reduce size of SCC hash:

[WPA] read 10190717 SCCs of average size 1.980409
[WPA] 20181785 tree bodies read in total
[WPA] tree SCC table: size 4194301, 1882700 elements, collision ratio: 0.815497
[WPA] tree SCC max chain length 140 (size 1)
[WPA] Compared 3392363 SCCs, 2718822 collisions (0.801454)
[WPA] Merged 3314075 SCCs
[WPA] Merged 9693632 tree bodies
[WPA] Merged 2467704 types
[WPA] 1783262 types prevailed (4491218 associated trees)
[WPA] GIMPLE canonical type table: size 131071, 94867 elements, 1783347 searches, 737056 collisions (ratio: 0.413299)
[WPA] GIMPLE canonical type pointer-map: 94867 elements, 3973875 searches
[WPA] Compression: 282828785 input bytes, 831186147 uncompressed bytes (ratio: 2.938832)
[WPA] Size of mmap'd section decls: 282828785 bytes

to:

[WPA] read 10172291 SCCs of average size 1.982162
[WPA] 20163124 tree bodies read in total
[WPA] tree SCC table: size 2097143, 988764 elements, collision ratio: 0.684967
[WPA] tree SCC max chain length 140 (size 1)
[WPA] Compared 3060932 SCCs, 2405009 collisions (0.785711)
[WPA] Merged 3040565 SCCs
[WPA] Merged 9246482 tree bodies
[WPA] Merged 2382312 types
[WPA] 1868611 types prevailed (4728465 associated trees)
[WPA] GIMPLE canonical type table: size 131071, 94910 elements, 1868696 searches, 790939 collisions (ratio: 0.423257)
[WPA] GIMPLE canonical type pointer-map: 94910 elements, 4216423 searches
[WPA] Compression: 273322455 input bytes, 824178095 uncompressed bytes (ratio: 3.015406) 

We merge less, but not by much and I think we was not right not merge in that cases.

Would something like this make sense? (I am not saying my definition of unit_local_tree_p
is most polished one :)

I think next step could be to make anonymous types to bypass the canonical type
merging (i.e. simply save the chains as they comde from frontends forthose) and
then look into computing the type names in free lang data, using odr name hash instaed
of canonical type hash for those named types + link them to canonical type hash
entries and if we end up with unnamed type in canonical type hash, then make its
alias class to conflict with all the named types.

Honza

Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 211488)
+++ lto-streamer-out.c	(working copy)
@@ -54,6 +54,47 @@ along with GCC; see the file COPYING3.
 #include "cfgloop.h"
 #include "builtins.h"
 
+/* Return if T can never be shared across units.  */
+static bool
+unit_local_tree_p (tree t)
+{
+  switch (TREE_CODE (t))
+    {
+      case VAR_DECL:
+	/* Automatic variables are always unit local.  */
+	if (!TREE_STATIC (t) && !DECL_EXTERNAL (t)
+	    && !DECL_HARD_REGISTER (t))
+	  return true;
+	/* ... fall through ... */
+
+      case FUNCTION_DECL:
+	/* Non-public declarations are alwyas local. */
+	if (!TREE_PUBLIC (t))
+	  return true;
+
+	/* Public definitions that would cause linker error if
+	   appeared in other unit.  */
+	if (TREE_PUBLIC (t)
+	    && !DECL_EXTERNAL (t)
+	    && !DECL_WEAK (t))
+	  return true;
+	return false;
+      case NAMESPACE_DECL:
+	return !TREE_PUBLIC (t);
+      case TRANSLATION_UNIT_DECL:
+	return true;
+      case PARM_DECL:
+      case RESULT_DECL:
+      case LABEL_DECL:
+      case SSA_NAME:
+	return true;
+      default:
+	if (TYPE_P (t)
+	    && type_in_anonymous_namespace_p (t))
+	  return true;
+	return false;
+    }
+}
 
 static void lto_write_tree (struct output_block*, tree, bool);
 
@@ -686,7 +727,9 @@ DFS_write_tree_body (struct output_block
 #undef DFS_follow_tree_edge
 }
 
-/* Return a hash value for the tree T.  */
+/* Return a hash value for the tree T. 
+   If T is local to unit or refers anything local to unit, return 1.
+   Otherwise return non-1.  */
 
 static hashval_t
 hash_tree (struct streamer_tree_cache_d *cache, tree t)
@@ -694,10 +737,19 @@ hash_tree (struct streamer_tree_cache_d
 #define visit(SIBLING) \
   do { \
     unsigned ix; \
+    hashval_t h; \
     if (SIBLING && streamer_tree_cache_lookup (cache, SIBLING, &ix)) \
-      v = iterative_hash_hashval_t (streamer_tree_cache_get_hash (cache, ix), v); \
+      { \
+        h = streamer_tree_cache_get_hash (cache, ix); \
+        if (h == STREAMER_TREE_CACHE_HASH_LOCAL) \
+	  return STREAMER_TREE_CACHE_HASH_LOCAL; \
+        v = iterative_hash_hashval_t (h, v); \
+      } \
   } while (0)
 
+  if (unit_local_tree_p (t))
+    return STREAMER_TREE_CACHE_HASH_LOCAL;
+
   /* Hash TS_BASE.  */
   enum tree_code code = TREE_CODE (t);
   hashval_t v = iterative_hash_host_wide_int (code, 0);
@@ -911,7 +963,11 @@ hash_tree (struct streamer_tree_cache_d
 	  hashval_t x;
 	  unsigned ix;
 	  if (streamer_tree_cache_lookup (cache, TREE_TYPE (t), &ix))
-	    x = streamer_tree_cache_get_hash (cache, ix);
+	    {
+	      x = streamer_tree_cache_get_hash (cache, ix);
+	      if (x == STREAMER_TREE_CACHE_HASH_LOCAL)
+		return STREAMER_TREE_CACHE_HASH_LOCAL;
+	    }
 	  else
 	    x = hash_tree (cache, TREE_TYPE (t));
 	  v = iterative_hash_hashval_t (x, v);
@@ -1101,7 +1157,7 @@ hash_tree (struct streamer_tree_cache_d
       visit (OMP_CLAUSE_CHAIN (t));
     }
 
-  return v;
+  return v + (v == STREAMER_TREE_CACHE_HASH_LOCAL);
 
 #undef visit
 }
@@ -1121,14 +1177,25 @@ scc_entry_compare (const void *p1_, cons
 }
 
 /* Return a hash value for the SCC on the SCC stack from FIRST with
-   size SIZE.  */
+   size SIZE.
+   If STREAMER_TREE_CACHE_HASH_LOCAL is returned, we know the SCC will never
+   get merged with other unit.  */
 
 static hashval_t
 hash_scc (struct streamer_tree_cache_d *cache, unsigned first, unsigned size)
 {
   /* Compute hash values for the SCC members.  */
   for (unsigned i = 0; i < size; ++i)
-    sccstack[first+i].hash = hash_tree (cache, sccstack[first+i].t);
+    {
+      sccstack[first+i].hash = hash_tree (cache, sccstack[first+i].t);
+      /* If any member of SCC is local, whole SCC is.  */
+      if (sccstack[first+i].hash == STREAMER_TREE_CACHE_HASH_LOCAL)
+	{
+	  for (unsigned i = 0; i < size; ++i)
+	    sccstack[first+i].hash = STREAMER_TREE_CACHE_HASH_LOCAL;
+	  return STREAMER_TREE_CACHE_HASH_LOCAL;
+	}
+    }
 
   if (size == 1)
     return sccstack[first].hash;
@@ -1161,7 +1228,7 @@ hash_scc (struct streamer_tree_cache_d *
       sccstack[first+i].hash = tem[i];
       scc_hash = iterative_hash_hashval_t (tem[i], scc_hash);
     }
-  return scc_hash;
+  return scc_hash + (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL);
 }
 
 /* DFS walk EXPR and stream SCCs of tree bodies if they are not
@@ -1242,26 +1309,29 @@ DFS_write_tree (struct output_block *ob,
 	      scc_hash = hash_scc (ob->writer_cache, first, size);
 
 	      /* Put the entries with the least number of collisions first.  */
-	      unsigned entry_start = 0;
-	      scc_entry_len = size + 1;
-	      for (unsigned i = 0; i < size;)
+	      if (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL)
 		{
-		  unsigned from = i;
-		  for (i = i + 1; i < size
-		       && (sccstack[first + i].hash
-			   == sccstack[first + from].hash); ++i)
-		    ;
-		  if (i - from < scc_entry_len)
+		  unsigned entry_start = 0;
+		  scc_entry_len = size + 1;
+		  for (unsigned i = 0; i < size;)
 		    {
-		      scc_entry_len = i - from;
-		      entry_start = from;
+		      unsigned from = i;
+		      for (i = i + 1; i < size
+			   && (sccstack[first + i].hash
+			       == sccstack[first + from].hash); ++i)
+			;
+		      if (i - from < scc_entry_len)
+			{
+			  scc_entry_len = i - from;
+			  entry_start = from;
+			}
+		    }
+		  for (unsigned i = 0; i < scc_entry_len; ++i)
+		    {
+		      scc_entry tem = sccstack[first + i];
+		      sccstack[first + i] = sccstack[first + entry_start + i];
+		      sccstack[first + entry_start + i] = tem;
 		    }
-		}
-	      for (unsigned i = 0; i < scc_entry_len; ++i)
-		{
-		  scc_entry tem = sccstack[first + i];
-		  sccstack[first + i] = sccstack[first + entry_start + i];
-		  sccstack[first + entry_start + i] = tem;
 		}
 	    }
 
@@ -1271,7 +1341,7 @@ DFS_write_tree (struct output_block *ob,
 	  streamer_write_uhwi (ob, scc_hash);
 
 	  /* Write size-1 SCCs without wrapping them inside SCC bundles.
-	     All INTEGER_CSTs need to be handled this way as we need
+	     All INTEGER_CSTs need thiso be handled this way as we need
 	     their type to materialize them.  Also builtins are handled
 	     this way.
 	     ???  We still wrap these in LTO_tree_scc so at the
@@ -1291,6 +1361,8 @@ DFS_write_tree (struct output_block *ob,
 		  tree t = sccstack[first+i].t;
 		  bool exists_p = streamer_tree_cache_insert (ob->writer_cache,
 							      t, hash, &ix);
+		  gcc_assert (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL
+			      || hash == STREAMER_TREE_CACHE_HASH_LOCAL);
 		  gcc_assert (!exists_p);
 
 		  if (!lto_is_streamable (t))
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 211488)
+++ lto/lto.c	(working copy)
@@ -1896,7 +1897,7 @@ lto_read_decls (struct lto_file_decl_dat
 	    continue;
 
 	  /* Try to unify the SCC with already existing ones.  */
-	  if (!flag_ltrans
+	  if (!flag_ltrans && scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL
 	      && unify_scc (data_in->reader_cache, from,
 			    len, scc_entry_len, scc_hash))
 	    continue;



More information about the Gcc-patches mailing list