Avoid multiple entry SCC regions

Jan Hubicka hubicka@ucw.cz
Mon Jul 28 10:09:00 GMT 2014


Hi,
here is updated version of the patch to avoid streaming trees that are local.
I made it mostly to see what effect it have on collisions (as I think having
unmergeable trees in the SCC hash artificially increase collision rate for
trees derrived from this).

To my surprise the number are quite different.  Now I get on slightly older
unpatched tree:

[WPA] read 9572344 SCCs of average size 2.030230
[WPA] 19434056 tree bodies read in total
[WPA] tree SCC table: size 4194301, 2047117 elements, collision ratio: 0.784630
[WPA] tree SCC max chain length 140 (size 1)
[WPA] Compared 3380248 SCCs, 2334141 collisions (0.690524)
[WPA] Merged 3368715 SCCs
[WPA] Merged 10190141 tree bodies
[WPA] Merged 2664380 types
[WPA] 1779189 types prevailed (4089309 associated trees)
[WPA] GIMPLE canonical type table: size 262139, 98918 elements, 1779274 searches, 756421 collisions (ratio: 0.425129)
[WPA] GIMPLE canonical type pointer-map: 98918 elements, 4109561 searches
[WPA] # of input files: 1817
[WPA] Compression: 266574384 input bytes, 818483159 uncompressed bytes (ratio: 3.070374)

While with the patch I get:

[WPA] read 9572334 SCCs of average size 2.030231
[WPA] 19434046 tree bodies read in total
[WPA] tree SCC table: size 4194301, 1827487 elements, collision ratio: 0.797434
[WPA] tree SCC max chain length 140 (size 1)
[WPA] Compared 3317907 SCCs, 2322035 collisions (0.699849)
[WPA] 465571 local SCCs
[WPA] Merged 3307884 SCCs
[WPA] Merged 10001740 tree bodies
[WPA] Merged 2612232 types
[WPA] 1831335 types prevailed (4257855 associated trees)
[WPA] GIMPLE canonical type table: size 262139, 98953 elements, 1831420 searches, 619322 collisions (ratio: 0.338165)
[WPA] GIMPLE canonical type pointer-map: 98953 elements, 4287782 searches
[WPA] # of input files: 1817
[WPA] Compression: 264291248 input bytes, 816565244 uncompressed bytes (ratio: 3.089642)

It seems that the number of local SCCs is somewhat low (465571) to affect
overall statistics (I think we however still need for on-disk merging and for
proper handling of anonymous types to decide mergeability early).

Main change was modification to type_in_anonymous_namespace_p to ignore
declarations with no context to patch around builtin types having wrong
flags.  I will double check that anonymous types are recognized correctly,
but we do have testsuite coverage for that, so probably they are.

Honza

Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 213121)
+++ lto-streamer-out.c	(working copy)
@@ -54,7 +54,49 @@ along with GCC; see the file COPYING3.
 #include "streamer-hooks.h"
 #include "cfgloop.h"
 #include "builtins.h"
+#include "print-tree.h"
 
+/* Return if T can never be shared across units.  */
+static bool
+unit_local_tree_p (tree t)
+{
+  switch (TREE_CODE (t))
+    {
+      case VAR_DECL:
+	/* Automatic variables are always unit local.  */
+	if (!TREE_STATIC (t) && !DECL_EXTERNAL (t)
+	    && !DECL_HARD_REGISTER (t))
+	  return true;
+	/* ... fall through ... */
+
+      case FUNCTION_DECL:
+	/* Non-public declarations are alwyas local. */
+	if (!TREE_PUBLIC (t))
+	  return true;
+
+	/* Public definitions that would cause linker error if
+	   appeared in other unit.  */
+	if (TREE_PUBLIC (t)
+	    && !DECL_EXTERNAL (t)
+	    && !DECL_WEAK (t))
+	  return true;
+	return false;
+      case NAMESPACE_DECL:
+	return !TREE_PUBLIC (t);
+      case TRANSLATION_UNIT_DECL:
+	return true;
+      case PARM_DECL:
+      case RESULT_DECL:
+      case LABEL_DECL:
+      case SSA_NAME:
+	return true;
+      default:
+	if (TYPE_P (t)
+	    && type_in_anonymous_namespace_p (t))
+	  return true;
+	return false;
+    }
+}
 
 static void lto_write_tree (struct output_block*, tree, bool);
 
@@ -740,7 +782,12 @@ hash_tree (struct streamer_tree_cache_d
     if (!SIBLING) \
       hstate.add_int (0); \
     else if (streamer_tree_cache_lookup (cache, SIBLING, &ix)) \
-      hstate.add_int (streamer_tree_cache_get_hash (cache, ix)); \
+      { \
+        hashval_t h = streamer_tree_cache_get_hash (cache, ix); \
+	if (h == STREAMER_TREE_CACHE_HASH_LOCAL) \
+	  return STREAMER_TREE_CACHE_HASH_LOCAL; \
+        hstate.add_int (h); \
+      } \
     else if (map) \
       hstate.add_int (*map->get (SIBLING)); \
     else \
@@ -750,6 +797,8 @@ hash_tree (struct streamer_tree_cache_d
   /* Hash TS_BASE.  */
   enum tree_code code = TREE_CODE (t);
   hstate.add_int (code);
+  if (unit_local_tree_p (t))
+    return STREAMER_TREE_CACHE_HASH_LOCAL;
   if (!TYPE_P (t))
     {
       hstate.add_flag (TREE_SIDE_EFFECTS (t));
@@ -1136,7 +1185,9 @@ hash_tree (struct streamer_tree_cache_d
       visit (OMP_CLAUSE_CHAIN (t));
     }
 
-  return hstate.end ();
+  hashval_t v = hstate.end ();
+
+  return v + (v == STREAMER_TREE_CACHE_HASH_LOCAL);
 
 #undef visit
 }
@@ -1166,8 +1217,16 @@ DFS::hash_scc (struct output_block *ob,
 
   /* Compute hash values for the SCC members.  */
   for (unsigned i = 0; i < size; ++i)
-    sccstack[first+i].hash = hash_tree (ob->writer_cache, NULL,
-					sccstack[first+i].t);
+    {
+      sccstack[first+i].hash = hash_tree (ob->writer_cache, NULL,
+					  sccstack[first+i].t);
+      if (sccstack[first+i].hash == STREAMER_TREE_CACHE_HASH_LOCAL)
+        {
+	  for (unsigned i = 0; i < size; ++i)
+	    sccstack[first+i].hash = STREAMER_TREE_CACHE_HASH_LOCAL;
+	  return STREAMER_TREE_CACHE_HASH_LOCAL;
+	}
+    }
 
   if (size == 1)
     return sccstack[first].hash;
@@ -1255,6 +1314,8 @@ DFS::hash_scc (struct output_block *ob,
 		  sccstack[first+i].hash
 		    = iterative_hash_hashval_t (i,
 						*map.get (sccstack[first+i].t));
+		  sccstack[first+i].hash += (sccstack[first+i].hash
+					     == STREAMER_TREE_CACHE_HASH_LOCAL);
 		  scc_hash = iterative_hash_hashval_t (scc_hash,
 						       sccstack[first+i].hash);
 		}
@@ -1283,7 +1344,7 @@ DFS::hash_scc (struct output_block *ob,
 	  for (unsigned i = 0; i < size; ++i)
 	    sccstack[first+i].hash
 	      = iterative_hash_hashval_t (sccstack[first+i].hash, scc_hash);
-	  return scc_hash;
+	  return scc_hash + (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL);
 	}
 
       last_classes = classes;
@@ -1383,32 +1444,37 @@ DFS::DFS_write_tree (struct output_block
 	     any merging there.  */
 	  hashval_t scc_hash = 0;
 	  unsigned scc_entry_len = 0;
-	  if (!flag_wpa)
+	  if (!flag_wpa && !ob->symbol)
 	    {
 	      scc_hash = hash_scc (ob, first, size);
 
 	      /* Put the entries with the least number of collisions first.  */
-	      unsigned entry_start = 0;
-	      scc_entry_len = size + 1;
-	      for (unsigned i = 0; i < size;)
+	      if (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL)
 		{
-		  unsigned from = i;
-		  for (i = i + 1; i < size
-		       && (sccstack[first + i].hash
-			   == sccstack[first + from].hash); ++i)
-		    ;
-		  if (i - from < scc_entry_len)
+		  unsigned entry_start = 0;
+		  scc_entry_len = size + 1;
+		  for (unsigned i = 0; i < size;)
 		    {
-		      scc_entry_len = i - from;
-		      entry_start = from;
+		      unsigned from = i;
+		      for (i = i + 1; i < size
+			   && (sccstack[first + i].hash
+			       == sccstack[first + from].hash); ++i)
+			;
+		      if (i - from < scc_entry_len)
+			{
+			  scc_entry_len = i - from;
+			  entry_start = from;
+			}
+		    }
+		  for (unsigned i = 0; i < scc_entry_len; ++i)
+		    {
+		      scc_entry tem = sccstack[first + i];
+		      sccstack[first + i] = sccstack[first + entry_start + i];
+		      sccstack[first + entry_start + i] = tem;
 		    }
 		}
-	      for (unsigned i = 0; i < scc_entry_len; ++i)
-		{
-		  scc_entry tem = sccstack[first + i];
-		  sccstack[first + i] = sccstack[first + entry_start + i];
-		  sccstack[first + entry_start + i] = tem;
-		}
+	      else
+		scc_entry_len = 1;
 
 	      if (scc_entry_len == 1)
 		; /* We already sorted SCC deterministically in hash_scc.  */
@@ -1429,7 +1495,7 @@ DFS::DFS_write_tree (struct output_block
 	  streamer_write_uhwi (ob, scc_hash);
 
 	  /* Write size-1 SCCs without wrapping them inside SCC bundles.
-	     All INTEGER_CSTs need to be handled this way as we need
+	     All INTEGER_CSTs need thiso be handled this way as we need
 	     their type to materialize them.  Also builtins are handled
 	     this way.
 	     ???  We still wrap these in LTO_tree_scc so at the
@@ -1449,6 +1515,8 @@ DFS::DFS_write_tree (struct output_block
 		  tree t = sccstack[first+i].t;
 		  bool exists_p = streamer_tree_cache_insert (ob->writer_cache,
 							      t, hash, &ix);
+		  gcc_assert (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL
+			      || hash == STREAMER_TREE_CACHE_HASH_LOCAL);
 		  gcc_assert (!exists_p);
 
 		  if (!lto_is_streamable (t))
Index: lto-streamer.h
===================================================================
--- lto-streamer.h	(revision 213121)
+++ lto-streamer.h	(working copy)
@@ -1191,4 +1191,6 @@ DEFINE_DECL_STREAM_FUNCS (TYPE_DECL, typ
 DEFINE_DECL_STREAM_FUNCS (NAMESPACE_DECL, namespace_decl)
 DEFINE_DECL_STREAM_FUNCS (LABEL_DECL, label_decl)
 
+#define STREAMER_TREE_CACHE_HASH_LOCAL 1
+
 #endif /* GCC_LTO_STREAMER_H  */
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 213121)
+++ lto/lto.c	(working copy)
@@ -1144,6 +1146,7 @@ static unsigned long num_prevailing_type
 static unsigned long num_type_scc_trees;
 static unsigned long total_scc_size;
 static unsigned long num_sccs_read;
+static unsigned long num_local_nodes;
 static unsigned long total_scc_size_merged;
 static unsigned long num_sccs_merged;
 static unsigned long num_scc_compares;
@@ -1725,9 +1728,7 @@ unify_scc (struct streamer_tree_cache_d
 	      && !(TREE_PUBLIC (t) || DECL_EXTERNAL (t)))
 	  || TREE_CODE (t) == LABEL_DECL)
 	{
-	  /* Avoid doing any work for these cases and do not worry to
-	     record the SCCs for further merging.  */
-	  return false;
+	  gcc_unreachable ();
 	}
     }
 
@@ -1889,8 +1890,11 @@ lto_read_decls (struct lto_file_decl_dat
 		  || streamer_handle_as_builtin_p (first)))
 	    continue;
 
+	  if (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL)
+	    num_local_nodes++;
+
 	  /* Try to unify the SCC with already existing ones.  */
-	  if (!flag_ltrans
+	  if (!flag_ltrans && scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL
 	      && unify_scc (data_in->reader_cache, from,
 			    len, scc_entry_len, scc_hash))
 	    continue;
@@ -2710,13 +2714,10 @@ lto_fixup_prevailing_decls (tree t)
 	{
 	  LTO_NO_PREVAIL (t->decl_with_vis.assembler_name);
 	}
-      if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON))
-	{
-	  LTO_NO_PREVAIL (DECL_RESULT_FLD (t));
-	}
       if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL))
 	{
 	  LTO_NO_PREVAIL (DECL_ARGUMENTS (t));
+	  LTO_NO_PREVAIL (DECL_RESULT (t));
 	  LTO_SET_PREVAIL (DECL_FUNCTION_PERSONALITY (t));
 	  LTO_NO_PREVAIL (DECL_VINDEX (t));
 	}
@@ -3180,6 +3181,7 @@ print_lto_report_1 (void)
       fprintf (stderr, "[%s] Compared %lu SCCs, %lu collisions (%f)\n", pfx,
 	       num_scc_compares, num_scc_compare_collisions,
 	       num_scc_compare_collisions / (double) num_scc_compares);
+      fprintf (stderr, "[%s] %lu local SCCs\n", pfx, num_local_nodes);
       fprintf (stderr, "[%s] Merged %lu SCCs\n", pfx, num_sccs_merged);
       fprintf (stderr, "[%s] Merged %lu tree bodies\n", pfx,
 	       total_scc_size_merged);



More information about the Gcc-patches mailing list