Avoid multiple entry SCC regions
Jan Hubicka
hubicka@ucw.cz
Mon Jul 28 10:09:00 GMT 2014
Hi,
here is updated version of the patch to avoid streaming trees that are local.
I made it mostly to see what effect it have on collisions (as I think having
unmergeable trees in the SCC hash artificially increase collision rate for
trees derrived from this).
To my surprise the number are quite different. Now I get on slightly older
unpatched tree:
[WPA] read 9572344 SCCs of average size 2.030230
[WPA] 19434056 tree bodies read in total
[WPA] tree SCC table: size 4194301, 2047117 elements, collision ratio: 0.784630
[WPA] tree SCC max chain length 140 (size 1)
[WPA] Compared 3380248 SCCs, 2334141 collisions (0.690524)
[WPA] Merged 3368715 SCCs
[WPA] Merged 10190141 tree bodies
[WPA] Merged 2664380 types
[WPA] 1779189 types prevailed (4089309 associated trees)
[WPA] GIMPLE canonical type table: size 262139, 98918 elements, 1779274 searches, 756421 collisions (ratio: 0.425129)
[WPA] GIMPLE canonical type pointer-map: 98918 elements, 4109561 searches
[WPA] # of input files: 1817
[WPA] Compression: 266574384 input bytes, 818483159 uncompressed bytes (ratio: 3.070374)
While with the patch I get:
[WPA] read 9572334 SCCs of average size 2.030231
[WPA] 19434046 tree bodies read in total
[WPA] tree SCC table: size 4194301, 1827487 elements, collision ratio: 0.797434
[WPA] tree SCC max chain length 140 (size 1)
[WPA] Compared 3317907 SCCs, 2322035 collisions (0.699849)
[WPA] 465571 local SCCs
[WPA] Merged 3307884 SCCs
[WPA] Merged 10001740 tree bodies
[WPA] Merged 2612232 types
[WPA] 1831335 types prevailed (4257855 associated trees)
[WPA] GIMPLE canonical type table: size 262139, 98953 elements, 1831420 searches, 619322 collisions (ratio: 0.338165)
[WPA] GIMPLE canonical type pointer-map: 98953 elements, 4287782 searches
[WPA] # of input files: 1817
[WPA] Compression: 264291248 input bytes, 816565244 uncompressed bytes (ratio: 3.089642)
It seems that the number of local SCCs is somewhat low (465571) to affect
overall statistics (I think we however still need for on-disk merging and for
proper handling of anonymous types to decide mergeability early).
Main change was modification to type_in_anonymous_namespace_p to ignore
declarations with no context to patch around builtin types having wrong
flags. I will double check that anonymous types are recognized correctly,
but we do have testsuite coverage for that, so probably they are.
Honza
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c (revision 213121)
+++ lto-streamer-out.c (working copy)
@@ -54,7 +54,49 @@ along with GCC; see the file COPYING3.
#include "streamer-hooks.h"
#include "cfgloop.h"
#include "builtins.h"
+#include "print-tree.h"
+/* Return if T can never be shared across units. */
+static bool
+unit_local_tree_p (tree t)
+{
+ switch (TREE_CODE (t))
+ {
+ case VAR_DECL:
+ /* Automatic variables are always unit local. */
+ if (!TREE_STATIC (t) && !DECL_EXTERNAL (t)
+ && !DECL_HARD_REGISTER (t))
+ return true;
+ /* ... fall through ... */
+
+ case FUNCTION_DECL:
+ /* Non-public declarations are alwyas local. */
+ if (!TREE_PUBLIC (t))
+ return true;
+
+ /* Public definitions that would cause linker error if
+ appeared in other unit. */
+ if (TREE_PUBLIC (t)
+ && !DECL_EXTERNAL (t)
+ && !DECL_WEAK (t))
+ return true;
+ return false;
+ case NAMESPACE_DECL:
+ return !TREE_PUBLIC (t);
+ case TRANSLATION_UNIT_DECL:
+ return true;
+ case PARM_DECL:
+ case RESULT_DECL:
+ case LABEL_DECL:
+ case SSA_NAME:
+ return true;
+ default:
+ if (TYPE_P (t)
+ && type_in_anonymous_namespace_p (t))
+ return true;
+ return false;
+ }
+}
static void lto_write_tree (struct output_block*, tree, bool);
@@ -740,7 +782,12 @@ hash_tree (struct streamer_tree_cache_d
if (!SIBLING) \
hstate.add_int (0); \
else if (streamer_tree_cache_lookup (cache, SIBLING, &ix)) \
- hstate.add_int (streamer_tree_cache_get_hash (cache, ix)); \
+ { \
+ hashval_t h = streamer_tree_cache_get_hash (cache, ix); \
+ if (h == STREAMER_TREE_CACHE_HASH_LOCAL) \
+ return STREAMER_TREE_CACHE_HASH_LOCAL; \
+ hstate.add_int (h); \
+ } \
else if (map) \
hstate.add_int (*map->get (SIBLING)); \
else \
@@ -750,6 +797,8 @@ hash_tree (struct streamer_tree_cache_d
/* Hash TS_BASE. */
enum tree_code code = TREE_CODE (t);
hstate.add_int (code);
+ if (unit_local_tree_p (t))
+ return STREAMER_TREE_CACHE_HASH_LOCAL;
if (!TYPE_P (t))
{
hstate.add_flag (TREE_SIDE_EFFECTS (t));
@@ -1136,7 +1185,9 @@ hash_tree (struct streamer_tree_cache_d
visit (OMP_CLAUSE_CHAIN (t));
}
- return hstate.end ();
+ hashval_t v = hstate.end ();
+
+ return v + (v == STREAMER_TREE_CACHE_HASH_LOCAL);
#undef visit
}
@@ -1166,8 +1217,16 @@ DFS::hash_scc (struct output_block *ob,
/* Compute hash values for the SCC members. */
for (unsigned i = 0; i < size; ++i)
- sccstack[first+i].hash = hash_tree (ob->writer_cache, NULL,
- sccstack[first+i].t);
+ {
+ sccstack[first+i].hash = hash_tree (ob->writer_cache, NULL,
+ sccstack[first+i].t);
+ if (sccstack[first+i].hash == STREAMER_TREE_CACHE_HASH_LOCAL)
+ {
+ for (unsigned i = 0; i < size; ++i)
+ sccstack[first+i].hash = STREAMER_TREE_CACHE_HASH_LOCAL;
+ return STREAMER_TREE_CACHE_HASH_LOCAL;
+ }
+ }
if (size == 1)
return sccstack[first].hash;
@@ -1255,6 +1314,8 @@ DFS::hash_scc (struct output_block *ob,
sccstack[first+i].hash
= iterative_hash_hashval_t (i,
*map.get (sccstack[first+i].t));
+ sccstack[first+i].hash += (sccstack[first+i].hash
+ == STREAMER_TREE_CACHE_HASH_LOCAL);
scc_hash = iterative_hash_hashval_t (scc_hash,
sccstack[first+i].hash);
}
@@ -1283,7 +1344,7 @@ DFS::hash_scc (struct output_block *ob,
for (unsigned i = 0; i < size; ++i)
sccstack[first+i].hash
= iterative_hash_hashval_t (sccstack[first+i].hash, scc_hash);
- return scc_hash;
+ return scc_hash + (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL);
}
last_classes = classes;
@@ -1383,32 +1444,37 @@ DFS::DFS_write_tree (struct output_block
any merging there. */
hashval_t scc_hash = 0;
unsigned scc_entry_len = 0;
- if (!flag_wpa)
+ if (!flag_wpa && !ob->symbol)
{
scc_hash = hash_scc (ob, first, size);
/* Put the entries with the least number of collisions first. */
- unsigned entry_start = 0;
- scc_entry_len = size + 1;
- for (unsigned i = 0; i < size;)
+ if (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL)
{
- unsigned from = i;
- for (i = i + 1; i < size
- && (sccstack[first + i].hash
- == sccstack[first + from].hash); ++i)
- ;
- if (i - from < scc_entry_len)
+ unsigned entry_start = 0;
+ scc_entry_len = size + 1;
+ for (unsigned i = 0; i < size;)
{
- scc_entry_len = i - from;
- entry_start = from;
+ unsigned from = i;
+ for (i = i + 1; i < size
+ && (sccstack[first + i].hash
+ == sccstack[first + from].hash); ++i)
+ ;
+ if (i - from < scc_entry_len)
+ {
+ scc_entry_len = i - from;
+ entry_start = from;
+ }
+ }
+ for (unsigned i = 0; i < scc_entry_len; ++i)
+ {
+ scc_entry tem = sccstack[first + i];
+ sccstack[first + i] = sccstack[first + entry_start + i];
+ sccstack[first + entry_start + i] = tem;
}
}
- for (unsigned i = 0; i < scc_entry_len; ++i)
- {
- scc_entry tem = sccstack[first + i];
- sccstack[first + i] = sccstack[first + entry_start + i];
- sccstack[first + entry_start + i] = tem;
- }
+ else
+ scc_entry_len = 1;
if (scc_entry_len == 1)
; /* We already sorted SCC deterministically in hash_scc. */
@@ -1429,7 +1495,7 @@ DFS::DFS_write_tree (struct output_block
streamer_write_uhwi (ob, scc_hash);
/* Write size-1 SCCs without wrapping them inside SCC bundles.
- All INTEGER_CSTs need to be handled this way as we need
+ All INTEGER_CSTs need thiso be handled this way as we need
their type to materialize them. Also builtins are handled
this way.
??? We still wrap these in LTO_tree_scc so at the
@@ -1449,6 +1515,8 @@ DFS::DFS_write_tree (struct output_block
tree t = sccstack[first+i].t;
bool exists_p = streamer_tree_cache_insert (ob->writer_cache,
t, hash, &ix);
+ gcc_assert (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL
+ || hash == STREAMER_TREE_CACHE_HASH_LOCAL);
gcc_assert (!exists_p);
if (!lto_is_streamable (t))
Index: lto-streamer.h
===================================================================
--- lto-streamer.h (revision 213121)
+++ lto-streamer.h (working copy)
@@ -1191,4 +1191,6 @@ DEFINE_DECL_STREAM_FUNCS (TYPE_DECL, typ
DEFINE_DECL_STREAM_FUNCS (NAMESPACE_DECL, namespace_decl)
DEFINE_DECL_STREAM_FUNCS (LABEL_DECL, label_decl)
+#define STREAMER_TREE_CACHE_HASH_LOCAL 1
+
#endif /* GCC_LTO_STREAMER_H */
Index: lto/lto.c
===================================================================
--- lto/lto.c (revision 213121)
+++ lto/lto.c (working copy)
@@ -1144,6 +1146,7 @@ static unsigned long num_prevailing_type
static unsigned long num_type_scc_trees;
static unsigned long total_scc_size;
static unsigned long num_sccs_read;
+static unsigned long num_local_nodes;
static unsigned long total_scc_size_merged;
static unsigned long num_sccs_merged;
static unsigned long num_scc_compares;
@@ -1725,9 +1728,7 @@ unify_scc (struct streamer_tree_cache_d
&& !(TREE_PUBLIC (t) || DECL_EXTERNAL (t)))
|| TREE_CODE (t) == LABEL_DECL)
{
- /* Avoid doing any work for these cases and do not worry to
- record the SCCs for further merging. */
- return false;
+ gcc_unreachable ();
}
}
@@ -1889,8 +1890,11 @@ lto_read_decls (struct lto_file_decl_dat
|| streamer_handle_as_builtin_p (first)))
continue;
+ if (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL)
+ num_local_nodes++;
+
/* Try to unify the SCC with already existing ones. */
- if (!flag_ltrans
+ if (!flag_ltrans && scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL
&& unify_scc (data_in->reader_cache, from,
len, scc_entry_len, scc_hash))
continue;
@@ -2710,13 +2714,10 @@ lto_fixup_prevailing_decls (tree t)
{
LTO_NO_PREVAIL (t->decl_with_vis.assembler_name);
}
- if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON))
- {
- LTO_NO_PREVAIL (DECL_RESULT_FLD (t));
- }
if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL))
{
LTO_NO_PREVAIL (DECL_ARGUMENTS (t));
+ LTO_NO_PREVAIL (DECL_RESULT (t));
LTO_SET_PREVAIL (DECL_FUNCTION_PERSONALITY (t));
LTO_NO_PREVAIL (DECL_VINDEX (t));
}
@@ -3180,6 +3181,7 @@ print_lto_report_1 (void)
fprintf (stderr, "[%s] Compared %lu SCCs, %lu collisions (%f)\n", pfx,
num_scc_compares, num_scc_compare_collisions,
num_scc_compare_collisions / (double) num_scc_compares);
+ fprintf (stderr, "[%s] %lu local SCCs\n", pfx, num_local_nodes);
fprintf (stderr, "[%s] Merged %lu SCCs\n", pfx, num_sccs_merged);
fprintf (stderr, "[%s] Merged %lu tree bodies\n", pfx,
total_scc_size_merged);
More information about the Gcc-patches
mailing list