This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: WPA stream_out form & memory consumption
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: Martin Liška <mliska at suse dot cz>
- Cc: Jan Hubicka <hubicka at ucw dot cz>, gcc at gcc dot gnu dot org, Richard Biener <richard dot guenther at gmail dot com>
- Date: Thu, 3 Apr 2014 22:40:13 +0200
- Subject: Re: WPA stream_out form & memory consumption
- Authentication-results: sourceware.org; auth=none
- References: <53286192 dot 3030600 at suse dot cz> <20140325205021 dot GA6581 at atrey dot karlin dot mff dot cuni dot cz> <5333E6B8 dot 3000504 at suse dot cz> <5333F3D3 dot 1010009 at suse dot cz> <533C1B04 dot 40407 at suse dot cz> <20140402224344 dot GB1359 at atrey dot karlin dot mff dot cuni dot cz> <533D6FF7 dot 1030009 at suse dot cz>
> >Firefox:
> >cgraph.c:869 (cgraph_create_edge_1) 0: 0.0% 0: 0.0% 130358176: 6.9% 0: 0.0% 1253444
> >cgraph.c:510 (cgraph_allocate_node) 0: 0.0% 0: 0.0% 182236800: 9.7% 0: 0.0% 555600
> >toplev.c:960 (realloc_for_line_map) 0: 0.0% 89503888: 5.5% 268468240:14.3% 160: 0.0% 13
> >tree-streamer-in.c:621 (streamer_alloc_tree) 93089976:77.5% 972848816:59.6% 639230248:33.9% 21332480:32.3% 13496198
> >Total 120076578 1632997043 1883064062 65981723 24732501
> >source location Garbage Freed Leak Overhead Times
Hi,
This is the linemap compression patch. For me it reduces the realloc_for_line_map leak to about 60MB (4%):
toplev.c:960 (realloc_for_line_map) 0: 0.0% 22395000: 1.5% 67141656: 4.1% 144: 0.0% 12
vec.h:626 ((null)) 0: 0.0% 134568360: 9.3% 75497528: 4.6% 1569368: 2.4% 1009306
vec.h:614 ((null)) 10659408: 8.3% 61265504: 4.2% 78711104: 4.8% 793720: 1.2% 632110
vec.h:698 ((null)) 908768: 0.7% 99564524: 6.9% 82255800: 5.0% 4201148: 6.5% 874628
vec.h:666 ((null)) 12840: 0.0% 73534476: 5.1% 92633604: 5.7% 2929176: 4.5% 776347
lto/lto.c:245 (lto_read_in_decl_state) 0: 0.0% 43115656: 3.0% 94137976: 5.8% 25396856:39.2% 2698570
cgraph.c:869 (cgraph_create_edge_1) 0: 0.0% 0: 0.0% 124069712: 7.6% 0: 0.0% 1192978
cgraph.c:510 (cgraph_allocate_node) 0: 0.0% 0: 0.0% 189855256:11.6% 0: 0.0% 578827
tree-streamer-in.c:621 (streamer_alloc_tree) 97891888:76.1% 892961808:61.6% 613594816:37.6% 22268544:34.3% 12574164
Total 128555402 1448954963 1633186049 64847582 24190936
source location Garbage Freed Leak Overhead Times
So I get about 1GB of peak GGC memory and about 2.6GB for WPA in TOP (still on
the old tree). You may try to experiment with LOCATION_CACHE_SIZE. It should be
big enough so the locations get shared across different input files.
Honza
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c (revision 209047)
+++ lto-streamer-in.c (working copy)
@@ -145,21 +145,49 @@ canon_file_name (const char *string)
}
+/* location_cache is used at LTO read in to avoid too many duplicates in
+ the linemap tables. */
+
+#define LOCATION_CACHE_SIZE 524287
+struct location_cache_entry
+{
+ const char *file;
+ int line;
+ int col;
+ location_t location;
+};
+static struct location_cache_entry *location_cache;
+
+/* Return hash of FILE/LINE/COL. */
+
+int
+location_cache_hash (const char *file, int line, int col)
+{
+ return iterative_hash_hashval_t ((size_t)file,
+ iterative_hash_hashval_t (line, col)) % LOCATION_CACHE_SIZE;
+}
+
+
/* Read a location bitpack from input block IB. */
location_t
lto_input_location (struct bitpack_d *bp, struct data_in *data_in)
{
- static const char *current_file;
- static int current_line;
+ static const char *current_file, *last_file;
+ static int current_line, last_line;
static int current_col;
bool file_change, line_change, column_change;
unsigned len;
- bool prev_file = current_file != NULL;
+ bool prev_file = last_file != NULL;
+ int hash;
+ const char *cfile;
if (bp_unpack_value (bp, 1))
return UNKNOWN_LOCATION;
+ if (!location_cache)
+ location_cache = XCNEWVEC (struct location_cache_entry, LOCATION_CACHE_SIZE);
+
file_change = bp_unpack_value (bp, 1);
line_change = bp_unpack_value (bp, 1);
column_change = bp_unpack_value (bp, 1);
@@ -175,18 +203,32 @@ lto_input_location (struct bitpack_d *bp
if (column_change)
current_col = bp_unpack_var_len_unsigned (bp);
+ cfile = current_file;
+ hash = location_cache_hash (cfile, current_line, current_col);
- if (file_change)
+ if (location_cache[hash].file == cfile
+ && location_cache[hash].line == current_line
+ && location_cache[hash].col == current_col + 1)
+ return location_cache[hash].location;
+ location_cache[hash].file = cfile;
+ location_cache[hash].line = current_line;
+ location_cache[hash].col = current_col + 1;
+
+ if (current_file != last_file)
{
if (prev_file)
linemap_add (line_table, LC_LEAVE, false, NULL, 0);
linemap_add (line_table, LC_ENTER, false, current_file, current_line);
}
- else if (line_change)
+ else if (current_line != last_line)
linemap_line_start (line_table, current_line, current_col);
- return linemap_position_for_column (line_table, current_col);
+ location_cache[hash].location
+ = linemap_position_for_column (line_table, current_col);
+ last_file = current_file;
+ last_line = current_line;
+ return location_cache[hash].location;
}
@@ -981,6 +1023,27 @@ input_function (tree fn_decl, struct dat
}
bsi = gsi_start_bb (bb);
while (!gsi_end_p (bsi))
+ {
+ gimple stmt = gsi_stmt (bsi);
+ /* If we're recompiling LTO objects with debug stmts but
+ we're not supposed to have debug stmts, remove them now.
+ We can't remove them earlier because this would cause uid
+ mismatches in fixups, but we can do it at this point, as
+ long as debug stmts don't require fixups. */
+ if (!MAY_HAVE_DEBUG_STMTS && is_gimple_debug (stmt))
+ {
+ gimple_stmt_iterator gsi = bsi;
+ gsi_next (&bsi);
+ gsi_remove (&gsi, true);
+ }
+ else
+ {
+ gsi_next (&bsi);
+ stmts[gimple_uid (stmt)] = stmt;
+ }
+ }
+ bsi = gsi_start_bb (bb);
+ while (!gsi_end_p (bsi))
{
gimple stmt = gsi_stmt (bsi);
/* If we're recompiling LTO objects with debug stmts but