Optimize lto location stremaing

Jan Hubicka hubicka@ucw.cz
Wed Mar 25 22:54:00 GMT 2015


Hi,
linemap is optimized for situation where parser enters positions into it in source order.
LTO does not work this way - it attach locations to trees and reads them more or less
randomly. This results in large memory use of linemaps, slow lookups (that are critical
for WPA stremaing) and as i noticed recently also wrong line&column info.

This patch changes the way by streaming in the location into cache that is ordered
and applied in source order.  The cache also knows how to cheaply discard elements
for linemaps of trees that was rmeoved by tree merging.

One catch ist hat the linemaps are not present in trees and thus can not be expanded,
copied or relocated before calling lto_apply_location_cache. I hope I caught the cases
where this can happen. This include
 1) calling debug hooks during ltrans from lto_read_decls
 2) producing odr violation warnings from ipa-devirt
 3) modifying locations to record blocks (unpack_ts_block_value_fields)
 4) for safety I skipped the trick for gimple streaming for now becuase at
    least PHI args can probably be relocated.

Bootstrapped/regtested x86_64-linux, the patch saves about 1GB of locators for chromium
and 400MB for firefox LTO.

OK?

Honza
	PR lto/65536
	* streamer-hooks.h (struct streamer_hooks): Make input_location to take
	pointer to location.
	(stream_input_location): Update.
	(lto_apply_location_cache, lto_revert_location_cache,
	lto_accept_location_cache): Declare.
	(stream_input_location_now): New inline function.
	* ipa-devirt.c: Include streamer-hooks.h.
	(warn_odr): Apply location cache before warning.
	(lto_input_location): Update prototype.
	* gimple-streamer-in.c (input_phi, input_gimple_stmt):
	Use stream_input_location_now.
	* lto/lto.c (unify_scc): Revert location cache when unification
	suceeded.
	(lto_read_decls): Accept location cache after sucess;
	apply location cache before calling debug hooks.
	* lto-streamer-in.c (struct cached_location): New.
	(loc_cache, accepted_length, current_file, current_line,
	current_col, current_loc): New static vars.
	(cmp_loc): New function.
	(lto_apply_location_cache): New function.
	(lto_accept_location_cache): New function.
	(lto_revert_location_cache): New function.
	(lto_input_location): Do location caching.
	(input_eh_region, input_struct_function_base): Use
	stream_input_location_now.
	* tree-streamer-in.c (unpack_ts_block_value_fields,
	unpack_ts_omp_clause_value_fields, streamer_read_tree_bitfields,
	lto_input_ts_exp_tree_pointers): Update for cached location api.
Index: streamer-hooks.h
===================================================================
--- streamer-hooks.h	(revision 221582)
+++ streamer-hooks.h	(working copy)
@@ -52,7 +52,7 @@ struct streamer_hooks {
   tree (*read_tree) (struct lto_input_block *, struct data_in *);
 
   /* [REQ] Called by every streaming routine that needs to read a location.  */
-  location_t (*input_location) (struct bitpack_d *, struct data_in *);
+  void (*input_location) (location_t *, struct bitpack_d *, struct data_in *);
 
   /* [REQ] Called by every streaming routine that needs to write a location.  */
   void (*output_location) (struct output_block *, struct bitpack_d *, location_t);
@@ -67,8 +67,8 @@ struct streamer_hooks {
 #define stream_read_tree(IB, DATA_IN) \
     streamer_hooks.read_tree (IB, DATA_IN)
 
-#define stream_input_location(BP, DATA_IN) \
-    streamer_hooks.input_location (BP, DATA_IN)
+#define stream_input_location(LOCPTR, BP, DATA_IN) \
+    streamer_hooks.input_location (LOCPTR, BP, DATA_IN)
 
 #define stream_output_location(OB, BP, LOC) \
     streamer_hooks.output_location (OB, BP, LOC)
@@ -78,5 +78,21 @@ extern struct streamer_hooks streamer_ho
 
 /* In streamer-hooks.c.  */
 void streamer_hooks_init (void);
+bool lto_apply_location_cache ();
+void lto_revert_location_cache ();
+void lto_accept_location_cache ();
+
+/* Read location and return it instead of going through location caching.
+   This should be used only when the resulting location is not going to be
+   discarded.  */
+
+inline location_t
+stream_input_location_now (struct bitpack_d *bp, struct data_in *data)
+{
+  location_t loc;
+  streamer_hooks.input_location (&loc, bp, data);
+  lto_apply_location_cache ();
+  return loc;
+}
 
 #endif  /* GCC_STREAMER_HOOKS_H  */
Index: ipa-devirt.c
===================================================================
--- ipa-devirt.c	(revision 221582)
+++ ipa-devirt.c	(working copy)
@@ -166,7 +166,7 @@ along with GCC; see the file COPYING3.
 #include "gimple-pretty-print.h"
 #include "stor-layout.h"
 #include "intl.h"
-#include "demangle.h"
+#include "streamer-hooks.h"
 
 /* Hash based set of pairs of types.  */
 typedef struct
@@ -936,6 +936,8 @@ warn_odr (tree t1, tree t2, tree st1, tr
   if (!warn || !TYPE_NAME(t1))
     return;
 
+  lto_apply_location_cache ();
+
   if (!warning_at (DECL_SOURCE_LOCATION (TYPE_NAME (t1)), OPT_Wodr,
 		   "type %qT violates one definition rule",
 		   t1))
Index: lto-streamer.h
===================================================================
--- lto-streamer.h	(revision 221582)
+++ lto-streamer.h	(working copy)
@@ -788,7 +795,7 @@ extern struct data_in *lto_data_in_creat
 				    vec<ld_plugin_symbol_resolution_t> );
 extern void lto_data_in_delete (struct data_in *);
 extern void lto_input_data_block (struct lto_input_block *, void *, size_t);
-location_t lto_input_location (struct bitpack_d *, struct data_in *);
+void lto_input_location (location_t *, struct bitpack_d *, struct data_in *);
 tree lto_input_tree_ref (struct lto_input_block *, struct data_in *,
 			 struct function *, enum LTO_tags);
 void lto_tag_check_set (enum LTO_tags, int, ...);
Index: gimple-streamer-in.c
===================================================================
--- gimple-streamer-in.c	(revision 221582)
+++ gimple-streamer-in.c	(working copy)
@@ -87,7 +87,7 @@ input_phi (struct lto_input_block *ib, b
       tree def = stream_read_tree (ib, data_in);
       int src_index = streamer_read_uhwi (ib);
       bitpack_d bp = streamer_read_bitpack (ib);
-      location_t arg_loc = stream_input_location (&bp, data_in);
+      location_t arg_loc = stream_input_location_now (&bp, data_in);
       basic_block sbb = BASIC_BLOCK_FOR_FN (fn, src_index);
 
       edge e = NULL;
@@ -135,7 +135,7 @@ input_gimple_stmt (struct lto_input_bloc
   stmt->subcode = bp_unpack_var_len_unsigned (&bp);
 
   /* Read location information.  */
-  gimple_set_location (stmt, stream_input_location (&bp, data_in));
+  gimple_set_location (stmt, stream_input_location_now (&bp, data_in));
 
   /* Read lexical block reference.  */
   gimple_set_block (stmt, stream_read_tree (ib, data_in));
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 221582)
+++ lto/lto.c	(working copy)
@@ -1826,6 +1826,7 @@ unify_scc (struct streamer_tree_cache_d
 						  (uintptr_t)map2[2*i]);
 	    }
 
+	  lto_revert_location_cache ();
 	  /* Free the tree nodes from the read SCC.  */
 	  for (unsigned i = 0; i < len; ++i)
 	    {
@@ -1923,6 +1924,7 @@ lto_read_decls (struct lto_file_decl_dat
 	      && unify_scc (data_in->reader_cache, from,
 			    len, scc_entry_len, scc_hash))
 	    continue;
+	  lto_accept_location_cache ();
 
 	  bool seen_type = false;
 	  for (unsigned i = 0; i < len; ++i)
@@ -1953,7 +1955,10 @@ lto_read_decls (struct lto_file_decl_dat
 	      /* Register TYPE_DECLs with the debuginfo machinery.  */
 	      if (!flag_wpa
 		  && TREE_CODE (t) == TYPE_DECL)
-		debug_hooks->type_decl (t, !DECL_FILE_SCOPE_P (t));
+		{
+		  lto_apply_location_cache ();
+		  debug_hooks->type_decl (t, !DECL_FILE_SCOPE_P (t));
+		}
 	      if (!flag_ltrans)
 		{
 		  /* Register variables and functions with the
@@ -1979,6 +1984,7 @@ lto_read_decls (struct lto_file_decl_dat
 	  gcc_assert (t && data_in->reader_cache->nodes.length () == from);
 	}
     }
+  lto_apply_location_cache ();
 
   /* Read in lto_in_decl_state objects.  */
   data_ptr = (const uint32_t *) ((const char*) data + decl_offset); 
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c	(revision 221582)
+++ lto-streamer-in.c	(working copy)
@@ -172,45 +172,169 @@ canon_file_name (const char *string)
     }
 }
 
+/* The linemap cache holds expanded locations for streamed in trees.
+   This is done to reduce memory usage of libcpp linemap that strongly preffers
+   locations to be inserted in the soruce order.  */
 
-/* Read a location bitpack from input block IB.  */
+struct cached_location
+{
+  const char *file;
+  location_t *loc;
+  int line, col;
+};
+
+/* The location cache.  */
+
+static vec<cached_location, va_heap> loc_cache;
+
+/* Accepted entries are ones used by trees that are known to be not unified
+   by tree merging.  */
+
+static int accepted_length;
+
+/* Bookkeeping to remember state in between calls to lto_apply_location_cache
+   When streaming gimple, the location cache is not used and thus
+   lto_apply_location_cache happens per location basis.  It is then
+   useful to avoid redundant calls of linemap API.  */
 
-location_t
-lto_input_location (struct bitpack_d *bp, struct data_in *data_in)
+static const char *current_file;
+static int current_line;
+static int current_col;
+static location_t current_loc;
+
+/* Sort locations in source order. Start with file from last application.  */
+
+static int
+cmp_loc (const void *pa, const void *pb)
 {
-  static const char *current_file;
-  static int current_line;
-  static int current_col;
+  const cached_location *a = ((const cached_location *)pa);
+  const cached_location *b = ((const cached_location *)pb);
+
+  if (a->file == current_file && b->file != current_file)
+    return -1;
+  if (a->file != current_file && b->file == current_file)
+    return 1;
+  if (a->file == current_file && b->file == current_file)
+    {
+      if (a->line == current_line && b->line != current_line)
+	return -1;
+      if (a->line != current_line && b->line == current_line)
+	return 1;
+    }
+  if (a->file != b->file)
+    return strcmp (a->file, b->file);
+  if (a->line != b->line)
+    return a->line - b->line;
+  return a->col - b->col;
+}
+
+/* Apply all changes in location cache.  Add locations into linemap and patch
+   trees.  */
+
+bool
+lto_apply_location_cache ()
+{
+  if (!loc_cache.length ())
+    return false;
+  if (loc_cache.length () > 1)
+    loc_cache.qsort (cmp_loc);
+
+  for (unsigned int i = 0; i < loc_cache.length (); i++)
+    {
+      struct cached_location loc = loc_cache[i];
+
+      if (current_file != loc.file)
+	linemap_add (line_table, current_file ? LC_RENAME : LC_ENTER,
+		     false, loc.file, loc.line);
+      else if (current_line != loc.line)
+	{
+	  int max = loc.col;
+
+	  for (unsigned int j = i + 1; j < loc_cache.length (); j++)
+	    if (loc.file != loc_cache[j].file
+		|| loc.line != loc_cache[j].line)
+	      break;
+	    else if (max < loc_cache[j].col)
+	      max = loc_cache[j].col;
+	  linemap_line_start (line_table, loc.line, max + 1);
+	}
+      gcc_assert (*loc.loc == BUILTINS_LOCATION + 1);
+      if (current_file == loc.file && current_line == loc.line
+	  && current_col == loc.col)
+	*loc.loc = current_loc;
+      else
+        current_loc = *loc.loc = linemap_position_for_column (line_table,
+							      loc.col);
+      current_line = loc.line;
+      current_file = loc.file;
+      current_col = loc.col;
+    }
+  loc_cache.truncate (0);
+  accepted_length = 0;
+  return true;
+}
+
+/* Tree merging did not suceed; mark all changes in the cache as accepted.  */
+
+void
+lto_accept_location_cache ()
+{
+  accepted_length = loc_cache.length ();
+}
+
+/* Tree merging did suceed; throw away recent changes.  */
+
+void
+lto_revert_location_cache ()
+{
+  loc_cache.truncate (accepted_length);
+}
+
+/* Read a location bitpack from input block IB and either update *LOC directly
+   or add it to the location cache.
+   It is neccesary to call lto_apply_location_cache to get *LOC updated.  */
+
+void
+lto_input_location (location_t *loc, struct bitpack_d *bp,
+		    struct data_in *data_in)
+{
+  static const char *stream_file;
+  static int stream_line;
+  static int stream_col;
   bool file_change, line_change, column_change;
-  bool prev_file = current_file != NULL;
 
   if (bp_unpack_value (bp, 1))
-    return UNKNOWN_LOCATION;
+    {
+      *loc = UNKNOWN_LOCATION;
+      return;
+    }
+  *loc = BUILTINS_LOCATION + 1;
 
   file_change = bp_unpack_value (bp, 1);
   line_change = bp_unpack_value (bp, 1);
   column_change = bp_unpack_value (bp, 1);
 
   if (file_change)
-    current_file = canon_file_name (bp_unpack_string (data_in, bp));
+    stream_file = canon_file_name (bp_unpack_string (data_in, bp));
 
   if (line_change)
-    current_line = bp_unpack_var_len_unsigned (bp);
+    stream_line = bp_unpack_var_len_unsigned (bp);
 
   if (column_change)
-    current_col = bp_unpack_var_len_unsigned (bp);
+    stream_col = bp_unpack_var_len_unsigned (bp);
 
-  if (file_change)
+  /* This optimization saves location cache operations druing gimple
+     streaming.  */
+     
+  if (current_file == stream_file && current_line == stream_line
+      && current_col == stream_col)
     {
-      if (prev_file)
-	linemap_add (line_table, LC_LEAVE, false, NULL, 0);
-
-      linemap_add (line_table, LC_ENTER, false, current_file, current_line);
+      *loc = current_loc;
+      return;
     }
-  else if (line_change)
-    linemap_line_start (line_table, current_line, current_col);
 
-  return linemap_position_for_column (line_table, current_col);
+  struct cached_location entry = {stream_file, loc, stream_line, stream_col};
+  loc_cache.safe_push (entry);
 }
 
 
@@ -390,7 +514,7 @@ input_eh_region (struct lto_input_block
 	  r->u.must_not_throw.failure_decl = stream_read_tree (ib, data_in);
 	  bitpack_d bp = streamer_read_bitpack (ib);
 	  r->u.must_not_throw.failure_loc
-	   = stream_input_location (&bp, data_in);
+	   = stream_input_location_now (&bp, data_in);
 	}
 	break;
 
@@ -927,8 +1051,8 @@ input_struct_function_base (struct funct
   fn->last_clique = bp_unpack_value (&bp, sizeof (short) * 8);
 
   /* Input the function start and end loci.  */
-  fn->function_start_locus = stream_input_location (&bp, data_in);
-  fn->function_end_locus = stream_input_location (&bp, data_in);
+  fn->function_start_locus = stream_input_location_now (&bp, data_in);
+  fn->function_end_locus = stream_input_location_now (&bp, data_in);
 }
 
 
@@ -1126,6 +1250,7 @@ lto_read_body_or_constructor (struct lto
 	}
       else
         input_constructor (fn_decl, data_in, &ib_main);
+      lto_apply_location_cache ();
       /* And fixup types we streamed locally.  */
 	{
 	  struct streamer_tree_cache_d *cache = data_in->reader_cache;
Index: tree-streamer-in.c
===================================================================
--- tree-streamer-in.c	(revision 221582)
+++ tree-streamer-in.c	(working copy)
@@ -411,7 +411,7 @@ unpack_ts_block_value_fields (struct dat
 {
   BLOCK_ABSTRACT (expr) = (unsigned) bp_unpack_value (bp, 1);
   /* BLOCK_NUMBER is recomputed.  */
-  BLOCK_SOURCE_LOCATION (expr) = stream_input_location (bp, data_in);
+  stream_input_location (&BLOCK_SOURCE_LOCATION (expr), bp, data_in);
 }
 
 /* Unpack all the non-pointer fields of the TS_TRANSLATION_UNIT_DECL
@@ -433,7 +433,7 @@ static void
 unpack_ts_omp_clause_value_fields (struct data_in *data_in,
 				   struct bitpack_d *bp, tree expr)
 {
-  OMP_CLAUSE_LOCATION (expr) = stream_input_location (bp, data_in);
+  stream_input_location (&OMP_CLAUSE_LOCATION (expr), bp, data_in);
   switch (OMP_CLAUSE_CODE (expr))
     {
     case OMP_CLAUSE_DEFAULT:
@@ -503,7 +503,7 @@ streamer_read_tree_bitfields (struct lto
     unpack_ts_fixed_cst_value_fields (&bp, expr);
 
   if (CODE_CONTAINS_STRUCT (code, TS_DECL_MINIMAL))
-    DECL_SOURCE_LOCATION (expr) = stream_input_location (&bp, data_in);
+    stream_input_location (&DECL_SOURCE_LOCATION (expr), &bp, data_in);
 
   if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON))
     unpack_ts_decl_common_value_fields (&bp, expr);
@@ -522,7 +522,7 @@ streamer_read_tree_bitfields (struct lto
 
   if (CODE_CONTAINS_STRUCT (code, TS_EXP))
     {
-      SET_EXPR_LOCATION (expr, stream_input_location (&bp, data_in));
+      stream_input_location (&EXPR_CHECK (expr)->exp.locus, &bp, data_in);
       if (code == MEM_REF
 	  || code == TARGET_MEM_REF)
 	{
@@ -905,11 +905,20 @@ lto_input_ts_exp_tree_pointers (struct l
 			        struct data_in *data_in, tree expr)
 {
   int i;
+  tree block;
 
   for (i = 0; i < TREE_OPERAND_LENGTH (expr); i++)
     TREE_OPERAND (expr, i) = stream_read_tree (ib, data_in);
 
-  TREE_SET_BLOCK (expr, stream_read_tree (ib, data_in));
+  block = stream_read_tree (ib, data_in);
+
+  /* TODO: Block is stored in the locus information.  It may make more sense to
+     to make it go via the location cache.  */
+  if (block)
+    {
+      lto_apply_location_cache ();
+      TREE_SET_BLOCK (expr, block);
+    }
 }
 
 



More information about the Gcc-patches mailing list