[incremental] Patch: FYI: collect unused hunks

Tom Tromey tromey@redhat.com
Thu Jan 24 05:44:00 GMT 2008


I'm checking this in on the incremental-compiler branch.

I think the comments explain this adequately, but in summary the idea
is that we want to keep around those hunks which were used in a recent
compilation, and we want to drop those hunks which are no longer
relevant.

Of course, there is no way to guess which hunks may be useful in the
future.  But, without something like this, the server will simply use
more and more memory over time.

This implements a reasonable heuristic: we track compilation requests
according to the requested object file name.  We keep the most recent
hunks resulting in a given .o, and drop the old hunks for that object.
(However, we keep the hunks while recompiling an object file, since
this compilation is very likely to re-use hunks.)

In other words, the cache is designed to model the build of a typical program.

Tom

ChangeLog:
2008-01-23  Tom Tromey  <tromey@redhat.com>

	* c-lang.c (LANG_HOOKS_PARSE_FILE): Redefine.
	* toplev.h (get_asm_object_file_name): Declare.
	* c-tree.h (c_parser_mark_hunk_set): Declare.
	(c_parse_file_wrapper): Declare.
	* c-parser.c (global_hunk_map): Use 'if_marked'.
	(mark_one_hunk_binding): New function.
	(c_parser_mark_hunk_set): Likewise.
	(finish_current_hunk): Update used_hunks.
	(struct c_compile_job): New struct.
	(hash_c_compile_job): New function.
	(eq_c_compile_job): Likewise.
	(all_compile_jobs): New global.
	(old_job, working_job): Likewise.
	(c_parse_file_wrapper): New function.
	(insert_single_binding): Likewise.
	(copy_used_hunks): Likewise.
	(c_parse_file): Call copy_used_hunks.
	* toplev.c (server_asm_object_file_name): New global.
	(parse_as_command_line): New function.
	(server_callback): Call it.
	(get_asm_object_file_name): New function.

Index: toplev.c
===================================================================
--- toplev.c	(revision 131637)
+++ toplev.c	(working copy)
@@ -399,6 +399,11 @@
    late.  */
 static FILE *server_asm_out_file;
 
+/* On the server we use the name of the resulting object file as a key
+   to hold references to some objects.  This may be NULL if there is
+   no known object file, for instance with -fsyntax-only.  */
+static GTY (()) const char *server_asm_object_file_name;
+
 typedef const char *cchar_p;
 DEF_VEC_P(cchar_p);
 DEF_VEC_ALLOC_P(cchar_p,gc);
@@ -485,6 +490,12 @@
     }
 }
 
+const char *
+get_asm_object_file_name (void)
+{
+  return server_asm_object_file_name;
+}
+
 /* Called when the start of a function definition is parsed,
    this function prints on stderr the name of the function.  */
 void
@@ -2360,6 +2371,70 @@
     *n_args = i - 1;
 }
 
+/* Record the normalized "-o" operand of AS_ARGV, prefixed with DIR if
+   not absolute, in server_asm_object_file_name (NULL if absent).  */
+static void
+parse_as_command_line (char *dir, char **as_argv)
+{
+  char **p;
+  char *work;
+  char *out, *in;
+
+  server_asm_object_file_name = NULL;
+  if (! as_argv)
+    return;
+
+  for (p = as_argv; *p; ++p)
+    {
+      if (! strcmp (*p, "-o"))
+	{
+	  ++p;
+	  break;
+	}
+    }
+  if (! *p)
+    return;
+
+  if (IS_ABSOLUTE_PATH (*p))
+    work = xstrdup (*p);
+  else
+    {
+      char ds[2];
+      ds[0] = DIR_SEPARATOR;
+      ds[1] = '\0';
+      work = concat (dir, ds, *p, NULL);
+    }
+
+  /* Normalize in place by nuking '/./' and '//' components.  Ordinarily
+     gcc will be invoked the same way each time, but this keeps other
+     spellings of one path from becoming separate keys (and leaking).  */
+  in = out = work;
+  while (*in)
+    {
+      if (IS_DIR_SEPARATOR (*in))
+	{
+	  char *save;
+	  do
+	    {
+	      save = in;
+	      /* Step over each './' that follows this separator.  */
+	      while (in[1] == '.' && IS_DIR_SEPARATOR (in[2]))
+		in += 2;
+	      /* Advance to the last of a run of separators.  */
+	      while (IS_DIR_SEPARATOR (in[1]))
+		++in;
+	    }
+	  while (save != in);
+	}
+      *out++ = *in++;
+    }
+  /* IN is at the terminator; copy it to end the compacted string.  */
+  *out = *in;
+
+  server_asm_object_file_name = ggc_strdup (work);
+  free (work);
+}
+
 bool
 server_callback (int fd, char *dir, char **cc1_argv, char **as_argv)
 {
@@ -2388,6 +2463,8 @@
   clear_src_pwd ();
   chdir (dir);
 
+  parse_as_command_line (dir, as_argv);
+
   init_local_tick ();
   do_compile ();
 
Index: toplev.h
===================================================================
--- toplev.h	(revision 130053)
+++ toplev.h	(working copy)
@@ -1,5 +1,5 @@
 /* toplev.h - Various declarations for functions found in toplev.c
-   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007
+   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007, 2008
    Free Software Foundation, Inc.
 
 This file is part of GCC.
@@ -190,6 +190,11 @@
 extern const char *get_src_pwd	       (void);
 extern bool set_src_pwd		       (const char *);
 
+/* Return the object file name as used by 'as'.  Will return NULL if
+   not running as a server, or if there is no known object file name.
+   If a value is returned, it has been allocated by the GC.  */
+extern const char *get_asm_object_file_name (void);
+
 /* Functions used to manipulate the random seed.  */
 
 extern const char *get_random_seed (bool);
Index: c-tree.h
===================================================================
--- c-tree.h	(revision 130792)
+++ c-tree.h	(working copy)
@@ -1,6 +1,6 @@
 /* Definitions for C parsing and type checking.
    Copyright (C) 1987, 1993, 1994, 1995, 1997, 1998,
-   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -453,7 +453,11 @@
 extern bool object_in_current_hunk_p (tree);
 extern tree c_parser_find_binding (tree);
 extern void c_parser_lookup_callback (tree, tree, bool);
+extern void c_parse_file_wrapper (int);
 
+/* Only for use by the GC.  */
+extern int c_parser_mark_hunk_set (const void *);
+
 /* True if this decl or type has been smashed.  */
 #define C_SMASHED_P(T) TREE_LANG_FLAG_5 (T)
 
Index: c-parser.c
===================================================================
--- c-parser.c	(revision 131644)
+++ c-parser.c	(working copy)
@@ -793,8 +793,37 @@
 
 /* Map a hunk signature to its bindings.  This is a true global,
    shared by all parsers.  */
-static GTY ((param_is (struct hunk_set))) htab_t global_hunk_map;
+static GTY ((if_marked ("c_parser_mark_hunk_set"), param_is (struct hunk_set)))
+     htab_t global_hunk_map;
 
+/* Leaf of the global_hunk_map marking functions; R is the hunk_set
+   whose bindings are being scanned.  A hunk_binding which is already
+   marked is kept.  Any other is removed with htab_clear_slot: storing
+   NULL would make the slot look empty, cutting probe chains short.  */
+static int
+mark_one_hunk_binding (void **slot, void *r)
+{
+  struct hunk_set *hs = (struct hunk_set *) r;
+  if (! ggc_marked_p (*slot))
+    htab_clear_slot (hs->bindings, slot);
+  return 1;
+}
+
+/* Mark an entry in the global_hunk_map.  Non-static because GGC
+   machinery needs access to this.  A hunk set in the global hunk map
+   does not "own" the hunk_bindings to which it refers.  Instead, the
+   hunk_binding is owned by the jobs which used it.  Returns nonzero
+   if at least one binding of the set is still live.  */
+int
+c_parser_mark_hunk_set (const void *p)
+{
+  /* Have to cast away const.  */
+  struct hunk_set *hs = (struct hunk_set *) p;
+  htab_traverse_noresize (hs->bindings, mark_one_hunk_binding, hs);
+  /* Cleared slots no longer count, so this is the number kept.  */
+  return htab_elements (hs->bindings) != 0;
+}
+
 /* This is called when making a file-scope binding.  It registers the
    new binding in the current hunk binding map.  */
 void
@@ -995,6 +1024,13 @@
 						  INSERT);
   gcc_assert (!*slot);
   *slot = parser->current_hunk_binding;
+
+  /* Also note this hunk in used_hunks, for long-term storage.  */
+  slot = (struct hunk_binding **) htab_find_slot (parser->used_hunks,
+						  parser->current_hunk_binding,
+						  INSERT);
+  gcc_assert (!*slot);
+  *slot = parser->current_hunk_binding;
 }
 
 /* Initialize the global hunk binding map.  */
@@ -8952,6 +8988,110 @@
 }
 
 
+
+/* Link an object file to the hunks used when parsing the
+   corresponding source.  We use object file names and not source file
+   names because it is not uncommon to compile a source file multiple
+   times in a project.
+   
+   This data structure is the key to how declarations are retained in
+   the compile server.  When a file is recompiled, we save its old
+   hunks, run the job, install the resulting hunks, and finally remove
+   the old values.  This lets us maximize sharing across compilations
+   while not preserving declarations which are unlikely to be
+   reused.  */
+struct c_compile_job GTY (())
+{
+  /* The object file.  */
+  const char *object_file_name;
+
+  /* All the hunks used by this compilation.  */
+  htab_t GTY ((param_is (struct hunk_binding))) hunks;
+};
+
+/* Hash a struct c_compile_job by its object file name.  */
+static hashval_t
+hash_c_compile_job (const void *cj)
+{
+  const char *name = ((const struct c_compile_job *) cj)->object_file_name;
+  return htab_hash_string (name);
+}
+
+/* Equality function: jobs match when their object file names do.  */
+static int
+eq_c_compile_job (const void *a, const void *b)
+{
+  const char *name_a = ((const struct c_compile_job *) a)->object_file_name;
+  const char *name_b = ((const struct c_compile_job *) b)->object_file_name;
+  return strcmp (name_a, name_b) == 0;
+}
+
+/* All the compile jobs held by the server.  Eventually we want to
+   move this to toplev.c.  */
+static GTY ((param_is (struct c_compile_job))) htab_t all_compile_jobs;
+
+/* The old values of the current compilation.  We keep a handle on
+   this when re-running a compilation because it is very likely that
+   the new job will share hunks with the old one, and we don't want to
+   collect the old hunks too early.  */
+static GTY (()) struct c_compile_job *old_job;
+
+/* The job currently being compiled.  */
+static GTY (()) struct c_compile_job *working_job;
+
+/* This is a wrapper for c_common_parse_file which handles allocation
+   of hunk-related data structures, so that decls can be shared even
+   in --combine mode.  The job is registered in all_compile_jobs only
+   when get_asm_object_file_name returns a name; NULL cannot be hashed.  */
+void
+c_parse_file_wrapper (int set_yydebug)
+{
+  gcc_assert (! working_job);
+  if (! all_compile_jobs)
+    all_compile_jobs = htab_create_ggc (20, hash_c_compile_job,
+					eq_c_compile_job, NULL);
+
+  working_job = GGC_NEW (struct c_compile_job);
+  working_job->object_file_name = get_asm_object_file_name ();
+  working_job->hunks = htab_create_ggc (20, htab_hash_pointer, htab_eq_pointer,
+					NULL);
+
+  /* hash_c_compile_job and eq_c_compile_job dereference the name.  */
+  if (working_job->object_file_name)
+    {
+      void **slot = htab_find_slot (all_compile_jobs, working_job, INSERT);
+      /* Save the old job while we work.  */
+      old_job = (struct c_compile_job *) *slot;
+      *slot = working_job;
+    }
+
+  c_common_parse_file (set_yydebug);
+  old_job = NULL;
+  working_job = NULL;
+}
+
+/* htab_traverse callback: add the hunk_binding in SLOT to the working
+   job's hunk set.  */
+static int
+insert_single_binding (void **slot, void * ARG_UNUSED (user_data))
+{
+  void *binding = *slot;
+  *htab_find_slot (working_job->hunks, binding, INSERT) = binding;
+  return 1;
+}
+
+/* Fold every hunk_binding in USED into the working job's hunk set.
+   USED itself is not kept: with --combine several parses may run for
+   one job, and the job must end up holding the union of all the
+   hunks they used.  */
+static void
+copy_used_hunks (htab_t used)
+{
+  gcc_assert (working_job != NULL);
+  htab_traverse_noresize (used, insert_single_binding, NULL);
+}
+
+
 /* Parse a single source file.  */
 
 void
@@ -8980,6 +9120,10 @@
 
   c_parser_lex_all (the_parser, &token);
   c_parser_translation_unit (the_parser);
+
+  /* Preserve the hunks created or used during this compilation.  */
+  copy_used_hunks (the_parser->used_hunks);
+
   the_parser = NULL;
 }
 
Index: c-lang.c
===================================================================
--- c-lang.c	(revision 130053)
+++ c-lang.c	(working copy)
@@ -1,6 +1,6 @@
 /* Language-specific hook definitions for C front end.
    Copyright (C) 1991, 1995, 1997, 1998,
-   1999, 2000, 2001, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+   1999, 2000, 2001, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -45,6 +45,8 @@
 #define LANG_HOOKS_INIT c_objc_common_init
 #undef LANG_HOOKS_CLEAR_BINDING_STACK
 #define LANG_HOOKS_CLEAR_BINDING_STACK c_clear_binding_stack
+#undef LANG_HOOKS_PARSE_FILE
+#define LANG_HOOKS_PARSE_FILE c_parse_file_wrapper
 
 /* Each front end provides its own lang hook initializer.  */
 const struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER;



More information about the Gcc-patches mailing list