This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH RFA: libcpp speedup patch: avoid opening nonexistent files


This patch speeds up the preprocessor in two ways.

First, if we fail to open a file, we cache that fact.  This helps us
to avoid making a system call to open a file which we already know
does not exist.  This case arises when one header file includes
another header file using double quotes, so we must look in the
including file's directory first.  The other caches will fail to
notice this, causing a duplicate open of the same path.

Second, when we find a file, we add it to the cache of header file
entries with the bracket include dir and the quote include dir, when
possible.  The existing code checks the cache when it reaches those
directories, but it only coincidentally adds entries to the cache with
those directories.  This is another speedup when one header file
includes another header file using double quotes: after checking the
header file's directory, we can then use the cache to find the file.

In conjunction with the patch to c-common.c which I already sent, and with
another patch to libcpp/lex.c which I am about to send, this gave me
an 18% reduction in time required to run the preprocessor on a large
C++ file with a lot of #include statements.

Tested with a C/C++ bootstrap and testsuite run on i686-pc-linux-gnu.

OK for mainline?

Ian


2006-12-28  Ian Lance Taylor  <iant@google.com>

	* internal.h (struct cpp_reader): Add new fields:
	nonexistent_file_hash and nonexistent_file_ob.
	* files.c: Include "obstack.h".
	(find_file_in_dir): Before trying to open the file, look up the
	path name in the hash table of nonexistent files.  After failing
	to open the file, add the path name to the hash table.
	(_cpp_find_file): Cache the results of looking up the file name
	starting with the quote and bracket chain heads, if we can.
	(nonexistent_file_hash_eq): New static function.
	(_cpp_init_files): Initialize pfile->nonexistent_file_hash and
	pfile->nonexistent_file_ob.
	(_cpp_cleanup_files): Free pfile->nonexistent_file_hash and
	pfile->nonexistent_file_ob.


Index: libcpp/files.c
===================================================================
--- libcpp/files.c	(revision 120236)
+++ libcpp/files.c	(working copy)
@@ -26,6 +26,7 @@ Foundation, 51 Franklin Street, Fifth Fl
 #include "cpplib.h"
 #include "internal.h"
 #include "mkdeps.h"
+#include "obstack.h"
 #include "hashtab.h"
 #include "md5.h"
 #include <dirent.h>
@@ -322,6 +323,16 @@ find_file_in_dir (cpp_reader *pfile, _cp
 
   if (path)
     {
+      hashval_t hv = htab_hash_string (path);
+      char *copy;
+      void **pp;
+
+      if (htab_find_with_hash (pfile->nonexistent_file_hash, path, hv) != NULL)
+	{
+	  file->err_no = ENOENT;
+	  return false;
+	}
+
       file->path = path;
       if (pch_open_file (pfile, file, invalid_pch))
 	return true;
@@ -335,7 +346,16 @@ find_file_in_dir (cpp_reader *pfile, _cp
 	  return true;
 	}
 
+      /* We copy the path name onto an obstack partly so that we don't
+	 leak the memory, but mostly so that we don't fragment the
+	 heap.  */
+      copy = obstack_copy0 (&pfile->nonexistent_file_ob, path,
+			    strlen (path));
       free (path);
+      pp = htab_find_slot_with_hash (pfile->nonexistent_file_hash,
+				     copy, hv, INSERT);
+      *pp = copy;
+
       file->path = file->name;
     }
   else
@@ -396,6 +416,9 @@ _cpp_find_file (cpp_reader *pfile, const
   struct file_hash_entry *entry, **hash_slot;
   _cpp_file *file;
   bool invalid_pch = false;
+  bool saw_bracket_include = false;
+  bool saw_quote_include = false;
+  struct cpp_dir *found_in_cache = NULL;
 
   /* Ensure we get no confusion between cached files and directories.  */
   if (start_dir == NULL)
@@ -448,13 +471,19 @@ _cpp_find_file (cpp_reader *pfile, const
       /* Only check the cache for the starting location (done above)
 	 and the quote and bracket chain heads because there are no
 	 other possible starting points for searches.  */
-      if (file->dir != pfile->bracket_include
-	  && file->dir != pfile->quote_include)
+      if (file->dir == pfile->bracket_include)
+	saw_bracket_include = true;
+      else if (file->dir == pfile->quote_include)
+	saw_quote_include = true;
+      else
 	continue;
 
       entry = search_cache (*hash_slot, file->dir);
       if (entry)
-	break;
+	{
+	  found_in_cache = file->dir;
+	  break;
+	}
     }
 
   if (entry)
@@ -478,6 +507,29 @@ _cpp_find_file (cpp_reader *pfile, const
   entry->u.file = file;
   *hash_slot = entry;
 
+  /* If we passed the quote or bracket chain heads, cache them also.
+     This speeds up processing if there are lots of -I options.  */
+  if (saw_bracket_include
+      && pfile->bracket_include != start_dir
+      && found_in_cache != pfile->bracket_include)
+    {
+      entry = new_file_hash_entry (pfile);
+      entry->next = *hash_slot;
+      entry->start_dir = pfile->bracket_include;
+      entry->u.file = file;
+      *hash_slot = entry;
+    }
+  if (saw_quote_include
+      && pfile->quote_include != start_dir
+      && found_in_cache != pfile->quote_include)
+    {
+      entry = new_file_hash_entry (pfile);
+      entry->next = *hash_slot;
+      entry->start_dir = pfile->quote_include;
+      entry->u.file = file;
+      *hash_slot = entry;
+    }
+
   return file;
 }
 
@@ -997,6 +1049,14 @@ file_hash_eq (const void *p, const void 
   return strcmp (hname, fname) == 0;
 }
 
+/* Compare entries in the nonexistent file hash table.  These are just
+   strings.  */
+static int
+nonexistent_file_hash_eq (const void *p, const void *q)
+{
+  return strcmp (p, q) == 0;
+}
+
 /* Initialize everything in this source file.  */
 void
 _cpp_init_files (cpp_reader *pfile)
@@ -1006,6 +1066,12 @@ _cpp_init_files (cpp_reader *pfile)
   pfile->dir_hash = htab_create_alloc (127, file_hash_hash, file_hash_eq,
 					NULL, xcalloc, free);
   allocate_file_hash_entries (pfile);
+  pfile->nonexistent_file_hash = htab_create_alloc (127, htab_hash_string,
+						    nonexistent_file_hash_eq,
+						    NULL, xcalloc, free);
+  _obstack_begin (&pfile->nonexistent_file_ob, 0, 0,
+		  (void *(*) (long)) xmalloc,
+		  (void (*) (void *)) free);
 }
 
 /* Finalize everything in this source file.  */
@@ -1014,6 +1080,8 @@ _cpp_cleanup_files (cpp_reader *pfile)
 {
   htab_delete (pfile->file_hash);
   htab_delete (pfile->dir_hash);
+  htab_delete (pfile->nonexistent_file_hash);
+  obstack_free (&pfile->nonexistent_file_ob, 0);
 }
 
 /* Enter a file name in the hash for the sake of cpp_included.  */
Index: libcpp/internal.h
===================================================================
--- libcpp/internal.h	(revision 120236)
+++ libcpp/internal.h	(working copy)
@@ -355,6 +355,10 @@ struct cpp_reader
   struct file_hash_entry *file_hash_entries;
   unsigned int file_hash_entries_allocated, file_hash_entries_used;
 
+  /* Negative path lookup hash table.  */
+  struct htab *nonexistent_file_hash;
+  struct obstack nonexistent_file_ob;
+
   /* Nonzero means don't look for #include "foo" the source-file
      directory.  */
   bool quote_ignores_source_dir;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]