PATCH RFA: libcpp speedup patch: avoid opening nonexistent files
Ian Lance Taylor
iant@google.com
Thu Dec 28 23:02:00 GMT 2006
This patch speeds up the preprocessor in two ways.
First, if we fail to open a file, we cache that fact. This helps us
to avoid making a system call to open a file which we already know
does not exist. This case arises when one header file includes
another header file using double quotes, so we must look in that
directory first. The other caches will fail to notice this, causing a
duplicate open of the same path.
Second, when we find a file, we add it to the cache of header file
entries with the bracket include dir and the quote include dir, when
possible. The existing code checks the cache when it reaches those
directories, but it only coincidentally adds entries to the cache with
those directories. This is another speedup when one header file
includes another header file using double quotes: after checking the
header file's directory, we can then use the cache to find the file.
In conjunction with the patch to c-common.c which I already sent, and with
another patch to libcpp/lex.c which I am about to send, this gave me
an 18% reduction in time required to run the preprocessor on a large
C++ file with a lot of #include statements.
Tested with a C/C++ bootstrap and testsuite run on i686-pc-linux-gnu.
OK for mainline?
Ian
2006-12-28 Ian Lance Taylor <iant@google.com>
* internal.h (struct cpp_reader): Add new fields:
nonexistent_file_hash and nonexistent_file_ob.
* files.c: Include "obstack.h".
(find_file_in_dir): Before trying to open the file, look up the
path name in the hash table of nonexistent files. After failing
to open the file, add the path name to the hash table.
(_cpp_find_file): Cache the results of looking up the file name
starting with the quote and bracket chain heads, if we can.
(nonexistent_file_hash_eq): New static function.
(_cpp_init_files): Initialize pfile->nonexistent_file_hash and
pfile->nonexistent_file_ob.
(_cpp_cleanup_files): Free pfile->nonexistent_file_hash and
pfile->nonexistent_file_ob.
Index: libcpp/files.c
===================================================================
--- libcpp/files.c (revision 120236)
+++ libcpp/files.c (working copy)
@@ -26,6 +26,7 @@ Foundation, 51 Franklin Street, Fifth Fl
#include "cpplib.h"
#include "internal.h"
#include "mkdeps.h"
+#include "obstack.h"
#include "hashtab.h"
#include "md5.h"
#include <dirent.h>
@@ -322,6 +323,16 @@ find_file_in_dir (cpp_reader *pfile, _cp
if (path)
{
+ hashval_t hv = htab_hash_string (path);
+ char *copy;
+ void **pp;
+
+ if (htab_find_with_hash (pfile->nonexistent_file_hash, path, hv) != NULL)
+ {
+ file->err_no = ENOENT;
+ return false;
+ }
+
file->path = path;
if (pch_open_file (pfile, file, invalid_pch))
return true;
@@ -335,7 +346,16 @@ find_file_in_dir (cpp_reader *pfile, _cp
return true;
}
+ /* We copy the path name onto an obstack partly so that we don't
+ leak the memory, but mostly so that we don't fragment the
+ heap. */
+ copy = obstack_copy0 (&pfile->nonexistent_file_ob, path,
+ strlen (path));
free (path);
+ pp = htab_find_slot_with_hash (pfile->nonexistent_file_hash,
+ copy, hv, INSERT);
+ *pp = copy;
+
file->path = file->name;
}
else
@@ -396,6 +416,9 @@ _cpp_find_file (cpp_reader *pfile, const
struct file_hash_entry *entry, **hash_slot;
_cpp_file *file;
bool invalid_pch = false;
+ bool saw_bracket_include = false;
+ bool saw_quote_include = false;
+ struct cpp_dir *found_in_cache = NULL;
/* Ensure we get no confusion between cached files and directories. */
if (start_dir == NULL)
@@ -448,13 +471,19 @@ _cpp_find_file (cpp_reader *pfile, const
/* Only check the cache for the starting location (done above)
and the quote and bracket chain heads because there are no
other possible starting points for searches. */
- if (file->dir != pfile->bracket_include
- && file->dir != pfile->quote_include)
+ if (file->dir == pfile->bracket_include)
+ saw_bracket_include = true;
+ else if (file->dir == pfile->quote_include)
+ saw_quote_include = true;
+ else
continue;
entry = search_cache (*hash_slot, file->dir);
if (entry)
- break;
+ {
+ found_in_cache = file->dir;
+ break;
+ }
}
if (entry)
@@ -478,6 +507,29 @@ _cpp_find_file (cpp_reader *pfile, const
entry->u.file = file;
*hash_slot = entry;
+ /* If we passed the quote or bracket chain heads, cache them also.
+ This speeds up processing if there are lots of -I options. */
+ if (saw_bracket_include
+ && pfile->bracket_include != start_dir
+ && found_in_cache != pfile->bracket_include)
+ {
+ entry = new_file_hash_entry (pfile);
+ entry->next = *hash_slot;
+ entry->start_dir = pfile->bracket_include;
+ entry->u.file = file;
+ *hash_slot = entry;
+ }
+ if (saw_quote_include
+ && pfile->quote_include != start_dir
+ && found_in_cache != pfile->quote_include)
+ {
+ entry = new_file_hash_entry (pfile);
+ entry->next = *hash_slot;
+ entry->start_dir = pfile->quote_include;
+ entry->u.file = file;
+ *hash_slot = entry;
+ }
+
return file;
}
@@ -997,6 +1049,14 @@ file_hash_eq (const void *p, const void
return strcmp (hname, fname) == 0;
}
+/* Compare entries in the nonexistent file hash table. These are just
+ strings. */
+static int
+nonexistent_file_hash_eq (const void *p, const void *q)
+{
+ return strcmp (p, q) == 0;
+}
+
/* Initialize everything in this source file. */
void
_cpp_init_files (cpp_reader *pfile)
@@ -1006,6 +1066,12 @@ _cpp_init_files (cpp_reader *pfile)
pfile->dir_hash = htab_create_alloc (127, file_hash_hash, file_hash_eq,
NULL, xcalloc, free);
allocate_file_hash_entries (pfile);
+ pfile->nonexistent_file_hash = htab_create_alloc (127, htab_hash_string,
+ nonexistent_file_hash_eq,
+ NULL, xcalloc, free);
+ _obstack_begin (&pfile->nonexistent_file_ob, 0, 0,
+ (void *(*) (long)) xmalloc,
+ (void (*) (void *)) free);
}
/* Finalize everything in this source file. */
@@ -1014,6 +1080,8 @@ _cpp_cleanup_files (cpp_reader *pfile)
{
htab_delete (pfile->file_hash);
htab_delete (pfile->dir_hash);
+ htab_delete (pfile->nonexistent_file_hash);
+ obstack_free (&pfile->nonexistent_file_ob, 0);
}
/* Enter a file name in the hash for the sake of cpp_included. */
Index: libcpp/internal.h
===================================================================
--- libcpp/internal.h (revision 120236)
+++ libcpp/internal.h (working copy)
@@ -355,6 +355,10 @@ struct cpp_reader
struct file_hash_entry *file_hash_entries;
unsigned int file_hash_entries_allocated, file_hash_entries_used;
+ /* Negative path lookup hash table. */
+ struct htab *nonexistent_file_hash;
+ struct obstack nonexistent_file_ob;
+
/* Nonzero means don't look for #include "foo" the source-file
directory. */
bool quote_ignores_source_dir;
More information about the Gcc-patches
mailing list