This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH cppfiles file_hash brokenness


cppfiles.c manages a hash table that maps file and directory
names to lists of struct file_hash_entry.  It uses an
assymmetrical 'equality' function to compare keys (strings)
with entries (struct file_hash_entry*).  This is allowed by
the specification in hashtab.h, but in that case "htab_find
and htab_find_slot cannot be used; instead the variants that
accept a hash value must be used".  The code does not do this,
instead counting of htab_find and htab_find_slot to calculate
the hash value from the key string.

This works fine until the hashtable must be resized, which
calls htab_expand.  This calls the registered hash function
to recalculate the new hash slot.  But that function is
htab_hash_string, which expects and was originally called with
a filename argument.  However, htab_expand calls with the
(struct file_hash_entry*).  The result is nonsense, and
the hash table gets all jumbled with old entries that we can
no longer find because they get hashed to the wrong slots.  This
usually doesn't matter much, because the file_hash table is only
a cache.  However, we lose some of the performance benefit of the
cache.  This is major for the compile-server, which uses the file
cache to contain old and re-usable include file fragments.

I checked this into mainline as well as the compile server branch.
The patch causes no regressions on mainline on Fedora (as usual
ignoring pch randomness).

Worth considering is a more extensive change where we hash directly
from (filename, start_dir) to a (struct file_hash_entry*).  Then we
wouldn't need the search_cache function, nor would we need the
'next' field of struct file_hash_entry.  A minor downside is that
cpp_include would be harder to implement efficiently, but it doesn't
appear to be used anywhere and it is poorly specified, so it can
probably just be removed.
--
	--Per Bothner
per@bothner.com   http://per.bothner.com/




2003-12-05  Per Bothner  <pbothner@apple.com>

	* cppfiles.c (file_hash_hash):  New static function.
	(hash_string_eq):  Renamed static function to file_hash_eq.
	(_cpp_init_files):  Create file_hash table with above callbacks.
	(cpp_included):  Must use htab_find_with_hash insead of htab_find.
	(_cpp_find_find, make_cpp_dir):  Must use htab_find_slot_with_hash.

Index: cppfiles.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppfiles.c,v
retrieving revision 1.195
diff -u -p -r1.195 cppfiles.c
--- cppfiles.c	14 Nov 2003 19:00:04 -0000	1.195
+++ cppfiles.c	5 Dec 2003 22:43:17 -0000
@@ -172,7 +172,8 @@ static cpp_dir *make_cpp_dir (cpp_reader
 static void allocate_file_hash_entries (cpp_reader *pfile);
 static struct file_hash_entry *new_file_hash_entry (cpp_reader *pfile);
 static int report_missing_guard (void **slot, void *b);
-static int hash_string_eq (const void *p, const void *q);
+static hashval_t file_hash_hash (const void *p);
+static int file_hash_eq (const void *p, const void *q);
 static char *read_filename_string (int ch, FILE *f);
 static void read_name_map (cpp_dir *dir);
 static char *remap_filename (cpp_reader *pfile, _cpp_file *file);
@@ -367,7 +368,9 @@ _cpp_find_file (cpp_reader *pfile, const
     cpp_error (pfile, CPP_DL_ICE, "NULL directory in find_file");
 
   hash_slot = (struct file_hash_entry **)
-    htab_find_slot (pfile->file_hash, fname, INSERT);
+    htab_find_slot_with_hash (pfile->file_hash, fname,
+			      htab_hash_string (fname),
+			      INSERT);
 
   /* First check the cache before we resort to memory allocation.  */
   entry = search_cache (*hash_slot, start_dir);
@@ -797,7 +800,9 @@ make_cpp_dir (cpp_reader *pfile, const c
   cpp_dir *dir;
 
   hash_slot = (struct file_hash_entry **)
-    htab_find_slot (pfile->file_hash, dir_name, INSERT);
+    htab_find_slot_with_hash (pfile->file_hash, dir_name,
+			      htab_hash_string (dir_name),
+			      INSERT);
 
   /* Have we already hashed this directory?  */
   for (entry = *hash_slot; entry; entry = entry->next)
@@ -848,7 +853,8 @@ cpp_included (cpp_reader *pfile, const c
 {
   struct file_hash_entry *entry;
 
-  entry = htab_find (pfile->file_hash, fname);
+  entry = htab_find_with_hash (pfile->file_hash, fname,
+			       htab_hash_string (fname));
 
   while (entry && (entry->start_dir == NULL || entry->u.file->err_no))
     entry = entry->next;
@@ -856,9 +862,24 @@ cpp_included (cpp_reader *pfile, const c
   return entry != NULL;
 }
 
+/* Calculate the hash value of a file hash entry P. */
+
+static hashval_t
+file_hash_hash (const void *p)
+{
+  struct file_hash_entry *entry = (struct file_hash_entry *) p;
+  const char *hname;
+  if (entry->start_dir)
+    hname = entry->u.file->name;
+  else
+    hname = entry->u.dir->name;
+
+  return htab_hash_string (hname);
+}
+
 /* Compare a string Q against a file hash entry P.  */
 static int
-hash_string_eq (const void *p, const void *q)
+file_hash_eq (const void *p, const void *q)
 {
   struct file_hash_entry *entry = (struct file_hash_entry *) p;
   const char *fname = (const char *) q;
@@ -876,7 +897,7 @@ hash_string_eq (const void *p, const voi
 void
 _cpp_init_files (cpp_reader *pfile)
 {
-  pfile->file_hash = htab_create_alloc (127, htab_hash_string, hash_string_eq,
+  pfile->file_hash = htab_create_alloc (127, file_hash_hash, file_hash_eq,
 					NULL, xcalloc, free);
   allocate_file_hash_entries (pfile);
 }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]