This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

expandable hash tables




This is primarily Vladimir Makarov's work with style/warning fixes from me.

Basically this package provides support for expandable hash tables, i.e.
hash tables that grow based on occupancy rates.

The original plan was to use expandable hash tables for some long term work
in the scheduler; however, Vlad found that expandable hash tables could also
be used to speed up CSE.

Vlad observed a speedup of around 9% building GCC itself and 25% building
certain nasty tests from the plumhall testsuite on a Sparc Ultra 30.  I've
confirmed similar improvements for the plumhall tests on a PA8200.

For an x86 there was no noticeable improvement building GCC and only a
small improvement (2%) for the nasty plumhall tests.  

Anyway, there's the raw expandable hash table code for libiberty.  A patch
will follow shortly to tie it into CSE.

	* libiberty/hashtab.c: New file.
	* include/hashtab.h: New file.

*** /dev/null	Thu Oct 14 18:08:17 1999
--- libiberty/hashtab.c	Fri Oct 15 00:47:45 1999
***************
*** 0 ****
--- 1,291 ----
+ /* An expandable hash tables datatype.  
+    Copyright (C) 1999 Free Software Foundation, Inc.
+    Contributed by Vladimir Makarov (vmakarov@cygnus.com).
+ 
+ This file is part of the libiberty library.
+ Libiberty is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+ 
+ Libiberty is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Library General Public License for more details.
+ 
+ You should have received a copy of the GNU Library General Public
+ License along with libiberty; see the file COPYING.LIB.  If
+ not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.  */
+ 
+ /* This package implements basic hash table functionality.  It is possible
+    to search for an entry, create an entry and destroy an entry.
+ 
+    Elements in the table are generic pointers.
+ 
+    The size of the table is not fixed; if the occupancy of the table
+    grows too high the hash table will be expanded.
+ 
+    The abstract data implementation is based on generalized Algorithm D
+    from Knuth's book "The art of computer programming".  Hash table is
+    expanded by creation of new hash table and transferring elements from
+    the old table to the new table. */
+ 
+ #ifdef HAVE_CONFIG_H
+ #include "config.h"
+ #endif
+ 
+ #ifdef HAVE_STDLIB_H
+ #include <stdlib.h>
+ #endif
+ 
+ #ifdef HAVE_STRING_H
+ #include <string.h>
+ #endif
+ 
+ #include "libiberty.h"
+ #include "hashtab.h"
+ 
+ /* Debugging statistic: the total number of calls of
+    `find_hash_table_entry', summed over all hash tables. */
+ 
+ static int all_searches = 0;
+ 
+ /* Debugging statistic: the total number of collisions (additional
+    probes made by `find_hash_table_entry'), summed over all hash
+    tables. */
+ 
+ static int all_collisions = 0;
+ 
+ /* Debugging statistic: the total number of table expansions, summed
+    over all hash tables. */
+ 
+ static int all_expansions = 0;
+ 
+ /* Reserved value which marks an empty table entry. */
+ 
+ #define EMPTY_ENTRY    NULL
+ 
+ /* Reserved value which marks a table entry whose element has been
+    deleted. */
+ 
+ #define DELETED_ENTRY  ((void *) 1)
+ 
+ /* The following function returns a prime number which is greater
+    than the given source number.  Only odd candidates are examined,
+    starting at (NUMBER / 2) * 2 + 3, so the result is the first prime
+    at or above that value and not necessarily the prime nearest to
+    NUMBER (e.g. the result for 4 is 7, not 5). */
+ 
+ static unsigned long
+ higher_prime_number (number)
+      unsigned long number;
+ {
+   unsigned long i;
+ 
+   /* Step through the odd candidates until one proves to be prime. */
+   for (number = (number / 2) * 2 + 3;; number += 2)
+     {
+       /* Trial division by odd divisors while i * i <= candidate. */
+       for (i = 3; i * i <= number; i += 2)
+         if (number % i == 0)
+           break;
+       /* The inner loop ran to completion: no divisor was found. */
+       if (i * i > number)
+         return number;
+     }
+ }
+ 
+ /* This function creates a table whose length is the value returned by
+    `higher_prime_number' for the given source length, i.e. a prime
+    slightly larger than SIZE.  HASH_FUNCTION and EQ_FUNCTION are stored
+    in the table and used later to hash and to compare elements.  The
+    created hash table is initiated as empty (all the hash table
+    entries are EMPTY_ENTRY) with all counters and statistics set to
+    zero.  The function returns the created hash table.  The results of
+    xmalloc are used without any check for NULL. */
+ 
+ hash_table_t
+ create_hash_table (size, hash_function, eq_function)
+      size_t size;
+      unsigned (*hash_function) PARAMS ((hash_table_entry_t));
+      int (*eq_function) PARAMS ((hash_table_entry_t, hash_table_entry_t));
+ {
+   hash_table_t result;
+ 
+   /* The table length is always a prime number. */
+   size = higher_prime_number (size);
+   result = (hash_table_t) xmalloc (sizeof (*result));
+   result->entries
+     = (hash_table_entry_t *) xmalloc (size * sizeof (hash_table_entry_t));
+   result->size = size;
+   result->hash_function = hash_function;
+   result->eq_function = eq_function;
+   result->number_of_elements = 0;
+   result->number_of_deleted_elements = 0;
+   result->searches = 0;
+   result->collisions = 0;
+   /* Zero-fill the entries so that each of them reads as EMPTY_ENTRY. */
+   memset (result->entries, 0, size * sizeof (hash_table_entry_t));
+   return result;
+ }
+ 
+ /* This function frees all memory allocated for given hash table,
+    i.e. the array of entries and the table descriptor itself.  The
+    elements referenced by the entries are not freed.  Naturally the
+    hash table must already exist, and HTAB must not be used after the
+    call. */
+ 
+ void
+ delete_hash_table (htab)
+      hash_table_t htab;
+ {
+   free (htab->entries);
+   free (htab);
+ }
+ 
+ /* This function clears all entries in the given hash table.  The
+    size of the table is not changed.  Every entry becomes EMPTY_ENTRY
+    and the counts of elements and of deleted elements are reset to
+    zero, so that `hash_table_elements_number' and the occupancy test
+    in `find_hash_table_entry' see an empty table.  The elements
+    themselves are not freed and the debugging statistics (searches,
+    collisions) are left untouched.  */
+ 
+ void
+ empty_hash_table (htab)
+      hash_table_t htab;
+ {
+   memset (htab->entries, 0, htab->size * sizeof (hash_table_entry_t));
+   htab->number_of_elements = 0;
+   htab->number_of_deleted_elements = 0;
+ }
+ 
+ /* The following function changes size of memory allocated for the
+    entries and repeatedly inserts the table elements.  The new table
+    is created for twice the current number of elements (deleted ones
+    included), so the occupancy of the table after the call will be
+    about 50% or less.  Deleted entries are not transferred.  The
+    debugging statistics gathered so far for the table are preserved;
+    the searches made while re-inserting the elements are added to
+    them.  Naturally the hash table must already exist.  Remember also
+    that the place of the table entries is changed, so pointers to
+    entries obtained before the call are no longer valid. */
+ 
+ static void
+ expand_hash_table (htab)
+      hash_table_t htab;
+ {
+   hash_table_t new_htab;
+   hash_table_entry_t *entry_ptr;
+   hash_table_entry_t *new_entry_ptr;
+   int old_searches;
+   int old_collisions;
+ 
+   /* The structure assignment below overwrites the statistics. */
+   old_searches = htab->searches;
+   old_collisions = htab->collisions;
+   new_htab = create_hash_table (htab->number_of_elements * 2,
+                                 htab->hash_function, htab->eq_function);
+   for (entry_ptr = htab->entries; entry_ptr < htab->entries + htab->size;
+        entry_ptr++)
+     if (*entry_ptr != EMPTY_ENTRY && *entry_ptr != DELETED_ENTRY)
+       {
+         new_entry_ptr = find_hash_table_entry (new_htab, *entry_ptr, 1);
+         *new_entry_ptr = (*entry_ptr);
+       }
+   free (htab->entries);
+   /* Take over the new descriptor contents (entries, size, counts);
+      only the temporary descriptor itself is released afterwards. */
+   *htab = (*new_htab);
+   htab->searches += old_searches;
+   htab->collisions += old_collisions;
+   free (new_htab);
+ }
+ 
+ /* This function searches for hash table entry which contains element
+    equal to given value (according to the table's eq_function) or
+    empty entry in which given value can be placed (if the element with
+    given value does not exist in the table).  The function works in
+    two regimes.  The first regime (RESERVE is zero) is used only for
+    search.  The second is used for search and reservation of an empty
+    entry for given value; in this regime a deleted entry met during
+    the search is reused in preference to the empty entry which ended
+    the search.  In both regimes the returned entry contains
+    EMPTY_ENTRY when the element was not found, so the caller can
+    distinguish a free entry from an existing element by comparing the
+    entry with NULL.
+ 
+    The table is expanded before the search if occupancy (taking into
+    account also deleted elements) is 75% or more, which invalidates
+    entry pointers returned by earlier calls.  Naturally the hash table
+    must already exist.  If reservation flag is TRUE then the element
+    with given value should be inserted into the table entry before
+    another call of `find_hash_table_entry'. */
+ 
+ hash_table_entry_t *
+ find_hash_table_entry (htab, element, reserve)
+      hash_table_t htab;
+      hash_table_entry_t element;
+      int reserve;
+ {
+   hash_table_entry_t *entry_ptr;
+   hash_table_entry_t *first_deleted_entry_ptr;
+   unsigned index, hash_value, secondary_hash_value;
+ 
+   if (htab->size * 3 <= htab->number_of_elements * 4)
+     {
+       all_expansions++;
+       expand_hash_table (htab);
+     }
+   hash_value = (*htab->hash_function) (element);
+   /* Double hashing: the probe step lies in 1 .. size - 2 and the table
+      size is prime, so the probe sequence visits every entry. */
+   secondary_hash_value = 1 + hash_value % (htab->size - 2);
+   index = hash_value % htab->size;
+   htab->searches++;
+   all_searches++;
+   first_deleted_entry_ptr = NULL;
+   /* Each additional probe is counted as a collision. */
+   for (;;htab->collisions++, all_collisions++)
+     {
+       entry_ptr = htab->entries + index;
+       if (*entry_ptr == EMPTY_ENTRY)
+         {
+           /* The element is not in the table. */
+           if (reserve)
+             {
+               if (first_deleted_entry_ptr != NULL)
+                 {
+                   /* Reuse the deleted entry.  It is already included
+                      in number_of_elements, so only the number of
+                      deleted elements changes.  Hand the entry back as
+                      an empty one. */
+                   entry_ptr = first_deleted_entry_ptr;
+                   *entry_ptr = EMPTY_ENTRY;
+                   htab->number_of_deleted_elements--;
+                 }
+               else
+                 htab->number_of_elements++;
+             }
+           break;
+         }
+       else if (*entry_ptr != DELETED_ENTRY)
+         {
+           if ((*htab->eq_function) (*entry_ptr, element))
+             break;
+         }
+       else if (first_deleted_entry_ptr == NULL)
+         /* Remember the first deleted entry as a candidate for reuse. */
+         first_deleted_entry_ptr = entry_ptr;
+       index += secondary_hash_value;
+       if (index >= htab->size)
+         index -= htab->size;
+     }
+   return entry_ptr;
+ }
+ 
+ /* This function deletes element with given value from hash table.
+    The hash table entry value will be `DELETED_ENTRY' after the
+    function call and the number of deleted elements is incremented.
+    The element itself is not freed.  Naturally the hash table must
+    already exist.  If the table contains no element equal to given
+    value the function does nothing.  As the function calls
+    `find_hash_table_entry', the table may be expanded by the call. */
+ 
+ void
+ remove_element_from_hash_table_entry (htab, element)
+      hash_table_t htab;
+      hash_table_entry_t element;
+ {
+   hash_table_entry_t *entry_ptr;
+ 
+   entry_ptr = find_hash_table_entry (htab, element, 0);
+   /* The search ended on an empty entry: the element is absent.
+      Marking that entry as deleted would corrupt the element counts. */
+   if (*entry_ptr == EMPTY_ENTRY)
+     return;
+   *entry_ptr = DELETED_ENTRY;
+   htab->number_of_deleted_elements++;
+ }
+ 
+ /* The following function returns current size of given hash table,
+    i.e. the number of entries allocated for it (not the number of
+    elements stored in it). */
+ 
+ size_t
+ hash_table_size (htab)
+      hash_table_t htab;
+ {
+   return htab->size;
+ }
+ 
+ /* The following function returns current number of elements in given
+    hash table.  It is computed as the stored number of elements minus
+    the stored number of deleted elements. */
+ 
+ size_t
+ hash_table_elements_number (htab)
+      hash_table_t htab;
+ {
+   return htab->number_of_elements - htab->number_of_deleted_elements;
+ }
+ 
+ /* The following function returns the percentage of collisions
+    recorded for given hash table: the number of collisions times 100
+    divided by the number of searches (integer division).  A search
+    count of zero is treated as one to avoid division by zero. */
+ 
+ int
+ hash_table_collisions (htab)
+      hash_table_t htab;
+ {
+   int searches;
+ 
+   searches = htab->searches;
+   /* Avoid division by zero for a table which was never searched. */
+   if (searches == 0)
+     searches++;
+   return htab->collisions * 100 / searches;
+ }
+ 
+ /* The following function returns the percentage of collisions
+    recorded for all hash tables together: the global number of
+    collisions times 100 divided by the global number of searches
+    (integer division).  A search count of zero is treated as one to
+    avoid division by zero. */
+ 
+ int
+ all_hash_table_collisions ()
+ {
+   int searches;
+ 
+   searches = all_searches;
+   /* Avoid division by zero when no search has been made yet. */
+   if (searches == 0)
+     searches++;
+   return all_collisions * 100 / searches;
+ }
*** /dev/null	Thu Oct 14 18:08:17 1999
--- include/hashtab.h	Fri Oct 15 00:19:12 1999
***************
*** 0 ****
--- 1,103 ----
+ /* An expandable hash tables datatype.  
+    Copyright (C) 1999 Free Software Foundation, Inc.
+    Contributed by Vladimir Makarov (vmakarov@cygnus.com).
+ 
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+ 
+ /* This package implements basic hash table functionality.  It is possible
+    to search for an entry, create an entry and destroy an entry.
+ 
+    Elements in the table are generic pointers.
+ 
+    The size of the table is not fixed; if the occupancy of the table
+    grows too high the hash table will be expanded.
+ 
+    The abstract data implementation is based on generalized Algorithm D
+    from Knuth's book "The art of computer programming".  Hash table is
+    expanded by creation of new hash table and transferring elements from
+    the old table to the new table.  */
+ 
+ #ifndef __HASHTAB_H__
+ #define __HASHTAB_H__
+ 
+ #ifdef __cplusplus
+ extern "C" {
+ #endif /* __cplusplus */
+ 
+ #include <ansidecl.h>
+ 
+ /* The hash table element is represented by the following type.  The
+    implementation reserves the values NULL and (void *) 1 to mark
+    empty and deleted entries, so these two values can not be stored
+    as elements. */
+ 
+ typedef const void *hash_table_entry_t;
+ 
+ /* Hash tables are of the following type.  The structure
+    (implementation) of this type is not needed for using the hash
+    tables.  All work with hash table should be executed only through
+    functions mentioned below.  Note that the type is a pointer to the
+    table descriptor. */
+ 
+ typedef struct
+ {
+   /* Current size (in entries) of the hash table */
+   size_t size;
+   /* Current number of elements including also deleted elements */
+   size_t number_of_elements;
+   /* Current number of deleted elements in the table */
+   size_t number_of_deleted_elements;
+   /* The following member is used for debugging. Its value is number
+      of all calls of `find_hash_table_entry' for the hash table. */
+   int searches;
+   /* The following member is used for debugging.  Its value is number
+      of collisions fixed for time of work with the hash table. */
+   int collisions;
+   /* Pointer to function for evaluation of hash value (any unsigned value).
+      This function has one parameter of type hash_table_entry_t. */
+   unsigned (*hash_function) PARAMS ((hash_table_entry_t));
+   /* Pointer to function for test on equality of hash table elements (two
+      parameters of type hash_table_entry_t).  A nonzero result means
+      that the elements are equal. */
+   int (*eq_function) PARAMS ((hash_table_entry_t, hash_table_entry_t));
+   /* Table itself */
+   hash_table_entry_t *entries;
+ } *hash_table_t;
+ 
+ 
+ /* The prototypes of the package functions. */
+ 
+ /* Create an empty hash table for about the given number of entries,
+    using the given hash function and equality function. */
+ extern hash_table_t create_hash_table
+   PARAMS ((size_t, unsigned (*) (hash_table_entry_t),
+ 	   int (*) (hash_table_entry_t, hash_table_entry_t)));
+ 
+ /* Free the memory allocated for the hash table (not the elements). */
+ extern void delete_hash_table PARAMS ((hash_table_t));
+ 
+ /* Make all entries of the hash table empty. */
+ extern void empty_hash_table PARAMS ((hash_table_t));
+ 
+ /* Find the entry for the given element; a nonzero third argument
+    reserves an entry for the element if it is not in the table. */
+ extern hash_table_entry_t *find_hash_table_entry
+   PARAMS ((hash_table_t, hash_table_entry_t, int));
+ 
+ /* Mark the entry of the given element as deleted. */
+ extern void remove_element_from_hash_table_entry PARAMS ((hash_table_t,
+ 							  hash_table_entry_t));
+ 
+ /* Return the current size (in entries) of the hash table. */
+ extern size_t hash_table_size PARAMS ((hash_table_t));
+ 
+ /* Return the current number of elements in the hash table. */
+ extern size_t hash_table_elements_number PARAMS ((hash_table_t));
+ 
+ /* Return the percentage of collisions for the hash table. */
+ extern int hash_table_collisions PARAMS ((hash_table_t));
+ 
+ /* Return the percentage of collisions for all hash tables.  The
+    function takes no arguments. */
+ extern int all_hash_table_collisions PARAMS ((void));
+ 
+ #ifdef __cplusplus
+ }
+ #endif /* __cplusplus */
+ 
+ #endif /* __HASHTAB_H__ */






Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]