This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

libiberty PATCH: add generic iterative hash to hashtable.c


The hashtable code currently has hash functions for single pointers and
strings, but doesn't provide a way to generate a hash value from multiple
bits of data.  I want something like that for the tree-ssa gimplifier, for
hashing arbitrary tree expressions; I want to at least hash the tree code
and the operands.  Looking around online, I found the following algorithm.
Zack mentions it in the comment for htab_hash_string as being inferior to
his algorithm for hashing identifiers, but my needs are more general.

I expect that this will be useful to other folks, too.

Applied to trunk and tree-ssa.

2003-05-07  Jason Merrill  <jason@redhat.com>

libiberty/
	* hashtab.c (burtle_hash): New fn.
	* configure.in: Add AC_C_BIGENDIAN_CROSS.
	* aclocal.m4: Include accross.m4.
	* configure, config.in: Regenerate.
include/
	* hashtab.h (burtle_hash): Prototype.
	(burtle_hash_object): New macro.

*** libiberty/aclocal.m4.~1~	2003-02-25 17:24:21.000000000 -0500
--- libiberty/aclocal.m4	2003-05-07 11:59:24.000000000 -0400
***************
*** 1,3 ****
--- 1,5 ----
+ sinclude(../config/accross.m4)
+ 
  dnl See whether strncmp reads past the end of its string parameters.
  dnl On some versions of SunOS4 at least, strncmp reads a word at a time
  dnl but erroneously reads past the end of strings.  This can cause
*** libiberty/configure.in.~1~	2003-05-07 11:14:00.000000000 -0400
--- libiberty/configure.in	2003-05-07 11:48:50.000000000 -0400
*************** AC_SUBST(OUTPUT_OPTION)
*** 114,119 ****
--- 114,120 ----
  AC_ISC_POSIX
  AC_C_CONST
  AC_C_INLINE
+ AC_C_BIGENDIAN_CROSS
  
  dnl When we start using libtool:
  dnl Default to a non shared library.  This may be overridden by the
*** libiberty/hashtab.c.~1~	2003-05-07 11:18:31.000000000 -0400
--- libiberty/hashtab.c	2003-05-07 13:35:31.000000000 -0400
*************** htab_hash_string (p)
*** 709,711 ****
--- 709,849 ----
  
    return r;
  }
+ 
+ /* DERIVED FROM:
+ --------------------------------------------------------------------
+ lookup2.c, by Bob Jenkins, December 1996, Public Domain.
+ hash(), hash2(), hash3, and mix() are externally useful functions.
+ Routines to test the hash are included if SELF_TEST is defined.
+ You can use this free for any purpose.  It has no warranty.
+ --------------------------------------------------------------------
+ */
+ 
+ /*
+ --------------------------------------------------------------------
+ mix -- mix 3 32-bit values reversibly.
+ For every delta with one or two bit set, and the deltas of all three
+   high bits or all three low bits, whether the original value of a,b,c
+   is almost all zero or is uniformly distributed,
+ * If mix() is run forward or backward, at least 32 bits in a,b,c
+   have at least 1/4 probability of changing.
+ * If mix() is run forward, every bit of c will change between 1/3 and
+   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ mix() was built out of 36 single-cycle latency instructions in a 
+   structure that could supported 2x parallelism, like so:
+       a -= b; 
+       a -= c; x = (c>>13);
+       b -= c; a ^= x;
+       b -= a; x = (a<<8);
+       c -= a; b ^= x;
+       c -= b; x = (b>>13);
+       ...
+   Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
+   of that parallelism.  They've also turned some of those single-cycle
+   latency instructions into multi-cycle latency instructions.  Still,
+   this is the fastest good hash I could find.  There were about 2^^68
+   to choose from.  I only looked at a billion or so.
+ --------------------------------------------------------------------
+ */
+ /* same, but slower, works on systems that might have 8 byte hashval_t's */
+ #define mix(a,b,c) \
+ { \
+   a -= b; a -= c; a ^= (c>>13); \
+   b -= c; b -= a; b ^= (a<< 8); \
+   c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
+   a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
+   b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
+   c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
+   a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
+   b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
+   c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
+ }
+ 
+ /*
+ --------------------------------------------------------------------
+ hash() -- hash a variable-length key into a 32-bit value
+   k     : the key (the unaligned variable-length array of bytes)
+   len   : the length of the key, counting by bytes
+   level : can be any 4-byte value
+ Returns a 32-bit value.  Every bit of the key affects every bit of
+ the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+ About 36+6len instructions.
+ 
+ The best hash table sizes are powers of 2.  There is no need to do
+ mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+ use a bitmask.  For example, if you need only 10 bits, do
+   h = (h & hashmask(10));
+ In which case, the hash table should have hashsize(10) elements.
+ 
+ If you are hashing n strings (ub1 **)k, do it like this:
+   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+ 
+ By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
+ code any way you wish, private, educational, or commercial.  It's free.
+ 
+ See http://burtleburtle.net/bob/hash/evahash.html
+ Use for hash table lookup, or anything where one collision in 2^32 is
+ acceptable.  Do NOT use for cryptographic purposes.
+ --------------------------------------------------------------------
+ */
+ 
+ hashval_t burtle_hash (k_in, length, initval)
+      const PTR k_in;               /* the key */
+      register size_t  length;      /* the length of the key */
+      register hashval_t  initval;  /* the previous hash, or an arbitrary value */
+ {
+   register const unsigned char *k = (const unsigned char *)k_in;
+   register hashval_t a,b,c,len;
+ 
+   /* Set up the internal state */
+   len = length;
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = initval;           /* the previous hash value */
+ 
+   /*---------------------------------------- handle most of the key */
+ #ifndef WORDS_BIGENDIAN
+   /* On a little-endian machine, if the data is 4-byte aligned we can hash
+      by word for better speed.  This gives nondeterministic results on
+      big-endian machines.  */
+   if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
+     while (len >= 12)    /* aligned */
+       {
+ 	a += *(hashval_t *)(k+0);
+ 	b += *(hashval_t *)(k+4);
+ 	c += *(hashval_t *)(k+8);
+ 	mix(a,b,c);
+ 	k += 12; len -= 12;
+       }
+   else /* unaligned */
+ #endif
+     while (len >= 12)
+       {
+ 	a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
+ 	b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
+ 	c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
+ 	mix(a,b,c);
+ 	k += 12; len -= 12;
+       }
+ 
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;
+   switch(len)              /* all the case statements fall through */
+     {
+     case 11: c+=((hashval_t)k[10]<<24);
+     case 10: c+=((hashval_t)k[9]<<16);
+     case 9 : c+=((hashval_t)k[8]<<8);
+       /* the first byte of c is reserved for the length */
+     case 8 : b+=((hashval_t)k[7]<<24);
+     case 7 : b+=((hashval_t)k[6]<<16);
+     case 6 : b+=((hashval_t)k[5]<<8);
+     case 5 : b+=k[4];
+     case 4 : a+=((hashval_t)k[3]<<24);
+     case 3 : a+=((hashval_t)k[2]<<16);
+     case 2 : a+=((hashval_t)k[1]<<8);
+     case 1 : a+=k[0];
+       /* case 0: nothing left to add */
+     }
+   mix(a,b,c);
+   /*-------------------------------------------- report the result */
+   return c;
+ }
*** include/hashtab.h.~1~	2003-05-07 11:18:31.000000000 -0400
--- include/hashtab.h	2003-05-07 13:35:05.000000000 -0400
*************** extern htab_eq htab_eq_pointer;
*** 183,188 ****
--- 183,193 ----
  /* A hash function for null-terminated strings.  */
  extern hashval_t htab_hash_string PARAMS ((const PTR));
  
+ /* An iterative hash function for arbitrary data.  */
+ extern hashval_t burtle_hash PARAMS ((const PTR, size_t, hashval_t));
+ /* Shorthand for hashing something with an intrinsic size.  */
+ #define burtle_hash_object(OB,INIT) burtle_hash (&OB, sizeof (OB), INIT)
+ 
  #ifdef __cplusplus
  }
  #endif /* __cplusplus */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]