[PATCH] Speed-up get_identifier("main") (take 4)

Roger Sayle roger@eyesopen.com
Fri Aug 8 19:43:00 GMT 2003


The following version of the patch implements Richard Henderson's
suggestion of changing the length argument to get_identifier_with_length
to be size_t.  I followed Neil Booth's and Zack's recommendation of
also changing the argument types of ht_lookup and calc_hash to size_t
but keeping the len field of ht_identifier "unsigned int", and casting
down with an explicit cast in ht_lookup.

Then whilst I was tweaking ht_lookup anyway, I also implemented a
suggestion from Andi Kleen to optimize ht_lookup for the common cases
where the first probe finds either an empty slot or a matching entry.
In these cases, its possible to avoid calculating the secondary hash.
Given GCC's identifier table initially contains 16K entries, most of
the calls to ht_lookup on a small or medium input file should benefit,
and there should be no slow down on larger source files.


The following patch has been tested on i686-pc-linux-gnu with a
complete "make bootstrap", all languages except treelang, and regression
tested with a top-level "make -k check" with no new failures.

Ok for mainline?


2003-08-08  Roger Sayle  <roger@eyesopen.com>

	* tree.h (get_identifier) Define a macro form of get_identifier
	that calls get_identifier_with_length when the string is constant.
	(get_identifier_with_length): Change type of second argument to
	size_t in prototype.
	* stringpool.c (get_identifier): Undefine the macro before giving
	the function definition.
	(get_identifier_with_length): Change  type of second argument to
	size_t in function definition.
	* hashtable.c (calc_hash): Change type of second argument to size_t.
  	(ht_lookup): Change type of third argument to size_t.  Reorganize
	to speed-up the cases where the hash table slot is empty, or the
	first probe matches (i.e. there isn't a collision).
	* hashtable.h (ht_lookup): Adjust function prototype.


Index: tree.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.h,v
retrieving revision 1.433
diff -c -3 -p -r1.433 tree.h
*** tree.h	5 Aug 2003 14:11:43 -0000	1.433
--- tree.h	8 Aug 2003 15:29:49 -0000
*************** extern tree make_tree_vec (int);
*** 2050,2059 ****

  extern tree get_identifier (const char *);

  /* Identical to get_identifier, except that the length is assumed
     known.  */

! extern tree get_identifier_with_length (const char *, unsigned int);

  /* If an identifier with the name TEXT (a null-terminated string) has
     previously been referred to, return that node; otherwise return
--- 2050,2067 ----

  extern tree get_identifier (const char *);

+ #if GCC_VERSION >= 3000
+ #define get_identifier(str) \
+   (__builtin_constant_p (str)				\
+     ? get_identifier_with_length ((str), strlen (str))  \
+     : get_identifier (str))
+ #endif
+
+
  /* Identical to get_identifier, except that the length is assumed
     known.  */

! extern tree get_identifier_with_length (const char *, size_t);

  /* If an identifier with the name TEXT (a null-terminated string) has
     previously been referred to, return that node; otherwise return
Index: stringpool.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/stringpool.c,v
retrieving revision 1.21
diff -c -3 -p -r1.21 stringpool.c
*** stringpool.c	6 Jul 2003 12:35:55 -0000	1.21
--- stringpool.c	8 Aug 2003 15:29:49 -0000
*************** ggc_alloc_string (const char *contents,
*** 95,100 ****
--- 95,102 ----
     If an identifier with that name has previously been referred to,
     the same node is returned this time.  */

+ #undef get_identifier
+
  tree
  get_identifier (const char *text)
  {
*************** get_identifier (const char *text)
*** 110,116 ****
     known.  */

  tree
! get_identifier_with_length (const char *text, unsigned int length)
  {
    hashnode ht_node = ht_lookup (ident_hash,
  				(const unsigned char *) text,
--- 112,118 ----
     known.  */

  tree
! get_identifier_with_length (const char *text, size_t length)
  {
    hashnode ht_node = ht_lookup (ident_hash,
  				(const unsigned char *) text,
Index: hashtable.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/hashtable.c,v
retrieving revision 1.13
diff -c -3 -p -r1.13 hashtable.c
*** hashtable.c	22 Jul 2003 16:24:53 -0000	1.13
--- hashtable.c	8 Aug 2003 15:29:49 -0000
*************** Foundation, 59 Temple Place - Suite 330,
*** 30,45 ****
     existing entry with a potential new one.  Also, the ability to
     delete members from the table has been removed.  */

! static unsigned int calc_hash (const unsigned char *, unsigned int);
  static void ht_expand (hash_table *);
  static double approx_sqrt (double);

  /* Calculate the hash of the string STR of length LEN.  */

  static unsigned int
! calc_hash (const unsigned char *str, unsigned int len)
  {
!   unsigned int n = len;
    unsigned int r = 0;
  #define HASHSTEP(r, c) ((r) * 67 + ((c) - 113));

--- 30,45 ----
     existing entry with a potential new one.  Also, the ability to
     delete members from the table has been removed.  */

! static unsigned int calc_hash (const unsigned char *, size_t);
  static void ht_expand (hash_table *);
  static double approx_sqrt (double);

  /* Calculate the hash of the string STR of length LEN.  */

  static unsigned int
! calc_hash (const unsigned char *str, size_t len)
  {
!   size_t n = len;
    unsigned int r = 0;
  #define HASHSTEP(r, c) ((r) * 67 + ((c) - 113));

*************** ht_destroy (hash_table *table)
*** 92,98 ****
     CPP_ALLOCED and the item is assumed to be at the top of the
     obstack.  */
  hashnode
! ht_lookup (hash_table *table, const unsigned char *str, unsigned int len,
  	   enum ht_lookup_option insert)
  {
    unsigned int hash = calc_hash (str, len);
--- 92,98 ----
     CPP_ALLOCED and the item is assumed to be at the top of the
     obstack.  */
  hashnode
! ht_lookup (hash_table *table, const unsigned char *str, size_t len,
  	   enum ht_lookup_option insert)
  {
    unsigned int hash = calc_hash (str, len);
*************** ht_lookup (hash_table *table, const unsi
*** 103,123 ****

    sizemask = table->nslots - 1;
    index = hash & sizemask;
-
-   /* hash2 must be odd, so we're guaranteed to visit every possible
-      location in the table during rehashing.  */
-   hash2 = ((hash * 17) & sizemask) | 1;
    table->searches++;

!   for (;;)
      {
!       node = table->entries[index];
!
!       if (node == NULL)
! 	break;
!
!       if (node->hash_value == hash && HT_LEN (node) == len
!           && !memcmp (HT_STR (node), str, len))
  	{
  	  if (insert == HT_ALLOCED)
  	    /* The string we search for was placed at the end of the
--- 103,117 ----

    sizemask = table->nslots - 1;
    index = hash & sizemask;
    table->searches++;

!   node = table->entries[index];
!
!   if (node != NULL)
      {
!       if (node->hash_value == hash
! 	  && HT_LEN (node) == (unsigned int) len
! 	  && !memcmp (HT_STR (node), str, len))
  	{
  	  if (insert == HT_ALLOCED)
  	    /* The string we search for was placed at the end of the
*************** ht_lookup (hash_table *table, const unsi
*** 126,133 ****
  	  return node;
  	}

!       index = (index + hash2) & sizemask;
!       table->collisions++;
      }

    if (insert == HT_NO_INSERT)
--- 120,148 ----
  	  return node;
  	}

!       /* hash2 must be odd, so we're guaranteed to visit every possible
! 	 location in the table during rehashing.  */
!       hash2 = ((hash * 17) & sizemask) | 1;
!
!       for (;;)
! 	{
! 	  table->collisions++;
! 	  index = (index + hash2) & sizemask;
! 	  node = table->entries[index];
! 	  if (node == NULL)
! 	    break;
!
! 	  if (node->hash_value == hash
! 	      && HT_LEN (node) == (unsigned int) len
! 	      && !memcmp (HT_STR (node), str, len))
! 	    {
! 	      if (insert == HT_ALLOCED)
! 	      /* The string we search for was placed at the end of the
! 		 obstack.  Release it.  */
! 		obstack_free (&table->stack, (void *) str);
! 	      return node;
! 	    }
! 	}
      }

    if (insert == HT_NO_INSERT)
*************** ht_lookup (hash_table *table, const unsi
*** 136,142 ****
    node = (*table->alloc_node) (table);
    table->entries[index] = node;

!   HT_LEN (node) = len;
    node->hash_value = hash;
    if (insert == HT_ALLOC)
      HT_STR (node) = obstack_copy0 (&table->stack, str, len);
--- 151,157 ----
    node = (*table->alloc_node) (table);
    table->entries[index] = node;

!   HT_LEN (node) = (unsigned int) len;
    node->hash_value = hash;
    if (insert == HT_ALLOC)
      HT_STR (node) = obstack_copy0 (&table->stack, str, len);
Index: hashtable.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/hashtable.h,v
retrieving revision 1.11
diff -c -3 -p -r1.11 hashtable.h
*** hashtable.h	22 Jul 2003 16:24:53 -0000	1.11
--- hashtable.h	8 Aug 2003 15:29:49 -0000
*************** extern hash_table *ht_create (unsigned i
*** 67,73 ****
  extern void ht_destroy (hash_table *);

  extern hashnode ht_lookup (hash_table *, const unsigned char *,
! 			   unsigned int, enum ht_lookup_option);

  /* For all nodes in TABLE, make a callback.  The callback takes
     TABLE->PFILE, the node, and a PTR, and the callback sequence stops
--- 67,73 ----
  extern void ht_destroy (hash_table *);

  extern hashnode ht_lookup (hash_table *, const unsigned char *,
! 			   size_t, enum ht_lookup_option);

  /* For all nodes in TABLE, make a callback.  The callback takes
     TABLE->PFILE, the node, and a PTR, and the callback sequence stops

Roger
--
Roger Sayle,                         E-mail: roger@eyesopen.com
OpenEye Scientific Software,         WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road,     Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507.         Fax: (+1) 505-473-0833



More information about the Gcc-patches mailing list