This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Speed-up get_identifier("main") (take 4)
- From: Roger Sayle <roger at eyesopen dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Richard Henderson <rth at redhat dot com>, Zack Weinberg <zack at codesourcery dot com>
- Date: Fri, 8 Aug 2003 13:37:57 -0600 (MDT)
- Subject: [PATCH] Speed-up get_identifier("main") (take 4)
The following version of the patch implements Richard Henderson's
suggestion of changing the length argument to get_identifier_with_length
to be size_t. I followed Neil Booth's and Zack's recommendation of
also changing the argument types of ht_lookup and calc_hash to size_t
but keeping the len field of ht_identifier "unsigned int", and casting
down with an explicit cast in ht_lookup.
Then whilst I was tweaking ht_lookup anyway, I also implemented a
suggestion from Andi Kleen to optimize ht_lookup for the common cases
where the first probe finds either an empty slot or a matching entry.
In these cases, its possible to avoid calculating the secondary hash.
Given GCC's identifier table initially contains 16K entries, most of
the calls to ht_lookup on a small or medium input file should benefit,
and there should be no slow down on larger source files.
The following patch has been tested on i686-pc-linux-gnu with a
complete "make bootstrap", all languages except treelang, and regression
tested with a top-level "make -k check" with no new failures.
Ok for mainline?
2003-08-08 Roger Sayle <roger@eyesopen.com>
* tree.h (get_identifier) Define a macro form of get_identifier
that calls get_identifier_with_length when the string is constant.
(get_identifier_with_length): Change type of second argument to
size_t in prototype.
* stringpool.c (get_identifier): Undefine the macro before giving
the function definition.
(get_identifier_with_length): Change type of second argument to
size_t in function definition.
* hashtable.c (calc_hash): Change type of second argument to size_t.
(ht_lookup): Change type of third argument to size_t. Reorganize
to speed-up the cases where the hash table slot is empty, or the
first probe matches (i.e. there isn't a collision).
* hashtable.h (ht_lookup): Adjust function prototype.
Index: tree.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.h,v
retrieving revision 1.433
diff -c -3 -p -r1.433 tree.h
*** tree.h 5 Aug 2003 14:11:43 -0000 1.433
--- tree.h 8 Aug 2003 15:29:49 -0000
*************** extern tree make_tree_vec (int);
*** 2050,2059 ****
extern tree get_identifier (const char *);
/* Identical to get_identifier, except that the length is assumed
known. */
! extern tree get_identifier_with_length (const char *, unsigned int);
/* If an identifier with the name TEXT (a null-terminated string) has
previously been referred to, return that node; otherwise return
--- 2050,2067 ----
extern tree get_identifier (const char *);
+ #if GCC_VERSION >= 3000
+ #define get_identifier(str) \
+ (__builtin_constant_p (str) \
+ ? get_identifier_with_length ((str), strlen (str)) \
+ : get_identifier (str))
+ #endif
+
+
/* Identical to get_identifier, except that the length is assumed
known. */
! extern tree get_identifier_with_length (const char *, size_t);
/* If an identifier with the name TEXT (a null-terminated string) has
previously been referred to, return that node; otherwise return
Index: stringpool.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/stringpool.c,v
retrieving revision 1.21
diff -c -3 -p -r1.21 stringpool.c
*** stringpool.c 6 Jul 2003 12:35:55 -0000 1.21
--- stringpool.c 8 Aug 2003 15:29:49 -0000
*************** ggc_alloc_string (const char *contents,
*** 95,100 ****
--- 95,102 ----
If an identifier with that name has previously been referred to,
the same node is returned this time. */
+ #undef get_identifier
+
tree
get_identifier (const char *text)
{
*************** get_identifier (const char *text)
*** 110,116 ****
known. */
tree
! get_identifier_with_length (const char *text, unsigned int length)
{
hashnode ht_node = ht_lookup (ident_hash,
(const unsigned char *) text,
--- 112,118 ----
known. */
tree
! get_identifier_with_length (const char *text, size_t length)
{
hashnode ht_node = ht_lookup (ident_hash,
(const unsigned char *) text,
Index: hashtable.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/hashtable.c,v
retrieving revision 1.13
diff -c -3 -p -r1.13 hashtable.c
*** hashtable.c 22 Jul 2003 16:24:53 -0000 1.13
--- hashtable.c 8 Aug 2003 15:29:49 -0000
*************** Foundation, 59 Temple Place - Suite 330,
*** 30,45 ****
existing entry with a potential new one. Also, the ability to
delete members from the table has been removed. */
! static unsigned int calc_hash (const unsigned char *, unsigned int);
static void ht_expand (hash_table *);
static double approx_sqrt (double);
/* Calculate the hash of the string STR of length LEN. */
static unsigned int
! calc_hash (const unsigned char *str, unsigned int len)
{
! unsigned int n = len;
unsigned int r = 0;
#define HASHSTEP(r, c) ((r) * 67 + ((c) - 113));
--- 30,45 ----
existing entry with a potential new one. Also, the ability to
delete members from the table has been removed. */
! static unsigned int calc_hash (const unsigned char *, size_t);
static void ht_expand (hash_table *);
static double approx_sqrt (double);
/* Calculate the hash of the string STR of length LEN. */
static unsigned int
! calc_hash (const unsigned char *str, size_t len)
{
! size_t n = len;
unsigned int r = 0;
#define HASHSTEP(r, c) ((r) * 67 + ((c) - 113));
*************** ht_destroy (hash_table *table)
*** 92,98 ****
CPP_ALLOCED and the item is assumed to be at the top of the
obstack. */
hashnode
! ht_lookup (hash_table *table, const unsigned char *str, unsigned int len,
enum ht_lookup_option insert)
{
unsigned int hash = calc_hash (str, len);
--- 92,98 ----
CPP_ALLOCED and the item is assumed to be at the top of the
obstack. */
hashnode
! ht_lookup (hash_table *table, const unsigned char *str, size_t len,
enum ht_lookup_option insert)
{
unsigned int hash = calc_hash (str, len);
*************** ht_lookup (hash_table *table, const unsi
*** 103,123 ****
sizemask = table->nslots - 1;
index = hash & sizemask;
-
- /* hash2 must be odd, so we're guaranteed to visit every possible
- location in the table during rehashing. */
- hash2 = ((hash * 17) & sizemask) | 1;
table->searches++;
! for (;;)
{
! node = table->entries[index];
!
! if (node == NULL)
! break;
!
! if (node->hash_value == hash && HT_LEN (node) == len
! && !memcmp (HT_STR (node), str, len))
{
if (insert == HT_ALLOCED)
/* The string we search for was placed at the end of the
--- 103,117 ----
sizemask = table->nslots - 1;
index = hash & sizemask;
table->searches++;
! node = table->entries[index];
!
! if (node != NULL)
{
! if (node->hash_value == hash
! && HT_LEN (node) == (unsigned int) len
! && !memcmp (HT_STR (node), str, len))
{
if (insert == HT_ALLOCED)
/* The string we search for was placed at the end of the
*************** ht_lookup (hash_table *table, const unsi
*** 126,133 ****
return node;
}
! index = (index + hash2) & sizemask;
! table->collisions++;
}
if (insert == HT_NO_INSERT)
--- 120,148 ----
return node;
}
! /* hash2 must be odd, so we're guaranteed to visit every possible
! location in the table during rehashing. */
! hash2 = ((hash * 17) & sizemask) | 1;
!
! for (;;)
! {
! table->collisions++;
! index = (index + hash2) & sizemask;
! node = table->entries[index];
! if (node == NULL)
! break;
!
! if (node->hash_value == hash
! && HT_LEN (node) == (unsigned int) len
! && !memcmp (HT_STR (node), str, len))
! {
! if (insert == HT_ALLOCED)
! /* The string we search for was placed at the end of the
! obstack. Release it. */
! obstack_free (&table->stack, (void *) str);
! return node;
! }
! }
}
if (insert == HT_NO_INSERT)
*************** ht_lookup (hash_table *table, const unsi
*** 136,142 ****
node = (*table->alloc_node) (table);
table->entries[index] = node;
! HT_LEN (node) = len;
node->hash_value = hash;
if (insert == HT_ALLOC)
HT_STR (node) = obstack_copy0 (&table->stack, str, len);
--- 151,157 ----
node = (*table->alloc_node) (table);
table->entries[index] = node;
! HT_LEN (node) = (unsigned int) len;
node->hash_value = hash;
if (insert == HT_ALLOC)
HT_STR (node) = obstack_copy0 (&table->stack, str, len);
Index: hashtable.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/hashtable.h,v
retrieving revision 1.11
diff -c -3 -p -r1.11 hashtable.h
*** hashtable.h 22 Jul 2003 16:24:53 -0000 1.11
--- hashtable.h 8 Aug 2003 15:29:49 -0000
*************** extern hash_table *ht_create (unsigned i
*** 67,73 ****
extern void ht_destroy (hash_table *);
extern hashnode ht_lookup (hash_table *, const unsigned char *,
! unsigned int, enum ht_lookup_option);
/* For all nodes in TABLE, make a callback. The callback takes
TABLE->PFILE, the node, and a PTR, and the callback sequence stops
--- 67,73 ----
extern void ht_destroy (hash_table *);
extern hashnode ht_lookup (hash_table *, const unsigned char *,
! size_t, enum ht_lookup_option);
/* For all nodes in TABLE, make a callback. The callback takes
TABLE->PFILE, the node, and a PTR, and the callback sequence stops
Roger
--
Roger Sayle, E-mail: roger@eyesopen.com
OpenEye Scientific Software, WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road, Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507. Fax: (+1) 505-473-0833