]>
Commit | Line | Data |
---|---|---|
2a967f3d | 1 | /* Hash tables. |
748086b7 JJ |
2 | Copyright (C) 2000, 2001, 2003, 2004, 2008, 2009 |
3 | Free Software Foundation, Inc. | |
2a967f3d NB |
4 | |
5 | This program is free software; you can redistribute it and/or modify it | |
6 | under the terms of the GNU General Public License as published by the | |
748086b7 | 7 | Free Software Foundation; either version 3, or (at your option) any |
2a967f3d NB |
8 | later version. |
9 | ||
10 | This program is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU General Public License | |
748086b7 JJ |
16 | along with this program; see the file COPYING3. If not see |
17 | <http://www.gnu.org/licenses/>. | |
2a967f3d NB |
18 | |
19 | In other words, you are welcome to use, share and improve this program. | |
20 | You are forbidden to forbid anyone else to use, share and improve | |
21 | what you give them. Help stamp out software-hoarding! */ | |
22 | ||
23 | #include "config.h" | |
24 | #include "system.h" | |
4f4e53dd | 25 | #include "symtab.h" |
2a967f3d NB |
26 | |
27 | /* The code below is a specialization of Vladimir Makarov's expandable | |
28 | hash tables (see libiberty/hashtab.c). The abstraction penalty was | |
29 | too high to continue using the generic form. This code knows | |
30 | intrinsically how to calculate a hash value, and how to compare an | |
dae4174e | 31 | existing entry with a potential new one. */ |
2a967f3d | 32 | |
7bb3fbbb | 33 | static unsigned int calc_hash (const unsigned char *, size_t); |
1d088dee | 34 | static void ht_expand (hash_table *); |
a2f7be91 | 35 | static double approx_sqrt (double); |
2a967f3d | 36 | |
dae4174e TT |
37 | /* A deleted entry. */ |
38 | #define DELETED ((hashnode) -1) | |
39 | ||
2a967f3d NB |
40 | /* Calculate the hash of the string STR of length LEN. */ |
41 | ||
42 | static unsigned int | |
7bb3fbbb | 43 | calc_hash (const unsigned char *str, size_t len) |
2a967f3d | 44 | { |
7bb3fbbb | 45 | size_t n = len; |
2a967f3d | 46 | unsigned int r = 0; |
2a967f3d NB |
47 | |
48 | while (n--) | |
c6e83800 | 49 | r = HT_HASHSTEP (r, *str++); |
2a967f3d | 50 | |
c6e83800 | 51 | return HT_HASHFINISH (r, len); |
2a967f3d NB |
52 | } |
53 | ||
54 | /* Initialize an identifier hashtable. */ | |
55 | ||
56 | hash_table * | |
1d088dee | 57 | ht_create (unsigned int order) |
2a967f3d NB |
58 | { |
59 | unsigned int nslots = 1 << order; | |
60 | hash_table *table; | |
61 | ||
c3f829c1 | 62 | table = XCNEW (hash_table); |
2a967f3d NB |
63 | |
64 | /* Strings need no alignment. */ | |
43839642 ZW |
65 | _obstack_begin (&table->stack, 0, 0, |
66 | (void *(*) (long)) xmalloc, | |
67 | (void (*) (void *)) free); | |
68 | ||
2a967f3d NB |
69 | obstack_alignment_mask (&table->stack) = 0; |
70 | ||
c3f829c1 | 71 | table->entries = XCNEWVEC (hashnode, nslots); |
b453c95f | 72 | table->entries_owned = true; |
2a967f3d NB |
73 | table->nslots = nslots; |
74 | return table; | |
75 | } | |
76 | ||
bef985f3 NB |
77 | /* Frees all memory associated with a hash table. */ |
78 | ||
79 | void | |
1d088dee | 80 | ht_destroy (hash_table *table) |
bef985f3 NB |
81 | { |
82 | obstack_free (&table->stack, NULL); | |
b453c95f GK |
83 | if (table->entries_owned) |
84 | free (table->entries); | |
bef985f3 NB |
85 | free (table); |
86 | } | |
87 | ||
2a967f3d | 88 | /* Returns the hash entry for the a STR of length LEN. If that string |
dae4174e | 89 | already exists in the table, returns the existing entry. If the |
2a967f3d NB |
90 | identifier hasn't been seen before, and INSERT is CPP_NO_INSERT, |
91 | returns NULL. Otherwise insert and returns a new entry. A new | |
dae4174e | 92 | string is allocated. */ |
2a967f3d | 93 | hashnode |
7bb3fbbb | 94 | ht_lookup (hash_table *table, const unsigned char *str, size_t len, |
1d088dee | 95 | enum ht_lookup_option insert) |
2a967f3d | 96 | { |
c6e83800 ZW |
97 | return ht_lookup_with_hash (table, str, len, calc_hash (str, len), |
98 | insert); | |
99 | } | |
100 | ||
101 | hashnode | |
102 | ht_lookup_with_hash (hash_table *table, const unsigned char *str, | |
103 | size_t len, unsigned int hash, | |
104 | enum ht_lookup_option insert) | |
105 | { | |
2a967f3d NB |
106 | unsigned int hash2; |
107 | unsigned int index; | |
dae4174e | 108 | unsigned int deleted_index = table->nslots; |
2a967f3d NB |
109 | size_t sizemask; |
110 | hashnode node; | |
111 | ||
112 | sizemask = table->nslots - 1; | |
113 | index = hash & sizemask; | |
2a967f3d NB |
114 | table->searches++; |
115 | ||
7bb3fbbb | 116 | node = table->entries[index]; |
dae4174e | 117 | |
7bb3fbbb | 118 | if (node != NULL) |
2a967f3d | 119 | { |
dae4174e TT |
120 | if (node == DELETED) |
121 | deleted_index = index; | |
122 | else if (node->hash_value == hash | |
123 | && HT_LEN (node) == (unsigned int) len | |
124 | && !memcmp (HT_STR (node), str, len)) | |
125 | return node; | |
2a967f3d | 126 | |
7bb3fbbb RS |
127 | /* hash2 must be odd, so we're guaranteed to visit every possible |
128 | location in the table during rehashing. */ | |
129 | hash2 = ((hash * 17) & sizemask) | 1; | |
130 | ||
131 | for (;;) | |
132 | { | |
133 | table->collisions++; | |
134 | index = (index + hash2) & sizemask; | |
135 | node = table->entries[index]; | |
136 | if (node == NULL) | |
137 | break; | |
138 | ||
dae4174e | 139 | if (node == DELETED) |
7bb3fbbb | 140 | { |
dae4174e TT |
141 | if (deleted_index != table->nslots) |
142 | deleted_index = index; | |
7bb3fbbb | 143 | } |
dae4174e TT |
144 | else if (node->hash_value == hash |
145 | && HT_LEN (node) == (unsigned int) len | |
146 | && !memcmp (HT_STR (node), str, len)) | |
147 | return node; | |
7bb3fbbb | 148 | } |
2a967f3d NB |
149 | } |
150 | ||
151 | if (insert == HT_NO_INSERT) | |
152 | return NULL; | |
153 | ||
dae4174e TT |
154 | /* We prefer to overwrite the first deleted slot we saw. */ |
155 | if (deleted_index != table->nslots) | |
156 | index = deleted_index; | |
157 | ||
2a967f3d NB |
158 | node = (*table->alloc_node) (table); |
159 | table->entries[index] = node; | |
160 | ||
7bb3fbbb | 161 | HT_LEN (node) = (unsigned int) len; |
5e0c54e5 | 162 | node->hash_value = hash; |
dae4174e TT |
163 | |
164 | if (table->alloc_subobject) | |
165 | { | |
f1bf410c | 166 | char *chars = (char *) table->alloc_subobject (len + 1); |
dae4174e TT |
167 | memcpy (chars, str, len); |
168 | chars[len] = '\0'; | |
169 | HT_STR (node) = (const unsigned char *) chars; | |
170 | } | |
2a967f3d | 171 | else |
dae4174e TT |
172 | HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack, |
173 | str, len); | |
2a967f3d NB |
174 | |
175 | if (++table->nelements * 4 >= table->nslots * 3) | |
176 | /* Must expand the string table. */ | |
177 | ht_expand (table); | |
178 | ||
179 | return node; | |
180 | } | |
181 | ||
182 | /* Double the size of a hash table, re-hashing existing entries. */ | |
183 | ||
184 | static void | |
1d088dee | 185 | ht_expand (hash_table *table) |
2a967f3d NB |
186 | { |
187 | hashnode *nentries, *p, *limit; | |
188 | unsigned int size, sizemask; | |
189 | ||
190 | size = table->nslots * 2; | |
c3f829c1 | 191 | nentries = XCNEWVEC (hashnode, size); |
2a967f3d NB |
192 | sizemask = size - 1; |
193 | ||
194 | p = table->entries; | |
195 | limit = p + table->nslots; | |
196 | do | |
dae4174e | 197 | if (*p && *p != DELETED) |
2a967f3d NB |
198 | { |
199 | unsigned int index, hash, hash2; | |
200 | ||
5e0c54e5 | 201 | hash = (*p)->hash_value; |
2a967f3d NB |
202 | index = hash & sizemask; |
203 | ||
4ae2e3e9 | 204 | if (nentries[index]) |
2a967f3d | 205 | { |
4ae2e3e9 RS |
206 | hash2 = ((hash * 17) & sizemask) | 1; |
207 | do | |
2a967f3d | 208 | { |
4ae2e3e9 | 209 | index = (index + hash2) & sizemask; |
2a967f3d | 210 | } |
4ae2e3e9 | 211 | while (nentries[index]); |
2a967f3d | 212 | } |
4ae2e3e9 | 213 | nentries[index] = *p; |
2a967f3d NB |
214 | } |
215 | while (++p < limit); | |
216 | ||
b453c95f GK |
217 | if (table->entries_owned) |
218 | free (table->entries); | |
219 | table->entries_owned = true; | |
2a967f3d NB |
220 | table->entries = nentries; |
221 | table->nslots = size; | |
222 | } | |
223 | ||
224 | /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE, | |
225 | the node, and V. */ | |
226 | void | |
1d088dee | 227 | ht_forall (hash_table *table, ht_cb cb, const void *v) |
2a967f3d NB |
228 | { |
229 | hashnode *p, *limit; | |
230 | ||
231 | p = table->entries; | |
232 | limit = p + table->nslots; | |
233 | do | |
dae4174e | 234 | if (*p && *p != DELETED) |
2a967f3d NB |
235 | { |
236 | if ((*cb) (table->pfile, *p, v) == 0) | |
237 | break; | |
238 | } | |
239 | while (++p < limit); | |
240 | } | |
241 | ||
dae4174e TT |
242 | /* Like ht_forall, but a nonzero return from the callback means that |
243 | the entry should be removed from the table. */ | |
244 | void | |
245 | ht_purge (hash_table *table, ht_cb cb, const void *v) | |
246 | { | |
247 | hashnode *p, *limit; | |
248 | ||
249 | p = table->entries; | |
250 | limit = p + table->nslots; | |
251 | do | |
252 | if (*p && *p != DELETED) | |
253 | { | |
254 | if ((*cb) (table->pfile, *p, v)) | |
255 | *p = DELETED; | |
256 | } | |
257 | while (++p < limit); | |
258 | } | |
259 | ||
b453c95f GK |
260 | /* Restore the hash table. */ |
261 | void | |
262 | ht_load (hash_table *ht, hashnode *entries, | |
263 | unsigned int nslots, unsigned int nelements, | |
264 | bool own) | |
265 | { | |
266 | if (ht->entries_owned) | |
267 | free (ht->entries); | |
268 | ht->entries = entries; | |
269 | ht->nslots = nslots; | |
270 | ht->nelements = nelements; | |
271 | ht->entries_owned = own; | |
272 | } | |
273 | ||
2a967f3d NB |
274 | /* Dump allocation statistics to stderr. */ |
275 | ||
276 | void | |
1d088dee | 277 | ht_dump_statistics (hash_table *table) |
2a967f3d NB |
278 | { |
279 | size_t nelts, nids, overhead, headers; | |
dae4174e | 280 | size_t total_bytes, longest, deleted = 0; |
0fd9e8dd | 281 | double sum_of_squares, exp_len, exp_len2, exp2_len; |
2a967f3d NB |
282 | hashnode *p, *limit; |
283 | ||
284 | #define SCALE(x) ((unsigned long) ((x) < 1024*10 \ | |
285 | ? (x) \ | |
286 | : ((x) < 1024*1024*10 \ | |
287 | ? (x) / 1024 \ | |
288 | : (x) / (1024*1024)))) | |
289 | #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M')) | |
290 | ||
291 | total_bytes = longest = sum_of_squares = nids = 0; | |
292 | p = table->entries; | |
293 | limit = p + table->nslots; | |
294 | do | |
dae4174e TT |
295 | if (*p == DELETED) |
296 | ++deleted; | |
297 | else if (*p) | |
2a967f3d NB |
298 | { |
299 | size_t n = HT_LEN (*p); | |
300 | ||
301 | total_bytes += n; | |
0fd9e8dd | 302 | sum_of_squares += (double) n * n; |
2a967f3d NB |
303 | if (n > longest) |
304 | longest = n; | |
305 | nids++; | |
306 | } | |
307 | while (++p < limit); | |
1d088dee | 308 | |
2a967f3d NB |
309 | nelts = table->nelements; |
310 | overhead = obstack_memory_used (&table->stack) - total_bytes; | |
311 | headers = table->nslots * sizeof (hashnode); | |
312 | ||
313 | fprintf (stderr, "\nString pool\nentries\t\t%lu\n", | |
314 | (unsigned long) nelts); | |
315 | fprintf (stderr, "identifiers\t%lu (%.2f%%)\n", | |
316 | (unsigned long) nids, nids * 100.0 / nelts); | |
317 | fprintf (stderr, "slots\t\t%lu\n", | |
318 | (unsigned long) table->nslots); | |
dae4174e TT |
319 | fprintf (stderr, "deleted\t\t%lu\n", |
320 | (unsigned long) deleted); | |
2a967f3d NB |
321 | fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n", |
322 | SCALE (total_bytes), LABEL (total_bytes), | |
323 | SCALE (overhead), LABEL (overhead)); | |
324 | fprintf (stderr, "table size\t%lu%c\n", | |
325 | SCALE (headers), LABEL (headers)); | |
326 | ||
327 | exp_len = (double)total_bytes / (double)nelts; | |
328 | exp2_len = exp_len * exp_len; | |
329 | exp_len2 = (double) sum_of_squares / (double) nelts; | |
330 | ||
331 | fprintf (stderr, "coll/search\t%.4f\n", | |
332 | (double) table->collisions / (double) table->searches); | |
333 | fprintf (stderr, "ins/search\t%.4f\n", | |
334 | (double) nelts / (double) table->searches); | |
335 | fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n", | |
336 | exp_len, approx_sqrt (exp_len2 - exp2_len)); | |
337 | fprintf (stderr, "longest entry\t%lu\n", | |
338 | (unsigned long) longest); | |
339 | #undef SCALE | |
340 | #undef LABEL | |
341 | } | |
342 | ||
343 | /* Return the approximate positive square root of a number N. This is for | |
344 | statistical reports, not code generation. */ | |
a2f7be91 | 345 | static double |
1d088dee | 346 | approx_sqrt (double x) |
2a967f3d NB |
347 | { |
348 | double s, d; | |
349 | ||
350 | if (x < 0) | |
351 | abort (); | |
352 | if (x == 0) | |
353 | return 0; | |
354 | ||
355 | s = x; | |
356 | do | |
357 | { | |
358 | d = (s * s - x) / (2 * s); | |
359 | s -= d; | |
360 | } | |
361 | while (d > .0001); | |
362 | return s; | |
363 | } |