1 /* hash - hashing table processing.
3 Copyright (C) 1998-2004, 2006-2007, 2009-2011 Free Software Foundation, Inc.
5 Written by Jim Meyering, 1992.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* A generic hash table package. */
22 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
23 of malloc. If you change USE_OBSTACK, you have to recompile! */
29 #include "bitrotate.h"
/* Default chunk allocators for <obstack.h>; each guard needs a matching
   #endif, which had gone missing.  */
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
/* The attribute __const__ was added in gcc 2.95.  Restore the #else/#endif
   so the conditional is balanced.  */
#undef _GL_ATTRIBUTE_CONST
#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)
# define _GL_ATTRIBUTE_CONST __attribute__ ((__const__))
#else
# define _GL_ATTRIBUTE_CONST /* empty */
#endif
57 struct hash_entry *next;
62 /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
63 for a possibility of N_BUCKETS. Among those, N_BUCKETS_USED buckets
64 are not empty, there are N_ENTRIES active entries in the table. */
65 struct hash_entry *bucket;
66 struct hash_entry const *bucket_limit;
68 size_t n_buckets_used;
71 /* Tuning arguments, kept in a physically separate structure. */
72 const Hash_tuning *tuning;
74 /* Three functions are given to `hash_initialize', see the documentation
75 block for this function. In a word, HASHER randomizes a user entry
76 into a number up from 0 up to some maximum minus 1; COMPARATOR returns
77 true if two user entries compare equally; and DATA_FREER is the cleanup
78 function for a user entry. */
80 Hash_comparator comparator;
81 Hash_data_freer data_freer;
83 /* A linked list of freed struct hash_entry structs. */
84 struct hash_entry *free_entry_list;
87 /* Whenever obstacks are used, it is possible to allocate all overflowed
88 entries into a single stack, so they all can be freed in a single
89 operation. It is not clear if the speedup is worth the trouble. */
90 struct obstack entry_stack;
94 /* A hash table contains many internal entries, each holding a pointer to
95 some user-provided data (also called a user entry). An entry indistinctly
96 refers to both the internal entry and its associated user entry. A user
97 entry contents may be hashed by a randomization function (the hashing
98 function, or just `hasher' for short) into a number (or `slot') between 0
99 and the current table size. At each slot position in the hash table,
100 starts a linked chain of entries for which the user data all hash to this
101 slot. A bucket is the collection of all entries hashing to the same slot.
103 A good `hasher' function will distribute entries rather evenly in buckets.
104 In the ideal case, the length of each bucket is roughly the number of
105 entries divided by the table size. Finding the slot for a data is usually
106 done in constant time by the `hasher', and the later finding of a precise
107 entry is linear in time with the size of the bucket. Consequently, a
108 larger hash table size (that is, a larger number of buckets) is prone to
109 yielding shorter chains, *given* the `hasher' function behaves properly.
111 Long buckets slow down the lookup algorithm. One might use big hash table
112 sizes in hope to reduce the average length of buckets, but this might
113 become inordinate, as unused slots in the hash table take some space. The
114 best bet is to make sure you are using a good `hasher' function (beware
115 that those are not that easy to write! :-), and to use a table size
116 larger than the actual number of entries. */
118 /* If an insertion makes the ratio of nonempty buckets to table size larger
119 than the growth threshold (a number between 0.0 and 1.0), then increase
120 the table size by multiplying by the growth factor (a number greater than
121 1.0). The growth threshold defaults to 0.8, and the growth factor
122 defaults to 1.414, meaning that the table will have doubled its size
123 every second time 80% of the buckets get used. */
124 #define DEFAULT_GROWTH_THRESHOLD 0.8
125 #define DEFAULT_GROWTH_FACTOR 1.414
127 /* If a deletion empties a bucket and causes the ratio of used buckets to
128 table size to become smaller than the shrink threshold (a number between
129 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
130 number greater than the shrink threshold but smaller than 1.0). The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
133 #define DEFAULT_SHRINK_THRESHOLD 0.0
134 #define DEFAULT_SHRINK_FACTOR 1.0
136 /* Use this to initialize or reset a TUNING structure to
137 some sensible values. */
138 static const Hash_tuning default_tuning =
140 DEFAULT_SHRINK_THRESHOLD,
141 DEFAULT_SHRINK_FACTOR,
142 DEFAULT_GROWTH_THRESHOLD,
143 DEFAULT_GROWTH_FACTOR,
147 /* Information and lookup. */
149 /* The following few functions provide information about the overall hash
150 table organization: the number of entries, number of buckets and maximum
151 length of buckets. */
153 /* Return the number of buckets in the hash table. The table size, the total
154 number of buckets (used plus unused), or the maximum number of slots, are
155 the same quantity. */
157 size_t _GL_ATTRIBUTE_PURE
158 hash_get_n_buckets (const Hash_table *table)
160 return table->n_buckets;
163 /* Return the number of slots in use (non-empty buckets). */
165 size_t _GL_ATTRIBUTE_PURE
166 hash_get_n_buckets_used (const Hash_table *table)
168 return table->n_buckets_used;
171 /* Return the number of active entries. */
173 size_t _GL_ATTRIBUTE_PURE
174 hash_get_n_entries (const Hash_table *table)
176 return table->n_entries;
179 /* Return the length of the longest chain (bucket). */
181 size_t _GL_ATTRIBUTE_PURE
182 hash_get_max_bucket_length (const Hash_table *table)
184 struct hash_entry const *bucket;
185 size_t max_bucket_length = 0;
187 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
191 struct hash_entry const *cursor = bucket;
192 size_t bucket_length = 1;
194 while (cursor = cursor->next, cursor)
197 if (bucket_length > max_bucket_length)
198 max_bucket_length = bucket_length;
202 return max_bucket_length;
205 /* Do a mild validation of a hash table, by traversing it and checking two
208 bool _GL_ATTRIBUTE_PURE
209 hash_table_ok (const Hash_table *table)
211 struct hash_entry const *bucket;
212 size_t n_buckets_used = 0;
213 size_t n_entries = 0;
215 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
219 struct hash_entry const *cursor = bucket;
221 /* Count bucket head. */
225 /* Count bucket overflow. */
226 while (cursor = cursor->next, cursor)
231 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
238 hash_print_statistics (const Hash_table *table, FILE *stream)
240 size_t n_entries = hash_get_n_entries (table);
241 size_t n_buckets = hash_get_n_buckets (table);
242 size_t n_buckets_used = hash_get_n_buckets_used (table);
243 size_t max_bucket_length = hash_get_max_bucket_length (table);
245 fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
246 fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
247 fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
248 (unsigned long int) n_buckets_used,
249 (100.0 * n_buckets_used) / n_buckets);
250 fprintf (stream, "max bucket length: %lu\n",
251 (unsigned long int) max_bucket_length);
254 /* Hash KEY and return a pointer to the selected bucket.
255 If TABLE->hasher misbehaves, abort. */
256 static struct hash_entry *
257 safe_hasher (const Hash_table *table, const void *key)
259 size_t n = table->hasher (key, table->n_buckets);
260 if (! (n < table->n_buckets))
262 return table->bucket + n;
265 /* If ENTRY matches an entry already in the hash table, return the
266 entry from the table. Otherwise, return NULL. */
269 hash_lookup (const Hash_table *table, const void *entry)
271 struct hash_entry const *bucket = safe_hasher (table, entry);
272 struct hash_entry const *cursor;
274 if (bucket->data == NULL)
277 for (cursor = bucket; cursor; cursor = cursor->next)
278 if (entry == cursor->data || table->comparator (entry, cursor->data))
286 /* The functions in this page traverse the hash table and process the
287 contained entries. For the traversal to work properly, the hash table
288 should not be resized nor modified while any particular entry is being
289 processed. In particular, entries should not be added, and an entry
290 may be removed only if there is no shrink threshold and the entry being
291 removed has already been passed to hash_get_next. */
293 /* Return the first data in the table, or NULL if the table is empty. */
295 void * _GL_ATTRIBUTE_PURE
296 hash_get_first (const Hash_table *table)
298 struct hash_entry const *bucket;
300 if (table->n_entries == 0)
303 for (bucket = table->bucket; ; bucket++)
304 if (! (bucket < table->bucket_limit))
306 else if (bucket->data)
310 /* Return the user data for the entry following ENTRY, where ENTRY has been
311 returned by a previous call to either `hash_get_first' or `hash_get_next'.
312 Return NULL if there are no more entries. */
315 hash_get_next (const Hash_table *table, const void *entry)
317 struct hash_entry const *bucket = safe_hasher (table, entry);
318 struct hash_entry const *cursor;
320 /* Find next entry in the same bucket. */
324 if (cursor->data == entry && cursor->next)
325 return cursor->next->data;
326 cursor = cursor->next;
328 while (cursor != NULL);
330 /* Find first entry in any subsequent bucket. */
331 while (++bucket < table->bucket_limit)
339 /* Fill BUFFER with pointers to active user entries in the hash table, then
340 return the number of pointers copied. Do not copy more than BUFFER_SIZE
344 hash_get_entries (const Hash_table *table, void **buffer,
348 struct hash_entry const *bucket;
349 struct hash_entry const *cursor;
351 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
355 for (cursor = bucket; cursor; cursor = cursor->next)
357 if (counter >= buffer_size)
359 buffer[counter++] = cursor->data;
367 /* Call a PROCESSOR function for each entry of a hash table, and return the
368 number of entries for which the processor function returned success. A
369 pointer to some PROCESSOR_DATA which will be made available to each call to
370 the processor function. The PROCESSOR accepts two arguments: the first is
371 the user entry being walked into, the second is the value of PROCESSOR_DATA
372 as received. The walking continue for as long as the PROCESSOR function
373 returns nonzero. When it returns zero, the walking is interrupted. */
376 hash_do_for_each (const Hash_table *table, Hash_processor processor,
377 void *processor_data)
380 struct hash_entry const *bucket;
381 struct hash_entry const *cursor;
383 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
387 for (cursor = bucket; cursor; cursor = cursor->next)
389 if (! processor (cursor->data, processor_data))
399 /* Allocation and clean-up. */
401 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
402 This is a convenience routine for constructing other hashing functions. */
406 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
407 B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
408 Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash
409 algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
410 may not be good for your application." */
412 size_t _GL_ATTRIBUTE_PURE
413 hash_string (const char *string, size_t n_buckets)
415 # define HASH_ONE_CHAR(Value, Byte) \
416 ((Byte) + rotl_sz (Value, 7))
421 for (; (ch = *string); string++)
422 value = HASH_ONE_CHAR (value, ch);
423 return value % n_buckets;
425 # undef HASH_ONE_CHAR
428 #else /* not USE_DIFF_HASH */
430 /* This one comes from `recode', and performs a bit better than the above as
431 per a few experiments. It is inspired from a hashing routine found in the
432 very old Cyber `snoop', itself written in typical Greg Mansfield style.
433 (By the way, what happened to this excellent man? Is he still alive?) */
/* Return a hash index for NUL-terminated STRING between 0 and N_BUCKETS-1,
   folding each byte in with a multiplier of 31 and reducing modulo
   N_BUCKETS at every step, so the result is always in range.  */
size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}
446 #endif /* not USE_DIFF_HASH */
/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  /* Trial-divide by odd numbers only; SQUARE tracks DIVISOR * DIVISOR
     incrementally, since (d+2)^2 = d^2 + 4*(d+1).  */
  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static size_t
next_prime (size_t candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}
487 hash_reset_tuning (Hash_tuning *tuning)
489 *tuning = default_tuning;
/* If the user passes a NULL hasher, we hash the raw pointer.  */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0.  As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality.  */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}
/* If the user passes a NULL comparator, we use pointer comparison.  */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}
513 /* For the given hash TABLE, check the user supplied tuning structure for
514 reasonable values, and return true if there is no gross error with it.
515 Otherwise, definitively reset the TUNING field to some acceptable default
516 in the hash table (that is, the user loses the right of further modifying
517 tuning arguments), and return false. */
520 check_tuning (Hash_table *table)
522 const Hash_tuning *tuning = table->tuning;
524 if (tuning == &default_tuning)
527 /* Be a bit stricter than mathematics would require, so that
528 rounding errors in size calculations do not cause allocations to
529 fail to grow or shrink as they should. The smallest allocation
530 is 11 (due to next_prime's algorithm), so an epsilon of 0.1
531 should be good enough. */
534 if (epsilon < tuning->growth_threshold
535 && tuning->growth_threshold < 1 - epsilon
536 && 1 + epsilon < tuning->growth_factor
537 && 0 <= tuning->shrink_threshold
538 && tuning->shrink_threshold + epsilon < tuning->shrink_factor
539 && tuning->shrink_factor <= 1
540 && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
543 table->tuning = &default_tuning;
547 /* Compute the size of the bucket array for the given CANDIDATE and
548 TUNING, or return 0 if there is no possible way to allocate that
551 static size_t _GL_ATTRIBUTE_PURE
552 compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
554 if (!tuning->is_n_buckets)
556 float new_candidate = candidate / tuning->growth_threshold;
557 if (SIZE_MAX <= new_candidate)
559 candidate = new_candidate;
561 candidate = next_prime (candidate);
562 if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
567 /* Allocate and return a new hash table, or NULL upon failure. The initial
568 number of buckets is automatically selected so as to _guarantee_ that you
569 may insert at least CANDIDATE different user entries before any growth of
570 the hash table size occurs. So, if have a reasonably tight a-priori upper
571 bound on the number of entries you intend to insert in the hash table, you
572 may save some table memory and insertion time, by specifying it here. If
573 the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
574 argument has its meaning changed to the wanted number of buckets.
576 TUNING points to a structure of user-supplied values, in case some fine
577 tuning is wanted over the default behavior of the hasher. If TUNING is
578 NULL, the default tuning parameters are used instead. If TUNING is
579 provided but the values requested are out of bounds or might cause
580 rounding errors, return NULL.
582 The user-supplied HASHER function, when not NULL, accepts two
583 arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a
584 slot number for that entry which should be in the range 0..TABLE_SIZE-1.
585 This slot number is then returned.
587 The user-supplied COMPARATOR function, when not NULL, accepts two
588 arguments pointing to user data, it then returns true for a pair of entries
589 that compare equal, or false otherwise. This function is internally called
590 on entries which are already known to hash to the same bucket index,
591 but which are distinct pointers.
593 The user-supplied DATA_FREER function, when not NULL, may be later called
594 with the user data as an argument, just before the entry containing the
595 data gets freed. This happens from within `hash_free' or `hash_clear'.
596 You should specify this function only if you want these functions to free
597 all of your `data' data. This is typically the case when your data is
598 simply an auxiliary struct that you have malloc'd to aggregate several
602 hash_initialize (size_t candidate, const Hash_tuning *tuning,
603 Hash_hasher hasher, Hash_comparator comparator,
604 Hash_data_freer data_freer)
610 if (comparator == NULL)
611 comparator = raw_comparator;
613 table = malloc (sizeof *table);
618 tuning = &default_tuning;
619 table->tuning = tuning;
620 if (!check_tuning (table))
622 /* Fail if the tuning options are invalid. This is the only occasion
623 when the user gets some feedback about it. Once the table is created,
624 if the user provides invalid tuning options, we silently revert to
625 using the defaults, and ignore further request to change the tuning
630 table->n_buckets = compute_bucket_size (candidate, tuning);
631 if (!table->n_buckets)
634 table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
635 if (table->bucket == NULL)
637 table->bucket_limit = table->bucket + table->n_buckets;
638 table->n_buckets_used = 0;
639 table->n_entries = 0;
641 table->hasher = hasher;
642 table->comparator = comparator;
643 table->data_freer = data_freer;
645 table->free_entry_list = NULL;
647 obstack_init (&table->entry_stack);
656 /* Make all buckets empty, placing any chained entries on the free list.
657 Apply the user-specified function data_freer (if any) to the datas of any
661 hash_clear (Hash_table *table)
663 struct hash_entry *bucket;
665 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
669 struct hash_entry *cursor;
670 struct hash_entry *next;
672 /* Free the bucket overflow. */
673 for (cursor = bucket->next; cursor; cursor = next)
675 if (table->data_freer)
676 table->data_freer (cursor->data);
680 /* Relinking is done one entry at a time, as it is to be expected
681 that overflows are either rare or short. */
682 cursor->next = table->free_entry_list;
683 table->free_entry_list = cursor;
686 /* Free the bucket head. */
687 if (table->data_freer)
688 table->data_freer (bucket->data);
694 table->n_buckets_used = 0;
695 table->n_entries = 0;
698 /* Reclaim all storage associated with a hash table. If a data_freer
699 function has been supplied by the user when the hash table was created,
700 this function applies it to the data of each entry before freeing that
704 hash_free (Hash_table *table)
706 struct hash_entry *bucket;
707 struct hash_entry *cursor;
708 struct hash_entry *next;
710 /* Call the user data_freer function. */
711 if (table->data_freer && table->n_entries)
713 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
717 for (cursor = bucket; cursor; cursor = cursor->next)
718 table->data_freer (cursor->data);
725 obstack_free (&table->entry_stack, NULL);
729 /* Free all bucket overflowed entries. */
730 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
732 for (cursor = bucket->next; cursor; cursor = next)
739 /* Also reclaim the internal list of previously freed entries. */
740 for (cursor = table->free_entry_list; cursor; cursor = next)
748 /* Free the remainder of the hash table structure. */
749 free (table->bucket);
753 /* Insertion and deletion. */
755 /* Get a new hash entry for a bucket overflow, possibly by recycling a
756 previously freed one. If this is not possible, allocate a new one. */
758 static struct hash_entry *
759 allocate_entry (Hash_table *table)
761 struct hash_entry *new;
763 if (table->free_entry_list)
765 new = table->free_entry_list;
766 table->free_entry_list = new->next;
771 new = obstack_alloc (&table->entry_stack, sizeof *new);
773 new = malloc (sizeof *new);
780 /* Free a hash entry which was part of some bucket overflow,
781 saving it for later recycling. */
784 free_entry (Hash_table *table, struct hash_entry *entry)
787 entry->next = table->free_entry_list;
788 table->free_entry_list = entry;
791 /* This private function is used to help with insertion and deletion. When
792 ENTRY matches an entry in the table, return a pointer to the corresponding
793 user data and set *BUCKET_HEAD to the head of the selected bucket.
794 Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
795 the table, unlink the matching entry. */
798 hash_find_entry (Hash_table *table, const void *entry,
799 struct hash_entry **bucket_head, bool delete)
801 struct hash_entry *bucket = safe_hasher (table, entry);
802 struct hash_entry *cursor;
804 *bucket_head = bucket;
806 /* Test for empty bucket. */
807 if (bucket->data == NULL)
810 /* See if the entry is the first in the bucket. */
811 if (entry == bucket->data || table->comparator (entry, bucket->data))
813 void *data = bucket->data;
819 struct hash_entry *next = bucket->next;
821 /* Bump the first overflow entry into the bucket head, then save
822 the previous first overflow entry for later recycling. */
824 free_entry (table, next);
835 /* Scan the bucket overflow. */
836 for (cursor = bucket; cursor->next; cursor = cursor->next)
838 if (entry == cursor->next->data
839 || table->comparator (entry, cursor->next->data))
841 void *data = cursor->next->data;
845 struct hash_entry *next = cursor->next;
847 /* Unlink the entry to delete, then save the freed entry for later
849 cursor->next = next->next;
850 free_entry (table, next);
857 /* No entry found. */
861 /* Internal helper, to move entries from SRC to DST. Both tables must
862 share the same free entry list. If SAFE, only move overflow
863 entries, saving bucket heads for later, so that no allocations will
864 occur. Return false if the free entry list is exhausted and an
868 transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
870 struct hash_entry *bucket;
871 struct hash_entry *cursor;
872 struct hash_entry *next;
873 for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
877 struct hash_entry *new_bucket;
879 /* Within each bucket, transfer overflow entries first and
880 then the bucket head, to minimize memory pressure. After
881 all, the only time we might allocate is when moving the
882 bucket head, but moving overflow entries first may create
883 free entries that can be recycled by the time we finally
884 get to the bucket head. */
885 for (cursor = bucket->next; cursor; cursor = next)
888 new_bucket = safe_hasher (dst, data);
892 if (new_bucket->data)
894 /* Merely relink an existing entry, when moving from a
895 bucket overflow into a bucket overflow. */
896 cursor->next = new_bucket->next;
897 new_bucket->next = cursor;
901 /* Free an existing entry, when moving from a bucket
902 overflow into a bucket header. */
903 new_bucket->data = data;
904 dst->n_buckets_used++;
905 free_entry (dst, cursor);
908 /* Now move the bucket head. Be sure that if we fail due to
909 allocation failure that the src table is in a consistent
915 new_bucket = safe_hasher (dst, data);
917 if (new_bucket->data)
919 /* Allocate or recycle an entry, when moving from a bucket
920 header into a bucket overflow. */
921 struct hash_entry *new_entry = allocate_entry (dst);
923 if (new_entry == NULL)
926 new_entry->data = data;
927 new_entry->next = new_bucket->next;
928 new_bucket->next = new_entry;
932 /* Move from one bucket header to another. */
933 new_bucket->data = data;
934 dst->n_buckets_used++;
937 src->n_buckets_used--;
942 /* For an already existing hash table, change the number of buckets through
943 specifying CANDIDATE. The contents of the hash table are preserved. The
944 new number of buckets is automatically selected so as to _guarantee_ that
945 the table may receive at least CANDIDATE different user entries, including
946 those already in the table, before any other growth of the hash table size
947 occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
948 exact number of buckets desired. Return true iff the rehash succeeded. */
951 hash_rehash (Hash_table *table, size_t candidate)
954 Hash_table *new_table;
955 size_t new_size = compute_bucket_size (candidate, table->tuning);
959 if (new_size == table->n_buckets)
961 new_table = &storage;
962 new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
963 if (new_table->bucket == NULL)
965 new_table->n_buckets = new_size;
966 new_table->bucket_limit = new_table->bucket + new_size;
967 new_table->n_buckets_used = 0;
968 new_table->n_entries = 0;
969 new_table->tuning = table->tuning;
970 new_table->hasher = table->hasher;
971 new_table->comparator = table->comparator;
972 new_table->data_freer = table->data_freer;
974 /* In order for the transfer to successfully complete, we need
975 additional overflow entries when distinct buckets in the old
976 table collide into a common bucket in the new table. The worst
977 case possible is a hasher that gives a good spread with the old
978 size, but returns a constant with the new size; if we were to
979 guarantee table->n_buckets_used-1 free entries in advance, then
980 the transfer would be guaranteed to not allocate memory.
981 However, for large tables, a guarantee of no further allocation
982 introduces a lot of extra memory pressure, all for an unlikely
983 corner case (most rehashes reduce, rather than increase, the
984 number of overflow entries needed). So, we instead ensure that
985 the transfer process can be reversed if we hit a memory
986 allocation failure mid-transfer. */
988 /* Merely reuse the extra old space into the new table. */
990 new_table->entry_stack = table->entry_stack;
992 new_table->free_entry_list = table->free_entry_list;
994 if (transfer_entries (new_table, table, false))
996 /* Entries transferred successfully; tie up the loose ends. */
997 free (table->bucket);
998 table->bucket = new_table->bucket;
999 table->bucket_limit = new_table->bucket_limit;
1000 table->n_buckets = new_table->n_buckets;
1001 table->n_buckets_used = new_table->n_buckets_used;
1002 table->free_entry_list = new_table->free_entry_list;
1003 /* table->n_entries and table->entry_stack already hold their value. */
1007 /* We've allocated new_table->bucket (and possibly some entries),
1008 exhausted the free list, and moved some but not all entries into
1009 new_table. We must undo the partial move before returning
1010 failure. The only way to get into this situation is if new_table
1011 uses fewer buckets than the old table, so we will reclaim some
1012 free entries as overflows in the new table are put back into
1013 distinct buckets in the old table.
1015 There are some pathological cases where a single pass through the
1016 table requires more intermediate overflow entries than using two
1017 passes. Two passes give worse cache performance and takes
1018 longer, but at this point, we're already out of memory, so slow
1019 and safe is better than failure. */
1020 table->free_entry_list = new_table->free_entry_list;
1021 if (! (transfer_entries (table, new_table, true)
1022 && transfer_entries (table, new_table, false)))
1024 /* table->n_entries already holds its value. */
1025 free (new_table->bucket);
1029 /* Return -1 upon memory allocation failure.
1030 Return 1 if insertion succeeded.
1031 Return 0 if there is already a matching entry in the table,
1032 and in that case, if MATCHED_ENT is non-NULL, set *MATCHED_ENT
1035 This interface is easier to use than hash_insert when you must
1036 distinguish between the latter two cases. More importantly,
1037 hash_insert is unusable for some types of ENTRY values. When using
1038 hash_insert, the only way to distinguish those cases is to compare
1039 the return value and ENTRY. That works only when you can have two
1040 different ENTRY values that point to data that compares "equal". Thus,
1041 when the ENTRY value is a simple scalar, you must use hash_insert0.
1042 ENTRY must not be NULL. */
1044 hash_insert0 (Hash_table *table, void const *entry, void const **matched_ent)
1047 struct hash_entry *bucket;
1049 /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
1050 to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
1051 to indicate an empty bucket. */
1055 /* If there's a matching entry already in the table, return that. */
1056 if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
1059 *matched_ent = data;
1063 /* If the growth threshold of the buckets in use has been reached, increase
1064 the table size and rehash. There's no point in checking the number of
1065 entries: if the hashing function is ill-conditioned, rehashing is not
1066 likely to improve it. */
1068 if (table->n_buckets_used
1069 > table->tuning->growth_threshold * table->n_buckets)
1071 /* Check more fully, before starting real work. If tuning arguments
1072 became invalid, the second check will rely on proper defaults. */
1073 check_tuning (table);
1074 if (table->n_buckets_used
1075 > table->tuning->growth_threshold * table->n_buckets)
1077 const Hash_tuning *tuning = table->tuning;
1079 (tuning->is_n_buckets
1080 ? (table->n_buckets * tuning->growth_factor)
1081 : (table->n_buckets * tuning->growth_factor
1082 * tuning->growth_threshold));
1084 if (SIZE_MAX <= candidate)
1087 /* If the rehash fails, arrange to return NULL. */
1088 if (!hash_rehash (table, candidate))
1091 /* Update the bucket we are interested in. */
1092 if (hash_find_entry (table, entry, &bucket, false) != NULL)
1097 /* ENTRY is not matched, it should be inserted. */
1101 struct hash_entry *new_entry = allocate_entry (table);
1103 if (new_entry == NULL)
1106 /* Add ENTRY in the overflow of the bucket. */
1108 new_entry->data = (void *) entry;
1109 new_entry->next = bucket->next;
1110 bucket->next = new_entry;
1115 /* Add ENTRY right in the bucket head. */
1117 bucket->data = (void *) entry;
1119 table->n_buckets_used++;
1124 /* If ENTRY matches an entry already in the hash table, return the pointer
1125 to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
1126 Return NULL if the storage required for insertion cannot be allocated.
1127 This implementation does not support duplicate entries or insertion of
1131 hash_insert (Hash_table *table, void const *entry)
1133 void const *matched_ent;
1134 int err = hash_insert0 (table, entry, &matched_ent);
1137 : (void *) (err == 0 ? matched_ent : entry));
1140 /* If ENTRY is already in the table, remove it and return the just-deleted
1141 data (the user may want to deallocate its storage). If ENTRY is not in the
1142 table, don't modify the table and return NULL. */
1145 hash_delete (Hash_table *table, const void *entry)
1148 struct hash_entry *bucket;
1150 data = hash_find_entry (table, entry, &bucket, true);
1157 table->n_buckets_used--;
1159 /* If the shrink threshold of the buckets in use has been reached,
1160 rehash into a smaller table. */
1162 if (table->n_buckets_used
1163 < table->tuning->shrink_threshold * table->n_buckets)
1165 /* Check more fully, before starting real work. If tuning arguments
1166 became invalid, the second check will rely on proper defaults. */
1167 check_tuning (table);
1168 if (table->n_buckets_used
1169 < table->tuning->shrink_threshold * table->n_buckets)
1171 const Hash_tuning *tuning = table->tuning;
1173 (tuning->is_n_buckets
1174 ? table->n_buckets * tuning->shrink_factor
1175 : (table->n_buckets * tuning->shrink_factor
1176 * tuning->growth_threshold));
1178 if (!hash_rehash (table, candidate))
1180 /* Failure to allocate memory in an attempt to
1181 shrink the table is not fatal. But since memory
1182 is low, we can at least be kind and free any
1183 spare entries, rather than keeping them tied up
1184 in the free entry list. */
1186 struct hash_entry *cursor = table->free_entry_list;
1187 struct hash_entry *next;
1190 next = cursor->next;
1194 table->free_entry_list = NULL;
1209 hash_print (const Hash_table *table)
1211 struct hash_entry *bucket = (struct hash_entry *) table->bucket;
1213 for ( ; bucket < table->bucket_limit; bucket++)
1215 struct hash_entry *cursor;
1218 printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1220 for (cursor = bucket; cursor; cursor = cursor->next)
1222 char const *s = cursor->data;
1225 printf (" %s\n", s);
1230 #endif /* TESTING */