3 - a certain amount of library support, at least <stdlib.h>
4 - C ints are at least 32-bits long
8 - add a sample do_all function for listing the hash table.
18 /* This macro assumes that there is an HT with an initialized
19 HT_OBSTACK in scope. */
20 # define ZALLOC(n) obstack_alloc (&(ht->ht_obstack), (n))
22 # define ZALLOC(n) malloc ((n))
25 #define BUCKET_HEAD(ht, idx) ((ht)->hash_table[(idx)])
27 static void hash_free_0 (HT *, int);
31 unsigned long candidate;
33 /* No even number and none less than 10 will be passed here. */
34 unsigned long divn = 3;
35 unsigned long sq = divn * divn;
37 while (sq < candidate && (candidate % divn))
44 return (candidate % divn);
47 /* Round a given number up to the nearest prime. */
50 next_prime (candidate)
51 unsigned long candidate;
53 /* Make it definitely odd. */
56 while (!is_prime (candidate))
63 hash_free_entry (HT *ht, HASH_ENT *e)
66 e->next = ht->hash_free_entry_list;
67 ht->hash_free_entry_list = e;
71 hash_allocate_entry (HT *ht)
74 if (ht->hash_free_entry_list)
76 new = ht->hash_free_entry_list;
77 ht->hash_free_entry_list = new->next;
81 new = (HASH_ENT *) ZALLOC (sizeof (HASH_ENT));
87 hash_get_n_slots_used (const HT *ht)
89 return ht->hash_n_slots_used;
95 hash_rehash (HT *ht, unsigned int new_table_size)
100 if (ht->hash_table_size <= 0 || new_table_size == 0)
103 ht_new = hash_initialize (new_table_size, ht->hash_key_freer,
104 ht->hash_hash, ht->hash_key_comparator);
109 for (i = 0; i < ht->hash_table_size; i++)
111 HASH_ENT *p = BUCKET_HEAD (ht, i);
112 for ( /* empty */ ; p; p = p->next)
115 const void *already_in_table;
116 already_in_table = hash_insert_if_absent (ht_new, p->key, &failed);
117 assert (failed == 0 && already_in_table == 0);
124 assert (hash_table_ok (ht_new));
129 /* FIXME: fill in ht_new->n_slots_used and other statistics fields. */
137 hash_get_max_chain_length (HT *ht)
140 unsigned int max_chain_length = 0;
142 if (!ht->hash_dirty_max_chain_length)
143 return ht->hash_max_chain_length;
145 for (i = 0; i < ht->hash_table_size; i++)
147 unsigned int chain_length = 0;
148 HASH_ENT *p = BUCKET_HEAD (ht, i);
149 for ( /* empty */ ; p; p = p->next)
151 if (chain_length > max_chain_length)
152 max_chain_length = chain_length;
155 ht->hash_max_chain_length = max_chain_length;
156 ht->hash_dirty_max_chain_length = 0;
157 return ht->hash_max_chain_length;
161 hash_get_n_keys (const HT *ht)
163 return ht->hash_n_keys;
167 hash_get_table_size (const HT *ht)
169 return ht->hash_table_size;
172 /* CANDIDATE_TABLE_SIZE need not be prime. If WHEN_TO_REHASH (FIXME: add
173 this parameter) is positive, when that percentage of table entries have
174 been used, the table size is increased; then a new, larger table
175 (GROW_FACTOR (FIXME: maybe add this parameter) times larger than the previous
176 size) is allocated and all entries in the old table are rehashed into
177 the new, larger one. The old table is freed. If WHEN_TO_REHASH is zero
178 or negative, the table is never resized.
180 The function returns non-zero
181 - if CANDIDATE_TABLE_SIZE is zero or negative
182 - if KEY_COMPARATOR or HASH is null
183 - if it was unable to allocate sufficient storage for the hash table
184 - if WHEN_TO_REHASH is zero or negative
185 Otherwise it returns zero. */
188 hash_initialize (unsigned int candidate_table_size,
189 Hash_key_freer_type key_freer,
190 unsigned int (*hash) (const void *, unsigned int),
191 int (*key_comparator) (const void *, const void *))
195 unsigned int table_size;
197 if (candidate_table_size <= 0)
200 if (hash == NULL || key_comparator == NULL)
203 ht = (HT *) malloc (sizeof (HT));
207 table_size = next_prime (candidate_table_size);
208 ht->hash_table = (HASH_ENT **) malloc (table_size * sizeof (HASH_ENT *));
209 if (ht->hash_table == NULL)
212 for (i = 0; i < table_size; i++)
214 BUCKET_HEAD (ht, i) = NULL;
217 ht->hash_free_entry_list = NULL;
218 ht->hash_table_size = table_size;
219 ht->hash_hash = hash;
220 ht->hash_key_comparator = key_comparator;
221 ht->hash_key_freer = key_freer;
222 ht->hash_n_slots_used = 0;
223 ht->hash_max_chain_length = 0;
225 ht->hash_dirty_max_chain_length = 0;
227 obstack_init (&(ht->ht_obstack));
233 /* This private function is used to help with insertion and deletion.
234 If E does *not* compare equal to the key of any entry in the table,
236 When E matches an entry in the table, return a pointer to the matching
237 entry. When DELETE is non-zero and E matches an entry in the table,
238 unlink the matching entry. Set *CHAIN_LENGTH to the number of keys
239 that have hashed to the bucket E hashed to. */
242 hash_find_entry (HT *ht, const void *e, unsigned int *table_idx,
243 unsigned int *chain_length, int delete)
249 idx = ht->hash_hash (e, ht->hash_table_size);
250 assert (idx < ht->hash_table_size);
255 prev = ht->hash_table[idx];
261 if (ht->hash_key_comparator (e, prev->key) == 0)
264 ht->hash_table[idx] = prev->next;
273 if (ht->hash_key_comparator (e, p->key) == 0)
287 prev->next = p->next;
292 /* Return non-zero if E is already in the table, zero otherwise. */
295 hash_query_in_table (const HT *ht, const void *e)
300 idx = ht->hash_hash (e, ht->hash_table_size);
301 assert (idx < ht->hash_table_size);
302 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
303 if (ht->hash_key_comparator (e, p->key) == 0)
309 hash_lookup (const HT *ht, const void *e)
314 idx = ht->hash_hash (e, ht->hash_table_size);
315 assert (idx < ht->hash_table_size);
316 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
317 if (ht->hash_key_comparator (e, p->key) == 0)
322 /* If E matches an entry already in the hash table, don't modify the
323 table and return a pointer to the matched entry. If E does not
324 match any item in the table, insert E and return NULL.
325 If the storage required for insertion cannot be allocated
326 set *FAILED to non-zero and return NULL. */
329 hash_insert_if_absent (HT *ht, const void *e, int *failed)
334 unsigned int chain_length;
336 assert (e != NULL); /* Can't insert a NULL key. */
339 ent = hash_find_entry (ht, e, &idx, &chain_length, 0);
342 /* E matches a key from an entry already in the table. */
346 new = hash_allocate_entry (ht);
353 new->key = (void *) e;
354 new->next = BUCKET_HEAD (ht, idx);
355 BUCKET_HEAD (ht, idx) = new;
357 if (chain_length == 0)
358 ++(ht->hash_n_slots_used);
360 /* The insertion has just increased chain_length by 1. */
363 if (chain_length > ht->hash_max_chain_length)
364 ht->hash_max_chain_length = chain_length;
367 if ((double) ht->hash_n_keys / ht->hash_table_size > 0.80)
369 unsigned int new_size;
370 new_size = next_prime (2 * ht->hash_table_size + 1);
371 *failed = hash_rehash (ht, new_size);
375 assert (hash_table_ok (ht));
381 /* If E is already in the table, remove it and return a pointer to
382 the just-deleted key (the user may want to deallocate its storage).
383 If E is not in the table, don't modify the table and return NULL. */
386 hash_delete_if_present (HT *ht, const void *e)
391 unsigned int chain_length;
393 ent = hash_find_entry (ht, e, &idx, &chain_length, 1);
397 if (ent->next == NULL && chain_length == 1)
398 --(ht->hash_n_slots_used);
403 ht->hash_dirty_max_chain_length = 1;
404 if (ent->next == NULL && chain_length < ht->hash_max_chain_length)
405 ht->hash_dirty_max_chain_length = 0;
407 hash_free_entry (ht, ent);
410 assert (hash_table_ok (ht));
416 hash_print_statistics (const HT *ht, FILE *stream)
418 unsigned int n_slots_used;
420 unsigned int max_chain_length;
423 err = hash_get_statistics (ht, &n_slots_used, &n_keys, &max_chain_length);
425 fprintf (stream, "table size: %d\n", ht->hash_table_size);
426 fprintf (stream, "# slots used: %u (%.2f%%)\n", n_slots_used,
427 (100.0 * n_slots_used) / ht->hash_table_size);
428 fprintf (stream, "# keys: %u\n", n_keys);
429 fprintf (stream, "max chain length: %u\n", max_chain_length);
432 /* If there is *NO* table (so, no meaningful stats) return non-zero
433 and don't reference the argument pointers. Otherwise compute the
434 performance statistics and return non-zero. */
437 hash_get_statistics (const HT *ht,
438 unsigned int *n_slots_used,
439 unsigned int *n_keys,
440 unsigned int *max_chain_length)
444 if (ht == NULL || ht->hash_table == NULL)
447 *max_chain_length = 0;
451 for (i = 0; i < ht->hash_table_size; i++)
453 unsigned int chain_length = 0;
456 p = BUCKET_HEAD (ht, i);
460 for (; p; p = p->next)
463 *n_keys += chain_length;
464 if (chain_length > *max_chain_length)
465 *max_chain_length = chain_length;
471 hash_table_ok (HT *ht)
474 unsigned int n_slots_used;
476 unsigned int max_chain_length;
478 if (ht == NULL || ht->hash_table == NULL)
481 code = hash_get_statistics (ht, &n_slots_used, &n_keys,
485 || n_slots_used != ht->hash_n_slots_used
486 || n_keys != ht->hash_n_keys
487 || max_chain_length != hash_get_max_chain_length (ht))
493 /* See hash_do_for_each_2 (below) for a variant. */
496 hash_do_for_each (HT *ht, void (*f) (void *e, void *aux), void *aux)
501 assert (hash_table_ok (ht));
504 if (ht->hash_table == NULL)
507 for (i = 0; i < ht->hash_table_size; i++)
510 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
517 /* Just like hash_do_for_each, except that function F returns an int
518 that can signal (when non-zero) we should return early. */
521 hash_do_for_each_2 (HT *ht, int (*f) (void *e, void *aux), void *aux)
526 assert (hash_table_ok (ht));
529 if (ht->hash_table == NULL)
532 for (i = 0; i < ht->hash_table_size; i++)
535 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
539 return_code = (*f) (p->key, aux);
540 if (return_code != 0)
547 /* For each entry in the bucket addressed by BUCKET_KEY of the hash
548 table HT, invoke the function F. If F returns non-zero, stop
549 iterating and return that value. Otherwise, apply F to all entries
550 in the selected bucket and return zero. The AUX argument to this
551 function is passed as the last argument in each invocation of F.
552 The first argument to F is BUCKET_KEY, and the second is the key of
553 an entry in the selected bucket. */
556 hash_do_for_each_in_selected_bucket (HT *ht, const void *bucket_key,
557 int (*f) (const void *bucket_key,
565 assert (hash_table_ok (ht));
568 if (ht->hash_table == NULL)
571 idx = ht->hash_hash (bucket_key, ht->hash_table_size);
573 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
577 return_code = (*f) (bucket_key, p->key, aux);
578 if (return_code != 0)
585 /* Make all buckets empty, placing any chained entries on the free list.
586 As with hash_free, apply the user-specified function key_freer
587 (if it's not NULL) to the keys of any affected entries. */
595 for (i = 0; i < ht->hash_table_size; i++)
597 HASH_ENT *tail = NULL;
598 HASH_ENT *head = BUCKET_HEAD (ht, i);
600 /* Free any keys and get tail pointer to last entry in chain. */
601 for (p = head; p; p = p->next)
603 if (ht->hash_key_freer != NULL)
604 ht->hash_key_freer (p->key);
605 p->key = NULL; /* Make sure no one tries to use this key later. */
608 BUCKET_HEAD (ht, i) = NULL;
610 /* If there's a chain in this bucket, tack it onto the
611 beginning of the free list. */
614 assert (tail != NULL && tail->next == NULL);
615 tail->next = ht->hash_free_entry_list;
616 ht->hash_free_entry_list = head;
619 ht->hash_n_slots_used = 0;
620 ht->hash_max_chain_length = 0;
622 ht->hash_dirty_max_chain_length = 0;
625 /* Free all storage associated with HT that functions in this package
626 have allocated. If a key_freer function has been supplied (when HT
627 was created), this function applies it to the key of each entry before
628 freeing that entry. */
631 hash_free_0 (HT *ht, int free_user_data)
633 if (free_user_data && ht->hash_key_freer != NULL)
637 for (i = 0; i < ht->hash_table_size; i++)
642 for (p = BUCKET_HEAD (ht, i); p; p = next)
645 ht->hash_key_freer (p->key);
651 obstack_free (&(ht->ht_obstack), NULL);
655 for (i = 0; i < ht->hash_table_size; i++)
660 for (p = BUCKET_HEAD (ht, i); p; p = next)
668 ht->hash_free_entry_list = NULL;
669 free (ht->hash_table);
682 hash_print (const HT *ht)
686 for (i = 0; i < ht->hash_table_size; i++)
690 if (BUCKET_HEAD (ht, i) != NULL)
693 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
695 char *s = (char *) p->key;
705 hash_get_key_list (const HT *ht, unsigned int bufsize, void **buf)
710 for (i = 0; i < ht->hash_table_size; i++)
714 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
723 /* Return the first key in the table. If the table is empty, return NULL. */
726 hash_get_first (const HT *ht)
731 if (ht->hash_n_keys == 0)
734 for (idx = 0; idx < ht->hash_table_size; idx++)
736 if ((p = BUCKET_HEAD (ht, idx)) != NULL)
742 /* Return the key in the entry following the entry whose key matches E.
743 If there is the only one key in the table and that key matches E,
744 return the matching key. If E is not in the table, return NULL. */
747 hash_get_next (const HT *ht, const void *e)
752 idx = ht->hash_hash (e, ht->hash_table_size);
753 assert (idx < ht->hash_table_size);
754 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
756 if (ht->hash_key_comparator (e, p->key) == 0)
766 /* E is the last or only key in the bucket chain. */
767 if (ht->hash_n_keys == 1)
769 /* There is only one key in the table, and it matches E. */
775 idx = (idx + 1) % ht->hash_table_size;
776 if ((p = BUCKET_HEAD (ht, idx)) != NULL)
779 while (idx != bucket);
784 /* E is not in the table. */