3 - a certain amount of library support, at least <stdlib.h>
4 - C ints are at least 32-bits long
8 - add a sample do_all function for listing the hash table.
15 #include "near-prime.h"
19 /* This macro assumes that there is an HT with an initialized
20 HT_OBSTACK in scope. */
21 # define ZALLOC(n) obstack_alloc (&(ht->ht_obstack), (n))
23 # define ZALLOC(n) malloc ((n))
26 #define BUCKET_HEAD(ht, idx) ((ht)->hash_table[(idx)])
28 static void hash_free_0 (HT *, int);
31 hash_free_entry (HT *ht, HASH_ENT *e)
34 e->next = ht->hash_free_entry_list;
35 ht->hash_free_entry_list = e;
39 hash_allocate_entry (HT *ht)
42 if (ht->hash_free_entry_list)
44 new = ht->hash_free_entry_list;
45 ht->hash_free_entry_list = new->next;
49 new = (HASH_ENT *) ZALLOC (sizeof (HASH_ENT));
55 hash_get_n_slots_used (const HT *ht)
57 return ht->hash_n_slots_used;
63 hash_rehash (HT *ht, unsigned int new_table_size)
68 if (ht->hash_table_size <= 0 || new_table_size == 0)
71 ht_new = hash_initialize (new_table_size, ht->hash_key_freer,
72 ht->hash_hash, ht->hash_key_comparator);
77 for (i = 0; i < ht->hash_table_size; i++)
79 HASH_ENT *p = BUCKET_HEAD (ht, i);
80 for ( /* empty */ ; p; p = p->next)
83 const void *already_in_table;
84 already_in_table = hash_insert_if_absent (ht_new, p->key, &failed);
85 assert (failed == 0 && already_in_table == 0);
92 assert (hash_table_ok (ht_new));
97 /* FIXME: fill in ht_new->n_slots_used and other statistics fields. */
105 hash_get_max_chain_length (HT *ht)
108 unsigned int max_chain_length = 0;
110 if (!ht->hash_dirty_max_chain_length)
111 return ht->hash_max_chain_length;
113 for (i = 0; i < ht->hash_table_size; i++)
115 unsigned int chain_length = 0;
116 HASH_ENT *p = BUCKET_HEAD (ht, i);
117 for ( /* empty */ ; p; p = p->next)
119 if (chain_length > max_chain_length)
120 max_chain_length = chain_length;
123 ht->hash_max_chain_length = max_chain_length;
124 ht->hash_dirty_max_chain_length = 0;
125 return ht->hash_max_chain_length;
129 hash_get_n_keys (const HT *ht)
131 return ht->hash_n_keys;
135 hash_get_table_size (const HT *ht)
137 return ht->hash_table_size;
140 /* TABLE_SIZE should be prime. If WHEN_TO_REHASH is positive, when
141 that percentage of table entries have been used, the table is
142 deemed too small; then a new, larger table (GROW_FACTOR times
143 larger than the previous size) is allocated and all entries in
144 the old table are rehashed into the new, larger one. The old
145 table is freed. If WHEN_TO_REHASH is zero or negative, the
146 table is never resized.
148 The function returns non-zero
149 - if TABLE_SIZE is zero or negative
150 - if EQUALITY_TESTER or HASH is null
151 - if it was unable to allocate sufficient storage for the hash table
152 - if WHEN_TO_REHASH is zero or negative
153 Otherwise it returns zero.
155 FIXME: tell what happens to any existing hash table when this
156 function is called (e.g. a second time). */
159 hash_initialize (unsigned int table_size,
160 Hash_key_freer_type key_freer,
161 unsigned int (*hash) (const void *, unsigned int),
162 int (*key_comparator) (const void *, const void *))
170 if (hash == NULL || key_comparator == NULL)
173 ht = (HT *) malloc (sizeof (HT));
177 ht->hash_table = (HASH_ENT **) malloc (table_size * sizeof (HASH_ENT *));
178 if (ht->hash_table == NULL)
181 for (i = 0; i < table_size; i++)
183 BUCKET_HEAD (ht, i) = NULL;
186 ht->hash_free_entry_list = NULL;
187 ht->hash_table_size = table_size;
188 ht->hash_hash = hash;
189 ht->hash_key_comparator = key_comparator;
190 ht->hash_key_freer = key_freer;
191 ht->hash_n_slots_used = 0;
192 ht->hash_max_chain_length = 0;
194 ht->hash_dirty_max_chain_length = 0;
196 obstack_init (&(ht->ht_obstack));
202 /* This private function is used to help with insertion and deletion.
203 If E does *not* compare equal to the key of any entry in the table,
205 When E matches an entry in the table, return a pointer to the matching
206 entry. When DELETE is non-zero and E matches an entry in the table,
207 unlink the matching entry. Set *CHAIN_LENGTH to the number of keys
208 that have hashed to the bucket E hashed to. */
211 hash_find_entry (HT *ht, const void *e, unsigned int *table_idx,
212 unsigned int *chain_length, int delete)
218 idx = ht->hash_hash (e, ht->hash_table_size);
219 assert (idx < ht->hash_table_size);
224 prev = ht->hash_table[idx];
230 if (ht->hash_key_comparator (e, prev->key) == 0)
233 ht->hash_table[idx] = prev->next;
242 if (ht->hash_key_comparator (e, p->key) == 0)
256 prev->next = p->next;
261 /* Return non-zero if E is already in the table, zero otherwise. */
264 hash_query_in_table (const HT *ht, const void *e)
269 idx = ht->hash_hash (e, ht->hash_table_size);
270 assert (idx < ht->hash_table_size);
271 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
272 if (ht->hash_key_comparator (e, p->key) == 0)
278 hash_lookup (const HT *ht, const void *e)
283 idx = ht->hash_hash (e, ht->hash_table_size);
284 assert (idx < ht->hash_table_size);
285 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
286 if (ht->hash_key_comparator (e, p->key) == 0)
291 /* If E matches an entry already in the hash table, don't modify the
292 table and return a pointer to the matched entry. If E does not
293 match any item in the table, insert E and return NULL.
294 If the storage required for insertion cannot be allocated
295 set *FAILED to non-zero and return NULL. */
298 hash_insert_if_absent (HT *ht, const void *e, int *failed)
303 unsigned int chain_length;
305 assert (e != NULL); /* Can't insert a NULL key. */
308 ent = hash_find_entry (ht, e, &idx, &chain_length, 0);
311 /* E matches a key from an entry already in the table. */
315 new = hash_allocate_entry (ht);
322 new->key = (void *) e;
323 new->next = BUCKET_HEAD (ht, idx);
324 BUCKET_HEAD (ht, idx) = new;
326 if (chain_length == 0)
327 ++(ht->hash_n_slots_used);
329 /* The insertion has just increased chain_length by 1. */
332 if (chain_length > ht->hash_max_chain_length)
333 ht->hash_max_chain_length = chain_length;
336 if ((double) ht->hash_n_keys / ht->hash_table_size > 0.80)
338 unsigned int new_size;
339 new_size = near_prime (2 * ht->hash_table_size);
340 *failed = hash_rehash (ht, new_size);
344 assert (hash_table_ok (ht));
350 /* If E is already in the table, remove it and return a pointer to
351 the just-deleted key (the user may want to deallocate its storage).
352 If E is not in the table, don't modify the table and return NULL. */
355 hash_delete_if_present (HT *ht, const void *e)
360 unsigned int chain_length;
362 ent = hash_find_entry (ht, e, &idx, &chain_length, 1);
366 if (ent->next == NULL && chain_length == 1)
367 --(ht->hash_n_slots_used);
372 ht->hash_dirty_max_chain_length = 1;
373 if (ent->next == NULL && chain_length < ht->hash_max_chain_length)
374 ht->hash_dirty_max_chain_length = 0;
376 hash_free_entry (ht, ent);
379 assert (hash_table_ok (ht));
385 hash_print_statistics (const HT *ht, FILE *stream)
387 unsigned int n_slots_used;
389 unsigned int max_chain_length;
392 err = hash_get_statistics (ht, &n_slots_used, &n_keys, &max_chain_length);
394 fprintf (stream, "table size: %d\n", ht->hash_table_size);
395 fprintf (stream, "# slots used: %u (%.2f%%)\n", n_slots_used,
396 (100.0 * n_slots_used) / ht->hash_table_size);
397 fprintf (stream, "# keys: %u\n", n_keys);
398 fprintf (stream, "max chain length: %u\n", max_chain_length);
401 /* If there is *NO* table (so, no meaningful stats) return non-zero
402 and don't reference the argument pointers. Otherwise compute the
403 performance statistics and return non-zero. */
406 hash_get_statistics (const HT *ht,
407 unsigned int *n_slots_used,
408 unsigned int *n_keys,
409 unsigned int *max_chain_length)
413 if (ht == NULL || ht->hash_table == NULL)
416 *max_chain_length = 0;
420 for (i = 0; i < ht->hash_table_size; i++)
422 unsigned int chain_length = 0;
425 p = BUCKET_HEAD (ht, i);
429 for (; p; p = p->next)
432 *n_keys += chain_length;
433 if (chain_length > *max_chain_length)
434 *max_chain_length = chain_length;
440 hash_table_ok (HT *ht)
443 unsigned int n_slots_used;
445 unsigned int max_chain_length;
447 if (ht == NULL || ht->hash_table == NULL)
450 code = hash_get_statistics (ht, &n_slots_used, &n_keys,
454 || n_slots_used != ht->hash_n_slots_used
455 || n_keys != ht->hash_n_keys
456 || max_chain_length != hash_get_max_chain_length (ht))
462 /* See hash_do_for_each_2 (below) for a variant. */
465 hash_do_for_each (HT *ht, void (*f) (void *e, void *aux), void *aux)
470 assert (hash_table_ok (ht));
473 if (ht->hash_table == NULL)
476 for (i = 0; i < ht->hash_table_size; i++)
479 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
486 /* Just like hash_do_for_each, except that function F returns an int
487 that can signal (when non-zero) we should return early. */
490 hash_do_for_each_2 (HT *ht, int (*f) (void *e, void *aux), void *aux)
495 assert (hash_table_ok (ht));
498 if (ht->hash_table == NULL)
501 for (i = 0; i < ht->hash_table_size; i++)
504 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
508 return_code = (*f) (p->key, aux);
509 if (return_code != 0)
516 /* For each entry in the bucket addressed by BUCKET_KEY of the hash
517 table HT, invoke the function F. If F returns non-zero, stop
518 iterating and return that value. Otherwise, apply F to all entries
519 in the selected bucket and return zero. The AUX argument to this
520 function is passed as the last argument in each invocation of F.
521 The first argument to F is BUCKET_KEY, and the second is the key of
522 an entry in the selected bucket. */
525 hash_do_for_each_in_selected_bucket (HT *ht, const void *bucket_key,
526 int (*f) (const void *bucket_key,
534 assert (hash_table_ok (ht));
537 if (ht->hash_table == NULL)
540 idx = ht->hash_hash (bucket_key, ht->hash_table_size);
542 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
546 return_code = (*f) (bucket_key, p->key, aux);
547 if (return_code != 0)
554 /* Make all buckets empty, placing any chained entries on the free list.
555 As with hash_free, apply the user-specified function key_freer
556 (if it's not NULL) to the keys of any affected entries. */
564 for (i = 0; i < ht->hash_table_size; i++)
566 HASH_ENT *tail = NULL;
567 HASH_ENT *head = BUCKET_HEAD (ht, i);
569 /* Free any keys and get tail pointer to last entry in chain. */
570 for (p = head; p; p = p->next)
572 if (ht->hash_key_freer != NULL)
573 ht->hash_key_freer (p->key);
574 p->key = NULL; /* Make sure no one tries to use this key later. */
577 BUCKET_HEAD (ht, i) = NULL;
579 /* If there's a chain in this bucket, tack it onto the
580 beginning of the free list. */
583 assert (tail != NULL && tail->next == NULL);
584 tail->next = ht->hash_free_entry_list;
585 ht->hash_free_entry_list = head;
588 ht->hash_n_slots_used = 0;
589 ht->hash_max_chain_length = 0;
591 ht->hash_dirty_max_chain_length = 0;
594 /* Free all storage associated with HT that functions in this package
595 have allocated. If a key_freer function has been supplied (when HT
596 was created), this function applies it to the key of each entry before
597 freeing that entry. */
600 hash_free_0 (HT *ht, int free_user_data)
602 if (free_user_data && ht->hash_key_freer != NULL)
606 for (i = 0; i < ht->hash_table_size; i++)
611 for (p = BUCKET_HEAD (ht, i); p; p = next)
614 ht->hash_key_freer (p->key);
620 obstack_free (&(ht->ht_obstack), NULL);
624 for (i = 0; i < ht->hash_table_size; i++)
629 for (p = BUCKET_HEAD (ht, i); p; p = next)
637 ht->hash_free_entry_list = NULL;
638 free (ht->hash_table);
651 hash_print (const HT *ht)
655 for (i = 0; i < ht->hash_table_size; i++)
659 if (BUCKET_HEAD (ht, i) != NULL)
662 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
664 char *s = (char *) p->key;
674 hash_get_key_list (const HT *ht, unsigned int bufsize, void **buf)
679 for (i = 0; i < ht->hash_table_size; i++)
683 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
692 /* Return the first key in the table. If the table is empty, return NULL. */
695 hash_get_first (const HT *ht)
700 if (ht->hash_n_keys == 0)
703 for (idx = 0; idx < ht->hash_table_size; idx++)
705 if ((p = BUCKET_HEAD (ht, idx)) != NULL)
711 /* Return the key in the entry following the entry whose key matches E.
712 If there is the only one key in the table and that key matches E,
713 return the matching key. If E is not in the table, return NULL. */
716 hash_get_next (const HT *ht, const void *e)
721 idx = ht->hash_hash (e, ht->hash_table_size);
722 assert (idx < ht->hash_table_size);
723 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
725 if (ht->hash_key_comparator (e, p->key) == 0)
735 /* E is the last or only key in the bucket chain. */
736 if (ht->hash_n_keys == 1)
738 /* There is only one key in the table, and it matches E. */
744 idx = (idx + 1) % ht->hash_table_size;
745 if ((p = BUCKET_HEAD (ht, idx)) != NULL)
748 while (idx != bucket);
753 /* E is not in the table. */