3 - a certain amount of library support, at least <stdlib.h>
4 - C ints are at least 32-bits long
8 - add a sample do_all function for listing the hash table.
18 /* This macro assumes that there is an HT with an initialized
19 HT_OBSTACK in scope. */
20 # define ZALLOC(n) obstack_alloc (&(ht->ht_obstack), (n))
22 # define ZALLOC(n) malloc ((n))
25 #define BUCKET_HEAD(ht, idx) ((ht)->hash_table[(idx)])
27 static void hash_free_0 (HT *, int);
31 unsigned long candidate;
33 /* No even number and none less than 10 will be passed here. */
34 unsigned long divn = 3;
35 unsigned long sq = divn * divn;
37 while (sq < candidate && (candidate % divn))
44 return (candidate % divn);
47 /* Round a given number up to the nearest prime. */
50 next_prime (candidate)
51 unsigned long candidate;
53 /* Make it definitely odd. */
56 while (!is_prime (candidate))
63 hash_free_entry (HT *ht, HASH_ENT *e)
66 e->next = ht->hash_free_entry_list;
67 ht->hash_free_entry_list = e;
71 hash_allocate_entry (HT *ht)
74 if (ht->hash_free_entry_list)
76 new = ht->hash_free_entry_list;
77 ht->hash_free_entry_list = new->next;
81 new = (HASH_ENT *) ZALLOC (sizeof (HASH_ENT));
87 hash_get_n_slots_used (const HT *ht)
89 return ht->hash_n_slots_used;
95 hash_rehash (HT *ht, unsigned int new_table_size)
100 if (ht->hash_table_size <= 0 || new_table_size == 0)
103 ht_new = hash_initialize (new_table_size, ht->hash_key_freer,
104 ht->hash_hash, ht->hash_key_comparator);
109 for (i = 0; i < ht->hash_table_size; i++)
111 HASH_ENT *p = BUCKET_HEAD (ht, i);
112 for ( /* empty */ ; p; p = p->next)
115 const void *already_in_table;
116 already_in_table = hash_insert_if_absent (ht_new, p->key, &failed);
117 assert (failed == 0 && already_in_table == 0);
124 assert (hash_table_ok (ht_new));
129 /* FIXME: fill in ht_new->n_slots_used and other statistics fields. */
137 hash_get_max_chain_length (HT *ht)
140 unsigned int max_chain_length = 0;
142 if (!ht->hash_dirty_max_chain_length)
143 return ht->hash_max_chain_length;
145 for (i = 0; i < ht->hash_table_size; i++)
147 unsigned int chain_length = 0;
148 HASH_ENT *p = BUCKET_HEAD (ht, i);
149 for ( /* empty */ ; p; p = p->next)
151 if (chain_length > max_chain_length)
152 max_chain_length = chain_length;
155 ht->hash_max_chain_length = max_chain_length;
156 ht->hash_dirty_max_chain_length = 0;
157 return ht->hash_max_chain_length;
161 hash_get_n_keys (const HT *ht)
163 return ht->hash_n_keys;
167 hash_get_table_size (const HT *ht)
169 return ht->hash_table_size;
172 /* TABLE_SIZE should be prime. If WHEN_TO_REHASH is positive, when
173 that percentage of table entries have been used, the table is
174 deemed too small; then a new, larger table (GROW_FACTOR times
175 larger than the previous size) is allocated and all entries in
176 the old table are rehashed into the new, larger one. The old
177 table is freed. If WHEN_TO_REHASH is zero or negative, the
178 table is never resized.
180 The function returns non-zero
181 - if TABLE_SIZE is zero or negative
182 - if EQUALITY_TESTER or HASH is null
183 - if it was unable to allocate sufficient storage for the hash table
184 - if WHEN_TO_REHASH is zero or negative
185 Otherwise it returns zero.
187 FIXME: tell what happens to any existing hash table when this
188 function is called (e.g. a second time). */
191 hash_initialize (unsigned int candidate_table_size,
192 Hash_key_freer_type key_freer,
193 unsigned int (*hash) (const void *, unsigned int),
194 int (*key_comparator) (const void *, const void *))
198 unsigned int table_size;
200 if (candidate_table_size <= 0)
203 if (hash == NULL || key_comparator == NULL)
206 ht = (HT *) malloc (sizeof (HT));
210 table_size = next_prime (candidate_table_size);
211 ht->hash_table = (HASH_ENT **) malloc (table_size * sizeof (HASH_ENT *));
212 if (ht->hash_table == NULL)
215 for (i = 0; i < table_size; i++)
217 BUCKET_HEAD (ht, i) = NULL;
220 ht->hash_free_entry_list = NULL;
221 ht->hash_table_size = table_size;
222 ht->hash_hash = hash;
223 ht->hash_key_comparator = key_comparator;
224 ht->hash_key_freer = key_freer;
225 ht->hash_n_slots_used = 0;
226 ht->hash_max_chain_length = 0;
228 ht->hash_dirty_max_chain_length = 0;
230 obstack_init (&(ht->ht_obstack));
236 /* This private function is used to help with insertion and deletion.
237 If E does *not* compare equal to the key of any entry in the table,
239 When E matches an entry in the table, return a pointer to the matching
240 entry. When DELETE is non-zero and E matches an entry in the table,
241 unlink the matching entry. Set *CHAIN_LENGTH to the number of keys
242 that have hashed to the bucket E hashed to. */
245 hash_find_entry (HT *ht, const void *e, unsigned int *table_idx,
246 unsigned int *chain_length, int delete)
252 idx = ht->hash_hash (e, ht->hash_table_size);
253 assert (idx < ht->hash_table_size);
258 prev = ht->hash_table[idx];
264 if (ht->hash_key_comparator (e, prev->key) == 0)
267 ht->hash_table[idx] = prev->next;
276 if (ht->hash_key_comparator (e, p->key) == 0)
290 prev->next = p->next;
295 /* Return non-zero if E is already in the table, zero otherwise. */
298 hash_query_in_table (const HT *ht, const void *e)
303 idx = ht->hash_hash (e, ht->hash_table_size);
304 assert (idx < ht->hash_table_size);
305 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
306 if (ht->hash_key_comparator (e, p->key) == 0)
312 hash_lookup (const HT *ht, const void *e)
317 idx = ht->hash_hash (e, ht->hash_table_size);
318 assert (idx < ht->hash_table_size);
319 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
320 if (ht->hash_key_comparator (e, p->key) == 0)
325 /* If E matches an entry already in the hash table, don't modify the
326 table and return a pointer to the matched entry. If E does not
327 match any item in the table, insert E and return NULL.
328 If the storage required for insertion cannot be allocated
329 set *FAILED to non-zero and return NULL. */
332 hash_insert_if_absent (HT *ht, const void *e, int *failed)
337 unsigned int chain_length;
339 assert (e != NULL); /* Can't insert a NULL key. */
342 ent = hash_find_entry (ht, e, &idx, &chain_length, 0);
345 /* E matches a key from an entry already in the table. */
349 new = hash_allocate_entry (ht);
356 new->key = (void *) e;
357 new->next = BUCKET_HEAD (ht, idx);
358 BUCKET_HEAD (ht, idx) = new;
360 if (chain_length == 0)
361 ++(ht->hash_n_slots_used);
363 /* The insertion has just increased chain_length by 1. */
366 if (chain_length > ht->hash_max_chain_length)
367 ht->hash_max_chain_length = chain_length;
370 if ((double) ht->hash_n_keys / ht->hash_table_size > 0.80)
372 unsigned int new_size;
373 new_size = next_prime (2 * ht->hash_table_size + 1);
374 *failed = hash_rehash (ht, new_size);
378 assert (hash_table_ok (ht));
384 /* If E is already in the table, remove it and return a pointer to
385 the just-deleted key (the user may want to deallocate its storage).
386 If E is not in the table, don't modify the table and return NULL. */
389 hash_delete_if_present (HT *ht, const void *e)
394 unsigned int chain_length;
396 ent = hash_find_entry (ht, e, &idx, &chain_length, 1);
400 if (ent->next == NULL && chain_length == 1)
401 --(ht->hash_n_slots_used);
406 ht->hash_dirty_max_chain_length = 1;
407 if (ent->next == NULL && chain_length < ht->hash_max_chain_length)
408 ht->hash_dirty_max_chain_length = 0;
410 hash_free_entry (ht, ent);
413 assert (hash_table_ok (ht));
419 hash_print_statistics (const HT *ht, FILE *stream)
421 unsigned int n_slots_used;
423 unsigned int max_chain_length;
426 err = hash_get_statistics (ht, &n_slots_used, &n_keys, &max_chain_length);
428 fprintf (stream, "table size: %d\n", ht->hash_table_size);
429 fprintf (stream, "# slots used: %u (%.2f%%)\n", n_slots_used,
430 (100.0 * n_slots_used) / ht->hash_table_size);
431 fprintf (stream, "# keys: %u\n", n_keys);
432 fprintf (stream, "max chain length: %u\n", max_chain_length);
435 /* If there is *NO* table (so, no meaningful stats) return non-zero
436 and don't reference the argument pointers. Otherwise compute the
437 performance statistics and return non-zero. */
440 hash_get_statistics (const HT *ht,
441 unsigned int *n_slots_used,
442 unsigned int *n_keys,
443 unsigned int *max_chain_length)
447 if (ht == NULL || ht->hash_table == NULL)
450 *max_chain_length = 0;
454 for (i = 0; i < ht->hash_table_size; i++)
456 unsigned int chain_length = 0;
459 p = BUCKET_HEAD (ht, i);
463 for (; p; p = p->next)
466 *n_keys += chain_length;
467 if (chain_length > *max_chain_length)
468 *max_chain_length = chain_length;
474 hash_table_ok (HT *ht)
477 unsigned int n_slots_used;
479 unsigned int max_chain_length;
481 if (ht == NULL || ht->hash_table == NULL)
484 code = hash_get_statistics (ht, &n_slots_used, &n_keys,
488 || n_slots_used != ht->hash_n_slots_used
489 || n_keys != ht->hash_n_keys
490 || max_chain_length != hash_get_max_chain_length (ht))
496 /* See hash_do_for_each_2 (below) for a variant. */
499 hash_do_for_each (HT *ht, void (*f) (void *e, void *aux), void *aux)
504 assert (hash_table_ok (ht));
507 if (ht->hash_table == NULL)
510 for (i = 0; i < ht->hash_table_size; i++)
513 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
520 /* Just like hash_do_for_each, except that function F returns an int
521 that can signal (when non-zero) we should return early. */
524 hash_do_for_each_2 (HT *ht, int (*f) (void *e, void *aux), void *aux)
529 assert (hash_table_ok (ht));
532 if (ht->hash_table == NULL)
535 for (i = 0; i < ht->hash_table_size; i++)
538 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
542 return_code = (*f) (p->key, aux);
543 if (return_code != 0)
550 /* For each entry in the bucket addressed by BUCKET_KEY of the hash
551 table HT, invoke the function F. If F returns non-zero, stop
552 iterating and return that value. Otherwise, apply F to all entries
553 in the selected bucket and return zero. The AUX argument to this
554 function is passed as the last argument in each invocation of F.
555 The first argument to F is BUCKET_KEY, and the second is the key of
556 an entry in the selected bucket. */
559 hash_do_for_each_in_selected_bucket (HT *ht, const void *bucket_key,
560 int (*f) (const void *bucket_key,
568 assert (hash_table_ok (ht));
571 if (ht->hash_table == NULL)
574 idx = ht->hash_hash (bucket_key, ht->hash_table_size);
576 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
580 return_code = (*f) (bucket_key, p->key, aux);
581 if (return_code != 0)
588 /* Make all buckets empty, placing any chained entries on the free list.
589 As with hash_free, apply the user-specified function key_freer
590 (if it's not NULL) to the keys of any affected entries. */
598 for (i = 0; i < ht->hash_table_size; i++)
600 HASH_ENT *tail = NULL;
601 HASH_ENT *head = BUCKET_HEAD (ht, i);
603 /* Free any keys and get tail pointer to last entry in chain. */
604 for (p = head; p; p = p->next)
606 if (ht->hash_key_freer != NULL)
607 ht->hash_key_freer (p->key);
608 p->key = NULL; /* Make sure no one tries to use this key later. */
611 BUCKET_HEAD (ht, i) = NULL;
613 /* If there's a chain in this bucket, tack it onto the
614 beginning of the free list. */
617 assert (tail != NULL && tail->next == NULL);
618 tail->next = ht->hash_free_entry_list;
619 ht->hash_free_entry_list = head;
622 ht->hash_n_slots_used = 0;
623 ht->hash_max_chain_length = 0;
625 ht->hash_dirty_max_chain_length = 0;
628 /* Free all storage associated with HT that functions in this package
629 have allocated. If a key_freer function has been supplied (when HT
630 was created), this function applies it to the key of each entry before
631 freeing that entry. */
634 hash_free_0 (HT *ht, int free_user_data)
636 if (free_user_data && ht->hash_key_freer != NULL)
640 for (i = 0; i < ht->hash_table_size; i++)
645 for (p = BUCKET_HEAD (ht, i); p; p = next)
648 ht->hash_key_freer (p->key);
654 obstack_free (&(ht->ht_obstack), NULL);
658 for (i = 0; i < ht->hash_table_size; i++)
663 for (p = BUCKET_HEAD (ht, i); p; p = next)
671 ht->hash_free_entry_list = NULL;
672 free (ht->hash_table);
685 hash_print (const HT *ht)
689 for (i = 0; i < ht->hash_table_size; i++)
693 if (BUCKET_HEAD (ht, i) != NULL)
696 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
698 char *s = (char *) p->key;
708 hash_get_key_list (const HT *ht, unsigned int bufsize, void **buf)
713 for (i = 0; i < ht->hash_table_size; i++)
717 for (p = BUCKET_HEAD (ht, i); p; p = p->next)
726 /* Return the first key in the table. If the table is empty, return NULL. */
729 hash_get_first (const HT *ht)
734 if (ht->hash_n_keys == 0)
737 for (idx = 0; idx < ht->hash_table_size; idx++)
739 if ((p = BUCKET_HEAD (ht, idx)) != NULL)
745 /* Return the key in the entry following the entry whose key matches E.
746 If there is the only one key in the table and that key matches E,
747 return the matching key. If E is not in the table, return NULL. */
750 hash_get_next (const HT *ht, const void *e)
755 idx = ht->hash_hash (e, ht->hash_table_size);
756 assert (idx < ht->hash_table_size);
757 for (p = BUCKET_HEAD (ht, idx); p != NULL; p = p->next)
759 if (ht->hash_key_comparator (e, p->key) == 0)
769 /* E is the last or only key in the bucket chain. */
770 if (ht->hash_n_keys == 1)
772 /* There is only one key in the table, and it matches E. */
778 idx = (idx + 1) % ht->hash_table_size;
779 if ((p = BUCKET_HEAD (ht, idx)) != NULL)
782 while (idx != bucket);
787 /* E is not in the table. */