#include "hash.h"
#include "malloc.h"
+static struct list *find_bucket (struct hash *, hash_elem *);
+static struct list_elem *find_elem (struct hash *, struct list *, hash_elem *);
+static void insert_elem (struct hash *, struct list *, hash_elem *);
+static void remove_elem (struct hash *, hash_elem *);
+static void rehash (struct hash *);
+
+/* Initializes hash table H to compute hash values using HASH and
+ compare hash elements using LESS, given auxiliary data AUX. */
bool
hash_init (struct hash *h,
- hash_less_func *less, hash_hash_func *hash, void *aux)
+ hash_hash_func *hash, hash_less_func *less, void *aux)
{
h->elem_cnt = 0;
h->bucket_cnt = 4;
h->buckets = malloc (sizeof *h->buckets * h->bucket_cnt);
- h->less = less;
h->hash = hash;
+ h->less = less;
h->aux = aux;
if (h->buckets != NULL)
return false;
}
-struct list *
-find_bucket (struct hash *h, hash_elem *e)
-{
- size_t bucket_idx = h->hash (e, h->aux) & (h->bucket_cnt - 1);
- return h->buckets[bucket_idx];
-}
-
-struct list_elem *
-find_elem (struct list *bucket, hash_elem *e)
-{
- struct list_elem *i;
-
- for (i = list_begin (bucket); i != list_end (bucket); i = list_next (i))
- if (equal (i, e))
- return i;
- return NULL;
-}
-
-size_t
-turn_off_least_1bit (size_t x)
-{
- return x & (x - 1);
-}
-
-size_t
-is_power_of_2 (size_t x)
-{
- return turn_off_least_1bit (x) == 0;
-}
-
-#define MIN_ELEMS_PER_BUCKET 1
-#define BEST_ELEMS_PER_BUCKET 2
-#define MAX_ELEMS_PER_BUCKET 4
-
+/* Removes all the elements from H. */
void
-rehash (struct hash *h)
+hash_clear (struct hash *h)
{
- size_t old_bucket_cnt, new_bucket_cnt;
- struct list *new_buckets, *old_buckets;
size_t i;
-
- ASSERT (h != NULL);
-
- /* Save old bucket info for later use. */
- old_buckets = h->buckets;
- old_bucket_cnt = h->bucket_cnt;
-
- /* Calculate the number of buckets to use now.
- We want one bucket for about every BEST_ELEMS_PER_BUCKET.
- We must have at least four buckets, and the number of
- buckets must be a power of 2. */
- new_bucket_cnt = h->elem_cnt / BEST_ELEMS_PER_BUCKET;
- if (new_bucket_cnt < 4)
- new_bucket_cnt = 4;
- while (!is_power_of_2 (new_bucket_cnt))
- new_bucket_cnt = turn_off_least_1bit (new_bucket_cnt);
-
- /* Don't do anything if the bucket count doesn't change. */
- if (new_bucket_cnt == old_bucket_cnt)
- return;
-
- /* Allocate new buckets and initialize them as empty. */
- new_buckets = malloc (sizeof *new_buckets * new_bucket_cnt);
- if (new_buckets == NULL)
- {
- /* Allocation failed. This means that use of the hash table will
- be less efficient. However, it is still usable, so
- there's no reason for it to be an error. */
- return;
- }
- for (i = 0; i < new_bucket_cnt; i++)
- list_init (&new_buckets[i]);
-
- /* Install new bucket info. */
- h->buckets = new_buckets;
- h->bucket_cnt = new_bucket_cnt;
-
- /* Move each old element into the appropriate new bucket. */
- for (i = 0; i < old_bucket_cnt; i++)
- {
- struct list *old_bucket, *new_bucket;
- struct list_elem *elem, *next;
-
- old_bucket = &old_buckets[i];
- for (elem = list_begin (old_bucket);
- elem != list_end (old_bucket); elem = next)
- {
- struct list *new_bucket = find_bucket (h, e);
- next = list_next (elem);
- list_push_front (new_bucket, elem);
- }
- }
-}
-
-void
-insert_elem (struct list *bucket, hash_elem *e)
-{
- h->elem_cnt++;
- if (h->elem_cnt > h->bucket_cnt * MAX_ELEMS_PER_BUCKET)
- rehash (h);
- list_push_front (bucket, e);
+
+ for (i = 0; i < h->bucket_cnt; h++)
+ list_init (&h->buckets[i]);
+ h->elem_cnt = 0;
}
+/* Destroys hash table H. */
void
-remove_elem (struct hash *h, hash_elem *e)
+hash_destroy (struct hash *h)
{
- h->elem_cnt--;
- if (h->elem_cnt < h->bucket_cnt * MIN_ELEMS_PER_BUCKET)
- rehash (h);
- list_remove (e);
+ free (h->buckets);
}
+/* Inserts NEW into hash table H and returns a null pointer, if
+ no equal element is already in the table.
+ If an equal element is already in the table, returns it
+ without inserting NEW. */
hash_elem *
hash_insert (struct hash *h, hash_elem *new)
{
struct list *bucket = find_bucket (h, new);
- struct list_elem *old = find_elem (bucket, new);
+ struct list_elem *old = find_elem (h, bucket, new);
if (old == NULL)
- insert_elem (bucket, new);
+ insert_elem (h, bucket, new);
return old;
}
+/* Inserts NEW into hash table H, replacing any equal element
+ already in the table, which is returned. */
hash_elem *
hash_replace (struct hash *h, hash_elem *new)
{
struct list *bucket = find_bucket (h, new);
- struct list_elem *old = find_elem (bucket, new);
+ struct list_elem *old = find_elem (h, bucket, new);
if (old != NULL)
remove_elem (h, old);
- insert_elem (bucket, new);
+ insert_elem (h, bucket, new);
return old;
}
+/* Finds and returns an element equal to E in hash table H, or a
+ null pointer if no equal element exists in the table. */
hash_elem *
hash_find (struct hash *h, hash_elem *e)
{
- struct list *bucket = find_bucket (h, e);
- return find_elem (bucket, new);
+ return find_elem (h, find_bucket (h, e), e);
}
+/* Finds, removes, and returns an element equal to E in hash
+ table H. Returns a null pointer if no equal element existed
+ in the table. */
hash_elem *
-hash_delete (struct hash *h, hash_elem *target)
+hash_delete (struct hash *h, hash_elem *e)
{
- struct list *bucket = find_bucket (h, e);
- struct list_elem *found = find_elem (bucket, new);
+ struct list_elem *found = find_elem (h, find_bucket (h, e), e);
if (found != NULL)
remove_elem (h, found);
return found;
}
-void
-hash_clear (struct hash *h)
-{
- size_t i;
-
- for (i = 0; i < h->bucket_cnt; h++)
- list_init (&h->buckets[i]);
- h->elem_cnt = 0;
-}
+/* Initializes I for iterating hash table H.
+
+ Iteration idiom:
+
+ struct hash_iterator i;
+ hash_first (&i, h);
+ while (hash_next (&i, h))
+ {
+ struct foo *f = list_entry (hash_cur (&i), struct foo, elem);
+ ...do something with f...
+ }
+
+ NOTE: Modifying a hash table during iteration invalidates all
+ iterators.
+*/
void
hash_first (struct hash_iterator *i, struct hash *h)
{
+ ASSERT (i != NULL);
+ ASSERT (h != NULL);
+
i->hash = h;
i->bucket = i->hash->buckets;
- i->elem = list_begin (*i->bucket);
+ i->elem = list_head (i->bucket);
}
+/* Advances I to the next element in the hash table and returns
+ it. Returns a null pointer if no elements are left. Elements
+ are returned in arbitrary order.
+
+ NOTE: Modifying a hash table during iteration invalidates all
+ iterators. */
hash_elem *
hash_next (struct hash_iterator *i)
{
- ASSERT (i->elem != NULL);
+ ASSERT (i != NULL);
i->elem = list_next (i->elem);
- while (i->elem == list_end (*i->bucket))
- if (++i->bucket >= i->hash->buckets + i->hash->bucket_cnt)
- {
- i->elem = NULL;
- break;
- }
-
+ while (i->elem == list_end (i->bucket))
+ {
+ if (++i->bucket >= i->hash->buckets + i->hash->bucket_cnt)
+ {
+ i->elem = NULL;
+ break;
+ }
+ i->elem = list_begin (i->bucket);
+ }
+
return i->elem;
}
+/* Returns the current element in the hash table iteration, or a
+ null pointer at the end of the table. Undefined behavior
+ after calling hash_first() but before hash_next(). */
hash_elem *
hash_cur (struct hash_iterator *i)
{
return i->elem;
}
-size_t hash_size (struct hash *h)
+/* Returns the number of elements in H. */
+size_t
+hash_size (struct hash *h)
{
return h->elem_cnt;
}
+/* Returns true if H contains no elements, false otherwise. */
bool
hash_empty (struct hash *h)
{
#define FNV_32_PRIME 16777619u
#define FNV_32_BASIS 2166136261u
-/* Fowler-Noll-Vo 32-bit hash, for bytes. */
+/* Returns a hash of the SIZE bytes in BUF. */
unsigned
-hsh_hash_bytes (const void *buf_, size_t size)
+hash_bytes (const void *buf_, size_t size)
{
+ /* Fowler-Noll-Vo 32-bit hash, for bytes. */
const unsigned char *buf = buf_;
unsigned hash;
- assert (buf != NULL);
+ ASSERT (buf != NULL);
hash = FNV_32_BASIS;
while (size-- > 0)
return hash;
}
-/* Fowler-Noll-Vo 32-bit hash, for strings. */
+/* Returns a hash of string S. */
unsigned
hash_string (const char *s_)
{
const unsigned char *s = s_;
unsigned hash;
- assert (s != NULL);
+ ASSERT (s != NULL);
hash = FNV_32_BASIS;
while (*s != '\0')
return hash;
}
-/* Hash for ints. */
+/* Returns a hash of integer I. */
unsigned
hash_int (int i)
{
- return hsh_hash_bytes (&i, sizeof i);
+ return hash_bytes (&i, sizeof i);
+}
+\f
+/* Returns the bucket in H that E belongs in. */
+static struct list *
+find_bucket (struct hash *h, hash_elem *e)
+{
+ size_t bucket_idx = h->hash (e, h->aux) & (h->bucket_cnt - 1);
+ return &h->buckets[bucket_idx];
+}
+
+/* Searches BUCKET in H for a hash element equal to E. Returns
+ it if found or a null pointer otherwise. */
+static struct list_elem *
+find_elem (struct hash *h, struct list *bucket, hash_elem *e)
+{
+ struct list_elem *i;
+
+ for (i = list_begin (bucket); i != list_end (bucket); i = list_next (i))
+ if (!h->less (i, e, h->aux) && !h->less (e, i, h->aux))
+ return i;
+ return NULL;
+}
+
+/* Returns X with its lowest-order bit set to 1 turned off. */
+static inline size_t
+turn_off_least_1bit (size_t x)
+{
+ return x & (x - 1);
+}
+
+/* Returns true if X is a power of 2, otherwise false. */
+static inline size_t
+is_power_of_2 (size_t x)
+{
+ return x != 0 && turn_off_least_1bit (x) == 0;
}
+
+/* Element per bucket ratios. */
+#define MIN_ELEMS_PER_BUCKET 1 /* Elems/bucket < 1: reduce # of buckets. */
+#define BEST_ELEMS_PER_BUCKET 2 /* Ideal elems/bucket. */
+#define MAX_ELEMS_PER_BUCKET 4 /* Elems/bucket > 4: increase # of buckets. */
+
+/* Changes the number of buckets in hash table H to match the
+ ideal. This function can fail because of an out-of-memory
+ condition, but that'll just make hash accesses less efficient;
+ we can still continue. */
+static void
+rehash (struct hash *h)
+{
+ size_t old_bucket_cnt, new_bucket_cnt;
+ struct list *new_buckets, *old_buckets;
+ size_t i;
+
+ ASSERT (h != NULL);
+
+ /* Save old bucket info for later use. */
+ old_buckets = h->buckets;
+ old_bucket_cnt = h->bucket_cnt;
+
+ /* Calculate the number of buckets to use now.
+ We want one bucket for about every BEST_ELEMS_PER_BUCKET.
+ We must have at least four buckets, and the number of
+ buckets must be a power of 2. */
+ new_bucket_cnt = h->elem_cnt / BEST_ELEMS_PER_BUCKET;
+ if (new_bucket_cnt < 4)
+ new_bucket_cnt = 4;
+ while (!is_power_of_2 (new_bucket_cnt))
+ new_bucket_cnt = turn_off_least_1bit (new_bucket_cnt);
+
+ /* Don't do anything if the bucket count wouldn't change. */
+ if (new_bucket_cnt == old_bucket_cnt)
+ return;
+
+ /* Allocate new buckets and initialize them as empty. */
+ new_buckets = malloc (sizeof *new_buckets * new_bucket_cnt);
+ if (new_buckets == NULL)
+ {
+ /* Allocation failed. This means that use of the hash table will
+ be less efficient. However, it is still usable, so
+ there's no reason for it to be an error. */
+ return;
+ }
+ for (i = 0; i < new_bucket_cnt; i++)
+ list_init (&new_buckets[i]);
+
+ /* Install new bucket info. */
+ h->buckets = new_buckets;
+ h->bucket_cnt = new_bucket_cnt;
+
+ /* Move each old element into the appropriate new bucket. */
+ for (i = 0; i < old_bucket_cnt; i++)
+ {
+ struct list *old_bucket;
+ struct list_elem *elem, *next;
+
+ old_bucket = &old_buckets[i];
+ for (elem = list_begin (old_bucket);
+ elem != list_end (old_bucket); elem = next)
+ {
+ struct list *new_bucket = find_bucket (h, elem);
+ next = list_next (elem);
+ list_push_front (new_bucket, elem);
+ }
+ }
+}
+
+/* Inserts E into BUCKET (in hash table H).
+ Rehashes if this increases elems/bucket above
+ MAX_ELEMS_PER_BUCKET. */
+static void
+insert_elem (struct hash *h, struct list *bucket, hash_elem *e)
+{
+ h->elem_cnt++;
+ if (h->elem_cnt > h->bucket_cnt * MAX_ELEMS_PER_BUCKET)
+ rehash (h);
+ list_push_front (bucket, e);
+}
+
+/* Removes E from hash table H.
+ Rehashes if this decreases elems/bucket below
+ MIN_ELEMS_PER_BUCKET. */
+static void
+remove_elem (struct hash *h, hash_elem *e)
+{
+ h->elem_cnt--;
+ if (h->elem_cnt < h->bucket_cnt * MIN_ELEMS_PER_BUCKET)
+ rehash (h);
+ list_remove (e);
+}
+
#ifndef HEADER_HASH_H
#define HEADER_HASH_H 1
+/* Hash table.
+
+ This is a standard hash table with chaining. To locate an
+ element in the table, we compute a hash function over the
+ element's data and use that as an index into an array of
+ doubly linked lists, then linearly search the list.
+
+ The chain lists do not use dynamic allocation. Instead, each
+ structure that can potentially be in a hash must embed a
+ hash_elem member. All of the hash functions operate on these
+ `hash_elem's. The hash_entry macro allows conversion from a
+ hash_elem back to a structure object that contains it.
+
+
+
+*/
+
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "list.h"
+/* Hash element. */
typedef list_elem hash_elem;
+/* Converts pointer to hash element HASH_ELEM into a pointer to
+ the structure that HASH_ELEM is embedded inside. Supply the
+ name of the outer structure STRUCT and the member name MEMBER
+ of the hash element. See the big comment at the top of the
+ file for an example. */
#define hash_entry(HASH_ELEM, STRUCT, MEMBER) \
((STRUCT *) ((uint8_t *) (HASH_ELEM) - offsetof (STRUCT, MEMBER)))
+/* Computes and returns the hash value for hash element E, given
+ auxiliary data AUX. */
+typedef unsigned hash_hash_func (const hash_elem *e, void *aux);
+
+/* Compares the value of two hash elements A and B, given
+ auxiliary data AUX. Returns true if A is less than B, or
+ false if A is greater than or equal to B. */
typedef bool hash_less_func (const hash_elem *a, const hash_elem *b,
void *aux);
-typedef unsigned hash_hash_func (const hash_elem *, void *aux);
+/* Hash table. */
struct hash
{
- size_t elem_cnt;
- size_t bucket_cnt;
- struct list *buckets;
- hash_less_func *less;
- hash_hash_func *hash;
- void *aux;
+ size_t elem_cnt; /* Number of elements in table. */
+ size_t bucket_cnt; /* Number of buckets, a power of 2. */
+ struct list *buckets; /* Array of `bucket_cnt' lists. */
+ hash_hash_func *hash; /* Hash function. */
+ hash_less_func *less; /* Comparison function. */
+ void *aux; /* Auxiliary data for `hash' and `less'. */
};
+/* A hash table iterator. */
struct hash_iterator
{
- struct hash *hash;
- struct list **bucket;
- hash_elem *elem;
+ struct hash *hash; /* The hash table. */
+ struct list *bucket; /* Current bucket. */
+ hash_elem *elem; /* Current hash element in current bucket. */
};
-bool hash_init (struct hash *, hash_less_func *, hash_hash_func *, void *aux);
+/* Basic life cycle. */
+bool hash_init (struct hash *, hash_hash_func *, hash_less_func *, void *aux);
void hash_clear (struct hash *);
void hash_destroy (struct hash *);
+/* Search, insertion, deletion. */
hash_elem *hash_insert (struct hash *, hash_elem *);
hash_elem *hash_replace (struct hash *, hash_elem *);
hash_elem *hash_find (struct hash *, hash_elem *);
hash_elem *hash_delete (struct hash *, hash_elem *);
+/* Iteration. */
void hash_first (struct hash_iterator *, struct hash *);
hash_elem *hash_next (struct hash_iterator *);
hash_elem *hash_cur (struct hash_iterator *);
+/* Information. */
size_t hash_size (struct hash *);
bool hash_empty (struct hash *);
+/* Sample hash functions. */
unsigned hash_bytes (const void *, size_t);
unsigned hash_string (const char *);
unsigned hash_int (int);