From d5786496ed604118e24c4ce78604cfd4d669cc19 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Thu, 26 Aug 2004 18:58:41 +0000
Subject: [PATCH] Initial hash table implementation.

---
 src/lib/hash.c | 359 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib/hash.h |  67 +++++++++
 2 files changed, 426 insertions(+)
 create mode 100644 src/lib/hash.c
 create mode 100644 src/lib/hash.h

diff --git a/src/lib/hash.c b/src/lib/hash.c
new file mode 100644
index 0000000..8a11556
--- /dev/null
+++ b/src/lib/hash.c
@@ -0,0 +1,359 @@
+#include "hash.h"
+#include "malloc.h"
+
+/* Hash table built from an array of linked-list buckets.
+
+   The table never owns its elements: callers embed a hash_elem in
+   their own structure and remain responsible for that storage.
+
+   NOTE(review): ASSERT is not defined by any header visible in this
+   patch; presumably it comes from a project debug header that is
+   pulled in indirectly.  Confirm before merging. */
+
+/* Bucket-count tuning.  The number of buckets is always a power of 2
+   so that a hash value can be reduced to a bucket index by masking. */
+#define MIN_BUCKET_CNT 4        /* Fewest buckets a table may have. */
+#define MIN_ELEMS_PER_BUCKET 1  /* Shrink when elems/bucket < this. */
+#define BEST_ELEMS_PER_BUCKET 2 /* Elems/bucket aimed for by rehash. */
+#define MAX_ELEMS_PER_BUCKET 4  /* Grow when elems/bucket > this. */
+
+/* Returns the bucket in H that E belongs in. */
+static struct list *
+find_bucket (struct hash *h, hash_elem *e)
+{
+  size_t bucket_idx = h->hash (e, h->aux) & (h->bucket_cnt - 1);
+  return &h->buckets[bucket_idx];
+}
+
+/* Searches BUCKET in H for an element equal to E.  Two elements are
+   equal when neither compares less than the other under H's
+   comparison function.  Returns the match, or NULL if none. */
+static hash_elem *
+find_elem (struct hash *h, struct list *bucket, hash_elem *e)
+{
+  hash_elem *i;
+
+  for (i = list_begin (bucket); i != list_end (bucket); i = list_next (i))
+    if (!h->less (i, e, h->aux) && !h->less (e, i, h->aux))
+      return i;
+  return NULL;
+}
+
+/* Returns X with its lowest-order 1 bit cleared. */
+static size_t
+turn_off_least_1bit (size_t x)
+{
+  return x & (x - 1);
+}
+
+/* Returns true if X has at most one bit set.  Zero therefore counts
+   as a power of 2 here; callers never pass it. */
+static bool
+is_power_of_2 (size_t x)
+{
+  return turn_off_least_1bit (x) == 0;
+}
+
+/* Resizes H's bucket array to suit its current element count and
+   redistributes every element.  Does nothing if the ideal bucket
+   count equals the current one or if allocation fails. */
+static void
+rehash (struct hash *h)
+{
+  size_t old_bucket_cnt, new_bucket_cnt;
+  struct list *new_buckets, *old_buckets;
+  size_t i;
+
+  ASSERT (h != NULL);
+
+  /* Save old bucket info for later use. */
+  old_buckets = h->buckets;
+  old_bucket_cnt = h->bucket_cnt;
+
+  /* Calculate the number of buckets to use now.
+     We want one bucket for about every BEST_ELEMS_PER_BUCKET.
+     We must have at least MIN_BUCKET_CNT buckets, and the number
+     of buckets must be a power of 2 (rounding down). */
+  new_bucket_cnt = h->elem_cnt / BEST_ELEMS_PER_BUCKET;
+  if (new_bucket_cnt < MIN_BUCKET_CNT)
+    new_bucket_cnt = MIN_BUCKET_CNT;
+  while (!is_power_of_2 (new_bucket_cnt))
+    new_bucket_cnt = turn_off_least_1bit (new_bucket_cnt);
+
+  /* Don't do anything if the bucket count doesn't change. */
+  if (new_bucket_cnt == old_bucket_cnt)
+    return;
+
+  /* Allocate new buckets and initialize them as empty. */
+  new_buckets = malloc (sizeof *new_buckets * new_bucket_cnt);
+  if (new_buckets == NULL)
+    {
+      /* Allocation failed.  This means that use of the hash table
+         will be less efficient.  However, it is still usable, so
+         there's no reason for it to be an error. */
+      return;
+    }
+  for (i = 0; i < new_bucket_cnt; i++)
+    list_init (&new_buckets[i]);
+
+  /* Install new bucket info. */
+  h->buckets = new_buckets;
+  h->bucket_cnt = new_bucket_cnt;
+
+  /* Move each old element into the appropriate new bucket. */
+  for (i = 0; i < old_bucket_cnt; i++)
+    {
+      struct list *old_bucket = &old_buckets[i];
+      hash_elem *elem, *next;
+
+      for (elem = list_begin (old_bucket);
+           elem != list_end (old_bucket); elem = next)
+        {
+          struct list *new_bucket = find_bucket (h, elem);
+
+          /* Fetch the successor first: moving ELEM relinks it. */
+          next = list_next (elem);
+          list_remove (elem);
+          list_push_front (new_bucket, elem);
+        }
+    }
+
+  /* Every element now lives in NEW_BUCKETS, so the old array can go. */
+  free (old_buckets);
+}
+
+/* Rehashes H if its load has moved outside the range
+   MIN_ELEMS_PER_BUCKET...MAX_ELEMS_PER_BUCKET.  This must only be
+   called once no bucket pointer into H is still needed, because a
+   rehash replaces the bucket array. */
+static void
+rehash_if_unbalanced (struct hash *h)
+{
+  if (h->elem_cnt > h->bucket_cnt * MAX_ELEMS_PER_BUCKET
+      || h->elem_cnt < h->bucket_cnt * MIN_ELEMS_PER_BUCKET)
+    rehash (h);
+}
+
+/* Adds E to BUCKET, which must be E's bucket in H. */
+static void
+insert_elem (struct hash *h, struct list *bucket, hash_elem *e)
+{
+  h->elem_cnt++;
+  list_push_front (bucket, e);
+}
+
+/* Removes E, which must currently be in H, from its bucket. */
+static void
+remove_elem (struct hash *h, hash_elem *e)
+{
+  h->elem_cnt--;
+  list_remove (e);
+}
+
+/* Initializes H as an empty table that hashes elements with HASH
+   and orders them with LESS, passing AUX to both.  Returns true on
+   success, false if the bucket array could not be allocated. */
+bool
+hash_init (struct hash *h,
+           hash_less_func *less, hash_hash_func *hash, void *aux)
+{
+  h->elem_cnt = 0;
+  h->bucket_cnt = MIN_BUCKET_CNT;
+  h->buckets = malloc (sizeof *h->buckets * h->bucket_cnt);
+  h->less = less;
+  h->hash = hash;
+  h->aux = aux;
+
+  if (h->buckets == NULL)
+    return false;
+
+  hash_clear (h);
+  return true;
+}
+
+/* Empties H by resetting every bucket.  The elements themselves are
+   not touched or freed; the bucket count is left as it is. */
+void
+hash_clear (struct hash *h)
+{
+  size_t i;
+
+  for (i = 0; i < h->bucket_cnt; i++)
+    list_init (&h->buckets[i]);
+  h->elem_cnt = 0;
+}
+
+/* Releases H's bucket array.  Elements are not freed.  H must be
+   passed to hash_init() again before any further use. */
+void
+hash_destroy (struct hash *h)
+{
+  free (h->buckets);
+  h->buckets = NULL;
+}
+
+/* Inserts NEW into H unless an equal element is already present.
+   Returns NULL if NEW was inserted, otherwise the existing equal
+   element, in which case H is unchanged. */
+hash_elem *
+hash_insert (struct hash *h, hash_elem *new)
+{
+  struct list *bucket = find_bucket (h, new);
+  hash_elem *old = find_elem (h, bucket, new);
+
+  if (old == NULL)
+    {
+      insert_elem (h, bucket, new);
+      rehash_if_unbalanced (h);
+    }
+  return old;
+}
+
+/* Inserts NEW into H, first removing any element equal to it.
+   Returns the element removed, or NULL if there was none. */
+hash_elem *
+hash_replace (struct hash *h, hash_elem *new)
+{
+  struct list *bucket = find_bucket (h, new);
+  hash_elem *old = find_elem (h, bucket, new);
+
+  if (old != NULL)
+    remove_elem (h, old);
+  insert_elem (h, bucket, new);
+
+  /* Rehash only now that BUCKET is no longer needed. */
+  rehash_if_unbalanced (h);
+  return old;
+}
+
+/* Returns the element in H equal to E, or NULL if there is none. */
+hash_elem *
+hash_find (struct hash *h, hash_elem *e)
+{
+  return find_elem (h, find_bucket (h, e), e);
+}
+
+/* Removes from H the element equal to TARGET.  Returns the element
+   removed, or NULL if H held no such element. */
+hash_elem *
+hash_delete (struct hash *h, hash_elem *target)
+{
+  struct list *bucket = find_bucket (h, target);
+  hash_elem *found = find_elem (h, bucket, target);
+
+  if (found != NULL)
+    {
+      remove_elem (h, found);
+      rehash_if_unbalanced (h);
+    }
+  return found;
+}
+
+/* Advances I past empty buckets until it rests on an element.  Sets
+   I's element to NULL if the buckets run out first. */
+static void
+skip_empty_buckets (struct hash_iterator *i)
+{
+  struct list *end = i->hash->buckets + i->hash->bucket_cnt;
+
+  while (i->elem == list_end (i->bucket))
+    {
+      if (++i->bucket >= end)
+        {
+          i->elem = NULL;
+          break;
+        }
+      i->elem = list_begin (i->bucket);
+    }
+}
+
+/* Positions I on the first element of H.  Afterward hash_cur()
+   returns that element, or NULL if H is empty. */
+void
+hash_first (struct hash_iterator *i, struct hash *h)
+{
+  i->hash = h;
+  i->bucket = i->hash->buckets;
+  i->elem = list_begin (i->bucket);
+  skip_empty_buckets (i);
+}
+
+/* Advances I to the next element and returns it, or NULL once every
+   element has been visited.  I must currently be on an element.
+   Modifying the table during iteration invalidates I, because a
+   rehash may move elements between buckets. */
+hash_elem *
+hash_next (struct hash_iterator *i)
+{
+  ASSERT (i->elem != NULL);
+
+  i->elem = list_next (i->elem);
+  skip_empty_buckets (i);
+  return i->elem;
+}
+
+/* Returns the element I is on, or NULL if iteration has finished. */
+hash_elem *
+hash_cur (struct hash_iterator *i)
+{
+  return i->elem;
+}
+
+/* Returns the number of elements in H. */
+size_t
+hash_size (struct hash *h)
+{
+  return h->elem_cnt;
+}
+
+/* Returns true if H contains no elements. */
+bool
+hash_empty (struct hash *h)
+{
+  return h->elem_cnt == 0;
+}
+
+/* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
+#define FNV_32_PRIME 16777619u
+#define FNV_32_BASIS 2166136261u
+
+/* Fowler-Noll-Vo 32-bit hash of the SIZE bytes starting at BUF_. */
+unsigned
+hash_bytes (const void *buf_, size_t size)
+{
+  const unsigned char *buf = buf_;
+  unsigned hash;
+
+  ASSERT (buf != NULL);
+
+  hash = FNV_32_BASIS;
+  while (size-- > 0)
+    hash = (hash * FNV_32_PRIME) ^ *buf++;
+
+  return hash;
+}
+
+/* Fowler-Noll-Vo 32-bit hash of the null-terminated string S_. */
+unsigned
+hash_string (const char *s_)
+{
+  /* Hash bytes as unsigned so the result does not depend on whether
+     plain char is signed. */
+  const unsigned char *s = (const unsigned char *) s_;
+  unsigned hash;
+
+  ASSERT (s != NULL);
+
+  hash = FNV_32_BASIS;
+  while (*s != '\0')
+    hash = (hash * FNV_32_PRIME) ^ *s++;
+
+  return hash;
+}
+
+/* Hash of the bytes of the int I. */
+unsigned
+hash_int (int i)
+{
+  return hash_bytes (&i, sizeof i);
+}
diff --git a/src/lib/hash.h b/src/lib/hash.h
new file mode 100644
index 0000000..da60d01
--- /dev/null
+++ b/src/lib/hash.h
@@ -0,0 +1,67 @@
+#ifndef HEADER_HASH_H
+#define HEADER_HASH_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "list.h"
+
+/* A hash table element.  Embed one in each structure to be stored in
+   a table and use hash_entry() to get back to the structure. */
+typedef struct list_elem hash_elem;
+
+/* Converts pointer HASH_ELEM, which points at member MEMBER of a
+   STRUCT, into a pointer to the enclosing STRUCT. */
+#define hash_entry(HASH_ELEM, STRUCT, MEMBER) \
+        ((STRUCT *) ((uint8_t *) (HASH_ELEM) - offsetof (STRUCT, MEMBER)))
+
+/* Returns true if A orders before B.  AUX is the value given to
+   hash_init().  Elements for which neither is less than the other
+   are treated as equal. */
+typedef bool hash_less_func (const hash_elem *a, const hash_elem *b,
+                             void *aux);
+
+/* Returns a hash value for the element.  AUX is the value given to
+   hash_init().  Equal elements must hash to the same value. */
+typedef unsigned hash_hash_func (const hash_elem *, void *aux);
+
+/* Hash table. */
+struct hash
+  {
+    size_t elem_cnt;            /* Number of elements in table. */
+    size_t bucket_cnt;          /* Number of buckets, a power of 2. */
+    struct list *buckets;       /* Array of `bucket_cnt' lists. */
+    hash_less_func *less;       /* Comparison function. */
+    hash_hash_func *hash;       /* Hash function. */
+    void *aux;                  /* Auxiliary data for `less' and `hash'. */
+  };
+
+/* A hash table iterator. */
+struct hash_iterator
+  {
+    struct hash *hash;          /* The table being iterated. */
+    struct list *bucket;        /* Current bucket within `hash'. */
+    hash_elem *elem;            /* Current element, or NULL at end. */
+  };
+
+bool hash_init (struct hash *, hash_less_func *, hash_hash_func *, void *aux);
+void hash_clear (struct hash *);
+void hash_destroy (struct hash *);
+
+hash_elem *hash_insert (struct hash *, hash_elem *);
+hash_elem *hash_replace (struct hash *, hash_elem *);
+hash_elem *hash_find (struct hash *, hash_elem *);
+hash_elem *hash_delete (struct hash *, hash_elem *);
+
+void hash_first (struct hash_iterator *, struct hash *);
+hash_elem *hash_next (struct hash_iterator *);
+hash_elem *hash_cur (struct hash_iterator *);
+
+size_t hash_size (struct hash *);
+bool hash_empty (struct hash *);
+
+unsigned hash_bytes (const void *, size_t);
+unsigned hash_string (const char *);
+unsigned hash_int (int);
+
+#endif /* hash.h */
-- 
2.30.2