Reorganize, add comments.

author Ben Pfaff <blp@cs.stanford.edu>

Fri, 3 Sep 2004 06:25:44 +0000 (06:25 +0000)

committer Ben Pfaff <blp@cs.stanford.edu>

Fri, 3 Sep 2004 06:25:44 +0000 (06:25 +0000)
author Ben Pfaff <blp@cs.stanford.edu>
Fri, 3 Sep 2004 06:25:44 +0000 (06:25 +0000)
committer Ben Pfaff <blp@cs.stanford.edu>
Fri, 3 Sep 2004 06:25:44 +0000 (06:25 +0000)
diff --git a/src/lib/hash.c b/src/lib/hash.c

index 8a11556294b9083260bf9aff604266fb9461ca02..665fd0990a4537c0bfbcef861be86a295f6efc42 100644 (file)
--- a/src/lib/hash.c
+++ b/src/lib/hash.c
@@ -1,15 +1,23 @@
  #include "hash.h"
  #include "malloc.h"
  
+static struct list *find_bucket (struct hash *, hash_elem *);
+static struct list_elem *find_elem (struct hash *, struct list *, hash_elem *);
+static void insert_elem (struct hash *, struct list *, hash_elem *);
+static void remove_elem (struct hash *, hash_elem *);
+static void rehash (struct hash *);
+
+/* Initializes hash table H to compute hash values using HASH and
+   compare hash elements using LESS, given auxiliary data AUX. */
  bool
  hash_init (struct hash *h,
-           hash_less_func *less, hash_hash_func *hash, void *aux) 
+           hash_hash_func *hash, hash_less_func *less, void *aux) 
  {
    h->elem_cnt = 0;
    h->bucket_cnt = 4;
    h->buckets = malloc (sizeof *h->buckets * h->bucket_cnt);
-  h->less = less;
    h->hash = hash;
+  h->less = less;
    h->aux = aux;
  
    if (h->buckets != NULL) 
@@ -21,204 +29,143 @@ hash_init (struct hash *h,
      return false;
  }
  
-struct list *
-find_bucket (struct hash *h, hash_elem *e) 
-{
-  size_t bucket_idx = h->hash (e, h->aux) & (h->bucket_cnt - 1);
-  return h->buckets[bucket_idx];
-}
-
-struct list_elem *
-find_elem (struct list *bucket, hash_elem *e) 
-{
-  struct list_elem *i;
-
-  for (i = list_begin (bucket); i != list_end (bucket); i = list_next (i))
-    if (equal (i, e))
-      return i;
-  return NULL;
-}
-
-size_t
-turn_off_least_1bit (size_t x) 
-{
-  return x & (x - 1);
-}
-
-size_t
-is_power_of_2 (size_t x) 
-{
-  return turn_off_least_1bit (x) == 0;
-}
-
-#define MIN_ELEMS_PER_BUCKET  1
-#define BEST_ELEMS_PER_BUCKET 2
-#define MAX_ELEMS_PER_BUCKET  4
-
+/* Removes all the elements from H. */
  void
-rehash (struct hash *h) 
+hash_clear (struct hash *h) 
  {
-  size_t old_bucket_cnt, new_bucket_cnt;
-  struct list *new_buckets, *old_buckets;
    size_t i;
-
-  ASSERT (h != NULL);
-
-  /* Save old bucket info for later use. */
-  old_buckets = h->buckets;
-  old_bucket_cnt = h->bucket_cnt;
-
-  /* Calculate the number of buckets to use now.
-     We want one bucket for about every BEST_ELEMS_PER_BUCKET.
-     We must have at least four buckets, and the number of
-     buckets must be a power of 2. */
-  new_bucket_cnt = h->elem_cnt / BEST_ELEMS_PER_BUCKET;
-  if (new_bucket_cnt < 4)
-    new_bucket_cnt = 4;
-  while (!is_power_of_2 (new_bucket_cnt))
-    new_bucket_cnt = turn_off_least_1bit (new_bucket_cnt);
-
-  /* Don't do anything if the bucket count doesn't change. */
-  if (new_bucket_cnt == old_bucket_cnt)
-    return;
-
-  /* Allocate new buckets and initialize them as empty. */
-  new_buckets = malloc (sizeof *new_buckets * new_bucket_cnt);
-  if (new_buckets == NULL) 
-    {
-      /* Allocation failed.  This means that use of the hash table will
-         be less efficient.  However, it is still usable, so
-         there's no reason for it to be an error. */
-      return;
-    }
-  for (i = 0; i < new_bucket_cnt; i++) 
-    list_init (&new_buckets[i]);
-
-  /* Install new bucket info. */
-  h->buckets = new_buckets;
-  h->bucket_cnt = new_bucket_cnt;
-
-  /* Move each old element into the appropriate new bucket. */
-  for (i = 0; i < old_bucket_cnt; i++) 
-    {
-      struct list *old_bucket, *new_bucket;
-      struct list_elem *elem, *next;
-
-      old_bucket = &old_buckets[i];
-      for (elem = list_begin (old_bucket);
-           elem != list_end (old_bucket); elem = next) 
-        {
-          struct list *new_bucket = find_bucket (h, e);
-          next = list_next (elem);
-          list_push_front (new_bucket, elem);
-        }
-    }
-}
-
-void
-insert_elem (struct list *bucket, hash_elem *e) 
-{
-  h->elem_cnt++;
-  if (h->elem_cnt > h->bucket_cnt * MAX_ELEMS_PER_BUCKET)
-    rehash (h);
-  list_push_front (bucket, e);
+      
+  for (i = 0; i < h->bucket_cnt; h++) 
+    list_init (&h->buckets[i]);
+  h->elem_cnt = 0;
  }
  
+/* Destroys hash table H. */
  void
-remove_elem (struct hash *h, hash_elem *e) 
+hash_destroy (struct hash *h) 
  {
-  h->elem_cnt--;
-  if (h->elem_cnt < h->bucket_cnt * MIN_ELEMS_PER_BUCKET)
-    rehash (h);
-  list_remove (e);
+  free (h->buckets);
  }
  
+/* Inserts NEW into hash table H and returns a null pointer, if
+   no equal element is already in the table.
+   If an equal element is already in the table, returns it
+   without inserting NEW. */   
  hash_elem *
  hash_insert (struct hash *h, hash_elem *new)
  {
    struct list *bucket = find_bucket (h, new);
-  struct list_elem *old = find_elem (bucket, new);
+  struct list_elem *old = find_elem (h, bucket, new);
  
    if (old == NULL) 
-    insert_elem (bucket, new);
+    insert_elem (h, bucket, new);
    return old; 
  }
  
+/* Inserts NEW into hash table H, replacing any equal element
+   already in the table, which is returned. */
  hash_elem *
  hash_replace (struct hash *h, hash_elem *new) 
  {
    struct list *bucket = find_bucket (h, new);
-  struct list_elem *old = find_elem (bucket, new);
+  struct list_elem *old = find_elem (h, bucket, new);
  
    if (old != NULL)
      remove_elem (h, old);
    
-  insert_elem (bucket, new);
+  insert_elem (h, bucket, new);
    return old;
  }
  
+/* Finds and returns an element equal to E in hash table H, or a
+   null pointer if no equal element exists in the table. */
  hash_elem *
  hash_find (struct hash *h, hash_elem *e) 
  {
-  struct list *bucket = find_bucket (h, e);
-  return find_elem (bucket, new);
+  return find_elem (h, find_bucket (h, e), e);
  }
  
+/* Finds, removes, and returns an element equal to E in hash
+   table H.  Returns a null pointer if no equal element existed
+   in the table. */
  hash_elem *
-hash_delete (struct hash *h, hash_elem *target)
+hash_delete (struct hash *h, hash_elem *e)
  {
-  struct list *bucket = find_bucket (h, e);
-  struct list_elem *found = find_elem (bucket, new);
+  struct list_elem *found = find_elem (h, find_bucket (h, e), e);
    if (found != NULL)
      remove_elem (h, found);
    return found;
  }
  
-void
-hash_clear (struct hash *h) 
-{
-  size_t i;
-      
-  for (i = 0; i < h->bucket_cnt; h++) 
-    list_init (&h->buckets[i]);
-  h->elem_cnt = 0;
-}
+/* Initializes I for iterating hash table H.
+
+   Iteration idiom:
+
+      struct hash_iterator i;
  
+      hash_first (&i, h);
+      while (hash_next (&i, h))
+        {
+          struct foo *f = list_entry (hash_cur (&i), struct foo, elem);
+          ...do something with f...
+        }
+
+   NOTE: Modifying a hash table during iteration invalidates all
+   iterators.
+*/
  void
  hash_first (struct hash_iterator *i, struct hash *h) 
  {
+  ASSERT (i != NULL);
+  ASSERT (h != NULL);
+
    i->hash = h;
    i->bucket = i->hash->buckets;
-  i->elem = list_begin (*i->bucket);
+  i->elem = list_head (i->bucket);
  }
  
+/* Advances I to the next element in the hash table and returns
+   it.  Returns a null pointer if no elements are left.  Elements
+   are returned in arbitrary order.
+
+   NOTE: Modifying a hash table during iteration invalidates all
+   iterators. */
  hash_elem *
  hash_next (struct hash_iterator *i)
  {
-  ASSERT (i->elem != NULL);
+  ASSERT (i != NULL);
  
    i->elem = list_next (i->elem);
-  while (i->elem == list_end (*i->bucket))
-    if (++i->bucket >= i->hash->buckets + i->hash->bucket_cnt) 
-      {
-        i->elem = NULL;
-        break; 
-      }
-
+  while (i->elem == list_end (i->bucket)) 
+    {
+      if (++i->bucket >= i->hash->buckets + i->hash->bucket_cnt)
+        {
+          i->elem = NULL;
+          break;
+        }
+      i->elem = list_begin (i->bucket);
+    }
+  
    return i->elem;
  }
  
+/* Returns the current element in the hash table iteration, or a
+   null pointer at the end of the table.  Undefined behavior
+   after calling hash_first() but before hash_next(). */
  hash_elem *
  hash_cur (struct hash_iterator *i) 
  {
    return i->elem;
  }
  
-size_t hash_size (struct hash *h) 
+/* Returns the number of elements in H. */
+size_t
+hash_size (struct hash *h) 
  {
    return h->elem_cnt;
  }
  
+/* Returns true if H contains no elements, false otherwise. */
  bool
  hash_empty (struct hash *h) 
  {
@@ -229,14 +176,15 @@ hash_empty (struct hash *h)
  #define FNV_32_PRIME 16777619u
  #define FNV_32_BASIS 2166136261u
  
-/* Fowler-Noll-Vo 32-bit hash, for bytes. */
+/* Returns a hash of the SIZE bytes in BUF. */
  unsigned
-hsh_hash_bytes (const void *buf_, size_t size)
+hash_bytes (const void *buf_, size_t size)
  {
+  /* Fowler-Noll-Vo 32-bit hash, for bytes. */
    const unsigned char *buf = buf_;
    unsigned hash;
  
-  assert (buf != NULL);
+  ASSERT (buf != NULL);
  
    hash = FNV_32_BASIS;
    while (size-- > 0)
@@ -245,14 +193,14 @@ hsh_hash_bytes (const void *buf_, size_t size)
    return hash;
  } 
  
-/* Fowler-Noll-Vo 32-bit hash, for strings. */
+/* Returns a hash of string S. */
  unsigned
  hash_string (const char *s_) 
  {
    const unsigned char *s = s_;
    unsigned hash;
  
-  assert (s != NULL);
+  ASSERT (s != NULL);
  
    hash = FNV_32_BASIS;
    while (*s != '\0')
@@ -261,9 +209,138 @@ hash_string (const char *s_)
    return hash;
  }
  
-/* Hash for ints. */
+/* Returns a hash of integer I. */
  unsigned
  hash_int (int i) 
  {
-  return hsh_hash_bytes (&i, sizeof i);
+  return hash_bytes (&i, sizeof i);
+}
+\f
+/* Returns the bucket in H that E belongs in. */
+static struct list *
+find_bucket (struct hash *h, hash_elem *e) 
+{
+  size_t bucket_idx = h->hash (e, h->aux) & (h->bucket_cnt - 1);
+  return &h->buckets[bucket_idx];
+}
+
+/* Searches BUCKET in H for a hash element equal to E.  Returns
+   it if found or a null pointer otherwise. */
+static struct list_elem *
+find_elem (struct hash *h, struct list *bucket, hash_elem *e) 
+{
+  struct list_elem *i;
+
+  for (i = list_begin (bucket); i != list_end (bucket); i = list_next (i))
+    if (!h->less (i, e, h->aux) && !h->less (e, i, h->aux))
+      return i;
+  return NULL;
+}
+
+/* Returns X with its lowest-order bit set to 1 turned off. */
+static inline size_t
+turn_off_least_1bit (size_t x) 
+{
+  return x & (x - 1);
+}
+
+/* Returns true if X is a power of 2, otherwise false. */
+static inline size_t
+is_power_of_2 (size_t x) 
+{
+  return x != 0 && turn_off_least_1bit (x) == 0;
  }
+
+/* Element per bucket ratios. */
+#define MIN_ELEMS_PER_BUCKET  1 /* Elems/bucket < 1: reduce # of buckets. */
+#define BEST_ELEMS_PER_BUCKET 2 /* Ideal elems/bucket. */
+#define MAX_ELEMS_PER_BUCKET  4 /* Elems/bucket > 4: increase # of buckets. */
+
+/* Changes the number of buckets in hash table H to match the
+   ideal.  This function can fail because of an out-of-memory
+   condition, but that'll just make hash accesses less efficient;
+   we can still continue. */
+static void
+rehash (struct hash *h) 
+{
+  size_t old_bucket_cnt, new_bucket_cnt;
+  struct list *new_buckets, *old_buckets;
+  size_t i;
+
+  ASSERT (h != NULL);
+
+  /* Save old bucket info for later use. */
+  old_buckets = h->buckets;
+  old_bucket_cnt = h->bucket_cnt;
+
+  /* Calculate the number of buckets to use now.
+     We want one bucket for about every BEST_ELEMS_PER_BUCKET.
+     We must have at least four buckets, and the number of
+     buckets must be a power of 2. */
+  new_bucket_cnt = h->elem_cnt / BEST_ELEMS_PER_BUCKET;
+  if (new_bucket_cnt < 4)
+    new_bucket_cnt = 4;
+  while (!is_power_of_2 (new_bucket_cnt))
+    new_bucket_cnt = turn_off_least_1bit (new_bucket_cnt);
+
+  /* Don't do anything if the bucket count wouldn't change. */
+  if (new_bucket_cnt == old_bucket_cnt)
+    return;
+
+  /* Allocate new buckets and initialize them as empty. */
+  new_buckets = malloc (sizeof *new_buckets * new_bucket_cnt);
+  if (new_buckets == NULL) 
+    {
+      /* Allocation failed.  This means that use of the hash table will
+         be less efficient.  However, it is still usable, so
+         there's no reason for it to be an error. */
+      return;
+    }
+  for (i = 0; i < new_bucket_cnt; i++) 
+    list_init (&new_buckets[i]);
+
+  /* Install new bucket info. */
+  h->buckets = new_buckets;
+  h->bucket_cnt = new_bucket_cnt;
+
+  /* Move each old element into the appropriate new bucket. */
+  for (i = 0; i < old_bucket_cnt; i++) 
+    {
+      struct list *old_bucket;
+      struct list_elem *elem, *next;
+
+      old_bucket = &old_buckets[i];
+      for (elem = list_begin (old_bucket);
+           elem != list_end (old_bucket); elem = next) 
+        {
+          struct list *new_bucket = find_bucket (h, elem);
+          next = list_next (elem);
+          list_push_front (new_bucket, elem);
+        }
+    }
+}
+
+/* Inserts E into BUCKET (in hash table H).
+   Rehashes if this increases elems/bucket above
+   MAX_ELEMS_PER_BUCKET. */
+static void
+insert_elem (struct hash *h, struct list *bucket, hash_elem *e) 
+{
+  h->elem_cnt++;
+  if (h->elem_cnt > h->bucket_cnt * MAX_ELEMS_PER_BUCKET)
+    rehash (h);
+  list_push_front (bucket, e);
+}
+
+/* Removes E from hash table H.
+   Rehashes if this decreases elems/bucket below
+   MIN_ELEMS_PER_BUCKET. */
+static void
+remove_elem (struct hash *h, hash_elem *e) 
+{
+  h->elem_cnt--;
+  if (h->elem_cnt < h->bucket_cnt * MIN_ELEMS_PER_BUCKET)
+    rehash (h);
+  list_remove (e);
+}
+
diff --git a/src/lib/hash.h b/src/lib/hash.h

index da60d016a225fd92a6cc90ea7545d1a0642862c1..e37592bfbeae5d32ac2928dcecb8b6470b1f0ad2 100644 (file)
--- a/src/lib/hash.h
+++ b/src/lib/hash.h
@@ -1,53 +1,89 @@
  #ifndef HEADER_HASH_H
  #define HEADER_HASH_H 1
  
+/* Hash table.
+
+   This is a standard hash table with chaining.  To locate an
+   element in the table, we compute a hash function over the
+   element's data and use that as an index into an array of
+   doubly linked lists, then linearly search the list.
+
+   The chain lists do not use dynamic allocation.  Instead, each
+   structure that can potentially be in a hash must embed a
+   hash_elem member.  All of the hash functions operate on these
+   `hash_elem's.  The hash_entry macro allows conversion from a
+   hash_elem back to a structure object that contains it.
+
+   
+
+*/
+
  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>
  #include "list.h"
  
+/* Hash element. */
  typedef list_elem hash_elem;
  
+/* Converts pointer to hash element HASH_ELEM into a pointer to
+   the structure that HASH_ELEM is embedded inside.  Supply the
+   name of the outer structure STRUCT and the member name MEMBER
+   of the hash element.  See the big comment at the top of the
+   file for an example. */
  #define hash_entry(HASH_ELEM, STRUCT, MEMBER)                              \
          ((STRUCT *) ((uint8_t *) (HASH_ELEM) - offsetof (STRUCT, MEMBER)))
  
+/* Computes and returns the hash value for hash element E, given
+   auxiliary data AUX. */
+typedef unsigned hash_hash_func (const hash_elem *e, void *aux);
+
+/* Compares the value of two hash elements A and B, given
+   auxiliary data AUX.  Returns true if A is less than B, or
+   false if A is greater than or equal to B. */
  typedef bool hash_less_func (const hash_elem *a, const hash_elem *b,
                               void *aux);
-typedef unsigned hash_hash_func (const hash_elem *, void *aux);
  
+/* Hash table. */
  struct hash 
    {
-    size_t elem_cnt;
-    size_t bucket_cnt;
-    struct list *buckets;
-    hash_less_func *less;
-    hash_hash_func *hash;
-    void *aux;
+    size_t elem_cnt;            /* Number of elements in table. */
+    size_t bucket_cnt;          /* Number of buckets, a power of 2. */
+    struct list *buckets;       /* Array of `bucket_cnt' lists. */
+    hash_hash_func *hash;       /* Hash function. */
+    hash_less_func *less;       /* Comparison function. */
+    void *aux;                  /* Auxiliary data for `hash' and `less'. */
    };
  
+/* A hash table iterator. */
  struct hash_iterator 
    {
-    struct hash *hash;
-    struct list **bucket;
-    hash_elem *elem;
+    struct hash *hash;          /* The hash table. */
+    struct list *bucket;        /* Current bucket. */
+    hash_elem *elem;            /* Current hash element in current bucket. */
    };
  
-bool hash_init (struct hash *, hash_less_func *, hash_hash_func *, void *aux);
+/* Basic life cycle. */
+bool hash_init (struct hash *, hash_hash_func *, hash_less_func *, void *aux);
  void hash_clear (struct hash *);
  void hash_destroy (struct hash *);
  
+/* Search, insertion, deletion. */
  hash_elem *hash_insert (struct hash *, hash_elem *);
  hash_elem *hash_replace (struct hash *, hash_elem *);
  hash_elem *hash_find (struct hash *, hash_elem *);
  hash_elem *hash_delete (struct hash *, hash_elem *);
  
+/* Iteration. */
  void hash_first (struct hash_iterator *, struct hash *);
  hash_elem *hash_next (struct hash_iterator *);
  hash_elem *hash_cur (struct hash_iterator *);
  
+/* Information. */
  size_t hash_size (struct hash *);
  bool hash_empty (struct hash *);
  
+/* Sample hash functions. */
  unsigned hash_bytes (const void *, size_t);
  unsigned hash_string (const char *);
  unsigned hash_int (int);
author	Ben Pfaff <blp@cs.stanford.edu>
	Fri, 3 Sep 2004 06:25:44 +0000 (06:25 +0000)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Fri, 3 Sep 2004 06:25:44 +0000 (06:25 +0000)
src/lib/hash.c		patch \| blob \| history
src/lib/hash.h		patch \| blob \| history