New hmap and hmapx hash table implementations.

author Ben Pfaff <blp@gnu.org>

Sat, 27 Sep 2008 03:42:01 +0000 (20:42 -0700)

committer Ben Pfaff <blp@gnu.org>

Wed, 1 Oct 2008 03:36:03 +0000 (20:36 -0700)
author Ben Pfaff <blp@gnu.org>
Sat, 27 Sep 2008 03:42:01 +0000 (20:42 -0700)
committer Ben Pfaff <blp@gnu.org>
Wed, 1 Oct 2008 03:36:03 +0000 (20:36 -0700)
diff --git a/src/libpspp/automake.mk b/src/libpspp/automake.mk

index aaaa45b40459d09cd121e05ecd170e5f15d0bcd0..4a14382af9dd15628192693cdcf5fe492db8fb7d 100644 (file)
--- a/src/libpspp/automake.mk
+++ b/src/libpspp/automake.mk
@@ -23,10 +23,16 @@ src_libpspp_libpspp_la_SOURCES = \
         src/libpspp/freaderror.h \
         src/libpspp/getl.c \
         src/libpspp/getl.h \
+       src/libpspp/hash-functions.c \
+       src/libpspp/hash-functions.h \
         src/libpspp/hash.c \
         src/libpspp/hash.h \
         src/libpspp/heap.c \
         src/libpspp/heap.h \
+       src/libpspp/hmap.c \
+       src/libpspp/hmap.h \
+       src/libpspp/hmapx.c \
+       src/libpspp/hmapx.h \
         src/libpspp/i18n.c \
         src/libpspp/i18n.h \
         src/libpspp/integer-format.c \
diff --git a/src/libpspp/hash-functions.c b/src/libpspp/hash-functions.c

new file mode 100644 (file)

index 0000000..2318777
--- /dev/null
+++ b/src/libpspp/hash-functions.c
@@ -0,0 +1,90 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 1997-9, 2000, 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <libpspp/hash-functions.h>
+#include <assert.h>
+#include <ctype.h>
+#include <math.h>
+
+/* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
+#define FNV_32_PRIME 16777619u
+#define FNV_32_BASIS 2166136261u
+
+/* Fowler-Noll-Vo 32-bit hash, for bytes. */
+unsigned
+hsh_hash_bytes (const void *buf_, size_t size)
+{
+  const unsigned char *buf = (const unsigned char *) buf_;
+  unsigned hash;
+
+  assert (buf != NULL);
+
+  hash = FNV_32_BASIS;
+  while (size-- > 0)
+    hash = (hash * FNV_32_PRIME) ^ *buf++;
+
+  return hash;
+}
+
+/* Fowler-Noll-Vo 32-bit hash, for strings. */
+unsigned
+hsh_hash_string (const char *s_)
+{
+  const unsigned char *s = (const unsigned char *) s_;
+  unsigned hash;
+
+  assert (s != NULL);
+
+  hash = FNV_32_BASIS;
+  while (*s != '\0')
+    hash = (hash * FNV_32_PRIME) ^ *s++;
+
+  return hash;
+}
+
+/* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
+unsigned
+hsh_hash_case_string (const char *s_)
+{
+  const unsigned char *s = (const unsigned char *) s_;
+  unsigned hash;
+
+  assert (s != NULL);
+
+  hash = FNV_32_BASIS;
+  while (*s != '\0')
+    hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
+
+  return hash;
+}
+
+/* Hash for ints. */
+unsigned
+hsh_hash_int (int i)
+{
+  return hsh_hash_bytes (&i, sizeof i);
+}
+
+/* Hash for double. */
+unsigned
+hsh_hash_double (double d)
+{
+  if (!isnan (d))
+    return hsh_hash_bytes (&d, sizeof d);
+  else
+    return 0;
+}
diff --git a/src/libpspp/hash-functions.h b/src/libpspp/hash-functions.h

new file mode 100644 (file)

index 0000000..328f4de
--- /dev/null
+++ b/src/libpspp/hash-functions.h
@@ -0,0 +1,28 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef LIBPSPP_HASH_FUNCTIONS_H
+#define LIBPSPP_HASH_FUNCTIONS_H 1
+
+#include <stddef.h>
+
+unsigned hsh_hash_bytes (const void *, size_t);
+unsigned hsh_hash_string (const char *);
+unsigned hsh_hash_case_string (const char *);
+unsigned hsh_hash_int (int);
+unsigned hsh_hash_double (double);
+
+#endif /* libpspp/hash-functions.h */
diff --git a/src/libpspp/hash.c b/src/libpspp/hash.c

index 9da3deb120a92ba0ee3423f78441ad85dd9ab22f..eb43b54e7fedf24d7db0dd569cc34d885bb91dd4 100644 (file)
--- a/src/libpspp/hash.c
+++ b/src/libpspp/hash.c
@@ -1,5 +1,5 @@
  /* PSPP - a program for statistical analysis.
-   Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2008 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -19,7 +19,6 @@
  #include "hash.h"
  #include "message.h"
  #include <assert.h>
-#include <ctype.h>
  #include <limits.h>
  #include <stdbool.h>
  #include <stdlib.h>
@@ -70,74 +69,6 @@ next_power_of_2 (size_t x)
      }
  }
  
-/* Fowler-Noll-Vo hash constants, for 32-bit word sizes. */
-#define FNV_32_PRIME 16777619u
-#define FNV_32_BASIS 2166136261u
-
-/* Fowler-Noll-Vo 32-bit hash, for bytes. */
-unsigned
-hsh_hash_bytes (const void *buf_, size_t size)
-{
-  const unsigned char *buf = (const unsigned char *) buf_;
-  unsigned hash;
-
-  assert (buf != NULL);
-
-  hash = FNV_32_BASIS;
-  while (size-- > 0)
-    hash = (hash * FNV_32_PRIME) ^ *buf++;
-
-  return hash;
-}
-
-/* Fowler-Noll-Vo 32-bit hash, for strings. */
-unsigned
-hsh_hash_string (const char *s_)
-{
-  const unsigned char *s = (const unsigned char *) s_;
-  unsigned hash;
-
-  assert (s != NULL);
-
-  hash = FNV_32_BASIS;
-  while (*s != '\0')
-    hash = (hash * FNV_32_PRIME) ^ *s++;
-
-  return hash;
-}
-
-/* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */
-unsigned
-hsh_hash_case_string (const char *s_)
-{
-  const unsigned char *s = (const unsigned char *) s_;
-  unsigned hash;
-
-  assert (s != NULL);
-
-  hash = FNV_32_BASIS;
-  while (*s != '\0')
-    hash = (hash * FNV_32_PRIME) ^ toupper (*s++);
-
-  return hash;
-}
-
-/* Hash for ints. */
-unsigned
-hsh_hash_int (int i)
-{
-  return hsh_hash_bytes (&i, sizeof i);
-}
-
-/* Hash for double. */
-unsigned
-hsh_hash_double (double d)
-{
-  if (!isnan (d))
-    return hsh_hash_bytes (&d, sizeof d);
-  else
-    return 0;
-}
  \f
  /* Hash tables. */
  
diff --git a/src/libpspp/hash.h b/src/libpspp/hash.h

index 59efbe56223a21e8bb253ea972d914148544b1d5..57fc2678d09548ad50922b9130298afb6cf80cac 100644 (file)
--- a/src/libpspp/hash.h
+++ b/src/libpspp/hash.h
@@ -19,6 +19,7 @@
  
  #include <stddef.h>
  #include <stdbool.h>
+#include <libpspp/hash-functions.h>
  
  typedef int hsh_compare_func (const void *, const void *, const void *aux);
  typedef unsigned hsh_hash_func (const void *, const void *aux);
@@ -30,13 +31,6 @@ struct hsh_iterator
      size_t next;               /* Index of next entry. */
    };
  
-/* Hash functions. */
-unsigned hsh_hash_bytes (const void *, size_t);
-unsigned hsh_hash_string (const char *);
-unsigned hsh_hash_case_string (const char *);
-unsigned hsh_hash_int (int);
-unsigned hsh_hash_double (double);
-
  /* Hash tables. */
  struct hsh_table *hsh_create (int m, hsh_compare_func *,
                                hsh_hash_func *, hsh_free_func *,
diff --git a/src/libpspp/hmap.c b/src/libpspp/hmap.c

new file mode 100644 (file)

index 0000000..081d7cb
--- /dev/null
+++ b/src/libpspp/hmap.c
@@ -0,0 +1,186 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libpspp/hmap.h>
+#include <assert.h>
+#include <stdlib.h>
+
+static size_t capacity_to_mask (size_t capacity);
+
+/* Initializes MAP as a new hash map that is initially empty. */
+void
+hmap_init (struct hmap *map)
+{
+  map->count = 0;
+  map->mask = 0;
+  map->buckets = &map->one;
+  map->one = NULL;
+}
+
+/* Exchanges the contents of hash maps A and B. */
+void
+hmap_swap (struct hmap *a, struct hmap *b)
+{
+  struct hmap tmp = *a;
+  *a = *b;
+  *b = tmp;
+  if (!a->mask)
+    a->buckets = &a->one;
+  if (!b->mask)
+    b->buckets = &b->one;
+}
+
+/* Frees the memory, if any, allocated by hash map MAP.  This has
+   no effect on the actual data items in MAP, if any, because the
+   client is responsible for allocating and freeing them.  It
+   could, however, render them inaccessible if the only pointers
+   to them were from MAP itself, so in such a situation one
+   should iterate through the map and free the data items before
+   destroying it. */
+void
+hmap_destroy (struct hmap *map) 
+{
+  if (map != NULL && map->buckets != &map->one) 
+    free (map->buckets);
+}
+
+/* Reallocates MAP's hash buckets so that NEW_MASK becomes the
+   hash value bit-mask used to choose a hash bucket, then
+   rehashes any data elements in MAP into the new hash buckets.
+
+   NEW_MASK must be a power of 2 minus 1 (including 0), that is,
+   its value in binary must be all 1-bits.  */
+static void
+hmap_rehash (struct hmap *map, size_t new_mask) 
+{
+  struct hmap_node **new_buckets;
+  struct hmap_node *node, *next;
+
+  assert ((new_mask & (new_mask + 1)) == 0);
+  if (new_mask)
+    new_buckets = calloc (new_mask + 1, sizeof *new_buckets);
+  else 
+    {
+      new_buckets = &map->one;
+      new_buckets[0] = NULL;
+    }
+      
+  if (map->count > 0)
+    {
+      for (node = hmap_first (map); node != NULL; node = next)
+        {
+          size_t new_idx = node->hash & new_mask;
+          struct hmap_node **new_bucket = &new_buckets[new_idx];
+          next = hmap_next (map, node);
+          node->next = *new_bucket;
+          *new_bucket = node;
+        } 
+    }
+  if (map->buckets != &map->one)
+    free (map->buckets);
+  map->buckets = new_buckets;
+  map->mask = new_mask;
+}
+
+/* Ensures that MAP has sufficient space to store at least
+   CAPACITY data elements, allocating a new set of buckets and
+   rehashing if necessary. */
+void
+hmap_reserve (struct hmap *map, size_t capacity)
+{
+  if (capacity > hmap_capacity (map))
+    hmap_rehash (map, capacity_to_mask (capacity));
+}
+
+/* Shrinks MAP's set of buckets to the minimum number needed to
+   store its current number of elements, allocating a new set of
+   buckets and rehashing if that would save space. */
+void
+hmap_shrink (struct hmap *map) 
+{
+  size_t new_mask = capacity_to_mask (map->count);
+  if (new_mask < map->mask) 
+    hmap_rehash (map, new_mask); 
+}
+
+/* Moves NODE around in MAP to compensate for its hash value
+   having changed to NEW_HASH.
+
+   This function does not verify that MAP does not already
+   contain a data item that duplicates NODE's new value.  If
+   duplicates should be disallowed (which is the usual case),
+   then the client must check for duplicates before changing
+   NODE's value. */
+void
+hmap_changed (struct hmap *map, struct hmap_node *node, size_t new_hash)
+{
+  if ((new_hash ^ node->hash) & map->mask) 
+    {
+      hmap_delete (map, node);
+      hmap_insert_fast (map, node, new_hash);
+    }
+  else
+    node->hash = new_hash;
+}
+
+/* Hash map nodes may be moved around in memory as necessary,
+   e.g. as the result of an realloc operation on a block that
+   contains a node.  Once this is done, call this function
+   passing NODE that was moved, its former location in memory
+   OLD, and its hash map MAP before attempting any other
+   operation on MAP, NODE, or any other node in MAP.
+
+   It is not safe to move more than one node, then to call this
+   function for each node.  Instead, move a single node, call
+   this function, move another node, and so on.  Alternatively,
+   remove all affected nodes from the hash map, move them, then
+   re-insert all of them.
+
+   Assuming uniform hashing and no duplicate data items in MAP,
+   this function runs in constant time. */
+void
+hmap_moved (struct hmap *map,
+            struct hmap_node *node, const struct hmap_node *old) 
+{
+  struct hmap_node **p = &map->buckets[node->hash & map->mask];
+  while (*p != old)
+    p = &(*p)->next;
+  *p = node;
+}
+\f
+/* Returns the minimum-value mask required to allow for a hash
+   table capacity of at least CAPACITY.  The return value will be
+   a bit-mask suitable for use as the "mask" member of struct
+   hmap, that is, a power of 2 minus 1 (including 0). */
+static size_t
+capacity_to_mask (size_t capacity) 
+{
+  /* Calculate the minimum mask necesary to support the given
+     capacity. */
+  size_t mask = 0;
+  while (hmap_mask_to_capacity__ (mask) < capacity)
+    mask = (mask << 1) | 1;
+
+  /* If the mask is nonzero, make it at least 3, because there is
+     little point in allocating an array of just 2 pointers. */
+  mask |= (mask & 1) << 1;
+
+  return mask;
+}
diff --git a/src/libpspp/hmap.h b/src/libpspp/hmap.h

new file mode 100644 (file)

index 0000000..e73d84f
--- /dev/null
+++ b/src/libpspp/hmap.h
@@ -0,0 +1,509 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+/* Hash table with separate chaining.
+
+   This header (hmap.h) supplies an "embedded" implementation of
+   a hash table that uses linked lists to resolve collisions
+   ("separate chaining").  Its companion header (hmapx.h)
+   supplies a "external" implementation that is otherwise
+   similar.  The two variants are described briefly here.  The
+   embedded variant, for which this is the header, is described
+   in slightly more detail below.  Each function also has a
+   detailed usage comment at its point of definition.  (Many of
+   those definitions are inline in this file, because they are so
+   simple.  Others are in hmap.c.)
+
+   The "hmap" embedded hash table implementation puts the hash
+   table node (which includes the linked list used for resolving
+   collisions) within the data structure that the hash table
+   contains.  This makes allocation efficient, in space and time,
+   because no additional call into an allocator is needed to
+   obtain memory for the hash table node.  It also makes it easy
+   to find the hash table node associated with a given object.
+   However, it's difficult to include a given object in an
+   arbitrary number of hash tables.
+
+   The "hmapx" external hash table implementation allocates hash
+   table nodes separately from the objects in the hash table.
+   Inserting and removing hash table elements requires dynamic
+   allocation, so it is normally slower and takes more memory
+   than the embedded implementation.  It also requires searching
+   the table to find the node associated with a given object.
+   However, it's easy to include a given object in an arbitrary
+   number of hash tables.  It's also possible to create an
+   external hash table without adding a member to the data
+   structure that the hash table contains. */
+
+#ifndef LIBPSPP_HMAP_H
+#define LIBPSPP_HMAP_H 1
+
+/* Embedded hash table with separate chaining.
+
+   To create an embedded hash table, declare an instance of
+   struct hmap, then initialize it with hmap_init():
+     struct hmap map;
+     hmap_init (&map);
+   or, alternatively:
+     struct hmap map = HMAP_INITIALIZER (map);
+   
+   Each node in the hash table, presumably a structure type, must
+   include a struct hmap_node member.  Here's an example:
+     struct foo
+       {
+         struct hmap_node node;   // hmap_node member.
+         const char *string;      // Another member.
+       };
+   The hash table functions work with pointers to struct
+   hmap_node.  To obtain a pointer to your structure type given a
+   pointer to struct hmap_node, use the HMAP_DATA macro.
+
+   Inserting and deleting elements is straightforward.  Use
+   hmap_insert() to insert an element and hmap_delete() to delete
+   an element, e.g.:
+     struct foo my_foo;
+     my_foo.string = "My string";
+     hmap_insert (&map, &my_foo.node, hsh_hash_string (my_foo.string));
+     ...
+     hmap_delete (&map, &my_foo.node);
+   You must pass the element's hash value as one of
+   hmap_insert()'s arguments.  The hash table saves this hash
+   value for use later to speed searches and to rehash as the
+   hash table grows.
+
+   hmap_insert() does not check whether the newly inserted
+   element duplicates an element already in the hash table.  The
+   client is responsible for doing so, if this is desirable.
+
+   The hash table does not provide a direct way to search for an
+   existing element.  Instead, it provides the means to iterate
+   over all the elements in the hash table with a given hash
+   value.  It is easy to compose a search function from such a
+   building block.  For example:
+     const struct foo *
+     find_foo (const struct hmap *map, const char *name)
+     {
+       const struct foo *foo;
+       size_t hash;
+
+       hash = hsh_hash_string (name);
+       HMAP_FOR_EACH_WITH_HASH (foo, struct foo, node, hash, map)
+         if (!strcmp (foo->name, name))
+           break;
+       return foo;
+     }
+
+   Here is how to iterate through the elements currently in the
+   hash table:
+     struct foo *foo;
+     HMAP_FOR_EACH (foo, struct foo, node, &map)
+       {
+         ...do something with foo...
+       }
+   */
+
+#include <stddef.h>
+
+/* Returns the data structure corresponding to the given NODE,
+   assuming that NODE is embedded as the given MEMBER name in
+   data type STRUCT.  NODE must not be a null pointer. */
+#define HMAP_DATA(NODE, STRUCT, MEMBER)                         \
+  ((STRUCT *) ((char *) (NODE) - offsetof (STRUCT, MEMBER)))
+
+/* Like HMAP_DATA, except that a null NODE yields a null pointer
+   result. */
+#define HMAP_NULLABLE_DATA(NODE, STRUCT, MEMBER)        \
+  hmap_nullable_data__ (NODE, offsetof (STRUCT, MEMBER))
+
+/* Hash table node. */
+struct hmap_node
+  {
+    struct hmap_node *next;     /* Next in chain. */
+    size_t hash;                /* Hash value. */
+  };
+
+static inline size_t hmap_node_hash (const struct hmap_node *);
+
+/* Hash table. */
+struct hmap
+  {
+    size_t count;               /* Number of inserted nodes. */
+    size_t mask;                /* Number of buckets (power of 2), minus 1. */
+    struct hmap_node **buckets; /* Array of buckets. */
+    struct hmap_node *one;      /* One bucket, to eliminate corner cases. */
+  };
+
+/* Suitable for use as the initializer for a struct hmap named
+   MAP.  Typical usage:
+       struct hmap map = HMAP_INITIALIZER (map);
+   HMAP_INITIALIZER() is an alternative to hmap_init(). */
+#define HMAP_INITIALIZER(MAP) { 0, 0, &(MAP).one, NULL }
+
+/* Creation and destruction. */
+void hmap_init (struct hmap *);
+void hmap_swap (struct hmap *, struct hmap *);
+void hmap_destroy (struct hmap *);
+
+/* Storage management. */
+void hmap_reserve (struct hmap *, size_t capacity);
+void hmap_shrink (struct hmap *);
+
+/* Search.  Refer to the large comment near the top of this file
+   for an example.*/
+static inline struct hmap_node *hmap_first_with_hash (const struct hmap *,
+                                                      size_t hash);
+static inline struct hmap_node *hmap_next_with_hash (const struct hmap_node *);
+
+/* Insertion and deletion. */
+static inline void hmap_insert (struct hmap *, struct hmap_node *,
+                                size_t hash);
+static inline void hmap_insert_fast (struct hmap *, struct hmap_node *,
+                                     size_t hash);
+static inline void hmap_delete (struct hmap *, struct hmap_node *);
+
+/* Iteration. */
+static inline struct hmap_node *hmap_first (const struct hmap *);
+static inline struct hmap_node *hmap_next (const struct hmap *,
+                                           const struct hmap_node *);
+
+/* Counting. */
+static inline size_t hmap_count (const struct hmap *);
+static inline size_t hmap_capacity (const struct hmap *);
+
+/* Updating data elements. */
+void hmap_changed (struct hmap *, struct hmap_node *, size_t new_hash);
+void hmap_moved (struct hmap *,
+                 struct hmap_node *, const struct hmap_node *old);
+
+/* Convenience macros for search.
+
+   These macros automatically use HMAP_DATA to obtain the data
+   elements that encapsulate hmap nodes, which often saves typing
+   and can make code easier to read.  Refer to the large comment
+   near the top of this file for an example.
+
+   These macros evaluate HASH only once.  They evaluate their
+   other arguments many times. */
+#define HMAP_FIRST_WITH_HASH(STRUCT, MEMBER, HMAP, HASH)                \
+  HMAP_NULLABLE_DATA (hmap_first_with_hash (HMAP, HASH), STRUCT, MEMBER)
+#define HMAP_NEXT_WITH_HASH(DATA, STRUCT, MEMBER)                       \
+  HMAP_NULLABLE_DATA (hmap_next_with_hash (&(DATA)->MEMBER), STRUCT, MEMBER)
+#define HMAP_FOR_EACH_WITH_HASH(DATA, STRUCT, MEMBER, HASH, HMAP)       \
+  for ((DATA) = HMAP_FIRST_WITH_HASH (STRUCT, MEMBER, HMAP, HASH);      \
+       (DATA) != NULL;                                                  \
+       (DATA) = HMAP_NEXT_WITH_HASH (DATA, STRUCT, MEMBER))
+#define HMAP_FOR_EACH_WITH_HASH_SAFE(DATA, NEXT, STRUCT, MEMBER, HASH, HMAP) \
+  for ((DATA) = HMAP_FIRST_WITH_HASH (STRUCT, MEMBER, HMAP, HASH);      \
+       ((DATA) != NULL                                                  \
+        ? ((NEXT) = HMAP_NEXT_WITH_HASH (DATA, STRUCT, MEMBER), 1)      \
+        : 0);                                                           \
+       (DATA) = (NEXT))
+
+/* Convenience macros for iteration.
+
+   These macros automatically use HMAP_DATA to obtain the data
+   elements that encapsulate hmap nodes, which often saves typing
+   and can make code easier to read.  Refer to the large comment
+   near the top of this file for an example.
+
+   These macros evaluate their arguments many times. */
+#define HMAP_FIRST(STRUCT, MEMBER, HMAP)                        \
+  HMAP_NULLABLE_DATA (hmap_first (HMAP), STRUCT, MEMBER)
+#define HMAP_NEXT(DATA, STRUCT, MEMBER, HMAP)                           \
+  HMAP_NULLABLE_DATA (hmap_next (HMAP, &(DATA)->MEMBER), STRUCT, MEMBER)
+#define HMAP_FOR_EACH(DATA, STRUCT, MEMBER, HMAP)       \
+  for ((DATA) = HMAP_FIRST (STRUCT, MEMBER, HMAP);      \
+       (DATA) != NULL;                                  \
+       (DATA) = HMAP_NEXT (DATA, STRUCT, MEMBER, HMAP))
+#define HMAP_FOR_EACH_SAFE(DATA, NEXT, STRUCT, MEMBER, HMAP)    \
+  for ((DATA) = HMAP_FIRST (STRUCT, MEMBER, HMAP);              \
+       ((DATA) != NULL                                          \
+        ? ((NEXT) = HMAP_NEXT (DATA, STRUCT, MEMBER, HMAP), 1)  \
+        : 0);                                                   \
+       (DATA) = (NEXT))
+\f
+/* Inline definitions. */
+
+static inline struct hmap_node *hmap_find_hash__ (struct hmap_node *, size_t);
+static inline struct hmap_node *hmap_first_nonempty_bucket__ (
+  const struct hmap *, size_t start);
+static inline size_t hmap_mask_to_capacity__ (size_t mask);
+
+/* Returns the hash value associated with NODE. */
+size_t
+hmap_node_hash (const struct hmap_node *node) 
+{
+  return node->hash;
+}
+
+/* Returns the first node in MAP that has hash value HASH, or a
+   null pointer if MAP does not contain any node with that hash
+   value.
+
+   Assuming uniform hashing and no duplicate data items in MAP,
+   this function runs in constant time.  (Amortized over an
+   iteration over all data items with a given HASH, its runtime
+   is proportional to the length of the hash chain for HASH, so
+   given a pathological hash function, e.g. one that returns a
+   constant value, its runtime degenerates to linear in the
+   length of NODE's hash chain.)
+
+   Nodes are returned in arbitrary order that may change whenever
+   the hash table's current capacity changes, as reported by
+   hmap_capacity().  Calls to hmap_insert(), hmap_reserve(), and
+   hmap_shrink() can change the capacity of a hash map.
+   Inserting a node with hmap_insert_fast() or deleting one with
+   hmap_delete() will not change the relative ordering of nodes.
+
+   The HMAP_FOR_EACH_WITH_HASH and HMAP_FOR_EACH_WITH_HASH_SAFE
+   macros provide convenient ways to iterate over all the nodes
+   with a given hash.  The HMAP_FIRST_WITH_HASH macro is an
+   interface to this particular function that is often more
+   convenient. */
+static inline struct hmap_node *
+hmap_first_with_hash (const struct hmap *map, size_t hash)
+{
+  return hmap_find_hash__ (map->buckets[hash & map->mask], hash);
+}
+
+/* Returns the next node in MAP after NODE that has the same hash
+   value as NODE, or a null pointer if MAP does not contain any
+   more nodes with that hash value.
+
+   Assuming uniform hashing and no duplicate data items in MAP,
+   this function runs in constant time.  (Amortized over an
+   iteration over all data items with a given HASH, its runtime
+   is proportional to the length of the hash chain for HASH, so
+   given a pathological hash function, e.g. one that returns a
+   constant value, its runtime degenerates to linear in the
+   length of NODE's hash chain.)
+
+   Nodes are returned in arbitrary order that may change whenever
+   the hash table's current capacity changes, as reported by
+   hmap_capacity().  Calls to hmap_insert(), hmap_reserve(), and
+   hmap_shrink() can change the capacity of a hash map.
+   Inserting a node with hmap_insert_fast() or deleting one with
+   hmap_delete() will not change the relative ordering of nodes.
+
+   The HMAP_FOR_EACH_WITH_HASH and HMAP_FOR_EACH_WITH_HASH_SAFE
+   macros provide convenient ways to iterate over all the nodes
+   with a given hash.  The HMAP_NEXT_WITH_HASH macro is an
+   interface to this particular function that is often more
+   convenient. */
+static inline struct hmap_node *
+hmap_next_with_hash (const struct hmap_node *node) 
+{
+  return hmap_find_hash__ (node->next, node->hash);
+}
+
+/* Inserts NODE into MAP with hash value HASH.  If the insertion
+   causes MAP's current capacity, as reported by hmap_capacity(),
+   to be exceeded, rehashes MAP with an increased number of hash
+   buckets.
+
+   This function runs in constant time amortized over all the
+   insertions into MAP.
+
+   This function does not verify that MAP does not already
+   contain a data item with the same value as NODE.  If
+   duplicates should be disallowed (which is the usual case),
+   then the client must check for duplicates itself before
+   inserting the new node. */
+static inline void
+hmap_insert (struct hmap *map, struct hmap_node *node, size_t hash)
+{
+  hmap_insert_fast (map, node, hash);
+  if (map->count > hmap_capacity (map))
+    hmap_reserve (map, map->count);
+}
+
+/* Inserts NODE into MAP with hash value HASH.  Does not check
+   whether this causes MAP's current capacity to be exceeded.
+   The caller must take responsibility for that (or use
+   hmap_insert() instead).
+
+   This function runs in constant time.
+
+   This function does not verify that MAP does not already
+   contain a data item with the same value as NODE.  If
+   duplicates should be disallowed (which is the usual case),
+   then the client must check for duplicates itself before
+   inserting the new node. */
+static inline void
+hmap_insert_fast (struct hmap *map, struct hmap_node *node, size_t hash) 
+{
+  struct hmap_node **bucket = &map->buckets[hash & map->mask];
+  node->hash = hash;
+  node->next = *bucket;
+  *bucket = node;
+  map->count++;
+}
+
+/* Removes NODE from MAP.  The client is responsible for freeing
+   any data associated with NODE, if necessary.
+
+   Assuming uniform hashing, this function runs in constant time.
+   (Its runtime is proportional to the position of NODE in its
+   hash chain, so given a pathological hash function, e.g. one
+   that returns a constant value, its runtime degenerates to
+   linear in the length of NODE's hash chain.)
+
+   This function never reduces the number of buckets in MAP.
+   When one deletes a large number of nodes from a hash table,
+   calling hmap_shrink() afterward may therefore save a small
+   amount of memory.  It is also more expensive to iterate
+   through a very sparse hash table than a denser one, so
+   shrinking the hash table could also save some time.  However,
+   rehashing has an immediate cost that must be weighed against
+   these benefits.
+
+   hmap_delete() does not change NODE's hash value reported by
+   hmap_node_hash(). */
+static inline void
+hmap_delete (struct hmap *map, struct hmap_node *node)
+{
+  struct hmap_node **bucket = &map->buckets[node->hash & map->mask];
+  while (*bucket != node)
+    bucket = &(*bucket)->next;
+  *bucket = (*bucket)->next;
+  map->count--;
+}
+
+/* Returns the first node in MAP, or a null pointer if MAP is
+   empty.
+
+   Amortized over iterating through every data element in MAP,
+   this function runs in constant time.  However, this assumes
+   that MAP is not excessively sparse, that is, that
+   hmap_capacity(MAP) is at most a constant factor greater than
+   hmap_count(MAP).  This will always be true unless many nodes
+   have been inserted into MAP and then most or all of them
+   deleted; in such a case, calling hmap_shrink() is advised.
+
+   Nodes are returned in arbitrary order that may change whenever
+   the hash table's current capacity changes, as reported by
+   hmap_capacity().  Calls to hmap_insert(), hmap_reserve(), and
+   hmap_shrink() can change the capacity of a hash map.
+   Inserting a node with hmap_insert_fast() or deleting one with
+   hmap_delete() will not change the relative ordering of nodes.
+
+   The HMAP_FOR_EACH and HMAP_FOR_EACH_SAFE macros provide
+   convenient ways to iterate over all the nodes in a hash map.
+   The HMAP_FIRST macro is an interface to this particular
+   function that is often more convenient. */
+static inline struct hmap_node *
+hmap_first (const struct hmap *map) 
+{
+  return hmap_first_nonempty_bucket__ (map, 0);
+}
+
+/* Returns the next node in MAP following NODE, or a null pointer
+   if NODE is the last node in MAP.
+
+   Amortized over iterating through every data element in MAP,
+   this function runs in constant time.  However, this assumes
+   that MAP is not excessively sparse, that is, that
+   hmap_capacity(MAP) is at most a constant factor greater than
+   hmap_count(MAP).  This will always be true unless many nodes
+   have been inserted into MAP and then most or all of them
+   deleted; in such a case, calling hmap_shrink() is advised.
+
+   Nodes are returned in arbitrary order that may change whenever
+   the hash table's current capacity changes, as reported by
+   hmap_capacity().  Calls to hmap_insert(), hmap_reserve(), and
+   hmap_shrink() can change the capacity of a hash map.
+   Inserting a node with hmap_insert_fast() or deleting one with
+   hmap_delete() will not change the relative ordering of nodes.
+
+   The HMAP_FOR_EACH and HMAP_FOR_EACH_SAFE macros provide
+   convenient ways to iterate over all the nodes in a hash map.
+   The HMAP_NEXT macro is an interface to this particular
+   function that is often more convenient. */
+static inline struct hmap_node *
+hmap_next (const struct hmap *map, const struct hmap_node *node) 
+{
+  return (node->next != NULL
+          ? node->next
+          : hmap_first_nonempty_bucket__ (map, (node->hash & map->mask) + 1));
+}
+
+/* Returns the number of data items currently in MAP. */
+static inline size_t
+hmap_count (const struct hmap *map) 
+{
+  return map->count;
+}
+
+/* Returns the current capacity of MAP, that is, the maximum
+   number of data elements that MAP may hold before it becomes
+   advisable to rehash.
+
+   The capacity is advisory only: it is possible to insert any
+   number of data elements into a hash map regardless of its
+   capacity.  However, inserting many more elements than the
+   map's capacity will degrade search performance. */
+static inline size_t
+hmap_capacity (const struct hmap *map) 
+{
+  return hmap_mask_to_capacity__ (map->mask);
+}
+\f
+/* Implementation details. */
+
+/* Returns the first node at or after NODE in NODE's chain that
+   has hash value HASH. */
+static inline struct hmap_node *
+hmap_find_hash__ (struct hmap_node *node, size_t hash) 
+{
+  for (; node != NULL; node = node->next) 
+    if (node->hash == hash)
+      break;
+  return node;
+}
+
+/* Returns the first node in the lowest-numbered nonempty bucket
+   in MAP whose index is START or higher, or a null pointer if
+   all such buckets are empty. */
+static inline struct hmap_node *
+hmap_first_nonempty_bucket__ (const struct hmap *map, size_t start)
+{
+  size_t i;
+
+  for (i = start; i <= map->mask; i++)
+    if (map->buckets[i] != NULL)
+      return map->buckets[i];
+  return NULL;
+}
+
+/* Returns the hash table capacity associated with a given MASK,
+   which should be a value for the "mask" member of struct hmap.
+   MASK must be a power of 2 minus 1 (including 0), that is, its
+   value in binary must be all 1-bits.  */
+static inline size_t
+hmap_mask_to_capacity__ (size_t mask) 
+{
+  return (mask + 1) * 2;
+}
+
+/* Helper for HMAP_NULLABLE_DATA (to avoid evaluating its NODE
+   argument more than once).  */
+static inline void *
+hmap_nullable_data__ (struct hmap_node *node, size_t member_offset)
+{ 
+  return node != NULL ? (char *) node - member_offset : NULL;
+}
+
+#endif /* libpspp/hmap.h */
diff --git a/src/libpspp/hmapx.c b/src/libpspp/hmapx.c

new file mode 100644 (file)

index 0000000..d732450
--- /dev/null
+++ b/src/libpspp/hmapx.c
@@ -0,0 +1,99 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libpspp/hmapx.h>
+#include <stdlib.h>
+#include "xalloc.h"
+
+/* Frees the memory, if any, allocated by hash map MAP, including
+   all hmapx_nodes that it contains.  The user-defined data items
+   that the hmapx_nodes point to are not affected.  If those
+   items should be freed, then it should be done by iterating
+   through MAP's contents before destroying MAP. */
+void
+hmapx_destroy (struct hmapx *map) 
+{
+  if (map != NULL) 
+    {
+      if (hmapx_count (map) > 0) 
+        {
+          struct hmapx_node *node, *next;
+          for (node = hmapx_first (map); node != NULL; node = next)
+            {
+              next = hmapx_next (map, node);
+              free (node); 
+            }
+        }
+      hmap_destroy (&map->hmap);
+    }
+}
+
+/* Allocates and returns a new hmapx_node with DATA as its data
+   item. */
+static struct hmapx_node *
+make_hmapx_node (void *data) 
+{
+  struct hmapx_node *node = xmalloc (sizeof *node);
+  node->data = data;
+  return node;
+}
+
+/* Inserts DATA into MAP with hash value HASH and returns the new
+   hmapx_node created to contain DATA.  If the insertion causes
+   MAP's current capacity, as reported by hmapx_capacity(), to be
+   exceeded, rehashes MAP with an increased number of hash
+   buckets.
+
+   This function runs in constant time amortized over all the
+   insertions into MAP.
+
+   This function does not verify that MAP does not already
+   contain a data item with the same value as DATA.  If
+   duplicates should be disallowed (which is the usual case),
+   then the client must check for duplicates itself before
+   inserting the new item. */
+struct hmapx_node *
+hmapx_insert (struct hmapx *map, void *data, size_t hash) 
+{
+  struct hmapx_node *node = make_hmapx_node (data);
+  hmap_insert (&map->hmap, &node->hmap_node, hash);
+  return node;
+}
+
+/* Inserts DATA into MAP with hash value HASH and returns the new
+   hmapx_node created to contain DATA.  Does not check whether
+   this causes MAP's current capacity to be exceeded.  The caller
+   must take responsibility for that (or use hmapx_insert()
+   instead).
+
+   This function runs in constant time.
+
+   This function does not verify that MAP does not already
+   contain a data item with the same value as DATA.  If
+   duplicates should be disallowed (which is the usual case),
+   then the client must check for duplicates itself before
+   inserting the new node. */
+struct hmapx_node *
+hmapx_insert_fast (struct hmapx *map, void *data, size_t hash) 
+{
+  struct hmapx_node *node = make_hmapx_node (data);
+  hmap_insert_fast (&map->hmap, &node->hmap_node, hash);
+  return node;
+}
diff --git a/src/libpspp/hmapx.h b/src/libpspp/hmapx.h

new file mode 100644 (file)

index 0000000..32a4452
--- /dev/null
+++ b/src/libpspp/hmapx.h
@@ -0,0 +1,468 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+/* Hash table with separate chaining.
+
+   This header (hmapx.h) supplies an "external" implementation of
+   a hash table that uses linked lists to resolve collisions
+   ("separate chaining").  Its companion header (hmap.h) supplies
+   a "embedded" implementation that is otherwise similar.  The
+   two variants are described briefly here.  The external
+   variant, for which this is the header, is described in
+   slightly more detail below.  Each function also has a detailed
+   usage comment at its point of definition.  (Many of those
+   definitions are inline in this file, because they are so
+   simple.  Others are in hmapx.c.)
+
+   The "hmap" embedded hash table implementation puts the hash
+   table node (which includes the linked list used for resolving
+   collisions) within the data structure that the hash table
+   contains.  This makes allocation efficient, in space and time,
+   because no additional call into an allocator is needed to
+   obtain memory for the hash table node.  It also makes it easy
+   to find the hash table node associated with a given object.
+   However, it's difficult to include a given object in an
+   arbitrary number of hash tables.
+
+   The "hmapx" external hash table implementation allocates hash
+   table nodes separately from the objects in the hash table.
+   Inserting and removing hash table elements requires dynamic
+   allocation, so it is normally slower and takes more memory
+   than the embedded implementation.  It also requires searching
+   the table to find the node associated with a given object.
+   However, it's easy to include a given object in an arbitrary
+   number of hash tables.  It's also possible to create an
+   external hash table without adding a member to the data
+   structure that the hash table contains. */
+
+#ifndef LIBPSPP_HMAPX_H
+#define LIBPSPP_HMAPX_H 1
+
+/* External hash table with separate chaining.
+
+   To create an external hash table, declare an instance of
+   struct hmapx, then initialize it with hmapx_init():
+     struct hmapx map;
+     hmapx_init (&map);
+   or, alternatively:
+     struct hmapx map = HMAPX_INITIALIZER (map);
+
+   An hmapx data structure contains data represented as void *.
+   The hmapx_insert() function inserts such a datum and returns
+   the address of a newly created struct hmapx_node that
+   represents the new element:
+     struct foo {
+       const char *key;
+       const char *value;
+     };
+     struct foo foo = {"key", "value"};
+     struct hmapx_node *node;
+     node = hmapx_insert (&map, &foo, hsh_hash_string (foo.key));
+   The element's hash value must be passed as one of
+   hmapx_insert()'s arguments.  The hash table saves this hash
+   value for use later to speed searches and to rehash as the
+   hash table grows.
+
+   hmapx_insert() does not check whether the newly inserted
+   element duplicates an element already in the hash table.  The
+   client is responsible for doing so, if this is desirable.
+
+   Use hmapx_delete() to delete an element from the hash table,
+   passing in its hmapx_node:
+     hmapx_delete (&map, node);
+   Deleting an element frees its node.
+
+   The hash table does not provide a direct way to search for an
+   existing element.  Instead, it provides the means to iterate
+   over all the elements in the hash table with a given hash
+   value.  It is easy to compose a search function from such a
+   building block.  For example:
+     struct hmapx_node *
+     find_node (const struct hmapx *map, const char *target)
+     {
+       struct hmapx_node *node;
+       struct foo *foo;
+       HMAPX_FOR_EACH_WITH_HASH (foo, node, hsh_hash_string (target), map)
+         if (!strcmp (foo->key, target))
+           break;
+       return node;
+     }
+   This function's client can extract the data item from the
+   returned hmapx_node using the hmapx_node_data() function.  The
+   hmapx_node can also be useful directly as an argument to other
+   hmapx functions, such as hmapx_delete().
+
+   Here is how to iterate through the elements currently in the
+   hash table:
+     struct hmapx_node *node;
+     const char *string;
+     HMAPX_FOR_EACH (data, node, &map)
+       {
+         ...do something with string...
+       }
+   */
+
+#include <libpspp/hmap.h>
+#include <stdlib.h>
+
+/* Hash table node. */
+struct hmapx_node
+  {
+    struct hmap_node hmap_node; /* Underlying hash node. */
+    void *data;                 /* User data. */
+  };
+
+static inline void *hmapx_node_data (const struct hmapx_node *);
+static inline size_t hmapx_node_hash (const struct hmapx_node *);
+
+/* Hash table. */
+struct hmapx
+  {
+    struct hmap hmap;
+  };
+
+/* Suitable for use as the initializer for a struct hmapx named
+   MAP.  Typical usage:
+       struct hmap map = HMAPX_INITIALIZER (map);
+   HMAPX_INITIALIZER() is an alternative to hmapx_init(). */
+#define HMAPX_INITIALIZER(MAP) { HMAP_INITIALIZER (MAP.hmap) }
+
+/* Creation and destruction. */
+static inline void hmapx_init (struct hmapx *);
+static inline void hmapx_swap (struct hmapx *, struct hmapx *);
+void hmapx_destroy (struct hmapx *);
+
+/* Storage management. */
+static inline void hmapx_reserve (struct hmapx *, size_t capacity);
+static inline void hmapx_shrink (struct hmapx *);
+
+/* Search. */
+static inline struct hmapx_node *hmapx_first_with_hash (struct hmapx *,
+                                                        size_t hash);
+static inline struct hmapx_node *hmapx_next_with_hash (struct hmapx_node *);
+
+/* Insertion and deletion. */
+struct hmapx_node *hmapx_insert (struct hmapx *, void *, size_t hash);
+struct hmapx_node *hmapx_insert_fast (struct hmapx *, void *, size_t hash);
+static inline void hmapx_delete (struct hmapx *, struct hmapx_node *);
+
+/* Iteration. */
+static inline struct hmapx_node *hmapx_first (const struct hmapx *);
+static inline struct hmapx_node *hmapx_next (const struct hmapx *,
+                                             const struct hmapx_node *);
+
+/* Counting. */
+static inline size_t hmapx_count (const struct hmapx *);
+static inline size_t hmapx_capacity (const struct hmapx *);
+
+/* Updating data elements. */
+static inline void hmapx_change (struct hmapx *,
+                                 struct hmapx_node *, void *, size_t new_hash);
+static inline void hmapx_changed (struct hmapx *, struct hmapx_node *,
+                                  size_t new_hash);
+static inline void hmapx_move (struct hmapx_node *, void *);
+
+/* Convenience macros for search.
+
+   These macros automatically use hmapx_node_data() to obtain the
+   data elements that encapsulate hmap nodes, which often saves
+   typing and can make code easier to read.  Refer to the large
+   comment near the top of this file for an example.
+
+   These macros evaluate HASH only once.  They evaluate their
+   other arguments many times. */
+#define HMAPX_FOR_EACH_WITH_HASH(DATA, NODE, HASH, HMAPX)               \
+  for ((NODE) = hmapx_first_with_hash (HMAPX, HASH);                    \
+       (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0;       \
+       (NODE) = hmapx_next_with_hash (NODE))
+#define HMAPX_FOR_EACH_WITH_HASH_SAFE(DATA, NODE, NEXT, HASH, HMAPX)    \
+  for ((NODE) = hmapx_first_with_hash (HMAPX, HASH);                    \
+       ((NODE) != NULL                                                  \
+        ? ((DATA) = hmapx_node_data (NODE),                             \
+           (NEXT) = hmapx_next_with_hash (NODE),                        \
+           1)                                                           \
+        : 0);                                                           \
+       (NODE) = (NEXT))
+
+/* Convenience macros for iteration.
+
+   These macros automatically use hmapx_node_data() to obtain the
+   data elements that encapsulate hmap nodes, which often saves
+   typing and can make code easier to read.  Refer to the large
+   comment near the top of this file for an example. 
+
+   These macros evaluate their arguments many times. */
+#define HMAPX_FOR_EACH(DATA, NODE, HMAPX)                               \
+  for ((NODE) = hmapx_first (HMAPX);                                    \
+       (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0;       \
+       (NODE) = hmapx_next (HMAPX, NODE))
+#define HMAPX_FOR_EACH_SAFE(DATA, NODE, NEXT, HMAPX)                    \
+  for ((NODE) = hmapx_first (HMAPX);                                    \
+       ((NODE) != NULL                                                  \
+        ? ((DATA) = hmapx_node_data (NODE),                             \
+           (NEXT) = hmapx_next (HMAPX, NODE),                           \
+           1)                                                           \
+        : 0);                                                           \
+       (NODE) = (NEXT))
+\f
+/* Inline definitions. */
+
+/* Returns the data stored in NODE. */
+static inline void *
+hmapx_node_data (const struct hmapx_node *node)
+{
+  return node->data;
+}
+
+/* Returns the hash value stored in NODE */
+static inline size_t
+hmapx_node_hash (const struct hmapx_node *node)
+{
+  return hmap_node_hash (&node->hmap_node);
+}
+
+/* Initializes MAP as a new hash map that is initially empty. */
+static inline void
+hmapx_init (struct hmapx *map) 
+{
+  hmap_init (&map->hmap);
+}
+
+/* Exchanges the contents of hash maps A and B. */
+static inline void
+hmapx_swap (struct hmapx *a, struct hmapx *b)
+{
+  hmap_swap (&a->hmap, &b->hmap);
+}
+
+/* Ensures that MAP has sufficient space to store at least
+   CAPACITY data elements, allocating a new set of buckets and
+   rehashing if necessary. */
+static inline void
+hmapx_reserve (struct hmapx *map, size_t capacity)
+{
+  hmap_reserve (&map->hmap, capacity);
+}
+
+/* Shrinks MAP's set of buckets to the minimum number needed to
+   store its current number of elements, allocating a new set of
+   buckets and rehashing if that would save space. */
+static inline void
+hmapx_shrink (struct hmapx *map) 
+{
+  hmap_shrink (&map->hmap);
+}
+
+/* Returns the first node in MAP that has hash value HASH, or a
+   null pointer if MAP does not contain any node with that hash
+   value.
+
+   Assuming uniform hashing and no duplicate data items in MAP,
+   this function runs in constant time.  (Amortized over an
+   iteration over all data items with a given HASH, its runtime
+   is proportional to the length of the hash chain for HASH, so
+   given a pathological hash function, e.g. one that returns a
+   constant value, its runtime degenerates to linear in the
+   length of NODE's hash chain.)
+
+   Nodes are returned in arbitrary order that may change whenever
+   the hash table's current capacity changes, as reported by
+   hmapx_capacity().  Calls to hmapx_insert(), hmapx_reserve(),
+   and hmapx_shrink() can change the capacity of a hash map.
+   Inserting a node with hmapx_insert_fast() or deleting one with
+   hmapx_delete() will not change the relative ordering of nodes.
+
+   The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE
+   macros provide convenient ways to iterate over all the nodes
+   with a given hash. */
+static inline struct hmapx_node *
+hmapx_first_with_hash (struct hmapx *map, size_t hash) 
+{
+  return HMAP_FIRST_WITH_HASH (struct hmapx_node, hmap_node, &map->hmap, hash);
+}
+
+/* Returns the next node in MAP after NODE that has the same hash
+   value as NODE, or a null pointer if MAP does not contain any
+   more nodes with that hash value.
+
+   Assuming uniform hashing and no duplicate data items in MAP,
+   this function runs in constant time.  (Amortized over an
+   iteration over all data items with a given HASH, its runtime
+   is proportional to the length of the hash chain for HASH, so
+   given a pathological hash function, e.g. one that returns a
+   constant value, its runtime degenerates to linear in the
+   length of NODE's hash chain.)
+
+   Nodes are returned in arbitrary order that may change whenever
+   the hash table's current capacity changes, as reported by
+   hmapx_capacity().  Calls to hmapx_insert(), hmapx_reserve(),
+   and hmapx_shrink() can change the capacity of a hash map.
+   Inserting a node with hmapx_insert_fast() or deleting one with
+   hmapx_delete() will not change the relative ordering of nodes.
+
+   The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE
+   macros provide convenient ways to iterate over all the nodes
+   with a given hash. */
+static inline struct hmapx_node *
+hmapx_next_with_hash (struct hmapx_node *node) 
+{
+  return HMAP_NEXT_WITH_HASH (node, struct hmapx_node, hmap_node);
+}
+
+/* Removes NODE from MAP and frees NODE.  The client is
+   responsible for freeing the user data associated with NODE, if
+   appropriate.
+
+   Assuming uniform hashing, this function runs in constant time.
+   (Its runtime is proportional to the position of NODE in its
+   hash chain, so given a pathological hash function, e.g. one
+   that returns a constant value, its runtime degenerates to
+   linear in the length of NODE's hash chain.)
+
+   This function never reduces the number of buckets in MAP.
+   When one deletes a large number of nodes from a hash table,
+   calling hmapx_shrink() afterward may therefore save a small
+   amount of memory.  It is also more expensive to iterate
+   through a very sparse hash table than a denser one, so
+   shrinking the hash table could also save some time.  However,
+   rehashing has an immediate cost that must be weighed against
+   these benefits.
+
+   hmapx_delete() does not change NODE's hash value reported by
+   hmapx_node_hash(). */
+static inline void
+hmapx_delete (struct hmapx *map, struct hmapx_node *node) 
+{
+  hmap_delete (&map->hmap, &node->hmap_node);
+  free (node);
+}
+
+/* Returns the first node in MAP, or a null pointer if MAP is
+   empty.
+
+   Amortized over iterating through every data element in MAP,
+   this function runs in constant time.  However, this assumes
+   that MAP is not excessively sparse, that is, that
+   hmapx_capacity(MAP) is at most a constant factor greater than
+   hmapx_count(MAP).  This will always be true unless many nodes
+   have been inserted into MAP and then most or all of them
+   deleted; in such a case, calling hmapx_shrink() is advised.
+
+   Nodes are returned in arbitrary order that may change whenever
+   the hash table's current capacity changes, as reported by
+   hmapx_capacity().  Calls to hmapx_insert(), hmapx_reserve(),
+   and hmapx_shrink() can change the capacity of a hash map.
+   Inserting a node with hmapx_insert_fast() or deleting one with
+   hmapx_delete() will not change the relative ordering of nodes.
+
+   The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide
+   convenient ways to iterate over all the nodes in a hash
+   map. */
+static inline struct hmapx_node *
+hmapx_first (const struct hmapx *map) 
+{
+  return HMAP_FIRST (struct hmapx_node, hmap_node, &map->hmap);
+}
+
+/* Returns the next node in MAP following NODE, or a null pointer
+   if NODE is the last node in MAP.
+
+   Amortized over iterating through every data element in MAP,
+   this function runs in constant time.  However, this assumes
+   that MAP is not excessively sparse, that is, that
+   hmapx_capacity(MAP) is at most a constant factor greater than
+   hmapx_count(MAP).  This will always be true unless many nodes
+   have been inserted into MAP and then most or all of them
+   deleted; in such a case, calling hmapx_shrink() is advised.
+
+   Nodes are returned in arbitrary order that may change whenever
+   the hash table's current capacity changes, as reported by
+   hmapx_capacity().  Calls to hmapx_insert(), hmapx_reserve(),
+   and hmapx_shrink() can change the capacity of a hash map.
+   Inserting a node with hmapx_insert_fast() or deleting one with
+   hmapx_delete() will not change the relative ordering of nodes.
+
+   The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide
+   convenient ways to iterate over all the nodes in a hash
+   map. */
+static inline struct hmapx_node *
+hmapx_next (const struct hmapx *map, const struct hmapx_node *node) 
+{
+  return HMAP_NEXT (node, struct hmapx_node, hmap_node, &map->hmap);
+}
+
+/* Returns the number of data items currently in MAP. */
+static inline size_t
+hmapx_count (const struct hmapx *map) 
+{
+  return hmap_count (&map->hmap);
+}
+
+/* Returns the current capacity of MAP, that is, the maximum
+   number of data elements that MAP may hold before it becomes
+   advisable to rehash.
+
+   The capacity is advisory only: it is possible to insert any
+   number of data elements into a hash map regardless of its
+   capacity.  However, inserting many more elements than the
+   map's capacity will degrade search performance. */
+static inline size_t
+hmapx_capacity (const struct hmapx *map) 
+{
+  return hmap_capacity (&map->hmap);
+}
+
+/* Changes NODE's data to DATA and its hash value to NEW_HASH.
+   NODE must reside in MAP.
+
+   This function does not verify that MAP does not already
+   contain a data item that duplicates DATA.  If duplicates
+   should be disallowed (which is the usual case), then the
+   client must check for duplicates before changing NODE's
+   value. */
+static inline void
+hmapx_change (struct hmapx *map,
+              struct hmapx_node *node, void *data, size_t new_hash) 
+{
+  hmapx_move (node, data);
+  hmapx_changed (map, node, new_hash);
+}
+
+/* Moves NODE around in MAP to compensate for its hash value
+   having changed to NEW_HASH.
+
+   This function does not verify that MAP does not already
+   contain a data item that duplicates the new value of NODE's
+   data.  If duplicates should be disallowed (which is the usual
+   case), then the client must check for duplicates before
+   changing NODE's value. */
+static inline void
+hmapx_changed (struct hmapx *map, struct hmapx_node *node, size_t new_hash) 
+{
+  hmap_changed (&map->hmap, &node->hmap_node, new_hash);
+}
+
+/* Updates NODE to compensate for its data item having moved
+   around in memory to new location DATA.  The data item's value
+   and hash value should not have changed.  (If they have
+   changed, call hmapx_change() instead.) */
+static inline void
+hmapx_move (struct hmapx_node *node, void *data)
+{
+  node->data = data;
+}
+
+#endif /* libpspp/hmapx.h */
diff --git a/tests/automake.mk b/tests/automake.mk

index e66da432abf254225cb655f9ab8a7af3d73a68d9..2f876cfbefea879bed780a8a127584272e18f438 100644 (file)
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -170,6 +170,8 @@ nodist_TESTS = \
         tests/libpspp/abt-test \
         tests/libpspp/bt-test \
         tests/libpspp/heap-test \
+       tests/libpspp/hmap-test \
+       tests/libpspp/hmapx-test \
         tests/libpspp/ll-test \
         tests/libpspp/llx-test \
         tests/libpspp/range-map-test \
@@ -209,6 +211,22 @@ tests_libpspp_heap_test_SOURCES = \
  tests_libpspp_heap_test_LDADD = gl/libgl.la @LIBINTL@
  tests_libpspp_heap_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10
  
+tests_libpspp_hmap_test_SOURCES = \
+       src/libpspp/hmap.c \
+       src/libpspp/hmap.h \
+       tests/libpspp/hmap-test.c
+tests_libpspp_hmap_test_LDADD = gl/libgl.la @LIBINTL@
+tests_libpspp_hmap_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10
+
+tests_libpspp_hmapx_test_SOURCES = \
+       src/libpspp/hmap.c \
+       src/libpspp/hmap.h \
+       src/libpspp/hmapx.c \
+       src/libpspp/hmapx.h \
+       tests/libpspp/hmapx-test.c
+tests_libpspp_hmapx_test_LDADD = gl/libgl.la @LIBINTL@
+tests_libpspp_hmapx_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10
+
  tests_libpspp_abt_test_SOURCES = \
         src/libpspp/abt.c \
         src/libpspp/abt.h \
diff --git a/tests/libpspp/hmap-test.c b/tests/libpspp/hmap-test.c

new file mode 100644 (file)

index 0000000..7ad8629
--- /dev/null
+++ b/tests/libpspp/hmap-test.c
@@ -0,0 +1,931 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+/* This is a test program for the hmap_* routines defined in
+   hmap.c.  This test program aims to be as comprehensive as
+   possible.  "gcov -a -b" should report 100% coverage of lines,
+   blocks and branches in hmap.c (when compiled with -DNDEBUG).
+   "valgrind --leak-check=yes --show-reachable=yes" should give a
+   clean report. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libpspp/hmap.h>
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libpspp/compiler.h>
+\f
+/* Currently running test. */
+static const char *test_name;
+
+/* Exit with a failure code.
+   (Place a breakpoint on this function while debugging.) */
+static void
+check_die (void)
+{
+  exit (EXIT_FAILURE);
+}
+
+/* If OK is not true, prints a message about failure on the
+   current source file and the given LINE and terminates. */
+static void
+check_func (bool ok, int line)
+{
+  if (!ok)
+    {
+      printf ("Check failed in %s test at %s, line %d\n",
+              test_name, __FILE__, line);
+      check_die ();
+    }
+}
+
+/* Verifies that EXPR evaluates to true.
+   If not, prints a message citing the calling line number and
+   terminates. */
+#define check(EXPR) check_func ((EXPR), __LINE__)
+
+/* Prints a message about memory exhaustion and exits with a
+   failure code. */
+static void
+xalloc_die (void)
+{
+  printf ("virtual memory exhausted\n");
+  exit (EXIT_FAILURE);
+}
+
+/* Allocates and returns N bytes of memory. */
+static void *
+xmalloc (size_t n)
+{
+  if (n != 0)
+    {
+      void *p = malloc (n);
+      if (p == NULL)
+        xalloc_die ();
+
+      return p;
+    }
+  else
+    return NULL;
+}
+
+static void *
+xmemdup (const void *p, size_t n)
+{
+  void *q = xmalloc (n);
+  memcpy (q, p, n);
+  return q;
+}
+
+/* Allocates and returns N * M bytes of memory. */
+static void *
+xnmalloc (size_t n, size_t m)
+{
+  if ((size_t) -1 / m <= n)
+    xalloc_die ();
+  return xmalloc (n * m);
+}
+\f
+/* Node type and support routines. */
+
+/* Test data element. */
+struct element
+  {
+    struct hmap_node node;    /* Embedded hash table element. */
+    int data;                 /* Primary value. */
+  };
+
+/* Returns the `struct element' that NODE is embedded within. */
+static struct element *
+hmap_node_to_element (const struct hmap_node *node)
+{
+  return HMAP_DATA (node, struct element, node);
+}
+
+/* Compares A and B and returns a strcmp-type return value. */
+static int
+compare_ints (const void *a_, const void *b_)
+{
+  const int *a = a_;
+  const int *b = b_;
+
+  return *a < *b ? -1 : *a > *b;
+}
+
+/* Swaps *A and *B. */
+static void
+swap (int *a, int *b)
+{
+  int t = *a;
+  *a = *b;
+  *b = t;
+}
+
+/* Reverses the order of the CNT integers starting at VALUES. */
+static void
+reverse (int *values, size_t cnt)
+{
+  size_t i = 0;
+  size_t j = cnt;
+
+  while (j > i)
+    swap (&values[i++], &values[--j]);
+}
+
+/* Arranges the CNT elements in VALUES into the lexicographically
+   next greater permutation.  Returns true if successful.
+   If VALUES is already the lexicographically greatest
+   permutation of its elements (i.e. ordered from greatest to
+   smallest), arranges them into the lexicographically least
+   permutation (i.e. ordered from smallest to largest) and
+   returns false. */
+static bool
+next_permutation (int *values, size_t cnt)
+{
+  if (cnt > 0)
+    {
+      size_t i = cnt - 1;
+      while (i != 0)
+        {
+          i--;
+          if (values[i] < values[i + 1])
+            {
+              size_t j;
+              for (j = cnt - 1; values[i] >= values[j]; j--)
+                continue;
+              swap (values + i, values + j);
+              reverse (values + (i + 1), cnt - (i + 1));
+              return true;
+            }
+        }
+
+      reverse (values, cnt);
+    }
+
+  return false;
+}
+
+/* Returns N!. */
+static unsigned int
+factorial (unsigned int n)
+{
+  unsigned int value = 1;
+  while (n > 1)
+    value *= n--;
+  return value;
+}
+
+/* Randomly shuffles the CNT elements in ARRAY, each of which is
+   SIZE bytes in size. */
+static void
+random_shuffle (void *array_, size_t cnt, size_t size)
+{
+  char *array = array_;
+  char *tmp = xmalloc (size);
+  size_t i;
+
+  for (i = 0; i < cnt; i++)
+    {
+      size_t j = rand () % (cnt - i) + i;
+      if (i != j)
+        {
+          memcpy (tmp, array + j * size, size);
+          memcpy (array + j * size, array + i * size, size);
+          memcpy (array + i * size, tmp, size);
+        }
+    }
+
+  free (tmp);
+}
+
+typedef size_t hash_function (int data);
+
+static size_t
+identity_hash (int data) 
+{
+  return data;
+}
+
+static size_t
+constant_hash (int data UNUSED) 
+{
+  return 0x12345678u;
+}
+
+static inline uint32_t
+md4_round (uint32_t a, uint32_t b, uint32_t c, uint32_t d,
+           uint32_t data, uint32_t n)
+{
+  uint32_t x = a + (d ^ (b & (c ^ d))) + data;
+  return (x << n) | (x >> (32 - n));
+}
+
+static size_t
+random_hash (int data)
+{
+  uint32_t a = data;
+  uint32_t b = data;
+  uint32_t c = data;
+  uint32_t d = data;
+  a = md4_round (a, b, c, d, 0, 3);
+  d = md4_round (d, a, b, c, 1, 7);
+  c = md4_round (c, d, a, b, 2, 11);
+  b = md4_round (b, c, d, a, 3, 19);
+  return a ^ b ^ c ^ d;
+}
+
+static struct hmap_node *
+find_element (struct hmap *hmap, int data, hash_function *hash)
+{
+  struct element *e;
+  HMAP_FOR_EACH_WITH_HASH (e, struct element, node, hash (data), hmap)
+    if (e->data == data)
+      break;
+  return &e->node;
+}
+
+/* Checks that HMAP contains the CNT ints in DATA, that its
+   structure is correct, and that certain operations on HMAP
+   produce the expected results. */
+static void
+check_hmap (struct hmap *hmap, const int data[], size_t cnt,
+            hash_function *hash)
+{
+  size_t i, j;
+  int *order;
+
+  check (hmap_count (hmap) == cnt);
+  check (cnt <= hmap_capacity (hmap));
+
+  order = xmemdup (data, cnt * sizeof *data);
+  qsort (order, cnt, sizeof *order, compare_ints);
+
+  for (i = 0; i < cnt; i = j)
+    {
+      struct element *e;
+      int count;
+
+      for (j = i + 1; j < cnt; j++)
+        if (order[i] != order[j])
+          break;
+
+      count = 0;
+      HMAP_FOR_EACH_WITH_HASH (e, struct element, node, hash (order[i]), hmap)
+        if (e->data == order[i]) 
+          count++;
+
+      check (count == j - i);
+    }
+
+  check (find_element (hmap, -1, hash) == NULL);
+
+  if (cnt == 0)
+    check (hmap_first (hmap) == NULL);
+  else
+    {
+      struct hmap_node *p;
+      int left;
+
+      left = cnt;
+      for (p = hmap_first (hmap), i = 0; i < cnt; p = hmap_next (hmap, p), i++)
+        {
+          struct element *e = hmap_node_to_element (p);
+          size_t j;
+
+          check (hmap_node_hash (&e->node) == hash (e->data));
+          for (j = 0; j < left; j++)
+            if (order[j] == e->data) 
+              {
+                order[j] = order[--left];
+                goto next;
+              }
+          check_die ();
+
+        next: ;
+        }
+      check (p == NULL);
+    }
+
+  free (order);
+}
+
+/* Inserts the CNT values from 0 to CNT - 1 (inclusive) into an
+   HMAP in the order specified by INSERTIONS, then deletes them in
+   the order specified by DELETIONS, checking the HMAP's contents
+   for correctness after each operation.  Uses HASH as the hash
+   function. */
+static void
+test_insert_delete (const int insertions[],
+                    const int deletions[],
+                    size_t cnt,
+                    hash_function *hash)
+{
+  struct element *elements;
+  struct hmap hmap;
+  size_t i;
+
+  elements = xnmalloc (cnt, sizeof *elements);
+  for (i = 0; i < cnt; i++)
+    elements[i].data = i;
+
+  hmap_init (&hmap);
+  hmap_reserve (&hmap, 1);
+  check_hmap (&hmap, NULL, 0, hash);
+  for (i = 0; i < cnt; i++)
+    {
+      size_t capacity;
+      hmap_insert (&hmap, &elements[insertions[i]].node, hash (insertions[i]));
+      check_hmap (&hmap, insertions, i + 1, hash);
+
+      /* A series of insertions should not produce a shrinkable hmap. */
+      capacity = hmap_capacity (&hmap);
+      hmap_shrink (&hmap);
+      check (capacity == hmap_capacity (&hmap));
+    }
+  for (i = 0; i < cnt; i++)
+    {
+      hmap_delete (&hmap, &elements[deletions[i]].node);
+      check_hmap (&hmap, deletions + i + 1, cnt - i - 1, hash);
+    }
+  hmap_destroy (&hmap);
+
+  free (elements);
+}
+\f
+/* Inserts values into an HMAP in each possible order, then
+   removes them in each possible order, up to a specified maximum
+   size, using hash function HASH. */
+static void
+test_insert_any_remove_any (hash_function *hash)
+{
+  const int max_elems = 5;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *insertions, *deletions;
+      unsigned int ins_perm_cnt;
+      int i;
+
+      insertions = xnmalloc (cnt, sizeof *insertions);
+      deletions = xnmalloc (cnt, sizeof *deletions);
+      for (i = 0; i < cnt; i++)
+        insertions[i] = i;
+
+      for (ins_perm_cnt = 0;
+           ins_perm_cnt == 0 || next_permutation (insertions, cnt);
+           ins_perm_cnt++)
+        {
+          unsigned int del_perm_cnt;
+          int i;
+
+          for (i = 0; i < cnt; i++)
+            deletions[i] = i;
+
+          for (del_perm_cnt = 0;
+               del_perm_cnt == 0 || next_permutation (deletions, cnt);
+               del_perm_cnt++)
+            test_insert_delete (insertions, deletions, cnt, hash);
+
+          check (del_perm_cnt == factorial (cnt));
+        }
+      check (ins_perm_cnt == factorial (cnt));
+
+      free (insertions);
+      free (deletions);
+    }
+}
+
+static void
+test_insert_any_remove_any_random_hash (void) 
+{
+  test_insert_any_remove_any (random_hash);
+}
+
+static void
+test_insert_any_remove_any_identity_hash (void) 
+{
+  test_insert_any_remove_any (identity_hash);
+}
+
+static void
+test_insert_any_remove_any_constant_hash (void) 
+{
+  test_insert_any_remove_any (constant_hash);
+}
+
+/* Inserts values into an HMAP in each possible order, then
+   removes them in the same order, up to a specified maximum
+   size, using hash function HASH. */
+static void
+test_insert_any_remove_same (hash_function *hash)
+{
+  const int max_elems = 7;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *values;
+      unsigned int permutation_cnt;
+      int i;
+
+      values = xnmalloc (cnt, sizeof *values);
+      for (i = 0; i < cnt; i++)
+        values[i] = i;
+
+      for (permutation_cnt = 0;
+           permutation_cnt == 0 || next_permutation (values, cnt);
+           permutation_cnt++)
+        test_insert_delete (values, values, cnt, hash);
+      check (permutation_cnt == factorial (cnt));
+
+      free (values);
+    }
+}
+
+static void
+test_insert_any_remove_same_random_hash (void) 
+{
+  test_insert_any_remove_same (random_hash);
+}
+
+static void
+test_insert_any_remove_same_identity_hash (void) 
+{
+  test_insert_any_remove_same (identity_hash);
+}
+
+static void
+test_insert_any_remove_same_constant_hash (void) 
+{
+  test_insert_any_remove_same (constant_hash);
+}
+
+/* Inserts values into an HMAP in each possible order, then
+   removes them in reverse order, up to a specified maximum
+   size, using hash function HASH. */
+static void
+test_insert_any_remove_reverse (hash_function *hash)
+{
+  const int max_elems = 7;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *insertions, *deletions;
+      unsigned int permutation_cnt;
+      int i;
+
+      insertions = xnmalloc (cnt, sizeof *insertions);
+      deletions = xnmalloc (cnt, sizeof *deletions);
+      for (i = 0; i < cnt; i++)
+        insertions[i] = i;
+
+      for (permutation_cnt = 0;
+           permutation_cnt == 0 || next_permutation (insertions, cnt);
+           permutation_cnt++)
+        {
+          memcpy (deletions, insertions, sizeof *insertions * cnt);
+          reverse (deletions, cnt);
+
+          test_insert_delete (insertions, deletions, cnt, hash);
+        }
+      check (permutation_cnt == factorial (cnt));
+
+      free (insertions);
+      free (deletions);
+    }
+}
+
+static void
+test_insert_any_remove_reverse_random_hash (void)
+{
+  test_insert_any_remove_reverse (random_hash);
+}
+
+static void
+test_insert_any_remove_reverse_identity_hash (void)
+{
+  test_insert_any_remove_reverse (identity_hash);
+}
+
+static void
+test_insert_any_remove_reverse_constant_hash (void)
+{
+  test_insert_any_remove_reverse (constant_hash);
+}
+
+/* Inserts and removes up to MAX_ELEMS values in an hmap, in
+   random order, using hash function HASH. */
+static void
+test_random_sequence (int max_elems, hash_function *hash)
+{
+  const int max_trials = 8;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt += 2)
+    {
+      int *insertions, *deletions;
+      int trial;
+      int i;
+
+      insertions = xnmalloc (cnt, sizeof *insertions);
+      deletions = xnmalloc (cnt, sizeof *deletions);
+      for (i = 0; i < cnt; i++)
+        insertions[i] = i;
+      for (i = 0; i < cnt; i++)
+        deletions[i] = i;
+
+      for (trial = 0; trial < max_trials; trial++)
+        {
+          random_shuffle (insertions, cnt, sizeof *insertions);
+          random_shuffle (deletions, cnt, sizeof *deletions);
+
+          test_insert_delete (insertions, deletions, cnt, hash);
+        }
+
+      free (insertions);
+      free (deletions);
+    }
+}
+
+static void
+test_random_sequence_random_hash (void) 
+{
+  test_random_sequence (64, random_hash);
+}
+
+static void
+test_random_sequence_identity_hash (void) 
+{
+  test_random_sequence (64, identity_hash);
+}
+
+static void
+test_random_sequence_constant_hash (void) 
+{
+  test_random_sequence (32, constant_hash);
+}
+
+/* Inserts MAX_ELEMS elements into an HMAP in ascending order,
+   then delete in ascending order and shrink the hmap at each
+   step, using hash function HASH. */
+static void
+test_insert_ordered (int max_elems, hash_function *hash)
+{
+  struct element *elements;
+  int *values;
+  struct hmap hmap;
+  int i;
+
+  hmap_init (&hmap);
+  elements = xnmalloc (max_elems, sizeof *elements);
+  values = xnmalloc (max_elems, sizeof *values);
+  for (i = 0; i < max_elems; i++)
+    {
+      values[i] = elements[i].data = i;
+      hmap_insert (&hmap, &elements[i].node, hash (elements[i].data));
+      check_hmap (&hmap, values, i + 1, hash);
+
+      if (hash == identity_hash) 
+        {
+          /* Check that every every hash bucket has (almost) the
+             same number of nodes in it.  */
+          int min = INT_MAX;
+          int max = INT_MIN;
+          int j;
+
+          for (j = 0; j <= hmap.mask; j++) 
+            {
+              int count = 0;
+              struct hmap_node *node;
+
+              for (node = hmap.buckets[j]; node != NULL; node = node->next)
+                count++;
+              if (count < min)
+                min = count;
+              if (count > max)
+                max = count;
+            }
+          check (max - min <= 1);
+        }
+    }
+  for (i = 0; i < max_elems; i++)
+    {
+      hmap_delete (&hmap, &elements[i].node);
+      hmap_shrink (&hmap);
+      check_hmap (&hmap, values + i + 1, max_elems - i - 1, hash);
+    }
+  hmap_destroy (&hmap);
+  free (elements);
+  free (values);
+}
+
+static void
+test_insert_ordered_random_hash (void)
+{
+  test_insert_ordered (1024, random_hash);
+}
+
+static void
+test_insert_ordered_identity_hash (void)
+{
+  test_insert_ordered (1024, identity_hash);
+}
+
+static void
+test_insert_ordered_constant_hash (void)
+{
+  test_insert_ordered (128, constant_hash);
+}
+
+/* Inserts up to MAX_ELEMS elements into an HMAP, then moves the
+   nodes around in memory, using hash function HASH. */
+static void
+test_moved (int max_elems, hash_function *hash)
+{
+  struct element *e[2];
+  int cur;
+  int *values;
+  struct hmap hmap;
+  int i, j;
+
+  hmap_init (&hmap);
+  e[0] = xnmalloc (max_elems, sizeof *e[0]);
+  e[1] = xnmalloc (max_elems, sizeof *e[1]);
+  values = xnmalloc (max_elems, sizeof *values);
+  cur = 0;
+  for (i = 0; i < max_elems; i++)
+    {
+      values[i] = e[cur][i].data = i;
+      hmap_insert (&hmap, &e[cur][i].node, hash (e[cur][i].data));
+      check_hmap (&hmap, values, i + 1, hash);
+
+      for (j = 0; j <= i; j++)
+        {
+          e[!cur][j] = e[cur][j];
+          hmap_moved (&hmap, &e[!cur][j].node, &e[cur][j].node);
+          check_hmap (&hmap, values, i + 1, hash);
+        }
+      cur = !cur;
+    }
+  hmap_destroy (&hmap);
+  free (e[0]);
+  free (e[1]);
+  free (values);
+}
+
+static void
+test_moved_random_hash (void) 
+{
+  test_moved (128, random_hash);
+}
+
+static void
+test_moved_identity_hash (void) 
+{
+  test_moved (128, identity_hash);
+}
+
+static void
+test_moved_constant_hash (void) 
+{
+  test_moved (32, constant_hash);
+}
+
+/* Inserts values into an HMAP, then changes their values, using
+   hash function HASH. */
+static void
+test_changed (hash_function *hash)
+{
+  const int max_elems = 6;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *values, *changed_values;
+      struct element *elements;
+      unsigned int permutation_cnt;
+      int i;
+
+      values = xnmalloc (cnt, sizeof *values);
+      changed_values = xnmalloc (cnt, sizeof *changed_values);
+      elements = xnmalloc (cnt, sizeof *elements);
+      for (i = 0; i < cnt; i++)
+        values[i] = i;
+
+      for (permutation_cnt = 0;
+           permutation_cnt == 0 || next_permutation (values, cnt);
+           permutation_cnt++)
+        {
+          for (i = 0; i < cnt; i++)
+            {
+              int j, k;
+              for (j = 0; j <= cnt; j++)
+                {
+                  struct hmap hmap;
+
+                  hmap_init (&hmap);
+
+                  /* Add to HMAP in order. */
+                  for (k = 0; k < cnt; k++)
+                    {
+                      int n = values[k];
+                      elements[n].data = n;
+                      hmap_insert (&hmap, &elements[n].node,
+                                   hash (elements[n].data));
+                    }
+                  check_hmap (&hmap, values, cnt, hash);
+
+                  /* Change value i to j. */
+                  elements[i].data = j;
+                  hmap_changed (&hmap, &elements[i].node,
+                                hash (elements[i].data));
+                  for (k = 0; k < cnt; k++)
+                    changed_values[k] = k;
+                  changed_values[i] = j;
+                  check_hmap (&hmap, changed_values, cnt, hash);
+
+                  hmap_destroy (&hmap);
+                }
+            }
+        }
+      check (permutation_cnt == factorial (cnt));
+
+      free (values);
+      free (changed_values);
+      free (elements);
+    }
+}
+
+static void
+test_changed_random_hash (void)
+{
+  test_changed (random_hash);
+}
+
+static void
+test_changed_identity_hash (void)
+{
+  test_changed (identity_hash);
+}
+
+static void
+test_changed_constant_hash (void)
+{
+  test_changed (constant_hash);
+}
+
+static void
+test_swap (int max_elems, hash_function *hash) 
+{
+  struct element *elements;
+  int *values;
+  struct hmap a, b;
+  struct hmap *working, *empty;
+  int i;
+
+  hmap_init (&a);
+  hmap_init (&b);
+  working = &a;
+  empty = &b;
+  elements = xnmalloc (max_elems, sizeof *elements);
+  values = xnmalloc (max_elems, sizeof *values);
+  for (i = 0; i < max_elems; i++)
+    {
+      struct hmap *tmp;
+      values[i] = elements[i].data = i;
+      hmap_insert (working, &elements[i].node, hash (elements[i].data));
+      check_hmap (working, values, i + 1, hash);
+      check_hmap (empty, NULL, 0, hash);
+      hmap_swap (&a, &b);
+      tmp = working;
+      working = empty;
+      empty = tmp;
+    }
+  hmap_destroy (&a);
+  hmap_destroy (&b);
+  free (elements);
+  free (values);
+}
+
+static void
+test_swap_random_hash (void) 
+{
+  test_swap (128, random_hash);
+}
+
+static void
+test_destroy_null (void) 
+{
+  hmap_destroy (NULL);
+}
+
+/* Test shrinking an empty hash table. */
+static void
+test_shrink_empty (void)
+{
+  struct hmap hmap;
+
+  hmap_init (&hmap);
+  hmap_reserve (&hmap, 123);
+  hmap_shrink (&hmap);
+  hmap_destroy (&hmap);
+}
+\f
+/* Main program. */
+
+/* Runs TEST_FUNCTION and prints a message about NAME. */
+static void
+run_test (void (*test_function) (void), const char *name)
+{
+  test_name = name;
+  putchar ('.');
+  fflush (stdout);
+  test_function ();
+}
+
+int
+main (void)
+{
+  run_test (test_insert_any_remove_any_random_hash,
+            "insert any order, delete any order (random hash)");
+  run_test (test_insert_any_remove_any_identity_hash,
+            "insert any order, delete any order (identity hash)");
+  run_test (test_insert_any_remove_any_constant_hash,
+            "insert any order, delete any order (constant hash)");
+
+  run_test (test_insert_any_remove_same_random_hash,
+            "insert any order, delete same order (random hash)");
+  run_test (test_insert_any_remove_same_identity_hash,
+            "insert any order, delete same order (identity hash)");
+  run_test (test_insert_any_remove_same_constant_hash,
+            "insert any order, delete same order (constant hash)");
+
+  run_test (test_insert_any_remove_reverse_random_hash,
+            "insert any order, delete reverse order (random hash)");
+  run_test (test_insert_any_remove_reverse_identity_hash,
+            "insert any order, delete reverse order (identity hash)");
+  run_test (test_insert_any_remove_reverse_constant_hash,
+            "insert any order, delete reverse order (constant hash)");
+
+  run_test (test_random_sequence_random_hash,
+            "insert and delete in random sequence (random hash)");
+  run_test (test_random_sequence_identity_hash,
+            "insert and delete in random sequence (identity hash)");
+  run_test (test_random_sequence_constant_hash,
+            "insert and delete in random sequence (constant hash)");
+
+  run_test (test_insert_ordered_random_hash,
+            "insert in ascending order (random hash)");
+  run_test (test_insert_ordered_identity_hash,
+            "insert in ascending order (identity hash)");
+  run_test (test_insert_ordered_constant_hash,
+            "insert in ascending order (constant hash)");
+
+  run_test (test_moved_random_hash,
+            "move elements around in memory (random hash)");
+  run_test (test_moved_identity_hash,
+            "move elements around in memory (identity hash)");
+  run_test (test_moved_constant_hash,
+            "move elements around in memory (constant hash)");
+
+  run_test (test_changed_random_hash,
+            "change key data in nodes (random hash)");
+  run_test (test_changed_identity_hash,
+            "change key data in nodes (identity hash)");
+  run_test (test_changed_constant_hash,
+            "change key data in nodes (constant hash)");
+
+  run_test (test_swap_random_hash, "test swapping tables");
+
+  run_test (test_destroy_null, "test destroying null table");
+  run_test (test_shrink_empty, "test shrinking an empty table");
+
+  putchar ('\n');
+
+  return 0;
+}
diff --git a/tests/libpspp/hmapx-test.c b/tests/libpspp/hmapx-test.c

new file mode 100644 (file)

index 0000000..fc08ca6
--- /dev/null
+++ b/tests/libpspp/hmapx-test.c
@@ -0,0 +1,1034 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+/* This is a test program for the hmapx_* routines defined in
+   hmapx.c.  This test program aims to be as comprehensive as
+   possible.  "gcov -a -b" should report 100% coverage of lines,
+   blocks and branches in hmapx.c (when compiled with -DNDEBUG).
+   "valgrind --leak-check=yes --show-reachable=yes" should give a
+   clean report. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libpspp/hmapx.h>
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libpspp/compiler.h>
+\f
+/* Currently running test. */
+static const char *test_name;
+
+/* If OK is not true, prints a message about failure on the
+   current source file and the given LINE and terminates. */
+static void
+check_func (bool ok, int line)
+{
+  if (!ok)
+    {
+      printf ("Check failed in %s test at %s, line %d\n",
+              test_name, __FILE__, line);
+      abort ();
+    }
+}
+
+/* Verifies that EXPR evaluates to true.
+   If not, prints a message citing the calling line number and
+   terminates. */
+#define check(EXPR) check_func ((EXPR), __LINE__)
+
+/* Prints a message about memory exhaustion and exits with a
+   failure code. */
+static void
+xalloc_die (void)
+{
+  printf ("virtual memory exhausted\n");
+  exit (EXIT_FAILURE);
+}
+
+/* Allocates and returns N bytes of memory. */
+static void *
+xmalloc (size_t n)
+{
+  if (n != 0)
+    {
+      void *p = malloc (n);
+      if (p == NULL)
+        xalloc_die ();
+
+      return p;
+    }
+  else
+    return NULL;
+}
+
+static void *
+xmemdup (const void *p, size_t n)
+{
+  void *q = xmalloc (n);
+  memcpy (q, p, n);
+  return q;
+}
+
+/* Allocates and returns N * M bytes of memory. */
+static void *
+xnmalloc (size_t n, size_t m)
+{
+  if ((size_t) -1 / m <= n)
+    xalloc_die ();
+  return xmalloc (n * m);
+}
+\f
+/* Node type and support routines. */
+
+/* Test data element. */
+struct element
+  {
+    int data;                 /* Primary value. */
+  };
+
+/* Compares A and B and returns a strcmp-type return value. */
+static int
+compare_ints (const void *a_, const void *b_)
+{
+  const int *a = a_;
+  const int *b = b_;
+
+  return *a < *b ? -1 : *a > *b;
+}
+
+/* Swaps *A and *B. */
+static void
+swap (int *a, int *b)
+{
+  int t = *a;
+  *a = *b;
+  *b = t;
+}
+
+/* Reverses the order of the CNT integers starting at VALUES. */
+static void
+reverse (int *values, size_t cnt)
+{
+  size_t i = 0;
+  size_t j = cnt;
+
+  while (j > i)
+    swap (&values[i++], &values[--j]);
+}
+
+/* Arranges the CNT elements in VALUES into the lexicographically
+   next greater permutation.  Returns true if successful.
+   If VALUES is already the lexicographically greatest
+   permutation of its elements (i.e. ordered from greatest to
+   smallest), arranges them into the lexicographically least
+   permutation (i.e. ordered from smallest to largest) and
+   returns false. */
+static bool
+next_permutation (int *values, size_t cnt)
+{
+  if (cnt > 0)
+    {
+      size_t i = cnt - 1;
+      while (i != 0)
+        {
+          i--;
+          if (values[i] < values[i + 1])
+            {
+              size_t j;
+              for (j = cnt - 1; values[i] >= values[j]; j--)
+                continue;
+              swap (values + i, values + j);
+              reverse (values + (i + 1), cnt - (i + 1));
+              return true;
+            }
+        }
+
+      reverse (values, cnt);
+    }
+
+  return false;
+}
+
+/* Returns N!. */
+static unsigned int
+factorial (unsigned int n)
+{
+  unsigned int value = 1;
+  while (n > 1)
+    value *= n--;
+  return value;
+}
+
+/* Randomly shuffles the CNT elements in ARRAY, each of which is
+   SIZE bytes in size. */
+static void
+random_shuffle (void *array_, size_t cnt, size_t size)
+{
+  char *array = array_;
+  char *tmp = xmalloc (size);
+  size_t i;
+
+  for (i = 0; i < cnt; i++)
+    {
+      size_t j = rand () % (cnt - i) + i;
+      if (i != j)
+        {
+          memcpy (tmp, array + j * size, size);
+          memcpy (array + j * size, array + i * size, size);
+          memcpy (array + i * size, tmp, size);
+        }
+    }
+
+  free (tmp);
+}
+
+typedef size_t hash_function (int data);
+
+static size_t
+identity_hash (int data) 
+{
+  return data;
+}
+
+static size_t
+constant_hash (int data UNUSED) 
+{
+  return 0x12345678u;
+}
+
+static inline uint32_t
+md4_round (uint32_t a, uint32_t b, uint32_t c, uint32_t d,
+           uint32_t data, uint32_t n)
+{
+  uint32_t x = a + (d ^ (b & (c ^ d))) + data;
+  return (x << n) | (x >> (32 - n));
+}
+
+static size_t
+random_hash (int data)
+{
+  uint32_t a = data;
+  uint32_t b = data;
+  uint32_t c = data;
+  uint32_t d = data;
+  a = md4_round (a, b, c, d, 0, 3);
+  d = md4_round (d, a, b, c, 1, 7);
+  c = md4_round (c, d, a, b, 2, 11);
+  b = md4_round (b, c, d, a, 3, 19);
+  return a ^ b ^ c ^ d;
+}
+
+static struct hmapx_node *
+find_element (struct hmapx *hmapx, int data, hash_function *hash)
+{
+  struct hmapx_node *node;
+  struct element *e;
+  HMAPX_FOR_EACH_WITH_HASH (e, node, hash (data), hmapx)
+    if (e->data == data)
+      break;
+  return node;
+}
+
+/* Checks that HMAPX contains the CNT ints in DATA, that its
+   structure is correct, and that certain operations on HMAPX
+   produce the expected results. */
+static void
+check_hmapx (struct hmapx *hmapx, const int data[], size_t cnt,
+            hash_function *hash)
+{
+  size_t i, j;
+  int *order;
+
+  check (hmapx_count (hmapx) == cnt);
+  check (cnt <= hmapx_capacity (hmapx));
+
+  order = xmemdup (data, cnt * sizeof *data);
+  qsort (order, cnt, sizeof *order, compare_ints);
+
+  for (i = 0; i < cnt; i = j)
+    {
+      struct hmapx_node *node;
+      struct element *e;
+      int count;
+
+      for (j = i + 1; j < cnt; j++)
+        if (order[i] != order[j])
+          break;
+
+      count = 0;
+      HMAPX_FOR_EACH_WITH_HASH (e, node, hash (order[i]), hmapx)
+        if (e->data == order[i]) 
+          count++; 
+
+      check (count == j - i);
+    }
+
+  check (find_element (hmapx, -1, hash) == NULL);
+
+  if (cnt == 0)
+    check (hmapx_first (hmapx) == NULL);
+  else
+    {
+      struct hmapx_node *p;
+      int left;
+
+      left = cnt;
+      for (p = hmapx_first (hmapx), i = 0; i < cnt;
+           p = hmapx_next (hmapx, p), i++)
+        {
+          struct element *e = hmapx_node_data (p);
+          size_t j;
+
+          check (hmapx_node_hash (p) == hash (e->data));
+          for (j = 0; j < left; j++)
+            if (order[j] == e->data) 
+              {
+                order[j] = order[--left];
+                goto next;
+              }
+          abort ();
+
+        next: ;
+        }
+      check (p == NULL);
+    }
+
+  free (order);
+}
+
+/* Inserts the CNT values from 0 to CNT - 1 (inclusive) into an
+   HMAPX in the order specified by INSERTIONS, then deletes them in
+   the order specified by DELETIONS, checking the HMAPX's contents
+   for correctness after each operation.  Uses HASH as the hash
+   function. */
+static void
+test_insert_delete (const int insertions[],
+                    const int deletions[],
+                    size_t cnt,
+                    hash_function *hash,
+                    size_t reserve)
+{
+  struct element *elements;
+  struct hmapx_node **nodes;
+  struct hmapx hmapx;
+  size_t i;
+
+  elements = xnmalloc (cnt, sizeof *elements);
+  nodes = xnmalloc (cnt, sizeof *nodes);
+  for (i = 0; i < cnt; i++)
+    elements[i].data = i;
+
+  hmapx_init (&hmapx);
+  hmapx_reserve (&hmapx, reserve);
+  check_hmapx (&hmapx, NULL, 0, hash);
+  for (i = 0; i < cnt; i++)
+    {
+      struct hmapx_node *(*insert) (struct hmapx *, void *, size_t hash);
+      size_t capacity;
+
+      /* Insert the node.  Use hmapx_insert_fast if we have not
+         yet exceeded the reserve. */
+      insert = i < reserve ? hmapx_insert_fast : hmapx_insert;
+      nodes[insertions[i]] = insert (&hmapx, &elements[insertions[i]],
+                                     hash (insertions[i]));
+      check_hmapx (&hmapx, insertions, i + 1, hash);
+
+      /* A series of insertions should not produce a shrinkable hmapx. */
+      if (i >= reserve) 
+        {
+          capacity = hmapx_capacity (&hmapx);
+          hmapx_shrink (&hmapx);
+          check (capacity == hmapx_capacity (&hmapx)); 
+        }
+    }
+  for (i = 0; i < cnt; i++)
+    {
+      hmapx_delete (&hmapx, nodes[deletions[i]]);
+      check_hmapx (&hmapx, deletions + i + 1, cnt - i - 1, hash);
+    }
+  hmapx_destroy (&hmapx);
+
+  free (elements);
+  free (nodes);
+}
+\f
+/* Inserts values into an HMAPX in each possible order, then
+   removes them in each possible order, up to a specified maximum
+   size, using hash function HASH. */
+static void
+test_insert_any_remove_any (hash_function *hash)
+{
+  const int max_elems = 5;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *insertions, *deletions;
+      unsigned int ins_perm_cnt;
+      int i;
+
+      insertions = xnmalloc (cnt, sizeof *insertions);
+      deletions = xnmalloc (cnt, sizeof *deletions);
+      for (i = 0; i < cnt; i++)
+        insertions[i] = i;
+
+      for (ins_perm_cnt = 0;
+           ins_perm_cnt == 0 || next_permutation (insertions, cnt);
+           ins_perm_cnt++)
+        {
+          unsigned int del_perm_cnt;
+          int i;
+
+          for (i = 0; i < cnt; i++)
+            deletions[i] = i;
+
+          for (del_perm_cnt = 0;
+               del_perm_cnt == 0 || next_permutation (deletions, cnt);
+               del_perm_cnt++)
+            test_insert_delete (insertions, deletions, cnt, hash, 1);
+
+          check (del_perm_cnt == factorial (cnt));
+        }
+      check (ins_perm_cnt == factorial (cnt));
+
+      free (insertions);
+      free (deletions);
+    }
+}
+
+static void
+test_insert_any_remove_any_random_hash (void) 
+{
+  test_insert_any_remove_any (random_hash);
+}
+
+static void
+test_insert_any_remove_any_identity_hash (void) 
+{
+  test_insert_any_remove_any (identity_hash);
+}
+
+static void
+test_insert_any_remove_any_constant_hash (void) 
+{
+  test_insert_any_remove_any (constant_hash);
+}
+
+/* Inserts values into an HMAPX in each possible order, then
+   removes them in the same order, up to a specified maximum
+   size, using hash function HASH. */
+static void
+test_insert_any_remove_same (hash_function *hash)
+{
+  const int max_elems = 7;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *values;
+      unsigned int permutation_cnt;
+      int i;
+
+      values = xnmalloc (cnt, sizeof *values);
+      for (i = 0; i < cnt; i++)
+        values[i] = i;
+
+      for (permutation_cnt = 0;
+           permutation_cnt == 0 || next_permutation (values, cnt);
+           permutation_cnt++)
+        test_insert_delete (values, values, cnt, hash, cnt / 2);
+      check (permutation_cnt == factorial (cnt));
+
+      free (values);
+    }
+}
+
+static void
+test_insert_any_remove_same_random_hash (void) 
+{
+  test_insert_any_remove_same (random_hash);
+}
+
+static void
+test_insert_any_remove_same_identity_hash (void) 
+{
+  test_insert_any_remove_same (identity_hash);
+}
+
+static void
+test_insert_any_remove_same_constant_hash (void) 
+{
+  test_insert_any_remove_same (constant_hash);
+}
+
+/* Inserts values into an HMAPX in each possible order, then
+   removes them in reverse order, up to a specified maximum
+   size, using hash function HASH. */
+static void
+test_insert_any_remove_reverse (hash_function *hash)
+{
+  const int max_elems = 7;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *insertions, *deletions;
+      unsigned int permutation_cnt;
+      int i;
+
+      insertions = xnmalloc (cnt, sizeof *insertions);
+      deletions = xnmalloc (cnt, sizeof *deletions);
+      for (i = 0; i < cnt; i++)
+        insertions[i] = i;
+
+      for (permutation_cnt = 0;
+           permutation_cnt == 0 || next_permutation (insertions, cnt);
+           permutation_cnt++)
+        {
+          memcpy (deletions, insertions, sizeof *insertions * cnt);
+          reverse (deletions, cnt);
+
+          test_insert_delete (insertions, deletions, cnt, hash, cnt);
+        }
+      check (permutation_cnt == factorial (cnt));
+
+      free (insertions);
+      free (deletions);
+    }
+}
+
+static void
+test_insert_any_remove_reverse_random_hash (void)
+{
+  test_insert_any_remove_reverse (random_hash);
+}
+
+static void
+test_insert_any_remove_reverse_identity_hash (void)
+{
+  test_insert_any_remove_reverse (identity_hash);
+}
+
+static void
+test_insert_any_remove_reverse_constant_hash (void)
+{
+  test_insert_any_remove_reverse (constant_hash);
+}
+
+/* Inserts and removes up to MAX_ELEMS values in an hmapx, in
+   random order, using hash function HASH. */
+static void
+test_random_sequence (int max_elems, hash_function *hash)
+{
+  const int max_trials = 8;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt += 2)
+    {
+      int *insertions, *deletions;
+      int trial;
+      int i;
+
+      insertions = xnmalloc (cnt, sizeof *insertions);
+      deletions = xnmalloc (cnt, sizeof *deletions);
+      for (i = 0; i < cnt; i++)
+        insertions[i] = i;
+      for (i = 0; i < cnt; i++)
+        deletions[i] = i;
+
+      for (trial = 0; trial < max_trials; trial++)
+        {
+          random_shuffle (insertions, cnt, sizeof *insertions);
+          random_shuffle (deletions, cnt, sizeof *deletions);
+
+          test_insert_delete (insertions, deletions, cnt, hash, 0);
+        }
+
+      free (insertions);
+      free (deletions);
+    }
+}
+
+static void
+test_random_sequence_random_hash (void) 
+{
+  test_random_sequence (64, random_hash);
+}
+
+static void
+test_random_sequence_identity_hash (void) 
+{
+  test_random_sequence (64, identity_hash);
+}
+
+static void
+test_random_sequence_constant_hash (void) 
+{
+  test_random_sequence (32, constant_hash);
+}
+
+/* Inserts MAX_ELEMS elements into an HMAPX in ascending order,
+   then delete in ascending order and shrink the hmapx at each
+   step, using hash function HASH. */
+static void
+test_insert_ordered (int max_elems, hash_function *hash)
+{
+  struct element *elements;
+  struct hmapx_node **nodes;
+  int *values;
+  struct hmapx hmapx;
+  int i;
+
+  hmapx_init (&hmapx);
+  elements = xnmalloc (max_elems, sizeof *elements);
+  nodes = xnmalloc (max_elems, sizeof *nodes);
+  values = xnmalloc (max_elems, sizeof *values);
+  for (i = 0; i < max_elems; i++)
+    {
+      values[i] = elements[i].data = i;
+      nodes[i] = hmapx_insert (&hmapx, &elements[i], hash (elements[i].data));
+      check_hmapx (&hmapx, values, i + 1, hash);
+
+      if (hash == identity_hash) 
+        {
+          /* Check that every every hash bucket has (almost) the
+             same number of nodes in it.  */
+          int min = INT_MAX;
+          int max = INT_MIN;
+          int j;
+
+          for (j = 0; j <= hmapx.hmap.mask; j++) 
+            {
+              int count = 0;
+              struct hmap_node *node;
+
+              for (node = hmapx.hmap.buckets[j]; node != NULL;
+                   node = node->next)
+                count++;
+              if (count < min)
+                min = count;
+              if (count > max)
+                max = count;
+            }
+          check (max - min <= 1);
+        }
+    }
+  for (i = 0; i < max_elems; i++)
+    {
+      hmapx_delete (&hmapx, nodes[i]);
+      hmapx_shrink (&hmapx);
+      check_hmapx (&hmapx, values + i + 1, max_elems - i - 1, hash);
+    }
+  hmapx_destroy (&hmapx);
+  free (elements);
+  free (nodes);
+  free (values);
+}
+
+static void
+test_insert_ordered_random_hash (void)
+{
+  test_insert_ordered (1024, random_hash);
+}
+
+static void
+test_insert_ordered_identity_hash (void)
+{
+  test_insert_ordered (1024, identity_hash);
+}
+
+static void
+test_insert_ordered_constant_hash (void)
+{
+  test_insert_ordered (128, constant_hash);
+}
+
+/* Inserts up to MAX_ELEMS elements into an HMAPX, then moves the
+   nodes around in memory, using hash function HASH. */
+static void
+test_moved (int max_elems, hash_function *hash)
+{
+  struct element *e[2];
+  int cur;
+  int *values;
+  struct hmapx_node **nodes;
+  struct hmapx hmapx;
+  int i, j;
+
+  hmapx_init (&hmapx);
+  e[0] = xnmalloc (max_elems, sizeof *e[0]);
+  e[1] = xnmalloc (max_elems, sizeof *e[1]);
+  values = xnmalloc (max_elems, sizeof *values);
+  nodes = xnmalloc (max_elems, sizeof *nodes);
+  cur = 0;
+  for (i = 0; i < max_elems; i++)
+    {
+      values[i] = e[cur][i].data = i;
+      nodes[i] = hmapx_insert (&hmapx, &e[cur][i], hash (e[cur][i].data));
+      check_hmapx (&hmapx, values, i + 1, hash);
+
+      for (j = 0; j <= i; j++)
+        {
+          e[!cur][j] = e[cur][j];
+          hmapx_move (nodes[j], &e[cur][j]);
+          check_hmapx (&hmapx, values, i + 1, hash);
+        }
+      cur = !cur;
+    }
+  hmapx_destroy (&hmapx);
+  free (e[0]);
+  free (e[1]);
+  free (values);
+  free (nodes);
+}
+
+static void
+test_moved_random_hash (void) 
+{
+  test_moved (128, random_hash);
+}
+
+static void
+test_moved_identity_hash (void) 
+{
+  test_moved (128, identity_hash);
+}
+
+static void
+test_moved_constant_hash (void) 
+{
+  test_moved (32, constant_hash);
+}
+
+/* Inserts values into an HMAPX, then changes their values, using
+   hash function HASH. */
+static void
+test_changed (hash_function *hash)
+{
+  const int max_elems = 6;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *values, *changed_values;
+      struct hmapx_node **nodes;
+      struct element *elements;
+      unsigned int permutation_cnt;
+      int i;
+
+      values = xnmalloc (cnt, sizeof *values);
+      changed_values = xnmalloc (cnt, sizeof *changed_values);
+      elements = xnmalloc (cnt, sizeof *elements);
+      nodes = xnmalloc (cnt, sizeof *nodes);
+      for (i = 0; i < cnt; i++)
+        values[i] = i;
+
+      for (permutation_cnt = 0;
+           permutation_cnt == 0 || next_permutation (values, cnt);
+           permutation_cnt++)
+        {
+          for (i = 0; i < cnt; i++)
+            {
+              int j, k;
+              for (j = 0; j <= cnt; j++)
+                {
+                  struct hmapx hmapx;
+
+                  hmapx_init (&hmapx);
+
+                  /* Add to HMAPX in order. */
+                  for (k = 0; k < cnt; k++)
+                    {
+                      int n = values[k];
+                      elements[n].data = n;
+                      nodes[n] = hmapx_insert (&hmapx, &elements[n],
+                                               hash (elements[n].data));
+                    }
+                  check_hmapx (&hmapx, values, cnt, hash);
+
+                  /* Change value i to j. */
+                  elements[i].data = j;
+                  hmapx_changed (&hmapx, nodes[i], 
+                                 hash (elements[i].data));
+                  for (k = 0; k < cnt; k++)
+                    changed_values[k] = k;
+                  changed_values[i] = j;
+                  check_hmapx (&hmapx, changed_values, cnt, hash);
+
+                  hmapx_destroy (&hmapx);
+                }
+            }
+        }
+      check (permutation_cnt == factorial (cnt));
+
+      free (values);
+      free (changed_values);
+      free (elements);
+      free (nodes);
+    }
+}
+
+static void
+test_changed_random_hash (void)
+{
+  test_changed (random_hash);
+}
+
+static void
+test_changed_identity_hash (void)
+{
+  test_changed (identity_hash);
+}
+
+static void
+test_changed_constant_hash (void)
+{
+  test_changed (constant_hash);
+}
+
+/* Inserts values into an HMAPX, then changes and moves their
+   values, using hash function HASH. */
+static void
+test_change (hash_function *hash)
+{
+  const int max_elems = 6;
+  int cnt;
+
+  for (cnt = 0; cnt <= max_elems; cnt++)
+    {
+      int *values, *changed_values;
+      struct hmapx_node **nodes;
+      struct element *elements;
+      struct element replacement;
+      unsigned int permutation_cnt;
+      int i;
+
+      values = xnmalloc (cnt, sizeof *values);
+      changed_values = xnmalloc (cnt, sizeof *changed_values);
+      elements = xnmalloc (cnt, sizeof *elements);
+      nodes = xnmalloc (cnt, sizeof *nodes);
+      for (i = 0; i < cnt; i++)
+        values[i] = i;
+
+      for (permutation_cnt = 0;
+           permutation_cnt == 0 || next_permutation (values, cnt);
+           permutation_cnt++)
+        {
+          for (i = 0; i < cnt; i++)
+            {
+              int j, k;
+              for (j = 0; j <= cnt; j++)
+                {
+                  struct hmapx hmapx;
+
+                  hmapx_init (&hmapx);
+
+                  /* Add to HMAPX in order. */
+                  for (k = 0; k < cnt; k++)
+                    {
+                      int n = values[k];
+                      elements[n].data = n;
+                      nodes[n] = hmapx_insert (&hmapx, &elements[n],
+                                               hash (elements[n].data));
+                    }
+                  check_hmapx (&hmapx, values, cnt, hash);
+
+                  /* Change value i to j. */
+                  replacement.data = j;
+                  hmapx_change (&hmapx, nodes[i], &replacement, hash (j));
+                  for (k = 0; k < cnt; k++)
+                    changed_values[k] = k;
+                  changed_values[i] = j;
+                  check_hmapx (&hmapx, changed_values, cnt, hash);
+
+                  hmapx_destroy (&hmapx);
+                }
+            }
+        }
+      check (permutation_cnt == factorial (cnt));
+
+      free (values);
+      free (changed_values);
+      free (elements);
+      free (nodes);
+    }
+}
+
+static void
+test_change_random_hash (void)
+{
+  test_change (random_hash);
+}
+
+static void
+test_change_identity_hash (void)
+{
+  test_change (identity_hash);
+}
+
+static void
+test_change_constant_hash (void)
+{
+  test_change (constant_hash);
+}
+
+static void
+test_swap (int max_elems, hash_function *hash) 
+{
+  struct element *elements;
+  int *values;
+  struct hmapx a, b;
+  struct hmapx *working, *empty;
+  int i;
+
+  hmapx_init (&a);
+  hmapx_init (&b);
+  working = &a;
+  empty = &b;
+  elements = xnmalloc (max_elems, sizeof *elements);
+  values = xnmalloc (max_elems, sizeof *values);
+  for (i = 0; i < max_elems; i++)
+    {
+      struct hmapx *tmp;
+      values[i] = elements[i].data = i;
+      hmapx_insert (working, &elements[i], hash (elements[i].data));
+      check_hmapx (working, values, i + 1, hash);
+      check_hmapx (empty, NULL, 0, hash);
+      hmapx_swap (&a, &b);
+      tmp = working;
+      working = empty;
+      empty = tmp;
+    }
+  hmapx_destroy (&a);
+  hmapx_destroy (&b);
+  free (elements);
+  free (values);
+}
+
+static void
+test_swap_random_hash (void) 
+{
+  test_swap (128, random_hash);
+}
+
+static void
+test_destroy_null (void) 
+{
+  hmapx_destroy (NULL);
+}
+
+/* Test shrinking an empty hash table. */
+static void
+test_shrink_empty (void)
+{
+  struct hmapx hmapx;
+
+  hmapx_init (&hmapx);
+  hmapx_reserve (&hmapx, 123);
+  hmapx_shrink (&hmapx);
+  hmapx_destroy (&hmapx);
+}
+\f
+/* Main program. */
+
+/* Runs TEST_FUNCTION and prints a message about NAME. */
+static void
+run_test (void (*test_function) (void), const char *name)
+{
+  test_name = name;
+  putchar ('.');
+  fflush (stdout);
+  test_function ();
+}
+
+int
+main (void)
+{
+  run_test (test_insert_any_remove_any_random_hash,
+            "insert any order, delete any order (random hash)");
+  run_test (test_insert_any_remove_any_identity_hash,
+            "insert any order, delete any order (identity hash)");
+  run_test (test_insert_any_remove_any_constant_hash,
+            "insert any order, delete any order (constant hash)");
+
+  run_test (test_insert_any_remove_same_random_hash,
+            "insert any order, delete same order (random hash)");
+  run_test (test_insert_any_remove_same_identity_hash,
+            "insert any order, delete same order (identity hash)");
+  run_test (test_insert_any_remove_same_constant_hash,
+            "insert any order, delete same order (constant hash)");
+
+  run_test (test_insert_any_remove_reverse_random_hash,
+            "insert any order, delete reverse order (random hash)");
+  run_test (test_insert_any_remove_reverse_identity_hash,
+            "insert any order, delete reverse order (identity hash)");
+  run_test (test_insert_any_remove_reverse_constant_hash,
+            "insert any order, delete reverse order (constant hash)");
+
+  run_test (test_random_sequence_random_hash,
+            "insert and delete in random sequence (random hash)");
+  run_test (test_random_sequence_identity_hash,
+            "insert and delete in random sequence (identity hash)");
+  run_test (test_random_sequence_constant_hash,
+            "insert and delete in random sequence (constant hash)");
+
+  run_test (test_insert_ordered_random_hash,
+            "insert in ascending order (random hash)");
+  run_test (test_insert_ordered_identity_hash,
+            "insert in ascending order (identity hash)");
+  run_test (test_insert_ordered_constant_hash,
+            "insert in ascending order (constant hash)");
+
+  run_test (test_moved_random_hash,
+            "move elements around in memory (random hash)");
+  run_test (test_moved_identity_hash,
+            "move elements around in memory (identity hash)");
+  run_test (test_moved_constant_hash,
+            "move elements around in memory (constant hash)");
+
+  run_test (test_changed_random_hash,
+            "change key data in nodes (random hash)");
+  run_test (test_changed_identity_hash,
+            "change key data in nodes (identity hash)");
+  run_test (test_changed_constant_hash,
+            "change key data in nodes (constant hash)");
+
+  run_test (test_change_random_hash,
+            "change and move key data in nodes (random hash)");
+  run_test (test_change_identity_hash,
+            "change and move key data in nodes (identity hash)");
+  run_test (test_change_constant_hash,
+            "change and move key data in nodes (constant hash)");
+
+  run_test (test_swap_random_hash, "test swapping tables");
+
+  run_test (test_destroy_null, "test destroying null table");
+  run_test (test_shrink_empty, "test shrinking an empty table");
+
+  putchar ('\n');
+
+  return 0;
+}
author	Ben Pfaff <blp@gnu.org>
	Sat, 27 Sep 2008 03:42:01 +0000 (20:42 -0700)
committer	Ben Pfaff <blp@gnu.org>
	Wed, 1 Oct 2008 03:36:03 +0000 (20:36 -0700)
src/libpspp/automake.mk		patch \| blob \| history
src/libpspp/hash-functions.c	[new file with mode: 0644]	patch \| blob
src/libpspp/hash-functions.h	[new file with mode: 0644]	patch \| blob
src/libpspp/hash.c		patch \| blob \| history
src/libpspp/hash.h		patch \| blob \| history
src/libpspp/hmap.c	[new file with mode: 0644]	patch \| blob
src/libpspp/hmap.h	[new file with mode: 0644]	patch \| blob
src/libpspp/hmapx.c	[new file with mode: 0644]	patch \| blob
src/libpspp/hmapx.h	[new file with mode: 0644]	patch \| blob
tests/automake.mk		patch \| blob \| history
tests/libpspp/hmap-test.c	[new file with mode: 0644]	patch \| blob
tests/libpspp/hmapx-test.c	[new file with mode: 0644]	patch \| blob