Patch #5812.
Thanks to John Darrington for review.
+2007-03-25 Ben Pfaff <blp@gnu.org>
+
+ * automake.mk (src_libpspp_libpspp_a_SOURCES): Add
+ sparse-array.[ch].
+
+ * pool.c (pool_zalloc): New function.
+ (pool_calloc): New function.
+
+ * sparse-array.c: New file.
+
+ * sparse-array.h: New file.
+
Mon Mar 5 20:55:49 CET 2007 John Darrington <john@darrington.wattle.id.au>
* i18n.c: Cast second argument of iconv using ICONV_CONST
src/libpspp/misc.h \
src/libpspp/pool.c \
src/libpspp/pool.h \
+ src/libpspp/sparse-array.c \
+ src/libpspp/sparse-array.h \
src/libpspp/syntax-gen.c \
src/libpspp/syntax-gen.h \
src/libpspp/message.c \
return pool_malloc (pool, n * s);
}
+/* Allocates AMT bytes using malloc(), to be managed by POOL,
+ zeros the block, and returns a pointer to the beginning of the
+ block.
+ If POOL is a null pointer, then allocates a normal memory block
+ with xmalloc(). */
+void *
+pool_zalloc (struct pool *pool, size_t amt)
+{
+ void *p = pool_malloc (pool, amt);
+ memset (p, 0, amt);
+ return p;
+}
+
+/* Allocates and returns N elements of S bytes each, to be
+ managed by POOL, and zeros the block.
+ If POOL is a null pointer, then allocates a normal memory block
+ with malloc().
+ N must be nonnegative, S must be positive.
+ Terminates the program if the memory cannot be obtained,
+ including the case where N * S overflows the range of size_t. */
+void *
+pool_calloc (struct pool *pool, size_t n, size_t s)
+{
+ void *p = pool_nmalloc (pool, n, s);
+ memset (p, 0, n * s);
+ return p;
+}
+
/* Changes the allocation size of the specified memory block P managed
by POOL to AMT bytes and returns a pointer to the beginning of the
block.
/* Standard allocation routines. */
void *pool_malloc (struct pool *, size_t) MALLOC_LIKE;
void *pool_nmalloc (struct pool *, size_t n, size_t s) MALLOC_LIKE;
+void *pool_zalloc (struct pool *, size_t) MALLOC_LIKE;
+void *pool_calloc (struct pool *, size_t n, size_t s) MALLOC_LIKE;
void *pool_realloc (struct pool *, void *, size_t);
void *pool_nrealloc (struct pool *, void *, size_t n, size_t s);
void *pool_2nrealloc (struct pool *, void *, size_t *pn, size_t s);
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#include <config.h>
+
+#include <libpspp/sparse-array.h>
+
+#include <limits.h>
+#include <string.h>
+
+#include <libpspp/assertion.h>
+#include <libpspp/misc.h>
+#include <libpspp/pool.h>
+
+/* Sparse array data structure.
+
+ The sparse array is implemented in terms of a "radix tree", a
+ multiway tree in which a set of bits drawn from the key
+ determine the child chosen at each level during a search. The
+ most-significant bits determine a child of the root, the next
+ bits determine a child of that child, and so on, until the
+ least-significant bits determine a leaf node.
+
+ In this implementation, the branching factor at each level is
+ held constant at 2**BITS_PER_LEVEL. The tree is only made as
+ tall as need be for the currently largest key, and nodes that
+ would be entirely empty are not allocated at all. The
+ elements are stored in the leaf nodes. */
+
+/* Number of bits from the key used as the index at each level. */
+#define BITS_PER_LEVEL 5
+
+/* Branching factor. */
+#define PTRS_PER_LEVEL (1u << BITS_PER_LEVEL)
+
+/* Maximum height. */
+#define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT)
+#define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL)
+
+/* Bit-mask for index. */
+#define LEVEL_MASK ((1ul << BITS_PER_LEVEL) - 1)
+
+/* Pointer to an internal node or a leaf node.
+ Pointers in internal nodes at level 1 point to leaf nodes;
+ other pointers point to internal nodes. */
+union pointer
+ {
+ struct internal_node *internal;
+ struct leaf_node *leaf;
+ };
+
+/* A sparse array. */
+struct sparse_array
+ {
+ struct pool *pool; /* Pool used for allocations. */
+ size_t elem_size; /* Element size, rounded for alignment. */
+ unsigned long count; /* Number of elements in tree. */
+
+ /* Radix tree. */
+ union pointer root; /* Root of tree. */
+ int height; /* 0=empty tree;
+ 1=root points to leaf,
+ 2=root points to internal node
+ that points to leaves,
+ and so on. */
+
+ /* Cache for speeding up access. */
+ unsigned long int cache_ofs; /* Group of keys that cache points to,
+ shifted right BITS_PER_LEVEL bits;
+ ULONG_MAX for empty cache. */
+ struct leaf_node *cache; /* Cached leaf node, or a null
+ pointer for a negative cache. */
+ };
+
+/* An internal node in the radix tree. */
+struct internal_node
+ {
+ int count; /* Number of nonnul children. */
+ union pointer down[PTRS_PER_LEVEL]; /* Children. */
+ };
+
+/* A leaf node in the radix tree. */
+struct leaf_node
+ {
+ /* Bit-vector of elements that are in use. */
+ unsigned long int in_use[DIV_RND_UP (PTRS_PER_LEVEL, LONG_BITS)];
+ /* element_type elements[PTRS_PER_LEVEL]; */
+ };
+
+/* Returns SIZE rounded up to a safe alignment. */
+#define ALIGN_SIZE(SIZE) ROUND_UP (SIZE, sizeof (long int))
+
+/* Returns the size of EXPR_OR_TYPE rounded up to a safe
+ alignment. */
+#define SIZEOF_ALIGNED(EXPR_OR_TYPE) ALIGN_SIZE (sizeof (EXPR_OR_TYPE))
+
+static inline bool index_in_range (const struct sparse_array *,
+ unsigned long int);
+static inline bool is_in_use (const struct leaf_node *, unsigned int);
+static inline bool any_in_use (const struct leaf_node *);
+static inline void set_in_use (struct leaf_node *, unsigned int);
+static inline void unset_in_use (struct leaf_node *, unsigned int);
+static inline int scan_in_use (struct leaf_node *, unsigned int);
+static inline void *leaf_element (const struct sparse_array *,
+ struct leaf_node *, unsigned int);
+static inline size_t leaf_size (const struct sparse_array *);
+
+static struct leaf_node *find_leaf_node (const struct sparse_array *,
+ unsigned long int);
+static void decrease_height (struct sparse_array *);
+static void *scan_leaf (struct sparse_array *, struct leaf_node *,
+ unsigned long int, unsigned long int *);
+static void *do_scan (struct sparse_array *, union pointer *, int,
+ unsigned long int, unsigned long int *);
+\f
+/* Creates and returns a new sparse array that will contain
+ elements that are ELEM_SIZE bytes in size. */
+struct sparse_array *
+sparse_array_create (size_t elem_size)
+{
+ return sparse_array_create_pool (NULL, elem_size);
+}
+
+/* Creates and returns a new sparse array that will contain
+ elements that are ELEM_SIZE bytes in size. Data in the sparse
+ array will be allocated from POOL, which may be null. */
+struct sparse_array *
+sparse_array_create_pool (struct pool *pool, size_t elem_size)
+{
+ struct sparse_array *spar = pool_malloc (pool, sizeof *spar);
+ spar->pool = pool;
+ spar->elem_size = ALIGN_SIZE (elem_size);
+ spar->height = 0;
+ spar->root.leaf = NULL;
+ spar->count = 0;
+ spar->cache_ofs = ULONG_MAX;
+ return spar;
+}
+
+/* Destroys SPAR node pointed to by P at the given LEVEL. */
+static void
+do_destroy (struct sparse_array *spar, union pointer *p, int level)
+{
+ if (level > 0)
+ {
+ struct internal_node *node = p->internal;
+ int count = node->count;
+ int i;
+
+ for (i = 0; ; i++)
+ {
+ union pointer *q = &p->internal->down[i];
+ if (level > 1 ? q->internal != NULL : q->leaf != NULL)
+ {
+ do_destroy (spar, q, level - 1);
+ if (--count == 0)
+ break;
+ }
+ }
+ pool_free (spar->pool, p->internal);
+ }
+ else if (level == 0)
+ pool_free (spar->pool, p->leaf);
+}
+
+/* Destroys SPAR.
+ Any elements in SPAR are deallocated. Thus, if per-element
+ destruction is necessary, it should be done before destroying
+ the sparse array. */
+void
+sparse_array_destroy (struct sparse_array *spar)
+{
+ do_destroy (spar, &spar->root, spar->height - 1);
+ pool_free (spar->pool, spar);
+}
+
+/* Returns the number of elements in SPAR. */
+unsigned long int
+sparse_array_count (const struct sparse_array *spar)
+{
+ return spar->count;
+}
+
+/* Increases SPAR's height by 1, allowing it to hold
+ PTRS_PER_LEVEL times more elements. */
+static void
+increase_height (struct sparse_array *spar)
+{
+ assert (spar->height < MAX_HEIGHT);
+ spar->height++;
+ if (spar->height == 1)
+ spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar));
+ else
+ {
+ struct internal_node *new_root;
+ new_root = pool_zalloc (spar->pool, sizeof *new_root);
+ new_root->count = 1;
+ new_root->down[0] = spar->root;
+ spar->root.internal = new_root;
+ }
+}
+
+/* Finds the leaf node in SPAR that contains the element for KEY.
+ SPAR must be tall enough to hold KEY.
+ Creates the leaf if it doesn't already exist. */
+static struct leaf_node *
+create_leaf_node (struct sparse_array *spar, unsigned long int key)
+{
+ union pointer *p;
+ int *count = NULL;
+ int level;
+
+ assert (index_in_range (spar, key));
+
+ /* Short-circuit everything if KEY is in the leaf cache. */
+ if (key >> BITS_PER_LEVEL == spar->cache_ofs && spar->cache != NULL)
+ return spar->cache;
+
+ /* Descend through internal nodes. */
+ p = &spar->root;
+ for (level = spar->height - 1; level > 0; level--)
+ {
+ if (p->internal == NULL)
+ {
+ p->internal = pool_zalloc (spar->pool, sizeof *p->internal);
+ ++*count;
+ }
+
+ count = &p->internal->count;
+ p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
+ }
+
+ /* Create leaf if necessary. */
+ if (p->leaf == NULL)
+ {
+ p->leaf = pool_zalloc (spar->pool, leaf_size (spar));
+ ++*count;
+ }
+
+ /* Update cache. */
+ spar->cache = p->leaf;
+ spar->cache_ofs = key >> BITS_PER_LEVEL;
+
+ return p->leaf;
+}
+
+/* Inserts into SPAR an element with the given KEY, which must not
+ already exist in SPAR.
+ Returns the new element for the caller to initialize. */
+void *
+sparse_array_insert (struct sparse_array *spar, unsigned long int key)
+{
+ struct leaf_node *leaf;
+
+ while (!index_in_range (spar, key))
+ increase_height (spar);
+
+ spar->count++;
+
+ leaf = create_leaf_node (spar, key);
+ assert (!is_in_use (leaf, key));
+ set_in_use (leaf, key);
+ return leaf_element (spar, leaf, key);
+}
+
+/* Finds and returns the element in SPAR with the given KEY.
+ Returns a null pointer if KEY does not exist in SPAR. */
+void *
+sparse_array_get (const struct sparse_array *spar, unsigned long int key)
+{
+ if (index_in_range (spar, key))
+ {
+ struct leaf_node *leaf = find_leaf_node (spar, key);
+ if (leaf != NULL && is_in_use (leaf, key))
+ return leaf_element (spar, leaf, key);
+ }
+ return NULL;
+}
+
+/* Removes the element with the given KEY from SPAR.
+ Returns true if an element was removed, false if SPAR hadn't
+ contained an element with the given KEY.
+
+ If elements need to be destructed, then the caller should have
+ already taken care of it before calling this function; the
+ element's content must be considered freed and of
+ indeterminate value after it is removed. */
+bool
+sparse_array_remove (struct sparse_array *spar, unsigned long int key)
+{
+ union pointer *path[MAX_HEIGHT], **last;
+ struct leaf_node *leaf;
+ union pointer *p;
+ int level;
+
+ if (!index_in_range (spar, key))
+ return false;
+
+ /* Find and free element in leaf. */
+ leaf = find_leaf_node (spar, key);
+ if (leaf == NULL || !is_in_use (leaf, key))
+ return false;
+#if ASSERT_LEVEL >= 10
+ memset (leaf_element (spar, leaf, key), 0xcc, spar->elem_size);
+#endif
+ unset_in_use (leaf, key);
+ spar->count--;
+ if (any_in_use (leaf))
+ return true;
+
+ /* The leaf node is empty.
+ Retrace the path of internal nodes traversed to the leaf. */
+ p = &spar->root;
+ last = path;
+ for (level = spar->height - 1; level > 0; level--)
+ {
+ *last++ = p;
+ p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
+ }
+
+ /* Free the leaf node and prune it from the tree. */
+ spar->cache_ofs = ULONG_MAX;
+ pool_free (spar->pool, leaf);
+ p->leaf = NULL;
+
+ /* Update counts in the internal nodes above the leaf.
+ Free any internal nodes that become empty. */
+ while (last > path)
+ {
+ p = *--last;
+ if (--p->internal->count > 0)
+ {
+ if (p == &spar->root)
+ decrease_height (spar);
+ return true;
+ }
+
+ pool_free (spar->pool, p->internal);
+ p->internal = NULL;
+ }
+ spar->height = 0;
+ return true;
+}
+
+/* Scans SPAR in increasing order of keys for in-use elements.
+ If SKIP is NULL, the scan starts from key 0;
+ otherwise, it starts just after key *SKIP.
+ If an element is found, returns it and stores the element's
+ key into *FOUND; otherwise, returns a null pointer and does
+ not modify *FOUND. */
+void *
+sparse_array_scan (const struct sparse_array *spar_, unsigned long int *skip,
+ unsigned long int *found)
+{
+ struct sparse_array *spar = (struct sparse_array *) spar_;
+ unsigned long int start;
+
+ /* Find our starting point. */
+ if (skip != NULL)
+ {
+ start = *skip + 1;
+ if (start == 0)
+ return NULL;
+ }
+ else
+ start = 0;
+
+ /* Check the cache. */
+ if (start >> BITS_PER_LEVEL == spar->cache_ofs)
+ {
+ void *p = scan_leaf (spar, spar->cache, start, found);
+ if (p)
+ return p;
+ start &= ~LEVEL_MASK;
+ start += PTRS_PER_LEVEL;
+ if (start == 0)
+ return NULL;
+ }
+
+ /* Do the scan. */
+ if (!index_in_range (spar, start))
+ return NULL;
+ return do_scan (spar, &spar->root, spar->height - 1, start, found);
+}
+\f
+/* Returns true iff KEY is in the range of keys currently
+ represented by SPAR. */
+static inline bool
+index_in_range (const struct sparse_array *spar, unsigned long int key)
+{
+ return (spar->height == 0 ? false
+ : spar->height >= MAX_HEIGHT ? true
+ : key < (1ul << (spar->height * BITS_PER_LEVEL)));
+}
+
+/* Returns true iff LEAF contains an in-use element with the
+ given KEY. */
+static inline bool
+is_in_use (const struct leaf_node *leaf, unsigned int key)
+{
+ key &= LEVEL_MASK;
+ return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0;
+}
+
+/* Returns true iff LEAF contains any in-use elements. */
+static inline bool
+any_in_use (const struct leaf_node *leaf)
+{
+ size_t i;
+ for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++)
+ if (leaf->in_use[i])
+ return true;
+ return false;
+}
+
+/* Marks element KEY in LEAF as in-use. */
+static inline void
+set_in_use (struct leaf_node *leaf, unsigned int key)
+{
+ key &= LEVEL_MASK;
+ leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS);
+}
+
+/* Marks element KEY in LEAF as not in-use. */
+static inline void
+unset_in_use (struct leaf_node *leaf, unsigned int key)
+{
+ key &= LEVEL_MASK;
+ leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS));
+}
+
+/* Returns the number of trailing 0-bits in X.
+ Undefined if X is zero. */
+static inline int
+count_trailing_zeros (unsigned long int x)
+{
+ /* This algorithm is from _Hacker's Delight_ section 5.4. */
+ int n = 1;
+
+#define COUNT_STEP(BITS) \
+ if (!(x & ((1ul << (BITS)) - 1))) \
+ { \
+ n += BITS; \
+ x >>= BITS; \
+ }
+
+#if ULONG_MAX >> 31 >> 31 >> 2
+ COUNT_STEP (64);
+#endif
+#if ULONG_MAX >> 31 >> 1
+ COUNT_STEP (32);
+#endif
+ COUNT_STEP (16);
+ COUNT_STEP (8);
+ COUNT_STEP (4);
+ COUNT_STEP (2);
+
+ return n - (x & 1);
+}
+
+/* Returns the least index of the in-use element in LEAF greater
+ than or equal to IDX. */
+static inline int
+scan_in_use (struct leaf_node *leaf, unsigned int idx)
+{
+ for (; idx < PTRS_PER_LEVEL; idx = (idx & ~(LONG_BITS - 1)) + LONG_BITS)
+ {
+ int ofs = idx % LONG_BITS;
+ unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs;
+ if (!in_use)
+ continue;
+ return count_trailing_zeros (in_use) + idx;
+ }
+ return -1;
+}
+
+/* Returns the address of element with the given KEY in LEAF,
+ which is a node in SPAR. */
+static inline void *
+leaf_element (const struct sparse_array *spar, struct leaf_node *leaf,
+ unsigned int key)
+{
+ key &= LEVEL_MASK;
+ return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key);
+}
+
+/* Returns the size of a leaf node in SPAR. */
+static inline size_t
+leaf_size (const struct sparse_array *spar)
+{
+ return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL;
+}
+
+/* Finds and returns the leaf node in SPAR that contains KEY.
+ Returns null if SPAR does not have a leaf node that contains
+ KEY. */
+static struct leaf_node *
+find_leaf_node (const struct sparse_array *spar_, unsigned long int key)
+{
+ struct sparse_array *spar = (struct sparse_array *) spar_;
+ const union pointer *p;
+ int level;
+
+ assert (index_in_range (spar, key));
+
+ /* Check the cache first. */
+ if (key >> BITS_PER_LEVEL == spar->cache_ofs)
+ return spar->cache;
+
+ /* Descend through internal nodes. */
+ p = &spar->root;
+ for (level = spar->height - 1; level > 0; level--)
+ {
+ if (p->internal == NULL)
+ return NULL;
+ p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
+ }
+
+ /* Update cache. */
+ spar->cache = p->leaf;
+ spar->cache_ofs = key >> BITS_PER_LEVEL;
+
+ return p->leaf;
+}
+
+/* Reduces SPAR's height to the minimum needed value by
+ eliminating levels that contain only a single entry for all
+ 0-bits. */
+static void
+decrease_height (struct sparse_array *spar)
+{
+ while (spar->height > 1
+ && spar->root.internal->count == 1
+ && spar->root.internal->down[0].internal)
+ {
+ struct internal_node *p = spar->root.internal;
+ spar->height--;
+ spar->root = p->down[0];
+ pool_free (spar->pool, p);
+ }
+}
+
+/* Scans leaf node LEAF, which is in SPAR, for the in-use element
+ with the least key greater than or equal to START. If such an
+ element is found, returns a pointer to it and stores its key
+ in *FOUND; otherwise, returns a null pointer and does not
+ modify *FOUND. */
+static void *
+scan_leaf (struct sparse_array *spar, struct leaf_node *leaf,
+ unsigned long int start, unsigned long int *found)
+{
+ int idx = scan_in_use (leaf, start & LEVEL_MASK);
+ if (idx >= 0)
+ {
+ *found = (start & ~LEVEL_MASK) | idx;
+ spar->cache = leaf;
+ spar->cache_ofs = *found >> BITS_PER_LEVEL;
+ return leaf_element (spar, leaf, idx);
+ }
+
+ return NULL;
+}
+
+/* Scans P, which is at LEVEL and within SPAR, and its subnodes,
+ for the in-use element with the least key greater than or
+ equal to START. If such an element is found, returns a
+ pointer to it and stores its key in *FOUND; otherwise, returns
+ a null pointer and does not modify *FOUND. */
+static inline void *
+scan_internal_node (struct sparse_array *spar, struct internal_node *node,
+ int level, unsigned long int start,
+ unsigned long int *found)
+{
+ int shift = level * BITS_PER_LEVEL;
+ int count = node->count;
+ int i;
+
+ for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
+ {
+ union pointer *q = &node->down[i];
+ if (level > 1 ? q->internal != NULL : q->leaf != NULL)
+ {
+ void *element = do_scan (spar, q, level - 1, start, found);
+ if (element)
+ return element;
+ if (--count == 0)
+ return NULL;
+ }
+
+ start &= ~((1ul << shift) - 1);
+ start += 1ul << shift;
+ }
+ return NULL;
+}
+
+/* Scans P, which is at LEVEL and within SPAR, and its subnodes,
+ for the in-use element with the least key greater than or
+ equal to START. If such an element is found, returns a
+ pointer to it and stores its key in *FOUND; otherwise, returns
+ a null pointer and does not modify *FOUND. */
+static void *
+do_scan (struct sparse_array *spar, union pointer *p, int level,
+ unsigned long int start, unsigned long int *found)
+{
+ return (level == 0
+ ? scan_leaf (spar, p->leaf, start, found)
+ : scan_internal_node (spar, p->internal, level, start, found));
+}
+
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Sparse array data structure.
+
+ Implements a dictionary that associates a "unsigned long int"
+ key with fixed-size values (elements).
+
+ The implementation allocates elements in groups of moderate
+ size, so it achieves maximum space efficiency when elements
+ are clustered into groups of consecutive keys. For the same
+ reason, elements should be kept relatively small, perhaps a
+ few pointer elements in size.
+
+ The implementation is slightly more efficient both in time and
+ space when indexes are kept small. Thus, for example, if the
+ indexes in use start from some fixed base value, consider
+ using the offset from that base as the index value. */
+
+#ifndef LIBPSPP_SPARSE_ARRAY_H
+#define LIBPSPP_SPARSE_ARRAY_H 1
+
+#include <stddef.h>
+#include <stdbool.h>
+
+#include <libpspp/hash.h>
+
+struct sparse_array *sparse_array_create (size_t elem_size);
+struct sparse_array *sparse_array_create_pool (struct pool *,
+ size_t elem_size);
+void sparse_array_destroy (struct sparse_array *);
+
+unsigned long int sparse_array_count (const struct sparse_array *);
+
+void *sparse_array_insert (struct sparse_array *, unsigned long int key);
+void *sparse_array_get (const struct sparse_array *, unsigned long int key);
+bool sparse_array_remove (struct sparse_array *, unsigned long int key);
+
+void *sparse_array_scan (const struct sparse_array *,
+ unsigned long int *skip,
+ unsigned long int *key);
+
+#endif /* libpspp/sparse-array.h */
+2007-03-25 Ben Pfaff <blp@gnu.org>
+
+ * automake.mk: Add tests/libpspp/sparse-array-test.
+
+ * libpspp/sparse-array-test.c: New test.
+
2007-03-18 Ben Pfaff <blp@gnu.org>
* automake.mk: Don't try to distribute tests that are compiled
tests/libpspp/ll-test \
tests/libpspp/llx-test \
tests/libpspp/heap-test \
- tests/libpspp/abt-test
+ tests/libpspp/abt-test \
+ tests/libpspp/sparse-array-test
TESTS = $(dist_TESTS) $(nodist_TESTS)
check_PROGRAMS += \
- tests/libpspp/ll-test \
- tests/libpspp/llx-test \
- tests/formats/inexactify \
- tests/libpspp/heap-test \
- tests/libpspp/abt-test
+ $(nodist_TESTS) \
+ tests/formats/inexactify
tests_libpspp_ll_test_SOURCES = \
src/libpspp/ll.c \
tests_libpspp_abt_test_LDADD = gl/libgl.la
tests_libpspp_abt_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10
+tests_libpspp_sparse_array_test_SOURCES = \
+ src/libpspp/sparse-array.c \
+ src/libpspp/sparse-array.h \
+ src/libpspp/pool.c \
+ src/libpspp/pool.h \
+ tests/libpspp/sparse-array-test.c
+tests_libpspp_sparse_array_test_LDADD = gl/libgl.la
+tests_libpspp_sparse_array_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10
+
tests_formats_inexactify_SOURCES = tests/formats/inexactify.c
EXTRA_DIST += \
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* This is a test program for the sparse array routines defined
+ in sparse-array.c. This test program aims to be as
+ comprehensive as possible. "gcov -b" should report 100%
+ coverage of lines and branches in sparse-array.c when compiled
+ with -DNDEBUG. "valgrind --leak-check=yes
+ --show-reachable=yes" should give a clean report. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libpspp/sparse-array.h>
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+\f
+/* Support preliminaries. */
+#if __GNUC__ >= 2 && !defined UNUSED
+#define UNUSED __attribute__ ((unused))
+#else
+#define UNUSED
+#endif
+
+/* Currently running test. */
+static const char *test_name;
+
+/* Exit with a failure code.
+ (Place a breakpoint on this function while debugging.) */
+static void
+check_die (void)
+{
+ exit (EXIT_FAILURE);
+}
+
+/* If OK is not true, prints a message about failure on the
+ current source file and the given LINE and terminates. */
+static void
+check_func (bool ok, int line)
+{
+ if (!ok)
+ {
+ printf ("%s:%d: Check failed in %s test\n",
+ __FILE__, line, test_name);
+ check_die ();
+ }
+}
+
+/* Verifies that EXPR evaluates to true.
+ If not, prints a message citing the calling line number and
+ terminates. */
+#define check(EXPR) check_func ((EXPR), __LINE__)
+
+/* Prints a message about memory exhaustion and exits with a
+ failure code. */
+static void
+xalloc_die (void)
+{
+ printf ("virtual memory exhausted\n");
+ exit (EXIT_FAILURE);
+}
+
+/* Allocates and returns N bytes of memory. */
+static void *
+xmalloc (size_t n)
+{
+ if (n != 0)
+ {
+ void *p = malloc (n);
+ if (p == NULL)
+ xalloc_die ();
+
+ return p;
+ }
+ else
+ return NULL;
+}
+
+/* Returns a malloc()'d duplicate of the N bytes starting at
+ P. */
+static void *
+xmemdup (const void *p, size_t n)
+{
+ void *q = xmalloc (n);
+ memcpy (q, p, n);
+ return q;
+}
+
+/* Allocates and returns N * M bytes of memory. */
+static void *
+xnmalloc (size_t n, size_t m)
+{
+ if ((size_t) -1 / m <= n)
+ xalloc_die ();
+ return xmalloc (n * m);
+}
+\f
+/* Compares A and B and returns a strcmp-type return value. */
+static int
+compare_unsigned_longs_noaux (const void *a_, const void *b_)
+{
+ const unsigned long *a = a_;
+ const unsigned long *b = b_;
+
+ return *a < *b ? -1 : *a > *b;
+}
+
+/* Checks that SPAR contains the CNT ints in DATA, that its
+ structure is correct, and that certain operations on SPAR
+ produce the expected results. */
+static void
+check_sparse_array (struct sparse_array *spar,
+ const unsigned long data[], size_t cnt)
+{
+ unsigned long idx;
+ unsigned long *order;
+ unsigned long *p;
+ size_t i;
+
+ check (sparse_array_count (spar) == cnt);
+
+ for (i = 0; i < cnt; i++)
+ {
+ p = sparse_array_get (spar, data[i]);
+ check (p != NULL);
+ check (*p == data[i]);
+ }
+
+ order = xmemdup (data, cnt * sizeof *data);
+ qsort (order, cnt, sizeof *order, compare_unsigned_longs_noaux);
+
+ for (i = 0; i < cnt; i++)
+ {
+ p = sparse_array_get (spar, order[i]);
+ check (p != NULL);
+ check (*p == order[i]);
+ }
+
+ if (cnt > 0 && order[0] - 1 != order[cnt - 1])
+ {
+ check (sparse_array_get (spar, order[0] - 1) == NULL);
+ check (!sparse_array_remove (spar, order[0] - 1));
+ }
+ if (cnt > 0 && order[0] != order[cnt - 1] + 1)
+ {
+ check (sparse_array_get (spar, order[cnt - 1] + 1) == NULL);
+ check (!sparse_array_remove (spar, order[cnt - 1] + 1));
+ }
+
+ for (i = 0, p = sparse_array_scan (spar, NULL, &idx); i < cnt;
+ i++, p = sparse_array_scan (spar, &idx, &idx))
+ {
+ check (p != NULL);
+ check (idx == order[i]);
+ check (*p == order[i]);
+ }
+ check (p == NULL);
+
+ free (order);
+}
+
+/* Inserts the CNT values from 0 to CNT - 1 (inclusive) into a
+ sparse array in the order specified by INSERTIONS, then
+ deletes them in the order specified by DELETIONS, checking the
+ array's contents for correctness after each operation. */
+static void
+test_insert_delete (const unsigned long insertions[],
+ const unsigned long deletions[],
+ size_t cnt)
+{
+ struct sparse_array *spar;
+ size_t i;
+
+ spar = sparse_array_create (sizeof *insertions);
+ for (i = 0; i < cnt; i++)
+ {
+ unsigned long *p = sparse_array_insert (spar, insertions[i]);
+ *p = insertions[i];
+ check_sparse_array (spar, insertions, i + 1);
+ }
+ for (i = 0; i < cnt; i++)
+ {
+ bool deleted = sparse_array_remove (spar, deletions[i]);
+ check (deleted);
+ check_sparse_array (spar, deletions + i + 1, cnt - (i + 1));
+ }
+ check_sparse_array (spar, NULL, 0);
+ sparse_array_destroy (spar);
+}
+
+/* Inserts the CNT values from 0 to CNT - 1 (inclusive) into a
+ sparse array in the order specified by INSERTIONS, then
+ destroys the sparse array, to check that sparse_cases_destroy
+ properly frees all the nodes. */
+static void
+test_destroy (const unsigned long insertions[], size_t cnt)
+{
+ struct sparse_array *spar;
+ size_t i;
+
+ spar = sparse_array_create (sizeof *insertions);
+ for (i = 0; i < cnt; i++)
+ {
+ unsigned long *p = sparse_array_insert (spar, insertions[i]);
+ *p = insertions[i];
+ check_sparse_array (spar, insertions, i + 1);
+ }
+ sparse_array_destroy (spar);
+}
+
+/* Randomly shuffles the CNT elements in ARRAY, each of which is
+ SIZE bytes in size. */
+static void
+random_shuffle (void *array_, size_t cnt, size_t size)
+{
+ char *array = array_;
+ char *tmp = xmalloc (size);
+ size_t i;
+
+ for (i = 0; i < cnt; i++)
+ {
+ size_t j = rand () % (cnt - i) + i;
+ if (i != j)
+ {
+ memcpy (tmp, array + j * size, size);
+ memcpy (array + j * size, array + i * size, size);
+ memcpy (array + i * size, tmp, size);
+ }
+ }
+
+ free (tmp);
+}
+\f
+/* Tests inserting and deleting elements whose values are
+ determined by starting from various offsets and skipping
+ across various strides, and doing so in various orders. */
+static void
+test_insert_delete_strides (void)
+{
+ static const unsigned long strides[] =
+ {
+ 1, 2, 4, 16, 64, 4096, 262144, 16777216,
+ 3, 5, 17, 67, 4099, 262147, 16777259,
+ };
+ const size_t stride_cnt = sizeof strides / sizeof *strides;
+
+ static const unsigned long offsets[] =
+ {
+ 0,
+ 1024ul * 1024 + 1,
+ 1024ul * 1024 * 512 + 23,
+ ULONG_MAX - 59,
+ };
+ const size_t offset_cnt = sizeof offsets / sizeof *offsets;
+
+ int cnt = 100;
+ unsigned long *insertions, *deletions;
+ const unsigned long *stride, *offset;
+
+ insertions = xnmalloc (cnt, sizeof *insertions);
+ deletions = xnmalloc (cnt, sizeof *deletions);
+ for (stride = strides; stride < strides + stride_cnt; stride++)
+ {
+ for (offset = offsets; offset < offsets + offset_cnt; offset++)
+ {
+ int k;
+
+ for (k = 0; k < cnt; k++)
+ insertions[k] = *stride * k + *offset;
+
+ test_insert_delete (insertions, insertions, cnt);
+ test_destroy (insertions, cnt);
+
+ for (k = 0; k < cnt; k++)
+ deletions[k] = insertions[cnt - k - 1];
+ test_insert_delete (insertions, deletions, cnt);
+
+ random_shuffle (insertions, cnt, sizeof *insertions);
+ test_insert_delete (insertions, insertions, cnt);
+ test_insert_delete (insertions, deletions, cnt);
+ }
+ putchar ('.');
+ fflush (stdout);
+ }
+ free (insertions);
+ free (deletions);
+}
+
+/* Returns the index in ARRAY of the (CNT+1)th element that has
+ the TARGET value. */
+static int
+scan_bools (bool target, bool array[], size_t cnt)
+{
+ size_t i;
+
+ for (i = 0; ; i++)
+ if (array[i] == target && cnt-- == 0)
+ return i;
+}
+
+/* Performs a random sequence of insertions and deletions in a
+ sparse array. */
+static void
+test_random_insert_delete (void)
+{
+ unsigned long int values[] =
+ {
+ 0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
+ 8192, 16384, 32768, 65536, 131072, 262144, 4194304, 8388608,
+ 16777216, 33554432, 67108864, 134217728, 268435456, 536870912,
+ 1073741824, 2147483648,
+
+ 3, 7, 15, 31, 63, 127, 257, 511, 1023, 2047, 4095,
+ 8191, 16383, 32767, 65535, 131071, 262143, 4194303, 8388607,
+ 16777215, 33554431, 67108863, 134217727, 268435455, 536870911,
+ 1073741823, 2147483647, 4294967295,
+ };
+ const int max_values = sizeof values / sizeof *values;
+
+ const int num_actions = 250000;
+ struct sparse_array *spar;
+ bool *has_values;
+ int cnt;
+ int insert_chance;
+ int i;
+
+ has_values = xnmalloc (max_values, sizeof *has_values);
+ memset (has_values, 0, max_values * sizeof *has_values);
+
+ cnt = 0;
+ insert_chance = 5;
+
+ spar = sparse_array_create (sizeof *values);
+ for (i = 0; i < num_actions; i++)
+ {
+ enum { INSERT, DELETE } action;
+ unsigned long *p;
+ int j;
+
+ if (cnt == 0)
+ {
+ action = INSERT;
+ if (insert_chance < 9)
+ insert_chance++;
+ }
+ else if (cnt == max_values)
+ {
+ action = DELETE;
+ if (insert_chance > 0)
+ insert_chance--;
+ }
+ else
+ action = rand () % 10 < insert_chance ? INSERT : DELETE;
+
+ if (action == INSERT)
+ {
+ int ins_index;
+
+ ins_index = scan_bools (false, has_values,
+ rand () % (max_values - cnt));
+ assert (has_values[ins_index] == false);
+ has_values[ins_index] = true;
+
+ p = sparse_array_insert (spar, values[ins_index]);
+ check (p != NULL);
+ *p = values[ins_index];
+
+ cnt++;
+ }
+ else if (action == DELETE)
+ {
+ int del_index;
+
+ del_index = scan_bools (true, has_values, rand () % cnt);
+ assert (has_values[del_index] == true);
+ has_values[del_index] = false;
+
+ check (sparse_array_remove (spar, values[del_index]));
+ cnt--;
+ }
+ else
+ abort ();
+
+ check (sparse_array_count (spar) == cnt);
+ for (j = 0; j < max_values; j++)
+ {
+ p = sparse_array_get (spar, values[j]);
+ if (has_values[j])
+ {
+ check (p != NULL);
+ check (*p == values[j]);
+ }
+ else
+ {
+ check (p == NULL);
+ if (rand () % 10 == 0)
+ sparse_array_remove (spar, values[j]);
+ }
+ }
+ }
+ sparse_array_destroy (spar);
+ free (has_values);
+}
+
+/* Main program. */
+
+/* Runs TEST_FUNCTION and prints a message about NAME. */
+static void
+run_test (void (*test_function) (void), const char *name)
+{
+ test_name = name;
+ putchar ('.');
+ fflush (stdout);
+ test_function ();
+}
+
+int
+main (void)
+{
+ run_test (test_random_insert_delete,
+ "random insertions and deletions");
+ run_test (test_insert_delete_strides,
+ "insert in ascending order with strides and offset");
+ putchar ('\n');
+
+ return 0;
+}