From: Ben Pfaff Date: Sun, 25 Mar 2007 22:13:06 +0000 (+0000) Subject: Add sparse array data structure. X-Git-Tag: v0.6.0~519 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c97f6581b874c6057dbd4d581552774a3ed56ac5;p=pspp-builds.git Add sparse array data structure. Patch #5812. Thanks to John Darrington for review. --- diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index f363ec98..e029f4e3 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,15 @@ +2007-03-25 Ben Pfaff + + * automake.mk (src_libpspp_libpspp_a_SOURCES): Add + sparse-array.[ch]. + + * pool.c (pool_zalloc): New function. + (pool_calloc): New function. + + * sparse-array.c: New file. + + * sparse-array.h: New file. + Mon Mar 5 20:55:49 CET 2007 John Darrington * i18n.c: Cast second argument of iconv using ICONV_CONST diff --git a/src/libpspp/automake.mk b/src/libpspp/automake.mk index d9786900..fd0c8632 100644 --- a/src/libpspp/automake.mk +++ b/src/libpspp/automake.mk @@ -44,6 +44,8 @@ src_libpspp_libpspp_a_SOURCES = \ src/libpspp/misc.h \ src/libpspp/pool.c \ src/libpspp/pool.h \ + src/libpspp/sparse-array.c \ + src/libpspp/sparse-array.h \ src/libpspp/syntax-gen.c \ src/libpspp/syntax-gen.h \ src/libpspp/message.c \ diff --git a/src/libpspp/pool.c b/src/libpspp/pool.c index 141d8ab4..c39852dc 100644 --- a/src/libpspp/pool.c +++ b/src/libpspp/pool.c @@ -472,6 +472,34 @@ pool_nmalloc (struct pool *pool, size_t n, size_t s) return pool_malloc (pool, n * s); } +/* Allocates AMT bytes using malloc(), to be managed by POOL, + zeros the block, and returns a pointer to the beginning of the + block. + If POOL is a null pointer, then allocates a normal memory block + with xmalloc(). */ +void * +pool_zalloc (struct pool *pool, size_t amt) +{ + void *p = pool_malloc (pool, amt); + memset (p, 0, amt); + return p; +} + +/* Allocates and returns N elements of S bytes each, to be + managed by POOL, and zeros the block. + If POOL is a null pointer, then allocates a normal memory block + with malloc(). + N must be nonnegative, S must be positive. + Terminates the program if the memory cannot be obtained, + including the case where N * S overflows the range of size_t. */ +void * +pool_calloc (struct pool *pool, size_t n, size_t s) +{ + void *p = pool_nmalloc (pool, n, s); + memset (p, 0, n * s); + return p; +} + /* Changes the allocation size of the specified memory block P managed by POOL to AMT bytes and returns a pointer to the beginning of the block. diff --git a/src/libpspp/pool.h b/src/libpspp/pool.h index 0fba4a60..bc60e3a7 100644 --- a/src/libpspp/pool.h +++ b/src/libpspp/pool.h @@ -72,6 +72,8 @@ char *pool_asprintf (struct pool *, const char *, ...) /* Standard allocation routines. */ void *pool_malloc (struct pool *, size_t) MALLOC_LIKE; void *pool_nmalloc (struct pool *, size_t n, size_t s) MALLOC_LIKE; +void *pool_zalloc (struct pool *, size_t) MALLOC_LIKE; +void *pool_calloc (struct pool *, size_t n, size_t s) MALLOC_LIKE; void *pool_realloc (struct pool *, void *, size_t); void *pool_nrealloc (struct pool *, void *, size_t n, size_t s); void *pool_2nrealloc (struct pool *, void *, size_t *pn, size_t s); diff --git a/src/libpspp/sparse-array.c b/src/libpspp/sparse-array.c new file mode 100644 index 00000000..2a0a206e --- /dev/null +++ b/src/libpspp/sparse-array.c @@ -0,0 +1,624 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#include + +#include + +#include +#include + +#include +#include +#include + +/* Sparse array data structure. + + The sparse array is implemented in terms of a "radix tree", a + multiway tree in which a set of bits drawn from the key + determine the child chosen at each level during a search. The + most-significant bits determine a child of the root, the next + bits determine a child of that child, and so on, until the + least-significant bits determine a leaf node. + + In this implementation, the branching factor at each level is + held constant at 2**BITS_PER_LEVEL. The tree is only made as + tall as need be for the currently largest key, and nodes that + would be entirely empty are not allocated at all. The + elements are stored in the leaf nodes. */ + +/* Number of bits from the key used as the index at each level. */ +#define BITS_PER_LEVEL 5 + +/* Branching factor. */ +#define PTRS_PER_LEVEL (1u << BITS_PER_LEVEL) + +/* Maximum height. */ +#define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT) +#define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL) + +/* Bit-mask for index. */ +#define LEVEL_MASK ((1ul << BITS_PER_LEVEL) - 1) + +/* Pointer to an internal node or a leaf node. + Pointers in internal nodes at level 1 point to leaf nodes; + other pointers point to internal nodes. */ +union pointer + { + struct internal_node *internal; + struct leaf_node *leaf; + }; + +/* A sparse array. */ +struct sparse_array + { + struct pool *pool; /* Pool used for allocations. */ + size_t elem_size; /* Element size, rounded for alignment. */ + unsigned long count; /* Number of elements in tree. */ + + /* Radix tree. */ + union pointer root; /* Root of tree. */ + int height; /* 0=empty tree; + 1=root points to leaf, + 2=root points to internal node + that points to leaves, + and so on. */ + + /* Cache for speeding up access. */ + unsigned long int cache_ofs; /* Group of keys that cache points to, + shifted right BITS_PER_LEVEL bits; + ULONG_MAX for empty cache. */ + struct leaf_node *cache; /* Cached leaf node, or a null + pointer for a negative cache. */ + }; + +/* An internal node in the radix tree. */ +struct internal_node + { + int count; /* Number of nonnul children. */ + union pointer down[PTRS_PER_LEVEL]; /* Children. */ + }; + +/* A leaf node in the radix tree. */ +struct leaf_node + { + /* Bit-vector of elements that are in use. */ + unsigned long int in_use[DIV_RND_UP (PTRS_PER_LEVEL, LONG_BITS)]; + /* element_type elements[PTRS_PER_LEVEL]; */ + }; + +/* Returns SIZE rounded up to a safe alignment. */ +#define ALIGN_SIZE(SIZE) ROUND_UP (SIZE, sizeof (long int)) + +/* Returns the size of EXPR_OR_TYPE rounded up to a safe + alignment. */ +#define SIZEOF_ALIGNED(EXPR_OR_TYPE) ALIGN_SIZE (sizeof (EXPR_OR_TYPE)) + +static inline bool index_in_range (const struct sparse_array *, + unsigned long int); +static inline bool is_in_use (const struct leaf_node *, unsigned int); +static inline bool any_in_use (const struct leaf_node *); +static inline void set_in_use (struct leaf_node *, unsigned int); +static inline void unset_in_use (struct leaf_node *, unsigned int); +static inline int scan_in_use (struct leaf_node *, unsigned int); +static inline void *leaf_element (const struct sparse_array *, + struct leaf_node *, unsigned int); +static inline size_t leaf_size (const struct sparse_array *); + +static struct leaf_node *find_leaf_node (const struct sparse_array *, + unsigned long int); +static void decrease_height (struct sparse_array *); +static void *scan_leaf (struct sparse_array *, struct leaf_node *, + unsigned long int, unsigned long int *); +static void *do_scan (struct sparse_array *, union pointer *, int, + unsigned long int, unsigned long int *); + +/* Creates and returns a new sparse array that will contain + elements that are ELEM_SIZE bytes in size. */ +struct sparse_array * +sparse_array_create (size_t elem_size) +{ + return sparse_array_create_pool (NULL, elem_size); +} + +/* Creates and returns a new sparse array that will contain + elements that are ELEM_SIZE bytes in size. Data in the sparse + array will be allocated from POOL, which may be null. */ +struct sparse_array * +sparse_array_create_pool (struct pool *pool, size_t elem_size) +{ + struct sparse_array *spar = pool_malloc (pool, sizeof *spar); + spar->pool = pool; + spar->elem_size = ALIGN_SIZE (elem_size); + spar->height = 0; + spar->root.leaf = NULL; + spar->count = 0; + spar->cache_ofs = ULONG_MAX; + return spar; +} + +/* Destroys SPAR node pointed to by P at the given LEVEL. */ +static void +do_destroy (struct sparse_array *spar, union pointer *p, int level) +{ + if (level > 0) + { + struct internal_node *node = p->internal; + int count = node->count; + int i; + + for (i = 0; ; i++) + { + union pointer *q = &p->internal->down[i]; + if (level > 1 ? q->internal != NULL : q->leaf != NULL) + { + do_destroy (spar, q, level - 1); + if (--count == 0) + break; + } + } + pool_free (spar->pool, p->internal); + } + else if (level == 0) + pool_free (spar->pool, p->leaf); +} + +/* Destroys SPAR. + Any elements in SPAR are deallocated. Thus, if per-element + destruction is necessary, it should be done before destroying + the sparse array. */ +void +sparse_array_destroy (struct sparse_array *spar) +{ + do_destroy (spar, &spar->root, spar->height - 1); + pool_free (spar->pool, spar); +} + +/* Returns the number of elements in SPAR. */ +unsigned long int +sparse_array_count (const struct sparse_array *spar) +{ + return spar->count; +} + +/* Increases SPAR's height by 1, allowing it to hold + PTRS_PER_LEVEL times more elements. */ +static void +increase_height (struct sparse_array *spar) +{ + assert (spar->height < MAX_HEIGHT); + spar->height++; + if (spar->height == 1) + spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar)); + else + { + struct internal_node *new_root; + new_root = pool_zalloc (spar->pool, sizeof *new_root); + new_root->count = 1; + new_root->down[0] = spar->root; + spar->root.internal = new_root; + } +} + +/* Finds the leaf node in SPAR that contains the element for KEY. + SPAR must be tall enough to hold KEY. + Creates the leaf if it doesn't already exist. */ +static struct leaf_node * +create_leaf_node (struct sparse_array *spar, unsigned long int key) +{ + union pointer *p; + int *count = NULL; + int level; + + assert (index_in_range (spar, key)); + + /* Short-circuit everything if KEY is in the leaf cache. */ + if (key >> BITS_PER_LEVEL == spar->cache_ofs && spar->cache != NULL) + return spar->cache; + + /* Descend through internal nodes. */ + p = &spar->root; + for (level = spar->height - 1; level > 0; level--) + { + if (p->internal == NULL) + { + p->internal = pool_zalloc (spar->pool, sizeof *p->internal); + ++*count; + } + + count = &p->internal->count; + p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK]; + } + + /* Create leaf if necessary. */ + if (p->leaf == NULL) + { + p->leaf = pool_zalloc (spar->pool, leaf_size (spar)); + ++*count; + } + + /* Update cache. */ + spar->cache = p->leaf; + spar->cache_ofs = key >> BITS_PER_LEVEL; + + return p->leaf; +} + +/* Inserts into SPAR an element with the given KEY, which must not + already exist in SPAR. + Returns the new element for the caller to initialize. */ +void * +sparse_array_insert (struct sparse_array *spar, unsigned long int key) +{ + struct leaf_node *leaf; + + while (!index_in_range (spar, key)) + increase_height (spar); + + spar->count++; + + leaf = create_leaf_node (spar, key); + assert (!is_in_use (leaf, key)); + set_in_use (leaf, key); + return leaf_element (spar, leaf, key); +} + +/* Finds and returns the element in SPAR with the given KEY. + Returns a null pointer if KEY does not exist in SPAR. */ +void * +sparse_array_get (const struct sparse_array *spar, unsigned long int key) +{ + if (index_in_range (spar, key)) + { + struct leaf_node *leaf = find_leaf_node (spar, key); + if (leaf != NULL && is_in_use (leaf, key)) + return leaf_element (spar, leaf, key); + } + return NULL; +} + +/* Removes the element with the given KEY from SPAR. + Returns true if an element was removed, false if SPAR hadn't + contained an element with the given KEY. + + If elements need to be destructed, then the caller should have + already taken care of it before calling this function; the + element's content must be considered freed and of + indeterminate value after it is removed. */ +bool +sparse_array_remove (struct sparse_array *spar, unsigned long int key) +{ + union pointer *path[MAX_HEIGHT], **last; + struct leaf_node *leaf; + union pointer *p; + int level; + + if (!index_in_range (spar, key)) + return false; + + /* Find and free element in leaf. */ + leaf = find_leaf_node (spar, key); + if (leaf == NULL || !is_in_use (leaf, key)) + return false; +#if ASSERT_LEVEL >= 10 + memset (leaf_element (spar, leaf, key), 0xcc, spar->elem_size); +#endif + unset_in_use (leaf, key); + spar->count--; + if (any_in_use (leaf)) + return true; + + /* The leaf node is empty. + Retrace the path of internal nodes traversed to the leaf. */ + p = &spar->root; + last = path; + for (level = spar->height - 1; level > 0; level--) + { + *last++ = p; + p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK]; + } + + /* Free the leaf node and prune it from the tree. */ + spar->cache_ofs = ULONG_MAX; + pool_free (spar->pool, leaf); + p->leaf = NULL; + + /* Update counts in the internal nodes above the leaf. + Free any internal nodes that become empty. */ + while (last > path) + { + p = *--last; + if (--p->internal->count > 0) + { + if (p == &spar->root) + decrease_height (spar); + return true; + } + + pool_free (spar->pool, p->internal); + p->internal = NULL; + } + spar->height = 0; + return true; +} + +/* Scans SPAR in increasing order of keys for in-use elements. + If SKIP is NULL, the scan starts from key 0; + otherwise, it starts just after key *SKIP. + If an element is found, returns it and stores the element's + key into *FOUND; otherwise, returns a null pointer and does + not modify *FOUND. */ +void * +sparse_array_scan (const struct sparse_array *spar_, unsigned long int *skip, + unsigned long int *found) +{ + struct sparse_array *spar = (struct sparse_array *) spar_; + unsigned long int start; + + /* Find our starting point. */ + if (skip != NULL) + { + start = *skip + 1; + if (start == 0) + return NULL; + } + else + start = 0; + + /* Check the cache. */ + if (start >> BITS_PER_LEVEL == spar->cache_ofs) + { + void *p = scan_leaf (spar, spar->cache, start, found); + if (p) + return p; + start &= ~LEVEL_MASK; + start += PTRS_PER_LEVEL; + if (start == 0) + return NULL; + } + + /* Do the scan. */ + if (!index_in_range (spar, start)) + return NULL; + return do_scan (spar, &spar->root, spar->height - 1, start, found); +} + +/* Returns true iff KEY is in the range of keys currently + represented by SPAR. */ +static inline bool +index_in_range (const struct sparse_array *spar, unsigned long int key) +{ + return (spar->height == 0 ? false + : spar->height >= MAX_HEIGHT ? true + : key < (1ul << (spar->height * BITS_PER_LEVEL))); +} + +/* Returns true iff LEAF contains an in-use element with the + given KEY. */ +static inline bool +is_in_use (const struct leaf_node *leaf, unsigned int key) +{ + key &= LEVEL_MASK; + return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0; +} + +/* Returns true iff LEAF contains any in-use elements. */ +static inline bool +any_in_use (const struct leaf_node *leaf) +{ + size_t i; + for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++) + if (leaf->in_use[i]) + return true; + return false; +} + +/* Marks element KEY in LEAF as in-use. */ +static inline void +set_in_use (struct leaf_node *leaf, unsigned int key) +{ + key &= LEVEL_MASK; + leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS); +} + +/* Marks element KEY in LEAF as not in-use. */ +static inline void +unset_in_use (struct leaf_node *leaf, unsigned int key) +{ + key &= LEVEL_MASK; + leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS)); +} + +/* Returns the number of trailing 0-bits in X. + Undefined if X is zero. */ +static inline int +count_trailing_zeros (unsigned long int x) +{ + /* This algorithm is from _Hacker's Delight_ section 5.4. */ + int n = 1; + +#define COUNT_STEP(BITS) \ + if (!(x & ((1ul << (BITS)) - 1))) \ + { \ + n += BITS; \ + x >>= BITS; \ + } + +#if ULONG_MAX >> 31 >> 31 >> 2 + COUNT_STEP (64); +#endif +#if ULONG_MAX >> 31 >> 1 + COUNT_STEP (32); +#endif + COUNT_STEP (16); + COUNT_STEP (8); + COUNT_STEP (4); + COUNT_STEP (2); + + return n - (x & 1); +} + +/* Returns the least index of the in-use element in LEAF greater + than or equal to IDX. */ +static inline int +scan_in_use (struct leaf_node *leaf, unsigned int idx) +{ + for (; idx < PTRS_PER_LEVEL; idx = (idx & ~(LONG_BITS - 1)) + LONG_BITS) + { + int ofs = idx % LONG_BITS; + unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs; + if (!in_use) + continue; + return count_trailing_zeros (in_use) + idx; + } + return -1; +} + +/* Returns the address of element with the given KEY in LEAF, + which is a node in SPAR. */ +static inline void * +leaf_element (const struct sparse_array *spar, struct leaf_node *leaf, + unsigned int key) +{ + key &= LEVEL_MASK; + return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key); +} + +/* Returns the size of a leaf node in SPAR. */ +static inline size_t +leaf_size (const struct sparse_array *spar) +{ + return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL; +} + +/* Finds and returns the leaf node in SPAR that contains KEY. + Returns null if SPAR does not have a leaf node that contains + KEY. */ +static struct leaf_node * +find_leaf_node (const struct sparse_array *spar_, unsigned long int key) +{ + struct sparse_array *spar = (struct sparse_array *) spar_; + const union pointer *p; + int level; + + assert (index_in_range (spar, key)); + + /* Check the cache first. */ + if (key >> BITS_PER_LEVEL == spar->cache_ofs) + return spar->cache; + + /* Descend through internal nodes. */ + p = &spar->root; + for (level = spar->height - 1; level > 0; level--) + { + if (p->internal == NULL) + return NULL; + p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK]; + } + + /* Update cache. */ + spar->cache = p->leaf; + spar->cache_ofs = key >> BITS_PER_LEVEL; + + return p->leaf; +} + +/* Reduces SPAR's height to the minimum needed value by + eliminating levels that contain only a single entry for all + 0-bits. */ +static void +decrease_height (struct sparse_array *spar) +{ + while (spar->height > 1 + && spar->root.internal->count == 1 + && spar->root.internal->down[0].internal) + { + struct internal_node *p = spar->root.internal; + spar->height--; + spar->root = p->down[0]; + pool_free (spar->pool, p); + } +} + +/* Scans leaf node LEAF, which is in SPAR, for the in-use element + with the least key greater than or equal to START. If such an + element is found, returns a pointer to it and stores its key + in *FOUND; otherwise, returns a null pointer and does not + modify *FOUND. */ +static void * +scan_leaf (struct sparse_array *spar, struct leaf_node *leaf, + unsigned long int start, unsigned long int *found) +{ + int idx = scan_in_use (leaf, start & LEVEL_MASK); + if (idx >= 0) + { + *found = (start & ~LEVEL_MASK) | idx; + spar->cache = leaf; + spar->cache_ofs = *found >> BITS_PER_LEVEL; + return leaf_element (spar, leaf, idx); + } + + return NULL; +} + +/* Scans P, which is at LEVEL and within SPAR, and its subnodes, + for the in-use element with the least key greater than or + equal to START. If such an element is found, returns a + pointer to it and stores its key in *FOUND; otherwise, returns + a null pointer and does not modify *FOUND. */ +static inline void * +scan_internal_node (struct sparse_array *spar, struct internal_node *node, + int level, unsigned long int start, + unsigned long int *found) +{ + int shift = level * BITS_PER_LEVEL; + int count = node->count; + int i; + + for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++) + { + union pointer *q = &node->down[i]; + if (level > 1 ? q->internal != NULL : q->leaf != NULL) + { + void *element = do_scan (spar, q, level - 1, start, found); + if (element) + return element; + if (--count == 0) + return NULL; + } + + start &= ~((1ul << shift) - 1); + start += 1ul << shift; + } + return NULL; +} + +/* Scans P, which is at LEVEL and within SPAR, and its subnodes, + for the in-use element with the least key greater than or + equal to START. If such an element is found, returns a + pointer to it and stores its key in *FOUND; otherwise, returns + a null pointer and does not modify *FOUND. */ +static void * +do_scan (struct sparse_array *spar, union pointer *p, int level, + unsigned long int start, unsigned long int *found) +{ + return (level == 0 + ? scan_leaf (spar, p->leaf, start, found) + : scan_internal_node (spar, p->internal, level, start, found)); +} + diff --git a/src/libpspp/sparse-array.h b/src/libpspp/sparse-array.h new file mode 100644 index 00000000..f2e91a60 --- /dev/null +++ b/src/libpspp/sparse-array.h @@ -0,0 +1,58 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Sparse array data structure. + + Implements a dictionary that associates a "unsigned long int" + key with fixed-size values (elements). + + The implementation allocates elements in groups of moderate + size, so it achieves maximum space efficiency when elements + are clustered into groups of consecutive keys. For the same + reason, elements should be kept relatively small, perhaps a + few pointer elements in size. + + The implementation is slightly more efficient both in time and + space when indexes are kept small. Thus, for example, if the + indexes in use start from some fixed base value, consider + using the offset from that base as the index value. */ + +#ifndef LIBPSPP_SPARSE_ARRAY_H +#define LIBPSPP_SPARSE_ARRAY_H 1 + +#include +#include + +#include + +struct sparse_array *sparse_array_create (size_t elem_size); +struct sparse_array *sparse_array_create_pool (struct pool *, + size_t elem_size); +void sparse_array_destroy (struct sparse_array *); + +unsigned long int sparse_array_count (const struct sparse_array *); + +void *sparse_array_insert (struct sparse_array *, unsigned long int key); +void *sparse_array_get (const struct sparse_array *, unsigned long int key); +bool sparse_array_remove (struct sparse_array *, unsigned long int key); + +void *sparse_array_scan (const struct sparse_array *, + unsigned long int *skip, + unsigned long int *key); + +#endif /* libpspp/sparse-array.h */ diff --git a/tests/ChangeLog b/tests/ChangeLog index 2a172f2e..a76dbc91 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2007-03-25 Ben Pfaff + + * automake.mk: Add tests/libpspp/sparse-array-test. + + * libpspp/sparse-array-test.c: New test. + 2007-03-18 Ben Pfaff * automake.mk: Don't try to distribute tests that are compiled diff --git a/tests/automake.mk b/tests/automake.mk index 3fb24f6e..1296e28b 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -136,16 +136,14 @@ nodist_TESTS = \ tests/libpspp/ll-test \ tests/libpspp/llx-test \ tests/libpspp/heap-test \ - tests/libpspp/abt-test + tests/libpspp/abt-test \ + tests/libpspp/sparse-array-test TESTS = $(dist_TESTS) $(nodist_TESTS) check_PROGRAMS += \ - tests/libpspp/ll-test \ - tests/libpspp/llx-test \ - tests/formats/inexactify \ - tests/libpspp/heap-test \ - tests/libpspp/abt-test + $(nodist_TESTS) \ + tests/formats/inexactify tests_libpspp_ll_test_SOURCES = \ src/libpspp/ll.c \ @@ -175,6 +173,15 @@ tests_libpspp_abt_test_SOURCES = \ tests_libpspp_abt_test_LDADD = gl/libgl.la tests_libpspp_abt_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 +tests_libpspp_sparse_array_test_SOURCES = \ + src/libpspp/sparse-array.c \ + src/libpspp/sparse-array.h \ + src/libpspp/pool.c \ + src/libpspp/pool.h \ + tests/libpspp/sparse-array-test.c +tests_libpspp_sparse_array_test_LDADD = gl/libgl.la +tests_libpspp_sparse_array_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 + tests_formats_inexactify_SOURCES = tests/formats/inexactify.c EXTRA_DIST += \ diff --git a/tests/libpspp/sparse-array-test.c b/tests/libpspp/sparse-array-test.c new file mode 100644 index 00000000..419d0508 --- /dev/null +++ b/tests/libpspp/sparse-array-test.c @@ -0,0 +1,446 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* This is a test program for the sparse array routines defined + in sparse-array.c. This test program aims to be as + comprehensive as possible. "gcov -b" should report 100% + coverage of lines and branches in sparse-array.c when compiled + with -DNDEBUG. "valgrind --leak-check=yes + --show-reachable=yes" should give a clean report. */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +/* Support preliminaries. */ +#if __GNUC__ >= 2 && !defined UNUSED +#define UNUSED __attribute__ ((unused)) +#else +#define UNUSED +#endif + +/* Currently running test. */ +static const char *test_name; + +/* Exit with a failure code. + (Place a breakpoint on this function while debugging.) */ +static void +check_die (void) +{ + exit (EXIT_FAILURE); +} + +/* If OK is not true, prints a message about failure on the + current source file and the given LINE and terminates. */ +static void +check_func (bool ok, int line) +{ + if (!ok) + { + printf ("%s:%d: Check failed in %s test\n", + __FILE__, line, test_name); + check_die (); + } +} + +/* Verifies that EXPR evaluates to true. + If not, prints a message citing the calling line number and + terminates. */ +#define check(EXPR) check_func ((EXPR), __LINE__) + +/* Prints a message about memory exhaustion and exits with a + failure code. */ +static void +xalloc_die (void) +{ + printf ("virtual memory exhausted\n"); + exit (EXIT_FAILURE); +} + +/* Allocates and returns N bytes of memory. */ +static void * +xmalloc (size_t n) +{ + if (n != 0) + { + void *p = malloc (n); + if (p == NULL) + xalloc_die (); + + return p; + } + else + return NULL; +} + +/* Returns a malloc()'d duplicate of the N bytes starting at + P. */ +static void * +xmemdup (const void *p, size_t n) +{ + void *q = xmalloc (n); + memcpy (q, p, n); + return q; +} + +/* Allocates and returns N * M bytes of memory. */ +static void * +xnmalloc (size_t n, size_t m) +{ + if ((size_t) -1 / m <= n) + xalloc_die (); + return xmalloc (n * m); +} + +/* Compares A and B and returns a strcmp-type return value. */ +static int +compare_unsigned_longs_noaux (const void *a_, const void *b_) +{ + const unsigned long *a = a_; + const unsigned long *b = b_; + + return *a < *b ? -1 : *a > *b; +} + +/* Checks that SPAR contains the CNT ints in DATA, that its + structure is correct, and that certain operations on SPAR + produce the expected results. */ +static void +check_sparse_array (struct sparse_array *spar, + const unsigned long data[], size_t cnt) +{ + unsigned long idx; + unsigned long *order; + unsigned long *p; + size_t i; + + check (sparse_array_count (spar) == cnt); + + for (i = 0; i < cnt; i++) + { + p = sparse_array_get (spar, data[i]); + check (p != NULL); + check (*p == data[i]); + } + + order = xmemdup (data, cnt * sizeof *data); + qsort (order, cnt, sizeof *order, compare_unsigned_longs_noaux); + + for (i = 0; i < cnt; i++) + { + p = sparse_array_get (spar, order[i]); + check (p != NULL); + check (*p == order[i]); + } + + if (cnt > 0 && order[0] - 1 != order[cnt - 1]) + { + check (sparse_array_get (spar, order[0] - 1) == NULL); + check (!sparse_array_remove (spar, order[0] - 1)); + } + if (cnt > 0 && order[0] != order[cnt - 1] + 1) + { + check (sparse_array_get (spar, order[cnt - 1] + 1) == NULL); + check (!sparse_array_remove (spar, order[cnt - 1] + 1)); + } + + for (i = 0, p = sparse_array_scan (spar, NULL, &idx); i < cnt; + i++, p = sparse_array_scan (spar, &idx, &idx)) + { + check (p != NULL); + check (idx == order[i]); + check (*p == order[i]); + } + check (p == NULL); + + free (order); +} + +/* Inserts the CNT values from 0 to CNT - 1 (inclusive) into a + sparse array in the order specified by INSERTIONS, then + deletes them in the order specified by DELETIONS, checking the + array's contents for correctness after each operation. */ +static void +test_insert_delete (const unsigned long insertions[], + const unsigned long deletions[], + size_t cnt) +{ + struct sparse_array *spar; + size_t i; + + spar = sparse_array_create (sizeof *insertions); + for (i = 0; i < cnt; i++) + { + unsigned long *p = sparse_array_insert (spar, insertions[i]); + *p = insertions[i]; + check_sparse_array (spar, insertions, i + 1); + } + for (i = 0; i < cnt; i++) + { + bool deleted = sparse_array_remove (spar, deletions[i]); + check (deleted); + check_sparse_array (spar, deletions + i + 1, cnt - (i + 1)); + } + check_sparse_array (spar, NULL, 0); + sparse_array_destroy (spar); +} + +/* Inserts the CNT values from 0 to CNT - 1 (inclusive) into a + sparse array in the order specified by INSERTIONS, then + destroys the sparse array, to check that sparse_cases_destroy + properly frees all the nodes. */ +static void +test_destroy (const unsigned long insertions[], size_t cnt) +{ + struct sparse_array *spar; + size_t i; + + spar = sparse_array_create (sizeof *insertions); + for (i = 0; i < cnt; i++) + { + unsigned long *p = sparse_array_insert (spar, insertions[i]); + *p = insertions[i]; + check_sparse_array (spar, insertions, i + 1); + } + sparse_array_destroy (spar); +} + +/* Randomly shuffles the CNT elements in ARRAY, each of which is + SIZE bytes in size. */ +static void +random_shuffle (void *array_, size_t cnt, size_t size) +{ + char *array = array_; + char *tmp = xmalloc (size); + size_t i; + + for (i = 0; i < cnt; i++) + { + size_t j = rand () % (cnt - i) + i; + if (i != j) + { + memcpy (tmp, array + j * size, size); + memcpy (array + j * size, array + i * size, size); + memcpy (array + i * size, tmp, size); + } + } + + free (tmp); +} + +/* Tests inserting and deleting elements whose values are + determined by starting from various offsets and skipping + across various strides, and doing so in various orders. */ +static void +test_insert_delete_strides (void) +{ + static const unsigned long strides[] = + { + 1, 2, 4, 16, 64, 4096, 262144, 16777216, + 3, 5, 17, 67, 4099, 262147, 16777259, + }; + const size_t stride_cnt = sizeof strides / sizeof *strides; + + static const unsigned long offsets[] = + { + 0, + 1024ul * 1024 + 1, + 1024ul * 1024 * 512 + 23, + ULONG_MAX - 59, + }; + const size_t offset_cnt = sizeof offsets / sizeof *offsets; + + int cnt = 100; + unsigned long *insertions, *deletions; + const unsigned long *stride, *offset; + + insertions = xnmalloc (cnt, sizeof *insertions); + deletions = xnmalloc (cnt, sizeof *deletions); + for (stride = strides; stride < strides + stride_cnt; stride++) + { + for (offset = offsets; offset < offsets + offset_cnt; offset++) + { + int k; + + for (k = 0; k < cnt; k++) + insertions[k] = *stride * k + *offset; + + test_insert_delete (insertions, insertions, cnt); + test_destroy (insertions, cnt); + + for (k = 0; k < cnt; k++) + deletions[k] = insertions[cnt - k - 1]; + test_insert_delete (insertions, deletions, cnt); + + random_shuffle (insertions, cnt, sizeof *insertions); + test_insert_delete (insertions, insertions, cnt); + test_insert_delete (insertions, deletions, cnt); + } + putchar ('.'); + fflush (stdout); + } + free (insertions); + free (deletions); +} + +/* Returns the index in ARRAY of the (CNT+1)th element that has + the TARGET value. */ +static int +scan_bools (bool target, bool array[], size_t cnt) +{ + size_t i; + + for (i = 0; ; i++) + if (array[i] == target && cnt-- == 0) + return i; +} + +/* Performs a random sequence of insertions and deletions in a + sparse array. */ +static void +test_random_insert_delete (void) +{ + unsigned long int values[] = + { + 0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, + 8192, 16384, 32768, 65536, 131072, 262144, 4194304, 8388608, + 16777216, 33554432, 67108864, 134217728, 268435456, 536870912, + 1073741824, 2147483648, + + 3, 7, 15, 31, 63, 127, 257, 511, 1023, 2047, 4095, + 8191, 16383, 32767, 65535, 131071, 262143, 4194303, 8388607, + 16777215, 33554431, 67108863, 134217727, 268435455, 536870911, + 1073741823, 2147483647, 4294967295, + }; + const int max_values = sizeof values / sizeof *values; + + const int num_actions = 250000; + struct sparse_array *spar; + bool *has_values; + int cnt; + int insert_chance; + int i; + + has_values = xnmalloc (max_values, sizeof *has_values); + memset (has_values, 0, max_values * sizeof *has_values); + + cnt = 0; + insert_chance = 5; + + spar = sparse_array_create (sizeof *values); + for (i = 0; i < num_actions; i++) + { + enum { INSERT, DELETE } action; + unsigned long *p; + int j; + + if (cnt == 0) + { + action = INSERT; + if (insert_chance < 9) + insert_chance++; + } + else if (cnt == max_values) + { + action = DELETE; + if (insert_chance > 0) + insert_chance--; + } + else + action = rand () % 10 < insert_chance ? INSERT : DELETE; + + if (action == INSERT) + { + int ins_index; + + ins_index = scan_bools (false, has_values, + rand () % (max_values - cnt)); + assert (has_values[ins_index] == false); + has_values[ins_index] = true; + + p = sparse_array_insert (spar, values[ins_index]); + check (p != NULL); + *p = values[ins_index]; + + cnt++; + } + else if (action == DELETE) + { + int del_index; + + del_index = scan_bools (true, has_values, rand () % cnt); + assert (has_values[del_index] == true); + has_values[del_index] = false; + + check (sparse_array_remove (spar, values[del_index])); + cnt--; + } + else + abort (); + + check (sparse_array_count (spar) == cnt); + for (j = 0; j < max_values; j++) + { + p = sparse_array_get (spar, values[j]); + if (has_values[j]) + { + check (p != NULL); + check (*p == values[j]); + } + else + { + check (p == NULL); + if (rand () % 10 == 0) + sparse_array_remove (spar, values[j]); + } + } + } + sparse_array_destroy (spar); + free (has_values); +} + +/* Main program. */ + +/* Runs TEST_FUNCTION and prints a message about NAME. */ +static void +run_test (void (*test_function) (void), const char *name) +{ + test_name = name; + putchar ('.'); + fflush (stdout); + test_function (); +} + +int +main (void) +{ + run_test (test_random_insert_delete, + "random insertions and deletions"); + run_test (test_insert_delete_strides, + "insert in ascending order with strides and offset"); + putchar ('\n'); + + return 0; +}