-/* PSPP - computes sample statistics.
- Copyright (C) 2007 Free Software Foundation, Inc.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <libpspp/misc.h>
#include <libpspp/pool.h>
+#include "count-one-bits.h"
+#include "minmax.h"
+
/* Sparse array data structure.
The sparse array is implemented in terms of a "radix tree", a
/* Maximum height. */
#define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT)
+#define LONG_MASK ((unsigned long int) LONG_BITS - 1)
#define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL)
/* Bit-mask for index. */
/* Pointer to an internal node or a leaf node.
Pointers in internal nodes at level 1 point to leaf nodes;
other pointers point to internal nodes. */
-union pointer
+union pointer
{
struct internal_node *internal;
struct leaf_node *leaf;
};
/* A sparse array. */
-struct sparse_array
+struct sparse_array
{
struct pool *pool; /* Pool used for allocations. */
size_t elem_size; /* Element size, rounded for alignment. */
/* An internal node in the radix tree. */
struct internal_node
{
- int count; /* Number of nonnul children. */
+ int count; /* Number of nonnull children. */
union pointer down[PTRS_PER_LEVEL]; /* Children. */
};
static inline bool any_in_use (const struct leaf_node *);
static inline void set_in_use (struct leaf_node *, unsigned int);
static inline void unset_in_use (struct leaf_node *, unsigned int);
-static inline int scan_in_use (struct leaf_node *, unsigned int);
static inline void *leaf_element (const struct sparse_array *,
struct leaf_node *, unsigned int);
static inline size_t leaf_size (const struct sparse_array *);
static struct leaf_node *find_leaf_node (const struct sparse_array *,
unsigned long int);
static void decrease_height (struct sparse_array *);
-static void *scan_leaf (struct sparse_array *, struct leaf_node *,
- unsigned long int, unsigned long int *);
-static void *do_scan (struct sparse_array *, union pointer *, int,
- unsigned long int, unsigned long int *);
+
+static int scan_in_use_forward (struct leaf_node *, unsigned int);
+static void *do_scan_forward (struct sparse_array *, union pointer *, int,
+ unsigned long int, unsigned long int *);
+static void *scan_forward (const struct sparse_array *,
+ unsigned long int start,
+ unsigned long int *found);
+
+static int scan_in_use_reverse (struct leaf_node *, unsigned int);
+static void *do_scan_reverse (struct sparse_array *, union pointer *, int,
+ unsigned long int, unsigned long int *);
+static void *scan_reverse (const struct sparse_array *,
+ unsigned long int start,
+ unsigned long int *found);
\f
/* Creates and returns a new sparse array that will contain
elements that are ELEM_SIZE bytes in size. */
struct sparse_array *
-sparse_array_create (size_t elem_size)
+sparse_array_create (size_t elem_size)
{
return sparse_array_create_pool (NULL, elem_size);
}
elements that are ELEM_SIZE bytes in size. Data in the sparse
array will be allocated from POOL, which may be null. */
struct sparse_array *
-sparse_array_create_pool (struct pool *pool, size_t elem_size)
+sparse_array_create_pool (struct pool *pool, size_t elem_size)
{
struct sparse_array *spar = pool_malloc (pool, sizeof *spar);
spar->pool = pool;
/* Destroys SPAR node pointed to by P at the given LEVEL. */
static void
-do_destroy (struct sparse_array *spar, union pointer *p, int level)
+do_destroy (struct sparse_array *spar, union pointer *p, int level)
{
- if (level > 0)
+ if (level > 0)
{
struct internal_node *node = p->internal;
int count = node->count;
int i;
- for (i = 0; ; i++)
+ for (i = 0; ; i++)
{
union pointer *q = &p->internal->down[i];
- if (level > 1 ? q->internal != NULL : q->leaf != NULL)
+ if (level > 1 ? q->internal != NULL : q->leaf != NULL)
{
do_destroy (spar, q, level - 1);
if (--count == 0)
- break;
+ break;
}
}
pool_free (spar->pool, p->internal);
destruction is necessary, it should be done before destroying
the sparse array. */
void
-sparse_array_destroy (struct sparse_array *spar)
+sparse_array_destroy (struct sparse_array *spar)
{
do_destroy (spar, &spar->root, spar->height - 1);
pool_free (spar->pool, spar);
/* Returns the number of elements in SPAR. */
unsigned long int
-sparse_array_count (const struct sparse_array *spar)
+sparse_array_count (const struct sparse_array *spar)
{
return spar->count;
}
/* Increases SPAR's height by 1, allowing it to hold
PTRS_PER_LEVEL times more elements. */
static void
-increase_height (struct sparse_array *spar)
+increase_height (struct sparse_array *spar)
{
assert (spar->height < MAX_HEIGHT);
spar->height++;
- if (spar->height == 1)
+ if (spar->height == 1)
spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar));
- else
+ else
{
struct internal_node *new_root;
new_root = pool_zalloc (spar->pool, sizeof *new_root);
p = &spar->root;
for (level = spar->height - 1; level > 0; level--)
{
- if (p->internal == NULL)
+ if (p->internal == NULL)
{
p->internal = pool_zalloc (spar->pool, sizeof *p->internal);
- ++*count;
+ ++*count;
}
count = &p->internal->count;
}
/* Create leaf if necessary. */
- if (p->leaf == NULL)
+ if (p->leaf == NULL)
{
p->leaf = pool_zalloc (spar->pool, leaf_size (spar));
++*count;
/* Inserts into SPAR an element with the given KEY, which must not
already exist in SPAR.
- Returns the new element for the caller to initialize. */
+ Returns the new element for the caller to initialize. */
void *
-sparse_array_insert (struct sparse_array *spar, unsigned long int key)
+sparse_array_insert (struct sparse_array *spar, unsigned long int key)
{
struct leaf_node *leaf;
-
+
while (!index_in_range (spar, key))
increase_height (spar);
/* Finds and returns the element in SPAR with the given KEY.
Returns a null pointer if KEY does not exist in SPAR. */
void *
-sparse_array_get (const struct sparse_array *spar, unsigned long int key)
+sparse_array_get (const struct sparse_array *spar, unsigned long int key)
{
- if (index_in_range (spar, key))
- {
- struct leaf_node *leaf = find_leaf_node (spar, key);
- if (leaf != NULL && is_in_use (leaf, key))
- return leaf_element (spar, leaf, key);
- }
+ struct leaf_node *leaf = find_leaf_node (spar, key);
+ if (leaf != NULL && is_in_use (leaf, key))
+ return leaf_element (spar, leaf, key);
return NULL;
}
element's content must be considered freed and of
indeterminate value after it is removed. */
bool
-sparse_array_remove (struct sparse_array *spar, unsigned long int key)
+sparse_array_remove (struct sparse_array *spar, unsigned long int key)
{
union pointer *path[MAX_HEIGHT], **last;
struct leaf_node *leaf;
union pointer *p;
int level;
-
- if (!index_in_range (spar, key))
- return false;
/* Find and free element in leaf. */
leaf = find_leaf_node (spar, key);
return true;
}
-/* Scans SPAR in increasing order of keys for in-use elements.
- If SKIP is NULL, the scan starts from key 0;
- otherwise, it starts just after key *SKIP.
- If an element is found, returns it and stores the element's
- key into *FOUND; otherwise, returns a null pointer and does
- not modify *FOUND. */
+/* Returns a pointer to the in-use element with the smallest
+ index and sets *IDXP to its index or, if SPAR has no in-use
+ elements, returns NULL without modifying *IDXP. */
void *
-sparse_array_scan (const struct sparse_array *spar_, unsigned long int *skip,
- unsigned long int *found)
+sparse_array_first (const struct sparse_array *spar, unsigned long int *idxp)
{
- struct sparse_array *spar = (struct sparse_array *) spar_;
- unsigned long int start;
+ return scan_forward (spar, 0, idxp);
+}
- /* Find our starting point. */
- if (skip != NULL)
- {
- start = *skip + 1;
- if (start == 0)
- return NULL;
- }
- else
- start = 0;
+/* Returns a pointer to the in-use element with the smallest
+ index greater than SKIP and sets *IDXP to its index or, if
+ SPAR has no in-use elements with index greater than SKIP,
+ returns NULL without modifying *IDXP. */
+void *
+sparse_array_next (const struct sparse_array *spar, unsigned long int skip,
+ unsigned long int *idxp)
+{
+ return skip < ULONG_MAX ? scan_forward (spar, skip + 1, idxp) : NULL;
+}
- /* Check the cache. */
- if (start >> BITS_PER_LEVEL == spar->cache_ofs)
- {
- void *p = scan_leaf (spar, spar->cache, start, found);
- if (p)
- return p;
- start &= ~LEVEL_MASK;
- start += PTRS_PER_LEVEL;
- if (start == 0)
- return NULL;
- }
+/* Returns a pointer to the in-use element with the greatest
+ index and sets *IDXP to its index or, if SPAR has no in-use
+ elements, returns NULL without modifying *IDXP. */
+void *
+sparse_array_last (const struct sparse_array *spar, unsigned long int *idxp)
+{
+ return scan_reverse (spar, ULONG_MAX, idxp);
+}
- /* Do the scan. */
- if (!index_in_range (spar, start))
- return NULL;
- return do_scan (spar, &spar->root, spar->height - 1, start, found);
+/* Returns a pointer to the in-use element with the greatest
+ index less than SKIP and sets *IDXP to its index or, if SPAR
+ has no in-use elements with index less than SKIP, returns NULL
+ without modifying *IDXP. */
+void *
+sparse_array_prev (const struct sparse_array *spar, unsigned long int skip,
+ unsigned long int *idxp)
+{
+ return skip > 0 ? scan_reverse (spar, skip - 1, idxp) : NULL;
}
+
\f
/* Returns true iff KEY is in the range of keys currently
represented by SPAR. */
static inline bool
-index_in_range (const struct sparse_array *spar, unsigned long int key)
+index_in_range (const struct sparse_array *spar, unsigned long int key)
{
return (spar->height == 0 ? false
: spar->height >= MAX_HEIGHT ? true
/* Returns true iff LEAF contains an in-use element with the
given KEY. */
static inline bool
-is_in_use (const struct leaf_node *leaf, unsigned int key)
+is_in_use (const struct leaf_node *leaf, unsigned int key)
{
key &= LEVEL_MASK;
return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0;
}
-/* Returns true iff LEAF contains any in-use elements. */
+/* Returns true iff LEAF contains any in-use elements. */
static inline bool
-any_in_use (const struct leaf_node *leaf)
+any_in_use (const struct leaf_node *leaf)
{
size_t i;
for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++)
/* Marks element KEY in LEAF as in-use. */
static inline void
-set_in_use (struct leaf_node *leaf, unsigned int key)
+set_in_use (struct leaf_node *leaf, unsigned int key)
{
key &= LEVEL_MASK;
leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS);
/* Marks element KEY in LEAF as not in-use. */
static inline void
-unset_in_use (struct leaf_node *leaf, unsigned int key)
+unset_in_use (struct leaf_node *leaf, unsigned int key)
{
key &= LEVEL_MASK;
leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS));
/* Returns the number of trailing 0-bits in X.
Undefined if X is zero. */
static inline int
-count_trailing_zeros (unsigned long int x)
+count_trailing_zeros (unsigned long int x)
{
+#if __GNUC__ >= 4
+ return __builtin_ctzl (x);
+#else /* not GCC 4+ */
/* This algorithm is from _Hacker's Delight_ section 5.4. */
int n = 1;
COUNT_STEP (2);
return n - (x & 1);
+#endif /* not GCC 4+ */
}
/* Returns the least index of the in-use element in LEAF greater
- than or equal to IDX. */
-static inline int
-scan_in_use (struct leaf_node *leaf, unsigned int idx)
+ than or equal to IDX. Bits in IDX not in LEVEL_MASK are
+ considered to have value 0. */
+static int
+scan_in_use_forward (struct leaf_node *leaf, unsigned int idx)
{
- for (; idx < PTRS_PER_LEVEL; idx = (idx & ~(LONG_BITS - 1)) + LONG_BITS)
+ idx &= LEVEL_MASK;
+ for (; idx < PTRS_PER_LEVEL; idx = (idx & ~LONG_MASK) + LONG_BITS)
{
int ofs = idx % LONG_BITS;
unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs;
if (!in_use)
continue;
- return count_trailing_zeros (in_use) + idx;
+ return idx + count_trailing_zeros (in_use);
}
return -1;
}
+/* Returns the number of leading 0-bits in X.
+ Undefined if X is zero. */
+static inline int
+count_leading_zeros (unsigned long int x)
+{
+#if __GNUC__ >= 4
+ return __builtin_clzl (x);
+#else
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+#if ULONG_MAX >> 31 >> 1
+ x |= x >> 32;
+#endif
+#if ULONG_MAX >> 31 >> 31 >> 2
+ x |= x >> 64;
+#endif
+ return LONG_BITS - count_one_bits_l (x);
+#endif
+}
+
+/* Returns the greatest index of the in-use element in LEAF less
+ than or equal to IDX. Bits in IDX not in LEVEL_MASK are
+ considered to have value 0. */
+static int
+scan_in_use_reverse (struct leaf_node *leaf, unsigned int idx)
+{
+ idx &= LEVEL_MASK;
+ for (;;)
+ {
+ int ofs = idx % LONG_BITS;
+ unsigned long int in_use = leaf->in_use[idx / LONG_BITS] << (31 - ofs);
+ if (in_use)
+ return idx - count_leading_zeros (in_use);
+ if (idx < LONG_BITS)
+ return -1;
+ idx = (idx | LONG_MASK) - LONG_BITS;
+ }
+}
+
/* Returns the address of element with the given KEY in LEAF,
which is a node in SPAR. */
static inline void *
leaf_element (const struct sparse_array *spar, struct leaf_node *leaf,
- unsigned int key)
+ unsigned int key)
{
key &= LEVEL_MASK;
return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key);
}
+/* As leaf_element, returns the address of element with the given
+ KEY in LEAF, which is a node in SPAR. Also, updates SPAR's
+ cache to contain LEAF. */
+static void *
+cache_leaf_element (struct sparse_array *spar, struct leaf_node *leaf,
+ unsigned long int key)
+{
+ spar->cache = leaf;
+ spar->cache_ofs = key >> BITS_PER_LEVEL;
+ return leaf_element (spar, leaf, key);
+}
+
/* Returns the size of a leaf node in SPAR. */
static inline size_t
-leaf_size (const struct sparse_array *spar)
+leaf_size (const struct sparse_array *spar)
{
return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL;
}
const union pointer *p;
int level;
- assert (index_in_range (spar, key));
-
/* Check the cache first. */
if (key >> BITS_PER_LEVEL == spar->cache_ofs)
return spar->cache;
+ if (!index_in_range (spar, key))
+ return NULL;
+
/* Descend through internal nodes. */
p = &spar->root;
for (level = spar->height - 1; level > 0; level--)
/* Update cache. */
spar->cache = p->leaf;
spar->cache_ofs = key >> BITS_PER_LEVEL;
-
+
return p->leaf;
}
eliminating levels that contain only a single entry for all
0-bits. */
static void
-decrease_height (struct sparse_array *spar)
+decrease_height (struct sparse_array *spar)
{
while (spar->height > 1
&& spar->root.internal->count == 1
- && spar->root.internal->down[0].internal)
+ && spar->root.internal->down[0].internal)
{
struct internal_node *p = spar->root.internal;
spar->height--;
}
}
-/* Scans leaf node LEAF, which is in SPAR, for the in-use element
- with the least key greater than or equal to START. If such an
- element is found, returns a pointer to it and stores its key
- in *FOUND; otherwise, returns a null pointer and does not
- modify *FOUND. */
-static void *
-scan_leaf (struct sparse_array *spar, struct leaf_node *leaf,
- unsigned long int start, unsigned long int *found)
+/* Scans P, which is at LEVEL and within SPAR, and its subnodes,
+ for the in-use element with the least key greater than or
+ equal to START. If such an element is found, returns a
+ pointer to it and stores its key in *FOUND; otherwise, returns
+ a null pointer and does not modify *FOUND. */
+static inline void *
+scan_internal_node_forward (struct sparse_array *spar,
+ struct internal_node *node,
+ int level, unsigned long int start,
+ unsigned long int *found)
{
- int idx = scan_in_use (leaf, start & LEVEL_MASK);
- if (idx >= 0)
+ int shift = level * BITS_PER_LEVEL;
+ int count = node->count;
+ int i;
+
+ for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
{
- *found = (start & ~LEVEL_MASK) | idx;
- spar->cache = leaf;
- spar->cache_ofs = *found >> BITS_PER_LEVEL;
- return leaf_element (spar, leaf, idx);
+ union pointer *q = &node->down[i];
+ if (level > 1 ? q->internal != NULL : q->leaf != NULL)
+ {
+ void *element = do_scan_forward (spar, q, level - 1, start, found);
+ if (element)
+ return element;
+ if (--count == 0)
+ return NULL;
+ }
+
+ start &= ~((1ul << shift) - 1);
+ start += 1ul << shift;
}
-
return NULL;
}
equal to START. If such an element is found, returns a
pointer to it and stores its key in *FOUND; otherwise, returns
a null pointer and does not modify *FOUND. */
+static void *
+do_scan_forward (struct sparse_array *spar, union pointer *p, int level,
+ unsigned long int start, unsigned long int *found)
+{
+ if (level == 0)
+ {
+ int idx = scan_in_use_forward (p->leaf, start);
+ if (idx >= 0)
+ {
+ unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
+ return cache_leaf_element (spar, p->leaf, key);
+ }
+ }
+ return scan_internal_node_forward (spar, p->internal, level, start, found);
+}
+
+static void *
+scan_forward (const struct sparse_array *spar_, unsigned long int start,
+ unsigned long int *found)
+{
+ struct sparse_array *spar = (struct sparse_array *) spar_;
+
+ /* Check the cache. */
+ if (start >> BITS_PER_LEVEL == spar->cache_ofs)
+ {
+ int idx = scan_in_use_forward (spar->cache, start);
+ if (idx >= 0)
+ {
+ *found = (start & ~LEVEL_MASK) | idx;
+ return leaf_element (spar, spar->cache, idx);
+ }
+ start = (start & ~LEVEL_MASK) + PTRS_PER_LEVEL;
+ if (start == 0)
+ return NULL;
+ }
+
+ /* Do the scan. */
+ if (!index_in_range (spar, start))
+ return NULL;
+ return do_scan_forward (spar, &spar->root, spar->height - 1, start, found);
+}
+
+/* Scans P, which is at LEVEL and within SPAR, and its subnodes,
+ for the in-use element with the greatest key less than or
+ equal to START. If such an element is found, returns a
+ pointer to it and stores its key in *FOUND; otherwise, returns
+ a null pointer and does not modify *FOUND. */
static inline void *
-scan_internal_node (struct sparse_array *spar, struct internal_node *node,
- int level, unsigned long int start,
- unsigned long int *found)
+scan_internal_node_reverse (struct sparse_array *spar,
+ struct internal_node *node,
+ int level, unsigned long int start,
+ unsigned long int *found)
{
int shift = level * BITS_PER_LEVEL;
int count = node->count;
int i;
- for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
+ for (i = (start >> shift) & LEVEL_MASK; i >= 0; i--)
{
union pointer *q = &node->down[i];
if (level > 1 ? q->internal != NULL : q->leaf != NULL)
{
- void *element = do_scan (spar, q, level - 1, start, found);
+ void *element = do_scan_reverse (spar, q, level - 1, start, found);
if (element)
return element;
if (--count == 0)
return NULL;
- }
+ }
- start &= ~((1ul << shift) - 1);
- start += 1ul << shift;
+ start |= (1ul << shift) - 1;
+ start -= 1ul << shift;
}
return NULL;
}
/* Scans P, which is at LEVEL and within SPAR, and its subnodes,
- for the in-use element with the least key greater than or
+ for the in-use element with the greatest key less than or
equal to START. If such an element is found, returns a
pointer to it and stores its key in *FOUND; otherwise, returns
a null pointer and does not modify *FOUND. */
static void *
-do_scan (struct sparse_array *spar, union pointer *p, int level,
- unsigned long int start, unsigned long int *found)
+do_scan_reverse (struct sparse_array *spar, union pointer *p, int level,
+ unsigned long int start, unsigned long int *found)
{
- return (level == 0
- ? scan_leaf (spar, p->leaf, start, found)
- : scan_internal_node (spar, p->internal, level, start, found));
+ if (level == 0)
+ {
+ int idx = scan_in_use_reverse (p->leaf, start);
+ if (idx >= 0)
+ {
+ unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
+ return cache_leaf_element (spar, p->leaf, key);
+ }
+ else
+ return NULL;
+ }
+ return scan_internal_node_reverse (spar, p->internal, level, start, found);
}
+static void *
+scan_reverse (const struct sparse_array *spar_, unsigned long int start,
+ unsigned long int *found)
+{
+ struct sparse_array *spar = (struct sparse_array *) spar_;
+
+ /* Check the cache. */
+ if (start >> BITS_PER_LEVEL == spar->cache_ofs)
+ {
+ int idx = scan_in_use_reverse (spar->cache, start);
+ if (idx >= 0)
+ {
+ *found = (start & ~LEVEL_MASK) | idx;
+ return leaf_element (spar, spar->cache, idx);
+ }
+ start |= LEVEL_MASK;
+ if (start < PTRS_PER_LEVEL)
+ return NULL;
+ start -= PTRS_PER_LEVEL;
+ }
+ else
+ {
+ if (spar->height == 0)
+ return NULL;
+ else if (spar->height < MAX_HEIGHT)
+ {
+ unsigned long int max_key;
+ max_key = (1ul << (spar->height * BITS_PER_LEVEL)) - 1;
+ start = MIN (start, max_key);
+ }
+ }
+ return do_scan_reverse (spar, &spar->root, spar->height - 1, start, found);
+}