1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <libpspp/sparse-array.h>
24 #include <libpspp/assertion.h>
25 #include <libpspp/cast.h>
26 #include <libpspp/misc.h>
27 #include <libpspp/pool.h>
29 #include "count-one-bits.h"
32 /* Sparse array data structure.
34 The sparse array is implemented in terms of a "radix tree", a
35 multiway tree in which a set of bits drawn from the key
36 determine the child chosen at each level during a search. The
37 most-significant bits determine a child of the root, the next
38 bits determine a child of that child, and so on, until the
39 least-significant bits determine a leaf node.
41 In this implementation, the branching factor at each level is
42 held constant at 2**BITS_PER_LEVEL. The tree is only made as
43 tall as need be for the currently largest key, and nodes that
44 would be entirely empty are not allocated at all. The
45 elements are stored in the leaf nodes. */
47 /* Number of bits from the key used as the index at each level. */
48 #define BITS_PER_LEVEL 5
50 /* Branching factor. */
51 #define PTRS_PER_LEVEL (1u << BITS_PER_LEVEL)
54 #define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT)
55 #define LONG_MASK ((unsigned long int) LONG_BITS - 1)
56 #define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL)
58 /* Bit-mask for index. */
59 #define LEVEL_MASK ((1ul << BITS_PER_LEVEL) - 1)
61 /* Pointer to an internal node or a leaf node.
62 Pointers in internal nodes at level 1 point to leaf nodes;
63 other pointers point to internal nodes. */
66 struct internal_node *internal;
67 struct leaf_node *leaf;
73 struct pool *pool; /* Pool used for allocations. */
74 size_t elem_size; /* Element size, rounded for alignment. */
75 unsigned long count; /* Number of elements in tree. */
78 union pointer root; /* Root of tree. */
79 int height; /* 0=empty tree;
80 1=root points to leaf,
81 2=root points to internal node
82 that points to leaves,
85 /* Cache for speeding up access. */
86 unsigned long int cache_ofs; /* Group of keys that cache points to,
87 shifted right BITS_PER_LEVEL bits;
88 ULONG_MAX for empty cache. */
89 struct leaf_node *cache; /* Cached leaf node, or a null
90 pointer for a negative cache. */
93 /* An internal node in the radix tree. */
96 int count; /* Number of nonnull children. */
97 union pointer down[PTRS_PER_LEVEL]; /* Children. */
100 /* A leaf node in the radix tree. */
103 /* Bit-vector of elements that are in use. */
104 unsigned long int in_use[DIV_RND_UP (PTRS_PER_LEVEL, LONG_BITS)];
105 /* element_type elements[PTRS_PER_LEVEL]; */
108 /* Returns SIZE rounded up to a safe alignment. */
109 #define ALIGN_SIZE(SIZE) ROUND_UP (SIZE, sizeof (long int))
111 /* Returns the size of EXPR_OR_TYPE rounded up to a safe
113 #define SIZEOF_ALIGNED(EXPR_OR_TYPE) ALIGN_SIZE (sizeof (EXPR_OR_TYPE))
115 static inline bool index_in_range (const struct sparse_array *,
117 static inline bool is_in_use (const struct leaf_node *, unsigned int);
118 static inline bool any_in_use (const struct leaf_node *);
119 static inline void set_in_use (struct leaf_node *, unsigned int);
120 static inline void unset_in_use (struct leaf_node *, unsigned int);
121 static inline void *leaf_element (const struct sparse_array *,
122 struct leaf_node *, unsigned int);
123 static inline size_t leaf_size (const struct sparse_array *);
125 static struct leaf_node *find_leaf_node (const struct sparse_array *,
127 static void decrease_height (struct sparse_array *);
129 static int scan_in_use_forward (struct leaf_node *, unsigned int);
130 static void *do_scan_forward (struct sparse_array *, union pointer *, int,
131 unsigned long int, unsigned long int *);
132 static void *scan_forward (const struct sparse_array *,
133 unsigned long int start,
134 unsigned long int *found);
136 static int scan_in_use_reverse (struct leaf_node *, unsigned int);
137 static void *do_scan_reverse (struct sparse_array *, union pointer *, int,
138 unsigned long int, unsigned long int *);
139 static void *scan_reverse (const struct sparse_array *,
140 unsigned long int start,
141 unsigned long int *found);
143 /* Creates and returns a new sparse array that will contain
144 elements that are ELEM_SIZE bytes in size. */
145 struct sparse_array *
146 sparse_array_create (size_t elem_size)
148 return sparse_array_create_pool (NULL, elem_size);
151 /* Creates and returns a new sparse array that will contain
152 elements that are ELEM_SIZE bytes in size. Data in the sparse
153 array will be allocated from POOL, which may be null. */
154 struct sparse_array *
155 sparse_array_create_pool (struct pool *pool, size_t elem_size)
157 struct sparse_array *spar = pool_malloc (pool, sizeof *spar);
159 spar->elem_size = ALIGN_SIZE (elem_size);
161 spar->root.leaf = NULL;
163 spar->cache_ofs = ULONG_MAX;
167 /* Destroys SPAR node pointed to by P at the given LEVEL. */
169 do_destroy (struct sparse_array *spar, union pointer *p, int level)
173 struct internal_node *node = p->internal;
174 int count = node->count;
179 union pointer *q = &p->internal->down[i];
180 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
182 do_destroy (spar, q, level - 1);
187 pool_free (spar->pool, p->internal);
190 pool_free (spar->pool, p->leaf);
194 Any elements in SPAR are deallocated. Thus, if per-element
195 destruction is necessary, it should be done before destroying
198 sparse_array_destroy (struct sparse_array *spar)
200 do_destroy (spar, &spar->root, spar->height - 1);
201 pool_free (spar->pool, spar);
204 /* Returns the number of elements in SPAR. */
206 sparse_array_count (const struct sparse_array *spar)
211 /* Increases SPAR's height by 1, allowing it to hold
212 PTRS_PER_LEVEL times more elements. */
214 increase_height (struct sparse_array *spar)
216 assert (spar->height < MAX_HEIGHT);
218 if (spar->height == 1)
219 spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar));
222 struct internal_node *new_root;
223 new_root = pool_zalloc (spar->pool, sizeof *new_root);
225 new_root->down[0] = spar->root;
226 spar->root.internal = new_root;
230 /* Finds the leaf node in SPAR that contains the element for KEY.
231 SPAR must be tall enough to hold KEY.
232 Creates the leaf if it doesn't already exist. */
233 static struct leaf_node *
234 create_leaf_node (struct sparse_array *spar, unsigned long int key)
240 assert (index_in_range (spar, key));
242 /* Short-circuit everything if KEY is in the leaf cache. */
243 if (key >> BITS_PER_LEVEL == spar->cache_ofs && spar->cache != NULL)
246 /* Descend through internal nodes. */
248 for (level = spar->height - 1; level > 0; level--)
250 if (p->internal == NULL)
252 p->internal = pool_zalloc (spar->pool, sizeof *p->internal);
256 count = &p->internal->count;
257 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
260 /* Create leaf if necessary. */
263 p->leaf = pool_zalloc (spar->pool, leaf_size (spar));
268 spar->cache = p->leaf;
269 spar->cache_ofs = key >> BITS_PER_LEVEL;
274 /* Inserts into SPAR an element with the given KEY, which must not
275 already exist in SPAR.
276 Returns the new element for the caller to initialize. */
278 sparse_array_insert (struct sparse_array *spar, unsigned long int key)
280 struct leaf_node *leaf;
282 while (!index_in_range (spar, key))
283 increase_height (spar);
287 leaf = create_leaf_node (spar, key);
288 assert (!is_in_use (leaf, key));
289 set_in_use (leaf, key);
290 return leaf_element (spar, leaf, key);
293 /* Finds and returns the element in SPAR with the given KEY.
294 Returns a null pointer if KEY does not exist in SPAR. */
296 sparse_array_get (const struct sparse_array *spar, unsigned long int key)
298 struct leaf_node *leaf = find_leaf_node (spar, key);
299 if (leaf != NULL && is_in_use (leaf, key))
300 return leaf_element (spar, leaf, key);
304 /* Removes the element with the given KEY from SPAR.
305 Returns true if an element was removed, false if SPAR hadn't
306 contained an element with the given KEY.
308 If elements need to be destructed, then the caller should have
309 already taken care of it before calling this function; the
310 element's content must be considered freed and of
311 indeterminate value after it is removed. */
313 sparse_array_remove (struct sparse_array *spar, unsigned long int key)
315 union pointer *path[MAX_HEIGHT], **last;
316 struct leaf_node *leaf;
320 /* Find and free element in leaf. */
321 leaf = find_leaf_node (spar, key);
322 if (leaf == NULL || !is_in_use (leaf, key))
324 #if ASSERT_LEVEL >= 10
325 memset (leaf_element (spar, leaf, key), 0xcc, spar->elem_size);
327 unset_in_use (leaf, key);
329 if (any_in_use (leaf))
332 /* The leaf node is empty.
333 Retrace the path of internal nodes traversed to the leaf. */
336 for (level = spar->height - 1; level > 0; level--)
339 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
342 /* Free the leaf node and prune it from the tree. */
343 spar->cache_ofs = ULONG_MAX;
344 pool_free (spar->pool, leaf);
347 /* Update counts in the internal nodes above the leaf.
348 Free any internal nodes that become empty. */
352 if (--p->internal->count > 0)
354 if (p == &spar->root)
355 decrease_height (spar);
359 pool_free (spar->pool, p->internal);
366 /* Returns a pointer to the in-use element with the smallest
367 index and sets *IDXP to its index or, if SPAR has no in-use
368 elements, returns NULL without modifying *IDXP. */
370 sparse_array_first (const struct sparse_array *spar, unsigned long int *idxp)
372 return scan_forward (spar, 0, idxp);
375 /* Returns a pointer to the in-use element with the smallest
376 index greater than SKIP and sets *IDXP to its index or, if
377 SPAR has no in-use elements with index greater than SKIP,
378 returns NULL without modifying *IDXP. */
380 sparse_array_next (const struct sparse_array *spar, unsigned long int skip,
381 unsigned long int *idxp)
383 return skip < ULONG_MAX ? scan_forward (spar, skip + 1, idxp) : NULL;
386 /* Returns a pointer to the in-use element with the greatest
387 index and sets *IDXP to its index or, if SPAR has no in-use
388 elements, returns NULL without modifying *IDXP. */
390 sparse_array_last (const struct sparse_array *spar, unsigned long int *idxp)
392 return scan_reverse (spar, ULONG_MAX, idxp);
395 /* Returns a pointer to the in-use element with the greatest
396 index less than SKIP and sets *IDXP to its index or, if SPAR
397 has no in-use elements with index less than SKIP, returns NULL
398 without modifying *IDXP. */
400 sparse_array_prev (const struct sparse_array *spar, unsigned long int skip,
401 unsigned long int *idxp)
403 return skip > 0 ? scan_reverse (spar, skip - 1, idxp) : NULL;
407 /* Returns true iff KEY is in the range of keys currently
408 represented by SPAR. */
410 index_in_range (const struct sparse_array *spar, unsigned long int key)
412 return (spar->height == 0 ? false
413 : spar->height >= MAX_HEIGHT ? true
414 : key < (1ul << (spar->height * BITS_PER_LEVEL)));
417 /* Returns true iff LEAF contains an in-use element with the
420 is_in_use (const struct leaf_node *leaf, unsigned int key)
423 return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0;
426 /* Returns true iff LEAF contains any in-use elements. */
428 any_in_use (const struct leaf_node *leaf)
431 for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++)
437 /* Marks element KEY in LEAF as in-use. */
439 set_in_use (struct leaf_node *leaf, unsigned int key)
442 leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS);
445 /* Marks element KEY in LEAF as not in-use. */
447 unset_in_use (struct leaf_node *leaf, unsigned int key)
450 leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS));
453 /* Returns the number of trailing 0-bits in X.
454 Undefined if X is zero. */
456 count_trailing_zeros (unsigned long int x)
459 return __builtin_ctzl (x);
460 #else /* not GCC 4+ */
461 /* This algorithm is from _Hacker's Delight_ section 5.4. */
464 #define COUNT_STEP(BITS) \
465 if (!(x & ((1ul << (BITS)) - 1))) \
471 #if ULONG_MAX >> 31 >> 31 >> 2
474 #if ULONG_MAX >> 31 >> 1
483 #endif /* not GCC 4+ */
486 /* Returns the least index of the in-use element in LEAF greater
487 than or equal to IDX. Bits in IDX not in LEVEL_MASK are
488 considered to have value 0. */
490 scan_in_use_forward (struct leaf_node *leaf, unsigned int idx)
493 for (; idx < PTRS_PER_LEVEL; idx = (idx & ~LONG_MASK) + LONG_BITS)
495 int ofs = idx % LONG_BITS;
496 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs;
499 return idx + count_trailing_zeros (in_use);
504 /* Returns the number of leading 0-bits in X.
505 Undefined if X is zero. */
507 count_leading_zeros (unsigned long int x)
510 return __builtin_clzl (x);
517 #if ULONG_MAX >> 31 >> 1
520 #if ULONG_MAX >> 31 >> 31 >> 2
523 return LONG_BITS - count_one_bits_l (x);
527 /* Returns the greatest index of the in-use element in LEAF less
528 than or equal to IDX. Bits in IDX not in LEVEL_MASK are
529 considered to have value 0. */
531 scan_in_use_reverse (struct leaf_node *leaf, unsigned int idx)
536 int ofs = idx % LONG_BITS;
537 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] << (31 - ofs);
539 return idx - count_leading_zeros (in_use);
542 idx = (idx | LONG_MASK) - LONG_BITS;
546 /* Returns the address of element with the given KEY in LEAF,
547 which is a node in SPAR. */
549 leaf_element (const struct sparse_array *spar, struct leaf_node *leaf,
553 return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key);
556 /* As leaf_element, returns the address of element with the given
557 KEY in LEAF, which is a node in SPAR. Also, updates SPAR's
558 cache to contain LEAF. */
560 cache_leaf_element (struct sparse_array *spar, struct leaf_node *leaf,
561 unsigned long int key)
564 spar->cache_ofs = key >> BITS_PER_LEVEL;
565 return leaf_element (spar, leaf, key);
568 /* Returns the size of a leaf node in SPAR. */
570 leaf_size (const struct sparse_array *spar)
572 return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL;
575 /* Finds and returns the leaf node in SPAR that contains KEY.
576 Returns null if SPAR does not have a leaf node that contains
578 static struct leaf_node *
579 find_leaf_node (const struct sparse_array *spar_, unsigned long int key)
581 struct sparse_array *spar = CONST_CAST (struct sparse_array *, spar_);
582 const union pointer *p;
585 /* Check the cache first. */
586 if (key >> BITS_PER_LEVEL == spar->cache_ofs)
589 if (!index_in_range (spar, key))
592 /* Descend through internal nodes. */
594 for (level = spar->height - 1; level > 0; level--)
596 if (p->internal == NULL)
598 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
602 spar->cache = p->leaf;
603 spar->cache_ofs = key >> BITS_PER_LEVEL;
608 /* Reduces SPAR's height to the minimum needed value by
609 eliminating levels that contain only a single entry for all
612 decrease_height (struct sparse_array *spar)
614 while (spar->height > 1
615 && spar->root.internal->count == 1
616 && spar->root.internal->down[0].internal)
618 struct internal_node *p = spar->root.internal;
620 spar->root = p->down[0];
621 pool_free (spar->pool, p);
625 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
626 for the in-use element with the least key greater than or
627 equal to START. If such an element is found, returns a
628 pointer to it and stores its key in *FOUND; otherwise, returns
629 a null pointer and does not modify *FOUND. */
631 scan_internal_node_forward (struct sparse_array *spar,
632 struct internal_node *node,
633 int level, unsigned long int start,
634 unsigned long int *found)
636 int shift = level * BITS_PER_LEVEL;
637 int count = node->count;
640 for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
642 union pointer *q = &node->down[i];
643 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
645 void *element = do_scan_forward (spar, q, level - 1, start, found);
652 start &= ~((1ul << shift) - 1);
653 start += 1ul << shift;
658 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
659 for the in-use element with the least key greater than or
660 equal to START. If such an element is found, returns a
661 pointer to it and stores its key in *FOUND; otherwise, returns
662 a null pointer and does not modify *FOUND. */
664 do_scan_forward (struct sparse_array *spar, union pointer *p, int level,
665 unsigned long int start, unsigned long int *found)
669 int idx = scan_in_use_forward (p->leaf, start);
672 unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
673 return cache_leaf_element (spar, p->leaf, key);
676 return scan_internal_node_forward (spar, p->internal, level, start, found);
680 scan_forward (const struct sparse_array *spar_, unsigned long int start,
681 unsigned long int *found)
683 struct sparse_array *spar = CONST_CAST (struct sparse_array *, spar_);
685 /* Check the cache. */
686 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
688 int idx = scan_in_use_forward (spar->cache, start);
691 *found = (start & ~LEVEL_MASK) | idx;
692 return leaf_element (spar, spar->cache, idx);
694 start = (start & ~LEVEL_MASK) + PTRS_PER_LEVEL;
700 if (!index_in_range (spar, start))
702 return do_scan_forward (spar, &spar->root, spar->height - 1, start, found);
705 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
706 for the in-use element with the greatest key less than or
707 equal to START. If such an element is found, returns a
708 pointer to it and stores its key in *FOUND; otherwise, returns
709 a null pointer and does not modify *FOUND. */
711 scan_internal_node_reverse (struct sparse_array *spar,
712 struct internal_node *node,
713 int level, unsigned long int start,
714 unsigned long int *found)
716 int shift = level * BITS_PER_LEVEL;
717 int count = node->count;
720 for (i = (start >> shift) & LEVEL_MASK; i >= 0; i--)
722 union pointer *q = &node->down[i];
723 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
725 void *element = do_scan_reverse (spar, q, level - 1, start, found);
732 start |= (1ul << shift) - 1;
733 start -= 1ul << shift;
738 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
739 for the in-use element with the greatest key less than or
740 equal to START. If such an element is found, returns a
741 pointer to it and stores its key in *FOUND; otherwise, returns
742 a null pointer and does not modify *FOUND. */
744 do_scan_reverse (struct sparse_array *spar, union pointer *p, int level,
745 unsigned long int start, unsigned long int *found)
749 int idx = scan_in_use_reverse (p->leaf, start);
752 unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
753 return cache_leaf_element (spar, p->leaf, key);
758 return scan_internal_node_reverse (spar, p->internal, level, start, found);
762 scan_reverse (const struct sparse_array *spar_, unsigned long int start,
763 unsigned long int *found)
765 struct sparse_array *spar = CONST_CAST (struct sparse_array *, spar_);
767 /* Check the cache. */
768 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
770 int idx = scan_in_use_reverse (spar->cache, start);
773 *found = (start & ~LEVEL_MASK) | idx;
774 return leaf_element (spar, spar->cache, idx);
777 if (start < PTRS_PER_LEVEL)
779 start -= PTRS_PER_LEVEL;
783 if (spar->height == 0)
785 else if (spar->height < MAX_HEIGHT)
787 unsigned long int max_key;
788 max_key = (1ul << (spar->height * BITS_PER_LEVEL)) - 1;
789 start = MIN (start, max_key);
792 return do_scan_reverse (spar, &spar->root, spar->height - 1, start, found);