1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <libpspp/sparse-array.h>
24 #include <libpspp/assertion.h>
25 #include <libpspp/misc.h>
26 #include <libpspp/pool.h>
28 #include "count-one-bits.h"
31 /* Sparse array data structure.
33 The sparse array is implemented in terms of a "radix tree", a
34 multiway tree in which a set of bits drawn from the key
35 determine the child chosen at each level during a search. The
36 most-significant bits determine a child of the root, the next
37 bits determine a child of that child, and so on, until the
38 least-significant bits determine a leaf node.
40 In this implementation, the branching factor at each level is
41 held constant at 2**BITS_PER_LEVEL. The tree is only made as
42 tall as need be for the currently largest key, and nodes that
43 would be entirely empty are not allocated at all. The
44 elements are stored in the leaf nodes. */
46 /* Number of bits from the key used as the index at each level. */
47 #define BITS_PER_LEVEL 5
49 /* Branching factor. */
50 #define PTRS_PER_LEVEL (1u << BITS_PER_LEVEL)
53 #define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT)
54 #define LONG_MASK ((unsigned long int) LONG_BITS - 1)
55 #define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL)
57 /* Bit-mask for index. */
58 #define LEVEL_MASK ((1ul << BITS_PER_LEVEL) - 1)
60 /* Pointer to an internal node or a leaf node.
61 Pointers in internal nodes at level 1 point to leaf nodes;
62 other pointers point to internal nodes. */
65 struct internal_node *internal;
66 struct leaf_node *leaf;
72 struct pool *pool; /* Pool used for allocations. */
73 size_t elem_size; /* Element size, rounded for alignment. */
74 unsigned long count; /* Number of elements in tree. */
77 union pointer root; /* Root of tree. */
78 int height; /* 0=empty tree;
79 1=root points to leaf,
80 2=root points to internal node
81 that points to leaves,
84 /* Cache for speeding up access. */
85 unsigned long int cache_ofs; /* Group of keys that cache points to,
86 shifted right BITS_PER_LEVEL bits;
87 ULONG_MAX for empty cache. */
88 struct leaf_node *cache; /* Cached leaf node, or a null
89 pointer for a negative cache. */
92 /* An internal node in the radix tree. */
95 int count; /* Number of nonnull children. */
96 union pointer down[PTRS_PER_LEVEL]; /* Children. */
99 /* A leaf node in the radix tree. */
102 /* Bit-vector of elements that are in use. */
103 unsigned long int in_use[DIV_RND_UP (PTRS_PER_LEVEL, LONG_BITS)];
104 /* element_type elements[PTRS_PER_LEVEL]; */
107 /* Returns SIZE rounded up to a safe alignment. */
108 #define ALIGN_SIZE(SIZE) ROUND_UP (SIZE, sizeof (long int))
110 /* Returns the size of EXPR_OR_TYPE rounded up to a safe
112 #define SIZEOF_ALIGNED(EXPR_OR_TYPE) ALIGN_SIZE (sizeof (EXPR_OR_TYPE))
114 static inline bool index_in_range (const struct sparse_array *,
116 static inline bool is_in_use (const struct leaf_node *, unsigned int);
117 static inline bool any_in_use (const struct leaf_node *);
118 static inline void set_in_use (struct leaf_node *, unsigned int);
119 static inline void unset_in_use (struct leaf_node *, unsigned int);
120 static inline void *leaf_element (const struct sparse_array *,
121 struct leaf_node *, unsigned int);
122 static inline size_t leaf_size (const struct sparse_array *);
124 static struct leaf_node *find_leaf_node (const struct sparse_array *,
126 static void decrease_height (struct sparse_array *);
128 static int scan_in_use_forward (struct leaf_node *, unsigned int);
129 static void *do_scan_forward (struct sparse_array *, union pointer *, int,
130 unsigned long int, unsigned long int *);
131 static void *scan_forward (const struct sparse_array *,
132 unsigned long int start,
133 unsigned long int *found);
135 static int scan_in_use_reverse (struct leaf_node *, unsigned int);
136 static void *do_scan_reverse (struct sparse_array *, union pointer *, int,
137 unsigned long int, unsigned long int *);
138 static void *scan_reverse (const struct sparse_array *,
139 unsigned long int start,
140 unsigned long int *found);
142 /* Creates and returns a new sparse array that will contain
143 elements that are ELEM_SIZE bytes in size. */
144 struct sparse_array *
145 sparse_array_create (size_t elem_size)
147 return sparse_array_create_pool (NULL, elem_size);
150 /* Creates and returns a new sparse array that will contain
151 elements that are ELEM_SIZE bytes in size. Data in the sparse
152 array will be allocated from POOL, which may be null. */
153 struct sparse_array *
154 sparse_array_create_pool (struct pool *pool, size_t elem_size)
156 struct sparse_array *spar = pool_malloc (pool, sizeof *spar);
158 spar->elem_size = ALIGN_SIZE (elem_size);
160 spar->root.leaf = NULL;
162 spar->cache_ofs = ULONG_MAX;
166 /* Destroys SPAR node pointed to by P at the given LEVEL. */
168 do_destroy (struct sparse_array *spar, union pointer *p, int level)
172 struct internal_node *node = p->internal;
173 int count = node->count;
178 union pointer *q = &p->internal->down[i];
179 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
181 do_destroy (spar, q, level - 1);
186 pool_free (spar->pool, p->internal);
189 pool_free (spar->pool, p->leaf);
193 Any elements in SPAR are deallocated. Thus, if per-element
194 destruction is necessary, it should be done before destroying
197 sparse_array_destroy (struct sparse_array *spar)
199 do_destroy (spar, &spar->root, spar->height - 1);
200 pool_free (spar->pool, spar);
203 /* Returns the number of elements in SPAR. */
205 sparse_array_count (const struct sparse_array *spar)
210 /* Increases SPAR's height by 1, allowing it to hold
211 PTRS_PER_LEVEL times more elements. */
213 increase_height (struct sparse_array *spar)
215 assert (spar->height < MAX_HEIGHT);
217 if (spar->height == 1)
218 spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar));
221 struct internal_node *new_root;
222 new_root = pool_zalloc (spar->pool, sizeof *new_root);
224 new_root->down[0] = spar->root;
225 spar->root.internal = new_root;
229 /* Finds the leaf node in SPAR that contains the element for KEY.
230 SPAR must be tall enough to hold KEY.
231 Creates the leaf if it doesn't already exist. */
232 static struct leaf_node *
233 create_leaf_node (struct sparse_array *spar, unsigned long int key)
239 assert (index_in_range (spar, key));
241 /* Short-circuit everything if KEY is in the leaf cache. */
242 if (key >> BITS_PER_LEVEL == spar->cache_ofs && spar->cache != NULL)
245 /* Descend through internal nodes. */
247 for (level = spar->height - 1; level > 0; level--)
249 if (p->internal == NULL)
251 p->internal = pool_zalloc (spar->pool, sizeof *p->internal);
255 count = &p->internal->count;
256 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
259 /* Create leaf if necessary. */
262 p->leaf = pool_zalloc (spar->pool, leaf_size (spar));
267 spar->cache = p->leaf;
268 spar->cache_ofs = key >> BITS_PER_LEVEL;
273 /* Inserts into SPAR an element with the given KEY, which must not
274 already exist in SPAR.
275 Returns the new element for the caller to initialize. */
277 sparse_array_insert (struct sparse_array *spar, unsigned long int key)
279 struct leaf_node *leaf;
281 while (!index_in_range (spar, key))
282 increase_height (spar);
286 leaf = create_leaf_node (spar, key);
287 assert (!is_in_use (leaf, key));
288 set_in_use (leaf, key);
289 return leaf_element (spar, leaf, key);
292 /* Finds and returns the element in SPAR with the given KEY.
293 Returns a null pointer if KEY does not exist in SPAR. */
295 sparse_array_get (const struct sparse_array *spar, unsigned long int key)
297 if (index_in_range (spar, key))
299 struct leaf_node *leaf = find_leaf_node (spar, key);
300 if (leaf != NULL && is_in_use (leaf, key))
301 return leaf_element (spar, leaf, key);
306 /* Removes the element with the given KEY from SPAR.
307 Returns true if an element was removed, false if SPAR hadn't
308 contained an element with the given KEY.
310 If elements need to be destructed, then the caller should have
311 already taken care of it before calling this function; the
312 element's content must be considered freed and of
313 indeterminate value after it is removed. */
315 sparse_array_remove (struct sparse_array *spar, unsigned long int key)
317 union pointer *path[MAX_HEIGHT], **last;
318 struct leaf_node *leaf;
322 if (!index_in_range (spar, key))
325 /* Find and free element in leaf. */
326 leaf = find_leaf_node (spar, key);
327 if (leaf == NULL || !is_in_use (leaf, key))
329 #if ASSERT_LEVEL >= 10
330 memset (leaf_element (spar, leaf, key), 0xcc, spar->elem_size);
332 unset_in_use (leaf, key);
334 if (any_in_use (leaf))
337 /* The leaf node is empty.
338 Retrace the path of internal nodes traversed to the leaf. */
341 for (level = spar->height - 1; level > 0; level--)
344 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
347 /* Free the leaf node and prune it from the tree. */
348 spar->cache_ofs = ULONG_MAX;
349 pool_free (spar->pool, leaf);
352 /* Update counts in the internal nodes above the leaf.
353 Free any internal nodes that become empty. */
357 if (--p->internal->count > 0)
359 if (p == &spar->root)
360 decrease_height (spar);
364 pool_free (spar->pool, p->internal);
371 /* Returns a pointer to the in-use element with the smallest
372 index and sets *IDXP to its index or, if SPAR has no in-use
373 elements, returns NULL without modifying *IDXP. */
375 sparse_array_first (const struct sparse_array *spar, unsigned long int *idxp)
377 return scan_forward (spar, 0, idxp);
380 /* Returns a pointer to the in-use element with the smallest
381 index greater than SKIP and sets *IDXP to its index or, if
382 SPAR has no in-use elements with index greater than SKIP,
383 returns NULL without modifying *IDXP. */
385 sparse_array_next (const struct sparse_array *spar, unsigned long int skip,
386 unsigned long int *idxp)
388 return skip < ULONG_MAX ? scan_forward (spar, skip + 1, idxp) : NULL;
391 /* Returns a pointer to the in-use element with the greatest
392 index and sets *IDXP to its index or, if SPAR has no in-use
393 elements, returns NULL without modifying *IDXP. */
395 sparse_array_last (const struct sparse_array *spar, unsigned long int *idxp)
397 return scan_reverse (spar, ULONG_MAX, idxp);
400 /* Returns a pointer to the in-use element with the greatest
401 index less than SKIP and sets *IDXP to its index or, if SPAR
402 has no in-use elements with index less than SKIP, returns NULL
403 without modifying *IDXP. */
405 sparse_array_prev (const struct sparse_array *spar, unsigned long int skip,
406 unsigned long int *idxp)
408 return skip > 0 ? scan_reverse (spar, skip - 1, idxp) : NULL;
412 /* Returns true iff KEY is in the range of keys currently
413 represented by SPAR. */
415 index_in_range (const struct sparse_array *spar, unsigned long int key)
417 return (spar->height == 0 ? false
418 : spar->height >= MAX_HEIGHT ? true
419 : key < (1ul << (spar->height * BITS_PER_LEVEL)));
422 /* Returns true iff LEAF contains an in-use element with the
425 is_in_use (const struct leaf_node *leaf, unsigned int key)
428 return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0;
431 /* Returns true iff LEAF contains any in-use elements. */
433 any_in_use (const struct leaf_node *leaf)
436 for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++)
442 /* Marks element KEY in LEAF as in-use. */
444 set_in_use (struct leaf_node *leaf, unsigned int key)
447 leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS);
450 /* Marks element KEY in LEAF as not in-use. */
452 unset_in_use (struct leaf_node *leaf, unsigned int key)
455 leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS));
458 /* Returns the number of trailing 0-bits in X.
459 Undefined if X is zero. */
461 count_trailing_zeros (unsigned long int x)
464 return __builtin_ctzl (x);
465 #else /* not GCC 4+ */
466 /* This algorithm is from _Hacker's Delight_ section 5.4. */
469 #define COUNT_STEP(BITS) \
470 if (!(x & ((1ul << (BITS)) - 1))) \
476 #if ULONG_MAX >> 31 >> 31 >> 2
479 #if ULONG_MAX >> 31 >> 1
488 #endif /* not GCC 4+ */
491 /* Returns the least index of the in-use element in LEAF greater
492 than or equal to IDX. Bits in IDX not in LEVEL_MASK are
493 considered to have value 0. */
495 scan_in_use_forward (struct leaf_node *leaf, unsigned int idx)
498 for (; idx < PTRS_PER_LEVEL; idx = (idx & ~LONG_MASK) + LONG_BITS)
500 int ofs = idx % LONG_BITS;
501 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs;
504 return idx + count_trailing_zeros (in_use);
509 /* Returns the number of leading 0-bits in X.
510 Undefined if X is zero. */
512 count_leading_zeros (unsigned long int x)
515 return __builtin_clzl (x);
522 #if ULONG_MAX >> 31 >> 1
525 #if ULONG_MAX >> 31 >> 31 >> 2
528 return LONG_BITS - count_one_bits_l (x);
532 /* Returns the greatest index of the in-use element in LEAF less
533 than or equal to IDX. Bits in IDX not in LEVEL_MASK are
534 considered to have value 0. */
536 scan_in_use_reverse (struct leaf_node *leaf, unsigned int idx)
541 int ofs = idx % LONG_BITS;
542 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] << (31 - ofs);
544 return idx - count_leading_zeros (in_use);
547 idx = (idx | LONG_MASK) - LONG_BITS;
551 /* Returns the address of element with the given KEY in LEAF,
552 which is a node in SPAR. */
554 leaf_element (const struct sparse_array *spar, struct leaf_node *leaf,
558 return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key);
561 /* As leaf_element, returns the address of element with the given
562 KEY in LEAF, which is a node in SPAR. Also, updates SPAR's
563 cache to contain LEAF. */
565 cache_leaf_element (struct sparse_array *spar, struct leaf_node *leaf,
566 unsigned long int key)
569 spar->cache_ofs = key >> BITS_PER_LEVEL;
570 return leaf_element (spar, leaf, key);
573 /* Returns the size of a leaf node in SPAR. */
575 leaf_size (const struct sparse_array *spar)
577 return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL;
580 /* Finds and returns the leaf node in SPAR that contains KEY.
581 Returns null if SPAR does not have a leaf node that contains
583 static struct leaf_node *
584 find_leaf_node (const struct sparse_array *spar_, unsigned long int key)
586 struct sparse_array *spar = (struct sparse_array *) spar_;
587 const union pointer *p;
590 assert (index_in_range (spar, key));
592 /* Check the cache first. */
593 if (key >> BITS_PER_LEVEL == spar->cache_ofs)
596 /* Descend through internal nodes. */
598 for (level = spar->height - 1; level > 0; level--)
600 if (p->internal == NULL)
602 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
606 spar->cache = p->leaf;
607 spar->cache_ofs = key >> BITS_PER_LEVEL;
612 /* Reduces SPAR's height to the minimum needed value by
613 eliminating levels that contain only a single entry for all
616 decrease_height (struct sparse_array *spar)
618 while (spar->height > 1
619 && spar->root.internal->count == 1
620 && spar->root.internal->down[0].internal)
622 struct internal_node *p = spar->root.internal;
624 spar->root = p->down[0];
625 pool_free (spar->pool, p);
629 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
630 for the in-use element with the least key greater than or
631 equal to START. If such an element is found, returns a
632 pointer to it and stores its key in *FOUND; otherwise, returns
633 a null pointer and does not modify *FOUND. */
635 scan_internal_node_forward (struct sparse_array *spar,
636 struct internal_node *node,
637 int level, unsigned long int start,
638 unsigned long int *found)
640 int shift = level * BITS_PER_LEVEL;
641 int count = node->count;
644 for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
646 union pointer *q = &node->down[i];
647 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
649 void *element = do_scan_forward (spar, q, level - 1, start, found);
656 start &= ~((1ul << shift) - 1);
657 start += 1ul << shift;
662 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
663 for the in-use element with the least key greater than or
664 equal to START. If such an element is found, returns a
665 pointer to it and stores its key in *FOUND; otherwise, returns
666 a null pointer and does not modify *FOUND. */
668 do_scan_forward (struct sparse_array *spar, union pointer *p, int level,
669 unsigned long int start, unsigned long int *found)
673 int idx = scan_in_use_forward (p->leaf, start);
676 unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
677 return cache_leaf_element (spar, p->leaf, key);
680 return scan_internal_node_forward (spar, p->internal, level, start, found);
684 scan_forward (const struct sparse_array *spar_, unsigned long int start,
685 unsigned long int *found)
687 struct sparse_array *spar = (struct sparse_array *) spar_;
689 /* Check the cache. */
690 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
692 int idx = scan_in_use_forward (spar->cache, start);
695 *found = (start & ~LEVEL_MASK) | idx;
696 return leaf_element (spar, spar->cache, idx);
698 start = (start & ~LEVEL_MASK) + PTRS_PER_LEVEL;
704 if (!index_in_range (spar, start))
706 return do_scan_forward (spar, &spar->root, spar->height - 1, start, found);
709 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
710 for the in-use element with the greatest key less than or
711 equal to START. If such an element is found, returns a
712 pointer to it and stores its key in *FOUND; otherwise, returns
713 a null pointer and does not modify *FOUND. */
715 scan_internal_node_reverse (struct sparse_array *spar,
716 struct internal_node *node,
717 int level, unsigned long int start,
718 unsigned long int *found)
720 int shift = level * BITS_PER_LEVEL;
721 int count = node->count;
724 for (i = (start >> shift) & LEVEL_MASK; i >= 0; i--)
726 union pointer *q = &node->down[i];
727 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
729 void *element = do_scan_reverse (spar, q, level - 1, start, found);
736 start |= (1ul << shift) - 1;
737 start -= 1ul << shift;
742 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
743 for the in-use element with the greatest key less than or
744 equal to START. If such an element is found, returns a
745 pointer to it and stores its key in *FOUND; otherwise, returns
746 a null pointer and does not modify *FOUND. */
748 do_scan_reverse (struct sparse_array *spar, union pointer *p, int level,
749 unsigned long int start, unsigned long int *found)
753 int idx = scan_in_use_reverse (p->leaf, start);
756 unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
757 return cache_leaf_element (spar, p->leaf, key);
762 return scan_internal_node_reverse (spar, p->internal, level, start, found);
766 scan_reverse (const struct sparse_array *spar_, unsigned long int start,
767 unsigned long int *found)
769 struct sparse_array *spar = (struct sparse_array *) spar_;
771 /* Check the cache. */
772 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
774 int idx = scan_in_use_reverse (spar->cache, start);
777 *found = (start & ~LEVEL_MASK) | idx;
778 return leaf_element (spar, spar->cache, idx);
781 if (start < PTRS_PER_LEVEL)
783 start -= PTRS_PER_LEVEL;
787 if (spar->height == 0)
789 else if (spar->height < MAX_HEIGHT)
791 unsigned long int max_key;
792 max_key = (1ul << (spar->height * BITS_PER_LEVEL)) - 1;
793 start = MIN (start, max_key);
796 return do_scan_reverse (spar, &spar->root, spar->height - 1, start, found);