1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/sparse-array.h"
24 #include "libpspp/assertion.h"
25 #include "libpspp/cast.h"
26 #include "libpspp/misc.h"
27 #include "libpspp/pool.h"
29 #include "gl/count-one-bits.h"
30 #include "gl/minmax.h"
32 /* Sparse array data structure.
34 The sparse array is implemented in terms of a "radix tree", a
35 multiway tree in which a set of bits drawn from the key
36 determine the child chosen at each level during a search. The
37 most-significant bits determine a child of the root, the next
38 bits determine a child of that child, and so on, until the
39 least-significant bits determine a leaf node.
41 In this implementation, the branching factor at each level is
42 held constant at 2**BITS_PER_LEVEL. The tree is only made as
43 tall as need be for the currently largest key, and nodes that
44 would be entirely empty are not allocated at all. The
45 elements are stored in the leaf nodes. */
47 /* Number of bits from the key used as the index at each level. */
48 #define BITS_PER_LEVEL 5
50 /* Branching factor. */
51 #define PTRS_PER_LEVEL (1u << BITS_PER_LEVEL)
54 #define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT)
55 #define LONG_MASK ((unsigned long int) LONG_BITS - 1)
56 #define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL)
58 /* Bit-mask for index. */
59 #define LEVEL_MASK ((1ul << BITS_PER_LEVEL) - 1)
61 /* Pointer to an internal node or a leaf node.
62 Pointers in internal nodes at level 1 point to leaf nodes;
63 other pointers point to internal nodes. */
66 struct internal_node *internal;
67 struct leaf_node *leaf;
73 struct pool *pool; /* Pool used for allocations. */
74 size_t elem_size; /* Element size, rounded for alignment. */
75 unsigned long count; /* Number of elements in tree. */
78 union pointer root; /* Root of tree. */
79 int height; /* 0=empty tree;
80 1=root points to leaf,
81 2=root points to internal node
82 that points to leaves,
85 /* Cache for speeding up access. */
86 unsigned long int cache_ofs; /* Group of keys that cache points to,
87 shifted right BITS_PER_LEVEL bits;
88 ULONG_MAX for empty cache. */
89 struct leaf_node *cache; /* Cached leaf node, or a null
90 pointer for a negative cache. */
93 /* An internal node in the radix tree. */
96 int count; /* Number of nonnull children. */
97 union pointer down[PTRS_PER_LEVEL]; /* Children. */
100 /* A leaf node in the radix tree. */
103 /* Bit-vector of elements that are in use. */
104 unsigned long int in_use[DIV_RND_UP (PTRS_PER_LEVEL, LONG_BITS)];
105 /* element_type elements[PTRS_PER_LEVEL]; */
108 /* Returns SIZE rounded up to a safe alignment. */
109 #define ALIGN_SIZE(SIZE) ROUND_UP (SIZE, sizeof (long int))
111 /* Returns the size of EXPR_OR_TYPE rounded up to a safe
113 #define SIZEOF_ALIGNED(EXPR_OR_TYPE) ALIGN_SIZE (sizeof (EXPR_OR_TYPE))
115 static inline bool index_in_range (const struct sparse_array *,
117 static inline bool is_in_use (const struct leaf_node *, unsigned int);
118 static inline bool any_in_use (const struct leaf_node *);
119 static inline void set_in_use (struct leaf_node *, unsigned int);
120 static inline void unset_in_use (struct leaf_node *, unsigned int);
121 static inline void *leaf_element (const struct sparse_array *,
122 struct leaf_node *, unsigned int);
123 static inline size_t leaf_size (const struct sparse_array *);
125 static struct leaf_node *find_leaf_node (const struct sparse_array *,
127 static void decrease_height (struct sparse_array *);
129 static int scan_in_use_forward (struct leaf_node *, unsigned int);
130 static void *do_scan_forward (struct sparse_array *, union pointer *, int,
131 unsigned long int, unsigned long int *);
132 static void *scan_forward (const struct sparse_array *,
133 unsigned long int start,
134 unsigned long int *found);
136 static int scan_in_use_reverse (struct leaf_node *, unsigned int);
137 static void *do_scan_reverse (struct sparse_array *, union pointer *, int,
138 unsigned long int, unsigned long int *);
139 static void *scan_reverse (const struct sparse_array *,
140 unsigned long int start,
141 unsigned long int *found);
143 /* Creates and returns a new sparse array that will contain
144 elements that are ELEM_SIZE bytes in size. */
145 struct sparse_array *
146 sparse_array_create (size_t elem_size)
148 return sparse_array_create_pool (NULL, elem_size);
151 /* Creates and returns a new sparse array that will contain
152 elements that are ELEM_SIZE bytes in size. Data in the sparse
153 array will be allocated from POOL, which may be null. */
154 struct sparse_array *
155 sparse_array_create_pool (struct pool *pool, size_t elem_size)
157 struct sparse_array *spar = pool_malloc (pool, sizeof *spar);
159 spar->elem_size = ALIGN_SIZE (elem_size);
161 spar->root.leaf = NULL;
163 spar->cache_ofs = ULONG_MAX;
167 /* Destroys SPAR node pointed to by P at the given LEVEL. */
169 do_destroy (struct sparse_array *spar, union pointer *p, int level)
173 struct internal_node *node = p->internal;
174 int count = node->count;
179 union pointer *q = &p->internal->down[i];
180 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
182 do_destroy (spar, q, level - 1);
187 pool_free (spar->pool, p->internal);
190 pool_free (spar->pool, p->leaf);
194 Any elements in SPAR are deallocated. Thus, if per-element
195 destruction is necessary, it should be done before destroying
198 sparse_array_destroy (struct sparse_array *spar)
200 do_destroy (spar, &spar->root, spar->height - 1);
201 pool_free (spar->pool, spar);
204 /* Returns the number of elements in SPAR. */
206 sparse_array_count (const struct sparse_array *spar)
211 /* Increases SPAR's height by 1, allowing it to hold
212 PTRS_PER_LEVEL times more elements. */
214 increase_height (struct sparse_array *spar)
216 assert (spar->height < MAX_HEIGHT);
218 if (spar->height == 1)
219 spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar));
222 struct internal_node *new_root;
223 new_root = pool_zalloc (spar->pool, sizeof *new_root);
225 new_root->down[0] = spar->root;
226 spar->root.internal = new_root;
230 /* Finds the leaf node in SPAR that contains the element for KEY.
231 SPAR must be tall enough to hold KEY.
232 Creates the leaf if it doesn't already exist. */
233 static struct leaf_node *
234 create_leaf_node (struct sparse_array *spar, unsigned long int key)
240 assert (index_in_range (spar, key));
242 /* Short-circuit everything if KEY is in the leaf cache. */
243 if (key >> BITS_PER_LEVEL == spar->cache_ofs && spar->cache != NULL)
246 /* Descend through internal nodes. */
248 for (level = spar->height - 1; level > 0; level--)
250 if (p->internal == NULL)
252 p->internal = pool_zalloc (spar->pool, sizeof *p->internal);
256 count = &p->internal->count;
257 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
260 /* Create leaf if necessary. */
263 p->leaf = pool_zalloc (spar->pool, leaf_size (spar));
268 spar->cache = p->leaf;
269 spar->cache_ofs = key >> BITS_PER_LEVEL;
274 /* Inserts into SPAR an element with the given KEY, which must not
275 already exist in SPAR.
276 Returns the new element for the caller to initialize. */
278 sparse_array_insert (struct sparse_array *spar, unsigned long int key)
280 struct leaf_node *leaf;
282 while (!index_in_range (spar, key))
283 increase_height (spar);
287 leaf = create_leaf_node (spar, key);
288 assert (!is_in_use (leaf, key));
289 set_in_use (leaf, key);
290 return leaf_element (spar, leaf, key);
293 /* Finds and returns the element in SPAR with the given KEY.
294 Returns a null pointer if KEY does not exist in SPAR. */
296 sparse_array_get (const struct sparse_array *spar, unsigned long int key)
298 struct leaf_node *leaf = find_leaf_node (spar, key);
299 if (leaf != NULL && is_in_use (leaf, key))
300 return leaf_element (spar, leaf, key);
304 /* Removes the element with the given KEY from SPAR.
305 Returns true if an element was removed, false if SPAR hadn't
306 contained an element with the given KEY.
308 If elements need to be destructed, then the caller should have
309 already taken care of it before calling this function; the
310 element's content must be considered freed and of
311 indeterminate value after it is removed. */
313 sparse_array_remove (struct sparse_array *spar, unsigned long int key)
315 union pointer *path[MAX_HEIGHT], **last;
316 struct leaf_node *leaf;
320 /* Find and free element in leaf. */
321 leaf = find_leaf_node (spar, key);
322 if (leaf == NULL || !is_in_use (leaf, key))
324 #if ASSERT_LEVEL >= 10
325 memset (leaf_element (spar, leaf, key), 0xcc, spar->elem_size);
327 unset_in_use (leaf, key);
329 if (any_in_use (leaf))
332 /* The leaf node is empty.
333 Retrace the path of internal nodes traversed to the leaf. */
336 for (level = spar->height - 1; level > 0; level--)
339 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
342 /* Free the leaf node and prune it from the tree. */
343 spar->cache_ofs = ULONG_MAX;
344 pool_free (spar->pool, leaf);
347 /* Update counts in the internal nodes above the leaf.
348 Free any internal nodes that become empty. */
352 if (--p->internal->count > 0)
354 if (p == &spar->root)
355 decrease_height (spar);
359 pool_free (spar->pool, p->internal);
366 /* Returns a pointer to the in-use element with the smallest
367 index and sets *IDXP to its index or, if SPAR has no in-use
368 elements, returns NULL without modifying *IDXP. */
370 sparse_array_first (const struct sparse_array *spar, unsigned long int *idxp)
372 return scan_forward (spar, 0, idxp);
375 /* Returns a pointer to the in-use element with the smallest
376 index greater than SKIP and sets *IDXP to its index or, if
377 SPAR has no in-use elements with index greater than SKIP,
378 returns NULL without modifying *IDXP. */
380 sparse_array_next (const struct sparse_array *spar, unsigned long int skip,
381 unsigned long int *idxp)
383 return skip < ULONG_MAX ? scan_forward (spar, skip + 1, idxp) : NULL;
386 /* Returns a pointer to the in-use element with the greatest
387 index and sets *IDXP to its index or, if SPAR has no in-use
388 elements, returns NULL without modifying *IDXP. */
390 sparse_array_last (const struct sparse_array *spar, unsigned long int *idxp)
392 return scan_reverse (spar, ULONG_MAX, idxp);
395 /* Returns a pointer to the in-use element with the greatest
396 index less than SKIP and sets *IDXP to its index or, if SPAR
397 has no in-use elements with index less than SKIP, returns NULL
398 without modifying *IDXP. */
400 sparse_array_prev (const struct sparse_array *spar, unsigned long int skip,
401 unsigned long int *idxp)
403 return skip > 0 ? scan_reverse (spar, skip - 1, idxp) : NULL;
407 /* Returns true iff KEY is in the range of keys currently
408 represented by SPAR. */
410 index_in_range (const struct sparse_array *spar, unsigned long int key)
412 return (spar->height == 0 ? false
413 : spar->height >= MAX_HEIGHT ? true
414 : key < (1ul << (spar->height * BITS_PER_LEVEL)));
417 /* Returns true iff LEAF contains an in-use element with the
420 is_in_use (const struct leaf_node *leaf, unsigned int key)
423 return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0;
426 /* Returns true iff LEAF contains any in-use elements. */
428 any_in_use (const struct leaf_node *leaf)
431 for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++)
437 /* Marks element KEY in LEAF as in-use. */
439 set_in_use (struct leaf_node *leaf, unsigned int key)
442 leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS);
445 /* Marks element KEY in LEAF as not in-use. */
447 unset_in_use (struct leaf_node *leaf, unsigned int key)
450 leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS));
453 /* Returns the number of trailing 0-bits in X.
454 Undefined if X is zero. */
456 count_trailing_zeros (unsigned long int x)
459 return __builtin_ctzl (x);
460 #else /* not GCC 4+ */
461 /* This algorithm is from _Hacker's Delight_ section 5.4. */
464 #define COUNT_STEP(BITS) \
465 if (!(x & ((1ul << (BITS)) - 1))) \
471 #if ULONG_MAX >> 31 >> 31 >> 2
474 #if ULONG_MAX >> 31 >> 1
483 #endif /* not GCC 4+ */
486 /* Returns the least index of the in-use element in LEAF greater
487 than or equal to IDX. Bits in IDX not in LEVEL_MASK are
488 considered to have value 0. */
490 scan_in_use_forward (struct leaf_node *leaf, unsigned int idx)
493 for (; idx < PTRS_PER_LEVEL; idx = (idx & ~LONG_MASK) + LONG_BITS)
495 int ofs = idx % LONG_BITS;
496 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs;
499 return idx + count_trailing_zeros (in_use);
504 /* Returns the number of leading 0-bits in X.
505 Undefined if X is zero. */
507 count_leading_zeros (unsigned long int x)
510 return __builtin_clzl (x);
517 #if ULONG_MAX >> 31 >> 1
520 #if ULONG_MAX >> 31 >> 31 >> 2
523 return LONG_BITS - count_one_bits_l (x);
527 /* Returns the greatest index of the in-use element in LEAF less
528 than or equal to IDX. Bits in IDX not in LEVEL_MASK are
529 considered to have value 0. */
531 scan_in_use_reverse (struct leaf_node *leaf, unsigned int idx)
536 int ofs = idx % LONG_BITS;
537 unsigned long int in_use;
539 in_use = leaf->in_use[idx / LONG_BITS] << (LONG_BITS - 1 - ofs);
541 return idx - count_leading_zeros (in_use);
544 idx = (idx | LONG_MASK) - LONG_BITS;
548 /* Returns the address of element with the given KEY in LEAF,
549 which is a node in SPAR. */
551 leaf_element (const struct sparse_array *spar, struct leaf_node *leaf,
555 return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key);
558 /* As leaf_element, returns the address of element with the given
559 KEY in LEAF, which is a node in SPAR. Also, updates SPAR's
560 cache to contain LEAF. */
562 cache_leaf_element (struct sparse_array *spar, struct leaf_node *leaf,
563 unsigned long int key)
566 spar->cache_ofs = key >> BITS_PER_LEVEL;
567 return leaf_element (spar, leaf, key);
570 /* Returns the size of a leaf node in SPAR. */
572 leaf_size (const struct sparse_array *spar)
574 return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL;
577 /* Finds and returns the leaf node in SPAR that contains KEY.
578 Returns null if SPAR does not have a leaf node that contains
580 static struct leaf_node *
581 find_leaf_node (const struct sparse_array *spar_, unsigned long int key)
583 struct sparse_array *spar = CONST_CAST (struct sparse_array *, spar_);
584 const union pointer *p;
587 /* Check the cache first. */
588 if (key >> BITS_PER_LEVEL == spar->cache_ofs)
591 if (!index_in_range (spar, key))
594 /* Descend through internal nodes. */
596 for (level = spar->height - 1; level > 0; level--)
598 if (p->internal == NULL)
600 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
604 spar->cache = p->leaf;
605 spar->cache_ofs = key >> BITS_PER_LEVEL;
610 /* Reduces SPAR's height to the minimum needed value by
611 eliminating levels that contain only a single entry for all
614 decrease_height (struct sparse_array *spar)
616 while (spar->height > 1
617 && spar->root.internal->count == 1
618 && spar->root.internal->down[0].internal)
620 struct internal_node *p = spar->root.internal;
622 spar->root = p->down[0];
623 pool_free (spar->pool, p);
627 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
628 for the in-use element with the least key greater than or
629 equal to START. If such an element is found, returns a
630 pointer to it and stores its key in *FOUND; otherwise, returns
631 a null pointer and does not modify *FOUND. */
633 scan_internal_node_forward (struct sparse_array *spar,
634 struct internal_node *node,
635 int level, unsigned long int start,
636 unsigned long int *found)
638 int shift = level * BITS_PER_LEVEL;
639 int count = node->count;
642 for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
644 union pointer *q = &node->down[i];
645 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
647 void *element = do_scan_forward (spar, q, level - 1, start, found);
654 start &= ~((1ul << shift) - 1);
655 start += 1ul << shift;
660 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
661 for the in-use element with the least key greater than or
662 equal to START. If such an element is found, returns a
663 pointer to it and stores its key in *FOUND; otherwise, returns
664 a null pointer and does not modify *FOUND. */
666 do_scan_forward (struct sparse_array *spar, union pointer *p, int level,
667 unsigned long int start, unsigned long int *found)
671 int idx = scan_in_use_forward (p->leaf, start);
674 unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
675 return cache_leaf_element (spar, p->leaf, key);
678 return scan_internal_node_forward (spar, p->internal, level, start, found);
682 scan_forward (const struct sparse_array *spar_, unsigned long int start,
683 unsigned long int *found)
685 struct sparse_array *spar = CONST_CAST (struct sparse_array *, spar_);
687 /* Check the cache. */
688 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
690 int idx = scan_in_use_forward (spar->cache, start);
693 *found = (start & ~LEVEL_MASK) | idx;
694 return leaf_element (spar, spar->cache, idx);
696 start = (start & ~LEVEL_MASK) + PTRS_PER_LEVEL;
702 if (!index_in_range (spar, start))
704 return do_scan_forward (spar, &spar->root, spar->height - 1, start, found);
707 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
708 for the in-use element with the greatest key less than or
709 equal to START. If such an element is found, returns a
710 pointer to it and stores its key in *FOUND; otherwise, returns
711 a null pointer and does not modify *FOUND. */
713 scan_internal_node_reverse (struct sparse_array *spar,
714 struct internal_node *node,
715 int level, unsigned long int start,
716 unsigned long int *found)
718 int shift = level * BITS_PER_LEVEL;
719 int count = node->count;
722 for (i = (start >> shift) & LEVEL_MASK; i >= 0; i--)
724 union pointer *q = &node->down[i];
725 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
727 void *element = do_scan_reverse (spar, q, level - 1, start, found);
734 start |= (1ul << shift) - 1;
735 start -= 1ul << shift;
740 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
741 for the in-use element with the greatest key less than or
742 equal to START. If such an element is found, returns a
743 pointer to it and stores its key in *FOUND; otherwise, returns
744 a null pointer and does not modify *FOUND. */
746 do_scan_reverse (struct sparse_array *spar, union pointer *p, int level,
747 unsigned long int start, unsigned long int *found)
751 int idx = scan_in_use_reverse (p->leaf, start);
754 unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
755 return cache_leaf_element (spar, p->leaf, key);
760 return scan_internal_node_reverse (spar, p->internal, level, start, found);
764 scan_reverse (const struct sparse_array *spar_, unsigned long int start,
765 unsigned long int *found)
767 struct sparse_array *spar = CONST_CAST (struct sparse_array *, spar_);
769 /* Check the cache. */
770 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
772 int idx = scan_in_use_reverse (spar->cache, start);
775 *found = (start & ~LEVEL_MASK) | idx;
776 return leaf_element (spar, spar->cache, idx);
779 if (start < PTRS_PER_LEVEL)
781 start -= PTRS_PER_LEVEL;
785 if (spar->height == 0)
787 else if (spar->height < MAX_HEIGHT)
789 unsigned long int max_key;
790 max_key = (1ul << (spar->height * BITS_PER_LEVEL)) - 1;
791 start = MIN (start, max_key);
794 return do_scan_reverse (spar, &spar->root, spar->height - 1, start, found);