1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <libpspp/sparse-array.h>
24 #include <libpspp/assertion.h>
25 #include <libpspp/misc.h>
26 #include <libpspp/pool.h>
28 #include "count-one-bits.h"
31 /* Sparse array data structure.
33 The sparse array is implemented in terms of a "radix tree", a
34 multiway tree in which a set of bits drawn from the key
35 determine the child chosen at each level during a search. The
36 most-significant bits determine a child of the root, the next
37 bits determine a child of that child, and so on, until the
38 least-significant bits determine a leaf node.
40 In this implementation, the branching factor at each level is
41 held constant at 2**BITS_PER_LEVEL. The tree is only made as
42 tall as need be for the currently largest key, and nodes that
43 would be entirely empty are not allocated at all. The
44 elements are stored in the leaf nodes. */
46 /* Number of bits from the key used as the index at each level. */
47 #define BITS_PER_LEVEL 5
49 /* Branching factor. */
50 #define PTRS_PER_LEVEL (1u << BITS_PER_LEVEL)
53 #define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT)
54 #define LONG_MASK ((unsigned long int) LONG_BITS - 1)
55 #define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL)
57 /* Bit-mask for index. */
58 #define LEVEL_MASK ((1ul << BITS_PER_LEVEL) - 1)
60 /* Pointer to an internal node or a leaf node.
61 Pointers in internal nodes at level 1 point to leaf nodes;
62 other pointers point to internal nodes. */
65 struct internal_node *internal;
66 struct leaf_node *leaf;
72 struct pool *pool; /* Pool used for allocations. */
73 size_t elem_size; /* Element size, rounded for alignment. */
74 unsigned long count; /* Number of elements in tree. */
77 union pointer root; /* Root of tree. */
78 int height; /* 0=empty tree;
79 1=root points to leaf,
80 2=root points to internal node
81 that points to leaves,
84 /* Cache for speeding up access. */
85 unsigned long int cache_ofs; /* Group of keys that cache points to,
86 shifted right BITS_PER_LEVEL bits;
87 ULONG_MAX for empty cache. */
88 struct leaf_node *cache; /* Cached leaf node, or a null
89 pointer for a negative cache. */
92 /* An internal node in the radix tree. */
95 int count; /* Number of nonnull children. */
96 union pointer down[PTRS_PER_LEVEL]; /* Children. */
99 /* A leaf node in the radix tree. */
102 /* Bit-vector of elements that are in use. */
103 unsigned long int in_use[DIV_RND_UP (PTRS_PER_LEVEL, LONG_BITS)];
104 /* element_type elements[PTRS_PER_LEVEL]; */
107 /* Returns SIZE rounded up to a safe alignment. */
108 #define ALIGN_SIZE(SIZE) ROUND_UP (SIZE, sizeof (long int))
110 /* Returns the size of EXPR_OR_TYPE rounded up to a safe
112 #define SIZEOF_ALIGNED(EXPR_OR_TYPE) ALIGN_SIZE (sizeof (EXPR_OR_TYPE))
114 static inline bool index_in_range (const struct sparse_array *,
116 static inline bool is_in_use (const struct leaf_node *, unsigned int);
117 static inline bool any_in_use (const struct leaf_node *);
118 static inline void set_in_use (struct leaf_node *, unsigned int);
119 static inline void unset_in_use (struct leaf_node *, unsigned int);
120 static inline void *leaf_element (const struct sparse_array *,
121 struct leaf_node *, unsigned int);
122 static inline size_t leaf_size (const struct sparse_array *);
124 static struct leaf_node *find_leaf_node (const struct sparse_array *,
126 static void decrease_height (struct sparse_array *);
128 static int scan_in_use_forward (struct leaf_node *, unsigned int);
129 static void *do_scan_forward (struct sparse_array *, union pointer *, int,
130 unsigned long int, unsigned long int *);
131 static void *scan_forward (const struct sparse_array *,
132 unsigned long int start,
133 unsigned long int *found);
135 static int scan_in_use_reverse (struct leaf_node *, unsigned int);
136 static void *do_scan_reverse (struct sparse_array *, union pointer *, int,
137 unsigned long int, unsigned long int *);
138 static void *scan_reverse (const struct sparse_array *,
139 unsigned long int start,
140 unsigned long int *found);
142 /* Creates and returns a new sparse array that will contain
143 elements that are ELEM_SIZE bytes in size. */
144 struct sparse_array *
145 sparse_array_create (size_t elem_size)
147 return sparse_array_create_pool (NULL, elem_size);
150 /* Creates and returns a new sparse array that will contain
151 elements that are ELEM_SIZE bytes in size. Data in the sparse
152 array will be allocated from POOL, which may be null. */
153 struct sparse_array *
154 sparse_array_create_pool (struct pool *pool, size_t elem_size)
156 struct sparse_array *spar = pool_malloc (pool, sizeof *spar);
158 spar->elem_size = ALIGN_SIZE (elem_size);
160 spar->root.leaf = NULL;
162 spar->cache_ofs = ULONG_MAX;
166 /* Destroys SPAR node pointed to by P at the given LEVEL. */
168 do_destroy (struct sparse_array *spar, union pointer *p, int level)
172 struct internal_node *node = p->internal;
173 int count = node->count;
178 union pointer *q = &p->internal->down[i];
179 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
181 do_destroy (spar, q, level - 1);
186 pool_free (spar->pool, p->internal);
189 pool_free (spar->pool, p->leaf);
193 Any elements in SPAR are deallocated. Thus, if per-element
194 destruction is necessary, it should be done before destroying
197 sparse_array_destroy (struct sparse_array *spar)
199 do_destroy (spar, &spar->root, spar->height - 1);
200 pool_free (spar->pool, spar);
203 /* Returns the number of elements in SPAR. */
205 sparse_array_count (const struct sparse_array *spar)
210 /* Increases SPAR's height by 1, allowing it to hold
211 PTRS_PER_LEVEL times more elements. */
213 increase_height (struct sparse_array *spar)
215 assert (spar->height < MAX_HEIGHT);
217 if (spar->height == 1)
218 spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar));
221 struct internal_node *new_root;
222 new_root = pool_zalloc (spar->pool, sizeof *new_root);
224 new_root->down[0] = spar->root;
225 spar->root.internal = new_root;
229 /* Finds the leaf node in SPAR that contains the element for KEY.
230 SPAR must be tall enough to hold KEY.
231 Creates the leaf if it doesn't already exist. */
232 static struct leaf_node *
233 create_leaf_node (struct sparse_array *spar, unsigned long int key)
239 assert (index_in_range (spar, key));
241 /* Short-circuit everything if KEY is in the leaf cache. */
242 if (key >> BITS_PER_LEVEL == spar->cache_ofs && spar->cache != NULL)
245 /* Descend through internal nodes. */
247 for (level = spar->height - 1; level > 0; level--)
249 if (p->internal == NULL)
251 p->internal = pool_zalloc (spar->pool, sizeof *p->internal);
255 count = &p->internal->count;
256 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
259 /* Create leaf if necessary. */
262 p->leaf = pool_zalloc (spar->pool, leaf_size (spar));
267 spar->cache = p->leaf;
268 spar->cache_ofs = key >> BITS_PER_LEVEL;
273 /* Inserts into SPAR an element with the given KEY, which must not
274 already exist in SPAR.
275 Returns the new element for the caller to initialize. */
277 sparse_array_insert (struct sparse_array *spar, unsigned long int key)
279 struct leaf_node *leaf;
281 while (!index_in_range (spar, key))
282 increase_height (spar);
286 leaf = create_leaf_node (spar, key);
287 assert (!is_in_use (leaf, key));
288 set_in_use (leaf, key);
289 return leaf_element (spar, leaf, key);
292 /* Finds and returns the element in SPAR with the given KEY.
293 Returns a null pointer if KEY does not exist in SPAR. */
295 sparse_array_get (const struct sparse_array *spar, unsigned long int key)
297 struct leaf_node *leaf = find_leaf_node (spar, key);
298 if (leaf != NULL && is_in_use (leaf, key))
299 return leaf_element (spar, leaf, key);
303 /* Removes the element with the given KEY from SPAR.
304 Returns true if an element was removed, false if SPAR hadn't
305 contained an element with the given KEY.
307 If elements need to be destructed, then the caller should have
308 already taken care of it before calling this function; the
309 element's content must be considered freed and of
310 indeterminate value after it is removed. */
312 sparse_array_remove (struct sparse_array *spar, unsigned long int key)
314 union pointer *path[MAX_HEIGHT], **last;
315 struct leaf_node *leaf;
319 /* Find and free element in leaf. */
320 leaf = find_leaf_node (spar, key);
321 if (leaf == NULL || !is_in_use (leaf, key))
323 #if ASSERT_LEVEL >= 10
324 memset (leaf_element (spar, leaf, key), 0xcc, spar->elem_size);
326 unset_in_use (leaf, key);
328 if (any_in_use (leaf))
331 /* The leaf node is empty.
332 Retrace the path of internal nodes traversed to the leaf. */
335 for (level = spar->height - 1; level > 0; level--)
338 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
341 /* Free the leaf node and prune it from the tree. */
342 spar->cache_ofs = ULONG_MAX;
343 pool_free (spar->pool, leaf);
346 /* Update counts in the internal nodes above the leaf.
347 Free any internal nodes that become empty. */
351 if (--p->internal->count > 0)
353 if (p == &spar->root)
354 decrease_height (spar);
358 pool_free (spar->pool, p->internal);
365 /* Returns a pointer to the in-use element with the smallest
366 index and sets *IDXP to its index or, if SPAR has no in-use
367 elements, returns NULL without modifying *IDXP. */
369 sparse_array_first (const struct sparse_array *spar, unsigned long int *idxp)
371 return scan_forward (spar, 0, idxp);
374 /* Returns a pointer to the in-use element with the smallest
375 index greater than SKIP and sets *IDXP to its index or, if
376 SPAR has no in-use elements with index greater than SKIP,
377 returns NULL without modifying *IDXP. */
379 sparse_array_next (const struct sparse_array *spar, unsigned long int skip,
380 unsigned long int *idxp)
382 return skip < ULONG_MAX ? scan_forward (spar, skip + 1, idxp) : NULL;
385 /* Returns a pointer to the in-use element with the greatest
386 index and sets *IDXP to its index or, if SPAR has no in-use
387 elements, returns NULL without modifying *IDXP. */
389 sparse_array_last (const struct sparse_array *spar, unsigned long int *idxp)
391 return scan_reverse (spar, ULONG_MAX, idxp);
394 /* Returns a pointer to the in-use element with the greatest
395 index less than SKIP and sets *IDXP to its index or, if SPAR
396 has no in-use elements with index less than SKIP, returns NULL
397 without modifying *IDXP. */
399 sparse_array_prev (const struct sparse_array *spar, unsigned long int skip,
400 unsigned long int *idxp)
402 return skip > 0 ? scan_reverse (spar, skip - 1, idxp) : NULL;
406 /* Returns true iff KEY is in the range of keys currently
407 represented by SPAR. */
409 index_in_range (const struct sparse_array *spar, unsigned long int key)
411 return (spar->height == 0 ? false
412 : spar->height >= MAX_HEIGHT ? true
413 : key < (1ul << (spar->height * BITS_PER_LEVEL)));
416 /* Returns true iff LEAF contains an in-use element with the
419 is_in_use (const struct leaf_node *leaf, unsigned int key)
422 return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0;
425 /* Returns true iff LEAF contains any in-use elements. */
427 any_in_use (const struct leaf_node *leaf)
430 for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++)
436 /* Marks element KEY in LEAF as in-use. */
438 set_in_use (struct leaf_node *leaf, unsigned int key)
441 leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS);
444 /* Marks element KEY in LEAF as not in-use. */
446 unset_in_use (struct leaf_node *leaf, unsigned int key)
449 leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS));
452 /* Returns the number of trailing 0-bits in X.
453 Undefined if X is zero. */
455 count_trailing_zeros (unsigned long int x)
458 return __builtin_ctzl (x);
459 #else /* not GCC 4+ */
460 /* This algorithm is from _Hacker's Delight_ section 5.4. */
463 #define COUNT_STEP(BITS) \
464 if (!(x & ((1ul << (BITS)) - 1))) \
470 #if ULONG_MAX >> 31 >> 31 >> 2
473 #if ULONG_MAX >> 31 >> 1
482 #endif /* not GCC 4+ */
485 /* Returns the least index of the in-use element in LEAF greater
486 than or equal to IDX. Bits in IDX not in LEVEL_MASK are
487 considered to have value 0. */
489 scan_in_use_forward (struct leaf_node *leaf, unsigned int idx)
492 for (; idx < PTRS_PER_LEVEL; idx = (idx & ~LONG_MASK) + LONG_BITS)
494 int ofs = idx % LONG_BITS;
495 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs;
498 return idx + count_trailing_zeros (in_use);
503 /* Returns the number of leading 0-bits in X.
504 Undefined if X is zero. */
506 count_leading_zeros (unsigned long int x)
509 return __builtin_clzl (x);
516 #if ULONG_MAX >> 31 >> 1
519 #if ULONG_MAX >> 31 >> 31 >> 2
522 return LONG_BITS - count_one_bits_l (x);
526 /* Returns the greatest index of the in-use element in LEAF less
527 than or equal to IDX. Bits in IDX not in LEVEL_MASK are
528 considered to have value 0. */
530 scan_in_use_reverse (struct leaf_node *leaf, unsigned int idx)
535 int ofs = idx % LONG_BITS;
536 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] << (31 - ofs);
538 return idx - count_leading_zeros (in_use);
541 idx = (idx | LONG_MASK) - LONG_BITS;
545 /* Returns the address of element with the given KEY in LEAF,
546 which is a node in SPAR. */
548 leaf_element (const struct sparse_array *spar, struct leaf_node *leaf,
552 return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key);
555 /* As leaf_element, returns the address of element with the given
556 KEY in LEAF, which is a node in SPAR. Also, updates SPAR's
557 cache to contain LEAF. */
559 cache_leaf_element (struct sparse_array *spar, struct leaf_node *leaf,
560 unsigned long int key)
563 spar->cache_ofs = key >> BITS_PER_LEVEL;
564 return leaf_element (spar, leaf, key);
567 /* Returns the size of a leaf node in SPAR. */
569 leaf_size (const struct sparse_array *spar)
571 return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL;
574 /* Finds and returns the leaf node in SPAR that contains KEY.
575 Returns null if SPAR does not have a leaf node that contains
577 static struct leaf_node *
578 find_leaf_node (const struct sparse_array *spar_, unsigned long int key)
580 struct sparse_array *spar = (struct sparse_array *) spar_;
581 const union pointer *p;
584 /* Check the cache first. */
585 if (key >> BITS_PER_LEVEL == spar->cache_ofs)
588 if (!index_in_range (spar, key))
591 /* Descend through internal nodes. */
593 for (level = spar->height - 1; level > 0; level--)
595 if (p->internal == NULL)
597 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
601 spar->cache = p->leaf;
602 spar->cache_ofs = key >> BITS_PER_LEVEL;
607 /* Reduces SPAR's height to the minimum needed value by
608 eliminating levels that contain only a single entry for all
611 decrease_height (struct sparse_array *spar)
613 while (spar->height > 1
614 && spar->root.internal->count == 1
615 && spar->root.internal->down[0].internal)
617 struct internal_node *p = spar->root.internal;
619 spar->root = p->down[0];
620 pool_free (spar->pool, p);
624 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
625 for the in-use element with the least key greater than or
626 equal to START. If such an element is found, returns a
627 pointer to it and stores its key in *FOUND; otherwise, returns
628 a null pointer and does not modify *FOUND. */
630 scan_internal_node_forward (struct sparse_array *spar,
631 struct internal_node *node,
632 int level, unsigned long int start,
633 unsigned long int *found)
635 int shift = level * BITS_PER_LEVEL;
636 int count = node->count;
639 for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
641 union pointer *q = &node->down[i];
642 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
644 void *element = do_scan_forward (spar, q, level - 1, start, found);
651 start &= ~((1ul << shift) - 1);
652 start += 1ul << shift;
657 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
658 for the in-use element with the least key greater than or
659 equal to START. If such an element is found, returns a
660 pointer to it and stores its key in *FOUND; otherwise, returns
661 a null pointer and does not modify *FOUND. */
663 do_scan_forward (struct sparse_array *spar, union pointer *p, int level,
664 unsigned long int start, unsigned long int *found)
668 int idx = scan_in_use_forward (p->leaf, start);
671 unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
672 return cache_leaf_element (spar, p->leaf, key);
675 return scan_internal_node_forward (spar, p->internal, level, start, found);
679 scan_forward (const struct sparse_array *spar_, unsigned long int start,
680 unsigned long int *found)
682 struct sparse_array *spar = (struct sparse_array *) spar_;
684 /* Check the cache. */
685 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
687 int idx = scan_in_use_forward (spar->cache, start);
690 *found = (start & ~LEVEL_MASK) | idx;
691 return leaf_element (spar, spar->cache, idx);
693 start = (start & ~LEVEL_MASK) + PTRS_PER_LEVEL;
699 if (!index_in_range (spar, start))
701 return do_scan_forward (spar, &spar->root, spar->height - 1, start, found);
704 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
705 for the in-use element with the greatest key less than or
706 equal to START. If such an element is found, returns a
707 pointer to it and stores its key in *FOUND; otherwise, returns
708 a null pointer and does not modify *FOUND. */
710 scan_internal_node_reverse (struct sparse_array *spar,
711 struct internal_node *node,
712 int level, unsigned long int start,
713 unsigned long int *found)
715 int shift = level * BITS_PER_LEVEL;
716 int count = node->count;
719 for (i = (start >> shift) & LEVEL_MASK; i >= 0; i--)
721 union pointer *q = &node->down[i];
722 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
724 void *element = do_scan_reverse (spar, q, level - 1, start, found);
731 start |= (1ul << shift) - 1;
732 start -= 1ul << shift;
737 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
738 for the in-use element with the greatest key less than or
739 equal to START. If such an element is found, returns a
740 pointer to it and stores its key in *FOUND; otherwise, returns
741 a null pointer and does not modify *FOUND. */
743 do_scan_reverse (struct sparse_array *spar, union pointer *p, int level,
744 unsigned long int start, unsigned long int *found)
748 int idx = scan_in_use_reverse (p->leaf, start);
751 unsigned long int key = *found = (start & ~LEVEL_MASK) | idx;
752 return cache_leaf_element (spar, p->leaf, key);
757 return scan_internal_node_reverse (spar, p->internal, level, start, found);
761 scan_reverse (const struct sparse_array *spar_, unsigned long int start,
762 unsigned long int *found)
764 struct sparse_array *spar = (struct sparse_array *) spar_;
766 /* Check the cache. */
767 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
769 int idx = scan_in_use_reverse (spar->cache, start);
772 *found = (start & ~LEVEL_MASK) | idx;
773 return leaf_element (spar, spar->cache, idx);
776 if (start < PTRS_PER_LEVEL)
778 start -= PTRS_PER_LEVEL;
782 if (spar->height == 0)
784 else if (spar->height < MAX_HEIGHT)
786 unsigned long int max_key;
787 max_key = (1ul << (spar->height * BITS_PER_LEVEL)) - 1;
788 start = MIN (start, max_key);
791 return do_scan_reverse (spar, &spar->root, spar->height - 1, start, found);