1 /* PSPP - computes sample statistics.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <libpspp/sparse-array.h>
26 #include <libpspp/assertion.h>
27 #include <libpspp/misc.h>
28 #include <libpspp/pool.h>
30 /* Sparse array data structure.
32 The sparse array is implemented in terms of a "radix tree", a
33 multiway tree in which a set of bits drawn from the key
34 determine the child chosen at each level during a search. The
35 most-significant bits determine a child of the root, the next
36 bits determine a child of that child, and so on, until the
37 least-significant bits determine a leaf node.
39 In this implementation, the branching factor at each level is
40 held constant at 2**BITS_PER_LEVEL. The tree is only made as
41 tall as need be for the currently largest key, and nodes that
42 would be entirely empty are not allocated at all. The
43 elements are stored in the leaf nodes. */
45 /* Number of bits from the key used as the index at each level. */
46 #define BITS_PER_LEVEL 5
48 /* Branching factor. */
49 #define PTRS_PER_LEVEL (1u << BITS_PER_LEVEL)
52 #define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT)
53 #define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL)
55 /* Bit-mask for index. */
56 #define LEVEL_MASK ((1ul << BITS_PER_LEVEL) - 1)
58 /* Pointer to an internal node or a leaf node.
59 Pointers in internal nodes at level 1 point to leaf nodes;
60 other pointers point to internal nodes. */
63 struct internal_node *internal;
64 struct leaf_node *leaf;
70 struct pool *pool; /* Pool used for allocations. */
71 size_t elem_size; /* Element size, rounded for alignment. */
72 unsigned long count; /* Number of elements in tree. */
75 union pointer root; /* Root of tree. */
76 int height; /* 0=empty tree;
77 1=root points to leaf,
78 2=root points to internal node
79 that points to leaves,
82 /* Cache for speeding up access. */
83 unsigned long int cache_ofs; /* Group of keys that cache points to,
84 shifted right BITS_PER_LEVEL bits;
85 ULONG_MAX for empty cache. */
86 struct leaf_node *cache; /* Cached leaf node, or a null
87 pointer for a negative cache. */
90 /* An internal node in the radix tree. */
93 int count; /* Number of nonnul children. */
94 union pointer down[PTRS_PER_LEVEL]; /* Children. */
97 /* A leaf node in the radix tree. */
100 /* Bit-vector of elements that are in use. */
101 unsigned long int in_use[DIV_RND_UP (PTRS_PER_LEVEL, LONG_BITS)];
102 /* element_type elements[PTRS_PER_LEVEL]; */
105 /* Returns SIZE rounded up to a safe alignment. */
106 #define ALIGN_SIZE(SIZE) ROUND_UP (SIZE, sizeof (long int))
108 /* Returns the size of EXPR_OR_TYPE rounded up to a safe
110 #define SIZEOF_ALIGNED(EXPR_OR_TYPE) ALIGN_SIZE (sizeof (EXPR_OR_TYPE))
112 static inline bool index_in_range (const struct sparse_array *,
114 static inline bool is_in_use (const struct leaf_node *, unsigned int);
115 static inline bool any_in_use (const struct leaf_node *);
116 static inline void set_in_use (struct leaf_node *, unsigned int);
117 static inline void unset_in_use (struct leaf_node *, unsigned int);
118 static inline int scan_in_use (struct leaf_node *, unsigned int);
119 static inline void *leaf_element (const struct sparse_array *,
120 struct leaf_node *, unsigned int);
121 static inline size_t leaf_size (const struct sparse_array *);
123 static struct leaf_node *find_leaf_node (const struct sparse_array *,
125 static void decrease_height (struct sparse_array *);
126 static void *scan_leaf (struct sparse_array *, struct leaf_node *,
127 unsigned long int, unsigned long int *);
128 static void *do_scan (struct sparse_array *, union pointer *, int,
129 unsigned long int, unsigned long int *);
131 /* Creates and returns a new sparse array that will contain
132 elements that are ELEM_SIZE bytes in size. */
133 struct sparse_array *
134 sparse_array_create (size_t elem_size)
136 return sparse_array_create_pool (NULL, elem_size);
139 /* Creates and returns a new sparse array that will contain
140 elements that are ELEM_SIZE bytes in size. Data in the sparse
141 array will be allocated from POOL, which may be null. */
142 struct sparse_array *
143 sparse_array_create_pool (struct pool *pool, size_t elem_size)
145 struct sparse_array *spar = pool_malloc (pool, sizeof *spar);
147 spar->elem_size = ALIGN_SIZE (elem_size);
149 spar->root.leaf = NULL;
151 spar->cache_ofs = ULONG_MAX;
155 /* Destroys SPAR node pointed to by P at the given LEVEL. */
157 do_destroy (struct sparse_array *spar, union pointer *p, int level)
161 struct internal_node *node = p->internal;
162 int count = node->count;
167 union pointer *q = &p->internal->down[i];
168 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
170 do_destroy (spar, q, level - 1);
175 pool_free (spar->pool, p->internal);
178 pool_free (spar->pool, p->leaf);
182 Any elements in SPAR are deallocated. Thus, if per-element
183 destruction is necessary, it should be done before destroying
186 sparse_array_destroy (struct sparse_array *spar)
188 do_destroy (spar, &spar->root, spar->height - 1);
189 pool_free (spar->pool, spar);
192 /* Returns the number of elements in SPAR. */
194 sparse_array_count (const struct sparse_array *spar)
199 /* Increases SPAR's height by 1, allowing it to hold
200 PTRS_PER_LEVEL times more elements. */
202 increase_height (struct sparse_array *spar)
204 assert (spar->height < MAX_HEIGHT);
206 if (spar->height == 1)
207 spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar));
210 struct internal_node *new_root;
211 new_root = pool_zalloc (spar->pool, sizeof *new_root);
213 new_root->down[0] = spar->root;
214 spar->root.internal = new_root;
218 /* Finds the leaf node in SPAR that contains the element for KEY.
219 SPAR must be tall enough to hold KEY.
220 Creates the leaf if it doesn't already exist. */
221 static struct leaf_node *
222 create_leaf_node (struct sparse_array *spar, unsigned long int key)
228 assert (index_in_range (spar, key));
230 /* Short-circuit everything if KEY is in the leaf cache. */
231 if (key >> BITS_PER_LEVEL == spar->cache_ofs && spar->cache != NULL)
234 /* Descend through internal nodes. */
236 for (level = spar->height - 1; level > 0; level--)
238 if (p->internal == NULL)
240 p->internal = pool_zalloc (spar->pool, sizeof *p->internal);
244 count = &p->internal->count;
245 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
248 /* Create leaf if necessary. */
251 p->leaf = pool_zalloc (spar->pool, leaf_size (spar));
256 spar->cache = p->leaf;
257 spar->cache_ofs = key >> BITS_PER_LEVEL;
262 /* Inserts into SPAR an element with the given KEY, which must not
263 already exist in SPAR.
264 Returns the new element for the caller to initialize. */
266 sparse_array_insert (struct sparse_array *spar, unsigned long int key)
268 struct leaf_node *leaf;
270 while (!index_in_range (spar, key))
271 increase_height (spar);
275 leaf = create_leaf_node (spar, key);
276 assert (!is_in_use (leaf, key));
277 set_in_use (leaf, key);
278 return leaf_element (spar, leaf, key);
281 /* Finds and returns the element in SPAR with the given KEY.
282 Returns a null pointer if KEY does not exist in SPAR. */
284 sparse_array_get (const struct sparse_array *spar, unsigned long int key)
286 if (index_in_range (spar, key))
288 struct leaf_node *leaf = find_leaf_node (spar, key);
289 if (leaf != NULL && is_in_use (leaf, key))
290 return leaf_element (spar, leaf, key);
295 /* Removes the element with the given KEY from SPAR.
296 Returns true if an element was removed, false if SPAR hadn't
297 contained an element with the given KEY.
299 If elements need to be destructed, then the caller should have
300 already taken care of it before calling this function; the
301 element's content must be considered freed and of
302 indeterminate value after it is removed. */
304 sparse_array_remove (struct sparse_array *spar, unsigned long int key)
306 union pointer *path[MAX_HEIGHT], **last;
307 struct leaf_node *leaf;
311 if (!index_in_range (spar, key))
314 /* Find and free element in leaf. */
315 leaf = find_leaf_node (spar, key);
316 if (leaf == NULL || !is_in_use (leaf, key))
318 #if ASSERT_LEVEL >= 10
319 memset (leaf_element (spar, leaf, key), 0xcc, spar->elem_size);
321 unset_in_use (leaf, key);
323 if (any_in_use (leaf))
326 /* The leaf node is empty.
327 Retrace the path of internal nodes traversed to the leaf. */
330 for (level = spar->height - 1; level > 0; level--)
333 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
336 /* Free the leaf node and prune it from the tree. */
337 spar->cache_ofs = ULONG_MAX;
338 pool_free (spar->pool, leaf);
341 /* Update counts in the internal nodes above the leaf.
342 Free any internal nodes that become empty. */
346 if (--p->internal->count > 0)
348 if (p == &spar->root)
349 decrease_height (spar);
353 pool_free (spar->pool, p->internal);
360 /* Scans SPAR in increasing order of keys for in-use elements.
361 If SKIP is NULL, the scan starts from key 0;
362 otherwise, it starts just after key *SKIP.
363 If an element is found, returns it and stores the element's
364 key into *FOUND; otherwise, returns a null pointer and does
365 not modify *FOUND. */
367 sparse_array_scan (const struct sparse_array *spar_, unsigned long int *skip,
368 unsigned long int *found)
370 struct sparse_array *spar = (struct sparse_array *) spar_;
371 unsigned long int start;
373 /* Find our starting point. */
383 /* Check the cache. */
384 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
386 void *p = scan_leaf (spar, spar->cache, start, found);
389 start &= ~LEVEL_MASK;
390 start += PTRS_PER_LEVEL;
396 if (!index_in_range (spar, start))
398 return do_scan (spar, &spar->root, spar->height - 1, start, found);
401 /* Returns true iff KEY is in the range of keys currently
402 represented by SPAR. */
404 index_in_range (const struct sparse_array *spar, unsigned long int key)
406 return (spar->height == 0 ? false
407 : spar->height >= MAX_HEIGHT ? true
408 : key < (1ul << (spar->height * BITS_PER_LEVEL)));
411 /* Returns true iff LEAF contains an in-use element with the
414 is_in_use (const struct leaf_node *leaf, unsigned int key)
417 return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0;
420 /* Returns true iff LEAF contains any in-use elements. */
422 any_in_use (const struct leaf_node *leaf)
425 for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++)
431 /* Marks element KEY in LEAF as in-use. */
433 set_in_use (struct leaf_node *leaf, unsigned int key)
436 leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS);
439 /* Marks element KEY in LEAF as not in-use. */
441 unset_in_use (struct leaf_node *leaf, unsigned int key)
444 leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS));
447 /* Returns the number of trailing 0-bits in X.
448 Undefined if X is zero. */
450 count_trailing_zeros (unsigned long int x)
452 /* This algorithm is from _Hacker's Delight_ section 5.4. */
455 #define COUNT_STEP(BITS) \
456 if (!(x & ((1ul << (BITS)) - 1))) \
462 #if ULONG_MAX >> 31 >> 31 >> 2
465 #if ULONG_MAX >> 31 >> 1
476 /* Returns the least index of the in-use element in LEAF greater
477 than or equal to IDX. */
479 scan_in_use (struct leaf_node *leaf, unsigned int idx)
481 for (; idx < PTRS_PER_LEVEL; idx = (idx & ~(LONG_BITS - 1)) + LONG_BITS)
483 int ofs = idx % LONG_BITS;
484 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs;
487 return count_trailing_zeros (in_use) + idx;
492 /* Returns the address of element with the given KEY in LEAF,
493 which is a node in SPAR. */
495 leaf_element (const struct sparse_array *spar, struct leaf_node *leaf,
499 return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key);
502 /* Returns the size of a leaf node in SPAR. */
504 leaf_size (const struct sparse_array *spar)
506 return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL;
509 /* Finds and returns the leaf node in SPAR that contains KEY.
510 Returns null if SPAR does not have a leaf node that contains
512 static struct leaf_node *
513 find_leaf_node (const struct sparse_array *spar_, unsigned long int key)
515 struct sparse_array *spar = (struct sparse_array *) spar_;
516 const union pointer *p;
519 assert (index_in_range (spar, key));
521 /* Check the cache first. */
522 if (key >> BITS_PER_LEVEL == spar->cache_ofs)
525 /* Descend through internal nodes. */
527 for (level = spar->height - 1; level > 0; level--)
529 if (p->internal == NULL)
531 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
535 spar->cache = p->leaf;
536 spar->cache_ofs = key >> BITS_PER_LEVEL;
541 /* Reduces SPAR's height to the minimum needed value by
542 eliminating levels that contain only a single entry for all
545 decrease_height (struct sparse_array *spar)
547 while (spar->height > 1
548 && spar->root.internal->count == 1
549 && spar->root.internal->down[0].internal)
551 struct internal_node *p = spar->root.internal;
553 spar->root = p->down[0];
554 pool_free (spar->pool, p);
558 /* Scans leaf node LEAF, which is in SPAR, for the in-use element
559 with the least key greater than or equal to START. If such an
560 element is found, returns a pointer to it and stores its key
561 in *FOUND; otherwise, returns a null pointer and does not
564 scan_leaf (struct sparse_array *spar, struct leaf_node *leaf,
565 unsigned long int start, unsigned long int *found)
567 int idx = scan_in_use (leaf, start & LEVEL_MASK);
570 *found = (start & ~LEVEL_MASK) | idx;
572 spar->cache_ofs = *found >> BITS_PER_LEVEL;
573 return leaf_element (spar, leaf, idx);
579 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
580 for the in-use element with the least key greater than or
581 equal to START. If such an element is found, returns a
582 pointer to it and stores its key in *FOUND; otherwise, returns
583 a null pointer and does not modify *FOUND. */
585 scan_internal_node (struct sparse_array *spar, struct internal_node *node,
586 int level, unsigned long int start,
587 unsigned long int *found)
589 int shift = level * BITS_PER_LEVEL;
590 int count = node->count;
593 for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
595 union pointer *q = &node->down[i];
596 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
598 void *element = do_scan (spar, q, level - 1, start, found);
605 start &= ~((1ul << shift) - 1);
606 start += 1ul << shift;
611 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
612 for the in-use element with the least key greater than or
613 equal to START. If such an element is found, returns a
614 pointer to it and stores its key in *FOUND; otherwise, returns
615 a null pointer and does not modify *FOUND. */
617 do_scan (struct sparse_array *spar, union pointer *p, int level,
618 unsigned long int start, unsigned long int *found)
621 ? scan_leaf (spar, p->leaf, start, found)
622 : scan_internal_node (spar, p->internal, level, start, found));