1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <libpspp/sparse-array.h>
24 #include <libpspp/assertion.h>
25 #include <libpspp/misc.h>
26 #include <libpspp/pool.h>
28 /* Sparse array data structure.
30 The sparse array is implemented in terms of a "radix tree", a
31 multiway tree in which a set of bits drawn from the key
32 determine the child chosen at each level during a search. The
33 most-significant bits determine a child of the root, the next
34 bits determine a child of that child, and so on, until the
35 least-significant bits determine a leaf node.
37 In this implementation, the branching factor at each level is
38 held constant at 2**BITS_PER_LEVEL. The tree is only made as
39 tall as need be for the currently largest key, and nodes that
40 would be entirely empty are not allocated at all. The
41 elements are stored in the leaf nodes. */
43 /* Number of bits from the key used as the index at each level. */
44 #define BITS_PER_LEVEL 5
46 /* Branching factor. */
47 #define PTRS_PER_LEVEL (1u << BITS_PER_LEVEL)
50 #define LONG_BITS (sizeof (unsigned long int) * CHAR_BIT)
51 #define MAX_HEIGHT DIV_RND_UP (LONG_BITS, BITS_PER_LEVEL)
53 /* Bit-mask for index. */
54 #define LEVEL_MASK ((1ul << BITS_PER_LEVEL) - 1)
56 /* Pointer to an internal node or a leaf node.
57 Pointers in internal nodes at level 1 point to leaf nodes;
58 other pointers point to internal nodes. */
61 struct internal_node *internal;
62 struct leaf_node *leaf;
68 struct pool *pool; /* Pool used for allocations. */
69 size_t elem_size; /* Element size, rounded for alignment. */
70 unsigned long count; /* Number of elements in tree. */
73 union pointer root; /* Root of tree. */
74 int height; /* 0=empty tree;
75 1=root points to leaf,
76 2=root points to internal node
77 that points to leaves,
80 /* Cache for speeding up access. */
81 unsigned long int cache_ofs; /* Group of keys that cache points to,
82 shifted right BITS_PER_LEVEL bits;
83 ULONG_MAX for empty cache. */
84 struct leaf_node *cache; /* Cached leaf node, or a null
85 pointer for a negative cache. */
88 /* An internal node in the radix tree. */
91 int count; /* Number of nonnul children. */
92 union pointer down[PTRS_PER_LEVEL]; /* Children. */
95 /* A leaf node in the radix tree. */
98 /* Bit-vector of elements that are in use. */
99 unsigned long int in_use[DIV_RND_UP (PTRS_PER_LEVEL, LONG_BITS)];
100 /* element_type elements[PTRS_PER_LEVEL]; */
103 /* Returns SIZE rounded up to a safe alignment. */
104 #define ALIGN_SIZE(SIZE) ROUND_UP (SIZE, sizeof (long int))
106 /* Returns the size of EXPR_OR_TYPE rounded up to a safe
108 #define SIZEOF_ALIGNED(EXPR_OR_TYPE) ALIGN_SIZE (sizeof (EXPR_OR_TYPE))
110 static inline bool index_in_range (const struct sparse_array *,
112 static inline bool is_in_use (const struct leaf_node *, unsigned int);
113 static inline bool any_in_use (const struct leaf_node *);
114 static inline void set_in_use (struct leaf_node *, unsigned int);
115 static inline void unset_in_use (struct leaf_node *, unsigned int);
116 static inline int scan_in_use (struct leaf_node *, unsigned int);
117 static inline void *leaf_element (const struct sparse_array *,
118 struct leaf_node *, unsigned int);
119 static inline size_t leaf_size (const struct sparse_array *);
121 static struct leaf_node *find_leaf_node (const struct sparse_array *,
123 static void decrease_height (struct sparse_array *);
124 static void *scan_leaf (struct sparse_array *, struct leaf_node *,
125 unsigned long int, unsigned long int *);
126 static void *do_scan (struct sparse_array *, union pointer *, int,
127 unsigned long int, unsigned long int *);
129 /* Creates and returns a new sparse array that will contain
130 elements that are ELEM_SIZE bytes in size. */
131 struct sparse_array *
132 sparse_array_create (size_t elem_size)
134 return sparse_array_create_pool (NULL, elem_size);
137 /* Creates and returns a new sparse array that will contain
138 elements that are ELEM_SIZE bytes in size. Data in the sparse
139 array will be allocated from POOL, which may be null. */
140 struct sparse_array *
141 sparse_array_create_pool (struct pool *pool, size_t elem_size)
143 struct sparse_array *spar = pool_malloc (pool, sizeof *spar);
145 spar->elem_size = ALIGN_SIZE (elem_size);
147 spar->root.leaf = NULL;
149 spar->cache_ofs = ULONG_MAX;
153 /* Destroys SPAR node pointed to by P at the given LEVEL. */
155 do_destroy (struct sparse_array *spar, union pointer *p, int level)
159 struct internal_node *node = p->internal;
160 int count = node->count;
165 union pointer *q = &p->internal->down[i];
166 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
168 do_destroy (spar, q, level - 1);
173 pool_free (spar->pool, p->internal);
176 pool_free (spar->pool, p->leaf);
180 Any elements in SPAR are deallocated. Thus, if per-element
181 destruction is necessary, it should be done before destroying
184 sparse_array_destroy (struct sparse_array *spar)
186 do_destroy (spar, &spar->root, spar->height - 1);
187 pool_free (spar->pool, spar);
190 /* Returns the number of elements in SPAR. */
192 sparse_array_count (const struct sparse_array *spar)
197 /* Increases SPAR's height by 1, allowing it to hold
198 PTRS_PER_LEVEL times more elements. */
200 increase_height (struct sparse_array *spar)
202 assert (spar->height < MAX_HEIGHT);
204 if (spar->height == 1)
205 spar->root.leaf = pool_zalloc (spar->pool, leaf_size (spar));
208 struct internal_node *new_root;
209 new_root = pool_zalloc (spar->pool, sizeof *new_root);
211 new_root->down[0] = spar->root;
212 spar->root.internal = new_root;
216 /* Finds the leaf node in SPAR that contains the element for KEY.
217 SPAR must be tall enough to hold KEY.
218 Creates the leaf if it doesn't already exist. */
219 static struct leaf_node *
220 create_leaf_node (struct sparse_array *spar, unsigned long int key)
226 assert (index_in_range (spar, key));
228 /* Short-circuit everything if KEY is in the leaf cache. */
229 if (key >> BITS_PER_LEVEL == spar->cache_ofs && spar->cache != NULL)
232 /* Descend through internal nodes. */
234 for (level = spar->height - 1; level > 0; level--)
236 if (p->internal == NULL)
238 p->internal = pool_zalloc (spar->pool, sizeof *p->internal);
242 count = &p->internal->count;
243 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
246 /* Create leaf if necessary. */
249 p->leaf = pool_zalloc (spar->pool, leaf_size (spar));
254 spar->cache = p->leaf;
255 spar->cache_ofs = key >> BITS_PER_LEVEL;
260 /* Inserts into SPAR an element with the given KEY, which must not
261 already exist in SPAR.
262 Returns the new element for the caller to initialize. */
264 sparse_array_insert (struct sparse_array *spar, unsigned long int key)
266 struct leaf_node *leaf;
268 while (!index_in_range (spar, key))
269 increase_height (spar);
273 leaf = create_leaf_node (spar, key);
274 assert (!is_in_use (leaf, key));
275 set_in_use (leaf, key);
276 return leaf_element (spar, leaf, key);
279 /* Finds and returns the element in SPAR with the given KEY.
280 Returns a null pointer if KEY does not exist in SPAR. */
282 sparse_array_get (const struct sparse_array *spar, unsigned long int key)
284 if (index_in_range (spar, key))
286 struct leaf_node *leaf = find_leaf_node (spar, key);
287 if (leaf != NULL && is_in_use (leaf, key))
288 return leaf_element (spar, leaf, key);
293 /* Removes the element with the given KEY from SPAR.
294 Returns true if an element was removed, false if SPAR hadn't
295 contained an element with the given KEY.
297 If elements need to be destructed, then the caller should have
298 already taken care of it before calling this function; the
299 element's content must be considered freed and of
300 indeterminate value after it is removed. */
302 sparse_array_remove (struct sparse_array *spar, unsigned long int key)
304 union pointer *path[MAX_HEIGHT], **last;
305 struct leaf_node *leaf;
309 if (!index_in_range (spar, key))
312 /* Find and free element in leaf. */
313 leaf = find_leaf_node (spar, key);
314 if (leaf == NULL || !is_in_use (leaf, key))
316 #if ASSERT_LEVEL >= 10
317 memset (leaf_element (spar, leaf, key), 0xcc, spar->elem_size);
319 unset_in_use (leaf, key);
321 if (any_in_use (leaf))
324 /* The leaf node is empty.
325 Retrace the path of internal nodes traversed to the leaf. */
328 for (level = spar->height - 1; level > 0; level--)
331 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
334 /* Free the leaf node and prune it from the tree. */
335 spar->cache_ofs = ULONG_MAX;
336 pool_free (spar->pool, leaf);
339 /* Update counts in the internal nodes above the leaf.
340 Free any internal nodes that become empty. */
344 if (--p->internal->count > 0)
346 if (p == &spar->root)
347 decrease_height (spar);
351 pool_free (spar->pool, p->internal);
358 /* Scans SPAR in increasing order of keys for in-use elements.
359 If SKIP is NULL, the scan starts from key 0;
360 otherwise, it starts just after key *SKIP.
361 If an element is found, returns it and stores the element's
362 key into *FOUND; otherwise, returns a null pointer and does
363 not modify *FOUND. */
365 sparse_array_scan (const struct sparse_array *spar_, unsigned long int *skip,
366 unsigned long int *found)
368 struct sparse_array *spar = (struct sparse_array *) spar_;
369 unsigned long int start;
371 /* Find our starting point. */
381 /* Check the cache. */
382 if (start >> BITS_PER_LEVEL == spar->cache_ofs)
384 void *p = scan_leaf (spar, spar->cache, start, found);
387 start &= ~LEVEL_MASK;
388 start += PTRS_PER_LEVEL;
394 if (!index_in_range (spar, start))
396 return do_scan (spar, &spar->root, spar->height - 1, start, found);
399 /* Returns true iff KEY is in the range of keys currently
400 represented by SPAR. */
402 index_in_range (const struct sparse_array *spar, unsigned long int key)
404 return (spar->height == 0 ? false
405 : spar->height >= MAX_HEIGHT ? true
406 : key < (1ul << (spar->height * BITS_PER_LEVEL)));
409 /* Returns true iff LEAF contains an in-use element with the
412 is_in_use (const struct leaf_node *leaf, unsigned int key)
415 return (leaf->in_use[key / LONG_BITS] & (1ul << (key % LONG_BITS))) != 0;
418 /* Returns true iff LEAF contains any in-use elements. */
420 any_in_use (const struct leaf_node *leaf)
423 for (i = 0; i < sizeof leaf->in_use / sizeof *leaf->in_use; i++)
429 /* Marks element KEY in LEAF as in-use. */
431 set_in_use (struct leaf_node *leaf, unsigned int key)
434 leaf->in_use[key / LONG_BITS] |= 1ul << (key % LONG_BITS);
437 /* Marks element KEY in LEAF as not in-use. */
439 unset_in_use (struct leaf_node *leaf, unsigned int key)
442 leaf->in_use[key / LONG_BITS] &= ~(1ul << (key % LONG_BITS));
445 /* Returns the number of trailing 0-bits in X.
446 Undefined if X is zero. */
448 count_trailing_zeros (unsigned long int x)
450 /* This algorithm is from _Hacker's Delight_ section 5.4. */
453 #define COUNT_STEP(BITS) \
454 if (!(x & ((1ul << (BITS)) - 1))) \
460 #if ULONG_MAX >> 31 >> 31 >> 2
463 #if ULONG_MAX >> 31 >> 1
474 /* Returns the least index of the in-use element in LEAF greater
475 than or equal to IDX. */
477 scan_in_use (struct leaf_node *leaf, unsigned int idx)
479 for (; idx < PTRS_PER_LEVEL; idx = (idx & ~(LONG_BITS - 1)) + LONG_BITS)
481 int ofs = idx % LONG_BITS;
482 unsigned long int in_use = leaf->in_use[idx / LONG_BITS] >> ofs;
485 return count_trailing_zeros (in_use) + idx;
490 /* Returns the address of element with the given KEY in LEAF,
491 which is a node in SPAR. */
493 leaf_element (const struct sparse_array *spar, struct leaf_node *leaf,
497 return (char *) leaf + SIZEOF_ALIGNED (*leaf) + (spar->elem_size * key);
500 /* Returns the size of a leaf node in SPAR. */
502 leaf_size (const struct sparse_array *spar)
504 return SIZEOF_ALIGNED (struct leaf_node) + spar->elem_size * PTRS_PER_LEVEL;
507 /* Finds and returns the leaf node in SPAR that contains KEY.
508 Returns null if SPAR does not have a leaf node that contains
510 static struct leaf_node *
511 find_leaf_node (const struct sparse_array *spar_, unsigned long int key)
513 struct sparse_array *spar = (struct sparse_array *) spar_;
514 const union pointer *p;
517 assert (index_in_range (spar, key));
519 /* Check the cache first. */
520 if (key >> BITS_PER_LEVEL == spar->cache_ofs)
523 /* Descend through internal nodes. */
525 for (level = spar->height - 1; level > 0; level--)
527 if (p->internal == NULL)
529 p = &p->internal->down[(key >> (level * BITS_PER_LEVEL)) & LEVEL_MASK];
533 spar->cache = p->leaf;
534 spar->cache_ofs = key >> BITS_PER_LEVEL;
539 /* Reduces SPAR's height to the minimum needed value by
540 eliminating levels that contain only a single entry for all
543 decrease_height (struct sparse_array *spar)
545 while (spar->height > 1
546 && spar->root.internal->count == 1
547 && spar->root.internal->down[0].internal)
549 struct internal_node *p = spar->root.internal;
551 spar->root = p->down[0];
552 pool_free (spar->pool, p);
556 /* Scans leaf node LEAF, which is in SPAR, for the in-use element
557 with the least key greater than or equal to START. If such an
558 element is found, returns a pointer to it and stores its key
559 in *FOUND; otherwise, returns a null pointer and does not
562 scan_leaf (struct sparse_array *spar, struct leaf_node *leaf,
563 unsigned long int start, unsigned long int *found)
565 int idx = scan_in_use (leaf, start & LEVEL_MASK);
568 *found = (start & ~LEVEL_MASK) | idx;
570 spar->cache_ofs = *found >> BITS_PER_LEVEL;
571 return leaf_element (spar, leaf, idx);
577 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
578 for the in-use element with the least key greater than or
579 equal to START. If such an element is found, returns a
580 pointer to it and stores its key in *FOUND; otherwise, returns
581 a null pointer and does not modify *FOUND. */
583 scan_internal_node (struct sparse_array *spar, struct internal_node *node,
584 int level, unsigned long int start,
585 unsigned long int *found)
587 int shift = level * BITS_PER_LEVEL;
588 int count = node->count;
591 for (i = (start >> shift) & LEVEL_MASK; i < PTRS_PER_LEVEL; i++)
593 union pointer *q = &node->down[i];
594 if (level > 1 ? q->internal != NULL : q->leaf != NULL)
596 void *element = do_scan (spar, q, level - 1, start, found);
603 start &= ~((1ul << shift) - 1);
604 start += 1ul << shift;
609 /* Scans P, which is at LEVEL and within SPAR, and its subnodes,
610 for the in-use element with the least key greater than or
611 equal to START. If such an element is found, returns a
612 pointer to it and stores its key in *FOUND; otherwise, returns
613 a null pointer and does not modify *FOUND. */
615 do_scan (struct sparse_array *spar, union pointer *p, int level,
616 unsigned long int start, unsigned long int *found)
619 ? scan_leaf (spar, p->leaf, start, found)
620 : scan_internal_node (spar, p->internal, level, start, found));