Thanks to John Darrington for review.
+2007-03-31 Ben Pfaff <blp@gnu.org>
+
+ Patch #5827.
+
+ * automake.mk (src_libpspp_libpspp_a_SOURCES): Add bt.c.
+
+ * bt.h: New file.
+
+ * bt.c: New file.
+
2007-03-30 Ben Pfaff <blp@gnu.org>
Patch #5829.
src/libpspp/alloc.c \
src/libpspp/alloc.h \
src/libpspp/bit-vector.h \
+ src/libpspp/bt.c \
+ src/libpspp/bt.h \
src/libpspp/legacy-encoding.c \
src/libpspp/legacy-encoding.h \
src/libpspp/copyleft.c \
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Balanced tree (BT) data structure.
+
+ The client should not need to be aware of the form of
+ balancing applied to the balanced tree, as its operation is
+ fully encapsulated. The current implementation is a scapegoat
+ tree. Scapegoat trees have the advantage over many other
+ forms of balanced trees that they do not store any additional
+ information in each node; thus, they are slightly more
+ space-efficient than, say, red-black or AVL trees. Compared
+ to splay trees, scapegoat trees provide guaranteed logarithmic
+ worst-case search time and do not restructure the tree during
+ a search.
+
+ For information on scapegoat trees, see Galperin and Rivest,
+ "Scapegoat Trees", Proc. 4th ACM-SIAM Symposium on Discrete
+ Algorithms, or <http://en.wikipedia.org/wiki/Scapegoat_tree>,
+ which includes source code and links to other resources, such
+ as the Galperin and Rivest paper.
+
+ One potentially tricky part of scapegoat tree design is the
+ choice of alpha, which is a real constant that must be greater
+ than 0.5 and less than 1. We must be able to efficiently
+ calculate h_alpha = floor(log(n)/log(1/alpha)) for integer n >
+ 0. One way to do so would be to maintain a table relating
+ h_alpha to the minimum value of n that yields that h_alpha.
+ Then, we can track the value of h_alpha(n) in the number of
+ nodes in the tree n as nodes are inserted and deleted.
+
+ This implementation uses a different approach. We choose
+ alpha = sqrt(2)/2 = 1/sqrt(2) ~= .707. Then, computing
+ h_alpha is a matter of computing a logarithm in base sqrt(2).
+ This is easy: we simply compute twice the base-2 logarithm,
+ then adjust upward by 1 if necessary. See calculate_h_alpha
+ for details. */
+
+/* These library routines have no external dependencies other
+ than the standard C library.
+
+ If you add routines in this file, please add a corresponding
+ test to bt-test.c. This test program should achieve 100%
+ coverage of lines and branches in this code, as reported by
+ "gcov -b". */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libpspp/bt.h>
+
+#include <stdbool.h>
+#include <stdint.h>
+
+static void rebalance_subtree (struct bt *, struct bt_node *, size_t);
+
+static struct bt_node **down_link (struct bt *, struct bt_node *);
+static inline struct bt_node *sibling (struct bt_node *p);
+static size_t count_nodes_in_subtree (const struct bt_node *);
+
+static inline int floor_log2 (size_t);
+static inline int calculate_h_alpha (size_t);
+
+/* Initializes BT as an empty BT that uses the given COMPARE
+ function, passing in AUX as auxiliary data. */
+void
+bt_init (struct bt *bt,
+ bt_compare_func *compare,
+ const void *aux)
+{
+ bt->root = NULL;
+ bt->compare = compare;
+ bt->aux = aux;
+ bt->size = 0;
+ bt->max_size = 0;
+}
+
+/* Inserts the given NODE into BT.
+ Returns a null pointer if successful.
+ Returns the existing node already in BT equal to NODE, on
+ failure. */
+struct bt_node *
+bt_insert (struct bt *bt, struct bt_node *node)
+{
+ int depth = 0;
+
+ node->down[0] = NULL;
+ node->down[1] = NULL;
+
+ if (bt->root == NULL)
+ {
+ bt->root = node;
+ node->up = NULL;
+ }
+ else
+ {
+ struct bt_node *p = bt->root;
+ for (;;)
+ {
+ int cmp, dir;
+
+ cmp = bt->compare (node, p, bt->aux);
+ if (cmp == 0)
+ return p;
+ depth++;
+
+ dir = cmp > 0;
+ if (p->down[dir] == NULL)
+ {
+ p->down[dir] = node;
+ node->up = p;
+ break;
+ }
+ p = p->down[dir];
+ }
+ }
+
+ bt->size++;
+ if (bt->size > bt->max_size)
+ bt->max_size = bt->size;
+
+ if (depth > calculate_h_alpha (bt->size))
+ {
+ /* We use the "alternative" method of finding a scapegoat
+ node described by Galperin and Rivest. */
+ struct bt_node *s = node;
+ size_t size = 1;
+ int i;
+
+ for (i = 1; ; i++)
+ if (i < depth)
+ {
+ size += 1 + count_nodes_in_subtree (sibling (s));
+ s = s->up;
+ if (i > calculate_h_alpha (size))
+ {
+ rebalance_subtree (bt, s, size);
+ break;
+ }
+ }
+ else
+ {
+ rebalance_subtree (bt, bt->root, bt->size);
+ bt->max_size = bt->size;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+/* Deletes P from BT. */
+void
+bt_delete (struct bt *bt, struct bt_node *p)
+{
+ struct bt_node **q = down_link (bt, p);
+ struct bt_node *r = p->down[1];
+ if (r == NULL)
+ {
+ *q = p->down[0];
+ if (*q)
+ (*q)->up = p->up;
+ }
+ else if (r->down[0] == NULL)
+ {
+ r->down[0] = p->down[0];
+ *q = r;
+ r->up = p->up;
+ if (r->down[0] != NULL)
+ r->down[0]->up = r;
+ }
+ else
+ {
+ struct bt_node *s = r->down[0];
+ while (s->down[0] != NULL)
+ s = s->down[0];
+ r = s->up;
+ r->down[0] = s->down[1];
+ s->down[0] = p->down[0];
+ s->down[1] = p->down[1];
+ *q = s;
+ if (s->down[0] != NULL)
+ s->down[0]->up = s;
+ s->down[1]->up = s;
+ s->up = p->up;
+ if (r->down[0] != NULL)
+ r->down[0]->up = r;
+ }
+ bt->size--;
+
+ /* We approximate .707 as .75 here. This is conservative: it
+ will cause us to do a little more rebalancing than strictly
+ necessary to maintain the scapegoat tree's height
+ invariant. */
+ if (bt->size < bt->max_size * 3 / 4 && bt->size > 0)
+ {
+ rebalance_subtree (bt, bt->root, bt->size);
+ bt->max_size = bt->size;
+ }
+}
+
+/* Returns the node with minimum value in BT, or a null pointer
+ if BT is empty. */
+struct bt_node *
+bt_first (const struct bt *bt)
+{
+ struct bt_node *p = bt->root;
+ if (p != NULL)
+ while (p->down[0] != NULL)
+ p = p->down[0];
+ return p;
+}
+
+/* Returns the node with maximum value in BT, or a null pointer
+ if BT is empty. */
+struct bt_node *
+bt_last (const struct bt *bt)
+{
+ struct bt_node *p = bt->root;
+ if (p != NULL)
+ while (p->down[1] != NULL)
+ p = p->down[1];
+ return p;
+}
+
+/* Searches BT for a node equal to TARGET.
+ Returns the node if found, or a null pointer otherwise. */
+struct bt_node *
+bt_find (const struct bt *bt, const struct bt_node *target)
+{
+ const struct bt_node *p;
+ int cmp;
+
+ for (p = bt->root; p != NULL; p = p->down[cmp > 0])
+ {
+ cmp = bt->compare (target, p, bt->aux);
+ if (cmp == 0)
+ return (struct bt_node *) p;
+ }
+
+ return NULL;
+}
+
+/* Searches BT for, and returns, the first node in in-order whose
+ value is greater than or equal to TARGET. Returns a null
+ pointer if all nodes in BT are less than TARGET.
+
+ Another way to look at the return value is that it is the node
+ that would be returned by "bt_next (BT, TARGET)" if TARGET
+ were inserted in BT (assuming that TARGET would not be a
+ duplicate). */
+struct bt_node *
+bt_find_ge (const struct bt *bt, const struct bt_node *target)
+{
+ const struct bt_node *p = bt->root;
+ const struct bt_node *q = NULL;
+ while (p != NULL)
+ {
+ int cmp = bt->compare (target, p, bt->aux);
+ if (cmp > 0)
+ p = p->down[1];
+ else
+ {
+ q = p;
+ if (cmp < 0)
+ p = p->down[0];
+ else
+ break;
+ }
+ }
+ return (struct bt_node *) q;
+}
+
+/* Searches BT for, and returns, the last node in in-order whose
+ value is less than or equal to TARGET, which should not be in
+ BT. Returns a null pointer if all nodes in BT are greater
+ than TARGET.
+
+ Another way to look at the return value is that it is the node
+ that would be returned by "bt_prev (BT, TARGET)" if TARGET
+ were inserted in BT (assuming that TARGET would not be a
+ duplicate). */
+struct bt_node *
+bt_find_le (const struct bt *bt, const struct bt_node *target)
+{
+ const struct bt_node *p = bt->root;
+ const struct bt_node *q = NULL;
+ while (p != NULL)
+ {
+ int cmp = bt->compare (target, p, bt->aux);
+ if (cmp < 0)
+ p = p->down[0];
+ else
+ {
+ q = p;
+ if (cmp > 0)
+ p = p->down[1];
+ else
+ break;
+ }
+ }
+ return (struct bt_node *) q;
+}
+
+/* Returns the node in BT following P in in-order.
+ If P is null, returns the minimum node in BT.
+ Returns a null pointer if P is the maximum node in BT or if P
+ is null and BT is empty. */
+struct bt_node *
+bt_next (const struct bt *bt, const struct bt_node *p)
+{
+ if (p == NULL)
+ return bt_first (bt);
+ else if (p->down[1] == NULL)
+ {
+ struct bt_node *q;
+ for (q = p->up; ; p = q, q = q->up)
+ if (q == NULL || p == q->down[0])
+ return q;
+ }
+ else
+ {
+ p = p->down[1];
+ while (p->down[0] != NULL)
+ p = p->down[0];
+ return (struct bt_node *) p;
+ }
+}
+
+/* Returns the node in BT preceding P in in-order.
+ If P is null, returns the maximum node in BT.
+ Returns a null pointer if P is the minimum node in BT or if P
+ is null and BT is empty. */
+struct bt_node *
+bt_prev (const struct bt *bt, const struct bt_node *p)
+{
+ if (p == NULL)
+ return bt_last (bt);
+ else if (p->down[0] == NULL)
+ {
+ struct bt_node *q;
+ for (q = p->up; ; p = q, q = q->up)
+ if (q == NULL || p == q->down[1])
+ return q;
+ }
+ else
+ {
+ p = p->down[0];
+ while (p->down[1] != NULL)
+ p = p->down[1];
+ return (struct bt_node *) p;
+ }
+}
+
+/* Moves P around in BT to compensate for its key having
+ changed. Returns a null pointer if successful. If P's new
+ value is equal to that of some other node in BT, returns the
+ other node after removing P from BT.
+
+ This function is an optimization only if it is likely that P
+ can actually retain its relative position in BT, e.g. its key
+ has only been adjusted slightly. Otherwise, it is more
+ efficient to simply remove P from BT, change its key, and
+ re-insert P.
+
+ It is not safe to update more than one node's key, then to
+ call this function for each node. Instead, update a single
+ node's key, call this function, update another node's key, and
+ so on. Alternatively, remove all affected nodes from the
+ tree, update their keys, then re-insert all of them. */
+struct bt_node *
+bt_changed (struct bt *bt, struct bt_node *p)
+{
+ struct bt_node *prev = bt_prev (bt, p);
+ struct bt_node *next = bt_next (bt, p);
+
+ if ((prev != NULL && bt->compare (prev, p, bt->aux) >= 0)
+ || (next != NULL && bt->compare (p, next, bt->aux) >= 0))
+ {
+ bt_delete (bt, p);
+ return bt_insert (bt, p);
+ }
+ return NULL;
+ }
+
+/* BT nodes may be moved around in memory as necessary, e.g. as
+ the result of an realloc operation on a block that contains a
+ node. Once this is done, call this function passing node P
+ that was moved and its BT before attempting any other
+ operation on BT.
+
+ It is not safe to move more than one node, then to call this
+ function for each node. Instead, move a single node, call
+ this function, move another node, and so on. Alternatively,
+ remove all affected nodes from the tree, move them, then
+ re-insert all of them. */
+void
+bt_moved (struct bt *bt, struct bt_node *p)
+{
+ if (p->up != NULL)
+ {
+ int d = p->up->down[0] == NULL || bt->compare (p, p->up, bt->aux) > 0;
+ p->up->down[d] = p;
+ }
+ else
+ bt->root = p;
+ if (p->down[0] != NULL)
+ p->down[0]->up = p;
+ if (p->down[1] != NULL)
+ p->down[1]->up = p;
+}
+\f
+/* Tree rebalancing.
+
+ This algorithm is from Q. F. Stout and B. L. Warren, "Tree
+ Rebalancing in Optimal Time and Space", CACM 29(1986):9,
+ pp. 902-908. It uses O(N) time and O(1) space to rebalance a
+ subtree that contains N nodes. */
+
+static void tree_to_vine (struct bt_node **);
+static void vine_to_tree (struct bt_node **, size_t count);
+
+/* Rebalances the subtree in BT rooted at SUBTREE, which contains
+ exactly COUNT nodes. */
+static void
+rebalance_subtree (struct bt *bt, struct bt_node *subtree, size_t count)
+{
+ struct bt_node *up = subtree->up;
+ struct bt_node **q = down_link (bt, subtree);
+ tree_to_vine (q);
+ vine_to_tree (q, count);
+ (*q)->up = up;
+}
+
+/* Converts the subtree rooted at *Q into a vine (a binary search
+ tree in which all the right links are null), and updates *Q to
+ point to the new root of the subtree. */
+static void
+tree_to_vine (struct bt_node **q)
+{
+ struct bt_node *p = *q;
+ while (p != NULL)
+ if (p->down[1] == NULL)
+ {
+ q = &p->down[0];
+ p = *q;
+ }
+ else
+ {
+ struct bt_node *r = p->down[1];
+ p->down[1] = r->down[0];
+ r->down[0] = p;
+ p = r;
+ *q = r;
+ }
+}
+
+/* Performs a compression transformation COUNT times, starting at
+ *Q, and updates *Q to point to the new root of the subtree. */
+static void
+compress (struct bt_node **q, unsigned long count)
+{
+ while (count--)
+ {
+ struct bt_node *red = *q;
+ struct bt_node *black = red->down[0];
+
+ *q = black;
+ red->down[0] = black->down[1];
+ black->down[1] = red;
+ red->up = black;
+ if (red->down[0] != NULL)
+ red->down[0]->up = red;
+ q = &black->down[0];
+ }
+}
+
+/* Converts the vine rooted at *Q, which contains exactly COUNT
+ nodes, into a balanced tree, and updates *Q to point to the
+ new root of the balanced tree. */
+static void
+vine_to_tree (struct bt_node **q, size_t count)
+{
+ size_t leaf_nodes = count + 1 - ((size_t) 1 << floor_log2 (count + 1));
+ size_t vine_nodes = count - leaf_nodes;
+
+ compress (q, leaf_nodes);
+ while (vine_nodes > 1)
+ {
+ vine_nodes /= 2;
+ compress (q, vine_nodes);
+ }
+ while ((*q)->down[0] != NULL)
+ {
+ (*q)->down[0]->up = *q;
+ q = &(*q)->down[0];
+ }
+}
+\f
+/* Other binary tree helper functions. */
+
+/* Returns the address of the pointer that points down to P
+ within BT. */
+static struct bt_node **
+down_link (struct bt *bt, struct bt_node *p)
+{
+ struct bt_node *q = p->up;
+ return q != NULL ? &q->down[q->down[0] != p] : &bt->root;
+}
+
+/* Returns node P's sibling; that is, the other child of its
+ parent. P must not be the root. */
+static inline struct bt_node *
+sibling (struct bt_node *p)
+{
+ struct bt_node *q = p->up;
+ return q->down[q->down[0] == p];
+}
+
+/* Returns the number of nodes in the given SUBTREE. */
+static size_t
+count_nodes_in_subtree (const struct bt_node *subtree)
+{
+ /* This is an in-order traversal modified to iterate only the
+ nodes in SUBTREE. */
+ size_t count = 0;
+ if (subtree != NULL)
+ {
+ const struct bt_node *p = subtree;
+ while (p->down[0] != NULL)
+ p = p->down[0];
+ for (;;)
+ {
+ count++;
+ if (p->down[1] != NULL)
+ {
+ p = p->down[1];
+ while (p->down[0] != NULL)
+ p = p->down[0];
+ }
+ else
+ {
+ for (;;)
+ {
+ const struct bt_node *q;
+ if (p == subtree)
+ goto done;
+ q = p;
+ p = p->up;
+ if (p->down[0] == q)
+ break;
+ }
+ }
+ }
+ }
+ done:
+ return count;
+}
+\f
+/* Arithmetic. */
+
+/* Returns the number of high-order 0-bits in X.
+ Undefined if X is zero. */
+static inline int
+count_leading_zeros (size_t x)
+{
+#if __GNUC__ >= 4
+#if SIZE_MAX > ULONG_MAX
+ return __builtin_clzll (x);
+#elif SIZE_MAX > UINT_MAX
+ return __builtin_clzl (x);
+#else
+ return __builtin_clz (x);
+#endif
+#else
+ /* This algorithm is from _Hacker's Delight_ section 5.3. */
+ size_t y;
+ int n;
+
+#define COUNT_STEP(BITS) \
+ y = x >> BITS; \
+ if (y != 0) \
+ { \
+ n -= BITS; \
+ x = y; \
+ }
+
+ n = sizeof (size_t) * CHAR_BIT;
+#if SIZE_MAX >> 31 >> 31 >> 2
+ COUNT_STEP (64);
+#endif
+#if SIZE_MAX >> 31 >> 1
+ COUNT_STEP (32);
+#endif
+ COUNT_STEP (16);
+ COUNT_STEP (8);
+ COUNT_STEP (4);
+ COUNT_STEP (2);
+ y = x >> 1;
+ return y != 0 ? n - 2 : n - x;
+#endif
+}
+
+/* Returns floor(log2(x)).
+ Undefined if X is zero. */
+static inline int
+floor_log2 (size_t x)
+{
+ return sizeof (size_t) * CHAR_BIT - 1 - count_leading_zeros (x);
+}
+
+/* Returns floor(pow(sqrt(2), x * 2 + 1)).
+ Defined for X from 0 up to the number of bits in size_t minus
+ 1. */
+static inline size_t
+pow_sqrt2 (int x)
+{
+ /* These constants are sqrt(2) multiplied by 2**63 or 2**31,
+ respectively, and then rounded to nearest. */
+#if SIZE_MAX >> 31 >> 1
+ return (UINT64_C(0xb504f333f9de6484) >> (63 - x)) + 1;
+#else
+ return (0xb504f334 >> (31 - x)) + 1;
+#endif
+}
+
+/* Returns floor(log(n)/log(sqrt(2))).
+ Undefined if N is 0. */
+static inline int
+calculate_h_alpha (size_t n)
+{
+ int log2 = floor_log2 (n);
+
+ /* The correct answer is either 2 * log2 or one more. So we
+ see if n >= pow(sqrt(2), 2 * log2 + 1) and if so, add 1. */
+ return (2 * log2) + (n >= pow_sqrt2 (log2));
+}
+
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#ifndef LIBPSPP_BT_H
+#define LIBPSPP_BT_H 1
+
+/* Balanced tree (BT) data structure.
+
+ The client should not need to be aware of the form of
+ balancing applied to the balanced tree, as its operation is
+ fully encapsulated. */
+
+#include <stddef.h>
+
+/* Returns the data structure corresponding to the given NODE,
+ assuming that NODE is embedded as the given MEMBER name in
+ data type STRUCT. */
+#define bt_data(NODE, STRUCT, MEMBER) \
+ ((STRUCT *) ((char *) (NODE) - offsetof (STRUCT, MEMBER)))
+
+/* Node in a balanced binary tree. */
+struct bt_node
+ {
+ struct bt_node *up; /* Parent (NULL for root). */
+ struct bt_node *down[2]; /* Left child, right child. */
+ };
+
+/* Compares nodes A and B, with the tree's AUX.
+ Returns a strcmp-like result. */
+typedef int bt_compare_func (const struct bt_node *a,
+ const struct bt_node *b,
+ const void *aux);
+
+/* A balanced binary tree. */
+struct bt
+ {
+ struct bt_node *root; /* Tree's root, NULL if empty. */
+ bt_compare_func *compare; /* To compare nodes. */
+ const void *aux; /* Auxiliary data. */
+ size_t size; /* Current node count. */
+ size_t max_size; /* Max size since last complete rebalance. */
+ };
+
+void bt_init (struct bt *, bt_compare_func *, const void *aux);
+
+struct bt_node *bt_insert (struct bt *, struct bt_node *);
+void bt_delete (struct bt *, struct bt_node *);
+
+struct bt_node *bt_find (const struct bt *, const struct bt_node *);
+struct bt_node *bt_find_ge (const struct bt *, const struct bt_node *);
+struct bt_node *bt_find_le (const struct bt *, const struct bt_node *);
+
+struct bt_node *bt_first (const struct bt *);
+struct bt_node *bt_last (const struct bt *);
+struct bt_node *bt_find (const struct bt *, const struct bt_node *);
+struct bt_node *bt_next (const struct bt *, const struct bt_node *);
+struct bt_node *bt_prev (const struct bt *, const struct bt_node *);
+
+struct bt_node *bt_changed (struct bt *, struct bt_node *);
+void bt_moved (struct bt *, struct bt_node *);
+
+/* Returns the number of nodes currently in BT. */
+static inline size_t bt_count (const struct bt *bt)
+{
+ return bt->size;
+}
+
+#endif /* libpspp/bt.h */
+2007-03-31 Ben Pfaff <blp@gnu.org>
+
+ * automake.mk (tests_libpspp_bt_test_LDADD): Add tests/libpspp/bt.
+
+ * libpspp/bt-test.c: New test.
+
2007-03-25 Ben Pfaff <blp@gnu.org>
* automake.mk: Add tests/libpspp/sparse-array-test.
tests/expressions/vectors.sh
nodist_TESTS = \
+ tests/libpspp/abt-test \
+ tests/libpspp/bt-test \
+ tests/libpspp/heap-test \
tests/libpspp/ll-test \
tests/libpspp/llx-test \
- tests/libpspp/heap-test \
- tests/libpspp/abt-test \
tests/libpspp/sparse-array-test
TESTS = $(dist_TESTS) $(nodist_TESTS)
tests_libpspp_abt_test_LDADD = gl/libgl.la
tests_libpspp_abt_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10
+tests_libpspp_bt_test_SOURCES = \
+ src/libpspp/bt.c \
+ src/libpspp/bt.h \
+ tests/libpspp/bt-test.c
+tests_libpspp_bt_test_LDADD = gl/libgl.la
+tests_libpspp_bt_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10
+
tests_libpspp_sparse_array_test_SOURCES = \
src/libpspp/sparse-array.c \
src/libpspp/sparse-array.h \
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* This is a test program for the bt_* routines defined in bt.c.
+ This test program aims to be as comprehensive as possible.
+ "gcov -b" should report 100% coverage of lines and branches in
+ bt.c. "valgrind --leak-check=yes --show-reachable=yes" should
+ give a clean report. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <libpspp/bt.h>
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libpspp/compiler.h>
+\f
+/* Currently running test. */
+static const char *test_name;
+
+/* Exit with a failure code.
+ (Place a breakpoint on this function while debugging.) */
+static void
+check_die (void)
+{
+ exit (EXIT_FAILURE);
+}
+
+/* If OK is not true, prints a message about failure on the
+ current source file and the given LINE and terminates. */
+static void
+check_func (bool ok, int line)
+{
+ if (!ok)
+ {
+ printf ("Check failed in %s test at %s, line %d\n",
+ test_name, __FILE__, line);
+ check_die ();
+ }
+}
+
+/* Verifies that EXPR evaluates to true.
+ If not, prints a message citing the calling line number and
+ terminates. */
+#define check(EXPR) check_func ((EXPR), __LINE__)
+
+/* Prints a message about memory exhaustion and exits with a
+ failure code. */
+static void
+xalloc_die (void)
+{
+ printf ("virtual memory exhausted\n");
+ exit (EXIT_FAILURE);
+}
+
+/* Allocates and returns N bytes of memory. */
+static void *
+xmalloc (size_t n)
+{
+ if (n != 0)
+ {
+ void *p = malloc (n);
+ if (p == NULL)
+ xalloc_die ();
+
+ return p;
+ }
+ else
+ return NULL;
+}
+
+static void *
+xmemdup (const void *p, size_t n)
+{
+ void *q = xmalloc (n);
+ memcpy (q, p, n);
+ return q;
+}
+
+/* Allocates and returns N * M bytes of memory. */
+static void *
+xnmalloc (size_t n, size_t m)
+{
+ if ((size_t) -1 / m <= n)
+ xalloc_die ();
+ return xmalloc (n * m);
+}
+\f
+/* Node type and support routines. */
+
+/* Test data element. */
+struct element
+ {
+ struct bt_node node; /* Embedded binary tree element. */
+ int data; /* Primary value. */
+ };
+
+static int aux_data;
+
+/* Returns the `struct element' that NODE is embedded within. */
+static struct element *
+bt_node_to_element (const struct bt_node *node)
+{
+ return bt_data (node, struct element, node);
+}
+
+/* Compares the `x' values in A and B and returns a strcmp-type
+ return value. Verifies that AUX points to aux_data. */
+static int
+compare_elements (const struct bt_node *a_, const struct bt_node *b_,
+ const void *aux)
+{
+ const struct element *a = bt_node_to_element (a_);
+ const struct element *b = bt_node_to_element (b_);
+
+ check (aux == &aux_data);
+ return a->data < b->data ? -1 : a->data > b->data;
+}
+
+/* Compares A and B and returns a strcmp-type return value. */
+static int
+compare_ints_noaux (const void *a_, const void *b_)
+{
+ const int *a = a_;
+ const int *b = b_;
+
+ return *a < *b ? -1 : *a > *b;
+}
+
+/* Swaps *A and *B. */
+static void
+swap (int *a, int *b)
+{
+ int t = *a;
+ *a = *b;
+ *b = t;
+}
+
+/* Reverses the order of the CNT integers starting at VALUES. */
+static void
+reverse (int *values, size_t cnt)
+{
+ size_t i = 0;
+ size_t j = cnt;
+
+ while (j > i)
+ swap (&values[i++], &values[--j]);
+}
+
+/* Arranges the CNT elements in VALUES into the lexicographically
+ next greater permutation. Returns true if successful.
+ If VALUES is already the lexicographically greatest
+ permutation of its elements (i.e. ordered from greatest to
+ smallest), arranges them into the lexicographically least
+ permutation (i.e. ordered from smallest to largest) and
+ returns false. */
+static bool
+next_permutation (int *values, size_t cnt)
+{
+ if (cnt > 0)
+ {
+ size_t i = cnt - 1;
+ while (i != 0)
+ {
+ i--;
+ if (values[i] < values[i + 1])
+ {
+ size_t j;
+ for (j = cnt - 1; values[i] >= values[j]; j--)
+ continue;
+ swap (values + i, values + j);
+ reverse (values + (i + 1), cnt - (i + 1));
+ return true;
+ }
+ }
+
+ reverse (values, cnt);
+ }
+
+ return false;
+}
+
+/* Returns N!. */
+static unsigned int
+factorial (unsigned int n)
+{
+ unsigned int value = 1;
+ while (n > 1)
+ value *= n--;
+ return value;
+}
+
+/* Randomly shuffles the CNT elements in ARRAY, each of which is
+ SIZE bytes in size. */
+static void
+random_shuffle (void *array_, size_t cnt, size_t size)
+{
+ char *array = array_;
+ char *tmp = xmalloc (size);
+ size_t i;
+
+ for (i = 0; i < cnt; i++)
+ {
+ size_t j = rand () % (cnt - i) + i;
+ if (i != j)
+ {
+ memcpy (tmp, array + j * size, size);
+ memcpy (array + j * size, array + i * size, size);
+ memcpy (array + i * size, tmp, size);
+ }
+ }
+
+ free (tmp);
+}
+
+/* Calculates floor(log(n)/log(sqrt(2))). */
+static int
+calculate_h_alpha (size_t n)
+{
+ size_t thresholds[] =
+ {
+ 0, 2, 2, 3, 4, 6, 8, 12, 16, 23, 32, 46, 64, 91, 128, 182, 256, 363,
+ 512, 725, 1024, 1449, 2048, 2897, 4096, 5793, 8192, 11586, 16384,
+ 23171, 32768, 46341, 65536, 92682, 131072, 185364, 262144, 370728,
+ 524288, 741456, 1048576, 1482911, 2097152, 2965821, 4194304, 5931642,
+ 8388608, 11863284, 16777216, 23726567, 33554432, 47453133, 67108864,
+ 94906266, 134217728, 189812532, 268435456, 379625063, 536870912,
+ 759250125, 1073741824, 1518500250, 2147483648, 3037000500,
+ };
+ size_t threshold_cnt = sizeof thresholds / sizeof *thresholds;
+ size_t i;
+
+ for (i = 0; i < threshold_cnt; i++)
+ if (thresholds[i] > n)
+ break;
+ return i - 1;
+}
+
+/* Returns the height of the tree rooted at NODE. */
+static int
+get_height (struct bt_node *node)
+{
+ if (node == NULL)
+ return 0;
+ else
+ {
+ int left = get_height (node->down[0]);
+ int right = get_height (node->down[1]);
+ return 1 + (left > right ? left : right);
+ }
+}
+
+/* Checks that BT is loosely alpha-height balanced, that is, that
+ its height is no more than h_alpha(count) + 1, where
+ h_alpha(n) = floor(log(n)/log(1/alpha)). */
+static void
+check_balance (struct bt *bt)
+{
+ /* In the notation of the Galperin and Rivest paper (and of
+ CLR), the height of a tree is the number of edges in the
+ longest path from the root to a leaf, so we have to subtract
+ 1 from our measured height. */
+ int height = get_height (bt->root) - 1;
+ int max_height = calculate_h_alpha (bt_count (bt)) + 1;
+ check (height <= max_height);
+}
+
+/* Checks that BT contains the CNT ints in DATA, that its
+ structure is correct, and that certain operations on BT
+ produce the expected results. */
+static void
+check_bt (struct bt *bt, const int data[], size_t cnt)
+{
+ struct element e;
+ size_t i;
+ int *order;
+
+ order = xmemdup (data, cnt * sizeof *data);
+ qsort (order, cnt, sizeof *order, compare_ints_noaux);
+
+ for (i = 0; i < cnt; i++)
+ {
+ struct bt_node *p;
+
+ e.data = data[i];
+ if (rand () % 2)
+ p = bt_find (bt, &e.node);
+ else
+ p = bt_insert (bt, &e.node);
+ check (p != NULL);
+ check (p != &e.node);
+ check (bt_node_to_element (p)->data == data[i]);
+ }
+
+ e.data = -1;
+ check (bt_find (bt, &e.node) == NULL);
+
+ check_balance (bt);
+
+ if (cnt == 0)
+ {
+ check (bt_first (bt) == NULL);
+ check (bt_last (bt) == NULL);
+ check (bt_next (bt, NULL) == NULL);
+ check (bt_prev (bt, NULL) == NULL);
+ }
+ else
+ {
+ struct bt_node *p;
+
+ for (p = bt_first (bt), i = 0; i < cnt; p = bt_next (bt, p), i++)
+ check (bt_node_to_element (p)->data == order[i]);
+ check (p == NULL);
+
+ for (p = bt_last (bt), i = 0; i < cnt; p = bt_prev (bt, p), i++)
+ check (bt_node_to_element (p)->data == order[cnt - i - 1]);
+ check (p == NULL);
+ }
+
+ free (order);
+}
+
+/* Inserts the CNT values from 0 to CNT - 1 (inclusive) into an
+ BT in the order specified by INSERTIONS, then deletes them in
+ the order specified by DELETIONS, checking the BT's contents
+ for correctness after each operation. */
+static void
+test_insert_delete (const int insertions[],
+ const int deletions[],
+ size_t cnt)
+{
+ struct element *elements;
+ struct bt bt;
+ size_t i;
+
+ elements = xnmalloc (cnt, sizeof *elements);
+ for (i = 0; i < cnt; i++)
+ elements[i].data = i;
+
+ bt_init (&bt, compare_elements, &aux_data);
+ check_bt (&bt, NULL, 0);
+ for (i = 0; i < cnt; i++)
+ {
+ check (bt_insert (&bt, &elements[insertions[i]].node) == NULL);
+ check_bt (&bt, insertions, i + 1);
+ }
+ for (i = 0; i < cnt; i++)
+ {
+ bt_delete (&bt, &elements[deletions[i]].node);
+ check_bt (&bt, deletions + i + 1, cnt - i - 1);
+ }
+
+ free (elements);
+}
+\f
+/* Inserts values into an BT in each possible order, then
+ removes them in each possible order, up to a specified maximum
+ size. */
+static void
+test_insert_any_remove_any (void)
+{
+ const int max_elems = 5;
+ int cnt;
+
+ for (cnt = 0; cnt <= max_elems; cnt++)
+ {
+ int *insertions, *deletions;
+ unsigned int ins_perm_cnt;
+ int i;
+
+ insertions = xnmalloc (cnt, sizeof *insertions);
+ deletions = xnmalloc (cnt, sizeof *deletions);
+ for (i = 0; i < cnt; i++)
+ insertions[i] = i;
+
+ for (ins_perm_cnt = 0;
+ ins_perm_cnt == 0 || next_permutation (insertions, cnt);
+ ins_perm_cnt++)
+ {
+ unsigned int del_perm_cnt;
+ int i;
+
+ for (i = 0; i < cnt; i++)
+ deletions[i] = i;
+
+ for (del_perm_cnt = 0;
+ del_perm_cnt == 0 || next_permutation (deletions, cnt);
+ del_perm_cnt++)
+ test_insert_delete (insertions, deletions, cnt);
+
+ check (del_perm_cnt == factorial (cnt));
+ }
+ check (ins_perm_cnt == factorial (cnt));
+
+ free (insertions);
+ free (deletions);
+ }
+}
+
+/* Inserts values into an BT in each possible order, then
+ removes them in the same order, up to a specified maximum
+ size. */
+static void
+test_insert_any_remove_same (void)
+{
+ const int max_elems = 7;
+ int cnt;
+
+ for (cnt = 0; cnt <= max_elems; cnt++)
+ {
+ int *values;
+ unsigned int permutation_cnt;
+ int i;
+
+ values = xnmalloc (cnt, sizeof *values);
+ for (i = 0; i < cnt; i++)
+ values[i] = i;
+
+ for (permutation_cnt = 0;
+ permutation_cnt == 0 || next_permutation (values, cnt);
+ permutation_cnt++)
+ test_insert_delete (values, values, cnt);
+ check (permutation_cnt == factorial (cnt));
+
+ free (values);
+ }
+}
+
+/* Inserts values into an BT in each possible order, then
+ removes them in reverse order, up to a specified maximum
+ size. */
+static void
+test_insert_any_remove_reverse (void)
+{
+ const int max_elems = 7;
+ int cnt;
+
+ for (cnt = 0; cnt <= max_elems; cnt++)
+ {
+ int *insertions, *deletions;
+ unsigned int permutation_cnt;
+ int i;
+
+ insertions = xnmalloc (cnt, sizeof *insertions);
+ deletions = xnmalloc (cnt, sizeof *deletions);
+ for (i = 0; i < cnt; i++)
+ insertions[i] = i;
+
+ for (permutation_cnt = 0;
+ permutation_cnt == 0 || next_permutation (insertions, cnt);
+ permutation_cnt++)
+ {
+ memcpy (deletions, insertions, sizeof *insertions * cnt);
+ reverse (deletions, cnt);
+
+ test_insert_delete (insertions, deletions, cnt);
+ }
+ check (permutation_cnt == factorial (cnt));
+
+ free (insertions);
+ free (deletions);
+ }
+}
+
+/* Inserts and removes values in an BT in random orders. */
+static void
+test_random_sequence (void)
+{
+ const int max_elems = 128;
+ const int max_trials = 8;
+ int cnt;
+
+ for (cnt = 0; cnt <= max_elems; cnt += 2)
+ {
+ int *insertions, *deletions;
+ int trial;
+ int i;
+
+ insertions = xnmalloc (cnt, sizeof *insertions);
+ deletions = xnmalloc (cnt, sizeof *deletions);
+ for (i = 0; i < cnt; i++)
+ insertions[i] = i;
+ for (i = 0; i < cnt; i++)
+ deletions[i] = i;
+
+ for (trial = 0; trial < max_trials; trial++)
+ {
+ random_shuffle (insertions, cnt, sizeof *insertions);
+ random_shuffle (deletions, cnt, sizeof *deletions);
+
+ test_insert_delete (insertions, deletions, cnt);
+ }
+
+ free (insertions);
+ free (deletions);
+ }
+}
+
+/* Inserts elements into an BT in ascending order. */
+static void
+test_insert_ordered (void)
+{
+ const int max_elems = 1024;
+ struct element *elements;
+ int *values;
+ struct bt bt;
+ int i;
+
+ bt_init (&bt, compare_elements, &aux_data);
+ elements = xnmalloc (max_elems, sizeof *elements);
+ values = xnmalloc (max_elems, sizeof *values);
+ for (i = 0; i < max_elems; i++)
+ {
+ values[i] = elements[i].data = i;
+ check (bt_insert (&bt, &elements[i].node) == NULL);
+ check_bt (&bt, values, i + 1);
+ }
+ free (elements);
+ free (values);
+}
+
+/* Tests bt_find_ge and bt_find_le. */
+static void
+test_find_ge_le (void)
+{
+ const int max_elems = 10;
+ struct element *elements;
+ int *values;
+ unsigned int inc_pat;
+
+ elements = xnmalloc (max_elems, sizeof *elements);
+ values = xnmalloc (max_elems, sizeof *values);
+ for (inc_pat = 0; inc_pat < (1u << max_elems); inc_pat++)
+ {
+ struct bt bt;
+ int elem_cnt = 0;
+ int i;
+
+ /* Insert the values in the pattern into BT. */
+ bt_init (&bt, compare_elements, &aux_data);
+ for (i = 0; i < max_elems; i++)
+ if (inc_pat & (1u << i))
+ {
+ values[elem_cnt] = elements[elem_cnt].data = i;
+ check (bt_insert (&bt, &elements[elem_cnt].node) == NULL);
+ elem_cnt++;
+ }
+ check_bt (&bt, values, elem_cnt);
+
+ /* Try find_ge and find_le for each possible element value. */
+ for (i = -1; i <= max_elems; i++)
+ {
+ struct element tmp;
+ struct bt_node *ge, *le;
+ int j;
+
+ ge = le = NULL;
+ for (j = 0; j < elem_cnt; j++)
+ {
+ if (ge == NULL && values[j] >= i)
+ ge = &elements[j].node;
+ if (values[j] <= i)
+ le = &elements[j].node;
+ }
+
+ tmp.data = i;
+ check (bt_find_ge (&bt, &tmp.node) == ge);
+ check (bt_find_le (&bt, &tmp.node) == le);
+ }
+ }
+ free (elements);
+ free (values);
+}
+
+/* Inserts elements into an BT, then moves the nodes around in
+ memory. */
+static void
+test_moved (void)
+{
+ const int max_elems = 128;
+ struct element *e[2];
+ int cur;
+ int *values;
+ struct bt bt;
+ int i, j;
+
+ bt_init (&bt, compare_elements, &aux_data);
+ e[0] = xnmalloc (max_elems, sizeof *e[0]);
+ e[1] = xnmalloc (max_elems, sizeof *e[1]);
+ values = xnmalloc (max_elems, sizeof *values);
+ cur = 0;
+ for (i = 0; i < max_elems; i++)
+ {
+ values[i] = e[cur][i].data = i;
+ check (bt_insert (&bt, &e[cur][i].node) == NULL);
+ check_bt (&bt, values, i + 1);
+
+ for (j = 0; j <= i; j++)
+ {
+ e[!cur][j] = e[cur][j];
+ bt_moved (&bt, &e[!cur][j].node);
+ check_bt (&bt, values, i + 1);
+ }
+ cur = !cur;
+ }
+ free (e[0]);
+ free (e[1]);
+ free (values);
+}
+
+/* Inserts values into an BT, then changes their values. */
+static void
+test_changed (void)
+{
+ const int max_elems = 6;
+ int cnt;
+
+ for (cnt = 0; cnt <= max_elems; cnt++)
+ {
+ int *values, *changed_values;
+ struct element *elements;
+ unsigned int permutation_cnt;
+ int i;
+
+ values = xnmalloc (cnt, sizeof *values);
+ changed_values = xnmalloc (cnt, sizeof *changed_values);
+ elements = xnmalloc (cnt, sizeof *elements);
+ for (i = 0; i < cnt; i++)
+ values[i] = i;
+
+ for (permutation_cnt = 0;
+ permutation_cnt == 0 || next_permutation (values, cnt);
+ permutation_cnt++)
+ {
+ for (i = 0; i < cnt; i++)
+ {
+ int j, k;
+ for (j = 0; j <= cnt; j++)
+ {
+ struct bt bt;
+ struct bt_node *changed_retval;
+
+ bt_init (&bt, compare_elements, &aux_data);
+
+ /* Add to BT in order. */
+ for (k = 0; k < cnt; k++)
+ {
+ int n = values[k];
+ elements[n].data = n;
+ check (bt_insert (&bt, &elements[n].node) == NULL);
+ }
+ check_bt (&bt, values, cnt);
+
+ /* Change value i to j. */
+ elements[i].data = j;
+ for (k = 0; k < cnt; k++)
+ changed_values[k] = k;
+ changed_retval = bt_changed (&bt, &elements[i].node);
+ if (i != j && j < cnt)
+ {
+ /* Will cause duplicate. */
+ check (changed_retval == &elements[j].node);
+ changed_values[i] = changed_values[cnt - 1];
+ check_bt (&bt, changed_values, cnt - 1);
+ }
+ else
+ {
+ /* Succeeds. */
+ check (changed_retval == NULL);
+ changed_values[i] = j;
+ check_bt (&bt, changed_values, cnt);
+ }
+ }
+ }
+ }
+ check (permutation_cnt == factorial (cnt));
+
+ free (values);
+ free (changed_values);
+ free (elements);
+ }
+}
+\f
+/* Main program. */
+
+/* Runs TEST_FUNCTION and prints a message about NAME. */
+static void
+run_test (void (*test_function) (void), const char *name)
+{
+ test_name = name;
+ putchar ('.');
+ fflush (stdout);
+ test_function ();
+}
+
+int
+main (void)
+{
+ run_test (test_insert_any_remove_any,
+ "insert any order, delete any order");
+ run_test (test_insert_any_remove_same,
+ "insert any order, delete same order");
+ run_test (test_insert_any_remove_reverse,
+ "insert any order, delete reverse order");
+ run_test (test_random_sequence,
+ "insert and delete in random sequence");
+ run_test (test_insert_ordered,
+ "insert in ascending order");
+ run_test (test_find_ge_le, "find_ge and find_le");
+ run_test (test_moved, "move elements around in memory");
+ run_test (test_changed, "change key data in nodes");
+ putchar ('\n');
+
+ return 0;
+}