From 48baf40c86d10eac3525d4199c9bb0ecc94c9d4d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 31 Mar 2007 17:24:02 +0000 Subject: [PATCH] Add plain balanced tree structure. Patch #5827. Thanks to John Darrington for review. --- src/libpspp/ChangeLog | 10 + src/libpspp/automake.mk | 2 + src/libpspp/bt.c | 654 +++++++++++++++++++++++++++++++++++ src/libpspp/bt.h | 83 +++++ tests/ChangeLog | 6 + tests/automake.mk | 12 +- tests/libpspp/bt-test.c | 738 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 1503 insertions(+), 2 deletions(-) create mode 100644 src/libpspp/bt.c create mode 100644 src/libpspp/bt.h create mode 100644 tests/libpspp/bt-test.c diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index 4a2c8263..4f036320 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,13 @@ +2007-03-31 Ben Pfaff + + Patch #5827. + + * automake.mk (src_libpspp_libpspp_a_SOURCES): Add bt.c. + + * bt.h: New file. + + * bt.c: New file. + 2007-03-30 Ben Pfaff Patch #5829. diff --git a/src/libpspp/automake.mk b/src/libpspp/automake.mk index cc099a5f..1af66697 100644 --- a/src/libpspp/automake.mk +++ b/src/libpspp/automake.mk @@ -12,6 +12,8 @@ src_libpspp_libpspp_a_SOURCES = \ src/libpspp/alloc.c \ src/libpspp/alloc.h \ src/libpspp/bit-vector.h \ + src/libpspp/bt.c \ + src/libpspp/bt.h \ src/libpspp/legacy-encoding.c \ src/libpspp/legacy-encoding.h \ src/libpspp/copyleft.c \ diff --git a/src/libpspp/bt.c b/src/libpspp/bt.c new file mode 100644 index 00000000..62a7cdd6 --- /dev/null +++ b/src/libpspp/bt.c @@ -0,0 +1,654 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Balanced tree (BT) data structure. + + The client should not need to be aware of the form of + balancing applied to the balanced tree, as its operation is + fully encapsulated. The current implementation is a scapegoat + tree. Scapegoat trees have the advantage over many other + forms of balanced trees that they do not store any additional + information in each node; thus, they are slightly more + space-efficient than, say, red-black or AVL trees. Compared + to splay trees, scapegoat trees provide guaranteed logarithmic + worst-case search time and do not restructure the tree during + a search. + + For information on scapegoat trees, see Galperin and Rivest, + "Scapegoat Trees", Proc. 4th ACM-SIAM Symposium on Discrete + Algorithms, or , + which includes source code and links to other resources, such + as the Galperin and Rivest paper. + + One potentially tricky part of scapegoat tree design is the + choice of alpha, which is a real constant that must be greater + than 0.5 and less than 1. We must be able to efficiently + calculate h_alpha = floor(log(n)/log(1/alpha)) for integer n > + 0. One way to do so would be to maintain a table relating + h_alpha to the minimum value of n that yields that h_alpha. + Then, we can track the value of h_alpha(n) in the number of + nodes in the tree n as nodes are inserted and deleted. + + This implementation uses a different approach. We choose + alpha = sqrt(2)/2 = 1/sqrt(2) ~= .707. Then, computing + h_alpha is a matter of computing a logarithm in base sqrt(2). + This is easy: we simply compute twice the base-2 logarithm, + then adjust upward by 1 if necessary. See calculate_h_alpha + for details. */ + +/* These library routines have no external dependencies other + than the standard C library. + + If you add routines in this file, please add a corresponding + test to bt-test.c. This test program should achieve 100% + coverage of lines and branches in this code, as reported by + "gcov -b". */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include + +static void rebalance_subtree (struct bt *, struct bt_node *, size_t); + +static struct bt_node **down_link (struct bt *, struct bt_node *); +static inline struct bt_node *sibling (struct bt_node *p); +static size_t count_nodes_in_subtree (const struct bt_node *); + +static inline int floor_log2 (size_t); +static inline int calculate_h_alpha (size_t); + +/* Initializes BT as an empty BT that uses the given COMPARE + function, passing in AUX as auxiliary data. */ +void +bt_init (struct bt *bt, + bt_compare_func *compare, + const void *aux) +{ + bt->root = NULL; + bt->compare = compare; + bt->aux = aux; + bt->size = 0; + bt->max_size = 0; +} + +/* Inserts the given NODE into BT. + Returns a null pointer if successful. + Returns the existing node already in BT equal to NODE, on + failure. */ +struct bt_node * +bt_insert (struct bt *bt, struct bt_node *node) +{ + int depth = 0; + + node->down[0] = NULL; + node->down[1] = NULL; + + if (bt->root == NULL) + { + bt->root = node; + node->up = NULL; + } + else + { + struct bt_node *p = bt->root; + for (;;) + { + int cmp, dir; + + cmp = bt->compare (node, p, bt->aux); + if (cmp == 0) + return p; + depth++; + + dir = cmp > 0; + if (p->down[dir] == NULL) + { + p->down[dir] = node; + node->up = p; + break; + } + p = p->down[dir]; + } + } + + bt->size++; + if (bt->size > bt->max_size) + bt->max_size = bt->size; + + if (depth > calculate_h_alpha (bt->size)) + { + /* We use the "alternative" method of finding a scapegoat + node described by Galperin and Rivest. */ + struct bt_node *s = node; + size_t size = 1; + int i; + + for (i = 1; ; i++) + if (i < depth) + { + size += 1 + count_nodes_in_subtree (sibling (s)); + s = s->up; + if (i > calculate_h_alpha (size)) + { + rebalance_subtree (bt, s, size); + break; + } + } + else + { + rebalance_subtree (bt, bt->root, bt->size); + bt->max_size = bt->size; + break; + } + } + + return NULL; +} + +/* Deletes P from BT. */ +void +bt_delete (struct bt *bt, struct bt_node *p) +{ + struct bt_node **q = down_link (bt, p); + struct bt_node *r = p->down[1]; + if (r == NULL) + { + *q = p->down[0]; + if (*q) + (*q)->up = p->up; + } + else if (r->down[0] == NULL) + { + r->down[0] = p->down[0]; + *q = r; + r->up = p->up; + if (r->down[0] != NULL) + r->down[0]->up = r; + } + else + { + struct bt_node *s = r->down[0]; + while (s->down[0] != NULL) + s = s->down[0]; + r = s->up; + r->down[0] = s->down[1]; + s->down[0] = p->down[0]; + s->down[1] = p->down[1]; + *q = s; + if (s->down[0] != NULL) + s->down[0]->up = s; + s->down[1]->up = s; + s->up = p->up; + if (r->down[0] != NULL) + r->down[0]->up = r; + } + bt->size--; + + /* We approximate .707 as .75 here. This is conservative: it + will cause us to do a little more rebalancing than strictly + necessary to maintain the scapegoat tree's height + invariant. */ + if (bt->size < bt->max_size * 3 / 4 && bt->size > 0) + { + rebalance_subtree (bt, bt->root, bt->size); + bt->max_size = bt->size; + } +} + +/* Returns the node with minimum value in BT, or a null pointer + if BT is empty. */ +struct bt_node * +bt_first (const struct bt *bt) +{ + struct bt_node *p = bt->root; + if (p != NULL) + while (p->down[0] != NULL) + p = p->down[0]; + return p; +} + +/* Returns the node with maximum value in BT, or a null pointer + if BT is empty. */ +struct bt_node * +bt_last (const struct bt *bt) +{ + struct bt_node *p = bt->root; + if (p != NULL) + while (p->down[1] != NULL) + p = p->down[1]; + return p; +} + +/* Searches BT for a node equal to TARGET. + Returns the node if found, or a null pointer otherwise. */ +struct bt_node * +bt_find (const struct bt *bt, const struct bt_node *target) +{ + const struct bt_node *p; + int cmp; + + for (p = bt->root; p != NULL; p = p->down[cmp > 0]) + { + cmp = bt->compare (target, p, bt->aux); + if (cmp == 0) + return (struct bt_node *) p; + } + + return NULL; +} + +/* Searches BT for, and returns, the first node in in-order whose + value is greater than or equal to TARGET. Returns a null + pointer if all nodes in BT are less than TARGET. + + Another way to look at the return value is that it is the node + that would be returned by "bt_next (BT, TARGET)" if TARGET + were inserted in BT (assuming that TARGET would not be a + duplicate). */ +struct bt_node * +bt_find_ge (const struct bt *bt, const struct bt_node *target) +{ + const struct bt_node *p = bt->root; + const struct bt_node *q = NULL; + while (p != NULL) + { + int cmp = bt->compare (target, p, bt->aux); + if (cmp > 0) + p = p->down[1]; + else + { + q = p; + if (cmp < 0) + p = p->down[0]; + else + break; + } + } + return (struct bt_node *) q; +} + +/* Searches BT for, and returns, the last node in in-order whose + value is less than or equal to TARGET, which should not be in + BT. Returns a null pointer if all nodes in BT are greater + than TARGET. + + Another way to look at the return value is that it is the node + that would be returned by "bt_prev (BT, TARGET)" if TARGET + were inserted in BT (assuming that TARGET would not be a + duplicate). */ +struct bt_node * +bt_find_le (const struct bt *bt, const struct bt_node *target) +{ + const struct bt_node *p = bt->root; + const struct bt_node *q = NULL; + while (p != NULL) + { + int cmp = bt->compare (target, p, bt->aux); + if (cmp < 0) + p = p->down[0]; + else + { + q = p; + if (cmp > 0) + p = p->down[1]; + else + break; + } + } + return (struct bt_node *) q; +} + +/* Returns the node in BT following P in in-order. + If P is null, returns the minimum node in BT. + Returns a null pointer if P is the maximum node in BT or if P + is null and BT is empty. */ +struct bt_node * +bt_next (const struct bt *bt, const struct bt_node *p) +{ + if (p == NULL) + return bt_first (bt); + else if (p->down[1] == NULL) + { + struct bt_node *q; + for (q = p->up; ; p = q, q = q->up) + if (q == NULL || p == q->down[0]) + return q; + } + else + { + p = p->down[1]; + while (p->down[0] != NULL) + p = p->down[0]; + return (struct bt_node *) p; + } +} + +/* Returns the node in BT preceding P in in-order. + If P is null, returns the maximum node in BT. + Returns a null pointer if P is the minimum node in BT or if P + is null and BT is empty. */ +struct bt_node * +bt_prev (const struct bt *bt, const struct bt_node *p) +{ + if (p == NULL) + return bt_last (bt); + else if (p->down[0] == NULL) + { + struct bt_node *q; + for (q = p->up; ; p = q, q = q->up) + if (q == NULL || p == q->down[1]) + return q; + } + else + { + p = p->down[0]; + while (p->down[1] != NULL) + p = p->down[1]; + return (struct bt_node *) p; + } +} + +/* Moves P around in BT to compensate for its key having + changed. Returns a null pointer if successful. If P's new + value is equal to that of some other node in BT, returns the + other node after removing P from BT. + + This function is an optimization only if it is likely that P + can actually retain its relative position in BT, e.g. its key + has only been adjusted slightly. Otherwise, it is more + efficient to simply remove P from BT, change its key, and + re-insert P. + + It is not safe to update more than one node's key, then to + call this function for each node. Instead, update a single + node's key, call this function, update another node's key, and + so on. Alternatively, remove all affected nodes from the + tree, update their keys, then re-insert all of them. */ +struct bt_node * +bt_changed (struct bt *bt, struct bt_node *p) +{ + struct bt_node *prev = bt_prev (bt, p); + struct bt_node *next = bt_next (bt, p); + + if ((prev != NULL && bt->compare (prev, p, bt->aux) >= 0) + || (next != NULL && bt->compare (p, next, bt->aux) >= 0)) + { + bt_delete (bt, p); + return bt_insert (bt, p); + } + return NULL; + } + +/* BT nodes may be moved around in memory as necessary, e.g. as + the result of an realloc operation on a block that contains a + node. Once this is done, call this function passing node P + that was moved and its BT before attempting any other + operation on BT. + + It is not safe to move more than one node, then to call this + function for each node. Instead, move a single node, call + this function, move another node, and so on. Alternatively, + remove all affected nodes from the tree, move them, then + re-insert all of them. */ +void +bt_moved (struct bt *bt, struct bt_node *p) +{ + if (p->up != NULL) + { + int d = p->up->down[0] == NULL || bt->compare (p, p->up, bt->aux) > 0; + p->up->down[d] = p; + } + else + bt->root = p; + if (p->down[0] != NULL) + p->down[0]->up = p; + if (p->down[1] != NULL) + p->down[1]->up = p; +} + +/* Tree rebalancing. + + This algorithm is from Q. F. Stout and B. L. Warren, "Tree + Rebalancing in Optimal Time and Space", CACM 29(1986):9, + pp. 902-908. It uses O(N) time and O(1) space to rebalance a + subtree that contains N nodes. */ + +static void tree_to_vine (struct bt_node **); +static void vine_to_tree (struct bt_node **, size_t count); + +/* Rebalances the subtree in BT rooted at SUBTREE, which contains + exactly COUNT nodes. */ +static void +rebalance_subtree (struct bt *bt, struct bt_node *subtree, size_t count) +{ + struct bt_node *up = subtree->up; + struct bt_node **q = down_link (bt, subtree); + tree_to_vine (q); + vine_to_tree (q, count); + (*q)->up = up; +} + +/* Converts the subtree rooted at *Q into a vine (a binary search + tree in which all the right links are null), and updates *Q to + point to the new root of the subtree. */ +static void +tree_to_vine (struct bt_node **q) +{ + struct bt_node *p = *q; + while (p != NULL) + if (p->down[1] == NULL) + { + q = &p->down[0]; + p = *q; + } + else + { + struct bt_node *r = p->down[1]; + p->down[1] = r->down[0]; + r->down[0] = p; + p = r; + *q = r; + } +} + +/* Performs a compression transformation COUNT times, starting at + *Q, and updates *Q to point to the new root of the subtree. */ +static void +compress (struct bt_node **q, unsigned long count) +{ + while (count--) + { + struct bt_node *red = *q; + struct bt_node *black = red->down[0]; + + *q = black; + red->down[0] = black->down[1]; + black->down[1] = red; + red->up = black; + if (red->down[0] != NULL) + red->down[0]->up = red; + q = &black->down[0]; + } +} + +/* Converts the vine rooted at *Q, which contains exactly COUNT + nodes, into a balanced tree, and updates *Q to point to the + new root of the balanced tree. */ +static void +vine_to_tree (struct bt_node **q, size_t count) +{ + size_t leaf_nodes = count + 1 - ((size_t) 1 << floor_log2 (count + 1)); + size_t vine_nodes = count - leaf_nodes; + + compress (q, leaf_nodes); + while (vine_nodes > 1) + { + vine_nodes /= 2; + compress (q, vine_nodes); + } + while ((*q)->down[0] != NULL) + { + (*q)->down[0]->up = *q; + q = &(*q)->down[0]; + } +} + +/* Other binary tree helper functions. */ + +/* Returns the address of the pointer that points down to P + within BT. */ +static struct bt_node ** +down_link (struct bt *bt, struct bt_node *p) +{ + struct bt_node *q = p->up; + return q != NULL ? &q->down[q->down[0] != p] : &bt->root; +} + +/* Returns node P's sibling; that is, the other child of its + parent. P must not be the root. */ +static inline struct bt_node * +sibling (struct bt_node *p) +{ + struct bt_node *q = p->up; + return q->down[q->down[0] == p]; +} + +/* Returns the number of nodes in the given SUBTREE. */ +static size_t +count_nodes_in_subtree (const struct bt_node *subtree) +{ + /* This is an in-order traversal modified to iterate only the + nodes in SUBTREE. */ + size_t count = 0; + if (subtree != NULL) + { + const struct bt_node *p = subtree; + while (p->down[0] != NULL) + p = p->down[0]; + for (;;) + { + count++; + if (p->down[1] != NULL) + { + p = p->down[1]; + while (p->down[0] != NULL) + p = p->down[0]; + } + else + { + for (;;) + { + const struct bt_node *q; + if (p == subtree) + goto done; + q = p; + p = p->up; + if (p->down[0] == q) + break; + } + } + } + } + done: + return count; +} + +/* Arithmetic. */ + +/* Returns the number of high-order 0-bits in X. + Undefined if X is zero. */ +static inline int +count_leading_zeros (size_t x) +{ +#if __GNUC__ >= 4 +#if SIZE_MAX > ULONG_MAX + return __builtin_clzll (x); +#elif SIZE_MAX > UINT_MAX + return __builtin_clzl (x); +#else + return __builtin_clz (x); +#endif +#else + /* This algorithm is from _Hacker's Delight_ section 5.3. */ + size_t y; + int n; + +#define COUNT_STEP(BITS) \ + y = x >> BITS; \ + if (y != 0) \ + { \ + n -= BITS; \ + x = y; \ + } + + n = sizeof (size_t) * CHAR_BIT; +#if SIZE_MAX >> 31 >> 31 >> 2 + COUNT_STEP (64); +#endif +#if SIZE_MAX >> 31 >> 1 + COUNT_STEP (32); +#endif + COUNT_STEP (16); + COUNT_STEP (8); + COUNT_STEP (4); + COUNT_STEP (2); + y = x >> 1; + return y != 0 ? n - 2 : n - x; +#endif +} + +/* Returns floor(log2(x)). + Undefined if X is zero. */ +static inline int +floor_log2 (size_t x) +{ + return sizeof (size_t) * CHAR_BIT - 1 - count_leading_zeros (x); +} + +/* Returns floor(pow(sqrt(2), x * 2 + 1)). + Defined for X from 0 up to the number of bits in size_t minus + 1. */ +static inline size_t +pow_sqrt2 (int x) +{ + /* These constants are sqrt(2) multiplied by 2**63 or 2**31, + respectively, and then rounded to nearest. */ +#if SIZE_MAX >> 31 >> 1 + return (UINT64_C(0xb504f333f9de6484) >> (63 - x)) + 1; +#else + return (0xb504f334 >> (31 - x)) + 1; +#endif +} + +/* Returns floor(log(n)/log(sqrt(2))). + Undefined if N is 0. */ +static inline int +calculate_h_alpha (size_t n) +{ + int log2 = floor_log2 (n); + + /* The correct answer is either 2 * log2 or one more. So we + see if n >= pow(sqrt(2), 2 * log2 + 1) and if so, add 1. */ + return (2 * log2) + (n >= pow_sqrt2 (log2)); +} + diff --git a/src/libpspp/bt.h b/src/libpspp/bt.h new file mode 100644 index 00000000..2a014aaf --- /dev/null +++ b/src/libpspp/bt.h @@ -0,0 +1,83 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#ifndef LIBPSPP_BT_H +#define LIBPSPP_BT_H 1 + +/* Balanced tree (BT) data structure. + + The client should not need to be aware of the form of + balancing applied to the balanced tree, as its operation is + fully encapsulated. */ + +#include + +/* Returns the data structure corresponding to the given NODE, + assuming that NODE is embedded as the given MEMBER name in + data type STRUCT. */ +#define bt_data(NODE, STRUCT, MEMBER) \ + ((STRUCT *) ((char *) (NODE) - offsetof (STRUCT, MEMBER))) + +/* Node in a balanced binary tree. */ +struct bt_node + { + struct bt_node *up; /* Parent (NULL for root). */ + struct bt_node *down[2]; /* Left child, right child. */ + }; + +/* Compares nodes A and B, with the tree's AUX. + Returns a strcmp-like result. */ +typedef int bt_compare_func (const struct bt_node *a, + const struct bt_node *b, + const void *aux); + +/* A balanced binary tree. */ +struct bt + { + struct bt_node *root; /* Tree's root, NULL if empty. */ + bt_compare_func *compare; /* To compare nodes. */ + const void *aux; /* Auxiliary data. */ + size_t size; /* Current node count. */ + size_t max_size; /* Max size since last complete rebalance. */ + }; + +void bt_init (struct bt *, bt_compare_func *, const void *aux); + +struct bt_node *bt_insert (struct bt *, struct bt_node *); +void bt_delete (struct bt *, struct bt_node *); + +struct bt_node *bt_find (const struct bt *, const struct bt_node *); +struct bt_node *bt_find_ge (const struct bt *, const struct bt_node *); +struct bt_node *bt_find_le (const struct bt *, const struct bt_node *); + +struct bt_node *bt_first (const struct bt *); +struct bt_node *bt_last (const struct bt *); +struct bt_node *bt_find (const struct bt *, const struct bt_node *); +struct bt_node *bt_next (const struct bt *, const struct bt_node *); +struct bt_node *bt_prev (const struct bt *, const struct bt_node *); + +struct bt_node *bt_changed (struct bt *, struct bt_node *); +void bt_moved (struct bt *, struct bt_node *); + +/* Returns the number of nodes currently in BT. */ +static inline size_t bt_count (const struct bt *bt) +{ + return bt->size; +} + +#endif /* libpspp/bt.h */ diff --git a/tests/ChangeLog b/tests/ChangeLog index a76dbc91..e4cda349 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2007-03-31 Ben Pfaff + + * automake.mk (tests_libpspp_bt_test_LDADD): Add tests/libpspp/bt. + + * libpspp/bt-test.c: New test. + 2007-03-25 Ben Pfaff * automake.mk: Add tests/libpspp/sparse-array-test. diff --git a/tests/automake.mk b/tests/automake.mk index 998be3a5..d65d0e49 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -133,10 +133,11 @@ dist_TESTS = \ tests/expressions/vectors.sh nodist_TESTS = \ + tests/libpspp/abt-test \ + tests/libpspp/bt-test \ + tests/libpspp/heap-test \ tests/libpspp/ll-test \ tests/libpspp/llx-test \ - tests/libpspp/heap-test \ - tests/libpspp/abt-test \ tests/libpspp/sparse-array-test TESTS = $(dist_TESTS) $(nodist_TESTS) @@ -173,6 +174,13 @@ tests_libpspp_abt_test_SOURCES = \ tests_libpspp_abt_test_LDADD = gl/libgl.la tests_libpspp_abt_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 +tests_libpspp_bt_test_SOURCES = \ + src/libpspp/bt.c \ + src/libpspp/bt.h \ + tests/libpspp/bt-test.c +tests_libpspp_bt_test_LDADD = gl/libgl.la +tests_libpspp_bt_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 + tests_libpspp_sparse_array_test_SOURCES = \ src/libpspp/sparse-array.c \ src/libpspp/sparse-array.h \ diff --git a/tests/libpspp/bt-test.c b/tests/libpspp/bt-test.c new file mode 100644 index 00000000..e0979daf --- /dev/null +++ b/tests/libpspp/bt-test.c @@ -0,0 +1,738 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* This is a test program for the bt_* routines defined in bt.c. + This test program aims to be as comprehensive as possible. + "gcov -b" should report 100% coverage of lines and branches in + bt.c. "valgrind --leak-check=yes --show-reachable=yes" should + give a clean report. */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Currently running test. */ +static const char *test_name; + +/* Exit with a failure code. + (Place a breakpoint on this function while debugging.) */ +static void +check_die (void) +{ + exit (EXIT_FAILURE); +} + +/* If OK is not true, prints a message about failure on the + current source file and the given LINE and terminates. */ +static void +check_func (bool ok, int line) +{ + if (!ok) + { + printf ("Check failed in %s test at %s, line %d\n", + test_name, __FILE__, line); + check_die (); + } +} + +/* Verifies that EXPR evaluates to true. + If not, prints a message citing the calling line number and + terminates. */ +#define check(EXPR) check_func ((EXPR), __LINE__) + +/* Prints a message about memory exhaustion and exits with a + failure code. */ +static void +xalloc_die (void) +{ + printf ("virtual memory exhausted\n"); + exit (EXIT_FAILURE); +} + +/* Allocates and returns N bytes of memory. */ +static void * +xmalloc (size_t n) +{ + if (n != 0) + { + void *p = malloc (n); + if (p == NULL) + xalloc_die (); + + return p; + } + else + return NULL; +} + +static void * +xmemdup (const void *p, size_t n) +{ + void *q = xmalloc (n); + memcpy (q, p, n); + return q; +} + +/* Allocates and returns N * M bytes of memory. */ +static void * +xnmalloc (size_t n, size_t m) +{ + if ((size_t) -1 / m <= n) + xalloc_die (); + return xmalloc (n * m); +} + +/* Node type and support routines. */ + +/* Test data element. */ +struct element + { + struct bt_node node; /* Embedded binary tree element. */ + int data; /* Primary value. */ + }; + +static int aux_data; + +/* Returns the `struct element' that NODE is embedded within. */ +static struct element * +bt_node_to_element (const struct bt_node *node) +{ + return bt_data (node, struct element, node); +} + +/* Compares the `x' values in A and B and returns a strcmp-type + return value. Verifies that AUX points to aux_data. */ +static int +compare_elements (const struct bt_node *a_, const struct bt_node *b_, + const void *aux) +{ + const struct element *a = bt_node_to_element (a_); + const struct element *b = bt_node_to_element (b_); + + check (aux == &aux_data); + return a->data < b->data ? -1 : a->data > b->data; +} + +/* Compares A and B and returns a strcmp-type return value. */ +static int +compare_ints_noaux (const void *a_, const void *b_) +{ + const int *a = a_; + const int *b = b_; + + return *a < *b ? -1 : *a > *b; +} + +/* Swaps *A and *B. */ +static void +swap (int *a, int *b) +{ + int t = *a; + *a = *b; + *b = t; +} + +/* Reverses the order of the CNT integers starting at VALUES. */ +static void +reverse (int *values, size_t cnt) +{ + size_t i = 0; + size_t j = cnt; + + while (j > i) + swap (&values[i++], &values[--j]); +} + +/* Arranges the CNT elements in VALUES into the lexicographically + next greater permutation. Returns true if successful. + If VALUES is already the lexicographically greatest + permutation of its elements (i.e. ordered from greatest to + smallest), arranges them into the lexicographically least + permutation (i.e. ordered from smallest to largest) and + returns false. */ +static bool +next_permutation (int *values, size_t cnt) +{ + if (cnt > 0) + { + size_t i = cnt - 1; + while (i != 0) + { + i--; + if (values[i] < values[i + 1]) + { + size_t j; + for (j = cnt - 1; values[i] >= values[j]; j--) + continue; + swap (values + i, values + j); + reverse (values + (i + 1), cnt - (i + 1)); + return true; + } + } + + reverse (values, cnt); + } + + return false; +} + +/* Returns N!. */ +static unsigned int +factorial (unsigned int n) +{ + unsigned int value = 1; + while (n > 1) + value *= n--; + return value; +} + +/* Randomly shuffles the CNT elements in ARRAY, each of which is + SIZE bytes in size. */ +static void +random_shuffle (void *array_, size_t cnt, size_t size) +{ + char *array = array_; + char *tmp = xmalloc (size); + size_t i; + + for (i = 0; i < cnt; i++) + { + size_t j = rand () % (cnt - i) + i; + if (i != j) + { + memcpy (tmp, array + j * size, size); + memcpy (array + j * size, array + i * size, size); + memcpy (array + i * size, tmp, size); + } + } + + free (tmp); +} + +/* Calculates floor(log(n)/log(sqrt(2))). */ +static int +calculate_h_alpha (size_t n) +{ + size_t thresholds[] = + { + 0, 2, 2, 3, 4, 6, 8, 12, 16, 23, 32, 46, 64, 91, 128, 182, 256, 363, + 512, 725, 1024, 1449, 2048, 2897, 4096, 5793, 8192, 11586, 16384, + 23171, 32768, 46341, 65536, 92682, 131072, 185364, 262144, 370728, + 524288, 741456, 1048576, 1482911, 2097152, 2965821, 4194304, 5931642, + 8388608, 11863284, 16777216, 23726567, 33554432, 47453133, 67108864, + 94906266, 134217728, 189812532, 268435456, 379625063, 536870912, + 759250125, 1073741824, 1518500250, 2147483648, 3037000500, + }; + size_t threshold_cnt = sizeof thresholds / sizeof *thresholds; + size_t i; + + for (i = 0; i < threshold_cnt; i++) + if (thresholds[i] > n) + break; + return i - 1; +} + +/* Returns the height of the tree rooted at NODE. */ +static int +get_height (struct bt_node *node) +{ + if (node == NULL) + return 0; + else + { + int left = get_height (node->down[0]); + int right = get_height (node->down[1]); + return 1 + (left > right ? left : right); + } +} + +/* Checks that BT is loosely alpha-height balanced, that is, that + its height is no more than h_alpha(count) + 1, where + h_alpha(n) = floor(log(n)/log(1/alpha)). */ +static void +check_balance (struct bt *bt) +{ + /* In the notation of the Galperin and Rivest paper (and of + CLR), the height of a tree is the number of edges in the + longest path from the root to a leaf, so we have to subtract + 1 from our measured height. */ + int height = get_height (bt->root) - 1; + int max_height = calculate_h_alpha (bt_count (bt)) + 1; + check (height <= max_height); +} + +/* Checks that BT contains the CNT ints in DATA, that its + structure is correct, and that certain operations on BT + produce the expected results. */ +static void +check_bt (struct bt *bt, const int data[], size_t cnt) +{ + struct element e; + size_t i; + int *order; + + order = xmemdup (data, cnt * sizeof *data); + qsort (order, cnt, sizeof *order, compare_ints_noaux); + + for (i = 0; i < cnt; i++) + { + struct bt_node *p; + + e.data = data[i]; + if (rand () % 2) + p = bt_find (bt, &e.node); + else + p = bt_insert (bt, &e.node); + check (p != NULL); + check (p != &e.node); + check (bt_node_to_element (p)->data == data[i]); + } + + e.data = -1; + check (bt_find (bt, &e.node) == NULL); + + check_balance (bt); + + if (cnt == 0) + { + check (bt_first (bt) == NULL); + check (bt_last (bt) == NULL); + check (bt_next (bt, NULL) == NULL); + check (bt_prev (bt, NULL) == NULL); + } + else + { + struct bt_node *p; + + for (p = bt_first (bt), i = 0; i < cnt; p = bt_next (bt, p), i++) + check (bt_node_to_element (p)->data == order[i]); + check (p == NULL); + + for (p = bt_last (bt), i = 0; i < cnt; p = bt_prev (bt, p), i++) + check (bt_node_to_element (p)->data == order[cnt - i - 1]); + check (p == NULL); + } + + free (order); +} + +/* Inserts the CNT values from 0 to CNT - 1 (inclusive) into an + BT in the order specified by INSERTIONS, then deletes them in + the order specified by DELETIONS, checking the BT's contents + for correctness after each operation. */ +static void +test_insert_delete (const int insertions[], + const int deletions[], + size_t cnt) +{ + struct element *elements; + struct bt bt; + size_t i; + + elements = xnmalloc (cnt, sizeof *elements); + for (i = 0; i < cnt; i++) + elements[i].data = i; + + bt_init (&bt, compare_elements, &aux_data); + check_bt (&bt, NULL, 0); + for (i = 0; i < cnt; i++) + { + check (bt_insert (&bt, &elements[insertions[i]].node) == NULL); + check_bt (&bt, insertions, i + 1); + } + for (i = 0; i < cnt; i++) + { + bt_delete (&bt, &elements[deletions[i]].node); + check_bt (&bt, deletions + i + 1, cnt - i - 1); + } + + free (elements); +} + +/* Inserts values into an BT in each possible order, then + removes them in each possible order, up to a specified maximum + size. */ +static void +test_insert_any_remove_any (void) +{ + const int max_elems = 5; + int cnt; + + for (cnt = 0; cnt <= max_elems; cnt++) + { + int *insertions, *deletions; + unsigned int ins_perm_cnt; + int i; + + insertions = xnmalloc (cnt, sizeof *insertions); + deletions = xnmalloc (cnt, sizeof *deletions); + for (i = 0; i < cnt; i++) + insertions[i] = i; + + for (ins_perm_cnt = 0; + ins_perm_cnt == 0 || next_permutation (insertions, cnt); + ins_perm_cnt++) + { + unsigned int del_perm_cnt; + int i; + + for (i = 0; i < cnt; i++) + deletions[i] = i; + + for (del_perm_cnt = 0; + del_perm_cnt == 0 || next_permutation (deletions, cnt); + del_perm_cnt++) + test_insert_delete (insertions, deletions, cnt); + + check (del_perm_cnt == factorial (cnt)); + } + check (ins_perm_cnt == factorial (cnt)); + + free (insertions); + free (deletions); + } +} + +/* Inserts values into an BT in each possible order, then + removes them in the same order, up to a specified maximum + size. */ +static void +test_insert_any_remove_same (void) +{ + const int max_elems = 7; + int cnt; + + for (cnt = 0; cnt <= max_elems; cnt++) + { + int *values; + unsigned int permutation_cnt; + int i; + + values = xnmalloc (cnt, sizeof *values); + for (i = 0; i < cnt; i++) + values[i] = i; + + for (permutation_cnt = 0; + permutation_cnt == 0 || next_permutation (values, cnt); + permutation_cnt++) + test_insert_delete (values, values, cnt); + check (permutation_cnt == factorial (cnt)); + + free (values); + } +} + +/* Inserts values into an BT in each possible order, then + removes them in reverse order, up to a specified maximum + size. */ +static void +test_insert_any_remove_reverse (void) +{ + const int max_elems = 7; + int cnt; + + for (cnt = 0; cnt <= max_elems; cnt++) + { + int *insertions, *deletions; + unsigned int permutation_cnt; + int i; + + insertions = xnmalloc (cnt, sizeof *insertions); + deletions = xnmalloc (cnt, sizeof *deletions); + for (i = 0; i < cnt; i++) + insertions[i] = i; + + for (permutation_cnt = 0; + permutation_cnt == 0 || next_permutation (insertions, cnt); + permutation_cnt++) + { + memcpy (deletions, insertions, sizeof *insertions * cnt); + reverse (deletions, cnt); + + test_insert_delete (insertions, deletions, cnt); + } + check (permutation_cnt == factorial (cnt)); + + free (insertions); + free (deletions); + } +} + +/* Inserts and removes values in an BT in random orders. */ +static void +test_random_sequence (void) +{ + const int max_elems = 128; + const int max_trials = 8; + int cnt; + + for (cnt = 0; cnt <= max_elems; cnt += 2) + { + int *insertions, *deletions; + int trial; + int i; + + insertions = xnmalloc (cnt, sizeof *insertions); + deletions = xnmalloc (cnt, sizeof *deletions); + for (i = 0; i < cnt; i++) + insertions[i] = i; + for (i = 0; i < cnt; i++) + deletions[i] = i; + + for (trial = 0; trial < max_trials; trial++) + { + random_shuffle (insertions, cnt, sizeof *insertions); + random_shuffle (deletions, cnt, sizeof *deletions); + + test_insert_delete (insertions, deletions, cnt); + } + + free (insertions); + free (deletions); + } +} + +/* Inserts elements into an BT in ascending order. */ +static void +test_insert_ordered (void) +{ + const int max_elems = 1024; + struct element *elements; + int *values; + struct bt bt; + int i; + + bt_init (&bt, compare_elements, &aux_data); + elements = xnmalloc (max_elems, sizeof *elements); + values = xnmalloc (max_elems, sizeof *values); + for (i = 0; i < max_elems; i++) + { + values[i] = elements[i].data = i; + check (bt_insert (&bt, &elements[i].node) == NULL); + check_bt (&bt, values, i + 1); + } + free (elements); + free (values); +} + +/* Tests bt_find_ge and bt_find_le. */ +static void +test_find_ge_le (void) +{ + const int max_elems = 10; + struct element *elements; + int *values; + unsigned int inc_pat; + + elements = xnmalloc (max_elems, sizeof *elements); + values = xnmalloc (max_elems, sizeof *values); + for (inc_pat = 0; inc_pat < (1u << max_elems); inc_pat++) + { + struct bt bt; + int elem_cnt = 0; + int i; + + /* Insert the values in the pattern into BT. */ + bt_init (&bt, compare_elements, &aux_data); + for (i = 0; i < max_elems; i++) + if (inc_pat & (1u << i)) + { + values[elem_cnt] = elements[elem_cnt].data = i; + check (bt_insert (&bt, &elements[elem_cnt].node) == NULL); + elem_cnt++; + } + check_bt (&bt, values, elem_cnt); + + /* Try find_ge and find_le for each possible element value. */ + for (i = -1; i <= max_elems; i++) + { + struct element tmp; + struct bt_node *ge, *le; + int j; + + ge = le = NULL; + for (j = 0; j < elem_cnt; j++) + { + if (ge == NULL && values[j] >= i) + ge = &elements[j].node; + if (values[j] <= i) + le = &elements[j].node; + } + + tmp.data = i; + check (bt_find_ge (&bt, &tmp.node) == ge); + check (bt_find_le (&bt, &tmp.node) == le); + } + } + free (elements); + free (values); +} + +/* Inserts elements into an BT, then moves the nodes around in + memory. */ +static void +test_moved (void) +{ + const int max_elems = 128; + struct element *e[2]; + int cur; + int *values; + struct bt bt; + int i, j; + + bt_init (&bt, compare_elements, &aux_data); + e[0] = xnmalloc (max_elems, sizeof *e[0]); + e[1] = xnmalloc (max_elems, sizeof *e[1]); + values = xnmalloc (max_elems, sizeof *values); + cur = 0; + for (i = 0; i < max_elems; i++) + { + values[i] = e[cur][i].data = i; + check (bt_insert (&bt, &e[cur][i].node) == NULL); + check_bt (&bt, values, i + 1); + + for (j = 0; j <= i; j++) + { + e[!cur][j] = e[cur][j]; + bt_moved (&bt, &e[!cur][j].node); + check_bt (&bt, values, i + 1); + } + cur = !cur; + } + free (e[0]); + free (e[1]); + free (values); +} + +/* Inserts values into an BT, then changes their values. */ +static void +test_changed (void) +{ + const int max_elems = 6; + int cnt; + + for (cnt = 0; cnt <= max_elems; cnt++) + { + int *values, *changed_values; + struct element *elements; + unsigned int permutation_cnt; + int i; + + values = xnmalloc (cnt, sizeof *values); + changed_values = xnmalloc (cnt, sizeof *changed_values); + elements = xnmalloc (cnt, sizeof *elements); + for (i = 0; i < cnt; i++) + values[i] = i; + + for (permutation_cnt = 0; + permutation_cnt == 0 || next_permutation (values, cnt); + permutation_cnt++) + { + for (i = 0; i < cnt; i++) + { + int j, k; + for (j = 0; j <= cnt; j++) + { + struct bt bt; + struct bt_node *changed_retval; + + bt_init (&bt, compare_elements, &aux_data); + + /* Add to BT in order. */ + for (k = 0; k < cnt; k++) + { + int n = values[k]; + elements[n].data = n; + check (bt_insert (&bt, &elements[n].node) == NULL); + } + check_bt (&bt, values, cnt); + + /* Change value i to j. */ + elements[i].data = j; + for (k = 0; k < cnt; k++) + changed_values[k] = k; + changed_retval = bt_changed (&bt, &elements[i].node); + if (i != j && j < cnt) + { + /* Will cause duplicate. */ + check (changed_retval == &elements[j].node); + changed_values[i] = changed_values[cnt - 1]; + check_bt (&bt, changed_values, cnt - 1); + } + else + { + /* Succeeds. */ + check (changed_retval == NULL); + changed_values[i] = j; + check_bt (&bt, changed_values, cnt); + } + } + } + } + check (permutation_cnt == factorial (cnt)); + + free (values); + free (changed_values); + free (elements); + } +} + +/* Main program. */ + +/* Runs TEST_FUNCTION and prints a message about NAME. */ +static void +run_test (void (*test_function) (void), const char *name) +{ + test_name = name; + putchar ('.'); + fflush (stdout); + test_function (); +} + +int +main (void) +{ + run_test (test_insert_any_remove_any, + "insert any order, delete any order"); + run_test (test_insert_any_remove_same, + "insert any order, delete same order"); + run_test (test_insert_any_remove_reverse, + "insert any order, delete reverse order"); + run_test (test_random_sequence, + "insert and delete in random sequence"); + run_test (test_insert_ordered, + "insert in ascending order"); + run_test (test_find_ge_le, "find_ge and find_le"); + run_test (test_moved, "move elements around in memory"); + run_test (test_changed, "change key data in nodes"); + putchar ('\n'); + + return 0; +} -- 2.30.2