1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
93 #include "algorithm.h"
94 #include <gsl/gsl_rng.h>
100 /* Some of the assertions in this file are very expensive. We
101 don't use them by default. */
103 #define expensive_assert(X) assert(X)
105 #define expensive_assert(X) ((void) 0)
109 /* Finds an element in ARRAY, which contains COUNT elements of
110 SIZE bytes each, using COMPARE for comparisons. Returns the
111 first element in ARRAY that matches TARGET, or a null pointer
112 on failure. AUX is passed to each comparison as auxiliary
115 find (const void *array, size_t count, size_t size,
117 algo_compare_func *compare, void *aux)
119 const char *element = array;
123 if (compare (target, element, aux) == 0)
124 return (void *) element;
132 /* Counts and return the number of elements in ARRAY, which
133 contains COUNT elements of SIZE bytes each, which are equal to
134 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
137 count_equal (const void *array, size_t count, size_t size,
139 algo_compare_func *compare, void *aux)
141 const char *first = array;
142 size_t equal_cnt = 0;
146 if (compare (element, first, aux) == 0)
155 /* Counts and return the number of elements in ARRAY, which
156 contains COUNT elements of SIZE bytes each, for which
157 PREDICATE returns nonzero. AUX is passed as auxiliary data to
160 count_if (const void *array, size_t count, size_t size,
161 algo_predicate_func *predicate, void *aux)
163 const char *first = array;
164 size_t nonzero_cnt = 0;
168 if (predicate (first, aux) != 0)
177 /* Byte-wise swap two items of size SIZE. */
178 #define SWAP(a, b, size) \
181 register size_t __size = (size); \
182 register char *__a = (a), *__b = (b); \
188 } while (--__size > 0); \
191 /* Makes the elements in ARRAY unique, by moving up duplicates,
192 and returns the new number of elements in the array. Sorted
193 arrays only. Arguments same as for sort() above. */
195 unique (void *array, size_t count, size_t size,
196 algo_compare_func *compare, void *aux)
199 char *last = first + size * count;
200 char *result = array;
207 assert (adjacent_find_equal (array, count,
208 size, compare, aux) == NULL);
212 if (compare (result, first, aux))
216 memcpy (result, first, size);
223 /* Helper function that calls sort(), then unique(). */
225 sort_unique (void *array, size_t count, size_t size,
226 algo_compare_func *compare, void *aux)
228 sort (array, count, size, compare, aux);
229 return unique (array, count, size, compare, aux);
232 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
233 each, so that the elements for which PREDICATE returns nonzero
234 precede those for which PREDICATE returns zero. AUX is
235 passed to each predicate as auxiliary data. Returns the
236 number of elements for which PREDICATE returns nonzero. Not
239 partition (void *array, size_t count, size_t size,
240 algo_predicate_func *predicate, void *aux)
242 size_t nonzero_cnt = count;
244 char *last = first + nonzero_cnt * size;
248 /* Move FIRST forward to point to first element that fails
254 else if (!predicate (first, aux))
261 /* Move LAST backward to point to last element that passes
269 else if (predicate (last, aux))
275 /* By swapping FIRST and LAST we extend the starting and
276 ending sequences that pass and fail, respectively,
278 SWAP (first, last, size);
283 assert (is_partitioned (array, count, size, nonzero_cnt, predicate, aux));
287 /* Checks whether ARRAY, which contains COUNT elements of SIZE
288 bytes each, is partitioned such that PREDICATE returns nonzero
289 for the first NONZERO_CNT elements and zero for the remaining
290 elements. AUX is passed as auxiliary data to PREDICATE. */
292 is_partitioned (const void *array, size_t count, size_t size,
294 algo_predicate_func *predicate, void *aux)
296 const char *first = array;
299 assert (nonzero_cnt <= count);
300 for (idx = 0; idx < nonzero_cnt; idx++)
301 if (predicate (first + idx * size, aux) == 0)
303 for (idx = nonzero_cnt; idx < count; idx++)
304 if (predicate (first + idx * size, aux) != 0)
309 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
310 RESULT, except that elements for which PREDICATE is false are
311 not copied. Returns the number of elements copied. AUX is
312 passed to PREDICATE as auxiliary data. */
314 copy_if (const void *array, size_t count, size_t size,
316 algo_predicate_func *predicate, void *aux)
318 const char *input = array;
319 const char *last = input + size * count;
320 char *output = result;
321 size_t nonzero_cnt = 0;
325 if (predicate (input, aux))
327 memcpy (output, input, size);
335 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
336 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
341 /* Removes N elements starting at IDX from ARRAY, which consists
342 of COUNT elements of SIZE bytes each, by shifting the elements
343 following them, if any, into its position. */
345 remove_range (void *array_, size_t count, size_t size,
346 size_t idx, size_t n)
348 char *array = array_;
350 assert (array != NULL);
351 assert (idx <= count);
352 assert (idx + n <= count);
355 memmove (array + idx * size, array + (idx + n) * size,
356 size * (count - idx - n));
359 /* Removes element IDX from ARRAY, which consists of COUNT
360 elements of SIZE bytes each, by shifting the elements
361 following it, if any, into its position. */
363 remove_element (void *array, size_t count, size_t size,
366 remove_range (array, count, size, idx, 1);
369 /* Moves an element in ARRAY, which consists of COUNT elements of
370 SIZE bytes each, from OLD_IDX to NEW_IDX, shifting around
371 other elements as needed. Runs in O(abs(OLD_IDX - NEW_IDX))
374 move_element (void *array_, size_t count, size_t size,
375 size_t old_idx, size_t new_idx)
377 assert (array_ != NULL || count == 0);
378 assert (old_idx < count);
379 assert (new_idx < count);
381 if (old_idx != new_idx)
383 char *array = array_;
384 char *element = xmalloc (size);
385 char *new = array + new_idx * size;
386 char *old = array + old_idx * size;
388 memcpy (element, old, size);
390 memmove (new + size, new, (old_idx - new_idx) * size);
392 memmove (old, old + size, (new_idx - old_idx) * size);
393 memcpy (new, element, size);
399 /* A predicate and its auxiliary data. */
402 algo_predicate_func *predicate;
407 not (const void *data, void *pred_aux_)
409 const struct pred_aux *pred_aux = pred_aux_;
411 return !pred_aux->predicate (data, pred_aux->aux);
414 /* Removes elements equal to ELEMENT from ARRAY, which consists
415 of COUNT elements of SIZE bytes each. Returns the number of
416 remaining elements. AUX is passed to COMPARE as auxiliary
419 remove_equal (void *array, size_t count, size_t size,
421 algo_compare_func *compare, void *aux)
424 char *last = first + count * size;
431 if (compare (first, element, aux) == 0)
445 if (compare (first, element, aux) == 0)
451 memcpy (result, first, size);
456 assert (count_equal (array, count, size, element, compare, aux) == 0);
460 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
461 RESULT, except that elements for which PREDICATE is true are
462 not copied. Returns the number of elements copied. AUX is
463 passed to PREDICATE as auxiliary data. */
465 remove_copy_if (const void *array, size_t count, size_t size,
467 algo_predicate_func *predicate, void *aux)
469 struct pred_aux pred_aux;
470 pred_aux.predicate = predicate;
472 return copy_if (array, count, size, result, not, &pred_aux);
475 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
476 a binary search. Returns any element that equals VALUE, if
477 one exists, or a null pointer otherwise. ARRAY must ordered
478 according to COMPARE. AUX is passed to COMPARE as auxiliary
481 binary_search (const void *array, size_t count, size_t size,
483 algo_compare_func *compare, void *aux)
485 assert (array != NULL);
486 assert (count <= INT_MAX);
487 assert (compare != NULL);
491 const char *first = array;
493 int high = count - 1;
497 int middle = (low + high) / 2;
498 const char *element = first + middle * size;
499 int cmp = compare (value, element, aux);
506 return (void *) element;
510 expensive_assert (find (array, count, size, value, compare, aux) == NULL);
514 /* Lexicographically compares ARRAY1, which contains COUNT1
515 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
516 elements of SIZE bytes, according to COMPARE. Returns a
517 strcmp()-type result. AUX is passed to COMPARE as auxiliary
520 lexicographical_compare_3way (const void *array1, size_t count1,
521 const void *array2, size_t count2,
523 algo_compare_func *compare, void *aux)
525 const char *first1 = array1;
526 const char *first2 = array2;
527 size_t min_count = count1 < count2 ? count1 : count2;
529 while (min_count > 0)
531 int cmp = compare (first1, first2, aux);
540 return count1 < count2 ? -1 : count1 > count2;
543 /* If you consider tuning this algorithm, you should consult first:
544 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
545 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
551 /* Discontinue quicksort algorithm when partition gets below this size.
552 This particular magic number was chosen to work best on a Sun 4/260. */
555 /* Stack node declarations used to store unfulfilled partition obligations. */
562 /* The next 4 #defines implement a very fast in-line stack abstraction. */
563 /* The stack needs log (total_elements) entries (we could even subtract
564 log(MAX_THRESH)). Since total_elements has type size_t, we get as
565 upper bound for log (total_elements):
566 bits per byte (CHAR_BIT) * sizeof(size_t). */
567 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
568 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
569 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
570 #define STACK_NOT_EMPTY (stack < top)
573 /* Order size using quicksort. This implementation incorporates
574 four optimizations discussed in Sedgewick:
576 1. Non-recursive, using an explicit stack of pointer that store the
577 next array partition to sort. To save time, this maximum amount
578 of space required to store an array of SIZE_MAX is allocated on the
579 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
580 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
581 Pretty cheap, actually.
583 2. Chose the pivot element using a median-of-three decision tree.
584 This reduces the probability of selecting a bad pivot value and
585 eliminates certain extraneous comparisons.
587 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
588 insertion sort to order the MAX_THRESH items within each partition.
589 This is a big win, since insertion sort is faster for small, mostly
590 sorted array segments.
592 4. The larger of the two sub-partitions is always pushed onto the
593 stack first, with the algorithm then concentrating on the
594 smaller partition. This *guarantees* no more than log (total_elems)
595 stack size is needed (actually O(1) in this case)! */
598 sort (void *array, size_t count, size_t size,
599 algo_compare_func *compare, void *aux)
601 char *const first = array;
602 const size_t max_thresh = MAX_THRESH * size;
605 /* Avoid lossage with unsigned arithmetic below. */
608 if (count > MAX_THRESH)
611 char *hi = &lo[size * (count - 1)];
612 stack_node stack[STACK_SIZE];
613 stack_node *top = stack + 1;
615 while (STACK_NOT_EMPTY)
620 /* Select median value from among LO, MID, and HI. Rearrange
621 LO and HI so the three values are sorted. This lowers the
622 probability of picking a pathological pivot value and
623 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
626 char *mid = lo + size * ((hi - lo) / size >> 1);
628 if (compare (mid, lo, aux) < 0)
629 SWAP (mid, lo, size);
630 if (compare (hi, mid, aux) < 0)
631 SWAP (mid, hi, size);
634 if (compare (mid, lo, aux) < 0)
635 SWAP (mid, lo, size);
638 left_ptr = lo + size;
639 right_ptr = hi - size;
641 /* Here's the famous ``collapse the walls'' section of quicksort.
642 Gotta like those tight inner loops! They are the main reason
643 that this algorithm runs much faster than others. */
646 while (compare (left_ptr, mid, aux) < 0)
649 while (compare (mid, right_ptr, aux) < 0)
652 if (left_ptr < right_ptr)
654 SWAP (left_ptr, right_ptr, size);
657 else if (mid == right_ptr)
662 else if (left_ptr == right_ptr)
669 while (left_ptr <= right_ptr);
671 /* Set up pointers for next iteration. First determine whether
672 left and right partitions are below the threshold size. If so,
673 ignore one or both. Otherwise, push the larger partition's
674 bounds on the stack and continue sorting the smaller one. */
676 if ((size_t) (right_ptr - lo) <= max_thresh)
678 if ((size_t) (hi - left_ptr) <= max_thresh)
679 /* Ignore both small partitions. */
682 /* Ignore small left partition. */
685 else if ((size_t) (hi - left_ptr) <= max_thresh)
686 /* Ignore small right partition. */
688 else if ((right_ptr - lo) > (hi - left_ptr))
690 /* Push larger left partition indices. */
691 PUSH (lo, right_ptr);
696 /* Push larger right partition indices. */
703 /* Once the FIRST array is partially sorted by quicksort the rest
704 is completely sorted using insertion sort, since this is efficient
705 for partitions below MAX_THRESH size. FIRST points to the beginning
706 of the array to sort, and END_PTR points at the very last element in
707 the array (*not* one beyond it!). */
709 #define min(x, y) ((x) < (y) ? (x) : (y))
712 char *const end_ptr = &first[size * (count - 1)];
713 char *tmp_ptr = first;
714 char *thresh = min(end_ptr, first + max_thresh);
715 register char *run_ptr;
717 /* Find smallest element in first threshold and place it at the
718 array's beginning. This is the smallest array element,
719 and the operation speeds up insertion sort's inner loop. */
721 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
722 if (compare (run_ptr, tmp_ptr, aux) < 0)
725 if (tmp_ptr != first)
726 SWAP (tmp_ptr, first, size);
728 /* Insertion sort, running from left-hand-side up to right-hand-side. */
730 run_ptr = first + size;
731 while ((run_ptr += size) <= end_ptr)
733 tmp_ptr = run_ptr - size;
734 while (compare (run_ptr, tmp_ptr, aux) < 0)
738 if (tmp_ptr != run_ptr)
742 trav = run_ptr + size;
743 while (--trav >= run_ptr)
748 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
756 assert (is_sorted (array, count, size, compare, aux));
759 /* Tests whether ARRAY, which contains COUNT elements of SIZE
760 bytes each, is sorted in order according to COMPARE. AUX is
761 passed to COMPARE as auxiliary data. */
763 is_sorted (const void *array, size_t count, size_t size,
764 algo_compare_func *compare, void *aux)
766 const char *first = array;
769 for (idx = 0; idx + 1 < count; idx++)
770 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
776 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
777 into RESULT, and returns the number of elements written to
778 RESULT. If a value appears M times in ARRAY1 and N times in
779 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
780 and ARRAY2 must be sorted, and RESULT is sorted and stable.
781 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
782 each SIZE bytes. AUX is passed to COMPARE as auxiliary
784 size_t set_difference (const void *array1, size_t count1,
785 const void *array2, size_t count2,
788 algo_compare_func *compare, void *aux)
790 const char *first1 = array1;
791 const char *last1 = first1 + count1 * size;
792 const char *first2 = array2;
793 const char *last2 = first2 + count2 * size;
794 char *result = result_;
795 size_t result_count = 0;
797 while (first1 != last1 && first2 != last2)
799 int cmp = compare (first1, first2, aux);
802 memcpy (result, first1, size);
816 while (first1 != last1)
818 memcpy (result, first1, size);
827 /* Finds the first pair of adjacent equal elements in ARRAY,
828 which has COUNT elements of SIZE bytes. Returns the first
829 element in ARRAY such that COMPARE returns zero when it and
830 its successor element are compared, or a null pointer if no
831 such element exists. AUX is passed to COMPARE as auxiliary
834 adjacent_find_equal (const void *array, size_t count, size_t size,
835 algo_compare_func *compare, void *aux)
837 const char *first = array;
838 const char *last = first + count * size;
840 while (first < last && first + size < last)
842 if (compare (first, first + size, aux) == 0)
843 return (void *) first;
850 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
851 the first COUNT - 1 elements of these form a heap, followed by
852 a single element not part of the heap. This function adds the
853 final element, forming a heap of COUNT elements in ARRAY.
854 Uses COMPARE to compare elements, passing AUX as auxiliary
857 push_heap (void *array, size_t count, size_t size,
858 algo_compare_func *compare, void *aux)
863 expensive_assert (count < 1 || is_heap (array, count - 1,
864 size, compare, aux));
865 for (i = count; i > 1; i /= 2)
867 char *parent = first + (i / 2 - 1) * size;
868 char *element = first + (i - 1) * size;
869 if (compare (parent, element, aux) < 0)
870 SWAP (parent, element, size);
874 expensive_assert (is_heap (array, count, size, compare, aux));
877 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
878 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
879 may be smaller than its children. This function fixes that,
880 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
881 compare elements, passing AUX as auxiliary data. */
883 heapify (void *array, size_t count, size_t size,
885 algo_compare_func *compare, void *aux)
891 size_t left = 2 * idx;
892 size_t right = left + 1;
893 size_t largest = idx;
896 && compare (first + size * (left - 1),
897 first + size * (idx - 1), aux) > 0)
901 && compare (first + size * (right - 1),
902 first + size * (largest - 1), aux) > 0)
908 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
913 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
914 all COUNT elements form a heap. This function moves the
915 largest element in the heap to the final position in ARRAY and
916 reforms a heap of the remaining COUNT - 1 elements at the
917 beginning of ARRAY. Uses COMPARE to compare elements, passing
918 AUX as auxiliary data. */
920 pop_heap (void *array, size_t count, size_t size,
921 algo_compare_func *compare, void *aux)
925 expensive_assert (is_heap (array, count, size, compare, aux));
926 SWAP (first, first + (count - 1) * size, size);
927 heapify (first, count - 1, size, 1, compare, aux);
928 expensive_assert (count < 1 || is_heap (array, count - 1,
929 size, compare, aux));
932 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
933 a heap. Uses COMPARE to compare elements, passing AUX as
936 make_heap (void *array, size_t count, size_t size,
937 algo_compare_func *compare, void *aux)
941 for (idx = count / 2; idx >= 1; idx--)
942 heapify (array, count, size, idx, compare, aux);
943 expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
946 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
947 all COUNT elements form a heap. This function turns the heap
948 into a fully sorted array. Uses COMPARE to compare elements,
949 passing AUX as auxiliary data. */
951 sort_heap (void *array, size_t count, size_t size,
952 algo_compare_func *compare, void *aux)
957 expensive_assert (is_heap (array, count, size, compare, aux));
958 for (idx = count; idx >= 2; idx--)
960 SWAP (first, first + (idx - 1) * size, size);
961 heapify (array, idx - 1, size, 1, compare, aux);
963 expensive_assert (is_sorted (array, count, size, compare, aux));
966 /* ARRAY contains COUNT elements of SIZE bytes each. This
967 function tests whether ARRAY is a heap and returns 1 if so, 0
968 otherwise. Uses COMPARE to compare elements, passing AUX as
971 is_heap (const void *array, size_t count, size_t size,
972 algo_compare_func *compare, void *aux)
974 const char *first = array;
977 for (child = 2; child <= count; child++)
979 size_t parent = child / 2;
980 if (compare (first + (parent - 1) * size,
981 first + (child - 1) * size, aux) < 0)