1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
94 #include <gsl/gsl_rng.h>
99 #include <libpspp/assertion.h>
103 /* Finds an element in ARRAY, which contains COUNT elements of
104 SIZE bytes each, using COMPARE for comparisons. Returns the
105 first element in ARRAY that matches TARGET, or a null pointer
106 on failure. AUX is passed to each comparison as auxiliary
109 find (const void *array, size_t count, size_t size,
111 algo_compare_func *compare, void *aux)
113 const char *element = array;
117 if (compare (target, element, aux) == 0)
118 return (void *) element;
126 /* Counts and return the number of elements in ARRAY, which
127 contains COUNT elements of SIZE bytes each, which are equal to
128 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
131 count_equal (const void *array, size_t count, size_t size,
133 algo_compare_func *compare, void *aux)
135 const char *first = array;
136 size_t equal_cnt = 0;
140 if (compare (element, first, aux) == 0)
149 /* Counts and return the number of elements in ARRAY, which
150 contains COUNT elements of SIZE bytes each, for which
151 PREDICATE returns true. AUX is passed as auxiliary data to
154 count_if (const void *array, size_t count, size_t size,
155 algo_predicate_func *predicate, void *aux)
157 const char *first = array;
162 if (predicate (first, aux) != 0)
171 /* Byte-wise swap two items of size SIZE. */
172 #define SWAP(a, b, size) \
175 register size_t __size = (size); \
176 register char *__a = (a), *__b = (b); \
182 } while (--__size > 0); \
185 /* Makes the elements in ARRAY unique, by moving up duplicates,
186 and returns the new number of elements in the array. Sorted
187 arrays only. Arguments same as for sort() above. */
189 unique (void *array, size_t count, size_t size,
190 algo_compare_func *compare, void *aux)
193 char *last = first + size * count;
194 char *result = array;
201 assert (adjacent_find_equal (array, count,
202 size, compare, aux) == NULL);
206 if (compare (result, first, aux))
210 memcpy (result, first, size);
217 /* Helper function that calls sort(), then unique(). */
219 sort_unique (void *array, size_t count, size_t size,
220 algo_compare_func *compare, void *aux)
222 sort (array, count, size, compare, aux);
223 return unique (array, count, size, compare, aux);
226 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
227 each, so that the elements for which PREDICATE returns true
228 precede those for which PREDICATE returns zero. AUX is
229 passed to each predicate as auxiliary data. Returns the
230 number of elements for which PREDICATE returns true. Not
233 partition (void *array, size_t count, size_t size,
234 algo_predicate_func *predicate, void *aux)
236 size_t true_cnt = count;
238 char *last = first + true_cnt * size;
242 /* Move FIRST forward to point to first element that fails
248 else if (!predicate (first, aux))
255 /* Move LAST backward to point to last element that passes
263 else if (predicate (last, aux))
269 /* By swapping FIRST and LAST we extend the starting and
270 ending sequences that pass and fail, respectively,
272 SWAP (first, last, size);
277 assert (is_partitioned (array, count, size, true_cnt, predicate, aux));
281 /* Checks whether ARRAY, which contains COUNT elements of SIZE
282 bytes each, is partitioned such that PREDICATE returns true
283 for the first TRUE_CNT elements and zero for the remaining
284 elements. AUX is passed as auxiliary data to PREDICATE. */
286 is_partitioned (const void *array, size_t count, size_t size,
288 algo_predicate_func *predicate, void *aux)
290 const char *first = array;
293 assert (true_cnt <= count);
294 for (idx = 0; idx < true_cnt; idx++)
295 if (predicate (first + idx * size, aux) == 0)
297 for (idx = true_cnt; idx < count; idx++)
298 if (predicate (first + idx * size, aux) != 0)
303 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
304 RESULT, except that elements for which PREDICATE is false are
305 not copied. Returns the number of elements copied. AUX is
306 passed to PREDICATE as auxiliary data. */
308 copy_if (const void *array, size_t count, size_t size,
310 algo_predicate_func *predicate, void *aux)
312 const char *input = array;
313 const char *last = input + size * count;
314 char *output = result;
315 size_t nonzero_cnt = 0;
319 if (predicate (input, aux))
321 memcpy (output, input, size);
329 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
330 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
335 /* Removes N elements starting at IDX from ARRAY, which consists
336 of COUNT elements of SIZE bytes each, by shifting the elements
337 following them, if any, into its position. */
339 remove_range (void *array_, size_t count, size_t size,
340 size_t idx, size_t n)
342 char *array = array_;
344 assert (array != NULL);
345 assert (idx <= count);
346 assert (idx + n <= count);
349 memmove (array + idx * size, array + (idx + n) * size,
350 size * (count - idx - n));
353 /* Removes element IDX from ARRAY, which consists of COUNT
354 elements of SIZE bytes each, by shifting the elements
355 following it, if any, into its position. */
357 remove_element (void *array, size_t count, size_t size,
360 remove_range (array, count, size, idx, 1);
363 /* Moves an element in ARRAY, which consists of COUNT elements of
364 SIZE bytes each, from OLD_IDX to NEW_IDX, shifting around
365 other elements as needed. Runs in O(abs(OLD_IDX - NEW_IDX))
368 move_element (void *array_, size_t count, size_t size,
369 size_t old_idx, size_t new_idx)
371 assert (array_ != NULL || count == 0);
372 assert (old_idx < count);
373 assert (new_idx < count);
375 if (old_idx != new_idx)
377 char *array = array_;
378 char *element = xmalloc (size);
379 char *new = array + new_idx * size;
380 char *old = array + old_idx * size;
382 memcpy (element, old, size);
384 memmove (new + size, new, (old_idx - new_idx) * size);
386 memmove (old, old + size, (new_idx - old_idx) * size);
387 memcpy (new, element, size);
393 /* A predicate and its auxiliary data. */
396 algo_predicate_func *predicate;
401 not (const void *data, void *pred_aux_)
403 const struct pred_aux *pred_aux = pred_aux_;
405 return !pred_aux->predicate (data, pred_aux->aux);
408 /* Removes elements equal to ELEMENT from ARRAY, which consists
409 of COUNT elements of SIZE bytes each. Returns the number of
410 remaining elements. AUX is passed to COMPARE as auxiliary
413 remove_equal (void *array, size_t count, size_t size,
415 algo_compare_func *compare, void *aux)
418 char *last = first + count * size;
425 if (compare (first, element, aux) == 0)
439 if (compare (first, element, aux) == 0)
445 memcpy (result, first, size);
450 assert (count_equal (array, count, size, element, compare, aux) == 0);
454 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
455 RESULT, except that elements for which PREDICATE is true are
456 not copied. Returns the number of elements copied. AUX is
457 passed to PREDICATE as auxiliary data. */
459 remove_copy_if (const void *array, size_t count, size_t size,
461 algo_predicate_func *predicate, void *aux)
463 struct pred_aux pred_aux;
464 pred_aux.predicate = predicate;
466 return copy_if (array, count, size, result, not, &pred_aux);
469 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
470 a binary search. Returns any element that equals VALUE, if
471 one exists, or a null pointer otherwise. ARRAY must ordered
472 according to COMPARE. AUX is passed to COMPARE as auxiliary
475 binary_search (const void *array, size_t count, size_t size,
477 algo_compare_func *compare, void *aux)
479 assert (array != NULL);
480 assert (count <= INT_MAX);
481 assert (compare != NULL);
485 const char *first = array;
487 int high = count - 1;
491 int middle = (low + high) / 2;
492 const char *element = first + middle * size;
493 int cmp = compare (value, element, aux);
500 return (void *) element;
504 expensive_assert (find (array, count, size, value, compare, aux) == NULL);
508 /* Lexicographically compares ARRAY1, which contains COUNT1
509 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
510 elements of SIZE bytes, according to COMPARE. Returns a
511 strcmp()-type result. AUX is passed to COMPARE as auxiliary
514 lexicographical_compare_3way (const void *array1, size_t count1,
515 const void *array2, size_t count2,
517 algo_compare_func *compare, void *aux)
519 const char *first1 = array1;
520 const char *first2 = array2;
521 size_t min_count = count1 < count2 ? count1 : count2;
523 while (min_count > 0)
525 int cmp = compare (first1, first2, aux);
534 return count1 < count2 ? -1 : count1 > count2;
537 /* If you consider tuning this algorithm, you should consult first:
538 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
539 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
545 /* Discontinue quicksort algorithm when partition gets below this size.
546 This particular magic number was chosen to work best on a Sun 4/260. */
549 /* Stack node declarations used to store unfulfilled partition obligations. */
556 /* The next 4 #defines implement a very fast in-line stack abstraction. */
557 /* The stack needs log (total_elements) entries (we could even subtract
558 log(MAX_THRESH)). Since total_elements has type size_t, we get as
559 upper bound for log (total_elements):
560 bits per byte (CHAR_BIT) * sizeof(size_t). */
561 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
562 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
563 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
564 #define STACK_NOT_EMPTY (stack < top)
567 /* Order size using quicksort. This implementation incorporates
568 four optimizations discussed in Sedgewick:
570 1. Non-recursive, using an explicit stack of pointer that store the
571 next array partition to sort. To save time, this maximum amount
572 of space required to store an array of SIZE_MAX is allocated on the
573 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
574 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
575 Pretty cheap, actually.
577 2. Chose the pivot element using a median-of-three decision tree.
578 This reduces the probability of selecting a bad pivot value and
579 eliminates certain extraneous comparisons.
581 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
582 insertion sort to order the MAX_THRESH items within each partition.
583 This is a big win, since insertion sort is faster for small, mostly
584 sorted array segments.
586 4. The larger of the two sub-partitions is always pushed onto the
587 stack first, with the algorithm then concentrating on the
588 smaller partition. This *guarantees* no more than log (total_elems)
589 stack size is needed (actually O(1) in this case)! */
592 sort (void *array, size_t count, size_t size,
593 algo_compare_func *compare, void *aux)
595 char *const first = array;
596 const size_t max_thresh = MAX_THRESH * size;
599 /* Avoid lossage with unsigned arithmetic below. */
602 if (count > MAX_THRESH)
605 char *hi = &lo[size * (count - 1)];
606 stack_node stack[STACK_SIZE];
607 stack_node *top = stack + 1;
609 while (STACK_NOT_EMPTY)
614 /* Select median value from among LO, MID, and HI. Rearrange
615 LO and HI so the three values are sorted. This lowers the
616 probability of picking a pathological pivot value and
617 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
620 char *mid = lo + size * ((hi - lo) / size >> 1);
622 if (compare (mid, lo, aux) < 0)
623 SWAP (mid, lo, size);
624 if (compare (hi, mid, aux) < 0)
625 SWAP (mid, hi, size);
628 if (compare (mid, lo, aux) < 0)
629 SWAP (mid, lo, size);
632 left_ptr = lo + size;
633 right_ptr = hi - size;
635 /* Here's the famous ``collapse the walls'' section of quicksort.
636 Gotta like those tight inner loops! They are the main reason
637 that this algorithm runs much faster than others. */
640 while (compare (left_ptr, mid, aux) < 0)
643 while (compare (mid, right_ptr, aux) < 0)
646 if (left_ptr < right_ptr)
648 SWAP (left_ptr, right_ptr, size);
651 else if (mid == right_ptr)
656 else if (left_ptr == right_ptr)
663 while (left_ptr <= right_ptr);
665 /* Set up pointers for next iteration. First determine whether
666 left and right partitions are below the threshold size. If so,
667 ignore one or both. Otherwise, push the larger partition's
668 bounds on the stack and continue sorting the smaller one. */
670 if ((size_t) (right_ptr - lo) <= max_thresh)
672 if ((size_t) (hi - left_ptr) <= max_thresh)
673 /* Ignore both small partitions. */
676 /* Ignore small left partition. */
679 else if ((size_t) (hi - left_ptr) <= max_thresh)
680 /* Ignore small right partition. */
682 else if ((right_ptr - lo) > (hi - left_ptr))
684 /* Push larger left partition indices. */
685 PUSH (lo, right_ptr);
690 /* Push larger right partition indices. */
697 /* Once the FIRST array is partially sorted by quicksort the rest
698 is completely sorted using insertion sort, since this is efficient
699 for partitions below MAX_THRESH size. FIRST points to the beginning
700 of the array to sort, and END_PTR points at the very last element in
701 the array (*not* one beyond it!). */
703 #define min(x, y) ((x) < (y) ? (x) : (y))
706 char *const end_ptr = &first[size * (count - 1)];
707 char *tmp_ptr = first;
708 char *thresh = min(end_ptr, first + max_thresh);
709 register char *run_ptr;
711 /* Find smallest element in first threshold and place it at the
712 array's beginning. This is the smallest array element,
713 and the operation speeds up insertion sort's inner loop. */
715 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
716 if (compare (run_ptr, tmp_ptr, aux) < 0)
719 if (tmp_ptr != first)
720 SWAP (tmp_ptr, first, size);
722 /* Insertion sort, running from left-hand-side up to right-hand-side. */
724 run_ptr = first + size;
725 while ((run_ptr += size) <= end_ptr)
727 tmp_ptr = run_ptr - size;
728 while (compare (run_ptr, tmp_ptr, aux) < 0)
732 if (tmp_ptr != run_ptr)
736 trav = run_ptr + size;
737 while (--trav >= run_ptr)
742 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
750 assert (is_sorted (array, count, size, compare, aux));
753 /* Tests whether ARRAY, which contains COUNT elements of SIZE
754 bytes each, is sorted in order according to COMPARE. AUX is
755 passed to COMPARE as auxiliary data. */
757 is_sorted (const void *array, size_t count, size_t size,
758 algo_compare_func *compare, void *aux)
760 const char *first = array;
763 for (idx = 0; idx + 1 < count; idx++)
764 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
770 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
771 into RESULT, and returns the number of elements written to
772 RESULT. If a value appears M times in ARRAY1 and N times in
773 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
774 and ARRAY2 must be sorted, and RESULT is sorted and stable.
775 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
776 each SIZE bytes. AUX is passed to COMPARE as auxiliary
778 size_t set_difference (const void *array1, size_t count1,
779 const void *array2, size_t count2,
782 algo_compare_func *compare, void *aux)
784 const char *first1 = array1;
785 const char *last1 = first1 + count1 * size;
786 const char *first2 = array2;
787 const char *last2 = first2 + count2 * size;
788 char *result = result_;
789 size_t result_count = 0;
791 while (first1 != last1 && first2 != last2)
793 int cmp = compare (first1, first2, aux);
796 memcpy (result, first1, size);
810 while (first1 != last1)
812 memcpy (result, first1, size);
821 /* Finds the first pair of adjacent equal elements in ARRAY,
822 which has COUNT elements of SIZE bytes. Returns the first
823 element in ARRAY such that COMPARE returns zero when it and
824 its successor element are compared, or a null pointer if no
825 such element exists. AUX is passed to COMPARE as auxiliary
828 adjacent_find_equal (const void *array, size_t count, size_t size,
829 algo_compare_func *compare, void *aux)
831 const char *first = array;
832 const char *last = first + count * size;
834 while (first < last && first + size < last)
836 if (compare (first, first + size, aux) == 0)
837 return (void *) first;
844 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
845 the first COUNT - 1 elements of these form a heap, followed by
846 a single element not part of the heap. This function adds the
847 final element, forming a heap of COUNT elements in ARRAY.
848 Uses COMPARE to compare elements, passing AUX as auxiliary
851 push_heap (void *array, size_t count, size_t size,
852 algo_compare_func *compare, void *aux)
857 expensive_assert (count < 1 || is_heap (array, count - 1,
858 size, compare, aux));
859 for (i = count; i > 1; i /= 2)
861 char *parent = first + (i / 2 - 1) * size;
862 char *element = first + (i - 1) * size;
863 if (compare (parent, element, aux) < 0)
864 SWAP (parent, element, size);
868 expensive_assert (is_heap (array, count, size, compare, aux));
871 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
872 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
873 may be smaller than its children. This function fixes that,
874 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
875 compare elements, passing AUX as auxiliary data. */
877 heapify (void *array, size_t count, size_t size,
879 algo_compare_func *compare, void *aux)
885 size_t left = 2 * idx;
886 size_t right = left + 1;
887 size_t largest = idx;
890 && compare (first + size * (left - 1),
891 first + size * (idx - 1), aux) > 0)
895 && compare (first + size * (right - 1),
896 first + size * (largest - 1), aux) > 0)
902 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
907 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
908 all COUNT elements form a heap. This function moves the
909 largest element in the heap to the final position in ARRAY and
910 reforms a heap of the remaining COUNT - 1 elements at the
911 beginning of ARRAY. Uses COMPARE to compare elements, passing
912 AUX as auxiliary data. */
914 pop_heap (void *array, size_t count, size_t size,
915 algo_compare_func *compare, void *aux)
919 expensive_assert (is_heap (array, count, size, compare, aux));
920 SWAP (first, first + (count - 1) * size, size);
921 heapify (first, count - 1, size, 1, compare, aux);
922 expensive_assert (count < 1 || is_heap (array, count - 1,
923 size, compare, aux));
926 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
927 a heap. Uses COMPARE to compare elements, passing AUX as
930 make_heap (void *array, size_t count, size_t size,
931 algo_compare_func *compare, void *aux)
935 for (idx = count / 2; idx >= 1; idx--)
936 heapify (array, count, size, idx, compare, aux);
937 expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
940 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
941 all COUNT elements form a heap. This function turns the heap
942 into a fully sorted array. Uses COMPARE to compare elements,
943 passing AUX as auxiliary data. */
945 sort_heap (void *array, size_t count, size_t size,
946 algo_compare_func *compare, void *aux)
951 expensive_assert (is_heap (array, count, size, compare, aux));
952 for (idx = count; idx >= 2; idx--)
954 SWAP (first, first + (idx - 1) * size, size);
955 heapify (array, idx - 1, size, 1, compare, aux);
957 expensive_assert (is_sorted (array, count, size, compare, aux));
960 /* ARRAY contains COUNT elements of SIZE bytes each. This
961 function tests whether ARRAY is a heap and returns true if so,
962 false otherwise. Uses COMPARE to compare elements, passing
963 AUX as auxiliary data. */
965 is_heap (const void *array, size_t count, size_t size,
966 algo_compare_func *compare, void *aux)
968 const char *first = array;
971 for (child = 2; child <= count; child++)
973 size_t parent = child / 2;
974 if (compare (first + (parent - 1) * size,
975 first + (child - 1) * size, aux) < 0)