1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
94 #include <gsl/gsl_rng.h>
99 #include <libpspp/assertion.h>
105 /* Finds an element in ARRAY, which contains COUNT elements of
106 SIZE bytes each, using COMPARE for comparisons. Returns the
107 first element in ARRAY that matches TARGET, or a null pointer
108 on failure. AUX is passed to each comparison as auxiliary
111 find (const void *array, size_t count, size_t size,
113 algo_compare_func *compare, const void *aux)
115 const char *element = array;
119 if (compare (target, element, aux) == 0)
120 return (void *) element;
128 /* Counts and return the number of elements in ARRAY, which
129 contains COUNT elements of SIZE bytes each, which are equal to
130 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
133 count_equal (const void *array, size_t count, size_t size,
135 algo_compare_func *compare, const void *aux)
137 const char *first = array;
138 size_t equal_cnt = 0;
142 if (compare (element, first, aux) == 0)
151 /* Counts and return the number of elements in ARRAY, which
152 contains COUNT elements of SIZE bytes each, for which
153 PREDICATE returns true. AUX is passed as auxiliary data to
156 count_if (const void *array, size_t count, size_t size,
157 algo_predicate_func *predicate, const void *aux)
159 const char *first = array;
164 if (predicate (first, aux) != 0)
173 /* Byte-wise swap two items of size SIZE. */
174 #define SWAP(a, b, size) \
177 register size_t __size = (size); \
178 register char *__a = (a), *__b = (b); \
184 } while (--__size > 0); \
187 /* Makes the elements in ARRAY unique, by moving up duplicates,
188 and returns the new number of elements in the array. Sorted
189 arrays only. Arguments same as for sort() above. */
191 unique (void *array, size_t count, size_t size,
192 algo_compare_func *compare, const void *aux)
195 char *last = first + size * count;
196 char *result = array;
203 assert (adjacent_find_equal (array, count,
204 size, compare, aux) == NULL);
208 if (compare (result, first, aux))
212 memcpy (result, first, size);
219 /* Helper function that calls sort(), then unique(). */
221 sort_unique (void *array, size_t count, size_t size,
222 algo_compare_func *compare, const void *aux)
224 sort (array, count, size, compare, aux);
225 return unique (array, count, size, compare, aux);
228 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
229 each, so that the elements for which PREDICATE returns true
230 precede those for which PREDICATE returns zero. AUX is
231 passed to each predicate as auxiliary data. Returns the
232 number of elements for which PREDICATE returns true. Not
235 partition (void *array, size_t count, size_t size,
236 algo_predicate_func *predicate, const void *aux)
238 size_t true_cnt = count;
240 char *last = first + true_cnt * size;
244 /* Move FIRST forward to point to first element that fails
250 else if (!predicate (first, aux))
257 /* Move LAST backward to point to last element that passes
265 else if (predicate (last, aux))
271 /* By swapping FIRST and LAST we extend the starting and
272 ending sequences that pass and fail, respectively,
274 SWAP (first, last, size);
279 assert (is_partitioned (array, count, size, true_cnt, predicate, aux));
283 /* Checks whether ARRAY, which contains COUNT elements of SIZE
284 bytes each, is partitioned such that PREDICATE returns true
285 for the first TRUE_CNT elements and zero for the remaining
286 elements. AUX is passed as auxiliary data to PREDICATE. */
288 is_partitioned (const void *array, size_t count, size_t size,
290 algo_predicate_func *predicate, const void *aux)
292 const char *first = array;
295 assert (true_cnt <= count);
296 for (idx = 0; idx < true_cnt; idx++)
297 if (predicate (first + idx * size, aux) == 0)
299 for (idx = true_cnt; idx < count; idx++)
300 if (predicate (first + idx * size, aux) != 0)
305 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
306 RESULT, except that elements for which PREDICATE is false are
307 not copied. Returns the number of elements copied. AUX is
308 passed to PREDICATE as auxiliary data. */
310 copy_if (const void *array, size_t count, size_t size,
312 algo_predicate_func *predicate, const void *aux)
314 const char *input = array;
315 const char *last = input + size * count;
316 char *output = result;
317 size_t nonzero_cnt = 0;
321 if (predicate (input, aux))
323 memcpy (output, input, size);
331 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
332 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
337 /* Removes N elements starting at IDX from ARRAY, which consists
338 of COUNT elements of SIZE bytes each, by shifting the elements
339 following them, if any, into its position. */
341 remove_range (void *array_, size_t count, size_t size,
342 size_t idx, size_t n)
344 char *array = array_;
346 assert (array != NULL);
347 assert (idx <= count);
348 assert (idx + n <= count);
351 memmove (array + idx * size, array + (idx + n) * size,
352 size * (count - idx - n));
355 /* Removes element IDX from ARRAY, which consists of COUNT
356 elements of SIZE bytes each, by shifting the elements
357 following it, if any, into its position. */
359 remove_element (void *array, size_t count, size_t size,
362 remove_range (array, count, size, idx, 1);
365 /* Moves an element in ARRAY, which consists of COUNT elements of
366 SIZE bytes each, from OLD_IDX to NEW_IDX, shifting around
367 other elements as needed. Runs in O(abs(OLD_IDX - NEW_IDX))
370 move_element (void *array_, size_t count, size_t size,
371 size_t old_idx, size_t new_idx)
373 assert (array_ != NULL || count == 0);
374 assert (old_idx < count);
375 assert (new_idx < count);
377 if (old_idx != new_idx)
379 char *array = array_;
380 char *element = xmalloc (size);
381 char *new = array + new_idx * size;
382 char *old = array + old_idx * size;
384 memcpy (element, old, size);
386 memmove (new + size, new, (old_idx - new_idx) * size);
388 memmove (old, old + size, (new_idx - old_idx) * size);
389 memcpy (new, element, size);
395 /* A predicate and its auxiliary data. */
398 algo_predicate_func *predicate;
403 not (const void *data, const void *pred_aux_)
405 const struct pred_aux *pred_aux = pred_aux_;
407 return !pred_aux->predicate (data, pred_aux->aux);
410 /* Removes elements equal to ELEMENT from ARRAY, which consists
411 of COUNT elements of SIZE bytes each. Returns the number of
412 remaining elements. AUX is passed to COMPARE as auxiliary
415 remove_equal (void *array, size_t count, size_t size,
417 algo_compare_func *compare, const void *aux)
420 char *last = first + count * size;
427 if (compare (first, element, aux) == 0)
441 if (compare (first, element, aux) == 0)
447 memcpy (result, first, size);
452 assert (count_equal (array, count, size, element, compare, aux) == 0);
456 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
457 RESULT, except that elements for which PREDICATE is true are
458 not copied. Returns the number of elements copied. AUX is
459 passed to PREDICATE as auxiliary data. */
461 remove_copy_if (const void *array, size_t count, size_t size,
463 algo_predicate_func *predicate, const void *aux)
465 struct pred_aux pred_aux;
466 pred_aux.predicate = predicate;
468 return copy_if (array, count, size, result, not, &pred_aux);
471 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
472 a binary search. Returns any element that equals VALUE, if
473 one exists, or a null pointer otherwise. ARRAY must ordered
474 according to COMPARE. AUX is passed to COMPARE as auxiliary
477 binary_search (const void *array, size_t count, size_t size,
479 algo_compare_func *compare, const void *aux)
481 assert (array != NULL);
482 assert (count <= INT_MAX);
483 assert (compare != NULL);
487 const char *first = array;
489 int high = count - 1;
493 int middle = (low + high) / 2;
494 const char *element = first + middle * size;
495 int cmp = compare (value, element, aux);
502 return (void *) element;
506 expensive_assert (find (array, count, size, value, compare, aux) == NULL);
510 /* Lexicographically compares ARRAY1, which contains COUNT1
511 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
512 elements of SIZE bytes, according to COMPARE. Returns a
513 strcmp()-type result. AUX is passed to COMPARE as auxiliary
516 lexicographical_compare_3way (const void *array1, size_t count1,
517 const void *array2, size_t count2,
519 algo_compare_func *compare, const void *aux)
521 const char *first1 = array1;
522 const char *first2 = array2;
523 size_t min_count = count1 < count2 ? count1 : count2;
525 while (min_count > 0)
527 int cmp = compare (first1, first2, aux);
536 return count1 < count2 ? -1 : count1 > count2;
539 /* If you consider tuning this algorithm, you should consult first:
540 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
541 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
547 /* Discontinue quicksort algorithm when partition gets below this size.
548 This particular magic number was chosen to work best on a Sun 4/260. */
551 /* Stack node declarations used to store unfulfilled partition obligations. */
558 /* The next 4 #defines implement a very fast in-line stack abstraction. */
559 /* The stack needs log (total_elements) entries (we could even subtract
560 log(MAX_THRESH)). Since total_elements has type size_t, we get as
561 upper bound for log (total_elements):
562 bits per byte (CHAR_BIT) * sizeof(size_t). */
563 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
564 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
565 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
566 #define STACK_NOT_EMPTY (stack < top)
569 /* Order size using quicksort. This implementation incorporates
570 four optimizations discussed in Sedgewick:
572 1. Non-recursive, using an explicit stack of pointer that store the
573 next array partition to sort. To save time, this maximum amount
574 of space required to store an array of SIZE_MAX is allocated on the
575 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
576 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
577 Pretty cheap, actually.
579 2. Chose the pivot element using a median-of-three decision tree.
580 This reduces the probability of selecting a bad pivot value and
581 eliminates certain extraneous comparisons.
583 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
584 insertion sort to order the MAX_THRESH items within each partition.
585 This is a big win, since insertion sort is faster for small, mostly
586 sorted array segments.
588 4. The larger of the two sub-partitions is always pushed onto the
589 stack first, with the algorithm then concentrating on the
590 smaller partition. This *guarantees* no more than log (total_elems)
591 stack size is needed (actually O(1) in this case)! */
594 sort (void *array, size_t count, size_t size,
595 algo_compare_func *compare, const void *aux)
597 char *const first = array;
598 const size_t max_thresh = MAX_THRESH * size;
601 /* Avoid lossage with unsigned arithmetic below. */
604 if (count > MAX_THRESH)
607 char *hi = &lo[size * (count - 1)];
608 stack_node stack[STACK_SIZE];
609 stack_node *top = stack + 1;
611 while (STACK_NOT_EMPTY)
616 /* Select median value from among LO, MID, and HI. Rearrange
617 LO and HI so the three values are sorted. This lowers the
618 probability of picking a pathological pivot value and
619 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
622 char *mid = lo + size * ((hi - lo) / size >> 1);
624 if (compare (mid, lo, aux) < 0)
625 SWAP (mid, lo, size);
626 if (compare (hi, mid, aux) < 0)
627 SWAP (mid, hi, size);
630 if (compare (mid, lo, aux) < 0)
631 SWAP (mid, lo, size);
634 left_ptr = lo + size;
635 right_ptr = hi - size;
637 /* Here's the famous ``collapse the walls'' section of quicksort.
638 Gotta like those tight inner loops! They are the main reason
639 that this algorithm runs much faster than others. */
642 while (compare (left_ptr, mid, aux) < 0)
645 while (compare (mid, right_ptr, aux) < 0)
648 if (left_ptr < right_ptr)
650 SWAP (left_ptr, right_ptr, size);
653 else if (mid == right_ptr)
658 else if (left_ptr == right_ptr)
665 while (left_ptr <= right_ptr);
667 /* Set up pointers for next iteration. First determine whether
668 left and right partitions are below the threshold size. If so,
669 ignore one or both. Otherwise, push the larger partition's
670 bounds on the stack and continue sorting the smaller one. */
672 if ((size_t) (right_ptr - lo) <= max_thresh)
674 if ((size_t) (hi - left_ptr) <= max_thresh)
675 /* Ignore both small partitions. */
678 /* Ignore small left partition. */
681 else if ((size_t) (hi - left_ptr) <= max_thresh)
682 /* Ignore small right partition. */
684 else if ((right_ptr - lo) > (hi - left_ptr))
686 /* Push larger left partition indices. */
687 PUSH (lo, right_ptr);
692 /* Push larger right partition indices. */
699 /* Once the FIRST array is partially sorted by quicksort the rest
700 is completely sorted using insertion sort, since this is efficient
701 for partitions below MAX_THRESH size. FIRST points to the beginning
702 of the array to sort, and END_PTR points at the very last element in
703 the array (*not* one beyond it!). */
706 char *const end_ptr = &first[size * (count - 1)];
707 char *tmp_ptr = first;
708 char *thresh = MIN (end_ptr, first + max_thresh);
709 register char *run_ptr;
711 /* Find smallest element in first threshold and place it at the
712 array's beginning. This is the smallest array element,
713 and the operation speeds up insertion sort's inner loop. */
715 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
716 if (compare (run_ptr, tmp_ptr, aux) < 0)
719 if (tmp_ptr != first)
720 SWAP (tmp_ptr, first, size);
722 /* Insertion sort, running from left-hand-side up to right-hand-side. */
724 run_ptr = first + size;
725 while ((run_ptr += size) <= end_ptr)
727 tmp_ptr = run_ptr - size;
728 while (compare (run_ptr, tmp_ptr, aux) < 0)
732 if (tmp_ptr != run_ptr)
736 trav = run_ptr + size;
737 while (--trav >= run_ptr)
742 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
750 assert (is_sorted (array, count, size, compare, aux));
753 /* Tests whether ARRAY, which contains COUNT elements of SIZE
754 bytes each, is sorted in order according to COMPARE. AUX is
755 passed to COMPARE as auxiliary data. */
757 is_sorted (const void *array, size_t count, size_t size,
758 algo_compare_func *compare, const void *aux)
760 const char *first = array;
763 for (idx = 0; idx + 1 < count; idx++)
764 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
770 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
771 into RESULT, and returns the number of elements written to
772 RESULT. If a value appears M times in ARRAY1 and N times in
773 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
774 and ARRAY2 must be sorted, and RESULT is sorted and stable.
775 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
776 each SIZE bytes. AUX is passed to COMPARE as auxiliary
778 size_t set_difference (const void *array1, size_t count1,
779 const void *array2, size_t count2,
782 algo_compare_func *compare, const void *aux)
784 const char *first1 = array1;
785 const char *last1 = first1 + count1 * size;
786 const char *first2 = array2;
787 const char *last2 = first2 + count2 * size;
788 char *result = result_;
789 size_t result_count = 0;
791 while (first1 != last1 && first2 != last2)
793 int cmp = compare (first1, first2, aux);
796 memcpy (result, first1, size);
810 while (first1 != last1)
812 memcpy (result, first1, size);
821 /* Finds the first pair of adjacent equal elements in ARRAY,
822 which has COUNT elements of SIZE bytes. Returns the first
823 element in ARRAY such that COMPARE returns zero when it and
824 its successor element are compared, or a null pointer if no
825 such element exists. AUX is passed to COMPARE as auxiliary
828 adjacent_find_equal (const void *array, size_t count, size_t size,
829 algo_compare_func *compare, const void *aux)
831 const char *first = array;
832 const char *last = first + count * size;
834 while (first < last && first + size < last)
836 if (compare (first, first + size, aux) == 0)
837 return (void *) first;
844 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
845 the first COUNT - 1 elements of these form a heap, followed by
846 a single element not part of the heap. This function adds the
847 final element, forming a heap of COUNT elements in ARRAY.
848 Uses COMPARE to compare elements, passing AUX as auxiliary
851 push_heap (void *array, size_t count, size_t size,
852 algo_compare_func *compare, const void *aux)
857 expensive_assert (count < 1 || is_heap (array, count - 1,
858 size, compare, aux));
859 for (i = count; i > 1; i /= 2)
861 char *parent = first + (i / 2 - 1) * size;
862 char *element = first + (i - 1) * size;
863 if (compare (parent, element, aux) < 0)
864 SWAP (parent, element, size);
868 expensive_assert (is_heap (array, count, size, compare, aux));
871 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
872 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
873 may be smaller than its children. This function fixes that,
874 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
875 compare elements, passing AUX as auxiliary data. */
877 heapify (void *array, size_t count, size_t size,
879 algo_compare_func *compare, const void *aux)
885 size_t left = 2 * idx;
886 size_t right = left + 1;
887 size_t largest = idx;
890 && compare (first + size * (left - 1),
891 first + size * (idx - 1), aux) > 0)
895 && compare (first + size * (right - 1),
896 first + size * (largest - 1), aux) > 0)
902 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
907 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
908 all COUNT elements form a heap. This function moves the
909 largest element in the heap to the final position in ARRAY and
910 reforms a heap of the remaining COUNT - 1 elements at the
911 beginning of ARRAY. Uses COMPARE to compare elements, passing
912 AUX as auxiliary data. */
914 pop_heap (void *array, size_t count, size_t size,
915 algo_compare_func *compare, const void *aux)
919 expensive_assert (is_heap (array, count, size, compare, aux));
920 SWAP (first, first + (count - 1) * size, size);
921 heapify (first, count - 1, size, 1, compare, aux);
922 expensive_assert (count < 1 || is_heap (array, count - 1,
923 size, compare, aux));
926 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
927 a heap. Uses COMPARE to compare elements, passing AUX as
930 make_heap (void *array, size_t count, size_t size,
931 algo_compare_func *compare, const void *aux)
935 for (idx = count / 2; idx >= 1; idx--)
936 heapify (array, count, size, idx, compare, aux);
937 expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
940 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
941 all COUNT elements form a heap. This function turns the heap
942 into a fully sorted array. Uses COMPARE to compare elements,
943 passing AUX as auxiliary data. */
945 sort_heap (void *array, size_t count, size_t size,
946 algo_compare_func *compare, const void *aux)
951 expensive_assert (is_heap (array, count, size, compare, aux));
952 for (idx = count; idx >= 2; idx--)
954 SWAP (first, first + (idx - 1) * size, size);
955 heapify (array, idx - 1, size, 1, compare, aux);
957 expensive_assert (is_sorted (array, count, size, compare, aux));
960 /* ARRAY contains COUNT elements of SIZE bytes each. This
961 function tests whether ARRAY is a heap and returns true if so,
962 false otherwise. Uses COMPARE to compare elements, passing
963 AUX as auxiliary data. */
965 is_heap (const void *array, size_t count, size_t size,
966 algo_compare_func *compare, const void *aux)
968 const char *first = array;
971 for (child = 2; child <= count; child++)
973 size_t parent = child / 2;
974 if (compare (first + (parent - 1) * size,
975 first + (child - 1) * size, aux) < 0)