1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
93 #include "algorithm.h"
94 #include <gsl/gsl_rng.h>
101 /* Some of the assertions in this file are very expensive. We
102 don't use them by default. */
104 #define expensive_assert(X) assert(X)
106 #define expensive_assert(X) ((void) 0)
110 /* Finds an element in ARRAY, which contains COUNT elements of
111 SIZE bytes each, using COMPARE for comparisons. Returns the
112 first element in ARRAY that matches TARGET, or a null pointer
113 on failure. AUX is passed to each comparison as auxiliary
116 find (const void *array, size_t count, size_t size,
118 algo_compare_func *compare, void *aux)
120 const unsigned char *element = array;
124 if (compare (target, element, aux) == 0)
125 return (void *) element;
133 /* Counts and return the number of elements in ARRAY, which
134 contains COUNT elements of SIZE bytes each, which are equal to
135 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
138 count_equal (const void *array, size_t count, size_t size,
140 algo_compare_func *compare, void *aux)
142 const unsigned char *first = array;
143 size_t equal_cnt = 0;
147 if (compare (element, first, aux) == 0)
156 /* Counts and return the number of elements in ARRAY, which
157 contains COUNT elements of SIZE bytes each, for which
158 PREDICATE returns nonzero. AUX is passed as auxiliary data to
161 count_if (const void *array, size_t count, size_t size,
162 algo_predicate_func *predicate, void *aux)
164 const unsigned char *first = array;
165 size_t nonzero_cnt = 0;
169 if (predicate (first, aux) != 0)
178 /* Byte-wise swap two items of size SIZE. */
179 #define SWAP(a, b, size) \
182 register size_t __size = (size); \
183 register char *__a = (a), *__b = (b); \
189 } while (--__size > 0); \
192 /* Makes the elements in ARRAY unique, by moving up duplicates,
193 and returns the new number of elements in the array. Sorted
194 arrays only. Arguments same as for sort() above. */
196 unique (void *array, size_t count, size_t size,
197 algo_compare_func *compare, void *aux)
200 char *last = first + size * count;
201 char *result = array;
208 assert (adjacent_find_equal (array, count,
209 size, compare, aux) == NULL);
213 if (compare (result, first, aux))
217 memcpy (result, first, size);
224 /* Helper function that calls sort(), then unique(). */
226 sort_unique (void *array, size_t count, size_t size,
227 algo_compare_func *compare, void *aux)
229 sort (array, count, size, compare, aux);
230 return unique (array, count, size, compare, aux);
233 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
234 each, so that the elements for which PREDICATE returns nonzero
235 precede those for which PREDICATE returns zero. AUX is
236 passed to each predicate as auxiliary data. Returns the
237 number of elements for which PREDICATE returns nonzero. Not
240 partition (void *array, size_t count, size_t size,
241 algo_predicate_func *predicate, void *aux)
243 size_t nonzero_cnt = count;
245 char *last = first + nonzero_cnt * size;
249 /* Move FIRST forward to point to first element that fails
255 else if (!predicate (first, aux))
262 /* Move LAST backward to point to last element that passes
270 else if (predicate (last, aux))
276 /* By swapping FIRST and LAST we extend the starting and
277 ending sequences that pass and fail, respectively,
279 SWAP (first, last, size);
284 assert (is_partitioned (array, count, size, nonzero_cnt, predicate, aux));
288 /* Checks whether ARRAY, which contains COUNT elements of SIZE
289 bytes each, is partitioned such that PREDICATE returns nonzero
290 for the first NONZERO_CNT elements and zero for the remaining
291 elements. AUX is passed as auxiliary data to PREDICATE. */
293 is_partitioned (const void *array, size_t count, size_t size,
295 algo_predicate_func *predicate, void *aux)
297 const unsigned char *first = array;
300 assert (nonzero_cnt <= count);
301 for (idx = 0; idx < nonzero_cnt; idx++)
302 if (predicate (first + idx * size, aux) == 0)
304 for (idx = nonzero_cnt; idx < count; idx++)
305 if (predicate (first + idx * size, aux) != 0)
310 /* A algo_random_func that uses random.h. */
312 algo_default_random (unsigned max, void *aux UNUSED)
314 unsigned long r_min = gsl_rng_min (get_rng ());
315 return (gsl_rng_get (get_rng ()) - r_min) % max;
318 /* Randomly reorders ARRAY, which contains COUNT elements of SIZE
319 bytes each. Uses RANDOM as a source of random data, passing
320 AUX as the auxiliary data. RANDOM may be null to use a
321 default random source. */
323 random_shuffle (void *array_, size_t count, size_t size,
324 algo_random_func *random, void *aux)
326 unsigned char *array = array_;
330 random = algo_default_random;
332 for (i = 1; i < count; i++)
333 SWAP (array + i * size, array + random (i + 1, aux) * size, size);
336 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
337 RESULT, except that elements for which PREDICATE is false are
338 not copied. Returns the number of elements copied. AUX is
339 passed to PREDICATE as auxiliary data. */
341 copy_if (const void *array, size_t count, size_t size,
343 algo_predicate_func *predicate, void *aux)
345 const unsigned char *input = array;
346 const unsigned char *last = input + size * count;
347 unsigned char *output = result;
348 size_t nonzero_cnt = 0;
352 if (predicate (input, aux))
354 memcpy (output, input, size);
362 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
363 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
368 /* A predicate and its auxiliary data. */
371 algo_predicate_func *predicate;
376 not (const void *data, void *pred_aux_)
378 const struct pred_aux *pred_aux = pred_aux_;
380 return !pred_aux->predicate (data, pred_aux->aux);
383 /* Removes elements equal to ELEMENT from ARRAY, which consists
384 of COUNT elements of SIZE bytes each. Returns the number of
385 remaining elements. AUX is passed to COMPARE as auxiliary
388 remove_equal (void *array, size_t count, size_t size,
390 algo_compare_func *compare, void *aux)
392 unsigned char *first = array;
393 unsigned char *last = first + count * size;
394 unsigned char *result;
400 if (compare (first, element, aux) == 0)
414 if (compare (first, element, aux) == 0)
420 memcpy (result, first, size);
425 assert (count_equal (array, count, size, element, compare, aux) == 0);
429 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
430 RESULT, except that elements for which PREDICATE is true are
431 not copied. Returns the number of elements copied. AUX is
432 passed to PREDICATE as auxiliary data. */
434 remove_copy_if (const void *array, size_t count, size_t size,
436 algo_predicate_func *predicate, void *aux)
438 struct pred_aux pred_aux;
439 pred_aux.predicate = predicate;
441 return copy_if (array, count, size, result, not, &pred_aux);
444 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
445 a binary search. Returns any element that equals VALUE, if
446 one exists, or a null pointer otherwise. ARRAY must ordered
447 according to COMPARE. AUX is passed to COMPARE as auxiliary
450 binary_search (const void *array, size_t count, size_t size,
452 algo_compare_func *compare, void *aux)
454 assert (array != NULL);
455 assert (count <= INT_MAX);
456 assert (compare != NULL);
460 const unsigned char *first = array;
462 int high = count - 1;
466 int middle = (low + high) / 2;
467 const unsigned char *element = first + middle * size;
468 int cmp = compare (value, element, aux);
475 return (void *) element;
479 expensive_assert (find (array, count, size, value, compare, aux) == NULL);
483 /* Lexicographically compares ARRAY1, which contains COUNT1
484 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
485 elements of SIZE bytes, according to COMPARE. Returns a
486 strcmp()-type result. AUX is passed to COMPARE as auxiliary
489 lexicographical_compare_3way (const void *array1, size_t count1,
490 const void *array2, size_t count2,
492 algo_compare_func *compare, void *aux)
494 const unsigned char *first1 = array1;
495 const unsigned char *first2 = array2;
496 size_t min_count = count1 < count2 ? count1 : count2;
498 while (min_count > 0)
500 int cmp = compare (first1, first2, aux);
509 return count1 < count2 ? -1 : count1 > count2;
512 /* If you consider tuning this algorithm, you should consult first:
513 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
514 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
524 /* Discontinue quicksort algorithm when partition gets below this size.
525 This particular magic number was chosen to work best on a Sun 4/260. */
528 /* Stack node declarations used to store unfulfilled partition obligations. */
535 /* The next 4 #defines implement a very fast in-line stack abstraction. */
536 /* The stack needs log (total_elements) entries (we could even subtract
537 log(MAX_THRESH)). Since total_elements has type size_t, we get as
538 upper bound for log (total_elements):
539 bits per byte (CHAR_BIT) * sizeof(size_t). */
540 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
541 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
542 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
543 #define STACK_NOT_EMPTY (stack < top)
546 /* Order size using quicksort. This implementation incorporates
547 four optimizations discussed in Sedgewick:
549 1. Non-recursive, using an explicit stack of pointer that store the
550 next array partition to sort. To save time, this maximum amount
551 of space required to store an array of SIZE_MAX is allocated on the
552 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
553 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
554 Pretty cheap, actually.
556 2. Chose the pivot element using a median-of-three decision tree.
557 This reduces the probability of selecting a bad pivot value and
558 eliminates certain extraneous comparisons.
560 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
561 insertion sort to order the MAX_THRESH items within each partition.
562 This is a big win, since insertion sort is faster for small, mostly
563 sorted array segments.
565 4. The larger of the two sub-partitions is always pushed onto the
566 stack first, with the algorithm then concentrating on the
567 smaller partition. This *guarantees* no more than log (total_elems)
568 stack size is needed (actually O(1) in this case)! */
571 sort (void *array, size_t count, size_t size,
572 algo_compare_func *compare, void *aux)
574 char *const first = array;
575 const size_t max_thresh = MAX_THRESH * size;
578 /* Avoid lossage with unsigned arithmetic below. */
581 if (count > MAX_THRESH)
584 char *hi = &lo[size * (count - 1)];
585 stack_node stack[STACK_SIZE];
586 stack_node *top = stack + 1;
588 while (STACK_NOT_EMPTY)
593 /* Select median value from among LO, MID, and HI. Rearrange
594 LO and HI so the three values are sorted. This lowers the
595 probability of picking a pathological pivot value and
596 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
599 char *mid = lo + size * ((hi - lo) / size >> 1);
601 if (compare (mid, lo, aux) < 0)
602 SWAP (mid, lo, size);
603 if (compare (hi, mid, aux) < 0)
604 SWAP (mid, hi, size);
607 if (compare (mid, lo, aux) < 0)
608 SWAP (mid, lo, size);
611 left_ptr = lo + size;
612 right_ptr = hi - size;
614 /* Here's the famous ``collapse the walls'' section of quicksort.
615 Gotta like those tight inner loops! They are the main reason
616 that this algorithm runs much faster than others. */
619 while (compare (left_ptr, mid, aux) < 0)
622 while (compare (mid, right_ptr, aux) < 0)
625 if (left_ptr < right_ptr)
627 SWAP (left_ptr, right_ptr, size);
630 else if (mid == right_ptr)
635 else if (left_ptr == right_ptr)
642 while (left_ptr <= right_ptr);
644 /* Set up pointers for next iteration. First determine whether
645 left and right partitions are below the threshold size. If so,
646 ignore one or both. Otherwise, push the larger partition's
647 bounds on the stack and continue sorting the smaller one. */
649 if ((size_t) (right_ptr - lo) <= max_thresh)
651 if ((size_t) (hi - left_ptr) <= max_thresh)
652 /* Ignore both small partitions. */
655 /* Ignore small left partition. */
658 else if ((size_t) (hi - left_ptr) <= max_thresh)
659 /* Ignore small right partition. */
661 else if ((right_ptr - lo) > (hi - left_ptr))
663 /* Push larger left partition indices. */
664 PUSH (lo, right_ptr);
669 /* Push larger right partition indices. */
676 /* Once the FIRST array is partially sorted by quicksort the rest
677 is completely sorted using insertion sort, since this is efficient
678 for partitions below MAX_THRESH size. FIRST points to the beginning
679 of the array to sort, and END_PTR points at the very last element in
680 the array (*not* one beyond it!). */
682 #define min(x, y) ((x) < (y) ? (x) : (y))
685 char *const end_ptr = &first[size * (count - 1)];
686 char *tmp_ptr = first;
687 char *thresh = min(end_ptr, first + max_thresh);
688 register char *run_ptr;
690 /* Find smallest element in first threshold and place it at the
691 array's beginning. This is the smallest array element,
692 and the operation speeds up insertion sort's inner loop. */
694 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
695 if (compare (run_ptr, tmp_ptr, aux) < 0)
698 if (tmp_ptr != first)
699 SWAP (tmp_ptr, first, size);
701 /* Insertion sort, running from left-hand-side up to right-hand-side. */
703 run_ptr = first + size;
704 while ((run_ptr += size) <= end_ptr)
706 tmp_ptr = run_ptr - size;
707 while (compare (run_ptr, tmp_ptr, aux) < 0)
711 if (tmp_ptr != run_ptr)
715 trav = run_ptr + size;
716 while (--trav >= run_ptr)
721 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
729 assert (is_sorted (array, count, size, compare, aux));
732 /* Tests whether ARRAY, which contains COUNT elements of SIZE
733 bytes each, is sorted in order according to COMPARE. AUX is
734 passed to COMPARE as auxiliary data. */
736 is_sorted (const void *array, size_t count, size_t size,
737 algo_compare_func *compare, void *aux)
739 const unsigned char *first = array;
742 for (idx = 0; idx + 1 < count; idx++)
743 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
749 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
750 into RESULT, and returns the number of elements written to
751 RESULT. If a value appears M times in ARRAY1 and N times in
752 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
753 and ARRAY2 must be sorted, and RESULT is sorted and stable.
754 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
755 each SIZE bytes. AUX is passed to COMPARE as auxiliary
757 size_t set_difference (const void *array1, size_t count1,
758 const void *array2, size_t count2,
761 algo_compare_func *compare, void *aux)
763 const unsigned char *first1 = array1;
764 const unsigned char *last1 = first1 + count1 * size;
765 const unsigned char *first2 = array2;
766 const unsigned char *last2 = first2 + count2 * size;
767 unsigned char *result = result_;
768 size_t result_count = 0;
770 while (first1 != last1 && first2 != last2)
772 int cmp = compare (first1, first2, aux);
775 memcpy (result, first1, size);
789 while (first1 != last1)
791 memcpy (result, first1, size);
800 /* Finds the first pair of adjacent equal elements in ARRAY,
801 which has COUNT elements of SIZE bytes. Returns the first
802 element in ARRAY such that COMPARE returns zero when it and
803 its successor element are compared, or a null pointer if no
804 such element exists. AUX is passed to COMPARE as auxiliary
807 adjacent_find_equal (const void *array, size_t count, size_t size,
808 algo_compare_func *compare, void *aux)
810 const unsigned char *first = array;
811 const unsigned char *last = first + count * size;
813 while (first < last && first + size < last)
815 if (compare (first, first + size, aux) == 0)
816 return (void *) first;
823 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
824 the first COUNT - 1 elements of these form a heap, followed by
825 a single element not part of the heap. This function adds the
826 final element, forming a heap of COUNT elements in ARRAY.
827 Uses COMPARE to compare elements, passing AUX as auxiliary
830 push_heap (void *array, size_t count, size_t size,
831 algo_compare_func *compare, void *aux)
833 unsigned char *first = array;
836 expensive_assert (count < 1 || is_heap (array, count - 1,
837 size, compare, aux));
838 for (i = count; i > 1; i /= 2)
840 unsigned char *parent = first + (i / 2 - 1) * size;
841 unsigned char *element = first + (i - 1) * size;
842 if (compare (parent, element, aux) < 0)
843 SWAP (parent, element, size);
847 expensive_assert (is_heap (array, count, size, compare, aux));
850 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
851 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
852 may be smaller than its children. This function fixes that,
853 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
854 compare elements, passing AUX as auxiliary data. */
856 heapify (void *array, size_t count, size_t size,
858 algo_compare_func *compare, void *aux)
860 unsigned char *first = array;
864 size_t left = 2 * idx;
865 size_t right = left + 1;
866 size_t largest = idx;
869 && compare (first + size * (left - 1),
870 first + size * (idx - 1), aux) > 0)
874 && compare (first + size * (right - 1),
875 first + size * (largest - 1), aux) > 0)
881 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
886 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
887 all COUNT elements form a heap. This function moves the
888 largest element in the heap to the final position in ARRAY and
889 reforms a heap of the remaining COUNT - 1 elements at the
890 beginning of ARRAY. Uses COMPARE to compare elements, passing
891 AUX as auxiliary data. */
893 pop_heap (void *array, size_t count, size_t size,
894 algo_compare_func *compare, void *aux)
896 unsigned char *first = array;
898 expensive_assert (is_heap (array, count, size, compare, aux));
899 SWAP (first, first + (count - 1) * size, size);
900 heapify (first, count - 1, size, 1, compare, aux);
901 expensive_assert (count < 1 || is_heap (array, count - 1,
902 size, compare, aux));
905 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
906 a heap. Uses COMPARE to compare elements, passing AUX as
909 make_heap (void *array, size_t count, size_t size,
910 algo_compare_func *compare, void *aux)
914 for (idx = count / 2; idx >= 1; idx--)
915 heapify (array, count, size, idx, compare, aux);
916 expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
919 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
920 all COUNT elements form a heap. This function turns the heap
921 into a fully sorted array. Uses COMPARE to compare elements,
922 passing AUX as auxiliary data. */
924 sort_heap (void *array, size_t count, size_t size,
925 algo_compare_func *compare, void *aux)
927 unsigned char *first = array;
930 expensive_assert (is_heap (array, count, size, compare, aux));
931 for (idx = count; idx >= 2; idx--)
933 SWAP (first, first + (idx - 1) * size, size);
934 heapify (array, idx - 1, size, 1, compare, aux);
936 expensive_assert (is_sorted (array, count, size, compare, aux));
939 /* ARRAY contains COUNT elements of SIZE bytes each. This
940 function tests whether ARRAY is a heap and returns 1 if so, 0
941 otherwise. Uses COMPARE to compare elements, passing AUX as
944 is_heap (const void *array, size_t count, size_t size,
945 algo_compare_func *compare, void *aux)
947 const unsigned char *first = array;
950 for (child = 2; child <= count; child++)
952 size_t parent = child / 2;
953 if (compare (first + (parent - 1) * size,
954 first + (child - 1) * size, aux) < 0)