1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
93 #include "algorithm.h"
100 /* Some of the assertions in this file are very expensive. We
101 don't use them by default. */
103 #define expensive_assert(X) assert(X)
105 #define expensive_assert(X) ((void) 0)
109 /* Finds an element in ARRAY, which contains COUNT elements of
110 SIZE bytes each, using COMPARE for comparisons. Returns the
111 first element in ARRAY that matches TARGET, or a null pointer
112 on failure. AUX is passed to each comparison as auxiliary
115 find (const void *array, size_t count, size_t size,
117 algo_compare_func *compare, void *aux)
119 const unsigned char *element = array;
123 if (compare (target, element, aux) == 0)
124 return (void *) element;
132 /* Counts and return the number of elements in ARRAY, which
133 contains COUNT elements of SIZE bytes each, which are equal to
134 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
137 count_equal (const void *array, size_t count, size_t size,
139 algo_compare_func *compare, void *aux)
141 const unsigned char *first = array;
142 size_t equal_cnt = 0;
146 if (compare (element, first, aux) == 0)
155 /* Counts and return the number of elements in ARRAY, which
156 contains COUNT elements of SIZE bytes each, for which
157 PREDICATE returns nonzero. AUX is passed as auxiliary data to
160 count_if (const void *array, size_t count, size_t size,
161 algo_predicate_func *predicate, void *aux)
163 const unsigned char *first = array;
164 size_t nonzero_cnt = 0;
168 if (predicate (first, aux) != 0)
177 /* Byte-wise swap two items of size SIZE. */
178 #define SWAP(a, b, size) \
181 register size_t __size = (size); \
182 register char *__a = (a), *__b = (b); \
188 } while (--__size > 0); \
191 /* Makes the elements in ARRAY unique, by moving up duplicates,
192 and returns the new number of elements in the array. Sorted
193 arrays only. Arguments same as for sort() above. */
195 unique (void *array, size_t count, size_t size,
196 algo_compare_func *compare, void *aux)
199 char *last = first + size * count;
200 char *result = array;
207 assert (adjacent_find_equal (array, count,
208 size, compare, aux) == NULL);
212 if (compare (result, first, aux))
216 memcpy (result, first, size);
223 /* Helper function that calls sort(), then unique(). */
225 sort_unique (void *array, size_t count, size_t size,
226 algo_compare_func *compare, void *aux)
228 sort (array, count, size, compare, aux);
229 return unique (array, count, size, compare, aux);
232 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
233 each, so that the elements for which PREDICATE returns nonzero
234 precede those for which PREDICATE returns zero. AUX is
235 passed to each predicate as auxiliary data. Returns the
236 number of elements for which PREDICATE returns nonzero. Not
239 partition (void *array, size_t count, size_t size,
240 algo_predicate_func *predicate, void *aux)
242 size_t nonzero_cnt = count;
244 char *last = first + nonzero_cnt * size;
248 /* Move FIRST forward to point to first element that fails
254 else if (!predicate (first, aux))
261 /* Move LAST backward to point to last element that passes
269 else if (predicate (last, aux))
275 /* By swapping FIRST and LAST we extend the starting and
276 ending sequences that pass and fail, respectively,
278 SWAP (first, last, size);
283 assert (is_partitioned (array, count, size, nonzero_cnt, predicate, aux));
287 /* Checks whether ARRAY, which contains COUNT elements of SIZE
288 bytes each, is partitioned such that PREDICATE returns nonzero
289 for the first NONZERO_CNT elements and zero for the remaining
290 elements. AUX is passed as auxiliary data to PREDICATE. */
292 is_partitioned (const void *array, size_t count, size_t size,
294 algo_predicate_func *predicate, void *aux)
296 const unsigned char *first = array;
299 assert (nonzero_cnt <= count);
300 for (idx = 0; idx < nonzero_cnt; idx++)
301 if (predicate (first + idx * size, aux) == 0)
303 for (idx = nonzero_cnt; idx < count; idx++)
304 if (predicate (first + idx * size, aux) != 0)
309 /* A algo_random_func that uses random.h. */
311 algo_default_random (unsigned max, void *aux UNUSED)
313 return rng_get_unsigned (pspp_rng ()) % max;
316 /* Randomly reorders ARRAY, which contains COUNT elements of SIZE
317 bytes each. Uses RANDOM as a source of random data, passing
318 AUX as the auxiliary data. RANDOM may be null to use a
319 default random source. */
321 random_shuffle (void *array_, size_t count, size_t size,
322 algo_random_func *random, void *aux)
324 unsigned char *array = array_;
328 random = algo_default_random;
330 for (i = 1; i < count; i++)
331 SWAP (array + i * size, array + random (i + 1, aux) * size, size);
334 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
335 RESULT, except that elements for which PREDICATE is false are
336 not copied. Returns the number of elements copied. AUX is
337 passed to PREDICATE as auxiliary data. */
339 copy_if (const void *array, size_t count, size_t size,
341 algo_predicate_func *predicate, void *aux)
343 const unsigned char *input = array;
344 const unsigned char *last = input + size * count;
345 unsigned char *output = result;
346 size_t nonzero_cnt = 0;
350 if (predicate (input, aux))
352 memcpy (output, input, size);
360 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
361 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
366 /* A predicate and its auxiliary data. */
369 algo_predicate_func *predicate;
374 not (const void *data, void *pred_aux_)
376 const struct pred_aux *pred_aux = pred_aux_;
378 return !pred_aux->predicate (data, pred_aux->aux);
381 /* Removes elements equal to ELEMENT from ARRAY, which consists
382 of COUNT elements of SIZE bytes each. Returns the number of
383 remaining elements. AUX is passed to COMPARE as auxiliary
386 remove_equal (void *array, size_t count, size_t size,
388 algo_compare_func *compare, void *aux)
390 unsigned char *first = array;
391 unsigned char *last = first + count * size;
392 unsigned char *result;
398 if (compare (first, element, aux) == 0)
412 if (compare (first, element, aux) == 0)
418 memcpy (result, first, size);
423 assert (count_equal (array, count, size, element, compare, aux) == 0);
427 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
428 RESULT, except that elements for which PREDICATE is true are
429 not copied. Returns the number of elements copied. AUX is
430 passed to PREDICATE as auxiliary data. */
432 remove_copy_if (const void *array, size_t count, size_t size,
434 algo_predicate_func *predicate, void *aux)
436 struct pred_aux pred_aux;
437 pred_aux.predicate = predicate;
439 return copy_if (array, count, size, result, not, &pred_aux);
442 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
443 a binary search. Returns any element that equals VALUE, if
444 one exists, or a null pointer otherwise. ARRAY must ordered
445 according to COMPARE. AUX is passed to COMPARE as auxiliary
448 binary_search (const void *array, size_t count, size_t size,
450 algo_compare_func *compare, void *aux)
452 assert (array != NULL);
453 assert (count <= INT_MAX);
454 assert (compare != NULL);
458 const unsigned char *first = array;
460 int high = count - 1;
464 int middle = (low + high) / 2;
465 const unsigned char *element = first + middle * size;
466 int cmp = compare (value, element, aux);
473 return (void *) element;
477 expensive_assert (find (array, count, size, value, compare, aux) == NULL);
481 /* Lexicographically compares ARRAY1, which contains COUNT1
482 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
483 elements of SIZE bytes, according to COMPARE. Returns a
484 strcmp()-type result. AUX is passed to COMPARE as auxiliary
487 lexicographical_compare_3way (const void *array1, size_t count1,
488 const void *array2, size_t count2,
490 algo_compare_func *compare, void *aux)
492 const unsigned char *first1 = array1;
493 const unsigned char *first2 = array2;
494 size_t min_count = count1 < count2 ? count1 : count2;
496 while (min_count > 0)
498 int cmp = compare (first1, first2, aux);
507 return count1 < count2 ? -1 : count1 > count2;
510 /* If you consider tuning this algorithm, you should consult first:
511 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
512 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
519 /* Discontinue quicksort algorithm when partition gets below this size.
520 This particular magic number was chosen to work best on a Sun 4/260. */
523 /* Stack node declarations used to store unfulfilled partition obligations. */
530 /* The next 4 #defines implement a very fast in-line stack abstraction. */
531 /* The stack needs log (total_elements) entries (we could even subtract
532 log(MAX_THRESH)). Since total_elements has type size_t, we get as
533 upper bound for log (total_elements):
534 bits per byte (CHAR_BIT) * sizeof(size_t). */
535 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
536 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
537 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
538 #define STACK_NOT_EMPTY (stack < top)
541 /* Order size using quicksort. This implementation incorporates
542 four optimizations discussed in Sedgewick:
544 1. Non-recursive, using an explicit stack of pointer that store the
545 next array partition to sort. To save time, this maximum amount
546 of space required to store an array of SIZE_MAX is allocated on the
547 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
548 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
549 Pretty cheap, actually.
551 2. Chose the pivot element using a median-of-three decision tree.
552 This reduces the probability of selecting a bad pivot value and
553 eliminates certain extraneous comparisons.
555 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
556 insertion sort to order the MAX_THRESH items within each partition.
557 This is a big win, since insertion sort is faster for small, mostly
558 sorted array segments.
560 4. The larger of the two sub-partitions is always pushed onto the
561 stack first, with the algorithm then concentrating on the
562 smaller partition. This *guarantees* no more than log (total_elems)
563 stack size is needed (actually O(1) in this case)! */
566 sort (void *array, size_t count, size_t size,
567 algo_compare_func *compare, void *aux)
569 char *const first = array;
570 const size_t max_thresh = MAX_THRESH * size;
573 /* Avoid lossage with unsigned arithmetic below. */
576 if (count > MAX_THRESH)
579 char *hi = &lo[size * (count - 1)];
580 stack_node stack[STACK_SIZE];
581 stack_node *top = stack + 1;
583 while (STACK_NOT_EMPTY)
588 /* Select median value from among LO, MID, and HI. Rearrange
589 LO and HI so the three values are sorted. This lowers the
590 probability of picking a pathological pivot value and
591 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
594 char *mid = lo + size * ((hi - lo) / size >> 1);
596 if (compare (mid, lo, aux) < 0)
597 SWAP (mid, lo, size);
598 if (compare (hi, mid, aux) < 0)
599 SWAP (mid, hi, size);
602 if (compare (mid, lo, aux) < 0)
603 SWAP (mid, lo, size);
606 left_ptr = lo + size;
607 right_ptr = hi - size;
609 /* Here's the famous ``collapse the walls'' section of quicksort.
610 Gotta like those tight inner loops! They are the main reason
611 that this algorithm runs much faster than others. */
614 while (compare (left_ptr, mid, aux) < 0)
617 while (compare (mid, right_ptr, aux) < 0)
620 if (left_ptr < right_ptr)
622 SWAP (left_ptr, right_ptr, size);
625 else if (mid == right_ptr)
630 else if (left_ptr == right_ptr)
637 while (left_ptr <= right_ptr);
639 /* Set up pointers for next iteration. First determine whether
640 left and right partitions are below the threshold size. If so,
641 ignore one or both. Otherwise, push the larger partition's
642 bounds on the stack and continue sorting the smaller one. */
644 if ((size_t) (right_ptr - lo) <= max_thresh)
646 if ((size_t) (hi - left_ptr) <= max_thresh)
647 /* Ignore both small partitions. */
650 /* Ignore small left partition. */
653 else if ((size_t) (hi - left_ptr) <= max_thresh)
654 /* Ignore small right partition. */
656 else if ((right_ptr - lo) > (hi - left_ptr))
658 /* Push larger left partition indices. */
659 PUSH (lo, right_ptr);
664 /* Push larger right partition indices. */
671 /* Once the FIRST array is partially sorted by quicksort the rest
672 is completely sorted using insertion sort, since this is efficient
673 for partitions below MAX_THRESH size. FIRST points to the beginning
674 of the array to sort, and END_PTR points at the very last element in
675 the array (*not* one beyond it!). */
677 #define min(x, y) ((x) < (y) ? (x) : (y))
680 char *const end_ptr = &first[size * (count - 1)];
681 char *tmp_ptr = first;
682 char *thresh = min(end_ptr, first + max_thresh);
683 register char *run_ptr;
685 /* Find smallest element in first threshold and place it at the
686 array's beginning. This is the smallest array element,
687 and the operation speeds up insertion sort's inner loop. */
689 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
690 if (compare (run_ptr, tmp_ptr, aux) < 0)
693 if (tmp_ptr != first)
694 SWAP (tmp_ptr, first, size);
696 /* Insertion sort, running from left-hand-side up to right-hand-side. */
698 run_ptr = first + size;
699 while ((run_ptr += size) <= end_ptr)
701 tmp_ptr = run_ptr - size;
702 while (compare (run_ptr, tmp_ptr, aux) < 0)
706 if (tmp_ptr != run_ptr)
710 trav = run_ptr + size;
711 while (--trav >= run_ptr)
716 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
724 assert (is_sorted (array, count, size, compare, aux));
727 /* Tests whether ARRAY, which contains COUNT elements of SIZE
728 bytes each, is sorted in order according to COMPARE. AUX is
729 passed to COMPARE as auxiliary data. */
731 is_sorted (const void *array, size_t count, size_t size,
732 algo_compare_func *compare, void *aux)
734 const unsigned char *first = array;
737 for (idx = 0; idx + 1 < count; idx++)
738 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
744 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
745 into RESULT, and returns the number of elements written to
746 RESULT. If a value appears M times in ARRAY1 and N times in
747 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
748 and ARRAY2 must be sorted, and RESULT is sorted and stable.
749 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
750 each SIZE bytes. AUX is passed to COMPARE as auxiliary
752 size_t set_difference (const void *array1, size_t count1,
753 const void *array2, size_t count2,
756 algo_compare_func *compare, void *aux)
758 const unsigned char *first1 = array1;
759 const unsigned char *last1 = first1 + count1 * size;
760 const unsigned char *first2 = array2;
761 const unsigned char *last2 = first2 + count2 * size;
762 unsigned char *result = result_;
763 size_t result_count = 0;
765 while (first1 != last1 && first2 != last2)
767 int cmp = compare (first1, first2, aux);
770 memcpy (result, first1, size);
784 while (first1 != last1)
786 memcpy (result, first1, size);
795 /* Finds the first pair of adjacent equal elements in ARRAY,
796 which has COUNT elements of SIZE bytes. Returns the first
797 element in ARRAY such that COMPARE returns zero when it and
798 its successor element are compared, or a null pointer if no
799 such element exists. AUX is passed to COMPARE as auxiliary
802 adjacent_find_equal (const void *array, size_t count, size_t size,
803 algo_compare_func *compare, void *aux)
805 const unsigned char *first = array;
806 const unsigned char *last = first + count * size;
808 while (first < last && first + size < last)
810 if (compare (first, first + size, aux) == 0)
811 return (void *) first;
818 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
819 the first COUNT - 1 elements of these form a heap, followed by
820 a single element not part of the heap. This function adds the
821 final element, forming a heap of COUNT elements in ARRAY.
822 Uses COMPARE to compare elements, passing AUX as auxiliary
825 push_heap (void *array, size_t count, size_t size,
826 algo_compare_func *compare, void *aux)
828 unsigned char *first = array;
831 expensive_assert (count < 1 || is_heap (array, count - 1,
832 size, compare, aux));
833 for (i = count; i > 1; i /= 2)
835 unsigned char *parent = first + (i / 2 - 1) * size;
836 unsigned char *element = first + (i - 1) * size;
837 if (compare (parent, element, aux) < 0)
838 SWAP (parent, element, size);
842 expensive_assert (is_heap (array, count, size, compare, aux));
845 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
846 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
847 may be smaller than its children. This function fixes that,
848 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
849 compare elements, passing AUX as auxiliary data. */
851 heapify (void *array, size_t count, size_t size,
853 algo_compare_func *compare, void *aux)
855 unsigned char *first = array;
859 size_t left = 2 * idx;
860 size_t right = left + 1;
861 size_t largest = idx;
864 && compare (first + size * (left - 1),
865 first + size * (idx - 1), aux) > 0)
869 && compare (first + size * (right - 1),
870 first + size * (largest - 1), aux) > 0)
876 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
881 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
882 all COUNT elements form a heap. This function moves the
883 largest element in the heap to the final position in ARRAY and
884 reforms a heap of the remaining COUNT - 1 elements at the
885 beginning of ARRAY. Uses COMPARE to compare elements, passing
886 AUX as auxiliary data. */
888 pop_heap (void *array, size_t count, size_t size,
889 algo_compare_func *compare, void *aux)
891 unsigned char *first = array;
893 expensive_assert (is_heap (array, count, size, compare, aux));
894 SWAP (first, first + (count - 1) * size, size);
895 heapify (first, count - 1, size, 1, compare, aux);
896 expensive_assert (count < 1 || is_heap (array, count - 1,
897 size, compare, aux));
900 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
901 a heap. Uses COMPARE to compare elements, passing AUX as
904 make_heap (void *array, size_t count, size_t size,
905 algo_compare_func *compare, void *aux)
909 for (idx = count / 2; idx >= 1; idx--)
910 heapify (array, count, size, idx, compare, aux);
911 expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
914 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
915 all COUNT elements form a heap. This function turns the heap
916 into a fully sorted array. Uses COMPARE to compare elements,
917 passing AUX as auxiliary data. */
919 sort_heap (void *array, size_t count, size_t size,
920 algo_compare_func *compare, void *aux)
922 unsigned char *first = array;
925 expensive_assert (is_heap (array, count, size, compare, aux));
926 for (idx = count; idx >= 2; idx--)
928 SWAP (first, first + (idx - 1) * size, size);
929 heapify (array, idx - 1, size, 1, compare, aux);
931 expensive_assert (is_sorted (array, count, size, compare, aux));
934 /* ARRAY contains COUNT elements of SIZE bytes each. This
935 function tests whether ARRAY is a heap and returns 1 if so, 0
936 otherwise. Uses COMPARE to compare elements, passing AUX as
939 is_heap (const void *array, size_t count, size_t size,
940 algo_compare_func *compare, void *aux)
942 const unsigned char *first = array;
945 for (child = 2; child <= count; child++)
947 size_t parent = child / 2;
948 if (compare (first + (parent - 1) * size,
949 first + (child - 1) * size, aux) < 0)