1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
93 #include "algorithm.h"
100 /* Some of the assertions in this file are very expensive. We
101 don't use them by default. */
103 #define expensive_assert(X) assert(X)
105 #define expensive_assert(X) ((void) 0)
109 /* Finds an element in ARRAY, which contains COUNT elements of
110 SIZE bytes each, using COMPARE for comparisons. Returns the
111 first element in ARRAY that matches TARGET, or a null pointer
112 on failure. AUX is passed to each comparison as auxiliary
115 find (const void *array, size_t count, size_t size,
117 algo_compare_func *compare, void *aux)
119 const unsigned char *element = array;
123 if (compare (target, element, aux) == 0)
124 return (void *) element;
132 /* Counts and return the number of elements in ARRAY, which
133 contains COUNT elements of SIZE bytes each, which are equal to
134 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
137 count_equal (const void *array, size_t count, size_t size,
139 algo_compare_func *compare, void *aux)
141 const unsigned char *first = array;
142 size_t equal_cnt = 0;
146 if (compare (element, first, aux) == 0)
155 /* Counts and return the number of elements in ARRAY, which
156 contains COUNT elements of SIZE bytes each, for which
157 PREDICATE returns nonzero. AUX is passed as auxiliary data to
160 count_if (const void *array, size_t count, size_t size,
161 algo_predicate_func *predicate, void *aux)
163 const unsigned char *first = array;
164 size_t nonzero_cnt = 0;
168 if (predicate (first, aux) != 0)
177 /* Byte-wise swap two items of size SIZE. */
178 #define SWAP(a, b, size) \
181 register size_t __size = (size); \
182 register char *__a = (a), *__b = (b); \
188 } while (--__size > 0); \
191 /* Makes the elements in ARRAY unique, by moving up duplicates,
192 and returns the new number of elements in the array. Sorted
193 arrays only. Arguments same as for sort() above. */
195 unique (void *array, size_t count, size_t size,
196 algo_compare_func *compare, void *aux)
199 char *last = first + size * count;
200 char *result = array;
207 assert (adjacent_find_equal (array, count,
208 size, compare, aux) == NULL);
212 if (compare (result, first, aux))
216 memcpy (result, first, size);
223 /* Helper function that calls sort(), then unique(). */
225 sort_unique (void *array, size_t count, size_t size,
226 algo_compare_func *compare, void *aux)
228 sort (array, count, size, compare, aux);
229 return unique (array, count, size, compare, aux);
232 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
233 each, so that the elements for which PREDICATE returns nonzero
234 precede those for which PREDICATE returns zero. AUX is
235 passed to each predicate as auxiliary data. Returns the
236 number of elements for which PREDICATE returns nonzero. Not
239 partition (void *array, size_t count, size_t size,
240 algo_predicate_func *predicate, void *aux)
242 size_t nonzero_cnt = count;
244 char *last = first + nonzero_cnt * size;
248 /* Move FIRST forward to point to first element that fails
254 else if (!predicate (first, aux))
261 /* Move LAST backward to point to last element that passes
269 else if (predicate (last, aux))
275 /* By swapping FIRST and LAST we extend the starting and
276 ending sequences that pass and fail, respectively,
278 SWAP (first, last, size);
283 assert (is_partitioned (array, count, size, nonzero_cnt, predicate, aux));
287 /* Checks whether ARRAY, which contains COUNT elements of SIZE
288 bytes each, is partitioned such that PREDICATE returns nonzero
289 for the first NONZERO_CNT elements and zero for the remaining
290 elements. AUX is passed as auxiliary data to PREDICATE. */
292 is_partitioned (const void *array, size_t count, size_t size,
294 algo_predicate_func *predicate, void *aux)
296 const unsigned char *first = array;
299 assert (nonzero_cnt <= count);
300 for (idx = 0; idx < nonzero_cnt; idx++)
301 if (predicate (first + idx * size, aux) == 0)
303 for (idx = nonzero_cnt; idx < count; idx++)
304 if (predicate (first + idx * size, aux) != 0)
309 /* A algo_random_func that uses random.h. */
311 algo_default_random (unsigned max, void *aux UNUSED)
313 return rng_get_unsigned (pspp_rng ()) % max;
316 /* Randomly reorders ARRAY, which contains COUNT elements of SIZE
317 bytes each. Uses RANDOM as a source of random data, passing
318 AUX as the auxiliary data. RANDOM may be null to use a
319 default random source. */
321 random_shuffle (void *array_, size_t count, size_t size,
322 algo_random_func *random, void *aux)
324 unsigned char *array = array_;
328 random = algo_default_random;
330 for (i = 1; i < count; i++)
331 SWAP (array + i * size, array + random (i + 1, aux) * size, size);
334 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
335 RESULT, except that elements for which PREDICATE is false are
336 not copied. Returns the number of elements copied. AUX is
337 passed to PREDICATE as auxiliary data. */
339 copy_if (const void *array, size_t count, size_t size,
341 algo_predicate_func *predicate, void *aux)
343 const unsigned char *input = array;
344 const unsigned char *last = input + size * count;
345 unsigned char *output = result;
346 size_t nonzero_cnt = 0;
350 if (predicate (input, aux))
352 memcpy (output, input, size);
360 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
361 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
366 /* A predicate and its auxiliary data. */
369 algo_predicate_func *predicate;
374 not (const void *data, void *pred_aux_)
376 const struct pred_aux *pred_aux = pred_aux_;
378 return !pred_aux->predicate (data, pred_aux->aux);
381 /* Removes elements equal to ELEMENT from ARRAY, which consists
382 of COUNT elements of SIZE bytes each. Returns the number of
383 remaining elements. AUX is passed to COMPARE as auxiliary
386 remove_equal (void *array, size_t count, size_t size,
388 algo_compare_func *compare, void *aux)
390 unsigned char *first = array;
391 unsigned char *last = first + count * size;
392 unsigned char *result;
398 if (compare (first, element, aux) == 0)
412 if (compare (first, element, aux) == 0)
418 memcpy (result, first, size);
423 assert (count_equal (array, count, size, element, compare, aux) == 0);
427 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
428 RESULT, except that elements for which PREDICATE is true are
429 not copied. Returns the number of elements copied. AUX is
430 passed to PREDICATE as auxiliary data. */
432 remove_copy_if (const void *array, size_t count, size_t size,
434 algo_predicate_func *predicate, void *aux)
436 struct pred_aux pred_aux;
437 pred_aux.predicate = predicate;
439 return copy_if (array, count, size, result, not, &pred_aux);
442 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
443 a binary search. Returns any element that equals VALUE, if
444 one exists, or a null pointer otherwise. ARRAY must ordered
445 according to COMPARE. AUX is passed to COMPARE as auxiliary
448 binary_search (const void *array, size_t count, size_t size,
450 algo_compare_func *compare, void *aux)
452 assert (array != NULL);
453 assert (count <= INT_MAX);
454 assert (compare != NULL);
458 const unsigned char *first = array;
460 int high = count - 1;
464 int middle = (low + high) / 2;
465 const unsigned char *element = first + middle * size;
466 int cmp = compare (value, element, aux);
473 return (void *) element;
477 expensive_assert (find (array, count, size, value, compare, aux) == NULL);
481 /* Lexicographically compares ARRAY1, which contains COUNT1
482 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
483 elements of SIZE bytes, according to COMPARE. Returns a
484 strcmp()-type result. AUX is passed to COMPARE as auxiliary
487 lexicographical_compare_3way (const void *array1, size_t count1,
488 const void *array2, size_t count2,
490 algo_compare_func *compare, void *aux)
492 const unsigned char *first1 = array1;
493 const unsigned char *first2 = array2;
494 size_t min_count = count1 < count2 ? count1 : count2;
496 while (min_count > 0)
498 int cmp = compare (first1, first2, aux);
507 return count1 < count2 ? -1 : count1 > count2;
510 /* If you consider tuning this algorithm, you should consult first:
511 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
512 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
522 /* Discontinue quicksort algorithm when partition gets below this size.
523 This particular magic number was chosen to work best on a Sun 4/260. */
526 /* Stack node declarations used to store unfulfilled partition obligations. */
533 /* The next 4 #defines implement a very fast in-line stack abstraction. */
534 /* The stack needs log (total_elements) entries (we could even subtract
535 log(MAX_THRESH)). Since total_elements has type size_t, we get as
536 upper bound for log (total_elements):
537 bits per byte (CHAR_BIT) * sizeof(size_t). */
538 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
539 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
540 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
541 #define STACK_NOT_EMPTY (stack < top)
544 /* Order size using quicksort. This implementation incorporates
545 four optimizations discussed in Sedgewick:
547 1. Non-recursive, using an explicit stack of pointer that store the
548 next array partition to sort. To save time, this maximum amount
549 of space required to store an array of SIZE_MAX is allocated on the
550 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
551 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
552 Pretty cheap, actually.
554 2. Chose the pivot element using a median-of-three decision tree.
555 This reduces the probability of selecting a bad pivot value and
556 eliminates certain extraneous comparisons.
558 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
559 insertion sort to order the MAX_THRESH items within each partition.
560 This is a big win, since insertion sort is faster for small, mostly
561 sorted array segments.
563 4. The larger of the two sub-partitions is always pushed onto the
564 stack first, with the algorithm then concentrating on the
565 smaller partition. This *guarantees* no more than log (total_elems)
566 stack size is needed (actually O(1) in this case)! */
569 sort (void *array, size_t count, size_t size,
570 algo_compare_func *compare, void *aux)
572 char *const first = array;
573 const size_t max_thresh = MAX_THRESH * size;
576 /* Avoid lossage with unsigned arithmetic below. */
579 if (count > MAX_THRESH)
582 char *hi = &lo[size * (count - 1)];
583 stack_node stack[STACK_SIZE];
584 stack_node *top = stack + 1;
586 while (STACK_NOT_EMPTY)
591 /* Select median value from among LO, MID, and HI. Rearrange
592 LO and HI so the three values are sorted. This lowers the
593 probability of picking a pathological pivot value and
594 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
597 char *mid = lo + size * ((hi - lo) / size >> 1);
599 if (compare (mid, lo, aux) < 0)
600 SWAP (mid, lo, size);
601 if (compare (hi, mid, aux) < 0)
602 SWAP (mid, hi, size);
605 if (compare (mid, lo, aux) < 0)
606 SWAP (mid, lo, size);
609 left_ptr = lo + size;
610 right_ptr = hi - size;
612 /* Here's the famous ``collapse the walls'' section of quicksort.
613 Gotta like those tight inner loops! They are the main reason
614 that this algorithm runs much faster than others. */
617 while (compare (left_ptr, mid, aux) < 0)
620 while (compare (mid, right_ptr, aux) < 0)
623 if (left_ptr < right_ptr)
625 SWAP (left_ptr, right_ptr, size);
628 else if (mid == right_ptr)
633 else if (left_ptr == right_ptr)
640 while (left_ptr <= right_ptr);
642 /* Set up pointers for next iteration. First determine whether
643 left and right partitions are below the threshold size. If so,
644 ignore one or both. Otherwise, push the larger partition's
645 bounds on the stack and continue sorting the smaller one. */
647 if ((size_t) (right_ptr - lo) <= max_thresh)
649 if ((size_t) (hi - left_ptr) <= max_thresh)
650 /* Ignore both small partitions. */
653 /* Ignore small left partition. */
656 else if ((size_t) (hi - left_ptr) <= max_thresh)
657 /* Ignore small right partition. */
659 else if ((right_ptr - lo) > (hi - left_ptr))
661 /* Push larger left partition indices. */
662 PUSH (lo, right_ptr);
667 /* Push larger right partition indices. */
674 /* Once the FIRST array is partially sorted by quicksort the rest
675 is completely sorted using insertion sort, since this is efficient
676 for partitions below MAX_THRESH size. FIRST points to the beginning
677 of the array to sort, and END_PTR points at the very last element in
678 the array (*not* one beyond it!). */
680 #define min(x, y) ((x) < (y) ? (x) : (y))
683 char *const end_ptr = &first[size * (count - 1)];
684 char *tmp_ptr = first;
685 char *thresh = min(end_ptr, first + max_thresh);
686 register char *run_ptr;
688 /* Find smallest element in first threshold and place it at the
689 array's beginning. This is the smallest array element,
690 and the operation speeds up insertion sort's inner loop. */
692 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
693 if (compare (run_ptr, tmp_ptr, aux) < 0)
696 if (tmp_ptr != first)
697 SWAP (tmp_ptr, first, size);
699 /* Insertion sort, running from left-hand-side up to right-hand-side. */
701 run_ptr = first + size;
702 while ((run_ptr += size) <= end_ptr)
704 tmp_ptr = run_ptr - size;
705 while (compare (run_ptr, tmp_ptr, aux) < 0)
709 if (tmp_ptr != run_ptr)
713 trav = run_ptr + size;
714 while (--trav >= run_ptr)
719 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
727 assert (is_sorted (array, count, size, compare, aux));
730 /* Tests whether ARRAY, which contains COUNT elements of SIZE
731 bytes each, is sorted in order according to COMPARE. AUX is
732 passed to COMPARE as auxiliary data. */
734 is_sorted (const void *array, size_t count, size_t size,
735 algo_compare_func *compare, void *aux)
737 const unsigned char *first = array;
740 for (idx = 0; idx + 1 < count; idx++)
741 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
747 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
748 into RESULT, and returns the number of elements written to
749 RESULT. If a value appears M times in ARRAY1 and N times in
750 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
751 and ARRAY2 must be sorted, and RESULT is sorted and stable.
752 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
753 each SIZE bytes. AUX is passed to COMPARE as auxiliary
755 size_t set_difference (const void *array1, size_t count1,
756 const void *array2, size_t count2,
759 algo_compare_func *compare, void *aux)
761 const unsigned char *first1 = array1;
762 const unsigned char *last1 = first1 + count1 * size;
763 const unsigned char *first2 = array2;
764 const unsigned char *last2 = first2 + count2 * size;
765 unsigned char *result = result_;
766 size_t result_count = 0;
768 while (first1 != last1 && first2 != last2)
770 int cmp = compare (first1, first2, aux);
773 memcpy (result, first1, size);
787 while (first1 != last1)
789 memcpy (result, first1, size);
798 /* Finds the first pair of adjacent equal elements in ARRAY,
799 which has COUNT elements of SIZE bytes. Returns the first
800 element in ARRAY such that COMPARE returns zero when it and
801 its successor element are compared, or a null pointer if no
802 such element exists. AUX is passed to COMPARE as auxiliary
805 adjacent_find_equal (const void *array, size_t count, size_t size,
806 algo_compare_func *compare, void *aux)
808 const unsigned char *first = array;
809 const unsigned char *last = first + count * size;
811 while (first < last && first + size < last)
813 if (compare (first, first + size, aux) == 0)
814 return (void *) first;
821 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
822 the first COUNT - 1 elements of these form a heap, followed by
823 a single element not part of the heap. This function adds the
824 final element, forming a heap of COUNT elements in ARRAY.
825 Uses COMPARE to compare elements, passing AUX as auxiliary
828 push_heap (void *array, size_t count, size_t size,
829 algo_compare_func *compare, void *aux)
831 unsigned char *first = array;
834 expensive_assert (count < 1 || is_heap (array, count - 1,
835 size, compare, aux));
836 for (i = count; i > 1; i /= 2)
838 unsigned char *parent = first + (i / 2 - 1) * size;
839 unsigned char *element = first + (i - 1) * size;
840 if (compare (parent, element, aux) < 0)
841 SWAP (parent, element, size);
845 expensive_assert (is_heap (array, count, size, compare, aux));
848 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
849 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
850 may be smaller than its children. This function fixes that,
851 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
852 compare elements, passing AUX as auxiliary data. */
854 heapify (void *array, size_t count, size_t size,
856 algo_compare_func *compare, void *aux)
858 unsigned char *first = array;
862 size_t left = 2 * idx;
863 size_t right = left + 1;
864 size_t largest = idx;
867 && compare (first + size * (left - 1),
868 first + size * (idx - 1), aux) > 0)
872 && compare (first + size * (right - 1),
873 first + size * (largest - 1), aux) > 0)
879 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
884 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
885 all COUNT elements form a heap. This function moves the
886 largest element in the heap to the final position in ARRAY and
887 reforms a heap of the remaining COUNT - 1 elements at the
888 beginning of ARRAY. Uses COMPARE to compare elements, passing
889 AUX as auxiliary data. */
891 pop_heap (void *array, size_t count, size_t size,
892 algo_compare_func *compare, void *aux)
894 unsigned char *first = array;
896 expensive_assert (is_heap (array, count, size, compare, aux));
897 SWAP (first, first + (count - 1) * size, size);
898 heapify (first, count - 1, size, 1, compare, aux);
899 expensive_assert (count < 1 || is_heap (array, count - 1,
900 size, compare, aux));
903 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
904 a heap. Uses COMPARE to compare elements, passing AUX as
907 make_heap (void *array, size_t count, size_t size,
908 algo_compare_func *compare, void *aux)
912 for (idx = count / 2; idx >= 1; idx--)
913 heapify (array, count, size, idx, compare, aux);
914 expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
917 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
918 all COUNT elements form a heap. This function turns the heap
919 into a fully sorted array. Uses COMPARE to compare elements,
920 passing AUX as auxiliary data. */
922 sort_heap (void *array, size_t count, size_t size,
923 algo_compare_func *compare, void *aux)
925 unsigned char *first = array;
928 expensive_assert (is_heap (array, count, size, compare, aux));
929 for (idx = count; idx >= 2; idx--)
931 SWAP (first, first + (idx - 1) * size, size);
932 heapify (array, idx - 1, size, 1, compare, aux);
934 expensive_assert (is_sorted (array, count, size, compare, aux));
937 /* ARRAY contains COUNT elements of SIZE bytes each. This
938 function tests whether ARRAY is a heap and returns 1 if so, 0
939 otherwise. Uses COMPARE to compare elements, passing AUX as
942 is_heap (const void *array, size_t count, size_t size,
943 algo_compare_func *compare, void *aux)
945 const unsigned char *first = array;
948 for (child = 2; child <= count; child++)
950 size_t parent = child / 2;
951 if (compare (first + (parent - 1) * size,
952 first + (child - 1) * size, aux) < 0)