1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
98 #include <libpspp/assertion.h>
104 /* Finds an element in ARRAY, which contains COUNT elements of
105 SIZE bytes each, using COMPARE for comparisons. Returns the
106 first element in ARRAY that matches TARGET, or a null pointer
107 on failure. AUX is passed to each comparison as auxiliary
110 find (const void *array, size_t count, size_t size,
112 algo_compare_func *compare, const void *aux)
114 const char *element = array;
118 if (compare (target, element, aux) == 0)
119 return (void *) element;
127 /* Counts and return the number of elements in ARRAY, which
128 contains COUNT elements of SIZE bytes each, which are equal to
129 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
132 count_equal (const void *array, size_t count, size_t size,
134 algo_compare_func *compare, const void *aux)
136 const char *first = array;
137 size_t equal_cnt = 0;
141 if (compare (element, first, aux) == 0)
150 /* Counts and return the number of elements in ARRAY, which
151 contains COUNT elements of SIZE bytes each, for which
152 PREDICATE returns true. AUX is passed as auxiliary data to
155 count_if (const void *array, size_t count, size_t size,
156 algo_predicate_func *predicate, const void *aux)
158 const char *first = array;
163 if (predicate (first, aux) != 0)
172 /* Byte-wise swap two items of size SIZE. */
173 #define SWAP(a, b, size) \
176 register size_t __size = (size); \
177 register char *__a = (a), *__b = (b); \
183 } while (--__size > 0); \
186 /* Makes the elements in ARRAY unique, by moving up duplicates,
187 and returns the new number of elements in the array. Sorted
188 arrays only. Arguments same as for sort() above. */
190 unique (void *array, size_t count, size_t size,
191 algo_compare_func *compare, const void *aux)
194 char *last = first + size * count;
195 char *result = array;
202 assert (adjacent_find_equal (array, count,
203 size, compare, aux) == NULL);
207 if (compare (result, first, aux))
211 memcpy (result, first, size);
218 /* Helper function that calls sort(), then unique(). */
220 sort_unique (void *array, size_t count, size_t size,
221 algo_compare_func *compare, const void *aux)
223 sort (array, count, size, compare, aux);
224 return unique (array, count, size, compare, aux);
227 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
228 each, so that the elements for which PREDICATE returns true
229 precede those for which PREDICATE returns zero. AUX is
230 passed to each predicate as auxiliary data. Returns the
231 number of elements for which PREDICATE returns true. Not
234 partition (void *array, size_t count, size_t size,
235 algo_predicate_func *predicate, const void *aux)
237 size_t true_cnt = count;
239 char *last = first + true_cnt * size;
243 /* Move FIRST forward to point to first element that fails
249 else if (!predicate (first, aux))
256 /* Move LAST backward to point to last element that passes
264 else if (predicate (last, aux))
270 /* By swapping FIRST and LAST we extend the starting and
271 ending sequences that pass and fail, respectively,
273 SWAP (first, last, size);
278 assert (is_partitioned (array, count, size, true_cnt, predicate, aux));
282 /* Checks whether ARRAY, which contains COUNT elements of SIZE
283 bytes each, is partitioned such that PREDICATE returns true
284 for the first TRUE_CNT elements and zero for the remaining
285 elements. AUX is passed as auxiliary data to PREDICATE. */
287 is_partitioned (const void *array, size_t count, size_t size,
289 algo_predicate_func *predicate, const void *aux)
291 const char *first = array;
294 assert (true_cnt <= count);
295 for (idx = 0; idx < true_cnt; idx++)
296 if (predicate (first + idx * size, aux) == 0)
298 for (idx = true_cnt; idx < count; idx++)
299 if (predicate (first + idx * size, aux) != 0)
304 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
305 RESULT, except that elements for which PREDICATE is false are
306 not copied. Returns the number of elements copied. AUX is
307 passed to PREDICATE as auxiliary data. */
309 copy_if (const void *array, size_t count, size_t size,
311 algo_predicate_func *predicate, const void *aux)
313 const char *input = array;
314 const char *last = input + size * count;
315 char *output = result;
316 size_t nonzero_cnt = 0;
320 if (predicate (input, aux))
322 memcpy (output, input, size);
330 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
331 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
336 /* Removes N elements starting at IDX from ARRAY, which consists
337 of COUNT elements of SIZE bytes each, by shifting the elements
338 following them, if any, into its position. */
340 remove_range (void *array_, size_t count, size_t size,
341 size_t idx, size_t n)
343 char *array = array_;
345 assert (array != NULL);
346 assert (idx <= count);
347 assert (idx + n <= count);
350 memmove (array + idx * size, array + (idx + n) * size,
351 size * (count - idx - n));
354 /* Removes element IDX from ARRAY, which consists of COUNT
355 elements of SIZE bytes each, by shifting the elements
356 following it, if any, into its position. */
358 remove_element (void *array, size_t count, size_t size,
361 remove_range (array, count, size, idx, 1);
364 /* Moves an element in ARRAY, which consists of COUNT elements of
365 SIZE bytes each, from OLD_IDX to NEW_IDX, shifting around
366 other elements as needed. Runs in O(abs(OLD_IDX - NEW_IDX))
369 move_element (void *array_, size_t count, size_t size,
370 size_t old_idx, size_t new_idx)
372 assert (array_ != NULL || count == 0);
373 assert (old_idx < count);
374 assert (new_idx < count);
376 if (old_idx != new_idx)
378 char *array = array_;
379 char *element = xmalloc (size);
380 char *new = array + new_idx * size;
381 char *old = array + old_idx * size;
383 memcpy (element, old, size);
385 memmove (new + size, new, (old_idx - new_idx) * size);
387 memmove (old, old + size, (new_idx - old_idx) * size);
388 memcpy (new, element, size);
394 /* A predicate and its auxiliary data. */
397 algo_predicate_func *predicate;
402 not (const void *data, const void *pred_aux_)
404 const struct pred_aux *pred_aux = pred_aux_;
406 return !pred_aux->predicate (data, pred_aux->aux);
409 /* Removes elements equal to ELEMENT from ARRAY, which consists
410 of COUNT elements of SIZE bytes each. Returns the number of
411 remaining elements. AUX is passed to COMPARE as auxiliary
414 remove_equal (void *array, size_t count, size_t size,
416 algo_compare_func *compare, const void *aux)
419 char *last = first + count * size;
426 if (compare (first, element, aux) == 0)
440 if (compare (first, element, aux) == 0)
446 memcpy (result, first, size);
451 assert (count_equal (array, count, size, element, compare, aux) == 0);
455 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
456 RESULT, except that elements for which PREDICATE is true are
457 not copied. Returns the number of elements copied. AUX is
458 passed to PREDICATE as auxiliary data. */
460 remove_copy_if (const void *array, size_t count, size_t size,
462 algo_predicate_func *predicate, const void *aux)
464 struct pred_aux pred_aux;
465 pred_aux.predicate = predicate;
467 return copy_if (array, count, size, result, not, &pred_aux);
470 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
471 a binary search. Returns any element that equals VALUE, if
472 one exists, or a null pointer otherwise. ARRAY must ordered
473 according to COMPARE. AUX is passed to COMPARE as auxiliary
476 binary_search (const void *array, size_t count, size_t size,
478 algo_compare_func *compare, const void *aux)
480 assert (array != NULL);
481 assert (count <= INT_MAX);
482 assert (compare != NULL);
486 const char *first = array;
488 int high = count - 1;
492 int middle = (low + high) / 2;
493 const char *element = first + middle * size;
494 int cmp = compare (value, element, aux);
501 return (void *) element;
505 expensive_assert (find (array, count, size, value, compare, aux) == NULL);
509 /* Lexicographically compares ARRAY1, which contains COUNT1
510 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
511 elements of SIZE bytes, according to COMPARE. Returns a
512 strcmp()-type result. AUX is passed to COMPARE as auxiliary
515 lexicographical_compare_3way (const void *array1, size_t count1,
516 const void *array2, size_t count2,
518 algo_compare_func *compare, const void *aux)
520 const char *first1 = array1;
521 const char *first2 = array2;
522 size_t min_count = count1 < count2 ? count1 : count2;
524 while (min_count > 0)
526 int cmp = compare (first1, first2, aux);
535 return count1 < count2 ? -1 : count1 > count2;
538 /* If you consider tuning this algorithm, you should consult first:
539 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
540 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
546 /* Discontinue quicksort algorithm when partition gets below this size.
547 This particular magic number was chosen to work best on a Sun 4/260. */
550 /* Stack node declarations used to store unfulfilled partition obligations. */
557 /* The next 4 #defines implement a very fast in-line stack abstraction. */
558 /* The stack needs log (total_elements) entries (we could even subtract
559 log(MAX_THRESH)). Since total_elements has type size_t, we get as
560 upper bound for log (total_elements):
561 bits per byte (CHAR_BIT) * sizeof(size_t). */
562 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
563 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
564 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
565 #define STACK_NOT_EMPTY (stack < top)
568 /* Order size using quicksort. This implementation incorporates
569 four optimizations discussed in Sedgewick:
571 1. Non-recursive, using an explicit stack of pointer that store the
572 next array partition to sort. To save time, this maximum amount
573 of space required to store an array of SIZE_MAX is allocated on the
574 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
575 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
576 Pretty cheap, actually.
578 2. Chose the pivot element using a median-of-three decision tree.
579 This reduces the probability of selecting a bad pivot value and
580 eliminates certain extraneous comparisons.
582 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
583 insertion sort to order the MAX_THRESH items within each partition.
584 This is a big win, since insertion sort is faster for small, mostly
585 sorted array segments.
587 4. The larger of the two sub-partitions is always pushed onto the
588 stack first, with the algorithm then concentrating on the
589 smaller partition. This *guarantees* no more than log (total_elems)
590 stack size is needed (actually O(1) in this case)! */
593 sort (void *array, size_t count, size_t size,
594 algo_compare_func *compare, const void *aux)
596 char *const first = array;
597 const size_t max_thresh = MAX_THRESH * size;
600 /* Avoid lossage with unsigned arithmetic below. */
603 if (count > MAX_THRESH)
606 char *hi = &lo[size * (count - 1)];
607 stack_node stack[STACK_SIZE];
608 stack_node *top = stack + 1;
610 while (STACK_NOT_EMPTY)
615 /* Select median value from among LO, MID, and HI. Rearrange
616 LO and HI so the three values are sorted. This lowers the
617 probability of picking a pathological pivot value and
618 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
621 char *mid = lo + size * ((hi - lo) / size >> 1);
623 if (compare (mid, lo, aux) < 0)
624 SWAP (mid, lo, size);
625 if (compare (hi, mid, aux) < 0)
626 SWAP (mid, hi, size);
629 if (compare (mid, lo, aux) < 0)
630 SWAP (mid, lo, size);
633 left_ptr = lo + size;
634 right_ptr = hi - size;
636 /* Here's the famous ``collapse the walls'' section of quicksort.
637 Gotta like those tight inner loops! They are the main reason
638 that this algorithm runs much faster than others. */
641 while (compare (left_ptr, mid, aux) < 0)
644 while (compare (mid, right_ptr, aux) < 0)
647 if (left_ptr < right_ptr)
649 SWAP (left_ptr, right_ptr, size);
652 else if (mid == right_ptr)
657 else if (left_ptr == right_ptr)
664 while (left_ptr <= right_ptr);
666 /* Set up pointers for next iteration. First determine whether
667 left and right partitions are below the threshold size. If so,
668 ignore one or both. Otherwise, push the larger partition's
669 bounds on the stack and continue sorting the smaller one. */
671 if ((size_t) (right_ptr - lo) <= max_thresh)
673 if ((size_t) (hi - left_ptr) <= max_thresh)
674 /* Ignore both small partitions. */
677 /* Ignore small left partition. */
680 else if ((size_t) (hi - left_ptr) <= max_thresh)
681 /* Ignore small right partition. */
683 else if ((right_ptr - lo) > (hi - left_ptr))
685 /* Push larger left partition indices. */
686 PUSH (lo, right_ptr);
691 /* Push larger right partition indices. */
698 /* Once the FIRST array is partially sorted by quicksort the rest
699 is completely sorted using insertion sort, since this is efficient
700 for partitions below MAX_THRESH size. FIRST points to the beginning
701 of the array to sort, and END_PTR points at the very last element in
702 the array (*not* one beyond it!). */
705 char *const end_ptr = &first[size * (count - 1)];
706 char *tmp_ptr = first;
707 char *thresh = MIN (end_ptr, first + max_thresh);
708 register char *run_ptr;
710 /* Find smallest element in first threshold and place it at the
711 array's beginning. This is the smallest array element,
712 and the operation speeds up insertion sort's inner loop. */
714 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
715 if (compare (run_ptr, tmp_ptr, aux) < 0)
718 if (tmp_ptr != first)
719 SWAP (tmp_ptr, first, size);
721 /* Insertion sort, running from left-hand-side up to right-hand-side. */
723 run_ptr = first + size;
724 while ((run_ptr += size) <= end_ptr)
726 tmp_ptr = run_ptr - size;
727 while (compare (run_ptr, tmp_ptr, aux) < 0)
731 if (tmp_ptr != run_ptr)
735 trav = run_ptr + size;
736 while (--trav >= run_ptr)
741 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
749 assert (is_sorted (array, count, size, compare, aux));
752 /* Tests whether ARRAY, which contains COUNT elements of SIZE
753 bytes each, is sorted in order according to COMPARE. AUX is
754 passed to COMPARE as auxiliary data. */
756 is_sorted (const void *array, size_t count, size_t size,
757 algo_compare_func *compare, const void *aux)
759 const char *first = array;
762 for (idx = 0; idx + 1 < count; idx++)
763 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
769 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
770 into RESULT, and returns the number of elements written to
771 RESULT. If a value appears M times in ARRAY1 and N times in
772 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
773 and ARRAY2 must be sorted, and RESULT is sorted and stable.
774 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
775 each SIZE bytes. AUX is passed to COMPARE as auxiliary
777 size_t set_difference (const void *array1, size_t count1,
778 const void *array2, size_t count2,
781 algo_compare_func *compare, const void *aux)
783 const char *first1 = array1;
784 const char *last1 = first1 + count1 * size;
785 const char *first2 = array2;
786 const char *last2 = first2 + count2 * size;
787 char *result = result_;
788 size_t result_count = 0;
790 while (first1 != last1 && first2 != last2)
792 int cmp = compare (first1, first2, aux);
795 memcpy (result, first1, size);
809 while (first1 != last1)
811 memcpy (result, first1, size);
820 /* Finds the first pair of adjacent equal elements in ARRAY,
821 which has COUNT elements of SIZE bytes. Returns the first
822 element in ARRAY such that COMPARE returns zero when it and
823 its successor element are compared, or a null pointer if no
824 such element exists. AUX is passed to COMPARE as auxiliary
827 adjacent_find_equal (const void *array, size_t count, size_t size,
828 algo_compare_func *compare, const void *aux)
830 const char *first = array;
831 const char *last = first + count * size;
833 while (first < last && first + size < last)
835 if (compare (first, first + size, aux) == 0)
836 return (void *) first;
843 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
844 the first COUNT - 1 elements of these form a heap, followed by
845 a single element not part of the heap. This function adds the
846 final element, forming a heap of COUNT elements in ARRAY.
847 Uses COMPARE to compare elements, passing AUX as auxiliary
850 push_heap (void *array, size_t count, size_t size,
851 algo_compare_func *compare, const void *aux)
856 expensive_assert (count < 1 || is_heap (array, count - 1,
857 size, compare, aux));
858 for (i = count; i > 1; i /= 2)
860 char *parent = first + (i / 2 - 1) * size;
861 char *element = first + (i - 1) * size;
862 if (compare (parent, element, aux) < 0)
863 SWAP (parent, element, size);
867 expensive_assert (is_heap (array, count, size, compare, aux));
870 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
871 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
872 may be smaller than its children. This function fixes that,
873 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
874 compare elements, passing AUX as auxiliary data. */
876 heapify (void *array, size_t count, size_t size,
878 algo_compare_func *compare, const void *aux)
884 size_t left = 2 * idx;
885 size_t right = left + 1;
886 size_t largest = idx;
889 && compare (first + size * (left - 1),
890 first + size * (idx - 1), aux) > 0)
894 && compare (first + size * (right - 1),
895 first + size * (largest - 1), aux) > 0)
901 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
906 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
907 all COUNT elements form a heap. This function moves the
908 largest element in the heap to the final position in ARRAY and
909 reforms a heap of the remaining COUNT - 1 elements at the
910 beginning of ARRAY. Uses COMPARE to compare elements, passing
911 AUX as auxiliary data. */
913 pop_heap (void *array, size_t count, size_t size,
914 algo_compare_func *compare, const void *aux)
918 expensive_assert (is_heap (array, count, size, compare, aux));
919 SWAP (first, first + (count - 1) * size, size);
920 heapify (first, count - 1, size, 1, compare, aux);
921 expensive_assert (count < 1 || is_heap (array, count - 1,
922 size, compare, aux));
925 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
926 a heap. Uses COMPARE to compare elements, passing AUX as
929 make_heap (void *array, size_t count, size_t size,
930 algo_compare_func *compare, const void *aux)
934 for (idx = count / 2; idx >= 1; idx--)
935 heapify (array, count, size, idx, compare, aux);
936 expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
939 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
940 all COUNT elements form a heap. This function turns the heap
941 into a fully sorted array. Uses COMPARE to compare elements,
942 passing AUX as auxiliary data. */
944 sort_heap (void *array, size_t count, size_t size,
945 algo_compare_func *compare, const void *aux)
950 expensive_assert (is_heap (array, count, size, compare, aux));
951 for (idx = count; idx >= 2; idx--)
953 SWAP (first, first + (idx - 1) * size, size);
954 heapify (array, idx - 1, size, 1, compare, aux);
956 expensive_assert (is_sorted (array, count, size, compare, aux));
959 /* ARRAY contains COUNT elements of SIZE bytes each. This
960 function tests whether ARRAY is a heap and returns true if so,
961 false otherwise. Uses COMPARE to compare elements, passing
962 AUX as auxiliary data. */
964 is_heap (const void *array, size_t count, size_t size,
965 algo_compare_func *compare, const void *aux)
967 const char *first = array;
970 for (child = 2; child <= count; child++)
972 size_t parent = child / 2;
973 if (compare (first + (parent - 1) * size,
974 first + (child - 1) * size, aux) < 0)