1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 /* Copyright (C) 2001 Free Software Foundation, Inc.
21 This file is part of the GNU ISO C++ Library. This library is free
22 software; you can redistribute it and/or modify it under the
23 terms of the GNU General Public License as published by the
24 Free Software Foundation; either version 2, or (at your option)
27 This library is distributed in the hope that it will be useful,
28 but WITHOUT ANY WARRANTY; without even the implied warranty of
29 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 GNU General Public License for more details.
32 You should have received a copy of the GNU General Public License along
33 with this library; see the file COPYING. If not, write to the Free
34 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
37 As a special exception, you may use this file as part of a free software
38 library without restriction. Specifically, if other files instantiate
39 templates or use macros or inline functions from this file, or you compile
40 this file and link it with other files to produce an executable, this
41 file does not by itself cause the resulting executable to be covered by
42 the GNU General Public License. This exception does not however
43 invalidate any other reasons why the executable file might be covered by
44 the GNU General Public License. */
49 * Hewlett-Packard Company
51 * Permission to use, copy, modify, distribute and sell this software
52 * and its documentation for any purpose is hereby granted without fee,
53 * provided that the above copyright notice appear in all copies and
54 * that both that copyright notice and this permission notice appear
55 * in supporting documentation. Hewlett-Packard Company makes no
56 * representations about the suitability of this software for any
57 * purpose. It is provided "as is" without express or implied warranty.
61 * Silicon Graphics Computer Systems, Inc.
63 * Permission to use, copy, modify, distribute and sell this software
64 * and its documentation for any purpose is hereby granted without fee,
65 * provided that the above copyright notice appear in all copies and
66 * that both that copyright notice and this permission notice appear
67 * in supporting documentation. Silicon Graphics makes no
68 * representations about the suitability of this software for any
69 * purpose. It is provided "as is" without express or implied warranty.
72 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
73 This file is part of the GNU C Library.
74 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
76 The GNU C Library is free software; you can redistribute it and/or
77 modify it under the terms of the GNU Lesser General Public
78 License as published by the Free Software Foundation; either
79 version 2.1 of the License, or (at your option) any later version.
81 The GNU C Library is distributed in the hope that it will be useful,
82 but WITHOUT ANY WARRANTY; without even the implied warranty of
83 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
84 Lesser General Public License for more details.
86 You should have received a copy of the GNU Lesser General Public
87 License along with the GNU C Library; if not, write to the Free
88 Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
97 #include <libpspp/assertion.h>
103 /* Finds an element in ARRAY, which contains COUNT elements of
104 SIZE bytes each, using COMPARE for comparisons. Returns the
105 first element in ARRAY that matches TARGET, or a null pointer
106 on failure. AUX is passed to each comparison as auxiliary
109 find (const void *array, size_t count, size_t size,
111 algo_compare_func *compare, const void *aux)
113 const char *element = array;
117 if (compare (target, element, aux) == 0)
118 return (void *) element;
126 /* Counts and returns the number of elements in ARRAY, which
127 contains COUNT elements of SIZE bytes each, which are equal to
128 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
131 count_equal (const void *array, size_t count, size_t size,
133 algo_compare_func *compare, const void *aux)
135 const char *first = array;
136 size_t equal_cnt = 0;
140 if (compare (element, first, aux) == 0)
149 /* Counts and returns the number of elements in ARRAY, which
150 contains COUNT elements of SIZE bytes each, for which
151 PREDICATE returns true. AUX is passed as auxiliary data to
154 count_if (const void *array, size_t count, size_t size,
155 algo_predicate_func *predicate, const void *aux)
157 const char *first = array;
162 if (predicate (first, aux) != 0)
171 /* Byte-wise swap two items of size SIZE. */
172 #define SWAP(a, b, size) \
175 register size_t __size = (size); \
176 register char *__a = (a), *__b = (b); \
182 } while (--__size > 0); \
185 /* Makes the elements in ARRAY unique, by moving up duplicates,
186 and returns the new number of elements in the array. Sorted
187 arrays only. Arguments same as for sort(). */
189 unique (void *array, size_t count, size_t size,
190 algo_compare_func *compare, const void *aux)
193 char *last = first + size * count;
194 char *result = array;
201 assert (adjacent_find_equal (array, count,
202 size, compare, aux) == NULL);
206 if (compare (result, first, aux))
210 memcpy (result, first, size);
217 /* Helper function that calls sort(), then unique(). */
219 sort_unique (void *array, size_t count, size_t size,
220 algo_compare_func *compare, const void *aux)
222 sort (array, count, size, compare, aux);
223 return unique (array, count, size, compare, aux);
226 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
227 each, so that the elements for which PREDICATE returns true
228 precede those for which PREDICATE returns zero. AUX is
229 passed to each predicate as auxiliary data. Returns the
230 number of elements for which PREDICATE returns true. Not
233 partition (void *array, size_t count, size_t size,
234 algo_predicate_func *predicate, const void *aux)
236 size_t true_cnt = count;
238 char *last = first + true_cnt * size;
242 /* Move FIRST forward to point to first element that fails
248 else if (!predicate (first, aux))
255 /* Move LAST backward to point to last element that passes
263 else if (predicate (last, aux))
269 /* By swapping FIRST and LAST we extend the starting and
270 ending sequences that pass and fail, respectively,
272 SWAP (first, last, size);
277 assert (is_partitioned (array, count, size, true_cnt, predicate, aux));
281 /* Checks whether ARRAY, which contains COUNT elements of SIZE
282 bytes each, is partitioned such that PREDICATE returns true
283 for the first TRUE_CNT elements and zero for the remaining
284 elements. AUX is passed as auxiliary data to PREDICATE. */
286 is_partitioned (const void *array, size_t count, size_t size,
288 algo_predicate_func *predicate, const void *aux)
290 const char *first = array;
293 assert (true_cnt <= count);
294 for (idx = 0; idx < true_cnt; idx++)
295 if (predicate (first + idx * size, aux) == 0)
297 for (idx = true_cnt; idx < count; idx++)
298 if (predicate (first + idx * size, aux) != 0)
303 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
304 RESULT, except that elements for which PREDICATE is false are
305 not copied. Returns the number of elements copied. AUX is
306 passed to PREDICATE as auxiliary data. */
308 copy_if (const void *array, size_t count, size_t size,
310 algo_predicate_func *predicate, const void *aux)
312 const char *input = array;
313 const char *last = input + size * count;
314 char *output = result;
315 size_t nonzero_cnt = 0;
319 if (predicate (input, aux))
321 memcpy (output, input, size);
329 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
330 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
335 /* Removes N elements starting at IDX from ARRAY, which consists
336 of COUNT elements of SIZE bytes each, by shifting the elements
337 following them, if any, into their position. */
339 remove_range (void *array_, size_t count, size_t size,
340 size_t idx, size_t n)
342 char *array = array_;
344 assert (array != NULL);
345 assert (idx <= count);
346 assert (idx + n <= count);
349 memmove (array + idx * size, array + (idx + n) * size,
350 size * (count - idx - n));
353 /* Removes element IDX from ARRAY, which consists of COUNT
354 elements of SIZE bytes each, by shifting the elements
355 following it, if any, into its position. */
357 remove_element (void *array, size_t count, size_t size,
360 remove_range (array, count, size, idx, 1);
363 /* Moves an element in ARRAY, which consists of COUNT elements of
364 SIZE bytes each, from OLD_IDX to NEW_IDX, shifting around
365 other elements as needed. Runs in O(abs(OLD_IDX - NEW_IDX))
368 move_element (void *array_, size_t count, size_t size,
369 size_t old_idx, size_t new_idx)
371 assert (array_ != NULL || count == 0);
372 assert (old_idx < count);
373 assert (new_idx < count);
375 if (old_idx != new_idx)
377 char *array = array_;
378 char *element = xmalloc (size);
379 char *new = array + new_idx * size;
380 char *old = array + old_idx * size;
382 memcpy (element, old, size);
384 memmove (new + size, new, (old_idx - new_idx) * size);
386 memmove (old, old + size, (new_idx - old_idx) * size);
387 memcpy (new, element, size);
393 /* A predicate and its auxiliary data. */
396 algo_predicate_func *predicate;
401 not (const void *data, const void *pred_aux_)
403 const struct pred_aux *pred_aux = pred_aux_;
405 return !pred_aux->predicate (data, pred_aux->aux);
408 /* Removes elements equal to ELEMENT from ARRAY, which consists
409 of COUNT elements of SIZE bytes each. Returns the number of
410 remaining elements. AUX is passed to COMPARE as auxiliary
413 remove_equal (void *array, size_t count, size_t size,
415 algo_compare_func *compare, const void *aux)
418 char *last = first + count * size;
425 if (compare (first, element, aux) == 0)
439 if (compare (first, element, aux) == 0)
445 memcpy (result, first, size);
450 assert (count_equal (array, count, size, element, compare, aux) == 0);
454 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
455 RESULT, except that elements for which PREDICATE is true are
456 not copied. Returns the number of elements copied. AUX is
457 passed to PREDICATE as auxiliary data. */
459 remove_copy_if (const void *array, size_t count, size_t size,
461 algo_predicate_func *predicate, const void *aux)
463 struct pred_aux pred_aux;
464 pred_aux.predicate = predicate;
466 return copy_if (array, count, size, result, not, &pred_aux);
469 /* Searches ARRAY, which contains COUNT elements of SIZE bytes each,
470 using a binary search. Returns any element that equals VALUE, if
471 one exists, or a null pointer otherwise. ARRAY must be ordered
472 according to COMPARE. AUX is passed to COMPARE as auxiliary
475 binary_search (const void *array, size_t count, size_t size,
477 algo_compare_func *compare, const void *aux)
479 assert (array != NULL);
480 assert (count <= INT_MAX);
481 assert (compare != NULL);
485 const char *first = array;
487 int high = count - 1;
491 int middle = (low + high) / 2;
492 const char *element = first + middle * size;
493 int cmp = compare (value, element, aux);
500 return (void *) element;
504 expensive_assert (find (array, count, size, value, compare, aux) == NULL);
508 /* Lexicographically compares ARRAY1, which contains COUNT1
509 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
510 elements of SIZE bytes, according to COMPARE. Returns a
511 strcmp()-type result. AUX is passed to COMPARE as auxiliary
514 lexicographical_compare_3way (const void *array1, size_t count1,
515 const void *array2, size_t count2,
517 algo_compare_func *compare, const void *aux)
519 const char *first1 = array1;
520 const char *first2 = array2;
521 size_t min_count = count1 < count2 ? count1 : count2;
523 while (min_count > 0)
525 int cmp = compare (first1, first2, aux);
534 return count1 < count2 ? -1 : count1 > count2;
537 /* If you consider tuning this algorithm, you should consult first:
538 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
539 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
545 /* Discontinue quicksort algorithm when partition gets below this size.
546 This particular magic number was chosen to work best on a Sun 4/260. */
549 /* Stack node declarations used to store unfulfilled partition obligations. */
556 /* The next 4 #defines implement a very fast in-line stack abstraction. */
557 /* The stack needs log (total_elements) entries (we could even subtract
558 log(MAX_THRESH)). Since total_elements has type size_t, we get as
559 upper bound for log (total_elements):
560 bits per byte (CHAR_BIT) * sizeof(size_t). */
561 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
562 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
563 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
564 #define STACK_NOT_EMPTY (stack < top)
567 /* Order size using quicksort. This implementation incorporates
568 four optimizations discussed in Sedgewick:
570 1. Non-recursive, using an explicit stack of pointers that store the
571 next array partition to sort. To save time, the maximum amount
572 of space required to store an array of SIZE_MAX is allocated on the
573 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
574 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
575 Pretty cheap, actually.
577 2. Choose the pivot element using a median-of-three decision tree.
578 This reduces the probability of selecting a bad pivot value and
579 eliminates certain extraneous comparisons.
581 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
582 insertion sort to order the MAX_THRESH items within each partition.
583 This is a big win, since insertion sort is faster for small, mostly
584 sorted array segments.
586 4. The larger of the two sub-partitions is always pushed onto the
587 stack first, with the algorithm then concentrating on the
588 smaller partition. This *guarantees* no more than log (total_elems)
589 stack size is needed (actually O(1) in this case)! */
592 sort (void *array, size_t count, size_t size,
593 algo_compare_func *compare, const void *aux)
595 char *const first = array;
596 const size_t max_thresh = MAX_THRESH * size;
599 /* Avoid lossage with unsigned arithmetic below. */
602 if (count > MAX_THRESH)
605 char *hi = &lo[size * (count - 1)];
606 stack_node stack[STACK_SIZE];
607 stack_node *top = stack + 1;
609 while (STACK_NOT_EMPTY)
614 /* Select median value from among LO, MID, and HI. Rearrange
615 LO and HI so the three values are sorted. This lowers the
616 probability of picking a pathological pivot value and
617 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
620 char *mid = lo + size * ((hi - lo) / size >> 1);
622 if (compare (mid, lo, aux) < 0)
623 SWAP (mid, lo, size);
624 if (compare (hi, mid, aux) < 0)
625 SWAP (mid, hi, size);
628 if (compare (mid, lo, aux) < 0)
629 SWAP (mid, lo, size);
632 left_ptr = lo + size;
633 right_ptr = hi - size;
635 /* Here's the famous ``collapse the walls'' section of quicksort.
636 Gotta like those tight inner loops! They are the main reason
637 that this algorithm runs much faster than others. */
640 while (compare (left_ptr, mid, aux) < 0)
643 while (compare (mid, right_ptr, aux) < 0)
646 if (left_ptr < right_ptr)
648 SWAP (left_ptr, right_ptr, size);
651 else if (mid == right_ptr)
656 else if (left_ptr == right_ptr)
663 while (left_ptr <= right_ptr);
665 /* Set up pointers for next iteration. First determine whether
666 left and right partitions are below the threshold size. If so,
667 ignore one or both. Otherwise, push the larger partition's
668 bounds on the stack and continue sorting the smaller one. */
670 if ((size_t) (right_ptr - lo) <= max_thresh)
672 if ((size_t) (hi - left_ptr) <= max_thresh)
673 /* Ignore both small partitions. */
676 /* Ignore small left partition. */
679 else if ((size_t) (hi - left_ptr) <= max_thresh)
680 /* Ignore small right partition. */
682 else if ((right_ptr - lo) > (hi - left_ptr))
684 /* Push larger left partition indices. */
685 PUSH (lo, right_ptr);
690 /* Push larger right partition indices. */
697 /* Once the FIRST array is partially sorted by quicksort the rest
698 is completely sorted using insertion sort, since this is efficient
699 for partitions below MAX_THRESH size. FIRST points to the beginning
700 of the array to sort, and END_PTR points at the very last element in
701 the array (*not* one beyond it!). */
704 char *const end_ptr = &first[size * (count - 1)];
705 char *tmp_ptr = first;
706 char *thresh = MIN (end_ptr, first + max_thresh);
707 register char *run_ptr;
709 /* Find smallest element in first threshold and place it at the
710 array's beginning. This is the smallest array element,
711 and the operation speeds up insertion sort's inner loop. */
713 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
714 if (compare (run_ptr, tmp_ptr, aux) < 0)
717 if (tmp_ptr != first)
718 SWAP (tmp_ptr, first, size);
720 /* Insertion sort, running from left-hand-side up to right-hand-side. */
722 run_ptr = first + size;
723 while ((run_ptr += size) <= end_ptr)
725 tmp_ptr = run_ptr - size;
726 while (compare (run_ptr, tmp_ptr, aux) < 0)
730 if (tmp_ptr != run_ptr)
734 trav = run_ptr + size;
735 while (--trav >= run_ptr)
740 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
748 assert (is_sorted (array, count, size, compare, aux));
751 /* Tests whether ARRAY, which contains COUNT elements of SIZE
752 bytes each, is sorted in order according to COMPARE. AUX is
753 passed to COMPARE as auxiliary data. */
755 is_sorted (const void *array, size_t count, size_t size,
756 algo_compare_func *compare, const void *aux)
758 const char *first = array;
761 for (idx = 0; idx + 1 < count; idx++)
762 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
768 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
769 into RESULT, and returns the number of elements written to
770 RESULT. If a value appears M times in ARRAY1 and N times in
771 ARRAY2, then it will appear max(M - N, 0) times in RESULT. ARRAY1
772 and ARRAY2 must be sorted, and RESULT is sorted and stable.
773 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
774 each SIZE bytes. AUX is passed to COMPARE as auxiliary
776 size_t set_difference (const void *array1, size_t count1,
777 const void *array2, size_t count2,
780 algo_compare_func *compare, const void *aux)
782 const char *first1 = array1;
783 const char *last1 = first1 + count1 * size;
784 const char *first2 = array2;
785 const char *last2 = first2 + count2 * size;
786 char *result = result_;
787 size_t result_count = 0;
789 while (first1 != last1 && first2 != last2)
791 int cmp = compare (first1, first2, aux);
794 memcpy (result, first1, size);
808 while (first1 != last1)
810 memcpy (result, first1, size);
819 /* Finds the first pair of adjacent equal elements in ARRAY,
820 which has COUNT elements of SIZE bytes. Returns the first
821 element in ARRAY such that COMPARE returns zero when it and
822 its successor element are compared, or a null pointer if no
823 such element exists. AUX is passed to COMPARE as auxiliary
826 adjacent_find_equal (const void *array, size_t count, size_t size,
827 algo_compare_func *compare, const void *aux)
829 const char *first = array;
830 const char *last = first + count * size;
832 while (first < last && first + size < last)
834 if (compare (first, first + size, aux) == 0)
835 return (void *) first;
842 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
843 the first COUNT - 1 elements of these form a heap, followed by
844 a single element not part of the heap. This function adds the
845 final element, forming a heap of COUNT elements in ARRAY.
846 Uses COMPARE to compare elements, passing AUX as auxiliary
849 push_heap (void *array, size_t count, size_t size,
850 algo_compare_func *compare, const void *aux)
855 expensive_assert (count < 1 || is_heap (array, count - 1,
856 size, compare, aux));
857 for (i = count; i > 1; i /= 2)
859 char *parent = first + (i / 2 - 1) * size;
860 char *element = first + (i - 1) * size;
861 if (compare (parent, element, aux) < 0)
862 SWAP (parent, element, size);
866 expensive_assert (is_heap (array, count, size, compare, aux));
869 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
870 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
871 may be smaller than its children. This function fixes that,
872 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
873 compare elements, passing AUX as auxiliary data. */
875 heapify (void *array, size_t count, size_t size,
877 algo_compare_func *compare, const void *aux)
883 size_t left = 2 * idx;
884 size_t right = left + 1;
885 size_t largest = idx;
888 && compare (first + size * (left - 1),
889 first + size * (idx - 1), aux) > 0)
893 && compare (first + size * (right - 1),
894 first + size * (largest - 1), aux) > 0)
900 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
905 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
906 all COUNT elements form a heap. This function moves the
907 largest element in the heap to the final position in ARRAY and
908 reforms a heap of the remaining COUNT - 1 elements at the
909 beginning of ARRAY. Uses COMPARE to compare elements, passing
910 AUX as auxiliary data. */
912 pop_heap (void *array, size_t count, size_t size,
913 algo_compare_func *compare, const void *aux)
917 expensive_assert (is_heap (array, count, size, compare, aux));
918 SWAP (first, first + (count - 1) * size, size);
919 heapify (first, count - 1, size, 1, compare, aux);
920 expensive_assert (count < 1 || is_heap (array, count - 1,
921 size, compare, aux));
924 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
925 a heap. Uses COMPARE to compare elements, passing AUX as
928 make_heap (void *array, size_t count, size_t size,
929 algo_compare_func *compare, const void *aux)
933 for (idx = count / 2; idx >= 1; idx--)
934 heapify (array, count, size, idx, compare, aux);
935 expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
938 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
939 all COUNT elements form a heap. This function turns the heap
940 into a fully sorted array. Uses COMPARE to compare elements,
941 passing AUX as auxiliary data. */
943 sort_heap (void *array, size_t count, size_t size,
944 algo_compare_func *compare, const void *aux)
949 expensive_assert (is_heap (array, count, size, compare, aux));
950 for (idx = count; idx >= 2; idx--)
952 SWAP (first, first + (idx - 1) * size, size);
953 heapify (array, idx - 1, size, 1, compare, aux);
955 expensive_assert (is_sorted (array, count, size, compare, aux));
958 /* ARRAY contains COUNT elements of SIZE bytes each. This
959 function tests whether ARRAY is a heap and returns true if so,
960 false otherwise. Uses COMPARE to compare elements, passing
961 AUX as auxiliary data. */
963 is_heap (const void *array, size_t count, size_t size,
964 algo_compare_func *compare, const void *aux)
966 const char *first = array;
969 for (child = 2; child <= count; child++)
971 size_t parent = child / 2;
972 if (compare (first + (parent - 1) * size,
973 first + (child - 1) * size, aux) < 0)