1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
93 #include "algorithm.h"
100 /* Some of the assertions in this file are very expensive. If
101 we're optimizing, don't include them. */
107 /* Finds an element in ARRAY, which contains COUNT elements of
108 SIZE bytes each, using COMPARE for comparisons. Returns the
109 first element in ARRAY that matches TARGET, or a null pointer
110 on failure. AUX is passed to each comparison as auxiliary
113 find (const void *array, size_t count, size_t size,
115 algo_compare_func *compare, void *aux)
117 const unsigned char *element = array;
121 if (compare (target, element, aux) == 0)
122 return (void *) element;
130 /* Counts and return the number of elements in ARRAY, which
131 contains COUNT elements of SIZE bytes each, which are equal to
132 ELEMENT as compared with COMPARE. AUX is passed as auxiliary
135 count_equal (const void *array, size_t count, size_t size,
137 algo_compare_func *compare, void *aux)
139 const unsigned char *first = array;
140 size_t equal_cnt = 0;
144 if (compare (element, first, aux) == 0)
153 /* Counts and return the number of elements in ARRAY, which
154 contains COUNT elements of SIZE bytes each, for which
155 PREDICATE returns nonzero. AUX is passed as auxiliary data to
158 count_if (const void *array, size_t count, size_t size,
159 algo_predicate_func *predicate, void *aux)
161 const unsigned char *first = array;
162 size_t nonzero_cnt = 0;
166 if (predicate (first, aux) != 0)
175 /* Byte-wise swap two items of size SIZE. */
176 #define SWAP(a, b, size) \
179 register size_t __size = (size); \
180 register char *__a = (a), *__b = (b); \
186 } while (--__size > 0); \
189 /* Makes the elements in ARRAY unique, by moving up duplicates,
190 and returns the new number of elements in the array. Sorted
191 arrays only. Arguments same as for sort() above. */
193 unique (void *array, size_t count, size_t size,
194 algo_compare_func *compare, void *aux)
197 char *last = first + size * count;
198 char *result = array;
205 assert (adjacent_find_equal (array, count, size, compare, aux) == NULL);
209 if (compare (result, first, aux))
213 memcpy (result, first, size);
220 /* Helper function that calls sort(), then unique(). */
222 sort_unique (void *array, size_t count, size_t size,
223 algo_compare_func *compare, void *aux)
225 sort (array, count, size, compare, aux);
226 return unique (array, count, size, compare, aux);
229 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
230 each, so that the elements for which PREDICATE returns nonzero
231 precede those for which PREDICATE returns zero. AUX is
232 passed to each predicate as auxiliary data. Returns the
233 number of elements for which PREDICATE returns nonzero. Not
236 partition (void *array, size_t count, size_t size,
237 algo_predicate_func *predicate, void *aux)
239 size_t nonzero_cnt = count;
241 char *last = first + nonzero_cnt * size;
245 /* Move FIRST forward to point to first element that fails
251 else if (!predicate (first, aux))
258 /* Move LAST backward to point to last element that passes
266 else if (predicate (last, aux))
272 /* By swapping FIRST and LAST we extend the starting and
273 ending sequences that pass and fail, respectively,
275 SWAP (first, last, size);
280 assert (is_partitioned (array, count, size, nonzero_cnt, predicate, aux));
284 /* Checks whether ARRAY, which contains COUNT elements of SIZE
285 bytes each, is partitioned such that PREDICATE returns nonzero
286 for the first NONZERO_CNT elements and zero for the remaining
287 elements. AUX is passed as auxiliary data to PREDICATE. */
289 is_partitioned (const void *array, size_t count, size_t size,
291 algo_predicate_func *predicate, void *aux)
293 const unsigned char *first = array;
296 assert (nonzero_cnt <= count);
297 for (idx = 0; idx < nonzero_cnt; idx++)
298 if (predicate (first + idx * size, aux) == 0)
300 for (idx = nonzero_cnt; idx < count; idx++)
301 if (predicate (first + idx * size, aux) != 0)
306 /* A algo_random_func that uses random.h. */
308 algo_default_random (unsigned max, void *aux UNUSED)
310 return rng_get_unsigned (pspp_rng ()) % max;
313 /* Randomly reorders ARRAY, which contains COUNT elements of SIZE
314 bytes each. Uses RANDOM as a source of random data, passing
315 AUX as the auxiliary data. RANDOM may be null to use a
316 default random source. */
318 random_shuffle (void *array_, size_t count, size_t size,
319 algo_random_func *random, void *aux)
321 unsigned char *array = array_;
325 random = algo_default_random;
327 for (i = 1; i < count; i++)
328 SWAP (array + i * size, array + random (i + 1, aux) * size, size);
331 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
332 RESULT, except that elements for which PREDICATE is false are
333 not copied. Returns the number of elements copied. AUX is
334 passed to PREDICATE as auxiliary data. */
336 copy_if (const void *array, size_t count, size_t size,
338 algo_predicate_func *predicate, void *aux)
340 const unsigned char *input = array;
341 const unsigned char *last = input + size * count;
342 unsigned char *output = result;
343 size_t nonzero_cnt = 0;
347 if (predicate (input, aux))
349 memcpy (output, input, size);
357 assert (nonzero_cnt == count_if (array, count, size, predicate, aux));
358 assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux));
363 /* A predicate and its auxiliary data. */
366 algo_predicate_func *predicate;
371 not (const void *data, void *pred_aux_)
373 const struct pred_aux *pred_aux = pred_aux_;
375 return !pred_aux->predicate (data, pred_aux->aux);
378 /* Removes elements equal to ELEMENT from ARRAY, which consists
379 of COUNT elements of SIZE bytes each. Returns the number of
380 remaining elements. AUX is passed to COMPARE as auxiliary
383 remove_equal (void *array, size_t count, size_t size,
385 algo_compare_func *compare, void *aux)
387 unsigned char *first = array;
388 unsigned char *last = first + count * size;
389 unsigned char *result;
395 if (compare (first, element, aux) == 0)
409 if (compare (first, element, aux) == 0)
415 memcpy (result, first, size);
420 assert (count_equal (array, count, size, element, compare, aux) == 0);
424 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
425 RESULT, except that elements for which PREDICATE is true are
426 not copied. Returns the number of elements copied. AUX is
427 passed to PREDICATE as auxiliary data. */
429 remove_copy_if (const void *array, size_t count, size_t size,
431 algo_predicate_func *predicate, void *aux)
433 struct pred_aux pred_aux;
434 pred_aux.predicate = predicate;
436 return copy_if (array, count, size, result, not, &pred_aux);
439 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
440 a binary search. Returns any element that equals VALUE, if
441 one exists, or a null pointer otherwise. ARRAY must ordered
442 according to COMPARE. AUX is passed to COMPARE as auxiliary
445 binary_search (const void *array, size_t count, size_t size,
447 algo_compare_func *compare, void *aux)
449 assert (array != NULL);
450 assert (count <= INT_MAX);
451 assert (compare != NULL);
455 const unsigned char *first = array;
457 int high = count - 1;
461 int middle = (low + high) / 2;
462 const unsigned char *element = first + middle * size;
463 int cmp = compare (value, element, aux);
470 return (void *) element;
474 assert (find (array, count, size, value, compare, aux) == NULL);
478 /* Lexicographically compares ARRAY1, which contains COUNT1
479 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
480 elements of SIZE bytes, according to COMPARE. Returns a
481 strcmp()-type result. AUX is passed to COMPARE as auxiliary
484 lexicographical_compare_3way (const void *array1, size_t count1,
485 const void *array2, size_t count2,
487 algo_compare_func *compare, void *aux)
489 const unsigned char *first1 = array1;
490 const unsigned char *first2 = array2;
491 size_t min_count = count1 < count2 ? count1 : count2;
493 while (min_count > 0)
495 int cmp = compare (first1, first2, aux);
504 return count1 < count2 ? -1 : count1 > count2;
507 /* If you consider tuning this algorithm, you should consult first:
508 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
509 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
516 /* Discontinue quicksort algorithm when partition gets below this size.
517 This particular magic number was chosen to work best on a Sun 4/260. */
520 /* Stack node declarations used to store unfulfilled partition obligations. */
527 /* The next 4 #defines implement a very fast in-line stack abstraction. */
528 /* The stack needs log (total_elements) entries (we could even subtract
529 log(MAX_THRESH)). Since total_elements has type size_t, we get as
530 upper bound for log (total_elements):
531 bits per byte (CHAR_BIT) * sizeof(size_t). */
532 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
533 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
534 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
535 #define STACK_NOT_EMPTY (stack < top)
538 /* Order size using quicksort. This implementation incorporates
539 four optimizations discussed in Sedgewick:
541 1. Non-recursive, using an explicit stack of pointer that store the
542 next array partition to sort. To save time, this maximum amount
543 of space required to store an array of SIZE_MAX is allocated on the
544 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
545 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
546 Pretty cheap, actually.
548 2. Chose the pivot element using a median-of-three decision tree.
549 This reduces the probability of selecting a bad pivot value and
550 eliminates certain extraneous comparisons.
552 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
553 insertion sort to order the MAX_THRESH items within each partition.
554 This is a big win, since insertion sort is faster for small, mostly
555 sorted array segments.
557 4. The larger of the two sub-partitions is always pushed onto the
558 stack first, with the algorithm then concentrating on the
559 smaller partition. This *guarantees* no more than log (total_elems)
560 stack size is needed (actually O(1) in this case)! */
563 sort (void *array, size_t count, size_t size,
564 algo_compare_func *compare, void *aux)
566 char *const first = array;
567 const size_t max_thresh = MAX_THRESH * size;
570 /* Avoid lossage with unsigned arithmetic below. */
573 if (count > MAX_THRESH)
576 char *hi = &lo[size * (count - 1)];
577 stack_node stack[STACK_SIZE];
578 stack_node *top = stack + 1;
580 while (STACK_NOT_EMPTY)
585 /* Select median value from among LO, MID, and HI. Rearrange
586 LO and HI so the three values are sorted. This lowers the
587 probability of picking a pathological pivot value and
588 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
591 char *mid = lo + size * ((hi - lo) / size >> 1);
593 if (compare (mid, lo, aux) < 0)
594 SWAP (mid, lo, size);
595 if (compare (hi, mid, aux) < 0)
596 SWAP (mid, hi, size);
599 if (compare (mid, lo, aux) < 0)
600 SWAP (mid, lo, size);
603 left_ptr = lo + size;
604 right_ptr = hi - size;
606 /* Here's the famous ``collapse the walls'' section of quicksort.
607 Gotta like those tight inner loops! They are the main reason
608 that this algorithm runs much faster than others. */
611 while (compare (left_ptr, mid, aux) < 0)
614 while (compare (mid, right_ptr, aux) < 0)
617 if (left_ptr < right_ptr)
619 SWAP (left_ptr, right_ptr, size);
622 else if (mid == right_ptr)
627 else if (left_ptr == right_ptr)
634 while (left_ptr <= right_ptr);
636 /* Set up pointers for next iteration. First determine whether
637 left and right partitions are below the threshold size. If so,
638 ignore one or both. Otherwise, push the larger partition's
639 bounds on the stack and continue sorting the smaller one. */
641 if ((size_t) (right_ptr - lo) <= max_thresh)
643 if ((size_t) (hi - left_ptr) <= max_thresh)
644 /* Ignore both small partitions. */
647 /* Ignore small left partition. */
650 else if ((size_t) (hi - left_ptr) <= max_thresh)
651 /* Ignore small right partition. */
653 else if ((right_ptr - lo) > (hi - left_ptr))
655 /* Push larger left partition indices. */
656 PUSH (lo, right_ptr);
661 /* Push larger right partition indices. */
668 /* Once the FIRST array is partially sorted by quicksort the rest
669 is completely sorted using insertion sort, since this is efficient
670 for partitions below MAX_THRESH size. FIRST points to the beginning
671 of the array to sort, and END_PTR points at the very last element in
672 the array (*not* one beyond it!). */
674 #define min(x, y) ((x) < (y) ? (x) : (y))
677 char *const end_ptr = &first[size * (count - 1)];
678 char *tmp_ptr = first;
679 char *thresh = min(end_ptr, first + max_thresh);
680 register char *run_ptr;
682 /* Find smallest element in first threshold and place it at the
683 array's beginning. This is the smallest array element,
684 and the operation speeds up insertion sort's inner loop. */
686 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
687 if (compare (run_ptr, tmp_ptr, aux) < 0)
690 if (tmp_ptr != first)
691 SWAP (tmp_ptr, first, size);
693 /* Insertion sort, running from left-hand-side up to right-hand-side. */
695 run_ptr = first + size;
696 while ((run_ptr += size) <= end_ptr)
698 tmp_ptr = run_ptr - size;
699 while (compare (run_ptr, tmp_ptr, aux) < 0)
703 if (tmp_ptr != run_ptr)
707 trav = run_ptr + size;
708 while (--trav >= run_ptr)
713 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
721 assert (is_sorted (array, count, size, compare, aux));
724 /* Tests whether ARRAY, which contains COUNT elements of SIZE
725 bytes each, is sorted in order according to COMPARE. AUX is
726 passed to COMPARE as auxiliary data. */
728 is_sorted (const void *array, size_t count, size_t size,
729 algo_compare_func *compare, void *aux)
731 const unsigned char *first = array;
734 for (idx = 0; idx + 1 < count; idx++)
735 if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0)
741 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
742 into RESULT, and returns the number of elements written to
743 RESULT. If a value appears M times in ARRAY1 and N times in
744 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
745 and ARRAY2 must be sorted, and RESULT is sorted and stable.
746 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
747 each SIZE bytes. AUX is passed to COMPARE as auxiliary
749 size_t set_difference (const void *array1, size_t count1,
750 const void *array2, size_t count2,
753 algo_compare_func *compare, void *aux)
755 const unsigned char *first1 = array1;
756 const unsigned char *last1 = first1 + count1 * size;
757 const unsigned char *first2 = array2;
758 const unsigned char *last2 = first2 + count2 * size;
759 unsigned char *result = result_;
760 size_t result_count = 0;
762 while (first1 != last1 && first2 != last2)
764 int cmp = compare (first1, first2, aux);
767 memcpy (result, first1, size);
781 while (first1 != last1)
783 memcpy (result, first1, size);
792 /* Finds the first pair of adjacent equal elements in ARRAY,
793 which has COUNT elements of SIZE bytes. Returns the first
794 element in ARRAY such that COMPARE returns zero when it and
795 its successor element are compared, or a null pointer if no
796 such element exists. AUX is passed to COMPARE as auxiliary
799 adjacent_find_equal (const void *array, size_t count, size_t size,
800 algo_compare_func *compare, void *aux)
802 const unsigned char *first = array;
803 const unsigned char *last = first + count * size;
805 while (first < last && first + size < last)
807 if (compare (first, first + size, aux) == 0)
808 return (void *) first;
815 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
816 the first COUNT - 1 elements of these form a heap, followed by
817 a single element not part of the heap. This function adds the
818 final element, forming a heap of COUNT elements in ARRAY.
819 Uses COMPARE to compare elements, passing AUX as auxiliary
822 push_heap (void *array, size_t count, size_t size,
823 algo_compare_func *compare, void *aux)
825 unsigned char *first = array;
828 assert (count < 1 || is_heap (array, count - 1, size, compare, aux));
829 for (i = count; i > 1; i /= 2)
831 unsigned char *parent = first + (i / 2 - 1) * size;
832 unsigned char *element = first + (i - 1) * size;
833 if (compare (parent, element, aux) < 0)
834 SWAP (parent, element, size);
838 assert (is_heap (array, count, size, compare, aux));
841 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
842 the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1]
843 may be smaller than its children. This function fixes that,
844 so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to
845 compare elements, passing AUX as auxiliary data. */
847 heapify (void *array, size_t count, size_t size,
849 algo_compare_func *compare, void *aux)
851 unsigned char *first = array;
855 size_t left = 2 * idx;
856 size_t right = left + 1;
857 size_t largest = idx;
860 && compare (first + size * (left - 1),
861 first + size * (idx - 1), aux) > 0)
865 && compare (first + size * (right - 1),
866 first + size * (largest - 1), aux) > 0)
872 SWAP (first + size * (idx - 1), first + size * (largest - 1), size);
877 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
878 all COUNT elements form a heap. This function moves the
879 largest element in the heap to the final position in ARRAY and
880 reforms a heap of the remaining COUNT - 1 elements at the
881 beginning of ARRAY. Uses COMPARE to compare elements, passing
882 AUX as auxiliary data. */
884 pop_heap (void *array, size_t count, size_t size,
885 algo_compare_func *compare, void *aux)
887 unsigned char *first = array;
889 assert (is_heap (array, count, size, compare, aux));
890 SWAP (first, first + (count - 1) * size, size);
891 heapify (first, count - 1, size, 1, compare, aux);
892 assert (count < 1 || is_heap (array, count - 1, size, compare, aux));
895 /* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
896 a heap. Uses COMPARE to compare elements, passing AUX as
899 make_heap (void *array, size_t count, size_t size,
900 algo_compare_func *compare, void *aux)
904 for (idx = count / 2; idx >= 1; idx--)
905 heapify (array, count, size, idx, compare, aux);
906 assert (count < 1 || is_heap (array, count, size, compare, aux));
909 /* ARRAY contains COUNT elements of SIZE bytes each. Initially
910 all COUNT elements form a heap. This function turns the heap
911 into a fully sorted array. Uses COMPARE to compare elements,
912 passing AUX as auxiliary data. */
914 sort_heap (void *array, size_t count, size_t size,
915 algo_compare_func *compare, void *aux)
917 unsigned char *first = array;
920 assert (is_heap (array, count, size, compare, aux));
921 for (idx = count; idx >= 2; idx--)
923 SWAP (first, first + (idx - 1) * size, size);
924 heapify (array, idx - 1, size, 1, compare, aux);
926 assert (is_sorted (array, count, size, compare, aux));
929 /* ARRAY contains COUNT elements of SIZE bytes each. This
930 function tests whether ARRAY is a heap and returns 1 if so, 0
931 otherwise. Uses COMPARE to compare elements, passing AUX as
934 is_heap (const void *array, size_t count, size_t size,
935 algo_compare_func *compare, void *aux)
937 const unsigned char *first = array;
940 for (child = 2; child <= count; child++)
942 size_t parent = child / 2;
943 if (compare (first + (parent - 1) * size,
944 first + (child - 1) * size, aux) < 0)