1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* Copyright (C) 2001 Free Software Foundation, Inc.
22 This file is part of the GNU ISO C++ Library. This library is free
23 software; you can redistribute it and/or modify it under the
24 terms of the GNU General Public License as published by the
25 Free Software Foundation; either version 2, or (at your option)
28 This library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License along
34 with this library; see the file COPYING. If not, write to the Free
35 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
38 As a special exception, you may use this file as part of a free software
39 library without restriction. Specifically, if other files instantiate
40 templates or use macros or inline functions from this file, or you compile
41 this file and link it with other files to produce an executable, this
42 file does not by itself cause the resulting executable to be covered by
43 the GNU General Public License. This exception does not however
44 invalidate any other reasons why the executable file might be covered by
45 the GNU General Public License. */
50 * Hewlett-Packard Company
52 * Permission to use, copy, modify, distribute and sell this software
53 * and its documentation for any purpose is hereby granted without fee,
54 * provided that the above copyright notice appear in all copies and
55 * that both that copyright notice and this permission notice appear
56 * in supporting documentation. Hewlett-Packard Company makes no
57 * representations about the suitability of this software for any
58 * purpose. It is provided "as is" without express or implied warranty.
62 * Silicon Graphics Computer Systems, Inc.
64 * Permission to use, copy, modify, distribute and sell this software
65 * and its documentation for any purpose is hereby granted without fee,
66 * provided that the above copyright notice appear in all copies and
67 * that both that copyright notice and this permission notice appear
68 * in supporting documentation. Silicon Graphics makes no
69 * representations about the suitability of this software for any
70 * purpose. It is provided "as is" without express or implied warranty.
73 /* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
74 This file is part of the GNU C Library.
75 Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
77 The GNU C Library is free software; you can redistribute it and/or
78 modify it under the terms of the GNU Lesser General Public
79 License as published by the Free Software Foundation; either
80 version 2.1 of the License, or (at your option) any later version.
82 The GNU C Library is distributed in the hope that it will be useful,
83 but WITHOUT ANY WARRANTY; without even the implied warranty of
84 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85 Lesser General Public License for more details.
87 You should have received a copy of the GNU Lesser General Public
88 License along with the GNU C Library; if not, write to the Free
89 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
93 #include "algorithm.h"
101 /* Finds an element in ARRAY, which contains COUNT elements of
102 SIZE bytes each, using COMPARE for comparisons. Returns the
103 first element in ARRAY that matches TARGET, or a null pointer
104 on failure. AUX is passed to each comparison as auxiliary
106 void *find (const void *array, size_t count, size_t size,
108 algo_compare_func *compare, void *aux)
110 const unsigned char *element = array;
114 if (compare (target, element, aux) == 0)
115 return (void *) element;
123 /* Byte-wise swap two items of size SIZE. */
124 #define SWAP(a, b, size) \
127 register size_t __size = (size); \
128 register char *__a = (a), *__b = (b); \
134 } while (--__size > 0); \
137 /* Makes the elements in ARRAY unique, by moving up duplicates,
138 and returns the new number of elements in the array. Sorted
139 arrays only. Arguments same as for sort() above. */
141 unique (void *array, size_t count, size_t size,
142 algo_compare_func *compare, void *aux)
145 char *last = first + size * count;
146 char *result = array;
154 if (compare (result, first, aux))
158 memcpy (result, first, size);
165 /* Helper function that calls sort(), then unique(). */
167 sort_unique (void *array, size_t count, size_t size,
168 algo_compare_func *compare, void *aux)
170 sort (array, count, size, compare, aux);
171 return unique (array, count, size, compare, aux);
174 /* Reorders ARRAY, which contains COUNT elements of SIZE bytes
175 each, so that the elements for which PREDICATE returns nonzero
176 precede those for which PREDICATE returns zero. AUX is
177 passed to each predicate as auxiliary data. Returns the
178 number of elements for which PREDICATE returns nonzero. Not
181 partition (void *array, size_t count, size_t size,
182 algo_predicate_func *predicate, void *aux)
185 char *last = first + count * size;
189 /* Move FIRST forward to point to first element that fails
195 else if (!predicate (first, aux))
202 /* Move LAST backward to point to last element that passes
210 else if (predicate (last, aux))
216 /* By swapping FIRST and LAST we extend the starting and
217 ending sequences that pass and fail, respectively,
219 SWAP (first, last, size);
224 /* A algo_random_func that uses random.h. */
226 algo_default_random (unsigned max, void *aux unused)
228 return rng_get_unsigned (pspp_rng ()) % max;
231 /* Randomly reorders ARRAY, which contains COUNT elements of SIZE
232 bytes each. Uses RANDOM as a source of random data, passing
233 AUX as the auxiliary data. RANDOM may be null to use a
234 default random source. */
236 random_shuffle (void *array_, size_t count, size_t size,
237 algo_random_func *random, void *aux)
239 unsigned char *array = array_;
243 random = algo_default_random;
245 for (i = 1; i < count; i++)
246 SWAP (array + i * size, array + random (i + 1, aux) * size, size);
249 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
250 RESULT, except that elements for which PREDICATE is false are
251 not copied. Returns the number of elements copied. AUX is
252 passed to PREDICATE as auxiliary data. */
254 copy_if (const void *array, size_t count, size_t size,
256 algo_predicate_func *predicate, void *aux)
258 const unsigned char *input = array;
259 const unsigned char *last = input + size * count;
260 unsigned char *output = result;
264 if (predicate (input, aux))
266 memcpy (output, input, size);
278 /* A predicate and its auxiliary data. */
281 algo_predicate_func *predicate;
286 not (const void *data, void *pred_aux_)
288 const struct pred_aux *pred_aux = pred_aux_;
290 return !pred_aux->predicate (data, pred_aux->aux);
293 /* Removes elements equal to ELEMENT from ARRAY, which consists
294 of COUNT elements of SIZE bytes each. Returns the number of
295 remaining elements. AUX is passed to COMPARE as auxiliary
298 remove_equal (void *array, size_t count, size_t size,
300 algo_compare_func *compare, void *aux)
302 unsigned char *first = array;
303 unsigned char *last = first + count * size;
304 unsigned char *result;
310 if (compare (first, element, aux) == 0)
324 if (compare (first, element, aux) == 0)
330 memcpy (result, first, size);
337 /* Copies the COUNT elements of SIZE bytes each from ARRAY to
338 RESULT, except that elements for which PREDICATE is true are
339 not copied. Returns the number of elements copied. AUX is
340 passed to PREDICATE as auxiliary data. */
342 remove_copy_if (const void *array, size_t count, size_t size,
344 algo_predicate_func *predicate, void *aux)
346 struct pred_aux pred_aux;
347 pred_aux.predicate = predicate;
349 return copy_if (array, count, size, result, not, &pred_aux);
352 /* Searches ARRAY, which contains COUNT of SIZE bytes each, using
353 a binary search. Returns any element that equals VALUE, if
354 one exists, or a null pointer otherwise. ARRAY must ordered
355 according to COMPARE. AUX is passed to COMPARE as auxiliary
358 binary_search (const void *array, size_t count, size_t size,
360 algo_compare_func *compare, void *aux)
362 assert (array != NULL);
363 assert (count <= INT_MAX);
364 assert (compare != NULL);
368 const unsigned char *first = array;
370 int high = count - 1;
374 int middle = (low + high) / 2;
375 const unsigned char *element = first + middle * size;
376 int cmp = compare (value, element, aux);
383 return (void *) element;
389 /* Lexicographically compares ARRAY1, which contains COUNT1
390 elements of SIZE bytes each, to ARRAY2, which contains COUNT2
391 elements of SIZE bytes, according to COMPARE. Returns a
392 strcmp()-type result. AUX is passed to COMPARE as auxiliary
395 lexicographical_compare (const void *array1, size_t count1,
396 const void *array2, size_t count2,
398 algo_compare_func *compare, void *aux)
400 const unsigned char *first1 = array1;
401 const unsigned char *first2 = array2;
402 size_t min_count = count1 < count2 ? count1 : count2;
404 while (min_count > 0)
406 int cmp = compare (first1, first2, aux);
415 return count1 < count2 ? -1 : count1 > count2;
418 /* If you consider tuning this algorithm, you should consult first:
419 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
420 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
427 /* Discontinue quicksort algorithm when partition gets below this size.
428 This particular magic number was chosen to work best on a Sun 4/260. */
431 /* Stack node declarations used to store unfulfilled partition obligations. */
438 /* The next 4 #defines implement a very fast in-line stack abstraction. */
439 /* The stack needs log (total_elements) entries (we could even subtract
440 log(MAX_THRESH)). Since total_elements has type size_t, we get as
441 upper bound for log (total_elements):
442 bits per byte (CHAR_BIT) * sizeof(size_t). */
443 #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
444 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
445 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
446 #define STACK_NOT_EMPTY (stack < top)
449 /* Order size using quicksort. This implementation incorporates
450 four optimizations discussed in Sedgewick:
452 1. Non-recursive, using an explicit stack of pointer that store the
453 next array partition to sort. To save time, this maximum amount
454 of space required to store an array of SIZE_MAX is allocated on the
455 stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
456 only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
457 Pretty cheap, actually.
459 2. Chose the pivot element using a median-of-three decision tree.
460 This reduces the probability of selecting a bad pivot value and
461 eliminates certain extraneous comparisons.
463 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
464 insertion sort to order the MAX_THRESH items within each partition.
465 This is a big win, since insertion sort is faster for small, mostly
466 sorted array segments.
468 4. The larger of the two sub-partitions is always pushed onto the
469 stack first, with the algorithm then concentrating on the
470 smaller partition. This *guarantees* no more than log (total_elems)
471 stack size is needed (actually O(1) in this case)! */
474 sort (void *const pbase, size_t total_elems, size_t size,
475 algo_compare_func *cmp, void *aux)
477 register char *base_ptr = (char *) pbase;
479 const size_t max_thresh = MAX_THRESH * size;
481 if (total_elems == 0)
482 /* Avoid lossage with unsigned arithmetic below. */
485 if (total_elems > MAX_THRESH)
488 char *hi = &lo[size * (total_elems - 1)];
489 stack_node stack[STACK_SIZE];
490 stack_node *top = stack + 1;
492 while (STACK_NOT_EMPTY)
497 /* Select median value from among LO, MID, and HI. Rearrange
498 LO and HI so the three values are sorted. This lowers the
499 probability of picking a pathological pivot value and
500 skips a comparison for both the LEFT_PTR and RIGHT_PTR in
503 char *mid = lo + size * ((hi - lo) / size >> 1);
505 if ((*cmp) ((void *) mid, (void *) lo, aux) < 0)
506 SWAP (mid, lo, size);
507 if ((*cmp) ((void *) hi, (void *) mid, aux) < 0)
508 SWAP (mid, hi, size);
511 if ((*cmp) ((void *) mid, (void *) lo, aux) < 0)
512 SWAP (mid, lo, size);
515 left_ptr = lo + size;
516 right_ptr = hi - size;
518 /* Here's the famous ``collapse the walls'' section of quicksort.
519 Gotta like those tight inner loops! They are the main reason
520 that this algorithm runs much faster than others. */
523 while ((*cmp) ((void *) left_ptr, (void *) mid, aux) < 0)
526 while ((*cmp) ((void *) mid, (void *) right_ptr, aux) < 0)
529 if (left_ptr < right_ptr)
531 SWAP (left_ptr, right_ptr, size);
534 else if (mid == right_ptr)
539 else if (left_ptr == right_ptr)
546 while (left_ptr <= right_ptr);
548 /* Set up pointers for next iteration. First determine whether
549 left and right partitions are below the threshold size. If so,
550 ignore one or both. Otherwise, push the larger partition's
551 bounds on the stack and continue sorting the smaller one. */
553 if ((size_t) (right_ptr - lo) <= max_thresh)
555 if ((size_t) (hi - left_ptr) <= max_thresh)
556 /* Ignore both small partitions. */
559 /* Ignore small left partition. */
562 else if ((size_t) (hi - left_ptr) <= max_thresh)
563 /* Ignore small right partition. */
565 else if ((right_ptr - lo) > (hi - left_ptr))
567 /* Push larger left partition indices. */
568 PUSH (lo, right_ptr);
573 /* Push larger right partition indices. */
580 /* Once the BASE_PTR array is partially sorted by quicksort the rest
581 is completely sorted using insertion sort, since this is efficient
582 for partitions below MAX_THRESH size. BASE_PTR points to the beginning
583 of the array to sort, and END_PTR points at the very last element in
584 the array (*not* one beyond it!). */
586 #define min(x, y) ((x) < (y) ? (x) : (y))
589 char *const end_ptr = &base_ptr[size * (total_elems - 1)];
590 char *tmp_ptr = base_ptr;
591 char *thresh = min(end_ptr, base_ptr + max_thresh);
592 register char *run_ptr;
594 /* Find smallest element in first threshold and place it at the
595 array's beginning. This is the smallest array element,
596 and the operation speeds up insertion sort's inner loop. */
598 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
599 if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, aux) < 0)
602 if (tmp_ptr != base_ptr)
603 SWAP (tmp_ptr, base_ptr, size);
605 /* Insertion sort, running from left-hand-side up to right-hand-side. */
607 run_ptr = base_ptr + size;
608 while ((run_ptr += size) <= end_ptr)
610 tmp_ptr = run_ptr - size;
611 while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, aux) < 0)
615 if (tmp_ptr != run_ptr)
619 trav = run_ptr + size;
620 while (--trav >= run_ptr)
625 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
634 /* Computes the generalized set difference, ARRAY1 minus ARRAY2,
635 into RESULT, and returns the number of elements written to
636 RESULT. If a value appears M times in ARRAY1 and N times in
637 ARRAY2, then it will appear max(M - N, 0) in RESULT. ARRAY1
638 and ARRAY2 must be sorted, and RESULT is sorted and stable.
639 ARRAY1 consists of COUNT1 elements, ARRAY2 of COUNT2 elements,
640 each SIZE bytes. AUX is passed to COMPARE as auxiliary
642 size_t set_difference (const void *array1, size_t count1,
643 const void *array2, size_t count2,
646 algo_compare_func *compare, void *aux)
648 const unsigned char *first1 = array1;
649 const unsigned char *last1 = first1 + count1 * size;
650 const unsigned char *first2 = array2;
651 const unsigned char *last2 = first2 + count2 * size;
652 unsigned char *result = result_;
653 size_t result_count = 0;
655 while (first1 != last1 && first2 != last2)
657 int cmp = compare (first1, first2, aux);
660 memcpy (result, first1, size);
674 while (first1 != last1)
676 memcpy (result, first1, size);
685 /* Finds the first pair of adjacent equal elements in ARRAY,
686 which has COUNT elements of SIZE bytes. Returns the first
687 element in ARRAY such that COMPARE returns zero when it and
688 its successor element are compared, or a null pointer if no
689 such element exists. AUX is passed to COMPARE as auxiliary
692 adjacent_find_equal (const void *array, size_t count, size_t size,
693 algo_compare_func *compare, void *aux)
695 const unsigned char *first = array;
696 const unsigned char *last = first + count * size;
698 while (first < last && first + size < last)
700 if (compare (first, first + size, aux) == 0)
701 return (void *) first;