1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 #include <data/case-ordering.h>
26 #include <data/case.h>
27 #include <data/casereader.h>
28 #include <data/casewriter.h>
29 #include <data/casewriter-provider.h>
30 #include <data/settings.h>
31 #include <libpspp/alloc.h>
32 #include <libpspp/array.h>
33 #include <libpspp/assertion.h>
34 #include <math/merge.h>
37 #define _(msgid) gettext (msgid)
39 /* These should only be changed for testing purposes. */
/* Upper limit on the number of records a priority queue may hold:
   pqueue_create lowers its computed capacity to this value when the
   computed capacity is larger. */
41 int max_buffers = INT_MAX;
/* The members below are accessed through `struct sort_writer *'
   elsewhere in this file; the enclosing struct header is not visible
   in this excerpt. */
/* Ordering that cases are sorted by.  sort_create_writer stores a
   clone of the caller's ordering here and sort_casewriter_destroy
   destroys it. */
45 struct case_ordering *ordering;
/* Priority queue that buffers incoming cases until output_record
   pops them into a run. */
47 struct pqueue *pqueue;
/* Writer for the run currently being produced; compared against NULL
   to detect that no run is open. */
49 struct casewriter *run;
/* Casewriter class for the sorting writer; its initializer appears
   later in this file. */
54 static struct casewriter_class sort_casewriter_class;
/* Priority queue operations, defined later in this file.  The
   casenumber passed to push and returned by pop is the run id
   associated with the case. */
56 static struct pqueue *pqueue_create (const struct case_ordering *);
57 static void pqueue_destroy (struct pqueue *);
58 static bool pqueue_is_full (const struct pqueue *);
59 static bool pqueue_is_empty (const struct pqueue *);
60 static void pqueue_push (struct pqueue *, struct ccase *, casenumber);
61 static void pqueue_pop (struct pqueue *, struct ccase *, casenumber *);
/* Pops one record from the sort writer's queue and writes it to a
   run. */
63 static void output_record (struct sort_writer *);
/* Creates a casewriter of class sort_casewriter_class whose private
   data is a newly allocated sort_writer set up for ORDERING, and
   returns the result of casewriter_create.

   The sort_writer keeps its own clone of ORDERING.  ORDERING itself
   is destroyed before this function returns, so the caller must not
   use it afterward. */
66 sort_create_writer (struct case_ordering *ordering)
68 struct sort_writer *sort;
70 sort = xmalloc (sizeof *sort);
71 sort->ordering = case_ordering_clone (ordering);
72 sort->merge = merge_create (ordering);
73 sort->pqueue = pqueue_create (ordering);
/* A null run_end marks that no case has been written to a run yet;
   sort_casewriter_write tests for this with case_is_null. */
76 case_nullify (&sort->run_end);
/* Every consumer above was handed ORDERING before this point, so it
   can now be released. */
78 case_ordering_destroy (ordering);
80 return casewriter_create (&sort_casewriter_class, sort);
/* Write callback of the sorting casewriter (first entry of
   sort_casewriter_class).  Pushes case C onto the sort writer's
   priority queue, tagged with the id of the run it should go to.
   WRITER is unused; SORT_ is the sort_writer.

   NOTE(review): the rest of the parameter list, the statement
   guarded by the pqueue_is_full test, and the tail of the next_run
   condition are not visible in this excerpt. */
84 sort_casewriter_write (struct casewriter *writer UNUSED, void *sort_,
87 struct sort_writer *sort = sort_;
/* The queue has no free slot; presumably a record is output here to
   make room before the push below (pqueue_push asserts that the
   queue is not full) -- confirm against the full source. */
90 if (pqueue_is_full (sort->pqueue))
/* Decide whether C goes to the run after the current one: true when
   no case has been output yet (run_end is null), or depending on how
   C compares with the last case written to the current run. */
93 next_run = (case_is_null (&sort->run_end)
94 || case_ordering_compare_cases (c, &sort->run_end,
/* Tag C with the current run id, or the following id when next_run
   is set. */
96 pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0));
/* Destroy callback of the sorting casewriter.  Releases what the
   sort_writer SORT_ holds: its ordering clone, the merge object, the
   priority queue, the current run's writer, and the saved run_end
   case.  WRITER is unused.  Also called directly by
   sort_casewriter_convert_to_reader. */
100 sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_)
102 struct sort_writer *sort = sort_;
104 case_ordering_destroy (sort->ordering);
105 merge_destroy (sort->merge);
106 pqueue_destroy (sort->pqueue);
107 casewriter_destroy (sort->run);
108 case_destroy (&sort->run_end);
/* Convert-to-reader callback of the sorting casewriter.  Flushes
   every case still held in the priority queue into runs, appends the
   last run to the merge object, obtains the output reader from
   merge_make_reader, and then tears down the sort_writer SORT_ with
   sort_casewriter_destroy. */
112 static struct casereader *
113 sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_)
115 struct sort_writer *sort = sort_;
116 struct casereader *output;
/* No run is open and the run id is still 0, i.e. output_record has
   not started a run yet.  In that case the run is created with
   mem_writer_create rather than the tmpfile_writer_create that
   output_record uses. */
118 if (sort->run == NULL && sort->run_id == 0)
121 sort->run = mem_writer_create (case_ordering_get_value_cnt (
/* Drain the queue; each call moves one case into a run. */
125 while (!pqueue_is_empty (sort->pqueue))
126 output_record (sort);
/* Hand the final run to the merge object as a reader. */
128 merge_append (sort->merge, casewriter_make_reader (sort->run));
/* Take the reader from the merge object before destroying the
   sort_writer, which also destroys sort->merge. */
131 output = merge_make_reader (sort->merge);
132 sort_casewriter_destroy (writer, sort);
/* Pops the minimum record from SORT's priority queue and writes its
   case to the run identified by the popped run id.

   When the popped run id differs from the current one and a run is
   open, the open run is first appended to the merge object.  When no
   run is open, a new one is created with tmpfile_writer_create.
   run_end is replaced with a clone of the case being written.

   NOTE(review): some statements of this function are not visible in
   this excerpt. */
137 output_record (struct sort_writer *sort)
139 struct ccase min_case;
140 casenumber min_run_id;
142 pqueue_pop (sort->pqueue, &min_case, &min_run_id);
/* Trace output.  NOTE(review): min_run_id is a casenumber but is
   printed with %d -- confirm that casenumber is int, or that this
   line is compiled out in the full source. */
144 printf ("\toutput: %f to run %d\n", case_num_idx (&min_case, 0), min_run_id);
/* The popped case belongs to a different run than the open one:
   pass the open run to the merge object. */
147 if (sort->run_id != min_run_id && sort->run != NULL)
149 merge_append (sort->merge, casewriter_make_reader (sort->run));
/* Start a new run if none is open. */
152 if (sort->run == NULL)
154 sort->run = tmpfile_writer_create (case_ordering_get_value_cnt (
156 sort->run_id = min_run_id;
/* Remember the case written last; sort_casewriter_write compares
   incoming cases against it. */
159 case_destroy (&sort->run_end);
160 case_clone (&sort->run_end, &min_case);
162 casewriter_write (sort->run, &min_case);
/* Callback table for the sorting casewriter, passed to
   casewriter_create by sort_create_writer.  The entries are the
   write, destroy, and convert-to-reader callbacks defined above
   (presumably in the member order of struct casewriter_class, which
   is declared outside this file). */
165 static struct casewriter_class sort_casewriter_class =
167 sort_casewriter_write,
168 sort_casewriter_destroy,
169 sort_casewriter_convert_to_reader,
172 /* Reads all the cases from INPUT. Sorts the cases according to
173 ORDERING. Returns the sorted cases in a new casereader, or a
174 null pointer if an I/O error occurs. Both INPUT and ORDERING
175 are destroyed upon return, regardless of success. */
177 sort_execute (struct casereader *input, struct case_ordering *ordering)
/* sort_create_writer clones ORDERING for its own use and destroys
   the ORDERING passed in. */
179 struct casewriter *output = sort_create_writer (ordering);
/* Pass the cases from INPUT to the sorting writer. */
180 casereader_transfer (input, output);
/* Turn the sorting writer into the reader that is returned. */
181 return casewriter_make_reader (output);
/* The members below are accessed through `struct pqueue *' elsewhere
   in this file; the enclosing struct header and the remaining
   members are not visible in this excerpt. */
/* Ordering used when comparing the cases of two records; a clone
   made by pqueue_create and destroyed by pqueue_destroy. */
186 struct case_ordering *ordering;
/* Array of records maintained as a heap by push_heap and pop_heap;
   allocated by pqueue_create with room for record_cap elements. */
187 struct pqueue_record *records;
/* Forward declaration of the comparison callback that pqueue_push
   and pqueue_pop pass to push_heap and pop_heap.  (The rest of the
   parameter list is not visible in this excerpt.) */
200 static int compare_pqueue_records_minheap (const void *a, const void *b,
/* Allocates and initializes a priority queue whose cases are
   compared according to ORDERING.  The queue keeps its own clone of
   ORDERING, so the caller retains ownership of the one passed in.

   The capacity is derived from get_workspace_cases for ORDERING's
   value count, then lowered to max_buffers if it exceeds that, or
   otherwise raised to min_buffers if it falls below that. */
203 static struct pqueue *
204 pqueue_create (const struct case_ordering *ordering)
208 pq = xmalloc (sizeof *pq);
209 pq->ordering = case_ordering_clone (ordering);
/* NOTE(review): the left-hand side of this assignment is not visible
   in this excerpt; the clamping below suggests pq->record_cap. */
211 = get_workspace_cases (case_ordering_get_value_cnt (ordering));
/* Because of the else-if, the lower bound is only applied when the
   upper bound was not. */
212 if (pq->record_cap > max_buffers)
213 pq->record_cap = max_buffers;
214 else if (pq->record_cap < min_buffers)
215 pq->record_cap = min_buffers;
217 pq->records = xnmalloc (pq->record_cap, sizeof *pq->records);
/* Destroys priority queue PQ: pops every record still in the queue,
   then destroys the queue's clone of the case ordering.
   NOTE(review): what is done with each popped case, and the release
   of the remaining storage, are not visible in this excerpt. */
224 pqueue_destroy (struct pqueue *pq)
228 while (!pqueue_is_empty (pq))
232 pqueue_pop (pq, &c, &id);
235 case_ordering_destroy (pq->ordering);
/* Returns true if PQ holds at least as many records as its capacity
   allows, false otherwise. */
242 pqueue_is_full (const struct pqueue *pq)
244 return pq->record_cnt >= pq->record_cap;
/* Returns true if PQ holds no records, false otherwise. */
248 pqueue_is_empty (const struct pqueue *pq)
250 return pq->record_cnt == 0;
/* Adds case C to priority queue PQ, associated with run id ID.
   PQ must not be full (checked by assertion).  C's contents are
   transferred into the queue's record with case_move.
   NOTE(review): the statements that store ID and any other record
   members are not visible in this excerpt. */
254 pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id)
256 struct pqueue_record *r;
258 assert (!pqueue_is_full (pq));
/* Claim the slot just past the current records and count it. */
260 r = &pq->records[pq->record_cnt++];
262 case_move (&r->c, c);
/* Let push_heap position the newly appended record; record_cnt
   already includes it. */
265 push_heap (pq->records, pq->record_cnt, sizeof *pq->records,
266 compare_pqueue_records_minheap, pq);
/* Removes a record from priority queue PQ and moves its case into C.
   PQ must not be empty (checked by assertion).
   NOTE(review): the statement that stores the record's run id into
   *ID is not visible in this excerpt. */
270 pqueue_pop (struct pqueue *pq, struct ccase *c, casenumber *id)
272 struct pqueue_record *r;
274 assert (!pqueue_is_empty (pq));
/* pop_heap receives the count from before the decrement.  The
   record is then read from index record_cnt (the new count), so
   pop_heap presumably leaves the removed element in that last slot
   -- confirm against libpspp/array.h. */
276 pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records,
277 compare_pqueue_records_minheap, pq);
279 r = &pq->records[pq->record_cnt];
281 case_move (c, &r->c);
284 /* Compares record-run tuples A and B on id, then on case data,
285 then on insertion order, in descending order. */
287 compare_pqueue_records_minheap (const void *a_, const void *b_,
290 const struct pqueue_record *a = a_;
291 const struct pqueue_record *b = b_;
/* The auxiliary argument is the queue itself, which supplies the
   case ordering used below. */
292 const struct pqueue *pq = pq_;
/* First key: run id, as a three-way result (-1, 0, or 1). */
293 int result = a->id < b->id ? -1 : a->id > b->id;
/* Second key: the cases themselves under the queue's ordering.
   NOTE(review): the guard that applies this only when the ids tie is
   not visible in this excerpt. */
295 result = case_ordering_compare_cases (&a->c, &b->c, pq->ordering);
/* Third key: the records' idx members.
   NOTE(review): the tie guard and the final return (which, per the
   header comment, yields descending order) are not visible in this
   excerpt. */
297 result = a->idx < b->idx ? -1 : a->idx > b->idx;