1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/sort.h"
23 #include "data/case.h"
24 #include "data/casereader.h"
25 #include "data/casewriter-provider.h"
26 #include "data/casewriter.h"
27 #include "data/settings.h"
28 #include "data/subcase.h"
29 #include "libpspp/array.h"
30 #include "libpspp/assertion.h"
31 #include "math/merge.h"
33 #include "gl/xalloc.h"
35 /* These should only be changed for testing purposes. */
/* Upper bound that pqueue_create applies to a priority queue's
   max_records.
   NOTE(review): pqueue_create also reads min_buffers, whose definition
   is not visible in these lines. */
37 int max_buffers = INT_MAX;
/* Members of the sort writer's state (the object allocated in
   sort_create_writer).
   NOTE(review): the struct header is not visible in these lines, and
   the code below also uses sort->merge and sort->run_id, whose member
   declarations are not visible either. */
41 struct caseproto *proto; /* Own reference, taken with caseproto_ref. */
42 struct subcase ordering; /* Own clone of the caller's sort ordering. */
44 struct pqueue *pqueue; /* Cases waiting to be written to a run. */
46 struct casewriter *run; /* Writer for the run being output, or NULL. */
48 struct ccase *run_end; /* Own reference to the last case written to RUN, or NULL. */
/* Forward declarations: the writer class table, the priority-queue
   helpers and output_record are all defined further down in this
   file. */
51 static struct casewriter_class sort_casewriter_class;
53 static struct pqueue *pqueue_create (const struct subcase *,
54 const struct caseproto *);
55 static void pqueue_destroy (struct pqueue *);
56 static bool pqueue_is_full (const struct pqueue *);
57 static bool pqueue_is_empty (const struct pqueue *);
58 static void pqueue_push (struct pqueue *, struct ccase *, casenumber);
59 static struct ccase *pqueue_pop (struct pqueue *, casenumber *);
61 static void output_record (struct sort_writer *);
/* Creates and returns a casewriter of class sort_casewriter_class whose
   auxiliary data is a newly allocated sort_writer.  The sort_writer
   takes its own reference to PROTO and its own clone of ORDERING, and
   gets a merge object and a priority queue created from the same
   ORDERING and PROTO.
   NOTE(review): no initialization of sort->run, sort->run_id or
   sort->run_end is visible here, although later code reads all three;
   the embedded numbering skips 74-77, so confirm against the full
   file. */
64 sort_create_writer (const struct subcase *ordering,
65 const struct caseproto *proto)
67 struct sort_writer *sort;
69 sort = xmalloc (sizeof *sort);
70 sort->proto = caseproto_ref (proto);
71 subcase_clone (&sort->ordering, ordering);
72 sort->merge = merge_create (ordering, proto);
73 sort->pqueue = pqueue_create (ordering, proto);
78 return casewriter_create (proto, &sort_casewriter_class, sort);
/* Write hook of sort_casewriter_class.  Queues case C in the priority
   queue, tagged with the id of the run it may be written to.
   NOTE(review): the parameter line that declares C and the declaration
   of next_run are not visible in these lines. */
82 sort_casewriter_write (struct casewriter *writer UNUSED, void *sort_,
85 struct sort_writer *sort = sort_;
/* NOTE(review): the statement controlled by this test is not visible
   (the embedded numbering skips 89-90).  pqueue_push asserts that the
   queue is not full, so presumably a record is output here to make
   room -- confirm. */
88 if (pqueue_is_full (sort->pqueue))
/* C has to wait for the next run when nothing has been output yet
   (run_end is NULL) or when C compares less than the last case output
   under the sort ordering. */
91 next_run = (sort->run_end == NULL
92 || subcase_compare_3way (&sort->ordering, c,
93 &sort->ordering, sort->run_end) < 0);
/* Tag C with the current run id, or with the following one. */
94 pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0));
/* Destroy hook of sort_casewriter_class.  Releases what the sort state
   owns: the cloned ordering, the merge object, the priority queue, the
   current run writer, the run_end case reference and the proto
   reference.
   NOTE(review): sort_create_writer allocates the state with xmalloc,
   but no release of SORT itself is visible here (the embedded
   numbering skips 108-110) -- confirm. */
98 sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_)
100 struct sort_writer *sort = sort_;
102 subcase_destroy (&sort->ordering);
103 merge_destroy (sort->merge);
104 pqueue_destroy (sort->pqueue);
105 casewriter_destroy (sort->run);
106 case_unref (sort->run_end);
107 caseproto_unref (sort->proto);
/* Convert-to-reader hook of sort_casewriter_class.  Flushes every
   queued case into runs, hands the last run to the merge object,
   obtains the output reader from the merge object, and destroys the
   sort state.
   NOTE(review): the statement that returns OUTPUT is not visible in
   these lines. */
111 static struct casereader *
112 sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_)
114 struct sort_writer *sort = sort_;
115 struct casereader *output;
/* No run writer has been opened and run_id is still 0: use a writer
   from mem_writer_create for the run instead of the tmpfile writer
   that output_record would otherwise create.
   NOTE(review): output_record closes the current run whenever its id
   differs from sort->run_id, so run_id presumably has to be updated
   here as well; no such statement is visible (the embedded numbering
   skips 121-122) -- confirm. */
117 if (sort->run == NULL && sort->run_id == 0)
120 sort->run = mem_writer_create (sort->proto);
/* Drain the queue; output_record writes each case to its run. */
123 while (!pqueue_is_empty (sort->pqueue))
124 output_record (sort);
/* The final run joins the merge as a reader.
   NOTE(review): sort->run is passed to casewriter_make_reader here,
   yet sort_casewriter_destroy below calls casewriter_destroy
   (sort->run).  Presumably sort->run is cleared in between; that line
   is not visible (the embedded numbering skips 127-128) -- confirm. */
126 merge_append (sort->merge, casewriter_make_reader (sort->run));
129 output = merge_make_reader (sort->merge);
130 sort_casewriter_destroy (writer, sort);
/* Pops the minimum record from SORT's priority queue and writes its
   case to the run that the record is tagged with, closing the current
   run and opening a new one as needed. */
135 output_record (struct sort_writer *sort)
137 struct ccase *min_case;
138 casenumber min_run_id;
140 min_case = pqueue_pop (sort->pqueue, &min_run_id);
/* NOTE(review): debugging trace.  It passes a casenumber to %d.  The
   embedded numbering skips the lines on either side (141 and 143), so
   it may be conditionally compiled -- confirm before relying on it. */
142 printf ("\toutput: %f to run %d\n", case_num_idx (min_case, 0), min_run_id);
/* The popped record belongs to a different run than the one being
   written: hand the current run to the merge as a reader.
   NOTE(review): the test below only opens a new run when sort->run is
   NULL, so sort->run is presumably cleared here; that line is not
   visible (the embedded numbering skips 148-149) -- confirm. */
145 if (sort->run_id != min_run_id && sort->run != NULL)
147 merge_append (sort->merge, casewriter_make_reader (sort->run));
/* No run is open: start one with a tmpfile writer and adopt the popped
   record's run id. */
150 if (sort->run == NULL)
152 sort->run = tmpfile_writer_create (sort->proto);
153 sort->run_id = min_run_id;
/* Remember the case being written as the new end of the run.  The
   extra reference is taken before the case is handed to
   casewriter_write. */
156 case_unref (sort->run_end);
157 sort->run_end = case_ref (min_case);
158 casewriter_write (sort->run, min_case);
/* Casewriter class for the sort writer.  The initializer is
   positional: the write, destroy and convert-to-reader hooks, in that
   order. */
161 static struct casewriter_class sort_casewriter_class =
163 sort_casewriter_write,
164 sort_casewriter_destroy,
165 sort_casewriter_convert_to_reader,
168 /* Reads all the cases from INPUT. Sorts the cases according to
169 ORDERING. Returns the sorted cases in a new casereader.
170 INPUT is destroyed by this function. */
173 sort_execute (struct casereader *input, const struct subcase *ordering)
/* Build a sorting writer that uses INPUT's case prototype. */
175 struct casewriter *output =
176 sort_create_writer (ordering, casereader_get_proto (input));
/* Feed every case from INPUT into the sorting writer. */
177 casereader_transfer (input, output);
/* Converting the writer into a reader yields the sorted cases. */
178 return casewriter_make_reader (output);
181 /* Reads all the cases from INPUT. Sorts the cases in ascending
182 order according to VAR. Returns the sorted cases in a
183 new casereader. INPUT is destroyed by this function. */
185 sort_execute_1var (struct casereader *input, const struct variable *var)
/* NOTE(review): the declaration of SC and the statement that returns
   READER are not visible in these lines. */
188 struct casereader *reader;
/* Single-key ordering: VAR, ascending. */
190 subcase_init_var (&sc, var, SC_ASCEND);
191 reader = sort_execute (input, &sc);
/* sort_create_writer clones the ordering it is given, so SC can be
   destroyed as soon as sort_execute returns. */
192 subcase_destroy (&sc);
/* Members of the priority queue, which holds cases tagged with a run
   id in a binary heap kept with push_heap and pop_heap.
   NOTE(review): the struct header is not visible in these lines, nor
   is the definition of struct pqueue_record (its fields id, c and idx
   are read by compare_pqueue_records_minheap). */
198 struct subcase ordering; /* Own clone of the sort ordering. */
199 struct pqueue_record *records; /* Heap array, grown by pqueue_push. */
200 size_t n_records; /* Current number of records. */
201 size_t allocated_records; /* Space currently allocated for records. */
202 size_t max_records; /* Max space we are willing to allocate. */
/* Forward declaration of the heap comparison callback defined at the
   end of this file.
   NOTE(review): the line carrying the remaining parameter and the
   closing of the declaration is not visible here. */
213 static int compare_pqueue_records_minheap (const void *a, const void *b,
/* Creates a priority queue that orders cases by a clone of ORDERING.
   The capacity limit max_records starts at
   settings_get_workspace_cases (PROTO), is lowered to max_buffers if
   larger, and otherwise raised to min_buffers if smaller.  No record
   storage is allocated here; pqueue_push allocates it on demand.
   NOTE(review): the declaration of PQ, the initialization of n_records
   and records, and the return statement are not visible in these
   lines. */
216 static struct pqueue *
217 pqueue_create (const struct subcase *ordering, const struct caseproto *proto)
221 pq = xmalloc (sizeof *pq);
222 subcase_clone (&pq->ordering, ordering);
223 pq->max_records = settings_get_workspace_cases (proto);
/* NOTE(review): max_records is a size_t and max_buffers an int, so the
   int is converted to unsigned for this comparison; a negative
   max_buffers would compare as a very large value. */
224 if (pq->max_records > max_buffers)
225 pq->max_records = max_buffers;
226 else if (pq->max_records < min_buffers)
227 pq->max_records = min_buffers;
229 pq->allocated_records = 0;
/* Destroys PQ: pops every record that is still queued, then destroys
   the cloned ordering.
   NOTE(review): what is done with each popped case C, the declaration
   of ID, and the release of pq->records and PQ itself are not visible
   in these lines (the embedded numbering skips 238-240, 242-243,
   245-246 and 248-251) -- confirm. */
237 pqueue_destroy (struct pqueue *pq)
241 while (!pqueue_is_empty (pq))
244 struct ccase *c = pqueue_pop (pq, &id);
247 subcase_destroy (&pq->ordering);
/* Returns true if PQ already holds max_records records, that is, if
   nothing may be pushed until a record is popped (pqueue_push asserts
   this is false). */
254 pqueue_is_full (const struct pqueue *pq)
256 return pq->n_records >= pq->max_records;
/* Returns true if PQ holds no records (pqueue_pop asserts this is
   false). */
260 pqueue_is_empty (const struct pqueue *pq)
262 return pq->n_records == 0;
/* Adds case C, tagged with run ID, to PQ.  PQ must not be full. */
266 pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id)
268 struct pqueue_record *r;
270 assert (!pqueue_is_full (pq));
/* Grow the record array when it is exhausted: double it, with a floor
   of 16 records, otherwise capped at max_records.  The floor takes
   precedence over the cap, so a queue whose max_records is below 16
   still gets 16 slots. */
272 if (pq->n_records >= pq->allocated_records)
274 pq->allocated_records = pq->allocated_records * 2;
275 if (pq->allocated_records < 16)
276 pq->allocated_records = 16;
277 else if (pq->allocated_records > pq->max_records)
278 pq->allocated_records = pq->max_records;
279 pq->records = xrealloc (pq->records,
280 pq->allocated_records * sizeof *pq->records);
/* Claim the slot just past the current heap for the new record.
   NOTE(review): the stores that fill in *R are not visible in these
   lines (the embedded numbering skips 284-287) -- confirm. */
283 r = &pq->records[pq->n_records++];
/* Restore heap order over all n_records records, using
   compare_pqueue_records_minheap with PQ as its auxiliary data. */
288 push_heap (pq->records, pq->n_records, sizeof *pq->records,
289 compare_pqueue_records_minheap, pq);
/* Removes the top record from PQ, which must not be empty.
   NOTE(review): the statements that store through ID and return the
   case are not visible in these lines (the embedded numbering skips
   303-305) -- confirm. */
292 static struct ccase *
293 pqueue_pop (struct pqueue *pq, casenumber *id)
295 struct pqueue_record *r;
297 assert (!pqueue_is_empty (pq));
/* pop_heap receives the count from before the decrement; afterward
   n_records is the number of records that remain. */
299 pop_heap (pq->records, pq->n_records--, sizeof *pq->records,
300 compare_pqueue_records_minheap, pq);
/* R addresses the slot just past the remaining records, presumably
   where pop_heap left the record it removed. */
302 r = &pq->records[pq->n_records];
307 /* Compares record-run tuples A and B on id, then on case data,
308 then on insertion order, in descending order. */
/* PQ_ supplies the ordering used to compare case data.
   NOTE(review): the parameter line that declares PQ_ is not visible in
   these lines. */
310 compare_pqueue_records_minheap (const void *a_, const void *b_,
313 const struct pqueue_record *a = a_;
314 const struct pqueue_record *b = b_;
315 const struct pqueue *pq = pq_;
/* Three-way comparison on run id. */
316 int result = a->id < b->id ? -1 : a->id > b->id;
/* NOTE(review): as shown, the next two assignments overwrite RESULT
   unconditionally.  The guards that would make them tie-breakers, and
   the return statement, are not visible (the embedded numbering skips
   317, 319 and 321) -- confirm. */
318 result = subcase_compare_3way (&pq->ordering, a->c, &pq->ordering, b->c);
320 result = a->idx < b->idx ? -1 : a->idx > b->idx;