1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009-12, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/sort.h"
23 #include "data/case.h"
24 #include "data/casereader.h"
25 #include "data/casewriter-provider.h"
26 #include "data/casewriter.h"
27 #include "data/settings.h"
28 #include "data/subcase.h"
29 #include "libpspp/array.h"
30 #include "libpspp/assertion.h"
31 #include "libpspp/bt.h"
32 #include "math/merge.h"
34 #include "gl/xalloc.h"
37 #define _(msgid) gettext (msgid)
39 /* These should only be changed for testing purposes. */
/* Upper limit applied to the priority queue's record_max in
   pqueue_create(); INT_MAX leaves the workspace-derived size uncapped. */
41 int max_buffers = INT_MAX;
/* Fields accessed through `struct sort_writer *' by the functions below.
   NOTE(review): the struct header and the merge, aux and run_id members
   used elsewhere in this file are not visible in this extract. */
/* Reference taken with caseproto_ref() at creation; used when creating
   run writers. */
45 struct caseproto *proto;
/* Private clone of the caller's ordering; used to compare an incoming
   case against run_end. */
46 struct subcase ordering;
/* Cases wait here until output_record() pops them. */
48 struct pqueue *pqueue;
/* Callbacks given to sort_distinct_create_writer(); both are NULL when
   the writer comes from sort_create_writer().  DESTROY, if non-null, is
   called with the aux pointer when the writer is destroyed. */
50 sort_distinct_combine_func *combine;
51 sort_distinct_destroy_func *destroy;
/* Writer receiving the run currently being produced; output_record()
   creates one when this is NULL. */
54 struct casewriter *run;
/* Reference to the case most recently written to RUN; NULL until the
   first case has been output. */
56 struct ccase *run_end;
/* Callback table for the sorting casewriter; defined after the callbacks. */
59 static struct casewriter_class sort_casewriter_class;
/* Priority queue of (case, run id) records, implemented further down on
   top of a balanced tree (libpspp/bt.h). */
61 static struct pqueue *pqueue_create (const struct subcase *,
62 const struct caseproto *,
63 sort_distinct_combine_func *, void *aux);
64 static void pqueue_destroy (struct pqueue *);
65 static bool pqueue_is_full (const struct pqueue *);
66 static bool pqueue_is_empty (const struct pqueue *);
67 static void pqueue_push (struct pqueue *, struct ccase *, casenumber);
68 static struct ccase *pqueue_pop (struct pqueue *, casenumber *);
/* Moves the minimum record from the priority queue into a run. */
70 static void output_record (struct sort_writer *);
/* Creates a casewriter that sorts the cases written to it according to
   ORDERING.  PROTO is the prototype of those cases.  This is the plain
   form: it delegates to sort_distinct_create_writer() with NULL for all
   three of its trailing arguments, so no combine or destroy callback is
   installed. */
73 sort_create_writer (const struct subcase *ordering,
74 const struct caseproto *proto)
76 return sort_distinct_create_writer (ordering, proto, NULL, NULL, NULL);
/* Creates a sorting casewriter for cases with prototype PROTO, ordered by
   ORDERING.  COMBINE is handed to both the merger and the priority queue;
   pqueue_push() uses it, when non-null, to fold together cases that
   compare equal within a run.  DESTROY, if non-null, is invoked on the
   aux pointer when the writer is destroyed.

   NOTE(review): `aux' is used below but the parameter line declaring it
   is not visible in this extract. */
80 sort_distinct_create_writer (const struct subcase *ordering,
81 const struct caseproto *proto,
82 sort_distinct_combine_func *combine,
83 sort_distinct_destroy_func *destroy,
86 struct sort_writer *sort;
88 sort = xmalloc (sizeof *sort);
/* The writer keeps its own reference to PROTO and its own copy of
   ORDERING, released again in sort_casewriter_destroy(). */
89 sort->proto = caseproto_ref (proto);
90 subcase_clone (&sort->ordering, ordering);
91 sort->merge = merge_create (ordering, proto, combine, aux);
92 sort->pqueue = pqueue_create (ordering, proto, combine, aux);
94 sort->combine = combine;
95 sort->destroy = destroy;
/* No case has been output to a run yet. */
100 sort->run_end = NULL;
102 return casewriter_create (proto, &sort_casewriter_class, sort);
/* casewriter `write' callback: queues one case for sorting.  SORT_ is the
   struct sort_writer.
   NOTE(review): the declarations of `c' (the case being written) and
   `next_run' are not visible in this extract. */
106 sort_casewriter_write (struct casewriter *writer UNUSED, void *sort_,
109 struct sort_writer *sort = sort_;
/* Make room first: pqueue_push() asserts that the queue is not full. */
112 if (pqueue_is_full (sort->pqueue))
113 output_record (sort);
/* The case is tagged for the run after the current one when nothing has
   been output yet, or when it orders before the last case already written
   to the current run (so it can no longer be emitted there in order).
   Otherwise it is tagged with the current run id. */
115 next_run = (sort->run_end == NULL
116 || subcase_compare_3way (&sort->ordering, c,
117 &sort->ordering, sort->run_end) < 0);
118 pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0));
/* casewriter `destroy' callback: releases everything owned by the
   struct sort_writer passed as SORT_.  Also called directly by
   sort_casewriter_convert_to_reader() once the merged reader exists. */
122 sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_)
124 struct sort_writer *sort = sort_;
/* Let the client dispose of its auxiliary data, if it asked to. */
126 if (sort->destroy != NULL)
127 sort->destroy (sort->aux);
129 subcase_destroy (&sort->ordering);
130 merge_destroy (sort->merge);
131 pqueue_destroy (sort->pqueue);
132 casewriter_destroy (sort->run);
133 case_unref (sort->run_end);
134 caseproto_unref (sort->proto);
/* casewriter `convert_to_reader' callback: flushes every queued case into
   runs, merges the runs, and destroys the sort state.
   NOTE(review): the statement returning `output' is not visible in this
   extract. */
138 static struct casereader *
139 sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_)
141 struct sort_writer *sort = sort_;
142 struct casereader *output;
/* No run has been started, so nothing was ever output: open a memory
   writer here instead of letting output_record() open a temporary-file
   writer. */
144 if (sort->run == NULL && sort->run_id == 0)
147 sort->run = mem_writer_create (sort->proto);
/* Drain the queue in sorted order. */
150 while (!pqueue_is_empty (sort->pqueue))
151 output_record (sort);
/* Hand the last run to the merger. */
153 merge_append (sort->merge, casewriter_make_reader (sort->run));
156 output = merge_make_reader (sort->merge);
157 sort_casewriter_destroy (writer, sort);
/* Removes the minimum record from SORT's priority queue and writes its
   case to a run, closing the current run and/or opening a new one as
   needed. */
162 output_record (struct sort_writer *sort)
164 struct ccase *min_case;
165 casenumber min_run_id;
167 min_case = pqueue_pop (sort->pqueue, &min_run_id);
/* NOTE(review): debugging trace.  Confirm that it is compiled out (the
   surrounding preprocessor lines, if any, are not visible here) and that
   %d matches the type of casenumber. */
169 printf ("\toutput: %f to run %d\n", case_num_idx (min_case, 0), min_run_id);
/* The popped record belongs to a different run than the one being
   written: pass the finished run to the merger.
   NOTE(review): presumably sort->run is reset to NULL right after this
   (line not visible), so that the test below opens a fresh run -- verify. */
172 if (sort->run_id != min_run_id && sort->run != NULL)
174 merge_append (sort->merge, casewriter_make_reader (sort->run));
/* No run is open: start one backed by a temporary file and adopt the
   popped record's run id. */
177 if (sort->run == NULL)
179 sort->run = tmpfile_writer_create (sort->proto);
180 sort->run_id = min_run_id;
/* Remember the case just output so sort_casewriter_write() can compare
   later cases against it.  An extra reference is taken for run_end before
   min_case itself is passed to casewriter_write(). */
183 case_unref (sort->run_end);
184 sort->run_end = case_ref (min_case);
185 casewriter_write (sort->run, min_case);
/* Callback table passed to casewriter_create() by
   sort_distinct_create_writer(): write, destroy, and convert-to-reader
   handlers, in that order. */
188 static struct casewriter_class sort_casewriter_class =
190 sort_casewriter_write,
191 sort_casewriter_destroy,
192 sort_casewriter_convert_to_reader,
195 /* Reads all the cases from INPUT. Sorts the cases according to
196 ORDERING. Returns the sorted cases in a new casereader.
197 INPUT is destroyed by this function. */
/* Implementation: creates a sorting writer for INPUT's case prototype,
   transfers every case from INPUT into it with casereader_transfer(), and
   converts the writer into the reader that is returned. */
200 sort_execute (struct casereader *input, const struct subcase *ordering)
202 struct casewriter *output =
203 sort_create_writer (ordering, casereader_get_proto (input));
204 casereader_transfer (input, output);
205 return casewriter_make_reader (output);
208 /* Reads all the cases from INPUT. Sorts the cases in ascending
209 order according to VAR. Returns the sorted cases in a
210 new casereader. INPUT is destroyed by this function. */
/* Convenience wrapper around sort_execute() for a single sort key: builds
   a temporary one-variable ascending subcase on VAR, sorts INPUT with it,
   and destroys the subcase again.
   NOTE(review): the declaration of `sc' and the statement returning
   `reader' are not visible in this extract. */
212 sort_execute_1var (struct casereader *input, const struct variable *var)
215 struct casereader *reader;
217 subcase_init_var (&sc, var, SC_ASCEND);
218 reader = sort_execute (input, &sc);
219 subcase_destroy (&sc);
/* Members accessed through `struct pqueue *' below.
   NOTE(review): the struct header and the bt, record_max and aux members
   used elsewhere in this file are not visible in this extract. */
/* Private clone of the sort ordering; also handed to bt_init() as the
   auxiliary data for compare_pqueue_records(). */
225 struct subcase ordering;
/* If non-null, pqueue_push() calls this to fold a new case into an
   adjacent record with the same run id and equal sort keys. */
230 sort_distinct_combine_func *combine;
/* Tree linkage for a struct pqueue_record; bt_data() maps a node in the
   queue's tree back to its enclosing record through this member.
   NOTE(review): the other members used in this file (id, c, idx) are not
   visible in this extract. */
236 struct bt_node bt_node;
/* Tree comparison function for queue records; registered with bt_init()
   in pqueue_create(), which passes the queue's ordering as ORDERING. */
242 static int compare_pqueue_records (const struct bt_node *a,
243 const struct bt_node *b,
244 const void *ordering);
/* Creates a priority queue that orders cases with prototype PROTO by
   ORDERING.  COMBINE, if non-null, is used by pqueue_push() to fold
   together equal cases.
   NOTE(review): the declaration of `pq', any use of AUX, and the return
   statement are not visible in this extract. */
246 static struct pqueue *
247 pqueue_create (const struct subcase *ordering, const struct caseproto *proto,
248 sort_distinct_combine_func *combine, void *aux)
252 pq = xmalloc (sizeof *pq);
253 subcase_clone (&pq->ordering, ordering);
/* Capacity starts from the workspace setting for cases of this prototype
   and is then capped at max_buffers or, failing that, raised to
   min_buffers.  Because of the `else', the lower bound is not applied
   when the cap was.
   NOTE(review): min_buffers is not declared in the visible lines. */
254 pq->record_max = settings_get_workspace_cases (proto);
255 if (pq->record_max > max_buffers)
256 pq->record_max = max_buffers;
257 else if (pq->record_max < min_buffers)
258 pq->record_max = min_buffers;
/* The queue's own clone of the ordering is the comparator's aux data. */
259 bt_init (&pq->bt, compare_pqueue_records, &pq->ordering);
262 pq->combine = combine;
/* Destroys PQ: pops every record still queued, then destroys the cloned
   ordering.
   NOTE(review): what happens to each popped case and to PQ itself is not
   visible in this extract. */
269 pqueue_destroy (struct pqueue *pq)
273 while (!pqueue_is_empty (pq))
276 struct ccase *c = pqueue_pop (pq, &id);
279 subcase_destroy (&pq->ordering);
/* Returns true if PQ's tree already holds at least record_max records, in
   which case a record must be popped before pqueue_push() may be called. */
285 pqueue_is_full (const struct pqueue *pq)
287 return bt_count (&pq->bt) >= pq->record_max;
/* Returns true if PQ holds no records, as reported by bt_is_empty() on
   its tree. */
291 pqueue_is_empty (const struct pqueue *pq)
293 return bt_is_empty (&pq->bt);
/* Adds case C, tagged with run ID, to PQ.  PQ must not be full.
   NOTE(review): the statements that fill in the new record and those that
   clean up after a combine are not visible in this extract. */
297 pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id)
299 struct pqueue_record *r;
301 assert (!pqueue_is_full (pq));
303 r = xmalloc (sizeof *r);
307 bt_insert (&pq->bt, &r->bt_node);
/* With a combine callback, look at the record just before the new one in
   tree order (presumably what bt_prev() returns -- confirm).  If it has
   the same run id and equal sort keys, the new record is removed from the
   tree again and its case is folded into that neighbor via the callback. */
309 if (pq->combine != NULL)
311 struct bt_node *q_ = bt_prev (&pq->bt, &r->bt_node);
314 struct pqueue_record *q = bt_data (q_, struct pqueue_record,
316 if (q->id == r->id && subcase_equal (&pq->ordering, q->c,
317 &pq->ordering, r->c))
319 bt_delete (&pq->bt, &r->bt_node);
320 q->c = pq->combine (q->c, r->c, pq->aux);
/* Removes the first (minimum, per compare_pqueue_records()) record from
   PQ.  PQ must not be empty.  Callers receive the record's case as the
   return value and its run id through *ID.
   NOTE(review): the statements that store *ID, free the record, and
   return the case are not visible in this extract. */
327 static struct ccase *
328 pqueue_pop (struct pqueue *pq, casenumber *id)
330 struct pqueue_record *r;
333 assert (!pqueue_is_empty (pq));
335 r = bt_data (bt_first (&pq->bt), struct pqueue_record, bt_node);
336 bt_delete (&pq->bt, &r->bt_node);
343 /* Compares record-run tuples A and B on id, then on case data,
344 then on insertion order. */
/* A_ and B_ are the tree nodes embedded in two struct pqueue_records;
   ORDERING_ is the struct subcase registered with bt_init().  Each key is
   compared with a three-way expression yielding -1, 0 or 1.
   NOTE(review): the guards that make the second and third comparisons
   apply only as tie-breakers, and the return statement, are not visible in
   this extract. */
346 compare_pqueue_records (const struct bt_node *a_, const struct bt_node *b_,
347 const void *ordering_)
349 const struct pqueue_record *a = bt_data (a_, struct pqueue_record, bt_node);
350 const struct pqueue_record *b = bt_data (b_, struct pqueue_record, bt_node);
351 const struct subcase *ordering = ordering_;
/* Primary key: run id, so every record of an earlier run pops first. */
352 int result = a->id < b->id ? -1 : a->id > b->id;
/* Secondary key: the sort ordering applied to the case data. */
354 result = subcase_compare_3way (ordering, a->c, ordering, b->c);
/* Final key: idx, described above as insertion order. */
356 result = a->idx < b->idx ? -1 : a->idx > b->idx;