X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fsort.c;h=9c2118985c59583bfe42d66ed5152452e3417d3b;hb=86189d786fc34c9da174b5018bec27640f7062e4;hp=10b8a12cec59d76173a39a15532a53dbec62e306;hpb=f6b8e421cd29ae0aef4018bbe2e1a06d0bef57df;p=pspp diff --git a/src/math/sort.c b/src/math/sort.c index 10b8a12cec..9c2118985c 100644 --- a/src/math/sort.c +++ b/src/math/sort.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,21 +16,21 @@ #include -#include "sort.h" +#include "math/sort.h" #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "data/case.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/casewriter-provider.h" +#include "data/settings.h" +#include "data/subcase.h" +#include "libpspp/assertion.h" +#include "libpspp/bt.h" +#include "math/merge.h" -#include "xalloc.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -41,44 +41,64 @@ int max_buffers = INT_MAX; struct sort_writer { - size_t value_cnt; - struct case_ordering *ordering; + struct caseproto *proto; + struct subcase ordering; struct merge *merge; struct pqueue *pqueue; + sort_distinct_combine_func *combine; + sort_distinct_destroy_func *destroy; + void *aux; + struct casewriter *run; casenumber run_id; - struct ccase run_end; + struct ccase *run_end; }; static struct casewriter_class sort_casewriter_class; -static struct pqueue *pqueue_create (const struct case_ordering *, size_t); +static struct pqueue *pqueue_create (const struct subcase *, + const struct caseproto *, + sort_distinct_combine_func *, void *aux); static void pqueue_destroy (struct pqueue *); static bool pqueue_is_full (const struct pqueue *); static bool pqueue_is_empty (const struct pqueue *); static void pqueue_push (struct pqueue *, struct ccase *, casenumber); -static void pqueue_pop (struct pqueue *, struct ccase *, casenumber *); +static struct ccase *pqueue_pop (struct pqueue *, casenumber *); static void output_record (struct sort_writer *); struct casewriter * -sort_create_writer (struct case_ordering *ordering, size_t value_cnt) +sort_create_writer (const struct subcase *ordering, + const struct caseproto *proto) +{ + return sort_distinct_create_writer (ordering, proto, NULL, NULL, NULL); +} + +struct casewriter * +sort_distinct_create_writer (const struct subcase *ordering, + const struct caseproto *proto, + sort_distinct_combine_func *combine, + sort_distinct_destroy_func *destroy, + void *aux) { struct sort_writer *sort; sort = xmalloc (sizeof *sort); - sort->value_cnt = value_cnt; - sort->ordering = case_ordering_clone (ordering); - sort->merge = merge_create (ordering, value_cnt); - sort->pqueue = pqueue_create (ordering, value_cnt); + sort->proto = caseproto_ref (proto); + subcase_clone (&sort->ordering, ordering); + sort->merge = merge_create (ordering, proto, combine, aux); + sort->pqueue = pqueue_create (ordering, proto, combine, aux); + + sort->combine = combine; + sort->destroy = destroy; + sort->aux = aux; + sort->run = NULL; sort->run_id = 0; - case_nullify (&sort->run_end); - - case_ordering_destroy (ordering); + sort->run_end = NULL; - return casewriter_create (value_cnt, &sort_casewriter_class, sort); + return casewriter_create (proto, &sort_casewriter_class, sort); } static void @@ -91,9 +111,9 @@ sort_casewriter_write (struct casewriter *writer UNUSED, void *sort_, if (pqueue_is_full (sort->pqueue)) output_record (sort); - next_run = (case_is_null (&sort->run_end) - || case_ordering_compare_cases (c, &sort->run_end, - sort->ordering) < 0); + next_run = (sort->run_end == NULL + || subcase_compare_3way (&sort->ordering, c, + &sort->ordering, sort->run_end) < 0); pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0)); } @@ -102,11 +122,15 @@ sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_) { struct sort_writer *sort = sort_; - case_ordering_destroy (sort->ordering); + if (sort->destroy != NULL) + sort->destroy (sort->aux); + + subcase_destroy (&sort->ordering); merge_destroy (sort->merge); pqueue_destroy (sort->pqueue); casewriter_destroy (sort->run); - case_destroy (&sort->run_end); + case_unref (sort->run_end); + caseproto_unref (sort->proto); free (sort); } @@ -119,7 +143,7 @@ sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_) if (sort->run == NULL && sort->run_id == 0) { /* In-core sort. */ - sort->run = mem_writer_create (casewriter_get_value_cnt (writer)); + sort->run = mem_writer_create (sort->proto); sort->run_id = 1; } while (!pqueue_is_empty (sort->pqueue)) @@ -136,12 +160,12 @@ sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_) static void output_record (struct sort_writer *sort) { - struct ccase min_case; + struct ccase *min_case; casenumber min_run_id; - pqueue_pop (sort->pqueue, &min_case, &min_run_id); + min_case = pqueue_pop (sort->pqueue, &min_run_id); #if 0 - printf ("\toutput: %f to run %d\n", case_num_idx (&min_case, 0), min_run_id); + printf ("\toutput: %f to run %d\n", case_num_idx (min_case, 0), min_run_id); #endif if (sort->run_id != min_run_id && sort->run != NULL) @@ -151,14 +175,13 @@ output_record (struct sort_writer *sort) } if (sort->run == NULL) { - sort->run = tmpfile_writer_create (sort->value_cnt); + sort->run = tmpfile_writer_create (sort->proto); sort->run_id = min_run_id; } - case_destroy (&sort->run_end); - case_clone (&sort->run_end, &min_case); - - casewriter_write (sort->run, &min_case); + case_unref (sort->run_end); + sort->run_end = case_ref (min_case); + casewriter_write (sort->run, min_case); } static struct casewriter_class sort_casewriter_class = @@ -169,54 +192,73 @@ static struct casewriter_class sort_casewriter_class = }; /* Reads all the cases from INPUT. Sorts the cases according to - ORDERING. Returns the sorted cases in a new casereader, or a - null pointer if an I/O error occurs. Both INPUT and ORDERING - are destroyed upon return, regardless of success. */ + ORDERING. Returns the sorted cases in a new casereader. */ struct casereader * -sort_execute (struct casereader *input, struct case_ordering *ordering) +sort_execute (struct casereader *input, const struct subcase *ordering) { struct casewriter *output = - sort_create_writer (ordering, casereader_get_value_cnt (input)); + sort_create_writer (ordering, casereader_get_proto (input)); casereader_transfer (input, output); return casewriter_make_reader (output); } + +/* Reads all the cases from INPUT. Sorts the cases in ascending + order according to VARIABLE. Returns the sorted cases in a + new casereader. */ +struct casereader * +sort_execute_1var (struct casereader *input, const struct variable *var) +{ + struct subcase sc; + struct casereader *reader; + + subcase_init_var (&sc, var, SC_ASCEND); + reader = sort_execute (input, &sc); + subcase_destroy (&sc); + return reader; +} struct pqueue { - struct case_ordering *ordering; - struct pqueue_record *records; - size_t record_cnt; + struct subcase ordering; + struct bt bt; size_t record_cap; casenumber idx; + + sort_distinct_combine_func *combine; + void *aux; }; struct pqueue_record { + struct bt_node bt_node; casenumber id; - struct ccase c; + struct ccase *c; casenumber idx; }; -static int compare_pqueue_records_minheap (const void *a, const void *b, - const void *pq_); +static int compare_pqueue_records (const struct bt_node *a, + const struct bt_node *b, + const void *ordering); static struct pqueue * -pqueue_create (const struct case_ordering *ordering, size_t value_cnt) +pqueue_create (const struct subcase *ordering, const struct caseproto *proto, + sort_distinct_combine_func *combine, void *aux) { struct pqueue *pq; pq = xmalloc (sizeof *pq); - pq->ordering = case_ordering_clone (ordering); - pq->record_cap - = settings_get_workspace_cases (value_cnt); + subcase_clone (&pq->ordering, ordering); + pq->record_cap = settings_get_workspace_cases (proto); if (pq->record_cap > max_buffers) pq->record_cap = max_buffers; else if (pq->record_cap < min_buffers) pq->record_cap = min_buffers; - pq->record_cnt = 0; - pq->records = xnmalloc (pq->record_cap, sizeof *pq->records); + bt_init (&pq->bt, compare_pqueue_records, &pq->ordering); pq->idx = 0; + pq->combine = combine; + pq->aux = aux; + return pq; } @@ -227,13 +269,11 @@ pqueue_destroy (struct pqueue *pq) { while (!pqueue_is_empty (pq)) { - struct ccase c; casenumber id; - pqueue_pop (pq, &c, &id); - case_destroy (&c); + struct ccase *c = pqueue_pop (pq, &id); + case_unref (c); } - case_ordering_destroy (pq->ordering); - free (pq->records); + subcase_destroy (&pq->ordering); free (pq); } } @@ -241,13 +281,13 @@ pqueue_destroy (struct pqueue *pq) static bool pqueue_is_full (const struct pqueue *pq) { - return pq->record_cnt >= pq->record_cap; + return bt_count (&pq->bt) >= pq->record_cap; } static bool pqueue_is_empty (const struct pqueue *pq) { - return pq->record_cnt == 0; + return bt_is_empty (&pq->bt); } static void @@ -257,43 +297,59 @@ pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id) assert (!pqueue_is_full (pq)); - r = &pq->records[pq->record_cnt++]; + r = xmalloc (sizeof *r); r->id = id; - case_move (&r->c, c); + r->c = c; r->idx = pq->idx++; + bt_insert (&pq->bt, &r->bt_node); - push_heap (pq->records, pq->record_cnt, sizeof *pq->records, - compare_pqueue_records_minheap, pq); + if (pq->combine != NULL) + { + struct bt_node *q_ = bt_prev (&pq->bt, &r->bt_node); + if (q_ != NULL) + { + struct pqueue_record *q = bt_data (q_, struct pqueue_record, + bt_node); + if (q->id == r->id && subcase_equal (&pq->ordering, q->c, + &pq->ordering, r->c)) + { + bt_delete (&pq->bt, &r->bt_node); + free (r); + q->c = pq->combine (q->c, r->c, pq->aux); + } + } + } } -static void -pqueue_pop (struct pqueue *pq, struct ccase *c, casenumber *id) +static struct ccase * +pqueue_pop (struct pqueue *pq, casenumber *id) { struct pqueue_record *r; + struct ccase *c; assert (!pqueue_is_empty (pq)); - pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records, - compare_pqueue_records_minheap, pq); - - r = &pq->records[pq->record_cnt]; + r = bt_data (bt_first (&pq->bt), struct pqueue_record, bt_node); + bt_delete (&pq->bt, &r->bt_node); *id = r->id; - case_move (c, &r->c); + c = r->c; + free (r); + return c; } /* Compares record-run tuples A and B on id, then on case data, - then on insertion order, in descending order. */ + then on insertion order. */ static int -compare_pqueue_records_minheap (const void *a_, const void *b_, - const void *pq_) +compare_pqueue_records (const struct bt_node *a_, const struct bt_node *b_, + const void *ordering_) { - const struct pqueue_record *a = a_; - const struct pqueue_record *b = b_; - const struct pqueue *pq = pq_; + const struct pqueue_record *a = bt_data (a_, struct pqueue_record, bt_node); + const struct pqueue_record *b = bt_data (b_, struct pqueue_record, bt_node); + const struct subcase *ordering = ordering_; int result = a->id < b->id ? -1 : a->id > b->id; if (result == 0) - result = case_ordering_compare_cases (&a->c, &b->c, pq->ordering); + result = subcase_compare_3way (ordering, a->c, ordering, b->c); if (result == 0) result = a->idx < b->idx ? -1 : a->idx > b->idx; - return -result; + return result; }