X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fsort.c;h=61256ccdbc6d1f8dd19e284232778263a585ed41;hb=490ac70d9c9f754f733552d64c23dd6aedced342;hp=3491a20bfd684abae50fe88f2f232d0bbe04a3e4;hpb=fe8dc2171009e90d2335f159d05f7e6660e24780;p=pspp diff --git a/src/math/sort.c b/src/math/sort.c index 3491a20bfd..61256ccdbc 100644 --- a/src/math/sort.c +++ b/src/math/sort.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009-12, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include "data/subcase.h" #include "libpspp/array.h" #include "libpspp/assertion.h" +#include "libpspp/bt.h" #include "math/merge.h" #include "gl/xalloc.h" @@ -46,6 +47,10 @@ struct sort_writer struct merge *merge; struct pqueue *pqueue; + sort_distinct_combine_func *combine; + sort_distinct_destroy_func *destroy; + void *aux; + struct casewriter *run; casenumber run_id; struct ccase *run_end; @@ -54,7 +59,8 @@ struct sort_writer static struct casewriter_class sort_casewriter_class; static struct pqueue *pqueue_create (const struct subcase *, - const struct caseproto *); + const struct caseproto *, + sort_distinct_combine_func *, void *aux); static void pqueue_destroy (struct pqueue *); static bool pqueue_is_full (const struct pqueue *); static bool pqueue_is_empty (const struct pqueue *); @@ -66,14 +72,29 @@ static void output_record (struct sort_writer *); struct casewriter * sort_create_writer (const struct subcase *ordering, const struct caseproto *proto) +{ + return sort_distinct_create_writer (ordering, proto, NULL, NULL, NULL); +} + +struct casewriter * +sort_distinct_create_writer (const struct subcase *ordering, + const struct caseproto *proto, + sort_distinct_combine_func *combine, + sort_distinct_destroy_func *destroy, + void *aux) { struct sort_writer *sort; sort = xmalloc (sizeof *sort); sort->proto = caseproto_ref (proto); subcase_clone (&sort->ordering, ordering); - sort->merge = merge_create (ordering, proto); - sort->pqueue = pqueue_create (ordering, proto); + sort->merge = merge_create (ordering, proto, combine, aux); + sort->pqueue = pqueue_create (ordering, proto, combine, aux); + + sort->combine = combine; + sort->destroy = destroy; + sort->aux = aux; + sort->run = NULL; sort->run_id = 0; sort->run_end = NULL; @@ -102,6 +123,9 @@ sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_) { struct sort_writer *sort = sort_; + if (sort->destroy != NULL) + sort->destroy (sort->aux); + subcase_destroy (&sort->ordering); merge_destroy (sort->merge); pqueue_destroy (sort->pqueue); @@ -199,38 +223,45 @@ sort_execute_1var (struct casereader *input, const struct variable *var) struct pqueue { struct subcase ordering; - struct pqueue_record *records; - size_t record_cnt; - size_t record_cap; + struct bt bt; + size_t record_max; casenumber idx; + + sort_distinct_combine_func *combine; + void *aux; }; struct pqueue_record { + struct bt_node bt_node; casenumber id; struct ccase *c; casenumber idx; }; -static int compare_pqueue_records_minheap (const void *a, const void *b, - const void *pq_); +static int compare_pqueue_records (const struct bt_node *a, + const struct bt_node *b, + const void *ordering); static struct pqueue * -pqueue_create (const struct subcase *ordering, const struct caseproto *proto) +pqueue_create (const struct subcase *ordering, const struct caseproto *proto, + sort_distinct_combine_func *combine, void *aux) { struct pqueue *pq; pq = xmalloc (sizeof *pq); subcase_clone (&pq->ordering, ordering); - pq->record_cap = settings_get_workspace_cases (proto); - if (pq->record_cap > max_buffers) - pq->record_cap = max_buffers; - else if (pq->record_cap < min_buffers) - pq->record_cap = min_buffers; - pq->record_cnt = 0; - pq->records = xnmalloc (pq->record_cap, sizeof *pq->records); + pq->record_max = settings_get_workspace_cases (proto); + if (pq->record_max > max_buffers) + pq->record_max = max_buffers; + else if (pq->record_max < min_buffers) + pq->record_max = min_buffers; + bt_init (&pq->bt, compare_pqueue_records, &pq->ordering); pq->idx = 0; + pq->combine = combine; + pq->aux = aux; + return pq; } @@ -246,7 +277,6 @@ pqueue_destroy (struct pqueue *pq) case_unref (c); } subcase_destroy (&pq->ordering); - free (pq->records); free (pq); } } @@ -254,13 +284,13 @@ pqueue_destroy (struct pqueue *pq) static bool pqueue_is_full (const struct pqueue *pq) { - return pq->record_cnt >= pq->record_cap; + return bt_count (&pq->bt) >= pq->record_max; } static bool pqueue_is_empty (const struct pqueue *pq) { - return pq->record_cnt == 0; + return bt_is_empty (&pq->bt); } static void @@ -270,43 +300,59 @@ pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id) assert (!pqueue_is_full (pq)); - r = &pq->records[pq->record_cnt++]; + r = xmalloc (sizeof *r); r->id = id; r->c = c; r->idx = pq->idx++; + bt_insert (&pq->bt, &r->bt_node); - push_heap (pq->records, pq->record_cnt, sizeof *pq->records, - compare_pqueue_records_minheap, pq); + if (pq->combine != NULL) + { + struct bt_node *q_ = bt_prev (&pq->bt, &r->bt_node); + if (q_ != NULL) + { + struct pqueue_record *q = bt_data (q_, struct pqueue_record, + bt_node); + if (q->id == r->id && subcase_equal (&pq->ordering, q->c, + &pq->ordering, r->c)) + { + bt_delete (&pq->bt, &r->bt_node); + q->c = pq->combine (q->c, r->c, pq->aux); + free (r); + } + } + } } static struct ccase * pqueue_pop (struct pqueue *pq, casenumber *id) { struct pqueue_record *r; + struct ccase *c; assert (!pqueue_is_empty (pq)); - pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records, - compare_pqueue_records_minheap, pq); - - r = &pq->records[pq->record_cnt]; + r = bt_data (bt_first (&pq->bt), struct pqueue_record, bt_node); + bt_delete (&pq->bt, &r->bt_node); *id = r->id; - return r->c; + c = r->c; + free (r); + return c; } /* Compares record-run tuples A and B on id, then on case data, - then on insertion order, in descending order. */ + then on insertion order. */ static int -compare_pqueue_records_minheap (const void *a_, const void *b_, - const void *pq_) +compare_pqueue_records (const struct bt_node *a_, const struct bt_node *b_, + const void *ordering_) { - const struct pqueue_record *a = a_; - const struct pqueue_record *b = b_; - const struct pqueue *pq = pq_; + const struct pqueue_record *a = bt_data (a_, struct pqueue_record, bt_node); + const struct pqueue_record *b = bt_data (b_, struct pqueue_record, bt_node); + const struct subcase *ordering = ordering_; int result = a->id < b->id ? -1 : a->id > b->id; if (result == 0) - result = subcase_compare_3way (&pq->ordering, a->c, &pq->ordering, b->c); + result = subcase_compare_3way (ordering, a->c, ordering, b->c); if (result == 0) result = a->idx < b->idx ? -1 : a->idx > b->idx; - return -result; + return result; }