/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009-12, 2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include "sort.h"
+#include "math/sort.h"
#include <stdio.h>
-#include <data/case.h>
-#include <data/casereader.h>
-#include <data/casewriter.h>
-#include <data/casewriter-provider.h>
-#include <data/settings.h>
-#include <data/subcase.h>
-#include <libpspp/array.h>
-#include <libpspp/assertion.h>
-#include <math/merge.h>
+#include "data/case.h"
+#include "data/casereader.h"
+#include "data/casewriter-provider.h"
+#include "data/casewriter.h"
+#include "data/settings.h"
+#include "data/subcase.h"
+#include "libpspp/array.h"
+#include "libpspp/assertion.h"
+#include "libpspp/bt.h"
+#include "math/merge.h"
-#include "xalloc.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
struct sort_writer
{
- size_t value_cnt;
+ struct caseproto *proto;
struct subcase ordering;
struct merge *merge;
struct pqueue *pqueue;
+ sort_distinct_combine_func *combine;
+ sort_distinct_destroy_func *destroy;
+ void *aux;
+
struct casewriter *run;
casenumber run_id;
struct ccase *run_end;
static struct casewriter_class sort_casewriter_class;
-static struct pqueue *pqueue_create (const struct subcase *, size_t);
+static struct pqueue *pqueue_create (const struct subcase *,
+ const struct caseproto *,
+ sort_distinct_combine_func *, void *aux);
static void pqueue_destroy (struct pqueue *);
static bool pqueue_is_full (const struct pqueue *);
static bool pqueue_is_empty (const struct pqueue *);
static void output_record (struct sort_writer *);
struct casewriter *
-sort_create_writer (const struct subcase *ordering, size_t value_cnt)
+sort_create_writer (const struct subcase *ordering,
+ const struct caseproto *proto)
+{
+ return sort_distinct_create_writer (ordering, proto, NULL, NULL, NULL); /* Plain sort: no combine or destroy callback, no aux data. */
+}
+
+struct casewriter *
+sort_distinct_create_writer (const struct subcase *ordering,
+ const struct caseproto *proto,
+ sort_distinct_combine_func *combine,
+ sort_distinct_destroy_func *destroy,
+ void *aux)
{
struct sort_writer *sort;
sort = xmalloc (sizeof *sort);
- sort->value_cnt = value_cnt;
+ sort->proto = caseproto_ref (proto); /* Reference is dropped with caseproto_unref() when SORT is freed. */
subcase_clone (&sort->ordering, ordering);
- sort->merge = merge_create (ordering, value_cnt);
- sort->pqueue = pqueue_create (ordering, value_cnt);
+ sort->merge = merge_create (ordering, proto, combine, aux);
+ sort->pqueue = pqueue_create (ordering, proto, combine, aux);
+
+ sort->combine = combine;
+ sort->destroy = destroy; /* If non-null, called on AUX when SORT is freed. */
+ sort->aux = aux;
+
sort->run = NULL;
sort->run_id = 0;
sort->run_end = NULL;
- return casewriter_create (value_cnt, &sort_casewriter_class, sort);
+ return casewriter_create (proto, &sort_casewriter_class, sort);
}
static void
{
struct sort_writer *sort = sort_;
+ if (sort->destroy != NULL)
+ sort->destroy (sort->aux);
+
subcase_destroy (&sort->ordering);
merge_destroy (sort->merge);
pqueue_destroy (sort->pqueue);
casewriter_destroy (sort->run);
case_unref (sort->run_end);
+ caseproto_unref (sort->proto);
free (sort);
}
if (sort->run == NULL && sort->run_id == 0)
{
/* In-core sort. */
- sort->run = mem_writer_create (casewriter_get_value_cnt (writer));
+ sort->run = mem_writer_create (sort->proto);
sort->run_id = 1;
}
while (!pqueue_is_empty (sort->pqueue))
}
if (sort->run == NULL)
{
- sort->run = tmpfile_writer_create (sort->value_cnt);
+ sort->run = tmpfile_writer_create (sort->proto);
sort->run_id = min_run_id;
}
};
\f
/* Reads all the cases from INPUT. Sorts the cases according to
- ORDERING. Returns the sorted cases in a new casereader. */
+ ORDERING. Returns the sorted cases in a new casereader.
+ INPUT is destroyed by this function. */
struct casereader *
sort_execute (struct casereader *input, const struct subcase *ordering)
{
struct casewriter *output =
- sort_create_writer (ordering, casereader_get_value_cnt (input));
+ sort_create_writer (ordering, casereader_get_proto (input)); /* The sort writer is created with INPUT's case prototype. */
casereader_transfer (input, output);
return casewriter_make_reader (output);
}
/* Reads all the cases from INPUT. Sorts the cases in ascending
order according to VARIABLE. Returns the sorted cases in a
- new casereader. */
+ new casereader. INPUT is destroyed by this function. */
struct casereader *
sort_execute_1var (struct casereader *input, const struct variable *var)
{
subcase_destroy (&sc);
return reader;
}
+
+/* Reads all the cases from INPUT. Sorts the cases according to ORDERING,
+ combining cases that have the same ORDERING values using COMBINE.
+ Returns the sorted cases in a new casereader. INPUT is destroyed by
+ this function.
+
+ AUX is passed to COMBINE as auxiliary data. If DESTROY is non-null, it
+ is called on AUX when the underlying sort writer is destroyed. */
+struct casereader *
+sort_distinct_execute (struct casereader *input,
+ const struct subcase *ordering,
+ sort_distinct_combine_func *combine,
+ sort_distinct_destroy_func *destroy,
+ void *aux)
+{
+ struct casewriter *output =
+ sort_distinct_create_writer (ordering, casereader_get_proto (input),
+ combine, destroy, aux);
+ casereader_transfer (input, output);
+ return casewriter_make_reader (output);
+}
\f
struct pqueue
{
struct subcase ordering;
- struct pqueue_record *records;
- size_t record_cnt;
- size_t record_cap;
+ struct bt bt; /* The queued records, ordered by compare_pqueue_records(). */
+ size_t record_max; /* The queue counts as full at this many records. */
casenumber idx;
+
+ sort_distinct_combine_func *combine; /* If non-null, merges a new record into an equal one with the same id. */
+ void *aux; /* Auxiliary data passed to COMBINE. */
};
struct pqueue_record
{
+ struct bt_node bt_node; /* Links this record into the owning pqueue's bt. */
casenumber id;
struct ccase *c;
casenumber idx;
};
-static int compare_pqueue_records_minheap (const void *a, const void *b,
- const void *pq_);
+static int compare_pqueue_records (const struct bt_node *a,
+ const struct bt_node *b,
+ const void *ordering);
static struct pqueue *
-pqueue_create (const struct subcase *ordering, size_t value_cnt)
+pqueue_create (const struct subcase *ordering, const struct caseproto *proto,
+ sort_distinct_combine_func *combine, void *aux)
{
struct pqueue *pq;
pq = xmalloc (sizeof *pq);
subcase_clone (&pq->ordering, ordering);
- pq->record_cap
- = settings_get_workspace_cases (value_cnt);
- if (pq->record_cap > max_buffers)
- pq->record_cap = max_buffers;
- else if (pq->record_cap < min_buffers)
- pq->record_cap = min_buffers;
- pq->record_cnt = 0;
- pq->records = xnmalloc (pq->record_cap, sizeof *pq->records);
+ pq->record_max = settings_get_workspace_cases (proto); /* Clamped to [min_buffers, max_buffers] just below. */
+ if (pq->record_max > max_buffers)
+ pq->record_max = max_buffers;
+ else if (pq->record_max < min_buffers)
+ pq->record_max = min_buffers;
+ bt_init (&pq->bt, compare_pqueue_records, &pq->ordering); /* The queue's own clone of ORDERING is handed to the comparison function. */
pq->idx = 0;
+ pq->combine = combine; /* May be null, in which case records are never combined. */
+ pq->aux = aux;
+
return pq;
}
case_unref (c);
}
subcase_destroy (&pq->ordering);
- free (pq->records);
free (pq);
}
}
static bool
pqueue_is_full (const struct pqueue *pq)
{
- return pq->record_cnt >= pq->record_cap;
+ return bt_count (&pq->bt) >= pq->record_max; /* Full once the tree holds record_max records. */
}
static bool
pqueue_is_empty (const struct pqueue *pq)
{
- return pq->record_cnt == 0;
+ return bt_is_empty (&pq->bt); /* Empty when the tree holds no records. */
}
static void
assert (!pqueue_is_full (pq));
- r = &pq->records[pq->record_cnt++];
+ r = xmalloc (sizeof *r);
r->id = id;
r->c = c;
r->idx = pq->idx++;
+ bt_insert (&pq->bt, &r->bt_node);
- push_heap (pq->records, pq->record_cnt, sizeof *pq->records,
- compare_pqueue_records_minheap, pq);
+ if (pq->combine != NULL)
+ {
+ struct bt_node *q_ = bt_prev (&pq->bt, &r->bt_node);
+ if (q_ != NULL)
+ {
+ struct pqueue_record *q = bt_data (q_, struct pqueue_record,
+ bt_node);
+ if (q->id == r->id && subcase_equal (&pq->ordering, q->c,
+ &pq->ordering, r->c))
+ {
+ bt_delete (&pq->bt, &r->bt_node);
+ q->c = pq->combine (q->c, r->c, pq->aux);
+ free (r);
+ }
+ }
+ }
}
static struct ccase *
pqueue_pop (struct pqueue *pq, casenumber *id)
{
struct pqueue_record *r;
+ struct ccase *c;
assert (!pqueue_is_empty (pq));
- pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records,
- compare_pqueue_records_minheap, pq);
-
- r = &pq->records[pq->record_cnt];
+ r = bt_data (bt_first (&pq->bt), struct pqueue_record, bt_node); /* First record in compare_pqueue_records() order. */
+ bt_delete (&pq->bt, &r->bt_node);
*id = r->id;
- return r->c;
+ c = r->c; /* Save the case pointer before its record is freed. */
+ free (r); /* Frees only the record; the case is returned to the caller. */
+ return c;
}
/* Compares record-run tuples A and B on id, then on case data,
- then on insertion order, in descending order. */
+ then on insertion order. */
static int
-compare_pqueue_records_minheap (const void *a_, const void *b_,
- const void *pq_)
+compare_pqueue_records (const struct bt_node *a_, const struct bt_node *b_,
+ const void *ordering_)
{
- const struct pqueue_record *a = a_;
- const struct pqueue_record *b = b_;
- const struct pqueue *pq = pq_;
+ const struct pqueue_record *a = bt_data (a_, struct pqueue_record, bt_node);
+ const struct pqueue_record *b = bt_data (b_, struct pqueue_record, bt_node);
+ const struct subcase *ordering = ordering_; /* The &pq->ordering pointer given to bt_init(). */
int result = a->id < b->id ? -1 : a->id > b->id;
if (result == 0)
- result = subcase_compare_3way (&pq->ordering, a->c, &pq->ordering, b->c);
+ result = subcase_compare_3way (ordering, a->c, ordering, b->c);
if (result == 0)
result = a->idx < b->idx ? -1 : a->idx > b->idx;
- return -result;
+ return result;
}