/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include "sort.h"
+#include "math/sort.h"
#include <stdio.h>
-#include <data/case-ordering.h>
-#include <data/case.h>
-#include <data/casereader.h>
-#include <data/casewriter.h>
-#include <data/casewriter-provider.h>
-#include <data/settings.h>
-#include <libpspp/array.h>
-#include <libpspp/assertion.h>
-#include <math/merge.h>
+#include "data/case.h"
+#include "data/casereader.h"
+#include "data/casewriter-provider.h"
+#include "data/casewriter.h"
+#include "data/settings.h"
+#include "data/subcase.h"
+#include "libpspp/array.h"
+#include "libpspp/assertion.h"
+#include "math/merge.h"
-#include "xalloc.h"
-
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
+#include "gl/xalloc.h"
/* These should only be changed for testing purposes. */
int min_buffers = 64;
struct sort_writer /* Private state of a sorting casewriter; allocated in sort_create_writer(). */
{
- size_t value_cnt;
- struct case_ordering *ordering;
+ struct caseproto *proto; /* Reference obtained with caseproto_ref(); dropped with caseproto_unref() at teardown. */
+ struct subcase ordering; /* Private clone of the caller's sort criteria. */
struct merge *merge; /* Created by merge_create(); released with merge_destroy().  Presumably merges finished runs -- confirm in math/merge.h. */
struct pqueue *pqueue; /* Priority queue holding the cases buffered so far. */
struct casewriter *run; /* Writer receiving the run currently being output; NULL until one is created. */
casenumber run_id; /* ID of the run held by `run'; starts at 0, before any run exists. */
- struct ccase run_end;
+ struct ccase *run_end; /* Last case written to `run', or NULL if nothing has been output yet. */
};
static struct casewriter_class sort_casewriter_class;
-static struct pqueue *pqueue_create (const struct case_ordering *, size_t);
+static struct pqueue *pqueue_create (const struct subcase *,
+ const struct caseproto *);
static void pqueue_destroy (struct pqueue *);
static bool pqueue_is_full (const struct pqueue *);
static bool pqueue_is_empty (const struct pqueue *);
static void pqueue_push (struct pqueue *, struct ccase *, casenumber);
-static void pqueue_pop (struct pqueue *, struct ccase *, casenumber *);
+static struct ccase *pqueue_pop (struct pqueue *, casenumber *);
static void output_record (struct sort_writer *);
+/* Creates a casewriter that sorts the cases written to it. Once all the cases
+ have been written, use casewriter_make_reader() to obtain the sorted
+ results.
+
+ ORDERING gives the sort criteria and PROTO the prototype of the cases to be
+ written. Neither is consumed: ORDERING is cloned and a new reference is
+ taken on PROTO, so the caller remains responsible for its own copies. */
struct casewriter *
-sort_create_writer (struct case_ordering *ordering, size_t value_cnt)
+sort_create_writer (const struct subcase *ordering,
+ const struct caseproto *proto)
{
struct sort_writer *sort;
sort = xmalloc (sizeof *sort);
- sort->value_cnt = value_cnt;
- sort->ordering = case_ordering_clone (ordering);
- sort->merge = merge_create (ordering, value_cnt);
- sort->pqueue = pqueue_create (ordering, value_cnt);
+ sort->proto = caseproto_ref (proto); /* Dropped again with caseproto_unref() in the teardown code. */
+ subcase_clone (&sort->ordering, ordering); /* Private copy; uninitialized in the teardown code. */
+ sort->merge = merge_create (ordering, proto);
+ sort->pqueue = pqueue_create (ordering, proto); /* The queue makes its own clone of ORDERING. */
sort->run = NULL; /* No run is open yet. */
sort->run_id = 0;
- case_nullify (&sort->run_end);
-
- case_ordering_destroy (ordering);
+ sort->run_end = NULL; /* Nothing has been output yet. */
- return casewriter_create (value_cnt, &sort_casewriter_class, sort);
+ return casewriter_create (proto, &sort_casewriter_class, sort);
}
static void
if (pqueue_is_full (sort->pqueue))
output_record (sort);
- next_run = (case_is_null (&sort->run_end)
- || case_ordering_compare_cases (c, &sort->run_end,
- sort->ordering) < 0);
+ next_run = (sort->run_end == NULL
+ || subcase_compare_3way (&sort->ordering, c,
+ &sort->ordering, sort->run_end) < 0);
pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0));
}
{
struct sort_writer *sort = sort_;
- case_ordering_destroy (sort->ordering);
+ subcase_uninit (&sort->ordering);
merge_destroy (sort->merge);
pqueue_destroy (sort->pqueue);
casewriter_destroy (sort->run);
- case_destroy (&sort->run_end);
+ case_unref (sort->run_end);
+ caseproto_unref (sort->proto);
free (sort);
}
if (sort->run == NULL && sort->run_id == 0)
{
/* In-core sort. */
- sort->run = mem_writer_create (casewriter_get_value_cnt (writer));
+ sort->run = mem_writer_create (sort->proto);
sort->run_id = 1;
}
while (!pqueue_is_empty (sort->pqueue))
static void
output_record (struct sort_writer *sort)
{
- struct ccase min_case;
+ struct ccase *min_case;
casenumber min_run_id;
- pqueue_pop (sort->pqueue, &min_case, &min_run_id);
+ min_case = pqueue_pop (sort->pqueue, &min_run_id);
#if 0
- printf ("\toutput: %f to run %d\n", case_num_idx (&min_case, 0), min_run_id);
+ printf ("\toutput: %f to run %d\n", case_num_idx (min_case, 0), min_run_id);
#endif
if (sort->run_id != min_run_id && sort->run != NULL)
}
if (sort->run == NULL)
{
- sort->run = tmpfile_writer_create (sort->value_cnt);
+ sort->run = tmpfile_writer_create (sort->proto);
sort->run_id = min_run_id;
}
- case_destroy (&sort->run_end);
- case_clone (&sort->run_end, &min_case);
-
- casewriter_write (sort->run, &min_case);
+ case_unref (sort->run_end);
+ sort->run_end = case_ref (min_case);
+ casewriter_write (sort->run, min_case);
}
static struct casewriter_class sort_casewriter_class =
};
\f
/* Reads all the cases from INPUT. Sorts the cases according to
- ORDERING. Returns the sorted cases in a new casereader, or a
- null pointer if an I/O error occurs. Both INPUT and ORDERING
- are destroyed upon return, regardless of success. */
+ ORDERING. Returns the sorted cases in a new casereader.
+ INPUT is destroyed by this function.
+ */
struct casereader *
-sort_execute (struct casereader *input, struct case_ordering *ordering)
+sort_execute (struct casereader *input, const struct subcase *ordering)
{
struct casewriter *output =
- sort_create_writer (ordering, casereader_get_value_cnt (input));
+ sort_create_writer (ordering, casereader_get_proto (input)); /* The sorting writer uses INPUT's own case prototype. */
casereader_transfer (input, output); /* Feeds INPUT's cases to the sorting writer; presumably also where INPUT is destroyed -- confirm in data/casereader.h. */
return casewriter_make_reader (output); /* Converts the writer into a reader over the sorted cases. */
}
+
+/* Reads all the cases from INPUT. Sorts the cases in ascending
+ order according to VAR. Returns the sorted cases in a
+ new casereader. INPUT is destroyed by this function. */
+struct casereader *
+sort_execute_1var (struct casereader *input, const struct variable *var)
+{
+ struct subcase sc;
+ struct casereader *reader;
+
+ subcase_init_var (&sc, var, SC_ASCEND); /* Temporary single-key ordering on VAR. */
+ reader = sort_execute (input, &sc);
+ subcase_uninit (&sc); /* Safe to release here: the sorting writer works on its own clone of the ordering. */
+ return reader;
+}
\f
struct pqueue /* Heap-ordered queue of cases tagged with run IDs. */
{
- struct case_ordering *ordering;
+ struct subcase ordering; /* Private clone of the sort criteria, used when comparing records. */
struct pqueue_record *records; /* Heap array; NULL until the first push allocates it. */
- size_t record_cnt;
- size_t record_cap;
+ size_t n_records; /* Current number of records. */
+ size_t allocated_records; /* Space currently allocated for records. */
+ size_t max_records; /* Max space we are willing to allocate. */
casenumber idx; /* Sequence number handed to the next pushed record; incremented on every push. */
};
struct pqueue_record /* One entry in the priority queue's heap array. */
{
casenumber id; /* Run ID supplied to pqueue_push(); the primary comparison key. */
- struct ccase c;
+ struct ccase *c; /* The queued case; compared with the queue's ordering when IDs are equal. */
casenumber idx; /* Insertion sequence number; the final tie-breaker in the comparison. */
};
const void *pq_);
static struct pqueue *
-pqueue_create (const struct case_ordering *ordering, size_t value_cnt)
+pqueue_create (const struct subcase *ordering, const struct caseproto *proto)
{
struct pqueue *pq;
pq = xmalloc (sizeof *pq);
- pq->ordering = case_ordering_clone (ordering);
- pq->record_cap
- = settings_get_workspace_cases (value_cnt);
- if (pq->record_cap > max_buffers)
- pq->record_cap = max_buffers;
- else if (pq->record_cap < min_buffers)
- pq->record_cap = min_buffers;
- pq->record_cnt = 0;
- pq->records = xnmalloc (pq->record_cap, sizeof *pq->records);
+ subcase_clone (&pq->ordering, ordering);
+ pq->max_records = settings_get_workspace_cases (proto);
+ if (pq->max_records > max_buffers)
+ pq->max_records = max_buffers;
+ else if (pq->max_records < min_buffers)
+ pq->max_records = min_buffers;
+ pq->n_records = 0;
+ pq->allocated_records = 0;
+ pq->records = NULL;
pq->idx = 0;
return pq;
{
while (!pqueue_is_empty (pq))
{
- struct ccase c;
casenumber id;
- pqueue_pop (pq, &c, &id);
- case_destroy (&c);
+ struct ccase *c = pqueue_pop (pq, &id);
+ case_unref (c);
}
- case_ordering_destroy (pq->ordering);
+ subcase_uninit (&pq->ordering);
free (pq->records);
free (pq);
}
static bool
pqueue_is_full (const struct pqueue *pq) /* Returns true if PQ already holds its maximum number of records. */
{
- return pq->record_cnt >= pq->record_cap;
+ return pq->n_records >= pq->max_records; /* Measured against the configured cap, not against the space allocated so far. */
}
static bool
pqueue_is_empty (const struct pqueue *pq) /* Returns true if PQ contains no records. */
{
- return pq->record_cnt == 0;
+ return pq->n_records == 0;
}
static void
assert (!pqueue_is_full (pq));
- r = &pq->records[pq->record_cnt++];
+ if (pq->n_records >= pq->allocated_records)
+ {
+ pq->allocated_records = pq->allocated_records * 2;
+ if (pq->allocated_records < 16)
+ pq->allocated_records = 16;
+ else if (pq->allocated_records > pq->max_records)
+ pq->allocated_records = pq->max_records;
+ pq->records = xrealloc (pq->records,
+ pq->allocated_records * sizeof *pq->records);
+ }
+
+ r = &pq->records[pq->n_records++];
r->id = id;
- case_move (&r->c, c);
+ r->c = c;
r->idx = pq->idx++;
- push_heap (pq->records, pq->record_cnt, sizeof *pq->records,
+ push_heap (pq->records, pq->n_records, sizeof *pq->records,
compare_pqueue_records_minheap, pq);
}
-static void
-pqueue_pop (struct pqueue *pq, struct ccase *c, casenumber *id)
+static struct ccase *
+pqueue_pop (struct pqueue *pq, casenumber *id) /* Removes the top record from PQ, stores its run ID in *ID, and returns its case.  PQ must not be empty. */
{
struct pqueue_record *r;
assert (!pqueue_is_empty (pq));
- pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records,
+ pop_heap (pq->records, pq->n_records--, sizeof *pq->records, /* Post-decrement: pop_heap() is given the count before removal. */
compare_pqueue_records_minheap, pq);
- r = &pq->records[pq->record_cnt];
+ r = &pq->records[pq->n_records]; /* The decremented count indexes the old last slot; presumably pop_heap() moved the extracted record there -- confirm in libpspp/array.h. */
*id = r->id;
- case_move (c, &r->c);
+ return r->c; /* The pointer is handed to the caller without taking a new reference; pqueue_destroy() unrefs each case it pops. */
}
/* Compares record-run tuples A and B on id, then on case data,
const struct pqueue *pq = pq_;
int result = a->id < b->id ? -1 : a->id > b->id;
if (result == 0)
- result = case_ordering_compare_cases (&a->c, &b->c, pq->ordering);
+ result = subcase_compare_3way (&pq->ordering, a->c, &pq->ordering, b->c);
if (result == 0)
result = a->idx < b->idx ? -1 : a->idx > b->idx;
return -result;