From 92c09e564002d356d20fc1e2e131027ef89f6748 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 7 Jun 2007 06:41:58 +0000 Subject: [PATCH] Actually implement the new procedure code and adapt all of its clients to match. Also adapt all of the other case sources and sinks in the tree and their clients to use the casereader/casewriter infrastructure. --- ChangeLog | 4 + Smake | 1 + src/data/ChangeLog | 41 + src/data/any-reader.c | 113 +-- src/data/any-reader.h | 6 +- src/data/any-writer.c | 158 +--- src/data/any-writer.h | 15 +- src/data/automake.mk | 16 - src/data/case-sink.c | 66 -- src/data/case-sink.h | 68 -- src/data/case-source.c | 62 -- src/data/case-source.h | 61 -- src/data/casefile-factory.h | 28 - src/data/casefile-private.h | 102 --- src/data/casefile.c | 344 -------- src/data/casefile.h | 71 -- src/data/casefilter.c | 110 --- src/data/casefilter.h | 56 -- src/data/dictionary.c | 2 +- src/data/por-file-reader.c | 84 +- src/data/por-file-reader.h | 5 +- src/data/por-file-writer.c | 87 +- src/data/por-file-writer.h | 6 +- src/data/procedure.c | 819 ++++++----------- src/data/procedure.h | 73 +- src/data/scratch-handle.c | 8 +- src/data/scratch-handle.h | 2 +- src/data/scratch-reader.c | 58 +- src/data/scratch-reader.h | 7 +- src/data/scratch-writer.c | 87 +- src/data/scratch-writer.h | 7 +- src/data/storage-stream.c | 205 ----- src/data/storage-stream.h | 32 - src/data/sys-file-reader.c | 153 ++-- src/data/sys-file-reader.h | 5 +- src/data/sys-file-writer.c | 73 +- src/data/sys-file-writer.h | 6 +- src/language/ChangeLog | 3 +- src/language/command.c | 13 +- src/language/command.def | 1 - src/language/control/do-if.c | 1 + src/language/data-io/ChangeLog | 15 + src/language/data-io/data-list.c | 65 +- src/language/data-io/data-reader.c | 5 +- src/language/data-io/get.c | 355 +++----- src/language/data-io/inpt-pgm.c | 149 +--- src/language/data-io/list.q | 67 +- src/language/dictionary/ChangeLog | 9 + src/language/dictionary/apply-dictionary.c | 7 +- 
src/language/dictionary/delete-variables.c | 12 +- src/language/dictionary/modify-variables.c | 3 +- src/language/dictionary/sys-file-info.c | 5 +- src/language/expressions/evaluate.c | 2 +- src/language/lexer/variable-parser.c | 6 - src/language/stats/ChangeLog | 29 + src/language/stats/aggregate.c | 231 ++--- src/language/stats/autorecode.c | 15 +- src/language/stats/binomial.c | 172 ++-- src/language/stats/binomial.h | 6 +- src/language/stats/chisquare.c | 156 ++-- src/language/stats/chisquare.h | 12 +- src/language/stats/crosstabs.q | 73 +- src/language/stats/descriptives.c | 80 +- src/language/stats/examine.q | 56 +- src/language/stats/flip.c | 214 ++--- src/language/stats/frequencies.q | 56 +- src/language/stats/npar-summary.c | 38 +- src/language/stats/npar-summary.h | 9 +- src/language/stats/npar.h | 17 +- src/language/stats/npar.q | 75 +- src/language/stats/oneway.q | 90 +- src/language/stats/rank.q | 395 +++------ src/language/stats/regression.q | 308 +++---- src/language/stats/sort-cases.c | 23 +- src/language/stats/sort-criteria.c | 128 +-- src/language/stats/sort-criteria.h | 13 +- src/language/stats/t-test.q | 169 ++-- src/language/tests/automake.mk | 1 - src/language/tests/casefile-test.c | 290 ------ src/libpspp/deque.h | 1 + src/math/ChangeLog | 15 + src/math/automake.mk | 2 + src/math/levene.c | 135 ++- src/math/levene.h | 8 +- src/math/merge.c | 159 ++++ .../gui/flexifile-factory.h => math/merge.h} | 18 +- src/math/sort.c | 835 ++++-------------- src/math/sort.h | 53 +- src/ui/ChangeLog | 10 + src/ui/automake.mk | 4 +- src/ui/flexifile.c | 409 --------- src/ui/flexifile.h | 46 - src/ui/gui/ChangeLog | 16 + src/ui/gui/automake.mk | 2 - src/ui/gui/flexifile-factory.c | 59 -- src/ui/gui/helper.c | 13 +- src/ui/gui/missing-val-dialog.c | 4 +- src/ui/gui/psppire-case-file.c | 178 ++-- src/ui/gui/psppire-case-file.h | 16 +- src/ui/gui/psppire-data-store.c | 36 +- src/ui/gui/psppire.c | 39 +- src/ui/gui/val-labs-dialog.h | 1 + src/ui/terminal/ChangeLog 
| 7 + src/ui/terminal/main.c | 6 +- tests/ChangeLog | 6 + tests/automake.mk | 1 - tests/xforms/casefile.sh | 72 -- 107 files changed, 2600 insertions(+), 5966 deletions(-) delete mode 100644 src/data/case-sink.c delete mode 100644 src/data/case-sink.h delete mode 100644 src/data/case-source.c delete mode 100644 src/data/case-source.h delete mode 100644 src/data/casefile-factory.h delete mode 100644 src/data/casefile-private.h delete mode 100644 src/data/casefile.c delete mode 100644 src/data/casefile.h delete mode 100644 src/data/casefilter.c delete mode 100644 src/data/casefilter.h delete mode 100644 src/data/storage-stream.c delete mode 100644 src/data/storage-stream.h delete mode 100644 src/language/tests/casefile-test.c create mode 100644 src/math/merge.c rename src/{ui/gui/flexifile-factory.h => math/merge.h} (66%) delete mode 100644 src/ui/flexifile.c delete mode 100644 src/ui/flexifile.h delete mode 100644 src/ui/gui/flexifile-factory.c delete mode 100755 tests/xforms/casefile.sh diff --git a/ChangeLog b/ChangeLog index da5339d5..2bfa3441 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2007-06-06 Ben Pfaff + + * Smake: Add xallocsa to modules. + 2007-04-22 Ben Pfaff Implement model checker for testing purposes. diff --git a/Smake b/Smake index 14c19db9..902591c6 100644 --- a/Smake +++ b/Smake @@ -62,6 +62,7 @@ GNULIB_MODULES = \ vsnprintf \ xalloc \ xalloc-die \ + xallocsa \ xsize \ xstrndup \ xvasprintf diff --git a/src/data/ChangeLog b/src/data/ChangeLog index b80d60c2..913249f3 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,44 @@ +2007-06-06 Ben Pfaff + + Actually implement the new procedure code and adapt all of its + clients to match. Also adapt all of the other case sources and + sinks in the tree and their clients to use the + casereader/casewriter infrastructure. + + * automake.mk: Add and remove files. + + * any-reader.c: Change into a casereader. + * por-file-reader.c: Ditto. + * scratch-reader.c: Ditto. 
+ * sys-file-reader.c: Ditto. + + * any-writer.c: Change into a casewriter. + * por-file-writer.c: Ditto. + * scratch-writer.c: Ditto. + * sys-file-writer.c: Ditto. + + * procedure.c: Change to use casereader, casewriter, caseinit, and + other new infrastructure. + + * scratch-handle.c: Adapt to new infrastructure. + + * case-sink.c: Removed, now dead code. + * case-sink.h: Ditto. + * case-source.c: Ditto. + * case-source.h: Ditto. + * casefile-factory.c: Ditto. + * casefile-private.h: Ditto. + * casefile.c: Ditto. + * casefile.h: Ditto. + * casefilter.c: Ditto. + * casefilter.h: Ditto. + * fastfile.c: Ditto. + * fastfile.h: Ditto. + * fastfile-factory.c: Ditto. + * fastfile-factory.h: Ditto. + * storage-stream.c: Ditto. + * storage-stream.h: Ditto. + 2007-06-06 Ben Pfaff Add datasheet code. diff --git a/src/data/any-reader.c b/src/data/any-reader.c index 4951d490..009e4227 100644 --- a/src/data/any-reader.c +++ b/src/data/any-reader.c @@ -36,21 +36,6 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -/* Type of file backing an any_reader. */ -enum any_reader_type - { - SYSTEM_FILE, /* System file. */ - PORTABLE_FILE, /* Portable file. */ - SCRATCH_FILE /* Scratch file. */ - }; - -/* Reader for any type of case-structured file. */ -struct any_reader - { - enum any_reader_type type; /* Type of file. */ - void *private; /* Private data. */ - }; - /* Result of type detection. */ enum detect_result { @@ -83,27 +68,10 @@ try_detect (struct file_handle *handle, bool (*detect) (FILE *)) return is_type ? YES : NO; } -/* If PRIVATE is non-null, creates and returns a new any_reader, - initializing its fields to TYPE and PRIVATE. If PRIVATE is a - null pointer, just returns a null pointer. 
*/ -static struct any_reader * -make_any_reader (enum any_reader_type type, void *private) -{ - if (private != NULL) - { - struct any_reader *reader = xmalloc (sizeof *reader); - reader->type = type; - reader->private = private; - return reader; - } - else - return NULL; -} - -/* Creates an any_reader for HANDLE. On success, returns the new - any_reader and stores the file's dictionary into *DICT. On +/* Returns a casereader for HANDLE. On success, returns the new + casereader and stores the file's dictionary into *DICT. On failure, returns a null pointer. */ -struct any_reader * +struct casereader * any_reader_open (struct file_handle *handle, struct dictionary **dict) { switch (fh_get_referent (handle)) @@ -116,15 +84,13 @@ any_reader_open (struct file_handle *handle, struct dictionary **dict) if (result == IO_ERROR) return NULL; else if (result == YES) - return make_any_reader (SYSTEM_FILE, - sfm_open_reader (handle, dict, NULL)); + return sfm_open_reader (handle, dict, NULL); result = try_detect (handle, pfm_detect); if (result == IO_ERROR) return NULL; else if (result == YES) - return make_any_reader (PORTABLE_FILE, - pfm_open_reader (handle, dict, NULL)); + return pfm_open_reader (handle, dict, NULL); msg (SE, _("\"%s\" is not a system or portable file."), fh_get_file_name (handle)); @@ -136,74 +102,7 @@ any_reader_open (struct file_handle *handle, struct dictionary **dict) return NULL; case FH_REF_SCRATCH: - return make_any_reader (SCRATCH_FILE, - scratch_reader_open (handle, dict)); - } - NOT_REACHED (); -} - -/* Reads a single case from READER into C. - Returns true if successful, false at end of file or on error. 
*/ -bool -any_reader_read (struct any_reader *reader, struct ccase *c) -{ - switch (reader->type) - { - case SYSTEM_FILE: - return sfm_read_case (reader->private, c); - - case PORTABLE_FILE: - return pfm_read_case (reader->private, c); - - case SCRATCH_FILE: - return scratch_reader_read_case (reader->private, c); + return scratch_reader_open (handle, dict); } NOT_REACHED (); } - -/* Returns true if an I/O error has occurred on READER, false - otherwise. */ -bool -any_reader_error (struct any_reader *reader) -{ - switch (reader->type) - { - case SYSTEM_FILE: - return sfm_read_error (reader->private); - - case PORTABLE_FILE: - return pfm_read_error (reader->private); - - case SCRATCH_FILE: - return scratch_reader_error (reader->private); - } - NOT_REACHED (); -} - -/* Closes READER. */ -void -any_reader_close (struct any_reader *reader) -{ - if (reader == NULL) - return; - - switch (reader->type) - { - case SYSTEM_FILE: - sfm_close_reader (reader->private); - break; - - case PORTABLE_FILE: - pfm_close_reader (reader->private); - break; - - case SCRATCH_FILE: - scratch_reader_close (reader->private); - break; - - default: - NOT_REACHED (); - } - - free (reader); -} diff --git a/src/data/any-reader.h b/src/data/any-reader.h index bd3c2881..44c8cef7 100644 --- a/src/data/any-reader.h +++ b/src/data/any-reader.h @@ -23,11 +23,7 @@ struct file_handle; struct dictionary; -struct ccase; -struct any_reader *any_reader_open (struct file_handle *, +struct casereader *any_reader_open (struct file_handle *, struct dictionary **); -bool any_reader_read (struct any_reader *, struct ccase *); -bool any_reader_error (struct any_reader *); -void any_reader_close (struct any_reader *); #endif /* any-reader.h */ diff --git a/src/data/any-writer.c b/src/data/any-writer.c index de44df46..195292af 100644 --- a/src/data/any-writer.c +++ b/src/data/any-writer.c @@ -36,41 +36,26 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -/* Type of file backing an any_writer. 
*/ -enum any_writer_type - { - SYSTEM_FILE, /* System file. */ - PORTABLE_FILE, /* Portable file. */ - SCRATCH_FILE /* Scratch file. */ - }; - -/* Writer for any type of case-structured file. */ -struct any_writer - { - enum any_writer_type type; /* Type of file. */ - void *private; /* Private data. */ - }; - /* Creates and returns a writer for HANDLE with the given DICT. */ -struct any_writer * +struct casewriter * any_writer_open (struct file_handle *handle, struct dictionary *dict) { switch (fh_get_referent (handle)) { case FH_REF_FILE: { - struct any_writer *writer; + struct casewriter *writer; char *extension; extension = fn_extension (fh_get_file_name (handle)); str_lowercase (extension); if (!strcmp (extension, ".por")) - writer = any_writer_from_pfm_writer ( - pfm_open_writer (handle, dict, pfm_writer_default_options ())); + writer = pfm_open_writer (handle, dict, + pfm_writer_default_options ()); else - writer = any_writer_from_sfm_writer ( - sfm_open_writer (handle, dict, sfm_writer_default_options ())); + writer = sfm_open_writer (handle, dict, + sfm_writer_default_options ()); free (extension); return writer; @@ -81,137 +66,8 @@ any_writer_open (struct file_handle *handle, struct dictionary *dict) return NULL; case FH_REF_SCRATCH: - return any_writer_from_scratch_writer (scratch_writer_open (handle, - dict)); + return scratch_writer_open (handle, dict); } NOT_REACHED (); } - -/* If PRIVATE is non-null, creates and returns a new any_writer, - initializing its fields to TYPE and PRIVATE. If PRIVATE is a - null pointer, just returns a null pointer. */ -static struct any_writer * -make_any_writer (enum any_writer_type type, void *private) -{ - if (private != NULL) - { - struct any_writer *writer = xmalloc (sizeof *writer); - writer->type = type; - writer->private = private; - return writer; - } - else - return NULL; -} - -/* If SFM_WRITER is non-null, encapsulates SFM_WRITER in an - any_writer and returns it. 
If SFM_WRITER is null, just - returns a null pointer. - - Useful when you need to pass options to sfm_open_writer(). - Typical usage: - any_writer_from_sfm_writer (sfm_open_writer (fh, dict, opts)) - If you don't need to pass options, then any_writer_open() by - itself is easier and more straightforward. */ -struct any_writer * -any_writer_from_sfm_writer (struct sfm_writer *sfm_writer) -{ - return make_any_writer (SYSTEM_FILE, sfm_writer); -} - -/* If PFM_WRITER is non-null, encapsulates PFM_WRITER in an - any_writer and returns it. If PFM_WRITER is null, just - returns a null pointer. - - Useful when you need to pass options to pfm_open_writer(). - Typical usage: - any_writer_from_pfm_writer (pfm_open_writer (fh, dict, opts)) - If you don't need to pass options, then any_writer_open() by - itself is easier and more straightforward. */ -struct any_writer * -any_writer_from_pfm_writer (struct pfm_writer *pfm_writer) -{ - return make_any_writer (PORTABLE_FILE, pfm_writer); -} - -/* If SCRATCH_WRITER is non-null, encapsulates SCRATCH_WRITER in - an any_writer and returns it. If SCRATCH_WRITER is null, just - returns a null pointer. - - Not particularly useful. Included just for consistency. */ -struct any_writer * -any_writer_from_scratch_writer (struct scratch_writer *scratch_writer) -{ - return make_any_writer (SCRATCH_FILE, scratch_writer); -} - -/* Writes cases C to WRITER. - Returns true if successful, false on failure. */ -bool -any_writer_write (struct any_writer *writer, const struct ccase *c) -{ - switch (writer->type) - { - case SYSTEM_FILE: - return sfm_write_case (writer->private, c); - - case PORTABLE_FILE: - return pfm_write_case (writer->private, c); - - case SCRATCH_FILE: - return scratch_writer_write_case (writer->private, c); - } - NOT_REACHED (); -} - -/* Returns true if an I/O error has occurred on WRITER, false - otherwise. 
*/ -bool -any_writer_error (const struct any_writer *writer) -{ - switch (writer->type) - { - case SYSTEM_FILE: - return sfm_write_error (writer->private); - - case PORTABLE_FILE: - return pfm_write_error (writer->private); - - case SCRATCH_FILE: - return scratch_writer_error (writer->private); - } - NOT_REACHED (); -} - -/* Closes WRITER. - Returns true if successful, false if an I/O error occurred. */ -bool -any_writer_close (struct any_writer *writer) -{ - bool ok; - - if (writer == NULL) - return true; - - switch (writer->type) - { - case SYSTEM_FILE: - ok = sfm_close_writer (writer->private); - break; - - case PORTABLE_FILE: - ok = pfm_close_writer (writer->private); - break; - - case SCRATCH_FILE: - ok = scratch_writer_close (writer->private); - break; - - default: - NOT_REACHED (); - } - - free (writer); - return ok; -} diff --git a/src/data/any-writer.h b/src/data/any-writer.h index 46c3624c..927e61d3 100644 --- a/src/data/any-writer.h +++ b/src/data/any-writer.h @@ -23,18 +23,7 @@ struct file_handle; struct dictionary; -struct ccase; -struct sfm_writer; -struct pfm_writer; -struct scratch_writer; - -struct any_writer *any_writer_open (struct file_handle *, struct dictionary *); -struct any_writer *any_writer_from_sfm_writer (struct sfm_writer *); -struct any_writer *any_writer_from_pfm_writer (struct pfm_writer *); -struct any_writer *any_writer_from_scratch_writer (struct scratch_writer *); - -bool any_writer_write (struct any_writer *, const struct ccase *); -bool any_writer_error (const struct any_writer *); -bool any_writer_close (struct any_writer *); + +struct casewriter *any_writer_open (struct file_handle *, struct dictionary *); #endif /* any-writer.h */ diff --git a/src/data/automake.mk b/src/data/automake.mk index ea4dd3c7..bc056531 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -10,17 +10,7 @@ src_data_libdata_a_SOURCES = \ src/data/calendar.h \ src/data/case-ordering.c \ src/data/case-ordering.h \ - src/data/case-sink.c \ - 
src/data/case-sink.h \ - src/data/case-source.c \ - src/data/case-source.h \ src/data/case.c \ - src/data/casefilter.c \ - src/data/casefilter.h \ - src/data/casefile.h \ - src/data/casefile.c \ - src/data/casefile-factory.h \ - src/data/casefile-private.h \ src/data/casegrouper.c \ src/data/casegrouper.h \ src/data/caseinit.c \ @@ -36,10 +26,6 @@ src_data_libdata_a_SOURCES = \ src/data/casewriter-translator.c \ src/data/casewriter.c \ src/data/casewriter.h \ - src/data/fastfile.c \ - src/data/fastfile.h \ - src/data/fastfile-factory.h \ - src/data/fastfile-factory.c \ src/data/case.h \ src/data/case-tmpfile.c \ src/data/case-tmpfile.h \ @@ -82,8 +68,6 @@ src_data_libdata_a_SOURCES = \ src/data/settings.h \ src/data/sparse-cases.c \ src/data/sparse-cases.h \ - src/data/storage-stream.c \ - src/data/storage-stream.h \ src/data/sys-file-private.c \ src/data/sys-file-private.h \ src/data/sys-file-reader.c \ diff --git a/src/data/case-sink.c b/src/data/case-sink.c deleted file mode 100644 index d7be3fa6..00000000 --- a/src/data/case-sink.c +++ /dev/null @@ -1,66 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
*/ - -#include - -#include - -#include - -#include - -#include "xalloc.h" - -/* Creates a case sink to accept cases from the given DICT with - class CLASS and auxiliary data AUX. */ -struct case_sink * -create_case_sink (const struct case_sink_class *class, - const struct dictionary *dict, struct casefile_factory *f, - void *aux) -{ - struct case_sink *sink = xmalloc (sizeof *sink); - sink->class = class; - sink->value_cnt = dict_get_compacted_value_cnt (dict); - sink->aux = aux; - sink->factory = f; - return sink; -} - -/* Destroys case sink SINK. */ -void -free_case_sink (struct case_sink *sink) -{ - if (sink != NULL) - { - if (sink->class->destroy != NULL) - sink->class->destroy (sink); - free (sink); - } -} -/* Null sink. Used by a few procedures that keep track of output - themselves and would throw away anything that the sink - contained anyway. */ - -const struct case_sink_class null_sink_class = - { - "null", - NULL, - NULL, - NULL, - NULL, - }; diff --git a/src/data/case-sink.h b/src/data/case-sink.h deleted file mode 100644 index ec2cfd21..00000000 --- a/src/data/case-sink.h +++ /dev/null @@ -1,68 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
*/ - -#ifndef CASE_SINK_H -#define CASE_SINK_H 1 - -#include -#include - -struct ccase; -struct dictionary; - -/* A case sink. */ -struct case_sink - { - const struct case_sink_class *class; /* Class. */ - void *aux; /* Auxiliary data. */ - struct casefile_factory *factory ; /* Factory used to create - the destination */ - size_t value_cnt; /* Number of `union value's in case. */ - }; - -/* A case sink class. */ -struct case_sink_class - { - const char *name; /* Identifying name. */ - - /* Opens the sink for writing. */ - void (*open) (struct case_sink *); - - /* Writes a case to the sink. */ - bool (*write) (struct case_sink *, const struct ccase *); - - /* Closes and destroys the sink. */ - void (*destroy) (struct case_sink *); - - /* Closes the sink and returns a source that can read back - the cases that were written, perhaps transformed in some - way. The sink must still be separately destroyed by - calling destroy(). */ - struct case_source *(*make_source) (struct case_sink *); - }; - -extern const struct case_sink_class null_sink_class; - -struct casefile_factory ; -struct case_sink *create_case_sink (const struct case_sink_class *, - const struct dictionary *, - struct casefile_factory *, - void *); -void free_case_sink (struct case_sink *); - -#endif /* case-sink.h */ diff --git a/src/data/case-source.c b/src/data/case-source.c deleted file mode 100644 index 542f3008..00000000 --- a/src/data/case-source.c +++ /dev/null @@ -1,62 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#include - -#include - -#include - -#include "xalloc.h" - -/* Creates a case source with class CLASS and auxiliary data AUX - and based on dictionary DICT. */ -struct case_source * -create_case_source (const struct case_source_class *class, - void *aux) -{ - struct case_source *source = xmalloc (sizeof *source); - source->class = class; - source->aux = aux; - return source; -} - -/* Destroys case source SOURCE. - Returns true if successful, - false if the source encountered an I/O error during - destruction or reading cases. */ -bool -free_case_source (struct case_source *source) -{ - bool ok = true; - if (source != NULL) - { - if (source->class->destroy != NULL) - ok = source->class->destroy (source); - free (source); - } - return ok; -} - -/* Returns true if CLASS is the class of SOURCE. */ -bool -case_source_is_class (const struct case_source *source, - const struct case_source_class *class) -{ - return source != NULL && source->class == class; -} diff --git a/src/data/case-source.h b/src/data/case-source.h deleted file mode 100644 index 833502cd..00000000 --- a/src/data/case-source.h +++ /dev/null @@ -1,61 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#ifndef CASE_SOURCE_H -#define CASE_SOURCE_H 1 - -#include - -struct ccase; - -/* A case source. */ -struct case_source - { - const struct case_source_class *class; /* Class. */ - void *aux; /* Auxiliary data. */ - }; - -/* A case source class. */ -struct case_source_class - { - const char *name; /* Identifying name. */ - - /* Returns the exact number of cases that READ will pass to - WRITE_CASE, if known, or -1 otherwise. */ - int (*count) (const struct case_source *); - - /* Reads one case into C. - Returns true if successful, false at end of file or if an - I/O error occurred. */ - bool (*read) (struct case_source *, struct ccase *); - - /* Destroys the source. - Returns true if successful read, false if an I/O occurred - during destruction or previously. */ - bool (*destroy) (struct case_source *); - }; - - -struct case_source *create_case_source (const struct case_source_class *, - void *); -bool free_case_source (struct case_source *); - -bool case_source_is_class (const struct case_source *, - const struct case_source_class *); - -#endif /* case-source.h */ diff --git a/src/data/casefile-factory.h b/src/data/casefile-factory.h deleted file mode 100644 index cc7423e9..00000000 --- a/src/data/casefile-factory.h +++ /dev/null @@ -1,28 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2006 Free Software Foundation, Inc. 
- - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#ifndef CASEFILE_FACTORY_H -#define CASEFILE_FACTORY_H - -struct casefile_factory -{ - struct casefile * (*create_casefile) (struct casefile_factory *, size_t); -}; - -#endif - diff --git a/src/data/casefile-private.h b/src/data/casefile-private.h deleted file mode 100644 index 1375dd01..00000000 --- a/src/data/casefile-private.h +++ /dev/null @@ -1,102 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2004, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
*/ - -#ifndef CASEFILE_PRIVATE_H -#define CASEFILE_PRIVATE_H - -#include -#include -#include - -struct ccase; -struct casereader; -struct casefile; -struct casefilter; - -struct class_casefile -{ - void (*destroy) (struct casefile *) ; - - bool (*error) (const struct casefile *) ; - - size_t (*get_value_cnt) (const struct casefile *) ; - unsigned long (*get_case_cnt) (const struct casefile *) ; - - struct casereader * (*get_reader) (const struct casefile *) ; - - bool (*append) (struct casefile *, const struct ccase *) ; - - - bool (*in_core) (const struct casefile *) ; - bool (*to_disk) (const struct casefile *) ; - bool (*sleep) (const struct casefile *) ; -}; - -struct casefile -{ - const struct class_casefile *class ; /* Class pointer */ - - struct ll_list reader_list ; /* List of our readers. */ - struct ll ll ; /* Element in the class' list - of casefiles. */ - bool being_destroyed; /* A destructive reader exists */ -}; - - -struct class_casereader -{ - struct ccase * (*get_next_case) (struct casereader *); - - unsigned long (*cnum) (const struct casereader *); - - void (*destroy) (struct casereader * r); - - struct casereader * (*clone) (const struct casereader *); -}; - - -#define CLASS_CASEREADER(K) ( (struct class_casereader *) K) - -struct casereader -{ - const struct class_casereader *class; /* Class pointer */ - - struct casefile *cf; /* The casefile to which this reader belongs */ - struct ll ll; /* Element in the casefile's list of readers */ - - struct casefilter *filter; /* The filter to be used */ - bool destructive; /* True if this reader is destructive */ -}; - - -#define CASEFILE(C) ( (struct casefile *) C) -#define CONST_CASEFILE(C) ( (const struct casefile *) C) - -#define CASEFILEREADER(CR) ((struct casereader *) CR) - - -/* Functions for implementations' use only */ - -void casefile_register (struct casefile *cf, - const struct class_casefile *k); - -void casereader_register (struct casefile *cf, - struct casereader *reader, - const struct 
class_casereader *k); - -#endif diff --git a/src/data/casefile.c b/src/data/casefile.c deleted file mode 100644 index d81ec29e..00000000 --- a/src/data/casefile.c +++ /dev/null @@ -1,344 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#include -#include -#include -#include -#include - -#include "case.h" -#include "casefile.h" -#include "casefile-private.h" -#include "casefilter.h" - - -struct ccase; - -/* A casefile is an abstract class representing an array of cases. In - general, cases are accessible sequentially, and are immutable once - appended to the casefile. However some implementations may provide - special methods for case mutation or random access. - - Use casefile_append or casefile_append_xfer to append a case to a - casefile. - - The casefile may be read sequentially, - starting from the beginning, by "casereaders". Any - number of casereaders may be created, at any time. - Each casereader has an independent position in the casefile. - - Casereaders may only move forward. They cannot move backward to - arbitrary records or seek randomly. Cloning casereaders is - possible, but it is not yet implemented. - - Use casereader_read() or casereader_read_xfer() to read - a case from a casereader. 
Use casereader_destroy() to - discard a casereader when it is no longer needed. - - When a casefile is no longer needed, it may be destroyed with - casefile_destroy(). This function will also destroy any - remaining casereaders. */ - -static struct ll_list all_casefiles = LL_INITIALIZER (all_casefiles); - -static struct casefile * -ll_to_casefile (const struct ll *ll) -{ - return ll_data (ll, struct casefile, ll); -} - -static struct casereader * -ll_to_casereader (const struct ll *ll) -{ - return ll_data (ll, struct casereader, ll); -} - - -/* atexit() handler that closes and deletes our temporary - files. */ -static void -exit_handler (void) -{ - while (!ll_is_empty (&all_casefiles)) - casefile_destroy (ll_to_casefile (ll_head (&all_casefiles))); -} - -/* Insert CF into the global list of casefiles */ -void -casefile_register (struct casefile *cf, const struct class_casefile *class) -{ - static bool initialised ; - if ( !initialised ) - { - atexit (exit_handler); - initialised = true; - } - - cf->class = class; - ll_push_head (&all_casefiles, &cf->ll); - ll_init (&cf->reader_list); -} - -/* Remove CF from the global list */ -static void -casefile_unregister(struct casefile *cf) -{ - ll_remove (&cf->ll); -} - -/* Return the casefile corresponding to this reader */ -struct casefile * -casereader_get_casefile (const struct casereader *r) -{ - return r->cf; -} - -/* Return the case number of the current case */ -unsigned long -casereader_cnum(const struct casereader *r) -{ - return r->class->cnum(r); -} - -static struct ccase * -get_next_case(struct casereader *reader) -{ - struct ccase *read_case = NULL; - struct casefile *cf = casereader_get_casefile (reader); - - do - { - if ( casefile_error (cf) ) - return NULL; - - read_case = reader->class->get_next_case (reader); - } - while ( read_case && reader->filter - && casefilter_skip_case (reader->filter, read_case) ) ; - - return read_case; -} - -/* Reads a copy of the next case from READER into C. 
- Caller is responsible for destroying C. - Returns true if successful, false at end of file. */ -bool -casereader_read (struct casereader *reader, struct ccase *c) -{ - struct ccase * read_case = get_next_case (reader) ; - - if ( NULL == read_case ) - return false; - - case_clone (c, read_case ); - - return true; -} - - -/* Reads the next case from READER into C and transfers ownership - to the caller. Caller is responsible for destroying C. - Returns true if successful, false at end of file or on I/O - error. */ -bool -casereader_read_xfer (struct casereader *reader, struct ccase *c) -{ - struct casefile *cf = casereader_get_casefile (reader); - struct ccase *read_case ; - case_nullify (c); - - read_case = get_next_case (reader) ; - - if ( NULL == read_case ) - return false; - - if ( reader->destructive && casefile_in_core (cf) ) - case_move (c, read_case); - else - case_clone (c, read_case); - - return true; -} - -/* Destroys R. */ -void -casereader_destroy (struct casereader *r) -{ - ll_remove (&r->ll); - - r->class->destroy(r); -} - -/* Creates a copy of R and returns it */ -struct casereader * -casereader_clone(const struct casereader *r) -{ - struct casereader *r2; - - /* Would we ever want to clone a destructive reader ?? */ - assert ( ! r->destructive ) ; - - r2 = r->class->clone (r); - - r2->filter = r->filter; - - return r2; -} - -/* Destroys casefile CF. */ -void -casefile_destroy(struct casefile *cf) -{ - if (!cf) return; - - assert(cf->class->destroy); - - while (!ll_is_empty (&cf->reader_list)) - casereader_destroy (ll_to_casereader (ll_head (&cf->reader_list))); - - casefile_unregister(cf); - - cf->class->destroy(cf); -} - -/* Returns true if an I/O error has occurred in casefile CF. */ -bool -casefile_error (const struct casefile *cf) -{ - return cf->class->error(cf); -} - -/* Returns the number of cases in casefile CF. 
*/ -unsigned long -casefile_get_case_cnt (const struct casefile *cf) -{ - return cf->class->get_case_cnt(cf); -} - -/* Returns the number of `union value's in a case for CF. */ -size_t -casefile_get_value_cnt (const struct casefile *cf) -{ - return cf->class->get_value_cnt(cf); -} - -/* Creates and returns a casereader for CF. A casereader can be used to - sequentially read the cases in a casefile. */ -struct casereader * -casefile_get_reader (const struct casefile *cf, struct casefilter *filter) -{ - struct casereader *r = cf->class->get_reader(cf); - r->cf = (struct casefile *) cf; - r->filter = filter; - - assert (r->class); - - return r; -} - -/* Creates and returns a destructive casereader for CF. Like a - normal casereader, a destructive casereader sequentially reads - the cases in a casefile. Unlike a normal casereader, a - destructive reader cannot operate concurrently with any other - reader. (This restriction could be relaxed in a few ways, but - it is so far unnecessary for other code.) */ -struct casereader * -casefile_get_destructive_reader (struct casefile *cf) -{ - struct casereader *r = cf->class->get_reader (cf); - r->cf = cf; - r->destructive = true; - cf->being_destroyed = true; - - return r; -} - -/* Appends a copy of case C to casefile CF. - Returns true if successful, false if an I/O error occurred. */ -bool -casefile_append (struct casefile *cf, const struct ccase *c) -{ - assert (case_get_value_cnt (c) >= casefile_get_value_cnt (cf)); - - return cf->class->append(cf, c); -} - -/* Appends case C to casefile CF, which takes over ownership of - C. - Returns true if successful, false if an I/O error occurred. 
*/ -bool -casefile_append_xfer (struct casefile *cf, struct ccase *c) -{ - assert (case_get_value_cnt (c) >= casefile_get_value_cnt (cf)); - - cf->class->append (cf, c); - case_destroy (c); - - return cf->class->error (cf); -} - - - - -/* Puts a casefile to "sleep", that is, minimizes the resources - needed for it by closing its file descriptor and freeing its - buffer. This is useful if we need so many casefiles that we - might not have enough memory and file descriptors to go - around. - - Implementations may choose to silently ignore this function. - - Returns true if successful, false if an I/O error occurred. */ -bool -casefile_sleep (const struct casefile *cf) -{ - return cf->class->sleep ? cf->class->sleep(cf) : true; -} - -/* Returns true only if casefile CF is stored in memory (instead of on - disk), false otherwise. -*/ -bool -casefile_in_core (const struct casefile *cf) -{ - return cf->class->in_core(cf); -} - -/* If CF is currently stored in memory, writes it to disk. Readers, if any, - retain their current positions. - - Implementations may choose to silently ignore this function. - - Returns true if successful, false if an I/O error occurred. */ -bool -casefile_to_disk (const struct casefile *cf) -{ - return cf->class->to_disk ? cf->class->to_disk(cf) : true; -} - -void -casereader_register(struct casefile *cf, - struct casereader *reader, - const struct class_casereader *class) -{ - reader->class = class; - reader->cf = cf; - - ll_push_head (&cf->reader_list, &reader->ll); -} diff --git a/src/data/casefile.h b/src/data/casefile.h deleted file mode 100644 index 8e765c95..00000000 --- a/src/data/casefile.h +++ /dev/null @@ -1,71 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2004, 2006 Free Software Foundation, Inc. 
- - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#ifndef CASEFILE_H -#define CASEFILE_H - -#include -#include -#include - - -struct ccase; -struct casereader; -struct casefile; -struct casefilter; - -/* Casereader functions */ - -struct casefile *casereader_get_casefile (const struct casereader *r); - -unsigned long casereader_cnum (const struct casereader *r); - -bool casereader_read (struct casereader *r, struct ccase *c); - -bool casereader_read_xfer (struct casereader *r, struct ccase *c); - -void casereader_destroy (struct casereader *r); - -struct casereader *casereader_clone(const struct casereader *r); - - -/* Casefile functions */ - -void casefile_destroy (struct casefile *cf); - -bool casefile_error (const struct casefile *cf); - -unsigned long casefile_get_case_cnt (const struct casefile *cf); - -size_t casefile_get_value_cnt (const struct casefile *cf); - -struct casereader *casefile_get_reader (const struct casefile *cf, struct casefilter *filter); - -struct casereader *casefile_get_destructive_reader (struct casefile *cf); - -bool casefile_append (struct casefile *cf, const struct ccase *c); - -bool casefile_append_xfer (struct casefile *cf, struct ccase *c); - -bool casefile_sleep (const struct casefile *cf); - -bool casefile_in_core (const struct casefile *cf); - -bool casefile_to_disk (const 
struct casefile *cf); - -#endif diff --git a/src/data/casefilter.c b/src/data/casefilter.c deleted file mode 100644 index 2c6336d6..00000000 --- a/src/data/casefilter.c +++ /dev/null @@ -1,110 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#include -#include -#include -#include "casefilter.h" -#include - -#include -#include -#include -#include - -struct casefilter - { - enum mv_class class; - - const struct variable **vars; - int n_vars; - }; - - -/* Returns true iff the entire case should be skipped */ -bool -casefilter_skip_case (const struct casefilter *filter, const struct ccase *c) -{ - int i; - - for (i = 0; i < filter->n_vars; ++i) - { - if ( casefilter_variable_missing (filter, c, filter->vars[i])) - return true; - } - - return false; -} - -/* Returns true iff the variable V in case C is missing */ -bool -casefilter_variable_missing (const struct casefilter *filter, - const struct ccase *c, - const struct variable *var) -{ - const union value *val = case_data (c, var) ; - return var_is_value_missing (var, val, filter->class); -} - -/* Create a new casefilter that drops cases in which any of the - N_VARS variables in VARS are in the given CLASS of missing values. 
- VARS is an array of variables which if *any* of them are missing. - N_VARS is the size of VARS. - */ -struct casefilter * -casefilter_create (enum mv_class class, const struct variable **vars, int n_vars) -{ - int i; - struct casefilter * filter = xmalloc (sizeof (*filter)) ; - - filter->class = class; - filter->vars = xnmalloc (n_vars, sizeof (*filter->vars) ); - - for ( i = 0 ; i < n_vars ; ++i ) - filter->vars[i] = vars[i]; - - filter->n_vars = n_vars ; - - return filter ; -} - - -/* Add the variables in VARS to the list of variables for which the - filter considers. N_VARS is the size of VARS */ -void -casefilter_add_variables (struct casefilter *filter, - const struct variable *const *vars, int n_vars) -{ - int i; - - filter->vars = xnrealloc (filter->vars, filter->n_vars + n_vars, - sizeof (*filter->vars) ); - - for ( i = 0 ; i < n_vars ; ++i ) - filter->vars[i + filter->n_vars] = vars[i]; - - filter->n_vars += n_vars ; -} - -/* Destroy the filter FILTER */ -void -casefilter_destroy (struct casefilter *filter) -{ - free (filter->vars); - free (filter); -} diff --git a/src/data/casefilter.h b/src/data/casefilter.h deleted file mode 100644 index 6afad6a9..00000000 --- a/src/data/casefilter.h +++ /dev/null @@ -1,56 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#if !casefilter_h -#define casefilter_h 1 - -#include -#include - -struct ccase; -struct casefilter; -struct variable ; - -/* Create a new casefilter that drops cases in which any of the - N_VARS variables in VARS are missing in the given CLASS. - VARS is an array of variables which if *any* of them are missing. - N_VARS is the size of VARS. - */ -struct casefilter * casefilter_create (enum mv_class class, - const struct variable **, int); - -/* Add the variables in VARS to the list of variables for which the - filter considers. N_VARS is the size of VARS */ -void casefilter_add_variables (struct casefilter *, - const struct variable *const*, int); - -/* Destroy the filter FILTER */ -void casefilter_destroy (struct casefilter *); - -/* Returns true iff the entire case should be skipped */ -bool casefilter_skip_case (const struct casefilter *, const struct ccase *); - -/* Returns true iff the variable V in case C is missing. - Note that this function's behaviour is independent of the set of - variables contained by the filter. 
- */ -bool casefilter_variable_missing (const struct casefilter *f, - const struct ccase *c, - const struct variable *v); - -#endif diff --git a/src/data/dictionary.c b/src/data/dictionary.c index d77d9fdd..34ffc6af 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -718,7 +718,7 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c, double w = case_num (c, d->weight); if (w < 0.0 || var_is_num_missing (d->weight, w, MV_ANY)) w = 0.0; - if ( w == 0.0 && *warn_on_invalid ) { + if ( w == 0.0 && warn_on_invalid != NULL && *warn_on_invalid ) { *warn_on_invalid = false; msg (SW, _("At least one case in the data file had a weight value " "that was user-missing, system-missing, zero, or " diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index a9b1de81..2c99bde6 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -20,29 +20,32 @@ #include #include "por-file-reader.h" -#include -#include -#include -#include + #include #include #include #include -#include +#include #include -#include "case.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include "dictionary.h" -#include "file-handle-def.h" -#include "format.h" -#include "missing-values.h" #include #include +#include #include #include #include -#include "value-labels.h" -#include "variable.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -71,10 +74,12 @@ struct pfm_reader int var_cnt; /* Number of variables. */ int weight_index; /* 0-based index of weight variable, or -1. */ int *widths; /* Variable widths, 0 for numeric. */ - int value_cnt; /* Number of `value's per case. */ + size_t value_cnt; /* Number of `value's per case. */ bool ok; /* Set false on I/O error. */ }; +static struct casereader_class por_file_casereader_class; + static void error (struct pfm_reader *r, const char *msg,...) 
PRINTF_FORMAT (2, 3) @@ -110,11 +115,11 @@ error (struct pfm_reader *r, const char *msg, ...) } /* Closes portable file reader R, after we're done with it. */ -void -pfm_close_reader (struct pfm_reader *r) +static void +por_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { - if (r != NULL) - pool_destroy (r->pool); + struct pfm_reader *r = r_; + pool_destroy (r->pool); } /* Read a single character into cur_char. */ @@ -156,7 +161,7 @@ void dump_dictionary (struct dictionary *); /* Reads the dictionary from file with handle H, and returns it in a dictionary structure. This dictionary may be modified in order to rename, reorder, and delete variables, etc. */ -struct pfm_reader * +struct casereader * pfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct pfm_read_info *info) { @@ -204,10 +209,12 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict, if (!match (r, 'F')) error (r, _("Data record expected.")); - return r; + r->value_cnt = dict_get_next_value_idx (*dict); + return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX, + &por_file_casereader_class, r); error: - pfm_close_reader (r); + pool_destroy (r->pool); dict_destroy (*dict); *dict = NULL; return NULL; @@ -677,19 +684,28 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict) } /* Reads one case from portable file R into C. */ -bool -pfm_read_case (struct pfm_reader *r, struct ccase *c) +static bool +por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c) { + struct pfm_reader *r = r_; size_t i; size_t idx; + case_create (c, casereader_get_value_cnt (reader)); setjmp (r->bail_out); - if (!r->ok) - return false; + if (!r->ok) + { + casereader_force_error (reader); + case_destroy (c); + return false; + } /* Check for end of file. 
*/ - if (r->cc == 'Z') - return false; + if (r->cc == 'Z') + { + case_destroy (c); + return false; + } idx = 0; for (i = 0; i < r->var_cnt; i++) @@ -713,14 +729,6 @@ pfm_read_case (struct pfm_reader *r, struct ccase *c) return true; } -/* Returns true if an I/O error has occurred on READER, false - otherwise. */ -bool -pfm_read_error (const struct pfm_reader *reader) -{ - return !reader->ok; -} - /* Returns true if FILE is an SPSS portable file, false otherwise. */ bool @@ -755,3 +763,11 @@ pfm_detect (FILE *file) return true; } + +static struct casereader_class por_file_casereader_class = + { + por_file_casereader_read, + por_file_casereader_destroy, + NULL, + NULL, + }; diff --git a/src/data/por-file-reader.h b/src/data/por-file-reader.h index 50ce46b5..81221044 100644 --- a/src/data/por-file-reader.h +++ b/src/data/por-file-reader.h @@ -37,12 +37,9 @@ struct pfm_read_info struct dictionary; struct file_handle; struct ccase; -struct pfm_reader *pfm_open_reader (struct file_handle *, +struct casereader *pfm_open_reader (struct file_handle *, struct dictionary **, struct pfm_read_info *); -bool pfm_read_case (struct pfm_reader *, struct ccase *); -bool pfm_read_error (const struct pfm_reader *); -void pfm_close_reader (struct pfm_reader *); bool pfm_detect (FILE *); #endif /* por-file-reader.h */ diff --git a/src/data/por-file-writer.c b/src/data/por-file-writer.c index d9f16ef4..bab453d0 100644 --- a/src/data/por-file-writer.c +++ b/src/data/por-file-writer.c @@ -30,13 +30,15 @@ #include #include -#include "case.h" -#include "dictionary.h" -#include "file-handle-def.h" -#include "format.h" -#include "missing-values.h" -#include "value-labels.h" -#include "variable.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -70,6 +72,9 @@ struct pfm_var int fv; /* Starting case index. 
*/ }; +static struct casewriter_class por_file_casewriter_class; + +static bool close_writer (struct pfm_writer *); static void buf_write (struct pfm_writer *, const void *, size_t); static void write_header (struct pfm_writer *); static void write_version_data (struct pfm_writer *); @@ -94,7 +99,7 @@ pfm_writer_default_options (void) /* Writes the dictionary DICT to portable file HANDLE according to the given OPTS. Returns nonzero only if successful. DICT will not be modified, except to assign short names. */ -struct pfm_writer * +struct casewriter * pfm_open_writer (struct file_handle *fh, struct dictionary *dict, struct pfm_write_options opts) { @@ -153,12 +158,12 @@ pfm_open_writer (struct file_handle *fh, struct dictionary *dict, write_variables (w, dict); write_value_labels (w, dict); buf_write (w, "F", 1); - if (pfm_write_error (w)) + if (ferror (w->file)) goto error; - return w; + return casewriter_create (&por_file_casewriter_class, w); error: - pfm_close_writer (w); + close_writer (w); return NULL; open_error: @@ -356,6 +361,7 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) write_value (w, &value, v); } + /* Write variable label. */ if (var_get_label (v) != NULL) { buf_write (w, "C", 1); @@ -394,41 +400,47 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict) } } -/* Writes case ELEM to the portable file represented by H. */ -int -pfm_write_case (struct pfm_writer *w, const struct ccase *c) +/* Writes case C to the portable file represented by H. 
*/ +static void +por_file_casewriter_write (struct casewriter *writer, void *w_, + struct ccase *c) { + struct pfm_writer *w = w_; int i; - if (ferror (w->file)) - return 0; - - for (i = 0; i < w->var_cnt; i++) + if (!ferror (w->file)) { - struct pfm_var *v = &w->vars[i]; + for (i = 0; i < w->var_cnt; i++) + { + struct pfm_var *v = &w->vars[i]; - if (v->width == 0) - write_float (w, case_num_idx (c, v->fv)); - else - { - write_int (w, v->width); - buf_write (w, case_str_idx (c, v->fv), v->width); - } + if (v->width == 0) + write_float (w, case_num_idx (c, v->fv)); + else + { + write_int (w, v->width); + buf_write (w, case_str_idx (c, v->fv), v->width); + } + } } - - return !pfm_write_error (w); + else + casewriter_force_error (writer); + + case_destroy (c); } -bool -pfm_write_error (const struct pfm_writer *w) +static void +por_file_casewriter_destroy (struct casewriter *writer, void *w_) { - return ferror (w->file); + struct pfm_writer *w = w_; + if (!close_writer (w)) + casewriter_force_error (writer); } /* Closes a portable file after we're done with it. Returns true if successful, false if an I/O error occurred. */ -bool -pfm_close_writer (struct pfm_writer *w) +static bool +close_writer (struct pfm_writer *w) { bool ok; @@ -442,7 +454,7 @@ pfm_close_writer (struct pfm_writer *w) memset (buf, 'Z', sizeof buf); buf_write (w, buf, w->lc >= 80 ? 
80 : 80 - w->lc); - ok = !pfm_write_error (w); + ok = !ferror (w->file); if (fclose (w->file) == EOF) ok = false; @@ -844,3 +856,10 @@ format_trig_double (long double value, int base_10_precision, char output[]) strcpy (output, "*."); return; } + +static struct casewriter_class por_file_casewriter_class = + { + por_file_casewriter_write, + por_file_casewriter_destroy, + NULL, + }; diff --git a/src/data/por-file-writer.h b/src/data/por-file-writer.h index e1882882..6732abde 100644 --- a/src/data/por-file-writer.h +++ b/src/data/por-file-writer.h @@ -41,12 +41,8 @@ struct pfm_write_options struct file_handle; struct dictionary; struct ccase; -struct pfm_writer *pfm_open_writer (struct file_handle *, struct dictionary *, +struct casewriter *pfm_open_writer (struct file_handle *, struct dictionary *, struct pfm_write_options); struct pfm_write_options pfm_writer_default_options (void); -int pfm_write_case (struct pfm_writer *, const struct ccase *); -bool pfm_write_error (const struct pfm_writer *); -bool pfm_close_writer (struct pfm_writer *); - #endif /* por-file-writer.h */ diff --git a/src/data/procedure.c b/src/data/procedure.c index 7a9b4321..46a18bb4 100644 --- a/src/data/procedure.c +++ b/src/data/procedure.c @@ -23,48 +23,50 @@ #include #include -#include -#include #include -#include -#include +#include +#include +#include +#include #include #include #include -#include #include #include #include #include #include #include +#include struct dataset { - - /* An abstract factory which creates casefiles */ - struct casefile_factory *cf_factory; - - /* Callback which occurs when a procedure provides a new source for - the dataset */ - replace_source_callback *replace_source ; - - /* Callback which occurs whenever the DICT is replaced by a new one */ - replace_dictionary_callback *replace_dict; - - /* Cases are read from proc_source, + /* Cases are read from source, + their transformation variables are initialized, pass through permanent_trns_chain (which transforms 
them into the format described by permanent_dict), - are written to proc_sink, + are written to sink, pass through temporary_trns_chain (which transforms them into the format described by dict), and are finally passed to the procedure. */ - struct case_source *proc_source; + struct casereader *source; + struct caseinit *caseinit; struct trns_chain *permanent_trns_chain; struct dictionary *permanent_dict; - struct case_sink *proc_sink; + struct casewriter *sink; struct trns_chain *temporary_trns_chain; struct dictionary *dict; + /* Callback which occurs when a procedure provides a new source for + the dataset */ + replace_source_callback *replace_source ; + + /* Callback which occurs whenever the DICT is replaced by a new one */ + replace_dictionary_callback *replace_dict; + + /* If true, cases are discarded instead of being written to + sink. */ + bool discard_output; + /* The transformation chain that the next transformation will be added to. */ struct trns_chain *cur_trns_chain; @@ -82,26 +84,22 @@ struct dataset { struct ccase *lag_cases; /* Lagged cases managed by deque. */ /* Procedure data. */ - bool is_open; /* Procedure open? */ - struct ccase trns_case; /* Case used for transformations. */ - struct ccase sink_case; /* Case written to sink, if - compacting is necessary. */ + enum + { + PROC_COMMITTED, + PROC_OPEN, + PROC_CLOSED + } + proc_state; size_t cases_written; /* Cases output so far. */ - bool ok; + bool ok; /* Error status. 
*/ }; /* struct dataset */ static void add_case_limit_trns (struct dataset *ds); static void add_filter_trns (struct dataset *ds); -static bool internal_procedure (struct dataset *ds, case_func *, - end_func *, - void *aux); static void update_last_proc_invocation (struct dataset *ds); -static void create_trns_case (struct ccase *, struct dictionary *); -static void open_active_file (struct dataset *ds); -static void clear_case (const struct dataset *ds, struct ccase *c); -static bool close_active_file (struct dataset *ds); /* Public functions. */ @@ -116,146 +114,89 @@ time_of_last_procedure (struct dataset *ds) /* Regular procedure. */ - - -/* Reads the data from the input program and writes it to a new - active file. For each case we read from the input program, we - do the following: - - 1. Execute permanent transformations. If these drop the case, - start the next case from step 1. - - 2. Write case to replacement active file. - - 3. Execute temporary transformations. If these drop the case, - start the next case from step 1. - - 4. Pass case to PROC_FUNC, passing AUX as auxiliary data. - - Returns true if successful, false if an I/O error occurred. */ +/* Executes any pending transformations, if necessary. + This is not identical to the EXECUTE command in that it won't + always read the source data. This can be important when the + source data is given inline within BEGIN DATA...END FILE. */ bool -procedure (struct dataset *ds, case_func *cf, void *aux) +proc_execute (struct dataset *ds) { - update_last_proc_invocation (ds); + bool ok; - /* Optimize the trivial case where we're not going to do - anything with the data, by not reading the data at all. 
*/ - if (cf == NULL - && case_source_is_class (ds->proc_source, &storage_source_class) - && ds->proc_sink == NULL - && (ds->temporary_trns_chain == NULL - || trns_chain_is_empty (ds->temporary_trns_chain)) + if ((ds->temporary_trns_chain == NULL + || trns_chain_is_empty (ds->temporary_trns_chain)) && trns_chain_is_empty (ds->permanent_trns_chain)) { ds->n_lag = 0; + ds->discard_output = false; dict_set_case_limit (ds->dict, 0); dict_clear_vectors (ds->dict); return true; } - return internal_procedure (ds, cf, NULL, aux); + ok = casereader_destroy (proc_open (ds)); + return proc_commit (ds) && ok; } - -/* Multipass procedure. */ -struct multipass_aux_data - { - struct casefile *casefile; +static struct casereader_class proc_casereader_class; - bool (*proc_func) (const struct casefile *, void *aux); - void *aux; - }; - -/* Case processing function for multipass_procedure(). */ -static bool -multipass_case_func (const struct ccase *c, void *aux_data_, const struct dataset *ds UNUSED) -{ - struct multipass_aux_data *aux_data = aux_data_; - return casefile_append (aux_data->casefile, c); -} - -/* End-of-file function for multipass_procedure(). */ -static bool -multipass_end_func (void *aux_data_, const struct dataset *ds UNUSED) -{ - struct multipass_aux_data *aux_data = aux_data_; - return (aux_data->proc_func == NULL - || aux_data->proc_func (aux_data->casefile, aux_data->aux)); -} - -/* Procedure that allows multiple passes over the input data. - The entire active file is passed to PROC_FUNC, with the given - AUX as auxiliary data, as a unit. */ -bool -multipass_procedure (struct dataset *ds, casefile_func *proc_func, void *aux) +/* Opens dataset DS for reading cases with proc_read. + proc_commit must be called when done. 
*/ +struct casereader * +proc_open (struct dataset *ds) { - struct multipass_aux_data aux_data; - bool ok; + assert (ds->source != NULL); + assert (ds->proc_state == PROC_COMMITTED); - aux_data.casefile = - ds->cf_factory->create_casefile (ds->cf_factory, - dict_get_next_value_idx (ds->dict)); - - aux_data.proc_func = proc_func; - aux_data.aux = aux; - - ok = internal_procedure (ds, multipass_case_func, multipass_end_func, &aux_data); - ok = !casefile_error (aux_data.casefile) && ok; - - casefile_destroy (aux_data.casefile); - - return ok; -} - + update_last_proc_invocation (ds); -/* Procedure implementation. */ + caseinit_mark_for_init (ds->caseinit, ds->dict); -/* Executes a procedure. - Passes each case to CASE_FUNC. - Calls END_FUNC after the last case. - Returns true if successful, false if an I/O error occurred (or - if CASE_FUNC or END_FUNC ever returned false). */ -static bool -internal_procedure (struct dataset *ds, case_func *proc, - end_func *end, - void *aux) -{ - struct ccase *c; - bool ok = true; + /* Finish up the collection of transformations. */ + add_case_limit_trns (ds); + add_filter_trns (ds); + trns_chain_finalize (ds->cur_trns_chain); - proc_open (ds); - while (ok && proc_read (ds, &c)) - if (proc != NULL) - ok = proc (c, aux, ds) && ok; - if (end != NULL) - ok = end (aux, ds) && ok; + /* Make permanent_dict refer to the dictionary right before + data reaches the sink. */ + if (ds->permanent_dict == NULL) + ds->permanent_dict = ds->dict; - if ( proc_close (ds) && ok ) + /* Prepare sink. */ + if (!ds->discard_output) { - - return true; + ds->compactor = (dict_compacting_would_shrink (ds->permanent_dict) + ? dict_make_compactor (ds->permanent_dict) + : NULL); + ds->sink = autopaging_writer_create (dict_get_compacted_value_cnt ( + ds->permanent_dict)); + } + else + { + ds->compactor = NULL; + ds->sink = NULL; } - return false; -} - -/* Opens dataset DS for reading cases with proc_read. - proc_close must be called when done. 
*/ -void -proc_open (struct dataset *ds) -{ - assert (ds->proc_source != NULL); - assert (!ds->is_open); - - update_last_proc_invocation (ds); - - open_active_file (ds); + /* Allocate memory for lagged cases. */ + ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases); - ds->is_open = true; - create_trns_case (&ds->trns_case, ds->dict); - case_create (&ds->sink_case, dict_get_compacted_value_cnt (ds->dict)); + ds->proc_state = PROC_OPEN; ds->cases_written = 0; ds->ok = true; + + /* FIXME: use taint in dataset in place of `ok'? */ + /* FIXME: for trivial cases we can just return a clone of + ds->source? */ + return casereader_create_sequential (NULL, + dict_get_next_value_idx (ds->dict), + CASENUMBER_MAX, + &proc_casereader_class, ds); +} + +bool +proc_is_open (const struct dataset *ds) +{ + return ds->proc_state != PROC_COMMITTED; } /* Reads the next case from dataset DS, which must have been @@ -264,14 +205,15 @@ proc_open (struct dataset *ds) case is stored in *C. Return false at end of file or if a read error occurs. In this case a null pointer is stored in *C. */ -bool -proc_read (struct dataset *ds, struct ccase **c) +static bool +proc_casereader_read (struct casereader *reader UNUSED, void *ds_, + struct ccase *c) { + struct dataset *ds = ds_; enum trns_result retval = TRNS_DROP_CASE; - assert (ds->is_open); - *c = NULL; - for (;;) + assert (ds->proc_state == PROC_OPEN); + for (;;) { size_t case_nr; @@ -281,51 +223,59 @@ proc_read (struct dataset *ds, struct ccase **c) if (!ds->ok) return false; - /* Read a case from proc_source. */ - clear_case (ds, &ds->trns_case); - if (!ds->proc_source->class->read (ds->proc_source, &ds->trns_case)) + /* Read a case from source. */ + if (!casereader_read (ds->source, c)) return false; + case_resize (c, dict_get_next_value_idx (ds->dict)); + caseinit_init_reinit_vars (ds->caseinit, c); + caseinit_init_left_vars (ds->caseinit, c); /* Execute permanent transformations. 
*/ case_nr = ds->cases_written + 1; retval = trns_chain_execute (ds->permanent_trns_chain, TRNS_CONTINUE, - &ds->trns_case, &case_nr); - if (retval != TRNS_CONTINUE) - continue; - + c, &case_nr); + caseinit_update_left_vars (ds->caseinit, c); + if (retval != TRNS_CONTINUE) + { + case_destroy (c); + continue; + } + /* Write case to collection of lagged cases. */ if (ds->n_lag > 0) { while (deque_count (&ds->lag) >= ds->n_lag) case_destroy (&ds->lag_cases[deque_pop_back (&ds->lag)]); - case_clone (&ds->lag_cases[deque_push_front (&ds->lag)], - &ds->trns_case); + case_clone (&ds->lag_cases[deque_push_front (&ds->lag)], c); } /* Write case to replacement active file. */ ds->cases_written++; - if (ds->proc_sink->class->write != NULL) + if (ds->sink != NULL) { - if (ds->compactor != NULL) + struct ccase tmp; + if (ds->compactor != NULL) { - dict_compactor_compact (ds->compactor, &ds->sink_case, - &ds->trns_case); - ds->proc_sink->class->write (ds->proc_sink, &ds->sink_case); + case_create (&tmp, dict_get_compacted_value_cnt (ds->dict)); + dict_compactor_compact (ds->compactor, &tmp, c); } else - ds->proc_sink->class->write (ds->proc_sink, &ds->trns_case); + case_clone (&tmp, c); + casewriter_write (ds->sink, &tmp); } /* Execute temporary transformations. */ if (ds->temporary_trns_chain != NULL) { retval = trns_chain_execute (ds->temporary_trns_chain, TRNS_CONTINUE, - &ds->trns_case, &ds->cases_written); + c, &ds->cases_written); if (retval != TRNS_CONTINUE) - continue; + { + case_destroy (c); + continue; + } } - *c = &ds->trns_case; return true; } } @@ -335,120 +285,35 @@ proc_read (struct dataset *ds, struct ccase **c) while reading or closing the data set. If DS has not been opened, returns true without doing anything else. */ -bool -proc_close (struct dataset *ds) -{ - if (!ds->is_open) - return true; - - /* Drain any remaining cases. 
*/ - while (ds->ok) - { - struct ccase *c; - if (!proc_read (ds, &c)) - break; - } - ds->ok = free_case_source (ds->proc_source) && ds->ok; - proc_set_source (ds, NULL); - - case_destroy (&ds->sink_case); - case_destroy (&ds->trns_case); - - ds->ok = close_active_file (ds) && ds->ok; - ds->is_open = false; - - return ds->ok; -} - -/* Updates last_proc_invocation. */ -static void -update_last_proc_invocation (struct dataset *ds) -{ - ds->last_proc_invocation = time (NULL); -} - -/* Creates and returns a case, initializing it from the vectors - that say which `value's need to be initialized just once, and - which ones need to be re-initialized before every case. */ static void -create_trns_case (struct ccase *trns_case, struct dictionary *dict) +proc_casereader_destroy (struct casereader *reader, void *ds_) { - size_t var_cnt = dict_get_var_cnt (dict); - size_t i; + struct dataset *ds = ds_; + struct ccase c; - case_create (trns_case, dict_get_next_value_idx (dict)); - for (i = 0; i < var_cnt; i++) - { - struct variable *v = dict_get_var (dict, i); - union value *value = case_data_rw (trns_case, v); + /* Make sure transformations happen for every input case, in + case they have side effects, and ensure that the replacement + active file gets all the cases it should. */ + while (casereader_read (reader, &c)) + case_destroy (&c); - if (var_is_numeric (v)) - value->f = var_get_leave (v) ? 0.0 : SYSMIS; - else - memset (value->s, ' ', var_get_width (v)); - } + ds->proc_state = PROC_CLOSED; + ds->ok = casereader_destroy (ds->source) && ds->ok; + ds->source = NULL; + proc_set_active_file_data (ds, NULL); } -/* Makes all preparations for reading from the data source and writing - to the data sink. */ -static void -open_active_file (struct dataset *ds) -{ - add_case_limit_trns (ds); - add_filter_trns (ds); - - /* Finalize transformations. */ - trns_chain_finalize (ds->cur_trns_chain); - - /* Make permanent_dict refer to the dictionary right before - data reaches the sink. 
*/ - if (ds->permanent_dict == NULL) - ds->permanent_dict = ds->dict; - - /* Figure out whether to compact. */ - ds->compactor = - (dict_compacting_would_shrink (ds->permanent_dict) - ? dict_make_compactor (ds->permanent_dict) - : NULL); - - /* Prepare sink. */ - if (ds->proc_sink == NULL) - ds->proc_sink = create_case_sink (&storage_sink_class, - ds->permanent_dict, - ds->cf_factory, - NULL); - if (ds->proc_sink->class->open != NULL) - ds->proc_sink->class->open (ds->proc_sink); - - /* Allocate memory for lagged cases. */ - ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases); -} - -/* Clears the variables in C that need to be cleared between - processing cases. */ -static void -clear_case (const struct dataset *ds, struct ccase *c) +/* Must return false if the source casereader, a transformation, + or the sink casewriter signaled an error. (If a temporary + transformation signals an error, then the return value is + false, but the replacement active file may still be + untainted.) */ +bool +proc_commit (struct dataset *ds) { - size_t var_cnt = dict_get_var_cnt (ds->dict); - size_t i; + assert (ds->proc_state == PROC_CLOSED); + ds->proc_state = PROC_COMMITTED; - for (i = 0; i < var_cnt; i++) - { - struct variable *v = dict_get_var (ds->dict, i); - if (!var_get_leave (v)) - { - if (var_is_numeric (v)) - case_data_rw (c, v)->f = SYSMIS; - else - memset (case_data_rw (c, v)->s, ' ', var_get_width (v)); - } - } -} - -/* Closes the active file. */ -static bool -close_active_file (struct dataset *ds) -{ /* Free memory for lagged cases. */ while (!deque_is_empty (&ds->lag)) case_destroy (&ds->lag_cases[deque_pop_back (&ds->lag)]); @@ -457,23 +322,49 @@ close_active_file (struct dataset *ds) /* Dictionary from before TEMPORARY becomes permanent. */ proc_cancel_temporary_transformations (ds); - /* Finish compacting. 
*/ - if (ds->compactor != NULL) + if (!ds->discard_output) { - dict_compactor_destroy (ds->compactor); - dict_compact_values (ds->dict); - ds->compactor = NULL; + /* Finish compacting. */ + if (ds->compactor != NULL) + { + dict_compactor_destroy (ds->compactor); + dict_compact_values (ds->dict); + ds->compactor = NULL; + } + + /* Old data sink becomes new data source. */ + if (ds->sink != NULL) + ds->source = casewriter_make_reader (ds->sink); } + else + { + ds->source = NULL; + ds->discard_output = false; + } + ds->sink = NULL; + if ( ds->replace_source) ds->replace_source (ds->source); - /* Old data sink becomes new data source. */ - if (ds->proc_sink->class->make_source != NULL) - proc_set_source (ds, ds->proc_sink->class->make_source (ds->proc_sink) ); - free_case_sink (ds->proc_sink); - ds->proc_sink = NULL; + caseinit_clear (ds->caseinit); + caseinit_mark_as_preinited (ds->caseinit, ds->dict); dict_clear_vectors (ds->dict); ds->permanent_dict = NULL; - return proc_cancel_all_transformations (ds); + return proc_cancel_all_transformations (ds) && ds->ok; +} + +static struct casereader_class proc_casereader_class = + { + proc_casereader_read, + proc_casereader_destroy, + NULL, + NULL, + }; + +/* Updates last_proc_invocation. */ +static void +update_last_proc_invocation (struct dataset *ds) +{ + ds->last_proc_invocation = time (NULL); } /* Returns a pointer to the lagged case from N_BEFORE cases before the @@ -490,218 +381,6 @@ lagged_case (const struct dataset *ds, int n_before) return NULL; } -/* Procedure that separates the data into SPLIT FILE groups. */ - -/* Represents auxiliary data for handling SPLIT FILE. */ -struct split_aux_data - { - struct dataset *dataset; /* The dataset */ - struct ccase prev_case; /* Data in previous case. */ - - /* Callback functions. 
*/ - begin_func *begin; - case_func *proc; - end_func *end; - void *func_aux; - }; - -static int equal_splits (const struct ccase *, const struct ccase *, const struct dataset *ds); -static bool split_procedure_case_func (const struct ccase *c, void *, const struct dataset *); -static bool split_procedure_end_func (void *, const struct dataset *); - -/* Like procedure(), but it automatically breaks the case stream - into SPLIT FILE break groups. Before each group of cases with - identical SPLIT FILE variable values, BEGIN_FUNC is called - with the first case in the group. - Then PROC_FUNC is called for each case in the group (including - the first). - END_FUNC is called when the group is finished. FUNC_AUX is - passed to each of the functions as auxiliary data. - - If the active file is empty, none of BEGIN_FUNC, PROC_FUNC, - and END_FUNC will be called at all. - - If SPLIT FILE is not in effect, then there is one break group - (if the active file is nonempty), and BEGIN_FUNC and END_FUNC - will be called once. - - Returns true if successful, false if an I/O error occurred. */ -bool -procedure_with_splits (struct dataset *ds, - begin_func begin, - case_func *proc, - end_func *end, - void *func_aux) -{ - struct split_aux_data split_aux; - bool ok; - - case_nullify (&split_aux.prev_case); - split_aux.begin = begin; - split_aux.proc = proc; - split_aux.end = end; - split_aux.func_aux = func_aux; - split_aux.dataset = ds; - - ok = internal_procedure (ds, split_procedure_case_func, - split_procedure_end_func, &split_aux); - - case_destroy (&split_aux.prev_case); - - return ok; -} - -/* Case callback used by procedure_with_splits(). */ -static bool -split_procedure_case_func (const struct ccase *c, void *split_aux_, const struct dataset *ds) -{ - struct split_aux_data *split_aux = split_aux_; - - /* Start a new series if needed. 
*/ - if (case_is_null (&split_aux->prev_case) - || !equal_splits (c, &split_aux->prev_case, split_aux->dataset)) - { - if (!case_is_null (&split_aux->prev_case) && split_aux->end != NULL) - split_aux->end (split_aux->func_aux, ds); - - case_destroy (&split_aux->prev_case); - case_clone (&split_aux->prev_case, c); - - if (split_aux->begin != NULL) - split_aux->begin (&split_aux->prev_case, split_aux->func_aux, ds); - } - - return (split_aux->proc == NULL - || split_aux->proc (c, split_aux->func_aux, ds)); -} - -/* End-of-file callback used by procedure_with_splits(). */ -static bool -split_procedure_end_func (void *split_aux_, const struct dataset *ds) -{ - struct split_aux_data *split_aux = split_aux_; - - if (!case_is_null (&split_aux->prev_case) && split_aux->end != NULL) - split_aux->end (split_aux->func_aux, ds); - return true; -} - -/* Compares the SPLIT FILE variables in cases A and B and returns - nonzero only if they differ. */ -static int -equal_splits (const struct ccase *a, const struct ccase *b, - const struct dataset *ds) -{ - return case_compare (a, b, - dict_get_split_vars (ds->dict), - dict_get_split_cnt (ds->dict)) == 0; -} - -/* Multipass procedure that separates the data into SPLIT FILE - groups. */ - -/* Represents auxiliary data for handling SPLIT FILE in a - multipass procedure. */ -struct multipass_split_aux_data - { - struct dataset *dataset; /* The dataset of the split */ - struct ccase prev_case; /* Data in previous case. */ - struct casefile *casefile; /* Accumulates data for a split. */ - split_func *split; /* Function to call with the accumulated - data. */ - void *func_aux; /* Auxiliary data. 
*/ - }; - -static bool multipass_split_case_func (const struct ccase *c, void *aux_, const struct dataset *); -static bool multipass_split_end_func (void *aux_, const struct dataset *ds); -static bool multipass_split_output (struct multipass_split_aux_data *, const struct dataset *ds); - -/* Returns true if successful, false if an I/O error occurred. */ -bool -multipass_procedure_with_splits (struct dataset *ds, - split_func *split, - void *func_aux) -{ - struct multipass_split_aux_data aux; - bool ok; - - case_nullify (&aux.prev_case); - aux.casefile = NULL; - aux.split = split; - aux.func_aux = func_aux; - aux.dataset = ds; - - ok = internal_procedure (ds, multipass_split_case_func, - multipass_split_end_func, &aux); - case_destroy (&aux.prev_case); - - return ok; -} - -/* Case callback used by multipass_procedure_with_splits(). */ -static bool -multipass_split_case_func (const struct ccase *c, void *aux_, const struct dataset *ds) -{ - struct multipass_split_aux_data *aux = aux_; - bool ok = true; - - /* Start a new series if needed. */ - if (aux->casefile == NULL || ! equal_splits (c, &aux->prev_case, ds)) - { - /* Record split values. */ - case_destroy (&aux->prev_case); - case_clone (&aux->prev_case, c); - - /* Pass any cases to split_func. */ - if (aux->casefile != NULL) - ok = multipass_split_output (aux, ds); - - /* Start a new casefile. */ - aux->casefile = - ds->cf_factory->create_casefile (ds->cf_factory, - dict_get_next_value_idx (ds->dict)); - } - - return casefile_append (aux->casefile, c) && ok; -} - -/* End-of-file callback used by multipass_procedure_with_splits(). 
*/ -static bool -multipass_split_end_func (void *aux_, const struct dataset *ds) -{ - struct multipass_split_aux_data *aux = aux_; - return (aux->casefile == NULL || multipass_split_output (aux, ds)); -} - -static bool -multipass_split_output (struct multipass_split_aux_data *aux, const struct dataset *ds) -{ - bool ok; - - assert (aux->casefile != NULL); - ok = aux->split (&aux->prev_case, aux->casefile, aux->func_aux, ds); - casefile_destroy (aux->casefile); - aux->casefile = NULL; - - return ok; -} - -/* Discards all the current state in preparation for a data-input - command like DATA LIST or GET. */ -void -discard_variables (struct dataset *ds) -{ - dict_clear (ds->dict); - fh_set_default_handle (NULL); - - ds->n_lag = 0; - - free_case_source (ds->proc_source); - proc_set_source (ds, NULL); - - proc_cancel_all_transformations (ds); -} - /* Returns the current set of permanent transformations, and clears the permanent transformations. For use by INPUT PROGRAM. */ @@ -804,8 +483,10 @@ proc_cancel_temporary_transformations (struct dataset *ds) { if (proc_in_temporary_transformations (ds)) { - dataset_set_dict (ds, ds->permanent_dict); + dict_destroy (ds->dict); + ds->dict = ds->permanent_dict; ds->permanent_dict = NULL; + if (ds->replace_dict) ds->replace_dict (ds->dict); trns_chain_destroy (ds->temporary_trns_chain); ds->temporary_trns_chain = NULL; @@ -822,6 +503,7 @@ bool proc_cancel_all_transformations (struct dataset *ds) { bool ok; + assert (ds->proc_state == PROC_COMMITTED); ok = trns_chain_destroy (ds->permanent_trns_chain); ok = trns_chain_destroy (ds->temporary_trns_chain) && ok; ds->permanent_trns_chain = ds->cur_trns_chain = trns_chain_create (); @@ -831,14 +513,12 @@ proc_cancel_all_transformations (struct dataset *ds) /* Initializes procedure handling. 
*/ struct dataset * -create_dataset (struct casefile_factory *fact, - replace_source_callback *rps, - replace_dictionary_callback *rds - ) +create_dataset (replace_source_callback *rps, + replace_dictionary_callback *rds) { struct dataset *ds = xzalloc (sizeof(*ds)); ds->dict = dict_create (); - ds->cf_factory = fact; + ds->caseinit = caseinit_create (); ds->replace_source = rps; ds->replace_dict = rds; proc_cancel_all_transformations (ds); @@ -849,60 +529,103 @@ create_dataset (struct casefile_factory *fact, void destroy_dataset (struct dataset *ds) { - discard_variables (ds); + proc_discard_active_file (ds); dict_destroy (ds->dict); + caseinit_destroy (ds->caseinit); trns_chain_destroy (ds->permanent_trns_chain); free (ds); } -/* Sets SINK as the destination for procedure output from the - next procedure. */ +/* Causes output from the next procedure to be discarded, instead + of being preserved for use as input for the next procedure. */ void -proc_set_sink (struct dataset *ds, struct case_sink *sink) +proc_discard_output (struct dataset *ds) { - assert (ds->proc_sink == NULL); - ds->proc_sink = sink; + ds->discard_output = true; +} + +/* Discards the active file dictionary, data, and + transformations. */ +void +proc_discard_active_file (struct dataset *ds) +{ + assert (ds->proc_state == PROC_COMMITTED); + + dict_clear (ds->dict); + fh_set_default_handle (NULL); + + ds->n_lag = 0; + + casereader_destroy (ds->source); + ds->source = NULL; + if ( ds->replace_source) ds->replace_source (NULL); + + proc_cancel_all_transformations (ds); } /* Sets SOURCE as the source for procedure input for the next procedure. 
*/ void -proc_set_source (struct dataset *ds, struct case_source *source) +proc_set_active_file (struct dataset *ds, + struct casereader *source, + struct dictionary *dict) { - ds->proc_source = source; + assert (ds->proc_state == PROC_COMMITTED); + assert (ds->dict != dict); + + proc_discard_active_file (ds); - if ( ds->replace_source ) - ds->replace_source (ds->proc_source); + dict_destroy (ds->dict); + ds->dict = dict; + if ( ds->replace_dict) ds->replace_dict (dict); + + proc_set_active_file_data (ds, source); } -/* Returns true if a source for the next procedure has been - configured, false otherwise. */ +/* Replaces the active file's data by READER without replacing + the associated dictionary. */ bool -proc_has_source (const struct dataset *ds) +proc_set_active_file_data (struct dataset *ds, struct casereader *reader) { - return ds->proc_source != NULL; -} + casereader_destroy (ds->source); + ds->source = reader; + if (ds->replace_source) ds->replace_source (reader); -/* Returns the output from the previous procedure. - For use only immediately after executing a procedure. - The returned casefile is owned by the caller; it will not be - automatically used for the next procedure's input. */ -struct casefile * -proc_capture_output (struct dataset *ds) -{ - struct casefile *casefile; + caseinit_clear (ds->caseinit); + caseinit_mark_as_preinited (ds->caseinit, ds->dict); - /* Try to make sure that this function is called immediately - after procedure() or a similar function. */ - assert (ds->proc_source != NULL); - assert (case_source_is_class (ds->proc_source, &storage_source_class)); - assert (trns_chain_is_empty (ds->permanent_trns_chain)); - assert (!proc_in_temporary_transformations (ds)); + return reader == NULL || !casereader_error (reader); +} - casefile = storage_source_decapsulate (ds->proc_source); - proc_set_source (ds, NULL); +/* Returns true if an active file data source is available, false + otherwise. 
*/ +bool +proc_has_active_file (const struct dataset *ds) +{ + return ds->source != NULL; +} - return casefile; +/* Checks whether DS has a corrupted active file. If so, + discards it and returns false. If not, returns true without + doing anything. */ +bool +dataset_end_of_command (struct dataset *ds) +{ + if (ds->source != NULL) + { + if (casereader_error (ds->source)) + { + proc_discard_active_file (ds); + return false; + } + else + { + const struct taint *taint = casereader_get_taint (ds->source); + taint_reset_successor_taint ((struct taint *) taint); + assert (!taint_has_tainted_successor (taint)); + } + } + return true; } static trns_proc_func case_limit_trns_proc; @@ -983,32 +706,8 @@ dataset_dict (const struct dataset *ds) return ds->dict; } - -/* Set or replace dataset DS's dictionary with DICT. - The old dictionary is destroyed */ -void -dataset_set_dict (struct dataset *ds, struct dictionary *dict) -{ - struct dictionary *old_dict = ds->dict; - - dict_copy_callbacks (dict, ds->dict); - ds->dict = dict; - - if ( ds->replace_dict ) - ds->replace_dict (dict); - - dict_destroy (old_dict); -} - void dataset_need_lag (struct dataset *ds, int n_before) { ds->n_lag = MAX (ds->n_lag, n_before); } - -struct casefile_factory * -dataset_get_casefile_factory (const struct dataset *ds) -{ - return ds->cf_factory; -} - diff --git a/src/data/procedure.h b/src/data/procedure.h index 0e8d286b..7803e0e7 100644 --- a/src/data/procedure.h +++ b/src/data/procedure.h @@ -23,16 +23,11 @@ #include #include -#include #include -struct ccase; -struct casefile; -struct case_sink; -struct case_source; - +struct casereader; struct dataset; - +struct dictionary; /* Transformations. 
*/ @@ -44,10 +39,6 @@ void add_transformation_with_finalizer (struct dataset *ds, trns_free_func *, void *); size_t next_transformation (const struct dataset *ds); -void discard_variables (struct dataset *ds); - - - bool proc_cancel_all_transformations (struct dataset *ds); struct trns_chain *proc_capture_transformations (struct dataset *ds); @@ -59,63 +50,35 @@ bool proc_cancel_temporary_transformations (struct dataset *ds); /* Procedures. */ struct dictionary ; -typedef void replace_source_callback (struct case_source *); +typedef void replace_source_callback (struct casereader *); typedef void replace_dictionary_callback (struct dictionary *); -struct dataset * create_dataset (struct casefile_factory *fact, - replace_source_callback *, - replace_dictionary_callback * - ); +struct dataset * create_dataset (replace_source_callback *, + replace_dictionary_callback *); void destroy_dataset (struct dataset *); -struct casefile_factory *dataset_get_casefile_factory (const struct dataset *); - -void proc_set_source (struct dataset *ds, struct case_source *); -bool proc_has_source (const struct dataset *ds); - -void proc_set_sink (struct dataset *ds, struct case_sink *); -struct casefile *proc_capture_output (struct dataset *ds); - -typedef bool casefile_func (const struct casefile *, void *); -typedef bool case_func (const struct ccase *, void *, const struct dataset *); -typedef void begin_func (const struct ccase *, void *, const struct dataset*); +void proc_discard_active_file (struct dataset *); +void proc_set_active_file (struct dataset *, + struct casereader *, struct dictionary *); +bool proc_set_active_file_data (struct dataset *, struct casereader *); +bool proc_has_active_file (const struct dataset *ds); -typedef bool end_func (void *, const struct dataset *); - -typedef bool split_func (const struct ccase *, const struct casefile *, - void *, const struct dataset *); - - - -bool procedure (struct dataset *ds, case_func *, void *aux) WARN_UNUSED_RESULT; - 
-bool procedure_with_splits (struct dataset *ds, - begin_func *, - case_func *, - end_func *, - void *aux) - WARN_UNUSED_RESULT; -bool multipass_procedure (struct dataset *ds, casefile_func *, void *aux) - WARN_UNUSED_RESULT; -bool multipass_procedure_with_splits (struct dataset *ds, - split_func *, - void *aux) - WARN_UNUSED_RESULT; +void proc_discard_output (struct dataset *ds); +bool proc_execute (struct dataset *ds); time_t time_of_last_procedure (struct dataset *ds); -void proc_open (struct dataset *); -bool proc_read (struct dataset *, struct ccase **); -bool proc_close (struct dataset *); +struct casereader *proc_open (struct dataset *); +bool proc_is_open (const struct dataset *); +bool proc_commit (struct dataset *); + +bool dataset_end_of_command (struct dataset *); +struct dictionary *dataset_dict (const struct dataset *ds); struct ccase *lagged_case (const struct dataset *ds, int n_before); - -inline struct dictionary *dataset_dict (const struct dataset *ds); -inline void dataset_set_dict ( struct dataset *ds, struct dictionary *dict); - void dataset_need_lag (struct dataset *ds, int n_before); #endif /* procedure.h */ diff --git a/src/data/scratch-handle.c b/src/data/scratch-handle.c index 2a08dbb0..0ac56433 100644 --- a/src/data/scratch-handle.c +++ b/src/data/scratch-handle.c @@ -18,9 +18,9 @@ #include #include -#include "scratch-handle.h" -#include "casefile.h" -#include "dictionary.h" +#include +#include +#include /* Destroys HANDLE. */ void @@ -29,7 +29,7 @@ scratch_handle_destroy (struct scratch_handle *handle) if (handle != NULL) { dict_destroy (handle->dictionary); - casefile_destroy (handle->casefile); + casereader_destroy (handle->casereader); free (handle); } } diff --git a/src/data/scratch-handle.h b/src/data/scratch-handle.h index 8a82650b..e70150a2 100644 --- a/src/data/scratch-handle.h +++ b/src/data/scratch-handle.h @@ -25,7 +25,7 @@ struct scratch_handle { struct dictionary *dictionary; /* Dictionary. 
*/ - struct casefile *casefile; /* Cases. */ + struct casereader *casereader; /* Cases. */ }; void scratch_handle_destroy (struct scratch_handle *); diff --git a/src/data/scratch-reader.c b/src/data/scratch-reader.c index 4459126b..17c1aeaa 100644 --- a/src/data/scratch-reader.c +++ b/src/data/scratch-reader.c @@ -22,11 +22,11 @@ #include -#include "casefile.h" #include "dictionary.h" #include "file-handle-def.h" #include "scratch-handle.h" #include +#include #include #include "xalloc.h" @@ -34,31 +34,20 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -/* A reader for a scratch file. */ -struct scratch_reader - { - struct file_handle *fh; /* Underlying file handle. */ - struct casereader *casereader; /* Case reader. */ - }; - /* Opens FH, which must have referent type FH_REF_SCRATCH, and returns a scratch_reader for it, or a null pointer on failure. Stores the dictionary for the scratch file into - *DICT. - - If you use an any_reader instead, then your code can be more - flexible without being any harder to write. */ -struct scratch_reader * + *DICT. */ +struct casereader * scratch_reader_open (struct file_handle *fh, struct dictionary **dict) { struct scratch_handle *sh; - struct scratch_reader *reader; if (!fh_open (fh, FH_REF_SCRATCH, "scratch file", "rs")) return NULL; sh = fh_get_scratch_handle (fh); - if (sh == NULL) + if (sh == NULL || sh->casereader == NULL) { msg (SE, _("Scratch file handle %s has not yet been written, " "using SAVE or another procedure, so it cannot yet " @@ -68,42 +57,5 @@ scratch_reader_open (struct file_handle *fh, struct dictionary **dict) } *dict = dict_clone (sh->dictionary); - reader = xmalloc (sizeof *reader); - reader->fh = fh; - reader->casereader = casefile_get_reader (sh->casefile, NULL); - return reader; -} - -/* Reads a case from READER and copies it into C. - Returns true if successful, false on error or at end of file. 
*/ -bool -scratch_reader_read_case (struct scratch_reader *reader, struct ccase *c) -{ - struct ccase tmp; - if (casereader_read (reader->casereader, &tmp)) - { - case_copy (c, 0, &tmp, 0, - casefile_get_value_cnt ( - casereader_get_casefile (reader->casereader))); - case_destroy (&tmp); - return true; - } - else - return false; -} - -/* Returns true if an I/O error occurred on READER, false otherwise. */ -bool -scratch_reader_error (const struct scratch_reader *reader) -{ - return casefile_error (casereader_get_casefile (reader->casereader)); -} - -/* Closes READER. */ -void -scratch_reader_close (struct scratch_reader *reader) -{ - fh_close (reader->fh, "scratch file", "rs"); - casereader_destroy (reader->casereader); - free (reader); + return casereader_clone (sh->casereader); } diff --git a/src/data/scratch-reader.h b/src/data/scratch-reader.h index 7d5f28b4..16490df7 100644 --- a/src/data/scratch-reader.h +++ b/src/data/scratch-reader.h @@ -24,10 +24,7 @@ struct dictionary; struct file_handle; struct ccase; -struct scratch_reader *scratch_reader_open (struct file_handle *, - struct dictionary **); -bool scratch_reader_read_case (struct scratch_reader *, struct ccase *); -bool scratch_reader_error (const struct scratch_reader *); -void scratch_reader_close (struct scratch_reader *); +struct casereader *scratch_reader_open (struct file_handle *, + struct dictionary **); #endif /* scratch-reader.h */ diff --git a/src/data/scratch-writer.c b/src/data/scratch-writer.c index 42e77aa7..67e371a4 100644 --- a/src/data/scratch-writer.c +++ b/src/data/scratch-writer.c @@ -17,14 +17,21 @@ 02110-1301, USA. */ #include + #include "scratch-writer.h" + #include -#include "case.h" -#include "casefile.h" -#include "fastfile.h" -#include "dictionary.h" -#include "file-handle-def.h" -#include "scratch-handle.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "xalloc.h" /* A scratch file writer. 
*/ @@ -33,16 +40,16 @@ struct scratch_writer struct scratch_handle *handle; /* Underlying scratch handle. */ struct file_handle *fh; /* Underlying file handle. */ struct dict_compactor *compactor; /* Compacts into handle->dictionary. */ + struct casewriter *subwriter; /* Data output. */ }; +static struct casewriter_class scratch_writer_casewriter_class; + /* Opens FH, which must have referent type FH_REF_SCRATCH, and returns a scratch_writer for it, or a null pointer on failure. Cases stored in the scratch_writer will be expected - to be drawn from DICTIONARY. - - If you use an any_writer instead, then your code can be more - flexible without being any harder to write. */ -struct scratch_writer * + to be drawn from DICTIONARY. */ +struct casewriter * scratch_writer_open (struct file_handle *fh, const struct dictionary *dictionary) { @@ -50,6 +57,7 @@ scratch_writer_open (struct file_handle *fh, struct scratch_writer *writer; struct dictionary *scratch_dict; struct dict_compactor *compactor; + struct casewriter *casewriter; if (!fh_open (fh, FH_REF_SCRATCH, "scratch file", "we")) return NULL; @@ -72,50 +80,57 @@ scratch_writer_open (struct file_handle *fh, /* Create new contents. */ sh = xmalloc (sizeof *sh); sh->dictionary = scratch_dict; - sh->casefile = fastfile_create (dict_get_next_value_idx (sh->dictionary)); + sh->casereader = NULL; /* Create writer. */ writer = xmalloc (sizeof *writer); writer->handle = sh; writer->fh = fh; writer->compactor = compactor; + writer->subwriter = autopaging_writer_create (dict_get_next_value_idx ( + scratch_dict)); fh_set_scratch_handle (fh, sh); - return writer; + casewriter = casewriter_create (&scratch_writer_casewriter_class, writer); + taint_propagate (casewriter_get_taint (writer->subwriter), + casewriter_get_taint (casewriter)); + return casewriter; } /* Writes case C to WRITER. 
*/ -bool -scratch_writer_write_case (struct scratch_writer *writer, - const struct ccase *c) +static void +scratch_writer_casewriter_write (struct casewriter *w UNUSED, void *writer_, + struct ccase *c) { + struct scratch_writer *writer = writer_; struct scratch_handle *handle = writer->handle; + struct ccase tmp; if (writer->compactor) { - struct ccase tmp_case; - case_create (&tmp_case, dict_get_next_value_idx (handle->dictionary)); - dict_compactor_compact (writer->compactor, &tmp_case, c); - return casefile_append_xfer (handle->casefile, &tmp_case); + case_create (&tmp, dict_get_next_value_idx (handle->dictionary)); + dict_compactor_compact (writer->compactor, &tmp, c); + case_destroy (c); } - else - return casefile_append (handle->casefile, c); -} - -/* Returns true if an I/O error occurred on WRITER, false otherwise. */ -bool -scratch_writer_error (const struct scratch_writer *writer) -{ - return casefile_error (writer->handle->casefile); + else + case_move (&tmp, c); + casewriter_write (writer->subwriter, &tmp); } -/* Closes WRITER. - Returns true if successful, false if an I/O error occurred on WRITER. */ -bool -scratch_writer_close (struct scratch_writer *writer) +/* Closes WRITER. 
*/ +static void +scratch_writer_casewriter_destroy (struct casewriter *w UNUSED, void *writer_) { - struct casefile *cf = writer->handle->casefile; - bool ok = casefile_error (cf); + struct scratch_writer *writer = writer_; + struct casereader *reader = casewriter_make_reader (writer->subwriter); + if (!casereader_error (reader)) + writer->handle->casereader = reader; fh_close (writer->fh, "scratch file", "we"); free (writer); - return ok; } + +static struct casewriter_class scratch_writer_casewriter_class = + { + scratch_writer_casewriter_write, + scratch_writer_casewriter_destroy, + NULL, + }; diff --git a/src/data/scratch-writer.h b/src/data/scratch-writer.h index d4832a4f..95b0bddb 100644 --- a/src/data/scratch-writer.h +++ b/src/data/scratch-writer.h @@ -24,10 +24,7 @@ struct dictionary; struct file_handle; struct ccase; -struct scratch_writer *scratch_writer_open (struct file_handle *, - const struct dictionary *); -bool scratch_writer_write_case (struct scratch_writer *, const struct ccase *); -bool scratch_writer_error (const struct scratch_writer *); -bool scratch_writer_close (struct scratch_writer *); +struct casewriter *scratch_writer_open (struct file_handle *, + const struct dictionary *); #endif /* scratch-writer.h */ diff --git a/src/data/storage-stream.c b/src/data/storage-stream.c deleted file mode 100644 index 4ff939cd..00000000 --- a/src/data/storage-stream.c +++ /dev/null @@ -1,205 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#include - -#include - -#include -#include - -#include -#include -#include -#include -#include - -#include "xalloc.h" - -/* Storage sink. */ - -/* Information about storage sink. */ -struct storage_sink_info - { - struct casefile *casefile; /* Storage. */ - }; - -static struct storage_sink_info * -get_storage_sink_info (struct case_sink *sink) -{ - assert (sink->class == &storage_sink_class); - return sink->aux; -} - -/* Initializes a storage sink. */ -static void -storage_sink_open (struct case_sink *sink) -{ - struct storage_sink_info *info; - - sink->aux = info = xmalloc (sizeof *info); - info->casefile = sink->factory->create_casefile (sink->factory, - sink->value_cnt); -} - -/* Writes case C to the storage sink SINK. - Returns true if successful, false if an I/O error occurred. */ -static bool -storage_sink_write (struct case_sink *sink, const struct ccase *c) -{ - struct storage_sink_info *info = get_storage_sink_info (sink); - return casefile_append (info->casefile, c); -} - -/* Destroys internal data in SINK. */ -static void -storage_sink_destroy (struct case_sink *sink) -{ - struct storage_sink_info *info = get_storage_sink_info (sink); - casefile_destroy (info->casefile); - free (info); -} - -/* Closes the sink and returns a storage source to read back the - written data. */ -static struct case_source * -storage_sink_make_source (struct case_sink *sink) -{ - struct storage_sink_info *info = get_storage_sink_info (sink); - struct case_source *source = storage_source_create (info->casefile); - info->casefile = NULL; - return source; -} - -/* Storage sink. 
*/ -const struct case_sink_class storage_sink_class = - { - "storage", - storage_sink_open, - storage_sink_write, - storage_sink_destroy, - storage_sink_make_source, - }; - -/* Storage source. */ - -struct storage_source_info - { - struct casefile *casefile; /* Storage. */ - struct casereader *reader; /* Reader. */ - }; - -static struct storage_source_info * -get_storage_source_info (const struct case_source *source) -{ - assert (source->class == &storage_source_class); - return source->aux; -} - -/* Returns the number of cases that will be read by - storage_source_read(). */ -static int -storage_source_count (const struct case_source *source) -{ - struct storage_source_info *info = get_storage_source_info (source); - return casefile_get_case_cnt (info->casefile); -} - -/* Reads one case into OUTPUT_CASE. - Returns true if successful, false at end of file or if an - I/O error occurred. */ -static bool -storage_source_read (struct case_source *source, struct ccase *output_case) -{ - struct storage_source_info *info = get_storage_source_info (source); - struct ccase casefile_case; - - if (info->reader == NULL) - info->reader = casefile_get_reader (info->casefile, NULL); - - if (casereader_read (info->reader, &casefile_case)) - { - case_copy (output_case, 0, - &casefile_case, 0, - casefile_get_value_cnt (info->casefile)); - return true; - } - else - return false; -} - -/* Destroys the source. - Returns true if successful read, false if an I/O occurred - during destruction or previously. */ -static bool -storage_source_destroy (struct case_source *source) -{ - struct storage_source_info *info = get_storage_source_info (source); - bool ok = true; - if (info->casefile) - { - ok = !casefile_error (info->casefile); - casefile_destroy (info->casefile); - } - free (info); - return ok; -} - -/* Returns the casefile encapsulated by SOURCE. 
*/ -struct casefile * -storage_source_get_casefile (struct case_source *source) -{ - struct storage_source_info *info = get_storage_source_info (source); - return info->casefile; -} - -/* Destroys SOURCE and returns the casefile that it - encapsulated. */ -struct casefile * -storage_source_decapsulate (struct case_source *source) -{ - struct storage_source_info *info = get_storage_source_info (source); - struct casefile *casefile = info->casefile; - assert (info->reader == NULL); - info->casefile = NULL; - free_case_source (source); - return casefile; -} - -/* Creates and returns a new storage source that encapsulates - CASEFILE. */ -struct case_source * -storage_source_create (struct casefile *casefile) -{ - struct storage_source_info *info; - - info = xmalloc (sizeof *info); - info->casefile = casefile; - info->reader = NULL; - - return create_case_source (&storage_source_class, info); -} - -/* Storage source. */ -const struct case_source_class storage_source_class = - { - "storage", - storage_source_count, - storage_source_read, - storage_source_destroy, - }; diff --git a/src/data/storage-stream.h b/src/data/storage-stream.h deleted file mode 100644 index 980b1c70..00000000 --- a/src/data/storage-stream.h +++ /dev/null @@ -1,32 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#ifndef STORAGE_STREAM_H -#define STORAGE_STREAM_H 1 - -struct case_source; -struct casefile; - -extern const struct case_sink_class storage_sink_class; -extern const struct case_source_class storage_source_class; - -struct casefile *storage_source_get_casefile (struct case_source *); -struct casefile *storage_source_decapsulate (struct case_source *); -struct case_source *storage_source_create (struct casefile *); - -#endif /* storage-stream.h */ diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 401e3e27..9589747e 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -18,8 +18,8 @@ #include -#include "sys-file-reader.h" -#include "sys-file-private.h" +#include +#include #include #include @@ -38,15 +38,17 @@ #include #include -#include "case.h" -#include "dictionary.h" -#include "file-handle-def.h" -#include "file-name.h" -#include "format.h" -#include "missing-values.h" -#include "value-labels.h" -#include "variable.h" -#include "value.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "c-ctype.h" #include "inttostr.h" @@ -69,11 +71,12 @@ struct sfm_reader struct file_handle *fh; /* File handle. */ FILE *file; /* File stream. */ bool error; /* I/O or corruption error? */ + size_t value_cnt; /* Number of "union value"s in struct case. */ /* File format. */ enum integer_format integer_format; /* On-disk integer format. */ enum float_format float_format; /* On-disk floating point format. */ - int value_cnt; /* Number of 8-byte units per case. */ + int flt64_cnt; /* Number of 8-byte units per case. */ struct sfm_var *vars; /* Variables. */ size_t var_cnt; /* Number of variables. 
*/ bool has_long_var_names; /* File has a long variable name map */ @@ -93,6 +96,10 @@ struct sfm_var int case_index; /* Index into case. */ }; +static struct casereader_class sys_file_casereader_class; + +static bool close_reader (struct sfm_reader *); + static struct variable **make_var_by_value_idx (struct sfm_reader *, struct dictionary *); static struct variable *lookup_var_by_value_idx (struct sfm_reader *, @@ -125,6 +132,8 @@ static bool read_variable_to_value_map (struct sfm_reader *, struct variable_to_value_map *, struct variable **var, char **value, int *warning_cnt); + +static bool close_reader (struct sfm_reader *r); /* Dictionary reader. */ @@ -135,7 +144,7 @@ enum which_format }; static void read_header (struct sfm_reader *, struct dictionary *, - int *weight_idx, int *claimed_value_cnt, + int *weight_idx, int *claimed_flt64_cnt, struct sfm_read_info *); static void read_variable_record (struct sfm_reader *, struct dictionary *, int *format_warning_cnt); @@ -169,7 +178,7 @@ static void read_long_string_map (struct sfm_reader *, reading. Reads the system file's dictionary into *DICT. If INFO is non-null, then it receives additional info about the system file. 
*/ -struct sfm_reader * +struct casereader * sfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct sfm_read_info *info) { @@ -177,7 +186,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct variable **var_by_value_idx; int format_warning_cnt = 0; int weight_idx; - int claimed_value_cnt; + int claimed_flt64_cnt; int rec_type; size_t i; @@ -191,14 +200,14 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->fh = fh; r->file = fn_open (fh_get_file_name (fh), "rb"); r->error = false; - r->value_cnt = 0; + r->flt64_cnt = 0; r->has_vls = false; r->has_long_var_names = false; r->opcode_idx = sizeof r->opcodes; if (setjmp (r->bail_out)) { - sfm_close_reader (r); + close_reader (r); dict_destroy (*dict); *dict = NULL; return NULL; @@ -212,7 +221,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } /* Read header. */ - read_header (r, *dict, &weight_idx, &claimed_value_cnt, info); + read_header (r, *dict, &weight_idx, &claimed_flt64_cnt, info); /* Read all the variable definition records. */ rec_type = read_int32 (r); @@ -280,10 +289,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, /* Read record 999 data, which is just filler. */ read_int32 (r); - if (claimed_value_cnt != -1 && claimed_value_cnt != r->value_cnt) + if (claimed_flt64_cnt != -1 && claimed_flt64_cnt != r->flt64_cnt) sys_warn (r, _("File header claims %d variable positions but " "%d were read from file."), - claimed_value_cnt, r->value_cnt); + claimed_flt64_cnt, r->flt64_cnt); /* Create an index of dictionary variable widths for sfm_read_case to use. 
We cannot use the `struct variable's @@ -300,36 +309,48 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } pool_free (r->pool, var_by_value_idx); - return r; + r->value_cnt = dict_get_next_value_idx (*dict); + return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX, + &sys_file_casereader_class, r); } -/* Closes a system file after we're done with it. */ -void -sfm_close_reader (struct sfm_reader *r) +/* Closes a system file after we're done with it. + Returns true if an I/O error has occurred on READER, false + otherwise. */ +static bool +close_reader (struct sfm_reader *r) { + bool error; + if (r == NULL) - return; + return true; if (r->file) { - if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) - msg (ME, _("Error closing system file \"%s\": %s."), - fh_get_file_name (r->fh), strerror (errno)); + if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) + { + msg (ME, _("Error closing system file \"%s\": %s."), + fh_get_file_name (r->fh), strerror (errno)); + r->error = true; + } r->file = NULL; } if (r->fh != NULL) fh_close (r->fh, "system file", "rs"); + error = r->error; pool_destroy (r->pool); + + return !error; } -/* Returns true if an I/O error has occurred on READER, false - otherwise. */ -bool -sfm_read_error (const struct sfm_reader *reader) +/* Destroys READER. */ +static void +sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { - return reader->error; + struct sfm_reader *r = r_; + close_reader (r); } /* Returns true if FILE is an SPSS system file, @@ -350,13 +371,13 @@ sfm_detect (FILE *file) Sets DICT's file label to the system file's label. Sets *WEIGHT_IDX to 0 if the system file is unweighted, or to the value index of the weight variable otherwise. - Sets *CLAIMED_VALUE_CNT to the number of values that the file + Sets *CLAIMED_FLT64_CNT to the number of values that the file claims to have (although it is not always correct). 
If INFO is non-null, initializes *INFO with header information. */ static void read_header (struct sfm_reader *r, struct dictionary *dict, - int *weight_idx, int *claimed_value_cnt, + int *weight_idx, int *claimed_flt64_cnt, struct sfm_read_info *info) { char rec_type[5]; @@ -385,9 +406,9 @@ read_header (struct sfm_reader *r, struct dictionary *dict, && r->integer_format != INTEGER_LSB_FIRST)) sys_error (r, _("This is not an SPSS system file.")); - *claimed_value_cnt = read_int32 (r); - if (*claimed_value_cnt < 0 || *claimed_value_cnt > INT_MAX / 16) - *claimed_value_cnt = -1; + *claimed_flt64_cnt = read_int32 (r); + if (*claimed_flt64_cnt < 0 || *claimed_flt64_cnt > INT_MAX / 16) + *claimed_flt64_cnt = -1; r->compressed = read_int32 (r) != 0; @@ -564,7 +585,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, /* Account for values. Skip long string continuation records, if any. */ nv = width == 0 ? 1 : DIV_RND_UP (width, 8); - r->value_cnt += nv; + r->flt64_cnt += nv; if (width > 8) { int i; @@ -1110,29 +1131,39 @@ static bool read_compressed_number (struct sfm_reader *, double *); static bool read_compressed_string (struct sfm_reader *, char *); static bool read_whole_strings (struct sfm_reader *, char *, size_t); -/* Reads one case from READER's file into C. Returns nonzero - only if successful. */ -int -sfm_read_case (struct sfm_reader *r, struct ccase *c) +/* Reads one case from READER's file into C. Returns true only + if successful. */ +static bool +sys_file_casereader_read (struct casereader *reader, void *r_, + struct ccase *c) { + struct sfm_reader *r = r_; if (r->error) - return 0; + return false; - if (setjmp (r->bail_out)) - return 0; + case_create (c, r->value_cnt); + if (setjmp (r->bail_out)) + { + casereader_force_error (reader); + case_destroy (c); + return false; + } if (!r->compressed && sizeof (double) == 8 && !r->has_vls) { /* Fast path. Read the whole case directly. 
*/ if (!try_read_bytes (r, case_data_all_rw (c), - sizeof (union value) * r->value_cnt)) - return 0; + sizeof (union value) * r->flt64_cnt)) + { + case_destroy (c); + return false; + } /* Convert floating point numbers to native format if needed. */ if (r->float_format != FLOAT_NATIVE_DOUBLE) { int i; - + for (i = 0; i < r->var_cnt; i++) if (r->vars[i].width == 0) { @@ -1140,7 +1171,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d); } } - return 1; + return true; } else { @@ -1194,12 +1225,13 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) } } } - return 1; + return true; eof: + case_destroy (c); if (i != 0) partial_record (r); - return 0; + return false; } } @@ -1386,7 +1418,7 @@ make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict) int i; var_by_value_idx = pool_nmalloc (r->pool, - r->value_cnt, sizeof *var_by_value_idx); + r->flt64_cnt, sizeof *var_by_value_idx); for (i = 0; i < dict_get_var_cnt (dict); i++) { struct variable *v = dict_get_var (dict, i); @@ -1397,7 +1429,7 @@ make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict) for (j = 1; j < nv; j++) var_by_value_idx[value_idx++] = NULL; } - assert (value_idx == r->value_cnt); + assert (value_idx == r->flt64_cnt); return var_by_value_idx; } @@ -1411,9 +1443,9 @@ lookup_var_by_value_idx (struct sfm_reader *r, { struct variable *var; - if (value_idx < 1 || value_idx > r->value_cnt) + if (value_idx < 1 || value_idx > r->flt64_cnt) sys_error (r, _("Variable index %d not in valid range 1...%d."), - value_idx, r->value_cnt); + value_idx, r->flt64_cnt); var = var_by_value_idx[value_idx - 1]; if (var == NULL) @@ -1686,4 +1718,11 @@ flt64_to_double (const struct sfm_reader *r, const uint8_t flt64[8]) float_convert (r->float_format, flt64, FLOAT_NATIVE_DOUBLE, &x); return x; } - + +static struct casereader_class sys_file_casereader_class = + { + sys_file_casereader_read, + sys_file_casereader_destroy, + NULL, + 
NULL, + }; diff --git a/src/data/sys-file-reader.h b/src/data/sys-file-reader.h index 6a3e7029..33740475 100644 --- a/src/data/sys-file-reader.h +++ b/src/data/sys-file-reader.h @@ -42,12 +42,9 @@ struct sfm_read_info struct dictionary; struct file_handle; struct ccase; -struct sfm_reader *sfm_open_reader (struct file_handle *, +struct casereader *sfm_open_reader (struct file_handle *, struct dictionary **, struct sfm_read_info *); -int sfm_read_case (struct sfm_reader *, struct ccase *); -bool sfm_read_error (const struct sfm_reader *); -void sfm_close_reader (struct sfm_reader *); bool sfm_detect (FILE *); #endif /* sys-file-reader.h */ diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index 808a307e..c7b988da 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -37,14 +37,16 @@ #include #include -#include "case.h" -#include "dictionary.h" -#include "file-handle-def.h" -#include "format.h" -#include "missing-values.h" -#include "settings.h" -#include "value-labels.h" -#include "variable.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "minmax.h" @@ -144,6 +146,8 @@ struct sfm_var size_t flt64_cnt; /* Number of flt64 elements. 
*/ }; +static struct casewriter_class sys_file_casewriter_class; + static char *append_string_max (char *, const char *, const char *); static void write_header (struct sfm_writer *, const struct dictionary *); static void buf_write (struct sfm_writer *, const void *, size_t); @@ -164,6 +168,9 @@ static void write_variable_display_parameters (struct sfm_writer *w, static void write_documents (struct sfm_writer *, const struct dictionary *); +bool write_error (const struct sfm_writer *); +bool close_writer (struct sfm_writer *); + static inline int var_flt64_cnt (const struct variable *v) { @@ -219,7 +226,7 @@ cont_var_name(const char *sn, int idx) No reference to D is retained, so it may be modified or destroyed at will after this function returns. D is not modified by this function, except to assign short names. */ -struct sfm_writer * +struct casewriter * sfm_open_writer (struct file_handle *fh, struct dictionary *d, struct sfm_write_options opts) { @@ -374,13 +381,13 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, w->y = (unsigned char *) w->ptr; } - if (sfm_write_error (w)) + if (write_error (w)) goto error; - return w; + return casewriter_create (&sys_file_casewriter_class, w); error: - sfm_close_writer (w); + close_writer (w); return NULL; open_error: @@ -925,13 +932,18 @@ ensure_buf_space (struct sfm_writer *w) static void write_compressed_data (struct sfm_writer *w, const flt64 *elem); -/* Writes case C to system file W. - Returns 1 if successful, 0 if an I/O error occurred. */ -bool -sfm_write_case (struct sfm_writer *w, const struct ccase *c) +/* Writes case C to system file W. 
*/ +static void +sys_file_casewriter_write (struct casewriter *writer, void *w_, + struct ccase *c) { - if (ferror (w->file)) - return 0; + struct sfm_writer *w = w_; + if (ferror (w->file)) + { + casewriter_force_error (writer); + case_destroy (c); + return; + } w->case_cnt++; @@ -990,8 +1002,16 @@ sfm_write_case (struct sfm_writer *w, const struct ccase *c) local_free (bounce); } - - return !sfm_write_error (w); + + case_destroy (c); +} + +static void +sys_file_casewriter_destroy (struct casewriter *writer, void *w_) +{ + struct sfm_writer *w = w_; + if (!close_writer (w)) + casewriter_force_error (writer); } static void @@ -1057,7 +1077,7 @@ write_compressed_data (struct sfm_writer *w, const flt64 *elem) /* Returns true if an I/O error has occurred on WRITER, false otherwise. */ bool -sfm_write_error (const struct sfm_writer *writer) +write_error (const struct sfm_writer *writer) { return ferror (writer->file); } @@ -1065,7 +1085,7 @@ sfm_write_error (const struct sfm_writer *writer) /* Closes a system file after we're done with it. Returns true if successful, false if an I/O error occurred. */ bool -sfm_close_writer (struct sfm_writer *w) +close_writer (struct sfm_writer *w) { bool ok; @@ -1083,7 +1103,7 @@ sfm_close_writer (struct sfm_writer *w) } fflush (w->file); - ok = !sfm_write_error (w); + ok = !write_error (w); /* Seek back to the beginning and update the number of cases. 
This is just a courtesy to later readers, so there's no need @@ -1112,3 +1132,10 @@ sfm_close_writer (struct sfm_writer *w) return ok; } + +static struct casewriter_class sys_file_casewriter_class = + { + sys_file_casewriter_write, + sys_file_casewriter_destroy, + NULL, + }; diff --git a/src/data/sys-file-writer.h b/src/data/sys-file-writer.h index 9773230e..9e850156 100644 --- a/src/data/sys-file-writer.h +++ b/src/data/sys-file-writer.h @@ -34,12 +34,8 @@ struct sfm_write_options struct file_handle; struct dictionary; struct ccase; -struct sfm_writer *sfm_open_writer (struct file_handle *, struct dictionary *, +struct casewriter *sfm_open_writer (struct file_handle *, struct dictionary *, struct sfm_write_options); struct sfm_write_options sfm_writer_default_options (void); -bool sfm_write_case (struct sfm_writer *, const struct ccase *); -bool sfm_write_error (const struct sfm_writer *); -bool sfm_close_writer (struct sfm_writer *); - #endif /* sys-file-writer.h */ diff --git a/src/language/ChangeLog b/src/language/ChangeLog index 0a806883..8949ff75 100644 --- a/src/language/ChangeLog +++ b/src/language/ChangeLog @@ -1,6 +1,7 @@ 2007-06-06 Ben Pfaff - * command.def: Add DEBUG DATASHEET command. + * command.def: Add DEBUG DATASHEET command. Remove DEBUG CASEFILE + command. 
2007-03-18 Ben Pfaff diff --git a/src/language/command.c b/src/language/command.c index a4821db4..49ec22fd 100644 --- a/src/language/command.c +++ b/src/language/command.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -147,8 +148,11 @@ cmd_parse_in_state (struct lexer *lexer, struct dataset *ds, if (cmd_result_is_failure (result)) lex_discard_rest_of_command (lexer); + assert (!proc_is_open (ds)); unset_cmd_algorithm (); dict_clear_aux (dataset_dict (ds)); + if (!dataset_end_of_command (ds)) + result = CMD_CASCADING_FAILURE; return result; } @@ -158,7 +162,7 @@ cmd_parse (struct lexer *lexer, struct dataset *ds) { const struct dictionary *dict = dataset_dict (ds); return cmd_parse_in_state (lexer, ds, - proc_has_source (ds) && + proc_has_active_file (ds) && dict_get_var_cnt (dict) > 0 ? CMD_STATE_DATA : CMD_STATE_INITIAL); } @@ -203,7 +207,7 @@ do_parse_command (struct lexer *lexer, struct dataset *ds, enum cmd_state state) { msg (SE, _("%s may be used only in enhanced syntax mode."), command->name); - return CMD_FAILURE; + return CMD_FAILURE; } else if (!in_correct_state (command, state)) { @@ -687,7 +691,8 @@ cmd_n_of_cases (struct lexer *lexer, struct dataset *ds) int cmd_execute (struct lexer *lexer, struct dataset *ds) { - if (!procedure (ds, NULL, NULL)) + bool ok = casereader_destroy (proc_open (ds)); + if (!proc_commit (ds) || !ok) return CMD_CASCADING_FAILURE; return lex_end_of_command (lexer); } @@ -840,7 +845,7 @@ cmd_host (struct lexer *lexer, struct dataset *ds UNUSED) int cmd_new_file (struct lexer *lexer, struct dataset *ds) { - discard_variables (ds); + proc_discard_active_file (ds); return lex_end_of_command (lexer); } diff --git a/src/language/command.def b/src/language/command.def index ef8b385b..a0e974e1 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -128,7 +128,6 @@ DEF_CMD (S_INPUT_PROGRAM, 0, "END INPUT PROGRAM", cmd_end_input_program) DEF_CMD (S_INPUT_PROGRAM, 0, "REREAD", cmd_reread) /* 
Commands for testing PSPP. */ -DEF_CMD (S_ANY, F_TESTING, "DEBUG CASEFILE", cmd_debug_casefile) DEF_CMD (S_ANY, F_TESTING, "DEBUG DATASHEET", cmd_debug_datasheet) DEF_CMD (S_ANY, F_TESTING, "DEBUG EVALUATE", cmd_debug_evaluate) DEF_CMD (S_ANY, F_TESTING, "DEBUG MOMENTS", cmd_debug_moments) diff --git a/src/language/control/do-if.c b/src/language/control/do-if.c index cf3a8b0e..7de886ef 100644 --- a/src/language/control/do-if.c +++ b/src/language/control/do-if.c @@ -21,6 +21,7 @@ #include #include "control-stack.h" +#include #include #include #include diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog index 7d329741..0c1f3791 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,18 @@ +2007-06-06 Ben Pfaff + + Adapt case sources, sinks, and clients of procedure code to the + new infrastructure. + + * data-list.c: Make DATA LIST into a casereader. + + * get.c: Change GET, IMPORT, SAVE, EXPORT to use casereaders, + casewriters. + + * inpt-pgm.c: Use caseinit code. Turn INPUT PROGRAM into a + casereader. + + * list.q: Adapt to new procedure code. + 2007-05-06 Ben Pfaff Abstract the documents within a dictionary a little better. diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 9594ead0..07a130c3 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -23,10 +23,10 @@ #include #include -#include #include -#include #include +#include +#include #include #include #include @@ -99,9 +99,10 @@ struct data_list_pgm int record_cnt; /* Number of records. */ struct string delims; /* Field delimiters. */ int skip_records; /* Records to skip before first case. */ + size_t value_cnt; /* Number of `union value's in case. 
*/ }; -static const struct case_source_class data_list_source_class; +static const struct casereader_class data_list_casereader_class; static bool parse_fixed (struct lexer *, struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *); @@ -118,15 +119,14 @@ static trns_proc_func data_list_trns_proc; int cmd_data_list (struct lexer *lexer, struct dataset *ds) { - struct dictionary *dict = dataset_dict (ds); + struct dictionary *dict; struct data_list_pgm *dls; int table = -1; /* Print table if nonzero, -1=undecided. */ struct file_handle *fh = fh_inline_file (); struct pool *tmp_pool; bool ok; - if (!in_input_program ()) - discard_variables (ds); + dict = in_input_program () ? dataset_dict (ds) : dict_create (); dls = pool_create_container (struct data_list_pgm, pool); ll_init (&dls->specs); @@ -178,9 +178,9 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) lex_match (lexer, '='); if (!lex_force_id (lexer)) goto error; - dls->end = dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)); + dls->end = dict_lookup_var (dict, lex_tokid (lexer)); if (!dls->end) - dls->end = dict_create_var_assert (dataset_dict (ds), lex_tokid (lexer), 0); + dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0); lex_get (lexer); } else if (lex_token (lexer) == T_ID) @@ -273,10 +273,19 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) if (dls->reader == NULL) goto error; + dls->value_cnt = dict_get_next_value_idx (dict); + if (in_input_program ()) add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls); else - proc_set_source (ds, create_case_source (&data_list_source_class, dls)); + { + struct casereader *reader; + reader = casereader_create_sequential (NULL, + dict_get_next_value_idx (dict), + -1, &data_list_casereader_class, + dls); + proc_set_active_file (ds, reader, dict); + } pool_destroy (tmp_pool); @@ -810,10 +819,12 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED) Returns true if successful, 
false at end of file or if an I/O error occurred. */ static bool -data_list_source_read (struct case_source *source, struct ccase *c) +data_list_casereader_read (struct casereader *reader UNUSED, void *dls_, + struct ccase *c) { - struct data_list_pgm *dls = source->aux; - + struct data_list_pgm *dls = dls_; + bool ok; + /* Skip the requested number of records before reading the first case. */ while (dls->skip_records > 0) @@ -823,26 +834,28 @@ data_list_source_read (struct case_source *source, struct ccase *c) dfm_forward_record (dls->reader); dls->skip_records--; } - - return read_from_data_list (dls, c); + + case_create (c, dls->value_cnt); + ok = read_from_data_list (dls, c); + if (!ok) + case_destroy (c); + return ok; } -/* Destroys the source. - Returns true if successful read, false if an I/O occurred - during destruction or previously. */ -static bool -data_list_source_destroy (struct case_source *source) +/* Destroys the casereader. */ +static void +data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_) { - struct data_list_pgm *dls = source->aux; - bool ok = !dfm_reader_error (dls->reader); + struct data_list_pgm *dls = dls_; + if (dfm_reader_error (dls->reader)) + casereader_force_error (reader); data_list_trns_free (dls); - return ok; } -static const struct case_source_class data_list_source_class = +static const struct casereader_class data_list_casereader_class = { - "DATA LIST", + data_list_casereader_read, + data_list_casereader_destroy, + NULL, NULL, - data_list_source_read, - data_list_source_destroy, }; diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index fa969223..0650a546 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -444,8 +445,8 @@ cmd_begin_data (struct lexer *lexer, struct dataset *ds) /* Input procedure reads from inline file. 
*/ prompt_set_style (PROMPT_DATA); - ok = procedure (ds, NULL, NULL); - + casereader_destroy (proc_open (ds)); + ok = proc_commit (ds); dfm_close_reader (r); return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index 32b3764e..d06a8e65 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -22,17 +22,14 @@ #include #include -#include -#include #include -#include -#include +#include +#include #include #include #include #include #include -#include #include #include #include @@ -46,9 +43,9 @@ #include #include #include -#include #include #include +#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -71,25 +68,18 @@ enum reader_command IMPORT_CMD }; -/* Case reader input program. */ -struct case_reader_pgm - { - struct any_reader *reader; /* File reader. */ - struct case_map *map; /* Map from file dict to active file dict. */ - struct ccase bounce; /* Bounce buffer. */ - }; - -static const struct case_source_class case_reader_source_class; - -static void case_reader_pgm_free (struct case_reader_pgm *); +static void get_translate_case (const struct ccase *, struct ccase *, + void *map_); +static bool get_destroy_case_map (void *map_); /* Parses a GET or IMPORT command. 
*/ static int parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type) { - struct case_reader_pgm *pgm = NULL; + struct casereader *reader = NULL; struct file_handle *fh = NULL; struct dictionary *dict = NULL; + struct case_map *map = NULL; for (;;) { @@ -127,17 +117,10 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command goto error; } - discard_variables (ds); - - pgm = xmalloc (sizeof *pgm); - pgm->reader = any_reader_open (fh, &dict); - pgm->map = NULL; - case_nullify (&pgm->bounce); - if (pgm->reader == NULL) + reader = any_reader_open (fh, &dict); + if (reader == NULL) goto error; - case_create (&pgm->bounce, dict_get_next_value_idx (dict)); - start_case_map (dict); while (lex_token (lexer) != '.') @@ -147,71 +130,40 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command goto error; } - pgm->map = finish_case_map (dict); - - dataset_set_dict (ds, dict); - - proc_set_source (ds, - create_case_source (&case_reader_source_class, pgm)); + map = finish_case_map (dict); + if (map != NULL) + reader = casereader_create_translator (reader, + dict_get_next_value_idx (dict), + get_translate_case, + get_destroy_case_map, + map); + + proc_set_active_file (ds, reader, dict); return CMD_SUCCESS; error: - case_reader_pgm_free (pgm); + casereader_destroy (reader); if (dict != NULL) dict_destroy (dict); return CMD_CASCADING_FAILURE; } -/* Frees a struct case_reader_pgm. */ static void -case_reader_pgm_free (struct case_reader_pgm *pgm) +get_translate_case (const struct ccase *input, struct ccase *output, + void *map_) { - if (pgm != NULL) - { - any_reader_close (pgm->reader); - destroy_case_map (pgm->map); - case_destroy (&pgm->bounce); - free (pgm); - } + struct case_map *map = map_; + map_case (map, input, output); } -/* Reads one case into C. - Returns true if successful, false at end of file or if an - I/O error occurred. 
*/ static bool -case_reader_source_read (struct case_source *source, struct ccase *c) +get_destroy_case_map (void *map_) { - struct case_reader_pgm *pgm = source->aux; - if (any_reader_read (pgm->reader, pgm->map == NULL ? c : &pgm->bounce)) - { - if (pgm->map != NULL) - map_case (pgm->map, &pgm->bounce, c); - return true; - } - else - return false; -} - -/* Destroys the source. - Returns true if successful read, false if an I/O occurred - during destruction or previously. */ -static bool -case_reader_source_destroy (struct case_source *source) -{ - struct case_reader_pgm *pgm = source->aux; - bool ok = !any_reader_error (pgm->reader); - case_reader_pgm_free (pgm); - return ok; + struct case_map *map = map_; + destroy_case_map (map); + return true; } - -static const struct case_source_class case_reader_source_class = - { - "case reader", - NULL, - case_reader_source_read, - case_reader_source_destroy, - }; /* GET. */ int @@ -243,30 +195,6 @@ enum command_type PROC_CMD /* Procedure. */ }; -/* File writer plus a case map. */ -struct case_writer - { - struct any_writer *writer; /* File writer. */ - struct case_map *map; /* Map to output file dictionary - (null pointer for identity mapping). */ - struct ccase bounce; /* Bounce buffer for mapping (if needed). */ - }; - -/* Destroys AW. */ -static bool -case_writer_destroy (struct case_writer *aw) -{ - bool ok = true; - if (aw != NULL) - { - ok = any_writer_close (aw->writer); - destroy_case_map (aw->map); - case_destroy (&aw->bounce); - free (aw); - } - return ok; -} - /* Parses SAVE or XSAVE or EXPORT or XEXPORT command. WRITER_TYPE identifies the type of file to write, and COMMAND_TYPE identifies the type of command. @@ -277,7 +205,7 @@ case_writer_destroy (struct case_writer *aw) included. On failure, returns a null pointer. 
*/ -static struct case_writer * +static struct casewriter * parse_write_command (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type, enum command_type command_type, @@ -286,7 +214,8 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, /* Common data. */ struct file_handle *handle; /* Output file. */ struct dictionary *dict; /* Dictionary for output file. */ - struct case_writer *aw; /* Writer. */ + struct casewriter *writer; /* Writer. */ + struct case_map *map; /* Map from input data to data for writer. */ /* Common options. */ bool print_map; /* Print map? TODO. */ @@ -303,10 +232,8 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, handle = NULL; dict = dict_clone (dataset_dict (ds)); - aw = xmalloc (sizeof *aw); - aw->writer = NULL; - aw->map = NULL; - case_nullify (&aw->bounce); + writer = NULL; + map = NULL; print_map = false; print_short_names = false; sysfile_opts = sfm_writer_default_options (); @@ -412,49 +339,40 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, } dict_compact_values (dict); - aw->map = finish_case_map (dict); - if (aw->map != NULL) - case_create (&aw->bounce, dict_get_next_value_idx (dict)); if (fh_get_referent (handle) == FH_REF_FILE) { switch (writer_type) { case SYSFILE_WRITER: - aw->writer = any_writer_from_sfm_writer ( - sfm_open_writer (handle, dict, sysfile_opts)); + writer = sfm_open_writer (handle, dict, sysfile_opts); break; case PORFILE_WRITER: - aw->writer = any_writer_from_pfm_writer ( - pfm_open_writer (handle, dict, porfile_opts)); + writer = pfm_open_writer (handle, dict, porfile_opts); break; } } else - aw->writer = any_writer_open (handle, dict); - if (aw->writer == NULL) + writer = any_writer_open (handle, dict); + if (writer == NULL) goto error; + + map = finish_case_map (dict); + if (map != NULL) + writer = casewriter_create_translator (writer, + get_translate_case, + get_destroy_case_map, + map); dict_destroy (dict); - return aw; + return writer; error: - 
case_writer_destroy (aw); + casewriter_destroy (writer); dict_destroy (dict); + destroy_case_map (map); return NULL; } - -/* Writes case C to writer AW. */ -static bool -case_writer_write_case (struct case_writer *aw, const struct ccase *c) -{ - if (aw->map != NULL) - { - map_case (aw->map, c, &aw->bounce); - c = &aw->bounce; - } - return any_writer_write (aw->writer, c); -} /* SAVE and EXPORT. */ @@ -464,26 +382,24 @@ parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type wri { bool retain_unselected; struct variable *saved_filter_variable; - struct case_writer *aw; - struct ccase *c; - bool ok = true; + struct casewriter *output; + bool ok; - aw = parse_write_command (lexer, ds, writer_type, PROC_CMD, &retain_unselected); - if (aw == NULL) + output = parse_write_command (lexer, ds, writer_type, PROC_CMD, + &retain_unselected); + if (output == NULL) return CMD_CASCADING_FAILURE; saved_filter_variable = dict_get_filter (dataset_dict (ds)); if (retain_unselected) dict_set_filter (dataset_dict (ds), NULL); - proc_open (ds); - while (ok && proc_read (ds, &c)) - ok = case_writer_write_case (aw, c); - ok = proc_close (ds) && ok; + casereader_transfer (proc_open (ds), output); + ok = casewriter_destroy (output); + ok = proc_commit (ds) && ok; dict_set_filter (dataset_dict (ds), saved_filter_variable); - case_writer_destroy (aw); return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; } @@ -504,7 +420,7 @@ cmd_export (struct lexer *lexer, struct dataset *ds) /* Transformation. */ struct output_trns { - struct case_writer *aw; /* Writer. */ + struct casewriter *writer; /* Writer. 
*/ }; static trns_proc_func output_trns_proc; @@ -515,8 +431,8 @@ static int parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type) { struct output_trns *t = xmalloc (sizeof *t); - t->aw = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL); - if (t->aw == NULL) + t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL); + if (t->writer == NULL) { free (t); return CMD_CASCADING_FAILURE; @@ -531,7 +447,9 @@ static int output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED) { struct output_trns *t = trns_; - case_writer_write_case (t->aw, c); + struct ccase tmp; + case_clone (&tmp, c); + casewriter_write (t->writer, &tmp); return TRNS_CONTINUE; } @@ -541,13 +459,8 @@ static bool output_trns_free (void *trns_) { struct output_trns *t = trns_; - bool ok = true; - - if (t != NULL) - { - ok = case_writer_destroy (t->aw); - free (t); - } + bool ok = casewriter_destroy (t->writer); + free (t); return ok; } @@ -748,15 +661,15 @@ struct mtf_file int type; /* One of MTF_*. */ const struct variable **by; /* List of BY variables for this file. */ struct file_handle *handle; /* File handle. */ - struct any_reader *reader; /* File reader. */ + struct casereader *reader; /* File reader. */ struct dictionary *dict; /* Dictionary from system file. */ + bool active_file; /* Active file? */ /* IN subcommand. */ char *in_name; /* Variable name. */ struct variable *in_var; /* Variable (in master dictionary). */ - struct ccase input_storage; /* Input record storage. */ - struct ccase *input; /* Input record. */ + struct ccase input; /* Input record. */ }; /* MATCH FILES procedure. */ @@ -773,7 +686,7 @@ struct mtf_proc char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1]; struct dictionary *dict; /* Dictionary of output file. */ - struct casefile *output; /* MATCH FILES output. */ + struct casewriter *output; /* MATCH FILES output. */ struct ccase mtf_case; /* Case used for output. 
*/ unsigned seq_num; /* Have we initialized this variable? */ @@ -782,11 +695,12 @@ struct mtf_proc static bool mtf_free (struct mtf_proc *); static bool mtf_close_file (struct mtf_file *); +static bool mtf_close_all_files (struct mtf_proc *); static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *); -static bool mtf_read_records (struct mtf_proc *, struct dataset *); +static bool mtf_read_records (struct mtf_proc *); static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **); -static bool mtf_processing (struct mtf_proc *, struct dataset *); +static bool mtf_processing (struct mtf_proc *); static char *var_type_description (struct variable *); @@ -804,6 +718,7 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) bool used_active_file = false; bool saw_table = false; bool saw_in = false; + bool open_active_file = false; mtf.head = mtf.tail = NULL; mtf.by_cnt = 0; @@ -840,8 +755,8 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) file->dict = NULL; file->in_name = NULL; file->in_var = NULL; - case_nullify (&file->input_storage); - file->input = &file->input_storage; + file->active_file = false; + case_nullify (&file->input); /* FILEs go first, then TABLEs. 
*/ if (file->type == MTF_TABLE || first_table == NULL) @@ -881,7 +796,7 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) } used_active_file = true; - if (!proc_has_source (ds)) + if (!proc_has_active_file (ds)) { msg (SE, _("Cannot specify the active file since no active " "file has been defined.")); @@ -895,6 +810,7 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) "Temporary transformations will be made permanent.")); file->dict = dataset_dict (ds); + file->active_file = true; } else { @@ -905,9 +821,6 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) file->reader = any_reader_open (file->handle, &file->dict); if (file->reader == NULL) goto error; - - case_create (&file->input_storage, - dict_get_next_value_idx (file->dict)); } while (lex_match (lexer, '/')) @@ -1109,63 +1022,50 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) if (used_active_file) { - proc_set_sink (ds, create_case_sink (&null_sink_class, - dataset_dict (ds), - dataset_get_casefile_factory (ds), - NULL)); - proc_open (ds); + proc_discard_output (ds); + for (iter = mtf.head; iter != NULL; iter = iter->next) + if (iter->reader == NULL) + iter->reader = proc_open (ds); + open_active_file = true; } - else - discard_variables (ds); dict_compact_values (mtf.dict); - mtf.output = dataset_get_casefile_factory (ds)->create_casefile - (dataset_get_casefile_factory (ds), - dict_get_next_value_idx (mtf.dict)); - + mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict)); mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums); case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict)); - if (!mtf_read_records (&mtf, ds)) - goto error; + if (!mtf_read_records (&mtf)) + goto error; while (mtf.head && mtf.head->type == MTF_FILE) - if (!mtf_processing (&mtf, ds)) - goto error; - if (!proc_close (ds)) + if (!mtf_processing (&mtf)) + goto error; + if (!mtf_close_all_files (&mtf)) goto error; + if (open_active_file) + 
proc_commit (ds); - discard_variables (ds); - - dataset_set_dict (ds, mtf.dict); + proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict); mtf.dict = NULL; - proc_set_source (ds, storage_source_create (mtf.output)); mtf.output = NULL; return mtf_free (&mtf) ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: - proc_close (ds); + if (open_active_file) + proc_commit (ds); mtf_free (&mtf); return CMD_CASCADING_FAILURE; } -/* Return a string in a static buffer describing V's variable type and - width. */ +/* Return a string in an allocated buffer describing V's variable + type and width. */ static char * var_type_description (struct variable *v) { - static char buf[2][32]; - static int x = 0; - char *s; - - x ^= 1; - s = buf[x]; - if (var_is_numeric (v)) - strcpy (s, "numeric"); + return xstrdup ("numeric"); else - sprintf (s, "string with width %d", var_get_width (v)); - return s; + return xasprintf ("string with width %d", var_get_width (v)); } /* Closes FILE and frees its associated data. @@ -1174,22 +1074,18 @@ var_type_description (struct variable *v) static bool mtf_close_file (struct mtf_file *file) { - bool ok = file->reader == NULL || !any_reader_error (file->reader); + bool ok = casereader_destroy (file->reader); free (file->by); - any_reader_close (file->reader); - if (file->handle != NULL) + if (!file->active_file) dict_destroy (file->dict); - case_destroy (&file->input_storage); free (file->in_name); + case_destroy (&file->input); free (file); return ok; } -/* Free all the data for the MATCH FILES procedure. - Returns true if successful, false if an I/O error - occurred. */ static bool -mtf_free (struct mtf_proc *mtf) +mtf_close_all_files (struct mtf_proc *mtf) { struct mtf_file *iter, *next; bool ok = true; @@ -1201,9 +1097,22 @@ mtf_free (struct mtf_proc *mtf) if (!mtf_close_file (iter)) ok = false; } - - if (mtf->dict) - dict_destroy (mtf->dict); + mtf->head = NULL; + return ok; +} + +/* Free all the data for the MATCH FILES procedure. 
+ Returns true if successful, false if an I/O error + occurred. */ +static bool +mtf_free (struct mtf_proc *mtf) +{ + bool ok; + + ok = mtf_close_all_files (mtf); + + casewriter_destroy (mtf->output); + dict_destroy (mtf->dict); case_destroy (&mtf->mtf_case); free (mtf->seq_nums); @@ -1252,7 +1161,7 @@ mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file) /* Read a record from every input file. Returns true if successful, false if an I/O error occurred. */ static bool -mtf_read_records (struct mtf_proc *mtf, struct dataset *ds) +mtf_read_records (struct mtf_proc *mtf) { struct mtf_file *iter, *next; bool ok = true; @@ -1260,9 +1169,7 @@ mtf_read_records (struct mtf_proc *mtf, struct dataset *ds) for (iter = mtf->head; ok && iter != NULL; iter = next) { next = iter->next; - if (iter->handle - ? !any_reader_read (iter->reader, iter->input) - : !proc_read (ds, &iter->input)) + if (!casereader_read (iter->reader, &iter->input)) { if (!mtf_delete_file_in_place (mtf, &iter)) ok = false; @@ -1277,17 +1184,18 @@ static inline int mtf_compare_BY_values (struct mtf_proc *mtf, struct mtf_file *a, struct mtf_file *b) { - return case_compare_2dict (a->input, b->input, a->by, b->by, mtf->by_cnt); + return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt); } /* Perform one iteration of steps 3...7 above. Returns true if successful, false if an I/O error occurred. */ static bool -mtf_processing (struct mtf_proc *mtf, struct dataset *ds) +mtf_processing (struct mtf_proc *mtf) { struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */ struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */ struct mtf_file *iter, *next; + struct ccase out_case; /* 3. Find the FILE input record(s) that have minimum BY values. 
Store all the values from these input records into @@ -1346,9 +1254,8 @@ mtf_processing (struct mtf_proc *mtf, struct dataset *ds) min_tail = min_tail->next_min = iter; else /* cmp > 0 */ { - if (iter->handle - ? any_reader_read (iter->reader, iter->input) - : proc_read (ds, &iter->input)) + case_destroy (&iter->input); + if (casereader_read (iter->reader, &iter->input)) continue; if (!mtf_delete_file_in_place (mtf, &iter)) return false; @@ -1375,14 +1282,13 @@ mtf_processing (struct mtf_proc *mtf, struct dataset *ds) if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num) { - const struct ccase *record = iter->input; union value *out = case_data_rw (&mtf->mtf_case, mv); mtf->seq_nums[mv_index] = mtf->seq_num; if (var_is_numeric (v)) - out->f = case_num (record, v); + out->f = case_num (&iter->input, v); else - memcpy (out->s, case_str (record, v), var_get_width (v)); + memcpy (out->s, case_str (&iter->input, v), var_get_width (v)); } } if (iter->in_var != NULL) @@ -1418,7 +1324,8 @@ mtf_processing (struct mtf_proc *mtf, struct dataset *ds) } /* 5. Write the output record. */ - casefile_append (mtf->output, &mtf->mtf_case); + case_clone (&out_case, &mtf->mtf_case); + casewriter_write (mtf->output, &out_case); /* 6. Read another record from each input file FILE and TABLE that we stored values from above. If we come to the end of @@ -1427,9 +1334,8 @@ mtf_processing (struct mtf_proc *mtf, struct dataset *ds) for (iter = min_head; iter && iter->type == MTF_FILE; iter = next) { next = iter->next_min; - if (iter->reader != NULL - ? 
!any_reader_read (iter->reader, iter->input) - : !proc_read (ds, &iter->input)) + case_destroy (&iter->input); + if (!casereader_read (iter->reader, &iter->input)) if (!mtf_delete_file_in_place (mtf, &iter)) return false; } @@ -1614,11 +1520,6 @@ map_case (const struct case_map *map, { size_t dst_idx; - assert (map != NULL); - assert (src != NULL); - assert (dst != NULL); - assert (src != dst); - for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++) { int src_idx = map->map[dst_idx]; diff --git a/src/language/data-io/inpt-pgm.c b/src/language/data-io/inpt-pgm.c index 71860ebf..97fbbf1e 100644 --- a/src/language/data-io/inpt-pgm.c +++ b/src/language/data-io/inpt-pgm.c @@ -23,9 +23,9 @@ #include #include -#include #include -#include +#include +#include #include #include #include @@ -68,12 +68,10 @@ struct input_program_pgm struct trns_chain *trns_chain; enum trns_result restart; - bool inited_case; /* Did one-time case initialization? */ size_t case_nr; /* Incremented by END CASE transformation. */ - enum value_init_type *init; /* How to initialize each `union value'. */ - size_t init_cnt; /* Number of elements in inp_init. */ - size_t case_size; /* Size of case in bytes. 
*/ + struct caseinit *init; + size_t value_cnt; }; static void destroy_input_program (struct input_program_pgm *); @@ -82,7 +80,7 @@ static trns_proc_func reread_trns_proc; static trns_proc_func end_file_trns_proc; static trns_free_func reread_trns_free; -static const struct case_source_class input_program_source_class; +static const struct casereader_class input_program_casereader_class; static bool inside_input_program; @@ -105,10 +103,9 @@ int cmd_input_program (struct lexer *lexer, struct dataset *ds) { struct input_program_pgm *inp; - size_t i; bool saw_END_CASE = false; - discard_variables (ds); + proc_discard_active_file (ds); if (lex_token (lexer) != '.') return lex_end_of_command (lexer); @@ -132,7 +129,7 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds) if (result == CMD_EOF) msg (SE, _("Unexpected end-of-file within INPUT PROGRAM.")); inside_input_program = false; - discard_variables (ds); + proc_discard_active_file (ds); destroy_input_program (inp); return result; } @@ -144,7 +141,7 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds) if (dict_get_next_value_idx (dataset_dict (ds)) == 0) { msg (SE, _("Input program did not create any variables.")); - discard_variables (ds); + proc_discard_active_file (ds); destroy_input_program (inp); return CMD_FAILURE; } @@ -153,33 +150,15 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds) trns_chain_finalize (inp->trns_chain); inp->restart = TRNS_CONTINUE; - inp->inited_case = false; - inp->case_nr = 1; /* Figure out how to initialize each input case. 
*/ - inp->init_cnt = dict_get_next_value_idx (dataset_dict (ds)); - inp->init = xnmalloc (inp->init_cnt, sizeof *inp->init); - for (i = 0; i < inp->init_cnt; i++) - inp->init[i] = -1; - for (i = 0; i < dict_get_var_cnt (dataset_dict (ds)); i++) - { - struct variable *var = dict_get_var (dataset_dict (ds), i); - size_t value_cnt = var_get_value_cnt (var); - enum value_init_type value_init; - size_t j; - - value_init = var_is_numeric (var) ? INP_NUMERIC : INP_STRING; - value_init |= var_get_leave (var) ? INP_INIT_ONCE : INP_REINIT; - - for (j = 0; j < value_cnt; j++) - inp->init[j + var_get_case_index (var)] = value_init; - } - for (i = 0; i < inp->init_cnt; i++) - assert (inp->init[i] != -1); - inp->case_size = dict_get_case_size (dataset_dict (ds)); - - proc_set_source (ds, - create_case_source (&input_program_source_class, inp)); + inp->init = caseinit_create (); + caseinit_mark_for_init (inp->init, dataset_dict (ds)); + inp->value_cnt = dict_get_next_value_idx (dataset_dict (ds)); + + proc_set_active_file_data ( + ds, casereader_create_sequential (NULL, inp->value_cnt, CASENUMBER_MAX, + &input_program_casereader_class, inp)); return CMD_SUCCESS; } @@ -191,56 +170,6 @@ cmd_end_input_program (struct lexer *lexer UNUSED, struct dataset *ds UNUSED) return CMD_END_INPUT_PROGRAM; } -/* Initializes case C. Called before the first case is read. */ -static void -init_case (const struct input_program_pgm *inp, struct ccase *c) -{ - size_t i; - - for (i = 0; i < inp->init_cnt; i++) - switch (inp->init[i]) - { - case INP_NUMERIC | INP_INIT_ONCE: - case_data_rw_idx (c, i)->f = 0.0; - break; - case INP_NUMERIC | INP_REINIT: - case_data_rw_idx (c, i)->f = SYSMIS; - break; - case INP_STRING | INP_INIT_ONCE: - case INP_STRING | INP_REINIT: - memset (case_data_rw_idx (c, i)->s, ' ', - sizeof case_data_rw_idx (c, i)->s); - break; - default: - NOT_REACHED (); - } -} - -/* Clears case C. Called between reading successive records. 
*/ -static void -clear_case (const struct input_program_pgm *inp, struct ccase *c) -{ - size_t i; - - for (i = 0; i < inp->init_cnt; i++) - switch (inp->init[i]) - { - case INP_NUMERIC | INP_INIT_ONCE: - break; - case INP_NUMERIC | INP_REINIT: - case_data_rw_idx (c, i)->f = SYSMIS; - break; - case INP_STRING | INP_INIT_ONCE: - break; - case INP_STRING | INP_REINIT: - memset (case_data_rw_idx (c, i)->s, ' ', - sizeof case_data_rw_idx (c, i)->s); - break; - default: - NOT_REACHED (); - } -} - /* Returns true if STATE is valid given the transformations that are allowed within INPUT PROGRAM. */ static bool @@ -256,26 +185,28 @@ is_valid_state (enum trns_result state) Returns true if successful, false at end of file or if an I/O error occurred. */ static bool -input_program_source_read (struct case_source *source, struct ccase *c) +input_program_casereader_read (struct casereader *reader UNUSED, void *inp_, + struct ccase *c) { - struct input_program_pgm *inp = source->aux; + struct input_program_pgm *inp = inp_; - if (!inp->inited_case) - { - init_case (inp, c); - inp->inited_case = true; - } + case_create (c, inp->value_cnt); do { assert (is_valid_state (inp->restart)); - if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE) - return false; + if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE) + { + case_destroy (c); + return false; + } - clear_case (inp, c); + caseinit_init_reinit_vars (inp->init, c); + caseinit_init_left_vars (inp->init, c); inp->restart = trns_chain_execute (inp->trns_chain, inp->restart, c, &inp->case_nr); assert (is_valid_state (inp->restart)); + caseinit_update_left_vars (inp->init, c); } while (inp->restart < 0); @@ -288,29 +219,27 @@ destroy_input_program (struct input_program_pgm *pgm) if (pgm != NULL) { trns_chain_destroy (pgm->trns_chain); - free (pgm->init); + caseinit_destroy (pgm->init); free (pgm); } } -/* Destroys the source. 
- Returns true if successful read, false if an I/O occurred - during destruction or previously. */ -static bool -input_program_source_destroy (struct case_source *source) +/* Destroys the casereader. */ +static void +input_program_casereader_destroy (struct casereader *reader UNUSED, void *inp_) { - struct input_program_pgm *inp = source->aux; - bool ok = inp->restart != TRNS_ERROR; + struct input_program_pgm *inp = inp_; + if (inp->restart == TRNS_ERROR) + casereader_force_error (reader); destroy_input_program (inp); - return ok; } -static const struct case_source_class input_program_source_class = +static const struct casereader_class input_program_casereader_class = { - "INPUT PROGRAM", + input_program_casereader_read, + input_program_casereader_destroy, + NULL, NULL, - input_program_source_read, - input_program_source_destroy, }; int @@ -322,7 +251,7 @@ cmd_end_case (struct lexer *lexer, struct dataset *ds UNUSED) return lex_end_of_command (lexer); } -/* Sends the current case as the source's output. */ +/* Outputs the current case */ int end_case_trns_proc (void *inp_, struct ccase *c UNUSED, casenumber case_nr UNUSED) diff --git a/src/language/data-io/list.q b/src/language/data-io/list.q index 50fe1201..28627bbb 100644 --- a/src/language/data-io/list.q +++ b/src/language/data-io/list.q @@ -23,7 +23,8 @@ #include "intprops.h" #include "size_max.h" -#include +#include +#include #include #include #include @@ -73,9 +74,6 @@ struct list_ext /* Parsed command. */ static struct cmd_list cmd; -/* Current case number. */ -static int case_idx; - /* Line buffer. */ static struct string line_buffer; @@ -85,11 +83,12 @@ static unsigned n_chars_width (struct outp_driver *d); static void write_line (struct outp_driver *d, const char *s); /* Other functions. 
*/ -static bool list_cases (const struct ccase *, void *, const struct dataset *); +static void list_case (struct ccase *, casenumber case_idx, + const struct dataset *); static void determine_layout (void); static void clean_up (void); static void write_header (struct outp_driver *); -static void write_all_headers (const struct ccase *, void *, const struct dataset*); +static void write_all_headers (struct casereader *, const struct dataset*); /* Returns the number of text lines that can fit on the remainder of the page. */ @@ -133,7 +132,11 @@ write_line (struct outp_driver *d, const char *s) int cmd_list (struct lexer *lexer, struct dataset *ds) { + struct dictionary *dict = dataset_dict (ds); struct variable *casenum_var = NULL; + struct casegrouper *grouper; + struct casereader *group; + casenumber case_idx; bool ok; if (!parse_list (lexer, ds, &cmd, NULL)) @@ -147,7 +150,7 @@ cmd_list (struct lexer *lexer, struct dataset *ds) if (cmd.last == NOT_LONG) cmd.last = LONG_MAX; if (!cmd.sbc_variables) - dict_get_vars (dataset_dict (ds), &cmd.v_variables, &cmd.n_variables, + dict_get_vars (dict, &cmd.v_variables, &cmd.n_variables, (1u << DC_SYSTEM) | (1u << DC_SCRATCH)); if (cmd.n_variables == 0) { @@ -187,12 +190,12 @@ cmd_list (struct lexer *lexer, struct dataset *ds) /* Weighting variable. 
*/ if (cmd.weight == LST_WEIGHT) { - if (dict_get_weight (dataset_dict (ds)) != NULL) + if (dict_get_weight (dict) != NULL) { size_t i; for (i = 0; i < cmd.n_variables; i++) - if (cmd.v_variables[i] == dict_get_weight (dataset_dict (ds))) + if (cmd.v_variables[i] == dict_get_weight (dict)) break; if (i >= cmd.n_variables) { @@ -201,7 +204,7 @@ cmd_list (struct lexer *lexer, struct dataset *ds) cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables, sizeof *cmd.v_variables); cmd.v_variables[cmd.n_variables - 1] - = dict_get_weight (dataset_dict (ds)); + = dict_get_weight (dict); } } else @@ -229,7 +232,24 @@ cmd_list (struct lexer *lexer, struct dataset *ds) determine_layout (); case_idx = 0; - ok = procedure_with_splits (ds, write_all_headers, list_cases, NULL, NULL); + for (grouper = casegrouper_create_splits (proc_open (ds), dict); + casegrouper_get_next_group (grouper, &group); + casereader_destroy (group)) + { + struct ccase c; + + write_all_headers (group, ds); + for (; casereader_read (group, &c); case_destroy (&c)) + { + case_idx++; + if (case_idx >= cmd.first && case_idx <= cmd.last + && (case_idx - cmd.first) % cmd.step == 0) + list_case (&c, case_idx, ds); + } + } + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + ds_destroy(&line_buffer); clean_up (); @@ -242,11 +262,16 @@ cmd_list (struct lexer *lexer, struct dataset *ds) /* Writes headers to all devices. This is done at the beginning of each SPLIT FILE group. */ static void -write_all_headers (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) +write_all_headers (struct casereader *input, const struct dataset *ds) { struct outp_driver *d; + struct ccase c; + + if (!casereader_peek (input, 0, &c)) + return; + output_split_file_values (ds, &c); + case_destroy (&c); - output_split_file_values (ds, c); for (d = outp_drivers (NULL); d; d = outp_drivers (d)) { if (!d->class->special) @@ -623,16 +648,12 @@ determine_layout (void) } /* Writes case C to output. 
*/ -static bool -list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) +static void +list_case (struct ccase *c, casenumber case_idx, const struct dataset *ds) { + struct dictionary *dict = dataset_dict (ds); struct outp_driver *d; - case_idx++; - if (case_idx < cmd.first || case_idx > cmd.last - || (cmd.step != 1 && (case_idx - cmd.first) % cmd.step)) - return true; - for (d = outp_drivers (NULL); d; d = outp_drivers (d)) if (d->class->special == 0) { @@ -681,7 +702,7 @@ list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) ds_put_char_multiple(&line_buffer, ' ', width - print->w); if (fmt_is_string (print->type) - || dict_contains_var (dataset_dict (ds), v)) + || dict_contains_var (dict, v)) { data_out (case_data (c, v), print, ds_put_uninit (&line_buffer, print->w)); @@ -720,7 +741,7 @@ list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) char buf[256]; if (fmt_is_string (print->type) - || dict_contains_var (dataset_dict (ds), v)) + || dict_contains_var (dict, v)) data_out (case_data (c, v), print, buf); else { @@ -738,8 +759,6 @@ list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) } else NOT_REACHED (); - - return true; } /* diff --git a/src/language/dictionary/ChangeLog b/src/language/dictionary/ChangeLog index bd4c23ae..b7d543c1 100644 --- a/src/language/dictionary/ChangeLog +++ b/src/language/dictionary/ChangeLog @@ -1,3 +1,12 @@ +2007-06-06 Ben Pfaff + + Adapt case sources, sinks, and clients of procedure code to the + new infrastructure. + + * apply-dictionary.c: Now any_reader_open returns a casereader. + + * sys-file-open.c: Now sfm_reader_open returns a casereader. 
+ Sat Feb 3 21:52:35 2007 Ben Pfaff * vector.c (cmd_vector): Add support for specifying an output diff --git a/src/language/dictionary/apply-dictionary.c b/src/language/dictionary/apply-dictionary.c index 0c9f2ade..ac38a089 100644 --- a/src/language/dictionary/apply-dictionary.c +++ b/src/language/dictionary/apply-dictionary.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -42,7 +43,7 @@ int cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds) { struct file_handle *handle; - struct any_reader *reader; + struct casereader *reader; struct dictionary *dict; int n_matched = 0; @@ -58,7 +59,7 @@ cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds) reader = any_reader_open (handle, &dict); if (dict == NULL) return CMD_FAILURE; - any_reader_close (reader); + casereader_destroy (reader); for (i = 0; i < dict_get_var_cnt (dict); i++) { @@ -136,7 +137,5 @@ cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds) dict_set_weight (dataset_dict (ds), new_weight); } - any_reader_close (reader); - return lex_end_of_command (lexer); } diff --git a/src/language/dictionary/delete-variables.c b/src/language/dictionary/delete-variables.c index bd26a7d1..a0a1fb24 100644 --- a/src/language/dictionary/delete-variables.c +++ b/src/language/dictionary/delete-variables.c @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2007 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -36,6 +37,7 @@ cmd_delete_variables (struct lexer *lexer, struct dataset *ds) { struct variable **vars; size_t var_cnt; + bool ok; if (proc_make_temporary_transformations_permanent (ds)) msg (SE, _("DELETE VARIABLES may not be used after TEMPORARY. 
" @@ -50,11 +52,13 @@ cmd_delete_variables (struct lexer *lexer, struct dataset *ds) "from the active file dictionary. Use NEW FILE instead.")); goto error; } - - if (!procedure (ds, NULL, NULL)) + + ok = casereader_destroy (proc_open (ds)); + ok = proc_commit (ds) && ok; + if (!ok) goto error; - dict_delete_vars (dataset_dict (ds), vars, var_cnt); + free (vars); return CMD_SUCCESS; diff --git a/src/language/dictionary/modify-variables.c b/src/language/dictionary/modify-variables.c index f3a18325..41709416 100644 --- a/src/language/dictionary/modify-variables.c +++ b/src/language/dictionary/modify-variables.c @@ -40,7 +40,6 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -/* FIXME: should change weighting variable, etc. */ /* These control the ordering produced by compare_variables_given_ordering(). */ struct ordering @@ -322,7 +321,7 @@ cmd_modify_vars (struct lexer *lexer, struct dataset *ds) if (already_encountered & (1 | 4)) { /* Read the data. */ - if (!procedure (ds,NULL, NULL)) + if (!proc_execute (ds)) goto done; } diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c index ec93ea20..180188ab 100644 --- a/src/language/dictionary/sys-file-info.c +++ b/src/language/dictionary/sys-file-info.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -87,7 +88,7 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED) struct file_handle *h; struct dictionary *d; struct tab_table *t; - struct sfm_reader *reader; + struct casereader *reader; struct sfm_read_info info; int r, nr; int i; @@ -102,7 +103,7 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED) reader = sfm_open_reader (h, &d, &info); if (!reader) return CMD_FAILURE; - sfm_close_reader (reader); + casereader_destroy (reader); t = tab_create (2, 10, 0); tab_vline (t, TAL_GAP, 1, 0, 8); diff --git a/src/language/expressions/evaluate.c b/src/language/expressions/evaluate.c index 92f0a0e3..05b19ff1 100644 --- 
a/src/language/expressions/evaluate.c +++ b/src/language/expressions/evaluate.c @@ -158,7 +158,7 @@ cmd_debug_evaluate (struct lexer *lexer, struct dataset *dsother UNUSED) if ( ds == NULL ) { - ds = create_dataset (NULL, NULL, NULL); + ds = create_dataset (NULL, NULL); d = dataset_dict (ds); } diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c index 26b2e1bf..055bfe09 100644 --- a/src/language/lexer/variable-parser.c +++ b/src/language/lexer/variable-parser.c @@ -116,12 +116,6 @@ parse_variables (struct lexer *lexer, const struct dictionary *d, vs = var_set_create_from_dict (d); success = parse_var_set_vars (lexer, vs, var, cnt, opts); - if ( success == 0 ) - { - free ( *var ) ; - *var = NULL; - *cnt = 0; - } var_set_destroy (vs); return success; } diff --git a/src/language/stats/ChangeLog b/src/language/stats/ChangeLog index c3cb4fad..91956d82 100644 --- a/src/language/stats/ChangeLog +++ b/src/language/stats/ChangeLog @@ -1,3 +1,32 @@ +2007-06-06 Ben Pfaff + + Adapt case sources, sinks, and clients of procedure code to the + new infrastructure. + + * aggregate.c: Simplify greatly since everything is more uniform + now. + + * autorecode.c: Adapt to new procedure code. + * binomial.c: Ditto. + * chisquare.c: Ditto. + * crosstabs.q: Ditto. + * descriptives.c: Ditto. + * examine.q: Ditto. + * npar-summary.c: Ditto. + * frequencies.q: Ditto. + * npar.q: Ditto. + * oneway.q: Ditto. + * regression.q: Ditto. + * sort-cases.c: Ditto. + * t-test.c: Ditto. + + * sort-criteria.c: Rewrite to output a struct case_ordering. + + * flip.c: Rewrite to be a casereader. + + * rank.q: Simplify greatly since casereaders are much more + flexible than what we had before. 
+ 2007-05-15 Jason Stover * regression.q (run_regression): Tell the user when the data diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c index 297d2abe..22ca39d7 100644 --- a/src/language/stats/aggregate.c +++ b/src/language/stats/aggregate.c @@ -21,15 +21,16 @@ #include #include -#include +#include #include -#include +#include +#include +#include #include #include #include #include #include -#include #include #include #include @@ -135,12 +136,8 @@ enum missing_treatment /* An entire AGGREGATE procedure. */ struct agr_proc { - /* We have either an output file or a sink. */ - struct any_writer *writer; /* Output file, or null if none. */ - struct case_sink *sink; /* Sink, or null if none. */ - /* Break variables. */ - struct sort_criteria *sort; /* Sort criteria. */ + struct case_ordering *sort; /* Sort criteria. */ const struct variable **break_vars; /* Break variables. */ size_t break_var_cnt; /* Number of break variables. */ struct ccase break_case; /* Last values of break variables. */ @@ -150,20 +147,18 @@ struct agr_proc struct dictionary *dict; /* Aggregate dictionary. */ const struct dictionary *src_dict; /* Dict of the source */ int case_cnt; /* Counts aggregated cases. */ - struct ccase agr_case; /* Aggregate case for output. */ }; static void initialize_aggregate_info (struct agr_proc *, const struct ccase *); - +static void accumulate_aggregate_info (struct agr_proc *, + const struct ccase *); /* Prototypes. */ static bool parse_aggregate_functions (struct lexer *, const struct dictionary *, struct agr_proc *); static void agr_destroy (struct agr_proc *); -static bool aggregate_single_case (struct agr_proc *agr, - const struct ccase *input, - struct ccase *output); -static void dump_aggregate_info (struct agr_proc *agr, struct ccase *output); +static void dump_aggregate_info (struct agr_proc *agr, + struct casewriter *output); /* Parsing. 
*/ @@ -174,10 +169,14 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) struct dictionary *dict = dataset_dict (ds); struct agr_proc agr; struct file_handle *out_file = NULL; + struct casereader *input = NULL, *group; + struct casegrouper *grouper; + struct casewriter *output = NULL; bool copy_documents = false; bool presorted = false; bool saw_direction; + bool ok; memset(&agr, 0 , sizeof (agr)); agr.missing = ITEMWISE; @@ -223,11 +222,13 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) int i; lex_match (lexer, '='); - agr.sort = sort_parse_criteria (lexer, dict, - &agr.break_vars, &agr.break_var_cnt, - &saw_direction, NULL); + agr.sort = parse_case_ordering (lexer, dict, + + &saw_direction); if (agr.sort == NULL) goto error; + case_ordering_get_vars (agr.sort, + &agr.break_vars, &agr.break_var_cnt); for (i = 0; i < agr.break_var_cnt; i++) dict_clone_var_assert (agr.dict, agr.break_vars[i], @@ -261,109 +262,69 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) /* Initialize. */ agr.case_cnt = 0; - case_create (&agr.agr_case, dict_get_next_value_idx (agr.dict)); - /* Output to active file or external file? */ if (out_file == NULL) { - struct ccase *c; - /* The active file will be replaced by the aggregated data, so TEMPORARY is moot. 
*/ proc_cancel_temporary_transformations (ds); + proc_discard_output (ds); + output = autopaging_writer_create (dict_get_next_value_idx (agr.dict)); + } + else + { + output = any_writer_open (out_file, agr.dict); + if (output == NULL) + goto error; + } - if (agr.sort != NULL && !presorted) - { - if (!sort_active_file_in_place (ds, agr.sort)) - goto error; - } + input = proc_open (ds); + if (agr.sort != NULL && !presorted) + { + input = sort_execute (input, agr.sort); + agr.sort = NULL; + } - agr.sink = create_case_sink (&storage_sink_class, agr.dict, - dataset_get_casefile_factory (ds), - NULL); - if (agr.sink->class->open != NULL) - agr.sink->class->open (agr.sink); - proc_set_sink (ds, - create_case_sink (&null_sink_class, dict, - dataset_get_casefile_factory (ds), - NULL)); - proc_open (ds); - while (proc_read (ds, &c)) - if (aggregate_single_case (&agr, c, &agr.agr_case)) - if (!agr.sink->class->write (agr.sink, &agr.agr_case)) - { - proc_close (ds); - goto error; - } - if (!proc_close (ds)) - goto error; + for (grouper = casegrouper_create_vars (input, agr.break_vars, + agr.break_var_cnt); + casegrouper_get_next_group (grouper, &group); + casereader_destroy (group)) + { + struct ccase c; + + if (!casereader_peek (group, 0, &c)) + continue; + initialize_aggregate_info (&agr, &c); + case_destroy (&c); + + for (; casereader_read (group, &c); case_destroy (&c)) + accumulate_aggregate_info (&agr, &c); + dump_aggregate_info (&agr, output); + } + if (!casegrouper_destroy (grouper)) + goto error; - if (agr.case_cnt > 0) - { - dump_aggregate_info (&agr, &agr.agr_case); - if (!agr.sink->class->write (agr.sink, &agr.agr_case)) - goto error; - } - discard_variables (ds); - dataset_set_dict (ds, agr.dict); - agr.dict = NULL; - proc_set_source (ds, agr.sink->class->make_source (agr.sink)); - free_case_sink (agr.sink); + if (!proc_commit (ds)) + { + input = NULL; + goto error; } - else + input = NULL; + + if (out_file == NULL) { - agr.writer = any_writer_open (out_file, 
agr.dict); - if (agr.writer == NULL) + struct casereader *next_input = casewriter_make_reader (output); + if (next_input == NULL) goto error; - if (agr.sort != NULL && !presorted) - { - /* Sorting is needed. */ - struct casefile *dst; - struct casereader *reader; - struct ccase c; - bool ok = true; - - dst = sort_active_file_to_casefile (ds, agr.sort); - if (dst == NULL) - goto error; - reader = casefile_get_destructive_reader (dst); - while (ok && casereader_read_xfer (reader, &c)) - { - if (aggregate_single_case (&agr, &c, &agr.agr_case)) - ok = any_writer_write (agr.writer, &agr.agr_case); - case_destroy (&c); - } - casereader_destroy (reader); - if (ok) - ok = !casefile_error (dst); - casefile_destroy (dst); - if (!ok) - goto error; - } - else - { - /* Active file is already sorted. */ - struct ccase *c; - - proc_open (ds); - while (proc_read (ds, &c)) - if (aggregate_single_case (&agr, c, &agr.agr_case)) - if (!any_writer_write (agr.writer, &agr.agr_case)) - { - proc_close (ds); - goto error; - } - if (!proc_close (ds)) - goto error; - } - - if (agr.case_cnt > 0) - { - dump_aggregate_info (&agr, &agr.agr_case); - any_writer_write (agr.writer, &agr.agr_case); - } - if (any_writer_error (agr.writer)) + proc_set_active_file (ds, next_input, agr.dict); + agr.dict = NULL; + } + else + { + ok = casewriter_destroy (output); + output = NULL; + if (!ok) goto error; } @@ -371,6 +332,9 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) return CMD_SUCCESS; error: + if (input != NULL) + proc_commit (ds); + casewriter_destroy (output); agr_destroy (&agr); return CMD_CASCADING_FAILURE; } @@ -717,9 +681,7 @@ agr_destroy (struct agr_proc *agr) { struct agr_var *iter, *next; - any_writer_close (agr->writer); - if (agr->sort != NULL) - sort_destroy_criteria (agr->sort); + case_ordering_destroy (agr->sort); free (agr->break_vars); case_destroy (&agr->break_case); for (iter = agr->agr_vars; iter; iter = next) @@ -742,44 +704,13 @@ agr_destroy (struct agr_proc *agr) } if 
(agr->dict != NULL) dict_destroy (agr->dict); - - case_destroy (&agr->agr_case); } /* Execution. */ -static void accumulate_aggregate_info (struct agr_proc *, - const struct ccase *); -static void dump_aggregate_info (struct agr_proc *, struct ccase *); - -/* Processes a single case INPUT for aggregation. If output is - warranted, writes it to OUTPUT and returns true. - Otherwise, returns false and OUTPUT is unmodified. */ -static bool -aggregate_single_case (struct agr_proc *agr, - const struct ccase *input, struct ccase *output) -{ - bool finished_group = false; - - if (agr->case_cnt++ == 0) - initialize_aggregate_info (agr, input); - else if (case_compare (&agr->break_case, input, - agr->break_vars, agr->break_var_cnt)) - { - dump_aggregate_info (agr, output); - finished_group = true; - - initialize_aggregate_info (agr, input); - } - - accumulate_aggregate_info (agr, input); - return finished_group; -} - /* Accumulates aggregation data from the case INPUT. */ static void -accumulate_aggregate_info (struct agr_proc *agr, - const struct ccase *input) +accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) { struct agr_var *iter; double weight; @@ -947,12 +878,14 @@ accumulate_aggregate_info (struct agr_proc *agr, } } -/* We've come to a record that differs from the previous in one or - more of the break variables. Make an output record from the - accumulated statistics in the OUTPUT case. */ +/* Writes an aggregated record to OUTPUT. 
*/ static void -dump_aggregate_info (struct agr_proc *agr, struct ccase *output) +dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) { + struct ccase c; + + case_create (&c, dict_get_next_value_idx (agr->dict)); + { int value_idx = 0; int i; @@ -961,7 +894,7 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) { const struct variable *v = agr->break_vars[i]; size_t value_cnt = var_get_value_cnt (v); - memcpy (case_data_rw_idx (output, value_idx), + memcpy (case_data_rw_idx (&c, value_idx), case_data (&agr->break_case, v), sizeof (union value) * value_cnt); value_idx += value_cnt; @@ -973,7 +906,7 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) for (i = agr->agr_vars; i; i = i->next) { - union value *v = case_data_rw (output, i->dest); + union value *v = case_data_rw (&c, i->dest); if (agr->missing == COLUMNWISE && i->saw_missing && (i->function & FUNC) != N && (i->function & FUNC) != NU @@ -1076,6 +1009,8 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) } } } + + casewriter_write (output, &c); } /* Resets the state for all the aggregate functions. 
*/ diff --git a/src/language/stats/autorecode.c b/src/language/stats/autorecode.c index 2944f912..4e5628a0 100644 --- a/src/language/stats/autorecode.c +++ b/src/language/stats/autorecode.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -103,7 +104,8 @@ int cmd_autorecode (struct lexer *lexer, struct dataset *ds) { struct autorecode_pgm arc; - struct ccase *c; + struct casereader *input; + struct ccase c; size_t dst_cnt; size_t i; bool ok; @@ -188,16 +190,16 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) hash_numeric_value, NULL, NULL); } - proc_open (ds); - while (proc_read (ds, &c)) + input = proc_open (ds); + for (; casereader_read (input, &c); case_destroy (&c)) for (i = 0; i < arc.var_cnt; i++) { union arc_value v, *vp, **vpp; if (var_is_numeric (arc.src_vars[i])) - v.f = case_num (c, arc.src_vars[i]); + v.f = case_num (&c, arc.src_vars[i]); else - v.c = (char *) case_str (c, arc.src_vars[i]); + v.c = (char *) case_str (&c, arc.src_vars[i]); vpp = (union arc_value **) hsh_probe (arc.src_values[i], &v); if (*vpp == NULL) @@ -211,7 +213,8 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) *vpp = vp; } } - ok = proc_close (ds); + ok = casereader_destroy (input); + ok = proc_commit (ds) && ok; for (i = 0; i < arc.var_cnt; i++) arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds), diff --git a/src/language/stats/binomial.c b/src/language/stats/binomial.c index 33c47989..91910ee5 100644 --- a/src/language/stats/binomial.c +++ b/src/language/stats/binomial.c @@ -22,13 +22,12 @@ #include #include -#include +#include #include #include #include #include #include -#include #include #include @@ -89,50 +88,47 @@ calculate_binomial_internal (double n1, double n2, double p) return sig1tailed ; } -static void +static bool do_binomial (const struct dictionary *dict, - const struct casefile *cf, + struct casereader *input, const struct binomial_test *bst, - struct freq *cat1, - struct freq *cat2, - const struct 
casefilter *filter + struct freq_mutable *cat1, + struct freq_mutable *cat2, + enum mv_class exclude ) { bool warn = true; const struct one_sample_test *ost = (const struct one_sample_test *) bst; struct ccase c; - struct casereader *r = casefile_get_reader (cf, NULL); - while (casereader_read(r, &c)) + while (casereader_read(input, &c)) { int v; - double w = - dict_get_case_weight (dict, &c, &warn); + double w = dict_get_case_weight (dict, &c, &warn); for (v = 0 ; v < ost->n_vars ; ++v ) { const struct variable *var = ost->vars[v]; const union value *value = case_data (&c, var); + int width = var_get_width (var); - if ( casefilter_variable_missing (filter, &c, var)) + if (var_is_value_missing (var, value, exclude)) break; if ( NULL == cat1[v].value ) { - cat1[v].value = value_dup (value, var_get_width (var)); + cat1[v].value = value_dup (value, width); cat1[v].count = w; } - else if ( 0 == compare_values (cat1[v].value, value, - var_get_width (var))) + else if ( 0 == compare_values (cat1[v].value, value, width)) cat1[v].count += w; else if ( NULL == cat2[v].value ) { - cat2[v].value = value_dup (value, var_get_width (var)); + cat2[v].value = value_dup (value, width); cat2[v].count = w; } - else if ( 0 == compare_values (cat2[v].value, value, - var_get_width (var))) + else if ( 0 == compare_values (cat2[v].value, value, width)) cat2[v].count += w; else if ( bst->category1 == SYSMIS) msg (ME, _("Variable %s is not dichotomous"), var_get_name (var)); @@ -140,24 +136,23 @@ do_binomial (const struct dictionary *dict, case_destroy (&c); } - casereader_destroy (r); + return casereader_destroy (input); } void binomial_execute (const struct dataset *ds, - const struct casefile *cf, - struct casefilter *filter, + struct casereader *input, + enum mv_class exclude, const struct npar_test *test) { int v; const struct binomial_test *bst = (const struct binomial_test *) test; const struct one_sample_test *ost = (const struct one_sample_test*) test; - struct freq *cat1 = xzalloc 
(sizeof (*cat1) * ost->n_vars); - struct freq *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars); - struct tab_table *table ; + struct freq_mutable *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars); + struct freq_mutable *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars); assert ((bst->category1 == SYSMIS) == (bst->category2 == SYSMIS) ); @@ -175,95 +170,78 @@ binomial_execute (const struct dataset *ds, cat2->value = value_dup (&v, 0); } - do_binomial (dataset_dict(ds), cf, bst, cat1, cat2, filter); - - table = tab_create (7, ost->n_vars * 3 + 1, 0); - - tab_dim (table, tab_natural_dimensions); - - tab_title (table, _("Binomial Test")); - - tab_headers (table, 2, 0, 1, 0); - - tab_box (table, TAL_1, TAL_1, -1, TAL_1, - 0, 0, table->nc - 1, tab_nr(table) - 1 ); - - for (v = 0 ; v < ost->n_vars; ++v) + if (do_binomial (dataset_dict(ds), input, bst, cat1, cat2, exclude)) { - double n_total, sig; - const struct variable *var = ost->vars[v]; - tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3); - - /* Titles */ - tab_text (table, 0, 1 + v * 3, TAB_LEFT, - var_to_string (var)); - - tab_text (table, 1, 1 + v * 3, TAB_LEFT, - _("Group1")); - - tab_text (table, 1, 2 + v * 3, TAB_LEFT, - _("Group2")); + struct tab_table *table = tab_create (7, ost->n_vars * 3 + 1, 0); - tab_text (table, 1, 3 + v * 3, TAB_LEFT, - _("Total")); + tab_dim (table, tab_natural_dimensions); - /* Test Prop */ - tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3); + tab_title (table, _("Binomial Test")); - /* Category labels */ - tab_text (table, 2, 1 + v * 3, TAB_NONE, - var_get_value_name (var, cat1[v].value)); + tab_headers (table, 2, 0, 1, 0); - tab_text (table, 2, 2 + v * 3, TAB_NONE, - var_get_value_name (var, cat2[v].value)); + tab_box (table, TAL_1, TAL_1, -1, TAL_1, + 0, 0, table->nc - 1, tab_nr(table) - 1 ); - /* Observed N */ - tab_float (table, 3, 1 + v * 3, TAB_NONE, - cat1[v].count, 8, 0); + for (v = 0 ; v < ost->n_vars; ++v) + { + double n_total, sig; + const struct variable *var = 
ost->vars[v]; + tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3); - tab_float (table, 3, 2 + v * 3, TAB_NONE, - cat2[v].count, 8, 0); + /* Titles */ + tab_text (table, 0, 1 + v * 3, TAB_LEFT, var_to_string (var)); + tab_text (table, 1, 1 + v * 3, TAB_LEFT, _("Group1")); + tab_text (table, 1, 2 + v * 3, TAB_LEFT, _("Group2")); + tab_text (table, 1, 3 + v * 3, TAB_LEFT, _("Total")); - n_total = cat1[v].count + cat2[v].count; + /* Test Prop */ + tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3); + /* Category labels */ + tab_text (table, 2, 1 + v * 3, TAB_NONE, + var_get_value_name (var, cat1[v].value)); + tab_text (table, 2, 2 + v * 3, TAB_NONE, + var_get_value_name (var, cat2[v].value)); - tab_float (table, 3, 3 + v * 3, TAB_NONE, - n_total, 8, 0); + /* Observed N */ + tab_float (table, 3, 1 + v * 3, TAB_NONE, cat1[v].count, 8, 0); + tab_float (table, 3, 2 + v * 3, TAB_NONE, cat2[v].count, 8, 0); - /* Observed Proportions */ + n_total = cat1[v].count + cat2[v].count; + tab_float (table, 3, 3 + v * 3, TAB_NONE, n_total, 8, 0); - tab_float (table, 4, 1 + v * 3, TAB_NONE, - cat1[v].count / n_total, 8, 3); + /* Observed Proportions */ + tab_float (table, 4, 1 + v * 3, TAB_NONE, + cat1[v].count / n_total, 8, 3); + tab_float (table, 4, 2 + v * 3, TAB_NONE, + cat2[v].count / n_total, 8, 3); + tab_float (table, 4, 3 + v * 3, TAB_NONE, + (cat1[v].count + cat2[v].count) / n_total, 8, 2); - tab_float (table, 4, 2 + v * 3, TAB_NONE, - cat2[v].count / n_total, 8, 3); + /* Significance */ + sig = calculate_binomial (cat1[v].count, cat2[v].count, bst->p); + tab_float (table, 6, 1 + v * 3, TAB_NONE, sig, 8, 3); + } - tab_float (table, 4, 3 + v * 3, TAB_NONE, - (cat1[v].count + cat2[v].count) / n_total, 8, 2); + tab_text (table, 2, 0, TAB_CENTER, _("Category")); + tab_text (table, 3, 0, TAB_CENTER, _("N")); + tab_text (table, 4, 0, TAB_CENTER, _("Observed Prop.")); + tab_text (table, 5, 0, TAB_CENTER, _("Test Prop.")); + tab_text (table, 6, 0, TAB_CENTER | TAT_PRINTF, 
+ _("Exact Sig. (%d-tailed)"), + bst->p == 0.5 ? 2: 1); - /* Significance */ - sig = calculate_binomial (cat1[v].count, cat2[v].count, - bst->p); - - tab_float (table, 6, 1 + v * 3, TAB_NONE, - sig, 8, 3); + tab_vline (table, TAL_2, 2, 0, tab_nr (table) -1); + tab_submit (table); + } + + for (v = 0; v < ost->n_vars; v++) + { + free (cat1[v].value); + free (cat2[v].value); } - - tab_text (table, 2, 0, TAB_CENTER, _("Category")); - tab_text (table, 3, 0, TAB_CENTER, _("N")); - tab_text (table, 4, 0, TAB_CENTER, _("Observed Prop.")); - tab_text (table, 5, 0, TAB_CENTER, _("Test Prop.")); - - tab_text (table, 6, 0, TAB_CENTER | TAT_PRINTF, - _("Exact Sig. (%d-tailed)"), - bst->p == 0.5 ? 2: 1); - - tab_vline (table, TAL_2, 2, 0, tab_nr (table) -1); - free (cat1); - free (cat2); - - tab_submit (table); - + free (cat2); } diff --git a/src/language/stats/binomial.h b/src/language/stats/binomial.h index 10f5d390..e54a1ded 100644 --- a/src/language/stats/binomial.h +++ b/src/language/stats/binomial.h @@ -36,13 +36,13 @@ struct binomial_test }; -struct casefile; +struct casereader; struct dataset; void binomial_execute (const struct dataset *, - const struct casefile *, - struct casefilter *, + struct casereader *, + enum mv_class, const struct npar_test *); #endif diff --git a/src/language/stats/chisquare.c b/src/language/stats/chisquare.c index a406edc7..3dceb1c3 100644 --- a/src/language/stats/chisquare.c +++ b/src/language/stats/chisquare.c @@ -17,39 +17,33 @@ 02110-1301, USA. 
*/ #include -#include -#include + +#include #include +#include #include -#include -#include -#include +#include #include #include - -#include -#include +#include +#include +#include +#include #include - -#include - +#include +#include +#include +#include +#include #include -#include -#include "npar.h" -#include "chisquare.h" -#include "freq.h" - -#include +#include #include "gettext.h" #define _(msgid) gettext (msgid) - - - /* Return a hash table containing the frequency counts of each value of VAR in CF . It is the caller's responsibility to free the hash table when @@ -57,8 +51,7 @@ */ static struct hsh_table * create_freq_hash_with_range (const struct dictionary *dict, - const struct casefile *cf, - struct casefilter *filter, + struct casereader *input, const struct variable *var, double lo, double hi) @@ -66,7 +59,6 @@ create_freq_hash_with_range (const struct dictionary *dict, bool warn = true; float i_d; struct ccase c; - struct casereader *r = casefile_get_reader (cf, filter); struct hsh_table *freq_hash = hsh_create (4, compare_freq, hash_freq, @@ -87,19 +79,13 @@ create_freq_hash_with_range (const struct dictionary *dict, hsh_insert (freq_hash, fr); } - while (casereader_read(r, &c)) + while (casereader_read (input, &c)) { union value obs_value; struct freq **existing_fr; struct freq *fr = xmalloc(sizeof (*fr)); fr->value = case_data (&c, var); - if ( casefilter_variable_missing (filter, &c, var)) - { - free (fr); - continue; - } - fr->count = dict_get_case_weight (dict, &c, &warn); obs_value.f = trunc (fr->value->f); @@ -124,43 +110,39 @@ create_freq_hash_with_range (const struct dictionary *dict, case_destroy (&c); } - casereader_destroy (r); - - return freq_hash; + if (casereader_destroy (input)) + return freq_hash; + else + { + hsh_destroy (freq_hash); + return NULL; + } } /* Return a hash table containing the frequency counts of each - value of VAR in CF . + value of VAR in INPUT . 
It is the caller's responsibility to free the hash table when no longer required. */ static struct hsh_table * create_freq_hash (const struct dictionary *dict, - const struct casefile *cf, - struct casefilter *filter, + struct casereader *input, const struct variable *var) { bool warn = true; struct ccase c; - struct casereader *r = casefile_get_reader (cf, filter); struct hsh_table *freq_hash = hsh_create (4, compare_freq, hash_freq, free_freq_mutable_hash, (void *) var); - while (casereader_read(r, &c)) + for (; casereader_read (input, &c); case_destroy (&c)) { struct freq **existing_fr; struct freq *fr = xmalloc(sizeof (*fr)); - fr->value = case_data (&c, var ); - - if ( casefilter_variable_missing (filter, &c, var)) - { - free (fr); - continue; - } + fr->value = case_data (&c, var); fr->count = dict_get_case_weight (dict, &c, &warn); @@ -175,20 +157,21 @@ create_freq_hash (const struct dictionary *dict, *existing_fr = fr; fr->value = value_dup (fr->value, var_get_width (var)); } - - case_destroy (&c); } - casereader_destroy (r); - - return freq_hash; + if (casereader_destroy (input)) + return freq_hash; + else + { + hsh_destroy (freq_hash); + return NULL; + } } static struct tab_table * create_variable_frequency_table (const struct dictionary *dict, - const struct casefile *cf, - struct casefilter *filter, + struct casereader *input, const struct chisquare_test *test, int v, struct hsh_table **freq_hash) @@ -200,7 +183,9 @@ create_variable_frequency_table (const struct dictionary *dict, struct tab_table *table ; const struct variable *var = ost->vars[v]; - *freq_hash = create_freq_hash (dict, cf, filter, var); + *freq_hash = create_freq_hash (dict, input, var); + if (*freq_hash == NULL) + return NULL; n_cells = hsh_count (*freq_hash); @@ -305,7 +290,8 @@ create_stats_table (const struct chisquare_test *test) { const struct one_sample_test *ost = (const struct one_sample_test*) test; - struct tab_table *table = tab_create (1 + ost->n_vars, 4, 0); + struct 
tab_table *table; + table = tab_create (1 + ost->n_vars, 4, 0); tab_dim (table, tab_natural_dimensions); tab_title (table, _("Test Statistics")); tab_headers (table, 1, 0, 1, 0); @@ -331,20 +317,20 @@ create_stats_table (const struct chisquare_test *test) void chisquare_execute (const struct dataset *ds, - const struct casefile *cf, - struct casefilter *filter, + struct casereader *input, + enum mv_class exclude, const struct npar_test *test) { const struct dictionary *dict = dataset_dict (ds); int v, i; struct one_sample_test *ost = (struct one_sample_test *) test; struct chisquare_test *cst = (struct chisquare_test *) test; - struct tab_table *stats_table = create_stats_table (cst); int n_cells = 0; double total_expected = 0.0; double *df = xzalloc (sizeof (*df) * ost->n_vars); double *xsq = xzalloc (sizeof (*df) * ost->n_vars); + bool ok; for ( i = 0 ; i < cst->n_expected ; ++i ) total_expected += cst->expected[i]; @@ -355,17 +341,17 @@ chisquare_execute (const struct dataset *ds, { double total_obs = 0.0; struct hsh_table *freq_hash = NULL; + struct casereader *reader = + casereader_create_filter_missing (casereader_clone (input), + &ost->vars[v], 1, exclude, NULL); struct tab_table *freq_table = - create_variable_frequency_table(dict, cf, filter, cst, - v, &freq_hash); + create_variable_frequency_table(dict, reader, cst, v, &freq_hash); - struct freq **ff = (struct freq **) hsh_sort (freq_hash); + struct freq **ff; if ( NULL == freq_table ) - { - hsh_destroy (freq_hash); - continue; - } + continue; + ff = (struct freq **) hsh_sort (freq_hash); n_cells = hsh_count (freq_hash); @@ -420,12 +406,19 @@ chisquare_execute (const struct dataset *ds, for ( v = 0 ; v < ost->n_vars ; ++v ) { double total_obs = 0.0; + struct casereader *reader = + casereader_create_filter_missing (casereader_clone (input), + &ost->vars[v], 1, exclude, NULL); struct hsh_table *freq_hash = - create_freq_hash_with_range (dict, cf, filter, ost->vars[v], - cst->lo, cst->hi); + 
create_freq_hash_with_range (dict, reader, + ost->vars[v], cst->lo, cst->hi); + + struct freq **ff; - struct freq **ff = (struct freq **) hsh_sort (freq_hash); + if (freq_hash == NULL) + continue; + ff = (struct freq **) hsh_sort (freq_hash); assert ( n_cells == hsh_count (freq_hash)); for ( i = 0 ; i < hsh_count (freq_hash) ; ++i ) @@ -473,25 +466,30 @@ chisquare_execute (const struct dataset *ds, tab_submit (freq_table); } + ok = !taint_has_tainted_successor (casereader_get_taint (input)); + casereader_destroy (input); - - /* Populate the summary statistics table */ - for ( v = 0 ; v < ost->n_vars ; ++v ) + if (ok) { - const struct variable *var = ost->vars[v]; + struct tab_table *stats_table = create_stats_table (cst); + + /* Populate the summary statistics table */ + for ( v = 0 ; v < ost->n_vars ; ++v ) + { + const struct variable *var = ost->vars[v]; - tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var)); + tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var)); - tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3); - tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0); + tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3); + tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0); - tab_float (stats_table, 1 + v, 3, TAB_NONE, - gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3); + tab_float (stats_table, 1 + v, 3, TAB_NONE, + gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3); + } + tab_submit (stats_table); } - + free (xsq); free (df); - - tab_submit (stats_table); } diff --git a/src/language/stats/chisquare.h b/src/language/stats/chisquare.h index 71a3c174..d9d34abd 100644 --- a/src/language/stats/chisquare.h +++ b/src/language/stats/chisquare.h @@ -19,11 +19,10 @@ #if !chisquare_h #define chisquare_h 1 -#include #include #include +#include -#include "npar.h" struct chisquare_test { struct one_sample_test parent; @@ -37,17 +36,18 @@ struct chisquare_test int n_expected; }; -struct casefile; -struct dictionary ; +struct casereader; +struct 
dictionary; struct hsh_table; +struct dataset; void chisquare_insert_variables (const struct npar_test *test, struct hsh_table *variables); void chisquare_execute (const struct dataset *ds, - const struct casefile *cf, - struct casefilter *filter, + struct casereader *input, + enum mv_class exclude, const struct npar_test *test); diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index e54fa2d7..1d2bdf74 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -36,6 +36,8 @@ #include #include +#include +#include #include #include #include @@ -177,10 +179,10 @@ static struct pool *pl_tc; /* For table cells. */ static struct pool *pl_col; /* For column data. */ static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds); -static void precalc (const struct ccase *, void *, const struct dataset *); -static bool calc_general (const struct ccase *, void *, const struct dataset *); -static bool calc_integer (const struct ccase *, void *, const struct dataset *); -static bool postcalc (void *, const struct dataset *); +static void precalc (struct casereader *, const struct dataset *); +static void calc_general (struct ccase *, const struct dataset *); +static void calc_integer (struct ccase *, const struct dataset *); +static void postcalc (void); static void submit (struct tab_table *); static void format_short (char *s, const struct fmt_spec *fp, @@ -203,8 +205,10 @@ cmd_crosstabs (struct lexer *lexer, struct dataset *ds) static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds) { - int i; + struct casegrouper *grouper; + struct casereader *input, *group; bool ok; + int i; variables = NULL; variables_cnt = 0; @@ -294,9 +298,28 @@ internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds) else write_style = CRS_WR_NONE; - ok = procedure_with_splits (ds, precalc, - mode == GENERAL ? 
calc_general : calc_integer, - postcalc, NULL); + input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds), + NULL, NULL); + grouper = casegrouper_create_splits (input, dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + { + struct ccase c; + + precalc (group, ds); + + for (; casereader_read (group, &c); case_destroy (&c)) + { + if (mode == GENERAL) + calc_general (&c, ds); + else + calc_integer (&c, ds); + } + casereader_destroy (group); + + postcalc (); + } + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; } @@ -490,10 +513,16 @@ static int compare_table_entry (const void *, const void *, const void *); static unsigned hash_table_entry (const void *, const void *); /* Set up the crosstabulation tables for processing. */ -static void -precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds) +static void +precalc (struct casereader *input, const struct dataset *ds) { - output_split_file_values (ds, first); + struct ccase c; + + if (!casereader_peek (input, 0, &c)) + return; + output_split_file_values (ds, &c); + case_destroy (&c); + if (mode == GENERAL) { gen_tab = hsh_create (512, compare_table_entry, hash_table_entry, @@ -565,18 +594,16 @@ precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds) } /* Form crosstabulations for general mode. */ -static bool -calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) +static void +calc_general (struct ccase *c, const struct dataset *ds) { - bool bad_warn = true; - /* Missing values to exclude. */ enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY : cmd.miss == CRS_INCLUDE ? MV_SYSTEM : MV_NEVER); /* Case weight. */ - double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn); + double weight = dict_get_case_weight (dataset_dict (ds), c, NULL); /* Flattened current table index. 
*/ int t; @@ -637,12 +664,10 @@ calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) next_crosstab: local_free (te); } - - return true; } -static bool -calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) +static void +calc_integer (struct ccase *c, const struct dataset *ds) { bool bad_warn = true; @@ -695,8 +720,6 @@ calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) next_crosstab: ; } - - return true; } /* Compare the table_entry's at A and B and return a strcmp()-type @@ -764,8 +787,8 @@ static void output_pivot_table (struct table_entry **, struct table_entry **, int *, int *, int *); static void make_summary_table (void); -static bool -postcalc (void *aux UNUSED, const struct dataset *ds UNUSED) +static void +postcalc (void) { if (mode == GENERAL) { @@ -801,8 +824,6 @@ postcalc (void *aux UNUSED, const struct dataset *ds UNUSED) } hsh_destroy (gen_tab); - - return true; } static void insert_summary (struct tab_table *, int tab_index, double valid); diff --git a/src/language/stats/descriptives.c b/src/language/stats/descriptives.c index 8b06aa52..3eb638bd 100644 --- a/src/language/stats/descriptives.c +++ b/src/language/stats/descriptives.c @@ -16,16 +16,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -/* FIXME: Many possible optimizations. */ - #include #include #include #include -#include -#include +#include +#include #include #include #include @@ -180,9 +178,8 @@ static void dump_z_table (struct dsc_proc *); static void setup_z_trns (struct dsc_proc *, struct dataset *); /* Procedure execution functions. */ -static bool calc_descriptives (const struct ccase *first, - const struct casefile *, void *dsc_, - const struct dataset *); +static void calc_descriptives (struct dsc_proc *, struct casereader *, + struct dataset *); static void display (struct dsc_proc *dsc); /* Parser and outline. 
*/ @@ -200,6 +197,9 @@ cmd_descriptives (struct lexer *lexer, struct dataset *ds) size_t i; bool ok; + struct casegrouper *grouper; + struct casereader *group; + /* Create and initialize dsc. */ dsc = xmalloc (sizeof *dsc); dsc->vars = NULL; @@ -316,8 +316,7 @@ cmd_descriptives (struct lexer *lexer, struct dataset *ds) { int i; - if (!parse_variables_const (lexer, dataset_dict (ds), - &vars, &var_cnt, + if (!parse_variables_const (lexer, dict, &vars, &var_cnt, PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC)) goto error; @@ -413,8 +412,12 @@ cmd_descriptives (struct lexer *lexer, struct dataset *ds) for (i = 0; i < dsc->var_cnt; i++) dsc->vars[i].moments = moments_create (dsc->max_moment); - /* Data pass. */ - ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc); + /* Data pass. FIXME: error handling. */ + grouper = casegrouper_create_splits (proc_open (ds), dict); + while (casegrouper_get_next_group (grouper, &group)) + calc_descriptives (dsc, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; /* Z-scoring! */ if (ok && z_cnt) @@ -689,17 +692,25 @@ static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c); /* Calculates and displays descriptive statistics for the cases in CF. 
*/ -static bool -calc_descriptives (const struct ccase *first, - const struct casefile *cf, void *dsc_, - const struct dataset *ds) +static void +calc_descriptives (struct dsc_proc *dsc, struct casereader *group, + struct dataset *ds) { - struct dsc_proc *dsc = dsc_; - struct casereader *reader; + struct casereader *pass1, *pass2; struct ccase c; size_t i; - output_split_file_values (ds, first); + if (!casereader_peek (group, 0, &c)) + return; + output_split_file_values (ds, &c); + case_destroy (&c); + + group = casereader_create_filter_weight (group, dataset_dict (ds), + NULL, NULL); + + casereader_split (group, &pass1, &pass2); + if (dsc->max_moment <= MOMENT_MEAN) + casereader_destroy (pass2); for (i = 0; i < dsc->var_cnt; i++) { @@ -715,13 +726,9 @@ calc_descriptives (const struct ccase *first, dsc->valid = 0.; /* First pass to handle most of the work. */ - for (reader = casefile_get_reader (cf, NULL); - casereader_read (reader, &c); - case_destroy (&c)) + for (; casereader_read (pass1, &c); case_destroy (&c)) { - double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn); - if (weight <= 0.0) - continue; + double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL); /* Check for missing values. */ if (listwise_missing (dsc, &c)) @@ -737,8 +744,7 @@ calc_descriptives (const struct ccase *first, struct dsc_var *dv = &dsc->vars[i]; double x = case_num (&c, dv->v); - if (dsc->missing_type != DSC_LISTWISE - && var_is_num_missing (dv->v, x, dsc->exclude)) + if (var_is_num_missing (dv->v, x, dsc->exclude)) { dv->missing += weight; continue; @@ -753,19 +759,15 @@ calc_descriptives (const struct ccase *first, dv->max = x; } } - casereader_destroy (reader); + if (!casereader_destroy (pass1)) + return; /* Second pass for higher-order moments. 
*/ if (dsc->max_moment > MOMENT_MEAN) { - for (reader = casefile_get_reader (cf, NULL); - casereader_read (reader, &c); - case_destroy (&c)) + for (; casereader_read (pass2, &c); case_destroy (&c)) { - double weight = dict_get_case_weight (dataset_dict (ds), &c, - &dsc->bad_warn); - if (weight <= 0.0) - continue; + double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL); /* Check for missing values. */ if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c)) @@ -776,17 +778,17 @@ calc_descriptives (const struct ccase *first, struct dsc_var *dv = &dsc->vars[i]; double x = case_num (&c, dv->v); - if (dsc->missing_type != DSC_LISTWISE - && var_is_num_missing (dv->v, x, dsc->exclude)) + if (var_is_num_missing (dv->v, x, dsc->exclude)) continue; if (dv->moments != NULL) moments_pass_two (dv->moments, x, weight); } } - casereader_destroy (reader); + if (!casereader_destroy (pass2)) + return; } - + /* Calculate results. */ for (i = 0; i < dsc->var_cnt; i++) { @@ -825,8 +827,6 @@ calc_descriptives (const struct ccase *first, /* Output results. 
*/ display (dsc); - - return true; } /* Returns true if any of the descriptives variables in DSC's diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index bb6f9eec..e15d2949 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -26,7 +26,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include #include -#include +#include +#include #include #include #include @@ -152,8 +153,8 @@ void box_plot_variables (const struct factor *fctr, /* Per Split function */ -static bool run_examine (const struct ccase *, - const struct casefile *cf, void *cmd_, const struct dataset *); +static void run_examine (struct cmd_examine *, struct casereader *, + struct dataset *); static void output_examine (void); @@ -193,6 +194,8 @@ static short sbc_percentile; int cmd_examine (struct lexer *lexer, struct dataset *ds) { + struct casegrouper *grouper; + struct casereader *group; bool ok; subc_list_double_create (&percentile_list); @@ -222,7 +225,11 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) subc_list_double_push (&percentile_list, 75); } - ok = multipass_procedure_with_splits (ds, run_examine, &cmd); + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + run_examine (&cmd, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; if ( totals ) { @@ -627,9 +634,6 @@ void populate_summary (struct tab_table *t, int col, int row, -static bool bad_weight_warn = true; - - /* Perform calculations for the sub factors */ void factor_calc (const struct ccase *c, int case_no, double weight, @@ -706,23 +710,28 @@ factor_calc (const struct ccase *c, int case_no, double weight, } } -static bool -run_examine (const struct ccase *first, const struct casefile *cf, - void *cmd_, const struct dataset *ds) +static void +run_examine (struct cmd_examine *cmd, struct casereader *input, + struct dataset *ds) { struct dictionary *dict = 
dataset_dict (ds); - struct casereader *r; + casenumber case_no; struct ccase c; int v; - - const struct cmd_examine *cmd = (struct cmd_examine *) cmd_; + bool ok; struct factor *fctr; - output_split_file_values (ds, first); + if (!casereader_peek (input, 0, &c)) + return; + output_split_file_values (ds, &c); + case_destroy (&c); + + input = casereader_create_filter_weight (input, dict, NULL, NULL); + input = casereader_create_counter (input, &case_no, 0); /* Make sure we haven't got rubbish left over from a - previous split */ + previous split. */ fctr = factors; while (fctr) { @@ -738,15 +747,10 @@ run_examine (const struct ccase *first, const struct casefile *cf, for ( v = 0 ; v < n_dependent_vars ; ++v ) metrics_precalc (&totals[v]); - for (r = casefile_get_reader (cf, NULL); - casereader_read (r, &c) ; - case_destroy (&c) ) + for (; casereader_read (input, &c); case_destroy (&c)) { - int case_missing=0; - const int case_no = casereader_cnum (r); - - const double weight = - dict_get_case_weight (dict, &c, &bad_weight_warn); + int case_missing = 0; + const double weight = dict_get_case_weight (dict, &c, NULL); if ( cmd->miss == XMN_LISTWISE ) { @@ -787,6 +791,7 @@ run_examine (const struct ccase *first, const struct casefile *cf, factor_calc (&c, case_no, weight, case_missing); } + ok = casereader_destroy (input); for ( v = 0 ; v < n_dependent_vars ; ++v) { @@ -882,7 +887,8 @@ run_examine (const struct ccase *first, const struct casefile *cf, fctr = fctr->next; } - output_examine (); + if (ok) + output_examine (); if ( totals ) @@ -893,8 +899,6 @@ run_examine (const struct ccase *first, const struct casefile *cf, metrics_destroy (&totals[i]); } } - - return true; } diff --git a/src/language/stats/flip.c b/src/language/stats/flip.c index 5c6e7740..0bbe637e 100644 --- a/src/language/stats/flip.c +++ b/src/language/stats/flip.c @@ -27,9 +27,9 @@ #include #endif -#include -#include #include +#include +#include #include #include #include @@ -42,7 +42,6 @@ #include 
#include #include -#include #include #include #include @@ -70,8 +69,6 @@ struct flip_pgm int case_cnt; /* Pre-flip case count. */ size_t case_size; /* Post-flip bytes per case. */ - union value *output_buf; /* Case output buffer. */ - struct variable *new_names; /* Variable containing new variable names. */ struct varname *new_names_head; /* First new variable. */ struct varname *new_names_tail; /* Last new variable. */ @@ -82,22 +79,23 @@ struct flip_pgm bool error; /* Error reading temporary file? */ }; +static const struct casereader_class flip_casereader_class; + static void destroy_flip_pgm (struct flip_pgm *); -static struct case_sink *flip_sink_create (struct dataset *ds, struct flip_pgm *); -static struct case_source *flip_source_create (struct flip_pgm *); static bool flip_file (struct flip_pgm *); -static int build_dictionary (struct dictionary *, struct flip_pgm *); - -static const struct case_source_class flip_source_class; -static const struct case_sink_class flip_sink_class; +static bool build_dictionary (struct dictionary *, struct flip_pgm *); +static bool write_flip_case (struct flip_pgm *, const struct ccase *); /* Parses and executes FLIP. 
*/ int cmd_flip (struct lexer *lexer, struct dataset *ds) { - struct flip_pgm *flip; - struct case_sink *sink; struct dictionary *dict = dataset_dict (ds); + struct flip_pgm *flip; + struct casereader *input, *reader; + union value *output_buf; + struct ccase c; + size_t i; bool ok; if (proc_make_temporary_transformations_permanent (ds)) @@ -144,8 +142,6 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) if (flip->new_names) { - size_t i; - for (i = 0; i < flip->var_cnt; i++) if (flip->var[i] == flip->new_names) { @@ -155,20 +151,46 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) } } + output_buf = pool_nalloc (flip->pool, + flip->var_cnt, sizeof *output_buf); + + flip->file = pool_tmpfile (flip->pool); + if (flip->file == NULL) + { + msg (SE, _("Could not create temporary file for FLIP.")); + goto error; + } + + /* Write variable names as first case. */ + for (i = 0; i < flip->var_cnt; i++) + buf_copy_str_rpad (output_buf[i].s, MAX_SHORT_STRING, + var_get_name (flip->var[i])); + if (fwrite (output_buf, sizeof *output_buf, + flip->var_cnt, flip->file) != (size_t) flip->var_cnt) + { + msg (SE, _("Error writing FLIP file: %s."), strerror (errno)); + goto error; + } + + flip->case_cnt = 1; + /* Read the active file into a flip_sink. */ - flip->case_cnt = 0; proc_make_temporary_transformations_permanent (ds); - sink = flip_sink_create (ds, flip); - if (sink == NULL) - goto error; - proc_set_sink (ds, sink); - flip->new_names_tail = NULL; - ok = procedure (ds,NULL, NULL); + proc_discard_output (ds); + + input = proc_open (ds); + while (casereader_read (input, &c)) + { + write_flip_case (flip, &c); + case_destroy (&c); + } + ok = casereader_destroy (input); + ok = proc_commit (ds) && ok; /* Flip the data we read. 
*/ - if (!flip_file (flip)) + if (!ok || !flip_file (flip)) { - discard_variables (ds); + proc_discard_active_file (ds); goto error; } @@ -176,15 +198,17 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) dict_clear (dict); if (!build_dictionary (dict, flip)) { - discard_variables (ds); + proc_discard_active_file (ds); goto error; } flip->case_size = dict_get_case_size (dict); /* Set up flipped data for reading. */ - proc_set_source (ds, flip_source_create (flip)); - - return ok ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE; + reader = casereader_create_sequential (NULL, dict_get_next_value_idx (dict), + flip->case_cnt, + &flip_casereader_class, flip); + proc_set_active_file_data (ds, reader); + return lex_end_of_command (lexer); error: destroy_flip_pgm (flip); @@ -251,7 +275,7 @@ make_new_var (struct dictionary *dict, char name[]) } /* Make a new dictionary for all the new variable names. */ -static int +static bool build_dictionary (struct dictionary *dict, struct flip_pgm *flip) { dict_create_var_assert (dict, "CASE_LBL", 8); @@ -263,7 +287,7 @@ build_dictionary (struct dictionary *dict, struct flip_pgm *flip) if (flip->case_cnt > 99999) { msg (SE, _("Cannot create more than 99999 variable names.")); - return 0; + return false; } for (i = 0; i < flip->case_cnt; i++) @@ -281,54 +305,17 @@ build_dictionary (struct dictionary *dict, struct flip_pgm *flip) for (v = flip->new_names_head; v; v = v->next) if (!make_new_var (dict, v->name)) - return 0; + return false; } - return 1; + return true; } -/* Creates a flip sink based on FLIP. */ -static struct case_sink * -flip_sink_create (struct dataset *ds, struct flip_pgm *flip) -{ - size_t i; - - flip->output_buf = pool_nalloc (flip->pool, - flip->var_cnt, sizeof *flip->output_buf); - - flip->file = pool_tmpfile (flip->pool); - if (flip->file == NULL) - { - msg (SE, _("Could not create temporary file for FLIP: %s."), - strerror (errno)); - return NULL; - } - - /* Write variable names as first case. 
*/ - for (i = 0; i < flip->var_cnt; i++) - buf_copy_str_rpad (flip->output_buf[i].s, MAX_SHORT_STRING, - var_get_name (flip->var[i])); - if (fwrite (flip->output_buf, sizeof *flip->output_buf, - flip->var_cnt, flip->file) != (size_t) flip->var_cnt) - { - msg (SE, _("Error writing FLIP file: %s."), strerror (errno)); - return NULL; - } - - flip->case_cnt = 1; - - return create_case_sink (&flip_sink_class, - dataset_dict (ds), - dataset_get_casefile_factory (ds), - flip); -} - /* Writes case C to the FLIP sink. Returns true if successful, false if an I/O error occurred. */ static bool -flip_sink_write (struct case_sink *sink, const struct ccase *c) +write_flip_case (struct flip_pgm *flip, const struct ccase *c) { - struct flip_pgm *flip = sink->aux; size_t i; flip->case_cnt++; @@ -377,14 +364,7 @@ flip_sink_write (struct case_sink *sink, const struct ccase *c) } else out = SYSMIS; - flip->output_buf[i].f = out; - } - - if (fwrite (flip->output_buf, sizeof *flip->output_buf, - flip->var_cnt, flip->file) != (size_t) flip->var_cnt) - { - msg (SE, _("Error writing FLIP file: %s."), strerror (errno)); - return false; + fwrite (&out, sizeof out, 1, flip->file); } return true; } @@ -511,57 +491,39 @@ flip_file (struct flip_pgm *flip) return true; } -/* FLIP sink class. */ -static const struct case_sink_class flip_sink_class = - { - "FLIP", - NULL, - flip_sink_write, - NULL, - NULL, - }; - -/* Creates and returns a FLIP source based on PGM, - which should have already been used as a sink. */ -static struct case_source * -flip_source_create (struct flip_pgm *pgm) -{ - return create_case_source (&flip_source_class, pgm); -} - /* Reads one case into C. Returns true if successful, false at end of file or if an I/O error occurred. 
*/ static bool -flip_source_read (struct case_source *source, struct ccase *c) +flip_casereader_read (struct casereader *reader UNUSED, void *flip_, + struct ccase *c) { - struct flip_pgm *flip = source->aux; + struct flip_pgm *flip = flip_; size_t i; if (flip->error || flip->cases_read >= flip->var_cnt) return false; - - if (flip->input_buf == NULL) - flip->input_buf = pool_nmalloc (flip->pool, - flip->case_cnt, sizeof *flip->input_buf); - if (fread (flip->input_buf, sizeof *flip->input_buf, flip->case_cnt, - flip->file) != flip->case_cnt) + case_create (c, flip->case_cnt); + for (i = 0; i < flip->case_cnt; i++) { - if (ferror (flip->file)) - msg (SE, _("Error reading FLIP temporary file: %s."), - strerror (errno)); - else if (feof (flip->file)) - msg (SE, _("Unexpected end of file reading FLIP temporary file.")); - else - NOT_REACHED (); - flip->error = true; - return false; + double in; + if (fread (&in, sizeof in, 1, flip->file) != 1) + { + case_destroy (c); + if (ferror (flip->file)) + msg (SE, _("Error reading FLIP temporary file: %s."), + strerror (errno)); + else if (feof (flip->file)) + msg (SE, _("Unexpected end of file reading FLIP temporary file.")); + else + NOT_REACHED (); + flip->error = true; + return false; + } + case_data_rw_idx (c, i)->f = in; } - - for (i = 0; i < flip->case_cnt; i++) - case_data_rw_idx (c, i)->f = flip->input_buf[i].f; - + flip->cases_read++; return true; @@ -570,19 +532,19 @@ flip_source_read (struct case_source *source, struct ccase *c) /* Destroys the source. Returns true if successful read, false if an I/O occurred during destruction or previously. 
*/ -static bool -flip_source_destroy (struct case_source *source) +static void +flip_casereader_destroy (struct casereader *reader UNUSED, void *flip_) { - struct flip_pgm *flip = source->aux; - bool ok = !flip->error; + struct flip_pgm *flip = flip_; + if (flip->error) + casereader_force_error (reader); destroy_flip_pgm (flip); - return ok; } -static const struct case_source_class flip_source_class = +static const struct casereader_class flip_casereader_class = { - "FLIP", + flip_casereader_read, + flip_casereader_destroy, + NULL, NULL, - flip_source_read, - flip_source_destroy }; diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 29e85bab..5f0e96e9 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -29,6 +29,8 @@ #include #include +#include +#include #include #include #include @@ -45,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -271,9 +272,9 @@ static void determine_charts (void); static void calc_stats (const struct variable *v, double d[frq_n_stats]); -static void precalc (const struct ccase *, void *, const struct dataset *); -static bool calc (const struct ccase *, void *, const struct dataset *); -static bool postcalc (void *, const struct dataset *); +static void precalc (struct casereader *, struct dataset *); +static void calc (const struct ccase *, const struct dataset *); +static void postcalc (void); static void postprocess_freq_tab (const struct variable *); static void dump_full (const struct variable *); @@ -318,8 +319,10 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) static int internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) { - int i; + struct casegrouper *grouper; + struct casereader *input, *group; bool ok; + int i; n_percentiles = 0; percentiles = NULL; @@ -383,7 +386,21 @@ internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) /* Do it! 
*/ - ok = procedure_with_splits (ds, precalc, calc, postcalc, NULL); + input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds), + NULL, NULL); + grouper = casegrouper_create_splits (input, dataset_dict (ds)); + for (; casegrouper_get_next_group (grouper, &group); + casereader_destroy (group)) + { + struct ccase c; + + precalc (group, ds); + for (; casereader_read (group, &c); case_destroy (&c)) + calc (&c, ds); + postcalc (); + } + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; free_frequencies(&cmd); @@ -496,14 +513,11 @@ determine_charts (void) } /* Add data from case C to the frequency table. */ -static bool -calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) +static void +calc (const struct ccase *c, const struct dataset *ds) { - double weight; + double weight = dict_get_case_weight (dataset_dict (ds), c, NULL); size_t i; - bool bad_warn = true; - - weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn); for (i = 0; i < n_variables; i++) { @@ -530,7 +544,8 @@ calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) struct freq *fp = pool_alloc (gen_pool, sizeof *fp); fp->count = weight; fp->value = pool_clone (gen_pool, - val, MAX (MAX_SHORT_STRING, vf->width)); + val, + MAX (MAX_SHORT_STRING, vf->width)); *fpp = fp; } } @@ -552,17 +567,20 @@ calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) NOT_REACHED (); } } - return true; } /* Prepares each variable that is the target of FREQUENCIES by setting up its hash table. 
*/ static void -precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds) +precalc (struct casereader *input, struct dataset *ds) { + struct ccase c; size_t i; - output_split_file_values (ds, first); + if (!casereader_peek (input, 0, &c)) + return; + output_split_file_values (ds, &c); + case_destroy (&c); pool_destroy (gen_pool); gen_pool = pool_create (); @@ -590,8 +608,8 @@ precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds) /* Finishes up with the variables after frequencies have been calculated. Displays statistics, percentiles, ... */ -static bool -postcalc (void *aux UNUSED, const struct dataset *ds UNUSED) +static void +postcalc (void) { size_t i; @@ -666,8 +684,6 @@ postcalc (void *aux UNUSED, const struct dataset *ds UNUSED) cleanup_freq_tab (v); } - - return true; } /* Returns the comparison function that should be used for diff --git a/src/language/stats/npar-summary.c b/src/language/stats/npar-summary.c index 349fcc1e..74532231 100644 --- a/src/language/stats/npar-summary.c +++ b/src/language/stats/npar-summary.c @@ -18,12 +18,11 @@ #include #include +#include #include #include #include "npar-summary.h" #include -#include -#include #include #include #include @@ -35,38 +34,38 @@ void npar_summary_calc_descriptives (struct descriptives *desc, - const struct casefile *cf, - struct casefilter *filter, + struct casereader *input, const struct dictionary *dict, const struct variable *const *vv, - int n_vars UNUSED) + int n_vars UNUSED, + enum mv_class filter) { int i = 0; while (*vv) { - bool warn = true; double minimum = DBL_MAX; double maximum = -DBL_MAX; double var; struct moments1 *moments = moments1_create (MOMENT_VARIANCE); - struct casereader *r = casefile_get_reader (cf, filter); struct ccase c; const struct variable *v = *vv++; - - while (casereader_read(r, &c)) + struct casereader *pass; + + pass = casereader_clone (input); + pass = casereader_create_filter_missing (pass, + (struct variable **) &v, 
1, + filter, NULL); + pass = casereader_create_filter_weight (pass, dict, NULL, NULL); + while (casereader_read(pass, &c)) { - const union value *val = case_data (&c, v); - double w = dict_get_case_weight (dict, &c, &warn); - - if ( ! casefilter_variable_missing (filter, &c, v )) - { - minimum = MIN (minimum, val->f); - maximum = MAX (maximum, val->f); - moments1_add (moments, val->f, w); - } + double val = case_num (&c, v); + double w = dict_get_case_weight (dict, &c, NULL); + minimum = MIN (minimum, val); + maximum = MAX (maximum, val); + moments1_add (moments, val, w); case_destroy (&c); } - casereader_destroy (r); + casereader_destroy (pass); moments1_calculate (moments, &desc[i].n, @@ -83,6 +82,7 @@ npar_summary_calc_descriptives (struct descriptives *desc, i++; } + casereader_destroy (input); } diff --git a/src/language/stats/npar-summary.h b/src/language/stats/npar-summary.h index 1a5125b6..f57fa1cf 100644 --- a/src/language/stats/npar-summary.h +++ b/src/language/stats/npar-summary.h @@ -22,9 +22,8 @@ #include struct variable ; -struct casefile ; +struct casereader ; struct dictionary; -struct casefilter; struct descriptives { @@ -36,11 +35,11 @@ struct descriptives }; void npar_summary_calc_descriptives (struct descriptives *desc, - const struct casefile *cf, - struct casefilter *filter, + struct casereader *input, const struct dictionary *dict, const struct variable *const *vv, - int n_vars); + int n_vars, + enum mv_class filter); void do_summary_box (const struct descriptives *desc, diff --git a/src/language/stats/npar.h b/src/language/stats/npar.h index 3907bb26..80446c3a 100644 --- a/src/language/stats/npar.h +++ b/src/language/stats/npar.h @@ -19,18 +19,25 @@ #if !npar_h #define npar_h 1 -typedef const struct variable *var_ptr; -typedef var_ptr variable_pair[2]; +#include +#include + +#include +#include + +typedef struct variable *variable_pair[2]; struct hsh_table; struct const_hsh_table; -struct casefilter ; +struct casefilter; +struct casereader; 
+struct dataset; struct npar_test { void (*execute) (const struct dataset *, - const struct casefile *, - struct casefilter *, + struct casereader *, + enum mv_class exclude, const struct npar_test * ); diff --git a/src/language/stats/npar.q b/src/language/stats/npar.q index ab5047cf..74e8364f 100644 --- a/src/language/stats/npar.q +++ b/src/language/stats/npar.q @@ -20,23 +20,25 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include -#include -#include -#include -#include -#include -#include +#include + +#include -#include #include -#include -#include +#include +#include #include -#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include -#include "npar.h" #include "npar-summary.h" #include "gettext.h" @@ -75,7 +77,7 @@ struct npar_specs (those mentioned on ANY subcommand */ int n_vars; /* Number of variables in vv */ - struct casefilter *filter; /* The missing value filter */ + enum mv_class filter; /* Missing values to filter. 
*/ bool descriptives; /* Descriptive statistics should be calculated */ bool quartiles; /* Quartiles should be calculated */ @@ -84,13 +86,12 @@ struct npar_specs void one_sample_insert_variables (const struct npar_test *test, struct const_hsh_table *variables); -static bool -npar_execute(const struct ccase *first UNUSED, - const struct casefile *cf, void *aux, +static void +npar_execute(struct casereader *input, + const struct npar_specs *specs, const struct dataset *ds) { int t; - const struct npar_specs *specs = aux; struct descriptives *summary_descriptives = NULL; for ( t = 0 ; t < specs->n_tests; ++t ) @@ -101,7 +102,7 @@ npar_execute(const struct ccase *first UNUSED, msg (SW, _("NPAR subcommand not currently implemented.")); continue; } - test->execute (ds, cf, specs->filter, test); + test->execute (ds, casereader_clone (input), specs->filter, test); } if ( specs->descriptives ) @@ -109,21 +110,21 @@ npar_execute(const struct ccase *first UNUSED, summary_descriptives = xnmalloc (sizeof (*summary_descriptives), specs->n_vars); - npar_summary_calc_descriptives (summary_descriptives, cf, - specs->filter, + npar_summary_calc_descriptives (summary_descriptives, + casereader_clone (input), dataset_dict (ds), - specs->vv, specs->n_vars); + specs->vv, specs->n_vars, + specs->filter); } - if ( specs->descriptives || specs->quartiles ) + if ( (specs->descriptives || specs->quartiles) + && !taint_has_tainted_successor (casereader_get_taint (input)) ) do_summary_box (summary_descriptives, specs->vv, specs->n_vars ); free (summary_descriptives); - - return true; + casereader_destroy (input); } - int cmd_npar_tests (struct lexer *lexer, struct dataset *ds) { @@ -131,6 +132,9 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) int i; struct npar_specs npar_specs = {0, 0, 0, 0, 0, 0, 0, 0}; struct const_hsh_table *var_hash; + struct casegrouper *grouper; + struct casereader *input, *group; + npar_specs.pool = pool_create (); var_hash = const_hsh_create_pool 
(npar_specs.pool, 0, @@ -179,17 +183,20 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) } } - npar_specs.filter = - casefilter_create (cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM, 0, 0); - - if ( cmd.miss == NPAR_LISTWISE ) - casefilter_add_variables (npar_specs.filter, - npar_specs.vv, - npar_specs.n_vars); + npar_specs.filter = cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM; - ok = multipass_procedure_with_splits (ds, npar_execute, &npar_specs); + input = proc_open (ds); + if ( cmd.miss == NPAR_LISTWISE ) + input = casereader_create_filter_missing (input, + (struct variable **) npar_specs.vv, + npar_specs.n_vars, + npar_specs.filter, NULL); - casefilter_destroy (npar_specs.filter); + grouper = casegrouper_create_splits (input, dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + npar_execute (group, &npar_specs, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; const_hsh_destroy (var_hash); diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.q index 9b45119f..78300723 100644 --- a/src/language/stats/oneway.q +++ b/src/language/stats/oneway.q @@ -25,12 +25,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include #include -#include +#include +#include #include #include #include #include -#include #include #include #include @@ -39,9 +39,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include #include #include -#include #include #include +#include #include #include #include @@ -65,9 +65,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA /* (declarations) */ /* (functions) */ -static bool bad_weight_warn = true; - - static struct cmd_oneway cmd; /* The independent variable */ @@ -89,9 +86,8 @@ static struct hsh_table *global_group_hash ; static int ostensible_number_of_groups = -1; -static bool run_oneway(const struct ccase *first, - const struct casefile *cf, - void *_mode, const struct dataset *); +static void run_oneway (struct 
cmd_oneway *, struct casereader *, + const struct dataset *); /* Routines to show the output tables */ @@ -113,6 +109,8 @@ void output_oneway(void); int cmd_oneway (struct lexer *lexer, struct dataset *ds) { + struct casegrouper *grouper; + struct casereader *group; int i; bool ok; @@ -138,7 +136,12 @@ cmd_oneway (struct lexer *lexer, struct dataset *ds) } } - ok = multipass_procedure_with_splits (ds, run_oneway, &cmd); + /* Data pass. FIXME: error handling. */ + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + run_oneway (&cmd, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; free (vars); free_oneway (&cmd); @@ -887,17 +890,23 @@ free_value (void *value_, const void *aux UNUSED) free (value); } -static bool -run_oneway(const struct ccase *first, const struct casefile *cf, - void *cmd_, const struct dataset *ds) +static void +run_oneway (struct cmd_oneway *cmd, + struct casereader *input, + const struct dataset *ds) { - struct casereader *r; + struct taint *taint; + struct dictionary *dict = dataset_dict (ds); + enum mv_class exclude; + struct casereader *reader; struct ccase c; - struct casefilter *filter = NULL; - struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_; + if (!casereader_peek (input, 0, &c)) + return; + output_split_file_values (ds, &c); + case_destroy (&c); - output_split_file_values (ds, first); + taint = taint_clone (casereader_get_taint (input)); global_group_hash = hsh_create(4, (hsh_compare_func *) compare_values, @@ -907,31 +916,25 @@ run_oneway(const struct ccase *first, const struct casefile *cf, precalc(cmd); - filter = casefilter_create ( (cmd->incl != ONEWAY_INCLUDE - ? MV_ANY : MV_SYSTEM), - vars, n_vars ); + exclude = cmd->incl != ONEWAY_INCLUDE ? 
MV_ANY : MV_SYSTEM; + input = casereader_create_filter_missing (input, &indep_var, 1, + exclude, NULL); + if (cmd->miss == ONEWAY_LISTWISE) + input = casereader_create_filter_missing (input, vars, n_vars, + exclude, NULL); + input = casereader_create_filter_weight (input, dict, NULL, NULL); - for(r = casefile_get_reader (cf, filter); - casereader_read (r, &c) ; - case_destroy (&c)) + reader = casereader_clone (input); + for (; casereader_read (reader, &c); case_destroy (&c)) { size_t i; - const double weight = - dict_get_case_weight (dataset_dict (ds), &c, &bad_weight_warn); - - const union value *indep_val; - void **p; + const double weight = dict_get_case_weight (dict, &c, NULL); - if ( casefilter_variable_missing (filter, &c, indep_var)) - continue; - - indep_val = case_data (&c, indep_var); - p = hsh_probe (global_group_hash, indep_val); + const union value *indep_val = case_data (&c, indep_var); + void **p = hsh_probe (global_group_hash, indep_val); if (*p == NULL) *p = value_dup (indep_val, var_get_width (indep_var)); - - hsh_insert ( global_group_hash, (void *) indep_val ); for ( i = 0 ; i < n_vars ; ++i ) { @@ -960,7 +963,7 @@ run_oneway(const struct ccase *first, const struct casefile *cf, hsh_insert ( group_hash, (void *) gs ); } - if (! 
casefilter_variable_missing (filter, &c, v)) + if (!var_is_value_missing (v, val, exclude)) { struct group_statistics *totals = &gp->ugs; @@ -989,24 +992,21 @@ run_oneway(const struct ccase *first, const struct casefile *cf, } } - - casereader_destroy (r); + casereader_destroy (reader); postcalc(cmd); if ( stat_tables & STAT_HOMO ) - levene (dataset_dict (ds), cf, indep_var, n_vars, vars, - filter); + levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude); - casefilter_destroy (filter); + casereader_destroy (input); ostensible_number_of_groups = hsh_count (global_group_hash); - - output_oneway(); - - return true; + if (!taint_has_tainted_successor (taint)) + output_oneway(); + taint_destroy (taint); } diff --git a/src/language/stats/rank.q b/src/language/stats/rank.q index 3f1dd3a9..c42f896a 100644 --- a/src/language/stats/rank.q +++ b/src/language/stats/rank.q @@ -18,27 +18,28 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include -#include "sort-criteria.h" +#include +#include #include #include #include #include #include +#include #include -#include -#include -#include +#include +#include +#include #include #include -#include #include +#include #include #include #include #include -#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -152,7 +153,7 @@ static enum mv_class exclude_values; static struct rank_spec *rank_specs; static size_t n_rank_specs; -static struct sort_criteria *sc; +static struct case_ordering *sc; static const struct variable **group_vars; static size_t n_group_vars; @@ -165,14 +166,14 @@ static int k_ntiles; static struct cmd_rank cmd; -static struct casefile *rank_sorted_casefile (struct casefile *cf, - const struct sort_criteria *, - const struct dictionary *, - const struct rank_spec *rs, - int n_rank_specs, - int idx, - const struct missing_values *miss - ); +static void rank_sorted_file (struct casereader *, + struct casewriter *, + const struct dictionary *, + const struct rank_spec *rs, 
+ int n_rank_specs, + int idx, + struct variable *rank_var); + static const char * fraction_name(void) { @@ -232,69 +233,56 @@ create_var_label (struct variable *dest_var, } -static bool -rank_cmd (struct dataset *ds, const struct sort_criteria *sc, +static bool +rank_cmd (struct dataset *ds, const struct case_ordering *sc, const struct rank_spec *rank_specs, int n_rank_specs) { - struct sort_criteria criteria; - bool result = true; + struct case_ordering *base_ordering; + bool ok = true; int i; const int n_splits = dict_get_split_cnt (dataset_dict (ds)); - criteria.crit_cnt = n_splits + n_group_vars + 1; - criteria.crits = xnmalloc (criteria.crit_cnt, sizeof *criteria.crits); + base_ordering = case_ordering_create (dataset_dict (ds)); for (i = 0; i < n_splits ; i++) - { - const struct variable *v = dict_get_split_vars (dataset_dict (ds))[i]; - criteria.crits[i].fv = var_get_case_index (v); - criteria.crits[i].width = var_get_width (v); - criteria.crits[i].dir = SRT_ASCEND; - } + case_ordering_add_var (base_ordering, + dict_get_split_vars (dataset_dict (ds))[i], + SRT_ASCEND); + for (i = 0; i < n_group_vars; i++) + case_ordering_add_var (base_ordering, group_vars[i], SRT_ASCEND); + for (i = 0 ; i < case_ordering_get_var_cnt (sc) ; ++i ) { - criteria.crits[i + n_splits].fv = var_get_case_index (group_vars[i]); - criteria.crits[i + n_splits].width = var_get_width (group_vars[i]); - criteria.crits[i + n_splits].dir = SRT_ASCEND; - } - for (i = 0 ; i < sc->crit_cnt ; ++i ) - { - struct casefile *out ; - struct casefile *cf ; - struct casereader *reader ; - struct casefile *sorted_cf ; - - /* Obtain active file in CF. */ - if (!procedure (ds, NULL, NULL)) - goto error; - - cf = proc_capture_output (ds); - - /* Sort CF into SORTED_CF. 
*/ - reader = casefile_get_destructive_reader (cf) ; - criteria.crits[criteria.crit_cnt - 1] = sc->crits[i]; - assert ( sc->crits[i].fv == var_get_case_index (src_vars[i]) ); - sorted_cf = sort_execute (reader, &criteria, NULL); - casefile_destroy (cf); - - out = rank_sorted_casefile (sorted_cf, &criteria, - dataset_dict (ds), - rank_specs, n_rank_specs, - i, var_get_missing_values (src_vars[i])); - if ( NULL == out ) - { - result = false ; - continue ; - } - - proc_set_source (ds, storage_source_create (out)); + struct case_ordering *ordering; + struct casegrouper *grouper; + struct casereader *group; + struct casewriter *output; + struct casereader *ranked_file; + + ordering = case_ordering_clone (base_ordering); + case_ordering_add_var (ordering, + case_ordering_get_var (sc, i), + case_ordering_get_direction (sc, i)); + + proc_discard_output (ds); + grouper = casegrouper_create_case_ordering (sort_execute (proc_open (ds), + ordering), + base_ordering); + output = autopaging_writer_create (dict_get_next_value_idx ( + dataset_dict (ds))); + while (casegrouper_get_next_group (grouper, &group)) + rank_sorted_file (group, output, dataset_dict (ds), + rank_specs, n_rank_specs, + i, src_vars[i]); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + ranked_file = casewriter_make_reader (output); + ok = proc_set_active_file_data (ds, ranked_file) && ok; + if (!ok) + break; } + case_ordering_destroy (base_ordering); - free (criteria.crits); - return result ; - -error: - free (criteria.crits); - return false ; + return ok; } /* Hardly a rank function !! */ @@ -311,7 +299,8 @@ rank_rank (double c, double cc, double cc_1, int i, double w UNUSED) { double rank; - if ( c >= 1.0 ) + + if ( c >= 1.0 ) { switch (cmd.ties) { @@ -471,192 +460,71 @@ rank_savage (double c, double cc, double cc_1, NOT_REACHED(); } - -/* Rank the casefile belonging to CR, starting from the current - postition of CR continuing up to and including the ENDth case. 
- - RS points to an array containing the rank specifications to - use. N_RANK_SPECS is the number of elements of RS. - - - DEST_VAR_INDEX is the index into the rank_spec destvar element - to be used for this ranking. - - Prerequisites: 1. The casefile must be sorted according to CRITERION. - 2. W is the sum of the non-missing caseweights for this - range of the casefile. -*/ static void -rank_cases (struct casereader *cr, - unsigned long end, - const struct dictionary *dict, - const struct sort_criterion *criterion, - const struct missing_values *mv, - double w, - const struct rank_spec *rs, - int n_rank_specs, - int dest_var_index, - struct casefile *dest) +rank_sorted_file (struct casereader *input, + struct casewriter *output, + const struct dictionary *dict, + const struct rank_spec *rs, + int n_rank_specs, + int dest_idx, + struct variable *rank_var) { - bool warn = true; + struct casereader *pass1, *pass2, *pass2_1; + struct casegrouper *tie_grouper; + struct ccase c; + double w = 0.0; double cc = 0.0; - double cc_1; - int iter = 1; + int tie_group = 1; - const int fv = criterion->fv; - const int width = criterion->width; - while (casereader_cnum (cr) < end) - { - struct casereader *lookahead; - const union value *this_value; - bool this_value_is_missing; - struct ccase this_case, lookahead_case; - double c; - int i; - size_t n = 0; - - if (!casereader_read_xfer (cr, &this_case)) - break; + input = casereader_create_filter_missing (input, &rank_var, 1, + exclude_values, output); + input = casereader_create_filter_weight (input, dict, NULL, output); - this_value = case_data_idx (&this_case, fv); - this_value_is_missing = mv_is_value_missing (mv, this_value, - exclude_values); - c = dict_get_case_weight (dict, &this_case, &warn); + casereader_split (input, &pass1, &pass2); - lookahead = casereader_clone (cr); - n = 0; - while (casereader_cnum (lookahead) < end - && casereader_read_xfer (lookahead, &lookahead_case)) - { - const union value *lookahead_value = 
case_data_idx (&lookahead_case, fv); - int diff = compare_values (this_value, lookahead_value, width); + /* Pass 1: Get total group weight. */ + for (; casereader_read (pass1, &c); case_destroy (&c)) + w += dict_get_case_weight (dict, &c, NULL); + casereader_destroy (pass1); - if (diff != 0) - { - /* Make sure the casefile was sorted */ - assert ( diff == ((criterion->dir == SRT_ASCEND) ? -1 :1)); - - case_destroy (&lookahead_case); - break; - } - - c += dict_get_case_weight (dict, &lookahead_case, &warn); - case_destroy (&lookahead_case); - n++; - } - casereader_destroy (lookahead); - - cc_1 = cc; - if ( !this_value_is_missing ) - cc += c; - - do - { - for (i = 0; i < n_rank_specs; ++i) - { - const struct variable *dst_var = rs[i].destvars[dest_var_index]; - - if (this_value_is_missing) - case_data_rw (&this_case, dst_var)->f = SYSMIS; - else - case_data_rw (&this_case, dst_var)->f = - rank_func[rs[i].rfunc](c, cc, cc_1, iter, w); - } - casefile_append_xfer (dest, &this_case); - } - while (n-- > 0 && casereader_read_xfer (cr, &this_case)); - - if ( !this_value_is_missing ) - iter++; - } - - /* If this isn't true, then all the results will be wrong */ - assert ( w == cc ); -} - -static bool -same_group (const struct ccase *a, const struct ccase *b, - const struct sort_criteria *crit) -{ - size_t i; - - for (i = 0; i < crit->crit_cnt - 1; i++) + /* Pass 2: Do ranking. 
*/ + tie_grouper = casegrouper_create_vars (pass2, &rank_var, 1); + while (casegrouper_get_next_group (tie_grouper, &pass2_1)) { - struct sort_criterion *c = &crit->crits[i]; - if (compare_values (case_data_idx (a, c->fv), - case_data_idx (b, c->fv), c->width) != 0) - return false; - } - - return true; -} - -static struct casefile * -rank_sorted_casefile (struct casefile *cf, - const struct sort_criteria *crit, - const struct dictionary *dict, - const struct rank_spec *rs, - int n_rank_specs, - int dest_idx, - const struct missing_values *mv) -{ - struct casefile *dest = fastfile_create (casefile_get_value_cnt (cf)); - struct casereader *lookahead = casefile_get_reader (cf, NULL); - struct casereader *pos = casereader_clone (lookahead); - struct ccase group_case; - bool warn = true; - - struct sort_criterion *ultimate_crit = &crit->crits[crit->crit_cnt - 1]; + struct casereader *pass2_2; + double cc_1 = cc; + double tw = 0.0; + int i; - if (casereader_read (lookahead, &group_case)) - { - struct ccase this_case; - const union value *this_value ; - double w = 0.0; - this_value = case_data_idx( &group_case, ultimate_crit->fv); + pass2_2 = casereader_clone (pass2_1); + taint_propagate (casereader_get_taint (pass2_2), + casewriter_get_taint (output)); - if ( !mv_is_value_missing (mv, this_value, exclude_values) ) - w = dict_get_case_weight (dict, &group_case, &warn); + /* Pass 2.1: Sum up weight for tied cases. */ + for (; casereader_read (pass2_1, &c); case_destroy (&c)) + tw += dict_get_case_weight (dict, &c, NULL); + cc += tw; + casereader_destroy (pass2_1); - while (casereader_read (lookahead, &this_case)) + /* Pass 2.2: Rank tied cases. 
*/ + while (casereader_read (pass2_2, &c)) { - const union value *this_value = - case_data_idx(&this_case, ultimate_crit->fv); - double c = dict_get_case_weight (dict, &this_case, &warn); - if (!same_group (&group_case, &this_case, crit)) + for (i = 0; i < n_rank_specs; ++i) { - rank_cases (pos, casereader_cnum (lookahead) - 1, - dict, - ultimate_crit, - mv, w, - rs, n_rank_specs, - dest_idx, dest); - - w = 0.0; - case_destroy (&group_case); - case_move (&group_case, &this_case); + const struct variable *dst_var = rs[i].destvars[dest_idx]; + double *dst_value = &case_data_rw (&c, dst_var)->f; + *dst_value = rank_func[rs[i].rfunc] (tw, cc, cc_1, tie_group, w); } - if ( !mv_is_value_missing (mv, this_value, exclude_values) ) - w += c; - case_destroy (&this_case); + casewriter_write (output, &c); } - case_destroy (&group_case); - rank_cases (pos, ULONG_MAX, dict, ultimate_crit, mv, w, - rs, n_rank_specs, dest_idx, dest); - } - - if (casefile_error (dest)) - { - casefile_destroy (dest); - dest = NULL; + casereader_destroy (pass2_2); + + tie_group++; } - - casefile_destroy (cf); - return dest; + casegrouper_destroy (tie_grouper); } - /* Transformation function to enumerate all the cases */ static int create_resort_key (void *key_var_, struct ccase *cc, casenumber case_num) @@ -749,7 +617,7 @@ rank_cleanup(void) rank_specs = NULL; n_rank_specs = 0; - sort_destroy_criteria (sc); + case_ordering_destroy (sc); sc = NULL; free (src_vars); @@ -783,13 +651,13 @@ cmd_rank (struct lexer *lexer, struct dataset *ds) rank_specs = xmalloc (sizeof (*rank_specs)); rank_specs[0].rfunc = RANK; - rank_specs[0].destvars = - xcalloc (sc->crit_cnt, sizeof (struct variable *)); + rank_specs[0].destvars = + xcalloc (case_ordering_get_var_cnt (sc), sizeof (struct variable *)); n_rank_specs = 1; } - assert ( sc->crit_cnt == n_src_vars); + assert ( case_ordering_get_var_cnt (sc) == n_src_vars); /* Create variables for all rank destinations which haven't already been created with INTO. 
@@ -891,31 +759,29 @@ cmd_rank (struct lexer *lexer, struct dataset *ds) msg(MW, _("FRACTION has been specified, but NORMAL and PROPORTION rank functions have not been requested. The FRACTION subcommand will be ignored.") ); /* Add a variable which we can sort by to get back the original - order */ - order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0); + order */ + order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0); add_transformation (ds, create_resort_key, 0, order); /* Do the ranking */ result = rank_cmd (ds, sc, rank_specs, n_rank_specs); - /* Put the active file back in its original order */ + /* Put the active file back in its original order. Delete + our sort key, which we don't need anymore. */ { - struct sort_criteria criteria; - struct sort_criterion restore_criterion ; - restore_criterion.fv = var_get_case_index (order); - restore_criterion.width = 0; - restore_criterion.dir = SRT_ASCEND; - - criteria.crits = &restore_criterion; - criteria.crit_cnt = 1; - - sort_active_file_in_place (ds, &criteria); + struct case_ordering *ordering = case_ordering_create (dataset_dict (ds)); + struct casereader *sorted; + case_ordering_add_var (ordering, order, SRT_ASCEND); + /* FIXME: loses error conditions. */ + proc_discard_output (ds); + sorted = sort_execute (proc_open (ds), ordering); + result = proc_commit (ds) && result; + + dict_delete_var (dataset_dict (ds), order); + result = proc_set_active_file_data (ds, sorted) && result; } - /* ... and we don't need our sort key anymore. 
So delete it */ - dict_delete_var (dataset_dict (ds), order); - rank_cleanup(); @@ -928,16 +794,16 @@ cmd_rank (struct lexer *lexer, struct dataset *ds) static int rank_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd UNUSED, void *aux UNUSED) { - static const int terminators[2] = {T_BY, 0}; - lex_match (lexer, '='); if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL) && lex_token (lexer) != T_ALL) return 2; - sc = sort_parse_criteria (lexer, dataset_dict (ds), - &src_vars, &n_src_vars, 0, terminators); + sc = parse_case_ordering (lexer, dataset_dict (ds), NULL); + if (sc == NULL) + return 0; + case_ordering_get_vars (sc, &src_vars, &n_src_vars); if ( lex_match (lexer, T_BY) ) { @@ -970,9 +836,10 @@ parse_rank_function (struct lexer *lexer, struct dictionary *dict, struct cmd_ra rank_specs[n_rank_specs - 1].rfunc = f; rank_specs[n_rank_specs - 1].destvars = NULL; - rank_specs[n_rank_specs - 1].destvars = - xcalloc (sc->crit_cnt, sizeof (struct variable *)); - + rank_specs[n_rank_specs - 1].destvars = + xcalloc (case_ordering_get_var_cnt (sc), + sizeof (struct variable *)); + if (lex_match_id (lexer, "INTO")) { struct variable *destvar; @@ -985,7 +852,7 @@ parse_rank_function (struct lexer *lexer, struct dictionary *dict, struct cmd_ra msg(SE, _("Variable %s already exists."), lex_tokid (lexer)); return 0; } - if ( var_count >= sc->crit_cnt ) + if ( var_count >= case_ordering_get_var_cnt (sc) ) { msg(SE, _("Too many variables in INTO clause.")); return 0; diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q index e10b8962..690b6809 100644 --- a/src/language/stats/regression.q +++ b/src/language/stats/regression.q @@ -26,7 +26,8 @@ #include "regression-export.h" #include -#include +#include +#include #include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +50,7 @@ #include #include "gettext.h" +#define 
_(msgid) gettext (msgid) #define REG_LARGE_DATA 1000 @@ -120,14 +123,8 @@ static size_t n_variables; */ static struct file_handle *model_file; -/* - Return value for the procedure. - */ -static int pspp_reg_rc = CMD_SUCCESS; - -static bool run_regression (const struct ccase *, - const struct casefile *, void *, - const struct dataset *); +static bool run_regression (struct casereader *, struct cmd_regression *, + struct dataset *); /* STATISTICS subcommand output functions. @@ -951,6 +948,9 @@ regression_custom_export (struct lexer *lexer, struct dataset *ds UNUSED, int cmd_regression (struct lexer *lexer, struct dataset *ds) { + struct casegrouper *grouper; + struct casereader *group; + bool ok; size_t i; if (!parse_regression (lexer, ds, &cmd, NULL)) @@ -961,12 +961,18 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) { models[i] = NULL; } - if (!multipass_procedure_with_splits (ds, run_regression, &cmd)) - return CMD_CASCADING_FAILURE; + + /* Data pass. */ + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + run_regression (group, &cmd, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + subcommand_save (ds, cmd.sbc_save, models); free (v_variables); free (models); - return pspp_reg_rc; + return ok ? CMD_SUCCESS : CMD_FAILURE; } /* @@ -978,47 +984,6 @@ is_depvar (size_t k, const struct variable *v) return v == v_variables[k]; } -/* - Mark missing cases. Return the number of non-missing cases. - Compute the first two moments. 
- */ -static size_t -mark_missing_cases (const struct casefile *cf, const struct variable *v, - int *is_missing_case, double n_data, - struct moments_var *mom) -{ - struct casereader *r; - struct ccase c; - size_t row; - const union value *val; - double w = 1.0; - - for (r = casefile_get_reader (cf, NULL); - casereader_read (r, &c); case_destroy (&c)) - { - row = casereader_cnum (r) - 1; - - val = case_data (&c, v); - if (mom != NULL) - { - moments1_add (mom->m, val->f, w); - } - cat_value_update (v, val); - if (var_is_value_missing (v, val, MV_ANY)) - { - if (!is_missing_case[row]) - { - /* Now it is missing. */ - n_data--; - is_missing_case[row] = 1; - } - } - } - casereader_destroy (r); - - return n_data; -} - /* Parser for the variables sub command */ static int regression_custom_variables (struct lexer *lexer, struct dataset *ds, @@ -1046,74 +1011,59 @@ regression_custom_variables (struct lexer *lexer, struct dataset *ds, return 1; } -/* - Count the explanatory variables. The user may or may - not have specified a response variable in the syntax. - */ +/* Identify the explanatory variables in v_variables. Returns + the number of independent variables. */ static int -get_n_indep (const struct variable *v) +identify_indep_vars (struct variable **indep_vars, struct variable *depvar) { - int result; - int i = 0; + int n_indep_vars = 0; + int i; - result = n_variables; - while (i < n_variables) - { - if (is_depvar (i, v)) - { - result--; - i = n_variables; - } - i++; - } - return (result == 0) ? 1 : result; + for (i = 0; i < n_variables; i++) + if (!is_depvar (i, depvar)) + indep_vars[n_indep_vars++] = v_variables[i]; + + return n_indep_vars; } -/* - Read from the active file. Identify the explanatory variables in - v_variables. Encode categorical variables. Drop cases with missing - values. -*/ +/* Encode categorical variables. + Returns number of valid cases. 
*/ static int -prepare_data (int n_data, int is_missing_case[], - const struct variable **indep_vars, - const struct variable *depvar, const struct casefile *cf, - struct moments_var *mom) +prepare_categories (struct casereader *input, + struct variable **vars, size_t n_vars, + struct moments_var *mom) { - int i; - int j; + int n_data; + struct ccase c; + size_t i; - assert (indep_vars != NULL); - j = 0; - for (i = 0; i < n_variables; i++) + for (i = 0; i < n_vars; i++) + if (var_is_alpha (vars[i])) + cat_stored_values_create (vars[i]); + + n_data = 0; + for (; casereader_read (input, &c); case_destroy (&c)) { /* The second condition ensures the program will run even if there is only one variable to act as both explanatory and response. */ - if ((!is_depvar (i, depvar)) || (n_variables == 1)) - { - indep_vars[j] = v_variables[i]; - j++; - if (var_is_alpha (v_variables[i])) - { - /* Make a place to hold the binary vectors - corresponding to this variable's values. */ - cat_stored_values_create (v_variables[i]); - } - n_data = - mark_missing_cases (cf, v_variables[i], is_missing_case, n_data, - mom + i); - } - } - /* - Mark missing cases for the dependent variable. - */ - n_data = mark_missing_cases (cf, depvar, is_missing_case, n_data, NULL); + for (i = 0; i < n_vars; i++) + { + const union value *val = case_data (&c, vars[i]); + if (var_is_alpha (vars[i])) + cat_value_update (vars[i], val); + else + moments1_add (mom[i].m, val->f, 1.0); + } + n_data++; + } + casereader_destroy (input); return n_data; } + static void coeff_init (pspp_linreg_cache * c, struct design_matrix *dm) { @@ -1155,24 +1105,14 @@ compute_moments (pspp_linreg_cache * c, struct moments_var *mom, } } } + static bool -run_regression (const struct ccase *first, - const struct casefile *cf, void *cmd_ UNUSED, - const struct dataset *ds) +run_regression (struct casereader *input, struct cmd_regression *cmd, + struct dataset *ds) { size_t i; - size_t n_data = 0; /* Number of valide cases. 
*/ - size_t n_cases; /* Number of cases. */ - size_t row; - size_t case_num; int n_indep = 0; int k; - /* - Keep track of the missing cases. - */ - int *is_missing_case; - const union value *val; - struct casereader *r; struct ccase c; const struct variable **indep_vars; struct design_matrix *X; @@ -1183,7 +1123,10 @@ run_regression (const struct ccase *first, assert (models != NULL); - output_split_file_values (ds, first); + if (!casereader_peek (input, 0, &c)) + return true; + output_split_file_values (ds, &c); + case_destroy (&c); if (!v_variables) { @@ -1191,19 +1134,15 @@ run_regression (const struct ccase *first, 1u << DC_SYSTEM); } - n_cases = casefile_get_case_cnt (cf); - - for (i = 0; i < cmd.n_dependent; i++) + for (i = 0; i < cmd->n_dependent; i++) { - if (!var_is_numeric (cmd.v_dependent[i])) + if (!var_is_numeric (cmd->v_dependent[i])) { - msg (SE, gettext ("Dependent variable must be numeric.")); - pspp_reg_rc = CMD_FAILURE; - return true; + msg (SE, _("Dependent variable must be numeric.")); + return false; } } - is_missing_case = xnmalloc (n_cases, sizeof (*is_missing_case)); mom = xnmalloc (n_variables, sizeof (*mom)); for (i = 0; i < n_variables; i++) { @@ -1212,20 +1151,28 @@ run_regression (const struct ccase *first, } lopts.get_depvar_mean_std = 1; - for (k = 0; k < cmd.n_dependent; k++) + lopts.get_indep_mean_std = xnmalloc (n_variables, sizeof (int)); + indep_vars = xnmalloc (n_variables, sizeof *indep_vars); + + for (k = 0; k < cmd->n_dependent; k++) { - n_indep = get_n_indep ((const struct variable *) cmd.v_dependent[k]); - lopts.get_indep_mean_std = xnmalloc (n_indep, sizeof (int)); - indep_vars = xnmalloc (n_indep, sizeof *indep_vars); - assert (indep_vars != NULL); + struct variable *dep_var; + struct casereader *reader; + casenumber row; + struct ccase c; + size_t n_data; /* Number of valid cases. 
*/ + + dep_var = cmd->v_dependent[k]; + n_indep = identify_indep_vars (indep_vars, dep_var); + + reader = casereader_clone (input); + reader = casereader_create_filter_missing (reader, indep_vars, n_indep, + MV_ANY, NULL); + reader = casereader_create_filter_missing (reader, &dep_var, 1, + MV_ANY, NULL); + n_data = prepare_categories (casereader_clone (reader), + indep_vars, n_indep, mom); - for (i = 0; i < n_cases; i++) - { - is_missing_case[i] = 0; - } - n_data = prepare_data (n_cases, is_missing_case, indep_vars, - cmd.v_dependent[k], - (const struct casefile *) cf, mom); if ((n_data > 0) && (n_indep > 0)) { Y = gsl_vector_alloc (n_data); @@ -1240,8 +1187,8 @@ run_regression (const struct ccase *first, models[k] = pspp_linreg_cache_alloc (X->m->size1, X->m->size2); models[k]->indep_means = gsl_vector_alloc (X->m->size2); models[k]->indep_std = gsl_vector_alloc (X->m->size2); - models[k]->depvar = (const struct variable *) cmd.v_dependent[k]; - /* + models[k]->depvar = dep_var; + /* For large data sets, use QR decomposition. */ if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA) @@ -1250,50 +1197,23 @@ run_regression (const struct ccase *first, } /* - The second pass fills the design matrix. - */ - row = 0; - for (r = casefile_get_reader (cf, NULL); casereader_read (r, &c); - case_destroy (&c)) - /* Iterate over the cases. */ - { - case_num = casereader_cnum (r) - 1; - if (!is_missing_case[case_num]) - { - for (i = 0; i < n_variables; ++i) /* Iterate over the - variables for the - current case. - */ - { - val = case_data (&c, v_variables[i]); - /* - Independent/dependent variable separation. The - 'variables' subcommand specifies a varlist which contains - both dependent and independent variables. The dependent - variables are specified with the 'dependent' - subcommand, and maybe also in the 'variables' subcommand. - We need to separate the two. 
- */ - if (!is_depvar (i, cmd.v_dependent[k])) - { - if (var_is_alpha (v_variables[i])) - { - design_matrix_set_categorical (X, row, - v_variables[i], - val); - } - else - { - design_matrix_set_numeric (X, row, - v_variables[i], val); - } - } - } - val = case_data (&c, cmd.v_dependent[k]); - gsl_vector_set (Y, row, val->f); - row++; - } - } + The second pass fills the design matrix. + */ + reader = casereader_create_counter (reader, &row, -1); + for (; casereader_read (reader, &c); case_destroy (&c)) + { + for (i = 0; i < n_indep; ++i) + { + struct variable *v = indep_vars[i]; + const union value *val = case_data (&c, v); + if (var_is_alpha (v)) + design_matrix_set_categorical (X, row, v, val); + else + design_matrix_set_numeric (X, row, v, val); + } + gsl_vector_set (Y, row, case_num (&c, dep_var)); + } + casereader_destroy (reader); /* Now that we know the number of coefficients, allocate space and store pointers to the variables that correspond to the @@ -1306,26 +1226,24 @@ run_regression (const struct ccase *first, */ pspp_linreg ((const gsl_vector *) Y, X->m, &lopts, models[k]); compute_moments (models[k], mom, X, n_variables); - subcommand_statistics (cmd.a_statistics, models[k]); - subcommand_export (cmd.sbc_export, models[k]); + + if (!taint_has_tainted_successor (casereader_get_taint (input))) + { + subcommand_statistics (cmd->a_statistics, models[k]); + subcommand_export (cmd->sbc_export, models[k]); + } gsl_vector_free (Y); design_matrix_destroy (X); - free (indep_vars); - free (lopts.get_indep_mean_std); - casereader_destroy (r); } else { msg (SE, gettext ("No valid data found. 
This command was skipped.")); } } - for (i = 0; i < n_variables; i++) - { - moments1_destroy ((mom + i)->m); - } - free (mom); - free (is_missing_case); + free (indep_vars); + free (lopts.get_indep_mean_std); + casereader_destroy (input); return true; } diff --git a/src/language/stats/sort-cases.c b/src/language/stats/sort-cases.c index 13e0c6ed..913718f4 100644 --- a/src/language/stats/sort-cases.c +++ b/src/language/stats/sort-cases.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -41,13 +42,15 @@ int cmd_sort_cases (struct lexer *lexer, struct dataset *ds) { - struct sort_criteria *criteria; - bool success = false; + struct case_ordering *ordering; + struct casereader *output; + bool ok = false; lex_match (lexer, T_BY); - criteria = sort_parse_criteria (lexer, dataset_dict (ds), NULL, NULL, NULL, NULL); - if (criteria == NULL) + proc_cancel_temporary_transformations (ds); + ordering = parse_case_ordering (lexer, dataset_dict (ds), NULL); + if (ordering == NULL) return CMD_CASCADING_FAILURE; if (get_testing_mode () && lex_match (lexer, '/')) @@ -57,7 +60,6 @@ cmd_sort_cases (struct lexer *lexer, struct dataset *ds) goto done; min_buffers = max_buffers = lex_integer (lexer); - allow_internal_sort = false; if (max_buffers < 2) { msg (SE, _("Buffer limit must be at least 2.")); @@ -67,14 +69,17 @@ cmd_sort_cases (struct lexer *lexer, struct dataset *ds) lex_get (lexer); } - success = sort_active_file_in_place (ds, criteria); + proc_discard_output (ds); + output = sort_execute (proc_open (ds), ordering); + ordering = NULL; + ok = proc_commit (ds); + ok = proc_set_active_file_data (ds, output) && ok; done: min_buffers = 64; max_buffers = INT_MAX; - allow_internal_sort = true; - sort_destroy_criteria (criteria); - return success ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE; + case_ordering_destroy (ordering); + return ok ? 
lex_end_of_command (lexer) : CMD_CASCADING_FAILURE; } diff --git a/src/language/stats/sort-criteria.c b/src/language/stats/sort-criteria.c index 81b68eab..bd1983dd 100644 --- a/src/language/stats/sort-criteria.c +++ b/src/language/stats/sort-criteria.c @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -17,72 +17,46 @@ 02110-1301, USA. */ #include -#include -#include + +#include + #include -#include -#include -#include -#include + +#include +#include +#include #include #include -#include -#include -#include "sort-criteria.h" -#include +#include #include "gettext.h" #define _(msgid) gettext (msgid) -static bool is_terminator(int tok, const int *terminators); - - /* Parses a list of sort keys and returns a struct sort_criteria based on it. Returns a null pointer on error. If SAW_DIRECTION is nonnull, sets *SAW_DIRECTION to true if at least one parenthesized sort direction was specified, false - otherwise. - If TERMINATORS is non-null, then it must be a pointer to a - null terminated list of tokens, in addition to the defaults, - which are to be considered terminators of the clause being parsed. - The default terminators are '/' and '.' - -*/ -struct sort_criteria * -sort_parse_criteria (struct lexer *lexer, const struct dictionary *dict, - const struct variable ***vars, size_t *var_cnt, - bool *saw_direction, - const int *terminators - ) + otherwise. 
*/ +struct case_ordering * +parse_case_ordering (struct lexer *lexer, const struct dictionary *dict, + bool *saw_direction) { - struct sort_criteria *criteria; - const struct variable **local_vars = NULL; - size_t local_var_cnt; - - assert ((vars == NULL) == (var_cnt == NULL)); - if (vars == NULL) - { - vars = &local_vars; - var_cnt = &local_var_cnt; - } - - criteria = xmalloc (sizeof *criteria); - criteria->crits = NULL; - criteria->crit_cnt = 0; - - *vars = NULL; - *var_cnt = 0; - if (saw_direction != NULL) + struct case_ordering *ordering = case_ordering_create (dict); + struct variable **vars = NULL; + size_t var_cnt = 0; + + if (saw_direction != NULL) *saw_direction = false; do { - size_t prev_var_cnt = *var_cnt; enum sort_direction direction; + size_t i; /* Variables. */ - if (!parse_variables_const (lexer, dict, vars, var_cnt, - PV_NO_DUPLICATE | PV_APPEND | PV_NO_SCRATCH)) + free (vars); + vars = NULL; + if (!parse_variables_const (lexer, dict, &vars, &var_cnt, PV_NO_SCRATCH)) goto error; /* Sort direction. */ @@ -108,57 +82,19 @@ sort_parse_criteria (struct lexer *lexer, const struct dictionary *dict, else direction = SRT_ASCEND; - criteria->crits = xnrealloc (criteria->crits, - *var_cnt, sizeof *criteria->crits); - criteria->crit_cnt = *var_cnt; - for (; prev_var_cnt < criteria->crit_cnt; prev_var_cnt++) - { - struct sort_criterion *c = &criteria->crits[prev_var_cnt]; - c->fv = var_get_case_index ((*vars)[prev_var_cnt]); - c->width = var_get_width ((*vars)[prev_var_cnt]); - c->dir = direction; - } + for (i = 0; i < var_cnt; i++) + if (!case_ordering_add_var (ordering, vars[i], direction)) + msg (SW, _("Variable %s specified twice in sort criteria."), + var_get_name (vars[i])); } - while (lex_token (lexer) != '.' 
&& lex_token (lexer) != '/' && !is_terminator(lex_token (lexer), terminators)); + while (lex_token (lexer) == T_ID + && dict_lookup_var (dict, lex_tokid (lexer)) != NULL); - free (local_vars); - return criteria; + free (vars); + return ordering; error: - free (local_vars); - sort_destroy_criteria (criteria); + free (vars); + case_ordering_destroy (ordering); return NULL; } - -/* Return TRUE if TOK is a member of the list of TERMINATORS. - FALSE otherwise */ -static bool -is_terminator(int tok, const int *terminators) -{ - if (terminators == NULL ) - return false; - - while ( *terminators) - { - if (tok == *terminators++) - return true; - } - - return false; -} - - - -/* Destroys a SORT CASES program. */ -void -sort_destroy_criteria (struct sort_criteria *criteria) -{ - if (criteria != NULL) - { - free (criteria->crits); - free (criteria); - } -} - - - diff --git a/src/language/stats/sort-criteria.h b/src/language/stats/sort-criteria.h index 1c44cc58..b2bd9ab4 100644 --- a/src/language/stats/sort-criteria.h +++ b/src/language/stats/sort-criteria.h @@ -23,17 +23,12 @@ #include #include -struct variable; struct dictionary; -struct lexer ; +struct lexer; -struct sort_criteria *sort_parse_criteria (struct lexer *, const struct dictionary *, - const struct variable ***, size_t *, - bool *saw_direction, - const int *terminators - ); - -void sort_destroy_criteria (struct sort_criteria *criteria) ; +struct case_ordering *parse_case_ordering (struct lexer *, + const struct dictionary *, + bool *saw_direction); #endif /* SORT_PRS_H */ diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q index b593ebc4..91a7179e 100644 --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@ -25,13 +25,12 @@ #include #include -#include +#include +#include #include #include #include #include -#include - #include #include #include @@ -41,9 +40,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -215,28 +214,28 @@ 
enum { static int common_calc (const struct dictionary *dict, const struct ccase *, void *, - const struct casefilter *filter); + enum mv_class); static void common_precalc (struct cmd_t_test *); static void common_postcalc (struct cmd_t_test *); -static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, const struct casefilter *); +static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, enum mv_class); static void one_sample_precalc (struct cmd_t_test *); static void one_sample_postcalc (struct cmd_t_test *); static int paired_calc (const struct dictionary *dict, const struct ccase *, - struct cmd_t_test*, const struct casefilter *); + struct cmd_t_test*, enum mv_class); static void paired_precalc (struct cmd_t_test *); static void paired_postcalc (struct cmd_t_test *); static void group_precalc (struct cmd_t_test *); static int group_calc (const struct dictionary *dict, const struct ccase *, - struct cmd_t_test *, const struct casefilter *); + struct cmd_t_test *, enum mv_class); static void group_postcalc (struct cmd_t_test *); -static bool calculate(const struct ccase *first, - const struct casefile *cf, void *_mode, - const struct dataset *ds); +static void calculate(struct cmd_t_test *, + struct casereader *, + const struct dataset *); static int mode; @@ -258,6 +257,8 @@ static unsigned hash_group_binary(const struct group_statistics *g, int cmd_t_test (struct lexer *lexer, struct dataset *ds) { + struct casegrouper *grouper; + struct casereader *group; bool ok; if ( !parse_t_test (lexer, ds, &cmd, NULL) ) @@ -338,7 +339,12 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds) bad_weight_warn = true; - ok = multipass_procedure_with_splits (ds, calculate, &cmd); + /* Data pass. 
*/ + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + calculate (&cmd, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; n_pairs=0; free(pairs); @@ -1411,30 +1417,30 @@ static int common_calc (const struct dictionary *dict, const struct ccase *c, void *_cmd, - const struct casefilter *filter) + enum mv_class exclude) { int i; struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd; - double weight = dict_get_case_weight (dict, c, &bad_weight_warn); + double weight = dict_get_case_weight (dict, c, NULL); /* Listwise has to be implicit if the independent variable is missing ?? */ if ( cmd->sbc_groups ) { - if ( casefilter_variable_missing (filter, c, indep_var) ) + if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude)) return 0; } for(i = 0; i < cmd->n_variables ; ++i) { const struct variable *v = cmd->v_variables[i]; - - if (! casefilter_variable_missing (filter, c, v) ) + const union value *val = case_data (c, v); + + if (!var_is_value_missing (v, val, exclude)) { struct group_statistics *gs; - const union value *val = case_data (c, v); - gs = &group_proc_get (cmd->v_variables[i])->ugs; + gs = &group_proc_get (v)->ugs; gs->n += weight; gs->sum += weight * val->f; @@ -1492,13 +1498,13 @@ common_postcalc (struct cmd_t_test *cmd) static int one_sample_calc (const struct dictionary *dict, const struct ccase *c, void *cmd_, - const struct casefilter *filter) + enum mv_class exclude) { int i; struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; - double weight = dict_get_case_weight (dict, c, &bad_weight_warn); + double weight = dict_get_case_weight (dict, c, NULL); for(i=0; i< cmd->n_variables ; ++i) @@ -1509,7 +1515,7 @@ one_sample_calc (const struct dictionary *dict, gs= &group_proc_get (cmd->v_variables[i])->ugs; - if ( ! 
casefilter_variable_missing (filter, c, v)) + if (!var_is_value_missing (v, val, exclude)) gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } @@ -1569,11 +1575,11 @@ paired_precalc (struct cmd_t_test *cmd UNUSED) static int paired_calc (const struct dictionary *dict, const struct ccase *c, - struct cmd_t_test *cmd UNUSED, const struct casefilter *filter) + struct cmd_t_test *cmd UNUSED, enum mv_class exclude) { int i; - double weight = dict_get_case_weight (dict, c, &bad_weight_warn); + double weight = dict_get_case_weight (dict, c, NULL); for(i=0; i < n_pairs ; ++i ) { @@ -1583,8 +1589,8 @@ paired_calc (const struct dictionary *dict, const struct ccase *c, const union value *val0 = case_data (c, v0); const union value *val1 = case_data (c, v1); - if ( ! casefilter_variable_missing (filter, c, v0) && - ! casefilter_variable_missing (filter, c, v1) ) + if (!var_is_value_missing (v0, val0, exclude) && + !var_is_value_missing (v1, val1, exclude)) { pairs[i].n += weight; pairs[i].sum[0] += weight * val0->f; @@ -1694,16 +1700,15 @@ group_precalc (struct cmd_t_test *cmd ) static int group_calc (const struct dictionary *dict, const struct ccase *c, struct cmd_t_test *cmd, - const struct casefilter *filter) + enum mv_class exclude) { int i; - const double weight = - dict_get_case_weight (dict, c, &bad_weight_warn); + const double weight = dict_get_case_weight (dict, c, NULL); const union value *gv; - if ( casefilter_variable_missing (filter, c, indep_var)) + if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude)) return 0; gv = case_data (c, indep_var); @@ -1722,7 +1727,7 @@ group_calc (const struct dictionary *dict, if ( ! gs ) return 0; - if ( ! 
casefilter_variable_missing (filter, c, var) ) + if (!var_is_value_missing (var, val, exclude)) { gs->n += weight; gs->sum += weight * val->f; @@ -1771,95 +1776,83 @@ group_postcalc ( struct cmd_t_test *cmd ) -static bool -calculate(const struct ccase *first, const struct casefile *cf, - void *cmd_, const struct dataset *ds) +static void +calculate(struct cmd_t_test *cmd, + struct casereader *input, const struct dataset *ds) { const struct dictionary *dict = dataset_dict (ds); struct ssbox stat_summary_box; struct trbox test_results_box; - struct casereader *r; + struct casereader *pass1, *pass2, *pass3; + struct taint *taint; struct ccase c; - struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; + enum mv_class exclude = cmd->miss != TTS_INCLUDE ? MV_ANY : MV_SYSTEM; - struct casefilter *filter = casefilter_create ((cmd->miss != TTS_INCLUDE - ? MV_ANY : MV_SYSTEM), - NULL, 0); + if (!casereader_peek (input, 0, &c)) + return; + output_split_file_values (ds, &c); + case_destroy (&c); if ( cmd->miss == TTS_LISTWISE ) - casefilter_add_variables (filter, - cmd->v_variables, cmd->n_variables); + input = casereader_create_filter_missing (input, + cmd->v_variables, + cmd->n_variables, + exclude, NULL); + + input = casereader_create_filter_weight (input, dict, NULL, NULL); + + taint = taint_clone (casereader_get_taint (input)); + casereader_split (input, &pass1, &pass2); - output_split_file_values (ds, first); common_precalc (cmd); - for(r = casefile_get_reader (cf, filter); - casereader_read (r, &c) ; - case_destroy (&c)) - { - common_calc (dict, &c, cmd, filter); - } - - casereader_destroy (r); + for (; casereader_read (pass1, &c); case_destroy (&c)) + common_calc (dict, &c, cmd, exclude); + casereader_destroy (pass1); common_postcalc (cmd); switch(mode) { case T_1_SAMPLE: one_sample_precalc (cmd); - for(r = casefile_get_reader (cf, filter); - casereader_read (r, &c) ; - case_destroy (&c)) - { - one_sample_calc (dict, &c, cmd, filter); - } - casereader_destroy (r); + for 
(; casereader_read (pass2, &c); case_destroy (&c)) + one_sample_calc (dict, &c, cmd, exclude); one_sample_postcalc (cmd); break; case T_PAIRED: paired_precalc(cmd); - for(r = casefile_get_reader (cf, filter); - casereader_read (r, &c) ; - case_destroy (&c)) - { - paired_calc (dict, &c, cmd, filter); - } - casereader_destroy (r); + for (; casereader_read (pass2, &c); case_destroy (&c)) + paired_calc (dict, &c, cmd, exclude); paired_postcalc (cmd); - break; case T_IND_SAMPLES: + pass3 = casereader_clone (pass2); group_precalc(cmd); - for(r = casefile_get_reader (cf, filter); - casereader_read (r, &c) ; - case_destroy (&c)) - { - group_calc (dict, &c, cmd, filter); - } - casereader_destroy (r); + for(; casereader_read (pass2, &c); case_destroy (&c)) + group_calc (dict, &c, cmd, exclude); group_postcalc(cmd); - levene (dict, cf, indep_var, cmd->n_variables, cmd->v_variables, - filter); + levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables, + exclude); break; } + casereader_destroy (pass2); + + if (!taint_has_tainted_successor (taint)) + { + ssbox_create(&stat_summary_box,cmd,mode); + ssbox_populate(&stat_summary_box,cmd); + ssbox_finalize(&stat_summary_box); - casefilter_destroy (filter); - - ssbox_create(&stat_summary_box,cmd,mode); - ssbox_populate(&stat_summary_box,cmd); - ssbox_finalize(&stat_summary_box); - - if ( mode == T_PAIRED) - pscbox(); - - trbox_create(&test_results_box,cmd,mode); - trbox_populate(&test_results_box,cmd); - trbox_finalize(&test_results_box); - - return true; + if ( mode == T_PAIRED ) + pscbox(); + + trbox_create(&test_results_box,cmd,mode); + trbox_populate(&test_results_box,cmd); + trbox_finalize(&test_results_box); + } } short which_group(const struct group_statistics *g, diff --git a/src/language/tests/automake.mk b/src/language/tests/automake.mk index 198c1497..bbcd777b 100644 --- a/src/language/tests/automake.mk +++ b/src/language/tests/automake.mk @@ -4,7 +4,6 @@ language_tests_built_sources = \ 
src/language/tests/check-model.c language_tests_sources = \ - src/language/tests/casefile-test.c \ src/language/tests/check-model.h \ src/language/tests/datasheet-test.c \ src/language/tests/float-format.c \ diff --git a/src/language/tests/casefile-test.c b/src/language/tests/casefile-test.c deleted file mode 100644 index eb8ee061..00000000 --- a/src/language/tests/casefile-test.c +++ /dev/null @@ -1,290 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2004 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
*/ - -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include "xalloc.h" - -static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt); -static void get_random_case (struct ccase *, size_t value_cnt, - size_t case_idx); -static void write_random_case (struct casefile *cf, size_t case_idx); -static void read_and_verify_random_case (struct casefile *cf, - struct casereader *reader, - size_t case_idx); -static void test_casereader_clone (struct casereader *reader1, size_t case_cnt); - - -static void fail_test (const char *message, ...); - -int -cmd_debug_casefile (struct lexer *lexer, struct dataset *ds UNUSED) -{ - static const size_t sizes[] = - { - 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73, - 100, 137, 257, 521, 1031, 2053 - }; - int size_max; - int case_max; - int pattern; - - size_max = sizeof sizes / sizeof *sizes; - if (lex_match_id (lexer, "SMALL")) - { - size_max -= 4; - case_max = 511; - } - else - case_max = 4095; - if (lex_token (lexer) != '.') - return lex_end_of_command (lexer); - - for (pattern = 0; pattern < 7; pattern++) - { - const size_t *size; - - for (size = sizes; size < sizes + size_max; size++) - { - size_t case_cnt; - - for (case_cnt = 0; case_cnt <= case_max; - case_cnt = (case_cnt * 2) + 1) - test_casefile (pattern, *size, case_cnt); - } - } - printf ("Casefile tests succeeded.\n"); - return CMD_SUCCESS; -} - -static void -test_casefile (int pattern, size_t value_cnt, size_t case_cnt) -{ - struct casefile *cf; - struct casereader *r1, *r2; - struct ccase c; - gsl_rng *rng; - size_t i, j; - - rng = gsl_rng_alloc (gsl_rng_mt19937); - cf = fastfile_create (value_cnt); - if (pattern == 5) - casefile_to_disk (cf); - for (i = 0; i < case_cnt; i++) - write_random_case (cf, i); - if (pattern == 5) - casefile_sleep (cf); - r1 = casefile_get_reader (cf, NULL); - r2 = casefile_get_reader (cf, NULL); - switch (pattern) - { - case 0: - case 5: - for (i = 0; i < case_cnt; 
i++) - { - read_and_verify_random_case (cf, r1, i); - read_and_verify_random_case (cf, r2, i); - } - break; - case 1: - for (i = 0; i < case_cnt; i++) - read_and_verify_random_case (cf, r1, i); - for (i = 0; i < case_cnt; i++) - read_and_verify_random_case (cf, r2, i); - break; - case 2: - case 3: - case 4: - for (i = j = 0; i < case_cnt; i++) - { - read_and_verify_random_case (cf, r1, i); - if (gsl_rng_get (rng) % pattern == 0) - read_and_verify_random_case (cf, r2, j++); - if (i == case_cnt / 2) - casefile_to_disk (cf); - } - for (; j < case_cnt; j++) - read_and_verify_random_case (cf, r2, j); - break; - case 6: - test_casereader_clone (r1, case_cnt); - test_casereader_clone (r2, case_cnt); - break; - default: - NOT_REACHED (); - } - if (casereader_read (r1, &c)) - fail_test ("Casereader 1 not at end of file."); - if (casereader_read (r2, &c)) - fail_test ("Casereader 2 not at end of file."); - if (pattern != 1) - casereader_destroy (r1); - if (pattern != 2) - casereader_destroy (r2); - if (pattern > 2) - { - r1 = casefile_get_destructive_reader (cf); - for (i = 0; i < case_cnt; i++) - { - struct ccase read_case, expected_case; - - get_random_case (&expected_case, value_cnt, i); - if (!casereader_read_xfer (r1, &read_case)) - fail_test ("Premature end of casefile."); - for (j = 0; j < value_cnt; j++) - { - double a = case_num_idx (&read_case, j); - double b = case_num_idx (&expected_case, j); - if (a != b) - fail_test ("Case %lu fails comparison.", (unsigned long) i); - } - case_destroy (&expected_case); - case_destroy (&read_case); - } - casereader_destroy (r1); - } - casefile_destroy (cf); - gsl_rng_free (rng); -} - -static void -get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx) -{ - int i; - case_create (c, value_cnt); - for (i = 0; i < value_cnt; i++) - case_data_rw_idx (c, i)->f = case_idx % 257 + i; -} - -static void -write_random_case (struct casefile *cf, size_t case_idx) -{ - struct ccase c; - get_random_case (&c, 
casefile_get_value_cnt (cf), case_idx); - casefile_append_xfer (cf, &c); -} - -static void -read_and_verify_random_case (struct casefile *cf, - struct casereader *reader, size_t case_idx) -{ - struct ccase read_case, expected_case; - size_t value_cnt; - size_t i; - - value_cnt = casefile_get_value_cnt (cf); - get_random_case (&expected_case, value_cnt, case_idx); - if (!casereader_read (reader, &read_case)) - fail_test ("Premature end of casefile."); - for (i = 0; i < value_cnt; i++) - { - double a = case_num_idx (&read_case, i); - double b = case_num_idx (&expected_case, i); - if (a != b) - fail_test ("Case %lu fails comparison.", (unsigned long) case_idx); - } - case_destroy (&read_case); - case_destroy (&expected_case); -} - -static void -test_casereader_clone (struct casereader *reader1, size_t case_cnt) -{ - size_t i; - size_t cases = 0; - struct ccase c1; - struct ccase c2; - struct casefile *src = casereader_get_casefile (reader1); - struct casereader *clone = NULL; - - size_t value_cnt = casefile_get_value_cnt (src); - - struct casefile *newfile = fastfile_create (value_cnt); - struct casereader *newreader; - - - /* Read a 3rd of the cases */ - for ( i = 0 ; i < case_cnt / 3 ; ++i ) - { - casereader_read (reader1, &c1); - case_destroy (&c1); - } - - clone = casereader_clone (reader1); - - /* Copy all the cases into a new file */ - while( casereader_read (reader1, &c1)) - { - casefile_append_xfer (newfile, &c1); - cases ++; - } - - newreader = casefile_get_reader (newfile, NULL); - - /* Make sure that the new file's are identical to those returned from - the cloned reader */ - while( casereader_read (clone, &c1)) - { - const union value *v1; - const union value *v2; - cases --; - - if ( ! 
casereader_read_xfer (newreader, &c2) ) - { - case_destroy (&c1); - break; - } - - v1 = case_data_all (&c1) ; - v2 = case_data_all (&c2) ; - - if ( 0 != memcmp (v1, v2, value_cnt * MAX_SHORT_STRING)) - fail_test ("Cloned reader read different value at case %ld", cases); - - case_destroy (&c1); - case_destroy (&c2); - } - - if ( cases > 0 ) - fail_test ("Cloned reader reads different number of cases."); - -} - -static void -fail_test (const char *message, ...) -{ - va_list args; - - va_start (args, message); - vprintf (message, args); - putchar ('\n'); - va_end (args); - - exit (1); -} diff --git a/src/libpspp/deque.h b/src/libpspp/deque.h index be1121bf..0233c13d 100644 --- a/src/libpspp/deque.h +++ b/src/libpspp/deque.h @@ -66,6 +66,7 @@ #include #include +#include #include diff --git a/src/math/ChangeLog b/src/math/ChangeLog index acc40388..2c0df305 100644 --- a/src/math/ChangeLog +++ b/src/math/ChangeLog @@ -1,3 +1,18 @@ +2007-06-06 Ben Pfaff + + Adapt case sources, sinks, and clients of procedure code to the + new infrastructure. + + * automake.mk: Add new files. + + * levene.c: Adapt to new casereaders. Abstract better. + + * merge.c: New file. + + * merge.h: New file. + + * sort.c: Rewrite in terms of case_ordering, merger. + 2007-05-31 Jason Stover * interaction.c: New file. 
diff --git a/src/math/automake.mk b/src/math/automake.mk index 2a8f4408..5bbf24fa 100644 --- a/src/math/automake.mk +++ b/src/math/automake.mk @@ -19,6 +19,8 @@ src_math_libpspp_math_a_SOURCES = \ src/math/interaction.h \ src/math/levene.c \ src/math/levene.h \ + src/math/merge.c \ + src/math/merge.h \ src/math/moments.c src/math/moments.h \ src/math/percentiles.c src/math/percentiles.h \ src/math/design-matrix.c src/math/design-matrix.h \ diff --git a/src/math/levene.c b/src/math/levene.c index a325138b..15f9a583 100644 --- a/src/math/levene.c +++ b/src/math/levene.c @@ -22,14 +22,13 @@ #include "levene.h" #include #include -#include +#include #include #include "group-proc.h" #include #include #include #include -#include #include #include #include "group.h" @@ -74,90 +73,87 @@ struct levene_info const struct variable **v_dep; /* Filter for missing values */ - struct casefilter *filter; + enum mv_class exclude; + + /* An array of lz_stats for each variable */ + struct lz_stats *lz; + + /* The denominator for the expression for the Levene */ + double *lz_denominator; + +}; + +/* Per variable statistics */ +struct lz_stats +{ + /* Total of all lz */ + double grand_total; + + /* Mean of all lz */ + double grand_mean; + + /* The total number of cases */ + double total_n ; + + /* Number of groups */ + int n_groups; }; /* First pass */ static void levene_precalc (const struct levene_info *l); static int levene_calc (const struct dictionary *dict, const struct ccase *, const struct levene_info *l); -static void levene_postcalc (void *); +static void levene_postcalc (struct levene_info *); /* Second pass */ static void levene2_precalc (struct levene_info *l); static int levene2_calc (const struct dictionary *, const struct ccase *, struct levene_info *l); -static void levene2_postcalc (void *); +static void levene2_postcalc (struct levene_info *); -void +void levene(const struct dictionary *dict, - const struct casefile *cf, + struct casereader *reader, const struct 
variable *v_indep, size_t n_dep, const struct variable **v_dep, - struct casefilter *filter) + enum mv_class exclude) { - struct casereader *r; + struct casereader *pass1, *pass2; struct ccase c; struct levene_info l; l.n_dep = n_dep; l.v_indep = v_indep; l.v_dep = v_dep; - l.filter = filter; + l.exclude = exclude; + l.lz = xnmalloc (l.n_dep, sizeof *l.lz); + l.lz_denominator = xnmalloc (l.n_dep, sizeof *l.lz_denominator); + casereader_split (reader, &pass1, &pass2); levene_precalc (&l); - for(r = casefile_get_reader (cf, filter); - casereader_read (r, &c) ; - case_destroy (&c)) - { - levene_calc (dict, &c, &l); - } - casereader_destroy (r); + for (; casereader_read (pass1, &c); case_destroy (&c)) + levene_calc (dict, &c, &l); + casereader_destroy (pass1); levene_postcalc (&l); levene2_precalc(&l); - for(r = casefile_get_reader (cf, filter); - casereader_read (r, &c) ; - case_destroy (&c)) - { - levene2_calc (dict, &c,&l); - } - casereader_destroy (r); + for (; casereader_read (pass2, &c); case_destroy (&c)) + levene2_calc (dict, &c, &l); + casereader_destroy (pass2); levene2_postcalc (&l); -} - -/* Internal variables used in calculating the Levene statistic */ - -/* Per variable statistics */ -struct lz_stats -{ - /* Total of all lz */ - double grand_total; - - /* Mean of all lz */ - double grand_mean; - - /* The total number of cases */ - double total_n ; - - /* Number of groups */ - int n_groups; -}; - -/* An array of lz_stats for each variable */ -static struct lz_stats *lz; + free (l.lz_denominator); + free (l.lz); +} static void levene_precalc (const struct levene_info *l) { size_t i; - lz = xnmalloc (l->n_dep, sizeof *lz); - for(i = 0; i < l->n_dep ; ++i ) { const struct variable *var = l->v_dep[i]; @@ -165,9 +161,9 @@ levene_precalc (const struct levene_info *l) struct group_statistics *gs; struct hsh_iterator hi; - lz[i].grand_total = 0; - lz[i].total_n = 0; - lz[i].n_groups = gp->n_groups ; + l->lz[i].grand_total = 0; + l->lz[i].total_n = 0; + 
l->lz[i].n_groups = gp->n_groups ; for ( gs = hsh_first(gp->group_hash, &hi); @@ -206,11 +202,11 @@ levene_calc (const struct dictionary *dict, const struct ccase *c, if ( 0 == gs ) continue ; - if ( ! casefilter_variable_missing (l->filter, c, var)) + if ( !var_is_value_missing (var, v, l->exclude)) { levene_z= fabs(v->f - gs->mean); - lz[i].grand_total += levene_z * weight; - lz[i].total_n += weight; + l->lz[i].grand_total += levene_z * weight; + l->lz[i].total_n += weight; gs->lz_total += levene_z * weight; } @@ -220,16 +216,14 @@ levene_calc (const struct dictionary *dict, const struct ccase *c, static void -levene_postcalc (void *_l) +levene_postcalc (struct levene_info *l) { size_t v; - struct levene_info *l = (struct levene_info *) _l; - for (v = 0; v < l->n_dep; ++v) { /* This is Z_LL */ - lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ; + l->lz[v].grand_mean = l->lz[v].grand_total / l->lz[v].total_n ; } @@ -237,15 +231,11 @@ levene_postcalc (void *_l) -/* The denominator for the expression for the Levene */ -static double *lz_denominator = 0; - static void levene2_precalc (struct levene_info *l) { size_t v; - lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator); /* This stuff could go in the first post calc . . . */ for (v = 0; @@ -265,7 +255,7 @@ levene2_precalc (struct levene_info *l) { g->lz_mean = g->lz_total / g->n ; } - lz_denominator[v] = 0; + l->lz_denominator[v] = 0; } } @@ -295,11 +285,10 @@ levene2_calc (const struct dictionary *dict, const struct ccase *c, if ( 0 == gs ) continue; - if ( ! 
casefilter_variable_missing (l->filter, c, var)) - + if ( !var_is_value_missing (var, v, l->exclude)) { levene_z = fabs(v->f - gs->mean); - lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean); + l->lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean); } } @@ -308,12 +297,10 @@ levene2_calc (const struct dictionary *dict, const struct ccase *c, static void -levene2_postcalc (void *_l) +levene2_postcalc (struct levene_info *l) { size_t v; - struct levene_info *l = (struct levene_info *) _l; - for (v = 0; v < l->n_dep; ++v) { double lz_numerator = 0; @@ -328,18 +315,14 @@ levene2_postcalc (void *_l) g != 0 ; g = (struct group_statistics *) hsh_next(hash,&hi) ) { - lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean ); + lz_numerator += g->n * pow2(g->lz_mean - l->lz[v].grand_mean ); } lz_numerator *= ( gp->ugs.n - gp->n_groups ); - lz_denominator[v] *= (gp->n_groups - 1); + l->lz_denominator[v] *= (gp->n_groups - 1); - gp->levene = lz_numerator / lz_denominator[v] ; + gp->levene = lz_numerator / l->lz_denominator[v] ; } - - /* Now clear up after ourselves */ - free(lz_denominator); - free(lz); } diff --git a/src/math/levene.h b/src/math/levene.h index 66944daf..40ed52ce 100644 --- a/src/math/levene.h +++ b/src/math/levene.h @@ -21,9 +21,9 @@ #if !levene_h #define levene_h 1 - +#include +#include #include -#include /* Calculate the Levene statistic @@ -39,10 +39,10 @@ The dependent variables : v_dep; struct dictionary ; struct casefilter ; -void levene(const struct dictionary *dict, const struct casefile *cf, +void levene(const struct dictionary *dict, struct casereader *, const struct variable *v_indep, size_t n_dep, const struct variable **v_dep, - struct casefilter *filter); + enum mv_class exclude); diff --git a/src/math/merge.c b/src/math/merge.c new file mode 100644 index 00000000..33445472 --- /dev/null +++ b/src/math/merge.c @@ -0,0 +1,159 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* FIXME: error checking. */ +/* FIXME: merge pattern should be improved, this one causes a + performance regression. */ +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "xalloc.h" + +#define MAX_MERGE_ORDER 7 + +struct merge_input + { + struct casereader *reader; + struct ccase c; + }; + +struct merge + { + struct case_ordering *ordering; + struct merge_input inputs[MAX_MERGE_ORDER]; + size_t input_cnt; + }; + +static void do_merge (struct merge *m); + +struct merge * +merge_create (const struct case_ordering *ordering) +{ + struct merge *m = xmalloc (sizeof *m); + m->ordering = case_ordering_clone (ordering); + m->input_cnt = 0; + return m; +} + +void +merge_destroy (struct merge *m) +{ + if (m != NULL) + { + size_t i; + + case_ordering_destroy (m->ordering); + for (i = 0; i < m->input_cnt; i++) + casereader_destroy (m->inputs[i].reader); + free (m); + } +} + +void +merge_append (struct merge *m, struct casereader *r) +{ + r = casereader_rename (r); + m->inputs[m->input_cnt++].reader = r; + if (m->input_cnt >= MAX_MERGE_ORDER) + do_merge (m); +} + +struct casereader * +merge_make_reader (struct merge *m) +{ + struct casereader *r; + + if (m->input_cnt > 1) + do_merge (m); + + if (m->input_cnt == 1) + { + r 
= m->inputs[0].reader; + m->input_cnt = 0; + } + else if (m->input_cnt == 0) + { + size_t value_cnt = case_ordering_get_value_cnt (m->ordering); + struct casewriter *writer = mem_writer_create (value_cnt); + r = casewriter_make_reader (writer); + } + else + NOT_REACHED (); + + return r; +} + +static bool +read_input_case (struct merge *m, size_t idx) +{ + struct merge_input *i = &m->inputs[idx]; + + if (casereader_read (i->reader, &i->c)) + return true; + else + { + casereader_destroy (i->reader); + remove_element (m->inputs, m->input_cnt, sizeof *m->inputs, idx); + m->input_cnt--; + return false; + } +} + +static void +do_merge (struct merge *m) +{ + struct casewriter *w; + size_t i; + + assert (m->input_cnt > 1); + + w = tmpfile_writer_create (case_ordering_get_value_cnt (m->ordering)); + for (i = 0; i < m->input_cnt; i++) + taint_propagate (casereader_get_taint (m->inputs[i].reader), + casewriter_get_taint (w)); + + for (i = 0; i < m->input_cnt; ) + if (read_input_case (m, i)) + i++; + while (m->input_cnt > 0) + { + size_t min; + + min = 0; + for (i = 1; i < m->input_cnt; i++) + if (case_ordering_compare_cases (&m->inputs[i].c, &m->inputs[min].c, + m->ordering) < 0) + min = i; + + casewriter_write (w, &m->inputs[min].c); + read_input_case (m, min); + } + + m->input_cnt = 1; + m->inputs[0].reader = casewriter_make_reader (w); +} + diff --git a/src/ui/gui/flexifile-factory.h b/src/math/merge.h similarity index 66% rename from src/ui/gui/flexifile-factory.h rename to src/math/merge.h index eecb901c..61852057 100644 --- a/src/ui/gui/flexifile-factory.h +++ b/src/math/merge.h @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2007 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -16,13 +16,17 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ -#ifndef FLEXIFILE_FACTORY_H -#define FLEXIFILE_FACTORY_H +#ifndef MATH_MERGE_H +#define MATH_MERGE_H 1 +#include -struct casefile_factory ; +struct case_ordering; +struct casereader; -struct casefile_factory * flexifile_factory_create (void); -void flexifile_factory_destroy (struct casefile_factory *); +struct merge *merge_create (const struct case_ordering *); +void merge_destroy (struct merge *); +void merge_append (struct merge *, struct casereader *); +struct casereader *merge_make_reader (struct merge *); -#endif +#endif /* math/merge.h */ diff --git a/src/math/sort.c b/src/math/sort.c index 46da0ec0..aa7d2071 100644 --- a/src/math/sort.c +++ b/src/math/sort.c @@ -20,31 +20,18 @@ #include "sort.h" -#include -#include -#include #include -#include -#include +#include #include -#include -#include -#include -#include -#include +#include +#include +#include #include -#include -#include #include #include #include -#include -#include -#include -#include - -#include "minmax.h" +#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -52,701 +39,261 @@ /* These should only be changed for testing purposes. */ int min_buffers = 64; int max_buffers = INT_MAX; -bool allow_internal_sort = true; - -static int compare_record (const struct ccase *, const struct ccase *, - const struct sort_criteria *); -static struct casefile *do_internal_sort (struct casereader *, - const struct sort_criteria *, - struct casefile_factory * - ); -static struct casefile *do_external_sort (struct casereader *, - const struct sort_criteria *, - struct casefile_factory * - ); - - -/* Sorts the active file in-place according to CRITERIA. - Returns true if successful. 
*/ -bool -sort_active_file_in_place (struct dataset *ds, - const struct sort_criteria *criteria) -{ - struct casefile *in, *out; - - proc_cancel_temporary_transformations (ds); - if (!procedure (ds, NULL, NULL)) - return false; - - in = proc_capture_output (ds); - out = sort_execute (casefile_get_destructive_reader (in), criteria, - dataset_get_casefile_factory (ds)); - if (out == NULL) - return false; - - proc_set_source (ds, storage_source_create (out)); - return true; -} -/* Data passed to sort_to_casefile_callback(). */ -struct sort_to_casefile_cb_data +struct sort_writer { - const struct sort_criteria *criteria; - struct casefile *output; - struct casefile_factory *factory ; + struct case_ordering *ordering; + struct merge *merge; + struct pqueue *pqueue; + + struct casewriter *run; + casenumber run_id; + struct ccase run_end; }; -/* Sorts casefile CF according to the criteria in CB_DATA. */ -static bool -sort_to_casefile_callback (const struct casefile *cf, void *cb_data_) -{ - struct sort_to_casefile_cb_data *cb_data = cb_data_; - cb_data->output = sort_execute (casefile_get_reader (cf, NULL), - cb_data->criteria, - cb_data->factory - ); - return cb_data->output != NULL; -} +static struct casewriter_class sort_casewriter_class; -/* Sorts the active file to a separate casefile. If successful, - returns the sorted casefile. Returns a null pointer on - failure. 
*/ -struct casefile * -sort_active_file_to_casefile (struct dataset *ds, - const struct sort_criteria *criteria) +static struct pqueue *pqueue_create (const struct case_ordering *); +static void pqueue_destroy (struct pqueue *); +static bool pqueue_is_full (const struct pqueue *); +static bool pqueue_is_empty (const struct pqueue *); +static void pqueue_push (struct pqueue *, struct ccase *, casenumber); +static void pqueue_pop (struct pqueue *, struct ccase *, casenumber *); + +static void output_record (struct sort_writer *); + +struct casewriter * +sort_create_writer (struct case_ordering *ordering) { - struct sort_to_casefile_cb_data cb_data; - - proc_cancel_temporary_transformations (ds); + struct sort_writer *sort; - cb_data.criteria = criteria; - cb_data.output = NULL; - cb_data.factory = dataset_get_casefile_factory (ds); - if (!multipass_procedure (ds, sort_to_casefile_callback, &cb_data)) - { - casefile_destroy (cb_data.output); - return NULL; - } - return cb_data.output; -} + sort = xmalloc (sizeof *sort); + sort->ordering = case_ordering_clone (ordering); + sort->merge = merge_create (ordering); + sort->pqueue = pqueue_create (ordering); + sort->run = NULL; + sort->run_id = 0; + case_nullify (&sort->run_end); + case_ordering_destroy (ordering); -/* Reads all the cases from READER, which is destroyed. Sorts - the cases according to CRITERIA. Returns the sorted cases in - a newly created casefile, which will be created by FACTORY. - If FACTORY is NULL, then a local fastfile_factory will be used. 
-*/ -struct casefile * -sort_execute (struct casereader *reader, - const struct sort_criteria *criteria, - struct casefile_factory *factory - ) -{ - struct casefile_factory *local_factory = NULL; - struct casefile *output ; - if ( factory == NULL ) - factory = local_factory = fastfile_factory_create (); + return casewriter_create (&sort_casewriter_class, sort); +} - output = do_internal_sort (reader, criteria, factory); - if (output == NULL) - output = do_external_sort (reader, criteria, factory); - casereader_destroy (reader); +static void +sort_casewriter_write (struct casewriter *writer UNUSED, void *sort_, + struct ccase *c) +{ + struct sort_writer *sort = sort_; + bool next_run; - fastfile_factory_destroy (local_factory); + if (pqueue_is_full (sort->pqueue)) + output_record (sort); - return output; + next_run = (case_is_null (&sort->run_end) + || case_ordering_compare_cases (c, &sort->run_end, + sort->ordering) < 0); + pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0)); } - -/* A case and its index. */ -struct indexed_case - { - struct ccase c; /* Case. */ - unsigned long idx; /* Index to allow for stable sorting. */ - }; -static int compare_indexed_cases (const void *, const void *, const void *); +static void +sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_) +{ + struct sort_writer *sort = sort_; + + case_ordering_destroy (sort->ordering); + merge_destroy (sort->merge); + pqueue_destroy (sort->pqueue); + casewriter_destroy (sort->run); + case_destroy (&sort->run_end); + free (sort); +} -/* If the data is in memory, do an internal sort and return a new - casefile for the data. Otherwise, return a null pointer. 
*/ -static struct casefile * -do_internal_sort (struct casereader *reader, - const struct sort_criteria *criteria, - struct casefile_factory *factory) +static struct casereader * +sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_) { - const struct casefile *src; - struct casefile *dst; - unsigned long case_cnt; - - if (!allow_internal_sort) - return NULL; - - src = casereader_get_casefile (reader); - if (casefile_get_case_cnt (src) > 1 && !casefile_in_core (src)) - return NULL; - - case_cnt = casefile_get_case_cnt (src); - dst = factory->create_casefile (factory, casefile_get_value_cnt (src)); - if (case_cnt != 0) + struct sort_writer *sort = sort_; + struct casereader *output; + + if (sort->run == NULL && sort->run_id == 0) { - struct indexed_case *cases = nmalloc (sizeof *cases, case_cnt); - if (cases != NULL) - { - unsigned long i; - - for (i = 0; i < case_cnt; i++) - { - bool ok = casereader_read_xfer (reader, &cases[i].c); - if (!ok) - NOT_REACHED (); - cases[i].idx = i; - } - - sort (cases, case_cnt, sizeof *cases, compare_indexed_cases, - (void *) criteria); - - for (i = 0; i < case_cnt; i++) - casefile_append_xfer (dst, &cases[i].c); - if (casefile_error (dst)) - NOT_REACHED (); - - free (cases); - } - else - { - /* Failure. */ - casefile_destroy (dst); - dst = NULL; - } + /* In-core sort. */ + sort->run = mem_writer_create (case_ordering_get_value_cnt ( + sort->ordering)); + sort->run_id = 1; } + while (!pqueue_is_empty (sort->pqueue)) + output_record (sort); - return dst; -} + merge_append (sort->merge, casewriter_make_reader (sort->run)); + sort->run = NULL; -/* Compares the variables specified by CRITERIA between the cases - at A and B, with a "last resort" comparison for stability, and - returns a strcmp()-type result. 
*/ -static int -compare_indexed_cases (const void *a_, const void *b_, const void *criteria_) -{ - const struct sort_criteria *criteria = criteria_; - const struct indexed_case *a = a_; - const struct indexed_case *b = b_; - int result = compare_record (&a->c, &b->c, criteria); - if (result == 0) - result = a->idx < b->idx ? -1 : a->idx > b->idx; - return result; + output = merge_make_reader (sort->merge); + sort_casewriter_destroy (writer, sort); + return output; } - -/* External sort. */ -/* Maximum order of merge (external sort only). The maximum - reasonable value is about 7. Above that, it would be a good - idea to use a heap in merge_once() to select the minimum. */ -#define MAX_MERGE_ORDER 7 +static void +output_record (struct sort_writer *sort) +{ + struct ccase min_case; + casenumber min_run_id; -/* Results of an external sort. */ -struct external_sort - { - const struct sort_criteria *criteria; /* Sort criteria. */ - size_t value_cnt; /* Size of data in `union value's. */ - struct casefile **runs; /* Array of initial runs. */ - size_t run_cnt, run_cap; /* Number of runs, allocated capacity. */ - struct casefile_factory *factory; /* Factory used to create the result */ - }; + pqueue_pop (sort->pqueue, &min_case, &min_run_id); +#if 0 + printf ("\toutput: %f to run %d\n", case_num_idx (&min_case, 0), min_run_id); +#endif -/* Prototypes for helper functions. */ -static int write_runs (struct external_sort *, struct casereader *); -static struct casefile *merge (struct external_sort *); -static void destroy_external_sort (struct external_sort *); - -/* Performs a stable external sort of the active file according - to the specification in SCP. Forms initial runs using a heap - as a reservoir. Merges the initial runs according to a - pattern that assures stability. 
*/ -static struct casefile * -do_external_sort (struct casereader *reader, - const struct sort_criteria *criteria, - struct casefile_factory *factory - ) -{ - struct external_sort *xsrt; - - if (!casefile_to_disk (casereader_get_casefile (reader))) - return NULL; - - xsrt = xmalloc (sizeof *xsrt); - xsrt->criteria = criteria; - xsrt->value_cnt = casefile_get_value_cnt (casereader_get_casefile (reader)); - xsrt->run_cap = 512; - xsrt->run_cnt = 0; - xsrt->runs = xnmalloc (xsrt->run_cap, sizeof *xsrt->runs); - xsrt->factory = factory; - if (write_runs (xsrt, reader)) + if (sort->run_id != min_run_id && sort->run != NULL) { - struct casefile *output = merge (xsrt); - destroy_external_sort (xsrt); - return output; + merge_append (sort->merge, casewriter_make_reader (sort->run)); + sort->run = NULL; } - else + if (sort->run == NULL) { - destroy_external_sort (xsrt); - return NULL; + sort->run = tmpfile_writer_create (case_ordering_get_value_cnt ( + sort->ordering)); + sort->run_id = min_run_id; } + + case_destroy (&sort->run_end); + case_clone (&sort->run_end, &min_case); + + casewriter_write (sort->run, &min_case); } -/* Destroys XSRT. */ -static void -destroy_external_sort (struct external_sort *xsrt) +static struct casewriter_class sort_casewriter_class = + { + sort_casewriter_write, + sort_casewriter_destroy, + sort_casewriter_convert_to_reader, + }; + +/* Reads all the cases from INPUT. Sorts the cases according to + ORDERING. Returns the sorted cases in a new casereader, or a + null pointer if an I/O error occurs. Both INPUT and ORDERING + are destroyed upon return, regardless of success. 
*/ +struct casereader * +sort_execute (struct casereader *input, struct case_ordering *ordering) { - if (xsrt != NULL) - { - int i; - - for (i = 0; i < xsrt->run_cnt; i++) - casefile_destroy (xsrt->runs[i]); - free (xsrt->runs); - free (xsrt); - } + struct casewriter *output = sort_create_writer (ordering); + casereader_transfer (input, output); + return casewriter_make_reader (output); } -/* Replacement selection. */ - -/* Pairs a record with a run number. */ -struct record_run +struct pqueue { - int run; /* Run number of case. */ - struct ccase record; /* Case data. */ - size_t idx; /* Case number (for stability). */ + struct case_ordering *ordering; + struct pqueue_record *records; + size_t record_cnt; + size_t record_cap; + casenumber idx; }; -/* Represents a set of initial runs during an external sort. */ -struct initial_run_state +struct pqueue_record { - struct external_sort *xsrt; - - /* Reservoir. */ - struct record_run *records; /* Records arranged as a heap. */ - size_t record_cnt; /* Current number of records. */ - size_t record_cap; /* Capacity for records. */ - - /* Run currently being output. */ - int run; /* Run number. */ - size_t case_cnt; /* Number of cases so far. */ - struct casefile *casefile; /* Output file. */ - struct ccase last_output; /* Record last output. */ - - int okay; /* Zero if an error has been encountered. 
*/ + casenumber id; + struct ccase c; + casenumber idx; }; -static bool destroy_initial_run_state (struct initial_run_state *); -static void process_case (struct initial_run_state *, - const struct ccase *, size_t); -static int allocate_cases (struct initial_run_state *); -static void output_record (struct initial_run_state *); -static void start_run (struct initial_run_state *); -static void end_run (struct initial_run_state *); -static int compare_record_run (const struct record_run *, - const struct record_run *, - const struct initial_run_state *); -static int compare_record_run_minheap (const void *, const void *, - const void *); - -/* Reads cases from READER and composes initial runs in XSRT. */ -static int -write_runs (struct external_sort *xsrt, struct casereader *reader) -{ - struct initial_run_state *irs; - struct ccase c; - size_t idx = 0; - int success = 0; - - /* Allocate memory for cases. */ - irs = xmalloc (sizeof *irs); - irs->xsrt = xsrt; - irs->records = NULL; - irs->record_cnt = irs->record_cap = 0; - irs->run = 0; - irs->case_cnt = 0; - irs->casefile = NULL; - case_nullify (&irs->last_output); - irs->okay = 1; - if (!allocate_cases (irs)) - goto done; - - /* Create initial runs. */ - start_run (irs); - for (; irs->okay && casereader_read (reader, &c); case_destroy (&c)) - process_case (irs, &c, idx++); - while (irs->okay && irs->record_cnt > 0) - output_record (irs); - end_run (irs); - - success = irs->okay; - - done: - if (!destroy_initial_run_state (irs)) - success = false; - - return success; -} - -/* Add a single case to an initial run. */ -static void -process_case (struct initial_run_state *irs, const struct ccase *c, - size_t idx) -{ - struct record_run *rr; - - /* Compose record_run for this run and add to heap. 
*/ - assert (irs->record_cnt < irs->record_cap - 1); - rr = irs->records + irs->record_cnt++; - case_copy (&rr->record, 0, c, 0, irs->xsrt->value_cnt); - rr->run = irs->run; - rr->idx = idx; - if (!case_is_null (&irs->last_output) - && compare_record (c, &irs->last_output, irs->xsrt->criteria) < 0) - rr->run = irs->run + 1; - push_heap (irs->records, irs->record_cnt, sizeof *irs->records, - compare_record_run_minheap, irs); - - /* Output a record if the reservoir is full. */ - if (irs->record_cnt == irs->record_cap - 1 && irs->okay) - output_record (irs); -} +static int compare_pqueue_records_minheap (const void *a, const void *b, + const void *pq_); -/* Destroys the initial run state represented by IRS. - Returns true if successful, false if an I/O error occurred. */ -static bool -destroy_initial_run_state (struct initial_run_state *irs) +static struct pqueue * +pqueue_create (const struct case_ordering *ordering) { - int i; - bool ok = true; - - if (irs == NULL) - return true; - - for (i = 0; i < irs->record_cap; i++) - case_destroy (&irs->records[i].record); - free (irs->records); - - if (irs->casefile != NULL) - ok = casefile_sleep (irs->casefile); - - free (irs); - return ok; + struct pqueue *pq; + + pq = xmalloc (sizeof *pq); + pq->ordering = case_ordering_clone (ordering); + pq->record_cap + = get_workspace_cases (case_ordering_get_value_cnt (ordering)); + if (pq->record_cap > max_buffers) + pq->record_cap = max_buffers; + else if (pq->record_cap < min_buffers) + pq->record_cap = min_buffers; + pq->record_cnt = 0; + pq->records = xnmalloc (pq->record_cap, sizeof *pq->records); + pq->idx = 0; + + return pq; } -/* Allocates room for lots of cases as a buffer. */ -static int -allocate_cases (struct initial_run_state *irs) -{ - int approx_case_cost; /* Approximate memory cost of one case in bytes. */ - int max_cases; /* Maximum number of cases to allocate. */ - int i; - - /* Allocate as many cases as we can within the workspace - limit. 
*/ - approx_case_cost = (sizeof *irs->records - + irs->xsrt->value_cnt * sizeof (union value) - + 4 * sizeof (void *)); - max_cases = get_workspace() / approx_case_cost; - if (max_cases > max_buffers) - max_cases = max_buffers; - irs->records = nmalloc (sizeof *irs->records, max_cases); - if (irs->records != NULL) - for (i = 0; i < max_cases; i++) - if (!case_try_create (&irs->records[i].record, irs->xsrt->value_cnt)) - { - max_cases = i; - break; - } - irs->record_cap = max_cases; - - /* Fail if we didn't allocate an acceptable number of cases. */ - if (irs->records == NULL || max_cases < min_buffers) - { - msg (SE, _("Out of memory. Could not allocate room for minimum of %d " - "cases of %d bytes each. (PSPP workspace is currently " - "restricted to a maximum of %lu KB.)"), - min_buffers, approx_case_cost, - (unsigned long int) (get_workspace() / 1024)); - return 0; - } - return 1; -} - -/* Compares the VAR_CNT variables in VARS[] between the `value's at - A and B, and returns a strcmp()-type result. */ -static int -compare_record (const struct ccase *a, const struct ccase *b, - const struct sort_criteria *criteria) +static void +pqueue_destroy (struct pqueue *pq) { - int i; - - assert (a != NULL); - assert (b != NULL); - - for (i = 0; i < criteria->crit_cnt; i++) + if (pq != NULL) { - const struct sort_criterion *c = &criteria->crits[i]; - int result; - - if (c->width == 0) + while (!pqueue_is_empty (pq)) { - double af = case_num_idx (a, c->fv); - double bf = case_num_idx (b, c->fv); - - result = af < bf ? -1 : af > bf; + struct ccase c; + casenumber id; + pqueue_pop (pq, &c, &id); + case_destroy (&c); } - else - result = memcmp (case_str_idx (a, c->fv), - case_str_idx (b, c->fv), c->width); - - if (result != 0) - return c->dir == SRT_ASCEND ? result : -result; + case_ordering_destroy (pq->ordering); + free (pq->records); + free (pq); } - - return 0; } -/* Compares record-run tuples A and B on run number first, then - on record, then on case index. 
*/ -static int -compare_record_run (const struct record_run *a, - const struct record_run *b, - const struct initial_run_state *irs) +static bool +pqueue_is_full (const struct pqueue *pq) { - int result = a->run < b->run ? -1 : a->run > b->run; - if (result == 0) - result = compare_record (&a->record, &b->record, irs->xsrt->criteria); - if (result == 0) - result = a->idx < b->idx ? -1 : a->idx > b->idx; - return result; + return pq->record_cnt >= pq->record_cap; } -/* Compares record-run tuples A and B on run number first, then - on the current record according to SCP, but in descending - order. */ -static int -compare_record_run_minheap (const void *a, const void *b, const void *irs) +static bool +pqueue_is_empty (const struct pqueue *pq) { - return -compare_record_run (a, b, irs); + return pq->record_cnt == 0; } -/* Begins a new initial run, specifically its output file. */ static void -start_run (struct initial_run_state *irs) +pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id) { - irs->run++; - irs->case_cnt = 0; - - /* This casefile is internal to the sort, so don't use the factory - to create it. */ - irs->casefile = fastfile_create (irs->xsrt->value_cnt); - casefile_to_disk (irs->casefile); - case_nullify (&irs->last_output); -} + struct pqueue_record *r; + + assert (!pqueue_is_full (pq)); -/* Ends the current initial run. */ -static void -end_run (struct initial_run_state *irs) -{ - struct external_sort *xsrt = irs->xsrt; + r = &pq->records[pq->record_cnt++]; + r->id = id; + case_move (&r->c, c); + r->idx = pq->idx++; - /* Record initial run. 
*/ - if (irs->casefile != NULL) - { - casefile_sleep (irs->casefile); - if (xsrt->run_cnt >= xsrt->run_cap) - { - xsrt->run_cap *= 2; - xsrt->runs = xnrealloc (xsrt->runs, - xsrt->run_cap, sizeof *xsrt->runs); - } - xsrt->runs[xsrt->run_cnt++] = irs->casefile; - if (casefile_error (irs->casefile)) - irs->okay = false; - irs->casefile = NULL; - } + push_heap (pq->records, pq->record_cnt, sizeof *pq->records, + compare_pqueue_records_minheap, pq); } -/* Writes a record to the current initial run. */ static void -output_record (struct initial_run_state *irs) +pqueue_pop (struct pqueue *pq, struct ccase *c, casenumber *id) { - struct record_run *record_run; - struct ccase case_tmp; - - /* Extract minimum case from heap. */ - assert (irs->record_cnt > 0); - pop_heap (irs->records, irs->record_cnt--, sizeof *irs->records, - compare_record_run_minheap, irs); - record_run = irs->records + irs->record_cnt; - - /* Bail if an error has occurred. */ - if (!irs->okay) - return; - - /* Start new run if necessary. */ - assert (record_run->run == irs->run - || record_run->run == irs->run + 1); - if (record_run->run != irs->run) - { - end_run (irs); - start_run (irs); - } - assert (record_run->run == irs->run); - irs->case_cnt++; + struct pqueue_record *r; - /* Write to disk. */ - if (irs->casefile != NULL) - casefile_append (irs->casefile, &record_run->record); - - /* This record becomes last_output. */ - irs->last_output = case_tmp = record_run->record; - record_run->record = irs->records[irs->record_cap - 1].record; - irs->records[irs->record_cap - 1].record = case_tmp; -} - -/* Merging. */ + assert (!pqueue_is_empty (pq)); -static int choose_merge (struct casefile *runs[], int run_cnt, int order); -static struct casefile *merge_once (struct external_sort *, - struct casefile *[], size_t); + pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records, + compare_pqueue_records_minheap, pq); -/* Repeatedly merges run until only one is left, - and returns the final casefile. 
- Returns a null pointer if an I/O error occurs. */ -static struct casefile * -merge (struct external_sort *xsrt) -{ - while (xsrt->run_cnt > 1) - { - int order = MIN (MAX_MERGE_ORDER, xsrt->run_cnt); - int idx = choose_merge (xsrt->runs, xsrt->run_cnt, order); - xsrt->runs[idx] = merge_once (xsrt, xsrt->runs + idx, order); - remove_range (xsrt->runs, xsrt->run_cnt, sizeof *xsrt->runs, - idx + 1, order - 1); - xsrt->run_cnt -= order - 1; - - if (xsrt->runs[idx] == NULL) - return NULL; - } - assert (xsrt->run_cnt == 1); - xsrt->run_cnt = 0; - return xsrt->runs[0]; + r = &pq->records[pq->record_cnt]; + *id = r->id; + case_move (c, &r->c); } -/* Chooses ORDER runs out of the RUN_CNT runs in RUNS to merge, - and returns the index of the first one. - - For stability, we must merge only consecutive runs. For - efficiency, we choose the shortest consecutive sequence of - runs. */ +/* Compares record-run tuples A and B on id, then on case data, + then on insertion order, in descending order. */ static int -choose_merge (struct casefile *runs[], int run_cnt, int order) +compare_pqueue_records_minheap (const void *a_, const void *b_, + const void *pq_) { - int min_idx, min_sum; - int cur_idx, cur_sum; - int i; - - /* Sum up the length of the first ORDER runs. */ - cur_sum = 0; - for (i = 0; i < order; i++) - cur_sum += casefile_get_case_cnt (runs[i]); - - /* Find the shortest group of ORDER runs, - using a running total for efficiency. */ - min_idx = 0; - min_sum = cur_sum; - for (cur_idx = 1; cur_idx + order <= run_cnt; cur_idx++) - { - cur_sum -= casefile_get_case_cnt (runs[cur_idx - 1]); - cur_sum += casefile_get_case_cnt (runs[cur_idx + order - 1]); - if (cur_sum < min_sum) - { - min_sum = cur_sum; - min_idx = cur_idx; - } - } - - return min_idx; -} - -/* Merges the RUN_CNT initial runs specified in INPUT_FILES into a - new run, and returns the new run. - Returns a null pointer if an I/O error occurs. 
*/ -static struct casefile * -merge_once (struct external_sort *xsrt, - struct casefile **const input_files, - size_t run_cnt) -{ - struct run - { - struct casefile *file; - struct casereader *reader; - struct ccase ccase; - } - *runs; - - struct casefile *output = NULL; - int i; - - /* Open input files. */ - runs = xnmalloc (run_cnt, sizeof *runs); - for (i = 0; i < run_cnt; i++) - { - struct run *r = &runs[i]; - r->file = input_files[i]; - r->reader = casefile_get_destructive_reader (r->file); - if (!casereader_read_xfer (r->reader, &r->ccase)) - { - run_cnt--; - i--; - } - } - - /* Create output file. */ - output = xsrt->factory->create_casefile (xsrt->factory, xsrt->value_cnt); - casefile_to_disk (output); - - /* Merge. */ - while (run_cnt > 0) - { - struct run *min_run, *run; - - /* Find minimum. */ - min_run = runs; - for (run = runs + 1; run < runs + run_cnt; run++) - if (compare_record (&run->ccase, &min_run->ccase, xsrt->criteria) < 0) - min_run = run; - - /* Write minimum to output file. */ - casefile_append_xfer (output, &min_run->ccase); - - /* Read another case from minimum run. */ - if (!casereader_read_xfer (min_run->reader, &min_run->ccase)) - { - if (casefile_error (min_run->file) || casefile_error (output)) - goto error; - casereader_destroy (min_run->reader); - casefile_destroy (min_run->file); - - remove_element (runs, run_cnt, sizeof *runs, min_run - runs); - run_cnt--; - } - } - - if (!casefile_sleep (output)) - goto error; - free (runs); - - return output; - - error: - for (i = 0; i < run_cnt; i++) - casefile_destroy (runs[i].file); - casefile_destroy (output); - free (runs); - return NULL; + const struct pqueue_record *a = a_; + const struct pqueue_record *b = b_; + const struct pqueue *pq = pq_; + int result = a->id < b->id ? -1 : a->id > b->id; + if (result == 0) + result = case_ordering_compare_cases (&a->c, &b->c, pq->ordering); + if (result == 0) + result = a->idx < b->idx ? 
-1 : a->idx > b->idx; + return -result; } diff --git a/src/math/sort.h b/src/math/sort.h index c6f86e9f..0a0fe4ce 100644 --- a/src/math/sort.h +++ b/src/math/sort.h @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -16,57 +16,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#if !sort_h -#define sort_h 1 +#ifndef MATH_SORT_H +#define MATH_SORT_H 1 #include #include -struct casereader; -struct dictionary; -struct variable; -struct casefile_factory; +struct case_ordering; extern int min_buffers ; extern int max_buffers ; -extern bool allow_internal_sort ; +struct casewriter *sort_create_writer (struct case_ordering *); +struct casereader *sort_execute (struct casereader *, struct case_ordering *); -/* Sort direction. */ -enum sort_direction - { - SRT_ASCEND, /* A, B, C, ..., X, Y, Z. */ - SRT_DESCEND /* Z, Y, X, ..., C, B, A. */ - }; - -/* A sort criterion. */ -struct sort_criterion - { - int fv; /* Variable data index. */ - int width; /* 0=numeric, otherwise string width. */ - enum sort_direction dir; /* Sort direction. */ - }; - -/* A set of sort criteria. 
*/ -struct sort_criteria - { - struct sort_criterion *crits; - size_t crit_cnt; - }; - - -void sort_destroy_criteria (struct sort_criteria *); - -struct casefile *sort_execute (struct casereader *, - const struct sort_criteria *, - struct casefile_factory * - ); - -struct dataset ; -bool sort_active_file_in_place (struct dataset *ds, - const struct sort_criteria *); - -struct casefile *sort_active_file_to_casefile (struct dataset *ds, - const struct sort_criteria *); - -#endif /* !sort_h */ +#endif /* math/sort.h */ diff --git a/src/ui/ChangeLog b/src/ui/ChangeLog index 3f59ee01..38443f8e 100644 --- a/src/ui/ChangeLog +++ b/src/ui/ChangeLog @@ -1,3 +1,13 @@ +2007-06-06 Ben Pfaff + + Adapt case sources, sinks, and clients of procedure code to the + new infrastructure. + + * automake.mk: Remove files. + + * flexifile.c: Removed, dead code. + * flexifile.h: Ditto. + Thu Feb 8 06:34:52 2007 Ben Pfaff * [!(HAVE_SYS_TYPES_H && HAVE_SYS_WAIT_H)] (connect_debugger) In diff --git a/src/ui/automake.mk b/src/ui/automake.mk index f2d56bad..e1915b28 100644 --- a/src/ui/automake.mk +++ b/src/ui/automake.mk @@ -10,6 +10,4 @@ noinst_LIBRARIES += src/ui/libuicommon.a src_ui_libuicommon_a_SOURCES = \ src/ui/debugger.c \ - src/ui/debugger.h \ - src/ui/flexifile.c \ - src/ui/flexifile.h + src/ui/debugger.h diff --git a/src/ui/flexifile.c b/src/ui/flexifile.c deleted file mode 100644 index 339764e0..00000000 --- a/src/ui/flexifile.c +++ /dev/null @@ -1,409 +0,0 @@ -/* PSPP - computes sample statistics. - - Copyright (C) 2006, 2007 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#include -#include -#include -#include "flexifile.h" -#include -#include -#include -#include -#include - - -struct class_flexifile -{ - struct class_casefile parent; - - bool (*get_case) (const struct flexifile *, unsigned long, struct ccase *); - - bool (*insert_case) (struct flexifile *, struct ccase *, int ); - bool (*delete_cases) (struct flexifile *, int, int ); - - bool (*resize) (struct flexifile *, int, int ); -}; - -static const struct class_flexifile class; - -#define CLASS_FLEXIFILE(K) ((struct class_flexifile *) K) -#define CONST_CLASS_FLEXIFILE(K) ((const struct class_flexifile *) K) - - -/* A flexifile. */ -struct flexifile -{ - struct casefile cf; /* Parent */ - - size_t value_cnt; /* Case size in `union value's. */ - unsigned long case_cnt; /* Number of cases stored. */ - - - /* Memory storage. */ - struct ccase *cases; /* Pointer to array of cases. */ - unsigned long capacity; /* size of array in cases */ -}; - -struct class_flexifilereader -{ - struct class_casereader parent ; -}; - -static const struct class_flexifilereader class_reader; - -/* For reading out the cases in a flexifile. */ -struct flexifilereader -{ - struct casereader cr; /* Parent */ - - unsigned long case_idx; /* Case number of current case. */ - bool destructive; /* Is this a destructive reader? 
*/ -}; - - - -#define CHUNK_SIZE 10 - -static bool -impl_get_case(const struct flexifile *ff, unsigned long casenum, - struct ccase *); -static bool -impl_insert_case (struct flexifile *ff, struct ccase *c, int posn); - -static bool -impl_delete_cases (struct flexifile *ff, int n_cases, int first); - -static bool -impl_resize (struct flexifile *ff, int n_values, int posn); - - -/* Gets a case, for which writing may not be safe */ -bool -flexifile_get_case(const struct flexifile *ff, unsigned long casenum, - struct ccase *c) -{ - const struct class_flexifile *class = - CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ; - - return class->get_case(ff, casenum, c); -} - - -/* Insert N_VALUES before POSN. - If N_VALUES is negative, then deleted -N_VALUES instead -*/ -bool -flexifile_resize (struct flexifile *ff, int n_values, int posn) -{ - const struct class_flexifile *class = - CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ; - - return class->resize(ff, n_values, posn); -} - - - -bool -flexifile_insert_case (struct flexifile *ff, struct ccase *c, int posn) -{ - const struct class_flexifile *class = - CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ; - - return class->insert_case(ff, c, posn); -} - - -bool -flexifile_delete_cases (struct flexifile *ff, int n_cases, int first) -{ - const struct class_flexifile *class = - CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ; - - return class->delete_cases (ff, n_cases, first); -} - - -static unsigned long -flexifile_get_case_cnt (const struct casefile *cf) -{ - return FLEXIFILE(cf)->case_cnt; -} - -static size_t -flexifile_get_value_cnt (const struct casefile *cf) -{ - return FLEXIFILE(cf)->value_cnt; -} - - -static void -flexifile_destroy (struct casefile *cf) -{ - int i ; - for ( i = 0 ; i < FLEXIFILE(cf)->case_cnt; ++i ) - case_destroy( &FLEXIFILE(cf)->cases[i]); - - free(FLEXIFILE(cf)->cases); -} - -static void -grow(struct flexifile *ff) -{ - ff->capacity += CHUNK_SIZE; - ff->cases = xrealloc(ff->cases, 
ff->capacity * sizeof ( *ff->cases) ); -} - -static bool -flexifile_append (struct casefile *cf, const struct ccase *c) -{ - struct flexifile *ff = FLEXIFILE(cf); - - if (ff->case_cnt >= ff->capacity) - grow(ff); - - case_clone (&ff->cases[ff->case_cnt++], c); - - return true; -} - -static unsigned long -flexifilereader_cnum (const struct casereader *cr) -{ - struct flexifilereader *ffr = FLEXIFILEREADER(cr); - - return ffr->case_idx; -} - -static struct ccase * -flexifilereader_get_next_case (struct casereader *cr) -{ - struct flexifilereader *ffr = FLEXIFILEREADER(cr); - struct flexifile *ff = FLEXIFILE(casereader_get_casefile(cr)); - - if ( ffr->case_idx >= ff->case_cnt) - return NULL; - - return &ff->cases[ffr->case_idx++]; -} - -static void -flexifilereader_destroy(struct casereader *r) -{ - free(r); -} - -static struct casereader * -flexifile_get_reader (const struct casefile *cf_) -{ - struct casefile *cf = (struct casefile *) cf_; - struct flexifilereader *ffr = xzalloc (sizeof *ffr); - struct casereader *reader = (struct casereader *) ffr; - - casereader_register (cf, reader, CLASS_CASEREADER(&class_reader)); - - return reader; -} - - -static struct casereader * -flexifilereader_clone (const struct casereader *cr) -{ - const struct flexifilereader *ffr = (const struct flexifilereader *) cr; - struct flexifilereader *new_ffr = xzalloc (sizeof *new_ffr); - struct casereader *new_reader = (struct casereader *) new_ffr; - struct casefile *cf = casereader_get_casefile (cr); - - casereader_register (cf, new_reader, CLASS_CASEREADER(&class_reader)); - - new_ffr->case_idx = ffr->case_idx ; - new_ffr->destructive = ffr->destructive ; - - return new_reader; -} - - -static bool -flexifile_in_core(const struct casefile *cf UNUSED) -{ - /* Always in memory */ - return true; -} - -static bool -flexifile_error (const struct casefile *cf UNUSED ) -{ - return false; -} - - -struct casefile * -flexifile_create (size_t value_cnt) -{ - struct flexifile *ff = xzalloc (sizeof 
*ff); - struct casefile *cf = (struct casefile *) ff; - - casefile_register (cf, (struct class_casefile *) &class); - - ff->value_cnt = value_cnt; - - ff->cases = xzalloc(sizeof (struct ccase *) * CHUNK_SIZE); - ff->capacity = CHUNK_SIZE; - - return cf; -} - -static const struct class_flexifile class = { - { - flexifile_destroy, - flexifile_error, - flexifile_get_value_cnt, - flexifile_get_case_cnt, - flexifile_get_reader, - flexifile_append, - - flexifile_in_core, - 0, /* to_disk */ - 0 /* sleep */ - }, - - impl_get_case , - impl_insert_case , - impl_delete_cases, - impl_resize, -}; - - -static const struct class_flexifilereader class_reader = - { - { - flexifilereader_get_next_case, - flexifilereader_cnum, - flexifilereader_destroy, - flexifilereader_clone - } - }; - - -/* Implementations of class methods */ - -static bool -impl_get_case(const struct flexifile *ff, unsigned long casenum, - struct ccase *c) -{ - if ( casenum >= ff->case_cnt) - return false; - - case_clone (c, &ff->cases[casenum]); - - return true; -} - -#if DEBUGGING -#include - -static void -dumpcasedata(struct ccase *c) -{ - size_t value_cnt = case_get_value_cnt (c); - int i; - for ( i = 0 ; i < value_cnt * MAX_SHORT_STRING; ++i ) - putchar (case_str (c, 0)[i]); - putchar('\n'); -} -#endif - -static bool -impl_resize (struct flexifile *ff, int n_values, int posn) -{ - int i; - - for( i = 0 ; i < ff->case_cnt ; ++i ) - { - struct ccase c; - case_create (&c, ff->value_cnt + n_values); - - case_copy (&c, 0, &ff->cases[i], 0, posn); - if ( n_values > 0 ) - memset (case_data_rw_idx(&c, posn), ' ', n_values * MAX_SHORT_STRING) ; - case_copy (&c, posn + n_values, - &ff->cases[i], posn, ff->value_cnt - posn); - - case_destroy (&ff->cases[i]); - ff->cases[i] = c; - } - - ff->value_cnt += n_values; - - return true; -} - -static bool -impl_insert_case (struct flexifile *ff, struct ccase *c, int posn) -{ - int i; - struct ccase blank; - - assert (ff); - - if ( posn > ff->case_cnt ) - return false; - - if ( 
posn >= ff->capacity ) - grow(ff); - - case_create(&blank, ff->value_cnt); - - flexifile_append(CASEFILE(ff), &blank); - - case_destroy(&blank); - - /* Shift the existing cases down one */ - for ( i = ff->case_cnt ; i > posn; --i) - case_move(&ff->cases[i], &ff->cases[i-1]); - - case_clone (&ff->cases[posn], c); - - return true; -} - - -static bool -impl_delete_cases (struct flexifile *ff, int n_cases, int first) -{ - int i; - - if ( ff->case_cnt < first + n_cases ) - return false; - - for ( i = first ; i < first + n_cases; ++i ) - case_destroy (&ff->cases[i]); - - /* Shift the cases up by N_CASES */ - for ( i = first; i < ff->case_cnt - n_cases; ++i ) - { - case_move (&ff->cases[i], &ff->cases[i+ n_cases]); - } - - ff->case_cnt -= n_cases; - - return true; -} - - - diff --git a/src/ui/flexifile.h b/src/ui/flexifile.h deleted file mode 100644 index d1dc6b40..00000000 --- a/src/ui/flexifile.h +++ /dev/null @@ -1,46 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
*/ - -#ifndef FLEXIFILE_H -#define FLEXIFILE_H - -#include -#include -#include - -struct ccase; -struct casefile; -struct casereader; -struct flexifile; -struct flexifilereader; - -#define FLEXIFILE(CF) ( (struct flexifile *) CF) -#define FLEXIFILEREADER(CR) ( (struct flexifilereader *) CR) - -struct casefile *flexifile_create (size_t value_cnt); - -bool flexifile_get_case(const struct flexifile *ff, unsigned long casenum, - struct ccase *const c); - -bool flexifile_resize (struct flexifile *ff, int n_values, int posn); - -bool flexifile_insert_case (struct flexifile *ff, struct ccase *c, int posn); -bool flexifile_delete_cases (struct flexifile *ff, int n_cases, int first); - - -#endif diff --git a/src/ui/gui/ChangeLog b/src/ui/gui/ChangeLog index 82ea6d63..4b2fc7c4 100644 --- a/src/ui/gui/ChangeLog +++ b/src/ui/gui/ChangeLog @@ -1,3 +1,19 @@ +2007-06-06 Ben Pfaff + + Adapt case sources, sinks, and clients of procedure code to the + new infrastructure. + + * automake.mk: Removed files. + + * flexifile-factory.c: Removed, dead code. + * flexifile-factory.h: Ditto. + + * helper.c: Adapt to new procedure and datasheet code. + * missing-val-dialog.c: Ditto. + * psppire-case-file.c: Ditto. + * psppire-data-store.c: Ditto. + * psppire.c: Ditto. 
+ 2007-06-03 Ben Pfaff * psppire-var-store.c (psppire_var_store_item_editable): Use diff --git a/src/ui/gui/automake.mk b/src/ui/gui/automake.mk index 969ede19..d14f54a4 100644 --- a/src/ui/gui/automake.mk +++ b/src/ui/gui/automake.mk @@ -84,8 +84,6 @@ src_ui_gui_psppire_SOURCES = \ src/ui/gui/dialog-common.h \ src/ui/gui/dict-display.c \ src/ui/gui/dict-display.h \ - src/ui/gui/flexifile-factory.h \ - src/ui/gui/flexifile-factory.c \ src/ui/gui/main.c \ src/ui/gui/message-dialog.c \ src/ui/gui/message-dialog.h \ diff --git a/src/ui/gui/flexifile-factory.c b/src/ui/gui/flexifile-factory.c deleted file mode 100644 index cdb7d9d5..00000000 --- a/src/ui/gui/flexifile-factory.c +++ /dev/null @@ -1,59 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. 
*/ - -#include - -#include -#include -#include -#include "flexifile-factory.h" -#include -#include - - -struct flexifile_factory - { - struct casefile_factory parent; - }; - - -static struct casefile * -produce_flexifile (struct casefile_factory *this UNUSED, size_t value_cnt) -{ - struct casefile *ff = flexifile_create (value_cnt); - - return ff; -} - - -struct casefile_factory * -flexifile_factory_create (void) -{ - struct flexifile_factory *fact = xzalloc (sizeof (*fact)); - - fact->parent.create_casefile = produce_flexifile; - - return (struct casefile_factory *) fact; -} - - -void -flexifile_factory_destroy (struct casefile_factory *factory) -{ - free (factory); -} diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c index 69b5d437..f8e3ddba 100644 --- a/src/ui/gui/helper.c +++ b/src/ui/gui/helper.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -171,7 +170,7 @@ execute_syntax (struct getl_interface *sss) { struct lexer *lexer; - g_return_val_if_fail (proc_has_source (the_dataset), FALSE); + g_return_val_if_fail (proc_has_active_file (the_dataset), FALSE); lexer = lex_create (the_source_stream); @@ -189,18 +188,10 @@ execute_syntax (struct getl_interface *sss) lex_destroy (lexer); - /* The GUI must *always* have a data source, even if it's an empty one. - Therefore, we find that there is none, (for example NEW FILE was the last - item in the syntax) then we create a new one. */ - if ( ! proc_has_source (the_dataset)) - proc_set_source (the_dataset, - storage_source_create (the_data_store->case_file->flexifile) - ); - /* GUI syntax needs this implicit EXECUTE command at the end of every script. Otherwise commands like GET could leave the GUI without a casefile. 
*/ - return procedure (the_dataset, NULL, NULL); + return proc_execute (the_dataset); } diff --git a/src/ui/gui/missing-val-dialog.c b/src/ui/gui/missing-val-dialog.c index 63403d92..86f7d86f 100644 --- a/src/ui/gui/missing-val-dialog.c +++ b/src/ui/gui/missing-val-dialog.c @@ -90,8 +90,8 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data) gint nvals = 0; gint badvals = 0; gint i; - mv_clear (&dialog->mvl); - for (i = 0 ; i < 3 ; ++i ) + mv_clear(&dialog->mvl); + for(i = 0 ; i < 3 ; ++i ) { gchar *text = g_strdup (gtk_entry_get_text (GTK_ENTRY (dialog->mv[i]))); diff --git a/src/ui/gui/psppire-case-file.c b/src/ui/gui/psppire-case-file.c index ec50b152..8a4f196d 100644 --- a/src/ui/gui/psppire-case-file.c +++ b/src/ui/gui/psppire-case-file.c @@ -26,13 +26,14 @@ #include #include -#include -#include "flexifile-factory.h" -#include #include +#include #include #include +#include "xalloc.h" +#include "xallocsa.h" + /* --- prototypes --- */ static void psppire_case_file_class_init (PsppireCaseFileClass *class); static void psppire_case_file_init (PsppireCaseFile *case_file); @@ -132,8 +133,7 @@ psppire_case_file_finalize (GObject *object) { PsppireCaseFile *cf = PSPPIRE_CASE_FILE (object); - if ( cf->flexifile) - casefile_destroy (cf->flexifile); + datasheet_destroy (cf->datasheet); G_OBJECT_CLASS (parent_class)->finalize (object); } @@ -141,7 +141,7 @@ psppire_case_file_finalize (GObject *object) static void psppire_case_file_init (PsppireCaseFile *cf) { - cf->flexifile = 0; + cf->datasheet = NULL; } @@ -156,16 +156,16 @@ psppire_case_file_new (void) { PsppireCaseFile *cf = g_object_new (G_TYPE_PSPPIRE_CASE_FILE, NULL); - cf->flexifile = flexifile_create (0); + cf->datasheet = datasheet_create (NULL); return cf; } void -psppire_case_file_replace_flexifile (PsppireCaseFile *cf, struct flexifile *ff) +psppire_case_file_replace_datasheet (PsppireCaseFile *cf, struct datasheet *ds) { - cf->flexifile = (struct casefile *) ff; + cf->datasheet = ds; } @@ -173,16 
+173,14 @@ psppire_case_file_replace_flexifile (PsppireCaseFile *cf, struct flexifile *ff) gboolean psppire_case_file_delete_cases (PsppireCaseFile *cf, gint n_cases, gint first) { - int result; - g_return_val_if_fail (cf, FALSE); - g_return_val_if_fail (cf->flexifile, FALSE); + g_return_val_if_fail (cf->datasheet, FALSE); - result = flexifile_delete_cases (FLEXIFILE (cf->flexifile), n_cases, first); + datasheet_delete_rows (cf->datasheet, first, n_cases); g_signal_emit (cf, signals [CASES_DELETED], 0, n_cases, first); - return result; + return TRUE; } /* Insert case CC into the case file before POSN */ @@ -191,12 +189,14 @@ psppire_case_file_insert_case (PsppireCaseFile *cf, struct ccase *cc, gint posn) { + struct ccase tmp; bool result ; g_return_val_if_fail (cf, FALSE); - g_return_val_if_fail (cf->flexifile, FALSE); + g_return_val_if_fail (cf->datasheet, FALSE); - result = flexifile_insert_case (FLEXIFILE (cf->flexifile), cc, posn); + case_clone (&tmp, cc); + result = datasheet_insert_rows (cf->datasheet, posn, &tmp, 1); if ( result ) g_signal_emit (cf, signals [CASE_INSERTED], 0, posn); @@ -212,15 +212,17 @@ gboolean psppire_case_file_append_case (PsppireCaseFile *cf, struct ccase *c) { + struct ccase tmp; bool result ; gint posn ; g_return_val_if_fail (cf, FALSE); - g_return_val_if_fail (cf->flexifile, FALSE); + g_return_val_if_fail (cf->datasheet, FALSE); - posn = casefile_get_case_cnt (cf->flexifile); + posn = datasheet_get_row_cnt (cf->datasheet); - result = casefile_append (cf->flexifile, c); + case_clone (&tmp, c); + result = datasheet_insert_rows (cf->datasheet, posn, &tmp, 1); g_signal_emit (cf, signals [CASE_INSERTED], 0, posn); @@ -233,69 +235,68 @@ psppire_case_file_get_case_count (const PsppireCaseFile *cf) { g_return_val_if_fail (cf, FALSE); - if ( ! cf->flexifile) + if ( ! cf->datasheet) return 0; - return casefile_get_case_cnt (cf->flexifile); + return datasheet_get_row_cnt (cf->datasheet); } -/* Return the IDXth value from case CASENUM. 
- The return value must not be freed or written to - */ -const union value * -psppire_case_file_get_value (const PsppireCaseFile *cf, gint casenum, gint idx) +/* Copies the IDXth value from case CASENUM into VALUE. + If VALUE is null, then memory is allocated with + malloc. Returns the value if successful, NULL on failure. */ +union value * +psppire_case_file_get_value (const PsppireCaseFile *cf, + casenumber casenum, size_t idx, + union value *value, int width) { - const union value *v; - struct ccase c; - - g_return_val_if_fail (cf, NULL); - g_return_val_if_fail (cf->flexifile, NULL); - - g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), NULL); + bool allocated; + + g_return_val_if_fail (cf, false); + g_return_val_if_fail (cf->datasheet, false); - flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &c); + g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), false); - v = case_data_idx (&c, idx); - case_destroy (&c); - - return v; + if (value == NULL) + { + value = xnmalloc (value_cnt_from_width (width), sizeof *value); + allocated = true; + } + else + allocated = false; + if (!datasheet_get_value (cf->datasheet, casenum, idx, value, width)) + { + if (allocated) + free (value); + value = NULL; + } + return value; } void psppire_case_file_clear (PsppireCaseFile *cf) { - casefile_destroy (cf->flexifile); - cf->flexifile = 0; + datasheet_destroy (cf->datasheet); + cf->datasheet = NULL; g_signal_emit (cf, signals [CASES_DELETED], 0, 0, -1); } -/* Set the IDXth value of case C to SYSMIS/EMPTY */ +/* Set the IDXth value of case C to V. + Returns true if successful, false on I/O error. 
*/ gboolean psppire_case_file_set_value (PsppireCaseFile *cf, gint casenum, gint idx, union value *v, gint width) { - struct ccase cc ; - int bytes; + bool ok; g_return_val_if_fail (cf, FALSE); - g_return_val_if_fail (cf->flexifile, FALSE); - - g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), FALSE); + g_return_val_if_fail (cf->datasheet, FALSE); - if ( ! flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &cc) ) - return FALSE; + g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), FALSE); - if ( width == 0 ) - bytes = MAX_SHORT_STRING; - else - bytes = DIV_RND_UP (width, MAX_SHORT_STRING) * MAX_SHORT_STRING ; - - /* Cast away const in flagrant abuse of the casefile */ - memcpy ((union value *)case_data_idx (&cc, idx), v, bytes); - - g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum); - - return TRUE; + ok = datasheet_put_value (cf->datasheet, casenum, idx, v, width); + if (ok) + g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum); + return ok; } @@ -305,49 +306,43 @@ gboolean psppire_case_file_data_in (PsppireCaseFile *cf, gint casenum, gint idx, struct substring input, const struct fmt_spec *fmt) { - struct ccase cc ; + union value *value; + int width; + bool ok; g_return_val_if_fail (cf, FALSE); - g_return_val_if_fail (cf->flexifile, FALSE); + g_return_val_if_fail (cf->datasheet, FALSE); - g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), FALSE); + g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), FALSE); - if ( ! 
flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &cc) ) - return FALSE; + width = fmt_var_width (fmt); + value = xallocsa (value_cnt_from_width (width) * sizeof *value); + ok = (datasheet_get_value (cf->datasheet, casenum, idx, value, width) + && data_in (input, fmt->type, 0, 0, value, width) + && datasheet_put_value (cf->datasheet, casenum, idx, value, width)); - /* Cast away const in flagrant abuse of the casefile */ - if (!data_in (input, fmt->type, 0, 0, - (union value *) case_data_idx (&cc, idx), fmt_var_width (fmt))) - g_warning ("Cant set value\n"); + if (ok) + g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum); - g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum); + freesa (value); return TRUE; } void -psppire_case_file_sort (PsppireCaseFile *cf, const struct sort_criteria *sc) +psppire_case_file_sort (PsppireCaseFile *cf, struct case_ordering *ordering) { + struct casereader *sorted_data; gint c; - struct casereader *reader = casefile_get_reader (cf->flexifile, NULL); - struct casefile *cfile; - - struct casefile_factory *factory = flexifile_factory_create (); - - cfile = sort_execute (reader, sc, factory); - - casefile_destroy (cf->flexifile); - - cf->flexifile = cfile; + sorted_data = sort_execute (datasheet_make_reader (cf->datasheet), ordering); + cf->datasheet = datasheet_create (sorted_data); /* FIXME: Need to have a signal to change a range of cases, instead of calling a signal many times */ - for ( c = 0 ; c < casefile_get_case_cnt (cf->flexifile) ; ++c ) + for ( c = 0 ; c < datasheet_get_row_cnt (cf->datasheet) ; ++c ) g_signal_emit (cf, signals [CASE_CHANGED], 0, c); - - flexifile_factory_destroy (factory); } @@ -357,16 +352,17 @@ gboolean psppire_case_file_insert_values (PsppireCaseFile *cf, gint n_values, gint before) { + union value *values; g_return_val_if_fail (cf, FALSE); - if ( ! cf->flexifile ) - { - cf->flexifile = flexifile_create (n_values); + if ( ! 
cf->datasheet ) + cf->datasheet = datasheet_create (NULL); - return TRUE; - } + values = xcalloc (n_values, sizeof *values); + datasheet_insert_columns (cf->datasheet, values, n_values, before); + free (values); - return flexifile_resize (FLEXIFILE (cf->flexifile), n_values, before); + return TRUE; } /* Fills C with the CASENUMth case. @@ -377,7 +373,7 @@ psppire_case_file_get_case (const PsppireCaseFile *cf, gint casenum, struct ccase *c) { g_return_val_if_fail (cf, FALSE); - g_return_val_if_fail (cf->flexifile, FALSE); + g_return_val_if_fail (cf->datasheet, FALSE); - return flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, c); + return datasheet_get_row (cf->datasheet, casenum, c); } diff --git a/src/ui/gui/psppire-case-file.h b/src/ui/gui/psppire-case-file.h index fd8af079..b3fdfcda 100644 --- a/src/ui/gui/psppire-case-file.h +++ b/src/ui/gui/psppire-case-file.h @@ -26,6 +26,7 @@ #include #include +#include @@ -55,7 +56,7 @@ struct _PsppireCaseFile { GObject parent; - struct casefile *flexifile; + struct datasheet *datasheet; }; @@ -75,8 +76,9 @@ gboolean psppire_case_file_insert_case (PsppireCaseFile *cf, struct ccase *c, gi gint psppire_case_file_get_case_count (const PsppireCaseFile *cf); -const union value * psppire_case_file_get_value (const PsppireCaseFile *cf, - gint c, gint idx); +union value * psppire_case_file_get_value (const PsppireCaseFile *cf, + casenumber, size_t idx, + union value *, int width); struct fmt_spec; @@ -95,14 +97,14 @@ gboolean psppire_case_file_delete_cases (PsppireCaseFile *cf, gint n_rows, gboolean psppire_case_file_insert_values (PsppireCaseFile *cf, gint n_values, gint before); -struct sort_criteria; -void psppire_case_file_sort (PsppireCaseFile *cf, const struct sort_criteria *); +struct case_ordering; +void psppire_case_file_sort (PsppireCaseFile *cf, struct case_ordering *); gboolean psppire_case_file_get_case (const PsppireCaseFile *cf, gint casenum, struct ccase *c); -void psppire_case_file_replace_flexifile 
(PsppireCaseFile *, - struct flexifile *); +void psppire_case_file_replace_datasheet (PsppireCaseFile *, + struct datasheet *); diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c index 6af1b224..d0a34617 100644 --- a/src/ui/gui/psppire-data-store.c +++ b/src/ui/gui/psppire-data-store.c @@ -25,8 +25,8 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid -#include -#include +#include +#include #include #include @@ -454,7 +454,7 @@ psppire_data_store_insert_new_case (PsppireDataStore *ds, gint posn) /* Opportunity for optimisation exists here when creating a blank case */ - val_cnt = casefile_get_value_cnt (ds->case_file->flexifile) ; + val_cnt = datasheet_get_column_cnt (ds->case_file->datasheet) ; case_create (&cc, val_cnt); @@ -484,7 +484,7 @@ psppire_data_store_get_string (const GSheetModel *model, gint row, gint column) char *text; const struct fmt_spec *fp ; const struct variable *pv ; - const union value *v ; + union value *v ; GString *s; PsppireDataStore *store = PSPPIRE_DATA_STORE (model); @@ -505,19 +505,19 @@ psppire_data_store_get_string (const GSheetModel *model, gint row, gint column) g_assert (idx >= 0); - v = psppire_case_file_get_value (store->case_file, row, idx); - + v = psppire_case_file_get_value (store->case_file, row, idx, NULL, + var_get_width (pv)); + g_return_val_if_fail (v, NULL); if ( store->show_labels) { - const struct val_labs * vl = var_get_value_labels (pv); - - const gchar *label; - if ( (label = val_labs_find (vl, *v)) ) - { + const gchar *label = var_lookup_value_label (pv, v); + if (label) + { + free (v); return pspp_locale_to_utf8 (label, -1, 0); - } + } } fp = var_get_write_format (pv); @@ -539,6 +539,7 @@ psppire_data_store_get_string (const GSheetModel *model, gint row, gint column) g_strchomp (text); + free (v); return text; } @@ -649,7 +650,7 @@ psppire_data_store_create_system_file (PsppireDataStore *store, 3 /* version */ }; - struct sfm_writer *writer ; + struct casewriter *writer; 
g_assert (handle); @@ -664,15 +665,10 @@ psppire_data_store_create_system_file (PsppireDataStore *store, for (i = 0 ; i < psppire_case_file_get_case_count (store->case_file); ++i ) { struct ccase c; - - case_create (&c, var_cnt); psppire_case_file_get_case (store->case_file, i, &c); - sfm_write_case (writer, &c); - - case_destroy (&c); + casewriter_write (writer, &c); } - - sfm_close_writer (writer); + casewriter_destroy (writer); } diff --git a/src/ui/gui/psppire.c b/src/ui/gui/psppire.c index c575534c..d636ab49 100644 --- a/src/ui/gui/psppire.c +++ b/src/ui/gui/psppire.c @@ -29,16 +29,15 @@ #include "psppire.h" +#include +#include #include #include -#include -#include #include #include #include #include #include -#include #include #include @@ -50,7 +49,6 @@ #include "data-sheet.h" #include "var-sheet.h" #include "message-dialog.h" -#include "flexifile-factory.h" PsppireDataStore *the_data_store = 0; PsppireVarStore *the_var_store = 0; @@ -68,28 +66,17 @@ replace_dictionary (struct dictionary *d) static void -replace_flexifile (struct case_source *s) +replace_casereader (struct casereader *s) { - if ( NULL == s ) - psppire_case_file_replace_flexifile (the_data_store->case_file, - (struct flexifile *) flexifile_create (0)); - else - { - if ( ! case_source_is_class (s, &storage_source_class)) - return ; - - psppire_case_file_replace_flexifile (the_data_store->case_file, - (struct flexifile *) - storage_source_get_casefile (s)); - } -} - + struct datasheet *datasheet = datasheet_create (s); + psppire_case_file_replace_datasheet (the_data_store->case_file, + datasheet); +} void initialize (void) { - struct casefile_factory *factory; PsppireDict *dictionary = 0; /* gtk_init messes with the locale. 
@@ -105,14 +92,12 @@ initialize (void) fmt_init (); settings_init (); fh_init (); - factory = flexifile_factory_create (); the_source_stream = create_source_stream ( fn_getenv_default ("STAT_INCLUDE_PATH", include_path) ); - the_dataset = create_dataset (factory, - replace_flexifile, + the_dataset = create_dataset (replace_casereader, replace_dictionary); message_dialog_init (the_source_stream); @@ -127,12 +112,12 @@ initialize (void) /* Create the model for the var_sheet */ the_var_store = psppire_var_store_new (dictionary); - the_data_store = psppire_data_store_new (dictionary); - proc_set_source (the_dataset, - storage_source_create (the_data_store->case_file->flexifile) - ); + + proc_set_active_file_data (the_dataset, + datasheet_make_reader (the_data_store->case_file->datasheet)); + create_icon_factory (); diff --git a/src/ui/gui/val-labs-dialog.h b/src/ui/gui/val-labs-dialog.h index e11b4330..404b7a5a 100644 --- a/src/ui/gui/val-labs-dialog.h +++ b/src/ui/gui/val-labs-dialog.h @@ -29,6 +29,7 @@ #include #include +#include struct val_labs; diff --git a/src/ui/terminal/ChangeLog b/src/ui/terminal/ChangeLog index ad09bff3..10d7d365 100644 --- a/src/ui/terminal/ChangeLog +++ b/src/ui/terminal/ChangeLog @@ -1,3 +1,10 @@ +2007-06-06 Ben Pfaff + + Adapt case sources, sinks, and clients of procedure code to the + new infrastructure. + + * main.c: No need for fastfile_factory any more. 
+ 2007-02-25 Ben Pfaff Thanks to Jason Stover for verifying that this patch helps under diff --git a/src/ui/terminal/main.c b/src/ui/terminal/main.c index b95ae2e3..1a1c436e 100644 --- a/src/ui/terminal/main.c +++ b/src/ui/terminal/main.c @@ -27,7 +27,6 @@ #include "progname.h" #include "read-line.h" -#include #include #include #include @@ -86,7 +85,6 @@ static struct source_stream *the_source_stream ; int main (int argc, char **argv) { - struct casefile_factory *factory; signal (SIGABRT, bug_handler); signal (SIGSEGV, bug_handler); signal (SIGFPE, bug_handler); @@ -111,9 +109,7 @@ main (int argc, char **argv) settings_init (); random_init (); - factory = fastfile_factory_create (); - - the_dataset = create_dataset (factory, NULL, NULL); + the_dataset = create_dataset (NULL, NULL); if (parse_command_line (argc, argv, the_source_stream)) { diff --git a/tests/ChangeLog b/tests/ChangeLog index 4bec5f1d..0f0d2c89 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2007-06-06 Ben Pfaff + + * automake.mk: Remove test. + + * tests/xforms/casefile.sh: Removed test. + 2007-06-06 Ben Pfaff * automake.mk: Add new test. diff --git a/tests/automake.mk b/tests/automake.mk index 66a67ed7..aead28ae 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -118,7 +118,6 @@ dist_TESTS = \ tests/bugs/temp-freq.sh \ tests/bugs/print-crash.sh \ tests/bugs/keep-all.sh \ - tests/xforms/casefile.sh \ tests/xforms/recode.sh \ tests/stats/descript-basic.sh \ tests/stats/descript-missing.sh \ diff --git a/tests/xforms/casefile.sh b/tests/xforms/casefile.sh deleted file mode 100755 index c3548572..00000000 --- a/tests/xforms/casefile.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh - -# This program tests casefiles by running DEBUG CASEFILE. - -TEMPDIR=/tmp/pspp-tst-$$ - -# ensure that top_builddir are absolute -if [ -z "$top_builddir" ] ; then top_builddir=. ; fi -if [ -z "$top_srcdir" ] ; then top_srcdir=. 
; fi -top_builddir=`cd $top_builddir; pwd` -PSPP=$top_builddir/src/ui/terminal/pspp - -# ensure that top_srcdir is absolute -top_srcdir=`cd $top_srcdir; pwd` - -STAT_CONFIG_PATH=$top_srcdir/config -export STAT_CONFIG_PATH - - -cleanup() -{ - cd / - rm -rf $TEMPDIR -} - - -fail() -{ - echo $activity - echo FAILED - cleanup; - exit 1; -} - - -no_result() -{ - echo $activity - echo NO RESULT; - cleanup; - exit 2; -} - -pass() -{ - cleanup; - exit 0; -} - -mkdir -p $TEMPDIR - -cd $TEMPDIR - -activity="create program" -cat > $TEMPDIR/casefile.stat < $TEMPDIR/casefile.out -if [ $? -ne 0 ] ; then no_result ; fi - -activity="compare results" -perl -pi -e 's/^\s*$//g' $TEMPDIR/casefile.out -diff -b $TEMPDIR/casefile.out - <