+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ * Smake: Add xallocsa to modules.
+
2007-04-22 Ben Pfaff <blp@gnu.org>
Implement model checker for testing purposes.
vsnprintf \
xalloc \
xalloc-die \
+ xallocsa \
xsize \
xstrndup \
xvasprintf
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ Actually implement the new procedure code and adapt all of its
+ clients to match. Also adapt all of the other case sources and
+ sinks in the tree and their clients to use the
+ casereader/casewriter infrastructure.
+
+ * automake.mk: Add and remove files.
+
+ * any-reader.c: Change into a casereader.
+ * por-file-reader.c: Ditto.
+ * scratch-reader.c: Ditto.
+ * sys-file-reader.c: Ditto.
+
+ * any-writer.c: Change into a casewriter.
+ * por-file-writer.c: Ditto.
+ * scratch-writer.c: Ditto.
+ * sys-file-writer.c: Ditto.
+
+ * procedure.c: Change to use casereader, casewriter, caseinit, and
+ other new infrastructure.
+
+ * scratch-handle.c: Adapt to new infrastructure.
+
+ * case-sink.c: Removed, now dead code.
+ * case-sink.h: Ditto.
+ * case-source.c: Ditto.
+ * case-source.h: Ditto.
+ * casefile-factory.h: Ditto.
+ * casefile-private.h: Ditto.
+ * casefile.c: Ditto.
+ * casefile.h: Ditto.
+ * casefilter.c: Ditto.
+ * casefilter.h: Ditto.
+ * fastfile.c: Ditto.
+ * fastfile.h: Ditto.
+ * fastfile-factory.c: Ditto.
+ * fastfile-factory.h: Ditto.
+ * storage-stream.c: Ditto.
+ * storage-stream.h: Ditto.
+
2007-06-06 Ben Pfaff <blp@gnu.org>
Add datasheet code.
#include "gettext.h"
#define _(msgid) gettext (msgid)
-/* Type of file backing an any_reader. */
-enum any_reader_type
- {
- SYSTEM_FILE, /* System file. */
- PORTABLE_FILE, /* Portable file. */
- SCRATCH_FILE /* Scratch file. */
- };
-
-/* Reader for any type of case-structured file. */
-struct any_reader
- {
- enum any_reader_type type; /* Type of file. */
- void *private; /* Private data. */
- };
-
/* Result of type detection. */
enum detect_result
{
return is_type ? YES : NO;
}
-/* If PRIVATE is non-null, creates and returns a new any_reader,
- initializing its fields to TYPE and PRIVATE. If PRIVATE is a
- null pointer, just returns a null pointer. */
-static struct any_reader *
-make_any_reader (enum any_reader_type type, void *private)
-{
- if (private != NULL)
- {
- struct any_reader *reader = xmalloc (sizeof *reader);
- reader->type = type;
- reader->private = private;
- return reader;
- }
- else
- return NULL;
-}
-
-/* Creates an any_reader for HANDLE. On success, returns the new
- any_reader and stores the file's dictionary into *DICT. On
+/* Creates a casereader for HANDLE. On success, returns the new
+ casereader and stores the file's dictionary into *DICT. On
failure, returns a null pointer. */
-struct any_reader *
+struct casereader *
any_reader_open (struct file_handle *handle, struct dictionary **dict)
{
switch (fh_get_referent (handle))
if (result == IO_ERROR)
return NULL;
else if (result == YES)
- return make_any_reader (SYSTEM_FILE,
- sfm_open_reader (handle, dict, NULL));
+ return sfm_open_reader (handle, dict, NULL);
result = try_detect (handle, pfm_detect);
if (result == IO_ERROR)
return NULL;
else if (result == YES)
- return make_any_reader (PORTABLE_FILE,
- pfm_open_reader (handle, dict, NULL));
+ return pfm_open_reader (handle, dict, NULL);
msg (SE, _("\"%s\" is not a system or portable file."),
fh_get_file_name (handle));
return NULL;
case FH_REF_SCRATCH:
- return make_any_reader (SCRATCH_FILE,
- scratch_reader_open (handle, dict));
- }
- NOT_REACHED ();
-}
-
-/* Reads a single case from READER into C.
- Returns true if successful, false at end of file or on error. */
-bool
-any_reader_read (struct any_reader *reader, struct ccase *c)
-{
- switch (reader->type)
- {
- case SYSTEM_FILE:
- return sfm_read_case (reader->private, c);
-
- case PORTABLE_FILE:
- return pfm_read_case (reader->private, c);
-
- case SCRATCH_FILE:
- return scratch_reader_read_case (reader->private, c);
+ return scratch_reader_open (handle, dict);
}
NOT_REACHED ();
}
-
-/* Returns true if an I/O error has occurred on READER, false
- otherwise. */
-bool
-any_reader_error (struct any_reader *reader)
-{
- switch (reader->type)
- {
- case SYSTEM_FILE:
- return sfm_read_error (reader->private);
-
- case PORTABLE_FILE:
- return pfm_read_error (reader->private);
-
- case SCRATCH_FILE:
- return scratch_reader_error (reader->private);
- }
- NOT_REACHED ();
-}
-
-/* Closes READER. */
-void
-any_reader_close (struct any_reader *reader)
-{
- if (reader == NULL)
- return;
-
- switch (reader->type)
- {
- case SYSTEM_FILE:
- sfm_close_reader (reader->private);
- break;
-
- case PORTABLE_FILE:
- pfm_close_reader (reader->private);
- break;
-
- case SCRATCH_FILE:
- scratch_reader_close (reader->private);
- break;
-
- default:
- NOT_REACHED ();
- }
-
- free (reader);
-}
struct file_handle;
struct dictionary;
-struct ccase;
-struct any_reader *any_reader_open (struct file_handle *,
+struct casereader *any_reader_open (struct file_handle *,
struct dictionary **);
-bool any_reader_read (struct any_reader *, struct ccase *);
-bool any_reader_error (struct any_reader *);
-void any_reader_close (struct any_reader *);
#endif /* any-reader.h */
#include "gettext.h"
#define _(msgid) gettext (msgid)
-/* Type of file backing an any_writer. */
-enum any_writer_type
- {
- SYSTEM_FILE, /* System file. */
- PORTABLE_FILE, /* Portable file. */
- SCRATCH_FILE /* Scratch file. */
- };
-
-/* Writer for any type of case-structured file. */
-struct any_writer
- {
- enum any_writer_type type; /* Type of file. */
- void *private; /* Private data. */
- };
-
/* Creates and returns a writer for HANDLE with the given DICT. */
-struct any_writer *
+struct casewriter *
any_writer_open (struct file_handle *handle, struct dictionary *dict)
{
switch (fh_get_referent (handle))
{
case FH_REF_FILE:
{
- struct any_writer *writer;
+ struct casewriter *writer;
char *extension;
extension = fn_extension (fh_get_file_name (handle));
str_lowercase (extension);
if (!strcmp (extension, ".por"))
- writer = any_writer_from_pfm_writer (
- pfm_open_writer (handle, dict, pfm_writer_default_options ()));
+ writer = pfm_open_writer (handle, dict,
+ pfm_writer_default_options ());
else
- writer = any_writer_from_sfm_writer (
- sfm_open_writer (handle, dict, sfm_writer_default_options ()));
+ writer = sfm_open_writer (handle, dict,
+ sfm_writer_default_options ());
free (extension);
return writer;
return NULL;
case FH_REF_SCRATCH:
- return any_writer_from_scratch_writer (scratch_writer_open (handle,
- dict));
+ return scratch_writer_open (handle, dict);
}
NOT_REACHED ();
}
-
-/* If PRIVATE is non-null, creates and returns a new any_writer,
- initializing its fields to TYPE and PRIVATE. If PRIVATE is a
- null pointer, just returns a null pointer. */
-static struct any_writer *
-make_any_writer (enum any_writer_type type, void *private)
-{
- if (private != NULL)
- {
- struct any_writer *writer = xmalloc (sizeof *writer);
- writer->type = type;
- writer->private = private;
- return writer;
- }
- else
- return NULL;
-}
-
-/* If SFM_WRITER is non-null, encapsulates SFM_WRITER in an
- any_writer and returns it. If SFM_WRITER is null, just
- returns a null pointer.
-
- Useful when you need to pass options to sfm_open_writer().
- Typical usage:
- any_writer_from_sfm_writer (sfm_open_writer (fh, dict, opts))
- If you don't need to pass options, then any_writer_open() by
- itself is easier and more straightforward. */
-struct any_writer *
-any_writer_from_sfm_writer (struct sfm_writer *sfm_writer)
-{
- return make_any_writer (SYSTEM_FILE, sfm_writer);
-}
-
-/* If PFM_WRITER is non-null, encapsulates PFM_WRITER in an
- any_writer and returns it. If PFM_WRITER is null, just
- returns a null pointer.
-
- Useful when you need to pass options to pfm_open_writer().
- Typical usage:
- any_writer_from_pfm_writer (pfm_open_writer (fh, dict, opts))
- If you don't need to pass options, then any_writer_open() by
- itself is easier and more straightforward. */
-struct any_writer *
-any_writer_from_pfm_writer (struct pfm_writer *pfm_writer)
-{
- return make_any_writer (PORTABLE_FILE, pfm_writer);
-}
-
-/* If SCRATCH_WRITER is non-null, encapsulates SCRATCH_WRITER in
- an any_writer and returns it. If SCRATCH_WRITER is null, just
- returns a null pointer.
-
- Not particularly useful. Included just for consistency. */
-struct any_writer *
-any_writer_from_scratch_writer (struct scratch_writer *scratch_writer)
-{
- return make_any_writer (SCRATCH_FILE, scratch_writer);
-}
-
-/* Writes cases C to WRITER.
- Returns true if successful, false on failure. */
-bool
-any_writer_write (struct any_writer *writer, const struct ccase *c)
-{
- switch (writer->type)
- {
- case SYSTEM_FILE:
- return sfm_write_case (writer->private, c);
-
- case PORTABLE_FILE:
- return pfm_write_case (writer->private, c);
-
- case SCRATCH_FILE:
- return scratch_writer_write_case (writer->private, c);
- }
- NOT_REACHED ();
-}
-
-/* Returns true if an I/O error has occurred on WRITER, false
- otherwise. */
-bool
-any_writer_error (const struct any_writer *writer)
-{
- switch (writer->type)
- {
- case SYSTEM_FILE:
- return sfm_write_error (writer->private);
-
- case PORTABLE_FILE:
- return pfm_write_error (writer->private);
-
- case SCRATCH_FILE:
- return scratch_writer_error (writer->private);
- }
- NOT_REACHED ();
-}
-
-/* Closes WRITER.
- Returns true if successful, false if an I/O error occurred. */
-bool
-any_writer_close (struct any_writer *writer)
-{
- bool ok;
-
- if (writer == NULL)
- return true;
-
- switch (writer->type)
- {
- case SYSTEM_FILE:
- ok = sfm_close_writer (writer->private);
- break;
-
- case PORTABLE_FILE:
- ok = pfm_close_writer (writer->private);
- break;
-
- case SCRATCH_FILE:
- ok = scratch_writer_close (writer->private);
- break;
-
- default:
- NOT_REACHED ();
- }
-
- free (writer);
- return ok;
-}
struct file_handle;
struct dictionary;
-struct ccase;
-struct sfm_writer;
-struct pfm_writer;
-struct scratch_writer;
-
-struct any_writer *any_writer_open (struct file_handle *, struct dictionary *);
-struct any_writer *any_writer_from_sfm_writer (struct sfm_writer *);
-struct any_writer *any_writer_from_pfm_writer (struct pfm_writer *);
-struct any_writer *any_writer_from_scratch_writer (struct scratch_writer *);
-
-bool any_writer_write (struct any_writer *, const struct ccase *);
-bool any_writer_error (const struct any_writer *);
-bool any_writer_close (struct any_writer *);
+
+struct casewriter *any_writer_open (struct file_handle *, struct dictionary *);
#endif /* any-writer.h */
src/data/calendar.h \
src/data/case-ordering.c \
src/data/case-ordering.h \
- src/data/case-sink.c \
- src/data/case-sink.h \
- src/data/case-source.c \
- src/data/case-source.h \
src/data/case.c \
- src/data/casefilter.c \
- src/data/casefilter.h \
- src/data/casefile.h \
- src/data/casefile.c \
- src/data/casefile-factory.h \
- src/data/casefile-private.h \
src/data/casegrouper.c \
src/data/casegrouper.h \
src/data/caseinit.c \
src/data/casewriter-translator.c \
src/data/casewriter.c \
src/data/casewriter.h \
- src/data/fastfile.c \
- src/data/fastfile.h \
- src/data/fastfile-factory.h \
- src/data/fastfile-factory.c \
src/data/case.h \
src/data/case-tmpfile.c \
src/data/case-tmpfile.h \
src/data/settings.h \
src/data/sparse-cases.c \
src/data/sparse-cases.h \
- src/data/storage-stream.c \
- src/data/storage-stream.h \
src/data/sys-file-private.c \
src/data/sys-file-private.h \
src/data/sys-file-reader.c \
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <config.h>
-
-#include <data/case-sink.h>
-
-#include <stdlib.h>
-
-#include <data/dictionary.h>
-
-#include "xalloc.h"
-
-/* Creates a case sink to accept cases from the given DICT with
- class CLASS and auxiliary data AUX. */
-struct case_sink *
-create_case_sink (const struct case_sink_class *class,
- const struct dictionary *dict, struct casefile_factory *f,
- void *aux)
-{
- struct case_sink *sink = xmalloc (sizeof *sink);
- sink->class = class;
- sink->value_cnt = dict_get_compacted_value_cnt (dict);
- sink->aux = aux;
- sink->factory = f;
- return sink;
-}
-
-/* Destroys case sink SINK. */
-void
-free_case_sink (struct case_sink *sink)
-{
- if (sink != NULL)
- {
- if (sink->class->destroy != NULL)
- sink->class->destroy (sink);
- free (sink);
- }
-}
-/* Null sink. Used by a few procedures that keep track of output
- themselves and would throw away anything that the sink
- contained anyway. */
-
-const struct case_sink_class null_sink_class =
- {
- "null",
- NULL,
- NULL,
- NULL,
- NULL,
- };
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#ifndef CASE_SINK_H
-#define CASE_SINK_H 1
-
-#include <stdbool.h>
-#include <stddef.h>
-
-struct ccase;
-struct dictionary;
-
-/* A case sink. */
-struct case_sink
- {
- const struct case_sink_class *class; /* Class. */
- void *aux; /* Auxiliary data. */
- struct casefile_factory *factory ; /* Factory used to create
- the destination */
- size_t value_cnt; /* Number of `union value's in case. */
- };
-
-/* A case sink class. */
-struct case_sink_class
- {
- const char *name; /* Identifying name. */
-
- /* Opens the sink for writing. */
- void (*open) (struct case_sink *);
-
- /* Writes a case to the sink. */
- bool (*write) (struct case_sink *, const struct ccase *);
-
- /* Closes and destroys the sink. */
- void (*destroy) (struct case_sink *);
-
- /* Closes the sink and returns a source that can read back
- the cases that were written, perhaps transformed in some
- way. The sink must still be separately destroyed by
- calling destroy(). */
- struct case_source *(*make_source) (struct case_sink *);
- };
-
-extern const struct case_sink_class null_sink_class;
-
-struct casefile_factory ;
-struct case_sink *create_case_sink (const struct case_sink_class *,
- const struct dictionary *,
- struct casefile_factory *,
- void *);
-void free_case_sink (struct case_sink *);
-
-#endif /* case-sink.h */
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <config.h>
-
-#include <data/case-source.h>
-
-#include <stdlib.h>
-
-#include "xalloc.h"
-
-/* Creates a case source with class CLASS and auxiliary data AUX
- and based on dictionary DICT. */
-struct case_source *
-create_case_source (const struct case_source_class *class,
- void *aux)
-{
- struct case_source *source = xmalloc (sizeof *source);
- source->class = class;
- source->aux = aux;
- return source;
-}
-
-/* Destroys case source SOURCE.
- Returns true if successful,
- false if the source encountered an I/O error during
- destruction or reading cases. */
-bool
-free_case_source (struct case_source *source)
-{
- bool ok = true;
- if (source != NULL)
- {
- if (source->class->destroy != NULL)
- ok = source->class->destroy (source);
- free (source);
- }
- return ok;
-}
-
-/* Returns true if CLASS is the class of SOURCE. */
-bool
-case_source_is_class (const struct case_source *source,
- const struct case_source_class *class)
-{
- return source != NULL && source->class == class;
-}
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#ifndef CASE_SOURCE_H
-#define CASE_SOURCE_H 1
-
-#include <stdbool.h>
-
-struct ccase;
-
-/* A case source. */
-struct case_source
- {
- const struct case_source_class *class; /* Class. */
- void *aux; /* Auxiliary data. */
- };
-
-/* A case source class. */
-struct case_source_class
- {
- const char *name; /* Identifying name. */
-
- /* Returns the exact number of cases that READ will pass to
- WRITE_CASE, if known, or -1 otherwise. */
- int (*count) (const struct case_source *);
-
- /* Reads one case into C.
- Returns true if successful, false at end of file or if an
- I/O error occurred. */
- bool (*read) (struct case_source *, struct ccase *);
-
- /* Destroys the source.
- Returns true if successful read, false if an I/O occurred
- during destruction or previously. */
- bool (*destroy) (struct case_source *);
- };
-
-
-struct case_source *create_case_source (const struct case_source_class *,
- void *);
-bool free_case_source (struct case_source *);
-
-bool case_source_is_class (const struct case_source *,
- const struct case_source_class *);
-
-#endif /* case-source.h */
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#ifndef CASEFILE_FACTORY_H
-#define CASEFILE_FACTORY_H
-
-struct casefile_factory
-{
- struct casefile * (*create_casefile) (struct casefile_factory *, size_t);
-};
-
-#endif
-
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2004, 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#ifndef CASEFILE_PRIVATE_H
-#define CASEFILE_PRIVATE_H
-
-#include <config.h>
-#include <stdbool.h>
-#include <libpspp/ll.h>
-
-struct ccase;
-struct casereader;
-struct casefile;
-struct casefilter;
-
-struct class_casefile
-{
- void (*destroy) (struct casefile *) ;
-
- bool (*error) (const struct casefile *) ;
-
- size_t (*get_value_cnt) (const struct casefile *) ;
- unsigned long (*get_case_cnt) (const struct casefile *) ;
-
- struct casereader * (*get_reader) (const struct casefile *) ;
-
- bool (*append) (struct casefile *, const struct ccase *) ;
-
-
- bool (*in_core) (const struct casefile *) ;
- bool (*to_disk) (const struct casefile *) ;
- bool (*sleep) (const struct casefile *) ;
-};
-
-struct casefile
-{
- const struct class_casefile *class ; /* Class pointer */
-
- struct ll_list reader_list ; /* List of our readers. */
- struct ll ll ; /* Element in the class' list
- of casefiles. */
- bool being_destroyed; /* A destructive reader exists */
-};
-
-
-struct class_casereader
-{
- struct ccase * (*get_next_case) (struct casereader *);
-
- unsigned long (*cnum) (const struct casereader *);
-
- void (*destroy) (struct casereader * r);
-
- struct casereader * (*clone) (const struct casereader *);
-};
-
-
-#define CLASS_CASEREADER(K) ( (struct class_casereader *) K)
-
-struct casereader
-{
- const struct class_casereader *class; /* Class pointer */
-
- struct casefile *cf; /* The casefile to which this reader belongs */
- struct ll ll; /* Element in the casefile's list of readers */
-
- struct casefilter *filter; /* The filter to be used */
- bool destructive; /* True if this reader is destructive */
-};
-
-
-#define CASEFILE(C) ( (struct casefile *) C)
-#define CONST_CASEFILE(C) ( (const struct casefile *) C)
-
-#define CASEFILEREADER(CR) ((struct casereader *) CR)
-
-
-/* Functions for implementations' use only */
-
-void casefile_register (struct casefile *cf,
- const struct class_casefile *k);
-
-void casereader_register (struct casefile *cf,
- struct casereader *reader,
- const struct class_casereader *k);
-
-#endif
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <config.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "case.h"
-#include "casefile.h"
-#include "casefile-private.h"
-#include "casefilter.h"
-
-
-struct ccase;
-
-/* A casefile is an abstract class representing an array of cases. In
- general, cases are accessible sequentially, and are immutable once
- appended to the casefile. However some implementations may provide
- special methods for case mutation or random access.
-
- Use casefile_append or casefile_append_xfer to append a case to a
- casefile.
-
- The casefile may be read sequentially,
- starting from the beginning, by "casereaders". Any
- number of casereaders may be created, at any time.
- Each casereader has an independent position in the casefile.
-
- Casereaders may only move forward. They cannot move backward to
- arbitrary records or seek randomly. Cloning casereaders is
- possible, but it is not yet implemented.
-
- Use casereader_read() or casereader_read_xfer() to read
- a case from a casereader. Use casereader_destroy() to
- discard a casereader when it is no longer needed.
-
- When a casefile is no longer needed, it may be destroyed with
- casefile_destroy(). This function will also destroy any
- remaining casereaders. */
-
-static struct ll_list all_casefiles = LL_INITIALIZER (all_casefiles);
-
-static struct casefile *
-ll_to_casefile (const struct ll *ll)
-{
- return ll_data (ll, struct casefile, ll);
-}
-
-static struct casereader *
-ll_to_casereader (const struct ll *ll)
-{
- return ll_data (ll, struct casereader, ll);
-}
-
-
-/* atexit() handler that closes and deletes our temporary
- files. */
-static void
-exit_handler (void)
-{
- while (!ll_is_empty (&all_casefiles))
- casefile_destroy (ll_to_casefile (ll_head (&all_casefiles)));
-}
-
-/* Insert CF into the global list of casefiles */
-void
-casefile_register (struct casefile *cf, const struct class_casefile *class)
-{
- static bool initialised ;
- if ( !initialised )
- {
- atexit (exit_handler);
- initialised = true;
- }
-
- cf->class = class;
- ll_push_head (&all_casefiles, &cf->ll);
- ll_init (&cf->reader_list);
-}
-
-/* Remove CF from the global list */
-static void
-casefile_unregister(struct casefile *cf)
-{
- ll_remove (&cf->ll);
-}
-
-/* Return the casefile corresponding to this reader */
-struct casefile *
-casereader_get_casefile (const struct casereader *r)
-{
- return r->cf;
-}
-
-/* Return the case number of the current case */
-unsigned long
-casereader_cnum(const struct casereader *r)
-{
- return r->class->cnum(r);
-}
-
-static struct ccase *
-get_next_case(struct casereader *reader)
-{
- struct ccase *read_case = NULL;
- struct casefile *cf = casereader_get_casefile (reader);
-
- do
- {
- if ( casefile_error (cf) )
- return NULL;
-
- read_case = reader->class->get_next_case (reader);
- }
- while ( read_case && reader->filter
- && casefilter_skip_case (reader->filter, read_case) ) ;
-
- return read_case;
-}
-
-/* Reads a copy of the next case from READER into C.
- Caller is responsible for destroying C.
- Returns true if successful, false at end of file. */
-bool
-casereader_read (struct casereader *reader, struct ccase *c)
-{
- struct ccase * read_case = get_next_case (reader) ;
-
- if ( NULL == read_case )
- return false;
-
- case_clone (c, read_case );
-
- return true;
-}
-
-
-/* Reads the next case from READER into C and transfers ownership
- to the caller. Caller is responsible for destroying C.
- Returns true if successful, false at end of file or on I/O
- error. */
-bool
-casereader_read_xfer (struct casereader *reader, struct ccase *c)
-{
- struct casefile *cf = casereader_get_casefile (reader);
- struct ccase *read_case ;
- case_nullify (c);
-
- read_case = get_next_case (reader) ;
-
- if ( NULL == read_case )
- return false;
-
- if ( reader->destructive && casefile_in_core (cf) )
- case_move (c, read_case);
- else
- case_clone (c, read_case);
-
- return true;
-}
-
-/* Destroys R. */
-void
-casereader_destroy (struct casereader *r)
-{
- ll_remove (&r->ll);
-
- r->class->destroy(r);
-}
-
-/* Creates a copy of R and returns it */
-struct casereader *
-casereader_clone(const struct casereader *r)
-{
- struct casereader *r2;
-
- /* Would we ever want to clone a destructive reader ?? */
- assert ( ! r->destructive ) ;
-
- r2 = r->class->clone (r);
-
- r2->filter = r->filter;
-
- return r2;
-}
-
-/* Destroys casefile CF. */
-void
-casefile_destroy(struct casefile *cf)
-{
- if (!cf) return;
-
- assert(cf->class->destroy);
-
- while (!ll_is_empty (&cf->reader_list))
- casereader_destroy (ll_to_casereader (ll_head (&cf->reader_list)));
-
- casefile_unregister(cf);
-
- cf->class->destroy(cf);
-}
-
-/* Returns true if an I/O error has occurred in casefile CF. */
-bool
-casefile_error (const struct casefile *cf)
-{
- return cf->class->error(cf);
-}
-
-/* Returns the number of cases in casefile CF. */
-unsigned long
-casefile_get_case_cnt (const struct casefile *cf)
-{
- return cf->class->get_case_cnt(cf);
-}
-
-/* Returns the number of `union value's in a case for CF. */
-size_t
-casefile_get_value_cnt (const struct casefile *cf)
-{
- return cf->class->get_value_cnt(cf);
-}
-
-/* Creates and returns a casereader for CF. A casereader can be used to
- sequentially read the cases in a casefile. */
-struct casereader *
-casefile_get_reader (const struct casefile *cf, struct casefilter *filter)
-{
- struct casereader *r = cf->class->get_reader(cf);
- r->cf = (struct casefile *) cf;
- r->filter = filter;
-
- assert (r->class);
-
- return r;
-}
-
-/* Creates and returns a destructive casereader for CF. Like a
- normal casereader, a destructive casereader sequentially reads
- the cases in a casefile. Unlike a normal casereader, a
- destructive reader cannot operate concurrently with any other
- reader. (This restriction could be relaxed in a few ways, but
- it is so far unnecessary for other code.) */
-struct casereader *
-casefile_get_destructive_reader (struct casefile *cf)
-{
- struct casereader *r = cf->class->get_reader (cf);
- r->cf = cf;
- r->destructive = true;
- cf->being_destroyed = true;
-
- return r;
-}
-
-/* Appends a copy of case C to casefile CF.
- Returns true if successful, false if an I/O error occurred. */
-bool
-casefile_append (struct casefile *cf, const struct ccase *c)
-{
- assert (case_get_value_cnt (c) >= casefile_get_value_cnt (cf));
-
- return cf->class->append(cf, c);
-}
-
-/* Appends case C to casefile CF, which takes over ownership of
- C.
- Returns true if successful, false if an I/O error occurred. */
-bool
-casefile_append_xfer (struct casefile *cf, struct ccase *c)
-{
- assert (case_get_value_cnt (c) >= casefile_get_value_cnt (cf));
-
- cf->class->append (cf, c);
- case_destroy (c);
-
- return cf->class->error (cf);
-}
-
-
-
-
-/* Puts a casefile to "sleep", that is, minimizes the resources
- needed for it by closing its file descriptor and freeing its
- buffer. This is useful if we need so many casefiles that we
- might not have enough memory and file descriptors to go
- around.
-
- Implementations may choose to silently ignore this function.
-
- Returns true if successful, false if an I/O error occurred. */
-bool
-casefile_sleep (const struct casefile *cf)
-{
- return cf->class->sleep ? cf->class->sleep(cf) : true;
-}
-
-/* Returns true only if casefile CF is stored in memory (instead of on
- disk), false otherwise.
-*/
-bool
-casefile_in_core (const struct casefile *cf)
-{
- return cf->class->in_core(cf);
-}
-
-/* If CF is currently stored in memory, writes it to disk. Readers, if any,
- retain their current positions.
-
- Implementations may choose to silently ignore this function.
-
- Returns true if successful, false if an I/O error occurred. */
-bool
-casefile_to_disk (const struct casefile *cf)
-{
- return cf->class->to_disk ? cf->class->to_disk(cf) : true;
-}
-
-void
-casereader_register(struct casefile *cf,
- struct casereader *reader,
- const struct class_casereader *class)
-{
- reader->class = class;
- reader->cf = cf;
-
- ll_push_head (&cf->reader_list, &reader->ll);
-}
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2004, 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#ifndef CASEFILE_H
-#define CASEFILE_H
-
-#include <config.h>
-#include <stddef.h>
-#include <stdbool.h>
-
-
-struct ccase;
-struct casereader;
-struct casefile;
-struct casefilter;
-
-/* Casereader functions */
-
-struct casefile *casereader_get_casefile (const struct casereader *r);
-
-unsigned long casereader_cnum (const struct casereader *r);
-
-bool casereader_read (struct casereader *r, struct ccase *c);
-
-bool casereader_read_xfer (struct casereader *r, struct ccase *c);
-
-void casereader_destroy (struct casereader *r);
-
-struct casereader *casereader_clone(const struct casereader *r);
-
-
-/* Casefile functions */
-
-void casefile_destroy (struct casefile *cf);
-
-bool casefile_error (const struct casefile *cf);
-
-unsigned long casefile_get_case_cnt (const struct casefile *cf);
-
-size_t casefile_get_value_cnt (const struct casefile *cf);
-
-struct casereader *casefile_get_reader (const struct casefile *cf, struct casefilter *filter);
-
-struct casereader *casefile_get_destructive_reader (struct casefile *cf);
-
-bool casefile_append (struct casefile *cf, const struct ccase *c);
-
-bool casefile_append_xfer (struct casefile *cf, struct ccase *c);
-
-bool casefile_sleep (const struct casefile *cf);
-
-bool casefile_in_core (const struct casefile *cf);
-
-bool casefile_to_disk (const struct casefile *cf);
-
-#endif
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <config.h>
-#include <libpspp/alloc.h>
-#include <libpspp/compiler.h>
-#include "casefilter.h"
-#include <stdlib.h>
-
-#include <stdio.h>
-#include <data/case.h>
-#include <data/variable.h>
-#include <data/missing-values.h>
-
-struct casefilter
- {
- enum mv_class class;
-
- const struct variable **vars;
- int n_vars;
- };
-
-
-/* Returns true iff the entire case should be skipped */
-bool
-casefilter_skip_case (const struct casefilter *filter, const struct ccase *c)
-{
- int i;
-
- for (i = 0; i < filter->n_vars; ++i)
- {
- if ( casefilter_variable_missing (filter, c, filter->vars[i]))
- return true;
- }
-
- return false;
-}
-
-/* Returns true iff the variable V in case C is missing */
-bool
-casefilter_variable_missing (const struct casefilter *filter,
- const struct ccase *c,
- const struct variable *var)
-{
- const union value *val = case_data (c, var) ;
- return var_is_value_missing (var, val, filter->class);
-}
-
-/* Create a new casefilter that drops cases in which any of the
- N_VARS variables in VARS are in the given CLASS of missing values.
- VARS is an array of variables which if *any* of them are missing.
- N_VARS is the size of VARS.
- */
-struct casefilter *
-casefilter_create (enum mv_class class, const struct variable **vars, int n_vars)
-{
- int i;
- struct casefilter * filter = xmalloc (sizeof (*filter)) ;
-
- filter->class = class;
- filter->vars = xnmalloc (n_vars, sizeof (*filter->vars) );
-
- for ( i = 0 ; i < n_vars ; ++i )
- filter->vars[i] = vars[i];
-
- filter->n_vars = n_vars ;
-
- return filter ;
-}
-
-
-/* Add the variables in VARS to the list of variables for which the
- filter considers. N_VARS is the size of VARS */
-void
-casefilter_add_variables (struct casefilter *filter,
- const struct variable *const *vars, int n_vars)
-{
- int i;
-
- filter->vars = xnrealloc (filter->vars, filter->n_vars + n_vars,
- sizeof (*filter->vars) );
-
- for ( i = 0 ; i < n_vars ; ++i )
- filter->vars[i + filter->n_vars] = vars[i];
-
- filter->n_vars += n_vars ;
-}
-
-/* Destroy the filter FILTER */
-void
-casefilter_destroy (struct casefilter *filter)
-{
- free (filter->vars);
- free (filter);
-}
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#if !casefilter_h
-#define casefilter_h 1
-
-#include <stdbool.h>
-#include <data/missing-values.h>
-
-struct ccase;
-struct casefilter;
-struct variable ;
-
-/* Create a new casefilter that drops cases in which any of the
- N_VARS variables in VARS are missing in the given CLASS.
- VARS is an array of variables which if *any* of them are missing.
- N_VARS is the size of VARS.
- */
-struct casefilter * casefilter_create (enum mv_class class,
- const struct variable **, int);
-
-/* Add the variables in VARS to the list of variables for which the
- filter considers. N_VARS is the size of VARS */
-void casefilter_add_variables (struct casefilter *,
- const struct variable *const*, int);
-
-/* Destroy the filter FILTER */
-void casefilter_destroy (struct casefilter *);
-
-/* Returns true iff the entire case should be skipped */
-bool casefilter_skip_case (const struct casefilter *, const struct ccase *);
-
-/* Returns true iff the variable V in case C is missing.
- Note that this function's behaviour is independent of the set of
- variables contained by the filter.
- */
-bool casefilter_variable_missing (const struct casefilter *f,
- const struct ccase *c,
- const struct variable *v);
-
-#endif
double w = case_num (c, d->weight);
if (w < 0.0 || var_is_num_missing (d->weight, w, MV_ANY))
w = 0.0;
- if ( w == 0.0 && *warn_on_invalid ) {
+ if ( w == 0.0 && warn_on_invalid != NULL && *warn_on_invalid ) {
*warn_on_invalid = false;
msg (SW, _("At least one case in the data file had a weight value "
"that was user-missing, system-missing, zero, or "
#include <config.h>
#include "por-file-reader.h"
-#include <libpspp/message.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
+
#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <setjmp.h>
-#include <libpspp/alloc.h>
+#include <stdarg.h>
#include <stdbool.h>
-#include "case.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <data/casereader-provider.h>
+#include <data/casereader.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <libpspp/alloc.h>
#include <libpspp/compiler.h>
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
#include <libpspp/hash.h>
#include <libpspp/magic.h>
+#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/pool.h>
#include <libpspp/str.h>
-#include "value-labels.h"
-#include "variable.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
int var_cnt; /* Number of variables. */
int weight_index; /* 0-based index of weight variable, or -1. */
int *widths; /* Variable widths, 0 for numeric. */
- int value_cnt; /* Number of `value's per case. */
+ size_t value_cnt; /* Number of `value's per case. */
bool ok; /* Set false on I/O error. */
};
+static struct casereader_class por_file_casereader_class;
+
static void
error (struct pfm_reader *r, const char *msg,...)
PRINTF_FORMAT (2, 3)
}
/* Closes portable file reader R, after we're done with it. */
-void
-pfm_close_reader (struct pfm_reader *r)
+static void
+por_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
{
- if (r != NULL)
- pool_destroy (r->pool);
+ struct pfm_reader *r = r_;
+ pool_destroy (r->pool);
}
/* Read a single character into cur_char. */
/* Reads the dictionary from file with handle H, and returns it in a
dictionary structure. This dictionary may be modified in order to
rename, reorder, and delete variables, etc. */
-struct pfm_reader *
+struct casereader *
pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
struct pfm_read_info *info)
{
if (!match (r, 'F'))
error (r, _("Data record expected."));
- return r;
+ r->value_cnt = dict_get_next_value_idx (*dict);
+ return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+ &por_file_casereader_class, r);
error:
- pfm_close_reader (r);
+ pool_destroy (r->pool);
dict_destroy (*dict);
*dict = NULL;
return NULL;
}
/* Reads one case from portable file R into C. */
-bool
-pfm_read_case (struct pfm_reader *r, struct ccase *c)
+static bool
+por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c)
{
+ struct pfm_reader *r = r_;
size_t i;
size_t idx;
+ case_create (c, casereader_get_value_cnt (reader));
setjmp (r->bail_out);
- if (!r->ok)
- return false;
+ if (!r->ok)
+ {
+ casereader_force_error (reader);
+ case_destroy (c);
+ return false;
+ }
/* Check for end of file. */
- if (r->cc == 'Z')
- return false;
+ if (r->cc == 'Z')
+ {
+ case_destroy (c);
+ return false;
+ }
idx = 0;
for (i = 0; i < r->var_cnt; i++)
return true;
}
-/* Returns true if an I/O error has occurred on READER, false
- otherwise. */
-bool
-pfm_read_error (const struct pfm_reader *reader)
-{
- return !reader->ok;
-}
-
/* Returns true if FILE is an SPSS portable file,
false otherwise. */
bool
return true;
}
+
+static struct casereader_class por_file_casereader_class =
+ {
+ por_file_casereader_read,
+ por_file_casereader_destroy,
+ NULL,
+ NULL,
+ };
struct dictionary;
struct file_handle;
struct ccase;
-struct pfm_reader *pfm_open_reader (struct file_handle *,
+struct casereader *pfm_open_reader (struct file_handle *,
struct dictionary **,
struct pfm_read_info *);
-bool pfm_read_case (struct pfm_reader *, struct ccase *);
-bool pfm_read_error (const struct pfm_reader *);
-void pfm_close_reader (struct pfm_reader *);
bool pfm_detect (FILE *);
#endif /* por-file-reader.h */
#include <time.h>
#include <unistd.h>
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
-#include "value-labels.h"
-#include "variable.h"
+#include <data/case.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
#include <libpspp/alloc.h>
#include <libpspp/hash.h>
int fv; /* Starting case index. */
};
+static struct casewriter_class por_file_casewriter_class;
+
+static bool close_writer (struct pfm_writer *);
static void buf_write (struct pfm_writer *, const void *, size_t);
static void write_header (struct pfm_writer *);
static void write_version_data (struct pfm_writer *);
/* Writes the dictionary DICT to portable file HANDLE according
to the given OPTS. Returns nonzero only if successful. DICT
will not be modified, except to assign short names. */
-struct pfm_writer *
+struct casewriter *
pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
struct pfm_write_options opts)
{
write_variables (w, dict);
write_value_labels (w, dict);
buf_write (w, "F", 1);
- if (pfm_write_error (w))
+ if (ferror (w->file))
goto error;
- return w;
+ return casewriter_create (&por_file_casewriter_class, w);
error:
- pfm_close_writer (w);
+ close_writer (w);
return NULL;
open_error:
write_value (w, &value, v);
}
+ /* Write variable label. */
if (var_get_label (v) != NULL)
{
buf_write (w, "C", 1);
}
}
-/* Writes case ELEM to the portable file represented by H. */
-int
-pfm_write_case (struct pfm_writer *w, const struct ccase *c)
+/* Writes case C to the portable file represented by W_, then destroys C. */
+static void
+por_file_casewriter_write (struct casewriter *writer, void *w_,
+ struct ccase *c)
{
+ struct pfm_writer *w = w_;
int i;
- if (ferror (w->file))
- return 0;
-
- for (i = 0; i < w->var_cnt; i++)
+ if (!ferror (w->file))
{
- struct pfm_var *v = &w->vars[i];
+ for (i = 0; i < w->var_cnt; i++)
+ {
+ struct pfm_var *v = &w->vars[i];
- if (v->width == 0)
- write_float (w, case_num_idx (c, v->fv));
- else
- {
- write_int (w, v->width);
- buf_write (w, case_str_idx (c, v->fv), v->width);
- }
+ if (v->width == 0)
+ write_float (w, case_num_idx (c, v->fv));
+ else
+ {
+ write_int (w, v->width);
+ buf_write (w, case_str_idx (c, v->fv), v->width);
+ }
+ }
}
-
- return !pfm_write_error (w);
+ else
+ casewriter_force_error (writer);
+
+ case_destroy (c);
}
-bool
-pfm_write_error (const struct pfm_writer *w)
+static void
+por_file_casewriter_destroy (struct casewriter *writer, void *w_)
{
- return ferror (w->file);
+ struct pfm_writer *w = w_;
+ if (!close_writer (w))
+ casewriter_force_error (writer);
}
/* Closes a portable file after we're done with it.
Returns true if successful, false if an I/O error occurred. */
-bool
-pfm_close_writer (struct pfm_writer *w)
+static bool
+close_writer (struct pfm_writer *w)
{
bool ok;
memset (buf, 'Z', sizeof buf);
buf_write (w, buf, w->lc >= 80 ? 80 : 80 - w->lc);
- ok = !pfm_write_error (w);
+ ok = !ferror (w->file);
if (fclose (w->file) == EOF)
ok = false;
strcpy (output, "*.");
return;
}
+\f
+static struct casewriter_class por_file_casewriter_class =
+ {
+ por_file_casewriter_write,
+ por_file_casewriter_destroy,
+ NULL,
+ };
struct file_handle;
struct dictionary;
struct ccase;
-struct pfm_writer *pfm_open_writer (struct file_handle *, struct dictionary *,
+struct casewriter *pfm_open_writer (struct file_handle *, struct dictionary *,
struct pfm_write_options);
struct pfm_write_options pfm_writer_default_options (void);
-int pfm_write_case (struct pfm_writer *, const struct ccase *);
-bool pfm_write_error (const struct pfm_writer *);
-bool pfm_close_writer (struct pfm_writer *);
-
#endif /* por-file-writer.h */
#include <stdlib.h>
#include <unistd.h>
-#include <data/case-source.h>
-#include <data/case-sink.h>
#include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
+#include <data/caseinit.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
+#include <data/casewriter.h>
#include <data/dictionary.h>
#include <data/file-handle-def.h>
#include <data/procedure.h>
-#include <data/storage-stream.h>
#include <data/transformations.h>
#include <data/variable.h>
#include <libpspp/alloc.h>
#include <libpspp/deque.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/taint.h>
struct dataset {
-
- /* An abstract factory which creates casefiles */
- struct casefile_factory *cf_factory;
-
- /* Callback which occurs when a procedure provides a new source for
- the dataset */
- replace_source_callback *replace_source ;
-
- /* Callback which occurs whenever the DICT is replaced by a new one */
- replace_dictionary_callback *replace_dict;
-
- /* Cases are read from proc_source,
+ /* Cases are read from source,
+ their transformation variables are initialized,
pass through permanent_trns_chain (which transforms them into
the format described by permanent_dict),
- are written to proc_sink,
+ are written to sink,
pass through temporary_trns_chain (which transforms them into
the format described by dict),
and are finally passed to the procedure. */
- struct case_source *proc_source;
+ struct casereader *source;
+ struct caseinit *caseinit;
struct trns_chain *permanent_trns_chain;
struct dictionary *permanent_dict;
- struct case_sink *proc_sink;
+ struct casewriter *sink;
struct trns_chain *temporary_trns_chain;
struct dictionary *dict;
+ /* Callback which occurs when a procedure provides a new source for
+ the dataset */
+ replace_source_callback *replace_source ;
+
+ /* Callback which occurs whenever the DICT is replaced by a new one */
+ replace_dictionary_callback *replace_dict;
+
+ /* If true, cases are discarded instead of being written to
+ sink. */
+ bool discard_output;
+
/* The transformation chain that the next transformation will be
added to. */
struct trns_chain *cur_trns_chain;
struct ccase *lag_cases; /* Lagged cases managed by deque. */
/* Procedure data. */
- bool is_open; /* Procedure open? */
- struct ccase trns_case; /* Case used for transformations. */
- struct ccase sink_case; /* Case written to sink, if
- compacting is necessary. */
+ enum
+ {
+ PROC_COMMITTED,
+ PROC_OPEN,
+ PROC_CLOSED
+ }
+ proc_state;
size_t cases_written; /* Cases output so far. */
- bool ok;
+ bool ok; /* Error status. */
}; /* struct dataset */
static void add_case_limit_trns (struct dataset *ds);
static void add_filter_trns (struct dataset *ds);
-static bool internal_procedure (struct dataset *ds, case_func *,
- end_func *,
- void *aux);
static void update_last_proc_invocation (struct dataset *ds);
-static void create_trns_case (struct ccase *, struct dictionary *);
-static void open_active_file (struct dataset *ds);
-static void clear_case (const struct dataset *ds, struct ccase *c);
-static bool close_active_file (struct dataset *ds);
\f
/* Public functions. */
\f
/* Regular procedure. */
-
-
-/* Reads the data from the input program and writes it to a new
- active file. For each case we read from the input program, we
- do the following:
-
- 1. Execute permanent transformations. If these drop the case,
- start the next case from step 1.
-
- 2. Write case to replacement active file.
-
- 3. Execute temporary transformations. If these drop the case,
- start the next case from step 1.
-
- 4. Pass case to PROC_FUNC, passing AUX as auxiliary data.
-
- Returns true if successful, false if an I/O error occurred. */
+/* Executes any pending transformations, if necessary.
+ This is not identical to the EXECUTE command in that it won't
+ always read the source data. This can be important when the
+ source data is given inline within BEGIN DATA...END DATA. */
bool
-procedure (struct dataset *ds, case_func *cf, void *aux)
+proc_execute (struct dataset *ds)
{
- update_last_proc_invocation (ds);
+ bool ok;
- /* Optimize the trivial case where we're not going to do
- anything with the data, by not reading the data at all. */
- if (cf == NULL
- && case_source_is_class (ds->proc_source, &storage_source_class)
- && ds->proc_sink == NULL
- && (ds->temporary_trns_chain == NULL
- || trns_chain_is_empty (ds->temporary_trns_chain))
+ if ((ds->temporary_trns_chain == NULL
+ || trns_chain_is_empty (ds->temporary_trns_chain))
&& trns_chain_is_empty (ds->permanent_trns_chain))
{
ds->n_lag = 0;
+ ds->discard_output = false;
dict_set_case_limit (ds->dict, 0);
dict_clear_vectors (ds->dict);
return true;
}
- return internal_procedure (ds, cf, NULL, aux);
+ ok = casereader_destroy (proc_open (ds));
+ return proc_commit (ds) && ok;
}
-\f
-/* Multipass procedure. */
-struct multipass_aux_data
- {
- struct casefile *casefile;
+static struct casereader_class proc_casereader_class;
- bool (*proc_func) (const struct casefile *, void *aux);
- void *aux;
- };
-
-/* Case processing function for multipass_procedure(). */
-static bool
-multipass_case_func (const struct ccase *c, void *aux_data_, const struct dataset *ds UNUSED)
-{
- struct multipass_aux_data *aux_data = aux_data_;
- return casefile_append (aux_data->casefile, c);
-}
-
-/* End-of-file function for multipass_procedure(). */
-static bool
-multipass_end_func (void *aux_data_, const struct dataset *ds UNUSED)
-{
- struct multipass_aux_data *aux_data = aux_data_;
- return (aux_data->proc_func == NULL
- || aux_data->proc_func (aux_data->casefile, aux_data->aux));
-}
-
-/* Procedure that allows multiple passes over the input data.
- The entire active file is passed to PROC_FUNC, with the given
- AUX as auxiliary data, as a unit. */
-bool
-multipass_procedure (struct dataset *ds, casefile_func *proc_func, void *aux)
+/* Opens dataset DS and returns a casereader for reading its cases.
+ proc_commit must be called when done. */
+struct casereader *
+proc_open (struct dataset *ds)
{
- struct multipass_aux_data aux_data;
- bool ok;
+ assert (ds->source != NULL);
+ assert (ds->proc_state == PROC_COMMITTED);
- aux_data.casefile =
- ds->cf_factory->create_casefile (ds->cf_factory,
- dict_get_next_value_idx (ds->dict));
-
- aux_data.proc_func = proc_func;
- aux_data.aux = aux;
-
- ok = internal_procedure (ds, multipass_case_func, multipass_end_func, &aux_data);
- ok = !casefile_error (aux_data.casefile) && ok;
-
- casefile_destroy (aux_data.casefile);
-
- return ok;
-}
-\f
+ update_last_proc_invocation (ds);
-/* Procedure implementation. */
+ caseinit_mark_for_init (ds->caseinit, ds->dict);
-/* Executes a procedure.
- Passes each case to CASE_FUNC.
- Calls END_FUNC after the last case.
- Returns true if successful, false if an I/O error occurred (or
- if CASE_FUNC or END_FUNC ever returned false). */
-static bool
-internal_procedure (struct dataset *ds, case_func *proc,
- end_func *end,
- void *aux)
-{
- struct ccase *c;
- bool ok = true;
+ /* Finish up the collection of transformations. */
+ add_case_limit_trns (ds);
+ add_filter_trns (ds);
+ trns_chain_finalize (ds->cur_trns_chain);
- proc_open (ds);
- while (ok && proc_read (ds, &c))
- if (proc != NULL)
- ok = proc (c, aux, ds) && ok;
- if (end != NULL)
- ok = end (aux, ds) && ok;
+ /* Make permanent_dict refer to the dictionary right before
+ data reaches the sink. */
+ if (ds->permanent_dict == NULL)
+ ds->permanent_dict = ds->dict;
- if ( proc_close (ds) && ok )
+ /* Prepare sink. */
+ if (!ds->discard_output)
{
-
- return true;
+ ds->compactor = (dict_compacting_would_shrink (ds->permanent_dict)
+ ? dict_make_compactor (ds->permanent_dict)
+ : NULL);
+ ds->sink = autopaging_writer_create (dict_get_compacted_value_cnt (
+ ds->permanent_dict));
+ }
+ else
+ {
+ ds->compactor = NULL;
+ ds->sink = NULL;
}
- return false;
-}
-
-/* Opens dataset DS for reading cases with proc_read.
- proc_close must be called when done. */
-void
-proc_open (struct dataset *ds)
-{
- assert (ds->proc_source != NULL);
- assert (!ds->is_open);
-
- update_last_proc_invocation (ds);
-
- open_active_file (ds);
+ /* Allocate memory for lagged cases. */
+ ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases);
- ds->is_open = true;
- create_trns_case (&ds->trns_case, ds->dict);
- case_create (&ds->sink_case, dict_get_compacted_value_cnt (ds->dict));
+ ds->proc_state = PROC_OPEN;
ds->cases_written = 0;
ds->ok = true;
+
+ /* FIXME: use taint in dataset in place of `ok'? */
+ /* FIXME: for trivial cases we can just return a clone of
+ ds->source? */
+ return casereader_create_sequential (NULL,
+ dict_get_next_value_idx (ds->dict),
+ CASENUMBER_MAX,
+ &proc_casereader_class, ds);
+}
+
+bool
+proc_is_open (const struct dataset *ds)
+{
+ return ds->proc_state != PROC_COMMITTED;
}
/* Reads the next case from dataset DS, which must have been
case is stored in *C.
Return false at end of file or if a read error occurs. In
this case a null pointer is stored in *C. */
-bool
-proc_read (struct dataset *ds, struct ccase **c)
+static bool
+proc_casereader_read (struct casereader *reader UNUSED, void *ds_,
+ struct ccase *c)
{
+ struct dataset *ds = ds_;
enum trns_result retval = TRNS_DROP_CASE;
- assert (ds->is_open);
- *c = NULL;
- for (;;)
+ assert (ds->proc_state == PROC_OPEN);
+ for (;;)
{
size_t case_nr;
if (!ds->ok)
return false;
- /* Read a case from proc_source. */
- clear_case (ds, &ds->trns_case);
- if (!ds->proc_source->class->read (ds->proc_source, &ds->trns_case))
+ /* Read a case from source. */
+ if (!casereader_read (ds->source, c))
return false;
+ case_resize (c, dict_get_next_value_idx (ds->dict));
+ caseinit_init_reinit_vars (ds->caseinit, c);
+ caseinit_init_left_vars (ds->caseinit, c);
/* Execute permanent transformations. */
case_nr = ds->cases_written + 1;
retval = trns_chain_execute (ds->permanent_trns_chain, TRNS_CONTINUE,
- &ds->trns_case, &case_nr);
- if (retval != TRNS_CONTINUE)
- continue;
-
+ c, &case_nr);
+ caseinit_update_left_vars (ds->caseinit, c);
+ if (retval != TRNS_CONTINUE)
+ {
+ case_destroy (c);
+ continue;
+ }
+
/* Write case to collection of lagged cases. */
if (ds->n_lag > 0)
{
while (deque_count (&ds->lag) >= ds->n_lag)
case_destroy (&ds->lag_cases[deque_pop_back (&ds->lag)]);
- case_clone (&ds->lag_cases[deque_push_front (&ds->lag)],
- &ds->trns_case);
+ case_clone (&ds->lag_cases[deque_push_front (&ds->lag)], c);
}
/* Write case to replacement active file. */
ds->cases_written++;
- if (ds->proc_sink->class->write != NULL)
+ if (ds->sink != NULL)
{
- if (ds->compactor != NULL)
+ struct ccase tmp;
+ if (ds->compactor != NULL)
{
- dict_compactor_compact (ds->compactor, &ds->sink_case,
- &ds->trns_case);
- ds->proc_sink->class->write (ds->proc_sink, &ds->sink_case);
+ case_create (&tmp, dict_get_compacted_value_cnt (ds->dict));
+ dict_compactor_compact (ds->compactor, &tmp, c);
}
else
- ds->proc_sink->class->write (ds->proc_sink, &ds->trns_case);
+ case_clone (&tmp, c);
+ casewriter_write (ds->sink, &tmp);
}
/* Execute temporary transformations. */
if (ds->temporary_trns_chain != NULL)
{
retval = trns_chain_execute (ds->temporary_trns_chain, TRNS_CONTINUE,
- &ds->trns_case, &ds->cases_written);
+ c, &ds->cases_written);
if (retval != TRNS_CONTINUE)
- continue;
+ {
+ case_destroy (c);
+ continue;
+ }
}
- *c = &ds->trns_case;
return true;
}
}
while reading or closing the data set.
If DS has not been opened, returns true without doing
anything else. */
-bool
-proc_close (struct dataset *ds)
-{
- if (!ds->is_open)
- return true;
-
- /* Drain any remaining cases. */
- while (ds->ok)
- {
- struct ccase *c;
- if (!proc_read (ds, &c))
- break;
- }
- ds->ok = free_case_source (ds->proc_source) && ds->ok;
- proc_set_source (ds, NULL);
-
- case_destroy (&ds->sink_case);
- case_destroy (&ds->trns_case);
-
- ds->ok = close_active_file (ds) && ds->ok;
- ds->is_open = false;
-
- return ds->ok;
-}
-
-/* Updates last_proc_invocation. */
-static void
-update_last_proc_invocation (struct dataset *ds)
-{
- ds->last_proc_invocation = time (NULL);
-}
-
-/* Creates and returns a case, initializing it from the vectors
- that say which `value's need to be initialized just once, and
- which ones need to be re-initialized before every case. */
static void
-create_trns_case (struct ccase *trns_case, struct dictionary *dict)
+proc_casereader_destroy (struct casereader *reader, void *ds_) /* destroy member of proc_casereader_class; DS_ is the struct dataset */
{
- size_t var_cnt = dict_get_var_cnt (dict);
- size_t i;
+ struct dataset *ds = ds_;
+ struct ccase c;
- case_create (trns_case, dict_get_next_value_idx (dict));
- for (i = 0; i < var_cnt; i++)
- {
- struct variable *v = dict_get_var (dict, i);
- union value *value = case_data_rw (trns_case, v);
+ /* Make sure transformations happen for every input case, in
+ case they have side effects, and ensure that the replacement
+ active file gets all the cases it should. Each case read
+ here is destroyed at once; only the act of reading matters. */
+ while (casereader_read (reader, &c))
+ case_destroy (&c);
- if (var_is_numeric (v))
- value->f = var_get_leave (v) ? 0.0 : SYSMIS;
- else
- memset (value->s, ' ', var_get_width (v));
- }
+ ds->proc_state = PROC_CLOSED; /* proc_commit asserts this state */
+ ds->ok = casereader_destroy (ds->source) && ds->ok; /* keeps any earlier failure already recorded in ds->ok */
+ ds->source = NULL;
+ proc_set_active_file_data (ds, NULL); /* passes NULL to replace_source, if set, and resets caseinit */
}
-/* Makes all preparations for reading from the data source and writing
- to the data sink. */
-static void
-open_active_file (struct dataset *ds)
-{
- add_case_limit_trns (ds);
- add_filter_trns (ds);
-
- /* Finalize transformations. */
- trns_chain_finalize (ds->cur_trns_chain);
-
- /* Make permanent_dict refer to the dictionary right before
- data reaches the sink. */
- if (ds->permanent_dict == NULL)
- ds->permanent_dict = ds->dict;
-
- /* Figure out whether to compact. */
- ds->compactor =
- (dict_compacting_would_shrink (ds->permanent_dict)
- ? dict_make_compactor (ds->permanent_dict)
- : NULL);
-
- /* Prepare sink. */
- if (ds->proc_sink == NULL)
- ds->proc_sink = create_case_sink (&storage_sink_class,
- ds->permanent_dict,
- ds->cf_factory,
- NULL);
- if (ds->proc_sink->class->open != NULL)
- ds->proc_sink->class->open (ds->proc_sink);
-
- /* Allocate memory for lagged cases. */
- ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases);
-}
-
-/* Clears the variables in C that need to be cleared between
- processing cases. */
-static void
-clear_case (const struct dataset *ds, struct ccase *c)
+/* Commits a procedure that is in state PROC_CLOSED (the state
+ set by proc_casereader_destroy) and moves DS to
+ PROC_COMMITTED. Frees the lagged cases, cancels temporary
+ transformations, and, unless output is being discarded, makes
+ the sink casewriter into the new source casereader. Returns
+ the result of proc_cancel_all_transformations ANDed with the
+ ok flag accumulated in DS.
+ Must return false if the source casereader, a transformation,
+ or the sink casewriter signaled an error. (If a temporary
+ transformation signals an error, then the return value is
+ false, but the replacement active file may still be
+ untainted.) */
+bool
+proc_commit (struct dataset *ds)
{
- size_t var_cnt = dict_get_var_cnt (ds->dict);
- size_t i;
+ assert (ds->proc_state == PROC_CLOSED);
+ ds->proc_state = PROC_COMMITTED;
- for (i = 0; i < var_cnt; i++)
- {
- struct variable *v = dict_get_var (ds->dict, i);
- if (!var_get_leave (v))
- {
- if (var_is_numeric (v))
- case_data_rw (c, v)->f = SYSMIS;
- else
- memset (case_data_rw (c, v)->s, ' ', var_get_width (v));
- }
- }
-}
-
-/* Closes the active file. */
-static bool
-close_active_file (struct dataset *ds)
-{
/* Free memory for lagged cases. */
while (!deque_is_empty (&ds->lag))
case_destroy (&ds->lag_cases[deque_pop_back (&ds->lag)]);
/* Dictionary from before TEMPORARY becomes permanent. */
proc_cancel_temporary_transformations (ds);
- /* Finish compacting. */
- if (ds->compactor != NULL)
+ if (!ds->discard_output)
{
- dict_compactor_destroy (ds->compactor);
- dict_compact_values (ds->dict);
- ds->compactor = NULL;
+ /* Finish compacting. */
+ if (ds->compactor != NULL)
+ {
+ dict_compactor_destroy (ds->compactor);
+ dict_compact_values (ds->dict);
+ ds->compactor = NULL;
+ }
+
+ /* Old data sink becomes new data source. */
+ if (ds->sink != NULL)
+ ds->source = casewriter_make_reader (ds->sink);
}
+ else
+ {
+ ds->source = NULL;
+ ds->discard_output = false; /* the flag set by proc_discard_output applies to one procedure only */
+ }
+ ds->sink = NULL;
+ if ( ds->replace_source) ds->replace_source (ds->source); /* optional callback; the source passed may be NULL */
- /* Old data sink becomes new data source. */
- if (ds->proc_sink->class->make_source != NULL)
- proc_set_source (ds, ds->proc_sink->class->make_source (ds->proc_sink) );
- free_case_sink (ds->proc_sink);
- ds->proc_sink = NULL;
+ caseinit_clear (ds->caseinit);
+ caseinit_mark_as_preinited (ds->caseinit, ds->dict);
dict_clear_vectors (ds->dict);
ds->permanent_dict = NULL;
- return proc_cancel_all_transformations (ds);
+ return proc_cancel_all_transformations (ds) && ds->ok; /* false as well if ds->ok was cleared earlier */
+}
+
+static struct casereader_class proc_casereader_class = /* class table for the reader that runs a procedure */
+ {
+ proc_casereader_read,
+ proc_casereader_destroy,
+ NULL, /* remaining two members are left unset */
+ NULL,
+ };
+
+/* Updates last_proc_invocation in DS to the current time, as
+ returned by time (NULL). */
+static void
+update_last_proc_invocation (struct dataset *ds)
+{
+ ds->last_proc_invocation = time (NULL);
}
\f
/* Returns a pointer to the lagged case from N_BEFORE cases before the
return NULL;
}
\f
-/* Procedure that separates the data into SPLIT FILE groups. */
-
-/* Represents auxiliary data for handling SPLIT FILE. */
-struct split_aux_data
- {
- struct dataset *dataset; /* The dataset */
- struct ccase prev_case; /* Data in previous case. */
-
- /* Callback functions. */
- begin_func *begin;
- case_func *proc;
- end_func *end;
- void *func_aux;
- };
-
-static int equal_splits (const struct ccase *, const struct ccase *, const struct dataset *ds);
-static bool split_procedure_case_func (const struct ccase *c, void *, const struct dataset *);
-static bool split_procedure_end_func (void *, const struct dataset *);
-
-/* Like procedure(), but it automatically breaks the case stream
- into SPLIT FILE break groups. Before each group of cases with
- identical SPLIT FILE variable values, BEGIN_FUNC is called
- with the first case in the group.
- Then PROC_FUNC is called for each case in the group (including
- the first).
- END_FUNC is called when the group is finished. FUNC_AUX is
- passed to each of the functions as auxiliary data.
-
- If the active file is empty, none of BEGIN_FUNC, PROC_FUNC,
- and END_FUNC will be called at all.
-
- If SPLIT FILE is not in effect, then there is one break group
- (if the active file is nonempty), and BEGIN_FUNC and END_FUNC
- will be called once.
-
- Returns true if successful, false if an I/O error occurred. */
-bool
-procedure_with_splits (struct dataset *ds,
- begin_func begin,
- case_func *proc,
- end_func *end,
- void *func_aux)
-{
- struct split_aux_data split_aux;
- bool ok;
-
- case_nullify (&split_aux.prev_case);
- split_aux.begin = begin;
- split_aux.proc = proc;
- split_aux.end = end;
- split_aux.func_aux = func_aux;
- split_aux.dataset = ds;
-
- ok = internal_procedure (ds, split_procedure_case_func,
- split_procedure_end_func, &split_aux);
-
- case_destroy (&split_aux.prev_case);
-
- return ok;
-}
-
-/* Case callback used by procedure_with_splits(). */
-static bool
-split_procedure_case_func (const struct ccase *c, void *split_aux_, const struct dataset *ds)
-{
- struct split_aux_data *split_aux = split_aux_;
-
- /* Start a new series if needed. */
- if (case_is_null (&split_aux->prev_case)
- || !equal_splits (c, &split_aux->prev_case, split_aux->dataset))
- {
- if (!case_is_null (&split_aux->prev_case) && split_aux->end != NULL)
- split_aux->end (split_aux->func_aux, ds);
-
- case_destroy (&split_aux->prev_case);
- case_clone (&split_aux->prev_case, c);
-
- if (split_aux->begin != NULL)
- split_aux->begin (&split_aux->prev_case, split_aux->func_aux, ds);
- }
-
- return (split_aux->proc == NULL
- || split_aux->proc (c, split_aux->func_aux, ds));
-}
-
-/* End-of-file callback used by procedure_with_splits(). */
-static bool
-split_procedure_end_func (void *split_aux_, const struct dataset *ds)
-{
- struct split_aux_data *split_aux = split_aux_;
-
- if (!case_is_null (&split_aux->prev_case) && split_aux->end != NULL)
- split_aux->end (split_aux->func_aux, ds);
- return true;
-}
-
-/* Compares the SPLIT FILE variables in cases A and B and returns
- nonzero only if they differ. */
-static int
-equal_splits (const struct ccase *a, const struct ccase *b,
- const struct dataset *ds)
-{
- return case_compare (a, b,
- dict_get_split_vars (ds->dict),
- dict_get_split_cnt (ds->dict)) == 0;
-}
-\f
-/* Multipass procedure that separates the data into SPLIT FILE
- groups. */
-
-/* Represents auxiliary data for handling SPLIT FILE in a
- multipass procedure. */
-struct multipass_split_aux_data
- {
- struct dataset *dataset; /* The dataset of the split */
- struct ccase prev_case; /* Data in previous case. */
- struct casefile *casefile; /* Accumulates data for a split. */
- split_func *split; /* Function to call with the accumulated
- data. */
- void *func_aux; /* Auxiliary data. */
- };
-
-static bool multipass_split_case_func (const struct ccase *c, void *aux_, const struct dataset *);
-static bool multipass_split_end_func (void *aux_, const struct dataset *ds);
-static bool multipass_split_output (struct multipass_split_aux_data *, const struct dataset *ds);
-
-/* Returns true if successful, false if an I/O error occurred. */
-bool
-multipass_procedure_with_splits (struct dataset *ds,
- split_func *split,
- void *func_aux)
-{
- struct multipass_split_aux_data aux;
- bool ok;
-
- case_nullify (&aux.prev_case);
- aux.casefile = NULL;
- aux.split = split;
- aux.func_aux = func_aux;
- aux.dataset = ds;
-
- ok = internal_procedure (ds, multipass_split_case_func,
- multipass_split_end_func, &aux);
- case_destroy (&aux.prev_case);
-
- return ok;
-}
-
-/* Case callback used by multipass_procedure_with_splits(). */
-static bool
-multipass_split_case_func (const struct ccase *c, void *aux_, const struct dataset *ds)
-{
- struct multipass_split_aux_data *aux = aux_;
- bool ok = true;
-
- /* Start a new series if needed. */
- if (aux->casefile == NULL || ! equal_splits (c, &aux->prev_case, ds))
- {
- /* Record split values. */
- case_destroy (&aux->prev_case);
- case_clone (&aux->prev_case, c);
-
- /* Pass any cases to split_func. */
- if (aux->casefile != NULL)
- ok = multipass_split_output (aux, ds);
-
- /* Start a new casefile. */
- aux->casefile =
- ds->cf_factory->create_casefile (ds->cf_factory,
- dict_get_next_value_idx (ds->dict));
- }
-
- return casefile_append (aux->casefile, c) && ok;
-}
-
-/* End-of-file callback used by multipass_procedure_with_splits(). */
-static bool
-multipass_split_end_func (void *aux_, const struct dataset *ds)
-{
- struct multipass_split_aux_data *aux = aux_;
- return (aux->casefile == NULL || multipass_split_output (aux, ds));
-}
-
-static bool
-multipass_split_output (struct multipass_split_aux_data *aux, const struct dataset *ds)
-{
- bool ok;
-
- assert (aux->casefile != NULL);
- ok = aux->split (&aux->prev_case, aux->casefile, aux->func_aux, ds);
- casefile_destroy (aux->casefile);
- aux->casefile = NULL;
-
- return ok;
-}
-\f
-/* Discards all the current state in preparation for a data-input
- command like DATA LIST or GET. */
-void
-discard_variables (struct dataset *ds)
-{
- dict_clear (ds->dict);
- fh_set_default_handle (NULL);
-
- ds->n_lag = 0;
-
- free_case_source (ds->proc_source);
- proc_set_source (ds, NULL);
-
- proc_cancel_all_transformations (ds);
-}
-\f
/* Returns the current set of permanent transformations,
and clears the permanent transformations.
For use by INPUT PROGRAM. */
{
if (proc_in_temporary_transformations (ds))
{
- dataset_set_dict (ds, ds->permanent_dict);
+ dict_destroy (ds->dict);
+ ds->dict = ds->permanent_dict;
ds->permanent_dict = NULL;
+ if (ds->replace_dict) ds->replace_dict (ds->dict);
trns_chain_destroy (ds->temporary_trns_chain);
ds->temporary_trns_chain = NULL;
proc_cancel_all_transformations (struct dataset *ds)
{
bool ok;
+ assert (ds->proc_state == PROC_COMMITTED);
ok = trns_chain_destroy (ds->permanent_trns_chain);
ok = trns_chain_destroy (ds->temporary_trns_chain) && ok;
ds->permanent_trns_chain = ds->cur_trns_chain = trns_chain_create ();
\f
/* Initializes procedure handling. */
struct dataset *
-create_dataset (struct casefile_factory *fact,
- replace_source_callback *rps,
- replace_dictionary_callback *rds
- )
+create_dataset (replace_source_callback *rps,
+ replace_dictionary_callback *rds)
{
struct dataset *ds = xzalloc (sizeof(*ds));
ds->dict = dict_create ();
- ds->cf_factory = fact;
+ ds->caseinit = caseinit_create ();
ds->replace_source = rps;
ds->replace_dict = rds;
proc_cancel_all_transformations (ds);
void
destroy_dataset (struct dataset *ds)
{
- discard_variables (ds);
+ proc_discard_active_file (ds); /* clears the dictionary, destroys the source, cancels transformations */
dict_destroy (ds->dict);
+ caseinit_destroy (ds->caseinit); /* counterpart of caseinit_create in create_dataset */
trns_chain_destroy (ds->permanent_trns_chain);
free (ds);
}
-/* Sets SINK as the destination for procedure output from the
- next procedure. */
+/* Causes output from the next procedure to be discarded, instead
+ of being preserved for use as input for the next procedure.
+ This only sets a flag in DS; proc_commit acts on the flag
+ and resets it to false. */
void
-proc_set_sink (struct dataset *ds, struct case_sink *sink)
+proc_discard_output (struct dataset *ds)
{
- assert (ds->proc_sink == NULL);
- ds->proc_sink = sink;
+ ds->discard_output = true;
+}
+
+/* Discards the active file dictionary, data, and
+ transformations. DS must be in state PROC_COMMITTED. The
+ dictionary is cleared rather than destroyed, the default
+ file handle and the lag count are reset, the source
+ casereader is destroyed, and the replace_source callback, if
+ any, is called with NULL. */
+void
+proc_discard_active_file (struct dataset *ds)
+{
+ assert (ds->proc_state == PROC_COMMITTED);
+
+ dict_clear (ds->dict);
+ fh_set_default_handle (NULL);
+
+ ds->n_lag = 0;
+
+ casereader_destroy (ds->source);
+ ds->source = NULL;
+ if ( ds->replace_source) ds->replace_source (NULL);
+
+ proc_cancel_all_transformations (ds);
}
/* Sets SOURCE as the source for procedure input for the next
procedure. */
void
-proc_set_source (struct dataset *ds, struct case_source *source)
+proc_set_active_file (struct dataset *ds,
+ struct casereader *source,
+ struct dictionary *dict) /* DICT replaces the current dictionary, which is destroyed */
{
- ds->proc_source = source;
+ assert (ds->proc_state == PROC_COMMITTED);
+ assert (ds->dict != dict); /* the old dictionary is destroyed below, so DICT must be a different object */
+
+ proc_discard_active_file (ds);
- if ( ds->replace_source )
- ds->replace_source (ds->proc_source);
+ dict_destroy (ds->dict);
+ ds->dict = dict;
+ if ( ds->replace_dict) ds->replace_dict (dict); /* optional callback */
+
+ proc_set_active_file_data (ds, source); /* its bool result is not checked here */
}
-/* Returns true if a source for the next procedure has been
- configured, false otherwise. */
+/* Replaces the active file's data by READER without replacing
+ the associated dictionary. The previous source casereader is
+ destroyed first, the replace_source callback (if any) is
+ called with READER, and the caseinit state is reset against
+ the current dictionary. Returns true if READER is null or
+ casereader_error reports no error for it, false otherwise. */
bool
-proc_has_source (const struct dataset *ds)
+proc_set_active_file_data (struct dataset *ds, struct casereader *reader)
{
- return ds->proc_source != NULL;
-}
+ casereader_destroy (ds->source);
+ ds->source = reader;
+ if (ds->replace_source) ds->replace_source (reader);
-/* Returns the output from the previous procedure.
- For use only immediately after executing a procedure.
- The returned casefile is owned by the caller; it will not be
- automatically used for the next procedure's input. */
-struct casefile *
-proc_capture_output (struct dataset *ds)
-{
- struct casefile *casefile;
+ caseinit_clear (ds->caseinit);
+ caseinit_mark_as_preinited (ds->caseinit, ds->dict);
- /* Try to make sure that this function is called immediately
- after procedure() or a similar function. */
- assert (ds->proc_source != NULL);
- assert (case_source_is_class (ds->proc_source, &storage_source_class));
- assert (trns_chain_is_empty (ds->permanent_trns_chain));
- assert (!proc_in_temporary_transformations (ds));
+ return reader == NULL || !casereader_error (reader);
+}
- casefile = storage_source_decapsulate (ds->proc_source);
- proc_set_source (ds, NULL);
+/* Returns true if an active file data source is available, false
+ otherwise. A source is available exactly when the source
+ casereader in DS is non-null. */
+bool
+proc_has_active_file (const struct dataset *ds)
+{
+ return ds->source != NULL;
+}
- return casefile;
+/* Checks whether DS has a corrupted active file, that is,
+ whether casereader_error reports an error on its source. If
+ so, discards the active file and returns false. If not,
+ resets the successor taint on the taint object of the source
+ and returns true. Also returns true, without doing anything,
+ when DS has no source. */
+bool
+dataset_end_of_command (struct dataset *ds)
+{
+ if (ds->source != NULL)
+ {
+ if (casereader_error (ds->source))
+ {
+ proc_discard_active_file (ds);
+ return false;
+ }
+ else
+ {
+ const struct taint *taint = casereader_get_taint (ds->source);
+ taint_reset_successor_taint ((struct taint *) taint); /* cast drops const for this call */
+ assert (!taint_has_tainted_successor (taint));
+ }
+ }
+ return true;
}
\f
static trns_proc_func case_limit_trns_proc;
return ds->dict;
}
-
-/* Set or replace dataset DS's dictionary with DICT.
- The old dictionary is destroyed */
-void
-dataset_set_dict (struct dataset *ds, struct dictionary *dict)
-{
- struct dictionary *old_dict = ds->dict;
-
- dict_copy_callbacks (dict, ds->dict);
- ds->dict = dict;
-
- if ( ds->replace_dict )
- ds->replace_dict (dict);
-
- dict_destroy (old_dict);
-}
-
void
dataset_need_lag (struct dataset *ds, int n_before)
{
ds->n_lag = MAX (ds->n_lag, n_before);
}
-
-struct casefile_factory *
-dataset_get_casefile_factory (const struct dataset *ds)
-{
- return ds->cf_factory;
-}
-
#include <stdbool.h>
#include <data/transformations.h>
-#include <data/casefile-factory.h>
#include <libpspp/compiler.h>
-struct ccase;
-struct casefile;
-struct case_sink;
-struct case_source;
-
+struct casereader;
struct dataset;
-
+struct dictionary;
\f
/* Transformations. */
trns_free_func *, void *);
size_t next_transformation (const struct dataset *ds);
-void discard_variables (struct dataset *ds);
-
-
-
bool proc_cancel_all_transformations (struct dataset *ds);
struct trns_chain *proc_capture_transformations (struct dataset *ds);
/* Procedures. */
struct dictionary ;
-typedef void replace_source_callback (struct case_source *);
+typedef void replace_source_callback (struct casereader *);
typedef void replace_dictionary_callback (struct dictionary *);
-struct dataset * create_dataset (struct casefile_factory *fact,
- replace_source_callback *,
- replace_dictionary_callback *
- );
+struct dataset * create_dataset (replace_source_callback *,
+ replace_dictionary_callback *);
void destroy_dataset (struct dataset *);
-struct casefile_factory *dataset_get_casefile_factory (const struct dataset *);
-
-void proc_set_source (struct dataset *ds, struct case_source *);
-bool proc_has_source (const struct dataset *ds);
-
-void proc_set_sink (struct dataset *ds, struct case_sink *);
-struct casefile *proc_capture_output (struct dataset *ds);
-
-typedef bool casefile_func (const struct casefile *, void *);
-typedef bool case_func (const struct ccase *, void *, const struct dataset *);
-typedef void begin_func (const struct ccase *, void *, const struct dataset*);
+void proc_discard_active_file (struct dataset *);
+void proc_set_active_file (struct dataset *,
+ struct casereader *, struct dictionary *);
+bool proc_set_active_file_data (struct dataset *, struct casereader *);
+bool proc_has_active_file (const struct dataset *ds);
-typedef bool end_func (void *, const struct dataset *);
-
-typedef bool split_func (const struct ccase *, const struct casefile *,
- void *, const struct dataset *);
-
-
-
-bool procedure (struct dataset *ds, case_func *, void *aux) WARN_UNUSED_RESULT;
-
-bool procedure_with_splits (struct dataset *ds,
- begin_func *,
- case_func *,
- end_func *,
- void *aux)
- WARN_UNUSED_RESULT;
-bool multipass_procedure (struct dataset *ds, casefile_func *, void *aux)
- WARN_UNUSED_RESULT;
-bool multipass_procedure_with_splits (struct dataset *ds,
- split_func *,
- void *aux)
- WARN_UNUSED_RESULT;
+void proc_discard_output (struct dataset *ds);
+bool proc_execute (struct dataset *ds);
time_t time_of_last_procedure (struct dataset *ds);
-void proc_open (struct dataset *);
-bool proc_read (struct dataset *, struct ccase **);
-bool proc_close (struct dataset *);
+struct casereader *proc_open (struct dataset *);
+bool proc_is_open (const struct dataset *);
+bool proc_commit (struct dataset *);
+
+bool dataset_end_of_command (struct dataset *);
\f
+struct dictionary *dataset_dict (const struct dataset *ds);
struct ccase *lagged_case (const struct dataset *ds, int n_before);
-
-inline struct dictionary *dataset_dict (const struct dataset *ds);
-inline void dataset_set_dict ( struct dataset *ds, struct dictionary *dict);
-
void dataset_need_lag (struct dataset *ds, int n_before);
#endif /* procedure.h */
#include <config.h>
#include <stdlib.h>
-#include "scratch-handle.h"
-#include "casefile.h"
-#include "dictionary.h"
+#include <data/casereader.h>
+#include <data/scratch-handle.h>
+#include <data/dictionary.h>
/* Destroys HANDLE. */
void
if (handle != NULL)
{
dict_destroy (handle->dictionary);
- casefile_destroy (handle->casefile);
+ casereader_destroy (handle->casereader);
free (handle);
}
}
struct scratch_handle
{
struct dictionary *dictionary; /* Dictionary. */
- struct casefile *casefile; /* Cases. */
+ struct casereader *casereader; /* Cases; NULL until a scratch writer is closed without error. */
};
void scratch_handle_destroy (struct scratch_handle *);
#include <stdlib.h>
-#include "casefile.h"
#include "dictionary.h"
#include "file-handle-def.h"
#include "scratch-handle.h"
#include <data/case.h>
+#include <data/casereader.h>
#include <libpspp/message.h>
#include "xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
-/* A reader for a scratch file. */
-struct scratch_reader
- {
- struct file_handle *fh; /* Underlying file handle. */
- struct casereader *casereader; /* Case reader. */
- };
-
/* Opens FH, which must have referent type FH_REF_SCRATCH, and
returns a scratch_reader for it, or a null pointer on
failure. Stores the dictionary for the scratch file into
- *DICT.
-
- If you use an any_reader instead, then your code can be more
- flexible without being any harder to write. */
-struct scratch_reader *
+ *DICT. */
+struct casereader *
scratch_reader_open (struct file_handle *fh, struct dictionary **dict)
{
struct scratch_handle *sh;
- struct scratch_reader *reader;
if (!fh_open (fh, FH_REF_SCRATCH, "scratch file", "rs"))
return NULL;
sh = fh_get_scratch_handle (fh);
- if (sh == NULL)
+ if (sh == NULL || sh->casereader == NULL)
{
msg (SE, _("Scratch file handle %s has not yet been written, "
"using SAVE or another procedure, so it cannot yet "
}
*dict = dict_clone (sh->dictionary);
- reader = xmalloc (sizeof *reader);
- reader->fh = fh;
- reader->casereader = casefile_get_reader (sh->casefile, NULL);
- return reader;
-}
-
-/* Reads a case from READER and copies it into C.
- Returns true if successful, false on error or at end of file. */
-bool
-scratch_reader_read_case (struct scratch_reader *reader, struct ccase *c)
-{
- struct ccase tmp;
- if (casereader_read (reader->casereader, &tmp))
- {
- case_copy (c, 0, &tmp, 0,
- casefile_get_value_cnt (
- casereader_get_casefile (reader->casereader)));
- case_destroy (&tmp);
- return true;
- }
- else
- return false;
-}
-
-/* Returns true if an I/O error occurred on READER, false otherwise. */
-bool
-scratch_reader_error (const struct scratch_reader *reader)
-{
- return casefile_error (casereader_get_casefile (reader->casereader));
-}
-
-/* Closes READER. */
-void
-scratch_reader_close (struct scratch_reader *reader)
-{
- fh_close (reader->fh, "scratch file", "rs");
- casereader_destroy (reader->casereader);
- free (reader);
+ return casereader_clone (sh->casereader);
}
struct dictionary;
struct file_handle;
struct ccase;
-struct scratch_reader *scratch_reader_open (struct file_handle *,
- struct dictionary **);
-bool scratch_reader_read_case (struct scratch_reader *, struct ccase *);
-bool scratch_reader_error (const struct scratch_reader *);
-void scratch_reader_close (struct scratch_reader *);
+struct casereader *scratch_reader_open (struct file_handle *,
+ struct dictionary **);
#endif /* scratch-reader.h */
02110-1301, USA. */
#include <config.h>
+
#include "scratch-writer.h"
+
#include <stdlib.h>
-#include "case.h"
-#include "casefile.h"
-#include "fastfile.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "scratch-handle.h"
+
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/scratch-handle.h>
+#include <libpspp/compiler.h>
+#include <libpspp/taint.h>
+
#include "xalloc.h"
/* A scratch file writer. */
struct scratch_handle *handle; /* Underlying scratch handle. */
struct file_handle *fh; /* Underlying file handle. */
struct dict_compactor *compactor; /* Compacts into handle->dictionary. */
+ struct casewriter *subwriter; /* Data output. */
};
+static struct casewriter_class scratch_writer_casewriter_class;
+
/* Opens FH, which must have referent type FH_REF_SCRATCH, and
returns a scratch_writer for it, or a null pointer on
failure. Cases stored in the scratch_writer will be expected
- to be drawn from DICTIONARY.
-
- If you use an any_writer instead, then your code can be more
- flexible without being any harder to write. */
-struct scratch_writer *
+ to be drawn from DICTIONARY. */
+struct casewriter *
scratch_writer_open (struct file_handle *fh,
const struct dictionary *dictionary)
{
struct scratch_writer *writer;
struct dictionary *scratch_dict;
struct dict_compactor *compactor;
+ struct casewriter *casewriter;
if (!fh_open (fh, FH_REF_SCRATCH, "scratch file", "we"))
return NULL;
/* Create new contents. */
sh = xmalloc (sizeof *sh);
sh->dictionary = scratch_dict;
- sh->casefile = fastfile_create (dict_get_next_value_idx (sh->dictionary));
+ sh->casereader = NULL;
/* Create writer. */
writer = xmalloc (sizeof *writer);
writer->handle = sh;
writer->fh = fh;
writer->compactor = compactor;
+ writer->subwriter = autopaging_writer_create (dict_get_next_value_idx (
+ scratch_dict));
fh_set_scratch_handle (fh, sh);
- return writer;
+ casewriter = casewriter_create (&scratch_writer_casewriter_class, writer);
+ taint_propagate (casewriter_get_taint (writer->subwriter),
+ casewriter_get_taint (casewriter));
+ return casewriter;
}
/* Writes case C to WRITER. */
-bool
-scratch_writer_write_case (struct scratch_writer *writer,
- const struct ccase *c)
+static void
+scratch_writer_casewriter_write (struct casewriter *w UNUSED, void *writer_,
+ struct ccase *c) /* WRITER_ is the struct scratch_writer; C is given up by this call */
{
+ struct scratch_writer *writer = writer_;
struct scratch_handle *handle = writer->handle;
+ struct ccase tmp;
if (writer->compactor)
{
- struct ccase tmp_case;
- case_create (&tmp_case, dict_get_next_value_idx (handle->dictionary));
- dict_compactor_compact (writer->compactor, &tmp_case, c);
- return casefile_append_xfer (handle->casefile, &tmp_case);
+ case_create (&tmp, dict_get_next_value_idx (handle->dictionary));
+ dict_compactor_compact (writer->compactor, &tmp, c);
+ case_destroy (c); /* the compacted form now lives in tmp */
}
- else
- return casefile_append (handle->casefile, c);
-}
-
-/* Returns true if an I/O error occurred on WRITER, false otherwise. */
-bool
-scratch_writer_error (const struct scratch_writer *writer)
-{
- return casefile_error (writer->handle->casefile);
+ else
+ case_move (&tmp, c); /* no compactor: presumably transfers C into tmp -- TODO confirm case_move semantics */
+ casewriter_write (writer->subwriter, &tmp); /* in both branches tmp is what reaches the subwriter */
}
-/* Closes WRITER.
- Returns true if successful, false if an I/O error occurred on WRITER. */
-bool
-scratch_writer_close (struct scratch_writer *writer)
+/* Closes WRITER, which is passed as WRITER_. Turns the
+ subwriter into a casereader. If that reader reports no
+ error, it is stored in the scratch handle; otherwise it is
+ destroyed so that it does not leak. Then closes the file
+ handle and frees the writer itself. */
+static void
+scratch_writer_casewriter_destroy (struct casewriter *w UNUSED, void *writer_)
{
- struct casefile *cf = writer->handle->casefile;
- bool ok = casefile_error (cf);
+ struct scratch_writer *writer = writer_;
+ struct casereader *reader = casewriter_make_reader (writer->subwriter);
+ if (!casereader_error (reader))
+ writer->handle->casereader = reader;
+ else
+ casereader_destroy (reader); /* nothing else holds this reader */
fh_close (writer->fh, "scratch file", "we");
free (writer);
- return ok;
}
+
+static struct casewriter_class scratch_writer_casewriter_class = /* class table passed to casewriter_create in scratch_writer_open */
+ {
+ scratch_writer_casewriter_write,
+ scratch_writer_casewriter_destroy,
+ NULL, /* third member is left unset */
+ };
struct dictionary;
struct file_handle;
struct ccase;
-struct scratch_writer *scratch_writer_open (struct file_handle *,
- const struct dictionary *);
-bool scratch_writer_write_case (struct scratch_writer *, const struct ccase *);
-bool scratch_writer_error (const struct scratch_writer *);
-bool scratch_writer_close (struct scratch_writer *);
+struct casewriter *scratch_writer_open (struct file_handle *,
+ const struct dictionary *);
#endif /* scratch-writer.h */
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <config.h>
-
-#include <data/storage-stream.h>
-
-#include <assert.h>
-#include <stdlib.h>
-
-#include <data/case-sink.h>
-#include <data/case-source.h>
-#include <data/case.h>
-#include <data/casefile.h>
-#include <data/casefile-factory.h>
-
-#include "xalloc.h"
-
-/* Storage sink. */
-
-/* Information about storage sink. */
-struct storage_sink_info
- {
- struct casefile *casefile; /* Storage. */
- };
-
-static struct storage_sink_info *
-get_storage_sink_info (struct case_sink *sink)
-{
- assert (sink->class == &storage_sink_class);
- return sink->aux;
-}
-
-/* Initializes a storage sink. */
-static void
-storage_sink_open (struct case_sink *sink)
-{
- struct storage_sink_info *info;
-
- sink->aux = info = xmalloc (sizeof *info);
- info->casefile = sink->factory->create_casefile (sink->factory,
- sink->value_cnt);
-}
-
-/* Writes case C to the storage sink SINK.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-storage_sink_write (struct case_sink *sink, const struct ccase *c)
-{
- struct storage_sink_info *info = get_storage_sink_info (sink);
- return casefile_append (info->casefile, c);
-}
-
-/* Destroys internal data in SINK. */
-static void
-storage_sink_destroy (struct case_sink *sink)
-{
- struct storage_sink_info *info = get_storage_sink_info (sink);
- casefile_destroy (info->casefile);
- free (info);
-}
-
-/* Closes the sink and returns a storage source to read back the
- written data. */
-static struct case_source *
-storage_sink_make_source (struct case_sink *sink)
-{
- struct storage_sink_info *info = get_storage_sink_info (sink);
- struct case_source *source = storage_source_create (info->casefile);
- info->casefile = NULL;
- return source;
-}
-
-/* Storage sink. */
-const struct case_sink_class storage_sink_class =
- {
- "storage",
- storage_sink_open,
- storage_sink_write,
- storage_sink_destroy,
- storage_sink_make_source,
- };
-\f
-/* Storage source. */
-
-struct storage_source_info
- {
- struct casefile *casefile; /* Storage. */
- struct casereader *reader; /* Reader. */
- };
-
-static struct storage_source_info *
-get_storage_source_info (const struct case_source *source)
-{
- assert (source->class == &storage_source_class);
- return source->aux;
-}
-
-/* Returns the number of cases that will be read by
- storage_source_read(). */
-static int
-storage_source_count (const struct case_source *source)
-{
- struct storage_source_info *info = get_storage_source_info (source);
- return casefile_get_case_cnt (info->casefile);
-}
-
-/* Reads one case into OUTPUT_CASE.
- Returns true if successful, false at end of file or if an
- I/O error occurred. */
-static bool
-storage_source_read (struct case_source *source, struct ccase *output_case)
-{
- struct storage_source_info *info = get_storage_source_info (source);
- struct ccase casefile_case;
-
- if (info->reader == NULL)
- info->reader = casefile_get_reader (info->casefile, NULL);
-
- if (casereader_read (info->reader, &casefile_case))
- {
- case_copy (output_case, 0,
- &casefile_case, 0,
- casefile_get_value_cnt (info->casefile));
- return true;
- }
- else
- return false;
-}
-
-/* Destroys the source.
- Returns true if successful read, false if an I/O occurred
- during destruction or previously. */
-static bool
-storage_source_destroy (struct case_source *source)
-{
- struct storage_source_info *info = get_storage_source_info (source);
- bool ok = true;
- if (info->casefile)
- {
- ok = !casefile_error (info->casefile);
- casefile_destroy (info->casefile);
- }
- free (info);
- return ok;
-}
-
-/* Returns the casefile encapsulated by SOURCE. */
-struct casefile *
-storage_source_get_casefile (struct case_source *source)
-{
- struct storage_source_info *info = get_storage_source_info (source);
- return info->casefile;
-}
-
-/* Destroys SOURCE and returns the casefile that it
- encapsulated. */
-struct casefile *
-storage_source_decapsulate (struct case_source *source)
-{
- struct storage_source_info *info = get_storage_source_info (source);
- struct casefile *casefile = info->casefile;
- assert (info->reader == NULL);
- info->casefile = NULL;
- free_case_source (source);
- return casefile;
-}
-
-/* Creates and returns a new storage source that encapsulates
- CASEFILE. */
-struct case_source *
-storage_source_create (struct casefile *casefile)
-{
- struct storage_source_info *info;
-
- info = xmalloc (sizeof *info);
- info->casefile = casefile;
- info->reader = NULL;
-
- return create_case_source (&storage_source_class, info);
-}
-
-/* Storage source. */
-const struct case_source_class storage_source_class =
- {
- "storage",
- storage_source_count,
- storage_source_read,
- storage_source_destroy,
- };
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#ifndef STORAGE_STREAM_H
-#define STORAGE_STREAM_H 1
-
-struct case_source;
-struct casefile;
-
-extern const struct case_sink_class storage_sink_class;
-extern const struct case_source_class storage_source_class;
-
-struct casefile *storage_source_get_casefile (struct case_source *);
-struct casefile *storage_source_decapsulate (struct case_source *);
-struct case_source *storage_source_create (struct casefile *);
-
-#endif /* storage-stream.h */
#include <config.h>
-#include "sys-file-reader.h"
-#include "sys-file-private.h"
+#include <data/sys-file-reader.h>
+#include <data/sys-file-private.h>
#include <errno.h>
#include <float.h>
#include <libpspp/hash.h>
#include <libpspp/array.h>
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "file-name.h"
-#include "format.h"
-#include "missing-values.h"
-#include "value-labels.h"
-#include "variable.h"
-#include "value.h"
+#include <data/case.h>
+#include <data/casereader-provider.h>
+#include <data/casereader.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/file-name.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <data/value.h>
#include "c-ctype.h"
#include "inttostr.h"
struct file_handle *fh; /* File handle. */
FILE *file; /* File stream. */
bool error; /* I/O or corruption error? */
+ size_t value_cnt; /* Number of "union value"s in struct case. */
/* File format. */
enum integer_format integer_format; /* On-disk integer format. */
enum float_format float_format; /* On-disk floating point format. */
- int value_cnt; /* Number of 8-byte units per case. */
+ int flt64_cnt; /* Number of 8-byte units per case. */
struct sfm_var *vars; /* Variables. */
size_t var_cnt; /* Number of variables. */
bool has_long_var_names; /* File has a long variable name map */
int case_index; /* Index into case. */
};
+static struct casereader_class sys_file_casereader_class;
+
+static bool close_reader (struct sfm_reader *);
+
static struct variable **make_var_by_value_idx (struct sfm_reader *,
struct dictionary *);
static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
struct variable_to_value_map *,
struct variable **var, char **value,
int *warning_cnt);
+
+static bool close_reader (struct sfm_reader *r);
\f
/* Dictionary reader. */
};
static void read_header (struct sfm_reader *, struct dictionary *,
- int *weight_idx, int *claimed_value_cnt,
+ int *weight_idx, int *claimed_flt64_cnt,
struct sfm_read_info *);
static void read_variable_record (struct sfm_reader *, struct dictionary *,
int *format_warning_cnt);
reading. Reads the system file's dictionary into *DICT.
If INFO is non-null, then it receives additional info about the
system file. */
-struct sfm_reader *
+struct casereader *
sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
struct sfm_read_info *info)
{
struct variable **var_by_value_idx;
int format_warning_cnt = 0;
int weight_idx;
- int claimed_value_cnt;
+ int claimed_flt64_cnt;
int rec_type;
size_t i;
r->fh = fh;
r->file = fn_open (fh_get_file_name (fh), "rb");
r->error = false;
- r->value_cnt = 0;
+ r->flt64_cnt = 0;
r->has_vls = false;
r->has_long_var_names = false;
r->opcode_idx = sizeof r->opcodes;
if (setjmp (r->bail_out))
{
- sfm_close_reader (r);
+ close_reader (r);
dict_destroy (*dict);
*dict = NULL;
return NULL;
}
/* Read header. */
- read_header (r, *dict, &weight_idx, &claimed_value_cnt, info);
+ read_header (r, *dict, &weight_idx, &claimed_flt64_cnt, info);
/* Read all the variable definition records. */
rec_type = read_int32 (r);
/* Read record 999 data, which is just filler. */
read_int32 (r);
- if (claimed_value_cnt != -1 && claimed_value_cnt != r->value_cnt)
+ if (claimed_flt64_cnt != -1 && claimed_flt64_cnt != r->flt64_cnt)
sys_warn (r, _("File header claims %d variable positions but "
"%d were read from file."),
- claimed_value_cnt, r->value_cnt);
+ claimed_flt64_cnt, r->flt64_cnt);
/* Create an index of dictionary variable widths for
sfm_read_case to use. We cannot use the `struct variable's
}
pool_free (r->pool, var_by_value_idx);
- return r;
+ r->value_cnt = dict_get_next_value_idx (*dict);
+ return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+ &sys_file_casereader_class, r);
}
-/* Closes a system file after we're done with it. */
-void
-sfm_close_reader (struct sfm_reader *r)
+/* Closes a system file after we're done with it.
+ Returns true if successful (or if R is null), false if an error
+ was recorded on R earlier or occurs while closing its file. */
+static bool
+close_reader (struct sfm_reader *r)
{
+ bool error;
+
if (r == NULL)
- return;
+ return true;
if (r->file)
{
- if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
- msg (ME, _("Error closing system file \"%s\": %s."),
- fh_get_file_name (r->fh), strerror (errno));
+ if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
+ {
+ msg (ME, _("Error closing system file \"%s\": %s."),
+ fh_get_file_name (r->fh), strerror (errno));
+ r->error = true;
+ }
r->file = NULL;
}
if (r->fh != NULL)
fh_close (r->fh, "system file", "rs");
+ error = r->error;
pool_destroy (r->pool);
+
+ return !error;
}
-/* Returns true if an I/O error has occurred on READER, false
- otherwise. */
-bool
-sfm_read_error (const struct sfm_reader *reader)
+/* Destroys READER by closing the system file reader R_ behind it.
+ If closing reports failure (an error was recorded on the reader
+ earlier, or its file could not be closed cleanly), marks READER
+ as failed so that the error is not silently dropped. */
+static void
+sys_file_casereader_destroy (struct casereader *reader, void *r_)
{
- return reader->error;
+ struct sfm_reader *r = r_;
+ if (!close_reader (r))
+   casereader_force_error (reader);
}
/* Returns true if FILE is an SPSS system file,
Sets DICT's file label to the system file's label.
Sets *WEIGHT_IDX to 0 if the system file is unweighted,
or to the value index of the weight variable otherwise.
- Sets *CLAIMED_VALUE_CNT to the number of values that the file
+ Sets *CLAIMED_FLT64_CNT to the number of 8-byte units that the file
claims to have (although it is not always correct).
If INFO is non-null, initializes *INFO with header
information. */
static void
read_header (struct sfm_reader *r, struct dictionary *dict,
- int *weight_idx, int *claimed_value_cnt,
+ int *weight_idx, int *claimed_flt64_cnt,
struct sfm_read_info *info)
{
char rec_type[5];
&& r->integer_format != INTEGER_LSB_FIRST))
sys_error (r, _("This is not an SPSS system file."));
- *claimed_value_cnt = read_int32 (r);
- if (*claimed_value_cnt < 0 || *claimed_value_cnt > INT_MAX / 16)
- *claimed_value_cnt = -1;
+ *claimed_flt64_cnt = read_int32 (r);
+ if (*claimed_flt64_cnt < 0 || *claimed_flt64_cnt > INT_MAX / 16)
+ *claimed_flt64_cnt = -1;
r->compressed = read_int32 (r) != 0;
/* Account for values.
Skip long string continuation records, if any. */
nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
- r->value_cnt += nv;
+ r->flt64_cnt += nv;
if (width > 8)
{
int i;
static bool read_compressed_string (struct sfm_reader *, char *);
static bool read_whole_strings (struct sfm_reader *, char *, size_t);
-/* Reads one case from READER's file into C. Returns nonzero
- only if successful. */
-int
-sfm_read_case (struct sfm_reader *r, struct ccase *c)
+/* Reads one case from READER's file into C. Returns true only
+ if successful. */
+static bool
+sys_file_casereader_read (struct casereader *reader, void *r_,
+ struct ccase *c)
{
+ struct sfm_reader *r = r_;
if (r->error)
- return 0;
+ return false;
- if (setjmp (r->bail_out))
- return 0;
+ case_create (c, r->value_cnt);
+ if (setjmp (r->bail_out))
+ {
+ casereader_force_error (reader);
+ case_destroy (c);
+ return false;
+ }
if (!r->compressed && sizeof (double) == 8 && !r->has_vls)
{
/* Fast path. Read the whole case directly. */
if (!try_read_bytes (r, case_data_all_rw (c),
- sizeof (union value) * r->value_cnt))
- return 0;
+ sizeof (union value) * r->flt64_cnt))
+ {
+ case_destroy (c);
+ return false;
+ }
/* Convert floating point numbers to native format if needed. */
if (r->float_format != FLOAT_NATIVE_DOUBLE)
{
int i;
-
+
for (i = 0; i < r->var_cnt; i++)
if (r->vars[i].width == 0)
{
float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d);
}
}
- return 1;
+ return true;
}
else
{
}
}
}
- return 1;
+ return true;
eof:
+ case_destroy (c);
if (i != 0)
partial_record (r);
- return 0;
+ return false;
}
}
int i;
var_by_value_idx = pool_nmalloc (r->pool,
- r->value_cnt, sizeof *var_by_value_idx);
+ r->flt64_cnt, sizeof *var_by_value_idx);
for (i = 0; i < dict_get_var_cnt (dict); i++)
{
struct variable *v = dict_get_var (dict, i);
for (j = 1; j < nv; j++)
var_by_value_idx[value_idx++] = NULL;
}
- assert (value_idx == r->value_cnt);
+ assert (value_idx == r->flt64_cnt);
return var_by_value_idx;
}
{
struct variable *var;
- if (value_idx < 1 || value_idx > r->value_cnt)
+ if (value_idx < 1 || value_idx > r->flt64_cnt)
sys_error (r, _("Variable index %d not in valid range 1...%d."),
- value_idx, r->value_cnt);
+ value_idx, r->flt64_cnt);
var = var_by_value_idx[value_idx - 1];
if (var == NULL)
float_convert (r->float_format, flt64, FLOAT_NATIVE_DOUBLE, &x);
return x;
}
-
+\f
+static struct casereader_class sys_file_casereader_class =
+ {
+ sys_file_casereader_read,
+ sys_file_casereader_destroy,
+ NULL,
+ NULL,
+ };
struct dictionary;
struct file_handle;
struct ccase;
-struct sfm_reader *sfm_open_reader (struct file_handle *,
+struct casereader *sfm_open_reader (struct file_handle *,
struct dictionary **,
struct sfm_read_info *);
-int sfm_read_case (struct sfm_reader *, struct ccase *);
-bool sfm_read_error (const struct sfm_reader *);
-void sfm_close_reader (struct sfm_reader *);
bool sfm_detect (FILE *);
#endif /* sys-file-reader.h */
#include <libpspp/str.h>
#include <libpspp/version.h>
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
-#include "settings.h"
-#include "value-labels.h"
-#include "variable.h"
+#include <data/case.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/settings.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
#include "minmax.h"
size_t flt64_cnt; /* Number of flt64 elements. */
};
+static struct casewriter_class sys_file_casewriter_class;
+
static char *append_string_max (char *, const char *, const char *);
static void write_header (struct sfm_writer *, const struct dictionary *);
static void buf_write (struct sfm_writer *, const void *, size_t);
static void write_documents (struct sfm_writer *, const struct dictionary *);
+/* Helpers private to this file. They are declared static so that
+ their generic names are not exported; the later definitions,
+ written without a storage class, take internal linkage from
+ these declarations. */
+static bool write_error (const struct sfm_writer *);
+static bool close_writer (struct sfm_writer *);
+
static inline int
var_flt64_cnt (const struct variable *v)
{
No reference to D is retained, so it may be modified or
destroyed at will after this function returns. D is not
modified by this function, except to assign short names. */
-struct sfm_writer *
+struct casewriter *
sfm_open_writer (struct file_handle *fh, struct dictionary *d,
struct sfm_write_options opts)
{
w->y = (unsigned char *) w->ptr;
}
- if (sfm_write_error (w))
+ if (write_error (w))
goto error;
- return w;
+ return casewriter_create (&sys_file_casewriter_class, w);
error:
- sfm_close_writer (w);
+ close_writer (w);
return NULL;
open_error:
static void write_compressed_data (struct sfm_writer *w, const flt64 *elem);
-/* Writes case C to system file W.
- Returns 1 if successful, 0 if an I/O error occurred. */
-bool
-sfm_write_case (struct sfm_writer *w, const struct ccase *c)
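+/* Writes case C to system file W and then destroys C.
+ If W's stream already has an error, marks WRITER as failed and
+ destroys C without writing it. */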
+static void
+sys_file_casewriter_write (struct casewriter *writer, void *w_,
+ struct ccase *c)
{
- if (ferror (w->file))
- return 0;
+ struct sfm_writer *w = w_;
+ if (ferror (w->file))
+ {
+ casewriter_force_error (writer);
+ case_destroy (c);
+ return;
+ }
w->case_cnt++;
local_free (bounce);
}
-
- return !sfm_write_error (w);
+
+ case_destroy (c);
+}
+
+static void
+sys_file_casewriter_destroy (struct casewriter *writer, void *w_)
+{
+ struct sfm_writer *w = w_;
+ if (!close_writer (w))
+ casewriter_force_error (writer);
}
static void
/* Returns true if an I/O error has occurred on WRITER, false otherwise. */
bool
-sfm_write_error (const struct sfm_writer *writer)
+write_error (const struct sfm_writer *writer)
{
return ferror (writer->file);
}
/* Closes a system file after we're done with it.
Returns true if successful, false if an I/O error occurred. */
bool
-sfm_close_writer (struct sfm_writer *w)
+close_writer (struct sfm_writer *w)
{
bool ok;
}
fflush (w->file);
- ok = !sfm_write_error (w);
+ ok = !write_error (w);
/* Seek back to the beginning and update the number of cases.
This is just a courtesy to later readers, so there's no need
return ok;
}
+\f
+static struct casewriter_class sys_file_casewriter_class =
+ {
+ sys_file_casewriter_write,
+ sys_file_casewriter_destroy,
+ NULL,
+ };
struct file_handle;
struct dictionary;
struct ccase;
-struct sfm_writer *sfm_open_writer (struct file_handle *, struct dictionary *,
+struct casewriter *sfm_open_writer (struct file_handle *, struct dictionary *,
struct sfm_write_options);
struct sfm_write_options sfm_writer_default_options (void);
-bool sfm_write_case (struct sfm_writer *, const struct ccase *);
-bool sfm_write_error (const struct sfm_writer *);
-bool sfm_close_writer (struct sfm_writer *);
-
#endif /* sys-file-writer.h */
2007-06-06 Ben Pfaff <blp@gnu.org>
- * command.def: Add DEBUG DATASHEET command.
+ * command.def: Add DEBUG DATASHEET command. Remove DEBUG CASEFILE
+ command.
2007-03-18 Ben Pfaff <blp@gnu.org>
#include <errno.h>
#include <unistd.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/settings.h>
if (cmd_result_is_failure (result))
lex_discard_rest_of_command (lexer);
+ assert (!proc_is_open (ds));
unset_cmd_algorithm ();
dict_clear_aux (dataset_dict (ds));
+ if (!dataset_end_of_command (ds))
+ result = CMD_CASCADING_FAILURE;
return result;
}
{
const struct dictionary *dict = dataset_dict (ds);
return cmd_parse_in_state (lexer, ds,
- proc_has_source (ds) &&
+ proc_has_active_file (ds) &&
dict_get_var_cnt (dict) > 0 ?
CMD_STATE_DATA : CMD_STATE_INITIAL);
}
{
msg (SE, _("%s may be used only in enhanced syntax mode."),
command->name);
- return CMD_FAILURE;
+ return CMD_FAILURE;
}
else if (!in_correct_state (command, state))
{
int
cmd_execute (struct lexer *lexer, struct dataset *ds)
{
- if (!procedure (ds, NULL, NULL))
+ bool ok = casereader_destroy (proc_open (ds));
+ if (!proc_commit (ds) || !ok)
return CMD_CASCADING_FAILURE;
return lex_end_of_command (lexer);
}
int
cmd_new_file (struct lexer *lexer, struct dataset *ds)
{
- discard_variables (ds);
+ proc_discard_active_file (ds);
return lex_end_of_command (lexer);
}
DEF_CMD (S_INPUT_PROGRAM, 0, "REREAD", cmd_reread)
/* Commands for testing PSPP. */
-DEF_CMD (S_ANY, F_TESTING, "DEBUG CASEFILE", cmd_debug_casefile)
DEF_CMD (S_ANY, F_TESTING, "DEBUG DATASHEET", cmd_debug_datasheet)
DEF_CMD (S_ANY, F_TESTING, "DEBUG EVALUATE", cmd_debug_evaluate)
DEF_CMD (S_ANY, F_TESTING, "DEBUG MOMENTS", cmd_debug_moments)
#include <stdlib.h>
#include "control-stack.h"
+#include <data/case.h>
#include <data/procedure.h>
#include <data/transformations.h>
#include <data/value.h>
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ Adapt case sources, sinks, and clients of procedure code to the
+ new infrastructure.
+
+ * data-list.c: Make DATA LIST into a casereader.
+
+ * get.c: Change GET, IMPORT, SAVE, EXPORT to use casereaders,
+ casewriters.
+
+ * inpt-pgm.c: Use caseinit code. Turn INPUT PROGRAM into a
+ casereader.
+
+ * list.q: Adapt to new procedure code.
+
2007-05-06 Ben Pfaff <blp@gnu.org>
Abstract the documents within a dictionary a little better.
#include <stdio.h>
#include <stdlib.h>
-#include <data/case-source.h>
#include <data/case.h>
-#include <data/case-source.h>
#include <data/data-in.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
#include <data/dictionary.h>
#include <data/format.h>
#include <data/procedure.h>
int record_cnt; /* Number of records. */
struct string delims; /* Field delimiters. */
int skip_records; /* Records to skip before first case. */
+ size_t value_cnt; /* Number of `union value's in case. */
};
-static const struct case_source_class data_list_source_class;
+static const struct casereader_class data_list_casereader_class;
static bool parse_fixed (struct lexer *, struct dictionary *dict,
struct pool *tmp_pool, struct data_list_pgm *);
int
cmd_data_list (struct lexer *lexer, struct dataset *ds)
{
- struct dictionary *dict = dataset_dict (ds);
+ struct dictionary *dict;
struct data_list_pgm *dls;
int table = -1; /* Print table if nonzero, -1=undecided. */
struct file_handle *fh = fh_inline_file ();
struct pool *tmp_pool;
bool ok;
- if (!in_input_program ())
- discard_variables (ds);
+ dict = in_input_program () ? dataset_dict (ds) : dict_create ();
dls = pool_create_container (struct data_list_pgm, pool);
ll_init (&dls->specs);
lex_match (lexer, '=');
if (!lex_force_id (lexer))
goto error;
- dls->end = dict_lookup_var (dataset_dict (ds), lex_tokid (lexer));
+ dls->end = dict_lookup_var (dict, lex_tokid (lexer));
if (!dls->end)
- dls->end = dict_create_var_assert (dataset_dict (ds), lex_tokid (lexer), 0);
+ dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
lex_get (lexer);
}
else if (lex_token (lexer) == T_ID)
if (dls->reader == NULL)
goto error;
+ dls->value_cnt = dict_get_next_value_idx (dict);
+
if (in_input_program ())
add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
else
- proc_set_source (ds, create_case_source (&data_list_source_class, dls));
+ {
+ struct casereader *reader;
+ reader = casereader_create_sequential (NULL,
+ dict_get_next_value_idx (dict),
+ -1, &data_list_casereader_class,
+ dls);
+ proc_set_active_file (ds, reader, dict);
+ }
pool_destroy (tmp_pool);
Returns true if successful, false at end of file or if an
I/O error occurred. */
static bool
-data_list_source_read (struct case_source *source, struct ccase *c)
+data_list_casereader_read (struct casereader *reader UNUSED, void *dls_,
+ struct ccase *c)
{
- struct data_list_pgm *dls = source->aux;
-
+ struct data_list_pgm *dls = dls_;
+ bool ok;
+
/* Skip the requested number of records before reading the
first case. */
while (dls->skip_records > 0)
dfm_forward_record (dls->reader);
dls->skip_records--;
}
-
- return read_from_data_list (dls, c);
+
+ case_create (c, dls->value_cnt);
+ ok = read_from_data_list (dls, c);
+ if (!ok)
+ case_destroy (c);
+ return ok;
}
-/* Destroys the source.
- Returns true if successful read, false if an I/O occurred
- during destruction or previously. */
-static bool
-data_list_source_destroy (struct case_source *source)
+/* Destroys the casereader READER and frees the DATA LIST program
+ DLS_ behind it. If the underlying data file reader reports an
+ error, marks READER as failed first. */
+static void
+data_list_casereader_destroy (struct casereader *reader, void *dls_)
{
- struct data_list_pgm *dls = source->aux;
- bool ok = !dfm_reader_error (dls->reader);
+ struct data_list_pgm *dls = dls_;
+ if (dfm_reader_error (dls->reader))
+ casereader_force_error (reader);
data_list_trns_free (dls);
- return ok;
}
-static const struct case_source_class data_list_source_class =
+static const struct casereader_class data_list_casereader_class =
{
- "DATA LIST",
+ data_list_casereader_read,
+ data_list_casereader_destroy,
+ NULL,
NULL,
- data_list_source_read,
- data_list_source_destroy,
};
#include <stdio.h>
#include <stdlib.h>
+#include <data/casereader.h>
#include <data/file-handle-def.h>
#include <data/file-name.h>
#include <data/procedure.h>
/* Input procedure reads from inline file. */
prompt_set_style (PROMPT_DATA);
- ok = procedure (ds, NULL, NULL);
-
+ casereader_destroy (proc_open (ds));
+ ok = proc_commit (ds);
dfm_close_reader (r);
return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
#include <data/any-reader.h>
#include <data/any-writer.h>
-#include <data/case-sink.h>
-#include <data/case-source.h>
#include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
#include <data/format.h>
#include <data/dictionary.h>
#include <data/por-file-writer.h>
#include <data/procedure.h>
#include <data/settings.h>
-#include <data/storage-stream.h>
#include <data/sys-file-writer.h>
#include <data/transformations.h>
#include <data/value-labels.h>
#include <libpspp/compiler.h>
#include <libpspp/hash.h>
#include <libpspp/message.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/taint.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
IMPORT_CMD
};
-/* Case reader input program. */
-struct case_reader_pgm
- {
- struct any_reader *reader; /* File reader. */
- struct case_map *map; /* Map from file dict to active file dict. */
- struct ccase bounce; /* Bounce buffer. */
- };
-
-static const struct case_source_class case_reader_source_class;
-
-static void case_reader_pgm_free (struct case_reader_pgm *);
+static void get_translate_case (const struct ccase *, struct ccase *,
+ void *map_);
+static bool get_destroy_case_map (void *map_);
/* Parses a GET or IMPORT command. */
static int
parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
{
- struct case_reader_pgm *pgm = NULL;
+ struct casereader *reader = NULL;
struct file_handle *fh = NULL;
struct dictionary *dict = NULL;
+ struct case_map *map = NULL;
for (;;)
{
goto error;
}
- discard_variables (ds);
-
- pgm = xmalloc (sizeof *pgm);
- pgm->reader = any_reader_open (fh, &dict);
- pgm->map = NULL;
- case_nullify (&pgm->bounce);
- if (pgm->reader == NULL)
+ reader = any_reader_open (fh, &dict);
+ if (reader == NULL)
goto error;
- case_create (&pgm->bounce, dict_get_next_value_idx (dict));
-
start_case_map (dict);
while (lex_token (lexer) != '.')
goto error;
}
- pgm->map = finish_case_map (dict);
-
- dataset_set_dict (ds, dict);
-
- proc_set_source (ds,
- create_case_source (&case_reader_source_class, pgm));
+ map = finish_case_map (dict);
+ if (map != NULL)
+ reader = casereader_create_translator (reader,
+ dict_get_next_value_idx (dict),
+ get_translate_case,
+ get_destroy_case_map,
+ map);
+
+ proc_set_active_file (ds, reader, dict);
return CMD_SUCCESS;
error:
- case_reader_pgm_free (pgm);
+ casereader_destroy (reader);
if (dict != NULL)
dict_destroy (dict);
return CMD_CASCADING_FAILURE;
}
-/* Frees a struct case_reader_pgm. */
static void
-case_reader_pgm_free (struct case_reader_pgm *pgm)
+get_translate_case (const struct ccase *input, struct ccase *output,
+ void *map_)
{
- if (pgm != NULL)
- {
- any_reader_close (pgm->reader);
- destroy_case_map (pgm->map);
- case_destroy (&pgm->bounce);
- free (pgm);
- }
+ struct case_map *map = map_;
+ map_case (map, input, output);
}
-/* Reads one case into C.
- Returns true if successful, false at end of file or if an
- I/O error occurred. */
static bool
-case_reader_source_read (struct case_source *source, struct ccase *c)
+get_destroy_case_map (void *map_)
{
- struct case_reader_pgm *pgm = source->aux;
- if (any_reader_read (pgm->reader, pgm->map == NULL ? c : &pgm->bounce))
- {
- if (pgm->map != NULL)
- map_case (pgm->map, &pgm->bounce, c);
- return true;
- }
- else
- return false;
-}
-
-/* Destroys the source.
- Returns true if successful read, false if an I/O occurred
- during destruction or previously. */
-static bool
-case_reader_source_destroy (struct case_source *source)
-{
- struct case_reader_pgm *pgm = source->aux;
- bool ok = !any_reader_error (pgm->reader);
- case_reader_pgm_free (pgm);
- return ok;
+ struct case_map *map = map_;
+ destroy_case_map (map);
+ return true;
}
-
-static const struct case_source_class case_reader_source_class =
- {
- "case reader",
- NULL,
- case_reader_source_read,
- case_reader_source_destroy,
- };
\f
/* GET. */
int
PROC_CMD /* Procedure. */
};
-/* File writer plus a case map. */
-struct case_writer
- {
- struct any_writer *writer; /* File writer. */
- struct case_map *map; /* Map to output file dictionary
- (null pointer for identity mapping). */
- struct ccase bounce; /* Bounce buffer for mapping (if needed). */
- };
-
-/* Destroys AW. */
-static bool
-case_writer_destroy (struct case_writer *aw)
-{
- bool ok = true;
- if (aw != NULL)
- {
- ok = any_writer_close (aw->writer);
- destroy_case_map (aw->map);
- case_destroy (&aw->bounce);
- free (aw);
- }
- return ok;
-}
-
/* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
WRITER_TYPE identifies the type of file to write,
and COMMAND_TYPE identifies the type of command.
included.
On failure, returns a null pointer. */
-static struct case_writer *
+static struct casewriter *
parse_write_command (struct lexer *lexer, struct dataset *ds,
enum writer_type writer_type,
enum command_type command_type,
/* Common data. */
struct file_handle *handle; /* Output file. */
struct dictionary *dict; /* Dictionary for output file. */
- struct case_writer *aw; /* Writer. */
+ struct casewriter *writer; /* Writer. */
+ struct case_map *map; /* Map from input data to data for writer. */
/* Common options. */
bool print_map; /* Print map? TODO. */
handle = NULL;
dict = dict_clone (dataset_dict (ds));
- aw = xmalloc (sizeof *aw);
- aw->writer = NULL;
- aw->map = NULL;
- case_nullify (&aw->bounce);
+ writer = NULL;
+ map = NULL;
print_map = false;
print_short_names = false;
sysfile_opts = sfm_writer_default_options ();
}
dict_compact_values (dict);
- aw->map = finish_case_map (dict);
- if (aw->map != NULL)
- case_create (&aw->bounce, dict_get_next_value_idx (dict));
if (fh_get_referent (handle) == FH_REF_FILE)
{
switch (writer_type)
{
case SYSFILE_WRITER:
- aw->writer = any_writer_from_sfm_writer (
- sfm_open_writer (handle, dict, sysfile_opts));
+ writer = sfm_open_writer (handle, dict, sysfile_opts);
break;
case PORFILE_WRITER:
- aw->writer = any_writer_from_pfm_writer (
- pfm_open_writer (handle, dict, porfile_opts));
+ writer = pfm_open_writer (handle, dict, porfile_opts);
break;
}
}
else
- aw->writer = any_writer_open (handle, dict);
- if (aw->writer == NULL)
+ writer = any_writer_open (handle, dict);
+ if (writer == NULL)
goto error;
+
+ map = finish_case_map (dict);
+ if (map != NULL)
+ writer = casewriter_create_translator (writer,
+ get_translate_case,
+ get_destroy_case_map,
+ map);
dict_destroy (dict);
- return aw;
+ return writer;
error:
- case_writer_destroy (aw);
+ casewriter_destroy (writer);
dict_destroy (dict);
+ destroy_case_map (map);
return NULL;
}
-
-/* Writes case C to writer AW. */
-static bool
-case_writer_write_case (struct case_writer *aw, const struct ccase *c)
-{
- if (aw->map != NULL)
- {
- map_case (aw->map, c, &aw->bounce);
- c = &aw->bounce;
- }
- return any_writer_write (aw->writer, c);
-}
\f
/* SAVE and EXPORT. */
{
bool retain_unselected;
struct variable *saved_filter_variable;
- struct case_writer *aw;
- struct ccase *c;
- bool ok = true;
+ struct casewriter *output;
+ bool ok;
- aw = parse_write_command (lexer, ds, writer_type, PROC_CMD, &retain_unselected);
- if (aw == NULL)
+ output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
+ &retain_unselected);
+ if (output == NULL)
return CMD_CASCADING_FAILURE;
saved_filter_variable = dict_get_filter (dataset_dict (ds));
if (retain_unselected)
dict_set_filter (dataset_dict (ds), NULL);
- proc_open (ds);
- while (ok && proc_read (ds, &c))
- ok = case_writer_write_case (aw, c);
- ok = proc_close (ds) && ok;
+ casereader_transfer (proc_open (ds), output);
+ ok = casewriter_destroy (output);
+ ok = proc_commit (ds) && ok;
dict_set_filter (dataset_dict (ds), saved_filter_variable);
- case_writer_destroy (aw);
return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
}
/* Transformation. */
struct output_trns
{
- struct case_writer *aw; /* Writer. */
+ struct casewriter *writer; /* Writer. */
};
static trns_proc_func output_trns_proc;
parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
{
struct output_trns *t = xmalloc (sizeof *t);
- t->aw = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
- if (t->aw == NULL)
+ t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
+ if (t->writer == NULL)
{
free (t);
return CMD_CASCADING_FAILURE;
output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
{
struct output_trns *t = trns_;
- case_writer_write_case (t->aw, c);
+ struct ccase tmp;
+ case_clone (&tmp, c);
+ casewriter_write (t->writer, &tmp);
return TRNS_CONTINUE;
}
output_trns_free (void *trns_)
{
struct output_trns *t = trns_;
- bool ok = true;
-
- if (t != NULL)
- {
- ok = case_writer_destroy (t->aw);
- free (t);
- }
+ bool ok = casewriter_destroy (t->writer);
+ free (t);
return ok;
}
int type; /* One of MTF_*. */
const struct variable **by; /* List of BY variables for this file. */
struct file_handle *handle; /* File handle. */
- struct any_reader *reader; /* File reader. */
+ struct casereader *reader; /* File reader. */
struct dictionary *dict; /* Dictionary from system file. */
+ bool active_file; /* Active file? */
/* IN subcommand. */
char *in_name; /* Variable name. */
struct variable *in_var; /* Variable (in master dictionary). */
- struct ccase input_storage; /* Input record storage. */
- struct ccase *input; /* Input record. */
+ struct ccase input; /* Input record. */
};
/* MATCH FILES procedure. */
char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
struct dictionary *dict; /* Dictionary of output file. */
- struct casefile *output; /* MATCH FILES output. */
+ struct casewriter *output; /* MATCH FILES output. */
struct ccase mtf_case; /* Case used for output. */
unsigned seq_num; /* Have we initialized this variable? */
static bool mtf_free (struct mtf_proc *);
static bool mtf_close_file (struct mtf_file *);
+static bool mtf_close_all_files (struct mtf_proc *);
static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
-static bool mtf_read_records (struct mtf_proc *, struct dataset *);
+static bool mtf_read_records (struct mtf_proc *);
static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
-static bool mtf_processing (struct mtf_proc *, struct dataset *);
+static bool mtf_processing (struct mtf_proc *);
static char *var_type_description (struct variable *);
bool used_active_file = false;
bool saw_table = false;
bool saw_in = false;
+ bool open_active_file = false;
mtf.head = mtf.tail = NULL;
mtf.by_cnt = 0;
file->dict = NULL;
file->in_name = NULL;
file->in_var = NULL;
- case_nullify (&file->input_storage);
- file->input = &file->input_storage;
+ file->active_file = false;
+ case_nullify (&file->input);
/* FILEs go first, then TABLEs. */
if (file->type == MTF_TABLE || first_table == NULL)
}
used_active_file = true;
- if (!proc_has_source (ds))
+ if (!proc_has_active_file (ds))
{
msg (SE, _("Cannot specify the active file since no active "
"file has been defined."));
"Temporary transformations will be made permanent."));
file->dict = dataset_dict (ds);
+ file->active_file = true;
}
else
{
file->reader = any_reader_open (file->handle, &file->dict);
if (file->reader == NULL)
goto error;
-
- case_create (&file->input_storage,
- dict_get_next_value_idx (file->dict));
}
while (lex_match (lexer, '/'))
if (used_active_file)
{
- proc_set_sink (ds, create_case_sink (&null_sink_class,
- dataset_dict (ds),
- dataset_get_casefile_factory (ds),
- NULL));
- proc_open (ds);
+ proc_discard_output (ds);
+ for (iter = mtf.head; iter != NULL; iter = iter->next)
+ if (iter->reader == NULL)
+ iter->reader = proc_open (ds);
+ open_active_file = true;
}
- else
- discard_variables (ds);
dict_compact_values (mtf.dict);
- mtf.output = dataset_get_casefile_factory (ds)->create_casefile
- (dataset_get_casefile_factory (ds),
- dict_get_next_value_idx (mtf.dict));
-
+ mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
- if (!mtf_read_records (&mtf, ds))
- goto error;
+ if (!mtf_read_records (&mtf))
+ goto error;
while (mtf.head && mtf.head->type == MTF_FILE)
- if (!mtf_processing (&mtf, ds))
- goto error;
- if (!proc_close (ds))
+ if (!mtf_processing (&mtf))
+ goto error;
+ if (!mtf_close_all_files (&mtf))
goto error;
+ if (open_active_file)
+ proc_commit (ds);
- discard_variables (ds);
-
- dataset_set_dict (ds, mtf.dict);
+ proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
mtf.dict = NULL;
- proc_set_source (ds, storage_source_create (mtf.output));
mtf.output = NULL;
return mtf_free (&mtf) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
error:
- proc_close (ds);
+ if (open_active_file)
+ proc_commit (ds);
mtf_free (&mtf);
return CMD_CASCADING_FAILURE;
}
-/* Return a string in a static buffer describing V's variable type and
- width. */
+/* Returns a newly allocated string describing V's variable type
+ and width. The caller is responsible for freeing it. */
static char *
var_type_description (struct variable *v)
{
- static char buf[2][32];
- static int x = 0;
- char *s;
-
- x ^= 1;
- s = buf[x];
-
if (var_is_numeric (v))
- strcpy (s, "numeric");
+ return xstrdup ("numeric");
else
- sprintf (s, "string with width %d", var_get_width (v));
- return s;
+ return xasprintf ("string with width %d", var_get_width (v));
}
/* Closes FILE and frees its associated data.
static bool
mtf_close_file (struct mtf_file *file)
{
- bool ok = file->reader == NULL || !any_reader_error (file->reader);
+ bool ok = casereader_destroy (file->reader); /* Its result is what this function returns. */
free (file->by);
- any_reader_close (file->reader);
- if (file->handle != NULL)
+ if (!file->active_file) /* For the active file, file->dict is the dataset's own dictionary. */
dict_destroy (file->dict);
- case_destroy (&file->input_storage);
free (file->in_name);
+ case_destroy (&file->input);
free (file);
return ok;
}
-/* Free all the data for the MATCH FILES procedure.
- Returns true if successful, false if an I/O error
- occurred. */
static bool
-mtf_free (struct mtf_proc *mtf)
+mtf_close_all_files (struct mtf_proc *mtf)
{
struct mtf_file *iter, *next;
bool ok = true;
if (!mtf_close_file (iter))
ok = false;
}
-
- if (mtf->dict)
- dict_destroy (mtf->dict);
+ mtf->head = NULL;
+ return ok;
+}
+
+/* Free all the data for the MATCH FILES procedure.
+ Returns true if successful, false if an I/O error
+ occurred. */
+static bool
+mtf_free (struct mtf_proc *mtf)
+{
+ bool ok;
+
+ ok = mtf_close_all_files (mtf);
+
+ casewriter_destroy (mtf->output);
+ dict_destroy (mtf->dict);
case_destroy (&mtf->mtf_case);
free (mtf->seq_nums);
/* Read a record from every input file.
Returns true if successful, false if an I/O error occurred. */
static bool
-mtf_read_records (struct mtf_proc *mtf, struct dataset *ds)
+mtf_read_records (struct mtf_proc *mtf)
{
struct mtf_file *iter, *next;
bool ok = true;
for (iter = mtf->head; ok && iter != NULL; iter = next)
{
next = iter->next;
- if (iter->handle
- ? !any_reader_read (iter->reader, iter->input)
- : !proc_read (ds, &iter->input))
+ if (!casereader_read (iter->reader, &iter->input))
{
if (!mtf_delete_file_in_place (mtf, &iter))
ok = false;
mtf_compare_BY_values (struct mtf_proc *mtf,
struct mtf_file *a, struct mtf_file *b)
{
- return case_compare_2dict (a->input, b->input, a->by, b->by, mtf->by_cnt);
+ return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
}
/* Perform one iteration of steps 3...7 above.
Returns true if successful, false if an I/O error occurred. */
static bool
-mtf_processing (struct mtf_proc *mtf, struct dataset *ds)
+mtf_processing (struct mtf_proc *mtf)
{
struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
struct mtf_file *iter, *next;
+ struct ccase out_case;
/* 3. Find the FILE input record(s) that have minimum BY
values. Store all the values from these input records into
min_tail = min_tail->next_min = iter;
else /* cmp > 0 */
{
- if (iter->handle
- ? any_reader_read (iter->reader, iter->input)
- : proc_read (ds, &iter->input))
+ case_destroy (&iter->input);
+ if (casereader_read (iter->reader, &iter->input))
continue;
if (!mtf_delete_file_in_place (mtf, &iter))
return false;
if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num)
{
- const struct ccase *record = iter->input;
union value *out = case_data_rw (&mtf->mtf_case, mv);
mtf->seq_nums[mv_index] = mtf->seq_num;
if (var_is_numeric (v))
- out->f = case_num (record, v);
+ out->f = case_num (&iter->input, v);
else
- memcpy (out->s, case_str (record, v), var_get_width (v));
+ memcpy (out->s, case_str (&iter->input, v), var_get_width (v));
}
}
if (iter->in_var != NULL)
}
/* 5. Write the output record. */
- casefile_append (mtf->output, &mtf->mtf_case);
+ case_clone (&out_case, &mtf->mtf_case);
+ casewriter_write (mtf->output, &out_case);
/* 6. Read another record from each input file FILE and TABLE
that we stored values from above. If we come to the end of
for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
{
next = iter->next_min;
- if (iter->reader != NULL
- ? !any_reader_read (iter->reader, iter->input)
- : !proc_read (ds, &iter->input))
+ case_destroy (&iter->input);
+ if (!casereader_read (iter->reader, &iter->input))
if (!mtf_delete_file_in_place (mtf, &iter))
return false;
}
{
size_t dst_idx;
- assert (map != NULL);
- assert (src != NULL);
- assert (dst != NULL);
- assert (src != dst);
-
for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
{
int src_idx = map->map[dst_idx];
#include <float.h>
#include <stdlib.h>
-#include <data/case-source.h>
#include <data/case.h>
-#include <data/case-source.h>
+#include <data/caseinit.h>
+#include <data/casereader-provider.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/transformations.h>
struct trns_chain *trns_chain;
enum trns_result restart;
- bool inited_case; /* Did one-time case initialization? */
size_t case_nr; /* Incremented by END CASE transformation. */
- enum value_init_type *init; /* How to initialize each `union value'. */
- size_t init_cnt; /* Number of elements in inp_init. */
- size_t case_size; /* Size of case in bytes. */
+ struct caseinit *init;
+ size_t value_cnt;
};
static void destroy_input_program (struct input_program_pgm *);
static trns_proc_func end_file_trns_proc;
static trns_free_func reread_trns_free;
-static const struct case_source_class input_program_source_class;
+static const struct casereader_class input_program_casereader_class;
static bool inside_input_program;
cmd_input_program (struct lexer *lexer, struct dataset *ds)
{
struct input_program_pgm *inp;
- size_t i;
bool saw_END_CASE = false;
- discard_variables (ds);
+ proc_discard_active_file (ds);
if (lex_token (lexer) != '.')
return lex_end_of_command (lexer);
if (result == CMD_EOF)
msg (SE, _("Unexpected end-of-file within INPUT PROGRAM."));
inside_input_program = false;
- discard_variables (ds);
+ proc_discard_active_file (ds);
destroy_input_program (inp);
return result;
}
if (dict_get_next_value_idx (dataset_dict (ds)) == 0)
{
msg (SE, _("Input program did not create any variables."));
- discard_variables (ds);
+ proc_discard_active_file (ds);
destroy_input_program (inp);
return CMD_FAILURE;
}
trns_chain_finalize (inp->trns_chain);
inp->restart = TRNS_CONTINUE;
- inp->inited_case = false;
- inp->case_nr = 1;
/* Figure out how to initialize each input case. */
- inp->init_cnt = dict_get_next_value_idx (dataset_dict (ds));
- inp->init = xnmalloc (inp->init_cnt, sizeof *inp->init);
- for (i = 0; i < inp->init_cnt; i++)
- inp->init[i] = -1;
- for (i = 0; i < dict_get_var_cnt (dataset_dict (ds)); i++)
- {
- struct variable *var = dict_get_var (dataset_dict (ds), i);
- size_t value_cnt = var_get_value_cnt (var);
- enum value_init_type value_init;
- size_t j;
-
- value_init = var_is_numeric (var) ? INP_NUMERIC : INP_STRING;
- value_init |= var_get_leave (var) ? INP_INIT_ONCE : INP_REINIT;
-
- for (j = 0; j < value_cnt; j++)
- inp->init[j + var_get_case_index (var)] = value_init;
- }
- for (i = 0; i < inp->init_cnt; i++)
- assert (inp->init[i] != -1);
- inp->case_size = dict_get_case_size (dataset_dict (ds));
-
- proc_set_source (ds,
- create_case_source (&input_program_source_class, inp));
+ inp->init = caseinit_create ();
+ caseinit_mark_for_init (inp->init, dataset_dict (ds));
+ inp->value_cnt = dict_get_next_value_idx (dataset_dict (ds));
+
+ proc_set_active_file_data (
+ ds, casereader_create_sequential (NULL, inp->value_cnt, CASENUMBER_MAX,
+ &input_program_casereader_class, inp));
return CMD_SUCCESS;
}
return CMD_END_INPUT_PROGRAM;
}
-/* Initializes case C. Called before the first case is read. */
-static void
-init_case (const struct input_program_pgm *inp, struct ccase *c)
-{
- size_t i;
-
- for (i = 0; i < inp->init_cnt; i++)
- switch (inp->init[i])
- {
- case INP_NUMERIC | INP_INIT_ONCE:
- case_data_rw_idx (c, i)->f = 0.0;
- break;
- case INP_NUMERIC | INP_REINIT:
- case_data_rw_idx (c, i)->f = SYSMIS;
- break;
- case INP_STRING | INP_INIT_ONCE:
- case INP_STRING | INP_REINIT:
- memset (case_data_rw_idx (c, i)->s, ' ',
- sizeof case_data_rw_idx (c, i)->s);
- break;
- default:
- NOT_REACHED ();
- }
-}
-
-/* Clears case C. Called between reading successive records. */
-static void
-clear_case (const struct input_program_pgm *inp, struct ccase *c)
-{
- size_t i;
-
- for (i = 0; i < inp->init_cnt; i++)
- switch (inp->init[i])
- {
- case INP_NUMERIC | INP_INIT_ONCE:
- break;
- case INP_NUMERIC | INP_REINIT:
- case_data_rw_idx (c, i)->f = SYSMIS;
- break;
- case INP_STRING | INP_INIT_ONCE:
- break;
- case INP_STRING | INP_REINIT:
- memset (case_data_rw_idx (c, i)->s, ' ',
- sizeof case_data_rw_idx (c, i)->s);
- break;
- default:
- NOT_REACHED ();
- }
-}
-
/* Returns true if STATE is valid given the transformations that
are allowed within INPUT PROGRAM. */
static bool
Returns true if successful, false at end of file or if an
I/O error occurred. */
static bool
-input_program_source_read (struct case_source *source, struct ccase *c)
+input_program_casereader_read (struct casereader *reader UNUSED, void *inp_,
+ struct ccase *c)
{
- struct input_program_pgm *inp = source->aux;
+ struct input_program_pgm *inp = inp_;
- if (!inp->inited_case)
- {
- init_case (inp, c);
- inp->inited_case = true;
- }
+ case_create (c, inp->value_cnt);
do
{
assert (is_valid_state (inp->restart));
- if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE)
- return false;
+ if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE)
+ {
+ case_destroy (c);
+ return false;
+ }
- clear_case (inp, c);
+ caseinit_init_reinit_vars (inp->init, c);
+ caseinit_init_left_vars (inp->init, c);
inp->restart = trns_chain_execute (inp->trns_chain, inp->restart,
c, &inp->case_nr);
assert (is_valid_state (inp->restart));
+ caseinit_update_left_vars (inp->init, c);
}
while (inp->restart < 0);
if (pgm != NULL)
{
trns_chain_destroy (pgm->trns_chain);
- free (pgm->init);
+ caseinit_destroy (pgm->init);
free (pgm);
}
}
-/* Destroys the source.
- Returns true if successful read, false if an I/O occurred
- during destruction or previously. */
-static bool
-input_program_source_destroy (struct case_source *source)
+/* Destroys the input program INP_ that backs READER.  If the
+ input program stopped with TRNS_ERROR, READER is first forced
+ into an error state with casereader_force_error. */
+static void
+input_program_casereader_destroy (struct casereader *reader, void *inp_)
{
- struct input_program_pgm *inp = source->aux;
- bool ok = inp->restart != TRNS_ERROR;
+ struct input_program_pgm *inp = inp_;
+ if (inp->restart == TRNS_ERROR)
+ casereader_force_error (reader);
destroy_input_program (inp);
- return ok;
}
-static const struct case_source_class input_program_source_class =
+static const struct casereader_class input_program_casereader_class =
{
- "INPUT PROGRAM",
+ input_program_casereader_read, /* Builds each case by executing the input program's transformations. */
+ input_program_casereader_destroy, /* Frees the input program. */
+ NULL, /* This and the following member are left unset. */
NULL,
- input_program_source_read,
- input_program_source_destroy,
};
\f
int
return lex_end_of_command (lexer);
}
-/* Sends the current case as the source's output. */
+/* Outputs the current case. */
int
end_case_trns_proc (void *inp_, struct ccase *c UNUSED,
casenumber case_nr UNUSED)
#include "intprops.h"
#include "size_max.h"
-#include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/data-out.h>
#include <data/format.h>
/* Parsed command. */
static struct cmd_list cmd;
-/* Current case number. */
-static int case_idx;
-
/* Line buffer. */
static struct string line_buffer;
static void write_line (struct outp_driver *d, const char *s);
/* Other functions. */
-static bool list_cases (const struct ccase *, void *, const struct dataset *);
+static void list_case (struct ccase *, casenumber case_idx,
+ const struct dataset *);
static void determine_layout (void);
static void clean_up (void);
static void write_header (struct outp_driver *);
-static void write_all_headers (const struct ccase *, void *, const struct dataset*);
+static void write_all_headers (struct casereader *, const struct dataset*);
/* Returns the number of text lines that can fit on the remainder of
the page. */
int
cmd_list (struct lexer *lexer, struct dataset *ds)
{
+ struct dictionary *dict = dataset_dict (ds);
struct variable *casenum_var = NULL;
+ struct casegrouper *grouper;
+ struct casereader *group;
+ casenumber case_idx;
bool ok;
if (!parse_list (lexer, ds, &cmd, NULL))
if (cmd.last == NOT_LONG)
cmd.last = LONG_MAX;
if (!cmd.sbc_variables)
- dict_get_vars (dataset_dict (ds), &cmd.v_variables, &cmd.n_variables,
+ dict_get_vars (dict, &cmd.v_variables, &cmd.n_variables,
(1u << DC_SYSTEM) | (1u << DC_SCRATCH));
if (cmd.n_variables == 0)
{
/* Weighting variable. */
if (cmd.weight == LST_WEIGHT)
{
- if (dict_get_weight (dataset_dict (ds)) != NULL)
+ if (dict_get_weight (dict) != NULL)
{
size_t i;
for (i = 0; i < cmd.n_variables; i++)
- if (cmd.v_variables[i] == dict_get_weight (dataset_dict (ds)))
+ if (cmd.v_variables[i] == dict_get_weight (dict))
break;
if (i >= cmd.n_variables)
{
cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables,
sizeof *cmd.v_variables);
cmd.v_variables[cmd.n_variables - 1]
- = dict_get_weight (dataset_dict (ds));
+ = dict_get_weight (dict);
}
}
else
determine_layout ();
case_idx = 0;
- ok = procedure_with_splits (ds, write_all_headers, list_cases, NULL, NULL);
+ for (grouper = casegrouper_create_splits (proc_open (ds), dict);
+ casegrouper_get_next_group (grouper, &group);
+ casereader_destroy (group))
+ {
+ struct ccase c;
+
+ write_all_headers (group, ds);
+ for (; casereader_read (group, &c); case_destroy (&c))
+ {
+ case_idx++;
+ if (case_idx >= cmd.first && case_idx <= cmd.last
+ && (case_idx - cmd.first) % cmd.step == 0)
+ list_case (&c, case_idx, ds);
+ }
+ }
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
+
ds_destroy(&line_buffer);
clean_up ();
/* Writes headers to all devices. This is done at the beginning of
each SPLIT FILE group. */
static void
-write_all_headers (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+write_all_headers (struct casereader *input, const struct dataset *ds)
{
struct outp_driver *d;
+ struct ccase c;
+
+ if (!casereader_peek (input, 0, &c))
+ return;
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
- output_split_file_values (ds, c);
for (d = outp_drivers (NULL); d; d = outp_drivers (d))
{
if (!d->class->special)
}
/* Writes case C to output. */
-static bool
-list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+list_case (struct ccase *c, casenumber case_idx, const struct dataset *ds)
{
+ struct dictionary *dict = dataset_dict (ds);
struct outp_driver *d;
- case_idx++;
- if (case_idx < cmd.first || case_idx > cmd.last
- || (cmd.step != 1 && (case_idx - cmd.first) % cmd.step))
- return true;
-
for (d = outp_drivers (NULL); d; d = outp_drivers (d))
if (d->class->special == 0)
{
ds_put_char_multiple(&line_buffer, ' ', width - print->w);
if (fmt_is_string (print->type)
- || dict_contains_var (dataset_dict (ds), v))
+ || dict_contains_var (dict, v))
{
data_out (case_data (c, v), print,
ds_put_uninit (&line_buffer, print->w));
char buf[256];
if (fmt_is_string (print->type)
- || dict_contains_var (dataset_dict (ds), v))
+ || dict_contains_var (dict, v))
data_out (case_data (c, v), print, buf);
else
{
}
else
NOT_REACHED ();
-
- return true;
}
/*
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ Adapt case sources, sinks, and clients of procedure code to the
+ new infrastructure.
+
+ * apply-dictionary.c: Now any_reader_open returns a casereader.
+
+ * sys-file-open.c: Now sfm_open_reader returns a casereader.
+
Sat Feb 3 21:52:35 2007 Ben Pfaff <blp@gnu.org>
* vector.c (cmd_vector): Add support for specifying an output
#include <stdlib.h>
#include <data/any-reader.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/file-handle-def.h>
#include <data/missing-values.h>
cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds)
{
struct file_handle *handle;
- struct any_reader *reader;
+ struct casereader *reader;
struct dictionary *dict;
int n_matched = 0;
reader = any_reader_open (handle, &dict);
if (dict == NULL)
return CMD_FAILURE;
- any_reader_close (reader);
+ casereader_destroy (reader);
for (i = 0; i < dict_get_var_cnt (dict); i++)
{
dict_set_weight (dataset_dict (ds), new_weight);
}
- any_reader_close (reader);
-
return lex_end_of_command (lexer);
}
/* PSPP - computes sample statistics.
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
Written by Ben Pfaff <blp@gnu.org>.
This program is free software; you can redistribute it and/or
#include <stdlib.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <language/command.h>
{
struct variable **vars;
size_t var_cnt;
+ bool ok;
if (proc_make_temporary_transformations_permanent (ds))
msg (SE, _("DELETE VARIABLES may not be used after TEMPORARY. "
"from the active file dictionary. Use NEW FILE instead."));
goto error;
}
-
- if (!procedure (ds, NULL, NULL))
+
+ ok = casereader_destroy (proc_open (ds));
+ ok = proc_commit (ds) && ok;
+ if (!ok)
goto error;
-
dict_delete_vars (dataset_dict (ds), vars, var_cnt);
+
free (vars);
return CMD_SUCCESS;
#include "gettext.h"
#define _(msgid) gettext (msgid)
-/* FIXME: should change weighting variable, etc. */
/* These control the ordering produced by
compare_variables_given_ordering(). */
struct ordering
if (already_encountered & (1 | 4))
{
/* Read the data. */
- if (!procedure (ds,NULL, NULL))
+ if (!proc_execute (ds))
goto done;
}
#include <ctype.h>
#include <stdlib.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/file-handle-def.h>
#include <data/format.h>
struct file_handle *h;
struct dictionary *d;
struct tab_table *t;
- struct sfm_reader *reader;
+ struct casereader *reader;
struct sfm_read_info info;
int r, nr;
int i;
reader = sfm_open_reader (h, &d, &info);
if (!reader)
return CMD_FAILURE;
- sfm_close_reader (reader);
+ casereader_destroy (reader);
t = tab_create (2, 10, 0);
tab_vline (t, TAL_GAP, 1, 0, 8);
if ( ds == NULL )
{
- ds = create_dataset (NULL, NULL, NULL);
+ ds = create_dataset (NULL, NULL);
d = dataset_dict (ds);
}
vs = var_set_create_from_dict (d);
success = parse_var_set_vars (lexer, vs, var, cnt, opts);
- if ( success == 0 )
- {
- free ( *var ) ;
- *var = NULL;
- *cnt = 0;
- }
var_set_destroy (vs);
return success;
}
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ Adapt case sources, sinks, and clients of procedure code to the
+ new infrastructure.
+
+ * aggregate.c: Simplify greatly since everything is more uniform
+ now.
+
+ * autorecode.c: Adapt to new procedure code.
+ * binomial.c: Ditto.
+ * chisquare.c: Ditto.
+ * crosstabs.q: Ditto.
+ * descriptives.c: Ditto.
+ * examine.q: Ditto.
+ * npar-summary.c: Ditto.
+ * frequencies.q: Ditto.
+ * npar.q: Ditto.
+ * oneway.q: Ditto.
+ * regression.q: Ditto.
+ * sort-cases.c: Ditto.
+ * t-test.c: Ditto.
+
+ * sort-criteria.c: Rewrite to output a struct case_ordering.
+
+ * flip.c: Rewrite to be a casereader.
+
+ * rank.q: Simplify greatly since casereaders are much more
+ flexible than what we had before.
+
2007-05-15 Jason Stover <jhs@math.gcsu.edu>
* regression.q (run_regression): Tell the user when the data
#include <stdlib.h>
#include <data/any-writer.h>
-#include <data/case-sink.h>
+#include <data/case-ordering.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
#include <data/dictionary.h>
#include <data/file-handle-def.h>
#include <data/format.h>
#include <data/procedure.h>
#include <data/settings.h>
-#include <data/storage-stream.h>
#include <data/sys-file-writer.h>
#include <data/variable.h>
#include <language/command.h>
/* An entire AGGREGATE procedure. */
struct agr_proc
{
- /* We have either an output file or a sink. */
- struct any_writer *writer; /* Output file, or null if none. */
- struct case_sink *sink; /* Sink, or null if none. */
-
/* Break variables. */
- struct sort_criteria *sort; /* Sort criteria. */
+ struct case_ordering *sort; /* Sort criteria. */
const struct variable **break_vars; /* Break variables. */
size_t break_var_cnt; /* Number of break variables. */
struct ccase break_case; /* Last values of break variables. */
struct dictionary *dict; /* Aggregate dictionary. */
const struct dictionary *src_dict; /* Dict of the source */
int case_cnt; /* Counts aggregated cases. */
- struct ccase agr_case; /* Aggregate case for output. */
};
static void initialize_aggregate_info (struct agr_proc *,
const struct ccase *);
-
+static void accumulate_aggregate_info (struct agr_proc *,
+ const struct ccase *);
/* Prototypes. */
static bool parse_aggregate_functions (struct lexer *, const struct dictionary *,
struct agr_proc *);
static void agr_destroy (struct agr_proc *);
-static bool aggregate_single_case (struct agr_proc *agr,
- const struct ccase *input,
- struct ccase *output);
-static void dump_aggregate_info (struct agr_proc *agr, struct ccase *output);
+static void dump_aggregate_info (struct agr_proc *agr,
+ struct casewriter *output);
\f
/* Parsing. */
struct dictionary *dict = dataset_dict (ds);
struct agr_proc agr;
struct file_handle *out_file = NULL;
+ struct casereader *input = NULL, *group;
+ struct casegrouper *grouper;
+ struct casewriter *output = NULL;
bool copy_documents = false;
bool presorted = false;
bool saw_direction;
+ bool ok;
memset(&agr, 0 , sizeof (agr));
agr.missing = ITEMWISE;
int i;
lex_match (lexer, '=');
- agr.sort = sort_parse_criteria (lexer, dict,
- &agr.break_vars, &agr.break_var_cnt,
- &saw_direction, NULL);
+ agr.sort = parse_case_ordering (lexer, dict,
+
+ &saw_direction);
if (agr.sort == NULL)
goto error;
+ case_ordering_get_vars (agr.sort,
+ &agr.break_vars, &agr.break_var_cnt);
for (i = 0; i < agr.break_var_cnt; i++)
dict_clone_var_assert (agr.dict, agr.break_vars[i],
/* Initialize. */
agr.case_cnt = 0;
- case_create (&agr.agr_case, dict_get_next_value_idx (agr.dict));
- /* Output to active file or external file? */
if (out_file == NULL)
{
- struct ccase *c;
-
/* The active file will be replaced by the aggregated data,
so TEMPORARY is moot. */
proc_cancel_temporary_transformations (ds);
+ proc_discard_output (ds);
+ output = autopaging_writer_create (dict_get_next_value_idx (agr.dict));
+ }
+ else
+ {
+ output = any_writer_open (out_file, agr.dict);
+ if (output == NULL)
+ goto error;
+ }
- if (agr.sort != NULL && !presorted)
- {
- if (!sort_active_file_in_place (ds, agr.sort))
- goto error;
- }
+ input = proc_open (ds);
+ if (agr.sort != NULL && !presorted)
+ {
+ input = sort_execute (input, agr.sort);
+ agr.sort = NULL;
+ }
- agr.sink = create_case_sink (&storage_sink_class, agr.dict,
- dataset_get_casefile_factory (ds),
- NULL);
- if (agr.sink->class->open != NULL)
- agr.sink->class->open (agr.sink);
- proc_set_sink (ds,
- create_case_sink (&null_sink_class, dict,
- dataset_get_casefile_factory (ds),
- NULL));
- proc_open (ds);
- while (proc_read (ds, &c))
- if (aggregate_single_case (&agr, c, &agr.agr_case))
- if (!agr.sink->class->write (agr.sink, &agr.agr_case))
- {
- proc_close (ds);
- goto error;
- }
- if (!proc_close (ds))
- goto error;
+ for (grouper = casegrouper_create_vars (input, agr.break_vars,
+ agr.break_var_cnt);
+ casegrouper_get_next_group (grouper, &group);
+ casereader_destroy (group))
+ {
+ struct ccase c;
+
+ if (!casereader_peek (group, 0, &c))
+ continue;
+ initialize_aggregate_info (&agr, &c);
+ case_destroy (&c);
+
+ for (; casereader_read (group, &c); case_destroy (&c))
+ accumulate_aggregate_info (&agr, &c);
+ dump_aggregate_info (&agr, output);
+ }
+ if (!casegrouper_destroy (grouper))
+ goto error;
- if (agr.case_cnt > 0)
- {
- dump_aggregate_info (&agr, &agr.agr_case);
- if (!agr.sink->class->write (agr.sink, &agr.agr_case))
- goto error;
- }
- discard_variables (ds);
- dataset_set_dict (ds, agr.dict);
- agr.dict = NULL;
- proc_set_source (ds, agr.sink->class->make_source (agr.sink));
- free_case_sink (agr.sink);
+ if (!proc_commit (ds))
+ {
+ input = NULL;
+ goto error;
}
- else
+ input = NULL;
+
+ if (out_file == NULL)
{
- agr.writer = any_writer_open (out_file, agr.dict);
- if (agr.writer == NULL)
+ struct casereader *next_input = casewriter_make_reader (output);
+ if (next_input == NULL)
goto error;
- if (agr.sort != NULL && !presorted)
- {
- /* Sorting is needed. */
- struct casefile *dst;
- struct casereader *reader;
- struct ccase c;
- bool ok = true;
-
- dst = sort_active_file_to_casefile (ds, agr.sort);
- if (dst == NULL)
- goto error;
- reader = casefile_get_destructive_reader (dst);
- while (ok && casereader_read_xfer (reader, &c))
- {
- if (aggregate_single_case (&agr, &c, &agr.agr_case))
- ok = any_writer_write (agr.writer, &agr.agr_case);
- case_destroy (&c);
- }
- casereader_destroy (reader);
- if (ok)
- ok = !casefile_error (dst);
- casefile_destroy (dst);
- if (!ok)
- goto error;
- }
- else
- {
- /* Active file is already sorted. */
- struct ccase *c;
-
- proc_open (ds);
- while (proc_read (ds, &c))
- if (aggregate_single_case (&agr, c, &agr.agr_case))
- if (!any_writer_write (agr.writer, &agr.agr_case))
- {
- proc_close (ds);
- goto error;
- }
- if (!proc_close (ds))
- goto error;
- }
-
- if (agr.case_cnt > 0)
- {
- dump_aggregate_info (&agr, &agr.agr_case);
- any_writer_write (agr.writer, &agr.agr_case);
- }
- if (any_writer_error (agr.writer))
+ proc_set_active_file (ds, next_input, agr.dict);
+ agr.dict = NULL;
+ }
+ else
+ {
+ ok = casewriter_destroy (output);
+ output = NULL;
+ if (!ok)
goto error;
}
return CMD_SUCCESS;
error:
+ if (input != NULL)
+ proc_commit (ds);
+ casewriter_destroy (output);
agr_destroy (&agr);
return CMD_CASCADING_FAILURE;
}
{
struct agr_var *iter, *next;
- any_writer_close (agr->writer);
- if (agr->sort != NULL)
- sort_destroy_criteria (agr->sort);
+ case_ordering_destroy (agr->sort);
free (agr->break_vars);
case_destroy (&agr->break_case);
for (iter = agr->agr_vars; iter; iter = next)
}
if (agr->dict != NULL)
dict_destroy (agr->dict);
-
- case_destroy (&agr->agr_case);
}
\f
/* Execution. */
-static void accumulate_aggregate_info (struct agr_proc *,
- const struct ccase *);
-static void dump_aggregate_info (struct agr_proc *, struct ccase *);
-
-/* Processes a single case INPUT for aggregation. If output is
- warranted, writes it to OUTPUT and returns true.
- Otherwise, returns false and OUTPUT is unmodified. */
-static bool
-aggregate_single_case (struct agr_proc *agr,
- const struct ccase *input, struct ccase *output)
-{
- bool finished_group = false;
-
- if (agr->case_cnt++ == 0)
- initialize_aggregate_info (agr, input);
- else if (case_compare (&agr->break_case, input,
- agr->break_vars, agr->break_var_cnt))
- {
- dump_aggregate_info (agr, output);
- finished_group = true;
-
- initialize_aggregate_info (agr, input);
- }
-
- accumulate_aggregate_info (agr, input);
- return finished_group;
-}
-
/* Accumulates aggregation data from the case INPUT. */
static void
-accumulate_aggregate_info (struct agr_proc *agr,
- const struct ccase *input)
+accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input)
{
struct agr_var *iter;
double weight;
}
}
-/* We've come to a record that differs from the previous in one or
- more of the break variables. Make an output record from the
- accumulated statistics in the OUTPUT case. */
+/* Writes an aggregated record to OUTPUT. */
static void
-dump_aggregate_info (struct agr_proc *agr, struct ccase *output)
+dump_aggregate_info (struct agr_proc *agr, struct casewriter *output)
{
+ struct ccase c;
+
+ case_create (&c, dict_get_next_value_idx (agr->dict));
+
{
int value_idx = 0;
int i;
{
const struct variable *v = agr->break_vars[i];
size_t value_cnt = var_get_value_cnt (v);
- memcpy (case_data_rw_idx (output, value_idx),
+ memcpy (case_data_rw_idx (&c, value_idx),
case_data (&agr->break_case, v),
sizeof (union value) * value_cnt);
value_idx += value_cnt;
for (i = agr->agr_vars; i; i = i->next)
{
- union value *v = case_data_rw (output, i->dest);
+ union value *v = case_data_rw (&c, i->dest);
if (agr->missing == COLUMNWISE && i->saw_missing
&& (i->function & FUNC) != N && (i->function & FUNC) != NU
}
}
}
+
+ casewriter_write (output, &c);
}
/* Resets the state for all the aggregate functions. */
#include <stdlib.h>
#include <data/case.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/transformations.h>
cmd_autorecode (struct lexer *lexer, struct dataset *ds)
{
struct autorecode_pgm arc;
- struct ccase *c;
+ struct casereader *input;
+ struct ccase c;
size_t dst_cnt;
size_t i;
bool ok;
hash_numeric_value, NULL, NULL);
}
- proc_open (ds);
- while (proc_read (ds, &c))
+ input = proc_open (ds);
+ for (; casereader_read (input, &c); case_destroy (&c))
for (i = 0; i < arc.var_cnt; i++)
{
union arc_value v, *vp, **vpp;
if (var_is_numeric (arc.src_vars[i]))
- v.f = case_num (c, arc.src_vars[i]);
+ v.f = case_num (&c, arc.src_vars[i]);
else
- v.c = (char *) case_str (c, arc.src_vars[i]);
+ v.c = (char *) case_str (&c, arc.src_vars[i]);
vpp = (union arc_value **) hsh_probe (arc.src_values[i], &v);
if (*vpp == NULL)
*vpp = vp;
}
}
- ok = proc_close (ds);
+ ok = casereader_destroy (input);
+ ok = proc_commit (ds) && ok;
for (i = 0; i < arc.var_cnt; i++)
arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds),
#include <libpspp/alloc.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/variable.h>
#include <data/value.h>
#include <data/value-labels.h>
-#include <data/casefilter.h>
#include <libpspp/message.h>
#include <libpspp/assertion.h>
return sig1tailed ;
}
-static void
+static bool
do_binomial (const struct dictionary *dict,
- const struct casefile *cf,
+ struct casereader *input,
const struct binomial_test *bst,
- struct freq *cat1,
- struct freq *cat2,
- const struct casefilter *filter
+ struct freq_mutable *cat1,
+ struct freq_mutable *cat2,
+ enum mv_class exclude
)
{
bool warn = true;
const struct one_sample_test *ost = (const struct one_sample_test *) bst;
struct ccase c;
- struct casereader *r = casefile_get_reader (cf, NULL);
- while (casereader_read(r, &c))
+ while (casereader_read(input, &c))
{
int v;
- double w =
- dict_get_case_weight (dict, &c, &warn);
+ double w = dict_get_case_weight (dict, &c, &warn);
for (v = 0 ; v < ost->n_vars ; ++v )
{
const struct variable *var = ost->vars[v];
const union value *value = case_data (&c, var);
+ int width = var_get_width (var);
- if ( casefilter_variable_missing (filter, &c, var))
+ if (var_is_value_missing (var, value, exclude))
break;
if ( NULL == cat1[v].value )
{
- cat1[v].value = value_dup (value, var_get_width (var));
+ cat1[v].value = value_dup (value, width);
cat1[v].count = w;
}
- else if ( 0 == compare_values (cat1[v].value, value,
- var_get_width (var)))
+ else if ( 0 == compare_values (cat1[v].value, value, width))
cat1[v].count += w;
else if ( NULL == cat2[v].value )
{
- cat2[v].value = value_dup (value, var_get_width (var));
+ cat2[v].value = value_dup (value, width);
cat2[v].count = w;
}
- else if ( 0 == compare_values (cat2[v].value, value,
- var_get_width (var)))
+ else if ( 0 == compare_values (cat2[v].value, value, width))
cat2[v].count += w;
else if ( bst->category1 == SYSMIS)
msg (ME, _("Variable %s is not dichotomous"), var_get_name (var));
case_destroy (&c);
}
- casereader_destroy (r);
+ return casereader_destroy (input);
}
void
binomial_execute (const struct dataset *ds,
- const struct casefile *cf,
- struct casefilter *filter,
+ struct casereader *input,
+ enum mv_class exclude,
const struct npar_test *test)
{
int v;
const struct binomial_test *bst = (const struct binomial_test *) test;
const struct one_sample_test *ost = (const struct one_sample_test*) test;
- struct freq *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars);
- struct freq *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars);
- struct tab_table *table ;
+ struct freq_mutable *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars);
+ struct freq_mutable *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars);
assert ((bst->category1 == SYSMIS) == (bst->category2 == SYSMIS) );
cat2->value = value_dup (&v, 0);
}
- do_binomial (dataset_dict(ds), cf, bst, cat1, cat2, filter);
-
- table = tab_create (7, ost->n_vars * 3 + 1, 0);
-
- tab_dim (table, tab_natural_dimensions);
-
- tab_title (table, _("Binomial Test"));
-
- tab_headers (table, 2, 0, 1, 0);
-
- tab_box (table, TAL_1, TAL_1, -1, TAL_1,
- 0, 0, table->nc - 1, tab_nr(table) - 1 );
-
- for (v = 0 ; v < ost->n_vars; ++v)
+ if (do_binomial (dataset_dict(ds), input, bst, cat1, cat2, exclude))
{
- double n_total, sig;
- const struct variable *var = ost->vars[v];
- tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3);
-
- /* Titles */
- tab_text (table, 0, 1 + v * 3, TAB_LEFT,
- var_to_string (var));
-
- tab_text (table, 1, 1 + v * 3, TAB_LEFT,
- _("Group1"));
-
- tab_text (table, 1, 2 + v * 3, TAB_LEFT,
- _("Group2"));
+ struct tab_table *table = tab_create (7, ost->n_vars * 3 + 1, 0);
- tab_text (table, 1, 3 + v * 3, TAB_LEFT,
- _("Total"));
+ tab_dim (table, tab_natural_dimensions);
- /* Test Prop */
- tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3);
+ tab_title (table, _("Binomial Test"));
- /* Category labels */
- tab_text (table, 2, 1 + v * 3, TAB_NONE,
- var_get_value_name (var, cat1[v].value));
+ tab_headers (table, 2, 0, 1, 0);
- tab_text (table, 2, 2 + v * 3, TAB_NONE,
- var_get_value_name (var, cat2[v].value));
+ tab_box (table, TAL_1, TAL_1, -1, TAL_1,
+ 0, 0, table->nc - 1, tab_nr(table) - 1 );
- /* Observed N */
- tab_float (table, 3, 1 + v * 3, TAB_NONE,
- cat1[v].count, 8, 0);
+ for (v = 0 ; v < ost->n_vars; ++v)
+ {
+ double n_total, sig;
+ const struct variable *var = ost->vars[v];
+ tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3);
- tab_float (table, 3, 2 + v * 3, TAB_NONE,
- cat2[v].count, 8, 0);
+ /* Titles */
+ tab_text (table, 0, 1 + v * 3, TAB_LEFT, var_to_string (var));
+ tab_text (table, 1, 1 + v * 3, TAB_LEFT, _("Group1"));
+ tab_text (table, 1, 2 + v * 3, TAB_LEFT, _("Group2"));
+ tab_text (table, 1, 3 + v * 3, TAB_LEFT, _("Total"));
- n_total = cat1[v].count + cat2[v].count;
+ /* Test Prop */
+ tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3);
+ /* Category labels */
+ tab_text (table, 2, 1 + v * 3, TAB_NONE,
+ var_get_value_name (var, cat1[v].value));
+ tab_text (table, 2, 2 + v * 3, TAB_NONE,
+ var_get_value_name (var, cat2[v].value));
- tab_float (table, 3, 3 + v * 3, TAB_NONE,
- n_total, 8, 0);
+ /* Observed N */
+ tab_float (table, 3, 1 + v * 3, TAB_NONE, cat1[v].count, 8, 0);
+ tab_float (table, 3, 2 + v * 3, TAB_NONE, cat2[v].count, 8, 0);
- /* Observed Proportions */
+ n_total = cat1[v].count + cat2[v].count;
+ tab_float (table, 3, 3 + v * 3, TAB_NONE, n_total, 8, 0);
- tab_float (table, 4, 1 + v * 3, TAB_NONE,
- cat1[v].count / n_total, 8, 3);
+ /* Observed Proportions */
+ tab_float (table, 4, 1 + v * 3, TAB_NONE,
+ cat1[v].count / n_total, 8, 3);
+ tab_float (table, 4, 2 + v * 3, TAB_NONE,
+ cat2[v].count / n_total, 8, 3);
+ tab_float (table, 4, 3 + v * 3, TAB_NONE,
+ (cat1[v].count + cat2[v].count) / n_total, 8, 2);
- tab_float (table, 4, 2 + v * 3, TAB_NONE,
- cat2[v].count / n_total, 8, 3);
+ /* Significance */
+ sig = calculate_binomial (cat1[v].count, cat2[v].count, bst->p);
+ tab_float (table, 6, 1 + v * 3, TAB_NONE, sig, 8, 3);
+ }
- tab_float (table, 4, 3 + v * 3, TAB_NONE,
- (cat1[v].count + cat2[v].count) / n_total, 8, 2);
+ tab_text (table, 2, 0, TAB_CENTER, _("Category"));
+ tab_text (table, 3, 0, TAB_CENTER, _("N"));
+ tab_text (table, 4, 0, TAB_CENTER, _("Observed Prop."));
+ tab_text (table, 5, 0, TAB_CENTER, _("Test Prop."));
+ tab_text (table, 6, 0, TAB_CENTER | TAT_PRINTF,
+ _("Exact Sig. (%d-tailed)"),
+ bst->p == 0.5 ? 2: 1);
- /* Significance */
- sig = calculate_binomial (cat1[v].count, cat2[v].count,
- bst->p);
-
- tab_float (table, 6, 1 + v * 3, TAB_NONE,
- sig, 8, 3);
+ tab_vline (table, TAL_2, 2, 0, tab_nr (table) -1);
+ tab_submit (table);
+ }
+
+ for (v = 0; v < ost->n_vars; v++)
+ {
+ free (cat1[v].value);
+ free (cat2[v].value);
}
-
- tab_text (table, 2, 0, TAB_CENTER, _("Category"));
- tab_text (table, 3, 0, TAB_CENTER, _("N"));
- tab_text (table, 4, 0, TAB_CENTER, _("Observed Prop."));
- tab_text (table, 5, 0, TAB_CENTER, _("Test Prop."));
-
- tab_text (table, 6, 0, TAB_CENTER | TAT_PRINTF,
- _("Exact Sig. (%d-tailed)"),
- bst->p == 0.5 ? 2: 1);
-
- tab_vline (table, TAL_2, 2, 0, tab_nr (table) -1);
-
free (cat1);
- free (cat2);
-
- tab_submit (table);
-
+ free (cat2);
}
};
-struct casefile;
+struct casereader;
struct dataset;
void binomial_execute (const struct dataset *,
- const struct casefile *,
- struct casefilter *,
+ struct casereader *,
+ enum mv_class,
const struct npar_test *);
#endif
02110-1301, USA. */
#include <config.h>
-#include <libpspp/compiler.h>
-#include <libpspp/assertion.h>
+
+#include <language/stats/chisquare.h>
#include <stdlib.h>
+#include <math.h>
#include <data/case.h>
-#include <data/casefile.h>
-#include <data/casefilter.h>
-#include <data/variable.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
-
-#include <libpspp/message.h>
-#include <libpspp/hash.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <language/stats/freq.h>
+#include <language/stats/npar.h>
#include <libpspp/alloc.h>
-
-#include <gsl/gsl_cdf.h>
-
+#include <libpspp/assertion.h>
+#include <libpspp/compiler.h>
+#include <libpspp/hash.h>
+#include <libpspp/message.h>
+#include <libpspp/taint.h>
#include <output/table.h>
-#include <data/value-labels.h>
-#include "npar.h"
-#include "chisquare.h"
-#include "freq.h"
-
-#include <math.h>
+#include <gsl/gsl_cdf.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
-
-
-
/* Return a hash table containing the frequency counts of each
- value of VAR in CF .
+ value of VAR in INPUT.
It is the caller's responsibility to free the hash table when
*/
static struct hsh_table *
create_freq_hash_with_range (const struct dictionary *dict,
- const struct casefile *cf,
- struct casefilter *filter,
+ struct casereader *input,
const struct variable *var,
double lo,
double hi)
bool warn = true;
float i_d;
struct ccase c;
- struct casereader *r = casefile_get_reader (cf, filter);
struct hsh_table *freq_hash =
hsh_create (4, compare_freq, hash_freq,
hsh_insert (freq_hash, fr);
}
- while (casereader_read(r, &c))
+ while (casereader_read (input, &c))
{
union value obs_value;
struct freq **existing_fr;
struct freq *fr = xmalloc(sizeof (*fr));
fr->value = case_data (&c, var);
- if ( casefilter_variable_missing (filter, &c, var))
- {
- free (fr);
- continue;
- }
-
fr->count = dict_get_case_weight (dict, &c, &warn);
obs_value.f = trunc (fr->value->f);
case_destroy (&c);
}
- casereader_destroy (r);
-
- return freq_hash;
+ if (casereader_destroy (input))
+ return freq_hash;
+ else
+ {
+ hsh_destroy (freq_hash);
+ return NULL;
+ }
}
/* Return a hash table containing the frequency counts of each
- value of VAR in CF .
+ value of VAR in INPUT .
It is the caller's responsibility to free the hash table when
no longer required.
*/
static struct hsh_table *
create_freq_hash (const struct dictionary *dict,
- const struct casefile *cf,
- struct casefilter *filter,
+ struct casereader *input,
const struct variable *var)
{
bool warn = true;
struct ccase c;
- struct casereader *r = casefile_get_reader (cf, filter);
struct hsh_table *freq_hash =
hsh_create (4, compare_freq, hash_freq,
free_freq_mutable_hash,
(void *) var);
- while (casereader_read(r, &c))
+ for (; casereader_read (input, &c); case_destroy (&c))
{
struct freq **existing_fr;
struct freq *fr = xmalloc(sizeof (*fr));
- fr->value = case_data (&c, var );
-
- if ( casefilter_variable_missing (filter, &c, var))
- {
- free (fr);
- continue;
- }
+ fr->value = case_data (&c, var);
fr->count = dict_get_case_weight (dict, &c, &warn);
*existing_fr = fr;
fr->value = value_dup (fr->value, var_get_width (var));
}
-
- case_destroy (&c);
}
- casereader_destroy (r);
-
- return freq_hash;
+ if (casereader_destroy (input))
+ return freq_hash;
+ else
+ {
+ hsh_destroy (freq_hash);
+ return NULL;
+ }
}
static struct tab_table *
create_variable_frequency_table (const struct dictionary *dict,
- const struct casefile *cf,
- struct casefilter *filter,
+ struct casereader *input,
const struct chisquare_test *test,
int v,
struct hsh_table **freq_hash)
struct tab_table *table ;
const struct variable *var = ost->vars[v];
- *freq_hash = create_freq_hash (dict, cf, filter, var);
+ *freq_hash = create_freq_hash (dict, input, var);
+ if (*freq_hash == NULL)
+ return NULL;
n_cells = hsh_count (*freq_hash);
{
const struct one_sample_test *ost = (const struct one_sample_test*) test;
- struct tab_table *table = tab_create (1 + ost->n_vars, 4, 0);
+ struct tab_table *table;
+ table = tab_create (1 + ost->n_vars, 4, 0);
tab_dim (table, tab_natural_dimensions);
tab_title (table, _("Test Statistics"));
tab_headers (table, 1, 0, 1, 0);
void
chisquare_execute (const struct dataset *ds,
- const struct casefile *cf,
- struct casefilter *filter,
+ struct casereader *input,
+ enum mv_class exclude,
const struct npar_test *test)
{
const struct dictionary *dict = dataset_dict (ds);
int v, i;
struct one_sample_test *ost = (struct one_sample_test *) test;
struct chisquare_test *cst = (struct chisquare_test *) test;
- struct tab_table *stats_table = create_stats_table (cst);
int n_cells = 0;
double total_expected = 0.0;
double *df = xzalloc (sizeof (*df) * ost->n_vars);
double *xsq = xzalloc (sizeof (*df) * ost->n_vars);
+ bool ok;
for ( i = 0 ; i < cst->n_expected ; ++i )
total_expected += cst->expected[i];
{
double total_obs = 0.0;
struct hsh_table *freq_hash = NULL;
+ struct casereader *reader =
+ casereader_create_filter_missing (casereader_clone (input),
+ &ost->vars[v], 1, exclude, NULL);
struct tab_table *freq_table =
- create_variable_frequency_table(dict, cf, filter, cst,
- v, &freq_hash);
+ create_variable_frequency_table(dict, reader, cst, v, &freq_hash);
- struct freq **ff = (struct freq **) hsh_sort (freq_hash);
+ struct freq **ff;
if ( NULL == freq_table )
- {
- hsh_destroy (freq_hash);
- continue;
- }
+ continue;
+ ff = (struct freq **) hsh_sort (freq_hash);
n_cells = hsh_count (freq_hash);
for ( v = 0 ; v < ost->n_vars ; ++v )
{
double total_obs = 0.0;
+ struct casereader *reader =
+ casereader_create_filter_missing (casereader_clone (input),
+ &ost->vars[v], 1, exclude, NULL);
struct hsh_table *freq_hash =
- create_freq_hash_with_range (dict, cf, filter, ost->vars[v],
- cst->lo, cst->hi);
+ create_freq_hash_with_range (dict, reader,
+ ost->vars[v], cst->lo, cst->hi);
+
+ struct freq **ff;
- struct freq **ff = (struct freq **) hsh_sort (freq_hash);
+ if (freq_hash == NULL)
+ continue;
+ ff = (struct freq **) hsh_sort (freq_hash);
assert ( n_cells == hsh_count (freq_hash));
for ( i = 0 ; i < hsh_count (freq_hash) ; ++i )
tab_submit (freq_table);
}
+ ok = !taint_has_tainted_successor (casereader_get_taint (input));
+ casereader_destroy (input);
-
- /* Populate the summary statistics table */
- for ( v = 0 ; v < ost->n_vars ; ++v )
+ if (ok)
{
- const struct variable *var = ost->vars[v];
+ struct tab_table *stats_table = create_stats_table (cst);
+
+ /* Populate the summary statistics table */
+ for ( v = 0 ; v < ost->n_vars ; ++v )
+ {
+ const struct variable *var = ost->vars[v];
- tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var));
+ tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var));
- tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3);
- tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0);
+ tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3);
+ tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0);
- tab_float (stats_table, 1 + v, 3, TAB_NONE,
- gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3);
+ tab_float (stats_table, 1 + v, 3, TAB_NONE,
+ gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3);
+ }
+ tab_submit (stats_table);
}
-
+
free (xsq);
free (df);
-
- tab_submit (stats_table);
}
#if !chisquare_h
#define chisquare_h 1
-#include <config.h>
#include <stddef.h>
#include <stdbool.h>
+#include <language/stats/npar.h>
-#include "npar.h"
struct chisquare_test
{
struct one_sample_test parent;
int n_expected;
};
-struct casefile;
-struct dictionary ;
+struct casereader;
+struct dictionary;
struct hsh_table;
+struct dataset;
void chisquare_insert_variables (const struct npar_test *test,
struct hsh_table *variables);
void chisquare_execute (const struct dataset *ds,
- const struct casefile *cf,
- struct casefilter *filter,
+ struct casereader *input,
+ enum mv_class exclude,
const struct npar_test *test);
#include <stdio.h>
#include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/data-out.h>
#include <data/dictionary.h>
#include <data/format.h>
static struct pool *pl_col; /* For column data. */
static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
-static void precalc (const struct ccase *, void *, const struct dataset *);
-static bool calc_general (const struct ccase *, void *, const struct dataset *);
-static bool calc_integer (const struct ccase *, void *, const struct dataset *);
-static bool postcalc (void *, const struct dataset *);
+static void precalc (struct casereader *, const struct dataset *);
+static void calc_general (struct ccase *, const struct dataset *);
+static void calc_integer (struct ccase *, const struct dataset *);
+static void postcalc (void);
static void submit (struct tab_table *);
static void format_short (char *s, const struct fmt_spec *fp,
static int
internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
{
- int i;
+ struct casegrouper *grouper;
+ struct casereader *input, *group;
bool ok;
+ int i;
variables = NULL;
variables_cnt = 0;
else
write_style = CRS_WR_NONE;
- ok = procedure_with_splits (ds, precalc,
- mode == GENERAL ? calc_general : calc_integer,
- postcalc, NULL);
+ input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
+ NULL, NULL);
+ grouper = casegrouper_create_splits (input, dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ {
+ struct ccase c;
+
+ precalc (group, ds);
+
+ for (; casereader_read (group, &c); case_destroy (&c))
+ {
+ if (mode == GENERAL)
+ calc_general (&c, ds);
+ else
+ calc_integer (&c, ds);
+ }
+ casereader_destroy (group);
+
+ postcalc ();
+ }
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
}
static unsigned hash_table_entry (const void *, const void *);
/* Set up the crosstabulation tables for processing. */
-static void
-precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
+static void
+precalc (struct casereader *input, const struct dataset *ds)
{
- output_split_file_values (ds, first);
+ struct ccase c;
+
+ if (!casereader_peek (input, 0, &c))
+ return;
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
+
if (mode == GENERAL)
{
gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
}
/* Form crosstabulations for general mode. */
-static bool
-calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc_general (struct ccase *c, const struct dataset *ds)
{
- bool bad_warn = true;
-
/* Missing values to exclude. */
enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
: cmd.miss == CRS_INCLUDE ? MV_SYSTEM
: MV_NEVER);
/* Case weight. */
- double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
+ double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
/* Flattened current table index. */
int t;
next_crosstab:
local_free (te);
}
-
- return true;
}
-static bool
-calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc_integer (struct ccase *c, const struct dataset *ds)
{
bool bad_warn = true;
next_crosstab: ;
}
-
- return true;
}
/* Compare the table_entry's at A and B and return a strcmp()-type
int *, int *, int *);
static void make_summary_table (void);
-static bool
-postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
+static void
+postcalc (void)
{
if (mode == GENERAL)
{
}
hsh_destroy (gen_tab);
-
- return true;
}
static void insert_summary (struct tab_table *, int tab_index, double valid);
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
-/* FIXME: Many possible optimizations. */
-
#include <config.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
-#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/transformations.h>
static void setup_z_trns (struct dsc_proc *, struct dataset *);
/* Procedure execution functions. */
-static bool calc_descriptives (const struct ccase *first,
- const struct casefile *, void *dsc_,
- const struct dataset *);
+static void calc_descriptives (struct dsc_proc *, struct casereader *,
+ struct dataset *);
static void display (struct dsc_proc *dsc);
\f
/* Parser and outline. */
size_t i;
bool ok;
+ struct casegrouper *grouper;
+ struct casereader *group;
+
/* Create and initialize dsc. */
dsc = xmalloc (sizeof *dsc);
dsc->vars = NULL;
{
int i;
- if (!parse_variables_const (lexer, dataset_dict (ds),
- &vars, &var_cnt,
+ if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
goto error;
for (i = 0; i < dsc->var_cnt; i++)
dsc->vars[i].moments = moments_create (dsc->max_moment);
- /* Data pass. */
- ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
+ /* Data pass. FIXME: error handling. */
+ grouper = casegrouper_create_splits (proc_open (ds), dict);
+ while (casegrouper_get_next_group (grouper, &group))
+ calc_descriptives (dsc, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
/* Z-scoring! */
if (ok && z_cnt)
-/* Calculates and displays descriptive statistics for the cases
- in CF. */
-static bool
-calc_descriptives (const struct ccase *first,
- const struct casefile *cf, void *dsc_,
- const struct dataset *ds)
+/* Calculates and displays descriptive statistics for the cases
+ in GROUP, as directed by DSC.  Takes ownership of GROUP: every
+ return path destroys it or the readers derived from it. */
+static void
+calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
+ struct dataset *ds)
{
- struct dsc_proc *dsc = dsc_;
- struct casereader *reader;
+ struct casereader *pass1, *pass2;
struct ccase c;
size_t i;
- output_split_file_values (ds, first);
+ /* If GROUP yields no first case there is nothing to report,
+ but GROUP is still ours to release. */
+ if (!casereader_peek (group, 0, &c))
+ {
+ casereader_destroy (group);
+ return;
+ }
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
+
+ group = casereader_create_filter_weight (group, dataset_dict (ds),
+ NULL, NULL);
+
+ /* PASS2 is read only when moments beyond the mean are wanted. */
+ casereader_split (group, &pass1, &pass2);
+ if (dsc->max_moment <= MOMENT_MEAN)
+ casereader_destroy (pass2);
for (i = 0; i < dsc->var_cnt; i++)
{
dsc->valid = 0.;
/* First pass to handle most of the work. */
- for (reader = casefile_get_reader (cf, NULL);
- casereader_read (reader, &c);
- case_destroy (&c))
+ for (; casereader_read (pass1, &c); case_destroy (&c))
{
- double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
- if (weight <= 0.0)
- continue;
+ double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
/* Check for missing values. */
if (listwise_missing (dsc, &c))
struct dsc_var *dv = &dsc->vars[i];
double x = case_num (&c, dv->v);
- if (dsc->missing_type != DSC_LISTWISE
- && var_is_num_missing (dv->v, x, dsc->exclude))
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
{
dv->missing += weight;
continue;
dv->max = x;
}
}
- casereader_destroy (reader);
+ if (!casereader_destroy (pass1))
+ {
+ /* PASS2 is still open when a second pass was planned. */
+ if (dsc->max_moment > MOMENT_MEAN)
+ casereader_destroy (pass2);
+ return;
+ }
/* Second pass for higher-order moments. */
if (dsc->max_moment > MOMENT_MEAN)
{
- for (reader = casefile_get_reader (cf, NULL);
- casereader_read (reader, &c);
- case_destroy (&c))
+ for (; casereader_read (pass2, &c); case_destroy (&c))
{
- double weight = dict_get_case_weight (dataset_dict (ds), &c,
- &dsc->bad_warn);
- if (weight <= 0.0)
- continue;
+ double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
/* Check for missing values. */
if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
struct dsc_var *dv = &dsc->vars[i];
double x = case_num (&c, dv->v);
- if (dsc->missing_type != DSC_LISTWISE
- && var_is_num_missing (dv->v, x, dsc->exclude))
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
continue;
if (dv->moments != NULL)
moments_pass_two (dv->moments, x, weight);
}
}
- casereader_destroy (reader);
+ if (!casereader_destroy (pass2))
+ return;
}
-
+
/* Calculate results. */
for (i = 0; i < dsc->var_cnt; i++)
{
/* Output results. */
display (dsc);
-
- return true;
}
/* Returns true if any of the descriptives variables in DSC's
#include <stdlib.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/value-labels.h>
/* Per Split function */
-static bool run_examine (const struct ccase *,
- const struct casefile *cf, void *cmd_, const struct dataset *);
+static void run_examine (struct cmd_examine *, struct casereader *,
+ struct dataset *);
static void output_examine (void);
int
cmd_examine (struct lexer *lexer, struct dataset *ds)
{
+ struct casegrouper *grouper;
+ struct casereader *group;
bool ok;
subc_list_double_create (&percentile_list);
subc_list_double_push (&percentile_list, 75);
}
- ok = multipass_procedure_with_splits (ds, run_examine, &cmd);
+ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ run_examine (&cmd, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
if ( totals )
{
-static bool bad_weight_warn = true;
-
-
/* Perform calculations for the sub factors */
void
factor_calc (const struct ccase *c, int case_no, double weight,
}
}
-static bool
-run_examine (const struct ccase *first, const struct casefile *cf,
- void *cmd_, const struct dataset *ds)
+/* Runs EXAMINE for one split group: passes every case in INPUT
+ to factor_calc and, if INPUT was read without error, outputs
+ the results.  Takes ownership of INPUT, which is destroyed on
+ every return path. */
+static void
+run_examine (struct cmd_examine *cmd, struct casereader *input,
+ struct dataset *ds)
{
struct dictionary *dict = dataset_dict (ds);
- struct casereader *r;
+ casenumber case_no;
struct ccase c;
int v;
-
- const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
+ bool ok;
struct factor *fctr;
- output_split_file_values (ds, first);
+ /* If INPUT yields no first case there is nothing to do, but
+ INPUT is still ours to release. */
+ if (!casereader_peek (input, 0, &c))
+ {
+ casereader_destroy (input);
+ return;
+ }
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
+
+ input = casereader_create_filter_weight (input, dict, NULL, NULL);
+ input = casereader_create_counter (input, &case_no, 0);
/* Make sure we haven't got rubbish left over from a
- previous split */
+ previous split. */
fctr = factors;
while (fctr)
{
for ( v = 0 ; v < n_dependent_vars ; ++v )
metrics_precalc (&totals[v]);
- for (r = casefile_get_reader (cf, NULL);
- casereader_read (r, &c) ;
- case_destroy (&c) )
+ for (; casereader_read (input, &c); case_destroy (&c))
{
- int case_missing=0;
- const int case_no = casereader_cnum (r);
-
- const double weight =
- dict_get_case_weight (dict, &c, &bad_weight_warn);
+ int case_missing = 0;
+ const double weight = dict_get_case_weight (dict, &c, NULL);
if ( cmd->miss == XMN_LISTWISE )
{
factor_calc (&c, case_no, weight, case_missing);
}
+ ok = casereader_destroy (input);
for ( v = 0 ; v < n_dependent_vars ; ++v)
{
fctr = fctr->next;
}
- output_examine ();
+ if (ok)
+ output_examine ();
if ( totals )
metrics_destroy (&totals[i]);
}
}
-
- return true;
}
#include <sys/types.h>
#endif
-#include <data/case-sink.h>
-#include <data/case-source.h>
#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/settings.h>
#include <libpspp/array.h>
#include <libpspp/assertion.h>
#include <libpspp/message.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/pool.h>
#include <libpspp/str.h>
int case_cnt; /* Pre-flip case count. */
size_t case_size; /* Post-flip bytes per case. */
- union value *output_buf; /* Case output buffer. */
-
struct variable *new_names; /* Variable containing new variable names. */
struct varname *new_names_head; /* First new variable. */
struct varname *new_names_tail; /* Last new variable. */
bool error; /* Error reading temporary file? */
};
+static const struct casereader_class flip_casereader_class;
+
static void destroy_flip_pgm (struct flip_pgm *);
-static struct case_sink *flip_sink_create (struct dataset *ds, struct flip_pgm *);
-static struct case_source *flip_source_create (struct flip_pgm *);
static bool flip_file (struct flip_pgm *);
-static int build_dictionary (struct dictionary *, struct flip_pgm *);
-
-static const struct case_source_class flip_source_class;
-static const struct case_sink_class flip_sink_class;
+static bool build_dictionary (struct dictionary *, struct flip_pgm *);
+static bool write_flip_case (struct flip_pgm *, const struct ccase *);
/* Parses and executes FLIP. */
int
cmd_flip (struct lexer *lexer, struct dataset *ds)
{
- struct flip_pgm *flip;
- struct case_sink *sink;
struct dictionary *dict = dataset_dict (ds);
+ struct flip_pgm *flip;
+ struct casereader *input, *reader;
+ union value *output_buf;
+ struct ccase c;
+ size_t i;
bool ok;
if (proc_make_temporary_transformations_permanent (ds))
if (flip->new_names)
{
- size_t i;
-
for (i = 0; i < flip->var_cnt; i++)
if (flip->var[i] == flip->new_names)
{
}
}
+ output_buf = pool_nalloc (flip->pool,
+ flip->var_cnt, sizeof *output_buf);
+
+ flip->file = pool_tmpfile (flip->pool);
+ if (flip->file == NULL)
+ {
+ msg (SE, _("Could not create temporary file for FLIP."));
+ goto error;
+ }
+
+ /* Write variable names as first case. */
+ for (i = 0; i < flip->var_cnt; i++)
+ buf_copy_str_rpad (output_buf[i].s, MAX_SHORT_STRING,
+ var_get_name (flip->var[i]));
+ if (fwrite (output_buf, sizeof *output_buf,
+ flip->var_cnt, flip->file) != (size_t) flip->var_cnt)
+ {
+ msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
+ goto error;
+ }
+
+ flip->case_cnt = 1;
+
- /* Read the active file into a flip_sink. */
+ /* Write each case of the active file to the temporary file. */
- flip->case_cnt = 0;
proc_make_temporary_transformations_permanent (ds);
- sink = flip_sink_create (ds, flip);
- if (sink == NULL)
- goto error;
- proc_set_sink (ds, sink);
- flip->new_names_tail = NULL;
- ok = procedure (ds,NULL, NULL);
+ proc_discard_output (ds);
+
+ input = proc_open (ds);
+ while (casereader_read (input, &c))
+ {
+ write_flip_case (flip, &c);
+ case_destroy (&c);
+ }
+ ok = casereader_destroy (input);
+ ok = proc_commit (ds) && ok;
/* Flip the data we read. */
- if (!flip_file (flip))
+ if (!ok || !flip_file (flip))
{
- discard_variables (ds);
+ proc_discard_active_file (ds);
goto error;
}
dict_clear (dict);
if (!build_dictionary (dict, flip))
{
- discard_variables (ds);
+ proc_discard_active_file (ds);
goto error;
}
flip->case_size = dict_get_case_size (dict);
/* Set up flipped data for reading. */
- proc_set_source (ds, flip_source_create (flip));
-
- return ok ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
+ reader = casereader_create_sequential (NULL, dict_get_next_value_idx (dict),
+ flip->case_cnt,
+ &flip_casereader_class, flip);
+ proc_set_active_file_data (ds, reader);
+ return lex_end_of_command (lexer);
error:
destroy_flip_pgm (flip);
}
/* Make a new dictionary for all the new variable names. */
-static int
+static bool
build_dictionary (struct dictionary *dict, struct flip_pgm *flip)
{
dict_create_var_assert (dict, "CASE_LBL", 8);
if (flip->case_cnt > 99999)
{
msg (SE, _("Cannot create more than 99999 variable names."));
- return 0;
+ return false;
}
for (i = 0; i < flip->case_cnt; i++)
for (v = flip->new_names_head; v; v = v->next)
if (!make_new_var (dict, v->name))
- return 0;
+ return false;
}
- return 1;
+ return true;
}
-/* Creates a flip sink based on FLIP. */
-static struct case_sink *
-flip_sink_create (struct dataset *ds, struct flip_pgm *flip)
-{
- size_t i;
-
- flip->output_buf = pool_nalloc (flip->pool,
- flip->var_cnt, sizeof *flip->output_buf);
-
- flip->file = pool_tmpfile (flip->pool);
- if (flip->file == NULL)
- {
- msg (SE, _("Could not create temporary file for FLIP: %s."),
- strerror (errno));
- return NULL;
- }
-
- /* Write variable names as first case. */
- for (i = 0; i < flip->var_cnt; i++)
- buf_copy_str_rpad (flip->output_buf[i].s, MAX_SHORT_STRING,
- var_get_name (flip->var[i]));
- if (fwrite (flip->output_buf, sizeof *flip->output_buf,
- flip->var_cnt, flip->file) != (size_t) flip->var_cnt)
- {
- msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
- return NULL;
- }
-
- flip->case_cnt = 1;
-
- return create_case_sink (&flip_sink_class,
- dataset_dict (ds),
- dataset_get_casefile_factory (ds),
- flip);
-}
-
-/* Writes case C to the FLIP sink.
+/* Writes case C to FLIP's temporary file.
Returns true if successful, false if an I/O error occurred. */
static bool
-flip_sink_write (struct case_sink *sink, const struct ccase *c)
+write_flip_case (struct flip_pgm *flip, const struct ccase *c)
{
- struct flip_pgm *flip = sink->aux;
size_t i;
flip->case_cnt++;
}
else
out = SYSMIS;
- flip->output_buf[i].f = out;
- }
-
- if (fwrite (flip->output_buf, sizeof *flip->output_buf,
- flip->var_cnt, flip->file) != (size_t) flip->var_cnt)
- {
- msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
- return false;
+ /* Values are written one at a time, so each write is checked;
+ a short write is reported and ends this case. */
+ if (fwrite (&out, sizeof out, 1, flip->file) != 1)
+ {
+ msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
+ return false;
+ }
}
return true;
}
return true;
}
-/* FLIP sink class. */
-static const struct case_sink_class flip_sink_class =
- {
- "FLIP",
- NULL,
- flip_sink_write,
- NULL,
- NULL,
- };
-
-/* Creates and returns a FLIP source based on PGM,
- which should have already been used as a sink. */
-static struct case_source *
-flip_source_create (struct flip_pgm *pgm)
-{
- return create_case_source (&flip_source_class, pgm);
-}
-
/* Reads one case into C.
Returns true if successful, false at end of file or if an
I/O error occurred. */
static bool
-flip_source_read (struct case_source *source, struct ccase *c)
+flip_casereader_read (struct casereader *reader UNUSED, void *flip_,
+ struct ccase *c)
{
- struct flip_pgm *flip = source->aux;
+ struct flip_pgm *flip = flip_;
size_t i;
if (flip->error || flip->cases_read >= flip->var_cnt)
return false;
-
- if (flip->input_buf == NULL)
- flip->input_buf = pool_nmalloc (flip->pool,
- flip->case_cnt, sizeof *flip->input_buf);
- if (fread (flip->input_buf, sizeof *flip->input_buf, flip->case_cnt,
- flip->file) != flip->case_cnt)
+ case_create (c, flip->case_cnt);
+ for (i = 0; i < flip->case_cnt; i++)
{
- if (ferror (flip->file))
- msg (SE, _("Error reading FLIP temporary file: %s."),
- strerror (errno));
- else if (feof (flip->file))
- msg (SE, _("Unexpected end of file reading FLIP temporary file."));
- else
- NOT_REACHED ();
- flip->error = true;
- return false;
+ double in;
+ if (fread (&in, sizeof in, 1, flip->file) != 1)
+ {
+ case_destroy (c);
+ if (ferror (flip->file))
+ msg (SE, _("Error reading FLIP temporary file: %s."),
+ strerror (errno));
+ else if (feof (flip->file))
+ msg (SE, _("Unexpected end of file reading FLIP temporary file."));
+ else
+ NOT_REACHED ();
+ flip->error = true;
+ return false;
+ }
+ case_data_rw_idx (c, i)->f = in;
}
-
- for (i = 0; i < flip->case_cnt; i++)
- case_data_rw_idx (c, i)->f = flip->input_buf[i].f;
-
+
flip->cases_read++;
return true;
-/* Destroys the source.
- Returns true if successful read, false if an I/O occurred
- during destruction or previously. */
-static bool
-flip_source_destroy (struct case_source *source)
+/* Destroys the flip_pgm FLIP_ that backs READER.
+ If an I/O error was recorded in the flip_pgm, READER is first
+ marked as failed with casereader_force_error. */
+static void
+flip_casereader_destroy (struct casereader *reader, void *flip_)
{
- struct flip_pgm *flip = source->aux;
- bool ok = !flip->error;
+ struct flip_pgm *flip = flip_;
+ if (flip->error)
+ casereader_force_error (reader);
destroy_flip_pgm (flip);
- return ok;
}
-static const struct case_source_class flip_source_class =
+static const struct casereader_class flip_casereader_class = /* Casereader class used to read back the flipped data. */
{
- "FLIP",
+ flip_casereader_read, /* Reads one case. */
+ flip_casereader_destroy, /* Flags any recorded error, then destroys the flip_pgm. */
+ NULL, /* NOTE(review): remaining members are left unset; confirm which hooks these are. */
NULL,
- flip_source_read,
- flip_source_destroy
};
#include <gsl/gsl_histogram.h>
#include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/format.h>
#include <data/procedure.h>
#include <libpspp/hash.h>
#include <libpspp/magic.h>
#include <libpspp/message.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/pool.h>
#include <libpspp/str.h>
static void calc_stats (const struct variable *v, double d[frq_n_stats]);
-static void precalc (const struct ccase *, void *, const struct dataset *);
-static bool calc (const struct ccase *, void *, const struct dataset *);
-static bool postcalc (void *, const struct dataset *);
+static void precalc (struct casereader *, struct dataset *);
+static void calc (const struct ccase *, const struct dataset *);
+static void postcalc (void);
static void postprocess_freq_tab (const struct variable *);
static void dump_full (const struct variable *);
static int
internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds)
{
- int i;
+ struct casegrouper *grouper;
+ struct casereader *input, *group;
bool ok;
+ int i;
n_percentiles = 0;
percentiles = NULL;
/* Do it! */
- ok = procedure_with_splits (ds, precalc, calc, postcalc, NULL);
+ input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
+ NULL, NULL);
+ grouper = casegrouper_create_splits (input, dataset_dict (ds));
+ for (; casegrouper_get_next_group (grouper, &group);
+ casereader_destroy (group))
+ {
+ struct ccase c;
+
+ precalc (group, ds);
+ for (; casereader_read (group, &c); case_destroy (&c))
+ calc (&c, ds);
+ postcalc ();
+ }
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
free_frequencies(&cmd);
}
/* Add data from case C to the frequency table. */
-static bool
-calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc (const struct ccase *c, const struct dataset *ds)
{
- double weight;
+ double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
size_t i;
- bool bad_warn = true;
-
- weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
for (i = 0; i < n_variables; i++)
{
struct freq *fp = pool_alloc (gen_pool, sizeof *fp);
fp->count = weight;
fp->value = pool_clone (gen_pool,
- val, MAX (MAX_SHORT_STRING, vf->width));
+ val,
+ MAX (MAX_SHORT_STRING, vf->width));
*fpp = fp;
}
}
NOT_REACHED ();
}
}
- return true;
}
/* Prepares each variable that is the target of FREQUENCIES by setting
up its hash table. */
static void
-precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
+precalc (struct casereader *input, struct dataset *ds)
{
+ struct ccase c;
size_t i;
- output_split_file_values (ds, first);
+ if (!casereader_peek (input, 0, &c))
+ return;
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
pool_destroy (gen_pool);
gen_pool = pool_create ();
/* Finishes up with the variables after frequencies have been
calculated. Displays statistics, percentiles, ... */
-static bool
-postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
+static void
+postcalc (void)
{
size_t i;
cleanup_freq_tab (v);
}
-
- return true;
}
/* Returns the comparison function that should be used for
#include <config.h>
#include <output/table.h>
+#include <data/casereader.h>
#include <libpspp/hash.h>
#include <data/variable.h>
#include "npar-summary.h"
#include <math/moments.h>
-#include <data/casefile.h>
-#include <data/casefilter.h>
#include <data/case.h>
#include <data/dictionary.h>
#include <math.h>
void
npar_summary_calc_descriptives (struct descriptives *desc,
- const struct casefile *cf,
- struct casefilter *filter,
+ struct casereader *input,
const struct dictionary *dict,
const struct variable *const *vv,
- int n_vars UNUSED)
+ int n_vars UNUSED,
+ enum mv_class filter)
{
int i = 0;
while (*vv)
{
- bool warn = true;
double minimum = DBL_MAX;
double maximum = -DBL_MAX;
double var;
struct moments1 *moments = moments1_create (MOMENT_VARIANCE);
- struct casereader *r = casefile_get_reader (cf, filter);
struct ccase c;
const struct variable *v = *vv++;
-
- while (casereader_read(r, &c))
+ struct casereader *pass;
+
+ pass = casereader_clone (input);
+ pass = casereader_create_filter_missing (pass,
+ (struct variable **) &v, 1,
+ filter, NULL);
+ pass = casereader_create_filter_weight (pass, dict, NULL, NULL);
+ while (casereader_read(pass, &c))
{
- const union value *val = case_data (&c, v);
- double w = dict_get_case_weight (dict, &c, &warn);
-
- if ( ! casefilter_variable_missing (filter, &c, v ))
- {
- minimum = MIN (minimum, val->f);
- maximum = MAX (maximum, val->f);
- moments1_add (moments, val->f, w);
- }
+ double val = case_num (&c, v);
+ double w = dict_get_case_weight (dict, &c, NULL);
+ minimum = MIN (minimum, val);
+ maximum = MAX (maximum, val);
+ moments1_add (moments, val, w);
case_destroy (&c);
}
- casereader_destroy (r);
+ casereader_destroy (pass);
moments1_calculate (moments,
&desc[i].n,
i++;
}
+ casereader_destroy (input);
}
#include <config.h>
struct variable ;
-struct casefile ;
+struct casereader ;
struct dictionary;
-struct casefilter;
struct descriptives
{
};
void npar_summary_calc_descriptives (struct descriptives *desc,
- const struct casefile *cf,
- struct casefilter *filter,
+ struct casereader *input,
const struct dictionary *dict,
const struct variable *const *vv,
- int n_vars);
+ int n_vars,
+ enum mv_class filter);
void do_summary_box (const struct descriptives *desc,
#if !npar_h
#define npar_h 1
-typedef const struct variable *var_ptr;
-typedef var_ptr variable_pair[2];
+#include <stddef.h>
+#include <data/missing-values.h>
+
+typedef struct variable *variable_pair[2];
struct hsh_table;
struct const_hsh_table;
-struct casefilter ;
+struct casefilter;
+struct casereader;
+struct dataset;
struct npar_test
{
void (*execute) (const struct dataset *,
- const struct casefile *,
- struct casefilter *,
+ struct casereader *,
+ enum mv_class exclude,
const struct npar_test *
);
#include <config.h>
-#include <language/lexer/lexer.h>
-#include <language/lexer/variable-parser.h>
-#include <language/command.h>
-#include <data/procedure.h>
-#include <libpspp/pool.h>
-#include <libpspp/hash.h>
+#include <language/stats/npar.h>
+
+#include <math.h>
-#include <data/casefilter.h>
#include <data/case.h>
-#include <data/casefile.h>
-#include <math/moments.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
-#include <language/stats/chisquare.h>
+#include <data/procedure.h>
+#include <language/command.h>
+#include <language/lexer/lexer.h>
+#include <language/lexer/variable-parser.h>
#include <language/stats/binomial.h>
-#include <math.h>
+#include <language/stats/chisquare.h>
+#include <libpspp/hash.h>
+#include <libpspp/pool.h>
+#include <libpspp/taint.h>
+#include <math/moments.h>
-#include "npar.h"
#include "npar-summary.h"
#include "gettext.h"
(those mentioned on ANY subcommand */
int n_vars; /* Number of variables in vv */
- struct casefilter *filter; /* The missing value filter */
+ enum mv_class filter; /* Missing values to filter. */
bool descriptives; /* Descriptive statistics should be calculated */
bool quartiles; /* Quartiles should be calculated */
void one_sample_insert_variables (const struct npar_test *test,
struct const_hsh_table *variables);
-static bool
-npar_execute(const struct ccase *first UNUSED,
- const struct casefile *cf, void *aux,
+static void
+npar_execute(struct casereader *input,
+ const struct npar_specs *specs,
const struct dataset *ds)
{
int t;
- const struct npar_specs *specs = aux;
struct descriptives *summary_descriptives = NULL;
for ( t = 0 ; t < specs->n_tests; ++t )
msg (SW, _("NPAR subcommand not currently implemented."));
continue;
}
- test->execute (ds, cf, specs->filter, test);
+ test->execute (ds, casereader_clone (input), specs->filter, test);
}
if ( specs->descriptives )
summary_descriptives = xnmalloc (sizeof (*summary_descriptives),
specs->n_vars);
- npar_summary_calc_descriptives (summary_descriptives, cf,
- specs->filter,
+ npar_summary_calc_descriptives (summary_descriptives,
+ casereader_clone (input),
dataset_dict (ds),
- specs->vv, specs->n_vars);
+ specs->vv, specs->n_vars,
+ specs->filter);
}
- if ( specs->descriptives || specs->quartiles )
+ if ( (specs->descriptives || specs->quartiles)
+ && !taint_has_tainted_successor (casereader_get_taint (input)) )
do_summary_box (summary_descriptives, specs->vv, specs->n_vars );
free (summary_descriptives);
-
- return true;
+ casereader_destroy (input);
}
-
int
cmd_npar_tests (struct lexer *lexer, struct dataset *ds)
{
int i;
struct npar_specs npar_specs = {0, 0, 0, 0, 0, 0, 0, 0};
struct const_hsh_table *var_hash;
+ struct casegrouper *grouper;
+ struct casereader *input, *group;
+
npar_specs.pool = pool_create ();
var_hash = const_hsh_create_pool (npar_specs.pool, 0,
}
}
- npar_specs.filter =
- casefilter_create (cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM, 0, 0);
-
- if ( cmd.miss == NPAR_LISTWISE )
- casefilter_add_variables (npar_specs.filter,
- npar_specs.vv,
- npar_specs.n_vars);
+ npar_specs.filter = cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM;
- ok = multipass_procedure_with_splits (ds, npar_execute, &npar_specs);
+ input = proc_open (ds);
+ if ( cmd.miss == NPAR_LISTWISE )
+ input = casereader_create_filter_missing (input,
+ (struct variable **) npar_specs.vv,
+ npar_specs.n_vars,
+ npar_specs.filter, NULL);
- casefilter_destroy (npar_specs.filter);
+ grouper = casegrouper_create_splits (input, dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ npar_execute (group, &npar_specs, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
const_hsh_destroy (var_hash);
#include <stdlib.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/value-labels.h>
#include <data/variable.h>
-#include <data/casefilter.h>
#include <language/command.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
#include <libpspp/hash.h>
#include <libpspp/magic.h>
#include <libpspp/message.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/taint.h>
#include <math/group-proc.h>
#include <math/group.h>
#include <math/levene.h>
/* (declarations) */
/* (functions) */
-static bool bad_weight_warn = true;
-
-
static struct cmd_oneway cmd;
/* The independent variable */
static int ostensible_number_of_groups = -1;
-static bool run_oneway(const struct ccase *first,
- const struct casefile *cf,
- void *_mode, const struct dataset *);
+static void run_oneway (struct cmd_oneway *, struct casereader *,
+ const struct dataset *);
/* Routines to show the output tables */
int
cmd_oneway (struct lexer *lexer, struct dataset *ds)
{
+ struct casegrouper *grouper;
+ struct casereader *group;
int i;
bool ok;
}
}
- ok = multipass_procedure_with_splits (ds, run_oneway, &cmd);
+ /* Data pass. FIXME: error handling. */
+ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ run_oneway (&cmd, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
free (vars);
free_oneway (&cmd);
free (value);
}
-static bool
-run_oneway(const struct ccase *first, const struct casefile *cf,
- void *cmd_, const struct dataset *ds)
+static void
+run_oneway (struct cmd_oneway *cmd,
+ struct casereader *input,
+ const struct dataset *ds)
{
- struct casereader *r;
+ struct taint *taint;
+ struct dictionary *dict = dataset_dict (ds);
+ enum mv_class exclude;
+ struct casereader *reader;
struct ccase c;
- struct casefilter *filter = NULL;
- struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_;
+ if (!casereader_peek (input, 0, &c))
+ return;
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
- output_split_file_values (ds, first);
+ taint = taint_clone (casereader_get_taint (input));
global_group_hash = hsh_create(4,
(hsh_compare_func *) compare_values,
precalc(cmd);
- filter = casefilter_create ( (cmd->incl != ONEWAY_INCLUDE
- ? MV_ANY : MV_SYSTEM),
- vars, n_vars );
+ exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM;
+ input = casereader_create_filter_missing (input, &indep_var, 1,
+ exclude, NULL);
+ if (cmd->miss == ONEWAY_LISTWISE)
+ input = casereader_create_filter_missing (input, vars, n_vars,
+ exclude, NULL);
+ input = casereader_create_filter_weight (input, dict, NULL, NULL);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
+ reader = casereader_clone (input);
+ for (; casereader_read (reader, &c); case_destroy (&c))
{
size_t i;
- const double weight =
- dict_get_case_weight (dataset_dict (ds), &c, &bad_weight_warn);
-
- const union value *indep_val;
- void **p;
+ const double weight = dict_get_case_weight (dict, &c, NULL);
- if ( casefilter_variable_missing (filter, &c, indep_var))
- continue;
-
- indep_val = case_data (&c, indep_var);
- p = hsh_probe (global_group_hash, indep_val);
+ const union value *indep_val = case_data (&c, indep_var);
+ void **p = hsh_probe (global_group_hash, indep_val);
if (*p == NULL)
*p = value_dup (indep_val, var_get_width (indep_var));
-
- hsh_insert ( global_group_hash, (void *) indep_val );
for ( i = 0 ; i < n_vars ; ++i )
{
hsh_insert ( group_hash, (void *) gs );
}
- if (! casefilter_variable_missing (filter, &c, v))
+ if (!var_is_value_missing (v, val, exclude))
{
struct group_statistics *totals = &gp->ugs;
}
}
-
- casereader_destroy (r);
+ casereader_destroy (reader);
postcalc(cmd);
if ( stat_tables & STAT_HOMO )
- levene (dataset_dict (ds), cf, indep_var, n_vars, vars,
- filter);
+ levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude);
- casefilter_destroy (filter);
+ casereader_destroy (input);
ostensible_number_of_groups = hsh_count (global_group_hash);
-
- output_oneway();
-
- return true;
+ if (!taint_has_tainted_successor (taint))
+ output_oneway();
+ taint_destroy (taint);
}
#include <config.h>
-#include "sort-criteria.h"
+#include <limits.h>
+#include <math.h>
#include <data/dictionary.h>
#include <data/format.h>
#include <data/missing-values.h>
#include <data/procedure.h>
#include <data/variable.h>
+#include <data/case-ordering.h>
#include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
-#include <data/storage-stream.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
#include <language/command.h>
#include <language/stats/sort-criteria.h>
-#include <limits.h>
#include <libpspp/compiler.h>
+#include <libpspp/taint.h>
#include <math/sort.h>
#include <output/table.h>
#include <output/manager.h>
#include <gsl/gsl_cdf.h>
-#include <math.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
static struct rank_spec *rank_specs;
static size_t n_rank_specs;
-static struct sort_criteria *sc;
+static struct case_ordering *sc;
static const struct variable **group_vars;
static size_t n_group_vars;
static struct cmd_rank cmd;
-static struct casefile *rank_sorted_casefile (struct casefile *cf,
- const struct sort_criteria *,
- const struct dictionary *,
- const struct rank_spec *rs,
- int n_rank_specs,
- int idx,
- const struct missing_values *miss
- );
+static void rank_sorted_file (struct casereader *,
+ struct casewriter *,
+ const struct dictionary *,
+ const struct rank_spec *rs,
+ int n_rank_specs,
+ int idx,
+ struct variable *rank_var);
+
static const char *
fraction_name(void)
{
}
-static bool
-rank_cmd (struct dataset *ds, const struct sort_criteria *sc,
+static bool
+rank_cmd (struct dataset *ds, const struct case_ordering *sc,
const struct rank_spec *rank_specs, int n_rank_specs)
{
- struct sort_criteria criteria;
- bool result = true;
+ struct case_ordering *base_ordering;
+ bool ok = true;
int i;
const int n_splits = dict_get_split_cnt (dataset_dict (ds));
- criteria.crit_cnt = n_splits + n_group_vars + 1;
- criteria.crits = xnmalloc (criteria.crit_cnt, sizeof *criteria.crits);
+ base_ordering = case_ordering_create (dataset_dict (ds));
for (i = 0; i < n_splits ; i++)
- {
- const struct variable *v = dict_get_split_vars (dataset_dict (ds))[i];
- criteria.crits[i].fv = var_get_case_index (v);
- criteria.crits[i].width = var_get_width (v);
- criteria.crits[i].dir = SRT_ASCEND;
- }
+ case_ordering_add_var (base_ordering,
+ dict_get_split_vars (dataset_dict (ds))[i],
+ SRT_ASCEND);
+
for (i = 0; i < n_group_vars; i++)
+ case_ordering_add_var (base_ordering, group_vars[i], SRT_ASCEND);
+ for (i = 0 ; i < case_ordering_get_var_cnt (sc) ; ++i )
{
- criteria.crits[i + n_splits].fv = var_get_case_index (group_vars[i]);
- criteria.crits[i + n_splits].width = var_get_width (group_vars[i]);
- criteria.crits[i + n_splits].dir = SRT_ASCEND;
- }
- for (i = 0 ; i < sc->crit_cnt ; ++i )
- {
- struct casefile *out ;
- struct casefile *cf ;
- struct casereader *reader ;
- struct casefile *sorted_cf ;
-
- /* Obtain active file in CF. */
- if (!procedure (ds, NULL, NULL))
- goto error;
-
- cf = proc_capture_output (ds);
-
- /* Sort CF into SORTED_CF. */
- reader = casefile_get_destructive_reader (cf) ;
- criteria.crits[criteria.crit_cnt - 1] = sc->crits[i];
- assert ( sc->crits[i].fv == var_get_case_index (src_vars[i]) );
- sorted_cf = sort_execute (reader, &criteria, NULL);
- casefile_destroy (cf);
-
- out = rank_sorted_casefile (sorted_cf, &criteria,
- dataset_dict (ds),
- rank_specs, n_rank_specs,
- i, var_get_missing_values (src_vars[i]));
- if ( NULL == out )
- {
- result = false ;
- continue ;
- }
-
- proc_set_source (ds, storage_source_create (out));
+ struct case_ordering *ordering;
+ struct casegrouper *grouper;
+ struct casereader *group;
+ struct casewriter *output;
+ struct casereader *ranked_file;
+
+ ordering = case_ordering_clone (base_ordering);
+ case_ordering_add_var (ordering,
+ case_ordering_get_var (sc, i),
+ case_ordering_get_direction (sc, i));
+
+ proc_discard_output (ds);
+ grouper = casegrouper_create_case_ordering (sort_execute (proc_open (ds),
+ ordering),
+ base_ordering);
+ output = autopaging_writer_create (dict_get_next_value_idx (
+ dataset_dict (ds)));
+ while (casegrouper_get_next_group (grouper, &group))
+ rank_sorted_file (group, output, dataset_dict (ds),
+ rank_specs, n_rank_specs,
+ i, src_vars[i]);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
+ ranked_file = casewriter_make_reader (output);
+ ok = proc_set_active_file_data (ds, ranked_file) && ok;
+ if (!ok)
+ break;
}
+ case_ordering_destroy (base_ordering);
- free (criteria.crits);
- return result ;
-
-error:
- free (criteria.crits);
- return false ;
+ return ok;
}
/* Hardly a rank function !! */
int i, double w UNUSED)
{
double rank;
- if ( c >= 1.0 )
+
+ if ( c >= 1.0 )
{
switch (cmd.ties)
{
NOT_REACHED();
}
-
-/* Rank the casefile belonging to CR, starting from the current
- postition of CR continuing up to and including the ENDth case.
-
- RS points to an array containing the rank specifications to
- use. N_RANK_SPECS is the number of elements of RS.
-
-
- DEST_VAR_INDEX is the index into the rank_spec destvar element
- to be used for this ranking.
-
- Prerequisites: 1. The casefile must be sorted according to CRITERION.
- 2. W is the sum of the non-missing caseweights for this
- range of the casefile.
-*/
static void
-rank_cases (struct casereader *cr,
- unsigned long end,
- const struct dictionary *dict,
- const struct sort_criterion *criterion,
- const struct missing_values *mv,
- double w,
- const struct rank_spec *rs,
- int n_rank_specs,
- int dest_var_index,
- struct casefile *dest)
+rank_sorted_file (struct casereader *input,
+ struct casewriter *output,
+ const struct dictionary *dict,
+ const struct rank_spec *rs,
+ int n_rank_specs,
+ int dest_idx,
+ struct variable *rank_var)
{
- bool warn = true;
+ struct casereader *pass1, *pass2, *pass2_1;
+ struct casegrouper *tie_grouper;
+ struct ccase c;
+ double w = 0.0;
double cc = 0.0;
- double cc_1;
- int iter = 1;
+ int tie_group = 1;
- const int fv = criterion->fv;
- const int width = criterion->width;
- while (casereader_cnum (cr) < end)
- {
- struct casereader *lookahead;
- const union value *this_value;
- bool this_value_is_missing;
- struct ccase this_case, lookahead_case;
- double c;
- int i;
- size_t n = 0;
-
- if (!casereader_read_xfer (cr, &this_case))
- break;
+ input = casereader_create_filter_missing (input, &rank_var, 1,
+ exclude_values, output);
+ input = casereader_create_filter_weight (input, dict, NULL, output);
- this_value = case_data_idx (&this_case, fv);
- this_value_is_missing = mv_is_value_missing (mv, this_value,
- exclude_values);
- c = dict_get_case_weight (dict, &this_case, &warn);
+ casereader_split (input, &pass1, &pass2);
- lookahead = casereader_clone (cr);
- n = 0;
- while (casereader_cnum (lookahead) < end
- && casereader_read_xfer (lookahead, &lookahead_case))
- {
- const union value *lookahead_value = case_data_idx (&lookahead_case, fv);
- int diff = compare_values (this_value, lookahead_value, width);
+ /* Pass 1: Get total group weight. */
+ for (; casereader_read (pass1, &c); case_destroy (&c))
+ w += dict_get_case_weight (dict, &c, NULL);
+ casereader_destroy (pass1);
- if (diff != 0)
- {
- /* Make sure the casefile was sorted */
- assert ( diff == ((criterion->dir == SRT_ASCEND) ? -1 :1));
-
- case_destroy (&lookahead_case);
- break;
- }
-
- c += dict_get_case_weight (dict, &lookahead_case, &warn);
- case_destroy (&lookahead_case);
- n++;
- }
- casereader_destroy (lookahead);
-
- cc_1 = cc;
- if ( !this_value_is_missing )
- cc += c;
-
- do
- {
- for (i = 0; i < n_rank_specs; ++i)
- {
- const struct variable *dst_var = rs[i].destvars[dest_var_index];
-
- if (this_value_is_missing)
- case_data_rw (&this_case, dst_var)->f = SYSMIS;
- else
- case_data_rw (&this_case, dst_var)->f =
- rank_func[rs[i].rfunc](c, cc, cc_1, iter, w);
- }
- casefile_append_xfer (dest, &this_case);
- }
- while (n-- > 0 && casereader_read_xfer (cr, &this_case));
-
- if ( !this_value_is_missing )
- iter++;
- }
-
- /* If this isn't true, then all the results will be wrong */
- assert ( w == cc );
-}
-
-static bool
-same_group (const struct ccase *a, const struct ccase *b,
- const struct sort_criteria *crit)
-{
- size_t i;
-
- for (i = 0; i < crit->crit_cnt - 1; i++)
+ /* Pass 2: Do ranking. */
+ tie_grouper = casegrouper_create_vars (pass2, &rank_var, 1);
+ while (casegrouper_get_next_group (tie_grouper, &pass2_1))
{
- struct sort_criterion *c = &crit->crits[i];
- if (compare_values (case_data_idx (a, c->fv),
- case_data_idx (b, c->fv), c->width) != 0)
- return false;
- }
-
- return true;
-}
-
-static struct casefile *
-rank_sorted_casefile (struct casefile *cf,
- const struct sort_criteria *crit,
- const struct dictionary *dict,
- const struct rank_spec *rs,
- int n_rank_specs,
- int dest_idx,
- const struct missing_values *mv)
-{
- struct casefile *dest = fastfile_create (casefile_get_value_cnt (cf));
- struct casereader *lookahead = casefile_get_reader (cf, NULL);
- struct casereader *pos = casereader_clone (lookahead);
- struct ccase group_case;
- bool warn = true;
-
- struct sort_criterion *ultimate_crit = &crit->crits[crit->crit_cnt - 1];
+ struct casereader *pass2_2;
+ double cc_1 = cc;
+ double tw = 0.0;
+ int i;
- if (casereader_read (lookahead, &group_case))
- {
- struct ccase this_case;
- const union value *this_value ;
- double w = 0.0;
- this_value = case_data_idx( &group_case, ultimate_crit->fv);
+ pass2_2 = casereader_clone (pass2_1);
+ taint_propagate (casereader_get_taint (pass2_2),
+ casewriter_get_taint (output));
- if ( !mv_is_value_missing (mv, this_value, exclude_values) )
- w = dict_get_case_weight (dict, &group_case, &warn);
+ /* Pass 2.1: Sum up weight for tied cases. */
+ for (; casereader_read (pass2_1, &c); case_destroy (&c))
+ tw += dict_get_case_weight (dict, &c, NULL);
+ cc += tw;
+ casereader_destroy (pass2_1);
- while (casereader_read (lookahead, &this_case))
+ /* Pass 2.2: Rank tied cases. */
+ while (casereader_read (pass2_2, &c))
{
- const union value *this_value =
- case_data_idx(&this_case, ultimate_crit->fv);
- double c = dict_get_case_weight (dict, &this_case, &warn);
- if (!same_group (&group_case, &this_case, crit))
+ for (i = 0; i < n_rank_specs; ++i)
{
- rank_cases (pos, casereader_cnum (lookahead) - 1,
- dict,
- ultimate_crit,
- mv, w,
- rs, n_rank_specs,
- dest_idx, dest);
-
- w = 0.0;
- case_destroy (&group_case);
- case_move (&group_case, &this_case);
+ const struct variable *dst_var = rs[i].destvars[dest_idx];
+ double *dst_value = &case_data_rw (&c, dst_var)->f;
+ *dst_value = rank_func[rs[i].rfunc] (tw, cc, cc_1, tie_group, w);
}
- if ( !mv_is_value_missing (mv, this_value, exclude_values) )
- w += c;
- case_destroy (&this_case);
+ casewriter_write (output, &c);
}
- case_destroy (&group_case);
- rank_cases (pos, ULONG_MAX, dict, ultimate_crit, mv, w,
- rs, n_rank_specs, dest_idx, dest);
- }
-
- if (casefile_error (dest))
- {
- casefile_destroy (dest);
- dest = NULL;
+ casereader_destroy (pass2_2);
+
+ tie_group++;
}
-
- casefile_destroy (cf);
- return dest;
+ casegrouper_destroy (tie_grouper);
}
-
/* Transformation function to enumerate all the cases */
static int
create_resort_key (void *key_var_, struct ccase *cc, casenumber case_num)
rank_specs = NULL;
n_rank_specs = 0;
- sort_destroy_criteria (sc);
+ case_ordering_destroy (sc);
sc = NULL;
free (src_vars);
rank_specs = xmalloc (sizeof (*rank_specs));
rank_specs[0].rfunc = RANK;
- rank_specs[0].destvars =
- xcalloc (sc->crit_cnt, sizeof (struct variable *));
+ rank_specs[0].destvars =
+ xcalloc (case_ordering_get_var_cnt (sc), sizeof (struct variable *));
n_rank_specs = 1;
}
- assert ( sc->crit_cnt == n_src_vars);
+ assert ( case_ordering_get_var_cnt (sc) == n_src_vars);
/* Create variables for all rank destinations which haven't
already been created with INTO.
msg(MW, _("FRACTION has been specified, but NORMAL and PROPORTION rank functions have not been requested. The FRACTION subcommand will be ignored.") );
/* Add a variable which we can sort by to get back the original
- order */
- order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0);
+ order */
+ order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0);
add_transformation (ds, create_resort_key, 0, order);
/* Do the ranking */
result = rank_cmd (ds, sc, rank_specs, n_rank_specs);
- /* Put the active file back in its original order */
+ /* Put the active file back in its original order. Delete
+ our sort key, which we don't need anymore. */
{
- struct sort_criteria criteria;
- struct sort_criterion restore_criterion ;
- restore_criterion.fv = var_get_case_index (order);
- restore_criterion.width = 0;
- restore_criterion.dir = SRT_ASCEND;
-
- criteria.crits = &restore_criterion;
- criteria.crit_cnt = 1;
-
- sort_active_file_in_place (ds, &criteria);
+ struct case_ordering *ordering = case_ordering_create (dataset_dict (ds));
+ struct casereader *sorted;
+ case_ordering_add_var (ordering, order, SRT_ASCEND);
+ /* FIXME: loses error conditions. */
+ proc_discard_output (ds);
+ sorted = sort_execute (proc_open (ds), ordering);
+ result = proc_commit (ds) && result;
+
+ dict_delete_var (dataset_dict (ds), order);
+ result = proc_set_active_file_data (ds, sorted) && result;
}
- /* ... and we don't need our sort key anymore. So delete it */
- dict_delete_var (dataset_dict (ds), order);
-
rank_cleanup();
static int
rank_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd UNUSED, void *aux UNUSED)
{
- static const int terminators[2] = {T_BY, 0};
-
lex_match (lexer, '=');
if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
&& lex_token (lexer) != T_ALL)
return 2;
- sc = sort_parse_criteria (lexer, dataset_dict (ds),
- &src_vars, &n_src_vars, 0, terminators);
+ sc = parse_case_ordering (lexer, dataset_dict (ds), NULL);
+ if (sc == NULL)
+ return 0;
+ case_ordering_get_vars (sc, &src_vars, &n_src_vars);
if ( lex_match (lexer, T_BY) )
{
rank_specs[n_rank_specs - 1].rfunc = f;
rank_specs[n_rank_specs - 1].destvars = NULL;
- rank_specs[n_rank_specs - 1].destvars =
- xcalloc (sc->crit_cnt, sizeof (struct variable *));
-
+ rank_specs[n_rank_specs - 1].destvars =
+ xcalloc (case_ordering_get_var_cnt (sc),
+ sizeof (struct variable *));
+
if (lex_match_id (lexer, "INTO"))
{
struct variable *destvar;
msg(SE, _("Variable %s already exists."), lex_tokid (lexer));
return 0;
}
- if ( var_count >= sc->crit_cnt )
+ if ( var_count >= case_ordering_get_var_cnt (sc) )
{
msg(SE, _("Too many variables in INTO clause."));
return 0;
#include "regression-export.h"
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/category.h>
#include <data/dictionary.h>
#include <data/missing-values.h>
#include <libpspp/alloc.h>
#include <libpspp/compiler.h>
#include <libpspp/message.h>
+#include <libpspp/taint.h>
#include <math/design-matrix.h>
#include <math/coefficient.h>
#include <math/linreg/linreg.h>
#include <output/table.h>
#include "gettext.h"
+#define _(msgid) gettext (msgid)
#define REG_LARGE_DATA 1000
*/
static struct file_handle *model_file;
-/*
- Return value for the procedure.
- */
-static int pspp_reg_rc = CMD_SUCCESS;
-
-static bool run_regression (const struct ccase *,
- const struct casefile *, void *,
- const struct dataset *);
+static bool run_regression (struct casereader *, struct cmd_regression *,
+ struct dataset *);
/*
STATISTICS subcommand output functions.
int
cmd_regression (struct lexer *lexer, struct dataset *ds)
{
+ struct casegrouper *grouper;
+ struct casereader *group;
+ bool ok;
size_t i;
if (!parse_regression (lexer, ds, &cmd, NULL))
{
models[i] = NULL;
}
- if (!multipass_procedure_with_splits (ds, run_regression, &cmd))
- return CMD_CASCADING_FAILURE;
+
+ /* Data pass. */
+ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ run_regression (group, &cmd, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
+
subcommand_save (ds, cmd.sbc_save, models);
free (v_variables);
free (models);
- return pspp_reg_rc;
+ return ok ? CMD_SUCCESS : CMD_FAILURE;
}
/*
return v == v_variables[k];
}
-/*
- Mark missing cases. Return the number of non-missing cases.
- Compute the first two moments.
- */
-static size_t
-mark_missing_cases (const struct casefile *cf, const struct variable *v,
- int *is_missing_case, double n_data,
- struct moments_var *mom)
-{
- struct casereader *r;
- struct ccase c;
- size_t row;
- const union value *val;
- double w = 1.0;
-
- for (r = casefile_get_reader (cf, NULL);
- casereader_read (r, &c); case_destroy (&c))
- {
- row = casereader_cnum (r) - 1;
-
- val = case_data (&c, v);
- if (mom != NULL)
- {
- moments1_add (mom->m, val->f, w);
- }
- cat_value_update (v, val);
- if (var_is_value_missing (v, val, MV_ANY))
- {
- if (!is_missing_case[row])
- {
- /* Now it is missing. */
- n_data--;
- is_missing_case[row] = 1;
- }
- }
- }
- casereader_destroy (r);
-
- return n_data;
-}
-
/* Parser for the variables sub command */
static int
regression_custom_variables (struct lexer *lexer, struct dataset *ds,
return 1;
}
-/*
- Count the explanatory variables. The user may or may
- not have specified a response variable in the syntax.
- */
+/* Identifies the explanatory variables in v_variables and stores
+ them into INDEP_VARS, which must have room for n_variables
+ elements. Every variable other than DEPVAR is explanatory.
+ When v_variables holds only a single variable, it is stored
+ even if it is also DEPVAR, so that the procedure can still run
+ with one variable acting as both explanatory and response.
+ Returns the number of variables stored into INDEP_VARS. */
static int
-get_n_indep (const struct variable *v)
+identify_indep_vars (struct variable **indep_vars, struct variable *depvar)
{
- int result;
- int i = 0;
+ int n_indep_vars = 0;
+ int i;
- result = n_variables;
- while (i < n_variables)
- {
- if (is_depvar (i, v))
- {
- result--;
- i = n_variables;
- }
- i++;
- }
- return (result == 0) ? 1 : result;
+ for (i = 0; i < n_variables; i++)
+ if (!is_depvar (i, depvar) || n_variables == 1)
+ indep_vars[n_indep_vars++] = v_variables[i];
+
+ return n_indep_vars;
}
-/*
- Read from the active file. Identify the explanatory variables in
- v_variables. Encode categorical variables. Drop cases with missing
- values.
-*/
+/* Encode categorical variables.
+ Returns number of valid cases. */
static int
-prepare_data (int n_data, int is_missing_case[],
- const struct variable **indep_vars,
- const struct variable *depvar, const struct casefile *cf,
- struct moments_var *mom)
+prepare_categories (struct casereader *input,
+ struct variable **vars, size_t n_vars,
+ struct moments_var *mom)
{
- int i;
- int j;
+ int n_data;
+ struct ccase c;
+ size_t i;
- assert (indep_vars != NULL);
- j = 0;
- for (i = 0; i < n_variables; i++)
+ for (i = 0; i < n_vars; i++)
+ if (var_is_alpha (vars[i]))
+ cat_stored_values_create (vars[i]);
+
+ n_data = 0;
+ for (; casereader_read (input, &c); case_destroy (&c))
{
/*
For each variable in this case: string variables are passed
to cat_value_update, numeric values are added to that
variable's moments with weight 1.
*/
- if ((!is_depvar (i, depvar)) || (n_variables == 1))
- {
- indep_vars[j] = v_variables[i];
- j++;
- if (var_is_alpha (v_variables[i]))
- {
- /* Make a place to hold the binary vectors
- corresponding to this variable's values. */
- cat_stored_values_create (v_variables[i]);
- }
- n_data =
- mark_missing_cases (cf, v_variables[i], is_missing_case, n_data,
- mom + i);
- }
- }
- /*
- Mark missing cases for the dependent variable.
- */
- n_data = mark_missing_cases (cf, depvar, is_missing_case, n_data, NULL);
+ for (i = 0; i < n_vars; i++)
+ {
+ const union value *val = case_data (&c, vars[i]);
+ if (var_is_alpha (vars[i]))
+ cat_value_update (vars[i], val);
+ else
+ moments1_add (mom[i].m, val->f, 1.0);
+ }
+ n_data++;
+ }
+ casereader_destroy (input);
return n_data;
}
+
static void
coeff_init (pspp_linreg_cache * c, struct design_matrix *dm)
{
}
}
}
+
static bool
-run_regression (const struct ccase *first,
- const struct casefile *cf, void *cmd_ UNUSED,
- const struct dataset *ds)
+run_regression (struct casereader *input, struct cmd_regression *cmd,
+ struct dataset *ds)
{
size_t i;
- size_t n_data = 0; /* Number of valide cases. */
- size_t n_cases; /* Number of cases. */
- size_t row;
- size_t case_num;
int n_indep = 0;
int k;
- /*
- Keep track of the missing cases.
- */
- int *is_missing_case;
- const union value *val;
- struct casereader *r;
struct ccase c;
const struct variable **indep_vars;
struct design_matrix *X;
assert (models != NULL);
- output_split_file_values (ds, first);
+ if (!casereader_peek (input, 0, &c))
+ return true;
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
if (!v_variables)
{
1u << DC_SYSTEM);
}
- n_cases = casefile_get_case_cnt (cf);
-
- for (i = 0; i < cmd.n_dependent; i++)
+ for (i = 0; i < cmd->n_dependent; i++)
{
- if (!var_is_numeric (cmd.v_dependent[i]))
+ if (!var_is_numeric (cmd->v_dependent[i]))
{
- msg (SE, gettext ("Dependent variable must be numeric."));
- pspp_reg_rc = CMD_FAILURE;
- return true;
+ msg (SE, _("Dependent variable must be numeric."));
+ return false;
}
}
- is_missing_case = xnmalloc (n_cases, sizeof (*is_missing_case));
mom = xnmalloc (n_variables, sizeof (*mom));
for (i = 0; i < n_variables; i++)
{
}
lopts.get_depvar_mean_std = 1;
- for (k = 0; k < cmd.n_dependent; k++)
+ lopts.get_indep_mean_std = xnmalloc (n_variables, sizeof (int));
+ indep_vars = xnmalloc (n_variables, sizeof *indep_vars);
+
+ for (k = 0; k < cmd->n_dependent; k++)
{
- n_indep = get_n_indep ((const struct variable *) cmd.v_dependent[k]);
- lopts.get_indep_mean_std = xnmalloc (n_indep, sizeof (int));
- indep_vars = xnmalloc (n_indep, sizeof *indep_vars);
- assert (indep_vars != NULL);
+ struct variable *dep_var;
+ struct casereader *reader;
+ casenumber row;
+ struct ccase c;
+ size_t n_data; /* Number of valid cases. */
+
+ dep_var = cmd->v_dependent[k];
+ n_indep = identify_indep_vars (indep_vars, dep_var);
+
+ reader = casereader_clone (input);
+ reader = casereader_create_filter_missing (reader, indep_vars, n_indep,
+ MV_ANY, NULL);
+ reader = casereader_create_filter_missing (reader, &dep_var, 1,
+ MV_ANY, NULL);
+ n_data = prepare_categories (casereader_clone (reader),
+ indep_vars, n_indep, mom);
- for (i = 0; i < n_cases; i++)
- {
- is_missing_case[i] = 0;
- }
- n_data = prepare_data (n_cases, is_missing_case, indep_vars,
- cmd.v_dependent[k],
- (const struct casefile *) cf, mom);
if ((n_data > 0) && (n_indep > 0))
{
Y = gsl_vector_alloc (n_data);
models[k] = pspp_linreg_cache_alloc (X->m->size1, X->m->size2);
models[k]->indep_means = gsl_vector_alloc (X->m->size2);
models[k]->indep_std = gsl_vector_alloc (X->m->size2);
- models[k]->depvar = (const struct variable *) cmd.v_dependent[k];
- /*
+ models[k]->depvar = dep_var;
+ /*
For large data sets, use QR decomposition.
*/
if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA)
}
/*
- The second pass fills the design matrix.
- */
- row = 0;
- for (r = casefile_get_reader (cf, NULL); casereader_read (r, &c);
- case_destroy (&c))
- /* Iterate over the cases. */
- {
- case_num = casereader_cnum (r) - 1;
- if (!is_missing_case[case_num])
- {
- for (i = 0; i < n_variables; ++i) /* Iterate over the
- variables for the
- current case.
- */
- {
- val = case_data (&c, v_variables[i]);
- /*
- Independent/dependent variable separation. The
- 'variables' subcommand specifies a varlist which contains
- both dependent and independent variables. The dependent
- variables are specified with the 'dependent'
- subcommand, and maybe also in the 'variables' subcommand.
- We need to separate the two.
- */
- if (!is_depvar (i, cmd.v_dependent[k]))
- {
- if (var_is_alpha (v_variables[i]))
- {
- design_matrix_set_categorical (X, row,
- v_variables[i],
- val);
- }
- else
- {
- design_matrix_set_numeric (X, row,
- v_variables[i], val);
- }
- }
- }
- val = case_data (&c, cmd.v_dependent[k]);
- gsl_vector_set (Y, row, val->f);
- row++;
- }
- }
+ The second pass fills the design matrix.
+ */
+ reader = casereader_create_counter (reader, &row, -1);
+ for (; casereader_read (reader, &c); case_destroy (&c))
+ {
+ for (i = 0; i < n_indep; ++i)
+ {
+ struct variable *v = indep_vars[i];
+ const union value *val = case_data (&c, v);
+ if (var_is_alpha (v))
+ design_matrix_set_categorical (X, row, v, val);
+ else
+ design_matrix_set_numeric (X, row, v, val);
+ }
+ gsl_vector_set (Y, row, case_num (&c, dep_var));
+ }
+ casereader_destroy (reader);
/*
Now that we know the number of coefficients, allocate space
and store pointers to the variables that correspond to the
*/
pspp_linreg ((const gsl_vector *) Y, X->m, &lopts, models[k]);
compute_moments (models[k], mom, X, n_variables);
- subcommand_statistics (cmd.a_statistics, models[k]);
- subcommand_export (cmd.sbc_export, models[k]);
+
+ if (!taint_has_tainted_successor (casereader_get_taint (input)))
+ {
+ subcommand_statistics (cmd->a_statistics, models[k]);
+ subcommand_export (cmd->sbc_export, models[k]);
+ }
gsl_vector_free (Y);
design_matrix_destroy (X);
- free (indep_vars);
- free (lopts.get_indep_mean_std);
- casereader_destroy (r);
}
else
{
msg (SE, gettext ("No valid data found. This command was skipped."));
}
}
- for (i = 0; i < n_variables; i++)
- {
- moments1_destroy ((mom + i)->m);
- }
- free (mom);
- free (is_missing_case);
+ free (indep_vars);
+ free (lopts.get_indep_mean_std);
+ casereader_destroy (input);
return true;
}
#include <language/lexer/lexer.h>
#include <libpspp/alloc.h>
#include <libpspp/message.h>
+#include <data/case-ordering.h>
#include <math/sort.h>
#include <sys/types.h>
int
cmd_sort_cases (struct lexer *lexer, struct dataset *ds)
{
- struct sort_criteria *criteria;
- bool success = false;
+ struct case_ordering *ordering;
+ struct casereader *output;
+ bool ok = false;
lex_match (lexer, T_BY);
- criteria = sort_parse_criteria (lexer, dataset_dict (ds), NULL, NULL, NULL, NULL);
- if (criteria == NULL)
+ proc_cancel_temporary_transformations (ds);
+ ordering = parse_case_ordering (lexer, dataset_dict (ds), NULL);
+ if (ordering == NULL)
return CMD_CASCADING_FAILURE;
if (get_testing_mode () && lex_match (lexer, '/'))
goto done;
min_buffers = max_buffers = lex_integer (lexer);
- allow_internal_sort = false;
if (max_buffers < 2)
{
msg (SE, _("Buffer limit must be at least 2."));
lex_get (lexer);
}
- success = sort_active_file_in_place (ds, criteria);
+ proc_discard_output (ds);
+ output = sort_execute (proc_open (ds), ordering);
+ ordering = NULL;
+ ok = proc_commit (ds);
+ ok = proc_set_active_file_data (ds, output) && ok;
done:
min_buffers = 64;
max_buffers = INT_MAX;
- allow_internal_sort = true;
- sort_destroy_criteria (criteria);
- return success ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
+ case_ordering_destroy (ordering);
+ return ok ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
}
/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
02110-1301, USA. */
#include <config.h>
-#include <sys/types.h>
-#include <assert.h>
+
+#include <language/stats/sort-criteria.h>
+
#include <stdlib.h>
-#include <limits.h>
-#include <libpspp/alloc.h>
-#include <language/command.h>
-#include <libpspp/message.h>
+
+#include <data/case-ordering.h>
+#include <data/dictionary.h>
+#include <data/variable.h>
#include <language/lexer/lexer.h>
#include <language/lexer/variable-parser.h>
-#include <data/settings.h>
-#include <data/variable.h>
-#include "sort-criteria.h"
-#include <math/sort.h>
+#include <libpspp/message.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
-static bool is_terminator(int tok, const int *terminators);
-
-
/* Parses a list of sort keys and returns a struct case_ordering
based on it. Returns a null pointer on error.
If SAW_DIRECTION is nonnull, sets *SAW_DIRECTION to true if at
least one parenthesized sort direction was specified, false
- otherwise.
- If TERMINATORS is non-null, then it must be a pointer to a
- null terminated list of tokens, in addition to the defaults,
- which are to be considered terminators of the clause being parsed.
- The default terminators are '/' and '.'
-
-*/
-struct sort_criteria *
-sort_parse_criteria (struct lexer *lexer, const struct dictionary *dict,
- const struct variable ***vars, size_t *var_cnt,
- bool *saw_direction,
- const int *terminators
- )
+ otherwise. */
+struct case_ordering *
+parse_case_ordering (struct lexer *lexer, const struct dictionary *dict,
+ bool *saw_direction)
{
- struct sort_criteria *criteria;
- const struct variable **local_vars = NULL;
- size_t local_var_cnt;
-
- assert ((vars == NULL) == (var_cnt == NULL));
- if (vars == NULL)
- {
- vars = &local_vars;
- var_cnt = &local_var_cnt;
- }
-
- criteria = xmalloc (sizeof *criteria);
- criteria->crits = NULL;
- criteria->crit_cnt = 0;
-
- *vars = NULL;
- *var_cnt = 0;
- if (saw_direction != NULL)
+ struct case_ordering *ordering = case_ordering_create (dict);
+ struct variable **vars = NULL;
+ size_t var_cnt = 0;
+
+ if (saw_direction != NULL)
*saw_direction = false;
do
{
- size_t prev_var_cnt = *var_cnt;
enum sort_direction direction;
+ size_t i;
/* Variables. */
- if (!parse_variables_const (lexer, dict, vars, var_cnt,
- PV_NO_DUPLICATE | PV_APPEND | PV_NO_SCRATCH))
+ free (vars);
+ vars = NULL;
+ if (!parse_variables_const (lexer, dict, &vars, &var_cnt, PV_NO_SCRATCH))
goto error;
/* Sort direction. */
else
direction = SRT_ASCEND;
- criteria->crits = xnrealloc (criteria->crits,
- *var_cnt, sizeof *criteria->crits);
- criteria->crit_cnt = *var_cnt;
- for (; prev_var_cnt < criteria->crit_cnt; prev_var_cnt++)
- {
- struct sort_criterion *c = &criteria->crits[prev_var_cnt];
- c->fv = var_get_case_index ((*vars)[prev_var_cnt]);
- c->width = var_get_width ((*vars)[prev_var_cnt]);
- c->dir = direction;
- }
+ for (i = 0; i < var_cnt; i++)
+ if (!case_ordering_add_var (ordering, vars[i], direction))
+ msg (SW, _("Variable %s specified twice in sort criteria."),
+ var_get_name (vars[i]));
}
- while (lex_token (lexer) != '.' && lex_token (lexer) != '/' && !is_terminator(lex_token (lexer), terminators));
+ while (lex_token (lexer) == T_ID
+ && dict_lookup_var (dict, lex_tokid (lexer)) != NULL);
- free (local_vars);
- return criteria;
+ free (vars);
+ return ordering;
error:
- free (local_vars);
- sort_destroy_criteria (criteria);
+ free (vars);
+ case_ordering_destroy (ordering);
return NULL;
}
-
-/* Return TRUE if TOK is a member of the list of TERMINATORS.
- FALSE otherwise */
-static bool
-is_terminator(int tok, const int *terminators)
-{
- if (terminators == NULL )
- return false;
-
- while ( *terminators)
- {
- if (tok == *terminators++)
- return true;
- }
-
- return false;
-}
-
-
-
-/* Destroys a SORT CASES program. */
-void
-sort_destroy_criteria (struct sort_criteria *criteria)
-{
- if (criteria != NULL)
- {
- free (criteria->crits);
- free (criteria);
- }
-}
-
-
-
#include <stdbool.h>
#include <stddef.h>
-struct variable;
struct dictionary;
-struct lexer ;
+struct lexer;
-struct sort_criteria *sort_parse_criteria (struct lexer *, const struct dictionary *,
- const struct variable ***, size_t *,
- bool *saw_direction,
- const int *terminators
- );
-
-void sort_destroy_criteria (struct sort_criteria *criteria) ;
+struct case_ordering *parse_case_ordering (struct lexer *,
+ const struct dictionary *,
+ bool *saw_direction);
#endif /* SORT_PRS_H */
#include <stdlib.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/value-labels.h>
#include <data/variable.h>
-#include <data/casefilter.h>
-
#include <language/command.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
#include <libpspp/hash.h>
#include <libpspp/magic.h>
#include <libpspp/message.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/taint.h>
#include <math/group-proc.h>
#include <math/levene.h>
#include <output/manager.h>
static int common_calc (const struct dictionary *dict,
const struct ccase *, void *,
- const struct casefilter *filter);
+ enum mv_class);
static void common_precalc (struct cmd_t_test *);
static void common_postcalc (struct cmd_t_test *);
-static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, const struct casefilter *);
+static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, enum mv_class);
static void one_sample_precalc (struct cmd_t_test *);
static void one_sample_postcalc (struct cmd_t_test *);
static int paired_calc (const struct dictionary *dict, const struct ccase *,
- struct cmd_t_test*, const struct casefilter *);
+ struct cmd_t_test*, enum mv_class);
static void paired_precalc (struct cmd_t_test *);
static void paired_postcalc (struct cmd_t_test *);
static void group_precalc (struct cmd_t_test *);
static int group_calc (const struct dictionary *dict, const struct ccase *,
- struct cmd_t_test *, const struct casefilter *);
+ struct cmd_t_test *, enum mv_class);
static void group_postcalc (struct cmd_t_test *);
-static bool calculate(const struct ccase *first,
- const struct casefile *cf, void *_mode,
- const struct dataset *ds);
+static void calculate(struct cmd_t_test *,
+ struct casereader *,
+ const struct dataset *);
static int mode;
int
cmd_t_test (struct lexer *lexer, struct dataset *ds)
{
+ struct casegrouper *grouper;
+ struct casereader *group;
bool ok;
if ( !parse_t_test (lexer, ds, &cmd, NULL) )
bad_weight_warn = true;
- ok = multipass_procedure_with_splits (ds, calculate, &cmd);
+ /* Data pass. */
+ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ calculate (&cmd, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
n_pairs=0;
free(pairs);
common_calc (const struct dictionary *dict,
const struct ccase *c,
void *_cmd,
- const struct casefilter *filter)
+ enum mv_class exclude)
{
int i;
struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd;
- double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+ double weight = dict_get_case_weight (dict, c, NULL);
/* Listwise has to be implicit if the independent variable is missing ?? */
if ( cmd->sbc_groups )
{
- if ( casefilter_variable_missing (filter, c, indep_var) )
+ if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
return 0;
}
for(i = 0; i < cmd->n_variables ; ++i)
{
const struct variable *v = cmd->v_variables[i];
-
- if (! casefilter_variable_missing (filter, c, v) )
+ const union value *val = case_data (c, v);
+
+ if (!var_is_value_missing (v, val, exclude))
{
struct group_statistics *gs;
- const union value *val = case_data (c, v);
- gs = &group_proc_get (cmd->v_variables[i])->ugs;
+ gs = &group_proc_get (v)->ugs;
gs->n += weight;
gs->sum += weight * val->f;
static int
one_sample_calc (const struct dictionary *dict,
const struct ccase *c, void *cmd_,
- const struct casefilter *filter)
+ enum mv_class exclude)
{
int i;
struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_;
- double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+ double weight = dict_get_case_weight (dict, c, NULL);
for(i=0; i< cmd->n_variables ; ++i)
gs= &group_proc_get (cmd->v_variables[i])->ugs;
- if ( ! casefilter_variable_missing (filter, c, v))
+ if (!var_is_value_missing (v, val, exclude))
gs->sum_diff += weight * (val->f - cmd->n_testval[0]);
}
static int
paired_calc (const struct dictionary *dict, const struct ccase *c,
- struct cmd_t_test *cmd UNUSED, const struct casefilter *filter)
+ struct cmd_t_test *cmd UNUSED, enum mv_class exclude)
{
int i;
- double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+ double weight = dict_get_case_weight (dict, c, NULL);
for(i=0; i < n_pairs ; ++i )
{
const union value *val0 = case_data (c, v0);
const union value *val1 = case_data (c, v1);
- if ( ! casefilter_variable_missing (filter, c, v0) &&
- ! casefilter_variable_missing (filter, c, v1) )
+ if (!var_is_value_missing (v0, val0, exclude) &&
+ !var_is_value_missing (v1, val1, exclude))
{
pairs[i].n += weight;
pairs[i].sum[0] += weight * val0->f;
static int
group_calc (const struct dictionary *dict,
const struct ccase *c, struct cmd_t_test *cmd,
- const struct casefilter *filter)
+ enum mv_class exclude)
{
int i;
- const double weight =
- dict_get_case_weight (dict, c, &bad_weight_warn);
+ const double weight = dict_get_case_weight (dict, c, NULL);
const union value *gv;
- if ( casefilter_variable_missing (filter, c, indep_var))
+ if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
return 0;
gv = case_data (c, indep_var);
if ( ! gs )
return 0;
- if ( ! casefilter_variable_missing (filter, c, var) )
+ if (!var_is_value_missing (var, val, exclude))
{
gs->n += weight;
gs->sum += weight * val->f;
-static bool
-calculate(const struct ccase *first, const struct casefile *cf,
- void *cmd_, const struct dataset *ds)
+static void
+calculate(struct cmd_t_test *cmd,
+ struct casereader *input, const struct dataset *ds)
{
const struct dictionary *dict = dataset_dict (ds);
struct ssbox stat_summary_box;
struct trbox test_results_box;
- struct casereader *r;
+ struct casereader *pass1, *pass2, *pass3;
+ struct taint *taint;
struct ccase c;
- struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_;
+ enum mv_class exclude = cmd->miss != TTS_INCLUDE ? MV_ANY : MV_SYSTEM;
- struct casefilter *filter = casefilter_create ((cmd->miss != TTS_INCLUDE
- ? MV_ANY : MV_SYSTEM),
- NULL, 0);
+ if (!casereader_peek (input, 0, &c))
+ return;
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
if ( cmd->miss == TTS_LISTWISE )
- casefilter_add_variables (filter,
- cmd->v_variables, cmd->n_variables);
+ input = casereader_create_filter_missing (input,
+ cmd->v_variables,
+ cmd->n_variables,
+ exclude, NULL);
+
+ input = casereader_create_filter_weight (input, dict, NULL, NULL);
+
+ taint = taint_clone (casereader_get_taint (input));
+ casereader_split (input, &pass1, &pass2);
- output_split_file_values (ds, first);
common_precalc (cmd);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- common_calc (dict, &c, cmd, filter);
- }
-
- casereader_destroy (r);
+ for (; casereader_read (pass1, &c); case_destroy (&c))
+ common_calc (dict, &c, cmd, exclude);
+ casereader_destroy (pass1);
common_postcalc (cmd);
switch(mode)
{
case T_1_SAMPLE:
one_sample_precalc (cmd);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- one_sample_calc (dict, &c, cmd, filter);
- }
- casereader_destroy (r);
+ for (; casereader_read (pass2, &c); case_destroy (&c))
+ one_sample_calc (dict, &c, cmd, exclude);
one_sample_postcalc (cmd);
break;
case T_PAIRED:
paired_precalc(cmd);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- paired_calc (dict, &c, cmd, filter);
- }
- casereader_destroy (r);
+ for (; casereader_read (pass2, &c); case_destroy (&c))
+ paired_calc (dict, &c, cmd, exclude);
paired_postcalc (cmd);
-
break;
case T_IND_SAMPLES:
+ pass3 = casereader_clone (pass2);
group_precalc(cmd);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- group_calc (dict, &c, cmd, filter);
- }
- casereader_destroy (r);
+ for(; casereader_read (pass2, &c); case_destroy (&c))
+ group_calc (dict, &c, cmd, exclude);
group_postcalc(cmd);
- levene (dict, cf, indep_var, cmd->n_variables, cmd->v_variables,
- filter);
+ levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables,
+ exclude);
break;
}
+ casereader_destroy (pass2);
+
+ if (!taint_has_tainted_successor (taint))
+ {
+ ssbox_create(&stat_summary_box,cmd,mode);
+ ssbox_populate(&stat_summary_box,cmd);
+ ssbox_finalize(&stat_summary_box);
- casefilter_destroy (filter);
-
- ssbox_create(&stat_summary_box,cmd,mode);
- ssbox_populate(&stat_summary_box,cmd);
- ssbox_finalize(&stat_summary_box);
-
- if ( mode == T_PAIRED)
- pscbox();
-
- trbox_create(&test_results_box,cmd,mode);
- trbox_populate(&test_results_box,cmd);
- trbox_finalize(&test_results_box);
-
- return true;
+ if ( mode == T_PAIRED )
+ pscbox();
+
+ trbox_create(&test_results_box,cmd,mode);
+ trbox_populate(&test_results_box,cmd);
+ trbox_finalize(&test_results_box);
+ }
}
short which_group(const struct group_statistics *g,
src/language/tests/check-model.c
language_tests_sources = \
- src/language/tests/casefile-test.c \
src/language/tests/check-model.h \
src/language/tests/datasheet-test.c \
src/language/tests/float-format.c \
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2004 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <config.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
-
-#include <data/case.h>
-
-#include <gsl/gsl_randist.h>
-#include <gsl/gsl_rng.h>
-#include <stdarg.h>
-#include <language/command.h>
-#include <language/lexer/lexer.h>
-#include <libpspp/assertion.h>
-
-#include "xalloc.h"
-
-static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
-static void get_random_case (struct ccase *, size_t value_cnt,
- size_t case_idx);
-static void write_random_case (struct casefile *cf, size_t case_idx);
-static void read_and_verify_random_case (struct casefile *cf,
- struct casereader *reader,
- size_t case_idx);
-static void test_casereader_clone (struct casereader *reader1, size_t case_cnt);
-
-
-static void fail_test (const char *message, ...);
-
-int
-cmd_debug_casefile (struct lexer *lexer, struct dataset *ds UNUSED)
-{
- static const size_t sizes[] =
- {
- 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
- 100, 137, 257, 521, 1031, 2053
- };
- int size_max;
- int case_max;
- int pattern;
-
- size_max = sizeof sizes / sizeof *sizes;
- if (lex_match_id (lexer, "SMALL"))
- {
- size_max -= 4;
- case_max = 511;
- }
- else
- case_max = 4095;
- if (lex_token (lexer) != '.')
- return lex_end_of_command (lexer);
-
- for (pattern = 0; pattern < 7; pattern++)
- {
- const size_t *size;
-
- for (size = sizes; size < sizes + size_max; size++)
- {
- size_t case_cnt;
-
- for (case_cnt = 0; case_cnt <= case_max;
- case_cnt = (case_cnt * 2) + 1)
- test_casefile (pattern, *size, case_cnt);
- }
- }
- printf ("Casefile tests succeeded.\n");
- return CMD_SUCCESS;
-}
-
-static void
-test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
-{
- struct casefile *cf;
- struct casereader *r1, *r2;
- struct ccase c;
- gsl_rng *rng;
- size_t i, j;
-
- rng = gsl_rng_alloc (gsl_rng_mt19937);
- cf = fastfile_create (value_cnt);
- if (pattern == 5)
- casefile_to_disk (cf);
- for (i = 0; i < case_cnt; i++)
- write_random_case (cf, i);
- if (pattern == 5)
- casefile_sleep (cf);
- r1 = casefile_get_reader (cf, NULL);
- r2 = casefile_get_reader (cf, NULL);
- switch (pattern)
- {
- case 0:
- case 5:
- for (i = 0; i < case_cnt; i++)
- {
- read_and_verify_random_case (cf, r1, i);
- read_and_verify_random_case (cf, r2, i);
- }
- break;
- case 1:
- for (i = 0; i < case_cnt; i++)
- read_and_verify_random_case (cf, r1, i);
- for (i = 0; i < case_cnt; i++)
- read_and_verify_random_case (cf, r2, i);
- break;
- case 2:
- case 3:
- case 4:
- for (i = j = 0; i < case_cnt; i++)
- {
- read_and_verify_random_case (cf, r1, i);
- if (gsl_rng_get (rng) % pattern == 0)
- read_and_verify_random_case (cf, r2, j++);
- if (i == case_cnt / 2)
- casefile_to_disk (cf);
- }
- for (; j < case_cnt; j++)
- read_and_verify_random_case (cf, r2, j);
- break;
- case 6:
- test_casereader_clone (r1, case_cnt);
- test_casereader_clone (r2, case_cnt);
- break;
- default:
- NOT_REACHED ();
- }
- if (casereader_read (r1, &c))
- fail_test ("Casereader 1 not at end of file.");
- if (casereader_read (r2, &c))
- fail_test ("Casereader 2 not at end of file.");
- if (pattern != 1)
- casereader_destroy (r1);
- if (pattern != 2)
- casereader_destroy (r2);
- if (pattern > 2)
- {
- r1 = casefile_get_destructive_reader (cf);
- for (i = 0; i < case_cnt; i++)
- {
- struct ccase read_case, expected_case;
-
- get_random_case (&expected_case, value_cnt, i);
- if (!casereader_read_xfer (r1, &read_case))
- fail_test ("Premature end of casefile.");
- for (j = 0; j < value_cnt; j++)
- {
- double a = case_num_idx (&read_case, j);
- double b = case_num_idx (&expected_case, j);
- if (a != b)
- fail_test ("Case %lu fails comparison.", (unsigned long) i);
- }
- case_destroy (&expected_case);
- case_destroy (&read_case);
- }
- casereader_destroy (r1);
- }
- casefile_destroy (cf);
- gsl_rng_free (rng);
-}
-
-static void
-get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx)
-{
- int i;
- case_create (c, value_cnt);
- for (i = 0; i < value_cnt; i++)
- case_data_rw_idx (c, i)->f = case_idx % 257 + i;
-}
-
-static void
-write_random_case (struct casefile *cf, size_t case_idx)
-{
- struct ccase c;
- get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
- casefile_append_xfer (cf, &c);
-}
-
-static void
-read_and_verify_random_case (struct casefile *cf,
- struct casereader *reader, size_t case_idx)
-{
- struct ccase read_case, expected_case;
- size_t value_cnt;
- size_t i;
-
- value_cnt = casefile_get_value_cnt (cf);
- get_random_case (&expected_case, value_cnt, case_idx);
- if (!casereader_read (reader, &read_case))
- fail_test ("Premature end of casefile.");
- for (i = 0; i < value_cnt; i++)
- {
- double a = case_num_idx (&read_case, i);
- double b = case_num_idx (&expected_case, i);
- if (a != b)
- fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
- }
- case_destroy (&read_case);
- case_destroy (&expected_case);
-}
-
-static void
-test_casereader_clone (struct casereader *reader1, size_t case_cnt)
-{
- size_t i;
- size_t cases = 0;
- struct ccase c1;
- struct ccase c2;
- struct casefile *src = casereader_get_casefile (reader1);
- struct casereader *clone = NULL;
-
- size_t value_cnt = casefile_get_value_cnt (src);
-
- struct casefile *newfile = fastfile_create (value_cnt);
- struct casereader *newreader;
-
-
- /* Read a 3rd of the cases */
- for ( i = 0 ; i < case_cnt / 3 ; ++i )
- {
- casereader_read (reader1, &c1);
- case_destroy (&c1);
- }
-
- clone = casereader_clone (reader1);
-
- /* Copy all the cases into a new file */
- while( casereader_read (reader1, &c1))
- {
- casefile_append_xfer (newfile, &c1);
- cases ++;
- }
-
- newreader = casefile_get_reader (newfile, NULL);
-
- /* Make sure that the new file's are identical to those returned from
- the cloned reader */
- while( casereader_read (clone, &c1))
- {
- const union value *v1;
- const union value *v2;
- cases --;
-
- if ( ! casereader_read_xfer (newreader, &c2) )
- {
- case_destroy (&c1);
- break;
- }
-
- v1 = case_data_all (&c1) ;
- v2 = case_data_all (&c2) ;
-
- if ( 0 != memcmp (v1, v2, value_cnt * MAX_SHORT_STRING))
- fail_test ("Cloned reader read different value at case %ld", cases);
-
- case_destroy (&c1);
- case_destroy (&c2);
- }
-
- if ( cases > 0 )
- fail_test ("Cloned reader reads different number of cases.");
-
-}
-
-static void
-fail_test (const char *message, ...)
-{
- va_list args;
-
- va_start (args, message);
- vprintf (message, args);
- putchar ('\n');
- va_end (args);
-
- exit (1);
-}
#include <stdbool.h>
#include <stddef.h>
+#include <stdlib.h>
#include <libpspp/assertion.h>
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ Adapt case sources, sinks, and clients of procedure code to the
+ new infrastructure.
+
+ * automake.mk: Add new files.
+
+ * levene.c: Adapt to new casereaders. Abstract better.
+
+ * merge.c: New file.
+
+ * merge.h: New file.
+
+ * sort.c: Rewrite in terms of case_ordering, merger.
+
2007-05-31 Jason Stover <jhs@math.gcsu.edu>
* interaction.c: New file.
src/math/interaction.h \
src/math/levene.c \
src/math/levene.h \
+ src/math/merge.c \
+ src/math/merge.h \
src/math/moments.c src/math/moments.h \
src/math/percentiles.c src/math/percentiles.h \
src/math/design-matrix.c src/math/design-matrix.h \
#include "levene.h"
#include <libpspp/message.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include "group-proc.h"
#include <libpspp/hash.h>
#include <libpspp/str.h>
#include <data/variable.h>
#include <data/procedure.h>
-#include <data/casefilter.h>
#include <libpspp/alloc.h>
#include <libpspp/misc.h>
#include "group.h"
const struct variable **v_dep;
/* Class of missing values to exclude */
- struct casefilter *filter;
+ enum mv_class exclude;
+
+ /* An array of lz_stats for each variable */
+ struct lz_stats *lz;
+
+ /* The denominator of the expression for the Levene statistic */
+ double *lz_denominator;
+
+};
+
+/* Per variable statistics */
+struct lz_stats
+{
+ /* Total of all lz */
+ double grand_total;
+
+ /* Mean of all lz */
+ double grand_mean;
+
+ /* The total number of cases */
+ double total_n ;
+
+ /* Number of groups */
+ int n_groups;
};
/* First pass */
static void levene_precalc (const struct levene_info *l);
static int levene_calc (const struct dictionary *dict, const struct ccase *,
const struct levene_info *l);
-static void levene_postcalc (void *);
+static void levene_postcalc (struct levene_info *);
/* Second pass */
static void levene2_precalc (struct levene_info *l);
static int levene2_calc (const struct dictionary *, const struct ccase *,
struct levene_info *l);
-static void levene2_postcalc (void *);
+static void levene2_postcalc (struct levene_info *);
-void
+void
levene(const struct dictionary *dict,
- const struct casefile *cf,
+ struct casereader *reader, /* Source of the cases; split below into one reader per pass. */
const struct variable *v_indep, size_t n_dep,
const struct variable **v_dep,
- struct casefilter *filter)
+ enum mv_class exclude) /* Class of missing values to leave out of the statistic. */
{
- struct casereader *r;
+ struct casereader *pass1, *pass2;
struct ccase c;
struct levene_info l;
l.n_dep = n_dep;
l.v_indep = v_indep;
l.v_dep = v_dep;
- l.filter = filter;
+ l.exclude = exclude;
+ l.lz = xnmalloc (l.n_dep, sizeof *l.lz); /* One lz_stats per dependent variable. */
+ l.lz_denominator = xnmalloc (l.n_dep, sizeof *l.lz_denominator); /* One denominator per dependent variable. */
+ casereader_split (reader, &pass1, &pass2); /* NOTE(review): presumably yields two readers over the same
+ cases and consumes READER -- confirm against casereader.h. */
levene_precalc (&l);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- levene_calc (dict, &c, &l);
- }
- casereader_destroy (r);
+ for (; casereader_read (pass1, &c); case_destroy (&c)) /* First pass over the data. */
+ levene_calc (dict, &c, &l);
+ casereader_destroy (pass1);
levene_postcalc (&l);
levene2_precalc(&l);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- levene2_calc (dict, &c,&l);
- }
- casereader_destroy (r);
+ for (; casereader_read (pass2, &c); case_destroy (&c)) /* Second pass over the same data. */
+ levene2_calc (dict, &c, &l);
+ casereader_destroy (pass2);
levene2_postcalc (&l);
-}
-
-/* Internal variables used in calculating the Levene statistic */
-
-/* Per variable statistics */
-struct lz_stats
-{
- /* Total of all lz */
- double grand_total;
-
- /* Mean of all lz */
- double grand_mean;
-
- /* The total number of cases */
- double total_n ;
-
- /* Number of groups */
- int n_groups;
-};
-
-/* An array of lz_stats for each variable */
-static struct lz_stats *lz;
+ free (l.lz_denominator); /* The working arrays now live in L (no longer file-scope statics),
+ so they are released here instead of in levene2_postcalc. */
+ free (l.lz);
+}
static void
levene_precalc (const struct levene_info *l)
{
size_t i;
- lz = xnmalloc (l->n_dep, sizeof *lz);
-
for(i = 0; i < l->n_dep ; ++i )
{
const struct variable *var = l->v_dep[i];
struct group_statistics *gs;
struct hsh_iterator hi;
- lz[i].grand_total = 0;
- lz[i].total_n = 0;
- lz[i].n_groups = gp->n_groups ;
+ l->lz[i].grand_total = 0;
+ l->lz[i].total_n = 0;
+ l->lz[i].n_groups = gp->n_groups ;
for ( gs = hsh_first(gp->group_hash, &hi);
if ( 0 == gs )
continue ;
- if ( ! casefilter_variable_missing (l->filter, c, var))
+ if ( !var_is_value_missing (var, v, l->exclude))
{
levene_z= fabs(v->f - gs->mean);
- lz[i].grand_total += levene_z * weight;
- lz[i].total_n += weight;
+ l->lz[i].grand_total += levene_z * weight;
+ l->lz[i].total_n += weight;
gs->lz_total += levene_z * weight;
}
static void
-levene_postcalc (void *_l)
+levene_postcalc (struct levene_info *l)
{
size_t v;
- struct levene_info *l = (struct levene_info *) _l;
-
for (v = 0; v < l->n_dep; ++v)
{
/* This is Z_LL */
- lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
+ l->lz[v].grand_mean = l->lz[v].grand_total / l->lz[v].total_n ;
}
-/* The denominator for the expression for the Levene */
-static double *lz_denominator = 0;
-
static void
levene2_precalc (struct levene_info *l)
{
size_t v;
- lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator);
/* This stuff could go in the first post calc . . . */
for (v = 0;
{
g->lz_mean = g->lz_total / g->n ;
}
- lz_denominator[v] = 0;
+ l->lz_denominator[v] = 0;
}
}
if ( 0 == gs )
continue;
- if ( ! casefilter_variable_missing (l->filter, c, var))
-
+ if ( !var_is_value_missing (var, v, l->exclude))
{
levene_z = fabs(v->f - gs->mean);
- lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
+ l->lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
}
}
static void
-levene2_postcalc (void *_l)
+levene2_postcalc (struct levene_info *l)
{
size_t v;
- struct levene_info *l = (struct levene_info *) _l;
-
for (v = 0; v < l->n_dep; ++v)
{
double lz_numerator = 0;
g != 0 ;
g = (struct group_statistics *) hsh_next(hash,&hi) )
{
- lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
+ lz_numerator += g->n * pow2(g->lz_mean - l->lz[v].grand_mean );
}
lz_numerator *= ( gp->ugs.n - gp->n_groups );
- lz_denominator[v] *= (gp->n_groups - 1);
+ l->lz_denominator[v] *= (gp->n_groups - 1);
- gp->levene = lz_numerator / lz_denominator[v] ;
+ gp->levene = lz_numerator / l->lz_denominator[v] ;
}
-
- /* Now clear up after ourselves */
- free(lz_denominator);
- free(lz);
}
#if !levene_h
#define levene_h 1
-
+#include <data/casereader.h>
+#include <data/missing-values.h>
#include <data/variable.h>
-#include <data/casefile.h>
/* Calculate the Levene statistic
struct dictionary ;
struct casefilter ;
-void levene(const struct dictionary *dict, const struct casefile *cf,
+void levene(const struct dictionary *dict, struct casereader *,
const struct variable *v_indep, size_t n_dep,
const struct variable **v_dep,
- struct casefilter *filter);
+ enum mv_class exclude);
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* FIXME: error checking. */
+/* FIXME: merge pattern should be improved, this one causes a
+ performance regression. */
+#include <config.h>
+
+#include <math/merge.h>
+
+#include <data/case-ordering.h>
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <libpspp/array.h>
+#include <libpspp/assertion.h>
+#include <libpspp/taint.h>
+
+#include "xalloc.h"
+
+#define MAX_MERGE_ORDER 7
+/* One input to a merge: a source of cases plus the case most recently
+   read from it. */
+struct merge_input
+ {
+ struct casereader *reader; /* Source of the cases to merge. */
+ struct ccase c; /* Case most recently read from READER. */
+ };
+/* State of a merge of up to MAX_MERGE_ORDER inputs. */
+struct merge
+ {
+ struct case_ordering *ordering; /* Clone of the ordering passed to merge_create. */
+ struct merge_input inputs[MAX_MERGE_ORDER]; /* Inputs not yet merged. */
+ size_t input_cnt; /* Number of elements of INPUTS in use. */
+ };
+
+static void do_merge (struct merge *m);
+/* Creates and returns a new merge with no inputs.  The merge keeps its own
+   clone of ORDERING, which is released again by merge_destroy. */
+struct merge *
+merge_create (const struct case_ordering *ordering)
+{
+ struct merge *m = xmalloc (sizeof *m);
+ m->ordering = case_ordering_clone (ordering);
+ m->input_cnt = 0;
+ return m;
+}
+/* Destroys M, including its clone of the ordering and every input reader it
+   still holds.  Does nothing if M is null. */
+void
+merge_destroy (struct merge *m)
+{
+ if (m != NULL)
+ {
+ size_t i;
+
+ case_ordering_destroy (m->ordering);
+ for (i = 0; i < m->input_cnt; i++) /* Readers that were never handed back to a caller. */
+ casereader_destroy (m->inputs[i].reader);
+ free (m);
+ }
+}
+/* Adds R as a new input of M.  As soon as MAX_MERGE_ORDER inputs have
+   accumulated they are merged into a single input, so the fixed-size input
+   array cannot overflow. */
+void
+merge_append (struct merge *m, struct casereader *r)
+{
+ r = casereader_rename (r); /* NOTE(review): presumably gives M its own handle on R's cases -- confirm. */
+ m->inputs[m->input_cnt++].reader = r;
+ if (m->input_cnt >= MAX_MERGE_ORDER) /* Array is full: collapse everything into one input. */
+ do_merge (m);
+}
+/* Merges all of M's inputs and returns the result as a single casereader.
+   The returned reader is detached from M; M itself is not destroyed.  If M
+   has no inputs, returns the reader made from an empty mem_writer. */
+struct casereader *
+merge_make_reader (struct merge *m)
+{
+ struct casereader *r;
+
+ if (m->input_cnt > 1) /* Reduce to exactly one input. */
+ do_merge (m);
+
+ if (m->input_cnt == 1)
+ {
+ r = m->inputs[0].reader;
+ m->input_cnt = 0; /* R is handed to the caller, so merge_destroy must not destroy it. */
+ }
+ else if (m->input_cnt == 0)
+ {
+ size_t value_cnt = case_ordering_get_value_cnt (m->ordering);
+ struct casewriter *writer = mem_writer_create (value_cnt);
+ r = casewriter_make_reader (writer); /* Nothing was ever written to WRITER. */
+ }
+ else
+ NOT_REACHED (); /* do_merge always leaves exactly one input. */
+
+ return r;
+}
+/* Reads the next case from input IDX of M into that input's case slot.
+   Returns true if a case was read.  Otherwise destroys the input's reader,
+   removes the input from M's array, and returns false. */
+static bool
+read_input_case (struct merge *m, size_t idx)
+{
+ struct merge_input *i = &m->inputs[idx];
+
+ if (casereader_read (i->reader, &i->c))
+ return true;
+ else
+ {
+ casereader_destroy (i->reader);
+ remove_element (m->inputs, m->input_cnt, sizeof *m->inputs, idx); /* NOTE(review): presumably shifts the
+ later inputs down one slot -- confirm against libpspp/array.h. */
+ m->input_cnt--;
+ return false;
+ }
+}
+/* Merges all of M's inputs (there must be at least two) into a writer made
+   by tmpfile_writer_create.  On return the reader made from that writer is
+   M's only input. */
+static void
+do_merge (struct merge *m)
+{
+ struct casewriter *w;
+ size_t i;
+
+ assert (m->input_cnt > 1);
+
+ w = tmpfile_writer_create (case_ordering_get_value_cnt (m->ordering));
+ for (i = 0; i < m->input_cnt; i++) /* Tie the output's taint to the taint of every input. */
+ taint_propagate (casereader_get_taint (m->inputs[i].reader),
+ casewriter_get_taint (w));
+
+ for (i = 0; i < m->input_cnt; ) /* Load the first case of every input. */
+ if (read_input_case (m, i))
+ i++; /* Not advanced on failure: input I was removed, so index I is tried again. */
+ while (m->input_cnt > 0)
+ {
+ size_t min;
+
+ min = 0;
+ for (i = 1; i < m->input_cnt; i++)
+ if (case_ordering_compare_cases (&m->inputs[i].c, &m->inputs[min].c,
+ m->ordering) < 0)
+ min = i; /* Strict "<" means the lowest-numbered input wins ties. */
+
+ casewriter_write (w, &m->inputs[min].c);
+ read_input_case (m, min); /* Refill from the input just consumed, or drop it if empty. */
+ }
+
+ m->input_cnt = 1; /* Every old input is gone; the merged output replaces them. */
+ m->inputs[0].reader = casewriter_make_reader (w);
+}
+
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#ifndef MATH_MERGE_H
+#define MATH_MERGE_H 1
+
+#include <stdbool.h>
+
+struct case_ordering;
+struct casereader;
+/* Combines several casereaders into one by repeatedly taking the least
+   remaining case according to a case_ordering.  merge_create clones the
+   ordering; merge_make_reader hands back the combined reader without
+   destroying the merge; merge_destroy accepts a null pointer. */
+struct merge *merge_create (const struct case_ordering *);
+void merge_destroy (struct merge *);
+void merge_append (struct merge *, struct casereader *);
+struct casereader *merge_make_reader (struct merge *);
+
+#endif /* math/merge.h */
#include "sort.h"
-#include <errno.h>
-#include <limits.h>
-#include <stdbool.h>
#include <stdio.h>
-#include <stdlib.h>
-#include <data/case-source.h>
+#include <data/case-ordering.h>
#include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
-#include <data/casefile-factory.h>
-#include <data/fastfile-factory.h>
-#include <data/procedure.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/casewriter-provider.h>
#include <data/settings.h>
-#include <data/variable.h>
-#include <data/storage-stream.h>
#include <libpspp/alloc.h>
#include <libpspp/array.h>
#include <libpspp/assertion.h>
-#include <libpspp/message.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/str.h>
-
-#include "minmax.h"
+#include <math/merge.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
/* These should only be changed for testing purposes. */
int min_buffers = 64;
int max_buffers = INT_MAX;
-bool allow_internal_sort = true;
-
-static int compare_record (const struct ccase *, const struct ccase *,
- const struct sort_criteria *);
-static struct casefile *do_internal_sort (struct casereader *,
- const struct sort_criteria *,
- struct casefile_factory *
- );
-static struct casefile *do_external_sort (struct casereader *,
- const struct sort_criteria *,
- struct casefile_factory *
- );
-
-
-/* Sorts the active file in-place according to CRITERIA.
- Returns true if successful. */
-bool
-sort_active_file_in_place (struct dataset *ds,
- const struct sort_criteria *criteria)
-{
- struct casefile *in, *out;
-
- proc_cancel_temporary_transformations (ds);
- if (!procedure (ds, NULL, NULL))
- return false;
-
- in = proc_capture_output (ds);
- out = sort_execute (casefile_get_destructive_reader (in), criteria,
- dataset_get_casefile_factory (ds));
- if (out == NULL)
- return false;
-
- proc_set_source (ds, storage_source_create (out));
- return true;
-}
-/* Data passed to sort_to_casefile_callback(). */
-struct sort_to_casefile_cb_data
+struct sort_writer /* Private state of a sorting casewriter. */
{
- const struct sort_criteria *criteria;
- struct casefile *output;
- struct casefile_factory *factory ;
+ struct case_ordering *ordering; /* Clone of the sort order. */
+ struct merge *merge; /* Receives each run once it is complete. */
+ struct pqueue *pqueue; /* Cases accepted but not yet written to a run. */
+
+ struct casewriter *run; /* Run currently being written, or NULL if none is open. */
+ casenumber run_id; /* Id of RUN; 0 until the first run is opened. */
+ struct ccase run_end; /* Clone of the case most recently output; null until then. */
};
-/* Sorts casefile CF according to the criteria in CB_DATA. */
-static bool
-sort_to_casefile_callback (const struct casefile *cf, void *cb_data_)
-{
- struct sort_to_casefile_cb_data *cb_data = cb_data_;
- cb_data->output = sort_execute (casefile_get_reader (cf, NULL),
- cb_data->criteria,
- cb_data->factory
- );
- return cb_data->output != NULL;
-}
+static struct casewriter_class sort_casewriter_class;
-/* Sorts the active file to a separate casefile. If successful,
- returns the sorted casefile. Returns a null pointer on
- failure. */
-struct casefile *
-sort_active_file_to_casefile (struct dataset *ds,
- const struct sort_criteria *criteria)
+static struct pqueue *pqueue_create (const struct case_ordering *);
+static void pqueue_destroy (struct pqueue *);
+static bool pqueue_is_full (const struct pqueue *);
+static bool pqueue_is_empty (const struct pqueue *);
+static void pqueue_push (struct pqueue *, struct ccase *, casenumber);
+static void pqueue_pop (struct pqueue *, struct ccase *, casenumber *);
+
+static void output_record (struct sort_writer *);
+/* Creates and returns a casewriter backed by sort_casewriter_class that
+   orders the cases written to it according to ORDERING.  ORDERING is
+   destroyed here, after the writer has taken the clones it needs. */
+struct casewriter *
+sort_create_writer (struct case_ordering *ordering)
{
- struct sort_to_casefile_cb_data cb_data;
-
- proc_cancel_temporary_transformations (ds);
+ struct sort_writer *sort;
- cb_data.criteria = criteria;
- cb_data.output = NULL;
- cb_data.factory = dataset_get_casefile_factory (ds);
- if (!multipass_procedure (ds, sort_to_casefile_callback, &cb_data))
- {
- casefile_destroy (cb_data.output);
- return NULL;
- }
- return cb_data.output;
-}
+ sort = xmalloc (sizeof *sort);
+ sort->ordering = case_ordering_clone (ordering);
+ sort->merge = merge_create (ordering); /* merge_create clones ORDERING itself. */
+ sort->pqueue = pqueue_create (ordering); /* So does pqueue_create. */
+ sort->run = NULL; /* No run is open yet. */
+ sort->run_id = 0;
+ case_nullify (&sort->run_end); /* Nothing has been output yet. */
+ case_ordering_destroy (ordering); /* The caller's ORDERING is consumed. */
-/* Reads all the cases from READER, which is destroyed. Sorts
- the cases according to CRITERIA. Returns the sorted cases in
- a newly created casefile, which will be created by FACTORY.
- If FACTORY is NULL, then a local fastfile_factory will be used.
-*/
-struct casefile *
-sort_execute (struct casereader *reader,
- const struct sort_criteria *criteria,
- struct casefile_factory *factory
- )
-{
- struct casefile_factory *local_factory = NULL;
- struct casefile *output ;
- if ( factory == NULL )
- factory = local_factory = fastfile_factory_create ();
+ return casewriter_create (&sort_casewriter_class, sort);
+}
- output = do_internal_sort (reader, criteria, factory);
- if (output == NULL)
- output = do_external_sort (reader, criteria, factory);
- casereader_destroy (reader);
+static void
+sort_casewriter_write (struct casewriter *writer UNUSED, void *sort_,
+ struct ccase *c)
+{
+ struct sort_writer *sort = sort_; /* Write callback: buffers C in the priority queue, first
+ evicting the smallest buffered case if the queue is full. */
+ bool next_run;
- fastfile_factory_destroy (local_factory);
+ if (pqueue_is_full (sort->pqueue))
+ output_record (sort); /* Frees one slot for C. */
- return output;
+ next_run = (case_is_null (&sort->run_end)
+ || case_ordering_compare_cases (c, &sort->run_end,
+ sort->ordering) < 0); /* True if nothing has been output yet, or if C
+ orders before the case most recently output. */
+ pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0)); /* Tag C with the run it may join. */
}
-\f
-/* A case and its index. */
-struct indexed_case
- {
- struct ccase c; /* Case. */
- unsigned long idx; /* Index to allow for stable sorting. */
- };
-static int compare_indexed_cases (const void *, const void *, const void *);
+static void
+sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_)
+{
+ struct sort_writer *sort = sort_; /* Destroy callback: releases everything the sort writer owns. */
+
+ case_ordering_destroy (sort->ordering);
+ merge_destroy (sort->merge);
+ pqueue_destroy (sort->pqueue); /* Also destroys any cases still queued. */
+ casewriter_destroy (sort->run); /* RUN is NULL when this is reached from convert_to_reader. */
+ case_destroy (&sort->run_end);
+ free (sort);
+}
-/* If the data is in memory, do an internal sort and return a new
- casefile for the data. Otherwise, return a null pointer. */
-static struct casefile *
-do_internal_sort (struct casereader *reader,
- const struct sort_criteria *criteria,
- struct casefile_factory *factory)
+static struct casereader *
+sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_)
{
- const struct casefile *src;
- struct casefile *dst;
- unsigned long case_cnt;
-
- if (!allow_internal_sort)
- return NULL;
-
- src = casereader_get_casefile (reader);
- if (casefile_get_case_cnt (src) > 1 && !casefile_in_core (src))
- return NULL;
-
- case_cnt = casefile_get_case_cnt (src);
- dst = factory->create_casefile (factory, casefile_get_value_cnt (src));
- if (case_cnt != 0)
+ struct sort_writer *sort = sort_; /* Convert-to-reader callback: flushes the queue, merges all
+ runs, destroys the sort writer, and returns the merged reader. */
+ struct casereader *output;
+
+ if (sort->run == NULL && sort->run_id == 0) /* No run was ever opened, so every case is still queued. */
{
- struct indexed_case *cases = nmalloc (sizeof *cases, case_cnt);
- if (cases != NULL)
- {
- unsigned long i;
-
- for (i = 0; i < case_cnt; i++)
- {
- bool ok = casereader_read_xfer (reader, &cases[i].c);
- if (!ok)
- NOT_REACHED ();
- cases[i].idx = i;
- }
-
- sort (cases, case_cnt, sizeof *cases, compare_indexed_cases,
- (void *) criteria);
-
- for (i = 0; i < case_cnt; i++)
- casefile_append_xfer (dst, &cases[i].c);
- if (casefile_error (dst))
- NOT_REACHED ();
-
- free (cases);
- }
- else
- {
- /* Failure. */
- casefile_destroy (dst);
- dst = NULL;
- }
+ /* In-core sort: collect the single run with mem_writer_create
+ instead of the tmpfile writer that output_record would open. */
+ sort->run = mem_writer_create (case_ordering_get_value_cnt (
+ sort->ordering));
+ sort->run_id = 1; /* Matches the id that sort_casewriter_write gave the queued cases. */
}
+ while (!pqueue_is_empty (sort->pqueue)) /* Drain the queue into the run(s). */
+ output_record (sort);
- return dst;
-}
+ merge_append (sort->merge, casewriter_make_reader (sort->run)); /* Hand over the final run. */
+ sort->run = NULL;
-/* Compares the variables specified by CRITERIA between the cases
- at A and B, with a "last resort" comparison for stability, and
- returns a strcmp()-type result. */
-static int
-compare_indexed_cases (const void *a_, const void *b_, const void *criteria_)
-{
- const struct sort_criteria *criteria = criteria_;
- const struct indexed_case *a = a_;
- const struct indexed_case *b = b_;
- int result = compare_record (&a->c, &b->c, criteria);
- if (result == 0)
- result = a->idx < b->idx ? -1 : a->idx > b->idx;
- return result;
+ output = merge_make_reader (sort->merge);
+ sort_casewriter_destroy (writer, sort); /* SORT must not be used after this point. */
+ return output;
}
-\f
-/* External sort. */
-/* Maximum order of merge (external sort only). The maximum
- reasonable value is about 7. Above that, it would be a good
- idea to use a heap in merge_once() to select the minimum. */
-#define MAX_MERGE_ORDER 7
+static void
+output_record (struct sort_writer *sort)
+{
+ struct ccase min_case; /* Pops the smallest queued case and writes it to the run named by
+ its id, closing the current run first if the id differs. */
+ casenumber min_run_id;
-/* Results of an external sort. */
-struct external_sort
- {
- const struct sort_criteria *criteria; /* Sort criteria. */
- size_t value_cnt; /* Size of data in `union value's. */
- struct casefile **runs; /* Array of initial runs. */
- size_t run_cnt, run_cap; /* Number of runs, allocated capacity. */
- struct casefile_factory *factory; /* Factory used to create the result */
- };
+ pqueue_pop (sort->pqueue, &min_case, &min_run_id);
+#if 0
+ printf ("\toutput: %f to run %d\n", case_num_idx (&min_case, 0), min_run_id);
+#endif
-/* Prototypes for helper functions. */
-static int write_runs (struct external_sort *, struct casereader *);
-static struct casefile *merge (struct external_sort *);
-static void destroy_external_sort (struct external_sort *);
-
-/* Performs a stable external sort of the active file according
- to the specification in SCP. Forms initial runs using a heap
- as a reservoir. Merges the initial runs according to a
- pattern that assures stability. */
-static struct casefile *
-do_external_sort (struct casereader *reader,
- const struct sort_criteria *criteria,
- struct casefile_factory *factory
- )
-{
- struct external_sort *xsrt;
-
- if (!casefile_to_disk (casereader_get_casefile (reader)))
- return NULL;
-
- xsrt = xmalloc (sizeof *xsrt);
- xsrt->criteria = criteria;
- xsrt->value_cnt = casefile_get_value_cnt (casereader_get_casefile (reader));
- xsrt->run_cap = 512;
- xsrt->run_cnt = 0;
- xsrt->runs = xnmalloc (xsrt->run_cap, sizeof *xsrt->runs);
- xsrt->factory = factory;
- if (write_runs (xsrt, reader))
+ if (sort->run_id != min_run_id && sort->run != NULL) /* The popped case belongs to a different run. */
{
- struct casefile *output = merge (xsrt);
- destroy_external_sort (xsrt);
- return output;
+ merge_append (sort->merge, casewriter_make_reader (sort->run)); /* Current run is complete. */
+ sort->run = NULL;
}
- else
+ if (sort->run == NULL) /* First output ever, or the previous run was just closed. */
{
- destroy_external_sort (xsrt);
- return NULL;
+ sort->run = tmpfile_writer_create (case_ordering_get_value_cnt (
+ sort->ordering));
+ sort->run_id = min_run_id;
}
+
+ case_destroy (&sort->run_end); /* Remember this case as the latest output; */
+ case_clone (&sort->run_end, &min_case); /* sort_casewriter_write compares new cases against it. */
+
+ casewriter_write (sort->run, &min_case);
}
-/* Destroys XSRT. */
-static void
-destroy_external_sort (struct external_sort *xsrt)
+static struct casewriter_class sort_casewriter_class = /* Callbacks passed to casewriter_create by sort_create_writer. */
+ {
+ sort_casewriter_write,
+ sort_casewriter_destroy,
+ sort_casewriter_convert_to_reader,
+ };
+\f
+/* Reads all the cases from INPUT. Sorts the cases according to
+ ORDERING. Returns the sorted cases in a new casereader, or a
+ null pointer if an I/O error occurs. Both INPUT and ORDERING
+ are destroyed upon return, regardless of success.
+
+ NOTE(review): no null return is visible in this function itself;
+ confirm whether casewriter_make_reader can return null. */
+struct casereader *
+sort_execute (struct casereader *input, struct case_ordering *ordering)
{
- if (xsrt != NULL)
- {
- int i;
-
- for (i = 0; i < xsrt->run_cnt; i++)
- casefile_destroy (xsrt->runs[i]);
- free (xsrt->runs);
- free (xsrt);
- }
+ struct casewriter *output = sort_create_writer (ordering); /* Consumes ORDERING. */
+ casereader_transfer (input, output); /* NOTE(review): presumably moves every case and destroys INPUT -- confirm. */
+ return casewriter_make_reader (output);
}
\f
-/* Replacement selection. */
-
-/* Pairs a record with a run number. */
-struct record_run
+struct pqueue /* Fixed-capacity heap of cases tagged with run ids. */
{
- int run; /* Run number of case. */
- struct ccase record; /* Case data. */
- size_t idx; /* Case number (for stability). */
+ struct case_ordering *ordering; /* Clone of the sort order. */
+ struct pqueue_record *records; /* Array of RECORD_CAP records, kept as a heap. */
+ size_t record_cnt; /* Number of records currently queued. */
+ size_t record_cap; /* Number of records allocated. */
+ casenumber idx; /* Sequence number given to the next record pushed. */
};
-/* Represents a set of initial runs during an external sort. */
-struct initial_run_state
+struct pqueue_record /* One element of a pqueue. */
{
- struct external_sort *xsrt;
-
- /* Reservoir. */
- struct record_run *records; /* Records arranged as a heap. */
- size_t record_cnt; /* Current number of records. */
- size_t record_cap; /* Capacity for records. */
-
- /* Run currently being output. */
- int run; /* Run number. */
- size_t case_cnt; /* Number of cases so far. */
- struct casefile *casefile; /* Output file. */
- struct ccase last_output; /* Record last output. */
-
- int okay; /* Zero if an error has been encountered. */
+ casenumber id; /* Run id supplied by the caller of pqueue_push. */
+ struct ccase c; /* The case itself. */
+ casenumber idx; /* Insertion sequence number within the queue. */
};
-static bool destroy_initial_run_state (struct initial_run_state *);
-static void process_case (struct initial_run_state *,
- const struct ccase *, size_t);
-static int allocate_cases (struct initial_run_state *);
-static void output_record (struct initial_run_state *);
-static void start_run (struct initial_run_state *);
-static void end_run (struct initial_run_state *);
-static int compare_record_run (const struct record_run *,
- const struct record_run *,
- const struct initial_run_state *);
-static int compare_record_run_minheap (const void *, const void *,
- const void *);
-
-/* Reads cases from READER and composes initial runs in XSRT. */
-static int
-write_runs (struct external_sort *xsrt, struct casereader *reader)
-{
- struct initial_run_state *irs;
- struct ccase c;
- size_t idx = 0;
- int success = 0;
-
- /* Allocate memory for cases. */
- irs = xmalloc (sizeof *irs);
- irs->xsrt = xsrt;
- irs->records = NULL;
- irs->record_cnt = irs->record_cap = 0;
- irs->run = 0;
- irs->case_cnt = 0;
- irs->casefile = NULL;
- case_nullify (&irs->last_output);
- irs->okay = 1;
- if (!allocate_cases (irs))
- goto done;
-
- /* Create initial runs. */
- start_run (irs);
- for (; irs->okay && casereader_read (reader, &c); case_destroy (&c))
- process_case (irs, &c, idx++);
- while (irs->okay && irs->record_cnt > 0)
- output_record (irs);
- end_run (irs);
-
- success = irs->okay;
-
- done:
- if (!destroy_initial_run_state (irs))
- success = false;
-
- return success;
-}
-
-/* Add a single case to an initial run. */
-static void
-process_case (struct initial_run_state *irs, const struct ccase *c,
- size_t idx)
-{
- struct record_run *rr;
-
- /* Compose record_run for this run and add to heap. */
- assert (irs->record_cnt < irs->record_cap - 1);
- rr = irs->records + irs->record_cnt++;
- case_copy (&rr->record, 0, c, 0, irs->xsrt->value_cnt);
- rr->run = irs->run;
- rr->idx = idx;
- if (!case_is_null (&irs->last_output)
- && compare_record (c, &irs->last_output, irs->xsrt->criteria) < 0)
- rr->run = irs->run + 1;
- push_heap (irs->records, irs->record_cnt, sizeof *irs->records,
- compare_record_run_minheap, irs);
-
- /* Output a record if the reservoir is full. */
- if (irs->record_cnt == irs->record_cap - 1 && irs->okay)
- output_record (irs);
-}
+static int compare_pqueue_records_minheap (const void *a, const void *b,
+ const void *pq_);
-/* Destroys the initial run state represented by IRS.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-destroy_initial_run_state (struct initial_run_state *irs)
+static struct pqueue *
+pqueue_create (const struct case_ordering *ordering)
{
- int i;
- bool ok = true;
-
- if (irs == NULL)
- return true;
-
- for (i = 0; i < irs->record_cap; i++)
- case_destroy (&irs->records[i].record);
- free (irs->records);
-
- if (irs->casefile != NULL)
- ok = casefile_sleep (irs->casefile);
-
- free (irs);
- return ok;
+ struct pqueue *pq; /* Creates and returns an empty queue that keeps its own clone of ORDERING. */
+
+ pq = xmalloc (sizeof *pq);
+ pq->ordering = case_ordering_clone (ordering);
+ pq->record_cap
+ = get_workspace_cases (case_ordering_get_value_cnt (ordering)); /* Capacity suggested by the workspace setting... */
+ if (pq->record_cap > max_buffers) /* ...then clamped to the max_buffers/min_buffers testing knobs. */
+ pq->record_cap = max_buffers;
+ else if (pq->record_cap < min_buffers)
+ pq->record_cap = min_buffers;
+ pq->record_cnt = 0;
+ pq->records = xnmalloc (pq->record_cap, sizeof *pq->records);
+ pq->idx = 0;
+
+ return pq;
}
-/* Allocates room for lots of cases as a buffer. */
-static int
-allocate_cases (struct initial_run_state *irs)
-{
- int approx_case_cost; /* Approximate memory cost of one case in bytes. */
- int max_cases; /* Maximum number of cases to allocate. */
- int i;
-
- /* Allocate as many cases as we can within the workspace
- limit. */
- approx_case_cost = (sizeof *irs->records
- + irs->xsrt->value_cnt * sizeof (union value)
- + 4 * sizeof (void *));
- max_cases = get_workspace() / approx_case_cost;
- if (max_cases > max_buffers)
- max_cases = max_buffers;
- irs->records = nmalloc (sizeof *irs->records, max_cases);
- if (irs->records != NULL)
- for (i = 0; i < max_cases; i++)
- if (!case_try_create (&irs->records[i].record, irs->xsrt->value_cnt))
- {
- max_cases = i;
- break;
- }
- irs->record_cap = max_cases;
-
- /* Fail if we didn't allocate an acceptable number of cases. */
- if (irs->records == NULL || max_cases < min_buffers)
- {
- msg (SE, _("Out of memory. Could not allocate room for minimum of %d "
- "cases of %d bytes each. (PSPP workspace is currently "
- "restricted to a maximum of %lu KB.)"),
- min_buffers, approx_case_cost,
- (unsigned long int) (get_workspace() / 1024));
- return 0;
- }
- return 1;
-}
-
-/* Compares the VAR_CNT variables in VARS[] between the `value's at
- A and B, and returns a strcmp()-type result. */
-static int
-compare_record (const struct ccase *a, const struct ccase *b,
- const struct sort_criteria *criteria)
+static void
+pqueue_destroy (struct pqueue *pq)
{
- int i;
-
- assert (a != NULL);
- assert (b != NULL);
-
- for (i = 0; i < criteria->crit_cnt; i++)
+ if (pq != NULL) /* Destroying a null queue is a no-op. */
{
- const struct sort_criterion *c = &criteria->crits[i];
- int result;
-
- if (c->width == 0)
+ while (!pqueue_is_empty (pq)) /* Destroy every case still queued. */
{
- double af = case_num_idx (a, c->fv);
- double bf = case_num_idx (b, c->fv);
-
- result = af < bf ? -1 : af > bf;
+ struct ccase c;
+ casenumber id;
+ pqueue_pop (pq, &c, &id);
+ case_destroy (&c);
}
- else
- result = memcmp (case_str_idx (a, c->fv),
- case_str_idx (b, c->fv), c->width);
-
- if (result != 0)
- return c->dir == SRT_ASCEND ? result : -result;
+ case_ordering_destroy (pq->ordering);
+ free (pq->records);
+ free (pq);
}
-
- return 0;
}
-/* Compares record-run tuples A and B on run number first, then
- on record, then on case index. */
-static int
-compare_record_run (const struct record_run *a,
- const struct record_run *b,
- const struct initial_run_state *irs)
+static bool
+pqueue_is_full (const struct pqueue *pq)
{
- int result = a->run < b->run ? -1 : a->run > b->run;
- if (result == 0)
- result = compare_record (&a->record, &b->record, irs->xsrt->criteria);
- if (result == 0)
- result = a->idx < b->idx ? -1 : a->idx > b->idx;
- return result;
+ return pq->record_cnt >= pq->record_cap; /* True when PQ has no free slot for pqueue_push. */
}
-/* Compares record-run tuples A and B on run number first, then
- on the current record according to SCP, but in descending
- order. */
-static int
-compare_record_run_minheap (const void *a, const void *b, const void *irs)
+static bool
+pqueue_is_empty (const struct pqueue *pq)
{
- return -compare_record_run (a, b, irs);
+ return pq->record_cnt == 0; /* True when PQ holds nothing for pqueue_pop. */
}
-/* Begins a new initial run, specifically its output file. */
static void
-start_run (struct initial_run_state *irs)
+pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id)
{
- irs->run++;
- irs->case_cnt = 0;
-
- /* This casefile is internal to the sort, so don't use the factory
- to create it. */
- irs->casefile = fastfile_create (irs->xsrt->value_cnt);
- casefile_to_disk (irs->casefile);
- case_nullify (&irs->last_output);
-}
+ struct pqueue_record *r; /* Adds C to PQ tagged with run ID.  PQ must not be full. */
+
+ assert (!pqueue_is_full (pq));
-/* Ends the current initial run. */
-static void
-end_run (struct initial_run_state *irs)
-{
- struct external_sort *xsrt = irs->xsrt;
+ r = &pq->records[pq->record_cnt++]; /* Claim the next unused slot. */
+ r->id = id;
+ case_move (&r->c, c); /* NOTE(review): presumably transfers C's contents into the record -- confirm. */
+ r->idx = pq->idx++; /* Records the order of insertion. */
- /* Record initial run. */
- if (irs->casefile != NULL)
- {
- casefile_sleep (irs->casefile);
- if (xsrt->run_cnt >= xsrt->run_cap)
- {
- xsrt->run_cap *= 2;
- xsrt->runs = xnrealloc (xsrt->runs,
- xsrt->run_cap, sizeof *xsrt->runs);
- }
- xsrt->runs[xsrt->run_cnt++] = irs->casefile;
- if (casefile_error (irs->casefile))
- irs->okay = false;
- irs->casefile = NULL;
- }
+ push_heap (pq->records, pq->record_cnt, sizeof *pq->records,
+ compare_pqueue_records_minheap, pq); /* Restore heap order with the new record included. */
}
-/* Writes a record to the current initial run. */
static void
-output_record (struct initial_run_state *irs)
+pqueue_pop (struct pqueue *pq, struct ccase *c, casenumber *id)
{
- struct record_run *record_run;
- struct ccase case_tmp;
-
- /* Extract minimum case from heap. */
- assert (irs->record_cnt > 0);
- pop_heap (irs->records, irs->record_cnt--, sizeof *irs->records,
- compare_record_run_minheap, irs);
- record_run = irs->records + irs->record_cnt;
-
- /* Bail if an error has occurred. */
- if (!irs->okay)
- return;
-
- /* Start new run if necessary. */
- assert (record_run->run == irs->run
- || record_run->run == irs->run + 1);
- if (record_run->run != irs->run)
- {
- end_run (irs);
- start_run (irs);
- }
- assert (record_run->run == irs->run);
- irs->case_cnt++;
+ struct pqueue_record *r; /* Removes the top record of PQ's heap, storing its case in C and
+ its run id in *ID.  PQ must not be empty. */
- /* Write to disk. */
- if (irs->casefile != NULL)
- casefile_append (irs->casefile, &record_run->record);
-
- /* This record becomes last_output. */
- irs->last_output = case_tmp = record_run->record;
- record_run->record = irs->records[irs->record_cap - 1].record;
- irs->records[irs->record_cap - 1].record = case_tmp;
-}
-\f
-/* Merging. */
+ assert (!pqueue_is_empty (pq));
-static int choose_merge (struct casefile *runs[], int run_cnt, int order);
-static struct casefile *merge_once (struct external_sort *,
- struct casefile *[], size_t);
+ pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records,
+ compare_pqueue_records_minheap, pq); /* NOTE(review): presumably leaves the popped record in the
+ last slot of the old range -- confirm against libpspp/array.h. */
-/* Repeatedly merges run until only one is left,
- and returns the final casefile.
- Returns a null pointer if an I/O error occurs. */
-static struct casefile *
-merge (struct external_sort *xsrt)
-{
- while (xsrt->run_cnt > 1)
- {
- int order = MIN (MAX_MERGE_ORDER, xsrt->run_cnt);
- int idx = choose_merge (xsrt->runs, xsrt->run_cnt, order);
- xsrt->runs[idx] = merge_once (xsrt, xsrt->runs + idx, order);
- remove_range (xsrt->runs, xsrt->run_cnt, sizeof *xsrt->runs,
- idx + 1, order - 1);
- xsrt->run_cnt -= order - 1;
-
- if (xsrt->runs[idx] == NULL)
- return NULL;
- }
- assert (xsrt->run_cnt == 1);
- xsrt->run_cnt = 0;
- return xsrt->runs[0];
+ r = &pq->records[pq->record_cnt]; /* RECORD_CNT was already decremented, so this is that last slot. */
+ *id = r->id;
+ case_move (c, &r->c);
}
-/* Chooses ORDER runs out of the RUN_CNT runs in RUNS to merge,
- and returns the index of the first one.
-
- For stability, we must merge only consecutive runs. For
- efficiency, we choose the shortest consecutive sequence of
- runs. */
+/* Compares record-run tuples A and B on id, then on case data,
+ then on insertion order, in descending order. */
static int
-choose_merge (struct casefile *runs[], int run_cnt, int order)
+compare_pqueue_records_minheap (const void *a_, const void *b_,
+ const void *pq_)
{
- int min_idx, min_sum;
- int cur_idx, cur_sum;
- int i;
-
- /* Sum up the length of the first ORDER runs. */
- cur_sum = 0;
- for (i = 0; i < order; i++)
- cur_sum += casefile_get_case_cnt (runs[i]);
-
- /* Find the shortest group of ORDER runs,
- using a running total for efficiency. */
- min_idx = 0;
- min_sum = cur_sum;
- for (cur_idx = 1; cur_idx + order <= run_cnt; cur_idx++)
- {
- cur_sum -= casefile_get_case_cnt (runs[cur_idx - 1]);
- cur_sum += casefile_get_case_cnt (runs[cur_idx + order - 1]);
- if (cur_sum < min_sum)
- {
- min_sum = cur_sum;
- min_idx = cur_idx;
- }
- }
-
- return min_idx;
-}
-
-/* Merges the RUN_CNT initial runs specified in INPUT_FILES into a
- new run, and returns the new run.
- Returns a null pointer if an I/O error occurs. */
-static struct casefile *
-merge_once (struct external_sort *xsrt,
- struct casefile **const input_files,
- size_t run_cnt)
-{
- struct run
- {
- struct casefile *file;
- struct casereader *reader;
- struct ccase ccase;
- }
- *runs;
-
- struct casefile *output = NULL;
- int i;
-
- /* Open input files. */
- runs = xnmalloc (run_cnt, sizeof *runs);
- for (i = 0; i < run_cnt; i++)
- {
- struct run *r = &runs[i];
- r->file = input_files[i];
- r->reader = casefile_get_destructive_reader (r->file);
- if (!casereader_read_xfer (r->reader, &r->ccase))
- {
- run_cnt--;
- i--;
- }
- }
-
- /* Create output file. */
- output = xsrt->factory->create_casefile (xsrt->factory, xsrt->value_cnt);
- casefile_to_disk (output);
-
- /* Merge. */
- while (run_cnt > 0)
- {
- struct run *min_run, *run;
-
- /* Find minimum. */
- min_run = runs;
- for (run = runs + 1; run < runs + run_cnt; run++)
- if (compare_record (&run->ccase, &min_run->ccase, xsrt->criteria) < 0)
- min_run = run;
-
- /* Write minimum to output file. */
- casefile_append_xfer (output, &min_run->ccase);
-
- /* Read another case from minimum run. */
- if (!casereader_read_xfer (min_run->reader, &min_run->ccase))
- {
- if (casefile_error (min_run->file) || casefile_error (output))
- goto error;
- casereader_destroy (min_run->reader);
- casefile_destroy (min_run->file);
-
- remove_element (runs, run_cnt, sizeof *runs, min_run - runs);
- run_cnt--;
- }
- }
-
- if (!casefile_sleep (output))
- goto error;
- free (runs);
-
- return output;
-
- error:
- for (i = 0; i < run_cnt; i++)
- casefile_destroy (runs[i].file);
- casefile_destroy (output);
- free (runs);
- return NULL;
+ const struct pqueue_record *a = a_;
+ const struct pqueue_record *b = b_;
+ const struct pqueue *pq = pq_;
+ int result = a->id < b->id ? -1 : a->id > b->id;
+ if (result == 0)
+ result = case_ordering_compare_cases (&a->c, &b->c, pq->ordering);
+ if (result == 0)
+ result = a->idx < b->idx ? -1 : a->idx > b->idx;
+ return -result;
}
/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
-#if !sort_h
-#define sort_h 1
+#ifndef MATH_SORT_H
+#define MATH_SORT_H 1
#include <stddef.h>
#include <stdbool.h>
-struct casereader;
-struct dictionary;
-struct variable;
-struct casefile_factory;
+struct case_ordering;
extern int min_buffers ;
extern int max_buffers ;
-extern bool allow_internal_sort ;
+struct casewriter *sort_create_writer (struct case_ordering *);
+struct casereader *sort_execute (struct casereader *, struct case_ordering *);
-/* Sort direction. */
-enum sort_direction
- {
- SRT_ASCEND, /* A, B, C, ..., X, Y, Z. */
- SRT_DESCEND /* Z, Y, X, ..., C, B, A. */
- };
-
-/* A sort criterion. */
-struct sort_criterion
- {
- int fv; /* Variable data index. */
- int width; /* 0=numeric, otherwise string width. */
- enum sort_direction dir; /* Sort direction. */
- };
-
-/* A set of sort criteria. */
-struct sort_criteria
- {
- struct sort_criterion *crits;
- size_t crit_cnt;
- };
-
-
-void sort_destroy_criteria (struct sort_criteria *);
-
-struct casefile *sort_execute (struct casereader *,
- const struct sort_criteria *,
- struct casefile_factory *
- );
-
-struct dataset ;
-bool sort_active_file_in_place (struct dataset *ds,
- const struct sort_criteria *);
-
-struct casefile *sort_active_file_to_casefile (struct dataset *ds,
- const struct sort_criteria *);
-
-#endif /* !sort_h */
+#endif /* math/sort.h */
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ Adapt case sources, sinks, and clients of procedure code to the
+ new infrastructure.
+
+ * automake.mk: Remove files.
+
+ * flexifile.c: Removed, dead code.
+ * flexifile.h: Ditto.
+
Thu Feb 8 06:34:52 2007 Ben Pfaff <blp@gnu.org>
* [!(HAVE_SYS_TYPES_H && HAVE_SYS_WAIT_H)] (connect_debugger) In
src_ui_libuicommon_a_SOURCES = \
src/ui/debugger.c \
- src/ui/debugger.h \
- src/ui/flexifile.c \
- src/ui/flexifile.h
+ src/ui/debugger.h
+++ /dev/null
-/* PSPP - computes sample statistics.
-
- Copyright (C) 2006, 2007 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <config.h>
-#include <xalloc.h>
-#include <assert.h>
-#include "flexifile.h"
-#include <string.h>
-#include <data/casefile.h>
-#include <data/casefile-private.h>
-#include <data/case.h>
-#include <libpspp/compiler.h>
-
-
-struct class_flexifile
-{
- struct class_casefile parent;
-
- bool (*get_case) (const struct flexifile *, unsigned long, struct ccase *);
-
- bool (*insert_case) (struct flexifile *, struct ccase *, int );
- bool (*delete_cases) (struct flexifile *, int, int );
-
- bool (*resize) (struct flexifile *, int, int );
-};
-
-static const struct class_flexifile class;
-
-#define CLASS_FLEXIFILE(K) ((struct class_flexifile *) K)
-#define CONST_CLASS_FLEXIFILE(K) ((const struct class_flexifile *) K)
-
-
-/* A flexifile. */
-struct flexifile
-{
- struct casefile cf; /* Parent */
-
- size_t value_cnt; /* Case size in `union value's. */
- unsigned long case_cnt; /* Number of cases stored. */
-
-
- /* Memory storage. */
- struct ccase *cases; /* Pointer to array of cases. */
- unsigned long capacity; /* size of array in cases */
-};
-
-struct class_flexifilereader
-{
- struct class_casereader parent ;
-};
-
-static const struct class_flexifilereader class_reader;
-
-/* For reading out the cases in a flexifile. */
-struct flexifilereader
-{
- struct casereader cr; /* Parent */
-
- unsigned long case_idx; /* Case number of current case. */
- bool destructive; /* Is this a destructive reader? */
-};
-
-
-
-#define CHUNK_SIZE 10
-
-static bool
-impl_get_case(const struct flexifile *ff, unsigned long casenum,
- struct ccase *);
-static bool
-impl_insert_case (struct flexifile *ff, struct ccase *c, int posn);
-
-static bool
-impl_delete_cases (struct flexifile *ff, int n_cases, int first);
-
-static bool
-impl_resize (struct flexifile *ff, int n_values, int posn);
-
-
-/* Gets a case, for which writing may not be safe */
-bool
-flexifile_get_case(const struct flexifile *ff, unsigned long casenum,
- struct ccase *c)
-{
- const struct class_flexifile *class =
- CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ;
-
- return class->get_case(ff, casenum, c);
-}
-
-
-/* Insert N_VALUES before POSN.
- If N_VALUES is negative, then deleted -N_VALUES instead
-*/
-bool
-flexifile_resize (struct flexifile *ff, int n_values, int posn)
-{
- const struct class_flexifile *class =
- CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ;
-
- return class->resize(ff, n_values, posn);
-}
-
-
-
-bool
-flexifile_insert_case (struct flexifile *ff, struct ccase *c, int posn)
-{
- const struct class_flexifile *class =
- CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ;
-
- return class->insert_case(ff, c, posn);
-}
-
-
-bool
-flexifile_delete_cases (struct flexifile *ff, int n_cases, int first)
-{
- const struct class_flexifile *class =
- CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ;
-
- return class->delete_cases (ff, n_cases, first);
-}
-
-
-static unsigned long
-flexifile_get_case_cnt (const struct casefile *cf)
-{
- return FLEXIFILE(cf)->case_cnt;
-}
-
-static size_t
-flexifile_get_value_cnt (const struct casefile *cf)
-{
- return FLEXIFILE(cf)->value_cnt;
-}
-
-
-static void
-flexifile_destroy (struct casefile *cf)
-{
- int i ;
- for ( i = 0 ; i < FLEXIFILE(cf)->case_cnt; ++i )
- case_destroy( &FLEXIFILE(cf)->cases[i]);
-
- free(FLEXIFILE(cf)->cases);
-}
-
-static void
-grow(struct flexifile *ff)
-{
- ff->capacity += CHUNK_SIZE;
- ff->cases = xrealloc(ff->cases, ff->capacity * sizeof ( *ff->cases) );
-}
-
-static bool
-flexifile_append (struct casefile *cf, const struct ccase *c)
-{
- struct flexifile *ff = FLEXIFILE(cf);
-
- if (ff->case_cnt >= ff->capacity)
- grow(ff);
-
- case_clone (&ff->cases[ff->case_cnt++], c);
-
- return true;
-}
-
-static unsigned long
-flexifilereader_cnum (const struct casereader *cr)
-{
- struct flexifilereader *ffr = FLEXIFILEREADER(cr);
-
- return ffr->case_idx;
-}
-
-static struct ccase *
-flexifilereader_get_next_case (struct casereader *cr)
-{
- struct flexifilereader *ffr = FLEXIFILEREADER(cr);
- struct flexifile *ff = FLEXIFILE(casereader_get_casefile(cr));
-
- if ( ffr->case_idx >= ff->case_cnt)
- return NULL;
-
- return &ff->cases[ffr->case_idx++];
-}
-
-static void
-flexifilereader_destroy(struct casereader *r)
-{
- free(r);
-}
-
-static struct casereader *
-flexifile_get_reader (const struct casefile *cf_)
-{
- struct casefile *cf = (struct casefile *) cf_;
- struct flexifilereader *ffr = xzalloc (sizeof *ffr);
- struct casereader *reader = (struct casereader *) ffr;
-
- casereader_register (cf, reader, CLASS_CASEREADER(&class_reader));
-
- return reader;
-}
-
-
-static struct casereader *
-flexifilereader_clone (const struct casereader *cr)
-{
- const struct flexifilereader *ffr = (const struct flexifilereader *) cr;
- struct flexifilereader *new_ffr = xzalloc (sizeof *new_ffr);
- struct casereader *new_reader = (struct casereader *) new_ffr;
- struct casefile *cf = casereader_get_casefile (cr);
-
- casereader_register (cf, new_reader, CLASS_CASEREADER(&class_reader));
-
- new_ffr->case_idx = ffr->case_idx ;
- new_ffr->destructive = ffr->destructive ;
-
- return new_reader;
-}
-
-
-static bool
-flexifile_in_core(const struct casefile *cf UNUSED)
-{
- /* Always in memory */
- return true;
-}
-
-static bool
-flexifile_error (const struct casefile *cf UNUSED )
-{
- return false;
-}
-
-
-struct casefile *
-flexifile_create (size_t value_cnt)
-{
- struct flexifile *ff = xzalloc (sizeof *ff);
- struct casefile *cf = (struct casefile *) ff;
-
- casefile_register (cf, (struct class_casefile *) &class);
-
- ff->value_cnt = value_cnt;
-
- ff->cases = xzalloc(sizeof (struct ccase *) * CHUNK_SIZE);
- ff->capacity = CHUNK_SIZE;
-
- return cf;
-}
-
-static const struct class_flexifile class = {
- {
- flexifile_destroy,
- flexifile_error,
- flexifile_get_value_cnt,
- flexifile_get_case_cnt,
- flexifile_get_reader,
- flexifile_append,
-
- flexifile_in_core,
- 0, /* to_disk */
- 0 /* sleep */
- },
-
- impl_get_case ,
- impl_insert_case ,
- impl_delete_cases,
- impl_resize,
-};
-
-
-static const struct class_flexifilereader class_reader =
- {
- {
- flexifilereader_get_next_case,
- flexifilereader_cnum,
- flexifilereader_destroy,
- flexifilereader_clone
- }
- };
-
-
-/* Implementations of class methods */
-
-static bool
-impl_get_case(const struct flexifile *ff, unsigned long casenum,
- struct ccase *c)
-{
- if ( casenum >= ff->case_cnt)
- return false;
-
- case_clone (c, &ff->cases[casenum]);
-
- return true;
-}
-
-#if DEBUGGING
-#include <stdio.h>
-
-static void
-dumpcasedata(struct ccase *c)
-{
- size_t value_cnt = case_get_value_cnt (c);
- int i;
- for ( i = 0 ; i < value_cnt * MAX_SHORT_STRING; ++i )
- putchar (case_str (c, 0)[i]);
- putchar('\n');
-}
-#endif
-
-static bool
-impl_resize (struct flexifile *ff, int n_values, int posn)
-{
- int i;
-
- for( i = 0 ; i < ff->case_cnt ; ++i )
- {
- struct ccase c;
- case_create (&c, ff->value_cnt + n_values);
-
- case_copy (&c, 0, &ff->cases[i], 0, posn);
- if ( n_values > 0 )
- memset (case_data_rw_idx(&c, posn), ' ', n_values * MAX_SHORT_STRING) ;
- case_copy (&c, posn + n_values,
- &ff->cases[i], posn, ff->value_cnt - posn);
-
- case_destroy (&ff->cases[i]);
- ff->cases[i] = c;
- }
-
- ff->value_cnt += n_values;
-
- return true;
-}
-
-static bool
-impl_insert_case (struct flexifile *ff, struct ccase *c, int posn)
-{
- int i;
- struct ccase blank;
-
- assert (ff);
-
- if ( posn > ff->case_cnt )
- return false;
-
- if ( posn >= ff->capacity )
- grow(ff);
-
- case_create(&blank, ff->value_cnt);
-
- flexifile_append(CASEFILE(ff), &blank);
-
- case_destroy(&blank);
-
- /* Shift the existing cases down one */
- for ( i = ff->case_cnt ; i > posn; --i)
- case_move(&ff->cases[i], &ff->cases[i-1]);
-
- case_clone (&ff->cases[posn], c);
-
- return true;
-}
-
-
-static bool
-impl_delete_cases (struct flexifile *ff, int n_cases, int first)
-{
- int i;
-
- if ( ff->case_cnt < first + n_cases )
- return false;
-
- for ( i = first ; i < first + n_cases; ++i )
- case_destroy (&ff->cases[i]);
-
- /* Shift the cases up by N_CASES */
- for ( i = first; i < ff->case_cnt - n_cases; ++i )
- {
- case_move (&ff->cases[i], &ff->cases[i+ n_cases]);
- }
-
- ff->case_cnt -= n_cases;
-
- return true;
-}
-
-
-
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#ifndef FLEXIFILE_H
-#define FLEXIFILE_H
-
-#include <config.h>
-#include <stdbool.h>
-#include <stdlib.h>
-
-struct ccase;
-struct casefile;
-struct casereader;
-struct flexifile;
-struct flexifilereader;
-
-#define FLEXIFILE(CF) ( (struct flexifile *) CF)
-#define FLEXIFILEREADER(CR) ( (struct flexifilereader *) CR)
-
-struct casefile *flexifile_create (size_t value_cnt);
-
-bool flexifile_get_case(const struct flexifile *ff, unsigned long casenum,
- struct ccase *const c);
-
-bool flexifile_resize (struct flexifile *ff, int n_values, int posn);
-
-bool flexifile_insert_case (struct flexifile *ff, struct ccase *c, int posn);
-bool flexifile_delete_cases (struct flexifile *ff, int n_cases, int first);
-
-
-#endif
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ Adapt case sources, sinks, and clients of procedure code to the
+ new infrastructure.
+
+ * automake.mk: Removed files.
+
+ * flexifile-factory.c: Removed, dead code.
+ * flexifile-factory.h: Ditto.
+
+ * helper.c: Adapt to new procedure and datasheet code.
+ * missing-val-dialog.c: Ditto.
+ * psppire-case-file.c: Ditto.
+ * psppire-data-store.c: Ditto.
+ * psppire.c: Ditto.
+
2007-06-03 Ben Pfaff <blp@gnu.org>
* psppire-var-store.c (psppire_var_store_item_editable): Use
src/ui/gui/dialog-common.h \
src/ui/gui/dict-display.c \
src/ui/gui/dict-display.h \
- src/ui/gui/flexifile-factory.h \
- src/ui/gui/flexifile-factory.c \
src/ui/gui/main.c \
src/ui/gui/message-dialog.c \
src/ui/gui/message-dialog.h \
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#include <config.h>
-
-#include <stdlib.h>
-#include <libpspp/alloc.h>
-#include <libpspp/compiler.h>
-#include "flexifile-factory.h"
-#include <ui/flexifile.h>
-#include <data/casefile-factory.h>
-
-
-struct flexifile_factory
- {
- struct casefile_factory parent;
- };
-
-
-static struct casefile *
-produce_flexifile (struct casefile_factory *this UNUSED, size_t value_cnt)
-{
- struct casefile *ff = flexifile_create (value_cnt);
-
- return ff;
-}
-
-
-struct casefile_factory *
-flexifile_factory_create (void)
-{
- struct flexifile_factory *fact = xzalloc (sizeof (*fact));
-
- fact->parent.create_casefile = produce_flexifile;
-
- return (struct casefile_factory *) fact;
-}
-
-
-void
-flexifile_factory_destroy (struct casefile_factory *factory)
-{
- free (factory);
-}
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 2006 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-#ifndef FLEXIFILE_FACTORY_H
-#define FLEXIFILE_FACTORY_H
-
-
-struct casefile_factory ;
-
-struct casefile_factory * flexifile_factory_create (void);
-void flexifile_factory_destroy (struct casefile_factory *);
-
-#endif
#include <data/data-in.h>
#include <data/data-out.h>
#include <data/dictionary.h>
-#include <data/storage-stream.h>
#include <libpspp/message.h>
#include <libpspp/i18n.h>
{
struct lexer *lexer;
- g_return_val_if_fail (proc_has_source (the_dataset), FALSE);
+ g_return_val_if_fail (proc_has_active_file (the_dataset), FALSE);
lexer = lex_create (the_source_stream);
lex_destroy (lexer);
- /* The GUI must *always* have a data source, even if it's an empty one.
- Therefore, we find that there is none, (for example NEW FILE was the last
- item in the syntax) then we create a new one. */
- if ( ! proc_has_source (the_dataset))
- proc_set_source (the_dataset,
- storage_source_create (the_data_store->case_file->flexifile)
- );
-
/* GUI syntax needs this implicit EXECUTE command at the end of
every script. Otherwise commands like GET could leave the GUI without
a casefile. */
- return procedure (the_dataset, NULL, NULL);
+ return proc_execute (the_dataset);
}
gint nvals = 0;
gint badvals = 0;
gint i;
- mv_clear (&dialog->mvl);
- for (i = 0 ; i < 3 ; ++i )
+ mv_clear(&dialog->mvl);
+ for(i = 0 ; i < 3 ; ++i )
{
gchar *text =
g_strdup (gtk_entry_get_text (GTK_ENTRY (dialog->mv[i])));
#include <gtksheet/gtkextra-marshal.h>
#include <data/case.h>
-#include <ui/flexifile.h>
-#include "flexifile-factory.h"
-#include <data/casefile.h>
#include <data/data-in.h>
+#include <data/datasheet.h>
#include <math/sort.h>
#include <libpspp/misc.h>
+#include "xalloc.h"
+#include "xallocsa.h"
+
/* --- prototypes --- */
static void psppire_case_file_class_init (PsppireCaseFileClass *class);
static void psppire_case_file_init (PsppireCaseFile *case_file);
{
PsppireCaseFile *cf = PSPPIRE_CASE_FILE (object);
- if ( cf->flexifile)
- casefile_destroy (cf->flexifile);
+ datasheet_destroy (cf->datasheet);
G_OBJECT_CLASS (parent_class)->finalize (object);
}
static void
psppire_case_file_init (PsppireCaseFile *cf)
{
- cf->flexifile = 0;
+ cf->datasheet = NULL;
}
{
PsppireCaseFile *cf = g_object_new (G_TYPE_PSPPIRE_CASE_FILE, NULL);
- cf->flexifile = flexifile_create (0);
+ cf->datasheet = datasheet_create (NULL);
return cf;
}
void
-psppire_case_file_replace_flexifile (PsppireCaseFile *cf, struct flexifile *ff)
+psppire_case_file_replace_datasheet (PsppireCaseFile *cf, struct datasheet *ds)
{
- cf->flexifile = (struct casefile *) ff;
+ cf->datasheet = ds;
}
gboolean
psppire_case_file_delete_cases (PsppireCaseFile *cf, gint n_cases, gint first)
{
- int result;
-
g_return_val_if_fail (cf, FALSE);
- g_return_val_if_fail (cf->flexifile, FALSE);
+ g_return_val_if_fail (cf->datasheet, FALSE);
- result = flexifile_delete_cases (FLEXIFILE (cf->flexifile), n_cases, first);
+ datasheet_delete_rows (cf->datasheet, first, n_cases);
g_signal_emit (cf, signals [CASES_DELETED], 0, n_cases, first);
- return result;
+ return TRUE;
}
/* Insert case CC into the case file before POSN */
struct ccase *cc,
gint posn)
{
+ struct ccase tmp;
bool result ;
g_return_val_if_fail (cf, FALSE);
- g_return_val_if_fail (cf->flexifile, FALSE);
+ g_return_val_if_fail (cf->datasheet, FALSE);
- result = flexifile_insert_case (FLEXIFILE (cf->flexifile), cc, posn);
+ case_clone (&tmp, cc);
+ result = datasheet_insert_rows (cf->datasheet, posn, &tmp, 1);
if ( result )
g_signal_emit (cf, signals [CASE_INSERTED], 0, posn);
psppire_case_file_append_case (PsppireCaseFile *cf,
struct ccase *c)
{
+ struct ccase tmp;
bool result ;
gint posn ;
g_return_val_if_fail (cf, FALSE);
- g_return_val_if_fail (cf->flexifile, FALSE);
+ g_return_val_if_fail (cf->datasheet, FALSE);
- posn = casefile_get_case_cnt (cf->flexifile);
+ posn = datasheet_get_row_cnt (cf->datasheet);
- result = casefile_append (cf->flexifile, c);
+ case_clone (&tmp, c);
+ result = datasheet_insert_rows (cf->datasheet, posn, &tmp, 1);
g_signal_emit (cf, signals [CASE_INSERTED], 0, posn);
{
g_return_val_if_fail (cf, FALSE);
- if ( ! cf->flexifile)
+ if ( ! cf->datasheet)
return 0;
- return casefile_get_case_cnt (cf->flexifile);
+ return datasheet_get_row_cnt (cf->datasheet);
}
-/* Return the IDXth value from case CASENUM.
- The return value must not be freed or written to
- */
-const union value *
-psppire_case_file_get_value (const PsppireCaseFile *cf, gint casenum, gint idx)
+/* Copies the IDXth value from case CASENUM into VALUE.
+ If VALUE is null, then memory for it is allocated with malloc
+ and must be freed by the caller. Returns the value if
+ successful, NULL on failure. */
+union value *
+psppire_case_file_get_value (const PsppireCaseFile *cf,
+ casenumber casenum, size_t idx,
+ union value *value, int width)
{
- const union value *v;
- struct ccase c;
-
- g_return_val_if_fail (cf, NULL);
- g_return_val_if_fail (cf->flexifile, NULL);
-
- g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), NULL);
+ bool allocated;
+
+ g_return_val_if_fail (cf, false);
+ g_return_val_if_fail (cf->datasheet, false);
- flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &c);
+ g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), false);
- v = case_data_idx (&c, idx);
- case_destroy (&c);
-
- return v;
+ if (value == NULL)
+ {
+ value = xnmalloc (value_cnt_from_width (width), sizeof *value);
+ allocated = true;
+ }
+ else
+ allocated = false;
+ if (!datasheet_get_value (cf->datasheet, casenum, idx, value, width))
+ {
+ if (allocated)
+ free (value);
+ value = NULL;
+ }
+ return value;
}
void
psppire_case_file_clear (PsppireCaseFile *cf)
{
- casefile_destroy (cf->flexifile);
- cf->flexifile = 0;
+ datasheet_destroy (cf->datasheet);
+ cf->datasheet = NULL;
g_signal_emit (cf, signals [CASES_DELETED], 0, 0, -1);
}
-/* Set the IDXth value of case C to SYSMIS/EMPTY */
+/* Set the IDXth value of case CASENUM to V.
+ Returns true if successful, false on I/O error. */
gboolean
psppire_case_file_set_value (PsppireCaseFile *cf, gint casenum, gint idx,
union value *v, gint width)
{
- struct ccase cc ;
- int bytes;
+ bool ok;
g_return_val_if_fail (cf, FALSE);
- g_return_val_if_fail (cf->flexifile, FALSE);
-
- g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), FALSE);
+ g_return_val_if_fail (cf->datasheet, FALSE);
- if ( ! flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &cc) )
- return FALSE;
+ g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), FALSE);
- if ( width == 0 )
- bytes = MAX_SHORT_STRING;
- else
- bytes = DIV_RND_UP (width, MAX_SHORT_STRING) * MAX_SHORT_STRING ;
-
- /* Cast away const in flagrant abuse of the casefile */
- memcpy ((union value *)case_data_idx (&cc, idx), v, bytes);
-
- g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
-
- return TRUE;
+ ok = datasheet_put_value (cf->datasheet, casenum, idx, v, width);
+ if (ok)
+ g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
+ return ok;
}
psppire_case_file_data_in (PsppireCaseFile *cf, gint casenum, gint idx,
struct substring input, const struct fmt_spec *fmt)
{
- struct ccase cc ;
+ union value *value;
+ int width;
+ bool ok;
g_return_val_if_fail (cf, FALSE);
- g_return_val_if_fail (cf->flexifile, FALSE);
+ g_return_val_if_fail (cf->datasheet, FALSE);
- g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), FALSE);
+ g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), FALSE);
- if ( ! flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &cc) )
- return FALSE;
+ width = fmt_var_width (fmt);
+ value = xallocsa (value_cnt_from_width (width) * sizeof *value);
+ ok = (datasheet_get_value (cf->datasheet, casenum, idx, value, width)
+ && data_in (input, fmt->type, 0, 0, value, width)
+ && datasheet_put_value (cf->datasheet, casenum, idx, value, width));
- /* Cast away const in flagrant abuse of the casefile */
- if (!data_in (input, fmt->type, 0, 0,
- (union value *) case_data_idx (&cc, idx), fmt_var_width (fmt)))
- g_warning ("Cant set value\n");
+ if (ok)
+ g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
- g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
+ freesa (value);
return TRUE;
}
void
-psppire_case_file_sort (PsppireCaseFile *cf, const struct sort_criteria *sc)
+psppire_case_file_sort (PsppireCaseFile *cf, struct case_ordering *ordering)
{
+ struct casereader *sorted_data;
gint c;
- struct casereader *reader = casefile_get_reader (cf->flexifile, NULL);
- struct casefile *cfile;
-
- struct casefile_factory *factory = flexifile_factory_create ();
-
- cfile = sort_execute (reader, sc, factory);
-
- casefile_destroy (cf->flexifile);
-
- cf->flexifile = cfile;
+ sorted_data = sort_execute (datasheet_make_reader (cf->datasheet), ordering);
+ cf->datasheet = datasheet_create (sorted_data);
/* FIXME: Need to have a signal to change a range of cases, instead of
calling a signal many times */
- for ( c = 0 ; c < casefile_get_case_cnt (cf->flexifile) ; ++c )
+ for ( c = 0 ; c < datasheet_get_row_cnt (cf->datasheet) ; ++c )
g_signal_emit (cf, signals [CASE_CHANGED], 0, c);
-
- flexifile_factory_destroy (factory);
}
psppire_case_file_insert_values (PsppireCaseFile *cf,
gint n_values, gint before)
{
+ union value *values;
g_return_val_if_fail (cf, FALSE);
- if ( ! cf->flexifile )
- {
- cf->flexifile = flexifile_create (n_values);
+ if ( ! cf->datasheet )
+ cf->datasheet = datasheet_create (NULL);
- return TRUE;
- }
+ values = xcalloc (n_values, sizeof *values);
+ datasheet_insert_columns (cf->datasheet, values, n_values, before);
+ free (values);
- return flexifile_resize (FLEXIFILE (cf->flexifile), n_values, before);
+ return TRUE;
}
/* Fills C with the CASENUMth case.
struct ccase *c)
{
g_return_val_if_fail (cf, FALSE);
- g_return_val_if_fail (cf->flexifile, FALSE);
+ g_return_val_if_fail (cf->datasheet, FALSE);
- return flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, c);
+ return datasheet_get_row (cf->datasheet, casenum, c);
}
#include <glib.h>
#include <libpspp/str.h>
+#include <data/case.h>
{
GObject parent;
- struct casefile *flexifile;
+ struct datasheet *datasheet;
};
gint psppire_case_file_get_case_count (const PsppireCaseFile *cf);
-const union value * psppire_case_file_get_value (const PsppireCaseFile *cf,
- gint c, gint idx);
+union value * psppire_case_file_get_value (const PsppireCaseFile *cf,
+ casenumber, size_t idx,
+ union value *, int width);
struct fmt_spec;
gboolean psppire_case_file_insert_values (PsppireCaseFile *cf, gint n_values, gint before);
-struct sort_criteria;
-void psppire_case_file_sort (PsppireCaseFile *cf, const struct sort_criteria *);
+struct case_ordering;
+void psppire_case_file_sort (PsppireCaseFile *cf, struct case_ordering *);
gboolean psppire_case_file_get_case (const PsppireCaseFile *cf, gint casenum,
struct ccase *c);
-void psppire_case_file_replace_flexifile (PsppireCaseFile *,
- struct flexifile *);
+void psppire_case_file_replace_datasheet (PsppireCaseFile *,
+ struct datasheet *);
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
-#include <data/casefile.h>
-#include <data/case.h>
+#include <data/casewriter.h>
+#include <data/datasheet.h>
#include <data/data-out.h>
#include <data/variable.h>
/* Opportunity for optimisation exists here when creating a blank case */
- val_cnt = casefile_get_value_cnt (ds->case_file->flexifile) ;
+ val_cnt = datasheet_get_column_cnt (ds->case_file->datasheet) ;
case_create (&cc, val_cnt);
char *text;
const struct fmt_spec *fp ;
const struct variable *pv ;
- const union value *v ;
+ union value *v ;
GString *s;
PsppireDataStore *store = PSPPIRE_DATA_STORE (model);
g_assert (idx >= 0);
- v = psppire_case_file_get_value (store->case_file, row, idx);
-
+ v = psppire_case_file_get_value (store->case_file, row, idx, NULL,
+ var_get_width (pv));
+
g_return_val_if_fail (v, NULL);
if ( store->show_labels)
{
- const struct val_labs * vl = var_get_value_labels (pv);
-
- const gchar *label;
- if ( (label = val_labs_find (vl, *v)) )
- {
+ const gchar *label = var_lookup_value_label (pv, v);
+ if (label)
+ {
+ free (v);
return pspp_locale_to_utf8 (label, -1, 0);
- }
+ }
}
fp = var_get_write_format (pv);
g_strchomp (text);
+ free (v);
return text;
}
3 /* version */
};
- struct sfm_writer *writer ;
+ struct casewriter *writer;
g_assert (handle);
for (i = 0 ; i < psppire_case_file_get_case_count (store->case_file); ++i )
{
struct ccase c;
-
- case_create (&c, var_cnt);
psppire_case_file_get_case (store->case_file, i, &c);
- sfm_write_case (writer, &c);
-
- case_destroy (&c);
+ casewriter_write (writer, &c);
}
-
- sfm_close_writer (writer);
+ casewriter_destroy (writer);
}
#include "psppire.h"
+#include <data/casereader.h>
+#include <data/datasheet.h>
#include <data/file-handle-def.h>
#include <data/format.h>
-#include <data/storage-stream.h>
-#include <data/case-source.h>
#include <data/settings.h>
#include <data/file-name.h>
#include <data/procedure.h>
#include <libpspp/getl.h>
#include <language/lexer/lexer.h>
-#include <ui/flexifile.h>
#include <libpspp/version.h>
#include <gtk/gtk.h>
#include "data-sheet.h"
#include "var-sheet.h"
#include "message-dialog.h"
-#include "flexifile-factory.h"
PsppireDataStore *the_data_store = 0;
PsppireVarStore *the_var_store = 0;
static void
-replace_flexifile (struct case_source *s)
+replace_casereader (struct casereader *s)
{
- if ( NULL == s )
- psppire_case_file_replace_flexifile (the_data_store->case_file,
- (struct flexifile *) flexifile_create (0));
- else
- {
- if ( ! case_source_is_class (s, &storage_source_class))
- return ;
-
- psppire_case_file_replace_flexifile (the_data_store->case_file,
- (struct flexifile *)
- storage_source_get_casefile (s));
- }
-}
-
+ struct datasheet *datasheet = datasheet_create (s);
+ psppire_case_file_replace_datasheet (the_data_store->case_file,
+ datasheet);
+}
void
initialize (void)
{
- struct casefile_factory *factory;
PsppireDict *dictionary = 0;
/* gtk_init messes with the locale.
fmt_init ();
settings_init ();
fh_init ();
- factory = flexifile_factory_create ();
the_source_stream =
create_source_stream (
fn_getenv_default ("STAT_INCLUDE_PATH", include_path)
);
- the_dataset = create_dataset (factory,
- replace_flexifile,
+ the_dataset = create_dataset (replace_casereader,
replace_dictionary);
message_dialog_init (the_source_stream);
/* Create the model for the var_sheet */
the_var_store = psppire_var_store_new (dictionary);
-
the_data_store = psppire_data_store_new (dictionary);
- proc_set_source (the_dataset,
- storage_source_create (the_data_store->case_file->flexifile)
- );
+
+ proc_set_active_file_data (the_dataset,
+ datasheet_make_reader (the_data_store->case_file->datasheet));
+
create_icon_factory ();
#include <gtk/gtk.h>
#include <glade/glade.h>
+#include <data/variable.h>
struct val_labs;
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ Adapt case sources, sinks, and clients of procedure code to the
+ new infrastructure.
+
+ * main.c: No need for fastfile_factory any more.
+
2007-02-25 Ben Pfaff <blp@gnu.org>
Thanks to Jason Stover for verifying that this patch helps under
#include "progname.h"
#include "read-line.h"
-#include <data/fastfile-factory.h>
#include <data/dictionary.h>
#include <data/file-handle-def.h>
#include <libpspp/getl.h>
int
main (int argc, char **argv)
{
- struct casefile_factory *factory;
signal (SIGABRT, bug_handler);
signal (SIGSEGV, bug_handler);
signal (SIGFPE, bug_handler);
settings_init ();
random_init ();
- factory = fastfile_factory_create ();
-
- the_dataset = create_dataset (factory, NULL, NULL);
+ the_dataset = create_dataset (NULL, NULL);
if (parse_command_line (argc, argv, the_source_stream))
{
+2007-06-06 Ben Pfaff <blp@gnu.org>
+
+ * automake.mk: Remove test.
+
+ * tests/xforms/casefile.sh: Removed test.
+
2007-06-06 Ben Pfaff <blp@gnu.org>
* automake.mk: Add new test.
tests/bugs/temp-freq.sh \
tests/bugs/print-crash.sh \
tests/bugs/keep-all.sh \
- tests/xforms/casefile.sh \
tests/xforms/recode.sh \
tests/stats/descript-basic.sh \
tests/stats/descript-missing.sh \
+++ /dev/null
-#!/bin/sh
-
-# This program tests casefiles by running DEBUG CASEFILE.
-
-TEMPDIR=/tmp/pspp-tst-$$
-
-# ensure that top_builddir are absolute
-if [ -z "$top_builddir" ] ; then top_builddir=. ; fi
-if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi
-top_builddir=`cd $top_builddir; pwd`
-PSPP=$top_builddir/src/ui/terminal/pspp
-
-# ensure that top_srcdir is absolute
-top_srcdir=`cd $top_srcdir; pwd`
-
-STAT_CONFIG_PATH=$top_srcdir/config
-export STAT_CONFIG_PATH
-
-
-cleanup()
-{
- cd /
- rm -rf $TEMPDIR
-}
-
-
-fail()
-{
- echo $activity
- echo FAILED
- cleanup;
- exit 1;
-}
-
-
-no_result()
-{
- echo $activity
- echo NO RESULT;
- cleanup;
- exit 2;
-}
-
-pass()
-{
- cleanup;
- exit 0;
-}
-
-mkdir -p $TEMPDIR
-
-cd $TEMPDIR
-
-activity="create program"
-cat > $TEMPDIR/casefile.stat <<EOF
-DEBUG CASEFILE SMALL.
-EOF
-if [ $? -ne 0 ] ; then no_result ; fi
-
-activity="run program"
-$SUPERVISOR $PSPP --testing-mode $TEMPDIR/casefile.stat > $TEMPDIR/casefile.out
-if [ $? -ne 0 ] ; then no_result ; fi
-
-activity="compare results"
-perl -pi -e 's/^\s*$//g' $TEMPDIR/casefile.out
-diff -b $TEMPDIR/casefile.out - <<EOF
-Casefile tests succeeded.
-EOF
-if [ $? -ne 0 ] ; then fail ; fi
-
-
-pass;