Actually implement the new procedure code and adapt all of its clients to match

author Ben Pfaff <blp@gnu.org>

Thu, 7 Jun 2007 06:41:58 +0000 (06:41 +0000)

committer Ben Pfaff <blp@gnu.org>

Thu, 7 Jun 2007 06:41:58 +0000 (06:41 +0000)
author Ben Pfaff <blp@gnu.org>
Thu, 7 Jun 2007 06:41:58 +0000 (06:41 +0000)
committer Ben Pfaff <blp@gnu.org>
Thu, 7 Jun 2007 06:41:58 +0000 (06:41 +0000)
diff --git a/ChangeLog b/ChangeLog

index da5339d5ebbe6260d85384428a96e3f8bcabca13..2bfa3441c35f072ab6e38af361c9e50edc486038 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       * Smake: Add xallocsa to modules.
+
  2007-04-22  Ben Pfaff  <blp@gnu.org>
  
         Implement model checker for testing purposes.
diff --git a/Smake b/Smake

index 14c19db98d67fca10a3d39e3539ad4368ed0b600..902591c685f6a752303c2dd6fe86382c3bab3229 100644 (file)
--- a/Smake
+++ b/Smake
@@ -62,6 +62,7 @@ GNULIB_MODULES = \
         vsnprintf \
         xalloc \
         xalloc-die \
+       xallocsa \
         xsize \
         xstrndup \
         xvasprintf
diff --git a/src/data/ChangeLog b/src/data/ChangeLog

index b80d60c2a9164f2020a6471d3b99136f11153fa1..913249f3bb0df1abbf4caff3c50c654bbe41e378 100644 (file)
--- a/src/data/ChangeLog
+++ b/src/data/ChangeLog
@@ -1,3 +1,44 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       Actually implement the new procedure code and adapt all of its
+       clients to match.  Also adapt all of the other case sources and
+       sinks in the tree and their clients to use the
+       casereader/casewriter infrastructure.
+
+       * automake.mk: Add and remove files.
+
+       * any-reader.c: Change into a casereader.
+       * por-file-reader.c: Ditto.
+       * scratch-reader.c: Ditto.
+       * sys-file-reader.c: Ditto.
+
+       * any-writer.c: Change into a casewriter.
+       * por-file-writer.c: Ditto.
+       * scratch-writer.c: Ditto.
+       * sys-file-writer.c: Ditto.
+
+       * procedure.c: Change to use casereader, casewriter, caseinit, and
+       other new infrastructure.
+
+       * scratch-handle.c: Adapt to new infrastructure.
+
+       * case-sink.c: Removed, now dead code.
+       * case-sink.h: Ditto.
+       * case-source.c: Ditto.
+       * case-source.h: Ditto.
+       * casefile-factory.c: Ditto.
+       * casefile-private.h: Ditto.
+       * casefile.c: Ditto.
+       * casefile.h: Ditto.
+       * casefilter.c: Ditto.
+       * casefilter.h: Ditto.
+       * fastfile.c: Ditto.
+       * fastfile.h: Ditto.
+       * fastfile-factory.c: Ditto.
+       * fastfile-factory.h: Ditto.
+       * storage-stream.c: Ditto.
+       * storage-stream.h: Ditto.
+
  2007-06-06  Ben Pfaff  <blp@gnu.org>
  
         Add datasheet code.
diff --git a/src/data/any-reader.c b/src/data/any-reader.c

index 4951d490ba2484f67e2281b87aaca66ab309ff8e..009e42270d5403866c431c8f4f6b171ed8b18702 100644 (file)
--- a/src/data/any-reader.c
+++ b/src/data/any-reader.c
@@ -36,21 +36,6 @@
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  
-/* Type of file backing an any_reader. */
-enum any_reader_type
-  {
-    SYSTEM_FILE,                /* System file. */
-    PORTABLE_FILE,              /* Portable file. */
-    SCRATCH_FILE                /* Scratch file. */
-  };
-
-/* Reader for any type of case-structured file. */
-struct any_reader 
-  {
-    enum any_reader_type type;  /* Type of file. */
-    void *private;              /* Private data. */
-  };
-
  /* Result of type detection. */
  enum detect_result 
    {
@@ -83,27 +68,10 @@ try_detect (struct file_handle *handle, bool (*detect) (FILE *))
    return is_type ? YES : NO;
  }
  
-/* If PRIVATE is non-null, creates and returns a new any_reader,
-   initializing its fields to TYPE and PRIVATE.  If PRIVATE is a
-   null pointer, just returns a null pointer. */   
-static struct any_reader *
-make_any_reader (enum any_reader_type type, void *private) 
-{
-  if (private != NULL) 
-    {
-      struct any_reader *reader = xmalloc (sizeof *reader);
-      reader->type = type;
-      reader->private = private;
-      return reader;
-    }
-  else
-    return NULL;
-}
-
-/* Creates an any_reader for HANDLE.  On success, returns the new
-   any_reader and stores the file's dictionary into *DICT.  On
+/* Returns a casereader for HANDLE.  On success, returns the new
+   casereader and stores the file's dictionary into *DICT.  On
     failure, returns a null pointer. */
-struct any_reader *
+struct casereader *
  any_reader_open (struct file_handle *handle, struct dictionary **dict)
  {
    switch (fh_get_referent (handle)) 
@@ -116,15 +84,13 @@ any_reader_open (struct file_handle *handle, struct dictionary **dict)
          if (result == IO_ERROR)
            return NULL;
          else if (result == YES)
-          return make_any_reader (SYSTEM_FILE,
-                                  sfm_open_reader (handle, dict, NULL));
+          return sfm_open_reader (handle, dict, NULL);
  
          result = try_detect (handle, pfm_detect);
          if (result == IO_ERROR)
            return NULL;
          else if (result == YES)
-          return make_any_reader (PORTABLE_FILE,
-                                  pfm_open_reader (handle, dict, NULL));
+          return pfm_open_reader (handle, dict, NULL);
  
          msg (SE, _("\"%s\" is not a system or portable file."),
               fh_get_file_name (handle));
@@ -136,74 +102,7 @@ any_reader_open (struct file_handle *handle, struct dictionary **dict)
        return NULL;
  
      case FH_REF_SCRATCH:
-      return make_any_reader (SCRATCH_FILE,
-                              scratch_reader_open (handle, dict));
-    }
-  NOT_REACHED ();
-}
-
-/* Reads a single case from READER into C.
-   Returns true if successful, false at end of file or on error. */
-bool
-any_reader_read (struct any_reader *reader, struct ccase *c) 
-{
-  switch (reader->type) 
-    {
-    case SYSTEM_FILE:
-      return sfm_read_case (reader->private, c);
-
-    case PORTABLE_FILE:
-      return pfm_read_case (reader->private, c);
-
-    case SCRATCH_FILE:
-      return scratch_reader_read_case (reader->private, c);
+      return scratch_reader_open (handle, dict);
      }
    NOT_REACHED ();
  }
-
-/* Returns true if an I/O error has occurred on READER, false
-   otherwise. */
-bool
-any_reader_error (struct any_reader *reader) 
-{
-  switch (reader->type) 
-    {
-    case SYSTEM_FILE:
-      return sfm_read_error (reader->private);
-
-    case PORTABLE_FILE:
-      return pfm_read_error (reader->private);
-
-    case SCRATCH_FILE:
-      return scratch_reader_error (reader->private);
-    }
-  NOT_REACHED ();
-}
-
-/* Closes READER. */
-void
-any_reader_close (struct any_reader *reader) 
-{
-  if (reader == NULL)
-    return;
-
-  switch (reader->type) 
-    {
-    case SYSTEM_FILE:
-      sfm_close_reader (reader->private);
-      break;
-
-    case PORTABLE_FILE:
-      pfm_close_reader (reader->private);
-      break;
-
-    case SCRATCH_FILE:
-      scratch_reader_close (reader->private);
-      break;
-
-    default:
-      NOT_REACHED ();
-    }
-
-  free (reader);
-}
diff --git a/src/data/any-reader.h b/src/data/any-reader.h

index bd3c28812ca6a406c21059b9940a01468027893e..44c8cef7a122bdca3c8b69a54076536c5c0b5c4e 100644 (file)
--- a/src/data/any-reader.h
+++ b/src/data/any-reader.h
@@ -23,11 +23,7 @@
  
  struct file_handle;
  struct dictionary;
-struct ccase;
-struct any_reader *any_reader_open (struct file_handle *,
+struct casereader *any_reader_open (struct file_handle *,
                                      struct dictionary **);
-bool any_reader_read (struct any_reader *, struct ccase *);
-bool any_reader_error (struct any_reader *);
-void any_reader_close (struct any_reader *);
  
  #endif /* any-reader.h */
diff --git a/src/data/any-writer.c b/src/data/any-writer.c

index de44df4682788b8bc4d380ccb181b8a2221be204..195292afb70beceee17c82cbebbb47e1a1fde794 100644 (file)
--- a/src/data/any-writer.c
+++ b/src/data/any-writer.c
@@ -36,41 +36,26 @@
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  
-/* Type of file backing an any_writer. */
-enum any_writer_type
-  {
-    SYSTEM_FILE,                /* System file. */
-    PORTABLE_FILE,              /* Portable file. */
-    SCRATCH_FILE                /* Scratch file. */
-  };
-
-/* Writer for any type of case-structured file. */
-struct any_writer 
-  {
-    enum any_writer_type type;  /* Type of file. */
-    void *private;              /* Private data. */
-  };
-
  /* Creates and returns a writer for HANDLE with the given DICT. */
-struct any_writer *
+struct casewriter *
  any_writer_open (struct file_handle *handle, struct dictionary *dict)
  {
    switch (fh_get_referent (handle)) 
      {
      case FH_REF_FILE:
        {
-        struct any_writer *writer;
+        struct casewriter *writer;
          char *extension;
  
          extension = fn_extension (fh_get_file_name (handle));
          str_lowercase (extension);
  
          if (!strcmp (extension, ".por"))
-          writer = any_writer_from_pfm_writer (
-            pfm_open_writer (handle, dict, pfm_writer_default_options ()));
+          writer = pfm_open_writer (handle, dict,
+                                    pfm_writer_default_options ());
          else
-          writer = any_writer_from_sfm_writer (
-            sfm_open_writer (handle, dict, sfm_writer_default_options ()));
+          writer = sfm_open_writer (handle, dict,
+                                    sfm_writer_default_options ());
          free (extension);
  
          return writer;
@@ -81,137 +66,8 @@ any_writer_open (struct file_handle *handle, struct dictionary *dict)
        return NULL;
  
      case FH_REF_SCRATCH:
-      return any_writer_from_scratch_writer (scratch_writer_open (handle,
-                                                                  dict));
+      return scratch_writer_open (handle, dict);
      }
  
    NOT_REACHED ();
  }
-
-/* If PRIVATE is non-null, creates and returns a new any_writer,
-   initializing its fields to TYPE and PRIVATE.  If PRIVATE is a
-   null pointer, just returns a null pointer. */   
-static struct any_writer *
-make_any_writer (enum any_writer_type type, void *private) 
-{
-  if (private != NULL) 
-    {
-      struct any_writer *writer = xmalloc (sizeof *writer);
-      writer->type = type;
-      writer->private = private;
-      return writer; 
-    }
-  else
-    return NULL;
-}
-  
-/* If SFM_WRITER is non-null, encapsulates SFM_WRITER in an
-   any_writer and returns it.  If SFM_WRITER is null, just
-   returns a null pointer.
-
-   Useful when you need to pass options to sfm_open_writer().
-   Typical usage:
-        any_writer_from_sfm_writer (sfm_open_writer (fh, dict, opts))
-   If you don't need to pass options, then any_writer_open() by
-   itself is easier and more straightforward. */
-struct any_writer *
-any_writer_from_sfm_writer (struct sfm_writer *sfm_writer) 
-{
-  return make_any_writer (SYSTEM_FILE, sfm_writer);
-}
-
-/* If PFM_WRITER is non-null, encapsulates PFM_WRITER in an
-   any_writer and returns it.  If PFM_WRITER is null, just
-   returns a null pointer.
-
-   Useful when you need to pass options to pfm_open_writer().
-   Typical usage:
-        any_writer_from_pfm_writer (pfm_open_writer (fh, dict, opts))
-   If you don't need to pass options, then any_writer_open() by
-   itself is easier and more straightforward. */
-struct any_writer *
-any_writer_from_pfm_writer (struct pfm_writer *pfm_writer) 
-{
-  return make_any_writer (PORTABLE_FILE, pfm_writer);
-}
-
-/* If SCRATCH_WRITER is non-null, encapsulates SCRATCH_WRITER in
-   an any_writer and returns it.  If SCRATCH_WRITER is null, just
-   returns a null pointer.
-
-   Not particularly useful.  Included just for consistency. */
-struct any_writer *
-any_writer_from_scratch_writer (struct scratch_writer *scratch_writer) 
-{
-  return make_any_writer (SCRATCH_FILE, scratch_writer);
-}
-
-/* Writes cases C to WRITER.
-   Returns true if successful, false on failure. */
-bool
-any_writer_write (struct any_writer *writer, const struct ccase *c) 
-{
-  switch (writer->type) 
-    {
-    case SYSTEM_FILE:
-      return sfm_write_case (writer->private, c);
-
-    case PORTABLE_FILE:
-      return pfm_write_case (writer->private, c);
-
-    case SCRATCH_FILE:
-      return scratch_writer_write_case (writer->private, c);
-    }
-  NOT_REACHED ();
-}
-
-/* Returns true if an I/O error has occurred on WRITER, false
-   otherwise. */
-bool
-any_writer_error (const struct any_writer *writer) 
-{
-  switch (writer->type) 
-    {
-    case SYSTEM_FILE:
-      return sfm_write_error (writer->private);
-
-    case PORTABLE_FILE:
-      return pfm_write_error (writer->private);
-
-    case SCRATCH_FILE:
-      return scratch_writer_error (writer->private);
-    }
-  NOT_REACHED ();
-}
-
-/* Closes WRITER.
-   Returns true if successful, false if an I/O error occurred. */
-bool
-any_writer_close (struct any_writer *writer) 
-{
-  bool ok;
-  
-  if (writer == NULL)
-    return true;
-
-  switch (writer->type) 
-    {
-    case SYSTEM_FILE:
-      ok = sfm_close_writer (writer->private);
-      break;
-
-    case PORTABLE_FILE:
-      ok = pfm_close_writer (writer->private);
-      break;
-
-    case SCRATCH_FILE:
-      ok = scratch_writer_close (writer->private);
-      break;
-      
-    default:
-      NOT_REACHED ();
-    }
-
-  free (writer);
-  return ok;
-}
diff --git a/src/data/any-writer.h b/src/data/any-writer.h

index 46c3624c54beb55f9b671af3d1fa16d0c13e300e..927e61d3eb1b7cc27d4dd101c11f6dda91013350 100644 (file)
--- a/src/data/any-writer.h
+++ b/src/data/any-writer.h
@@ -23,18 +23,7 @@
  
  struct file_handle;
  struct dictionary;
-struct ccase;
-struct sfm_writer;
-struct pfm_writer;
-struct scratch_writer;
-
-struct any_writer *any_writer_open (struct file_handle *, struct dictionary *);
-struct any_writer *any_writer_from_sfm_writer (struct sfm_writer *);
-struct any_writer *any_writer_from_pfm_writer (struct pfm_writer *);
-struct any_writer *any_writer_from_scratch_writer (struct scratch_writer *);
-
-bool any_writer_write (struct any_writer *, const struct ccase *);
-bool any_writer_error (const struct any_writer *);
-bool any_writer_close (struct any_writer *);
+
+struct casewriter *any_writer_open (struct file_handle *, struct dictionary *);
  
  #endif /* any-writer.h */
diff --git a/src/data/automake.mk b/src/data/automake.mk

index ea4dd3c7c73e04ba070b57a06aa8b69e7c8e11ee..bc056531d95efa489482c6cf6e14de57528f7ad1 100644 (file)
--- a/src/data/automake.mk
+++ b/src/data/automake.mk
@@ -10,17 +10,7 @@ src_data_libdata_a_SOURCES = \
         src/data/calendar.h \
         src/data/case-ordering.c \
         src/data/case-ordering.h \
-       src/data/case-sink.c \
-       src/data/case-sink.h \
-       src/data/case-source.c \
-       src/data/case-source.h \
         src/data/case.c \
-       src/data/casefilter.c \
-       src/data/casefilter.h \
-       src/data/casefile.h \
-       src/data/casefile.c \
-       src/data/casefile-factory.h \
-       src/data/casefile-private.h \
         src/data/casegrouper.c \
         src/data/casegrouper.h \
         src/data/caseinit.c \
@@ -36,10 +26,6 @@ src_data_libdata_a_SOURCES = \
         src/data/casewriter-translator.c \
         src/data/casewriter.c \
         src/data/casewriter.h \
-       src/data/fastfile.c \
-       src/data/fastfile.h \
-       src/data/fastfile-factory.h \
-       src/data/fastfile-factory.c \
         src/data/case.h \
         src/data/case-tmpfile.c \
         src/data/case-tmpfile.h \
@@ -82,8 +68,6 @@ src_data_libdata_a_SOURCES = \
         src/data/settings.h \
         src/data/sparse-cases.c \
         src/data/sparse-cases.h \
-       src/data/storage-stream.c \
-       src/data/storage-stream.h \
         src/data/sys-file-private.c \
         src/data/sys-file-private.h \
         src/data/sys-file-reader.c \
diff --git a/src/data/case-sink.c b/src/data/case-sink.c

deleted file mode 100644 (file)

index d7be3fa..0000000
--- a/src/data/case-sink.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#include <config.h>
-
-#include <data/case-sink.h>
-
-#include <stdlib.h>
-
-#include <data/dictionary.h>
-
-#include "xalloc.h"
-
-/* Creates a case sink to accept cases from the given DICT with
-   class CLASS and auxiliary data AUX. */
-struct case_sink *
-create_case_sink (const struct case_sink_class *class,
-                  const struct dictionary *dict, struct casefile_factory *f,
-                  void *aux) 
-{
-  struct case_sink *sink = xmalloc (sizeof *sink);
-  sink->class = class;
-  sink->value_cnt = dict_get_compacted_value_cnt (dict);
-  sink->aux = aux;
-  sink->factory = f;
-  return sink;
-}
-
-/* Destroys case sink SINK.  */
-void
-free_case_sink (struct case_sink *sink) 
-{
-  if (sink != NULL) 
-    {
-      if (sink->class->destroy != NULL)
-        sink->class->destroy (sink);
-      free (sink); 
-    }
-}
-/* Null sink.  Used by a few procedures that keep track of output
-   themselves and would throw away anything that the sink
-   contained anyway. */
-
-const struct case_sink_class null_sink_class = 
-  {
-    "null",
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-  };
diff --git a/src/data/case-sink.h b/src/data/case-sink.h

deleted file mode 100644 (file)

index ec2cfd2..0000000
--- a/src/data/case-sink.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#ifndef CASE_SINK_H
-#define CASE_SINK_H 1
-
-#include <stdbool.h>
-#include <stddef.h>
-
-struct ccase;
-struct dictionary;
-
-/* A case sink. */
-struct case_sink 
-  {
-    const struct case_sink_class *class;        /* Class. */
-    void *aux;          /* Auxiliary data. */
-    struct casefile_factory *factory ;    /* Factory used to create 
-                                             the destination */
-    size_t value_cnt;   /* Number of `union value's in case. */
-  };
-
-/* A case sink class. */
-struct case_sink_class
-  {
-    const char *name;                   /* Identifying name. */
-    
-    /* Opens the sink for writing. */
-    void (*open) (struct case_sink *);
-                  
-    /* Writes a case to the sink. */
-    bool (*write) (struct case_sink *, const struct ccase *);
-    
-    /* Closes and destroys the sink. */
-    void (*destroy) (struct case_sink *);
-
-    /* Closes the sink and returns a source that can read back
-       the cases that were written, perhaps transformed in some
-       way.  The sink must still be separately destroyed by
-       calling destroy(). */
-    struct case_source *(*make_source) (struct case_sink *);
-  };
-
-extern const struct case_sink_class null_sink_class;
-
-struct casefile_factory ;
-struct case_sink *create_case_sink (const struct case_sink_class *,
-                                    const struct dictionary *,
-                                   struct casefile_factory *,
-                                    void *);
-void free_case_sink (struct case_sink *);
-
-#endif /* case-sink.h */
diff --git a/src/data/case-source.c b/src/data/case-source.c

deleted file mode 100644 (file)

index 542f300..0000000
--- a/src/data/case-source.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#include <config.h>
-
-#include <data/case-source.h>
-
-#include <stdlib.h>
-
-#include "xalloc.h"
-
-/* Creates a case source with class CLASS and auxiliary data AUX
-   and based on dictionary DICT. */
-struct case_source *
-create_case_source (const struct case_source_class *class,
-                    void *aux) 
-{
-  struct case_source *source = xmalloc (sizeof *source);
-  source->class = class;
-  source->aux = aux;
-  return source;
-}
-
-/* Destroys case source SOURCE.
-   Returns true if successful,
-   false if the source encountered an I/O error during
-   destruction or reading cases. */
-bool
-free_case_source (struct case_source *source) 
-{
-  bool ok = true;
-  if (source != NULL) 
-    {
-      if (source->class->destroy != NULL)
-        ok = source->class->destroy (source);
-      free (source);
-    }
-  return ok;
-}
-
-/* Returns true if CLASS is the class of SOURCE. */
-bool
-case_source_is_class (const struct case_source *source,
-                      const struct case_source_class *class) 
-{
-  return source != NULL && source->class == class;
-}
diff --git a/src/data/case-source.h b/src/data/case-source.h

deleted file mode 100644 (file)

index 833502c..0000000
--- a/src/data/case-source.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#ifndef CASE_SOURCE_H
-#define CASE_SOURCE_H 1
-
-#include <stdbool.h>
-
-struct ccase;
-
-/* A case source. */
-struct case_source 
-  {
-    const struct case_source_class *class;      /* Class. */
-    void *aux;          /* Auxiliary data. */
-  };
-
-/* A case source class. */
-struct case_source_class
-  {
-    const char *name;                   /* Identifying name. */
-    
-    /* Returns the exact number of cases that READ will pass to
-       WRITE_CASE, if known, or -1 otherwise. */
-    int (*count) (const struct case_source *);
-
-    /* Reads one case into C.
-       Returns true if successful, false at end of file or if an
-       I/O error occurred. */
-    bool (*read) (struct case_source *, struct ccase *);
-
-    /* Destroys the source.
-       Returns true if successful read, false if an I/O occurred
-       during destruction or previously. */
-    bool (*destroy) (struct case_source *);
-  };
-
-
-struct case_source *create_case_source (const struct case_source_class *,
-                                        void *);
-bool free_case_source (struct case_source *);
-
-bool case_source_is_class (const struct case_source *,
-                          const struct case_source_class *);
-
-#endif /* case-source.h */
diff --git a/src/data/casefile-factory.h b/src/data/casefile-factory.h

deleted file mode 100644 (file)

index cc7423e..0000000
--- a/src/data/casefile-factory.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#ifndef CASEFILE_FACTORY_H
-#define CASEFILE_FACTORY_H
-
-struct casefile_factory 
-{
-  struct casefile * (*create_casefile) (struct casefile_factory *, size_t);
-};
-
-#endif
-
diff --git a/src/data/casefile-private.h b/src/data/casefile-private.h

deleted file mode 100644 (file)

index 1375dd0..0000000
--- a/src/data/casefile-private.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2004, 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#ifndef CASEFILE_PRIVATE_H
-#define CASEFILE_PRIVATE_H
-
-#include <config.h>
-#include <stdbool.h>
-#include <libpspp/ll.h>
-
-struct ccase;
-struct casereader;
-struct casefile;
-struct casefilter;
-
-struct class_casefile
-{
-  void (*destroy) (struct casefile *) ;
-
-  bool (*error) (const struct casefile *) ;
-
-  size_t (*get_value_cnt) (const struct casefile *) ;
-  unsigned long (*get_case_cnt) (const struct casefile *) ;
-
-  struct casereader * (*get_reader) (const struct casefile *) ; 
-
-  bool (*append) (struct casefile *, const struct ccase *) ;
-
-
-  bool (*in_core) (const struct casefile *) ;
-  bool (*to_disk) (const struct casefile *) ;
-  bool (*sleep) (const struct casefile *) ;
-};
-
-struct casefile
-{
-  const struct class_casefile *class ;   /* Class pointer */
-
-  struct ll_list reader_list ;       /* List of our readers. */
-  struct ll ll ;                    /* Element in the class' list 
-                                      of casefiles. */
-  bool being_destroyed;            /* A destructive reader exists */
-};
-
-
-struct class_casereader
-{
-  struct ccase * (*get_next_case) (struct casereader *);
-
-  unsigned long (*cnum) (const struct casereader *);
-
-  void (*destroy) (struct casereader * r);
-
-  struct casereader * (*clone) (const struct casereader *);
-};
-
-
-#define CLASS_CASEREADER(K) ( (struct class_casereader *) K)
-
-struct casereader
-{
-  const struct class_casereader *class;  /* Class pointer */
-
-  struct casefile *cf;   /* The casefile to which this reader belongs */
-  struct ll ll;          /* Element in the casefile's list of readers */
-
-  struct casefilter *filter; /* The filter to be used */
-  bool destructive;      /* True if this reader is destructive */
-};
-
-
-#define CASEFILE(C)        ( (struct casefile *) C)
-#define CONST_CASEFILE(C) ( (const struct casefile *) C)
-
-#define CASEFILEREADER(CR) ((struct casereader *) CR)
-
-
-/* Functions for implementations' use  only */
-
-void casefile_register (struct casefile *cf, 
-                       const struct class_casefile *k);
-
-void casereader_register (struct casefile *cf, 
-                         struct casereader *reader, 
-                         const struct class_casereader *k);
-
-#endif
diff --git a/src/data/casefile.c b/src/data/casefile.c

deleted file mode 100644 (file)

index d81ec29..0000000
--- a/src/data/casefile.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#include <config.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "case.h"
-#include "casefile.h"
-#include "casefile-private.h"
-#include "casefilter.h"
-
-
-struct ccase;
-
-/* A casefile is an abstract class representing an array of cases.  In
-   general, cases are accessible sequentially,  and are immutable once
-   appended to the casefile.  However some implementations may provide
-   special methods for  case mutation or random access.
-
-   Use casefile_append or casefile_append_xfer to append a case to a
-   casefile. 
-
-   The casefile may be read sequentially,
-   starting from the beginning, by "casereaders".  Any
-   number of casereaders may be created, at any time.
-   Each casereader has an independent position in the casefile.
-
-   Casereaders may only move forward.  They cannot move backward to
-   arbitrary records or seek randomly.  Cloning casereaders is
-   possible, but it is not yet implemented.
-
-   Use casereader_read() or casereader_read_xfer() to read
-   a case from a casereader.  Use casereader_destroy() to
-   discard a casereader when it is no longer needed.
-
-   When a casefile is no longer needed, it may be destroyed with
-   casefile_destroy().  This function will also destroy any
-   remaining casereaders. */
-
-static struct ll_list all_casefiles = LL_INITIALIZER (all_casefiles);
-
-static struct casefile *
-ll_to_casefile (const struct ll *ll)
-{
-  return ll_data (ll, struct casefile, ll);
-}
-
-static struct casereader *
-ll_to_casereader (const struct ll *ll)
-{
-  return ll_data (ll, struct casereader, ll);
-}
-
-
-/* atexit() handler that closes and deletes our temporary
-   files. */
-static void
-exit_handler (void) 
-{
-  while (!ll_is_empty (&all_casefiles))
-    casefile_destroy (ll_to_casefile (ll_head (&all_casefiles)));
-}
-
-/* Insert CF into the global list of casefiles */
-void
-casefile_register (struct casefile *cf, const struct class_casefile *class)
-{
-  static bool initialised ;
-  if ( !initialised ) 
-    {
-      atexit (exit_handler);
-      initialised = true;
-    }
-
-  cf->class = class;
-  ll_push_head (&all_casefiles, &cf->ll);
-  ll_init (&cf->reader_list);
-}
-
-/* Remove CF from the global list */
-static void
-casefile_unregister(struct casefile *cf)
-{
-  ll_remove (&cf->ll);
-}
-
-/* Return the casefile corresponding to this reader */
-struct casefile *
-casereader_get_casefile (const struct casereader *r)
-{
-  return r->cf;
-}
-
-/* Return the case number of the current case */
-unsigned long
-casereader_cnum(const struct casereader *r)
-{
-  return r->class->cnum(r);
-}
-
-static struct ccase *
-get_next_case(struct casereader *reader)
-{
-  struct ccase *read_case = NULL;
-  struct casefile *cf = casereader_get_casefile (reader);
-
-  do 
-    { 
-      if ( casefile_error (cf) )
-       return NULL;
-  
-      read_case = reader->class->get_next_case (reader);
-    } 
-  while ( read_case && reader->filter 
-         && casefilter_skip_case (reader->filter, read_case) ) ;
-
-  return read_case;
-}
-
-/* Reads a copy of the next case from READER into C.
-   Caller is responsible for destroying C.
-   Returns true if successful, false at end of file. */
-bool
-casereader_read (struct casereader *reader, struct ccase *c)
-{
-  struct ccase * read_case = get_next_case (reader) ;
-
-  if ( NULL == read_case ) 
-    return false;
-
-  case_clone (c, read_case );
-
-  return true;
-}
-
-
-/* Reads the next case from READER into C and transfers ownership
-   to the caller.  Caller is responsible for destroying C.
-   Returns true if successful, false at end of file or on I/O
-   error. */
-bool
-casereader_read_xfer (struct casereader *reader, struct ccase *c)
-{
-  struct casefile *cf = casereader_get_casefile (reader);
-  struct ccase *read_case ;
-  case_nullify (c);
-
-  read_case = get_next_case (reader) ;
-
-  if ( NULL == read_case )
-    return false;
-
-  if ( reader->destructive && casefile_in_core (cf) )
-    case_move (c, read_case);
-  else
-    case_clone (c, read_case);
-
-  return true;
-}
-
-/* Destroys R. */
-void 
-casereader_destroy (struct casereader *r)
-{
-  ll_remove (&r->ll);
-
-  r->class->destroy(r);
-}
-
-/* Creates a copy of R and returns it */
-struct casereader *
-casereader_clone(const struct casereader *r)
-{
-  struct casereader *r2;
-
-  /* Would we ever want to clone a destructive reader ?? */
-  assert ( ! r->destructive ) ;
-
-  r2 = r->class->clone (r);
-
-  r2->filter = r->filter;
-
-  return r2;
-}
-
-/* Destroys casefile CF. */
-void
-casefile_destroy(struct casefile *cf)
-{
-  if (!cf) return;
-  
-  assert(cf->class->destroy);
-
-  while (!ll_is_empty (&cf->reader_list))
-    casereader_destroy (ll_to_casereader (ll_head (&cf->reader_list)));
-      
-  casefile_unregister(cf);
-
-  cf->class->destroy(cf);
-}
-
-/* Returns true if an I/O error has occurred in casefile CF. */
-bool 
-casefile_error (const struct casefile *cf)
-{
-  return cf->class->error(cf);
-}
-
-/* Returns the number of cases in casefile CF. */
-unsigned long 
-casefile_get_case_cnt (const struct casefile *cf)
-{
-  return cf->class->get_case_cnt(cf);
-}
-
-/* Returns the number of `union value's in a case for CF. */
-size_t 
-casefile_get_value_cnt (const struct casefile *cf)
-{
-  return cf->class->get_value_cnt(cf);
-}
-
-/* Creates and returns a casereader for CF.  A casereader can be used to
-   sequentially read the cases in a casefile. */
-struct casereader *
-casefile_get_reader  (const struct casefile *cf, struct casefilter *filter)
-{
-  struct casereader *r = cf->class->get_reader(cf);
-  r->cf = (struct casefile *) cf;
-  r->filter = filter;
-
-  assert (r->class);
-  
-  return r;
-}
-
-/* Creates and returns a destructive casereader for CF.  Like a
-   normal casereader, a destructive casereader sequentially reads
-   the cases in a casefile.  Unlike a normal casereader, a
-   destructive reader cannot operate concurrently with any other
-   reader.  (This restriction could be relaxed in a few ways, but
-   it is so far unnecessary for other code.) */
-struct casereader *
-casefile_get_destructive_reader (struct casefile *cf) 
-{
-  struct casereader *r = cf->class->get_reader (cf);
-  r->cf = cf;
-  r->destructive = true;
-  cf->being_destroyed = true;
-
-  return r;
-}
-
-/* Appends a copy of case C to casefile CF. 
-   Returns true if successful, false if an I/O error occurred. */
-bool 
-casefile_append (struct casefile *cf, const struct ccase *c)
-{
-  assert (case_get_value_cnt (c) >= casefile_get_value_cnt (cf));
-
-  return cf->class->append(cf, c);
-}
-
-/* Appends case C to casefile CF, which takes over ownership of
-   C.  
-   Returns true if successful, false if an I/O error occurred. */
-bool 
-casefile_append_xfer (struct casefile *cf, struct ccase *c)
-{
-  assert (case_get_value_cnt (c) >= casefile_get_value_cnt (cf));
-
-  cf->class->append (cf, c);
-  case_destroy (c);
-
-  return cf->class->error (cf);
-}
-
-
-
-
-/* Puts a casefile to "sleep", that is, minimizes the resources
-   needed for it by closing its file descriptor and freeing its
-   buffer.  This is useful if we need so many casefiles that we
-   might not have enough memory and file descriptors to go
-   around.
-  
-   Implementations may choose to silently ignore this function.
-
-   Returns true if successful, false if an I/O error occurred. */
-bool
-casefile_sleep (const struct casefile *cf)
-{
-  return cf->class->sleep ? cf->class->sleep(cf) : true;
-}
-
-/* Returns true only if casefile CF is stored in memory (instead of on
-   disk), false otherwise. 
-*/
-bool
-casefile_in_core (const struct casefile *cf)
-{
-  return cf->class->in_core(cf);
-}
-
-/* If CF is currently stored in memory, writes it to disk.  Readers, if any,
-   retain their current positions.
-
-   Implementations may choose to silently ignore this function.
-
-   Returns true if successful, false if an I/O error occurred. */
-bool 
-casefile_to_disk (const struct casefile *cf)
-{
-  return cf->class->to_disk ? cf->class->to_disk(cf) : true;
-}
-
-void
-casereader_register(struct casefile *cf, 
-                   struct casereader *reader, 
-                   const struct class_casereader *class)
-{
-  reader->class = class;
-  reader->cf = cf;
-      
-  ll_push_head (&cf->reader_list, &reader->ll);
-}
diff --git a/src/data/casefile.h b/src/data/casefile.h

deleted file mode 100644 (file)

index 8e765c9..0000000
--- a/src/data/casefile.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2004, 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#ifndef CASEFILE_H
-#define CASEFILE_H
-
-#include <config.h>
-#include <stddef.h>
-#include <stdbool.h>
-
-
-struct ccase;
-struct casereader;
-struct casefile;
-struct casefilter;
-
-/* Casereader functions */
-
-struct casefile *casereader_get_casefile (const struct casereader *r);
-
-unsigned long casereader_cnum (const struct casereader *r);
-
-bool casereader_read (struct casereader *r, struct ccase *c);
-
-bool casereader_read_xfer (struct casereader *r, struct ccase *c);
-
-void casereader_destroy (struct casereader *r);
-
-struct casereader *casereader_clone(const struct casereader *r);
-
-
-/* Casefile functions */
-
-void casefile_destroy (struct casefile *cf);
-
-bool casefile_error (const struct casefile *cf);
-
-unsigned long casefile_get_case_cnt (const struct casefile *cf);
-
-size_t casefile_get_value_cnt (const struct casefile *cf);
-
-struct casereader *casefile_get_reader (const struct casefile *cf, struct casefilter *filter);
-
-struct casereader *casefile_get_destructive_reader (struct casefile *cf);
-
-bool casefile_append (struct casefile *cf, const struct ccase *c);
-
-bool casefile_append_xfer (struct casefile *cf, struct ccase *c);
-
-bool casefile_sleep (const struct casefile *cf);
-
-bool casefile_in_core (const struct casefile *cf);
-
-bool casefile_to_disk (const struct casefile *cf);
-
-#endif
diff --git a/src/data/casefilter.c b/src/data/casefilter.c

deleted file mode 100644 (file)

index 2c6336d..0000000
--- a/src/data/casefilter.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#include <config.h>
-#include <libpspp/alloc.h>
-#include <libpspp/compiler.h>
-#include "casefilter.h"
-#include <stdlib.h>
-
-#include <stdio.h>
-#include <data/case.h>
-#include <data/variable.h>
-#include <data/missing-values.h>
-
-struct casefilter
- {
-   enum mv_class class;
-
-   const struct variable **vars;
-   int n_vars;
- };
-
-
-/* Returns true iff the entire case should be skipped */
-bool
-casefilter_skip_case (const struct casefilter *filter, const struct ccase *c)
-{
-  int i;
-
-  for (i = 0; i < filter->n_vars; ++i)
-    {
-      if ( casefilter_variable_missing (filter, c, filter->vars[i]))
-       return true;
-    }
-
-  return false;
-}
-
-/* Returns true iff the variable V in case C is missing */
-bool
-casefilter_variable_missing (const struct casefilter *filter,
-                            const struct ccase *c,
-                            const struct variable *var)
-{
-  const union value *val = case_data (c, var) ;
-  return var_is_value_missing (var, val, filter->class);
-}
-
-/* Create a new casefilter that drops cases in which any of the
-   N_VARS variables in VARS are in the given CLASS of missing values.
-   VARS is an array of variables which if *any* of them are missing.
-   N_VARS is the size of VARS.
- */
-struct casefilter *
-casefilter_create (enum mv_class class, const struct variable **vars, int n_vars)
-{
-  int i;
-  struct casefilter * filter = xmalloc (sizeof (*filter)) ;
-
-  filter->class = class;
-  filter->vars = xnmalloc (n_vars, sizeof (*filter->vars) );
-
-  for ( i = 0 ; i < n_vars ; ++i )
-    filter->vars[i] = vars[i];
-
-  filter->n_vars = n_vars ;
-
-  return filter ;
-}
-
-
-/* Add the variables in VARS to the list of variables for which the
-   filter considers. N_VARS is the size of VARS */
-void
-casefilter_add_variables (struct casefilter *filter,
-                         const struct variable *const *vars, int n_vars)
-{
-  int i;
-
-  filter->vars = xnrealloc (filter->vars, filter->n_vars + n_vars,
-                          sizeof (*filter->vars) );
-
-  for ( i = 0 ; i < n_vars ; ++i )
-    filter->vars[i + filter->n_vars] = vars[i];
-
-  filter->n_vars += n_vars ;
-}
-
-/* Destroy the filter FILTER */
-void
-casefilter_destroy (struct casefilter *filter)
-{
-  free (filter->vars);
-  free (filter);
-}
diff --git a/src/data/casefilter.h b/src/data/casefilter.h

deleted file mode 100644 (file)

index 6afad6a..0000000
--- a/src/data/casefilter.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#if !casefilter_h
-#define casefilter_h 1
-
-#include <stdbool.h>
-#include <data/missing-values.h>
-
-struct ccase;
-struct casefilter;
-struct variable ;
-
-/* Create a new casefilter that drops cases in which any of the
-   N_VARS variables in VARS are missing in the given CLASS.
-   VARS is an array of variables which if *any* of them are missing.
-   N_VARS is the size of VARS.
- */
-struct casefilter * casefilter_create (enum mv_class class,
-                                       const struct variable **, int);
-
-/* Add the variables in VARS to the list of variables for which the
-   filter considers. N_VARS is the size of VARS */
-void casefilter_add_variables (struct casefilter *, 
-                              const struct variable *const*, int);
-
-/* Destroy the filter FILTER */
-void casefilter_destroy (struct casefilter *); 
-
-/* Returns true iff the entire case should be skipped */
-bool casefilter_skip_case (const struct casefilter *, const struct ccase *);
-
-/* Returns true iff the variable V in case C is missing.
-   Note that this function's behaviour is independent of the set of 
-   variables  contained by the filter.
- */
-bool casefilter_variable_missing (const struct casefilter *f, 
-                                  const struct ccase *c, 
-                                  const struct variable *v);
-
-#endif
diff --git a/src/data/dictionary.c b/src/data/dictionary.c

index d77d9fdd1ed2b78620a87a6103f6f4304f0c4095..34ffc6af541eaccaba3695dc46f1244c07a0254c 100644 (file)
--- a/src/data/dictionary.c
+++ b/src/data/dictionary.c
@@ -718,7 +718,7 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c,
        double w = case_num (c, d->weight);
        if (w < 0.0 || var_is_num_missing (d->weight, w, MV_ANY))
          w = 0.0;
-      if ( w == 0.0 && *warn_on_invalid ) {
+      if ( w == 0.0 && warn_on_invalid != NULL && *warn_on_invalid ) {
           *warn_on_invalid = false;
           msg (SW, _("At least one case in the data file had a weight value "
                      "that was user-missing, system-missing, zero, or "
diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c

index a9b1de8193f18e34b6b30f3aee0077e8ce85e45d..2c99bde63d7b5625c093a1ace939a2c7efbe956d 100644 (file)
--- a/src/data/por-file-reader.c
+++ b/src/data/por-file-reader.c
@@ -20,29 +20,32 @@
  
  #include <config.h>
  #include "por-file-reader.h"
-#include <libpspp/message.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
+
  #include <ctype.h>
  #include <errno.h>
  #include <math.h>
  #include <setjmp.h>
-#include <libpspp/alloc.h>
+#include <stdarg.h>
  #include <stdbool.h>
-#include "case.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <data/casereader-provider.h>
+#include <data/casereader.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <libpspp/alloc.h>
  #include <libpspp/compiler.h>
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
  #include <libpspp/hash.h>
  #include <libpspp/magic.h>
+#include <libpspp/message.h>
  #include <libpspp/misc.h>
  #include <libpspp/pool.h>
  #include <libpspp/str.h>
-#include "value-labels.h"
-#include "variable.h"
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
@@ -71,10 +74,12 @@ struct pfm_reader
      int var_cnt;                /* Number of variables. */
      int weight_index;          /* 0-based index of weight variable, or -1. */
      int *widths;                /* Variable widths, 0 for numeric. */
-    int value_cnt;             /* Number of `value's per case. */
+    size_t value_cnt;          /* Number of `value's per case. */
      bool ok;                    /* Set false on I/O error. */
    };
  
+static struct casereader_class por_file_casereader_class;
+
  static void
  error (struct pfm_reader *r, const char *msg,...)
       PRINTF_FORMAT (2, 3)
@@ -110,11 +115,11 @@ error (struct pfm_reader *r, const char *msg, ...)
  }
  
  /* Closes portable file reader R, after we're done with it. */
-void
-pfm_close_reader (struct pfm_reader *r)
+static void
+por_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
  {
-  if (r != NULL)
-    pool_destroy (r->pool);
+  struct pfm_reader *r = r_;
+  pool_destroy (r->pool);
  }
  
  /* Read a single character into cur_char.  */
@@ -156,7 +161,7 @@ void dump_dictionary (struct dictionary *);
  /* Reads the dictionary from file with handle H, and returns it in a
     dictionary structure.  This dictionary may be modified in order to
     rename, reorder, and delete variables, etc. */
-struct pfm_reader *
+struct casereader *
  pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
                   struct pfm_read_info *info)
  {
@@ -204,10 +209,12 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
    if (!match (r, 'F'))
      error (r, _("Data record expected."));
  
-  return r;
+  r->value_cnt = dict_get_next_value_idx (*dict);
+  return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+                                       &por_file_casereader_class, r);
  
   error:
-  pfm_close_reader (r);
+  pool_destroy (r->pool);
    dict_destroy (*dict);
    *dict = NULL;
    return NULL;
@@ -677,19 +684,28 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict)
  }
  
  /* Reads one case from portable file R into C. */
-bool
-pfm_read_case (struct pfm_reader *r, struct ccase *c)
+static bool
+por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c)
  {
+  struct pfm_reader *r = r_;
    size_t i;
    size_t idx;
  
+  case_create (c, casereader_get_value_cnt (reader));
    setjmp (r->bail_out);
-  if (!r->ok)
-    return false;
+  if (!r->ok) 
+    {
+      casereader_force_error (reader);
+      case_destroy (c);
+      return false; 
+    }
    
    /* Check for end of file. */
-  if (r->cc == 'Z')
-    return false;
+  if (r->cc == 'Z') 
+    {
+      case_destroy (c);
+      return false; 
+    }
  
    idx = 0;
    for (i = 0; i < r->var_cnt; i++) 
@@ -713,14 +729,6 @@ pfm_read_case (struct pfm_reader *r, struct ccase *c)
    return true;
  }
  
-/* Returns true if an I/O error has occurred on READER, false
-   otherwise. */
-bool
-pfm_read_error (const struct pfm_reader *reader) 
-{
-  return !reader->ok;
-}
-
  /* Returns true if FILE is an SPSS portable file,
     false otherwise. */
  bool
@@ -755,3 +763,11 @@ pfm_detect (FILE *file)
  
    return true;
  }
+
+static struct casereader_class por_file_casereader_class = 
+  {
+    por_file_casereader_read,
+    por_file_casereader_destroy,
+    NULL,
+    NULL,
+  };
diff --git a/src/data/por-file-reader.h b/src/data/por-file-reader.h

index 50ce46b5e7538fc004b93c249a19cfa902718578..812210444fbdbc38b40b84cfccd0e40aeae98812 100644 (file)
--- a/src/data/por-file-reader.h
+++ b/src/data/por-file-reader.h
@@ -37,12 +37,9 @@ struct pfm_read_info
  struct dictionary;
  struct file_handle;
  struct ccase;
-struct pfm_reader *pfm_open_reader (struct file_handle *,
+struct casereader *pfm_open_reader (struct file_handle *,
                                      struct dictionary **,
                                      struct pfm_read_info *);
-bool pfm_read_case (struct pfm_reader *, struct ccase *);
-bool pfm_read_error (const struct pfm_reader *);
-void pfm_close_reader (struct pfm_reader *);
  bool pfm_detect (FILE *);
  
  #endif /* por-file-reader.h */
diff --git a/src/data/por-file-writer.c b/src/data/por-file-writer.c

index d9f16ef41e9d296768f0b319b56820df6861a12f..bab453d0c5c989cf9d77036cb8e9dca7635dd271 100644 (file)
--- a/src/data/por-file-writer.c
+++ b/src/data/por-file-writer.c
@@ -30,13 +30,15 @@
  #include <time.h>
  #include <unistd.h>
  
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
-#include "value-labels.h"
-#include "variable.h"
+#include <data/case.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
  
  #include <libpspp/alloc.h>
  #include <libpspp/hash.h>
@@ -70,6 +72,9 @@ struct pfm_var
      int fv;                     /* Starting case index. */
    };
  
+static struct casewriter_class por_file_casewriter_class;
+
+static bool close_writer (struct pfm_writer *);
  static void buf_write (struct pfm_writer *, const void *, size_t);
  static void write_header (struct pfm_writer *);
  static void write_version_data (struct pfm_writer *);
@@ -94,7 +99,7 @@ pfm_writer_default_options (void)
  /* Writes the dictionary DICT to portable file HANDLE according
     to the given OPTS.  Returns nonzero only if successful.  DICT
     will not be modified, except to assign short names. */
-struct pfm_writer *
+struct casewriter *
  pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
                   struct pfm_write_options opts)
  {
@@ -153,12 +158,12 @@ pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
    write_variables (w, dict);
    write_value_labels (w, dict);
    buf_write (w, "F", 1);
-  if (pfm_write_error (w))
+  if (ferror (w->file))
      goto error;
-  return w;
+  return casewriter_create (&por_file_casewriter_class, w);
  
   error:
-  pfm_close_writer (w);
+  close_writer (w);
    return NULL;
  
   open_error:
@@ -356,6 +361,7 @@ write_variables (struct pfm_writer *w, struct dictionary *dict)
            write_value (w, &value, v);
          }
  
+      /* Write variable label. */
        if (var_get_label (v) != NULL)
          { 
            buf_write (w, "C", 1);
@@ -394,41 +400,47 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict)
      }
  }
  
-/* Writes case ELEM to the portable file represented by H. */
-int 
-pfm_write_case (struct pfm_writer *w, const struct ccase *c)
+/* Writes case C to the portable file represented by W_, then destroys C. */
+static void 
+por_file_casewriter_write (struct casewriter *writer, void *w_,
+                           struct ccase *c)
  {
+  struct pfm_writer *w = w_;
    int i;
  
-  if (ferror (w->file))
-    return 0;
-  
-  for (i = 0; i < w->var_cnt; i++)
+  if (!ferror (w->file)) 
      {
-      struct pfm_var *v = &w->vars[i];
+      for (i = 0; i < w->var_cnt; i++)
+        {
+          struct pfm_var *v = &w->vars[i];
        
-      if (v->width == 0)
-        write_float (w, case_num_idx (c, v->fv));
-      else
-       {
-         write_int (w, v->width);
-          buf_write (w, case_str_idx (c, v->fv), v->width);
-       }
+          if (v->width == 0)
+            write_float (w, case_num_idx (c, v->fv));
+          else
+            {
+              write_int (w, v->width);
+              buf_write (w, case_str_idx (c, v->fv), v->width);
+            }
+        } 
      }
-
-  return !pfm_write_error (w);
+  else
+    casewriter_force_error (writer);
+  
+  case_destroy (c);
  }
  
-bool
-pfm_write_error (const struct pfm_writer *w) 
+static void
+por_file_casewriter_destroy (struct casewriter *writer, void *w_) 
  {
-  return ferror (w->file);
+  struct pfm_writer *w = w_;
+  if (!close_writer (w))
+    casewriter_force_error (writer);
  }
  
  /* Closes a portable file after we're done with it.
     Returns true if successful, false if an I/O error occurred. */
-bool
-pfm_close_writer (struct pfm_writer *w)
+static bool
+close_writer (struct pfm_writer *w)
  {
    bool ok;
  
@@ -442,7 +454,7 @@ pfm_close_writer (struct pfm_writer *w)
        memset (buf, 'Z', sizeof buf);
        buf_write (w, buf, w->lc >= 80 ? 80 : 80 - w->lc);
  
-      ok = !pfm_write_error (w);
+      ok = !ferror (w->file);
        if (fclose (w->file) == EOF) 
          ok = false; 
  
@@ -844,3 +856,10 @@ format_trig_double (long double value, int base_10_precision, char output[])
    strcpy (output, "*.");
    return;
  }
+\f
+static struct casewriter_class por_file_casewriter_class = 
+  {
+    por_file_casewriter_write,
+    por_file_casewriter_destroy,
+    NULL,
+  };
diff --git a/src/data/por-file-writer.h b/src/data/por-file-writer.h

index e188288276711468f9415cebf980cadac390dcf9..6732abdea9983f6e329506136cad84d1810c1b21 100644 (file)
--- a/src/data/por-file-writer.h
+++ b/src/data/por-file-writer.h
@@ -41,12 +41,8 @@ struct pfm_write_options
  struct file_handle;
  struct dictionary;
  struct ccase;
-struct pfm_writer *pfm_open_writer (struct file_handle *, struct dictionary *,
+struct casewriter *pfm_open_writer (struct file_handle *, struct dictionary *,
                                      struct pfm_write_options);
  struct pfm_write_options pfm_writer_default_options (void);
  
-int pfm_write_case (struct pfm_writer *, const struct ccase *);
-bool pfm_write_error (const struct pfm_writer *);
-bool pfm_close_writer (struct pfm_writer *);
-
  #endif /* por-file-writer.h */
diff --git a/src/data/procedure.c b/src/data/procedure.c

index 7a9b432132c09158ce849147801f4a17bc4f64a4..46a18bb463297fdf735a6e30a2d2207c0dc45e9a 100644 (file)
--- a/src/data/procedure.c
+++ b/src/data/procedure.c
@@ -23,48 +23,50 @@
  #include <stdlib.h>
  #include <unistd.h>
  
-#include <data/case-source.h>
-#include <data/case-sink.h>
  #include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
+#include <data/caseinit.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
+#include <data/casewriter.h>
  #include <data/dictionary.h>
  #include <data/file-handle-def.h>
  #include <data/procedure.h>
-#include <data/storage-stream.h>
  #include <data/transformations.h>
  #include <data/variable.h>
  #include <libpspp/alloc.h>
  #include <libpspp/deque.h>
  #include <libpspp/misc.h>
  #include <libpspp/str.h>
+#include <libpspp/taint.h>
  
  struct dataset {
-
-  /* An abstract factory which creates casefiles */
-  struct casefile_factory *cf_factory;
-
-  /* Callback which occurs when a procedure provides a new source for
-     the dataset */
-  replace_source_callback *replace_source ;
-
-  /* Callback which occurs whenever the DICT is replaced by a new one */
-  replace_dictionary_callback *replace_dict;
-
-  /* Cases are read from proc_source,
+  /* Cases are read from source,
+     their transformation variables are initialized,
       pass through permanent_trns_chain (which transforms them into
       the format described by permanent_dict),
-     are written to proc_sink,
+     are written to sink,
       pass through temporary_trns_chain (which transforms them into
       the format described by dict),
       and are finally passed to the procedure. */
-  struct case_source *proc_source;
+  struct casereader *source;
+  struct caseinit *caseinit;
    struct trns_chain *permanent_trns_chain;
    struct dictionary *permanent_dict;
-  struct case_sink *proc_sink;
+  struct casewriter *sink;
    struct trns_chain *temporary_trns_chain;
    struct dictionary *dict;
  
+  /* Callback which occurs when a procedure provides a new source for
+     the dataset */
+  replace_source_callback *replace_source ;
+
+  /* Callback which occurs whenever the DICT is replaced by a new one */
+  replace_dictionary_callback *replace_dict;
+
+  /* If true, cases are discarded instead of being written to
+     sink. */
+  bool discard_output;
+
    /* The transformation chain that the next transformation will be
       added to. */
    struct trns_chain *cur_trns_chain;
@@ -82,26 +84,22 @@ struct dataset {
    struct ccase *lag_cases;      /* Lagged cases managed by deque. */
  
    /* Procedure data. */
-  bool is_open;               /* Procedure open? */
-  struct ccase trns_case;     /* Case used for transformations. */
-  struct ccase sink_case;     /* Case written to sink, if
-                                 compacting is necessary. */
+  enum 
+    {
+      PROC_COMMITTED,
+      PROC_OPEN,
+      PROC_CLOSED 
+    }
+  proc_state;
    size_t cases_written;       /* Cases output so far. */
-  bool ok;
+  bool ok;                    /* Error status. */
  }; /* struct dataset */
  
  
  static void add_case_limit_trns (struct dataset *ds);
  static void add_filter_trns (struct dataset *ds);
  
-static bool internal_procedure (struct dataset *ds, case_func *,
-                                end_func *,
-                                void *aux);
  static void update_last_proc_invocation (struct dataset *ds);
-static void create_trns_case (struct ccase *, struct dictionary *);
-static void open_active_file (struct dataset *ds);
-static void clear_case (const struct dataset *ds, struct ccase *c);
-static bool close_active_file (struct dataset *ds);
  \f
  /* Public functions. */
  
@@ -116,146 +114,89 @@ time_of_last_procedure (struct dataset *ds)
  \f
  /* Regular procedure. */
  
-
-
-/* Reads the data from the input program and writes it to a new
-   active file.  For each case we read from the input program, we
-   do the following:
-
-   1. Execute permanent transformations.  If these drop the case,
-      start the next case from step 1.
-
-   2. Write case to replacement active file.
-
-   3. Execute temporary transformations.  If these drop the case,
-      start the next case from step 1.
-
-   4. Pass case to PROC_FUNC, passing AUX as auxiliary data.
-
-   Returns true if successful, false if an I/O error occurred. */
+/* Executes any pending transformations, if necessary.
+   This is not identical to the EXECUTE command in that it won't
+   always read the source data.  This can be important when the
+   source data is given inline within BEGIN DATA...END DATA. */
  bool
-procedure (struct dataset *ds, case_func *cf, void *aux)
+proc_execute (struct dataset *ds)
  {
-  update_last_proc_invocation (ds);
+  bool ok;
  
-  /* Optimize the trivial case where we're not going to do
-     anything with the data, by not reading the data at all. */
-  if (cf == NULL
-      && case_source_is_class (ds->proc_source, &storage_source_class)
-      && ds->proc_sink == NULL
-      && (ds->temporary_trns_chain == NULL
-          || trns_chain_is_empty (ds->temporary_trns_chain))
+  if ((ds->temporary_trns_chain == NULL
+       || trns_chain_is_empty (ds->temporary_trns_chain))
        && trns_chain_is_empty (ds->permanent_trns_chain))
      {
        ds->n_lag = 0;
+      ds->discard_output = false;
        dict_set_case_limit (ds->dict, 0);
        dict_clear_vectors (ds->dict);
        return true;
      }
  
-  return internal_procedure (ds, cf, NULL, aux);
+  ok = casereader_destroy (proc_open (ds));
+  return proc_commit (ds) && ok;
  }
-\f
-/* Multipass procedure. */
  
-struct multipass_aux_data
-  {
-    struct casefile *casefile;
+static struct casereader_class proc_casereader_class;
  
-    bool (*proc_func) (const struct casefile *, void *aux);
-    void *aux;
-  };
-
-/* Case processing function for multipass_procedure(). */
-static bool
-multipass_case_func (const struct ccase *c, void *aux_data_, const struct dataset *ds UNUSED)
-{
-  struct multipass_aux_data *aux_data = aux_data_;
-  return casefile_append (aux_data->casefile, c);
-}
-
-/* End-of-file function for multipass_procedure(). */
-static bool
-multipass_end_func (void *aux_data_, const struct dataset *ds UNUSED)
-{
-  struct multipass_aux_data *aux_data = aux_data_;
-  return (aux_data->proc_func == NULL
-          || aux_data->proc_func (aux_data->casefile, aux_data->aux));
-}
-
-/* Procedure that allows multiple passes over the input data.
-   The entire active file is passed to PROC_FUNC, with the given
-   AUX as auxiliary data, as a unit. */
-bool
-multipass_procedure (struct dataset *ds, casefile_func *proc_func,  void *aux)
+/* Opens dataset DS and returns a casereader for reading its cases.
+   Destroy the casereader, then call proc_commit, when done. */
+struct casereader *
+proc_open (struct dataset *ds)
  {
-  struct multipass_aux_data aux_data;
-  bool ok;
+  assert (ds->source != NULL);
+  assert (ds->proc_state == PROC_COMMITTED);
  
-  aux_data.casefile =
-    ds->cf_factory->create_casefile (ds->cf_factory,
-                                    dict_get_next_value_idx (ds->dict));
-
-  aux_data.proc_func = proc_func;
-  aux_data.aux = aux;
-
-  ok = internal_procedure (ds, multipass_case_func, multipass_end_func, &aux_data);
-  ok = !casefile_error (aux_data.casefile) && ok;
-
-  casefile_destroy (aux_data.casefile);
-
-  return ok;
-}
-\f
+  update_last_proc_invocation (ds);
  
-/* Procedure implementation. */
+  caseinit_mark_for_init (ds->caseinit, ds->dict);
  
-/* Executes a procedure.
-   Passes each case to CASE_FUNC.
-   Calls END_FUNC after the last case.
-   Returns true if successful, false if an I/O error occurred (or
-   if CASE_FUNC or END_FUNC ever returned false). */
-static bool
-internal_procedure (struct dataset *ds, case_func *proc,
-                   end_func *end,
-                    void *aux)
-{
-  struct ccase *c;
-  bool ok = true;
+  /* Finish up the collection of transformations. */
+  add_case_limit_trns (ds);
+  add_filter_trns (ds);
+  trns_chain_finalize (ds->cur_trns_chain);
  
-  proc_open (ds);
-  while (ok && proc_read (ds, &c))
-    if (proc != NULL)
-      ok = proc (c, aux, ds) && ok;
-  if (end != NULL)
-    ok = end (aux, ds) && ok;
+  /* Make permanent_dict refer to the dictionary right before
+     data reaches the sink. */
+  if (ds->permanent_dict == NULL)
+    ds->permanent_dict = ds->dict;
  
-  if ( proc_close (ds) && ok )
+  /* Prepare sink. */
+  if (!ds->discard_output) 
      {
-
-      return true;
+      ds->compactor = (dict_compacting_would_shrink (ds->permanent_dict)
+                       ? dict_make_compactor (ds->permanent_dict)
+                       : NULL);
+      ds->sink = autopaging_writer_create (dict_get_compacted_value_cnt (
+                                             ds->permanent_dict)); 
+    }
+  else 
+    {
+      ds->compactor = NULL;
+      ds->sink = NULL;
      }
  
-  return false;
-}
-
-/* Opens dataset DS for reading cases with proc_read.
-   proc_close must be called when done. */
-void
-proc_open (struct dataset *ds)
-{
-  assert (ds->proc_source != NULL);
-  assert (!ds->is_open);
-
-  update_last_proc_invocation (ds);
-
-  open_active_file (ds);
+  /* Allocate memory for lagged cases. */
+  ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases);
  
-  ds->is_open = true;
-  create_trns_case (&ds->trns_case, ds->dict);
-  case_create (&ds->sink_case, dict_get_compacted_value_cnt (ds->dict));
+  ds->proc_state = PROC_OPEN;
    ds->cases_written = 0;
    ds->ok = true;
+
+  /* FIXME: use taint in dataset in place of `ok'? */
+  /* FIXME: for trivial cases we can just return a clone of
+     ds->source? */
+  return casereader_create_sequential (NULL,
+                                       dict_get_next_value_idx (ds->dict),
+                                       CASENUMBER_MAX,
+                                       &proc_casereader_class, ds);
+}
+
+bool
+proc_is_open (const struct dataset *ds) 
+{
+  return ds->proc_state != PROC_COMMITTED;
  }
  
  /* Reads the next case from dataset DS, which must have been
@@ -264,14 +205,15 @@ proc_open (struct dataset *ds)
     case is stored in *C.
     Return false at end of file or if a read error occurs.  In
     this case a null pointer is stored in *C. */
-bool
-proc_read (struct dataset *ds, struct ccase **c)
+static bool
+proc_casereader_read (struct casereader *reader UNUSED, void *ds_,
+                      struct ccase *c) 
  {
+  struct dataset *ds = ds_;
    enum trns_result retval = TRNS_DROP_CASE;
  
-  assert (ds->is_open);
-  *c = NULL;
-  for (;;)
+  assert (ds->proc_state == PROC_OPEN);
+  for (;;) 
      {
        size_t case_nr;
  
@@ -281,51 +223,59 @@ proc_read (struct dataset *ds, struct ccase **c)
        if (!ds->ok)
          return false;
  
-      /* Read a case from proc_source. */
-      clear_case (ds, &ds->trns_case);
-      if (!ds->proc_source->class->read (ds->proc_source, &ds->trns_case))
+      /* Read a case from source. */
+      if (!casereader_read (ds->source, c))
          return false;
+      case_resize (c, dict_get_next_value_idx (ds->dict));
+      caseinit_init_reinit_vars (ds->caseinit, c);
+      caseinit_init_left_vars (ds->caseinit, c);
  
        /* Execute permanent transformations.  */
        case_nr = ds->cases_written + 1;
        retval = trns_chain_execute (ds->permanent_trns_chain, TRNS_CONTINUE,
-                                   &ds->trns_case, &case_nr);
-      if (retval != TRNS_CONTINUE)
-        continue;
-
+                                   c, &case_nr);
+      caseinit_update_left_vars (ds->caseinit, c);
+      if (retval != TRNS_CONTINUE) 
+        {
+          case_destroy (c);
+          continue; 
+        }
+  
        /* Write case to collection of lagged cases. */
        if (ds->n_lag > 0) 
          {
            while (deque_count (&ds->lag) >= ds->n_lag)
              case_destroy (&ds->lag_cases[deque_pop_back (&ds->lag)]);
-          case_clone (&ds->lag_cases[deque_push_front (&ds->lag)],
-                      &ds->trns_case);
+          case_clone (&ds->lag_cases[deque_push_front (&ds->lag)], c);
          }
  
        /* Write case to replacement active file. */
        ds->cases_written++;
-      if (ds->proc_sink->class->write != NULL)
+      if (ds->sink != NULL) 
          {
-          if (ds->compactor != NULL)
+          struct ccase tmp;
+          if (ds->compactor != NULL) 
              {
-              dict_compactor_compact (ds->compactor, &ds->sink_case,
-                                      &ds->trns_case);
-              ds->proc_sink->class->write (ds->proc_sink, &ds->sink_case);
+              case_create (&tmp, dict_get_compacted_value_cnt (ds->dict));
+              dict_compactor_compact (ds->compactor, &tmp, c);
              }
            else
-            ds->proc_sink->class->write (ds->proc_sink, &ds->trns_case);
+            case_clone (&tmp, c);
+          casewriter_write (ds->sink, &tmp);
          }
  
        /* Execute temporary transformations. */
        if (ds->temporary_trns_chain != NULL)
          {
            retval = trns_chain_execute (ds->temporary_trns_chain, TRNS_CONTINUE,
-                                       &ds->trns_case, &ds->cases_written);
+                                       c, &ds->cases_written);
            if (retval != TRNS_CONTINUE)
-            continue;
+            {
+              case_destroy (c);
+              continue;
+            }
          }
  
-      *c = &ds->trns_case;
        return true;
      }
  }
@@ -335,120 +285,35 @@ proc_read (struct dataset *ds, struct ccase **c)
     while reading or closing the data set.
     If DS has not been opened, returns true without doing
     anything else. */
-bool
-proc_close (struct dataset *ds)
-{
-  if (!ds->is_open)
-    return true;
-
-  /* Drain any remaining cases. */
-  while (ds->ok)
-    {
-      struct ccase *c;
-      if (!proc_read (ds, &c))
-        break;
-    }
-  ds->ok = free_case_source (ds->proc_source) && ds->ok;
-  proc_set_source (ds, NULL);
-
-  case_destroy (&ds->sink_case);
-  case_destroy (&ds->trns_case);
-
-  ds->ok = close_active_file (ds) && ds->ok;
-  ds->is_open = false;
-
-  return ds->ok;
-}
-
-/* Updates last_proc_invocation. */
-static void
-update_last_proc_invocation (struct dataset *ds)
-{
-  ds->last_proc_invocation = time (NULL);
-}
-
-/* Creates and returns a case, initializing it from the vectors
-   that say which `value's need to be initialized just once, and
-   which ones need to be re-initialized before every case. */
  static void
-create_trns_case (struct ccase *trns_case, struct dictionary *dict)
+proc_casereader_destroy (struct casereader *reader, void *ds_)
  {
-  size_t var_cnt = dict_get_var_cnt (dict);
-  size_t i;
+  struct dataset *ds = ds_;
+  struct ccase c;
  
-  case_create (trns_case, dict_get_next_value_idx (dict));
-  for (i = 0; i < var_cnt; i++)
-    {
-      struct variable *v = dict_get_var (dict, i);
-      union value *value = case_data_rw (trns_case, v);
+  /* Make sure transformations happen for every input case, in
+     case they have side effects, and ensure that the replacement
+     active file gets all the cases it should. */
+  while (casereader_read (reader, &c))
+    case_destroy (&c);
  
-      if (var_is_numeric (v))
-        value->f = var_get_leave (v) ? 0.0 : SYSMIS;
-      else
-        memset (value->s, ' ', var_get_width (v));
-    }
+  ds->proc_state = PROC_CLOSED;
+  ds->ok = casereader_destroy (ds->source) && ds->ok;
+  ds->source = NULL;
+  proc_set_active_file_data (ds, NULL);
  }
  
-/* Makes all preparations for reading from the data source and writing
-   to the data sink. */
-static void
-open_active_file (struct dataset *ds)
-{
-  add_case_limit_trns (ds);
-  add_filter_trns (ds);
-
-  /* Finalize transformations. */
-  trns_chain_finalize (ds->cur_trns_chain);
-
-  /* Make permanent_dict refer to the dictionary right before
-     data reaches the sink. */
-  if (ds->permanent_dict == NULL)
-    ds->permanent_dict = ds->dict;
-
-  /* Figure out whether to compact. */
-  ds->compactor =
-    (dict_compacting_would_shrink (ds->permanent_dict)
-     ? dict_make_compactor (ds->permanent_dict)
-     : NULL);
-
-  /* Prepare sink. */
-  if (ds->proc_sink == NULL)
-    ds->proc_sink = create_case_sink (&storage_sink_class,
-                                     ds->permanent_dict,
-                                     ds->cf_factory,
-                                     NULL);
-  if (ds->proc_sink->class->open != NULL)
-    ds->proc_sink->class->open (ds->proc_sink);
-
-  /* Allocate memory for lagged cases. */
-  ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases);
-}
-
-/* Clears the variables in C that need to be cleared between
-   processing cases.  */
-static void
-clear_case (const struct dataset *ds, struct ccase *c)
+/* Must return false if the source casereader, a transformation,
+   or the sink casewriter signaled an error.  (If a temporary
+   transformation signals an error, then the return value is
+   false, but the replacement active file may still be
+   untainted.) */
+bool
+proc_commit (struct dataset *ds) 
  {
-  size_t var_cnt = dict_get_var_cnt (ds->dict);
-  size_t i;
+  assert (ds->proc_state == PROC_CLOSED);
+  ds->proc_state = PROC_COMMITTED;
  
-  for (i = 0; i < var_cnt; i++)
-    {
-      struct variable *v = dict_get_var (ds->dict, i);
-      if (!var_get_leave (v))
-        {
-          if (var_is_numeric (v))
-            case_data_rw (c, v)->f = SYSMIS;
-          else
-            memset (case_data_rw (c, v)->s, ' ', var_get_width (v));
-        }
-    }
-}
-
-/* Closes the active file. */
-static bool
-close_active_file (struct dataset *ds)
-{
    /* Free memory for lagged cases. */
    while (!deque_is_empty (&ds->lag))
      case_destroy (&ds->lag_cases[deque_pop_back (&ds->lag)]);
@@ -457,23 +322,49 @@ close_active_file (struct dataset *ds)
    /* Dictionary from before TEMPORARY becomes permanent. */
    proc_cancel_temporary_transformations (ds);
  
-  /* Finish compacting. */
-  if (ds->compactor != NULL)
+  if (!ds->discard_output) 
      {
-      dict_compactor_destroy (ds->compactor);
-      dict_compact_values (ds->dict);
-      ds->compactor = NULL;
+      /* Finish compacting. */
+      if (ds->compactor != NULL) 
+        {
+          dict_compactor_destroy (ds->compactor);
+          dict_compact_values (ds->dict);
+          ds->compactor = NULL;
+        }
+    
+      /* Old data sink becomes new data source. */
+      if (ds->sink != NULL) 
+        ds->source = casewriter_make_reader (ds->sink);
      }
+  else 
+    {
+      ds->source = NULL;
+      ds->discard_output = false; 
+    }
+  ds->sink = NULL;
+  if ( ds->replace_source) ds->replace_source (ds->source);
  
-  /* Old data sink becomes new data source. */
-  if (ds->proc_sink->class->make_source != NULL)
-    proc_set_source (ds, ds->proc_sink->class->make_source (ds->proc_sink) );
-  free_case_sink (ds->proc_sink);
-  ds->proc_sink = NULL;
+  caseinit_clear (ds->caseinit);
+  caseinit_mark_as_preinited (ds->caseinit, ds->dict);
  
    dict_clear_vectors (ds->dict);
    ds->permanent_dict = NULL;
-  return proc_cancel_all_transformations (ds);
+  return proc_cancel_all_transformations (ds) && ds->ok;
+}
+
+static struct casereader_class proc_casereader_class = 
+  {
+    proc_casereader_read,
+    proc_casereader_destroy,
+    NULL,
+    NULL,
+  };
+
+/* Updates last_proc_invocation. */
+static void
+update_last_proc_invocation (struct dataset *ds)
+{
+  ds->last_proc_invocation = time (NULL);
  }
  \f
  /* Returns a pointer to the lagged case from N_BEFORE cases before the
@@ -490,218 +381,6 @@ lagged_case (const struct dataset *ds, int n_before)
      return NULL;
  }
  \f
-/* Procedure that separates the data into SPLIT FILE groups. */
-
-/* Represents auxiliary data for handling SPLIT FILE. */
-struct split_aux_data
-  {
-    struct dataset *dataset;    /* The dataset */
-    struct ccase prev_case;     /* Data in previous case. */
-
-    /* Callback functions. */
-    begin_func *begin;
-    case_func *proc;
-    end_func *end;
-    void *func_aux;
-  };
-
-static int equal_splits (const struct ccase *, const struct ccase *, const struct dataset *ds);
-static bool split_procedure_case_func (const struct ccase *c, void *, const struct dataset *);
-static bool split_procedure_end_func (void *, const struct dataset *);
-
-/* Like procedure(), but it automatically breaks the case stream
-   into SPLIT FILE break groups.  Before each group of cases with
-   identical SPLIT FILE variable values, BEGIN_FUNC is called
-   with the first case in the group.
-   Then PROC_FUNC is called for each case in the group (including
-   the first).
-   END_FUNC is called when the group is finished.  FUNC_AUX is
-   passed to each of the functions as auxiliary data.
-
-   If the active file is empty, none of BEGIN_FUNC, PROC_FUNC,
-   and END_FUNC will be called at all.
-
-   If SPLIT FILE is not in effect, then there is one break group
-   (if the active file is nonempty), and BEGIN_FUNC and END_FUNC
-   will be called once.
-
-   Returns true if successful, false if an I/O error occurred. */
-bool
-procedure_with_splits (struct dataset *ds,
-                      begin_func begin,
-                      case_func *proc,
-                       end_func *end,
-                       void *func_aux)
-{
-  struct split_aux_data split_aux;
-  bool ok;
-
-  case_nullify (&split_aux.prev_case);
-  split_aux.begin = begin;
-  split_aux.proc = proc;
-  split_aux.end = end;
-  split_aux.func_aux = func_aux;
-  split_aux.dataset = ds;
-
-  ok = internal_procedure (ds, split_procedure_case_func,
-                           split_procedure_end_func, &split_aux);
-
-  case_destroy (&split_aux.prev_case);
-
-  return ok;
-}
-
-/* Case callback used by procedure_with_splits(). */
-static bool
-split_procedure_case_func (const struct ccase *c, void *split_aux_, const struct dataset *ds)
-{
-  struct split_aux_data *split_aux = split_aux_;
-
-  /* Start a new series if needed. */
-  if (case_is_null (&split_aux->prev_case)
-      || !equal_splits (c, &split_aux->prev_case, split_aux->dataset))
-    {
-      if (!case_is_null (&split_aux->prev_case) && split_aux->end != NULL)
-        split_aux->end (split_aux->func_aux, ds);
-
-      case_destroy (&split_aux->prev_case);
-      case_clone (&split_aux->prev_case, c);
-
-      if (split_aux->begin != NULL)
-       split_aux->begin (&split_aux->prev_case, split_aux->func_aux, ds);
-    }
-
-  return (split_aux->proc == NULL
-          || split_aux->proc (c, split_aux->func_aux, ds));
-}
-
-/* End-of-file callback used by procedure_with_splits(). */
-static bool
-split_procedure_end_func (void *split_aux_, const struct dataset *ds)
-{
-  struct split_aux_data *split_aux = split_aux_;
-
-  if (!case_is_null (&split_aux->prev_case) && split_aux->end != NULL)
-    split_aux->end (split_aux->func_aux, ds);
-  return true;
-}
-
-/* Compares the SPLIT FILE variables in cases A and B and returns
-   nonzero only if they differ. */
-static int
-equal_splits (const struct ccase *a, const struct ccase *b,
-             const struct dataset *ds)
-{
-  return case_compare (a, b,
-                       dict_get_split_vars (ds->dict),
-                       dict_get_split_cnt (ds->dict)) == 0;
-}
-\f
-/* Multipass procedure that separates the data into SPLIT FILE
-   groups. */
-
-/* Represents auxiliary data for handling SPLIT FILE in a
-   multipass procedure. */
-struct multipass_split_aux_data
-  {
-    struct dataset *dataset;    /* The dataset of the split */
-    struct ccase prev_case;     /* Data in previous case. */
-    struct casefile *casefile;  /* Accumulates data for a split. */
-    split_func *split;          /* Function to call with the accumulated
-                                  data. */
-    void *func_aux;             /* Auxiliary data. */
-  };
-
-static bool multipass_split_case_func (const struct ccase *c, void *aux_, const struct dataset *);
-static bool multipass_split_end_func (void *aux_, const struct dataset *ds);
-static bool multipass_split_output (struct multipass_split_aux_data *, const struct dataset *ds);
-
-/* Returns true if successful, false if an I/O error occurred. */
-bool
-multipass_procedure_with_splits (struct dataset *ds,
-                                split_func  *split,
-                                 void *func_aux)
-{
-  struct multipass_split_aux_data aux;
-  bool ok;
-
-  case_nullify (&aux.prev_case);
-  aux.casefile = NULL;
-  aux.split = split;
-  aux.func_aux = func_aux;
-  aux.dataset = ds;
-
-  ok = internal_procedure (ds, multipass_split_case_func,
-                           multipass_split_end_func, &aux);
-  case_destroy (&aux.prev_case);
-
-  return ok;
-}
-
-/* Case callback used by multipass_procedure_with_splits(). */
-static bool
-multipass_split_case_func (const struct ccase *c, void *aux_, const struct dataset *ds)
-{
-  struct multipass_split_aux_data *aux = aux_;
-  bool ok = true;
-
-  /* Start a new series if needed. */
-  if (aux->casefile == NULL || ! equal_splits (c, &aux->prev_case, ds))
-    {
-      /* Record split values. */
-      case_destroy (&aux->prev_case);
-      case_clone (&aux->prev_case, c);
-
-      /* Pass any cases to split_func. */
-      if (aux->casefile != NULL)
-        ok = multipass_split_output (aux, ds);
-
-      /* Start a new casefile. */
-      aux->casefile =
-       ds->cf_factory->create_casefile (ds->cf_factory,
-                                        dict_get_next_value_idx (ds->dict));
-    }
-
-  return casefile_append (aux->casefile, c) && ok;
-}
-
-/* End-of-file callback used by multipass_procedure_with_splits(). */
-static bool
-multipass_split_end_func (void *aux_, const struct dataset *ds)
-{
-  struct multipass_split_aux_data *aux = aux_;
-  return (aux->casefile == NULL || multipass_split_output (aux, ds));
-}
-
-static bool
-multipass_split_output (struct multipass_split_aux_data *aux, const struct dataset *ds)
-{
-  bool ok;
-
-  assert (aux->casefile != NULL);
-  ok = aux->split (&aux->prev_case, aux->casefile, aux->func_aux, ds);
-  casefile_destroy (aux->casefile);
-  aux->casefile = NULL;
-
-  return ok;
-}
-\f
-/* Discards all the current state in preparation for a data-input
-   command like DATA LIST or GET. */
-void
-discard_variables (struct dataset *ds)
-{
-  dict_clear (ds->dict);
-  fh_set_default_handle (NULL);
-
-  ds->n_lag = 0;
-
-  free_case_source (ds->proc_source);
-  proc_set_source (ds, NULL);
-
-  proc_cancel_all_transformations (ds);
-}
-\f
  /* Returns the current set of permanent transformations,
     and clears the permanent transformations.
     For use by INPUT PROGRAM. */
@@ -804,8 +483,10 @@ proc_cancel_temporary_transformations (struct dataset *ds)
  {
    if (proc_in_temporary_transformations (ds))
      {
-      dataset_set_dict (ds, ds->permanent_dict);
+      dict_destroy (ds->dict);
+      ds->dict = ds->permanent_dict;
        ds->permanent_dict = NULL;
+      if (ds->replace_dict) ds->replace_dict (ds->dict);
  
        trns_chain_destroy (ds->temporary_trns_chain);
        ds->temporary_trns_chain = NULL;
@@ -822,6 +503,7 @@ bool
  proc_cancel_all_transformations (struct dataset *ds)
  {
    bool ok;
+  assert (ds->proc_state == PROC_COMMITTED);
    ok = trns_chain_destroy (ds->permanent_trns_chain);
    ok = trns_chain_destroy (ds->temporary_trns_chain) && ok;
    ds->permanent_trns_chain = ds->cur_trns_chain = trns_chain_create ();
@@ -831,14 +513,12 @@ proc_cancel_all_transformations (struct dataset *ds)
  \f
  /* Initializes procedure handling. */
  struct dataset *
-create_dataset (struct casefile_factory *fact,
-               replace_source_callback *rps,
-               replace_dictionary_callback *rds
-               )
+create_dataset (replace_source_callback *rps,
+               replace_dictionary_callback *rds)
  {
    struct dataset *ds = xzalloc (sizeof(*ds));
    ds->dict = dict_create ();
-  ds->cf_factory = fact;
+  ds->caseinit = caseinit_create ();
    ds->replace_source = rps;
    ds->replace_dict = rds;
    proc_cancel_all_transformations (ds);
@@ -849,60 +529,103 @@ create_dataset (struct casefile_factory *fact,
  void
  destroy_dataset (struct dataset *ds)
  {
-  discard_variables (ds);
+  proc_discard_active_file (ds);
    dict_destroy (ds->dict);
+  caseinit_destroy (ds->caseinit);
    trns_chain_destroy (ds->permanent_trns_chain);
    free (ds);
  }
  
-/* Sets SINK as the destination for procedure output from the
-   next procedure. */
+/* Causes output from the next procedure to be discarded, instead
+   of being preserved for use as input for the next procedure. */
  void
-proc_set_sink (struct dataset *ds, struct case_sink *sink)
+proc_discard_output (struct dataset *ds) 
  {
-  assert (ds->proc_sink == NULL);
-  ds->proc_sink = sink;
+  ds->discard_output = true;
+}
+
+/* Discards the active file dictionary, data, and
+   transformations. */
+void
+proc_discard_active_file (struct dataset *ds)
+{
+  assert (ds->proc_state == PROC_COMMITTED);
+
+  dict_clear (ds->dict);
+  fh_set_default_handle (NULL);
+
+  ds->n_lag = 0;
+  
+  casereader_destroy (ds->source);
+  ds->source = NULL;
+  if ( ds->replace_source) ds->replace_source (NULL);
+
+  proc_cancel_all_transformations (ds);
  }
  
  /* Sets SOURCE as the source for procedure input for the next
     procedure. */
  void
-proc_set_source (struct dataset *ds, struct case_source *source)
+proc_set_active_file (struct dataset *ds,
+                      struct casereader *source,
+                      struct dictionary *dict) 
  {
-  ds->proc_source = source;
+  assert (ds->proc_state == PROC_COMMITTED);
+  assert (ds->dict != dict);
+
+  proc_discard_active_file (ds);
  
-  if ( ds->replace_source )
-    ds->replace_source (ds->proc_source);
+  dict_destroy (ds->dict);
+  ds->dict = dict;
+  if ( ds->replace_dict) ds->replace_dict (dict);
+
+  proc_set_active_file_data (ds, source);
  }
  
-/* Returns true if a source for the next procedure has been
-   configured, false otherwise. */
+/* Replaces the active file's data by READER without replacing
+   the associated dictionary. */
  bool
-proc_has_source (const struct dataset *ds)
+proc_set_active_file_data (struct dataset *ds, struct casereader *reader) 
  {
-  return ds->proc_source != NULL;
-}
+  casereader_destroy (ds->source);
+  ds->source = reader;
+  if (ds->replace_source) ds->replace_source (reader);
  
-/* Returns the output from the previous procedure.
-   For use only immediately after executing a procedure.
-   The returned casefile is owned by the caller; it will not be
-   automatically used for the next procedure's input. */
-struct casefile *
-proc_capture_output (struct dataset *ds)
-{
-  struct casefile *casefile;
+  caseinit_clear (ds->caseinit);
+  caseinit_mark_as_preinited (ds->caseinit, ds->dict);
  
-  /* Try to make sure that this function is called immediately
-     after procedure() or a similar function. */
-  assert (ds->proc_source != NULL);
-  assert (case_source_is_class (ds->proc_source, &storage_source_class));
-  assert (trns_chain_is_empty (ds->permanent_trns_chain));
-  assert (!proc_in_temporary_transformations (ds));
+  return reader == NULL || !casereader_error (reader);
+}
  
-  casefile = storage_source_decapsulate (ds->proc_source);
-  proc_set_source (ds, NULL);
+/* Returns true if an active file data source is available, false
+   otherwise. */
+bool
+proc_has_active_file (const struct dataset *ds) 
+{
+  return ds->source != NULL;
+}
  
-  return casefile;
+/* Checks whether DS has a corrupted active file.  If so,
+   discards it and returns false.  If not, returns true without
+   doing anything. */
+bool
+dataset_end_of_command (struct dataset *ds) 
+{
+  if (ds->source != NULL) 
+    {
+      if (casereader_error (ds->source)) 
+        {
+          proc_discard_active_file (ds);
+          return false;
+        }
+      else 
+        {
+          const struct taint *taint = casereader_get_taint (ds->source);
+          taint_reset_successor_taint ((struct taint *) taint);
+          assert (!taint_has_tainted_successor (taint));
+        }
+    }
+  return true; 
  }
  \f
  static trns_proc_func case_limit_trns_proc;
@@ -983,32 +706,8 @@ dataset_dict (const struct dataset *ds)
    return ds->dict;
  }
  
-
-/* Set or replace dataset DS's dictionary with DICT.
-   The old dictionary is destroyed */
-void
-dataset_set_dict (struct dataset *ds, struct dictionary *dict)
-{
-  struct dictionary *old_dict = ds->dict;
-
-  dict_copy_callbacks (dict, ds->dict);
-  ds->dict = dict;
-
-  if ( ds->replace_dict )
-    ds->replace_dict (dict);
-
-  dict_destroy (old_dict);
-}
-
  void 
  dataset_need_lag (struct dataset *ds, int n_before)
  {
    ds->n_lag = MAX (ds->n_lag, n_before);
  }
-
-struct casefile_factory *
-dataset_get_casefile_factory (const struct dataset *ds)
-{
-  return ds->cf_factory;
-}
-
diff --git a/src/data/procedure.h b/src/data/procedure.h

index 0e8d286bde1c481cb6b6b166009cdd7aa4f45dda..7803e0e75f089aa507a5534e54963191cb7a3f1e 100644 (file)
--- a/src/data/procedure.h
+++ b/src/data/procedure.h
@@ -23,16 +23,11 @@
  #include <stdbool.h>
  
  #include <data/transformations.h>
-#include <data/casefile-factory.h>
  #include <libpspp/compiler.h>
  
-struct ccase;
-struct casefile;
-struct case_sink;
-struct case_source;
-
+struct casereader;
  struct dataset;
-
+struct dictionary;
  \f
  /* Transformations. */
  
@@ -44,10 +39,6 @@ void add_transformation_with_finalizer (struct dataset *ds,
                                          trns_free_func *, void *);
  size_t next_transformation (const struct dataset *ds);
  
-void discard_variables (struct dataset *ds);
-
-
-
  bool proc_cancel_all_transformations (struct dataset *ds);
  struct trns_chain *proc_capture_transformations (struct dataset *ds);
  
@@ -59,63 +50,35 @@ bool proc_cancel_temporary_transformations (struct dataset *ds);
  /* Procedures. */
  
  struct dictionary ;
-typedef void  replace_source_callback (struct case_source *);
+typedef void  replace_source_callback (struct casereader *);
  typedef void  replace_dictionary_callback (struct dictionary *);
  
  
-struct dataset * create_dataset (struct casefile_factory *fact,
-                                replace_source_callback *,
-                                replace_dictionary_callback *
-                                );
+struct dataset * create_dataset (replace_source_callback *,
+                                replace_dictionary_callback *);
  
  void destroy_dataset (struct dataset *);
  
-struct casefile_factory *dataset_get_casefile_factory (const struct dataset *);
-
-void proc_set_source (struct dataset *ds, struct case_source *);
-bool proc_has_source (const struct dataset *ds);
-
-void proc_set_sink (struct dataset *ds, struct case_sink *);
-struct casefile *proc_capture_output (struct dataset *ds);
-
-typedef bool casefile_func (const struct casefile *, void *);
-typedef bool case_func (const struct ccase *, void *, const struct dataset *);
-typedef void begin_func (const struct ccase *, void *, const struct dataset*);
+void proc_discard_active_file (struct dataset *);
+void proc_set_active_file (struct dataset *,
+                           struct casereader *, struct dictionary *);
+bool proc_set_active_file_data (struct dataset *, struct casereader *);
+bool proc_has_active_file (const struct dataset *ds);
  
-typedef bool end_func (void *, const struct dataset *);
-
-typedef bool split_func (const struct ccase *, const struct casefile *,
-                             void *, const struct dataset *);
-
-
-
-bool procedure (struct dataset *ds, case_func *, void *aux)  WARN_UNUSED_RESULT;
-
-bool procedure_with_splits (struct dataset *ds, 
-                           begin_func *,
-                            case_func *,
-                           end_func *,
-                            void *aux)
-     WARN_UNUSED_RESULT;
-bool multipass_procedure (struct dataset *ds, casefile_func *, void  *aux)
-     WARN_UNUSED_RESULT;
-bool multipass_procedure_with_splits (struct dataset *ds,
-                                          split_func *,
-                                          void *aux)
-     WARN_UNUSED_RESULT;
+void proc_discard_output (struct dataset *ds);
  
+bool proc_execute (struct dataset *ds);
  time_t time_of_last_procedure (struct dataset *ds);
  
-void proc_open (struct dataset *);
-bool proc_read (struct dataset *, struct ccase **);
-bool proc_close (struct dataset *);
+struct casereader *proc_open (struct dataset *);
+bool proc_is_open (const struct dataset *);
+bool proc_commit (struct dataset *);
+
+bool dataset_end_of_command (struct dataset *);
  \f
+struct dictionary *dataset_dict (const struct dataset *ds);
  
  struct ccase *lagged_case (const struct dataset *ds, int n_before);
-
-inline struct dictionary *dataset_dict (const struct dataset *ds);
-inline void dataset_set_dict ( struct dataset *ds, struct dictionary *dict);
-
  void dataset_need_lag (struct dataset *ds, int n_before);
  
  #endif /* procedure.h */
diff --git a/src/data/scratch-handle.c b/src/data/scratch-handle.c

index 2a08dbb051fc33cbd8eb378a55bf92a7df206763..0ac56433812d83040e7d507bb8e98f29f82d5611 100644 (file)
--- a/src/data/scratch-handle.c
+++ b/src/data/scratch-handle.c
@@ -18,9 +18,9 @@
  
  #include <config.h>
  #include <stdlib.h>
-#include "scratch-handle.h"
-#include "casefile.h"
-#include "dictionary.h"
+#include <data/casereader.h>
+#include <data/scratch-handle.h>
+#include <data/dictionary.h>
  
  /* Destroys HANDLE. */
  void
@@ -29,7 +29,7 @@ scratch_handle_destroy (struct scratch_handle *handle)
    if (handle != NULL) 
      {
        dict_destroy (handle->dictionary);
-      casefile_destroy (handle->casefile);
+      casereader_destroy (handle->casereader);
        free (handle);
      }
  }
diff --git a/src/data/scratch-handle.h b/src/data/scratch-handle.h

index 8a82650bb5f07708a2760760ae86e4bd38ba3df0..e70150a26e4394a794876754c93da3910705eb59 100644 (file)
--- a/src/data/scratch-handle.h
+++ b/src/data/scratch-handle.h
@@ -25,7 +25,7 @@
  struct scratch_handle 
    {
      struct dictionary *dictionary;      /* Dictionary. */
-    struct casefile *casefile;          /* Cases. */
+    struct casereader *casereader;      /* Cases. */
    };
  
  void scratch_handle_destroy (struct scratch_handle *);
diff --git a/src/data/scratch-reader.c b/src/data/scratch-reader.c

index 4459126bf71cb2b1da46f6a8865f0aae82001b2d..17c1aeaad0d4822d37f2aae0958b611750d0dd48 100644 (file)
--- a/src/data/scratch-reader.c
+++ b/src/data/scratch-reader.c
@@ -22,11 +22,11 @@
  
  #include <stdlib.h>
  
-#include "casefile.h"
  #include "dictionary.h"
  #include "file-handle-def.h"
  #include "scratch-handle.h"
  #include <data/case.h>
+#include <data/casereader.h>
  #include <libpspp/message.h>
  
  #include "xalloc.h"
@@ -34,31 +34,20 @@
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  
-/* A reader for a scratch file. */
-struct scratch_reader 
-  {
-    struct file_handle *fh;             /* Underlying file handle. */
-    struct casereader *casereader;      /* Case reader. */
-  };
-
  /* Opens FH, which must have referent type FH_REF_SCRATCH, and
     returns a scratch_reader for it, or a null pointer on
     failure.  Stores the dictionary for the scratch file into
-   *DICT.
-
-   If you use an any_reader instead, then your code can be more
-   flexible without being any harder to write. */
-struct scratch_reader *
+   *DICT. */
+struct casereader *
  scratch_reader_open (struct file_handle *fh, struct dictionary **dict)
  {
    struct scratch_handle *sh;
-  struct scratch_reader *reader;
    
    if (!fh_open (fh, FH_REF_SCRATCH, "scratch file", "rs"))
      return NULL;
    
    sh = fh_get_scratch_handle (fh);
-  if (sh == NULL) 
+  if (sh == NULL || sh->casereader == NULL) 
      {
        msg (SE, _("Scratch file handle %s has not yet been written, "
                   "using SAVE or another procedure, so it cannot yet "
@@ -68,42 +57,5 @@ scratch_reader_open (struct file_handle *fh, struct dictionary **dict)
      }
  
    *dict = dict_clone (sh->dictionary);
-  reader = xmalloc (sizeof *reader);
-  reader->fh = fh;
-  reader->casereader = casefile_get_reader (sh->casefile, NULL);
-  return reader;
-}
-
-/* Reads a case from READER and copies it into C.
-   Returns true if successful, false on error or at end of file. */
-bool
-scratch_reader_read_case (struct scratch_reader *reader, struct ccase *c)
-{
-  struct ccase tmp;
-  if (casereader_read (reader->casereader, &tmp)) 
-    {
-      case_copy (c, 0, &tmp, 0,
-                 casefile_get_value_cnt (
-                   casereader_get_casefile (reader->casereader)));
-      case_destroy (&tmp);
-      return true;
-    }
-  else
-    return false;
-}
-
-/* Returns true if an I/O error occurred on READER, false otherwise. */
-bool
-scratch_reader_error (const struct scratch_reader *reader) 
-{
-  return casefile_error (casereader_get_casefile (reader->casereader));
-}
-
-/* Closes READER. */
-void
-scratch_reader_close (struct scratch_reader *reader) 
-{
-  fh_close (reader->fh, "scratch file", "rs");
-  casereader_destroy (reader->casereader);
-  free (reader);
+  return casereader_clone (sh->casereader);
  }
diff --git a/src/data/scratch-reader.h b/src/data/scratch-reader.h

index 7d5f28b4ce0f955018b2684216e8ff72d115b377..16490df73039f892bd33f55050c309de3604fe12 100644 (file)
--- a/src/data/scratch-reader.h
+++ b/src/data/scratch-reader.h
@@ -24,10 +24,7 @@
  struct dictionary;
  struct file_handle;
  struct ccase;
-struct scratch_reader *scratch_reader_open (struct file_handle *,
-                                            struct dictionary **);
-bool scratch_reader_read_case (struct scratch_reader *, struct ccase *);
-bool scratch_reader_error (const struct scratch_reader *);
-void scratch_reader_close (struct scratch_reader *);
+struct casereader *scratch_reader_open (struct file_handle *,
+                                        struct dictionary **);
  
  #endif /* scratch-reader.h */
diff --git a/src/data/scratch-writer.c b/src/data/scratch-writer.c

index 42e77aa7b47869f5483c542f68142e4892cf541a..67e371a499d5e4b6a4116ad22089cbe8a31a5c56 100644 (file)
--- a/src/data/scratch-writer.c
+++ b/src/data/scratch-writer.c
@@ -17,14 +17,21 @@
     02110-1301, USA. */
  
  #include <config.h>
+
  #include "scratch-writer.h"
+
  #include <stdlib.h>
-#include "case.h"
-#include "casefile.h"
-#include "fastfile.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "scratch-handle.h"
+
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/scratch-handle.h>
+#include <libpspp/compiler.h>
+#include <libpspp/taint.h>
+
  #include "xalloc.h"
  
  /* A scratch file writer. */
@@ -33,16 +40,16 @@ struct scratch_writer
      struct scratch_handle *handle;      /* Underlying scratch handle. */
      struct file_handle *fh;             /* Underlying file handle. */
      struct dict_compactor *compactor;   /* Compacts into handle->dictionary. */
+    struct casewriter *subwriter;       /* Data output. */
    };
  
+static struct casewriter_class scratch_writer_casewriter_class;
+
  /* Opens FH, which must have referent type FH_REF_SCRATCH, and
     returns a scratch_writer for it, or a null pointer on
     failure.  Cases stored in the scratch_writer will be expected
-   to be drawn from DICTIONARY.
-
-   If you use an any_writer instead, then your code can be more
-   flexible without being any harder to write. */
-struct scratch_writer *
+   to be drawn from DICTIONARY. */
+struct casewriter *
  scratch_writer_open (struct file_handle *fh,
                       const struct dictionary *dictionary) 
  {
@@ -50,6 +57,7 @@ scratch_writer_open (struct file_handle *fh,
    struct scratch_writer *writer;
    struct dictionary *scratch_dict;
    struct dict_compactor *compactor;
+  struct casewriter *casewriter;
  
    if (!fh_open (fh, FH_REF_SCRATCH, "scratch file", "we"))
      return NULL;
@@ -72,50 +80,57 @@ scratch_writer_open (struct file_handle *fh,
    /* Create new contents. */
    sh = xmalloc (sizeof *sh);
    sh->dictionary = scratch_dict;
-  sh->casefile = fastfile_create (dict_get_next_value_idx (sh->dictionary));
+  sh->casereader = NULL;
  
    /* Create writer. */
    writer = xmalloc (sizeof *writer);
    writer->handle = sh;
    writer->fh = fh;
    writer->compactor = compactor;
+  writer->subwriter = autopaging_writer_create (dict_get_next_value_idx (
+                                               scratch_dict));
  
    fh_set_scratch_handle (fh, sh);
-  return writer;
+  casewriter = casewriter_create (&scratch_writer_casewriter_class, writer);
+  taint_propagate (casewriter_get_taint (writer->subwriter),
+                   casewriter_get_taint (casewriter));
+  return casewriter;
  }
  
  /* Writes case C to WRITER. */
-bool
-scratch_writer_write_case (struct scratch_writer *writer,
-                           const struct ccase *c) 
+static void
+scratch_writer_casewriter_write (struct casewriter *w UNUSED, void *writer_,
+                                 struct ccase *c) 
  {
+  struct scratch_writer *writer = writer_;
    struct scratch_handle *handle = writer->handle;
+  struct ccase tmp;
    if (writer->compactor) 
      {
-      struct ccase tmp_case;
-      case_create (&tmp_case, dict_get_next_value_idx (handle->dictionary));
-      dict_compactor_compact (writer->compactor, &tmp_case, c);
-      return casefile_append_xfer (handle->casefile, &tmp_case);
+      case_create (&tmp, dict_get_next_value_idx (handle->dictionary));
+      dict_compactor_compact (writer->compactor, &tmp, c);
+      case_destroy (c);
      }
-  else 
-    return casefile_append (handle->casefile, c);
-}
-
-/* Returns true if an I/O error occurred on WRITER, false otherwise. */
-bool
-scratch_writer_error (const struct scratch_writer *writer) 
-{
-  return casefile_error (writer->handle->casefile);
+  else
+    case_move (&tmp, c);
+  casewriter_write (writer->subwriter, &tmp);
  }
  
-/* Closes WRITER.
-   Returns true if successful, false if an I/O error occurred on WRITER. */
-bool
-scratch_writer_close (struct scratch_writer *writer) 
+/* Closes WRITER. */
+static void
+scratch_writer_casewriter_destroy (struct casewriter *w UNUSED, void *writer_) 
  {
-  struct casefile *cf = writer->handle->casefile;
-  bool ok = casefile_error (cf);
+  struct scratch_writer *writer = writer_;
+  struct casereader *reader = casewriter_make_reader (writer->subwriter);
+  if (!casereader_error (reader))
+    writer->handle->casereader = reader;
    fh_close (writer->fh, "scratch file", "we");
    free (writer);
-  return ok;
  }
+
+static struct casewriter_class scratch_writer_casewriter_class = 
+  {
+    scratch_writer_casewriter_write,
+    scratch_writer_casewriter_destroy,
+    NULL,
+  };
diff --git a/src/data/scratch-writer.h b/src/data/scratch-writer.h

index d4832a4fee76e1670b622e6be1511b16262bcc84..95b0bddbe7922bc71a05ca6b4b8e0f7344231d1d 100644 (file)
--- a/src/data/scratch-writer.h
+++ b/src/data/scratch-writer.h
@@ -24,10 +24,7 @@
  struct dictionary;
  struct file_handle;
  struct ccase;
-struct scratch_writer *scratch_writer_open (struct file_handle *,
-                                            const struct dictionary *);
-bool scratch_writer_write_case (struct scratch_writer *, const struct ccase *);
-bool scratch_writer_error (const struct scratch_writer *);
-bool scratch_writer_close (struct scratch_writer *);
+struct casewriter *scratch_writer_open (struct file_handle *,
+                                        const struct dictionary *);
  
  #endif /* scratch-writer.h */
diff --git a/src/data/storage-stream.c b/src/data/storage-stream.c

deleted file mode 100644 (file)

index 4ff939c..0000000
--- a/src/data/storage-stream.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#include <config.h>
-
-#include <data/storage-stream.h>
-
-#include <assert.h>
-#include <stdlib.h>
-
-#include <data/case-sink.h>
-#include <data/case-source.h>
-#include <data/case.h>
-#include <data/casefile.h> 
-#include <data/casefile-factory.h>
-
-#include "xalloc.h"
-
-/* Storage sink. */
-
-/* Information about storage sink. */
-struct storage_sink_info 
-  {
-    struct casefile *casefile;  /* Storage. */
-  };
-
-static struct storage_sink_info *
-get_storage_sink_info (struct case_sink *sink) 
-{
-  assert (sink->class == &storage_sink_class);
-  return sink->aux;
-}
-
-/* Initializes a storage sink. */
-static void
-storage_sink_open (struct case_sink *sink)
-{
-  struct storage_sink_info *info;
-
-  sink->aux = info = xmalloc (sizeof *info);
-  info->casefile = sink->factory->create_casefile (sink->factory,
-                                                  sink->value_cnt);
-}
-
-/* Writes case C to the storage sink SINK.
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-storage_sink_write (struct case_sink *sink, const struct ccase *c)
-{
-  struct storage_sink_info *info = get_storage_sink_info (sink);
-  return casefile_append (info->casefile, c);
-}
-
-/* Destroys internal data in SINK. */
-static void
-storage_sink_destroy (struct case_sink *sink)
-{
-  struct storage_sink_info *info = get_storage_sink_info (sink);
-  casefile_destroy (info->casefile);
-  free (info); 
-}
-
-/* Closes the sink and returns a storage source to read back the
-   written data. */
-static struct case_source *
-storage_sink_make_source (struct case_sink *sink) 
-{
-  struct storage_sink_info *info = get_storage_sink_info (sink);
-  struct case_source *source = storage_source_create (info->casefile);
-  info->casefile = NULL;
-  return source;
-}
-
-/* Storage sink. */
-const struct case_sink_class storage_sink_class = 
-  {
-    "storage",
-    storage_sink_open,
-    storage_sink_write,
-    storage_sink_destroy,
-    storage_sink_make_source,
-  };
-\f
-/* Storage source. */
-
-struct storage_source_info 
-  {
-    struct casefile *casefile;  /* Storage. */
-    struct casereader *reader;  /* Reader. */
-  };
-
-static struct storage_source_info *
-get_storage_source_info (const struct case_source *source) 
-{
-  assert (source->class == &storage_source_class);
-  return source->aux;
-}
-
-/* Returns the number of cases that will be read by
-   storage_source_read(). */
-static int
-storage_source_count (const struct case_source *source) 
-{
-  struct storage_source_info *info = get_storage_source_info (source);
-  return casefile_get_case_cnt (info->casefile);
-}
-
-/* Reads one case into OUTPUT_CASE.
-   Returns true if successful, false at end of file or if an
-   I/O error occurred. */
-static bool
-storage_source_read (struct case_source *source, struct ccase *output_case)
-{
-  struct storage_source_info *info = get_storage_source_info (source);
-  struct ccase casefile_case;
-
-  if (info->reader == NULL)
-    info->reader = casefile_get_reader (info->casefile, NULL);
-
-  if (casereader_read (info->reader, &casefile_case))
-    {
-      case_copy (output_case, 0,
-                 &casefile_case, 0,
-                 casefile_get_value_cnt (info->casefile));
-      return true;
-    }
-  else
-    return false;
-}
-
-/* Destroys the source.
-   Returns true if successful read, false if an I/O occurred
-   during destruction or previously. */
-static bool
-storage_source_destroy (struct case_source *source)
-{
-  struct storage_source_info *info = get_storage_source_info (source);
-  bool ok = true;
-  if (info->casefile)
-    {
-      ok = !casefile_error (info->casefile);
-      casefile_destroy (info->casefile); 
-    }
-  free (info);
-  return ok;
-}
-
-/* Returns the casefile encapsulated by SOURCE. */
-struct casefile *
-storage_source_get_casefile (struct case_source *source) 
-{
-  struct storage_source_info *info = get_storage_source_info (source);
-  return info->casefile;
-}
-
-/* Destroys SOURCE and returns the casefile that it
-   encapsulated. */
-struct casefile *
-storage_source_decapsulate (struct case_source *source) 
-{
-  struct storage_source_info *info = get_storage_source_info (source);
-  struct casefile *casefile = info->casefile;
-  assert (info->reader == NULL);
-  info->casefile = NULL;
-  free_case_source (source);
-  return casefile;
-}
-
-/* Creates and returns a new storage source that encapsulates
-   CASEFILE. */
-struct case_source *
-storage_source_create (struct casefile *casefile)
-{
-  struct storage_source_info *info;
-
-  info = xmalloc (sizeof *info);
-  info->casefile = casefile;
-  info->reader = NULL;
-
-  return create_case_source (&storage_source_class, info);
-}
-
-/* Storage source. */
-const struct case_source_class storage_source_class = 
-  {
-    "storage",
-    storage_source_count,
-    storage_source_read,
-    storage_source_destroy,
-  };
diff --git a/src/data/storage-stream.h b/src/data/storage-stream.h

deleted file mode 100644 (file)

index 980b1c7..0000000
--- a/src/data/storage-stream.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#ifndef STORAGE_STREAM_H
-#define STORAGE_STREAM_H 1
-
-struct case_source;
-struct casefile;
-
-extern const struct case_sink_class storage_sink_class;
-extern const struct case_source_class storage_source_class;
-
-struct casefile *storage_source_get_casefile (struct case_source *);
-struct casefile *storage_source_decapsulate (struct case_source *);
-struct case_source *storage_source_create (struct casefile *);
-
-#endif /* storage-stream.h */
diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c

index 401e3e27451e1d9c42b15e4eca19433d541439ef..9589747ea0bc2775d98c20372a804ea9a2ff2214 100644 (file)
--- a/src/data/sys-file-reader.c
+++ b/src/data/sys-file-reader.c
@@ -18,8 +18,8 @@
  
  #include <config.h>
  
-#include "sys-file-reader.h"
-#include "sys-file-private.h"
+#include <data/sys-file-reader.h>
+#include <data/sys-file-private.h>
  
  #include <errno.h>
  #include <float.h>
@@ -38,15 +38,17 @@
  #include <libpspp/hash.h>
  #include <libpspp/array.h>
  
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "file-name.h"
-#include "format.h"
-#include "missing-values.h"
-#include "value-labels.h"
-#include "variable.h"
-#include "value.h"
+#include <data/case.h>
+#include <data/casereader-provider.h>
+#include <data/casereader.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/file-name.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <data/value.h>
  
  #include "c-ctype.h"
  #include "inttostr.h"
@@ -69,11 +71,12 @@ struct sfm_reader
      struct file_handle *fh;     /* File handle. */
      FILE *file;                 /* File stream. */
      bool error;                 /* I/O or corruption error? */
+    size_t value_cnt;           /* Number of "union value"s in struct case. */
  
      /* File format. */
      enum integer_format integer_format; /* On-disk integer format. */
      enum float_format float_format; /* On-disk floating point format. */
-    int value_cnt;             /* Number of 8-byte units per case. */
+    int flt64_cnt;             /* Number of 8-byte units per case. */
      struct sfm_var *vars;       /* Variables. */
      size_t var_cnt;             /* Number of variables. */
      bool has_long_var_names;    /* File has a long variable name map */
@@ -93,6 +96,10 @@ struct sfm_var
      int case_index;             /* Index into case. */
    };
  
+static struct casereader_class sys_file_casereader_class;
+
+static bool close_reader (struct sfm_reader *);
+
  static struct variable **make_var_by_value_idx (struct sfm_reader *,
                                                  struct dictionary *);
  static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
@@ -125,6 +132,8 @@ static bool read_variable_to_value_map (struct sfm_reader *,
                                          struct variable_to_value_map *,
                                          struct variable **var, char **value,
                                          int *warning_cnt);
+
+static bool close_reader (struct sfm_reader *r);
  \f
  /* Dictionary reader. */
  
@@ -135,7 +144,7 @@ enum which_format
    };
  
  static void read_header (struct sfm_reader *, struct dictionary *,
-                         int *weight_idx, int *claimed_value_cnt,
+                         int *weight_idx, int *claimed_flt64_cnt,
                           struct sfm_read_info *);
  static void read_variable_record (struct sfm_reader *, struct dictionary *,
                                    int *format_warning_cnt);
@@ -169,7 +178,7 @@ static void read_long_string_map (struct sfm_reader *,
     reading.  Reads the system file's dictionary into *DICT.
     If INFO is non-null, then it receives additional info about the
     system file. */
-struct sfm_reader *
+struct casereader *
  sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
                   struct sfm_read_info *info)
  {
@@ -177,7 +186,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
    struct variable **var_by_value_idx;
    int format_warning_cnt = 0;
    int weight_idx;
-  int claimed_value_cnt;
+  int claimed_flt64_cnt;
    int rec_type;
    size_t i;
  
@@ -191,14 +200,14 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
    r->fh = fh;
    r->file = fn_open (fh_get_file_name (fh), "rb");
    r->error = false;
-  r->value_cnt = 0;
+  r->flt64_cnt = 0;
    r->has_vls = false;
    r->has_long_var_names = false;
    r->opcode_idx = sizeof r->opcodes;
  
    if (setjmp (r->bail_out)) 
      {
-      sfm_close_reader (r);
+      close_reader (r);
        dict_destroy (*dict);
        *dict = NULL;
        return NULL;
@@ -212,7 +221,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
      }
  
    /* Read header. */
-  read_header (r, *dict, &weight_idx, &claimed_value_cnt, info);
+  read_header (r, *dict, &weight_idx, &claimed_flt64_cnt, info);
  
    /* Read all the variable definition records. */
    rec_type = read_int32 (r);
@@ -280,10 +289,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
    /* Read record 999 data, which is just filler. */
    read_int32 (r);
  
-  if (claimed_value_cnt != -1 && claimed_value_cnt != r->value_cnt)
+  if (claimed_flt64_cnt != -1 && claimed_flt64_cnt != r->flt64_cnt)
      sys_warn (r, _("File header claims %d variable positions but "
                     "%d were read from file."),
-              claimed_value_cnt, r->value_cnt);
+              claimed_flt64_cnt, r->flt64_cnt);
  
    /* Create an index of dictionary variable widths for
       sfm_read_case to use.  We cannot use the `struct variable's
@@ -300,36 +309,48 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
      }
  
    pool_free (r->pool, var_by_value_idx);
-  return r;
+  r->value_cnt = dict_get_next_value_idx (*dict);
+  return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+                                       &sys_file_casereader_class, r);
  }
  
-/* Closes a system file after we're done with it. */
-void
-sfm_close_reader (struct sfm_reader *r)
+/* Closes a system file after we're done with it.
+   Returns true if successful (or if R is null), false if an I/O
+   or corruption error has occurred on R. */
+static bool
+close_reader (struct sfm_reader *r)
  {
+  bool error;
+
    if (r == NULL)
-    return;
+    return true;
  
    if (r->file)
      {
-      if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
-        msg (ME, _("Error closing system file \"%s\": %s."),
-             fh_get_file_name (r->fh), strerror (errno));
+      if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) 
+        {
+          msg (ME, _("Error closing system file \"%s\": %s."),
+               fh_get_file_name (r->fh), strerror (errno));
+          r->error = true;
+        }
        r->file = NULL;
      }
  
    if (r->fh != NULL)
      fh_close (r->fh, "system file", "rs");
  
+  error = r->error;
    pool_destroy (r->pool);
+
+  return !error;
  }
  
-/* Returns true if an I/O error has occurred on READER, false
-   otherwise. */
-bool
-sfm_read_error (const struct sfm_reader *reader) 
+/* Destroys READER. */
+static void
+sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) 
  {
-  return reader->error;
+  struct sfm_reader *r = r_;
+  close_reader (r);
  }
  
  /* Returns true if FILE is an SPSS system file,
@@ -350,13 +371,13 @@ sfm_detect (FILE *file)
     Sets DICT's file label to the system file's label.
     Sets *WEIGHT_IDX to 0 if the system file is unweighted,
     or to the value index of the weight variable otherwise.
-   Sets *CLAIMED_VALUE_CNT to the number of values that the file
+   Sets *CLAIMED_FLT64_CNT to the number of 8-byte units the file
     claims to have (although it is not always correct).
     If INFO is non-null, initializes *INFO with header
     information. */   
  static void
  read_header (struct sfm_reader *r, struct dictionary *dict,
-             int *weight_idx, int *claimed_value_cnt,
+             int *weight_idx, int *claimed_flt64_cnt,
               struct sfm_read_info *info)
  {
    char rec_type[5];
@@ -385,9 +406,9 @@ read_header (struct sfm_reader *r, struct dictionary *dict,
            && r->integer_format != INTEGER_LSB_FIRST))
      sys_error (r, _("This is not an SPSS system file."));
  
-  *claimed_value_cnt = read_int32 (r);
-  if (*claimed_value_cnt < 0 || *claimed_value_cnt > INT_MAX / 16)
-    *claimed_value_cnt = -1;
+  *claimed_flt64_cnt = read_int32 (r);
+  if (*claimed_flt64_cnt < 0 || *claimed_flt64_cnt > INT_MAX / 16)
+    *claimed_flt64_cnt = -1;
  
    r->compressed = read_int32 (r) != 0;
  
@@ -564,7 +585,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict,
    /* Account for values.
       Skip long string continuation records, if any. */
    nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
-  r->value_cnt += nv;
+  r->flt64_cnt += nv;
    if (width > 8)
      {
        int i;
@@ -1110,29 +1131,39 @@ static bool read_compressed_number (struct sfm_reader *, double *);
  static bool read_compressed_string (struct sfm_reader *, char *);
  static bool read_whole_strings (struct sfm_reader *, char *, size_t);
  
-/* Reads one case from READER's file into C.  Returns nonzero
-   only if successful. */
-int
-sfm_read_case (struct sfm_reader *r, struct ccase *c)
+/* Reads one case from READER's file into C.  Returns true only
+   if successful. */
+static bool
+sys_file_casereader_read (struct casereader *reader, void *r_,
+                          struct ccase *c)
  {
+  struct sfm_reader *r = r_;
    if (r->error)
-    return 0;
+    return false;
  
-  if (setjmp (r->bail_out))
-    return 0;
+  case_create (c, r->value_cnt);
+  if (setjmp (r->bail_out)) 
+    {
+      casereader_force_error (reader);
+      case_destroy (c);
+      return false; 
+    }
  
    if (!r->compressed && sizeof (double) == 8 && !r->has_vls) 
      {
        /* Fast path.  Read the whole case directly. */
        if (!try_read_bytes (r, case_data_all_rw (c),
-                         sizeof (union value) * r->value_cnt))
-        return 0;
+                           sizeof (union value) * r->flt64_cnt)) 
+        {
+          case_destroy (c);
+          return false; 
+        }
  
        /* Convert floating point numbers to native format if needed. */
        if (r->float_format != FLOAT_NATIVE_DOUBLE) 
          {
            int i;
-          
+
            for (i = 0; i < r->var_cnt; i++) 
              if (r->vars[i].width == 0) 
                {
@@ -1140,7 +1171,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c)
                  float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d); 
                }
          }
-      return 1;
+      return true;
      }
    else 
      {
@@ -1194,12 +1225,13 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c)
                  }
              }
          }
-      return 1; 
+      return true; 
  
      eof:
+      case_destroy (c);
        if (i != 0)
          partial_record (r);
-      return 0;
+      return false;
      }
  }
  
@@ -1386,7 +1418,7 @@ make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
    int i;
  
    var_by_value_idx = pool_nmalloc (r->pool,
-                                   r->value_cnt, sizeof *var_by_value_idx);
+                                   r->flt64_cnt, sizeof *var_by_value_idx);
    for (i = 0; i < dict_get_var_cnt (dict); i++) 
      {
        struct variable *v = dict_get_var (dict, i);
@@ -1397,7 +1429,7 @@ make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
        for (j = 1; j < nv; j++)
          var_by_value_idx[value_idx++] = NULL;
      }
-  assert (value_idx == r->value_cnt);
+  assert (value_idx == r->flt64_cnt);
  
    return var_by_value_idx;
  }
@@ -1411,9 +1443,9 @@ lookup_var_by_value_idx (struct sfm_reader *r,
  {
    struct variable *var;
    
-  if (value_idx < 1 || value_idx > r->value_cnt)
+  if (value_idx < 1 || value_idx > r->flt64_cnt)
      sys_error (r, _("Variable index %d not in valid range 1...%d."),
-               value_idx, r->value_cnt);
+               value_idx, r->flt64_cnt);
  
    var = var_by_value_idx[value_idx - 1];
    if (var == NULL)
@@ -1686,4 +1718,11 @@ flt64_to_double (const struct sfm_reader *r, const uint8_t flt64[8])
      float_convert (r->float_format, flt64, FLOAT_NATIVE_DOUBLE, &x);
    return x;
  }
-
+\f
+static struct casereader_class sys_file_casereader_class = 
+  {
+    sys_file_casereader_read,
+    sys_file_casereader_destroy,
+    NULL,
+    NULL,
+  };
diff --git a/src/data/sys-file-reader.h b/src/data/sys-file-reader.h

index 6a3e7029b529d44169db53191eb4134d612493c2..337404757c42e5cdd00e9b64afe44c9203488e4b 100644 (file)
--- a/src/data/sys-file-reader.h
+++ b/src/data/sys-file-reader.h
@@ -42,12 +42,9 @@ struct sfm_read_info
  struct dictionary;
  struct file_handle;
  struct ccase;
-struct sfm_reader *sfm_open_reader (struct file_handle *,
+struct casereader *sfm_open_reader (struct file_handle *,
                                      struct dictionary **,
                                      struct sfm_read_info *);
-int sfm_read_case (struct sfm_reader *, struct ccase *);
-bool sfm_read_error (const struct sfm_reader *);
-void sfm_close_reader (struct sfm_reader *);
  bool sfm_detect (FILE *);
  
  #endif /* sys-file-reader.h */
diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c

index 808a307ec7dc53dc68e26baed0f6fd6c45d41262..c7b988da04b4497e988bae9b8fa734f157d3c681 100644 (file)
--- a/src/data/sys-file-writer.c
+++ b/src/data/sys-file-writer.c
@@ -37,14 +37,16 @@
  #include <libpspp/str.h>
  #include <libpspp/version.h>
  
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
-#include "settings.h"
-#include "value-labels.h"
-#include "variable.h"
+#include <data/case.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/settings.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
  
  #include "minmax.h"
  
@@ -144,6 +146,8 @@ struct sfm_var
      size_t flt64_cnt;           /* Number of flt64 elements. */
    };
  
+static struct casewriter_class sys_file_casewriter_class;
+
  static char *append_string_max (char *, const char *, const char *);
  static void write_header (struct sfm_writer *, const struct dictionary *);
  static void buf_write (struct sfm_writer *, const void *, size_t);
@@ -164,6 +168,9 @@ static void write_variable_display_parameters (struct sfm_writer *w,
  
  static void write_documents (struct sfm_writer *, const struct dictionary *);
  
+static bool write_error (const struct sfm_writer *);
+static bool close_writer (struct sfm_writer *);
+
  static inline int
  var_flt64_cnt (const struct variable *v) 
  {
@@ -219,7 +226,7 @@ cont_var_name(const char *sn, int idx)
     No reference to D is retained, so it may be modified or
     destroyed at will after this function returns.  D is not
     modified by this function, except to assign short names. */
-struct sfm_writer *
+struct casewriter *
  sfm_open_writer (struct file_handle *fh, struct dictionary *d,
                   struct sfm_write_options opts)
  {
@@ -374,13 +381,13 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d,
        w->y = (unsigned char *) w->ptr;
      }
  
-  if (sfm_write_error (w))
+  if (write_error (w))
      goto error;
    
-  return w;
+  return casewriter_create (&sys_file_casewriter_class, w);
  
   error:
-  sfm_close_writer (w);
+  close_writer (w);
    return NULL;
  
   open_error:
@@ -925,13 +932,18 @@ ensure_buf_space (struct sfm_writer *w)
  
  static void write_compressed_data (struct sfm_writer *w, const flt64 *elem);
  
-/* Writes case C to system file W.
-   Returns 1 if successful, 0 if an I/O error occurred. */
-bool
-sfm_write_case (struct sfm_writer *w, const struct ccase *c)
+/* Writes case C to system file W. */
+static void
+sys_file_casewriter_write (struct casewriter *writer, void *w_,
+                           struct ccase *c)
  {
-  if (ferror (w->file))
-    return 0;
+  struct sfm_writer *w = w_;
+  if (ferror (w->file)) 
+    {
+      casewriter_force_error (writer);
+      case_destroy (c);
+      return; 
+    }
    
    w->case_cnt++;
  
@@ -990,8 +1002,16 @@ sfm_write_case (struct sfm_writer *w, const struct ccase *c)
  
        local_free (bounce); 
      }
-  
-  return !sfm_write_error (w);
+
+  case_destroy (c);
+}
+
+static void
+sys_file_casewriter_destroy (struct casewriter *writer, void *w_) 
+{
+  struct sfm_writer *w = w_;
+  if (!close_writer (w))
+    casewriter_force_error (writer);
  }
  
  static void
@@ -1057,7 +1077,7 @@ write_compressed_data (struct sfm_writer *w, const flt64 *elem)
  
  /* Returns true if an I/O error has occurred on WRITER, false otherwise. */
  bool
-sfm_write_error (const struct sfm_writer *writer)
+write_error (const struct sfm_writer *writer)
  {
    return ferror (writer->file);
  }
@@ -1065,7 +1085,7 @@ sfm_write_error (const struct sfm_writer *writer)
  /* Closes a system file after we're done with it.
     Returns true if successful, false if an I/O error occurred. */
  bool
-sfm_close_writer (struct sfm_writer *w)
+close_writer (struct sfm_writer *w)
  {
    bool ok;
    
@@ -1083,7 +1103,7 @@ sfm_close_writer (struct sfm_writer *w)
          }
        fflush (w->file);
  
-      ok = !sfm_write_error (w);
+      ok = !write_error (w);
  
        /* Seek back to the beginning and update the number of cases.
           This is just a courtesy to later readers, so there's no need
@@ -1112,3 +1132,10 @@ sfm_close_writer (struct sfm_writer *w)
  
    return ok;
  }
+\f
+static struct casewriter_class sys_file_casewriter_class = 
+  {
+    sys_file_casewriter_write,
+    sys_file_casewriter_destroy,
+    NULL,
+  };
diff --git a/src/data/sys-file-writer.h b/src/data/sys-file-writer.h

index 9773230e9791b628c14ba65b246008caac8b2ec8..9e850156141ca4cc0abcbf4197578f01d5d888f4 100644 (file)
--- a/src/data/sys-file-writer.h
+++ b/src/data/sys-file-writer.h
@@ -34,12 +34,8 @@ struct sfm_write_options
  struct file_handle;
  struct dictionary;
  struct ccase;
-struct sfm_writer *sfm_open_writer (struct file_handle *, struct dictionary *,
+struct casewriter *sfm_open_writer (struct file_handle *, struct dictionary *,
                                      struct sfm_write_options);
  struct sfm_write_options sfm_writer_default_options (void);
  
-bool sfm_write_case (struct sfm_writer *, const struct ccase *);
-bool sfm_write_error (const struct sfm_writer *);
-bool sfm_close_writer (struct sfm_writer *);
-
  #endif /* sys-file-writer.h */
diff --git a/src/language/ChangeLog b/src/language/ChangeLog

index 0a8068834a064d6487f56367519bee90ddd2c423..8949ff75c1b989290dd343e45de6772fa925804b 100644 (file)
--- a/src/language/ChangeLog
+++ b/src/language/ChangeLog
@@ -1,6 +1,7 @@
  2007-06-06  Ben Pfaff  <blp@gnu.org>
  
-       * command.def: Add DEBUG DATASHEET command.
+       * command.def: Add DEBUG DATASHEET command.  Remove DEBUG CASEFILE
+       command.
  
  2007-03-18  Ben Pfaff  <blp@gnu.org>
  
diff --git a/src/language/command.c b/src/language/command.c

index a4821db45c14efe7a850d5daa36200c045102b12..49ec22fdb8904546cd10d081d89ee31fed15a19a 100644 (file)
--- a/src/language/command.c
+++ b/src/language/command.c
@@ -26,6 +26,7 @@
  #include <errno.h>
  #include <unistd.h>
  
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/settings.h>
@@ -147,8 +148,11 @@ cmd_parse_in_state (struct lexer *lexer, struct dataset *ds,
    if (cmd_result_is_failure (result))
      lex_discard_rest_of_command (lexer);
  
+  assert (!proc_is_open (ds));
    unset_cmd_algorithm ();
    dict_clear_aux (dataset_dict (ds));
+  if (!dataset_end_of_command (ds))
+    result = CMD_CASCADING_FAILURE;
  
    return result;
  }
@@ -158,7 +162,7 @@ cmd_parse (struct lexer *lexer, struct dataset *ds)
  {
    const struct dictionary *dict = dataset_dict (ds);
    return cmd_parse_in_state (lexer, ds,
-                            proc_has_source (ds) &&
+                            proc_has_active_file (ds) &&
                              dict_get_var_cnt (dict) > 0 ?
                              CMD_STATE_DATA : CMD_STATE_INITIAL);
  }
@@ -203,7 +207,7 @@ do_parse_command (struct lexer *lexer, struct dataset *ds, enum cmd_state state)
      {
        msg (SE, _("%s may be used only in enhanced syntax mode."),
             command->name);
-       return CMD_FAILURE;
+      return CMD_FAILURE;
      }
    else if (!in_correct_state (command, state)) 
      {
@@ -687,7 +691,8 @@ cmd_n_of_cases (struct lexer *lexer, struct dataset *ds)
  int
  cmd_execute (struct lexer *lexer, struct dataset *ds)
  {
-  if (!procedure (ds, NULL, NULL))
+  bool ok = casereader_destroy (proc_open (ds));
+  if (!proc_commit (ds) || !ok)
      return CMD_CASCADING_FAILURE;
    return lex_end_of_command (lexer);
  }
@@ -840,7 +845,7 @@ cmd_host (struct lexer *lexer, struct dataset *ds UNUSED)
  int
  cmd_new_file (struct lexer *lexer, struct dataset *ds)
  {
-  discard_variables (ds);
+  proc_discard_active_file (ds);
  
    return lex_end_of_command (lexer);
  }
diff --git a/src/language/command.def b/src/language/command.def

index ef8b385b3aa423bb79b30d889aa633f92bdf074f..a0e974e1db9e8029aa8bb527b24c38c168df23e1 100644 (file)
--- a/src/language/command.def
+++ b/src/language/command.def
@@ -128,7 +128,6 @@ DEF_CMD (S_INPUT_PROGRAM, 0, "END INPUT PROGRAM", cmd_end_input_program)
  DEF_CMD (S_INPUT_PROGRAM, 0, "REREAD", cmd_reread)
  
  /* Commands for testing PSPP. */
-DEF_CMD (S_ANY, F_TESTING, "DEBUG CASEFILE", cmd_debug_casefile)
  DEF_CMD (S_ANY, F_TESTING, "DEBUG DATASHEET", cmd_debug_datasheet)
  DEF_CMD (S_ANY, F_TESTING, "DEBUG EVALUATE", cmd_debug_evaluate)
  DEF_CMD (S_ANY, F_TESTING, "DEBUG MOMENTS", cmd_debug_moments)
diff --git a/src/language/control/do-if.c b/src/language/control/do-if.c

index cf3a8b0e9844b6e4f50391e097702fdff5318b90..7de886efd3e8dbe182caf8db73d1c82d8d1539a3 100644 (file)
--- a/src/language/control/do-if.c
+++ b/src/language/control/do-if.c
@@ -21,6 +21,7 @@
  #include <stdlib.h>
  
  #include "control-stack.h"
+#include <data/case.h>
  #include <data/procedure.h>
  #include <data/transformations.h>
  #include <data/value.h>
diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog

index 7d3297410473335bf8d1b68380924bc5b64cc051..0c1f3791f7df36e098cd844b1200e070de0978ac 100644 (file)
--- a/src/language/data-io/ChangeLog
+++ b/src/language/data-io/ChangeLog
@@ -1,3 +1,18 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       Adapt case sources, sinks, and clients of procedure code to the
+       new infrastructure.
+       
+       * data-list.c: Make DATA LIST into a casereader.
+
+       * get.c: Change GET, IMPORT, SAVE, EXPORT to use casereaders,
+       casewriters.
+
+       * inpt-pgm.c: Use caseinit code.  Turn INPUT PROGRAM into a
+       casereader.
+
+       * list.q: Adapt to new procedure code.
+
  2007-05-06  Ben Pfaff  <blp@gnu.org>
  
         Abstract the documents within a dictionary a little better.
diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c

index 9594ead0759b888427a184e5c75322e55cac1eb7..07a130c3e9d9dbe0003045bc49f2f98a8fb2089c 100644 (file)
--- a/src/language/data-io/data-list.c
+++ b/src/language/data-io/data-list.c
@@ -23,10 +23,10 @@
  #include <stdio.h>
  #include <stdlib.h>
  
-#include <data/case-source.h>
  #include <data/case.h>
-#include <data/case-source.h>
  #include <data/data-in.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
  #include <data/dictionary.h>
  #include <data/format.h>
  #include <data/procedure.h>
@@ -99,9 +99,10 @@ struct data_list_pgm
      int record_cnt;             /* Number of records. */
      struct string delims;       /* Field delimiters. */
      int skip_records;           /* Records to skip before first case. */
+    size_t value_cnt;           /* Number of `union value's in case. */
    };
  
-static const struct case_source_class data_list_source_class;
+static const struct casereader_class data_list_casereader_class;
  
  static bool parse_fixed (struct lexer *, struct dictionary *dict, 
                          struct pool *tmp_pool, struct data_list_pgm *);
@@ -118,15 +119,14 @@ static trns_proc_func data_list_trns_proc;
  int
  cmd_data_list (struct lexer *lexer, struct dataset *ds)
  {
-  struct dictionary *dict = dataset_dict (ds);
+  struct dictionary *dict;
    struct data_list_pgm *dls;
    int table = -1;                /* Print table if nonzero, -1=undecided. */
    struct file_handle *fh = fh_inline_file ();
    struct pool *tmp_pool;
    bool ok;
  
-  if (!in_input_program ())
-    discard_variables (ds);
+  dict = in_input_program () ? dataset_dict (ds) : dict_create ();
  
    dls = pool_create_container (struct data_list_pgm, pool);
    ll_init (&dls->specs);
@@ -178,9 +178,9 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds)
           lex_match (lexer, '=');
           if (!lex_force_id (lexer))
             goto error;
-         dls->end = dict_lookup_var (dataset_dict (ds), lex_tokid (lexer));
+         dls->end = dict_lookup_var (dict, lex_tokid (lexer));
           if (!dls->end) 
-            dls->end = dict_create_var_assert (dataset_dict (ds), lex_tokid (lexer), 0);
+            dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
           lex_get (lexer);
         }
        else if (lex_token (lexer) == T_ID)
@@ -273,10 +273,19 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds)
    if (dls->reader == NULL)
      goto error;
  
+  dls->value_cnt = dict_get_next_value_idx (dict);
+
    if (in_input_program ())
      add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
    else 
-    proc_set_source (ds, create_case_source (&data_list_source_class, dls));
+    {
+      struct casereader *reader;
+      reader = casereader_create_sequential (NULL,
+                                             dict_get_next_value_idx (dict),
+                                             -1, &data_list_casereader_class,
+                                             dls);
+      proc_set_active_file (ds, reader, dict); 
+    }
  
    pool_destroy (tmp_pool);
  
@@ -810,10 +819,12 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED)
     Returns true if successful, false at end of file or if an
     I/O error occurred. */
  static bool
-data_list_source_read (struct case_source *source, struct ccase *c)
+data_list_casereader_read (struct casereader *reader UNUSED, void *dls_,
+                           struct ccase *c)
  {
-  struct data_list_pgm *dls = source->aux;
-
+  struct data_list_pgm *dls = dls_;
+  bool ok;
+  
    /* Skip the requested number of records before reading the
       first case. */
    while (dls->skip_records > 0) 
@@ -823,26 +834,28 @@ data_list_source_read (struct case_source *source, struct ccase *c)
        dfm_forward_record (dls->reader);
        dls->skip_records--;
      }
-  
-  return read_from_data_list (dls, c);
+
+  case_create (c, dls->value_cnt);
+  ok = read_from_data_list (dls, c);
+  if (!ok)
+    case_destroy (c);
+  return ok;
  }
  
-/* Destroys the source.
-   Returns true if successful read, false if an I/O occurred
-   during destruction or previously. */
-static bool
-data_list_source_destroy (struct case_source *source)
+/* Destroys the casereader, passing any data reader error on to READER. */
+static void
+data_list_casereader_destroy (struct casereader *reader, void *dls_)
 {
-  struct data_list_pgm *dls = source->aux;
-  bool ok = !dfm_reader_error (dls->reader);
+  struct data_list_pgm *dls = dls_;
+  if (dfm_reader_error (dls->reader))
+    casereader_force_error (reader);
   data_list_trns_free (dls);
-  return ok;
 }
  
-static const struct case_source_class data_list_source_class = 
+static const struct casereader_class data_list_casereader_class =
    {
-    "DATA LIST",
+    data_list_casereader_read,
+    data_list_casereader_destroy,
+    NULL,
      NULL,
-    data_list_source_read,
-    data_list_source_destroy,
    };
diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c

index fa96922329fbc29b315cf0feb169d2e4cc1ca7b3..0650a54670c8c40bcdcfb7492cb335ab118cea97 100644 (file)
--- a/src/language/data-io/data-reader.c
+++ b/src/language/data-io/data-reader.c
@@ -25,6 +25,7 @@
  #include <stdio.h>
  #include <stdlib.h>
  
+#include <data/casereader.h>
  #include <data/file-handle-def.h>
  #include <data/file-name.h>
  #include <data/procedure.h>
@@ -444,8 +445,8 @@ cmd_begin_data (struct lexer *lexer, struct dataset *ds)
  
    /* Input procedure reads from inline file. */
    prompt_set_style (PROMPT_DATA);
-  ok = procedure (ds, NULL, NULL);
-
+  casereader_destroy (proc_open (ds));
+  ok = proc_commit (ds);
    dfm_close_reader (r);
  
    return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c

index 32b3764ea6f98829c6fe19585dea4e0ea6e52afe..d06a8e6594fcfeffec9df1cd96eab3c93c4a054e 100644 (file)
--- a/src/language/data-io/get.c
+++ b/src/language/data-io/get.c
@@ -22,17 +22,14 @@
  
  #include <data/any-reader.h>
  #include <data/any-writer.h>
-#include <data/case-sink.h>
-#include <data/case-source.h>
  #include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
  #include <data/format.h>
  #include <data/dictionary.h>
  #include <data/por-file-writer.h>
  #include <data/procedure.h>
  #include <data/settings.h>
-#include <data/storage-stream.h>
  #include <data/sys-file-writer.h>
  #include <data/transformations.h>
  #include <data/value-labels.h>
@@ -46,9 +43,9 @@
  #include <libpspp/compiler.h>
  #include <libpspp/hash.h>
  #include <libpspp/message.h>
-#include <libpspp/message.h>
  #include <libpspp/misc.h>
  #include <libpspp/str.h>
+#include <libpspp/taint.h>
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
@@ -71,25 +68,18 @@ enum reader_command
      IMPORT_CMD
    };
  
-/* Case reader input program. */
-struct case_reader_pgm 
-  {
-    struct any_reader *reader;  /* File reader. */
-    struct case_map *map;       /* Map from file dict to active file dict. */
-    struct ccase bounce;        /* Bounce buffer. */
-  };
-
-static const struct case_source_class case_reader_source_class;
-
-static void case_reader_pgm_free (struct case_reader_pgm *);
+static void get_translate_case (const struct ccase *, struct ccase *,
+                                void *map_);
+static bool get_destroy_case_map (void *map_);
  
  /* Parses a GET or IMPORT command. */
  static int
  parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
  {
-  struct case_reader_pgm *pgm = NULL;
+  struct casereader *reader = NULL;
    struct file_handle *fh = NULL;
    struct dictionary *dict = NULL;
+  struct case_map *map = NULL;
  
    for (;;)
      {
@@ -127,17 +117,10 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command
        goto error;
      }
                
-  discard_variables (ds);
-
-  pgm = xmalloc (sizeof *pgm);
-  pgm->reader = any_reader_open (fh, &dict);
-  pgm->map = NULL;
-  case_nullify (&pgm->bounce);
-  if (pgm->reader == NULL)
+  reader = any_reader_open (fh, &dict);
+  if (reader == NULL)
      goto error;
  
-  case_create (&pgm->bounce, dict_get_next_value_idx (dict));
-
    start_case_map (dict);
  
    while (lex_token (lexer) != '.')
@@ -147,71 +130,40 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command
          goto error;
      }
  
-  pgm->map = finish_case_map (dict);
-
-  dataset_set_dict (ds, dict);
-
-  proc_set_source (ds,
-                  create_case_source (&case_reader_source_class, pgm));
+  map = finish_case_map (dict);
+  if (map != NULL)
+    reader = casereader_create_translator (reader,
+                                           dict_get_next_value_idx (dict),
+                                           get_translate_case,
+                                           get_destroy_case_map,
+                                           map);
+  
+  proc_set_active_file (ds, reader, dict);
  
    return CMD_SUCCESS;
  
   error:
-  case_reader_pgm_free (pgm);
+  casereader_destroy (reader);
    if (dict != NULL)
      dict_destroy (dict);
    return CMD_CASCADING_FAILURE;
  }
  
-/* Frees a struct case_reader_pgm. */
  static void
-case_reader_pgm_free (struct case_reader_pgm *pgm) 
+get_translate_case (const struct ccase *input, struct ccase *output,
+                    void *map_) 
  {
-  if (pgm != NULL) 
-    {
-      any_reader_close (pgm->reader);
-      destroy_case_map (pgm->map);
-      case_destroy (&pgm->bounce);
-      free (pgm);
-    }
+  struct case_map *map = map_;
+  map_case (map, input, output);
  }
  
-/* Reads one case into C.
-   Returns true if successful, false at end of file or if an
-   I/O error occurred. */
  static bool
-case_reader_source_read (struct case_source *source, struct ccase *c)
+get_destroy_case_map (void *map_) 
  {
-  struct case_reader_pgm *pgm = source->aux;
-  if (any_reader_read (pgm->reader, pgm->map == NULL ? c : &pgm->bounce)) 
-    {
-      if (pgm->map != NULL)
-        map_case (pgm->map, &pgm->bounce, c);
-      return true;
-    }
-  else  
-    return false;
-}
-
-/* Destroys the source.
-   Returns true if successful read, false if an I/O occurred
-   during destruction or previously. */
-static bool
-case_reader_source_destroy (struct case_source *source)
-{
-  struct case_reader_pgm *pgm = source->aux;
-  bool ok = !any_reader_error (pgm->reader); 
-  case_reader_pgm_free (pgm);
-  return ok;
+  struct case_map *map = map_;
+  destroy_case_map (map);
+  return true;
  }
-
-static const struct case_source_class case_reader_source_class =
-  {
-    "case reader",
-    NULL,
-    case_reader_source_read,
-    case_reader_source_destroy,
-  };
  \f
  /* GET. */
  int
@@ -243,30 +195,6 @@ enum command_type
      PROC_CMD            /* Procedure. */
    };
  
-/* File writer plus a case map. */
-struct case_writer
-  {
-    struct any_writer *writer;  /* File writer. */
-    struct case_map *map;       /* Map to output file dictionary
-                                   (null pointer for identity mapping). */
-    struct ccase bounce;        /* Bounce buffer for mapping (if needed). */
-  };
-
-/* Destroys AW. */
-static bool
-case_writer_destroy (struct case_writer *aw)
-{
-  bool ok = true;
-  if (aw != NULL) 
-    {
-      ok = any_writer_close (aw->writer);
-      destroy_case_map (aw->map);
-      case_destroy (&aw->bounce);
-      free (aw);
-    }
-  return ok;
-}
-
  /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
     WRITER_TYPE identifies the type of file to write,
     and COMMAND_TYPE identifies the type of command.
@@ -277,7 +205,7 @@ case_writer_destroy (struct case_writer *aw)
     included.
  
     On failure, returns a null pointer. */
-static struct case_writer *
+static struct casewriter *
  parse_write_command (struct lexer *lexer, struct dataset *ds, 
                      enum writer_type writer_type,
                       enum command_type command_type,
@@ -286,7 +214,8 @@ parse_write_command (struct lexer *lexer, struct dataset *ds,
    /* Common data. */
    struct file_handle *handle; /* Output file. */
    struct dictionary *dict;    /* Dictionary for output file. */
-  struct case_writer *aw;      /* Writer. */  
+  struct casewriter *writer;  /* Writer. */
+  struct case_map *map;       /* Map from input data to data for writer. */
  
    /* Common options. */
    bool print_map;             /* Print map?  TODO. */
@@ -303,10 +232,8 @@ parse_write_command (struct lexer *lexer, struct dataset *ds,
  
    handle = NULL;
    dict = dict_clone (dataset_dict (ds));
-  aw = xmalloc (sizeof *aw);
-  aw->writer = NULL;
-  aw->map = NULL;
-  case_nullify (&aw->bounce);
+  writer = NULL;
+  map = NULL;
    print_map = false;
    print_short_names = false;
    sysfile_opts = sfm_writer_default_options ();
@@ -412,49 +339,40 @@ parse_write_command (struct lexer *lexer, struct dataset *ds,
      }
  
    dict_compact_values (dict);
-  aw->map = finish_case_map (dict);
-  if (aw->map != NULL)
-    case_create (&aw->bounce, dict_get_next_value_idx (dict));
  
    if (fh_get_referent (handle) == FH_REF_FILE) 
      {
        switch (writer_type) 
          {
          case SYSFILE_WRITER:
-          aw->writer = any_writer_from_sfm_writer (
-            sfm_open_writer (handle, dict, sysfile_opts));
+          writer = sfm_open_writer (handle, dict, sysfile_opts);
            break;
          case PORFILE_WRITER:
-          aw->writer = any_writer_from_pfm_writer (
-            pfm_open_writer (handle, dict, porfile_opts));
+          writer = pfm_open_writer (handle, dict, porfile_opts);
            break;
          }
      }
    else
-    aw->writer = any_writer_open (handle, dict);
-  if (aw->writer == NULL)
+    writer = any_writer_open (handle, dict);
+  if (writer == NULL)
      goto error;
+
+  map = finish_case_map (dict);
+  if (map != NULL)
+    writer = casewriter_create_translator (writer,
+                                           get_translate_case,
+                                           get_destroy_case_map,
+                                           map);
    dict_destroy (dict);
    
-  return aw;
+  return writer;
  
   error:
-  case_writer_destroy (aw);
+  casewriter_destroy (writer);
    dict_destroy (dict);
+  destroy_case_map (map);
    return NULL;
  }
-
-/* Writes case C to writer AW. */
-static bool
-case_writer_write_case (struct case_writer *aw, const struct ccase *c) 
-{
-  if (aw->map != NULL) 
-    {
-      map_case (aw->map, c, &aw->bounce);
-      c = &aw->bounce; 
-    }
-  return any_writer_write (aw->writer, c);
-}
  \f
  /* SAVE and EXPORT. */
  
@@ -464,26 +382,24 @@ parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type wri
  {
    bool retain_unselected;
    struct variable *saved_filter_variable;
-  struct case_writer *aw;
-  struct ccase *c;
-  bool ok = true;
+  struct casewriter *output;
+  bool ok;
  
-  aw = parse_write_command (lexer, ds, writer_type, PROC_CMD, &retain_unselected);
-  if (aw == NULL) 
+  output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
+                                &retain_unselected);
+  if (output == NULL) 
      return CMD_CASCADING_FAILURE;
  
    saved_filter_variable = dict_get_filter (dataset_dict (ds));
    if (retain_unselected) 
      dict_set_filter (dataset_dict (ds), NULL);
  
-  proc_open (ds);
-  while (ok && proc_read (ds, &c))
-    ok = case_writer_write_case (aw, c);
-  ok = proc_close (ds) && ok;
+  casereader_transfer (proc_open (ds), output);
+  ok = casewriter_destroy (output);
+  ok = proc_commit (ds) && ok;
  
    dict_set_filter (dataset_dict (ds), saved_filter_variable);
  
-  case_writer_destroy (aw);
    return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
  }
  
@@ -504,7 +420,7 @@ cmd_export (struct lexer *lexer, struct dataset *ds)
  /* Transformation. */
  struct output_trns 
    {
-    struct case_writer *aw;      /* Writer. */
+    struct casewriter *writer;          /* Writer. */
    };
  
  static trns_proc_func output_trns_proc;
@@ -515,8 +431,8 @@ static int
  parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type) 
  {
    struct output_trns *t = xmalloc (sizeof *t);
-  t->aw = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
-  if (t->aw == NULL) 
+  t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
+  if (t->writer == NULL) 
      {
        free (t);
        return CMD_CASCADING_FAILURE;
@@ -531,7 +447,9 @@ static int
  output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
  {
    struct output_trns *t = trns_;
-  case_writer_write_case (t->aw, c);
+  struct ccase tmp;
+  case_clone (&tmp, c);
+  casewriter_write (t->writer, &tmp);
    return TRNS_CONTINUE;
  }
  
@@ -541,13 +459,8 @@ static bool
  output_trns_free (void *trns_)
  {
    struct output_trns *t = trns_;
-  bool ok = true;
-
-  if (t != NULL)
-    {
-      ok = case_writer_destroy (t->aw);
-      free (t);
-    }
+  bool ok = casewriter_destroy (t->writer);
+  free (t);
    return ok;
  }
  
@@ -748,15 +661,15 @@ struct mtf_file
      int type;                  /* One of MTF_*. */
      const struct variable **by;        /* List of BY variables for this file. */
      struct file_handle *handle; /* File handle. */
-    struct any_reader *reader;  /* File reader. */
+    struct casereader *reader;  /* File reader. */
      struct dictionary *dict;   /* Dictionary from system file. */
+    bool active_file;           /* Active file? */
  
      /* IN subcommand. */
      char *in_name;              /* Variable name. */
      struct variable *in_var;    /* Variable (in master dictionary). */
  
-    struct ccase input_storage; /* Input record storage. */
-    struct ccase *input;        /* Input record. */
+    struct ccase input;         /* Input record. */
    };
  
  /* MATCH FILES procedure. */
@@ -773,7 +686,7 @@ struct mtf_proc
      char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
      
      struct dictionary *dict;    /* Dictionary of output file. */
-    struct casefile *output;    /* MATCH FILES output. */
+    struct casewriter *output;  /* MATCH FILES output. */
      struct ccase mtf_case;      /* Case used for output. */
  
      unsigned seq_num;           /* Have we initialized this variable? */
@@ -782,11 +695,12 @@ struct mtf_proc
  
  static bool mtf_free (struct mtf_proc *);
  static bool mtf_close_file (struct mtf_file *);
+static bool mtf_close_all_files (struct mtf_proc *);
  static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
-static bool mtf_read_records (struct mtf_proc *, struct dataset *);
+static bool mtf_read_records (struct mtf_proc *);
  static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
  
-static bool mtf_processing (struct mtf_proc *, struct dataset *);
+static bool mtf_processing (struct mtf_proc *);
  
  static char *var_type_description (struct variable *);
  
@@ -804,6 +718,7 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds)
    bool used_active_file = false;
    bool saw_table = false;
    bool saw_in = false;
+  bool open_active_file = false;
  
    mtf.head = mtf.tail = NULL;
    mtf.by_cnt = 0;
@@ -840,8 +755,8 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds)
        file->dict = NULL;
        file->in_name = NULL;
        file->in_var = NULL;
-      case_nullify (&file->input_storage);
-      file->input = &file->input_storage;
+      file->active_file = false;
+      case_nullify (&file->input);
  
        /* FILEs go first, then TABLEs. */
        if (file->type == MTF_TABLE || first_table == NULL)
@@ -881,7 +796,7 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds)
              }
            used_active_file = true;
  
-          if (!proc_has_source (ds))
+          if (!proc_has_active_file (ds))
              {
                msg (SE, _("Cannot specify the active file since no active "
                           "file has been defined."));
@@ -895,6 +810,7 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds)
                     "Temporary transformations will be made permanent."));
  
            file->dict = dataset_dict (ds);
+          file->active_file = true;
          }
        else
          {
@@ -905,9 +821,6 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds)
            file->reader = any_reader_open (file->handle, &file->dict);
            if (file->reader == NULL)
              goto error;
-
-          case_create (&file->input_storage,
-                       dict_get_next_value_idx (file->dict));
          }
  
        while (lex_match (lexer, '/'))
@@ -1109,63 +1022,50 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds)
  
    if (used_active_file) 
      {
-      proc_set_sink (ds, create_case_sink (&null_sink_class, 
-                                           dataset_dict (ds),
-                                          dataset_get_casefile_factory (ds),
-                                          NULL));
-      proc_open (ds); 
+      proc_discard_output (ds);
+      for (iter = mtf.head; iter != NULL; iter = iter->next)
+        if (iter->reader == NULL) 
+          iter->reader = proc_open (ds);
+      open_active_file = true;
      }
-  else
-    discard_variables (ds);
  
    dict_compact_values (mtf.dict);
-  mtf.output = dataset_get_casefile_factory (ds)->create_casefile
-    (dataset_get_casefile_factory (ds),
-     dict_get_next_value_idx (mtf.dict));
-
+  mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
    mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
    case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
  
-  if (!mtf_read_records (&mtf, ds))
-    goto error;
+  if (!mtf_read_records (&mtf)) 
+    goto error; 
    while (mtf.head && mtf.head->type == MTF_FILE)
-    if (!mtf_processing (&mtf, ds))
-      goto error;
-  if (!proc_close (ds))
+    if (!mtf_processing (&mtf))
+      goto error; 
+  if (!mtf_close_all_files (&mtf))
      goto error;
+  if (open_active_file)
+    proc_commit (ds);
  
-  discard_variables (ds);
-
-  dataset_set_dict (ds, mtf.dict);
+  proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
    mtf.dict = NULL;
-  proc_set_source (ds, storage_source_create (mtf.output));
    mtf.output = NULL;
  
    return mtf_free (&mtf) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
  
   error:
-  proc_close (ds);
+  if (open_active_file)
+    proc_commit (ds);
    mtf_free (&mtf);
    return CMD_CASCADING_FAILURE;
  }
  
-/* Return a string in a static buffer describing V's variable type and
-   width. */
+/* Returns a newly allocated string describing V's variable type
+   and width.  The caller is responsible for freeing it. */
  static char *
  var_type_description (struct variable *v)
  {
-  static char buf[2][32];
-  static int x = 0;
-  char *s;
-
-  x ^= 1;
-  s = buf[x];
-
    if (var_is_numeric (v))
-    strcpy (s, "numeric");
+    return xstrdup ("numeric");
    else
-    sprintf (s, "string with width %d", var_get_width (v));
-  return s;
+    return xasprintf ("string with width %d", var_get_width (v));
  }
  
  /* Closes FILE and frees its associated data.
@@ -1174,22 +1074,18 @@ var_type_description (struct variable *v)
  static bool
  mtf_close_file (struct mtf_file *file)
  {
-  bool ok = file->reader == NULL || !any_reader_error (file->reader);
+  bool ok = casereader_destroy (file->reader);
    free (file->by);
-  any_reader_close (file->reader);
-  if (file->handle != NULL)
+  if (!file->active_file)
      dict_destroy (file->dict);
-  case_destroy (&file->input_storage);
    free (file->in_name);
+  case_destroy (&file->input);
    free (file);
    return ok;
  }
  
-/* Free all the data for the MATCH FILES procedure.
-   Returns true if successful, false if an I/O error
-   occurred. */
  static bool
-mtf_free (struct mtf_proc *mtf)
+mtf_close_all_files (struct mtf_proc *mtf) 
  {
    struct mtf_file *iter, *next;
    bool ok = true;
@@ -1201,9 +1097,22 @@ mtf_free (struct mtf_proc *mtf)
        if (!mtf_close_file (iter))
          ok = false;
      }
-  
-  if (mtf->dict)
-    dict_destroy (mtf->dict);
+  mtf->head = NULL;
+  return ok;
+}
+
+/* Free all the data for the MATCH FILES procedure.
+   Returns true if successful, false if an I/O error
+   occurred. */
+static bool
+mtf_free (struct mtf_proc *mtf)
+{
+  bool ok;
+
+  ok = mtf_close_all_files (mtf);
+
+  casewriter_destroy (mtf->output);
+  dict_destroy (mtf->dict);
    case_destroy (&mtf->mtf_case);
    free (mtf->seq_nums);
  
@@ -1252,7 +1161,7 @@ mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
  /* Read a record from every input file.
     Returns true if successful, false if an I/O error occurred. */
  static bool
-mtf_read_records (struct mtf_proc *mtf, struct dataset *ds)
+mtf_read_records (struct mtf_proc *mtf)
  {
    struct mtf_file *iter, *next;
    bool ok = true;
@@ -1260,9 +1169,7 @@ mtf_read_records (struct mtf_proc *mtf, struct dataset *ds)
    for (iter = mtf->head; ok && iter != NULL; iter = next)
      {
        next = iter->next;
-      if (iter->handle
-          ? !any_reader_read (iter->reader, iter->input)
-          : !proc_read (ds, &iter->input)) 
+      if (!casereader_read (iter->reader, &iter->input))
          {
            if (!mtf_delete_file_in_place (mtf, &iter))
              ok = false; 
@@ -1277,17 +1184,18 @@ static inline int
  mtf_compare_BY_values (struct mtf_proc *mtf,
                         struct mtf_file *a, struct mtf_file *b)
  {
-  return case_compare_2dict (a->input, b->input, a->by, b->by, mtf->by_cnt);
+  return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
  }
  
  /* Perform one iteration of steps 3...7 above.
     Returns true if successful, false if an I/O error occurred. */
  static bool
-mtf_processing (struct mtf_proc *mtf, struct dataset *ds)
+mtf_processing (struct mtf_proc *mtf)
  {
    struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
    struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
    struct mtf_file *iter, *next;
+  struct ccase out_case;
  
    /* 3. Find the FILE input record(s) that have minimum BY
       values.  Store all the values from these input records into
@@ -1346,9 +1254,8 @@ mtf_processing (struct mtf_proc *mtf, struct dataset *ds)
              min_tail = min_tail->next_min = iter;
            else /* cmp > 0 */
              {
-              if (iter->handle
-                  ? any_reader_read (iter->reader, iter->input)
-                  : proc_read (ds, &iter->input))
+              case_destroy (&iter->input);
+              if (casereader_read (iter->reader, &iter->input))
                  continue;
                if (!mtf_delete_file_in_place (mtf, &iter))
                  return false;
@@ -1375,14 +1282,13 @@ mtf_processing (struct mtf_proc *mtf, struct dataset *ds)
           
            if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num) 
              {
-              const struct ccase *record = iter->input;
                union value *out = case_data_rw (&mtf->mtf_case, mv);
  
                mtf->seq_nums[mv_index] = mtf->seq_num;
                if (var_is_numeric (v))
-                out->f = case_num (record, v);
+                out->f = case_num (&iter->input, v);
                else
-                memcpy (out->s, case_str (record, v), var_get_width (v));
+                memcpy (out->s, case_str (&iter->input, v), var_get_width (v));
              } 
          }
        if (iter->in_var != NULL)
@@ -1418,7 +1324,8 @@ mtf_processing (struct mtf_proc *mtf, struct dataset *ds)
      }
  
    /* 5. Write the output record. */
-  casefile_append (mtf->output, &mtf->mtf_case);
+  case_clone (&out_case, &mtf->mtf_case);
+  casewriter_write (mtf->output, &out_case);
  
    /* 6. Read another record from each input file FILE and TABLE
       that we stored values from above.  If we come to the end of
@@ -1427,9 +1334,8 @@ mtf_processing (struct mtf_proc *mtf, struct dataset *ds)
    for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
      {
        next = iter->next_min;
-      if (iter->reader != NULL
-          ? !any_reader_read (iter->reader, iter->input)
-          : !proc_read (ds, &iter->input))
+      case_destroy (&iter->input);
+      if (!casereader_read (iter->reader, &iter->input))
          if (!mtf_delete_file_in_place (mtf, &iter))
            return false;
      }
@@ -1614,11 +1520,6 @@ map_case (const struct case_map *map,
  {
    size_t dst_idx;
  
-  assert (map != NULL);
-  assert (src != NULL);
-  assert (dst != NULL);
-  assert (src != dst);
-
    for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
      {
        int src_idx = map->map[dst_idx];
diff --git a/src/language/data-io/inpt-pgm.c b/src/language/data-io/inpt-pgm.c

index 71860ebfaee9bd67f618d43e94471c91e62af4ed..97fbbf1e134f4643bf6f0b3adf37a2148c23e28f 100644 (file)
--- a/src/language/data-io/inpt-pgm.c
+++ b/src/language/data-io/inpt-pgm.c
@@ -23,9 +23,9 @@
  #include <float.h>
  #include <stdlib.h>
  
-#include <data/case-source.h>
  #include <data/case.h>
-#include <data/case-source.h>
+#include <data/caseinit.h>
+#include <data/casereader-provider.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/transformations.h>
@@ -68,12 +68,10 @@ struct input_program_pgm
      struct trns_chain *trns_chain;
      enum trns_result restart;
  
-    bool inited_case;           /* Did one-time case initialization? */
      size_t case_nr;             /* Incremented by END CASE transformation. */
  
-    enum value_init_type *init; /* How to initialize each `union value'. */
-    size_t init_cnt;            /* Number of elements in inp_init. */
-    size_t case_size;           /* Size of case in bytes. */
+    struct caseinit *init;
+    size_t value_cnt;
    };
  
  static void destroy_input_program (struct input_program_pgm *);
@@ -82,7 +80,7 @@ static trns_proc_func reread_trns_proc;
  static trns_proc_func end_file_trns_proc;
  static trns_free_func reread_trns_free;
  
-static const struct case_source_class input_program_source_class;
+static const struct casereader_class input_program_casereader_class;
  
  static bool inside_input_program;
  
@@ -105,10 +103,9 @@ int
  cmd_input_program (struct lexer *lexer, struct dataset *ds)
  {
    struct input_program_pgm *inp;
-  size_t i;
    bool saw_END_CASE = false;
  
-  discard_variables (ds);
+  proc_discard_active_file (ds);
    if (lex_token (lexer) != '.')
      return lex_end_of_command (lexer);
  
@@ -132,7 +129,7 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds)
            if (result == CMD_EOF)
              msg (SE, _("Unexpected end-of-file within INPUT PROGRAM."));
            inside_input_program = false;
-          discard_variables (ds);
+          proc_discard_active_file (ds);
            destroy_input_program (inp);
            return result;
          }
@@ -144,7 +141,7 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds)
    if (dict_get_next_value_idx (dataset_dict (ds)) == 0) 
      {
        msg (SE, _("Input program did not create any variables."));
-      discard_variables (ds);
+      proc_discard_active_file (ds);
        destroy_input_program (inp);
        return CMD_FAILURE;
      }
@@ -153,33 +150,15 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds)
    trns_chain_finalize (inp->trns_chain);
  
    inp->restart = TRNS_CONTINUE;
-  inp->inited_case = false;
-  inp->case_nr = 1;
  
    /* Figure out how to initialize each input case. */
-  inp->init_cnt = dict_get_next_value_idx (dataset_dict (ds));
-  inp->init = xnmalloc (inp->init_cnt, sizeof *inp->init);
-  for (i = 0; i < inp->init_cnt; i++)
-    inp->init[i] = -1;
-  for (i = 0; i < dict_get_var_cnt (dataset_dict (ds)); i++)
-    {
-      struct variable *var = dict_get_var (dataset_dict (ds), i);
-      size_t value_cnt = var_get_value_cnt (var);
-      enum value_init_type value_init;
-      size_t j;
-      
-      value_init = var_is_numeric (var) ? INP_NUMERIC : INP_STRING;
-      value_init |= var_get_leave (var) ? INP_INIT_ONCE : INP_REINIT;
-
-      for (j = 0; j < value_cnt; j++)
-        inp->init[j + var_get_case_index (var)] = value_init;
-    }
-  for (i = 0; i < inp->init_cnt; i++)
-    assert (inp->init[i] != -1);
-  inp->case_size = dict_get_case_size (dataset_dict (ds));
-
-  proc_set_source (ds, 
-                   create_case_source (&input_program_source_class, inp));
+  inp->init = caseinit_create ();
+  caseinit_mark_for_init (inp->init, dataset_dict (ds));
+  inp->value_cnt = dict_get_next_value_idx (dataset_dict (ds));
+  
+  proc_set_active_file_data (
+    ds, casereader_create_sequential (NULL, inp->value_cnt, CASENUMBER_MAX,
+                                      &input_program_casereader_class, inp));
  
    return CMD_SUCCESS;
  }
@@ -191,56 +170,6 @@ cmd_end_input_program (struct lexer *lexer UNUSED, struct dataset *ds UNUSED)
    return CMD_END_INPUT_PROGRAM; 
  }
  
-/* Initializes case C.  Called before the first case is read. */
-static void
-init_case (const struct input_program_pgm *inp, struct ccase *c)
-{
-  size_t i;
-
-  for (i = 0; i < inp->init_cnt; i++)
-    switch (inp->init[i]) 
-      {
-      case INP_NUMERIC | INP_INIT_ONCE:
-        case_data_rw_idx (c, i)->f = 0.0;
-        break;
-      case INP_NUMERIC | INP_REINIT:
-        case_data_rw_idx (c, i)->f = SYSMIS;
-        break;
-      case INP_STRING | INP_INIT_ONCE:
-      case INP_STRING | INP_REINIT:
-        memset (case_data_rw_idx (c, i)->s, ' ',
-                sizeof case_data_rw_idx (c, i)->s);
-        break;
-      default:
-        NOT_REACHED ();
-      }
-}
-
-/* Clears case C.  Called between reading successive records. */
-static void
-clear_case (const struct input_program_pgm *inp, struct ccase *c)
-{
-  size_t i;
-
-  for (i = 0; i < inp->init_cnt; i++)
-    switch (inp->init[i]) 
-      {
-      case INP_NUMERIC | INP_INIT_ONCE:
-        break;
-      case INP_NUMERIC | INP_REINIT:
-        case_data_rw_idx (c, i)->f = SYSMIS;
-        break;
-      case INP_STRING | INP_INIT_ONCE:
-        break;
-      case INP_STRING | INP_REINIT:
-        memset (case_data_rw_idx (c, i)->s, ' ',
-                sizeof case_data_rw_idx (c, i)->s);
-        break;
-      default:
-        NOT_REACHED ();
-      }
-}
-
  /* Returns true if STATE is valid given the transformations that
     are allowed within INPUT PROGRAM. */
  static bool
@@ -256,26 +185,28 @@ is_valid_state (enum trns_result state)
     Returns true if successful, false at end of file or if an
     I/O error occurred. */
  static bool
-input_program_source_read (struct case_source *source, struct ccase *c)
+input_program_casereader_read (struct casereader *reader UNUSED, void *inp_,
+                               struct ccase *c)
  {
-  struct input_program_pgm *inp = source->aux;
+  struct input_program_pgm *inp = inp_;
  
-  if (!inp->inited_case)
-    {
-      init_case (inp, c);
-      inp->inited_case = true;
-    }
+  case_create (c, inp->value_cnt);
  
    do
      {
        assert (is_valid_state (inp->restart));
-      if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE)
-        return false;
+      if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE) 
+        {
+          case_destroy (c);
+          return false; 
+        }
  
-      clear_case (inp, c);
+      caseinit_init_reinit_vars (inp->init, c);
+      caseinit_init_left_vars (inp->init, c);
        inp->restart = trns_chain_execute (inp->trns_chain, inp->restart,
                                           c, &inp->case_nr);
        assert (is_valid_state (inp->restart));
+      caseinit_update_left_vars (inp->init, c);
      }
    while (inp->restart < 0);
  
@@ -288,29 +219,27 @@ destroy_input_program (struct input_program_pgm *pgm)
    if (pgm != NULL) 
      {
        trns_chain_destroy (pgm->trns_chain);
-      free (pgm->init);
+      caseinit_destroy (pgm->init);
        free (pgm);
      }
  }
  
-/* Destroys the source.
-   Returns true if successful read, false if an I/O occurred
-   during destruction or previously. */
-static bool
-input_program_source_destroy (struct case_source *source)
+/* Destroys the casereader. */
+static void
+input_program_casereader_destroy (struct casereader *reader UNUSED, void *inp_)
  {
-  struct input_program_pgm *inp = source->aux;
-  bool ok = inp->restart != TRNS_ERROR;
+  struct input_program_pgm *inp = inp_;
+  if (inp->restart == TRNS_ERROR)
+    casereader_force_error (reader);
    destroy_input_program (inp);
-  return ok;
  }
  
-static const struct case_source_class input_program_source_class =
+static const struct casereader_class input_program_casereader_class =
    {
-    "INPUT PROGRAM",
+    input_program_casereader_read,
+    input_program_casereader_destroy,
+    NULL,
      NULL,
-    input_program_source_read,
-    input_program_source_destroy,
    };
  \f
  int
@@ -322,7 +251,7 @@ cmd_end_case (struct lexer *lexer, struct dataset *ds UNUSED)
    return lex_end_of_command (lexer);
  }
  
-/* Sends the current case as the source's output. */
+/* Outputs the current case. */
  int
  end_case_trns_proc (void *inp_, struct ccase *c UNUSED,
                      casenumber case_nr UNUSED)
diff --git a/src/language/data-io/list.q b/src/language/data-io/list.q

index 50fe1201f80139d623f9df7013ef58d4296a6b9e..28627bbb0b8196bd992c5a16d12a7740b9ed0dbf 100644 (file)
--- a/src/language/data-io/list.q
+++ b/src/language/data-io/list.q
@@ -23,7 +23,8 @@
  
  #include "intprops.h"
  #include "size_max.h"
-#include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/data-out.h>
  #include <data/format.h>
@@ -73,9 +74,6 @@ struct list_ext
  /* Parsed command. */
  static struct cmd_list cmd;
  
-/* Current case number. */
-static int case_idx;
-
  /* Line buffer. */
  static struct string line_buffer;
  
@@ -85,11 +83,12 @@ static unsigned n_chars_width (struct outp_driver *d);
  static void write_line (struct outp_driver *d, const char *s);
  
  /* Other functions. */
-static bool list_cases (const struct ccase *, void *, const struct dataset *);
+static void list_case (struct ccase *, casenumber case_idx,
+                       const struct dataset *);
  static void determine_layout (void);
  static void clean_up (void);
  static void write_header (struct outp_driver *);
-static void write_all_headers (const struct ccase *, void *, const struct dataset*);
+static void write_all_headers (struct casereader *, const struct dataset*);
  
  /* Returns the number of text lines that can fit on the remainder of
     the page. */
@@ -133,7 +132,11 @@ write_line (struct outp_driver *d, const char *s)
  int
  cmd_list (struct lexer *lexer, struct dataset *ds)
  {
+  struct dictionary *dict = dataset_dict (ds);
    struct variable *casenum_var = NULL;
+  struct casegrouper *grouper;
+  struct casereader *group;
+  casenumber case_idx;
    bool ok;
  
    if (!parse_list (lexer, ds, &cmd, NULL))
@@ -147,7 +150,7 @@ cmd_list (struct lexer *lexer, struct dataset *ds)
    if (cmd.last == NOT_LONG)
      cmd.last = LONG_MAX;
    if (!cmd.sbc_variables)
-    dict_get_vars (dataset_dict (ds), &cmd.v_variables, &cmd.n_variables,
+    dict_get_vars (dict, &cmd.v_variables, &cmd.n_variables,
                    (1u << DC_SYSTEM) | (1u << DC_SCRATCH));
    if (cmd.n_variables == 0)
      {
@@ -187,12 +190,12 @@ cmd_list (struct lexer *lexer, struct dataset *ds)
    /* Weighting variable. */
    if (cmd.weight == LST_WEIGHT)
      {
-      if (dict_get_weight (dataset_dict (ds)) != NULL)
+      if (dict_get_weight (dict) != NULL)
         {
           size_t i;
  
           for (i = 0; i < cmd.n_variables; i++)
-           if (cmd.v_variables[i] == dict_get_weight (dataset_dict (ds)))
+           if (cmd.v_variables[i] == dict_get_weight (dict))
               break;
           if (i >= cmd.n_variables)
             {
@@ -201,7 +204,7 @@ cmd_list (struct lexer *lexer, struct dataset *ds)
               cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables,
                                             sizeof *cmd.v_variables);
               cmd.v_variables[cmd.n_variables - 1]
-                = dict_get_weight (dataset_dict (ds));
+                = dict_get_weight (dict);
             }
         }
        else
@@ -229,7 +232,24 @@ cmd_list (struct lexer *lexer, struct dataset *ds)
    determine_layout ();
  
    case_idx = 0;
-  ok = procedure_with_splits (ds, write_all_headers, list_cases, NULL, NULL);
+  for (grouper = casegrouper_create_splits (proc_open (ds), dict);
+       casegrouper_get_next_group (grouper, &group);
+       casereader_destroy (group)) 
+    {
+      struct ccase c;
+      
+      write_all_headers (group, ds);
+      for (; casereader_read (group, &c); case_destroy (&c)) 
+        {
+          case_idx++;
+          if (case_idx >= cmd.first && case_idx <= cmd.last
+              && (case_idx - cmd.first) % cmd.step == 0)
+            list_case (&c, case_idx, ds); 
+        }
+    }
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
+
    ds_destroy(&line_buffer);
  
    clean_up ();
@@ -242,11 +262,16 @@ cmd_list (struct lexer *lexer, struct dataset *ds)
  /* Writes headers to all devices.  This is done at the beginning of
     each SPLIT FILE group. */
  static void
-write_all_headers (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+write_all_headers (struct casereader *input, const struct dataset *ds)
  {
    struct outp_driver *d;
+  struct ccase c;
+
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
  
-  output_split_file_values (ds, c);
    for (d = outp_drivers (NULL); d; d = outp_drivers (d))
      {
        if (!d->class->special)
@@ -623,16 +648,12 @@ determine_layout (void)
  }
  
  /* Writes case C to output. */
-static bool
-list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+list_case (struct ccase *c, casenumber case_idx, const struct dataset *ds)
  {
+  struct dictionary *dict = dataset_dict (ds);
    struct outp_driver *d;
    
-  case_idx++;
-  if (case_idx < cmd.first || case_idx > cmd.last
-      || (cmd.step != 1 && (case_idx - cmd.first) % cmd.step))
-    return true;
-
    for (d = outp_drivers (NULL); d; d = outp_drivers (d))
      if (d->class->special == 0)
        {
@@ -681,7 +702,7 @@ list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
                ds_put_char_multiple(&line_buffer, ' ', width - print->w);
  
              if (fmt_is_string (print->type)
-                || dict_contains_var (dataset_dict (ds), v))
+                || dict_contains_var (dict, v))
               {
                  data_out (case_data (c, v), print,
                            ds_put_uninit (&line_buffer, print->w));
@@ -720,7 +741,7 @@ list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
             char buf[256];
             
              if (fmt_is_string (print->type)
-                || dict_contains_var (dataset_dict (ds), v))
+                || dict_contains_var (dict, v))
               data_out (case_data (c, v), print, buf);
              else 
                {
@@ -738,8 +759,6 @@ list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
        }
      else
        NOT_REACHED ();
-
-  return true;
  }
  
  /* 
diff --git a/src/language/dictionary/ChangeLog b/src/language/dictionary/ChangeLog

index bd4c23aeb3b0095eb668c0e12825d1af49a9c2cc..b7d543c1fbd912d920a3baf1eeed3bc2984831fe 100644 (file)
--- a/src/language/dictionary/ChangeLog
+++ b/src/language/dictionary/ChangeLog
@@ -1,3 +1,12 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       Adapt case sources, sinks, and clients of procedure code to the
+       new infrastructure.
+       
+       * apply-dictionary.c: Now any_reader_open returns a casereader.
+
+       * sys-file-info.c: Now sfm_open_reader returns a casereader.
+
  Sat Feb  3 21:52:35 2007  Ben Pfaff  <blp@gnu.org>
  
         * vector.c (cmd_vector): Add support for specifying an output
diff --git a/src/language/dictionary/apply-dictionary.c b/src/language/dictionary/apply-dictionary.c

index 0c9f2ade468d290cfdad8877b3cde1e1d2c4422b..ac38a0899a1b6e7b849fdb7d00b4c63c55b1dcfe 100644 (file)
--- a/src/language/dictionary/apply-dictionary.c
+++ b/src/language/dictionary/apply-dictionary.c
@@ -21,6 +21,7 @@
  #include <stdlib.h>
  
  #include <data/any-reader.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/file-handle-def.h>
  #include <data/missing-values.h>
@@ -42,7 +43,7 @@ int
  cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds)
  {
    struct file_handle *handle;
-  struct any_reader *reader;
+  struct casereader *reader;
    struct dictionary *dict;
  
    int n_matched = 0;
@@ -58,7 +59,7 @@ cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds)
    reader = any_reader_open (handle, &dict);
    if (dict == NULL)
      return CMD_FAILURE;
-  any_reader_close (reader);
+  casereader_destroy (reader);
  
    for (i = 0; i < dict_get_var_cnt (dict); i++)
      {
@@ -136,7 +137,5 @@ cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds)
          dict_set_weight (dataset_dict (ds), new_weight);
      }
    
-  any_reader_close (reader);
-
    return lex_end_of_command (lexer);
  }
diff --git a/src/language/dictionary/delete-variables.c b/src/language/dictionary/delete-variables.c

index bd26a7d1f272bc86551db7259448e7d9892122a6..a0a1fb244f6e6508cb8823fb9e298593ee8ebb8d 100644 (file)
--- a/src/language/dictionary/delete-variables.c
+++ b/src/language/dictionary/delete-variables.c
@@ -1,5 +1,5 @@
  /* PSPP - computes sample statistics.
-   Copyright (C) 2006 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
     Written by Ben Pfaff <blp@gnu.org>.
  
     This program is free software; you can redistribute it and/or
@@ -21,6 +21,7 @@
  
  #include <stdlib.h>
  
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <language/command.h>
@@ -36,6 +37,7 @@ cmd_delete_variables (struct lexer *lexer, struct dataset *ds)
  {
    struct variable **vars;
    size_t var_cnt;
+  bool ok;
  
    if (proc_make_temporary_transformations_permanent (ds))
      msg (SE, _("DELETE VARIABLES may not be used after TEMPORARY.  "
@@ -50,11 +52,13 @@ cmd_delete_variables (struct lexer *lexer, struct dataset *ds)
                   "from the active file dictionary.  Use NEW FILE instead."));
        goto error;
      }
-
-  if (!procedure (ds, NULL, NULL))
+ 
+  ok = casereader_destroy (proc_open (ds));
+  ok = proc_commit (ds) && ok;
+  if (!ok)
      goto error;
-  
    dict_delete_vars (dataset_dict (ds), vars, var_cnt);
+  
    free (vars);
    
    return CMD_SUCCESS;
diff --git a/src/language/dictionary/modify-variables.c b/src/language/dictionary/modify-variables.c

index f3a18325818dae9c8f2c4c65ae67671c8957ad2d..41709416ae669e349a6fa6263322cae3bed0d2d1 100644 (file)
--- a/src/language/dictionary/modify-variables.c
+++ b/src/language/dictionary/modify-variables.c
@@ -40,7 +40,6 @@
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  
-/* FIXME: should change weighting variable, etc. */
  /* These control the ordering produced by
     compare_variables_given_ordering(). */
  struct ordering
@@ -322,7 +321,7 @@ cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
    if (already_encountered & (1 | 4))
      {
        /* Read the data. */
-      if (!procedure (ds,NULL, NULL)) 
+      if (!proc_execute (ds)) 
          goto done; 
      }
  
diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c

index ec93ea201cfc3db974284194b86a387810fa2321..180188abd285ebae3c12d457f4f46fe1eb31d102 100644 (file)
--- a/src/language/dictionary/sys-file-info.c
+++ b/src/language/dictionary/sys-file-info.c
@@ -21,6 +21,7 @@
  #include <ctype.h>
  #include <stdlib.h>
  
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/file-handle-def.h>
  #include <data/format.h>
@@ -87,7 +88,7 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED)
    struct file_handle *h;
    struct dictionary *d;
    struct tab_table *t;
-  struct sfm_reader *reader;
+  struct casereader *reader;
    struct sfm_read_info info;
    int r, nr;
    int i;
@@ -102,7 +103,7 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED)
    reader = sfm_open_reader (h, &d, &info);
    if (!reader)
      return CMD_FAILURE;
-  sfm_close_reader (reader);
+  casereader_destroy (reader);
  
    t = tab_create (2, 10, 0);
    tab_vline (t, TAL_GAP, 1, 0, 8);
diff --git a/src/language/expressions/evaluate.c b/src/language/expressions/evaluate.c

index 92f0a0e347c6ee3acf966bbfffd7b2044e855025..05b19ff121297e49ef39f0eb7745c7e1f7754b27 100644 (file)
--- a/src/language/expressions/evaluate.c
+++ b/src/language/expressions/evaluate.c
@@ -158,7 +158,7 @@ cmd_debug_evaluate (struct lexer *lexer, struct dataset *dsother UNUSED)
  
           if  ( ds == NULL )
             {
-             ds = create_dataset (NULL, NULL, NULL);
+             ds = create_dataset (NULL, NULL);
               d = dataset_dict (ds);
             }
  
diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c

index 26b2e1bfd2537d3fff65978a93e1458a09492a30..055bfe09522676f1252681be4f84c3df388220e5 100644 (file)
--- a/src/language/lexer/variable-parser.c
+++ b/src/language/lexer/variable-parser.c
@@ -116,12 +116,6 @@ parse_variables (struct lexer *lexer, const struct dictionary *d,
  
    vs = var_set_create_from_dict (d);
    success = parse_var_set_vars (lexer, vs, var, cnt, opts);
-  if ( success == 0 )
-    {
-      free ( *var ) ;
-      *var = NULL;
-      *cnt = 0;
-    }
    var_set_destroy (vs);
    return success;
  }
diff --git a/src/language/stats/ChangeLog b/src/language/stats/ChangeLog

index c3cb4fad78015a2c0274af1466ff97c7481315fd..91956d8292398990d422cb29fda3ddbb4989f01b 100644 (file)
--- a/src/language/stats/ChangeLog
+++ b/src/language/stats/ChangeLog
@@ -1,3 +1,32 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       Adapt case sources, sinks, and clients of procedure code to the
+       new infrastructure.
+       
+       * aggregate.c: Simplify greatly since everything is more uniform
+       now.
+
+       * autorecode.c: Adapt to new procedure code.
+       * binomial.c: Ditto.
+       * chisquare.c: Ditto.
+       * crosstabs.q: Ditto.
+       * descriptives.c: Ditto.
+       * examine.q: Ditto.
+       * npar-summary.c: Ditto.
+       * frequencies.q: Ditto.
+       * npar.q: Ditto.
+       * oneway.q: Ditto.
+       * regression.q: Ditto.
+       * sort-cases.c: Ditto.
+       * t-test.c: Ditto.
+
+       * sort-criteria.c: Rewrite to output a struct case_ordering.
+       
+       * flip.c: Rewrite to be a casereader.
+
+       * rank.q: Simplify greatly since casereaders are much more
+       flexible than what we had before.
+       
  2007-05-15  Jason Stover  <jhs@math.gcsu.edu>
  
         * regression.q (run_regression): Tell the user when the data
diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c

index 297d2abee4cc495cc17f1aaedd4da0d9e00f53bf..22ca39d76373c7bd1634e1d5ad92ef1c0f96ecf8 100644 (file)
--- a/src/language/stats/aggregate.c
+++ b/src/language/stats/aggregate.c
@@ -21,15 +21,16 @@
  #include <stdlib.h>
  
  #include <data/any-writer.h>
-#include <data/case-sink.h>
+#include <data/case-ordering.h>
  #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
  #include <data/dictionary.h>
  #include <data/file-handle-def.h>
  #include <data/format.h>
  #include <data/procedure.h>
  #include <data/settings.h>
-#include <data/storage-stream.h>
  #include <data/sys-file-writer.h>
  #include <data/variable.h>
  #include <language/command.h>
@@ -135,12 +136,8 @@ enum missing_treatment
  /* An entire AGGREGATE procedure. */
  struct agr_proc 
    {
-    /* We have either an output file or a sink. */
-    struct any_writer *writer;          /* Output file, or null if none. */
-    struct case_sink *sink;             /* Sink, or null if none. */
-
      /* Break variables. */
-    struct sort_criteria *sort;         /* Sort criteria. */
+    struct case_ordering *sort;         /* Sort criteria. */
      const struct variable **break_vars;       /* Break variables. */
      size_t break_var_cnt;               /* Number of break variables. */
      struct ccase break_case;            /* Last values of break variables. */
@@ -150,20 +147,18 @@ struct agr_proc
      struct dictionary *dict;            /* Aggregate dictionary. */
      const struct dictionary *src_dict;  /* Dict of the source */
      int case_cnt;                       /* Counts aggregated cases. */
-    struct ccase agr_case;              /* Aggregate case for output. */
    };
  
  static void initialize_aggregate_info (struct agr_proc *,
                                         const struct ccase *);
-
+static void accumulate_aggregate_info (struct agr_proc *,
+                                       const struct ccase *);
  /* Prototypes. */
  static bool parse_aggregate_functions (struct lexer *, const struct dictionary *,
                                        struct agr_proc *);
  static void agr_destroy (struct agr_proc *);
-static bool aggregate_single_case (struct agr_proc *agr,
-                                  const struct ccase *input,
-                                  struct ccase *output);
-static void dump_aggregate_info (struct agr_proc *agr, struct ccase *output);
+static void dump_aggregate_info (struct agr_proc *agr,
+                                 struct casewriter *output);
  \f
  /* Parsing. */
  
@@ -174,10 +169,14 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds)
    struct dictionary *dict = dataset_dict (ds);
    struct agr_proc agr;
    struct file_handle *out_file = NULL;
+  struct casereader *input = NULL, *group;
+  struct casegrouper *grouper;
+  struct casewriter *output = NULL;
  
    bool copy_documents = false;
    bool presorted = false;
    bool saw_direction;
+  bool ok;
  
    memset(&agr, 0 , sizeof (agr));
    agr.missing = ITEMWISE;
@@ -223,11 +222,13 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds)
            int i;
  
           lex_match (lexer, '=');
-          agr.sort = sort_parse_criteria (lexer, dict,
-                                          &agr.break_vars, &agr.break_var_cnt,
-                                          &saw_direction, NULL);
+          agr.sort = parse_case_ordering (lexer, dict,
+                                          
+                                          &saw_direction);
            if (agr.sort == NULL)
              goto error;
+          case_ordering_get_vars (agr.sort,
+                                  &agr.break_vars, &agr.break_var_cnt);
           
            for (i = 0; i < agr.break_var_cnt; i++)
              dict_clone_var_assert (agr.dict, agr.break_vars[i],
@@ -261,109 +262,69 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds)
    
    /* Initialize. */
    agr.case_cnt = 0;
-  case_create (&agr.agr_case, dict_get_next_value_idx (agr.dict));
  
-  /* Output to active file or external file? */
    if (out_file == NULL) 
      {
-      struct ccase *c;
-      
        /* The active file will be replaced by the aggregated data,
           so TEMPORARY is moot. */
        proc_cancel_temporary_transformations (ds);
+      proc_discard_output (ds);
+      output = autopaging_writer_create (dict_get_next_value_idx (agr.dict));
+    }
+  else 
+    {
+      output = any_writer_open (out_file, agr.dict);
+      if (output == NULL)
+        goto error;
+    }
  
-      if (agr.sort != NULL && !presorted) 
-        {
-          if (!sort_active_file_in_place (ds, agr.sort))
-            goto error;
-        }
+  input = proc_open (ds);
+  if (agr.sort != NULL && !presorted) 
+    {
+      input = sort_execute (input, agr.sort);
+      agr.sort = NULL; 
+    }
  
-      agr.sink = create_case_sink (&storage_sink_class, agr.dict,
-                                  dataset_get_casefile_factory (ds),
-                                  NULL);
-      if (agr.sink->class->open != NULL)
-        agr.sink->class->open (agr.sink);
-      proc_set_sink (ds, 
-                    create_case_sink (&null_sink_class, dict,
-                                      dataset_get_casefile_factory (ds),
-                                      NULL));
-      proc_open (ds);
-      while (proc_read (ds, &c))
-        if (aggregate_single_case (&agr, c, &agr.agr_case)) 
-          if (!agr.sink->class->write (agr.sink, &agr.agr_case)) 
-            {
-              proc_close (ds);
-              goto error; 
-            }
-      if (!proc_close (ds))
-        goto error;
+  for (grouper = casegrouper_create_vars (input, agr.break_vars,
+                                          agr.break_var_cnt);
+       casegrouper_get_next_group (grouper, &group);
+       casereader_destroy (group)) 
+    {
+      struct ccase c;
+      
+      if (!casereader_peek (group, 0, &c))
+        continue;
+      initialize_aggregate_info (&agr, &c);
+      case_destroy (&c);
+
+      for (; casereader_read (group, &c); case_destroy (&c)) 
+        accumulate_aggregate_info (&agr, &c);
+      dump_aggregate_info (&agr, output);
+    }
+  if (!casegrouper_destroy (grouper))
+    goto error;
  
-      if (agr.case_cnt > 0) 
-        {
-          dump_aggregate_info (&agr, &agr.agr_case);
-          if (!agr.sink->class->write (agr.sink, &agr.agr_case))
-            goto error;
-        }
-      discard_variables (ds);
-      dataset_set_dict (ds, agr.dict);
-      agr.dict = NULL;
-      proc_set_source (ds, agr.sink->class->make_source (agr.sink));
-      free_case_sink (agr.sink);
+  if (!proc_commit (ds)) 
+    {
+      input = NULL;
+      goto error;
      }
-  else
+  input = NULL;
+
+  if (out_file == NULL) 
      {
-      agr.writer = any_writer_open (out_file, agr.dict);
-      if (agr.writer == NULL)
+      struct casereader *next_input = casewriter_make_reader (output);
+      if (next_input == NULL)
          goto error;
        
-      if (agr.sort != NULL && !presorted) 
-        {
-          /* Sorting is needed. */
-          struct casefile *dst;
-          struct casereader *reader;
-          struct ccase c;
-          bool ok = true;
-          
-          dst = sort_active_file_to_casefile (ds, agr.sort);
-          if (dst == NULL)
-            goto error;
-          reader = casefile_get_destructive_reader (dst);
-          while (ok && casereader_read_xfer (reader, &c)) 
-            {
-              if (aggregate_single_case (&agr, &c, &agr.agr_case)) 
-                ok = any_writer_write (agr.writer, &agr.agr_case);
-              case_destroy (&c);
-            }
-          casereader_destroy (reader);
-          if (ok)
-            ok = !casefile_error (dst);
-          casefile_destroy (dst);
-          if (!ok)
-            goto error;
-        }
-      else 
-        {
-          /* Active file is already sorted. */
-          struct ccase *c;
-          
-          proc_open (ds);
-          while (proc_read (ds, &c))
-            if (aggregate_single_case (&agr, c, &agr.agr_case)) 
-              if (!any_writer_write (agr.writer, &agr.agr_case)) 
-                {
-                  proc_close (ds);
-                  goto error;
-                }
-          if (!proc_close (ds))
-            goto error;
-        }
-      
-      if (agr.case_cnt > 0) 
-        {
-          dump_aggregate_info (&agr, &agr.agr_case);
-          any_writer_write (agr.writer, &agr.agr_case);
-        }
-      if (any_writer_error (agr.writer))
+      proc_set_active_file (ds, next_input, agr.dict);
+      agr.dict = NULL;
+    }
+  else 
+    {
+      ok = casewriter_destroy (output);
+      output = NULL;
+      if (!ok)
          goto error;
      }
    
@@ -371,6 +332,9 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds)
    return CMD_SUCCESS;
  
  error:
+  if (input != NULL)
+    proc_commit (ds);
+  casewriter_destroy (output);
    agr_destroy (&agr);
    return CMD_CASCADING_FAILURE;
  }
@@ -717,9 +681,7 @@ agr_destroy (struct agr_proc *agr)
  {
    struct agr_var *iter, *next;
  
-  any_writer_close (agr->writer);
-  if (agr->sort != NULL)
-    sort_destroy_criteria (agr->sort);
+  case_ordering_destroy (agr->sort);
    free (agr->break_vars);
    case_destroy (&agr->break_case);
    for (iter = agr->agr_vars; iter; iter = next)
@@ -742,44 +704,13 @@ agr_destroy (struct agr_proc *agr)
      }
    if (agr->dict != NULL)
      dict_destroy (agr->dict);
-
-  case_destroy (&agr->agr_case);
  }
  \f
  /* Execution. */
  
-static void accumulate_aggregate_info (struct agr_proc *,
-                                       const struct ccase *);
-static void dump_aggregate_info (struct agr_proc *, struct ccase *);
-
-/* Processes a single case INPUT for aggregation.  If output is
-   warranted, writes it to OUTPUT and returns true.
-   Otherwise, returns false and OUTPUT is unmodified. */
-static bool
-aggregate_single_case (struct agr_proc *agr,
-                       const struct ccase *input, struct ccase *output)
-{
-  bool finished_group = false;
-  
-  if (agr->case_cnt++ == 0)
-    initialize_aggregate_info (agr, input);
-  else if (case_compare (&agr->break_case, input,
-                         agr->break_vars, agr->break_var_cnt))
-    {
-      dump_aggregate_info (agr, output);
-      finished_group = true;
-
-      initialize_aggregate_info (agr, input);
-    }
-
-  accumulate_aggregate_info (agr, input);
-  return finished_group;
-}
-
  /* Accumulates aggregation data from the case INPUT. */
  static void 
-accumulate_aggregate_info (struct agr_proc *agr,
-                           const struct ccase *input)
+accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input)
  {
    struct agr_var *iter;
    double weight;
@@ -947,12 +878,14 @@ accumulate_aggregate_info (struct agr_proc *agr,
      }
  }
  
-/* We've come to a record that differs from the previous in one or
-   more of the break variables.  Make an output record from the
-   accumulated statistics in the OUTPUT case. */
+/* Writes an aggregated record to OUTPUT. */
  static void 
-dump_aggregate_info (struct agr_proc *agr, struct ccase *output)
+dump_aggregate_info (struct agr_proc *agr, struct casewriter *output)
  {
+  struct ccase c;
+
+  case_create (&c, dict_get_next_value_idx (agr->dict));
+
    {
      int value_idx = 0;
      int i;
@@ -961,7 +894,7 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output)
        {
          const struct variable *v = agr->break_vars[i];
          size_t value_cnt = var_get_value_cnt (v);
-        memcpy (case_data_rw_idx (output, value_idx),
+        memcpy (case_data_rw_idx (&c, value_idx),
                  case_data (&agr->break_case, v),
                  sizeof (union value) * value_cnt);
          value_idx += value_cnt; 
@@ -973,7 +906,7 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output)
    
      for (i = agr->agr_vars; i; i = i->next)
        {
-       union value *v = case_data_rw (output, i->dest);
+       union value *v = case_data_rw (&c, i->dest);
  
         if (agr->missing == COLUMNWISE && i->saw_missing
             && (i->function & FUNC) != N && (i->function & FUNC) != NU
@@ -1076,6 +1009,8 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output)
           }
        }
    }
+
+  casewriter_write (output, &c);
  }
  
  /* Resets the state for all the aggregate functions. */
diff --git a/src/language/stats/autorecode.c b/src/language/stats/autorecode.c

index 2944f91218fc07f315fa7c1a6fb5537fbecb1091..4e5628a0cc5dd2ff0862bd5abab5e94e96ce823b 100644 (file)
--- a/src/language/stats/autorecode.c
+++ b/src/language/stats/autorecode.c
@@ -20,6 +20,7 @@
  #include <stdlib.h>
  
  #include <data/case.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/transformations.h>
@@ -103,7 +104,8 @@ int
  cmd_autorecode (struct lexer *lexer, struct dataset *ds)
  {
    struct autorecode_pgm arc;
-  struct ccase *c;
+  struct casereader *input;
+  struct ccase c;
    size_t dst_cnt;
    size_t i;
    bool ok;
@@ -188,16 +190,16 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds)
                                        hash_numeric_value, NULL, NULL);
     }
  
-  proc_open (ds);
-  while (proc_read (ds, &c))
+  input = proc_open (ds);
+  for (; casereader_read (input, &c); case_destroy (&c))
      for (i = 0; i < arc.var_cnt; i++)
        {
          union arc_value v, *vp, **vpp;
  
          if (var_is_numeric (arc.src_vars[i]))
-          v.f = case_num (c, arc.src_vars[i]);
+          v.f = case_num (&c, arc.src_vars[i]);
          else
-          v.c = (char *) case_str (c, arc.src_vars[i]);
+          v.c = (char *) case_str (&c, arc.src_vars[i]);
  
          vpp = (union arc_value **) hsh_probe (arc.src_values[i], &v);
          if (*vpp == NULL)
@@ -211,7 +213,8 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds)
              *vpp = vp;
            }
        }
-  ok = proc_close (ds);
+  ok = casereader_destroy (input);
+  ok = proc_commit (ds) && ok;
  
    for (i = 0; i < arc.var_cnt; i++)
      arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds),
diff --git a/src/language/stats/binomial.c b/src/language/stats/binomial.c

index 33c47989e74015025c9b5f3415da17f51bf81a01..91910ee5102e5d28dc927a3404b129d9c908c8a9 100644 (file)
--- a/src/language/stats/binomial.c
+++ b/src/language/stats/binomial.c
@@ -22,13 +22,12 @@
  #include <libpspp/alloc.h>
  
  #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/variable.h>
  #include <data/value.h>
  #include <data/value-labels.h>
-#include <data/casefilter.h>
  
  #include <libpspp/message.h>
  #include <libpspp/assertion.h>
@@ -89,50 +88,47 @@ calculate_binomial_internal (double n1, double n2, double p)
    return sig1tailed ;
  }
  
-static void
+static bool
  do_binomial (const struct dictionary *dict,
-            const struct casefile *cf,
+            struct casereader *input,
              const struct binomial_test *bst,
-            struct freq *cat1,
-            struct freq *cat2,
-            const struct casefilter *filter
+            struct freq_mutable *cat1,
+            struct freq_mutable *cat2,
+             enum mv_class exclude
              )
  {
    bool warn = true;
  
    const struct one_sample_test *ost = (const struct one_sample_test *) bst;
    struct ccase c;
-  struct casereader *r = casefile_get_reader (cf, NULL);
  
-  while (casereader_read(r, &c))
+  while (casereader_read(input, &c))
      {
        int v;
-      double w =
-       dict_get_case_weight (dict, &c, &warn);
+      double w = dict_get_case_weight (dict, &c, &warn);
  
        for (v = 0 ; v < ost->n_vars ; ++v )
         {
           const struct variable *var = ost->vars[v];
           const union value *value = case_data (&c, var);
+          int width = var_get_width (var);
  
-         if ( casefilter_variable_missing (filter, &c, var))
+         if (var_is_value_missing (var, value, exclude))
             break;
  
           if ( NULL == cat1[v].value )
             {
-             cat1[v].value = value_dup (value, var_get_width (var));
+             cat1[v].value = value_dup (value, width);
               cat1[v].count = w;
             }
-         else if ( 0 == compare_values (cat1[v].value, value,
-                                        var_get_width (var)))
+         else if ( 0 == compare_values (cat1[v].value, value, width))
             cat1[v].count += w;
           else if ( NULL == cat2[v].value )
             {
-             cat2[v].value = value_dup (value, var_get_width (var));
+             cat2[v].value = value_dup (value, width);
               cat2[v].count = w;
             }
-         else if ( 0 == compare_values (cat2[v].value, value,
-                                        var_get_width (var)))
+         else if ( 0 == compare_values (cat2[v].value, value, width))
             cat2[v].count += w;
           else if ( bst->category1 == SYSMIS)
             msg (ME, _("Variable %s is not dichotomous"), var_get_name (var));
@@ -140,24 +136,23 @@ do_binomial (const struct dictionary *dict,
  
        case_destroy (&c);
      }
-  casereader_destroy (r);
+  return casereader_destroy (input);
  }
  
  
  
  void
  binomial_execute (const struct dataset *ds,
-                 const struct casefile *cf,
-                 struct casefilter *filter,
+                 struct casereader *input,
+                  enum mv_class exclude,
                   const struct npar_test *test)
  {
    int v;
    const struct binomial_test *bst = (const struct binomial_test *) test;
    const struct one_sample_test *ost = (const struct one_sample_test*) test;
  
-  struct freq *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars);
-  struct freq *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars);
-  struct tab_table *table ;
+  struct freq_mutable *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars);
+  struct freq_mutable *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars);
  
    assert ((bst->category1 == SYSMIS) == (bst->category2 == SYSMIS) );
  
@@ -175,95 +170,78 @@ binomial_execute (const struct dataset *ds,
        cat2->value = value_dup (&v, 0);
      }
  
-  do_binomial (dataset_dict(ds), cf, bst, cat1, cat2, filter);
-
-  table = tab_create (7, ost->n_vars * 3 + 1, 0);
-
-  tab_dim (table, tab_natural_dimensions);
-
-  tab_title (table, _("Binomial Test"));
-
-  tab_headers (table, 2, 0, 1, 0);
-
-  tab_box (table, TAL_1, TAL_1, -1, TAL_1,
-          0, 0, table->nc - 1, tab_nr(table) - 1 );
-
-  for (v = 0 ; v < ost->n_vars; ++v)
+  if (do_binomial (dataset_dict(ds), input, bst, cat1, cat2, exclude)) 
      {
-      double n_total, sig;
-      const struct variable *var = ost->vars[v];
-      tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3);
-
-      /* Titles */
-      tab_text (table, 0, 1 + v * 3, TAB_LEFT,
-               var_to_string (var));
-
-      tab_text (table, 1, 1 + v * 3, TAB_LEFT,
-               _("Group1"));
-
-      tab_text (table, 1, 2 + v * 3, TAB_LEFT,
-               _("Group2"));
+      struct tab_table *table = tab_create (7, ost->n_vars * 3 + 1, 0);
  
-      tab_text (table, 1, 3 + v * 3, TAB_LEFT,
-               _("Total"));
+      tab_dim (table, tab_natural_dimensions);
  
-      /* Test Prop */
-      tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3);
+      tab_title (table, _("Binomial Test"));
  
-      /* Category labels */
-      tab_text (table, 2, 1 + v * 3, TAB_NONE,
-               var_get_value_name (var, cat1[v].value));
+      tab_headers (table, 2, 0, 1, 0);
  
-      tab_text (table, 2, 2 + v * 3, TAB_NONE,
-               var_get_value_name (var, cat2[v].value));
+      tab_box (table, TAL_1, TAL_1, -1, TAL_1,
+               0, 0, table->nc - 1, tab_nr(table) - 1 );
  
-      /* Observed N */
-      tab_float (table, 3, 1 + v * 3, TAB_NONE,
-                cat1[v].count, 8, 0);
+      for (v = 0 ; v < ost->n_vars; ++v)
+        {
+          double n_total, sig;
+          const struct variable *var = ost->vars[v];
+          tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3);
  
-      tab_float (table, 3, 2 + v * 3, TAB_NONE,
-                cat2[v].count, 8, 0);
+          /* Titles */
+          tab_text (table, 0, 1 + v * 3, TAB_LEFT, var_to_string (var));
+          tab_text (table, 1, 1 + v * 3, TAB_LEFT, _("Group1"));
+          tab_text (table, 1, 2 + v * 3, TAB_LEFT, _("Group2"));
+          tab_text (table, 1, 3 + v * 3, TAB_LEFT, _("Total"));
  
-      n_total = cat1[v].count + cat2[v].count;
+          /* Test Prop */
+          tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3);
  
+          /* Category labels */
+          tab_text (table, 2, 1 + v * 3, TAB_NONE,
+                    var_get_value_name (var, cat1[v].value));
+          tab_text (table, 2, 2 + v * 3, TAB_NONE,
+                    var_get_value_name (var, cat2[v].value));
  
-      tab_float (table, 3, 3 + v * 3, TAB_NONE,
-                n_total, 8, 0);
+          /* Observed N */
+          tab_float (table, 3, 1 + v * 3, TAB_NONE, cat1[v].count, 8, 0);
+          tab_float (table, 3, 2 + v * 3, TAB_NONE, cat2[v].count, 8, 0);
  
-      /* Observed Proportions */
+          n_total = cat1[v].count + cat2[v].count;
+          tab_float (table, 3, 3 + v * 3, TAB_NONE, n_total, 8, 0);
  
-      tab_float (table, 4, 1 + v * 3, TAB_NONE,
-                cat1[v].count / n_total, 8, 3);
+          /* Observed Proportions */
+          tab_float (table, 4, 1 + v * 3, TAB_NONE,
+                     cat1[v].count / n_total, 8, 3);
+          tab_float (table, 4, 2 + v * 3, TAB_NONE,
+                     cat2[v].count / n_total, 8, 3);
+          tab_float (table, 4, 3 + v * 3, TAB_NONE,
+                     (cat1[v].count + cat2[v].count) / n_total, 8, 2);
  
-      tab_float (table, 4, 2 + v * 3, TAB_NONE,
-                cat2[v].count / n_total, 8, 3);
+          /* Significance */
+          sig = calculate_binomial (cat1[v].count, cat2[v].count, bst->p);
+          tab_float (table, 6, 1 + v * 3, TAB_NONE, sig, 8, 3);
+        }
  
-      tab_float (table, 4, 3 + v * 3, TAB_NONE,
-                (cat1[v].count + cat2[v].count) / n_total, 8, 2);
+      tab_text (table,  2, 0,  TAB_CENTER, _("Category"));
+      tab_text (table,  3, 0,  TAB_CENTER, _("N"));
+      tab_text (table,  4, 0,  TAB_CENTER, _("Observed Prop."));
+      tab_text (table,  5, 0,  TAB_CENTER, _("Test Prop."));
  
+      tab_text (table,  6, 0,  TAB_CENTER | TAT_PRINTF,
+                _("Exact Sig. (%d-tailed)"),
+                bst->p == 0.5 ? 2: 1);
  
-      /* Significance */
-      sig = calculate_binomial (cat1[v].count, cat2[v].count,
-                                      bst->p);
-
-      tab_float (table, 6, 1 + v * 3, TAB_NONE,
-                sig, 8, 3);
+      tab_vline (table, TAL_2, 2, 0, tab_nr (table) -1);
+      tab_submit (table);
+    }
+  
+  for (v = 0; v < ost->n_vars; v++) 
+    {
+      free (cat1[v].value);
+      free (cat2[v].value); 
      }
-
-  tab_text (table,  2, 0,  TAB_CENTER, _("Category"));
-  tab_text (table,  3, 0,  TAB_CENTER, _("N"));
-  tab_text (table,  4, 0,  TAB_CENTER, _("Observed Prop."));
-  tab_text (table,  5, 0,  TAB_CENTER, _("Test Prop."));
-
-  tab_text (table,  6, 0,  TAB_CENTER | TAT_PRINTF,
-           _("Exact Sig. (%d-tailed)"),
-           bst->p == 0.5 ? 2: 1);
-
-  tab_vline (table, TAL_2, 2, 0, tab_nr (table) -1);
-
    free (cat1);
-  free (cat2);
-
-  tab_submit (table);
-
+  free (cat2); 
  }
diff --git a/src/language/stats/binomial.h b/src/language/stats/binomial.h

index 10f5d39068c3766327a5745202ab94252e18e2fe..e54a1dedee0a06124c2f05064f655c3919c82a26 100644 (file)
--- a/src/language/stats/binomial.h
+++ b/src/language/stats/binomial.h
@@ -36,13 +36,13 @@ struct binomial_test
  };
  
  
-struct casefile;
+struct casereader;
  struct dataset;
  
  
  void binomial_execute (const struct dataset *, 
-                      const struct casefile *, 
-                      struct casefilter *, 
+                      struct casereader *,
+                       enum mv_class,
                        const struct npar_test *);
  
  #endif
diff --git a/src/language/stats/chisquare.c b/src/language/stats/chisquare.c

index a406edc7433d8ff9ef9f128d68385e32ef590f12..3dceb1c36d8dfbb56d676d4dd7056d779fe08b7c 100644 (file)
--- a/src/language/stats/chisquare.c
+++ b/src/language/stats/chisquare.c
@@ -17,39 +17,33 @@
     02110-1301, USA. */
  
  #include <config.h>
-#include <libpspp/compiler.h>
-#include <libpspp/assertion.h>
+
+#include <language/stats/chisquare.h>
  
  #include <stdlib.h>
+#include <math.h>
  
  #include <data/case.h>
-#include <data/casefile.h>
-#include <data/casefilter.h>
-#include <data/variable.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
-
-#include <libpspp/message.h>
-#include <libpspp/hash.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <language/stats/freq.h>
+#include <language/stats/npar.h>
  #include <libpspp/alloc.h>
-
-#include <gsl/gsl_cdf.h>
-
+#include <libpspp/assertion.h>
+#include <libpspp/compiler.h>
+#include <libpspp/hash.h>
+#include <libpspp/message.h>
+#include <libpspp/taint.h>
  #include <output/table.h>
-#include <data/value-labels.h>
  
-#include "npar.h"
-#include "chisquare.h"
-#include "freq.h"
-
-#include <math.h>
+#include <gsl/gsl_cdf.h>
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  
-
-
-
  /* Return a hash table containing the frequency counts of each 
     value of VAR in INPUT .
     It is the caller's responsibility to free the hash table when 
@@ -57,8 +51,7 @@
  */
  static struct hsh_table *
  create_freq_hash_with_range (const struct dictionary *dict, 
-                            const struct casefile *cf, 
-                            struct casefilter *filter,
+                            struct casereader *input, 
                              const struct variable *var, 
                              double lo, 
                              double hi)
@@ -66,7 +59,6 @@ create_freq_hash_with_range (const struct dictionary *dict,
    bool warn = true;
    float i_d;
    struct ccase c;
-  struct casereader *r = casefile_get_reader (cf, filter);
  
    struct hsh_table *freq_hash = 
      hsh_create (4, compare_freq, hash_freq, 
@@ -87,19 +79,13 @@ create_freq_hash_with_range (const struct dictionary *dict,
        hsh_insert (freq_hash, fr);
      }
  
-  while (casereader_read(r, &c))
+  while (casereader_read (input, &c))
      {
        union value obs_value;
        struct freq **existing_fr;
        struct freq *fr = xmalloc(sizeof  (*fr));
        fr->value = case_data (&c, var);
  
-      if ( casefilter_variable_missing (filter, &c, var))
-       {
-         free (fr);
-         continue;
-       }
-
        fr->count = dict_get_case_weight (dict, &c, &warn);
  
        obs_value.f = trunc (fr->value->f);
@@ -124,43 +110,39 @@ create_freq_hash_with_range (const struct dictionary *dict,
  
        case_destroy (&c);
      }
-  casereader_destroy (r);
-
-  return freq_hash;
+  if (casereader_destroy (input))
+    return freq_hash;
+  else 
+    {
+      hsh_destroy (freq_hash);
+      return NULL;
+    }
  }
  
  
  /* Return a hash table containing the frequency counts of each 
-   value of VAR in CF .
+   value of VAR in INPUT .
     It is the caller's responsibility to free the hash table when 
     no longer required.
  */
  static struct hsh_table *
  create_freq_hash (const struct dictionary *dict, 
-                 const struct casefile *cf, 
-                 struct casefilter *filter, 
+                 struct casereader *input, 
                   const struct variable *var)
  {
    bool warn = true;
    struct ccase c;
-  struct casereader *r = casefile_get_reader (cf, filter);
  
    struct hsh_table *freq_hash = 
      hsh_create (4, compare_freq, hash_freq, 
                 free_freq_mutable_hash,
                 (void *) var);
  
-  while (casereader_read(r, &c))
+  for (; casereader_read (input, &c); case_destroy (&c))
      {
        struct freq **existing_fr;
        struct freq *fr = xmalloc(sizeof  (*fr));
-      fr->value = case_data (&c, var );
-
-      if ( casefilter_variable_missing (filter, &c, var))
-       {
-         free (fr);
-         continue;
-       }
+      fr->value = case_data (&c, var);
  
        fr->count = dict_get_case_weight (dict, &c, &warn);
  
@@ -175,20 +157,21 @@ create_freq_hash (const struct dictionary *dict,
            *existing_fr = fr;
            fr->value = value_dup (fr->value, var_get_width (var));
         }
-
-      case_destroy (&c);
      }
-  casereader_destroy (r);
-
-  return freq_hash;
+  if (casereader_destroy (input))
+    return freq_hash;
+  else
+    {
+      hsh_destroy (freq_hash);
+      return NULL;
+    }
  }
  
  
  
  static struct tab_table *
  create_variable_frequency_table (const struct dictionary *dict, 
-                                const struct casefile *cf, 
-                                struct casefilter *filter,
+                                struct casereader *input, 
                                  const struct chisquare_test *test, 
                                  int v, 
                                  struct hsh_table **freq_hash)
@@ -200,7 +183,9 @@ create_variable_frequency_table (const struct dictionary *dict,
    struct tab_table *table ;
    const struct variable *var =  ost->vars[v];
  
-  *freq_hash = create_freq_hash (dict, cf, filter, var);
+  *freq_hash = create_freq_hash (dict, input, var);
+  if (*freq_hash == NULL)
+    return NULL;
        
    n_cells = hsh_count (*freq_hash);
  
@@ -305,7 +290,8 @@ create_stats_table (const struct chisquare_test *test)
  {
    const struct one_sample_test *ost = (const struct one_sample_test*) test;
    
-  struct tab_table *table = tab_create (1 + ost->n_vars, 4, 0);
+  struct tab_table *table;
+  table = tab_create (1 + ost->n_vars, 4, 0);
    tab_dim (table, tab_natural_dimensions);
    tab_title (table, _("Test Statistics"));
    tab_headers (table, 1, 0, 1, 0);
@@ -331,20 +317,20 @@ create_stats_table (const struct chisquare_test *test)
  
  void 
  chisquare_execute (const struct dataset *ds,
-                  const struct casefile *cf, 
-                  struct casefilter *filter,
+                  struct casereader *input,
+                   enum mv_class exclude,
                    const struct npar_test *test)
  {
    const struct dictionary *dict = dataset_dict (ds);
    int v, i;
    struct one_sample_test *ost = (struct one_sample_test *) test;
    struct chisquare_test *cst = (struct chisquare_test *) test;
-  struct tab_table *stats_table = create_stats_table (cst);
    int n_cells = 0;
    double total_expected = 0.0;
  
    double *df = xzalloc (sizeof (*df) * ost->n_vars);
    double *xsq = xzalloc (sizeof (*df) * ost->n_vars);
+  bool ok;
    
    for ( i = 0 ; i < cst->n_expected ; ++i ) 
      total_expected += cst->expected[i];
@@ -355,17 +341,17 @@ chisquare_execute (const struct dataset *ds,
         {
           double total_obs = 0.0;
           struct hsh_table *freq_hash = NULL;
+          struct casereader *reader =
+            casereader_create_filter_missing (casereader_clone (input),
+                                              &ost->vars[v], 1, exclude, NULL);
           struct tab_table *freq_table = 
-           create_variable_frequency_table(dict, cf, filter, cst, 
-                                           v, &freq_hash);
+            create_variable_frequency_table(dict, reader, cst, v, &freq_hash);
  
-         struct freq **ff = (struct freq **) hsh_sort (freq_hash);
+         struct freq **ff;
  
           if ( NULL == freq_table ) 
-           {
-             hsh_destroy (freq_hash);
-             continue;
-           }
+            continue;
+          ff = (struct freq **) hsh_sort (freq_hash);
  
           n_cells = hsh_count (freq_hash);
  
@@ -420,12 +406,19 @@ chisquare_execute (const struct dataset *ds,
        for ( v = 0 ; v < ost->n_vars ; ++v ) 
         {
           double total_obs = 0.0;
+          struct casereader *reader =
+            casereader_create_filter_missing (casereader_clone (input),
+                                              &ost->vars[v], 1, exclude, NULL);
           struct hsh_table *freq_hash = 
-           create_freq_hash_with_range (dict, cf, filter, ost->vars[v], 
-                                        cst->lo, cst->hi);
+           create_freq_hash_with_range (dict, reader,
+                                         ost->vars[v], cst->lo, cst->hi);
+
+         struct freq **ff;
  
-         struct freq **ff = (struct freq **) hsh_sort (freq_hash);
+          if (freq_hash == NULL)
+            continue;
  
+          ff = (struct freq **) hsh_sort (freq_hash);
           assert ( n_cells == hsh_count (freq_hash));
  
           for ( i = 0 ; i < hsh_count (freq_hash) ; ++i ) 
@@ -473,25 +466,30 @@ chisquare_execute (const struct dataset *ds,
  
        tab_submit (freq_table);
      }
+  ok = !taint_has_tainted_successor (casereader_get_taint (input));
+  casereader_destroy (input);
  
-
-  /* Populate the summary statistics table */
-  for ( v = 0 ; v < ost->n_vars ; ++v ) 
+  if (ok) 
      {
-      const struct variable *var = ost->vars[v];
+      struct tab_table *stats_table = create_stats_table (cst);
+      
+      /* Populate the summary statistics table */
+      for ( v = 0 ; v < ost->n_vars ; ++v ) 
+        {
+          const struct variable *var = ost->vars[v];
  
-      tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var));
+          tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var));
  
-      tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3);
-      tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0);
+          tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3);
+          tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0);
  
-      tab_float (stats_table, 1 + v, 3, TAB_NONE, 
-                gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3);
+          tab_float (stats_table, 1 + v, 3, TAB_NONE, 
+                     gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3);
+        }
+      tab_submit (stats_table);
      }
-
+  
    free (xsq);
    free (df);
-
-  tab_submit (stats_table);
  }
  
diff --git a/src/language/stats/chisquare.h b/src/language/stats/chisquare.h

index 71a3c1745c88f3caee111a406315c1aa0bf49ce8..d9d34abdce38e1030cd143246554fb1c3e4a34a3 100644 (file)
--- a/src/language/stats/chisquare.h
+++ b/src/language/stats/chisquare.h
@@ -19,11 +19,10 @@
  #if !chisquare_h
  #define chisquare_h 1
  
-#include <config.h>
  #include <stddef.h>
  #include <stdbool.h>
+#include <language/stats/npar.h>
  
-#include "npar.h"
  struct chisquare_test
  {
    struct one_sample_test parent;  
@@ -37,17 +36,18 @@ struct chisquare_test
    int n_expected;
  };
  
-struct casefile;
-struct dictionary ;
+struct casereader;
+struct dictionary;
  struct hsh_table;
+struct dataset;
  
  void chisquare_insert_variables (const struct npar_test *test,
                                  struct hsh_table *variables);
  
  
  void chisquare_execute (const struct dataset *ds, 
-                       const struct casefile *cf, 
-                       struct casefilter *filter,
+                       struct casereader *input,
+                        enum mv_class exclude,
                         const struct npar_test *test);
  
  
diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q

index e54fa2d7a26d441bfd60dbde29e8145de469ff27..1d2bdf74b4f0f6e7508b067dcf70e8e36100a5b5 100644 (file)
--- a/src/language/stats/crosstabs.q
+++ b/src/language/stats/crosstabs.q
@@ -36,6 +36,8 @@
  #include <stdio.h>
  
  #include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/data-out.h>
  #include <data/dictionary.h>
  #include <data/format.h>
@@ -177,10 +179,10 @@ static struct pool *pl_tc;        /* For table cells. */
  static struct pool *pl_col;    /* For column data. */
  
  static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
-static void precalc (const struct ccase *, void *, const struct dataset *);
-static bool calc_general (const struct ccase *, void *, const struct dataset *);
-static bool calc_integer (const struct ccase *, void *, const struct dataset *);
-static bool postcalc (void *, const struct dataset *);
+static void precalc (struct casereader *, const struct dataset *);
+static void calc_general (struct ccase *, const struct dataset *);
+static void calc_integer (struct ccase *, const struct dataset *);
+static void postcalc (void);
  static void submit (struct tab_table *);
  
  static void format_short (char *s, const struct fmt_spec *fp,
@@ -203,8 +205,10 @@ cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
  static int
  internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
  {
-  int i;
+  struct casegrouper *grouper;
+  struct casereader *input, *group;
    bool ok;
+  int i;
  
    variables = NULL;
    variables_cnt = 0;
@@ -294,9 +298,28 @@ internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
    else
      write_style = CRS_WR_NONE;
  
-  ok = procedure_with_splits (ds, precalc,
-                              mode == GENERAL ? calc_general : calc_integer,
-                              postcalc, NULL);
+  input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
+                                           NULL, NULL);
+  grouper = casegrouper_create_splits (input, dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group)) 
+    {
+      struct ccase c;
+      
+      precalc (group, ds);
+      
+      for (; casereader_read (group, &c); case_destroy (&c)) 
+        {
+          if (mode == GENERAL)
+            calc_general (&c, ds);
+          else
+            calc_integer (&c, ds); 
+        }
+      casereader_destroy (group);
+
+      postcalc ();
+    }
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
  
    return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
  }
@@ -490,10 +513,16 @@ static int compare_table_entry (const void *, const void *, const void *);
  static unsigned hash_table_entry (const void *, const void *);
  
  /* Set up the crosstabulation tables for processing. */
-static  void
-precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
+static void
+precalc (struct casereader *input, const struct dataset *ds)
  {
-  output_split_file_values (ds, first);
+  struct ccase c;
+
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
+
    if (mode == GENERAL)
      {
        gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
@@ -565,18 +594,16 @@ precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
  }
  
  /* Form crosstabulations for general mode. */
-static bool
-calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc_general (struct ccase *c, const struct dataset *ds)
  {
-  bool bad_warn = true;
-
    /* Missing values to exclude. */
    enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
                             : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
                             : MV_NEVER);
  
    /* Case weight. */
-  double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
+  double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
  
    /* Flattened current table index. */
    int t;
@@ -637,12 +664,10 @@ calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
      next_crosstab:
        local_free (te);
      }
-  
-  return true;
  }
  
-static bool
-calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc_integer (struct ccase *c, const struct dataset *ds)
  {
    bool bad_warn = true;
  
@@ -695,8 +720,6 @@ calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
        
      next_crosstab: ;
      }
-  
-  return true;
  }
  
  /* Compare the table_entry's at A and B and return a strcmp()-type
@@ -764,8 +787,8 @@ static void output_pivot_table (struct table_entry **, struct table_entry **,
                                 int *, int *, int *);
  static void make_summary_table (void);
  
-static bool
-postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
+static void
+postcalc (void)
  {
    if (mode == GENERAL)
      {
@@ -801,8 +824,6 @@ postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
    }
    
    hsh_destroy (gen_tab);
-
-  return true;
  }
  
  static void insert_summary (struct tab_table *, int tab_index, double valid);
diff --git a/src/language/stats/descriptives.c b/src/language/stats/descriptives.c

index 8b06aa521208967a1f70a313515b300e77e01169..3eb638bd79316e099752a23bd89a715ecef1850c 100644 (file)
--- a/src/language/stats/descriptives.c
+++ b/src/language/stats/descriptives.c
@@ -16,16 +16,14 @@
     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     02110-1301, USA. */
  
-/* FIXME: Many possible optimizations. */
-
  #include <config.h>
  
  #include <limits.h>
  #include <math.h>
  #include <stdlib.h>
  
-#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/transformations.h>
@@ -180,9 +178,8 @@ static void dump_z_table (struct dsc_proc *);
  static void setup_z_trns (struct dsc_proc *, struct dataset *);
  
  /* Procedure execution functions. */
-static bool calc_descriptives (const struct ccase *first,
-                               const struct casefile *, void *dsc_, 
-                              const struct dataset *);
+static void calc_descriptives (struct dsc_proc *, struct casereader *,
+                               struct dataset *);
  static void display (struct dsc_proc *dsc);
  \f
  /* Parser and outline. */
@@ -200,6 +197,9 @@ cmd_descriptives (struct lexer *lexer, struct dataset *ds)
    size_t i;
    bool ok;
  
+  struct casegrouper *grouper;
+  struct casereader *group;
+
    /* Create and initialize dsc. */
    dsc = xmalloc (sizeof *dsc);
    dsc->vars = NULL;
@@ -316,8 +316,7 @@ cmd_descriptives (struct lexer *lexer, struct dataset *ds)
              {
                int i;
                
-              if (!parse_variables_const (lexer, dataset_dict (ds), 
-                                         &vars, &var_cnt,
+              if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
                                      PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
                 goto error;
  
@@ -413,8 +412,12 @@ cmd_descriptives (struct lexer *lexer, struct dataset *ds)
      for (i = 0; i < dsc->var_cnt; i++)
        dsc->vars[i].moments = moments_create (dsc->max_moment);
  
-  /* Data pass. */
-  ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
+  /* Data pass.  FIXME: error handling. */
+  grouper = casegrouper_create_splits (proc_open (ds), dict);
+  while (casegrouper_get_next_group (grouper, &group)) 
+    calc_descriptives (dsc, group, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
  
    /* Z-scoring! */
    if (ok && z_cnt)
@@ -689,17 +692,25 @@ static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
  
  /* Calculates and displays descriptive statistics for the cases
     in CF. */
-static bool
-calc_descriptives (const struct ccase *first,
-                   const struct casefile *cf, void *dsc_, 
-                  const struct dataset *ds) 
+static void
+calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
+                   struct dataset *ds) 
  {
-  struct dsc_proc *dsc = dsc_;
-  struct casereader *reader;
+  struct casereader *pass1, *pass2;
    struct ccase c;
    size_t i;
  
-  output_split_file_values (ds, first);
+  if (!casereader_peek (group, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
+
+  group = casereader_create_filter_weight (group, dataset_dict (ds),
+                                           NULL, NULL);
+
+  casereader_split (group, &pass1, &pass2);
+  if (dsc->max_moment <= MOMENT_MEAN)
+    casereader_destroy (pass2);
  
    for (i = 0; i < dsc->var_cnt; i++)
      {
@@ -715,13 +726,9 @@ calc_descriptives (const struct ccase *first,
    dsc->valid = 0.;
  
    /* First pass to handle most of the work. */
-  for (reader = casefile_get_reader (cf, NULL);
-       casereader_read (reader, &c);
-       case_destroy (&c))
+  for (; casereader_read (pass1, &c); case_destroy (&c))
      {
-      double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
-      if (weight <= 0.0) 
-        continue;
+      double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
         
        /* Check for missing values. */
        if (listwise_missing (dsc, &c)) 
@@ -737,8 +744,7 @@ calc_descriptives (const struct ccase *first,
            struct dsc_var *dv = &dsc->vars[i];
            double x = case_num (&c, dv->v);
            
-          if (dsc->missing_type != DSC_LISTWISE
-              && var_is_num_missing (dv->v, x, dsc->exclude))
+          if (var_is_num_missing (dv->v, x, dsc->exclude))
              {
                dv->missing += weight;
                continue;
@@ -753,19 +759,21 @@ calc_descriptives (const struct ccase *first,
              dv->max = x;
          }
      }
-  casereader_destroy (reader);
+  if (!casereader_destroy (pass1))
+    {
+      /* PASS2 is still open whenever a second pass was planned;
+         destroy it so that a failed first pass does not leak it. */
+      if (dsc->max_moment > MOMENT_MEAN)
+        casereader_destroy (pass2);
+      return;
+    }
  
    /* Second pass for higher-order moments. */
    if (dsc->max_moment > MOMENT_MEAN) 
      {
-      for (reader = casefile_get_reader (cf, NULL);
-           casereader_read (reader, &c);
-           case_destroy (&c))
+      for (; casereader_read (pass2, &c); case_destroy (&c))
          {
-          double weight = dict_get_case_weight (dataset_dict (ds), &c, 
-                                               &dsc->bad_warn);
-          if (weight <= 0.0)
-            continue;
+          double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
        
            /* Check for missing values. */
            if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
@@ -776,17 +778,17 @@ calc_descriptives (const struct ccase *first,
                struct dsc_var *dv = &dsc->vars[i];
                double x = case_num (&c, dv->v);
            
-              if (dsc->missing_type != DSC_LISTWISE
-                  && var_is_num_missing (dv->v, x, dsc->exclude))
+              if (var_is_num_missing (dv->v, x, dsc->exclude))
                  continue;
  
                if (dv->moments != NULL)
                  moments_pass_two (dv->moments, x, weight);
              }
          }
-      casereader_destroy (reader);
+      if (!casereader_destroy (pass2))
+        return;
      }
-  
+
    /* Calculate results. */
    for (i = 0; i < dsc->var_cnt; i++)
      {
@@ -825,8 +827,6 @@ calc_descriptives (const struct ccase *first,
  
    /* Output results. */
    display (dsc);
-
-  return true;
  }
  
  /* Returns true if any of the descriptives variables in DSC's
diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q

index bb6f9eec5da0f5f9ba24d06e57d9d1cbe4ae7556..e15d294975eb2ba26960db79b225d3ff8939635e 100644 (file)
--- a/src/language/stats/examine.q
+++ b/src/language/stats/examine.q
@@ -26,7 +26,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  #include <stdlib.h>
  
  #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/value-labels.h>
@@ -152,8 +153,8 @@ void box_plot_variables (const struct factor *fctr,
  
  
  /* Per Split function */
-static bool run_examine (const struct ccase *,
-                        const struct casefile *cf, void *cmd_, const struct dataset *);
+static void run_examine (struct cmd_examine *, struct casereader *,
+                         struct dataset *);
  
  static void output_examine (void);
  
@@ -193,6 +194,8 @@ static short sbc_percentile;
  int
  cmd_examine (struct lexer *lexer, struct dataset *ds)
  {
+  struct casegrouper *grouper;
+  struct casereader *group;
    bool ok;
  
    subc_list_double_create (&percentile_list);
@@ -222,7 +225,11 @@ cmd_examine (struct lexer *lexer, struct dataset *ds)
        subc_list_double_push (&percentile_list, 75);
      }
  
-  ok = multipass_procedure_with_splits (ds, run_examine, &cmd);
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group)) 
+    run_examine (&cmd, group, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
  
    if ( totals )
      {
@@ -627,9 +634,6 @@ void populate_summary (struct tab_table *t, int col, int row,
  
  
  
-static bool bad_weight_warn = true;
-
-
  /* Perform calculations for the sub factors */
  void
  factor_calc (const struct ccase *c, int case_no, double weight,
@@ -706,23 +710,28 @@ factor_calc (const struct ccase *c, int case_no, double weight,
      }
  }
  
-static bool
-run_examine (const struct ccase *first, const struct casefile *cf,
-           void *cmd_, const struct dataset *ds)
+static void
+run_examine (struct cmd_examine *cmd, struct casereader *input,
+             struct dataset *ds)
  {
    struct dictionary *dict = dataset_dict (ds);
-  struct casereader *r;
+  casenumber case_no;
    struct ccase c;
    int v;
-
-  const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
+  bool ok;
  
    struct factor *fctr;
  
-  output_split_file_values (ds, first);
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
+
+  input = casereader_create_filter_weight (input, dict, NULL, NULL);
+  input = casereader_create_counter (input, &case_no, 0);
  
    /* Make sure we haven't got rubbish left over from a
-     previous split */
+     previous split. */
    fctr = factors;
    while (fctr)
      {
@@ -738,15 +747,10 @@ run_examine (const struct ccase *first, const struct casefile *cf,
    for ( v = 0 ; v < n_dependent_vars ; ++v )
      metrics_precalc (&totals[v]);
  
-  for (r = casefile_get_reader (cf, NULL);
-      casereader_read (r, &c) ;
-      case_destroy (&c) )
+  for (; casereader_read (input, &c); case_destroy (&c))
      {
-      int case_missing=0;
-      const int case_no = casereader_cnum (r);
-
-      const double weight =
-       dict_get_case_weight (dict, &c, &bad_weight_warn);
+      int case_missing = 0;
+      const double weight = dict_get_case_weight (dict, &c, NULL);
  
        if ( cmd->miss == XMN_LISTWISE )
         {
@@ -787,6 +791,7 @@ run_examine (const struct ccase *first, const struct casefile *cf,
  
        factor_calc (&c, case_no, weight, case_missing);
      }
+  ok = casereader_destroy (input);
  
    for ( v = 0 ; v < n_dependent_vars ; ++v)
      {
@@ -882,7 +887,8 @@ run_examine (const struct ccase *first, const struct casefile *cf,
        fctr = fctr->next;
      }
  
-  output_examine ();
+  if (ok)
+    output_examine ();
  
  
    if ( totals )
@@ -893,8 +899,6 @@ run_examine (const struct ccase *first, const struct casefile *cf,
           metrics_destroy (&totals[i]);
         }
      }
-
-  return true;
  }
  
  
diff --git a/src/language/stats/flip.c b/src/language/stats/flip.c

index 5c6e77404879e1b1b340f87d60c8ad1ef8fc6fe9..0bbe637eca43199932a12a476cdd5465717b0283 100644 (file)
--- a/src/language/stats/flip.c
+++ b/src/language/stats/flip.c
@@ -27,9 +27,9 @@
  #include <sys/types.h>
  #endif
  
-#include <data/case-sink.h>
-#include <data/case-source.h>
  #include <data/case.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/settings.h>
@@ -42,7 +42,6 @@
  #include <libpspp/array.h>
  #include <libpspp/assertion.h>
  #include <libpspp/message.h>
-#include <libpspp/message.h>
  #include <libpspp/misc.h>
  #include <libpspp/pool.h>
  #include <libpspp/str.h>
@@ -70,8 +69,6 @@ struct flip_pgm
      int case_cnt;               /* Pre-flip case count. */
      size_t case_size;           /* Post-flip bytes per case. */
  
-    union value *output_buf;            /* Case output buffer. */
-
      struct variable *new_names; /* Variable containing new variable names. */
      struct varname *new_names_head; /* First new variable. */
      struct varname *new_names_tail; /* Last new variable. */
@@ -82,22 +79,23 @@ struct flip_pgm
      bool error;                 /* Error reading temporary file? */
    };
  
+static const struct casereader_class flip_casereader_class;
+
  static void destroy_flip_pgm (struct flip_pgm *);
-static struct case_sink *flip_sink_create (struct dataset *ds, struct flip_pgm *);
-static struct case_source *flip_source_create (struct flip_pgm *);
  static bool flip_file (struct flip_pgm *);
-static int build_dictionary (struct dictionary *, struct flip_pgm *);
-
-static const struct case_source_class flip_source_class;
-static const struct case_sink_class flip_sink_class;
+static bool build_dictionary (struct dictionary *, struct flip_pgm *);
+static bool write_flip_case (struct flip_pgm *, const struct ccase *);
  
  /* Parses and executes FLIP. */
  int
  cmd_flip (struct lexer *lexer, struct dataset *ds)
  {
-  struct flip_pgm *flip;
-  struct case_sink *sink;
    struct dictionary *dict = dataset_dict (ds);
+  struct flip_pgm *flip;
+  struct casereader *input, *reader;
+  union value *output_buf;
+  struct ccase c;
+  size_t i;
    bool ok;
  
    if (proc_make_temporary_transformations_permanent (ds))
@@ -144,8 +142,6 @@ cmd_flip (struct lexer *lexer, struct dataset *ds)
  
    if (flip->new_names)
      {
-      size_t i;
-      
        for (i = 0; i < flip->var_cnt; i++)
         if (flip->var[i] == flip->new_names)
           {
@@ -155,20 +151,46 @@ cmd_flip (struct lexer *lexer, struct dataset *ds)
           }
      }
  
+  output_buf = pool_nalloc (flip->pool,
+                                  flip->var_cnt, sizeof *output_buf);
+
+  flip->file = pool_tmpfile (flip->pool);
+  if (flip->file == NULL)
+    {
+      msg (SE, _("Could not create temporary file for FLIP."));
+      goto error;
+    }
+
+  /* Write variable names as first case. */
+  for (i = 0; i < flip->var_cnt; i++) 
+    buf_copy_str_rpad (output_buf[i].s, MAX_SHORT_STRING,
+                       var_get_name (flip->var[i]));
+  if (fwrite (output_buf, sizeof *output_buf,
+              flip->var_cnt, flip->file) != (size_t) flip->var_cnt) 
+    {
+      msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
+      goto error;
+    }
+
+  flip->case_cnt = 1;
+
    /* Read the active file into a flip_sink. */
-  flip->case_cnt = 0;
    proc_make_temporary_transformations_permanent (ds);
-  sink = flip_sink_create (ds, flip);
-  if (sink == NULL)
-    goto error;
-  proc_set_sink (ds, sink);
-  flip->new_names_tail = NULL;
-  ok = procedure (ds,NULL, NULL);
+  proc_discard_output (ds);
+
+  input = proc_open (ds);
+  /* Stop reading as soon as a case cannot be written out. */
+  for (ok = true; ok && casereader_read (input, &c); case_destroy (&c))
+    ok = write_flip_case (flip, &c);
+
+  /* Destroy INPUT in any case, but do not lose an earlier failure. */
+  ok = casereader_destroy (input) && ok;
+  ok = proc_commit (ds) && ok;
  
    /* Flip the data we read. */
-  if (!flip_file (flip)) 
+  if (!ok || !flip_file (flip)) 
      {
-      discard_variables (ds);
+      proc_discard_active_file (ds);
        goto error;
      }
  
@@ -176,15 +198,17 @@ cmd_flip (struct lexer *lexer, struct dataset *ds)
    dict_clear (dict);
    if (!build_dictionary (dict, flip))
      {
-      discard_variables (ds);
+      proc_discard_active_file (ds);
        goto error;
      }
    flip->case_size = dict_get_case_size (dict);
  
    /* Set up flipped data for reading. */
-  proc_set_source (ds, flip_source_create (flip));
-
-  return ok ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
+  reader = casereader_create_sequential (NULL, dict_get_next_value_idx (dict),
+                                         flip->case_cnt,
+                                         &flip_casereader_class, flip);
+  proc_set_active_file_data (ds, reader);
+  return lex_end_of_command (lexer);
  
   error:
    destroy_flip_pgm (flip);
@@ -251,7 +275,7 @@ make_new_var (struct dictionary *dict, char name[])
  }
  
  /* Make a new dictionary for all the new variable names. */
-static int
+static bool
  build_dictionary (struct dictionary *dict, struct flip_pgm *flip)
  {
    dict_create_var_assert (dict, "CASE_LBL", 8);
@@ -263,7 +287,7 @@ build_dictionary (struct dictionary *dict, struct flip_pgm *flip)
        if (flip->case_cnt > 99999)
         {
           msg (SE, _("Cannot create more than 99999 variable names."));
-         return 0;
+         return false;
         }
        
        for (i = 0; i < flip->case_cnt; i++)
@@ -281,54 +305,17 @@ build_dictionary (struct dictionary *dict, struct flip_pgm *flip)
  
        for (v = flip->new_names_head; v; v = v->next)
          if (!make_new_var (dict, v->name))
-          return 0;
+          return false;
      }
    
-  return 1;
+  return true;
  }
       
-/* Creates a flip sink based on FLIP. */
-static struct case_sink *
-flip_sink_create (struct dataset *ds, struct flip_pgm *flip) 
-{
-  size_t i;
-
-  flip->output_buf = pool_nalloc (flip->pool,
-                                  flip->var_cnt, sizeof *flip->output_buf);
-
-  flip->file = pool_tmpfile (flip->pool);
-  if (flip->file == NULL)
-    {
-      msg (SE, _("Could not create temporary file for FLIP: %s."),
-           strerror (errno));
-      return NULL;
-    }
-
-  /* Write variable names as first case. */
-  for (i = 0; i < flip->var_cnt; i++) 
-    buf_copy_str_rpad (flip->output_buf[i].s, MAX_SHORT_STRING,
-                       var_get_name (flip->var[i]));
-  if (fwrite (flip->output_buf, sizeof *flip->output_buf,
-              flip->var_cnt, flip->file) != (size_t) flip->var_cnt) 
-    {
-      msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
-      return NULL;
-    }
-
-  flip->case_cnt = 1;
-
-  return create_case_sink (&flip_sink_class,
-                          dataset_dict (ds),
-                          dataset_get_casefile_factory (ds),
-                          flip);
-}
-
  /* Writes case C to the FLIP sink.
     Returns true if successful, false if an I/O error occurred. */
  static bool
-flip_sink_write (struct case_sink *sink, const struct ccase *c)
+write_flip_case (struct flip_pgm *flip, const struct ccase *c)
  {
-  struct flip_pgm *flip = sink->aux;
    size_t i;
    
    flip->case_cnt++;
@@ -377,14 +364,13 @@ flip_sink_write (struct case_sink *sink, const struct ccase *c)
          }
        else
          out = SYSMIS;
-      flip->output_buf[i].f = out;
-    }
-         
-  if (fwrite (flip->output_buf, sizeof *flip->output_buf,
-              flip->var_cnt, flip->file) != (size_t) flip->var_cnt) 
-    {
-      msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
-      return false; 
+      /* Report a failed write, as the code being replaced did,
+         rather than silently ignoring it. */
+      if (fwrite (&out, sizeof out, 1, flip->file) != 1)
+        {
+          msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
+          return false;
+        }
      }
    return true;
  }
@@ -511,57 +491,39 @@ flip_file (struct flip_pgm *flip)
    return true;
  }
  
-/* FLIP sink class. */
-static const struct case_sink_class flip_sink_class = 
-  {
-    "FLIP",
-    NULL,
-    flip_sink_write,
-    NULL,
-    NULL,
-  };
-
-/* Creates and returns a FLIP source based on PGM,
-   which should have already been used as a sink. */
-static struct case_source *
-flip_source_create (struct flip_pgm *pgm)
-{
-  return create_case_source (&flip_source_class, pgm);
-}
-
  /* Reads one case into C.
     Returns true if successful, false at end of file or if an
     I/O error occurred. */
  static bool
-flip_source_read (struct case_source *source, struct ccase *c)
+flip_casereader_read (struct casereader *reader UNUSED, void *flip_,
+                      struct ccase *c)
  {
-  struct flip_pgm *flip = source->aux;
+  struct flip_pgm *flip = flip_;
    size_t i;
  
    if (flip->error || flip->cases_read >= flip->var_cnt)
      return false;
-  
-  if (flip->input_buf == NULL)
-    flip->input_buf = pool_nmalloc (flip->pool,
-                                    flip->case_cnt, sizeof *flip->input_buf);
  
-  if (fread (flip->input_buf, sizeof *flip->input_buf, flip->case_cnt,
-             flip->file) != flip->case_cnt) 
+  case_create (c, flip->case_cnt);
+  for (i = 0; i < flip->case_cnt; i++) 
      {
-      if (ferror (flip->file))
-        msg (SE, _("Error reading FLIP temporary file: %s."),
-             strerror (errno));
-      else if (feof (flip->file))
-        msg (SE, _("Unexpected end of file reading FLIP temporary file."));
-      else
-        NOT_REACHED ();
-      flip->error = true;
-      return false;
+      double in;
+      if (fread (&in, sizeof in, 1, flip->file) != 1)
+        {
+          case_destroy (c);
+          if (ferror (flip->file))
+            msg (SE, _("Error reading FLIP temporary file: %s."),
+                 strerror (errno));
+          else if (feof (flip->file))
+            msg (SE, _("Unexpected end of file reading FLIP temporary file."));
+          else
+            NOT_REACHED ();
+          flip->error = true;
+          return false;
+        }
+      case_data_rw_idx (c, i)->f = in;
      }
-
-  for (i = 0; i < flip->case_cnt; i++)
-    case_data_rw_idx (c, i)->f = flip->input_buf[i].f;
-
+  
    flip->cases_read++;
  
    return true;
@@ -570,19 +532,19 @@ flip_source_read (struct case_source *source, struct ccase *c)
  /* Destroys the source.
     Returns true if successful read, false if an I/O occurred
     during destruction or previously. */
-static bool
-flip_source_destroy (struct case_source *source)
+static void
+flip_casereader_destroy (struct casereader *reader UNUSED, void *flip_)
  {
-  struct flip_pgm *flip = source->aux;
-  bool ok = !flip->error;
+  struct flip_pgm *flip = flip_;
+  if (flip->error)
+    casereader_force_error (reader);
    destroy_flip_pgm (flip);
-  return ok;
  }
  
-static const struct case_source_class flip_source_class = 
+static const struct casereader_class flip_casereader_class = 
    {
-    "FLIP",
+    flip_casereader_read,
+    flip_casereader_destroy,
+    NULL,
      NULL,
-    flip_source_read,
-    flip_source_destroy
    };
diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q

index 29e85babd3f60a418167636249fb1732c988f599..5f0e96e9e1eeb3e7c6e24a6b8fba8bd211138576 100644 (file)
--- a/src/language/stats/frequencies.q
+++ b/src/language/stats/frequencies.q
@@ -29,6 +29,8 @@
  #include <gsl/gsl_histogram.h>
  
  #include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/format.h>
  #include <data/procedure.h>
@@ -45,7 +47,6 @@
  #include <libpspp/hash.h>
  #include <libpspp/magic.h>
  #include <libpspp/message.h>
-#include <libpspp/message.h>
  #include <libpspp/misc.h>
  #include <libpspp/pool.h>
  #include <libpspp/str.h>
@@ -271,9 +272,9 @@ static void determine_charts (void);
  
  static void calc_stats (const struct variable *v, double d[frq_n_stats]);
  
-static void precalc (const struct ccase *, void *, const struct dataset *);
-static bool calc (const struct ccase *, void *, const struct dataset *);
-static bool postcalc (void *, const struct dataset *);
+static void precalc (struct casereader *, struct dataset *);
+static void calc (const struct ccase *, const struct dataset *);
+static void postcalc (void);
  
  static void postprocess_freq_tab (const struct variable *);
  static void dump_full (const struct variable *);
@@ -318,8 +319,10 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds)
  static int
  internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds)
  {
-  int i;
+  struct casegrouper *grouper;
+  struct casereader *input, *group;
    bool ok;
+  int i;
  
    n_percentiles = 0;
    percentiles = NULL;
@@ -383,7 +386,21 @@ internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds)
    
  
    /* Do it! */
-  ok = procedure_with_splits (ds, precalc, calc, postcalc, NULL);
+  input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
+                                           NULL, NULL);
+  grouper = casegrouper_create_splits (input, dataset_dict (ds));
+  for (; casegrouper_get_next_group (grouper, &group);
+       casereader_destroy (group)) 
+    {
+      struct ccase c;
+      
+      precalc (group, ds);
+      for (; casereader_read (group, &c); case_destroy (&c)) 
+        calc (&c, ds);
+      postcalc ();
+    }
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
  
    free_frequencies(&cmd);
  
@@ -496,14 +513,11 @@ determine_charts (void)
  }
  
  /* Add data from case C to the frequency table. */
-static bool
-calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc (const struct ccase *c, const struct dataset *ds)
  {
-  double weight;
+  double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
    size_t i;
-  bool bad_warn = true;
-
-  weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
  
    for (i = 0; i < n_variables; i++)
      {
@@ -530,7 +544,8 @@ calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
                   struct freq *fp = pool_alloc (gen_pool, sizeof *fp);
                    fp->count = weight;
                    fp->value = pool_clone (gen_pool,
-                                      val, MAX (MAX_SHORT_STRING, vf->width));
+                                          val,
+                                          MAX (MAX_SHORT_STRING, vf->width));
                    *fpp = fp;
                 }
             }
@@ -552,17 +567,20 @@ calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
            NOT_REACHED ();
         }
      }
-  return true;
  }
  
  /* Prepares each variable that is the target of FREQUENCIES by setting
     up its hash table. */
  static void
-precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
+precalc (struct casereader *input, struct dataset *ds)
  {
+  struct ccase c;
    size_t i;
  
-  output_split_file_values (ds, first);
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
  
    pool_destroy (gen_pool);
    gen_pool = pool_create ();
@@ -590,8 +608,8 @@ precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
  
  /* Finishes up with the variables after frequencies have been
     calculated.  Displays statistics, percentiles, ... */
-static bool
-postcalc (void *aux UNUSED, const struct dataset *ds  UNUSED)
+static void
+postcalc (void)
  {
    size_t i;
  
@@ -666,8 +684,6 @@ postcalc (void *aux UNUSED, const struct dataset *ds  UNUSED)
        cleanup_freq_tab (v);
  
      }
-
-  return true;
  }
  
  /* Returns the comparison function that should be used for
diff --git a/src/language/stats/npar-summary.c b/src/language/stats/npar-summary.c

index 349fcc1e3c7c995d71934cbf2ffd02c0ac0b9e0c..74532231019078f41ddfcd983a6326ecc4de000b 100644 (file)
--- a/src/language/stats/npar-summary.c
+++ b/src/language/stats/npar-summary.c
@@ -18,12 +18,11 @@
  
  #include <config.h>
  #include <output/table.h>
+#include <data/casereader.h>
  #include <libpspp/hash.h>
  #include <data/variable.h>
  #include "npar-summary.h"
  #include <math/moments.h>
-#include <data/casefile.h>
-#include <data/casefilter.h>
  #include <data/case.h>
  #include <data/dictionary.h>
  #include <math.h>
@@ -35,38 +34,38 @@
  
  void
  npar_summary_calc_descriptives (struct descriptives *desc,
-                               const struct casefile *cf,
-                               struct casefilter *filter, 
+                               struct casereader *input,
                                 const struct dictionary *dict,
                                 const struct variable *const *vv, 
-                               int n_vars UNUSED)
+                               int n_vars UNUSED,
+                                enum mv_class filter)
  {
    int i = 0;
    while (*vv)
      {
-      bool warn = true;
        double minimum = DBL_MAX;
        double maximum = -DBL_MAX;
        double var;
        struct moments1 *moments = moments1_create (MOMENT_VARIANCE);
-      struct casereader *r = casefile_get_reader (cf, filter);
        struct ccase c;
        const struct variable *v = *vv++;
-
-      while (casereader_read(r, &c))
+      struct casereader *pass;
+
+      pass = casereader_clone (input);
+      pass = casereader_create_filter_missing (pass,
+                                               (struct variable **) &v, 1,
+                                               filter, NULL);
+      pass = casereader_create_filter_weight (pass, dict, NULL, NULL);
+      while (casereader_read(pass, &c))
         {
-         const union value *val = case_data (&c, v);
-         double w = dict_get_case_weight (dict, &c, &warn);
-
-         if ( ! casefilter_variable_missing (filter, &c, v ))
-           {
-             minimum = MIN (minimum, val->f);
-             maximum = MAX (maximum, val->f);
-             moments1_add (moments, val->f, w); 
-           }
+          double val = case_num (&c, v);
+          double w = dict_get_case_weight (dict, &c, NULL);
+          minimum = MIN (minimum, val);
+          maximum = MAX (maximum, val);
+          moments1_add (moments, val, w); 
           case_destroy (&c);
         }
-      casereader_destroy (r);
+      casereader_destroy (pass);
  
        moments1_calculate (moments, 
                           &desc[i].n, 
@@ -83,6 +82,7 @@ npar_summary_calc_descriptives (struct descriptives *desc,
        
        i++;
      }
+  casereader_destroy (input);
  }
  
  
diff --git a/src/language/stats/npar-summary.h b/src/language/stats/npar-summary.h

index 1a5125b671ae56b34c0d376ebc3c1dc64c98b3c7..f57fa1cfd7b678e5954a8f0e733567e92d938ada 100644 (file)
--- a/src/language/stats/npar-summary.h
+++ b/src/language/stats/npar-summary.h
@@ -22,9 +22,8 @@
  #include <config.h>
  
  struct variable ;
-struct casefile ;
+struct casereader ;
  struct dictionary;
-struct casefilter;
  
  struct descriptives
  {
@@ -36,11 +35,11 @@ struct descriptives
  };
  
  void npar_summary_calc_descriptives (struct descriptives *desc,
-                                    const struct casefile *cf,
-                                    struct casefilter *filter,
+                                    struct casereader *input,
                                      const struct dictionary *dict,
                                      const struct variable *const *vv, 
-                                    int n_vars);
+                                    int n_vars,
+                                     enum mv_class filter);
  
  
  void do_summary_box (const struct descriptives *desc, 
diff --git a/src/language/stats/npar.h b/src/language/stats/npar.h

index 3907bb26716993de67e369d6aaf837b6d3dc3aaa..80446c3a56af41e6f392e6738c2c9fe43ca0b7b3 100644 (file)
--- a/src/language/stats/npar.h
+++ b/src/language/stats/npar.h
@@ -19,18 +19,22 @@
  #if !npar_h
  #define npar_h 1
  
-typedef const struct variable *var_ptr;
-typedef var_ptr variable_pair[2];
+#include <stddef.h>
+#include <data/missing-values.h>  /* presumably for enum mv_class used below -- confirm */
+
+typedef struct variable *variable_pair[2];
  
  struct hsh_table;
  struct const_hsh_table;
-struct casefilter ;
+struct casefilter;
+struct casereader;
+struct dataset;
  
  struct npar_test
  {
    void (*execute) (const struct dataset *, 
-                  const struct casefile *, 
-                  struct casefilter *,
+                  struct casereader *,
+                   enum mv_class exclude,
                    const struct npar_test *
                    );
  
diff --git a/src/language/stats/npar.q b/src/language/stats/npar.q

index ab5047cf0d24c21756e6fe74b9767586275c52fc..74e8364f1b14bd7ae33a0b97507bbe4fb9129d75 100644 (file)
--- a/src/language/stats/npar.q
+++ b/src/language/stats/npar.q
@@ -20,23 +20,25 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  
  #include <config.h>
  
-#include <language/lexer/lexer.h>
-#include <language/lexer/variable-parser.h>
-#include <language/command.h>
-#include <data/procedure.h>
-#include <libpspp/pool.h>
-#include <libpspp/hash.h>
+#include <language/stats/npar.h>
+
+#include <math.h>
  
-#include <data/casefilter.h>
  #include <data/case.h>
-#include <data/casefile.h>
-#include <math/moments.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
-#include <language/stats/chisquare.h>
+#include <data/procedure.h>
+#include <language/command.h>
+#include <language/lexer/lexer.h>
+#include <language/lexer/variable-parser.h>
  #include <language/stats/binomial.h>
-#include <math.h>
+#include <language/stats/chisquare.h>
+#include <libpspp/hash.h>
+#include <libpspp/pool.h>
+#include <libpspp/taint.h>
+#include <math/moments.h>
  
-#include "npar.h"
  #include "npar-summary.h"
  
  #include "gettext.h"
@@ -75,7 +77,7 @@ struct npar_specs
                                        (those mentioned on ANY subcommand */
    int n_vars; /* Number of variables in vv */
  
-  struct casefilter *filter; /* The missing value filter */
+  enum mv_class filter;    /* Missing values to filter. */
  
    bool descriptives;       /* Descriptive statistics should be calculated */
    bool quartiles;          /* Quartiles should be calculated */
@@ -84,13 +86,12 @@ struct npar_specs
  void one_sample_insert_variables (const struct npar_test *test,
                                   struct const_hsh_table *variables);
  
-static bool 
-npar_execute(const struct ccase *first UNUSED,
-            const struct casefile *cf, void *aux, 
+static void
+npar_execute(struct casereader *input,
+             const struct npar_specs *specs,
              const struct dataset *ds)
  {
    int t;
-  const struct npar_specs *specs = aux;
    struct descriptives *summary_descriptives = NULL;
  
    for ( t = 0 ; t < specs->n_tests; ++t ) 
@@ -101,7 +102,7 @@ npar_execute(const struct ccase *first UNUSED,
           msg (SW, _("NPAR subcommand not currently implemented."));
           continue;
         }
-      test->execute (ds, cf, specs->filter, test);
+      test->execute (ds, casereader_clone (input), specs->filter, test);
      }
  
    if ( specs->descriptives )
@@ -109,21 +110,21 @@ npar_execute(const struct ccase *first UNUSED,
        summary_descriptives = xnmalloc (sizeof (*summary_descriptives), 
                                        specs->n_vars);
  
-      npar_summary_calc_descriptives (summary_descriptives, cf, 
-                                     specs->filter,
+      npar_summary_calc_descriptives (summary_descriptives,
+                                      casereader_clone (input), 
                                       dataset_dict (ds),
-                                     specs->vv, specs->n_vars);
+                                     specs->vv, specs->n_vars,
+                                      specs->filter);
      }
  
-  if ( specs->descriptives || specs->quartiles ) 
+  if ( (specs->descriptives || specs->quartiles)
+       && !taint_has_tainted_successor (casereader_get_taint (input)) ) 
      do_summary_box (summary_descriptives, specs->vv, specs->n_vars );
  
    free (summary_descriptives);
-  
-  return true;
+  casereader_destroy (input);
  }
  
-
  int
  cmd_npar_tests (struct lexer *lexer, struct dataset *ds)
  {
@@ -131,6 +132,9 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds)
    int i;
    struct npar_specs npar_specs = {0, 0, 0, 0, 0, 0, 0, 0};
    struct const_hsh_table *var_hash;
+  struct casegrouper *grouper;
+  struct casereader *input, *group;
+  
    npar_specs.pool = pool_create ();
  
    var_hash = const_hsh_create_pool (npar_specs.pool, 0, 
@@ -179,17 +183,20 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds)
         }
      }
  
-  npar_specs.filter = 
-    casefilter_create (cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM, 0, 0);
-
-  if ( cmd.miss == NPAR_LISTWISE ) 
-    casefilter_add_variables (npar_specs.filter, 
-                             npar_specs.vv, 
-                             npar_specs.n_vars);
+  npar_specs.filter = cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM;
  
-  ok = multipass_procedure_with_splits (ds, npar_execute, &npar_specs);
+  input = proc_open (ds);
+  if ( cmd.miss == NPAR_LISTWISE )
+    input = casereader_create_filter_missing (input,
+                                              (struct variable **) npar_specs.vv,
+                                              npar_specs.n_vars,
+                                              npar_specs.filter, NULL);
  
-  casefilter_destroy (npar_specs.filter);
+  grouper = casegrouper_create_splits (input, dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group))
+    npar_execute (group, &npar_specs, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
  
    const_hsh_destroy (var_hash);
  
diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.q

index 9b45119f8664f1ae948a5bfc838479fa8115f921..78300723ab948525364cf41dad6c6333d919518b 100644 (file)
--- a/src/language/stats/oneway.q
+++ b/src/language/stats/oneway.q
@@ -25,12 +25,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  #include <stdlib.h>
  
  #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/value-labels.h>
  #include <data/variable.h>
-#include <data/casefilter.h>
  #include <language/command.h>
  #include <language/dictionary/split-file.h>
  #include <language/lexer/lexer.h>
@@ -39,9 +39,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  #include <libpspp/hash.h>
  #include <libpspp/magic.h>
  #include <libpspp/message.h>
-#include <libpspp/message.h>
  #include <libpspp/misc.h>
  #include <libpspp/str.h>
+#include <libpspp/taint.h>
  #include <math/group-proc.h>
  #include <math/group.h>
  #include <math/levene.h>
@@ -65,9 +65,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  /* (declarations) */
  /* (functions) */
  
-static bool bad_weight_warn = true;
-
-
  static struct cmd_oneway cmd;
  
  /* The independent variable */
@@ -89,9 +86,8 @@ static struct hsh_table *global_group_hash ;
  static int ostensible_number_of_groups = -1;
  
  
-static bool run_oneway(const struct ccase *first,
-                       const struct casefile *cf, 
-                      void *_mode, const struct dataset *);
+static void run_oneway (struct cmd_oneway *, struct casereader *, 
+                        const struct dataset *);
  
  
  /* Routines to show the output tables */
@@ -113,6 +109,8 @@ void output_oneway(void);
  int
  cmd_oneway (struct lexer *lexer, struct dataset *ds)
  {
+  struct casegrouper *grouper;
+  struct casereader *group;
    int i;
    bool ok;
  
@@ -138,7 +136,12 @@ cmd_oneway (struct lexer *lexer, struct dataset *ds)
         }
      }
  
-  ok = multipass_procedure_with_splits (ds, run_oneway, &cmd);
+  /* Data pass.  FIXME: error handling. */
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group)) 
+    run_oneway (&cmd, group, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
  
    free (vars);
    free_oneway (&cmd);
@@ -887,17 +890,23 @@ free_value (void *value_, const void *aux UNUSED)
    free (value);
  }
  
-static bool
-run_oneway(const struct ccase *first, const struct casefile *cf, 
-          void *cmd_, const struct dataset *ds)
+static void
+run_oneway (struct cmd_oneway *cmd,
+            struct casereader *input, 
+            const struct dataset *ds)
  {
-  struct casereader *r;
+  struct taint *taint;
+  struct dictionary *dict = dataset_dict (ds);
+  enum mv_class exclude;
+  struct casereader *reader;
    struct ccase c;
-  struct casefilter *filter = NULL;
  
-  struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_;
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
  
-  output_split_file_values (ds, first);
+  taint = taint_clone (casereader_get_taint (input));
  
    global_group_hash = hsh_create(4, 
                                  (hsh_compare_func *) compare_values,
@@ -907,31 +916,25 @@ run_oneway(const struct ccase *first, const struct casefile *cf,
  
    precalc(cmd);
  
-  filter = casefilter_create ( (cmd->incl != ONEWAY_INCLUDE
-                                ? MV_ANY : MV_SYSTEM), 
-                              vars, n_vars );
+  exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM;
+  input = casereader_create_filter_missing (input, &indep_var, 1,
+                                            exclude, NULL);
+  if (cmd->miss == ONEWAY_LISTWISE)
+    input = casereader_create_filter_missing (input, vars, n_vars,
+                                              exclude, NULL);
+  input = casereader_create_filter_weight (input, dict, NULL, NULL);
  
-  for(r = casefile_get_reader (cf, filter);
-      casereader_read (r, &c) ;
-      case_destroy (&c)) 
+  reader = casereader_clone (input);
+  for (; casereader_read (reader, &c); case_destroy (&c)) 
      {
        size_t i;
  
-      const double weight = 
-       dict_get_case_weight (dataset_dict (ds), &c, &bad_weight_warn);
-
-      const union value *indep_val;
-      void **p;
+      const double weight = dict_get_case_weight (dict, &c, NULL);
        
-      if ( casefilter_variable_missing (filter, &c, indep_var))
-       continue;
-
-      indep_val = case_data (&c, indep_var);
-      p = hsh_probe (global_group_hash, indep_val);
+      const union value *indep_val = case_data (&c, indep_var);
+      void **p = hsh_probe (global_group_hash, indep_val);
        if (*p == NULL)
          *p = value_dup (indep_val, var_get_width (indep_var));
-         
-      hsh_insert ( global_group_hash, (void *) indep_val );
  
        for ( i = 0 ; i < n_vars ; ++i ) 
         {
@@ -960,7 +963,7 @@ run_oneway(const struct ccase *first, const struct casefile *cf,
               hsh_insert ( group_hash, (void *) gs );
             }
  
-         if (! casefilter_variable_missing (filter, &c, v))
+         if (!var_is_value_missing (v, val, exclude))
             {
               struct group_statistics *totals = &gp->ugs;
  
@@ -989,24 +992,21 @@ run_oneway(const struct ccase *first, const struct casefile *cf,
         }
    
      }
-
-  casereader_destroy (r);
+  casereader_destroy (reader);
  
    postcalc(cmd);
  
    
    if ( stat_tables & STAT_HOMO ) 
-    levene (dataset_dict (ds), cf, indep_var, n_vars, vars, 
-           filter);
+    levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude);
  
-  casefilter_destroy (filter);
+  casereader_destroy (input);
  
    ostensible_number_of_groups = hsh_count (global_group_hash);
  
-
-  output_oneway();
-
-  return true;
+  if (!taint_has_tainted_successor (taint))
+    output_oneway();
+  taint_destroy (taint);
  }
  
  
diff --git a/src/language/stats/rank.q b/src/language/stats/rank.q

index 3f1dd3a9e92c6eb7fcb159eff15846e4cb7c16b3..c42f896a8c2c97031cd8e4640dc6a66dbfa3da16 100644 (file)
--- a/src/language/stats/rank.q
+++ b/src/language/stats/rank.q
@@ -18,27 +18,28 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  
  #include <config.h>
  
-#include "sort-criteria.h"
+#include <limits.h>
+#include <math.h>
  
  #include <data/dictionary.h>
  #include <data/format.h>
  #include <data/missing-values.h>
  #include <data/procedure.h>
  #include <data/variable.h>
+#include <data/case-ordering.h>
  #include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
-#include <data/storage-stream.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
  #include <language/command.h>
  #include <language/stats/sort-criteria.h>
-#include <limits.h>
  #include <libpspp/compiler.h>
+#include <libpspp/taint.h>
  #include <math/sort.h>
  #include <output/table.h>
  #include <output/manager.h>
  
  #include <gsl/gsl_cdf.h>
-#include <math.h>
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
@@ -152,7 +153,7 @@ static enum mv_class exclude_values;
  static struct rank_spec *rank_specs;
  static size_t n_rank_specs;
  
-static struct sort_criteria *sc;
+static struct case_ordering *sc;
  
  static const struct variable **group_vars;
  static size_t n_group_vars;
@@ -165,14 +166,14 @@ static int k_ntiles;
  
  static struct cmd_rank cmd;
  
-static struct casefile *rank_sorted_casefile (struct casefile *cf,
-                                             const struct sort_criteria *,
-                                             const struct dictionary *,
-                                             const struct rank_spec *rs,
-                                             int n_rank_specs,
-                                             int idx,
-                                             const struct missing_values *miss
-                                             );
+static void rank_sorted_file (struct casereader *, 
+                              struct casewriter *,
+                              const struct dictionary *,
+                              const struct rank_spec *rs, 
+                              int n_rank_specs,
+                              int idx,
+                              struct variable *rank_var);
+
  static const char *
  fraction_name(void)
  {
@@ -232,69 +233,56 @@ create_var_label (struct variable *dest_var,
  }
  
  
-static bool
-rank_cmd (struct dataset *ds, const struct sort_criteria *sc,
+static bool 
+rank_cmd (struct dataset *ds, const struct case_ordering *sc, 
           const struct rank_spec *rank_specs, int n_rank_specs)
  {
-  struct sort_criteria criteria;
-  bool result = true;
+  struct case_ordering *base_ordering;
+  bool ok = true;
    int i;
    const int n_splits = dict_get_split_cnt (dataset_dict (ds));
  
-  criteria.crit_cnt = n_splits + n_group_vars + 1;
-  criteria.crits = xnmalloc (criteria.crit_cnt, sizeof *criteria.crits);
+  base_ordering = case_ordering_create (dataset_dict (ds));
    for (i = 0; i < n_splits ; i++)
-    {
-      const struct variable *v = dict_get_split_vars (dataset_dict (ds))[i];
-      criteria.crits[i].fv = var_get_case_index (v);
-      criteria.crits[i].width = var_get_width (v);
-      criteria.crits[i].dir = SRT_ASCEND;
-    }
+    case_ordering_add_var (base_ordering,
+                           dict_get_split_vars (dataset_dict (ds))[i],
+                           SRT_ASCEND);
+
    for (i = 0; i < n_group_vars; i++)
+    case_ordering_add_var (base_ordering, group_vars[i], SRT_ASCEND);
+  for (i = 0 ; i < case_ordering_get_var_cnt (sc) ; ++i )
      {
-      criteria.crits[i + n_splits].fv = var_get_case_index (group_vars[i]);
-      criteria.crits[i + n_splits].width = var_get_width (group_vars[i]);
-      criteria.crits[i + n_splits].dir = SRT_ASCEND;
-    }
-  for (i = 0 ; i < sc->crit_cnt ; ++i )
-    {
-      struct casefile *out ;
-      struct casefile *cf ;
-      struct casereader *reader ;
-      struct casefile *sorted_cf ;
-
-      /* Obtain active file in CF. */
-      if (!procedure (ds, NULL, NULL))
-       goto error;
-
-      cf = proc_capture_output (ds);
-
-      /* Sort CF into SORTED_CF. */
-      reader = casefile_get_destructive_reader (cf) ;
-      criteria.crits[criteria.crit_cnt - 1] = sc->crits[i];
-      assert ( sc->crits[i].fv == var_get_case_index (src_vars[i]) );
-      sorted_cf = sort_execute (reader, &criteria, NULL);
-      casefile_destroy (cf);
-
-      out = rank_sorted_casefile (sorted_cf, &criteria,
-                                 dataset_dict (ds),
-                                  rank_specs, n_rank_specs,
-                                 i, var_get_missing_values (src_vars[i]));
-      if ( NULL == out )
-       {
-         result = false ;
-         continue ;
-       }
-
-      proc_set_source (ds, storage_source_create (out));
+      struct case_ordering *ordering;
+      struct casegrouper *grouper;
+      struct casereader *group;
+      struct casewriter *output;
+      struct casereader *ranked_file;
+
+      ordering = case_ordering_clone (base_ordering);
+      case_ordering_add_var (ordering,
+                             case_ordering_get_var (sc, i),
+                             case_ordering_get_direction (sc, i));
+
+      proc_discard_output (ds);
+      grouper = casegrouper_create_case_ordering (sort_execute (proc_open (ds),
+                                                                ordering),
+                                                  base_ordering);
+      output = autopaging_writer_create (dict_get_next_value_idx (
+                                           dataset_dict (ds)));
+      while (casegrouper_get_next_group (grouper, &group)) 
+        rank_sorted_file (group, output, dataset_dict (ds),
+                          rank_specs, n_rank_specs,
+                          i, src_vars[i]); 
+      ok = casegrouper_destroy (grouper);
+      ok = proc_commit (ds) && ok;
+      ranked_file = casewriter_make_reader (output);
+      ok = proc_set_active_file_data (ds, ranked_file) && ok;
+      if (!ok)
+        break;
      }
+  case_ordering_destroy (base_ordering);
  
-  free (criteria.crits);
-  return result ;
-
-error:
-  free (criteria.crits);
-  return false ;
+  return ok; 
  }
  
  /* Hardly a rank function !! */
@@ -311,7 +299,8 @@ rank_rank (double c, double cc, double cc_1,
           int i, double w UNUSED)
  {
    double rank;
-  if ( c >= 1.0 )
+
+  if ( c >= 1.0 ) 
      {
        switch (cmd.ties)
         {
@@ -471,192 +460,71 @@ rank_savage (double c, double cc, double cc_1,
    NOT_REACHED();
  }
  
-
-/* Rank the casefile belonging to CR, starting from the current
-   postition of CR continuing up to and including the ENDth case.
-
-   RS points to an array containing  the rank specifications to
-   use. N_RANK_SPECS is the number of elements of RS.
-
-
-   DEST_VAR_INDEX is the index into the rank_spec destvar element
-   to be used for this ranking.
-
-   Prerequisites: 1. The casefile must be sorted according to CRITERION.
-                  2. W is the sum of the non-missing caseweights for this
-                 range of the casefile.
-*/
  static void
-rank_cases (struct casereader *cr,
-           unsigned long end,
-           const struct dictionary *dict,
-           const struct sort_criterion *criterion,
-           const struct missing_values *mv,
-           double w,
-           const struct rank_spec *rs,
-           int n_rank_specs,
-           int dest_var_index,
-           struct casefile *dest)
+rank_sorted_file (struct casereader *input, 
+                  struct casewriter *output,
+                  const struct dictionary *dict,
+                  const struct rank_spec *rs, 
+                  int n_rank_specs, 
+                  int dest_idx, 
+                  struct variable *rank_var)
  {
-  bool warn = true;
+  struct casereader *pass1, *pass2, *pass2_1;
+  struct casegrouper *tie_grouper;
+  struct ccase c;
+  double w = 0.0;
    double cc = 0.0;
-  double cc_1;
-  int iter = 1;
+  int tie_group = 1;
  
-  const int fv = criterion->fv;
-  const int width = criterion->width;
  
-  while (casereader_cnum (cr) < end)
-    {
-      struct casereader *lookahead;
-      const union value *this_value;
-      bool this_value_is_missing;
-      struct ccase this_case, lookahead_case;
-      double c;
-      int i;
-      size_t n = 0;
-
-      if (!casereader_read_xfer (cr, &this_case))
-        break;
+  input = casereader_create_filter_missing (input, &rank_var, 1,
+                                            exclude_values, output);
+  input = casereader_create_filter_weight (input, dict, NULL, output);
  
-      this_value = case_data_idx (&this_case, fv);
-      this_value_is_missing = mv_is_value_missing (mv, this_value,
-                                                   exclude_values);
-      c = dict_get_case_weight (dict, &this_case, &warn);
+  casereader_split (input, &pass1, &pass2);
  
-      lookahead = casereader_clone (cr);
-      n = 0;
-      while (casereader_cnum (lookahead) < end
-             && casereader_read_xfer (lookahead, &lookahead_case))
-        {
-          const union value *lookahead_value = case_data_idx (&lookahead_case, fv);
-          int diff = compare_values (this_value, lookahead_value, width);
+  /* Pass 1: Get total group weight. */
+  for (; casereader_read (pass1, &c); case_destroy (&c)) 
+    w += dict_get_case_weight (dict, &c, NULL);
+  casereader_destroy (pass1);
  
-          if (diff != 0)
-            {
-             /* Make sure the casefile was sorted */
-             assert ( diff == ((criterion->dir == SRT_ASCEND) ? -1 :1));
-
-              case_destroy (&lookahead_case);
-              break;
-            }
-
-          c += dict_get_case_weight (dict, &lookahead_case, &warn);
-          case_destroy (&lookahead_case);
-          n++;
-        }
-      casereader_destroy (lookahead);
-
-      cc_1 = cc;
-      if ( !this_value_is_missing )
-       cc += c;
-
-      do
-        {
-          for (i = 0; i < n_rank_specs; ++i)
-            {
-              const struct variable *dst_var = rs[i].destvars[dest_var_index];
-
-             if  (this_value_is_missing)
-               case_data_rw (&this_case, dst_var)->f = SYSMIS;
-             else
-               case_data_rw (&this_case, dst_var)->f =
-                 rank_func[rs[i].rfunc](c, cc, cc_1, iter, w);
-            }
-          casefile_append_xfer (dest, &this_case);
-        }
-      while (n-- > 0 && casereader_read_xfer (cr, &this_case));
-
-      if ( !this_value_is_missing )
-       iter++;
-    }
-
-  /* If this isn't true, then all the results will be wrong */
-  assert ( w == cc );
-}
-
-static bool
-same_group (const struct ccase *a, const struct ccase *b,
-            const struct sort_criteria *crit)
-{
-  size_t i;
-
-  for (i = 0; i < crit->crit_cnt - 1; i++)
+  /* Pass 2: Do ranking. */
+  tie_grouper = casegrouper_create_vars (pass2, &rank_var, 1);
+  while (casegrouper_get_next_group (tie_grouper, &pass2_1)) 
      {
-      struct sort_criterion *c = &crit->crits[i];
-      if (compare_values (case_data_idx (a, c->fv),
-                          case_data_idx (b, c->fv), c->width) != 0)
-        return false;
-    }
-
-  return true;
-}
-
-static struct casefile *
-rank_sorted_casefile (struct casefile *cf,
-                     const struct sort_criteria *crit,
-                     const struct dictionary *dict,
-                     const struct rank_spec *rs,
-                     int n_rank_specs,
-                     int dest_idx,
-                     const struct missing_values *mv)
-{
-  struct casefile *dest = fastfile_create (casefile_get_value_cnt (cf));
-  struct casereader *lookahead = casefile_get_reader (cf, NULL);
-  struct casereader *pos = casereader_clone (lookahead);
-  struct ccase group_case;
-  bool warn = true;
-
-  struct sort_criterion *ultimate_crit = &crit->crits[crit->crit_cnt - 1];
+      struct casereader *pass2_2;
+      double cc_1 = cc;
+      double tw = 0.0;
+      int i;
  
-  if (casereader_read (lookahead, &group_case))
-    {
-      struct ccase this_case;
-      const union value *this_value ;
-      double w = 0.0;
-      this_value = case_data_idx( &group_case, ultimate_crit->fv);
+      pass2_2 = casereader_clone (pass2_1);
+      taint_propagate (casereader_get_taint (pass2_2),
+                       casewriter_get_taint (output));
  
-      if ( !mv_is_value_missing (mv, this_value, exclude_values) )
-       w = dict_get_case_weight (dict, &group_case, &warn);
+      /* Pass 2.1: Sum up weight for tied cases. */
+      for (; casereader_read (pass2_1, &c); case_destroy (&c)) 
+        tw += dict_get_case_weight (dict, &c, NULL);
+      cc += tw;
+      casereader_destroy (pass2_1);
  
-      while (casereader_read (lookahead, &this_case))
+      /* Pass 2.2: Rank tied cases. */
+      while (casereader_read (pass2_2, &c)) 
          {
-         const union value *this_value =
-           case_data_idx(&this_case, ultimate_crit->fv);
-          double c = dict_get_case_weight (dict, &this_case, &warn);
-          if (!same_group (&group_case, &this_case, crit))
+          for (i = 0; i < n_rank_specs; ++i)
              {
-              rank_cases (pos, casereader_cnum (lookahead) - 1,
-                         dict,
-                         ultimate_crit,
-                         mv, w,
-                         rs, n_rank_specs,
-                         dest_idx, dest);
-
-              w = 0.0;
-              case_destroy (&group_case);
-              case_move (&group_case, &this_case);
+              const struct variable *dst_var = rs[i].destvars[dest_idx];
+              double *dst_value = &case_data_rw (&c, dst_var)->f;
+              *dst_value = rank_func[rs[i].rfunc] (tw, cc, cc_1, tie_group, w);
              }
-         if ( !mv_is_value_missing (mv, this_value, exclude_values) )
-           w += c;
-          case_destroy (&this_case);
+          casewriter_write (output, &c);
          }
-      case_destroy (&group_case);
-      rank_cases (pos, ULONG_MAX, dict, ultimate_crit, mv, w,
-                 rs, n_rank_specs, dest_idx, dest);
-    }
-
-  if (casefile_error (dest))
-    {
-      casefile_destroy (dest);
-      dest = NULL;
+      casereader_destroy (pass2_2);
+          
+      tie_group++;
      }
-
-  casefile_destroy (cf);
-  return dest;
+  casegrouper_destroy (tie_grouper);
  }
  
-
  /* Transformation function to enumerate all the cases */
  static int
  create_resort_key (void *key_var_, struct ccase *cc, casenumber case_num)
@@ -749,7 +617,7 @@ rank_cleanup(void)
    rank_specs = NULL;
    n_rank_specs = 0;
  
-  sort_destroy_criteria (sc);
+  case_ordering_destroy (sc);
    sc = NULL;
  
    free (src_vars);
@@ -783,13 +651,13 @@ cmd_rank (struct lexer *lexer, struct dataset *ds)
  
        rank_specs = xmalloc (sizeof (*rank_specs));
        rank_specs[0].rfunc = RANK;
-      rank_specs[0].destvars =
-       xcalloc (sc->crit_cnt, sizeof (struct variable *));
+      rank_specs[0].destvars = 
+       xcalloc (case_ordering_get_var_cnt (sc), sizeof (struct variable *));
  
        n_rank_specs = 1;
      }
  
-  assert ( sc->crit_cnt == n_src_vars);
+  assert ( case_ordering_get_var_cnt (sc) == n_src_vars);
  
    /* Create variables for all rank destinations which haven't
       already been created with INTO.
@@ -891,31 +759,29 @@ cmd_rank (struct lexer *lexer, struct dataset *ds)
      msg(MW, _("FRACTION has been specified, but NORMAL and PROPORTION rank functions have not been requested.  The FRACTION subcommand will be ignored.") );
  
    /* Add a variable which we can sort by to get back the original
-     order */
-  order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0);
+     order */ 
+  order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0); 
  
    add_transformation (ds, create_resort_key, 0, order);
  
    /* Do the ranking */
    result = rank_cmd (ds, sc, rank_specs, n_rank_specs);
  
-  /* Put the active file back in its original order */
+  /* Put the active file back in its original order.  Delete
+     our sort key, which we don't need anymore.  */
    {
-    struct sort_criteria criteria;
-    struct sort_criterion restore_criterion ;
-    restore_criterion.fv = var_get_case_index (order);
-    restore_criterion.width = 0;
-    restore_criterion.dir = SRT_ASCEND;
-
-    criteria.crits = &restore_criterion;
-    criteria.crit_cnt = 1;
-
-    sort_active_file_in_place (ds, &criteria);
+    struct case_ordering *ordering = case_ordering_create (dataset_dict (ds));
+    struct casereader *sorted;
+    case_ordering_add_var (ordering, order, SRT_ASCEND);
+    /* FIXME: loses error conditions. */
+    proc_discard_output (ds);
+    sorted = sort_execute (proc_open (ds), ordering);
+    result = proc_commit (ds) && result;
+
+    dict_delete_var (dataset_dict (ds), order);
+    result = proc_set_active_file_data (ds, sorted) && result;
    }
  
-  /* ... and we don't need our sort key anymore. So delete it */
-  dict_delete_var (dataset_dict (ds), order);
-
    rank_cleanup();
  
  
@@ -928,16 +794,16 @@ cmd_rank (struct lexer *lexer, struct dataset *ds)
  static int
  rank_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd UNUSED, void *aux UNUSED)
  {
-  static const int terminators[2] = {T_BY, 0};
-
    lex_match (lexer, '=');
  
    if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
        && lex_token (lexer) != T_ALL)
        return 2;
  
-  sc = sort_parse_criteria (lexer, dataset_dict (ds),
-                           &src_vars, &n_src_vars, 0, terminators);
+  sc = parse_case_ordering (lexer, dataset_dict (ds), NULL);
+  if (sc == NULL)
+    return 0;
+  case_ordering_get_vars (sc, &src_vars, &n_src_vars);
  
    if ( lex_match (lexer, T_BY)  )
      {
@@ -970,9 +836,10 @@ parse_rank_function (struct lexer *lexer, struct dictionary *dict, struct cmd_ra
    rank_specs[n_rank_specs - 1].rfunc = f;
    rank_specs[n_rank_specs - 1].destvars = NULL;
  
-  rank_specs[n_rank_specs - 1].destvars =
-           xcalloc (sc->crit_cnt, sizeof (struct variable *));
-
+  rank_specs[n_rank_specs - 1].destvars = 
+           xcalloc (case_ordering_get_var_cnt (sc),
+                     sizeof (struct variable *));
+         
    if (lex_match_id (lexer, "INTO"))
      {
        struct variable *destvar;
@@ -985,7 +852,7 @@ parse_rank_function (struct lexer *lexer, struct dictionary *dict, struct cmd_ra
               msg(SE, _("Variable %s already exists."), lex_tokid (lexer));
               return 0;
             }
-         if ( var_count >= sc->crit_cnt )
+         if ( var_count >= case_ordering_get_var_cnt (sc) ) 
             {
               msg(SE, _("Too many variables in INTO clause."));
               return 0;
diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q

index e10b89623dbe1c10cec387e9579b7955b47fd1d3..690b6809aacda290c88d4175c177113d60c6396f 100644 (file)
--- a/src/language/stats/regression.q
+++ b/src/language/stats/regression.q
@@ -26,7 +26,8 @@
  
  #include "regression-export.h"
  #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/category.h>
  #include <data/dictionary.h>
  #include <data/missing-values.h>
@@ -41,6 +42,7 @@
  #include <libpspp/alloc.h>
  #include <libpspp/compiler.h>
  #include <libpspp/message.h>
+#include <libpspp/taint.h>
  #include <math/design-matrix.h>
  #include <math/coefficient.h>
  #include <math/linreg/linreg.h>
@@ -48,6 +50,7 @@
  #include <output/table.h>
  
  #include "gettext.h"
+#define _(msgid) gettext (msgid)
  
  #define REG_LARGE_DATA 1000
  
@@ -120,14 +123,8 @@ static size_t n_variables;
   */
  static struct file_handle *model_file;
  
-/*
-  Return value for the procedure.
- */
-static int pspp_reg_rc = CMD_SUCCESS;
-
-static bool run_regression (const struct ccase *,
-                           const struct casefile *, void *,
-                           const struct dataset *);
+static bool run_regression (struct casereader *, struct cmd_regression *,
+                            struct dataset *);
  
  /* 
     STATISTICS subcommand output functions.
@@ -951,6 +948,9 @@ regression_custom_export (struct lexer *lexer, struct dataset *ds UNUSED,
  int
  cmd_regression (struct lexer *lexer, struct dataset *ds)
  {
+  struct casegrouper *grouper;
+  struct casereader *group;
+  bool ok;
    size_t i;
  
    if (!parse_regression (lexer, ds, &cmd, NULL))
@@ -961,12 +961,18 @@ cmd_regression (struct lexer *lexer, struct dataset *ds)
      {
        models[i] = NULL;
      }
-  if (!multipass_procedure_with_splits (ds, run_regression, &cmd))
-    return CMD_CASCADING_FAILURE;
+
+  /* Data pass. */
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group))
+    run_regression (group, &cmd, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
+
    subcommand_save (ds, cmd.sbc_save, models);
    free (v_variables);
    free (models);
-  return pspp_reg_rc;
+  return ok ? CMD_SUCCESS : CMD_FAILURE;
  }
  
  /*
@@ -978,47 +984,6 @@ is_depvar (size_t k, const struct variable *v)
    return v == v_variables[k];
  }
  
-/*
-  Mark missing cases. Return the number of non-missing cases.
-  Compute the first two moments.
- */
-static size_t
-mark_missing_cases (const struct casefile *cf, const struct variable *v,
-                   int *is_missing_case, double n_data,
-                   struct moments_var *mom)
-{
-  struct casereader *r;
-  struct ccase c;
-  size_t row;
-  const union value *val;
-  double w = 1.0;
-
-  for (r = casefile_get_reader (cf, NULL);
-       casereader_read (r, &c); case_destroy (&c))
-    {
-      row = casereader_cnum (r) - 1;
-
-      val = case_data (&c, v);
-      if (mom != NULL)
-       {
-         moments1_add (mom->m, val->f, w);
-       }
-      cat_value_update (v, val);
-      if (var_is_value_missing (v, val, MV_ANY))
-       {
-         if (!is_missing_case[row])
-           {
-             /* Now it is missing. */
-             n_data--;
-             is_missing_case[row] = 1;
-           }
-       }
-    }
-  casereader_destroy (r);
-
-  return n_data;
-}
-
  /* Parser for the variables sub command */
  static int
  regression_custom_variables (struct lexer *lexer, struct dataset *ds,
@@ -1046,74 +1011,59 @@ regression_custom_variables (struct lexer *lexer, struct dataset *ds,
    return 1;
  }
  
-/*
-  Count the explanatory variables. The user may or may
-  not have specified a response variable in the syntax.
- */
+/* Identify the explanatory variables in v_variables.  Returns
+   the number of independent variables. */
  static int
-get_n_indep (const struct variable *v)
+identify_indep_vars (struct variable **indep_vars, struct variable *depvar)
  {
-  int result;
-  int i = 0;
+  int n_indep_vars = 0;
+  int i;
  
-  result = n_variables;
-  while (i < n_variables)
-    {
-      if (is_depvar (i, v))
-       {
-         result--;
-         i = n_variables;
-       }
-      i++;
-    }
-  return (result == 0) ? 1 : result;
+  for (i = 0; i < n_variables; i++)
+    if (!is_depvar (i, depvar))
+      indep_vars[n_indep_vars++] = v_variables[i];
+
+  return n_indep_vars;
  }
  
-/*
-  Read from the active file. Identify the explanatory variables in
-  v_variables. Encode categorical variables. Drop cases with missing
-  values.
-*/
+/* Encode categorical variables.
+   Returns number of valid cases. */
  static int
-prepare_data (int n_data, int is_missing_case[],
-             const struct variable **indep_vars,
-             const struct variable *depvar, const struct casefile *cf,
-             struct moments_var *mom)
+prepare_categories (struct casereader *input,
+                    struct variable **vars, size_t n_vars,
+                    struct moments_var *mom)
  {
-  int i;
-  int j;
+  int n_data;
+  struct ccase c;
+  size_t i;
  
-  assert (indep_vars != NULL);
-  j = 0;
-  for (i = 0; i < n_variables; i++)
+  for (i = 0; i < n_vars; i++)
+    if (var_is_alpha (vars[i]))
+      cat_stored_values_create (vars[i]);
+
+  n_data = 0;
+  for (; casereader_read (input, &c); case_destroy (&c)) 
      {
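-      /*
-       The second condition ensures the program will run even if
-       there is only one variable to act as both explanatory and
-       response.
-       */
+      /*
+       For each variable, pass this case's value to
+       cat_value_update if the variable is a string variable, or
+       add it to that variable's moments otherwise.
+       */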
-      if ((!is_depvar (i, depvar)) || (n_variables == 1))
-       {
-         indep_vars[j] = v_variables[i];
-         j++;
-         if (var_is_alpha (v_variables[i]))
-           {
-             /* Make a place to hold the binary vectors
-                corresponding to this variable's values. */
-             cat_stored_values_create (v_variables[i]);
-           }
-         n_data =
-           mark_missing_cases (cf, v_variables[i], is_missing_case, n_data,
-                               mom + i);
-       }
-    }
-  /*
-     Mark missing cases for the dependent variable.
-   */
-  n_data = mark_missing_cases (cf, depvar, is_missing_case, n_data, NULL);
+      for (i = 0; i < n_vars; i++)
+        {
+          const union value *val = case_data (&c, vars[i]);
+          if (var_is_alpha (vars[i])) 
+            cat_value_update (vars[i], val); 
+          else
+            moments1_add (mom[i].m, val->f, 1.0);
+        }
+      n_data++; 
+   }
+  casereader_destroy (input);
  
    return n_data;
  }
+
  static void
  coeff_init (pspp_linreg_cache * c, struct design_matrix *dm)
  {
@@ -1155,24 +1105,14 @@ compute_moments (pspp_linreg_cache * c, struct moments_var *mom,
         }
      }
  }
+
  static bool
-run_regression (const struct ccase *first,
-               const struct casefile *cf, void *cmd_ UNUSED,
-               const struct dataset *ds)
+run_regression (struct casereader *input, struct cmd_regression *cmd,
+                struct dataset *ds)
  {
    size_t i;
-  size_t n_data = 0;           /* Number of valide cases. */
-  size_t n_cases;              /* Number of cases. */
-  size_t row;
-  size_t case_num;
    int n_indep = 0;
    int k;
-  /*
-     Keep track of the missing cases.
-   */
-  int *is_missing_case;
-  const union value *val;
-  struct casereader *r;
    struct ccase c;
    const struct variable **indep_vars;
    struct design_matrix *X;
@@ -1183,7 +1123,10 @@ run_regression (const struct ccase *first,
  
    assert (models != NULL);
  
-  output_split_file_values (ds, first);
+  if (!casereader_peek (input, 0, &c))
+    return true;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
  
    if (!v_variables)
      {
@@ -1191,19 +1134,15 @@ run_regression (const struct ccase *first,
                      1u << DC_SYSTEM);
      }
  
-  n_cases = casefile_get_case_cnt (cf);
-
-  for (i = 0; i < cmd.n_dependent; i++)
+  for (i = 0; i < cmd->n_dependent; i++)
      {
-      if (!var_is_numeric (cmd.v_dependent[i]))
+      if (!var_is_numeric (cmd->v_dependent[i]))
         {
-         msg (SE, gettext ("Dependent variable must be numeric."));
-         pspp_reg_rc = CMD_FAILURE;
-         return true;
+         msg (SE, _("Dependent variable must be numeric."));
+         return false;
         }
      }
  
-  is_missing_case = xnmalloc (n_cases, sizeof (*is_missing_case));
    mom = xnmalloc (n_variables, sizeof (*mom));
    for (i = 0; i < n_variables; i++)
      {
@@ -1212,20 +1151,28 @@ run_regression (const struct ccase *first,
      }
    lopts.get_depvar_mean_std = 1;
  
-  for (k = 0; k < cmd.n_dependent; k++)
+  lopts.get_indep_mean_std = xnmalloc (n_variables, sizeof (int));
+  indep_vars = xnmalloc (n_variables, sizeof *indep_vars);
+
+  for (k = 0; k < cmd->n_dependent; k++)
      {
-      n_indep = get_n_indep ((const struct variable *) cmd.v_dependent[k]);
-      lopts.get_indep_mean_std = xnmalloc (n_indep, sizeof (int));
-      indep_vars = xnmalloc (n_indep, sizeof *indep_vars);
-      assert (indep_vars != NULL);
+      struct variable *dep_var;
+      struct casereader *reader;
+      casenumber row;
+      struct ccase c;
+      size_t n_data;           /* Number of valid cases. */
+      
+      dep_var = cmd->v_dependent[k];
+      n_indep = identify_indep_vars (indep_vars, dep_var);
+
+      reader = casereader_clone (input);
+      reader = casereader_create_filter_missing (reader, indep_vars, n_indep,
+                                                 MV_ANY, NULL);
+      reader = casereader_create_filter_missing (reader, &dep_var, 1,
+                                                 MV_ANY, NULL);
+       n_data = prepare_categories (casereader_clone (reader),
+                                    indep_vars, n_indep, mom);
  
-      for (i = 0; i < n_cases; i++)
-       {
-         is_missing_case[i] = 0;
-       }
-      n_data = prepare_data (n_cases, is_missing_case, indep_vars,
-                            cmd.v_dependent[k],
-                            (const struct casefile *) cf, mom);
        if ((n_data > 0) && (n_indep > 0))
         {
           Y = gsl_vector_alloc (n_data);
@@ -1240,8 +1187,8 @@ run_regression (const struct ccase *first,
           models[k] = pspp_linreg_cache_alloc (X->m->size1, X->m->size2);
           models[k]->indep_means = gsl_vector_alloc (X->m->size2);
           models[k]->indep_std = gsl_vector_alloc (X->m->size2);
-         models[k]->depvar = (const struct variable *) cmd.v_dependent[k];
-         /*
+          models[k]->depvar = dep_var;
+          /*
              For large data sets, use QR decomposition.
            */
           if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA)
@@ -1250,50 +1197,23 @@ run_regression (const struct ccase *first,
             }
  
           /*
-            The second pass fills the design matrix.
-          */
-         row = 0;
-         for (r = casefile_get_reader (cf, NULL); casereader_read (r, &c);
-              case_destroy (&c))
-           /* Iterate over the cases. */
-           {
-             case_num = casereader_cnum (r) - 1;
-             if (!is_missing_case[case_num])
-               {
-                 for (i = 0; i < n_variables; ++i)     /* Iterate over the
-                                                          variables for the
-                                                          current case.
-                                                        */
-                   {
-                     val = case_data (&c, v_variables[i]);
-                     /*
-                        Independent/dependent variable separation. The
-                        'variables' subcommand specifies a varlist which contains
-                        both dependent and independent variables. The dependent
-                        variables are specified with the 'dependent'
-                        subcommand, and maybe also in the 'variables' subcommand. 
-                        We need to separate the two.
-                      */
-                     if (!is_depvar (i, cmd.v_dependent[k]))
-                       {
-                         if (var_is_alpha (v_variables[i]))
-                           {
-                             design_matrix_set_categorical (X, row,
-                                                            v_variables[i],
-                                                            val);
-                           }
-                         else
-                           {
-                             design_matrix_set_numeric (X, row,
-                                                        v_variables[i], val);
-                           }
-                       }
-                   }
-                 val = case_data (&c, cmd.v_dependent[k]);
-                 gsl_vector_set (Y, row, val->f);
-                 row++;
-               }
-           }
+            The second pass fills the design matrix.
+          */
+          reader = casereader_create_counter (reader, &row, -1);
+          for (; casereader_read (reader, &c); case_destroy (&c))
+            {
+              for (i = 0; i < n_indep; ++i)
+                {
+                  struct variable *v = indep_vars[i];
+                  const union value *val = case_data (&c, v);
+                  if (var_is_alpha (v))
+                    design_matrix_set_categorical (X, row, v, val);
+                  else
+                    design_matrix_set_numeric (X, row, v, val);
+                }
+          gsl_vector_set (Y, row, case_num (&c, dep_var));
+            }
+          casereader_destroy (reader);
           /*
              Now that we know the number of coefficients, allocate space
              and store pointers to the variables that correspond to the
@@ -1306,26 +1226,24 @@ run_regression (const struct ccase *first,
            */
           pspp_linreg ((const gsl_vector *) Y, X->m, &lopts, models[k]);
           compute_moments (models[k], mom, X, n_variables);
-         subcommand_statistics (cmd.a_statistics, models[k]);
-         subcommand_export (cmd.sbc_export, models[k]);
+
+          if (!taint_has_tainted_successor (casereader_get_taint (input)))
+            {
+              subcommand_statistics (cmd->a_statistics, models[k]);
+              subcommand_export (cmd->sbc_export, models[k]); 
+            }
  
           gsl_vector_free (Y);
           design_matrix_destroy (X);
-         free (indep_vars);
-         free (lopts.get_indep_mean_std);
-         casereader_destroy (r);
         }
        else
         {
           msg (SE, gettext ("No valid data found. This command was skipped."));
         }
      }
-  for (i = 0; i < n_variables; i++)
-    {
-      moments1_destroy ((mom + i)->m);
-    }
-  free (mom);
-  free (is_missing_case);
+  free (indep_vars);
+  free (lopts.get_indep_mean_std);
+  casereader_destroy (input);
  
    return true;
  }
diff --git a/src/language/stats/sort-cases.c b/src/language/stats/sort-cases.c

index 13e0c6edb247024e14541f9e94ca62729cfe6299..913718f476dc54ff6a60892e7e39b6ea3dd1bb5a 100644 (file)
--- a/src/language/stats/sort-cases.c
+++ b/src/language/stats/sort-cases.c
@@ -30,6 +30,7 @@
  #include <language/lexer/lexer.h>
  #include <libpspp/alloc.h>
  #include <libpspp/message.h>
+#include <data/case-ordering.h>
  #include <math/sort.h>
  #include <sys/types.h>
  
@@ -41,13 +42,15 @@
  int
  cmd_sort_cases (struct lexer *lexer, struct dataset *ds)
  {
-  struct sort_criteria *criteria;
-  bool success = false;
+  struct case_ordering *ordering;
+  struct casereader *output;
+  bool ok = false;
  
    lex_match (lexer, T_BY);
  
-  criteria = sort_parse_criteria (lexer, dataset_dict (ds), NULL, NULL, NULL, NULL);
-  if (criteria == NULL)
+  proc_cancel_temporary_transformations (ds);
+  ordering = parse_case_ordering (lexer, dataset_dict (ds), NULL);
+  if (ordering == NULL)
      return CMD_CASCADING_FAILURE;
  
    if (get_testing_mode () && lex_match (lexer, '/')) 
@@ -57,7 +60,6 @@ cmd_sort_cases (struct lexer *lexer, struct dataset *ds)
          goto done;
  
        min_buffers = max_buffers = lex_integer (lexer);
-      allow_internal_sort = false;
        if (max_buffers < 2) 
          {
            msg (SE, _("Buffer limit must be at least 2."));
@@ -67,14 +69,17 @@ cmd_sort_cases (struct lexer *lexer, struct dataset *ds)
        lex_get (lexer);
      }
  
-  success = sort_active_file_in_place (ds, criteria);
+  proc_discard_output (ds);
+  output = sort_execute (proc_open (ds), ordering);
+  ordering = NULL;
+  ok = proc_commit (ds);
+  ok = proc_set_active_file_data (ds, output) && ok;
  
   done:
    min_buffers = 64;
    max_buffers = INT_MAX;
-  allow_internal_sort = true;
    
-  sort_destroy_criteria (criteria);
-  return success ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
+  case_ordering_destroy (ordering);
+  return ok ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
  }
  
diff --git a/src/language/stats/sort-criteria.c b/src/language/stats/sort-criteria.c

index 81b68eab6c41afd5cb79fa2205aa72cb5ba347ff..bd1983dd4a305ec1c4b02ccf6a7711184f3316ea 100644 (file)
--- a/src/language/stats/sort-criteria.c
+++ b/src/language/stats/sort-criteria.c
@@ -1,5 +1,5 @@
  /* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or
     modify it under the terms of the GNU General Public License as
@@ -17,72 +17,46 @@
     02110-1301, USA. */
  
  #include <config.h>
-#include <sys/types.h>
-#include <assert.h>
+
+#include <language/stats/sort-criteria.h>
+
  #include <stdlib.h>
-#include <limits.h>
-#include <libpspp/alloc.h>
-#include <language/command.h>
-#include <libpspp/message.h>
+
+#include <data/case-ordering.h>
+#include <data/dictionary.h>
+#include <data/variable.h>
  #include <language/lexer/lexer.h>
  #include <language/lexer/variable-parser.h>
-#include <data/settings.h>
-#include <data/variable.h>
-#include "sort-criteria.h"
-#include <math/sort.h>
+#include <libpspp/message.h>
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  
-static bool  is_terminator(int tok, const int *terminators);
-
-
-/* Parses a list of sort keys and returns a struct sort_criteria
+/* Parses a list of sort keys and returns a struct case_ordering
     based on it.  Returns a null pointer on error.
     If SAW_DIRECTION is nonnull, sets *SAW_DIRECTION to true if at
     least one parenthesized sort direction was specified, false
-   otherwise. 
-   If TERMINATORS is non-null, then it must be a pointer to a 
-   null terminated list of tokens, in addition to the defaults,
-   which are to be considered terminators of the clause being parsed.
-   The default terminators are '/' and '.'
-   
-*/
-struct sort_criteria *
-sort_parse_criteria (struct lexer *lexer, const struct dictionary *dict,
-                     const struct variable ***vars, size_t *var_cnt,
-                     bool *saw_direction,
-                    const int *terminators
-                    )
+   otherwise. */
+struct case_ordering *
+parse_case_ordering (struct lexer *lexer, const struct dictionary *dict,
+                     bool *saw_direction)
  {
-  struct sort_criteria *criteria;
-  const struct variable **local_vars = NULL;
-  size_t local_var_cnt;
-
-  assert ((vars == NULL) == (var_cnt == NULL));
-  if (vars == NULL) 
-    {
-      vars = &local_vars;
-      var_cnt = &local_var_cnt;
-    }
-
-  criteria = xmalloc (sizeof *criteria);
-  criteria->crits = NULL;
-  criteria->crit_cnt = 0;
-
-  *vars = NULL;
-  *var_cnt = 0;
-  if (saw_direction != NULL)
+  struct case_ordering *ordering = case_ordering_create (dict);
+  struct variable **vars = NULL;
+  size_t var_cnt = 0;
+  
+ if (saw_direction != NULL)
      *saw_direction = false;
  
    do
      {
-      size_t prev_var_cnt = *var_cnt;
        enum sort_direction direction;
+      size_t i;
  
        /* Variables. */
-      if (!parse_variables_const (lexer, dict, vars, var_cnt,
-                           PV_NO_DUPLICATE | PV_APPEND | PV_NO_SCRATCH))
+      free (vars);
+      vars = NULL;
+      if (!parse_variables_const (lexer, dict, &vars, &var_cnt, PV_NO_SCRATCH))
          goto error;
  
        /* Sort direction. */
@@ -108,57 +82,19 @@ sort_parse_criteria (struct lexer *lexer, const struct dictionary *dict,
        else
          direction = SRT_ASCEND;
  
-      criteria->crits = xnrealloc (criteria->crits,
-                                   *var_cnt, sizeof *criteria->crits);
-      criteria->crit_cnt = *var_cnt;
-      for (; prev_var_cnt < criteria->crit_cnt; prev_var_cnt++) 
-        {
-          struct sort_criterion *c = &criteria->crits[prev_var_cnt];
-          c->fv = var_get_case_index ((*vars)[prev_var_cnt]);
-          c->width = var_get_width ((*vars)[prev_var_cnt]);
-          c->dir = direction;
-        }
+      for (i = 0; i < var_cnt; i++)
+        if (!case_ordering_add_var (ordering, vars[i], direction))
+          msg (SW, _("Variable %s specified twice in sort criteria."),
+               var_get_name (vars[i]));
      }
-  while (lex_token (lexer) != '.' && lex_token (lexer) != '/' && !is_terminator(lex_token (lexer), terminators));
+  while (lex_token (lexer) == T_ID
+         && dict_lookup_var (dict, lex_tokid (lexer)) != NULL);
  
-  free (local_vars);
-  return criteria;
+  free (vars);
+  return ordering;
  
   error:
-  free (local_vars);
-  sort_destroy_criteria (criteria);
+  free (vars);
+  case_ordering_destroy (ordering);
    return NULL;
  }
-
-/* Return TRUE if TOK is a member of the list of TERMINATORS.
-   FALSE otherwise */
-static bool 
-is_terminator(int tok, const int *terminators)
-{
-  if (terminators == NULL ) 
-    return false;
-
-  while ( *terminators) 
-    {
-      if (tok == *terminators++)
-       return true;
-    }
-
-  return false;
-}
-
-
-
-/* Destroys a SORT CASES program. */
-void
-sort_destroy_criteria (struct sort_criteria *criteria) 
-{
-  if (criteria != NULL) 
-    {
-      free (criteria->crits);
-      free (criteria);
-    }
-}
-
-
-
diff --git a/src/language/stats/sort-criteria.h b/src/language/stats/sort-criteria.h

index 1c44cc586eecfc0cc6df2f9c7e3572c41a1abdc0..b2bd9ab4859743fbe19ac093f583a5d5471212ce 100644 (file)
--- a/src/language/stats/sort-criteria.h
+++ b/src/language/stats/sort-criteria.h
@@ -23,17 +23,12 @@
  #include <stdbool.h>
  #include <stddef.h>
  
-struct variable;
  struct dictionary;
-struct lexer ;
+struct lexer;
  
-struct sort_criteria *sort_parse_criteria (struct lexer *, const struct dictionary *,
-                                           const struct variable ***, size_t *,
-                                           bool *saw_direction,
-                                          const int *terminators
-                                          );
-
-void sort_destroy_criteria (struct sort_criteria *criteria) ;
+struct case_ordering *parse_case_ordering (struct lexer *,
+                                           const struct dictionary *,
+                                           bool *saw_direction);
  
  
  #endif /* SORT_PRS_H */
diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q

index b593ebc4bc3df675f99a01dfe9e5d109c8b5a946..91a7179e3e3dbd5232195ea474b4d5ddd4af1134 100644 (file)
--- a/src/language/stats/t-test.q
+++ b/src/language/stats/t-test.q
@@ -25,13 +25,12 @@
  #include <stdlib.h>
  
  #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include <data/procedure.h>
  #include <data/value-labels.h>
  #include <data/variable.h>
-#include <data/casefilter.h>
-
  #include <language/command.h>
  #include <language/dictionary/split-file.h>
  #include <language/lexer/lexer.h>
@@ -41,9 +40,9 @@
  #include <libpspp/hash.h>
  #include <libpspp/magic.h>
  #include <libpspp/message.h>
-#include <libpspp/message.h>
  #include <libpspp/misc.h>
  #include <libpspp/str.h>
+#include <libpspp/taint.h>
  #include <math/group-proc.h>
  #include <math/levene.h>
  #include <output/manager.h>
@@ -215,28 +214,28 @@ enum {
  
  static int common_calc (const struct dictionary *dict, 
                         const struct ccase *, void *, 
-                       const struct casefilter *filter);
+                       enum mv_class);
  static void common_precalc (struct cmd_t_test *);
  static void common_postcalc (struct cmd_t_test *);
  
-static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, const struct casefilter *);
+static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, enum mv_class);
  static void one_sample_precalc (struct cmd_t_test *);
  static void one_sample_postcalc (struct cmd_t_test *);
  
  static int  paired_calc (const struct dictionary *dict, const struct ccase *, 
-                        struct cmd_t_test*, const struct casefilter *);
+                        struct cmd_t_test*, enum mv_class);
  static void paired_precalc (struct cmd_t_test *);
  static void paired_postcalc (struct cmd_t_test *);
  
  static void group_precalc (struct cmd_t_test *);
  static int  group_calc (const struct dictionary *dict, const struct ccase *, 
-                       struct cmd_t_test *, const struct casefilter *);
+                       struct cmd_t_test *, enum mv_class);
  static void group_postcalc (struct cmd_t_test *);
  
  
-static bool calculate(const struct ccase *first,
-                      const struct casefile *cf, void *_mode, 
-                     const struct dataset *ds);
+static void calculate(struct cmd_t_test *,
+                      struct casereader *,
+                     const struct dataset *);
  
  static  int mode;
  
@@ -258,6 +257,8 @@ static unsigned  hash_group_binary(const struct group_statistics *g,
  int
  cmd_t_test (struct lexer *lexer, struct dataset *ds)
  {
+  struct casegrouper *grouper;
+  struct casereader *group;
    bool ok;
    
    if ( !parse_t_test (lexer, ds, &cmd, NULL) )
@@ -338,7 +339,12 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds)
  
    bad_weight_warn = true;
  
-  ok = multipass_procedure_with_splits (ds, calculate, &cmd);
+  /* Data pass. */
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group)) 
+    calculate (&cmd, group, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
  
    n_pairs=0;
    free(pairs);
@@ -1411,30 +1417,30 @@ static int
  common_calc (const struct dictionary *dict, 
              const struct ccase *c, 
              void *_cmd, 
-            const struct casefilter *filter)
+            enum mv_class exclude)
  {
    int i;
    struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd;  
  
-  double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+  double weight = dict_get_case_weight (dict, c, NULL);
  
  
    /* Listwise has to be implicit if the independent variable is missing ?? */
    if ( cmd->sbc_groups )
      {
-      if ( casefilter_variable_missing (filter, c, indep_var) )
+      if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
         return 0;
      }
  
    for(i = 0; i < cmd->n_variables ; ++i) 
      {
        const struct variable *v = cmd->v_variables[i];
-
-      if (! casefilter_variable_missing (filter, c, v) )
+      const union value *val = case_data (c, v);
+      
+      if (!var_is_value_missing (v, val, exclude))
         {
           struct group_statistics *gs;
-         const union value *val = case_data (c, v);
-         gs = &group_proc_get (cmd->v_variables[i])->ugs;
+         gs = &group_proc_get (v)->ugs;
  
           gs->n += weight;
           gs->sum += weight * val->f;
@@ -1492,13 +1498,13 @@ common_postcalc (struct cmd_t_test *cmd)
  static int 
  one_sample_calc (const struct dictionary *dict, 
                  const struct ccase *c, void *cmd_, 
-                const struct casefilter *filter)
+                enum mv_class exclude)
  {
    int i;
  
    struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_;
  
-  double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+  double weight = dict_get_case_weight (dict, c, NULL);
  
  
    for(i=0; i< cmd->n_variables ; ++i) 
@@ -1509,7 +1515,7 @@ one_sample_calc (const struct dictionary *dict,
  
        gs= &group_proc_get (cmd->v_variables[i])->ugs;
  
-      if ( ! casefilter_variable_missing (filter, c, v))
+      if (!var_is_value_missing (v, val, exclude))
         gs->sum_diff += weight * (val->f - cmd->n_testval[0]);
      }
  
@@ -1569,11 +1575,11 @@ paired_precalc (struct cmd_t_test *cmd UNUSED)
  
  static int  
  paired_calc (const struct dictionary *dict, const struct ccase *c, 
-            struct cmd_t_test *cmd UNUSED, const struct casefilter *filter)
+            struct cmd_t_test *cmd UNUSED, enum mv_class exclude)
  {
    int i;
  
-  double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+  double weight = dict_get_case_weight (dict, c, NULL);
  
    for(i=0; i < n_pairs ; ++i )
      {
@@ -1583,8 +1589,8 @@ paired_calc (const struct dictionary *dict, const struct ccase *c,
        const union value *val0 = case_data (c, v0);
        const union value *val1 = case_data (c, v1);
  
-      if (  ! casefilter_variable_missing (filter, c, v0) && 
-           ! casefilter_variable_missing (filter, c, v1) )
+      if (!var_is_value_missing (v0, val0, exclude) &&
+          !var_is_value_missing (v1, val1, exclude))
         {
           pairs[i].n += weight;
           pairs[i].sum[0] += weight * val0->f;
@@ -1694,16 +1700,15 @@ group_precalc (struct cmd_t_test *cmd )
  static int  
  group_calc (const struct dictionary *dict, 
             const struct ccase *c, struct cmd_t_test *cmd, 
-           const struct casefilter *filter)
+           enum mv_class exclude)
  {
    int i;
  
-  const double weight = 
-    dict_get_case_weight (dict, c, &bad_weight_warn);
+  const double weight = dict_get_case_weight (dict, c, NULL);
  
    const union value *gv;
  
-  if ( casefilter_variable_missing (filter, c, indep_var))
+  if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
      return 0;
  
    gv = case_data (c, indep_var);
@@ -1722,7 +1727,7 @@ group_calc (const struct dictionary *dict,
        if ( ! gs ) 
         return 0;
  
-      if ( ! casefilter_variable_missing (filter, c, var) )
+      if (!var_is_value_missing (var, val, exclude))
         {
           gs->n += weight;
           gs->sum += weight * val->f;
@@ -1771,95 +1776,94 @@ group_postcalc ( struct cmd_t_test *cmd )
  
  
  
-static bool
-calculate(const struct ccase *first, const struct casefile *cf, 
-         void *cmd_, const struct dataset *ds)
+/* Runs the T-TEST passes over the cases in INPUT and, unless the
+   data turn out to be tainted, outputs the result tables.
+   Consumes INPUT on every path, including the early return. */
+static void
+calculate(struct cmd_t_test *cmd,
+          struct casereader *input, const struct dataset *ds)
  {
    const struct dictionary *dict = dataset_dict (ds);
    struct ssbox stat_summary_box;
    struct trbox test_results_box;
  
-  struct casereader *r;
+  struct casereader *pass1, *pass2, *pass3;
+  struct taint *taint;
    struct ccase c;
  
-  struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_;
+  enum mv_class exclude = cmd->miss != TTS_INCLUDE ? MV_ANY : MV_SYSTEM;
  
-  struct casefilter *filter = casefilter_create ((cmd->miss != TTS_INCLUDE
-                                                  ? MV_ANY : MV_SYSTEM), 
-                                                NULL, 0);
+  if (!casereader_peek (input, 0, &c))
+    {
+      /* No first case to report on.  INPUT has not been handed to
+         casereader_split yet, so destroy it here to avoid a leak. */
+      casereader_destroy (input);
+      return;
+    }
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
  
    if ( cmd->miss == TTS_LISTWISE ) 
-    casefilter_add_variables (filter,
-                             cmd->v_variables, cmd->n_variables);
+    input = casereader_create_filter_missing (input,
+                                              cmd->v_variables,
+                                              cmd->n_variables,
+                                              exclude, NULL);
+
+  input = casereader_create_filter_weight (input, dict, NULL, NULL);
+
+  taint = taint_clone (casereader_get_taint (input));
+  casereader_split (input, &pass1, &pass2);
                                 
-  output_split_file_values (ds, first);
    common_precalc (cmd);
-  for(r = casefile_get_reader (cf, filter);
-      casereader_read (r, &c) ;
-      case_destroy (&c)) 
-    {
-      common_calc (dict, &c, cmd, filter);
-    }
-
-  casereader_destroy (r);
+  for (; casereader_read (pass1, &c); case_destroy (&c)) 
+    common_calc (dict, &c, cmd, exclude);
+  casereader_destroy (pass1);
    common_postcalc (cmd);
  
    switch(mode)
      {
      case T_1_SAMPLE:
        one_sample_precalc (cmd);
-      for(r = casefile_get_reader (cf, filter);
-         casereader_read (r, &c) ;
-          case_destroy (&c)) 
-       {
-         one_sample_calc (dict, &c, cmd, filter);
-       }
-      casereader_destroy (r);
+      for (; casereader_read (pass2, &c); case_destroy (&c)) 
+        one_sample_calc (dict, &c, cmd, exclude);
        one_sample_postcalc (cmd);
        break;
      case T_PAIRED:
        paired_precalc(cmd);
-      for(r = casefile_get_reader (cf, filter);
-         casereader_read (r, &c) ;
-          case_destroy (&c)) 
-       {
-         paired_calc (dict, &c, cmd, filter);
-       }
-      casereader_destroy (r);
+      for (; casereader_read (pass2, &c); case_destroy (&c)) 
+        paired_calc (dict, &c, cmd, exclude);
        paired_postcalc (cmd);
-
        break;
      case T_IND_SAMPLES:
+      pass3 = casereader_clone (pass2);
  
        group_precalc(cmd);
-      for(r = casefile_get_reader (cf, filter);
-         casereader_read (r, &c) ;
-          case_destroy (&c)) 
-       {
-         group_calc (dict, &c, cmd, filter);
-       }
-      casereader_destroy (r);
+      for(; casereader_read (pass2, &c); case_destroy (&c)) 
+        group_calc (dict, &c, cmd, exclude);
        group_postcalc(cmd);
  
-      levene (dict, cf, indep_var, cmd->n_variables, cmd->v_variables,
-             filter);
+      levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables,
+              exclude);
        break;
      }
+  casereader_destroy (pass2);
+ 
+  if (!taint_has_tainted_successor (taint)) 
+    {
+      ssbox_create(&stat_summary_box,cmd,mode);
+      ssbox_populate(&stat_summary_box,cmd);
+      ssbox_finalize(&stat_summary_box);
  
-  casefilter_destroy (filter);
-
-  ssbox_create(&stat_summary_box,cmd,mode);
-  ssbox_populate(&stat_summary_box,cmd);
-  ssbox_finalize(&stat_summary_box);
-
-  if ( mode == T_PAIRED) 
-      pscbox();
-
-  trbox_create(&test_results_box,cmd,mode);
-  trbox_populate(&test_results_box,cmd);
-  trbox_finalize(&test_results_box);
-
-  return true;
+      if ( mode == T_PAIRED ) 
+        pscbox();
+  
+      trbox_create(&test_results_box,cmd,mode);
+      trbox_populate(&test_results_box,cmd);
+      trbox_finalize(&test_results_box);
+    }
+
+  /* Release the reference obtained from taint_clone above. */
+  taint_destroy (taint);
  }
  
  short which_group(const struct group_statistics *g,
diff --git a/src/language/tests/automake.mk b/src/language/tests/automake.mk

index 198c1497cbe01d9dc3e599fc95789ce48aac054e..bbcd777b3ebeec54c6b0cece014015a6100b3388 100644 (file)
--- a/src/language/tests/automake.mk
+++ b/src/language/tests/automake.mk
@@ -4,7 +4,6 @@ language_tests_built_sources = \
         src/language/tests/check-model.c
  
  language_tests_sources = \
-       src/language/tests/casefile-test.c \
         src/language/tests/check-model.h \
         src/language/tests/datasheet-test.c \
         src/language/tests/float-format.c \
diff --git a/src/language/tests/casefile-test.c b/src/language/tests/casefile-test.c

deleted file mode 100644 (file)

index eb8ee06..0000000
--- a/src/language/tests/casefile-test.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2004 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#include <config.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
-
-#include <data/case.h>
-
-#include <gsl/gsl_randist.h>
-#include <gsl/gsl_rng.h>
-#include <stdarg.h>
-#include <language/command.h>
-#include <language/lexer/lexer.h>
-#include <libpspp/assertion.h>
-
-#include "xalloc.h"
-
-static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
-static void get_random_case (struct ccase *, size_t value_cnt,
-                             size_t case_idx);
-static void write_random_case (struct casefile *cf, size_t case_idx);
-static void read_and_verify_random_case (struct casefile *cf,
-                                         struct casereader *reader,
-                                         size_t case_idx);
-static void test_casereader_clone (struct casereader *reader1, size_t case_cnt);
-                                
-
-static void fail_test (const char *message, ...);
-
-int
-cmd_debug_casefile (struct lexer *lexer, struct dataset *ds UNUSED) 
-{
-  static const size_t sizes[] =
-    {
-      1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
-      100, 137, 257, 521, 1031, 2053
-    };
-  int size_max;
-  int case_max;
-  int pattern;
-
-  size_max = sizeof sizes / sizeof *sizes;
-  if (lex_match_id (lexer, "SMALL")) 
-    {
-      size_max -= 4;
-      case_max = 511; 
-    }
-  else
-    case_max = 4095;
-  if (lex_token (lexer) != '.')
-    return lex_end_of_command (lexer);
-    
-  for (pattern = 0; pattern < 7; pattern++) 
-    {
-      const size_t *size;
-
-      for (size = sizes; size < sizes + size_max; size++) 
-        {
-          size_t case_cnt;
-
-          for (case_cnt = 0; case_cnt <= case_max;
-               case_cnt = (case_cnt * 2) + 1)
-            test_casefile (pattern, *size, case_cnt);
-        }
-    }
-  printf ("Casefile tests succeeded.\n");
-  return CMD_SUCCESS;
-}
-
-static void
-test_casefile (int pattern, size_t value_cnt, size_t case_cnt) 
-{
-  struct casefile *cf;
-  struct casereader *r1, *r2;
-  struct ccase c;
-  gsl_rng *rng;
-  size_t i, j;
-
-  rng = gsl_rng_alloc (gsl_rng_mt19937);
-  cf = fastfile_create (value_cnt);
-  if (pattern == 5)
-    casefile_to_disk (cf);
-  for (i = 0; i < case_cnt; i++)
-    write_random_case (cf, i);
-  if (pattern == 5)
-    casefile_sleep (cf);
-  r1 = casefile_get_reader (cf, NULL);
-  r2 = casefile_get_reader (cf, NULL);
-  switch (pattern) 
-    {
-    case 0:
-    case 5:
-      for (i = 0; i < case_cnt; i++) 
-        {
-          read_and_verify_random_case (cf, r1, i);
-          read_and_verify_random_case (cf, r2, i);
-        } 
-      break;
-    case 1:
-      for (i = 0; i < case_cnt; i++)
-        read_and_verify_random_case (cf, r1, i);
-      for (i = 0; i < case_cnt; i++) 
-        read_and_verify_random_case (cf, r2, i);
-      break;
-    case 2:
-    case 3:
-    case 4:
-      for (i = j = 0; i < case_cnt; i++) 
-        {
-          read_and_verify_random_case (cf, r1, i);
-          if (gsl_rng_get (rng) % pattern == 0) 
-            read_and_verify_random_case (cf, r2, j++); 
-          if (i == case_cnt / 2)
-            casefile_to_disk (cf);
-        }
-      for (; j < case_cnt; j++) 
-        read_and_verify_random_case (cf, r2, j);
-      break;
-    case 6:
-      test_casereader_clone (r1, case_cnt);
-      test_casereader_clone (r2, case_cnt);
-      break;
-    default:
-      NOT_REACHED ();
-    }
-  if (casereader_read (r1, &c))
-    fail_test ("Casereader 1 not at end of file.");
-  if (casereader_read (r2, &c))
-    fail_test ("Casereader 2 not at end of file.");
-  if (pattern != 1)
-    casereader_destroy (r1);
-  if (pattern != 2)
-    casereader_destroy (r2);
-  if (pattern > 2) 
-    {
-      r1 = casefile_get_destructive_reader (cf);
-      for (i = 0; i < case_cnt; i++) 
-        {
-          struct ccase read_case, expected_case;
-          
-          get_random_case (&expected_case, value_cnt, i);
-          if (!casereader_read_xfer (r1, &read_case)) 
-            fail_test ("Premature end of casefile.");
-          for (j = 0; j < value_cnt; j++) 
-            {
-              double a = case_num_idx (&read_case, j);
-              double b = case_num_idx (&expected_case, j);
-              if (a != b)
-                fail_test ("Case %lu fails comparison.", (unsigned long) i); 
-            }
-          case_destroy (&expected_case);
-          case_destroy (&read_case);
-        }
-      casereader_destroy (r1);
-    }
-  casefile_destroy (cf);
-  gsl_rng_free (rng);
-}
-
-static void
-get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx) 
-{
-  int i;
-  case_create (c, value_cnt);
-  for (i = 0; i < value_cnt; i++)
-    case_data_rw_idx (c, i)->f = case_idx % 257 + i;
-}
-
-static void
-write_random_case (struct casefile *cf, size_t case_idx) 
-{
-  struct ccase c;
-  get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
-  casefile_append_xfer (cf, &c);
-}
-
-static void
-read_and_verify_random_case (struct casefile *cf,
-                             struct casereader *reader, size_t case_idx) 
-{
-  struct ccase read_case, expected_case;
-  size_t value_cnt;
-  size_t i;
-  
-  value_cnt = casefile_get_value_cnt (cf);
-  get_random_case (&expected_case, value_cnt, case_idx);
-  if (!casereader_read (reader, &read_case)) 
-    fail_test ("Premature end of casefile.");
-  for (i = 0; i < value_cnt; i++) 
-    {
-      double a = case_num_idx (&read_case, i);
-      double b = case_num_idx (&expected_case, i);
-      if (a != b)
-        fail_test ("Case %lu fails comparison.", (unsigned long) case_idx); 
-    }
-  case_destroy (&read_case);
-  case_destroy (&expected_case);
-}
-
-static void
-test_casereader_clone (struct casereader *reader1, size_t case_cnt)
-{
-  size_t i;
-  size_t cases = 0;
-  struct ccase c1;
-  struct ccase c2;
-  struct casefile *src = casereader_get_casefile (reader1);
-  struct casereader *clone = NULL;
-
-  size_t value_cnt = casefile_get_value_cnt (src);
-
-  struct casefile *newfile = fastfile_create (value_cnt);
-  struct casereader *newreader;
-
-
-  /* Read a 3rd of the cases */
-  for ( i = 0 ; i < case_cnt / 3 ; ++i ) 
-    {
-      casereader_read (reader1, &c1);
-      case_destroy (&c1);
-    }
-
-  clone = casereader_clone (reader1);
-
-  /* Copy all the cases into a new file */
-  while( casereader_read (reader1, &c1))
-    { 
-      casefile_append_xfer (newfile, &c1);
-      cases ++;
-    }
-
-  newreader = casefile_get_reader (newfile, NULL);
-
-  /* Make sure that the new file's are identical to those returned from 
-     the cloned reader */
-  while( casereader_read (clone, &c1))
-    { 
-      const union value *v1;
-      const union value *v2;
-      cases --;
-
-      if ( ! casereader_read_xfer (newreader, &c2) ) 
-        {
-          case_destroy (&c1);
-          break; 
-        }
-      
-      v1 = case_data_all (&c1) ;
-      v2 = case_data_all (&c2) ;
-
-      if ( 0 != memcmp (v1, v2, value_cnt * MAX_SHORT_STRING))
-       fail_test ("Cloned reader read different value at case %ld", cases);
-
-      case_destroy (&c1);
-      case_destroy (&c2);
-    }
-
-  if ( cases > 0 ) 
-    fail_test ("Cloned reader reads different number of cases.");
-
-}
-
-static void
-fail_test (const char *message, ...) 
-{
-  va_list args;
-
-  va_start (args, message);
-  vprintf (message, args);
-  putchar ('\n');
-  va_end (args);
-  
-  exit (1);
-}
diff --git a/src/libpspp/deque.h b/src/libpspp/deque.h

index be1121bfe7ab22f3bd794e36429d07190226472a..0233c13d31bac1870ce39fdd361d58f6f6a12bec 100644 (file)
--- a/src/libpspp/deque.h
+++ b/src/libpspp/deque.h
@@ -66,6 +66,7 @@
  
  #include <stdbool.h>
  #include <stddef.h>
+#include <stdlib.h>
  
  #include <libpspp/assertion.h>
  
diff --git a/src/math/ChangeLog b/src/math/ChangeLog

index acc403883b9ba9fd3b4ea1ff9283a3575e22cbf9..2c0df305822838c8d3aa2c9aa0fcb76fa2a7f688 100644 (file)
--- a/src/math/ChangeLog
+++ b/src/math/ChangeLog
@@ -1,3 +1,18 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       Adapt case sources, sinks, and clients of procedure code to the
+       new infrastructure.
+
+       * automake.mk: Add new files.
+       
+       * levene.c: Adapt to new casereaders.  Abstract better.
+
+       * merge.c: New file.
+
+       * merge.h: New file.
+
+       * sort.c: Rewrite in terms of case_ordering, merger.
+
  2007-05-31  Jason Stover  <jhs@math.gcsu.edu>
  
         * interaction.c: New file.
diff --git a/src/math/automake.mk b/src/math/automake.mk

index 2a8f4408a0fe548e827f5149f6bf3cc775462517..5bbf24fac2e39db8679a261c958510ec04851cfc 100644 (file)
--- a/src/math/automake.mk
+++ b/src/math/automake.mk
@@ -19,6 +19,8 @@ src_math_libpspp_math_a_SOURCES = \
         src/math/interaction.h \
         src/math/levene.c \
         src/math/levene.h \
+       src/math/merge.c \
+       src/math/merge.h \
         src/math/moments.c  src/math/moments.h \
         src/math/percentiles.c src/math/percentiles.h \
         src/math/design-matrix.c src/math/design-matrix.h \
diff --git a/src/math/levene.c b/src/math/levene.c

index a325138b14517198eeb71e8204c60472a8ba26c3..15f9a583027b4da5b07333c82a01b03fe457f13d 100644 (file)
--- a/src/math/levene.c
+++ b/src/math/levene.c
@@ -22,14 +22,13 @@
  #include "levene.h"
  #include <libpspp/message.h>
  #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casereader.h>
  #include <data/dictionary.h>
  #include "group-proc.h"
  #include <libpspp/hash.h>
  #include <libpspp/str.h>
  #include <data/variable.h>
  #include <data/procedure.h>
-#include <data/casefilter.h>
  #include <libpspp/alloc.h>
  #include <libpspp/misc.h>
  #include "group.h"
@@ -74,90 +73,87 @@ struct levene_info
    const struct variable  **v_dep;
  
    /* Filter for missing values */
-  struct casefilter *filter;
+  enum mv_class exclude;
+
+  /* An array of lz_stats for each variable */
+  struct lz_stats *lz;
+
+  /* The denominator of the expression for the Levene statistic */
+  double *lz_denominator;
+
+};
+
+/* Per variable statistics */
+struct lz_stats
+{
+  /* Total of all lz */
+  double grand_total;
+
+  /* Mean of all lz */
+  double grand_mean;
+
+  /* The total number of cases */
+  double total_n ; 
+
+  /* Number of groups */
+  int n_groups;
  };
  
  /* First pass */
  static void  levene_precalc (const struct levene_info *l);
  static int levene_calc (const struct dictionary *dict, const struct ccase *, 
                         const struct levene_info *l);
-static void levene_postcalc (void *);
+static void levene_postcalc (struct levene_info *);
  
  
  /* Second pass */
  static void levene2_precalc (struct levene_info *l);
  static int levene2_calc (const struct dictionary *, const struct ccase *, 
                          struct levene_info *l);
-static void levene2_postcalc (void *);
+static void levene2_postcalc (struct levene_info *);
  
  
-void  
+void
  levene(const struct dictionary *dict, 
-       const struct casefile *cf,
+       struct casereader *reader,
         const struct variable *v_indep, size_t n_dep, 
         const struct variable **v_dep,
-       struct casefilter *filter)
+       enum mv_class exclude)
  {
-  struct casereader *r;
+  struct casereader *pass1, *pass2;
    struct ccase c;
    struct levene_info l;
  
    l.n_dep      = n_dep;
    l.v_indep    = v_indep;
    l.v_dep      = v_dep;
-  l.filter = filter;
+  l.exclude    = exclude;
+  l.lz         = xnmalloc (l.n_dep, sizeof *l.lz);
+  l.lz_denominator = xnmalloc (l.n_dep, sizeof *l.lz_denominator);
  
+  casereader_split (reader, &pass1, &pass2);
  
    levene_precalc (&l);
-  for(r = casefile_get_reader (cf, filter);
-      casereader_read (r, &c) ;
-      case_destroy (&c)) 
-    {
-      levene_calc (dict, &c, &l);
-    }
-  casereader_destroy (r);
+  for (; casereader_read (pass1, &c); case_destroy (&c)) 
+    levene_calc (dict, &c, &l);
+  casereader_destroy (pass1);
    levene_postcalc (&l);
  
    levene2_precalc(&l);
-  for(r = casefile_get_reader (cf, filter);
-      casereader_read (r, &c) ;
-      case_destroy (&c)) 
-    {
-      levene2_calc (dict, &c,&l);
-    }
-  casereader_destroy (r);
+  for (; casereader_read (pass2, &c); case_destroy (&c)) 
+    levene2_calc (dict, &c, &l);
+  casereader_destroy (pass2);
    levene2_postcalc (&l);
-}
-
-/* Internal variables used in calculating the Levene statistic */
-
-/* Per variable statistics */
-struct lz_stats
-{
-  /* Total of all lz */
-  double grand_total;
-
-  /* Mean of all lz */
-  double grand_mean;
-
-  /* The total number of cases */
-  double total_n ; 
-
-  /* Number of groups */
-  int n_groups;
-};
-
-/* An array of lz_stats for each variable */
-static struct lz_stats *lz;
  
+  free (l.lz_denominator);
+  free (l.lz);
+}
  
  static void 
  levene_precalc (const struct levene_info *l)
  {
    size_t i;
  
-  lz = xnmalloc (l->n_dep, sizeof *lz);
-
    for(i = 0; i < l->n_dep ; ++i ) 
      {
        const struct variable *var = l->v_dep[i];
@@ -165,9 +161,9 @@ levene_precalc (const struct levene_info *l)
        struct group_statistics *gs;
        struct hsh_iterator hi;
  
-      lz[i].grand_total = 0;
-      lz[i].total_n = 0;
-      lz[i].n_groups = gp->n_groups ; 
+      l->lz[i].grand_total = 0;
+      l->lz[i].total_n = 0;
+      l->lz[i].n_groups = gp->n_groups ; 
  
        
        for ( gs = hsh_first(gp->group_hash, &hi);
@@ -206,11 +202,11 @@ levene_calc (const struct dictionary *dict, const struct ccase *c,
        if ( 0 == gs ) 
         continue ;
  
-      if ( ! casefilter_variable_missing (l->filter, c, var))
+      if ( !var_is_value_missing (var, v, l->exclude))
         {
           levene_z= fabs(v->f - gs->mean);
-         lz[i].grand_total += levene_z * weight;
-         lz[i].total_n += weight; 
+         l->lz[i].grand_total += levene_z * weight;
+         l->lz[i].total_n += weight; 
  
           gs->lz_total += levene_z * weight;
         }
@@ -220,16 +216,14 @@ levene_calc (const struct dictionary *dict, const struct ccase *c,
  
  
  static void 
-levene_postcalc (void *_l)
+levene_postcalc (struct levene_info *l)
  {
    size_t v;
  
-  struct levene_info *l = (struct levene_info *) _l;
-
    for (v = 0; v < l->n_dep; ++v) 
      {
        /* This is Z_LL */
-      lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
+      l->lz[v].grand_mean = l->lz[v].grand_total / l->lz[v].total_n ;
      }
  
    
@@ -237,15 +231,11 @@ levene_postcalc (void *_l)
  
  
  
-/* The denominator for the expression for the Levene */
-static double *lz_denominator = 0;
-
  static void 
  levene2_precalc (struct levene_info *l)
  {
    size_t v;
  
-  lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator);
  
    /* This stuff could go in the first post calc . . . */
    for (v = 0; 
@@ -265,7 +255,7 @@ levene2_precalc (struct levene_info *l)
         {
           g->lz_mean = g->lz_total / g->n ;
         }
-      lz_denominator[v] = 0;
+      l->lz_denominator[v] = 0;
    }
  }
  
@@ -295,11 +285,10 @@ levene2_calc (const struct dictionary *dict, const struct ccase *c,
        if ( 0 == gs ) 
         continue;
  
-      if ( ! casefilter_variable_missing (l->filter, c, var))
-
+      if ( !var_is_value_missing (var, v, l->exclude))
         {
           levene_z = fabs(v->f - gs->mean); 
-         lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
+         l->lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
         }
      }
  
@@ -308,12 +297,10 @@ levene2_calc (const struct dictionary *dict, const struct ccase *c,
  
  
  static void 
-levene2_postcalc (void *_l)
+levene2_postcalc (struct levene_info *l)
  {
    size_t v;
  
-  struct levene_info *l = (struct levene_info *) _l;
-
    for (v = 0; v < l->n_dep; ++v) 
      {
        double lz_numerator = 0;
@@ -328,18 +315,14 @@ levene2_postcalc (void *_l)
           g != 0 ;
           g = (struct group_statistics *) hsh_next(hash,&hi) )
         {
-         lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
+         lz_numerator += g->n * pow2(g->lz_mean - l->lz[v].grand_mean );
         }
        lz_numerator *= ( gp->ugs.n - gp->n_groups );
  
-      lz_denominator[v] *= (gp->n_groups - 1);
+      l->lz_denominator[v] *= (gp->n_groups - 1);
  
-      gp->levene = lz_numerator / lz_denominator[v] ;
+      gp->levene = lz_numerator / l->lz_denominator[v] ;
  
      }
-
-  /* Now clear up after ourselves */
-  free(lz_denominator);
-  free(lz);
  }
  
diff --git a/src/math/levene.h b/src/math/levene.h

index 66944dafb434325aaaf4cc97a781a3f82e0be1c7..40ed52ce0ed493a6027f7435eda264ed8e54f03f 100644 (file)
--- a/src/math/levene.h
+++ b/src/math/levene.h
@@ -21,9 +21,9 @@
  #if !levene_h
  #define levene_h 1
  
-
+#include <data/casereader.h>
+#include <data/missing-values.h>
  #include <data/variable.h>
-#include <data/casefile.h>
  
  /* Calculate the Levene statistic 
  
@@ -39,10 +39,10 @@ The dependent variables :   v_dep;
  struct dictionary ;
  struct casefilter ;
  
-void  levene(const struct dictionary *dict, const struct casefile *cf, 
+void  levene(const struct dictionary *dict, struct casereader *,
              const struct variable *v_indep, size_t n_dep, 
              const struct variable **v_dep,
-            struct casefilter *filter);
+            enum mv_class exclude);
  
  
  
diff --git a/src/math/merge.c b/src/math/merge.c

new file mode 100644 (file)

index 0000000..3344547
--- /dev/null
+++ b/src/math/merge.c
@@ -0,0 +1,159 @@
+/* PSPP - computes sample statistics.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA. */
+
+/* FIXME: error checking. */
+/* FIXME: merge pattern should be improved; this one causes a
+   performance regression. */
+#include <config.h>
+
+#include <math/merge.h>
+
+#include <data/case-ordering.h>
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <libpspp/array.h>
+#include <libpspp/assertion.h>
+#include <libpspp/taint.h>
+
+#include "xalloc.h"
+
+#define MAX_MERGE_ORDER 7
+
+struct merge_input 
+  {
+    struct casereader *reader;
+    struct ccase c;
+  };
+
+struct merge 
+  {
+    struct case_ordering *ordering;
+    struct merge_input inputs[MAX_MERGE_ORDER];
+    size_t input_cnt;
+  };
+
+static void do_merge (struct merge *m);
+
+struct merge *
+merge_create (const struct case_ordering *ordering) 
+{
+  struct merge *m = xmalloc (sizeof *m);
+  m->ordering = case_ordering_clone (ordering);
+  m->input_cnt = 0;
+  return m;
+}
+
+void
+merge_destroy (struct merge *m) 
+{
+  if (m != NULL) 
+    {
+      size_t i;
+      
+      case_ordering_destroy (m->ordering);
+      for (i = 0; i < m->input_cnt; i++)
+        casereader_destroy (m->inputs[i].reader);
+      free (m);
+    }
+}
+
+void
+merge_append (struct merge *m, struct casereader *r) 
+{
+  r = casereader_rename (r);
+  m->inputs[m->input_cnt++].reader = r;
+  if (m->input_cnt >= MAX_MERGE_ORDER)
+    do_merge (m);
+}
+
+struct casereader *
+merge_make_reader (struct merge *m) 
+{
+  struct casereader *r;
+  
+  if (m->input_cnt > 1)
+    do_merge (m);
+
+  if (m->input_cnt == 1)
+    {
+      r = m->inputs[0].reader;
+      m->input_cnt = 0;
+    }
+  else if (m->input_cnt == 0)
+    {
+      size_t value_cnt = case_ordering_get_value_cnt (m->ordering);
+      struct casewriter *writer = mem_writer_create (value_cnt);
+      r = casewriter_make_reader (writer);
+    }
+  else
+    NOT_REACHED ();
+
+  return r;
+}
+
+static bool
+read_input_case (struct merge *m, size_t idx) 
+{
+  struct merge_input *i = &m->inputs[idx];
+
+  if (casereader_read (i->reader, &i->c))
+    return true;
+  else
+    {
+      casereader_destroy (i->reader);
+      remove_element (m->inputs, m->input_cnt, sizeof *m->inputs, idx);
+      m->input_cnt--;
+      return false;
+    }  
+}
+
+static void
+do_merge (struct merge *m) 
+{
+  struct casewriter *w;
+  size_t i;
+  
+  assert (m->input_cnt > 1);
+
+  w = tmpfile_writer_create (case_ordering_get_value_cnt (m->ordering));
+  for (i = 0; i < m->input_cnt; i++) 
+    taint_propagate (casereader_get_taint (m->inputs[i].reader),
+                     casewriter_get_taint (w));
+  
+  for (i = 0; i < m->input_cnt; ) 
+    if (read_input_case (m, i))
+      i++;
+  while (m->input_cnt > 0) 
+    {
+      size_t min;
+
+      min = 0;
+      for (i = 1; i < m->input_cnt; i++)
+        if (case_ordering_compare_cases (&m->inputs[i].c, &m->inputs[min].c,
+                                         m->ordering) < 0)
+          min = i;
+
+      casewriter_write (w, &m->inputs[min].c);
+      read_input_case (m, min);
+    }
+
+  m->input_cnt = 1;
+  m->inputs[0].reader = casewriter_make_reader (w);
+}
+
diff --git a/src/math/merge.h b/src/math/merge.h

new file mode 100644 (file)

index 0000000..6185205
--- /dev/null
+++ b/src/math/merge.h
@@ -0,0 +1,32 @@
+/* PSPP - computes sample statistics.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA. */
+
+#ifndef MATH_MERGE_H
+#define MATH_MERGE_H 1
+
+#include <stdbool.h>
+
+struct case_ordering;
+struct casereader;
+
+struct merge *merge_create (const struct case_ordering *);
+void merge_destroy (struct merge *);
+void merge_append (struct merge *, struct casereader *);
+struct casereader *merge_make_reader (struct merge *);
+
+#endif /* math/merge.h */
diff --git a/src/math/sort.c b/src/math/sort.c

index 46da0ec0dc4406b16d4b9b3c6c993b96cdf40623..aa7d2071d2724434478e85cf0af32b8b7053ffd9 100644 (file)
--- a/src/math/sort.c
+++ b/src/math/sort.c
@@ -20,31 +20,18 @@
  
  #include "sort.h"
  
-#include <errno.h>
-#include <limits.h>
-#include <stdbool.h>
  #include <stdio.h>
-#include <stdlib.h>
  
-#include <data/case-source.h>
+#include <data/case-ordering.h>
  #include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
-#include <data/casefile-factory.h>
-#include <data/fastfile-factory.h>
-#include <data/procedure.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/casewriter-provider.h>
  #include <data/settings.h>
-#include <data/variable.h>
-#include <data/storage-stream.h>
  #include <libpspp/alloc.h>
  #include <libpspp/array.h>
  #include <libpspp/assertion.h>
-#include <libpspp/message.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/str.h>
-
-#include "minmax.h"
+#include <math/merge.h>
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
@@ -52,701 +39,261 @@
  /* These should only be changed for testing purposes. */
  int min_buffers = 64;
  int max_buffers = INT_MAX;
-bool allow_internal_sort = true;
-
-static int compare_record (const struct ccase *, const struct ccase *,
-                           const struct sort_criteria *);
-static struct casefile *do_internal_sort (struct casereader *,
-                                          const struct sort_criteria *,
-                                         struct casefile_factory *
-                                         );
-static struct casefile *do_external_sort (struct casereader *,
-                                          const struct sort_criteria *,
-                                         struct casefile_factory *
-                                         );
-
-
-/* Sorts the active file in-place according to CRITERIA.
-   Returns true if successful. */
-bool
-sort_active_file_in_place (struct dataset *ds, 
-                          const struct sort_criteria *criteria) 
-{
-  struct casefile *in, *out;
-
-  proc_cancel_temporary_transformations (ds);
-  if (!procedure (ds, NULL, NULL))
-    return false;
-  
-  in = proc_capture_output (ds);
-  out = sort_execute (casefile_get_destructive_reader (in), criteria, 
-                     dataset_get_casefile_factory (ds));
-  if (out == NULL) 
-    return false;
-
-  proc_set_source (ds, storage_source_create (out));
-  return true;
-}
  
-/* Data passed to sort_to_casefile_callback(). */
-struct sort_to_casefile_cb_data 
+struct sort_writer              /* State of a sorting casewriter. */
    {
-    const struct sort_criteria *criteria;
-    struct casefile *output;
-    struct casefile_factory *factory ;
+    struct case_ordering *ordering;   /* Sort criteria (private clone). */
+    struct merge *merge;              /* Receives each finished run. */
+    struct pqueue *pqueue;            /* Cases waiting to be output to a run. */
+
+    struct casewriter *run;           /* Run being written, or NULL if none. */
+    casenumber run_id;                /* Id of RUN; 0 until a run is started. */
+    struct ccase run_end;             /* Copy of last case output; null at first. */
    };
  
-/* Sorts casefile CF according to the criteria in CB_DATA. */
-static bool
-sort_to_casefile_callback (const struct casefile *cf, void *cb_data_) 
-{
-  struct sort_to_casefile_cb_data *cb_data = cb_data_;
-  cb_data->output = sort_execute (casefile_get_reader (cf, NULL), 
-                                 cb_data->criteria,
-                                 cb_data->factory
-                                 );
-  return cb_data->output != NULL;
-}
+static struct casewriter_class sort_casewriter_class;
  
-/* Sorts the active file to a separate casefile.  If successful,
-   returns the sorted casefile.  Returns a null pointer on
-   failure. */
-struct casefile *
-sort_active_file_to_casefile (struct dataset *ds, 
-                             const struct sort_criteria *criteria) 
+static struct pqueue *pqueue_create (const struct case_ordering *);
+static void pqueue_destroy (struct pqueue *);
+static bool pqueue_is_full (const struct pqueue *);
+static bool pqueue_is_empty (const struct pqueue *);
+static void pqueue_push (struct pqueue *, struct ccase *, casenumber);
+static void pqueue_pop (struct pqueue *, struct ccase *, casenumber *);
+
+static void output_record (struct sort_writer *);
+
+struct casewriter *             /* New sorting casewriter; takes over ORDERING. */
+sort_create_writer (struct case_ordering *ordering) 
  {
-  struct sort_to_casefile_cb_data cb_data;
-  
-  proc_cancel_temporary_transformations (ds);
+  struct sort_writer *sort;
  
-  cb_data.criteria = criteria;
-  cb_data.output = NULL;
-  cb_data.factory = dataset_get_casefile_factory (ds);
-  if (!multipass_procedure (ds, sort_to_casefile_callback, &cb_data)) 
-    {
-      casefile_destroy (cb_data.output);
-      return NULL;
-    }
-  return cb_data.output;
-}
+  sort = xmalloc (sizeof *sort);
+  sort->ordering = case_ordering_clone (ordering);  /* Private copy. */
+  sort->merge = merge_create (ordering);
+  sort->pqueue = pqueue_create (ordering);          /* Clones ORDERING itself. */
+  sort->run = NULL;                                 /* No run started yet. */
+  sort->run_id = 0;
+  case_nullify (&sort->run_end);                    /* No case output yet. */
  
+  case_ordering_destroy (ordering);   /* Caller's copy is ours to free. */
  
-/* Reads all the cases from READER, which is destroyed.  Sorts
-   the cases according to CRITERIA.  Returns the sorted cases in
-   a newly created casefile, which will be created by FACTORY.
-   If FACTORY is NULL, then a local fastfile_factory will be used.
-*/
-struct casefile *
-sort_execute (struct casereader *reader,
-             const struct sort_criteria *criteria,
-             struct casefile_factory *factory
-             )
-{
-  struct casefile_factory *local_factory = NULL;
-  struct casefile *output ;
-  if ( factory == NULL )
-    factory = local_factory = fastfile_factory_create ();
+  return casewriter_create (&sort_casewriter_class, sort);
+}
  
-  output = do_internal_sort (reader, criteria, factory);
-  if (output == NULL)
-    output = do_external_sort (reader, criteria, factory);
-  casereader_destroy (reader);
+static void                     /* Queues case C for sorting. */
+sort_casewriter_write (struct casewriter *writer UNUSED, void *sort_,
+                       struct ccase *c)
+{
+  struct sort_writer *sort = sort_;
+  bool next_run;                /* True if C must wait for the next run. */
  
-  fastfile_factory_destroy (local_factory);
+  if (pqueue_is_full (sort->pqueue)) 
+    output_record (sort);       /* Make room by emitting the minimum. */
  
-  return output;
+  next_run = (case_is_null (&sort->run_end)   /* Nothing output yet: id 0 + 1. */
+              || case_ordering_compare_cases (c, &sort->run_end,
+                                              sort->ordering) < 0);  /* C sorts before last output. */
+  pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0));
  }
-\f
-/* A case and its index. */
-struct indexed_case 
-  {
-    struct ccase c;     /* Case. */
-    unsigned long idx;  /* Index to allow for stable sorting. */
-  };
  
-static int compare_indexed_cases (const void *, const void *, const void *);
+static void                     /* Frees SORT_ and everything it owns. */
+sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_) 
+{
+  struct sort_writer *sort = sort_;
+  
+  case_ordering_destroy (sort->ordering);
+  merge_destroy (sort->merge);
+  pqueue_destroy (sort->pqueue);    /* Also destroys any cases still queued. */
+  casewriter_destroy (sort->run);   /* RUN may be NULL; assumes that is accepted -- confirm. */
+  case_destroy (&sort->run_end);
+  free (sort);
+}
  
-/* If the data is in memory, do an internal sort and return a new
-   casefile for the data.  Otherwise, return a null pointer. */
-static struct casefile *
-do_internal_sort (struct casereader *reader,
-                  const struct sort_criteria *criteria, 
-                 struct casefile_factory *factory)
+static struct casereader *      /* Finishes the sort; returns its output reader. */
+sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_)
  {
-  const struct casefile *src;
-  struct casefile *dst;
-  unsigned long case_cnt;
-
-  if (!allow_internal_sort)
-    return NULL;
-
-  src = casereader_get_casefile (reader);
-  if (casefile_get_case_cnt (src) > 1 && !casefile_in_core (src))
-    return NULL;
-      
-  case_cnt = casefile_get_case_cnt (src);
-  dst = factory->create_casefile (factory, casefile_get_value_cnt (src));
-  if (case_cnt != 0) 
+  struct sort_writer *sort = sort_;
+  struct casereader *output;
+
+  if (sort->run == NULL && sort->run_id == 0)   /* output_record never ran. */
      {
-      struct indexed_case *cases = nmalloc (sizeof *cases, case_cnt);
-      if (cases != NULL) 
-        {
-          unsigned long i;
-          
-          for (i = 0; i < case_cnt; i++)
-            {
-              bool ok = casereader_read_xfer (reader, &cases[i].c);
-              if (!ok)
-                NOT_REACHED ();
-              cases[i].idx = i;
-            }
-
-          sort (cases, case_cnt, sizeof *cases, compare_indexed_cases,
-                (void *) criteria);
-      
-          for (i = 0; i < case_cnt; i++)
-            casefile_append_xfer (dst, &cases[i].c);
-          if (casefile_error (dst))
-            NOT_REACHED ();
-
-          free (cases);
-        }
-      else 
-        {
-          /* Failure. */
-          casefile_destroy (dst);
-          dst = NULL;
-        }
+      /* In-core sort: the queue never filled, so use a memory writer. */
+      sort->run = mem_writer_create (case_ordering_get_value_cnt (
+                                       sort->ordering));
+      sort->run_id = 1;         /* Id that sort_casewriter_write gave every queued case. */
      }
+  while (!pqueue_is_empty (sort->pqueue))
+    output_record (sort);       /* Drain the queue into the run(s). */
  
-  return dst;
-}
+  merge_append (sort->merge, casewriter_make_reader (sort->run));  /* Final run. */
+  sort->run = NULL;
  
-/* Compares the variables specified by CRITERIA between the cases
-   at A and B, with a "last resort" comparison for stability, and
-   returns a strcmp()-type result. */
-static int
-compare_indexed_cases (const void *a_, const void *b_, const void *criteria_)
-{
-  const struct sort_criteria *criteria = criteria_;
-  const struct indexed_case *a = a_;
-  const struct indexed_case *b = b_;
-  int result = compare_record (&a->c, &b->c, criteria);
-  if (result == 0)
-    result = a->idx < b->idx ? -1 : a->idx > b->idx;
-  return result;
+  output = merge_make_reader (sort->merge);
+  sort_casewriter_destroy (writer, sort);  /* Frees sort->merge too; assumes OUTPUT stays valid -- confirm. */
+  return output;
  }
-\f
-/* External sort. */
  
-/* Maximum order of merge (external sort only).  The maximum
-   reasonable value is about 7.  Above that, it would be a good
-   idea to use a heap in merge_once() to select the minimum. */
-#define MAX_MERGE_ORDER 7
+static void                     /* Moves the queue's minimum case into a run. */
+output_record (struct sort_writer *sort)
+{
+  struct ccase min_case;
+  casenumber min_run_id;
  
-/* Results of an external sort. */
-struct external_sort 
-  {
-    const struct sort_criteria *criteria; /* Sort criteria. */
-    size_t value_cnt;                 /* Size of data in `union value's. */
-    struct casefile **runs;           /* Array of initial runs. */
-    size_t run_cnt, run_cap;          /* Number of runs, allocated capacity. */
-    struct casefile_factory *factory; /* Factory used to  create the result */
-  };
+  pqueue_pop (sort->pqueue, &min_case, &min_run_id);
+#if 0                           /* Debug trace, disabled. */
+  printf ("\toutput: %f to run %d\n", case_num_idx (&min_case, 0), min_run_id);
+#endif
  
-/* Prototypes for helper functions. */
-static int write_runs (struct external_sort *, struct casereader *);
-static struct casefile *merge (struct external_sort *);
-static void destroy_external_sort (struct external_sort *);
-
-/* Performs a stable external sort of the active file according
-   to the specification in SCP.  Forms initial runs using a heap
-   as a reservoir.  Merges the initial runs according to a
-   pattern that assures stability. */
-static struct casefile *
-do_external_sort (struct casereader *reader,
-                  const struct sort_criteria *criteria,
-                 struct casefile_factory *factory
-                 )
-{
-  struct external_sort *xsrt;
-
-  if (!casefile_to_disk (casereader_get_casefile (reader)))
-    return NULL;
-
-  xsrt = xmalloc (sizeof *xsrt);
-  xsrt->criteria = criteria;
-  xsrt->value_cnt = casefile_get_value_cnt (casereader_get_casefile (reader));
-  xsrt->run_cap = 512;
-  xsrt->run_cnt = 0;
-  xsrt->runs = xnmalloc (xsrt->run_cap, sizeof *xsrt->runs);
-  xsrt->factory = factory;
-  if (write_runs (xsrt, reader))
+  if (sort->run_id != min_run_id && sort->run != NULL)   /* Current run is complete. */
      {
-      struct casefile *output = merge (xsrt);
-      destroy_external_sort (xsrt);
-      return output;
+      merge_append (sort->merge, casewriter_make_reader (sort->run));
+      sort->run = NULL; 
      }
-  else
+  if (sort->run == NULL)        /* Start a new run in a tmpfile writer. */
      {
-      destroy_external_sort (xsrt);
-      return NULL;
+      sort->run = tmpfile_writer_create (case_ordering_get_value_cnt (
+                                           sort->ordering));
+      sort->run_id = min_run_id;
      }
+
+  case_destroy (&sort->run_end);
+  case_clone (&sort->run_end, &min_case);   /* Remember the last case output. */
+  
+  casewriter_write (sort->run, &min_case);  /* MIN_CASE is not destroyed here; presumably consumed -- confirm. */
  }
  
-/* Destroys XSRT. */
-static void
-destroy_external_sort (struct external_sort *xsrt) 
+static struct casewriter_class sort_casewriter_class =   /* Sorter's casewriter operations. */
+  {
+    sort_casewriter_write,
+    sort_casewriter_destroy,
+    sort_casewriter_convert_to_reader,
+  };
+\f
+/* Reads all the cases from INPUT.  Sorts the cases according to
+   ORDERING.  Returns the sorted cases in a new casereader, or a
+   null pointer if an I/O error occurs.  Both INPUT and ORDERING
+   are destroyed upon return, regardless of success. */
+struct casereader *
+sort_execute (struct casereader *input, struct case_ordering *ordering)
  {
-  if (xsrt != NULL) 
-    {
-      int i;
-      
-      for (i = 0; i < xsrt->run_cnt; i++)
-        casefile_destroy (xsrt->runs[i]);
-      free (xsrt->runs);
-      free (xsrt);
-    }
+  struct casewriter *output = sort_create_writer (ordering);  /* Destroys ORDERING. */
+  casereader_transfer (input, output);      /* presumably drains and destroys INPUT -- confirm */
+  return casewriter_make_reader (output);
  }
  \f
-/* Replacement selection. */
-
-/* Pairs a record with a run number. */
-struct record_run
+struct pqueue                   /* Bounded priority queue of cases tagged with run ids. */
    {
-    int run;                    /* Run number of case. */
-    struct ccase record;        /* Case data. */
-    size_t idx;                 /* Case number (for stability). */
+    struct case_ordering *ordering;   /* Sort criteria (private clone). */
+    struct pqueue_record *records;    /* Records arranged as a heap. */
+    size_t record_cnt;                /* Current number of records. */
+    size_t record_cap;                /* Capacity for records. */
+    casenumber idx;                   /* Insertion index for the next record. */
    };
  
-/* Represents a set of initial runs during an external sort. */
-struct initial_run_state 
+struct pqueue_record            /* One queued case. */
    {
-    struct external_sort *xsrt;
-
-    /* Reservoir. */
-    struct record_run *records; /* Records arranged as a heap. */
-    size_t record_cnt;          /* Current number of records. */
-    size_t record_cap;          /* Capacity for records. */
-    
-    /* Run currently being output. */
-    int run;                    /* Run number. */
-    size_t case_cnt;            /* Number of cases so far. */
-    struct casefile *casefile;  /* Output file. */
-    struct ccase last_output;   /* Record last output. */
-
-    int okay;                   /* Zero if an error has been encountered. */
+    casenumber id;              /* Run id the case is assigned to. */
+    struct ccase c;             /* Case data. */
+    casenumber idx;             /* Insertion order (for stability). */
    };
  
-static bool destroy_initial_run_state (struct initial_run_state *);
-static void process_case (struct initial_run_state *, 
-                         const struct ccase *, size_t);
-static int allocate_cases (struct initial_run_state *);
-static void output_record (struct initial_run_state *);
-static void start_run (struct initial_run_state *);
-static void end_run (struct initial_run_state *);
-static int compare_record_run (const struct record_run *,
-                               const struct record_run *,
-                               const struct initial_run_state *);
-static int compare_record_run_minheap (const void *, const void *, 
-                                      const void *);
-
-/* Reads cases from READER and composes initial runs in XSRT. */
-static int
-write_runs (struct external_sort *xsrt, struct casereader *reader)
-{
-  struct initial_run_state *irs;
-  struct ccase c;
-  size_t idx = 0;
-  int success = 0;
-
-  /* Allocate memory for cases. */
-  irs = xmalloc (sizeof *irs);
-  irs->xsrt = xsrt;
-  irs->records = NULL;
-  irs->record_cnt = irs->record_cap = 0;
-  irs->run = 0;
-  irs->case_cnt = 0;
-  irs->casefile = NULL;
-  case_nullify (&irs->last_output);
-  irs->okay = 1;
-  if (!allocate_cases (irs)) 
-    goto done;
-
-  /* Create initial runs. */
-  start_run (irs);
-  for (; irs->okay && casereader_read (reader, &c); case_destroy (&c))
-    process_case (irs, &c, idx++);
-  while (irs->okay && irs->record_cnt > 0)
-    output_record (irs);
-  end_run (irs);
-
-  success = irs->okay;
-
- done:
-  if (!destroy_initial_run_state (irs))
-    success = false;
-
-  return success;
-}
-
-/* Add a single case to an initial run. */
-static void
-process_case (struct initial_run_state *irs, const struct ccase *c, 
-             size_t idx)
-{
-  struct record_run *rr;
-
-  /* Compose record_run for this run and add to heap. */
-  assert (irs->record_cnt < irs->record_cap - 1);
-  rr = irs->records + irs->record_cnt++;
-  case_copy (&rr->record, 0, c, 0, irs->xsrt->value_cnt);
-  rr->run = irs->run;
-  rr->idx = idx;
-  if (!case_is_null (&irs->last_output)
-      && compare_record (c, &irs->last_output, irs->xsrt->criteria) < 0)
-    rr->run = irs->run + 1;
-  push_heap (irs->records, irs->record_cnt, sizeof *irs->records,
-             compare_record_run_minheap, irs);
-
-  /* Output a record if the reservoir is full. */
-  if (irs->record_cnt == irs->record_cap - 1 && irs->okay)
-    output_record (irs);
-}
+static int compare_pqueue_records_minheap (const void *a, const void *b,
+                                           const void *pq_);
  
-/* Destroys the initial run state represented by IRS.
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-destroy_initial_run_state (struct initial_run_state *irs) 
+static struct pqueue *          /* New empty queue ordered by a clone of ORDERING. */
+pqueue_create (const struct case_ordering *ordering) 
  {
-  int i;
-  bool ok = true;
-
-  if (irs == NULL)
-    return true;
-
-  for (i = 0; i < irs->record_cap; i++)
-    case_destroy (&irs->records[i].record);
-  free (irs->records);
-
-  if (irs->casefile != NULL)
-    ok = casefile_sleep (irs->casefile);
-
-  free (irs);
-  return ok;
+  struct pqueue *pq;
+
+  pq = xmalloc (sizeof *pq);
+  pq->ordering = case_ordering_clone (ordering);
+  pq->record_cap                /* presumably the cases that fit in the workspace -- confirm */
+    = get_workspace_cases (case_ordering_get_value_cnt (ordering));
+  if (pq->record_cap > max_buffers)        /* NOTE(review): size_t vs. int comparison. */
+    pq->record_cap = max_buffers;
+  else if (pq->record_cap < min_buffers)   /* Lower bound applies only if under max. */
+    pq->record_cap = min_buffers;
+  pq->record_cnt = 0;
+  pq->records = xnmalloc (pq->record_cap, sizeof *pq->records);  /* Slots filled by pqueue_push. */
+  pq->idx = 0;
+
+  return pq; 
  }
  
-/* Allocates room for lots of cases as a buffer. */
-static int
-allocate_cases (struct initial_run_state *irs)
-{
-  int approx_case_cost; /* Approximate memory cost of one case in bytes. */
-  int max_cases;        /* Maximum number of cases to allocate. */
-  int i;
-
-  /* Allocate as many cases as we can within the workspace
-     limit. */
-  approx_case_cost = (sizeof *irs->records
-                      + irs->xsrt->value_cnt * sizeof (union value)
-                      + 4 * sizeof (void *));
-  max_cases = get_workspace() / approx_case_cost;
-  if (max_cases > max_buffers)
-    max_cases = max_buffers;
-  irs->records = nmalloc (sizeof *irs->records, max_cases);
-  if (irs->records != NULL)
-    for (i = 0; i < max_cases; i++)
-      if (!case_try_create (&irs->records[i].record, irs->xsrt->value_cnt))
-        {
-          max_cases = i;
-          break;
-        }
-  irs->record_cap = max_cases;
-
-  /* Fail if we didn't allocate an acceptable number of cases. */
-  if (irs->records == NULL || max_cases < min_buffers)
-    {
-      msg (SE, _("Out of memory.  Could not allocate room for minimum of %d "
-                "cases of %d bytes each.  (PSPP workspace is currently "
-                "restricted to a maximum of %lu KB.)"),
-          min_buffers, approx_case_cost,
-           (unsigned long int) (get_workspace() / 1024));
-      return 0;
-    }
-  return 1;
-}
-
-/* Compares the VAR_CNT variables in VARS[] between the `value's at
-   A and B, and returns a strcmp()-type result. */
-static int
-compare_record (const struct ccase *a, const struct ccase *b,
-                const struct sort_criteria *criteria)
+static void                     /* Frees PQ and any cases still queued; NULL is a no-op. */
+pqueue_destroy (struct pqueue *pq) 
  {
-  int i;
-
-  assert (a != NULL);
-  assert (b != NULL);
-  
-  for (i = 0; i < criteria->crit_cnt; i++)
+  if (pq != NULL) 
      {
-      const struct sort_criterion *c = &criteria->crits[i];
-      int result;
-      
-      if (c->width == 0)
+      while (!pqueue_is_empty (pq))   /* Discard leftover cases one by one. */
          {
-          double af = case_num_idx (a, c->fv);
-          double bf = case_num_idx (b, c->fv);
-          
-          result = af < bf ? -1 : af > bf;
+          struct ccase c;
+          casenumber id;              /* Unused; pqueue_pop requires it. */
+          pqueue_pop (pq, &c, &id);
+          case_destroy (&c);
          }
-      else
-        result = memcmp (case_str_idx (a, c->fv),
-                         case_str_idx (b, c->fv), c->width);
-
-      if (result != 0)
-        return c->dir == SRT_ASCEND ? result : -result;
+      case_ordering_destroy (pq->ordering);
+      free (pq->records);
+      free (pq);
      }
-
-  return 0;
  }
  
-/* Compares record-run tuples A and B on run number first, then
-   on record, then on case index. */
-static int
-compare_record_run (const struct record_run *a,
-                    const struct record_run *b,
-                    const struct initial_run_state *irs)
+static bool                     /* True if PQ has no room for another record. */
+pqueue_is_full (const struct pqueue *pq) 
  {
-  int result = a->run < b->run ? -1 : a->run > b->run;
-  if (result == 0)
-    result = compare_record (&a->record, &b->record, irs->xsrt->criteria);
-  if (result == 0)
-    result = a->idx < b->idx ? -1 : a->idx > b->idx;
-  return result;
+  return pq->record_cnt >= pq->record_cap;
  }
  
-/* Compares record-run tuples A and B on run number first, then
-   on the current record according to SCP, but in descending
-   order. */
-static int
-compare_record_run_minheap (const void *a, const void *b, const void *irs) 
+static bool                     /* True if PQ holds no records. */
+pqueue_is_empty (const struct pqueue *pq) 
  {
-  return -compare_record_run (a, b, irs);
+  return pq->record_cnt == 0;
  }
  
-/* Begins a new initial run, specifically its output file. */
  static void
-start_run (struct initial_run_state *irs)
+pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id)   /* Adds C under run ID. */
  {
-  irs->run++;
-  irs->case_cnt = 0;
-
-  /* This casefile is internal to the sort, so don't use the factory
-     to create it. */
-  irs->casefile = fastfile_create (irs->xsrt->value_cnt);
-  casefile_to_disk (irs->casefile);
-  case_nullify (&irs->last_output); 
-}
+  struct pqueue_record *r;
+  
+  assert (!pqueue_is_full (pq));        /* Caller must make room first. */
  
-/* Ends the current initial run.  */
-static void
-end_run (struct initial_run_state *irs)
-{
-  struct external_sort *xsrt = irs->xsrt;
+  r = &pq->records[pq->record_cnt++];   /* Append, then restore heap order below. */
+  r->id = id;
+  case_move (&r->c, c);                 /* presumably transfers C's data to the record -- confirm */
+  r->idx = pq->idx++;                   /* Insertion order, the final tie-break. */
  
-  /* Record initial run. */
-  if (irs->casefile != NULL) 
-    {
-      casefile_sleep (irs->casefile);
-      if (xsrt->run_cnt >= xsrt->run_cap) 
-        {
-          xsrt->run_cap *= 2;
-          xsrt->runs = xnrealloc (xsrt->runs,
-                                  xsrt->run_cap, sizeof *xsrt->runs);
-        }
-      xsrt->runs[xsrt->run_cnt++] = irs->casefile;
-      if (casefile_error (irs->casefile))
-        irs->okay = false;
-      irs->casefile = NULL; 
-    }
+  push_heap (pq->records, pq->record_cnt, sizeof *pq->records,
+             compare_pqueue_records_minheap, pq);
  }
  
-/* Writes a record to the current initial run. */
  static void
-output_record (struct initial_run_state *irs)
+pqueue_pop (struct pqueue *pq, struct ccase *c, casenumber *id)   /* Pops a record into *C, *ID. */
  {
-  struct record_run *record_run;
-  struct ccase case_tmp;
-  
-  /* Extract minimum case from heap. */
-  assert (irs->record_cnt > 0);
-  pop_heap (irs->records, irs->record_cnt--, sizeof *irs->records,
-            compare_record_run_minheap, irs);
-  record_run = irs->records + irs->record_cnt;
-
-  /* Bail if an error has occurred. */
-  if (!irs->okay)
-    return;
-
-  /* Start new run if necessary. */
-  assert (record_run->run == irs->run
-          || record_run->run == irs->run + 1);
-  if (record_run->run != irs->run)
-    {
-      end_run (irs);
-      start_run (irs);
-    }
-  assert (record_run->run == irs->run);
-  irs->case_cnt++;
+  struct pqueue_record *r;
  
-  /* Write to disk. */
-  if (irs->casefile != NULL)
-    casefile_append (irs->casefile, &record_run->record);
-
-  /* This record becomes last_output. */
-  irs->last_output = case_tmp = record_run->record;
-  record_run->record = irs->records[irs->record_cap - 1].record;
-  irs->records[irs->record_cap - 1].record = case_tmp;
-}
-\f
-/* Merging. */
+  assert (!pqueue_is_empty (pq));       /* Caller must check for emptiness. */
  
-static int choose_merge (struct casefile *runs[], int run_cnt, int order);
-static struct casefile *merge_once (struct external_sort *,
-                                    struct casefile *[], size_t);
+  pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records,
+            compare_pqueue_records_minheap, pq);
  
-/* Repeatedly merges run until only one is left,
-   and returns the final casefile.
-   Returns a null pointer if an I/O error occurs. */
-static struct casefile *
-merge (struct external_sort *xsrt)
-{
-  while (xsrt->run_cnt > 1)
-    {
-      int order = MIN (MAX_MERGE_ORDER, xsrt->run_cnt);
-      int idx = choose_merge (xsrt->runs, xsrt->run_cnt, order);
-      xsrt->runs[idx] = merge_once (xsrt, xsrt->runs + idx, order);
-      remove_range (xsrt->runs, xsrt->run_cnt, sizeof *xsrt->runs,
-                    idx + 1, order - 1);
-      xsrt->run_cnt -= order - 1;
-
-      if (xsrt->runs[idx] == NULL)
-        return NULL;
-    }
-  assert (xsrt->run_cnt == 1);
-  xsrt->run_cnt = 0;
-  return xsrt->runs[0];
+  r = &pq->records[pq->record_cnt];     /* Old last slot; pop_heap presumably left the minimum here. */
+  *id = r->id;
+  case_move (c, &r->c);                 /* presumably transfers the record's data to C -- confirm */
  }
  
-/* Chooses ORDER runs out of the RUN_CNT runs in RUNS to merge,
-   and returns the index of the first one.
-
-   For stability, we must merge only consecutive runs.  For
-   efficiency, we choose the shortest consecutive sequence of
-   runs. */
+/* Compares records A and B on run id, then case data, then insertion
+   order (idx), returning the negated (descending) strcmp()-type result. */
  static int
-choose_merge (struct casefile *runs[], int run_cnt, int order) 
+compare_pqueue_records_minheap (const void *a_, const void *b_,
+                                const void *pq_) 
  {
-  int min_idx, min_sum;
-  int cur_idx, cur_sum;
-  int i;
-
-  /* Sum up the length of the first ORDER runs. */
-  cur_sum = 0;
-  for (i = 0; i < order; i++)
-    cur_sum += casefile_get_case_cnt (runs[i]);
-
-  /* Find the shortest group of ORDER runs,
-     using a running total for efficiency. */
-  min_idx = 0;
-  min_sum = cur_sum;
-  for (cur_idx = 1; cur_idx + order <= run_cnt; cur_idx++)
-    {
-      cur_sum -= casefile_get_case_cnt (runs[cur_idx - 1]);
-      cur_sum += casefile_get_case_cnt (runs[cur_idx + order - 1]);
-      if (cur_sum < min_sum)
-        {
-          min_sum = cur_sum;
-          min_idx = cur_idx;
-        }
-    }
-
-  return min_idx;
-}
-
-/* Merges the RUN_CNT initial runs specified in INPUT_FILES into a
-   new run, and returns the new run.
-   Returns a null pointer if an I/O error occurs. */
-static struct casefile *
-merge_once (struct external_sort *xsrt,
-            struct casefile **const input_files,
-            size_t run_cnt)
-{
-  struct run
-    {
-      struct casefile *file;
-      struct casereader *reader;
-      struct ccase ccase;
-    }
-  *runs;
-
-  struct casefile *output = NULL;
-  int i;
-
-  /* Open input files. */
-  runs = xnmalloc (run_cnt, sizeof *runs);
-  for (i = 0; i < run_cnt; i++) 
-    {
-      struct run *r = &runs[i];
-      r->file = input_files[i];
-      r->reader = casefile_get_destructive_reader (r->file);
-      if (!casereader_read_xfer (r->reader, &r->ccase))
-        {
-          run_cnt--;
-          i--;
-        }
-    }
-
-  /* Create output file. */
-  output = xsrt->factory->create_casefile (xsrt->factory, xsrt->value_cnt);
-  casefile_to_disk (output);
-
-  /* Merge. */
-  while (run_cnt > 0) 
-    {
-      struct run *min_run, *run;
-      
-      /* Find minimum. */
-      min_run = runs;
-      for (run = runs + 1; run < runs + run_cnt; run++)
-       if (compare_record (&run->ccase, &min_run->ccase, xsrt->criteria) < 0)
-          min_run = run;
-
-      /* Write minimum to output file. */
-      casefile_append_xfer (output, &min_run->ccase);
-
-      /* Read another case from minimum run. */
-      if (!casereader_read_xfer (min_run->reader, &min_run->ccase))
-        {
-          if (casefile_error (min_run->file) || casefile_error (output))
-            goto error;
-          casereader_destroy (min_run->reader);
-          casefile_destroy (min_run->file);
-
-          remove_element (runs, run_cnt, sizeof *runs, min_run - runs);
-          run_cnt--;
-        } 
-    }
-
-  if (!casefile_sleep (output))
-    goto error;
-  free (runs);
-
-  return output;
-
- error:
-  for (i = 0; i < run_cnt; i++) 
-    casefile_destroy (runs[i].file);
-  casefile_destroy (output);
-  free (runs);
-  return NULL;
+  const struct pqueue_record *a = a_;
+  const struct pqueue_record *b = b_;
+  const struct pqueue *pq = pq_;
+  int result = a->id < b->id ? -1 : a->id > b->id;   /* Earlier run first. */
+  if (result == 0)
+    result = case_ordering_compare_cases (&a->c, &b->c, pq->ordering);
+  if (result == 0)
+    result = a->idx < b->idx ? -1 : a->idx > b->idx;   /* Stable tie-break. */
+  return -result;               /* Negated: the smallest record compares greatest. */
  }
diff --git a/src/math/sort.h b/src/math/sort.h

index c6f86e9f4d39990a851b0b02c8a34fbfd164a4a6..0a0fe4ce8dab32e225f1b91ecb31b8ea2365d1c9 100644 (file)
--- a/src/math/sort.h
+++ b/src/math/sort.h
@@ -1,5 +1,5 @@
  /* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or
     modify it under the terms of the GNU General Public License as
@@ -16,57 +16,18 @@
     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     02110-1301, USA. */
  
-#if !sort_h
-#define sort_h 1
+#ifndef MATH_SORT_H
+#define MATH_SORT_H 1
  
  #include <stddef.h>
  #include <stdbool.h>
  
-struct casereader;
-struct dictionary;
-struct variable;
-struct casefile_factory;
+struct case_ordering;
  
  extern int min_buffers ;
  extern int max_buffers ;
-extern bool allow_internal_sort ;
  
+struct casewriter *sort_create_writer (struct case_ordering *);
+struct casereader *sort_execute (struct casereader *, struct case_ordering *);
  
-/* Sort direction. */
-enum sort_direction
-  {
-    SRT_ASCEND,                        /* A, B, C, ..., X, Y, Z. */
-    SRT_DESCEND                        /* Z, Y, X, ..., C, B, A. */
-  };
-
-/* A sort criterion. */
-struct sort_criterion
-  {
-    int fv;                     /* Variable data index. */
-    int width;                  /* 0=numeric, otherwise string width. */
-    enum sort_direction dir;    /* Sort direction. */
-  };
-
-/* A set of sort criteria. */
-struct sort_criteria 
-  {
-    struct sort_criterion *crits;
-    size_t crit_cnt;
-  };
-
-
-void sort_destroy_criteria (struct sort_criteria *);
-
-struct casefile *sort_execute (struct casereader *,
-                               const struct sort_criteria *,
-                              struct casefile_factory *
-                              );
-
-struct dataset ;
-bool sort_active_file_in_place (struct dataset *ds, 
-                               const struct sort_criteria *);
-
-struct casefile *sort_active_file_to_casefile (struct dataset *ds, 
-                                              const struct sort_criteria *);
-
-#endif /* !sort_h */
+#endif /* math/sort.h */
diff --git a/src/ui/ChangeLog b/src/ui/ChangeLog

index 3f59ee0166dcdd18e2487271a5ec148344e2dba8..38443f8ea8bad0b607ae23b34ee59a6cef420131 100644 (file)
--- a/src/ui/ChangeLog
+++ b/src/ui/ChangeLog
@@ -1,3 +1,13 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       Adapt case sources, sinks, and clients of procedure code to the
+       new infrastructure.
+       
+       * automake.mk: Remove files.
+
+       * flexifile.c: Removed, dead code.
+       * flexifile.h: Ditto.
+
  Thu Feb  8 06:34:52 2007  Ben Pfaff  <blp@gnu.org>
  
         * [!(HAVE_SYS_TYPES_H && HAVE_SYS_WAIT_H)] (connect_debugger) In
diff --git a/src/ui/automake.mk b/src/ui/automake.mk

index f2d56bad2d337d64eb523444064c54c163492c7c..e1915b283f666bc45dfadb3edc19c3b68131f920 100644 (file)
--- a/src/ui/automake.mk
+++ b/src/ui/automake.mk
@@ -10,6 +10,4 @@ noinst_LIBRARIES += src/ui/libuicommon.a
  
  src_ui_libuicommon_a_SOURCES = \
         src/ui/debugger.c \
-       src/ui/debugger.h \
-       src/ui/flexifile.c \
-       src/ui/flexifile.h
+       src/ui/debugger.h
diff --git a/src/ui/flexifile.c b/src/ui/flexifile.c

deleted file mode 100644 (file)

index 339764e..0000000
--- a/src/ui/flexifile.c
+++ /dev/null
@@ -1,409 +0,0 @@
-/* PSPP - computes sample statistics.
-
-   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#include <config.h>
-#include <xalloc.h>
-#include <assert.h>
-#include "flexifile.h"
-#include <string.h>
-#include <data/casefile.h>
-#include <data/casefile-private.h>
-#include <data/case.h>
-#include <libpspp/compiler.h>
-
-
-struct class_flexifile
-{
-  struct class_casefile parent;
-
-  bool (*get_case) (const struct flexifile *, unsigned long, struct ccase *);
-
-  bool (*insert_case) (struct flexifile *, struct ccase *, int );
-  bool (*delete_cases) (struct flexifile *, int, int );
-
-  bool (*resize) (struct flexifile *, int, int );
-};
-
-static const struct class_flexifile class;
-
-#define CLASS_FLEXIFILE(K)  ((struct class_flexifile *) K)
-#define CONST_CLASS_FLEXIFILE(K) ((const struct class_flexifile *) K)
-
-
-/* A flexifile. */
-struct flexifile
-{
-  struct casefile cf;          /* Parent */
-
-  size_t value_cnt;            /* Case size in `union value's. */
-  unsigned long case_cnt;      /* Number of cases stored. */
-
-
-  /* Memory storage. */
-  struct ccase *cases;         /* Pointer to array of cases. */
-  unsigned long capacity;       /* size of array in cases */
-};
-
-struct class_flexifilereader
-{
-  struct class_casereader parent ;
-};
-
-static const struct class_flexifilereader class_reader;
-
-/* For reading out the cases in a flexifile. */
-struct flexifilereader
-{
-  struct casereader cr;                /* Parent */
-
-  unsigned long case_idx;      /* Case number of current case. */
-  bool destructive;            /* Is this a destructive reader? */
-};
-
-
-
-#define CHUNK_SIZE 10
-
-static bool
-impl_get_case(const struct flexifile *ff, unsigned long casenum,
-             struct ccase *);
-static bool
-impl_insert_case (struct flexifile *ff, struct ccase *c, int posn);
-
-static bool
-impl_delete_cases (struct flexifile *ff, int n_cases, int first);
-
-static bool
-impl_resize (struct flexifile *ff, int n_values, int posn);
-
-
-/* Gets a case, for which writing may not be safe */
-bool
-flexifile_get_case(const struct flexifile *ff, unsigned long casenum,
-                  struct ccase *c)
-{
-  const struct class_flexifile *class =
-    CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ;
-
-  return class->get_case(ff, casenum, c);
-}
-
-
-/* Insert N_VALUES before POSN.
-   If N_VALUES is negative, then deleted -N_VALUES instead
-*/
-bool
-flexifile_resize (struct flexifile *ff, int n_values, int posn)
-{
-  const struct class_flexifile *class =
-    CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ;
-
-  return class->resize(ff, n_values, posn);
-}
-
-
-
-bool
-flexifile_insert_case (struct flexifile *ff, struct ccase *c, int posn)
-{
-  const struct class_flexifile *class =
-    CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ;
-
-  return class->insert_case(ff, c, posn);
-}
-
-
-bool
-flexifile_delete_cases (struct flexifile *ff, int n_cases, int first)
-{
-  const struct class_flexifile *class =
-    CONST_CLASS_FLEXIFILE (CONST_CASEFILE(ff)->class) ;
-
-  return class->delete_cases (ff, n_cases, first);
-}
-
-
-static unsigned long
-flexifile_get_case_cnt (const struct casefile *cf)
-{
-  return FLEXIFILE(cf)->case_cnt;
-}
-
-static size_t
-flexifile_get_value_cnt (const struct casefile *cf)
-{
-  return FLEXIFILE(cf)->value_cnt;
-}
-
-
-static void
-flexifile_destroy (struct casefile *cf)
-{
-  int i ;
-  for ( i = 0 ; i < FLEXIFILE(cf)->case_cnt; ++i )
-    case_destroy( &FLEXIFILE(cf)->cases[i]);
-
-  free(FLEXIFILE(cf)->cases);
-}
-
-static void
-grow(struct flexifile *ff)
-{
-  ff->capacity += CHUNK_SIZE;
-  ff->cases = xrealloc(ff->cases, ff->capacity * sizeof ( *ff->cases) );
-}
-
-static bool
-flexifile_append (struct casefile *cf, const struct ccase *c)
-{
-  struct flexifile *ff =  FLEXIFILE(cf);
-
-  if (ff->case_cnt >= ff->capacity)
-    grow(ff);
-
-  case_clone (&ff->cases[ff->case_cnt++], c);
-
-  return true;
-}
-
-static unsigned long
-flexifilereader_cnum (const struct casereader *cr)
-{
-  struct flexifilereader *ffr = FLEXIFILEREADER(cr);
-
-  return ffr->case_idx;
-}
-
-static struct ccase *
-flexifilereader_get_next_case (struct casereader *cr)
-{
-  struct flexifilereader *ffr = FLEXIFILEREADER(cr);
-  struct flexifile *ff = FLEXIFILE(casereader_get_casefile(cr));
-
-  if ( ffr->case_idx >= ff->case_cnt)
-    return NULL;
-
-  return &ff->cases[ffr->case_idx++];
-}
-
-static void
-flexifilereader_destroy(struct casereader *r)
-{
-  free(r);
-}
-
-static struct casereader *
-flexifile_get_reader (const struct casefile *cf_)
-{
-  struct casefile *cf = (struct casefile *) cf_;
-  struct flexifilereader *ffr = xzalloc (sizeof *ffr);
-  struct casereader *reader = (struct casereader *) ffr;
-
-  casereader_register (cf, reader, CLASS_CASEREADER(&class_reader));
-
-  return reader;
-}
-
-
-static struct casereader *
-flexifilereader_clone (const struct casereader *cr)
-{
-  const struct flexifilereader *ffr = (const struct flexifilereader *) cr;
-  struct flexifilereader *new_ffr = xzalloc (sizeof *new_ffr);
-  struct casereader *new_reader = (struct casereader *) new_ffr;
-  struct casefile *cf = casereader_get_casefile (cr);
-
-  casereader_register (cf, new_reader, CLASS_CASEREADER(&class_reader));
-
-  new_ffr->case_idx = ffr->case_idx ;
-  new_ffr->destructive = ffr->destructive ;
-
-  return new_reader;
-}
-
-
-static bool
-flexifile_in_core(const struct casefile *cf UNUSED)
-{
-  /* Always in memory */
-  return true;
-}
-
-static bool
-flexifile_error (const struct casefile *cf UNUSED )
-{
-  return false;
-}
-
-
-struct casefile *
-flexifile_create (size_t value_cnt)
-{
-  struct flexifile *ff = xzalloc (sizeof *ff);
-  struct casefile *cf = (struct casefile *) ff;
-
-  casefile_register (cf, (struct class_casefile *) &class);
-
-  ff->value_cnt = value_cnt;
-
-  ff->cases = xzalloc(sizeof (struct ccase *) * CHUNK_SIZE);
-  ff->capacity = CHUNK_SIZE;
-
-  return cf;
-}
-
-static const struct class_flexifile class = {
-  {
-    flexifile_destroy,
-    flexifile_error,
-    flexifile_get_value_cnt,
-    flexifile_get_case_cnt,
-    flexifile_get_reader,
-    flexifile_append,
-
-    flexifile_in_core,
-    0, /* to_disk */
-    0 /* sleep */
-  },
-
-  impl_get_case ,
-  impl_insert_case ,
-  impl_delete_cases,
-  impl_resize,
-};
-
-
-static const struct class_flexifilereader class_reader =
-  {
-    {
-      flexifilereader_get_next_case,
-      flexifilereader_cnum,
-      flexifilereader_destroy,
-      flexifilereader_clone
-    }
-  };
-
-
-/* Implementations of class methods */
-
-static bool
-impl_get_case(const struct flexifile *ff, unsigned long casenum,
-             struct ccase *c)
-{
-  if ( casenum >= ff->case_cnt)
-    return false;
-
-  case_clone (c, &ff->cases[casenum]);
-
-  return true;
-}
-
-#if DEBUGGING
-#include <stdio.h>
-
-static void 
-dumpcasedata(struct ccase *c)
-{
-  size_t value_cnt = case_get_value_cnt (c);
-  int i;
-  for ( i = 0 ; i < value_cnt * MAX_SHORT_STRING; ++i )
-    putchar (case_str (c, 0)[i]);
-  putchar('\n');
-}
-#endif
-
-static bool
-impl_resize (struct flexifile *ff, int n_values, int posn)
-{
-  int i;
-
-  for( i = 0 ; i < ff->case_cnt ; ++i )
-    {
-      struct ccase c;
-      case_create (&c, ff->value_cnt + n_values);
-
-      case_copy (&c, 0, &ff->cases[i], 0, posn);
-      if ( n_values > 0 )
-       memset (case_data_rw_idx(&c, posn), ' ', n_values * MAX_SHORT_STRING) ;
-      case_copy (&c, posn + n_values,
-                &ff->cases[i], posn, ff->value_cnt - posn);
-
-      case_destroy (&ff->cases[i]);
-      ff->cases[i] = c;
-    }
-
-  ff->value_cnt += n_values;
-
-  return true;
-}
-
-static bool
-impl_insert_case (struct flexifile *ff, struct ccase *c, int posn)
-{
-  int i;
-  struct ccase blank;
-
-  assert (ff);
-
-  if ( posn > ff->case_cnt )
-    return false;
-
-  if ( posn >= ff->capacity )
-    grow(ff);
-
-  case_create(&blank, ff->value_cnt);
-
-  flexifile_append(CASEFILE(ff), &blank);
-
-  case_destroy(&blank);
-
-  /* Shift the existing cases down one */
-  for ( i = ff->case_cnt ; i > posn; --i)
-      case_move(&ff->cases[i], &ff->cases[i-1]);
-
-  case_clone (&ff->cases[posn], c);
-
-  return true;
-}
-
-
-static bool
-impl_delete_cases (struct flexifile *ff, int n_cases, int first)
-{
-  int i;
-
-  if ( ff->case_cnt < first + n_cases )
-    return false;
-
-  for ( i = first ; i < first + n_cases; ++i )
-    case_destroy (&ff->cases[i]);
-
-  /* Shift the cases up by N_CASES */
-  for ( i = first; i < ff->case_cnt - n_cases; ++i )
-    {
-      case_move (&ff->cases[i], &ff->cases[i+ n_cases]);
-    }
-
-  ff->case_cnt -= n_cases;
-
-  return true;
-}
-
-
-
diff --git a/src/ui/flexifile.h b/src/ui/flexifile.h

deleted file mode 100644 (file)

index d1dc6b4..0000000
--- a/src/ui/flexifile.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#ifndef FLEXIFILE_H
-#define FLEXIFILE_H
-
-#include <config.h>
-#include <stdbool.h>
-#include <stdlib.h>
-
-struct ccase;
-struct casefile;
-struct casereader;
-struct flexifile;
-struct flexifilereader;
-
-#define FLEXIFILE(CF) ( (struct flexifile *) CF)
-#define FLEXIFILEREADER(CR) ( (struct flexifilereader *) CR)
-
-struct casefile *flexifile_create (size_t value_cnt);
-
-bool flexifile_get_case(const struct flexifile *ff, unsigned long casenum, 
-                       struct ccase *const c);
-
-bool flexifile_resize (struct flexifile *ff, int n_values, int posn);
-
-bool flexifile_insert_case (struct flexifile *ff, struct ccase *c, int posn);
-bool flexifile_delete_cases (struct flexifile *ff, int n_cases, int first);
-
-
-#endif
diff --git a/src/ui/gui/ChangeLog b/src/ui/gui/ChangeLog

index 82ea6d63da4c6bdca9c3e27c5cff37e2123d3b33..4b2fc7c4bb262afc60c09ec966e8e67ccde3f3ba 100644 (file)
--- a/src/ui/gui/ChangeLog
+++ b/src/ui/gui/ChangeLog
@@ -1,3 +1,19 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       Adapt case sources, sinks, and clients of procedure code to the
+       new infrastructure.
+       
+       * automake.mk: Removed files.
+
+       * flexifile-factory.c: Removed, dead code.
+       * flexifile-factory.h: Ditto.
+
+       * helper.c: Adapt to new procedure and datasheet code.
+       * missing-val-dialog.c: Ditto.
+       * psppire-case-file.c: Ditto.
+       * psppire-data-store.c: Ditto.
+       * psppire.c: Ditto.
+
  2007-06-03  Ben Pfaff  <blp@gnu.org>
  
         * psppire-var-store.c (psppire_var_store_item_editable): Use
diff --git a/src/ui/gui/automake.mk b/src/ui/gui/automake.mk

index 969ede199ea43dfe5104f7d47974aed7a8caf17b..d14f54a4cdbf8c58cbf554aa79f80796b6400a50 100644 (file)
--- a/src/ui/gui/automake.mk
+++ b/src/ui/gui/automake.mk
@@ -84,8 +84,6 @@ src_ui_gui_psppire_SOURCES = \
         src/ui/gui/dialog-common.h \
         src/ui/gui/dict-display.c \
         src/ui/gui/dict-display.h \
-       src/ui/gui/flexifile-factory.h \
-       src/ui/gui/flexifile-factory.c \
         src/ui/gui/main.c \
         src/ui/gui/message-dialog.c \
         src/ui/gui/message-dialog.h \
diff --git a/src/ui/gui/flexifile-factory.c b/src/ui/gui/flexifile-factory.c

deleted file mode 100644 (file)

index cdb7d9d..0000000
--- a/src/ui/gui/flexifile-factory.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#include <config.h>
-
-#include <stdlib.h>
-#include <libpspp/alloc.h>
-#include <libpspp/compiler.h>
-#include "flexifile-factory.h"
-#include <ui/flexifile.h>
-#include <data/casefile-factory.h>
-
-
-struct flexifile_factory
- {
-   struct casefile_factory parent;
- };
-
-
-static struct casefile *
-produce_flexifile (struct casefile_factory *this UNUSED, size_t value_cnt)
-{
-  struct casefile *ff =  flexifile_create (value_cnt);
-
-  return ff;
-}
-
-
-struct casefile_factory *
-flexifile_factory_create (void)
-{
-  struct flexifile_factory *fact = xzalloc (sizeof (*fact));
-
-  fact->parent.create_casefile = produce_flexifile;
-
-  return (struct casefile_factory *) fact;
-}
-
-
-void
-flexifile_factory_destroy (struct casefile_factory *factory)
-{
-  free (factory);
-}
diff --git a/src/ui/gui/flexifile-factory.h b/src/ui/gui/flexifile-factory.h

deleted file mode 100644 (file)

index eecb901..0000000
--- a/src/ui/gui/flexifile-factory.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2006 Free Software Foundation, Inc.
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
-
-#ifndef FLEXIFILE_FACTORY_H
-#define FLEXIFILE_FACTORY_H
-
-
-struct casefile_factory ;
-
-struct casefile_factory * flexifile_factory_create (void);
-void flexifile_factory_destroy (struct casefile_factory *);
-
-#endif
diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c

index 69b5d4376cb6198248c42b34c82d9496a19230c9..f8e3ddba23e212723e84d95aed2eeb1458faad47 100644 (file)
--- a/src/ui/gui/helper.c
+++ b/src/ui/gui/helper.c
@@ -29,7 +29,6 @@
  #include <data/data-in.h>
  #include <data/data-out.h>
  #include <data/dictionary.h>
-#include <data/storage-stream.h>
  #include <libpspp/message.h>
  
  #include <libpspp/i18n.h>
@@ -171,7 +170,7 @@ execute_syntax (struct getl_interface *sss)
  {
    struct lexer *lexer;
  
-  g_return_val_if_fail (proc_has_source (the_dataset), FALSE);
+  g_return_val_if_fail (proc_has_active_file (the_dataset), FALSE);
  
    lexer = lex_create (the_source_stream);
  
@@ -189,18 +188,10 @@ execute_syntax (struct getl_interface *sss)
  
    lex_destroy (lexer);
  
-  /* The GUI must *always* have a data source, even if it's an empty one.
-     Therefore, we find that there is none, (for example NEW FILE was the last
-     item in the syntax) then we create a new one. */
-  if ( ! proc_has_source (the_dataset))
-    proc_set_source (the_dataset,
-                    storage_source_create (the_data_store->case_file->flexifile)
-                    );
-
    /* GUI syntax needs this implicit EXECUTE command at the end of
       every script.  Otherwise commands like GET could leave the GUI without
       a casefile. */
-  return procedure (the_dataset, NULL, NULL);
+  return proc_execute (the_dataset);
  }
  
  
diff --git a/src/ui/gui/missing-val-dialog.c b/src/ui/gui/missing-val-dialog.c

index 63403d92ae30de2033d5b37e648ab2abf03171fa..86f7d86f464c8a7fce6824e1f7f1b1fc74e67c8f 100644 (file)
--- a/src/ui/gui/missing-val-dialog.c
+++ b/src/ui/gui/missing-val-dialog.c
@@ -90,8 +90,8 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data)
        gint nvals = 0;
        gint badvals = 0;
        gint i;
-      mv_clear (&dialog->mvl);
-      for (i = 0 ; i < 3 ; ++i )
+      mv_clear(&dialog->mvl);
+      for(i = 0 ; i < 3 ; ++i ) 
         {
           gchar *text =
             g_strdup (gtk_entry_get_text (GTK_ENTRY (dialog->mv[i])));
diff --git a/src/ui/gui/psppire-case-file.c b/src/ui/gui/psppire-case-file.c

index ec50b1525b74d516de207ddd4e8a33277cdb06a8..8a4f196df7fd17b58d2cc41edda66d31c6518df2 100644 (file)
--- a/src/ui/gui/psppire-case-file.c
+++ b/src/ui/gui/psppire-case-file.c
@@ -26,13 +26,14 @@
  #include <gtksheet/gtkextra-marshal.h>
  
  #include <data/case.h>
-#include <ui/flexifile.h>
-#include "flexifile-factory.h"
-#include <data/casefile.h>
  #include <data/data-in.h>
+#include <data/datasheet.h>
  #include <math/sort.h>
  #include <libpspp/misc.h>
  
+#include "xalloc.h"
+#include "xallocsa.h"
+
  /* --- prototypes --- */
  static void psppire_case_file_class_init       (PsppireCaseFileClass   *class);
  static void psppire_case_file_init     (PsppireCaseFile        *case_file);
@@ -132,8 +133,7 @@ psppire_case_file_finalize (GObject *object)
  {
    PsppireCaseFile *cf = PSPPIRE_CASE_FILE (object);
  
-  if ( cf->flexifile)
-    casefile_destroy (cf->flexifile);
+  datasheet_destroy (cf->datasheet);
  
    G_OBJECT_CLASS (parent_class)->finalize (object);
  }
@@ -141,7 +141,7 @@ psppire_case_file_finalize (GObject *object)
  static void
  psppire_case_file_init (PsppireCaseFile *cf)
  {
-  cf->flexifile = 0;
+  cf->datasheet = NULL;
  }
  
  
@@ -156,16 +156,16 @@ psppire_case_file_new (void)
  {
    PsppireCaseFile *cf = g_object_new (G_TYPE_PSPPIRE_CASE_FILE, NULL);
  
-  cf->flexifile = flexifile_create (0);
+  cf->datasheet = datasheet_create (NULL);
  
    return cf;
  }
  
  
  void
-psppire_case_file_replace_flexifile (PsppireCaseFile *cf, struct flexifile *ff)
+psppire_case_file_replace_datasheet (PsppireCaseFile *cf, struct datasheet *ds)
  {
-  cf->flexifile = (struct casefile *) ff;
+  cf->datasheet = ds;
  }
  
  
@@ -173,16 +173,14 @@ psppire_case_file_replace_flexifile (PsppireCaseFile *cf, struct flexifile *ff)
  gboolean
  psppire_case_file_delete_cases (PsppireCaseFile *cf, gint n_cases, gint first)
  {
-  int result;
-
    g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
  
-  result =  flexifile_delete_cases (FLEXIFILE (cf->flexifile), n_cases,  first);
+  datasheet_delete_rows (cf->datasheet, first, n_cases);
  
    g_signal_emit (cf, signals [CASES_DELETED], 0, n_cases, first);
  
-  return result;
+  return TRUE;
  }
  
  /* Insert case CC into the case file before POSN */
@@ -191,12 +189,14 @@ psppire_case_file_insert_case (PsppireCaseFile *cf,
                               struct ccase *cc,
                               gint posn)
  {
+  struct ccase tmp;
    bool result ;
  
    g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
  
-  result = flexifile_insert_case (FLEXIFILE (cf->flexifile), cc, posn);
+  case_clone (&tmp, cc);
+  result = datasheet_insert_rows (cf->datasheet, posn, &tmp, 1);
  
    if ( result )
      g_signal_emit (cf, signals [CASE_INSERTED], 0, posn);
@@ -212,15 +212,17 @@ gboolean
  psppire_case_file_append_case (PsppireCaseFile *cf,
                               struct ccase *c)
  {
+  struct ccase tmp;
    bool result ;
    gint posn ;
  
    g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
  
-  posn = casefile_get_case_cnt (cf->flexifile);
+  posn = datasheet_get_row_cnt (cf->datasheet);
  
-  result = casefile_append (cf->flexifile, c);
+  case_clone (&tmp, c);
+  result = datasheet_insert_rows (cf->datasheet, posn, &tmp, 1);
  
    g_signal_emit (cf, signals [CASE_INSERTED], 0, posn);
  
@@ -233,69 +235,68 @@ psppire_case_file_get_case_count (const PsppireCaseFile *cf)
  {
    g_return_val_if_fail (cf, FALSE);
  
-  if ( ! cf->flexifile)
+  if ( ! cf->datasheet)
      return 0;
  
-  return casefile_get_case_cnt (cf->flexifile);
+  return datasheet_get_row_cnt (cf->datasheet);
  }
  
-/* Return the IDXth value from case CASENUM.
-   The return value must not be freed or written to
- */
-const union value *
-psppire_case_file_get_value (const PsppireCaseFile *cf, gint casenum, gint idx)
+/* Copies the IDXth value from case CASENUM into VALUE.
+   If VALUE is null, then memory for the value is allocated with
+   malloc.  Returns the value if successful, NULL on failure. */
+union value *
+psppire_case_file_get_value (const PsppireCaseFile *cf,
+                             casenumber casenum, size_t idx,
+                             union value *value, int width)
  {
-  const union value *v;
-  struct ccase c;
-
-  g_return_val_if_fail (cf, NULL);
-  g_return_val_if_fail (cf->flexifile, NULL);
-
-  g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), NULL);
+  bool allocated;
+  
+  g_return_val_if_fail (cf, false);
+  g_return_val_if_fail (cf->datasheet, false);
  
-  flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &c);
+  g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), false);
  
-  v = case_data_idx (&c, idx);
-  case_destroy (&c);
-
-  return v;
+  if (value == NULL) 
+    {
+      value = xnmalloc (value_cnt_from_width (width), sizeof *value);
+      allocated = true;
+    }
+  else
+    allocated = false;
+  if (!datasheet_get_value (cf->datasheet, casenum, idx, value, width))
+    {
+      if (allocated) 
+        free (value);
+      value = NULL;
+    }
+  return value;
  }
  
  void
  psppire_case_file_clear (PsppireCaseFile *cf)
  {
-  casefile_destroy (cf->flexifile);
-  cf->flexifile = 0;
+  datasheet_destroy (cf->datasheet);
+  cf->datasheet = NULL;
    g_signal_emit (cf, signals [CASES_DELETED], 0, 0, -1);
  }
  
-/* Set the IDXth value of case C to SYSMIS/EMPTY */
+/* Set the IDXth value of case CASENUM to V.
+   Returns true if successful, false on I/O error. */
  gboolean
  psppire_case_file_set_value (PsppireCaseFile *cf, gint casenum, gint idx,
                             union value *v, gint width)
  {
-  struct ccase cc ;
-  int bytes;
+  bool ok;
  
    g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
-
-  g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
  
-  if ( ! flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &cc) )
-    return FALSE;
+  g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), FALSE);
  
-  if ( width == 0 )
-    bytes = MAX_SHORT_STRING;
-  else
-    bytes = DIV_RND_UP (width, MAX_SHORT_STRING) * MAX_SHORT_STRING ;
-
-  /* Cast away const in flagrant abuse of the casefile */
-  memcpy ((union value *)case_data_idx (&cc, idx), v, bytes);
-
-  g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
-
-  return TRUE;
+  ok = datasheet_put_value (cf->datasheet, casenum, idx, v, width);
+  if (ok)
+    g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
+  return ok;
  }
  
  
@@ -305,49 +306,43 @@ gboolean
  psppire_case_file_data_in (PsppireCaseFile *cf, gint casenum, gint idx,
                            struct substring input, const struct fmt_spec *fmt)
  {
-  struct ccase cc ;
+  union value *value;
+  int width;
+  bool ok;
  
    g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
  
-  g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), FALSE);
+  g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), FALSE);
  
-  if ( ! flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &cc) )
-    return FALSE;
+  width = fmt_var_width (fmt);
+  value = xallocsa (value_cnt_from_width (width) * sizeof *value);
+  ok = (datasheet_get_value (cf->datasheet, casenum, idx, value, width)
+        && data_in (input, fmt->type, 0, 0, value, width)
+        && datasheet_put_value (cf->datasheet, casenum, idx, value, width));
  
-  /* Cast away const in flagrant abuse of the casefile */
-  if (!data_in (input, fmt->type, 0, 0,
-                (union value *) case_data_idx (&cc, idx), fmt_var_width (fmt)))
-    g_warning ("Cant set value\n");
+  if (ok)
+    g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
  
-  g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
+  freesa (value);
  
    return TRUE;
  }
  
  
  void
-psppire_case_file_sort (PsppireCaseFile *cf, const struct sort_criteria *sc)
+psppire_case_file_sort (PsppireCaseFile *cf, struct case_ordering *ordering)
  {
+  struct casereader *sorted_data;
    gint c;
  
-  struct casereader *reader = casefile_get_reader (cf->flexifile, NULL);
-  struct casefile *cfile;
-
-  struct casefile_factory *factory  = flexifile_factory_create ();
-
-  cfile = sort_execute (reader, sc, factory);
-
-  casefile_destroy (cf->flexifile);
-
-  cf->flexifile = cfile;
+  sorted_data = sort_execute (datasheet_make_reader (cf->datasheet), ordering);
+  cf->datasheet = datasheet_create (sorted_data);
  
    /* FIXME: Need to have a signal to change a range of cases, instead of
       calling a signal many times */
-  for ( c = 0 ; c < casefile_get_case_cnt (cf->flexifile) ; ++c )
+  for ( c = 0 ; c < datasheet_get_row_cnt (cf->datasheet) ; ++c )
      g_signal_emit (cf, signals [CASE_CHANGED], 0, c);
-
-  flexifile_factory_destroy (factory);
  }
  
  
@@ -357,16 +352,17 @@ gboolean
  psppire_case_file_insert_values (PsppireCaseFile *cf,
                                  gint n_values, gint before)
  {
+  union value *values;
    g_return_val_if_fail (cf, FALSE);
  
-  if ( ! cf->flexifile )
-    {
-      cf->flexifile = flexifile_create (n_values);
+  if ( ! cf->datasheet )
+    cf->datasheet = datasheet_create (NULL);
  
-      return TRUE;
-    }
+  values = xcalloc (n_values, sizeof *values);
+  datasheet_insert_columns (cf->datasheet, values, n_values, before);
+  free (values);
  
-  return flexifile_resize (FLEXIFILE (cf->flexifile), n_values, before);
+  return TRUE;
  }
  
  /* Fills C with the CASENUMth case.
@@ -377,7 +373,7 @@ psppire_case_file_get_case (const PsppireCaseFile *cf, gint casenum,
                            struct ccase *c)
  {
    g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
  
-  return flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, c);
+  return datasheet_get_row (cf->datasheet, casenum, c);
  }
diff --git a/src/ui/gui/psppire-case-file.h b/src/ui/gui/psppire-case-file.h

index fd8af07916c006714336c901910d077cae2d272e..b3fdfcdaa787c2ffea5afd0318fc5f73470cca04 100644 (file)
--- a/src/ui/gui/psppire-case-file.h
+++ b/src/ui/gui/psppire-case-file.h
@@ -26,6 +26,7 @@
  #include <glib.h>
  
  #include <libpspp/str.h>
+#include <data/case.h>
  
  
  
@@ -55,7 +56,7 @@ struct _PsppireCaseFile
  {
    GObject             parent;
  
-  struct casefile *flexifile;
+  struct datasheet *datasheet;
  };
  
  
@@ -75,8 +76,9 @@ gboolean psppire_case_file_insert_case (PsppireCaseFile *cf, struct ccase *c, gi
  gint psppire_case_file_get_case_count (const PsppireCaseFile *cf);
  
  
-const union value * psppire_case_file_get_value (const PsppireCaseFile *cf,
-                                             gint c, gint idx);
+union value * psppire_case_file_get_value (const PsppireCaseFile *cf,
+                                           casenumber, size_t idx,
+                                           union value *, int width);
  
  struct fmt_spec;
  
@@ -95,14 +97,14 @@ gboolean psppire_case_file_delete_cases (PsppireCaseFile *cf, gint n_rows,
  
  gboolean psppire_case_file_insert_values (PsppireCaseFile *cf, gint n_values, gint before);
  
-struct sort_criteria;
-void psppire_case_file_sort (PsppireCaseFile *cf, const struct sort_criteria *);
+struct case_ordering;
+void psppire_case_file_sort (PsppireCaseFile *cf, struct case_ordering *);
  
  gboolean psppire_case_file_get_case (const PsppireCaseFile *cf, gint casenum,
                                     struct ccase *c);
  
-void psppire_case_file_replace_flexifile (PsppireCaseFile *,
-                                         struct flexifile *);
+void psppire_case_file_replace_datasheet (PsppireCaseFile *,
+                                          struct datasheet *);
  
  
  
diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c

index 6af1b2246088243905add8ac7d9468df4a403b64..d0a346170bc49ecb215038c625e5684093456b31 100644 (file)
--- a/src/ui/gui/psppire-data-store.c
+++ b/src/ui/gui/psppire-data-store.c
@@ -25,8 +25,8 @@
  #define _(msgid) gettext (msgid)
  #define N_(msgid) msgid
  
-#include <data/casefile.h>
-#include <data/case.h>
+#include <data/casewriter.h>
+#include <data/datasheet.h>
  #include <data/data-out.h>
  #include <data/variable.h>
  
@@ -454,7 +454,7 @@ psppire_data_store_insert_new_case (PsppireDataStore *ds, gint posn)
  
  
    /* Opportunity for optimisation exists here when creating a blank case */
-  val_cnt = casefile_get_value_cnt (ds->case_file->flexifile) ;
+  val_cnt = datasheet_get_column_cnt (ds->case_file->datasheet) ;
  
    case_create (&cc, val_cnt);
  
@@ -484,7 +484,7 @@ psppire_data_store_get_string (const GSheetModel *model, gint row, gint column)
    char *text;
    const struct fmt_spec *fp ;
    const struct variable *pv ;
-  const union value *v ;
+  union value *v ;
    GString *s;
    PsppireDataStore *store = PSPPIRE_DATA_STORE (model);
  
@@ -505,19 +505,19 @@ psppire_data_store_get_string (const GSheetModel *model, gint row, gint column)
  
    g_assert (idx >= 0);
  
-  v = psppire_case_file_get_value (store->case_file, row, idx);
-
+  v = psppire_case_file_get_value (store->case_file, row, idx, NULL,
+                                   var_get_width (pv));
+  
    g_return_val_if_fail (v, NULL);
  
    if ( store->show_labels)
      {
-      const struct val_labs * vl = var_get_value_labels (pv);
-
-      const gchar *label;
-      if ( (label = val_labs_find (vl, *v)) )
-       {
+      const gchar *label = var_lookup_value_label (pv, v);
+      if (label)
+        {
+          free (v);
           return pspp_locale_to_utf8 (label, -1, 0);
-       }
+        }
      }
  
    fp = var_get_write_format (pv);
@@ -539,6 +539,7 @@ psppire_data_store_get_string (const GSheetModel *model, gint row, gint column)
  
    g_strchomp (text);
  
+  free (v);
    return text;
  }
  
@@ -649,7 +650,7 @@ psppire_data_store_create_system_file (PsppireDataStore *store,
      3 /* version */
    };
  
-  struct sfm_writer *writer ;
+  struct casewriter *writer;
  
    g_assert (handle);
  
@@ -664,15 +665,10 @@ psppire_data_store_create_system_file (PsppireDataStore *store,
    for (i = 0 ; i < psppire_case_file_get_case_count (store->case_file); ++i )
      {
        struct ccase c;
-
-      case_create (&c, var_cnt);
        psppire_case_file_get_case (store->case_file, i, &c);
-      sfm_write_case (writer, &c);
-
-      case_destroy (&c);
+      casewriter_write (writer, &c);
      }
-
-  sfm_close_writer (writer);
+  casewriter_destroy (writer);
  }
  
  
diff --git a/src/ui/gui/psppire.c b/src/ui/gui/psppire.c

index c575534c060a44eca7ef4f54b58109e39abb501c..d636ab4960781ddc911e0dc813917f9e85de58d4 100644 (file)
--- a/src/ui/gui/psppire.c
+++ b/src/ui/gui/psppire.c
@@ -29,16 +29,15 @@
  #include "psppire.h"
  
  
+#include <data/casereader.h>
+#include <data/datasheet.h>
  #include <data/file-handle-def.h>
  #include <data/format.h>
-#include <data/storage-stream.h>
-#include <data/case-source.h>
  #include <data/settings.h>
  #include <data/file-name.h>
  #include <data/procedure.h>
  #include <libpspp/getl.h>
  #include <language/lexer/lexer.h>
-#include <ui/flexifile.h>
  #include <libpspp/version.h>
  
  #include <gtk/gtk.h>
@@ -50,7 +49,6 @@
  #include "data-sheet.h"
  #include "var-sheet.h"
  #include "message-dialog.h"
-#include "flexifile-factory.h"
  
  PsppireDataStore *the_data_store = 0;
  PsppireVarStore *the_var_store = 0;
@@ -68,28 +66,17 @@ replace_dictionary (struct dictionary *d)
  
  
  static void
-replace_flexifile (struct case_source *s)
+replace_casereader (struct casereader *s)
  {
-  if ( NULL == s )
-    psppire_case_file_replace_flexifile (the_data_store->case_file,
-                                        (struct flexifile *) flexifile_create (0));
-  else
-    {
-      if ( ! case_source_is_class (s, &storage_source_class))
-       return ;
-
-      psppire_case_file_replace_flexifile (the_data_store->case_file,
-                                          (struct flexifile *)
-                                          storage_source_get_casefile (s));
-    }
-}
-
+  struct datasheet *datasheet = datasheet_create (s);
  
+  psppire_case_file_replace_datasheet (the_data_store->case_file,
+                                       datasheet);
+}
  
  void
  initialize (void)
  {
-  struct casefile_factory *factory;
    PsppireDict *dictionary = 0;
  
    /* gtk_init messes with the locale.
@@ -105,14 +92,12 @@ initialize (void)
    fmt_init ();
    settings_init ();
    fh_init ();
-  factory = flexifile_factory_create ();
    the_source_stream =
      create_source_stream (
                           fn_getenv_default ("STAT_INCLUDE_PATH", include_path)
                           );
  
-  the_dataset = create_dataset (factory,
-                               replace_flexifile,
+  the_dataset = create_dataset (replace_casereader,
                                 replace_dictionary);
  
    message_dialog_init (the_source_stream);
@@ -127,12 +112,12 @@ initialize (void)
    /* Create the model for the var_sheet */
    the_var_store = psppire_var_store_new (dictionary);
  
-
    the_data_store = psppire_data_store_new (dictionary);
  
-  proc_set_source (the_dataset,
-                  storage_source_create (the_data_store->case_file->flexifile)
-                  );
+
+  proc_set_active_file_data (the_dataset,
+                            datasheet_make_reader (the_data_store->case_file->datasheet));
+
  
    create_icon_factory ();
  
diff --git a/src/ui/gui/val-labs-dialog.h b/src/ui/gui/val-labs-dialog.h

index e11b43305f64e5eb43d6a8cf074a86636081e8bb..404b7a5a795772e6a4bc7ad37bbe0ddb3fb238d0 100644 (file)
--- a/src/ui/gui/val-labs-dialog.h
+++ b/src/ui/gui/val-labs-dialog.h
@@ -29,6 +29,7 @@
  
  #include <gtk/gtk.h>
  #include <glade/glade.h>
+#include <data/variable.h>
  
  
  struct val_labs;
diff --git a/src/ui/terminal/ChangeLog b/src/ui/terminal/ChangeLog

index ad09bff30b906c94170260daefa2d77238cfa9f0..10d7d36558ba7f729c7d4e01610108b3561804c5 100644 (file)
--- a/src/ui/terminal/ChangeLog
+++ b/src/ui/terminal/ChangeLog
@@ -1,3 +1,10 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       Adapt case sources, sinks, and clients of procedure code to the
+       new infrastructure.
+       
+       * main.c: No need for fastfile_factory any more.
+
  2007-02-25  Ben Pfaff  <blp@gnu.org>
  
         Thanks to Jason Stover for verifying that this patch helps under
diff --git a/src/ui/terminal/main.c b/src/ui/terminal/main.c

index b95ae2e3a2662675ec299c27d8f8f98dd6cbc6a5..1a1c436e967253a6c188337b38e4644c22cf59b8 100644 (file)
--- a/src/ui/terminal/main.c
+++ b/src/ui/terminal/main.c
@@ -27,7 +27,6 @@
  #include "progname.h"
  #include "read-line.h"
  
-#include <data/fastfile-factory.h>
  #include <data/dictionary.h>
  #include <data/file-handle-def.h>
  #include <libpspp/getl.h>
@@ -86,7 +85,6 @@ static struct source_stream *the_source_stream ;
  int
  main (int argc, char **argv)
  {
-  struct casefile_factory *factory;
    signal (SIGABRT, bug_handler);
    signal (SIGSEGV, bug_handler);
    signal (SIGFPE, bug_handler);
@@ -111,9 +109,7 @@ main (int argc, char **argv)
    settings_init ();
    random_init ();
  
-  factory = fastfile_factory_create ();
-
-  the_dataset = create_dataset (factory, NULL, NULL);
+  the_dataset = create_dataset (NULL, NULL);
  
    if (parse_command_line (argc, argv, the_source_stream))
      {
diff --git a/tests/ChangeLog b/tests/ChangeLog

index 4bec5f1dc0f5d6203133131ebbbe6a6acf3a51bf..0f0d2c89ca4e21c5a5aabeed576e81f1faebfa9f 100644 (file)
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2007-06-06  Ben Pfaff  <blp@gnu.org>
+
+       * automake.mk: Remove test.
+
+       * tests/xforms/casefile.sh: Removed test.
+
  2007-06-06  Ben Pfaff  <blp@gnu.org>
  
         * automake.mk: Add new test.
diff --git a/tests/automake.mk b/tests/automake.mk

index 66a67ed7f6a365af9a85a386bd1431ad993aec92..aead28ae8f7ac7df9d45377df0a95f8a06736d22 100644 (file)
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -118,7 +118,6 @@ dist_TESTS = \
         tests/bugs/temp-freq.sh \
         tests/bugs/print-crash.sh \
         tests/bugs/keep-all.sh \
-       tests/xforms/casefile.sh \
         tests/xforms/recode.sh \
         tests/stats/descript-basic.sh \
         tests/stats/descript-missing.sh \
diff --git a/tests/xforms/casefile.sh b/tests/xforms/casefile.sh

deleted file mode 100755 (executable)

index c354857..0000000
--- a/tests/xforms/casefile.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/sh
-
-# This program tests casefiles by running DEBUG CASEFILE.
-
-TEMPDIR=/tmp/pspp-tst-$$
-
-# ensure that top_builddir  are absolute
-if [ -z "$top_builddir" ] ; then top_builddir=. ; fi
-if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi
-top_builddir=`cd $top_builddir; pwd`
-PSPP=$top_builddir/src/ui/terminal/pspp
-
-# ensure that top_srcdir is absolute
-top_srcdir=`cd $top_srcdir; pwd`
-
-STAT_CONFIG_PATH=$top_srcdir/config
-export STAT_CONFIG_PATH
-
-
-cleanup()
-{
-     cd /
-     rm -rf $TEMPDIR
-}
-
-
-fail()
-{
-    echo $activity
-    echo FAILED
-    cleanup;
-    exit 1;
-}
-
-
-no_result()
-{
-    echo $activity
-    echo NO RESULT;
-    cleanup;
-    exit 2;
-}
-
-pass()
-{
-    cleanup;
-    exit 0;
-}
-
-mkdir -p $TEMPDIR
-
-cd $TEMPDIR
-
-activity="create program"
-cat > $TEMPDIR/casefile.stat <<EOF
-DEBUG CASEFILE SMALL.
-EOF
-if [ $? -ne 0 ] ; then no_result ; fi
-
-activity="run program"
-$SUPERVISOR $PSPP --testing-mode $TEMPDIR/casefile.stat > $TEMPDIR/casefile.out
-if [ $? -ne 0 ] ; then no_result ; fi
-
-activity="compare results"
-perl -pi -e 's/^\s*$//g' $TEMPDIR/casefile.out
-diff -b $TEMPDIR/casefile.out - <<EOF
-Casefile tests succeeded.
-EOF
-if [ $? -ne 0 ] ; then fail ; fi
-
-
-pass;
author	Ben Pfaff <blp@gnu.org>
	Thu, 7 Jun 2007 06:41:58 +0000 (06:41 +0000)
committer	Ben Pfaff <blp@gnu.org>
	Thu, 7 Jun 2007 06:41:58 +0000 (06:41 +0000)
ChangeLog		patch \| blob \| history
Smake		patch \| blob \| history
src/data/ChangeLog		patch \| blob \| history
src/data/any-reader.c		patch \| blob \| history
src/data/any-reader.h		patch \| blob \| history
src/data/any-writer.c		patch \| blob \| history
src/data/any-writer.h		patch \| blob \| history
src/data/automake.mk		patch \| blob \| history
src/data/case-sink.c	[deleted file]	patch \| blob \| history
src/data/case-sink.h	[deleted file]	patch \| blob \| history
src/data/case-source.c	[deleted file]	patch \| blob \| history
src/data/case-source.h	[deleted file]	patch \| blob \| history
src/data/casefile-factory.h	[deleted file]	patch \| blob \| history
src/data/casefile-private.h	[deleted file]	patch \| blob \| history
src/data/casefile.c	[deleted file]	patch \| blob \| history
src/data/casefile.h	[deleted file]	patch \| blob \| history
src/data/casefilter.c	[deleted file]	patch \| blob \| history
src/data/casefilter.h	[deleted file]	patch \| blob \| history
src/data/dictionary.c		patch \| blob \| history
src/data/por-file-reader.c		patch \| blob \| history
src/data/por-file-reader.h		patch \| blob \| history
src/data/por-file-writer.c		patch \| blob \| history
src/data/por-file-writer.h		patch \| blob \| history
src/data/procedure.c		patch \| blob \| history
src/data/procedure.h		patch \| blob \| history
src/data/scratch-handle.c		patch \| blob \| history
src/data/scratch-handle.h		patch \| blob \| history
src/data/scratch-reader.c		patch \| blob \| history
src/data/scratch-reader.h		patch \| blob \| history
src/data/scratch-writer.c		patch \| blob \| history
src/data/scratch-writer.h		patch \| blob \| history
src/data/storage-stream.c	[deleted file]	patch \| blob \| history
src/data/storage-stream.h	[deleted file]	patch \| blob \| history
src/data/sys-file-reader.c		patch \| blob \| history
src/data/sys-file-reader.h		patch \| blob \| history
src/data/sys-file-writer.c		patch \| blob \| history
src/data/sys-file-writer.h		patch \| blob \| history
src/language/ChangeLog		patch \| blob \| history
src/language/command.c		patch \| blob \| history
src/language/command.def		patch \| blob \| history
src/language/control/do-if.c		patch \| blob \| history
src/language/data-io/ChangeLog		patch \| blob \| history
src/language/data-io/data-list.c		patch \| blob \| history
src/language/data-io/data-reader.c		patch \| blob \| history
src/language/data-io/get.c		patch \| blob \| history
src/language/data-io/inpt-pgm.c		patch \| blob \| history
src/language/data-io/list.q		patch \| blob \| history
src/language/dictionary/ChangeLog		patch \| blob \| history
src/language/dictionary/apply-dictionary.c		patch \| blob \| history
src/language/dictionary/delete-variables.c		patch \| blob \| history
src/language/dictionary/modify-variables.c		patch \| blob \| history
src/language/dictionary/sys-file-info.c		patch \| blob \| history
src/language/expressions/evaluate.c		patch \| blob \| history
src/language/lexer/variable-parser.c		patch \| blob \| history
src/language/stats/ChangeLog		patch \| blob \| history
src/language/stats/aggregate.c		patch \| blob \| history
src/language/stats/autorecode.c		patch \| blob \| history
src/language/stats/binomial.c		patch \| blob \| history
src/language/stats/binomial.h		patch \| blob \| history
src/language/stats/chisquare.c		patch \| blob \| history
src/language/stats/chisquare.h		patch \| blob \| history
src/language/stats/crosstabs.q		patch \| blob \| history
src/language/stats/descriptives.c		patch \| blob \| history
src/language/stats/examine.q		patch \| blob \| history
src/language/stats/flip.c		patch \| blob \| history
src/language/stats/frequencies.q		patch \| blob \| history
src/language/stats/npar-summary.c		patch \| blob \| history
src/language/stats/npar-summary.h		patch \| blob \| history
src/language/stats/npar.h		patch \| blob \| history
src/language/stats/npar.q		patch \| blob \| history
src/language/stats/oneway.q		patch \| blob \| history
src/language/stats/rank.q		patch \| blob \| history
src/language/stats/regression.q		patch \| blob \| history
src/language/stats/sort-cases.c		patch \| blob \| history
src/language/stats/sort-criteria.c		patch \| blob \| history
src/language/stats/sort-criteria.h		patch \| blob \| history
src/language/stats/t-test.q		patch \| blob \| history
src/language/tests/automake.mk		patch \| blob \| history
src/language/tests/casefile-test.c	[deleted file]	patch \| blob \| history
src/libpspp/deque.h		patch \| blob \| history
src/math/ChangeLog		patch \| blob \| history
src/math/automake.mk		patch \| blob \| history
src/math/levene.c		patch \| blob \| history
src/math/levene.h		patch \| blob \| history
src/math/merge.c	[new file with mode: 0644]	patch \| blob
src/math/merge.h	[new file with mode: 0644]	patch \| blob
src/math/sort.c		patch \| blob \| history
src/math/sort.h		patch \| blob \| history
src/ui/ChangeLog		patch \| blob \| history
src/ui/automake.mk		patch \| blob \| history
src/ui/flexifile.c	[deleted file]	patch \| blob \| history
src/ui/flexifile.h	[deleted file]	patch \| blob \| history
src/ui/gui/ChangeLog		patch \| blob \| history
src/ui/gui/automake.mk		patch \| blob \| history
src/ui/gui/flexifile-factory.c	[deleted file]	patch \| blob \| history
src/ui/gui/flexifile-factory.h	[deleted file]	patch \| blob \| history
src/ui/gui/helper.c		patch \| blob \| history
src/ui/gui/missing-val-dialog.c		patch \| blob \| history
src/ui/gui/psppire-case-file.c		patch \| blob \| history
src/ui/gui/psppire-case-file.h		patch \| blob \| history
src/ui/gui/psppire-data-store.c		patch \| blob \| history
src/ui/gui/psppire.c		patch \| blob \| history
src/ui/gui/val-labs-dialog.h		patch \| blob \| history
src/ui/terminal/ChangeLog		patch \| blob \| history
src/ui/terminal/main.c		patch \| blob \| history
tests/ChangeLog		patch \| blob \| history
tests/automake.mk		patch \| blob \| history
tests/xforms/casefile.sh	[deleted file]	patch \| blob \| history