Add lots of comments. Some minor substantive changes too:

author Ben Pfaff <blp@gnu.org>

Mon, 11 Jun 2007 04:03:19 +0000 (04:03 +0000)

committer Ben Pfaff <blp@gnu.org>

Mon, 11 Jun 2007 04:03:19 +0000 (04:03 +0000)
diff --git a/src/data/ChangeLog b/src/data/ChangeLog

index 913249f3bb0df1abbf4caff3c50c654bbe41e378..45564a95debc89a67597c56b5f5bbc52c6e67b62 100644 (file)
--- a/src/data/ChangeLog
+++ b/src/data/ChangeLog
@@ -1,3 +1,14 @@
+2007-06-10  Ben Pfaff  <blp@gnu.org>
+
+       * casereader-filter.c (casereader_filter_destroy): Make sure to
+       write all the remaining excluded cases to the casewriter, if any.
+
+       * caseinit.c (init_list_destroy): Rewrite.
+       (init_list_clear): Ditto.
+
+       * casegrouper.c (casegrouper_get_next_group): Always set *reader
+       to null when returning false.
+
  2007-06-06  Ben Pfaff  <blp@gnu.org>
  
         Actually implement the new procedure code and adapt all of its
diff --git a/src/data/case-ordering.c b/src/data/case-ordering.c

index 910a9ea66ad7facb55a2c4a5740d636845e52b11..e9e7095ead9fc1a5916356e87cd4f018cb023c10 100644 (file)
--- a/src/data/case-ordering.c
+++ b/src/data/case-ordering.c
@@ -46,6 +46,10 @@ struct case_ordering
      size_t key_cnt;
    };
  
+/* Creates and returns a new case ordering for comparing cases
+   that represent dictionary DICT.  The case ordering initially
+   contains no variables, so that all cases will compare as
+   equal. */
  struct case_ordering *
  case_ordering_create (const struct dictionary *dict)
  {
@@ -56,6 +60,7 @@ case_ordering_create (const struct dictionary *dict)
    return co;
  }
  
+/* Creates and returns a clone of case ordering ORIG. */
  struct case_ordering *
  case_ordering_clone (const struct case_ordering *orig)
  {
@@ -66,6 +71,7 @@ case_ordering_clone (const struct case_ordering *orig)
    return co;
  }
  
+/* Destroys case ordering CO. */
  void
  case_ordering_destroy (struct case_ordering *co)
  {
@@ -76,12 +82,17 @@ case_ordering_destroy (struct case_ordering *co)
      }
  }
  
+/* Returns the number of `union value's in the cases that case
+   ordering CO compares (taken from the dictionary used to
+   construct it). */
  size_t
  case_ordering_get_value_cnt (const struct case_ordering *co)
  {
    return co->value_cnt;
  }
  
+/* Compares cases A and B given case ordering CO and returns a
+   strcmp()-type result. */
  int
  case_ordering_compare_cases (const struct ccase *a, const struct ccase *b,
                               const struct case_ordering *co)
@@ -116,6 +127,9 @@ case_ordering_compare_cases (const struct ccase *a, const struct ccase *b,
    return 0;
  }
  
+/* Adds VAR to case ordering CO as an additional sort key in sort
+   direction DIR.  Returns true if successful, false if VAR was
+   already part of the ordering for CO. */
  bool
  case_ordering_add_var (struct case_ordering *co,
                         const struct variable *var, enum sort_direction dir)
@@ -134,12 +148,18 @@ case_ordering_add_var (struct case_ordering *co,
    return true;
  }
  
+/* Returns the number of variables used for ordering within
+   CO. */
  size_t
  case_ordering_get_var_cnt (const struct case_ordering *co)
  {
    return co->key_cnt;
  }
  
+/* Returns sort variable IDX within CO.  An IDX of 0 returns the
+   primary sort key (the one added first), an IDX of 1 returns
+   the secondary sort key, and so on.  IDX must be less than the
+   number of sort variables. */
  const struct variable *
  case_ordering_get_var (const struct case_ordering *co, size_t idx)
  {
@@ -147,6 +167,7 @@ case_ordering_get_var (const struct case_ordering *co, size_t idx)
    return co->keys[idx].var;
  }
  
+/* Returns the sort direction for sort variable IDX within CO. */
  enum sort_direction
  case_ordering_get_direction (const struct case_ordering *co, size_t idx)
  {
@@ -154,6 +175,10 @@ case_ordering_get_direction (const struct case_ordering *co, size_t idx)
    return co->keys[idx].dir;
  }
  
+/* Stores an array listing all of the variables used for sorting
+   within CO into *VARS and the number of variables into
+   *VAR_CNT.  The caller is responsible for freeing *VARS when it
+   is no longer needed. */
  void
  case_ordering_get_vars (const struct case_ordering *co,
                          const struct variable ***vars, size_t *var_cnt)
diff --git a/src/data/case-ordering.h b/src/data/case-ordering.h

index f537829d9cc80ae0c22c6c4b6011118c8bc91bb0..841d943f48c30277ab70c6b11aada08899c4120f 100644 (file)
--- a/src/data/case-ordering.h
+++ b/src/data/case-ordering.h
@@ -16,6 +16,8 @@
     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     02110-1301, USA. */
  
+/* Sort order for comparing cases. */
+
  #ifndef DATA_CASE_ORDERING_H
  #define DATA_CASE_ORDERING_H 1
  
@@ -31,18 +33,24 @@ enum sort_direction
      SRT_DESCEND                        /* Z, Y, X, ..., C, B, A. */
    };
  
+/* Creation and destruction. */
  struct case_ordering *case_ordering_create (const struct dictionary *);
  struct case_ordering *case_ordering_clone (const struct case_ordering *);
  void case_ordering_destroy (struct case_ordering *);
  
-size_t case_ordering_get_value_cnt (const struct case_ordering *);
+/* Modification. */
+bool case_ordering_add_var (struct case_ordering *,
+                            const struct variable *, enum sort_direction);
+
+/* Comparing cases. */
  int case_ordering_compare_cases (const struct ccase *, const struct ccase *,
                                   const struct case_ordering *);
  
-bool case_ordering_add_var (struct case_ordering *,
-                            const struct variable *, enum sort_direction);
+/* Inspection. */
+size_t case_ordering_get_value_cnt (const struct case_ordering *);
  size_t case_ordering_get_var_cnt (const struct case_ordering *);
-const struct variable *case_ordering_get_var (const struct case_ordering *, size_t);
+const struct variable *case_ordering_get_var (const struct case_ordering *,
+                                              size_t);
  enum sort_direction case_ordering_get_direction (const struct case_ordering *,
                                                   size_t);
  void case_ordering_get_vars (const struct case_ordering *,
diff --git a/src/data/casegrouper.c b/src/data/casegrouper.c

index f2815106ea6330bac01609c40ae310be8cc808da..8a392afbab8be62531b68c9f3890818e88df4630 100644 (file)
--- a/src/data/casegrouper.c
+++ b/src/data/casegrouper.c
@@ -30,16 +30,27 @@
  
  #include "xalloc.h"
  
+/* A casegrouper. */
  struct casegrouper
    {
-    struct casereader *reader;
-    struct taint *taint;
+    struct casereader *reader;  /* Source of input cases. */
+    struct taint *taint;        /* Error status for casegrouper. */
  
+    /* Functions for grouping cases. */
      bool (*same_group) (const struct ccase *, const struct ccase *, void *aux);
      void (*destroy) (void *aux);
      void *aux;
    };
  
+/* Creates and returns a new casegrouper that takes its input
+   from READER.  SAME_GROUP is used to decide which cases are in
+   a group: it returns true if the pair of cases provided are in
+   the same group, false otherwise.  DESTROY will be called when
+   the casegrouper is destroyed and should free any storage
+   needed by SAME_GROUP.
+
+   SAME_GROUP may be a null pointer.  If so, READER's entire
+   contents are considered to be a single group. */
  struct casegrouper *
  casegrouper_create_func (struct casereader *reader,
                           bool (*same_group) (const struct ccase *,
@@ -57,13 +68,17 @@ casegrouper_create_func (struct casereader *reader,
    return grouper;
  }
  
-/* FIXME: we really shouldn't need a temporary casewriter for the
-   common case where we read an entire group's data before going
-   on to the next. */
+/* Obtains the next group of cases from GROUPER.  Returns true if
+   successful, false if no groups remain.  If successful, *READER
+   is set to the casereader for the new group; otherwise, it is
+   set to NULL. */
  bool
  casegrouper_get_next_group (struct casegrouper *grouper,
                              struct casereader **reader)
  {
+  /* FIXME: we really shouldn't need a temporary casewriter for
+     the common case where we read an entire group's data before
+     going on to the next. */
    if (grouper->same_group != NULL)
      {
        struct casewriter *writer;
@@ -102,10 +117,17 @@ casegrouper_get_next_group (struct casegrouper *grouper,
            return true;
          }
        else
-        return false;
+        {
+          *reader = NULL;
+          return false;
+        }
      }
  }
  
+/* Destroys GROUPER.  Returns false if GROUPER's input casereader
+   or any state derived from it had become tainted, which means
+   that an I/O error or other serious error occurred in
+   processing data derived from GROUPER; otherwise, returns true. */
  bool
  casegrouper_destroy (struct casegrouper *grouper)
  {
@@ -126,29 +148,26 @@ casegrouper_destroy (struct casegrouper *grouper)
    else
      return true;
  }
+\f
+/* Casegrouper based on equal values of variables from case to
+   case. */
  
+/* Casegrouper based on equal variables. */
  struct casegrouper_vars
    {
-    const struct variable **vars;
-    size_t var_cnt;
+    const struct variable **vars; /* Variables to compare. */
+    size_t var_cnt;               /* Number of variables. */
    };
  
-static bool
-casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b,
-                             void *cv_)
-{
-  struct casegrouper_vars *cv = cv_;
-  return case_compare (a, b, cv->vars, cv->var_cnt) == 0;
-}
-
-static void
-casegrouper_vars_destroy (void *cv_)
-{
-  struct casegrouper_vars *cv = cv_;
-  free (cv->vars);
-  free (cv);
-}
+static bool casegrouper_vars_same_group (const struct ccase *,
+                                         const struct ccase *,
+                                         void *);
+static void casegrouper_vars_destroy (void *);
  
+/* Creates and returns a casegrouper that reads data from READER
+   and breaks it into contiguous groups of cases that have equal
+   values for the VAR_CNT variables in VARS.  If VAR_CNT is 0,
+   then all the cases will be put in a single group. */
  struct casegrouper *
  casegrouper_create_vars (struct casereader *reader,
                           const struct variable *const *vars,
@@ -168,6 +187,11 @@ casegrouper_create_vars (struct casereader *reader,
      return casegrouper_create_func (reader, NULL, NULL, NULL);
  }
  
+/* Creates and returns a casegrouper that reads data from READER
+   and breaks it into contiguous groups of cases that have equal
+   values for the SPLIT FILE variables in DICT.  If DICT has no
+   SPLIT FILE variables, then all the cases will be put into a
+   single group. */
  struct casegrouper *
  casegrouper_create_splits (struct casereader *reader,
                             const struct dictionary *dict)
@@ -177,6 +201,11 @@ casegrouper_create_splits (struct casereader *reader,
                                    dict_get_split_cnt (dict));
  }
  
+/* Creates and returns a casegrouper that reads data from READER
+   and breaks it into contiguous groups of cases that have equal
+   values for the variables used for sorting in CO.  If CO is
+   empty (contains no sort keys), then all the cases will be put
+   into a single group. */
  struct casegrouper *
  casegrouper_create_case_ordering (struct casereader *reader,
                                    const struct case_ordering *co)
@@ -191,3 +220,22 @@ casegrouper_create_case_ordering (struct casereader *reader,
  
    return grouper;
  }
+
+/* "same_group" function for an equal-variables casegrouper. */
+static bool
+casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b,
+                             void *cv_)
+{
+  struct casegrouper_vars *cv = cv_;
+  return case_compare (a, b, cv->vars, cv->var_cnt) == 0;
+}
+
+/* "destroy" for an equal-variables casegrouper. */
+static void
+casegrouper_vars_destroy (void *cv_)
+{
+  struct casegrouper_vars *cv = cv_;
+  free (cv->vars);
+  free (cv);
+}
+
diff --git a/src/data/casegrouper.h b/src/data/casegrouper.h

index 3d9c6a89dc41fc1805ecf0623e8590972655dd8e..5f686ff99027b814c6bef09ec91789aec29a033a 100644 (file)
--- a/src/data/casegrouper.h
+++ b/src/data/casegrouper.h
@@ -16,6 +16,13 @@
     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     02110-1301, USA. */
  
+/* Casegrouper.
+
+   Breaks up the cases from a casereader into sets of contiguous
+   cases based on some criteria, e.g. sets of cases that all have
+   the same values for some subset of variables.  Each set of
+   cases is made available to the client as a casereader. */
+
  #ifndef DATA_CASEGROUPER_H
  #define DATA_CASEGROUPER_H 1
  
diff --git a/src/data/caseinit.c b/src/data/caseinit.c

index 564d35342981623d58940968854f6fc80cba3e43..496ea9b2ce29d26c8fa3a3a3ce304dc0fec6b2fc 100644 (file)
--- a/src/data/caseinit.c
+++ b/src/data/caseinit.c
@@ -33,25 +33,32 @@
  #include <libpspp/compiler.h>
  
  #include "xalloc.h"
+\f
+/* Initializer list: a set of values to write to locations within
+   a case. */
  
+/* Binds a value with a place to put it. */
  struct init_value
    {
      union value value;
      size_t case_index;
    };
  
+/* A set of values to initialize in a case. */
  struct init_list
    {
      struct init_value *values;
      size_t cnt;
    };
  
+/* A bitmap of the "left" status of variables. */
  enum leave_class
    {
-    LEAVE_REINIT = 0x001,
-    LEAVE_LEFT = 0x002
+    LEAVE_REINIT = 0x001,       /* Reinitialize for every case. */
+    LEAVE_LEFT = 0x002          /* Keep the value from one case to the next. */
    };
  
+/* Initializes LIST as an empty initializer list. */
  static void
  init_list_create (struct init_list *list)
  {
@@ -59,19 +66,23 @@ init_list_create (struct init_list *list)
    list->cnt = 0;
  }
  
+/* Frees the storage associated with LIST. */
  static void
-init_list_clear (struct init_list *list)
+init_list_destroy (struct init_list *list)
  {
    free (list->values);
-  init_list_create (list);
  }
  
+/* Clears LIST, making it an empty list. */
  static void
-init_list_destroy (struct init_list *list)
+init_list_clear (struct init_list *list)
  {
-  init_list_clear (list);
+  init_list_destroy (list);
+  init_list_create (list);
  }
  
+/* Compares `struct init_value's A and B by case_index and
+   returns a strcmp()-type result. */
  static int
  compare_init_values (const void *a_, const void *b_, const void *aux UNUSED)
  {
@@ -81,6 +92,7 @@ compare_init_values (const void *a_, const void *b_, const void *aux UNUSED)
    return a->case_index < b->case_index ? -1 : a->case_index > b->case_index;
  }
  
+/* Returns true if LIST includes CASE_INDEX, false otherwise. */
  static bool
  init_list_includes (const struct init_list *list, size_t case_index)
  {
@@ -90,6 +102,9 @@ init_list_includes (const struct init_list *list, size_t case_index)
                          &value, compare_init_values, NULL) != NULL;
  }
  
+/* Marks LIST to initialize the `union value's for the variables
+   in dictionary D that both (1) fall in the leave class or
+   classes designated by INCLUDE and (2) are not in EXCLUDE. */
  static void
  init_list_mark (struct init_list *list, const struct init_list *exclude,
                  enum leave_class include, const struct dictionary *d)
@@ -133,9 +148,10 @@ init_list_mark (struct init_list *list, const struct init_list *exclude,
    /* Drop duplicates. */
    list->cnt = sort_unique (list->values, list->cnt, sizeof *list->values,
                             compare_init_values, NULL);
-
  }
  
+/* Initializes data in case C to the values in the initializer
+   LIST. */
  static void
  init_list_init (const struct init_list *list, struct ccase *c)
  {
@@ -148,6 +164,8 @@ init_list_init (const struct init_list *list, struct ccase *c)
      }
  }
  
+/* Updates the values in the initializer LIST from the data in
+   case C. */
  static void
  init_list_update (const struct init_list *list, const struct ccase *c)
  {
@@ -159,14 +177,26 @@ init_list_update (const struct init_list *list, const struct ccase *c)
        value->value = *case_data_idx (c, value->case_index);
      }
  }
-
+\f
+/* A case initializer. */
  struct caseinit
    {
+    /* Values that do not need to be initialized by the
+       procedure, because they are initialized by the data
+       source. */
      struct init_list preinited_values;
+
+    /* Values that need to be initialized to SYSMIS or spaces in
+       each case. */
      struct init_list reinit_values;
+
+    /* Values that need to be initialized to 0 or spaces in the
+       first case and thereafter retain their values from case to
+       case. */
      struct init_list left_values;
    };
  
+/* Creates and returns a new case initializer. */
  struct caseinit *
  caseinit_create (void)
  {
@@ -177,6 +207,7 @@ caseinit_create (void)
    return ci;
  }
  
+/* Clears the contents of case initializer CI. */
  void
  caseinit_clear (struct caseinit *ci)
  {
@@ -185,6 +216,7 @@ caseinit_clear (struct caseinit *ci)
    init_list_clear (&ci->left_values);
  }
  
+/* Destroys case initializer CI. */
  void
  caseinit_destroy (struct caseinit *ci)
  {
@@ -197,12 +229,19 @@ caseinit_destroy (struct caseinit *ci)
      }
  }
  
+/* Marks the variables from dictionary D in CI as being
+   initialized by the data source, so that the case initializer
+   need not initialize them itself. */
  void
  caseinit_mark_as_preinited (struct caseinit *ci, const struct dictionary *d)
  {
    init_list_mark (&ci->preinited_values, NULL, LEAVE_REINIT | LEAVE_LEFT, d);
  }
  
+/* Marks in CI the variables from dictionary D, except for any
+   variables that were already marked with
+   caseinit_mark_as_preinited, as needing initialization
+   according to their leave status. */
  void
  caseinit_mark_for_init (struct caseinit *ci, const struct dictionary *d)
  {
@@ -210,17 +249,17 @@ caseinit_mark_for_init (struct caseinit *ci, const struct dictionary *d)
    init_list_mark (&ci->left_values, &ci->preinited_values, LEAVE_LEFT, d);
  }
  
+/* Initializes variables in C as described by CI. */
  void
-caseinit_init_reinit_vars (const struct caseinit *ci, struct ccase *c)
+caseinit_init_vars (const struct caseinit *ci, struct ccase *c)
  {
    init_list_init (&ci->reinit_values, c);
-}
-
-void caseinit_init_left_vars (const struct caseinit *ci, struct ccase *c)
-{
    init_list_init (&ci->left_values, c);
  }
  
+/* Updates the left vars in CI from the data in C, so that the
+   next call to caseinit_init_vars will store those values in the
+   next case. */
  void
  caseinit_update_left_vars (struct caseinit *ci, const struct ccase *c)
  {
diff --git a/src/data/caseinit.h b/src/data/caseinit.h

index 7c7f1c69e7d7288a98d7c6da07b03e1cf1eba350..3c849805a69a8fc2c518f5e9d13264fa1412507c 100644 (file)
--- a/src/data/caseinit.h
+++ b/src/data/caseinit.h
@@ -26,7 +26,9 @@
     save the values of "left" variables to copy into the next case
     read from the active file.
  
-   The caseinit code helps with this. */
+   The caseinit data structure provides a little help for
+   tracking what data to initialize or to copy from case to
+   case. */
  
  #ifndef DATA_CASEINIT_H
  #define DATA_CASEINIT_H 1
@@ -34,15 +36,17 @@
  struct dictionary;
  struct ccase;
  
+/* Creation and destruction. */
  struct caseinit *caseinit_create (void);
  void caseinit_clear (struct caseinit *);
  void caseinit_destroy (struct caseinit *);
  
+/* Track data to be initialized. */
  void caseinit_mark_as_preinited (struct caseinit *, const struct dictionary *);
  void caseinit_mark_for_init (struct caseinit *, const struct dictionary *);
  
-void caseinit_init_reinit_vars (const struct caseinit *, struct ccase *);
-void caseinit_init_left_vars (const struct caseinit *, struct ccase *);
+/* Initialize data and copy data from case to case. */
+void caseinit_init_vars (const struct caseinit *, struct ccase *);
  void caseinit_update_left_vars (struct caseinit *, const struct ccase *);
  
  #endif /* data/caseinit.h */
diff --git a/src/data/casereader-filter.c b/src/data/casereader-filter.c

index 36ff62acec9c1cf3bc4ace2a109a41cccc03e115..afb72cca1434e7a40dea2a723ba5b7a68bcd8e69 100644 (file)
--- a/src/data/casereader-filter.c
+++ b/src/data/casereader-filter.c
@@ -34,17 +34,36 @@
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  
+/* A casereader that filters data coming from another
+   casereader. */
  struct casereader_filter
    {
-    struct casereader *subreader;
+    struct casereader *subreader; /* The reader to filter. */
      bool (*include) (const struct ccase *, void *aux);
      bool (*destroy) (void *aux);
      void *aux;
-    struct casewriter *exclude;
+    struct casewriter *exclude; /* Writer that gets filtered cases, or NULL. */
    };
  
  static struct casereader_class casereader_filter_class;
  
+/* Creates and returns a casereader whose content is a filtered
+   version of the data in SUBREADER.  Only the cases for which
+   INCLUDE returns true will appear in the returned casereader,
+   in the original order.
+
+   If EXCLUDE is non-null, then cases for which INCLUDE returns
+   false are written to EXCLUDE.  These cases will not
+   necessarily be fully written to EXCLUDE until the filtering casereader's
+   cases have been fully read or, if that never occurs, until the
+   filtering casereader is destroyed.
+
+   When the filtering casereader is destroyed, DESTROY will be
+   called to allow any state maintained by INCLUDE to be freed.
+
+   After this function is called, SUBREADER must not ever again
+   be referenced directly.  It will be destroyed automatically
+   when the filtering casereader is destroyed. */
  struct casereader *
  casereader_create_filter_func (struct casereader *subreader,
                                 bool (*include) (const struct ccase *,
@@ -68,6 +87,7 @@ casereader_create_filter_func (struct casereader *subreader,
    return reader;
  }
  
+/* Internal read function for filtering casereader. */
  static bool
  casereader_filter_read (struct casereader *reader UNUSED, void *filter_,
                          struct ccase *c)
@@ -87,16 +107,31 @@ casereader_filter_read (struct casereader *reader UNUSED, void *filter_,
      }
  }
  
+/* Internal destruction function for filtering casereader. */
  static void
  casereader_filter_destroy (struct casereader *reader, void *filter_)
  {
    struct casereader_filter *filter = filter_;
+
+  /* Make sure we've written everything to the excluded cases
+     casewriter, if there is one. */
+  if (filter->exclude != NULL)
+    {
+      struct ccase c;
+      while (casereader_read (filter->subreader, &c))
+        if (filter->include (&c, filter->aux))
+          case_destroy (&c);
+        else
+          casewriter_write (filter->exclude, &c);
+    }
+
    casereader_destroy (filter->subreader);
    if (filter->destroy != NULL && !filter->destroy (filter->aux))
      casereader_force_error (reader);
    free (filter);
  }
  
+/* Filtering casereader class. */
  static struct casereader_class casereader_filter_class =
    {
      casereader_filter_read,
@@ -111,41 +146,42 @@ static struct casereader_class casereader_filter_class =
      NULL,
    };
  
+\f
+/* Casereader for filtering valid weights. */
+
+/* Weight-filtering data. */
  struct casereader_filter_weight
    {
-    const struct variable *weight_var;
-    bool *warn_on_invalid;
-    bool local_warn_on_invalid;
+    const struct variable *weight_var; /* Weight variable. */
+    bool *warn_on_invalid;      /* Warn on next invalid weight? */
+    bool local_warn_on_invalid; /* warn_on_invalid might point here. */
    };
  
-static bool
-casereader_filter_weight_include (const struct ccase *c, void *cfw_)
-{
-  struct casereader_filter_weight *cfw = cfw_;
-  double value = case_num (c, cfw->weight_var);
-  if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY))
-    return true;
-  else
-    {
-      if (*cfw->warn_on_invalid)
-        {
-         msg (SW, _("At least one case in the data read had a weight value "
-                    "that was user-missing, system-missing, zero, or "
-                    "negative.  These case(s) were ignored."));
-          *cfw->warn_on_invalid = false;
-        }
-      return false;
-    }
-}
+static bool casereader_filter_weight_include (const struct ccase *, void *);
+static bool casereader_filter_weight_destroy (void *);
  
-static bool
-casereader_filter_weight_destroy (void *cfw_)
-{
-  struct casereader_filter_weight *cfw = cfw_;
-  free (cfw);
-  return true;
-}
+/* Creates and returns a casereader that filters cases from
+   READER by valid weights, that is, any cases with user- or
+   system-missing, zero, or negative weights are dropped.  The
+   weight variable's information is taken from DICT.  If DICT
+   does not have a weight variable, then no cases are filtered
+   out.
+
+   When a case with an invalid weight is encountered,
+   *WARN_ON_INVALID is checked.  If it is true, then a warning
+   message is issued and *WARN_ON_INVALID is set false.  If
+   WARN_ON_INVALID is a null pointer, then an internal bool that
+   is initially true is used instead of a caller-supplied bool.
  
+   If EXCLUDE is non-null, then dropped cases are written to
+   EXCLUDE.  These cases will not necessarily be fully written to
+   EXCLUDE until the filtering casereader's cases have been fully
+   read or, if that never occurs, until the filtering casereader
+   is destroyed.
+
+   After this function is called, READER must not ever again be
+   referenced directly.  It will be destroyed automatically when
+   the filtering casereader is destroyed. */
  struct casereader *
  casereader_create_filter_weight (struct casereader *reader,
                                   const struct dictionary *dict,
@@ -170,39 +206,69 @@ casereader_create_filter_weight (struct casereader *reader,
      reader = casereader_rename (reader);
    return reader;
  }
-\f
-struct casereader_filter_missing
-  {
-    struct variable **vars;
-    size_t var_cnt;
-    enum mv_class class;
-  };
  
+/* Internal "include" function for weight-filtering
+   casereader. */
  static bool
-casereader_filter_missing_include (const struct ccase *c, void *cfm_)
+casereader_filter_weight_include (const struct ccase *c, void *cfw_)
  {
-  const struct casereader_filter_missing *cfm = cfm_;
-  size_t i;
-
-  for (i = 0; i < cfm->var_cnt; i++)
+  struct casereader_filter_weight *cfw = cfw_;
+  double value = case_num (c, cfw->weight_var);
+  if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY))
+    return true;
+  else
      {
-      struct variable *var = cfm->vars[i];
-      const union value *value = case_data (c, var);
-      if (var_is_value_missing (var, value, cfm->class))
-        return false;
+      if (*cfw->warn_on_invalid)
+        {
+         msg (SW, _("At least one case in the data read had a weight value "
+                    "that was user-missing, system-missing, zero, or "
+                    "negative.  These case(s) were ignored."));
+          *cfw->warn_on_invalid = false;
+        }
+      return false;
      }
-  return true;
  }
  
+/* Internal "destroy" function for weight-filtering
+   casereader. */
  static bool
-casereader_filter_missing_destroy (void *cfm_)
+casereader_filter_weight_destroy (void *cfw_)
  {
-  struct casereader_filter_missing *cfm = cfm_;
-  free (cfm->vars);
-  free (cfm);
+  struct casereader_filter_weight *cfw = cfw_;
+  free (cfw);
    return true;
  }
+\f
+/* Casereader for filtering missing values. */
+
+/* Missing-value filtering data. */
+struct casereader_filter_missing
+  {
+    struct variable **vars;     /* Variables whose values to filter. */
+    size_t var_cnt;             /* Number of variables. */
+    enum mv_class class;        /* Types of missing values to filter. */
+  };
+
+static bool casereader_filter_missing_include (const struct ccase *, void *);
+static bool casereader_filter_missing_destroy (void *);
+
+/* Creates and returns a casereader that filters out cases from
+   READER that have a missing value in the given CLASS for any of
+   the VAR_CNT variables in VARS.  Only cases that have
+   non-missing values for all of these variables are passed
+   through.
  
+   Ownership of VARS is retained by the caller.
+
+   If EXCLUDE is non-null, then dropped cases are written to
+   EXCLUDE.  These cases will not necessarily be fully written to
+   EXCLUDE until the filtering casereader's cases have been fully
+   read or, if that never occurs, until the filtering casereader
+   is destroyed.
+
+   After this function is called, READER must not ever again
+   be referenced directly.  It will be destroyed automatically
+   when the filtering casereader is destroyed. */
  struct casereader *
  casereader_create_filter_missing (struct casereader *reader,
                                    const struct variable **vars, size_t var_cnt,
@@ -224,16 +290,58 @@ casereader_create_filter_missing (struct casereader *reader,
    else
      return casereader_rename (reader);
  }
-\f
-\f
+
+/* Internal "include" function for missing value-filtering
+   casereader. */
  static bool
-casereader_counter_include (const struct ccase *c UNUSED, void *counter_)
+casereader_filter_missing_include (const struct ccase *c, void *cfm_)
  {
-  casenumber *counter = counter_;
-  ++*counter;
+  const struct casereader_filter_missing *cfm = cfm_;
+  size_t i;
+
+  for (i = 0; i < cfm->var_cnt; i++)
+    {
+      struct variable *var = cfm->vars[i];
+      const union value *value = case_data (c, var);
+      if (var_is_value_missing (var, value, cfm->class))
+        return false;
+    }
+  return true;
+}
+
+/* Internal "destroy" function for missing value-filtering
+   casereader. */
+static bool
+casereader_filter_missing_destroy (void *cfm_)
+{
+  struct casereader_filter_missing *cfm = cfm_;
+  free (cfm->vars);
+  free (cfm);
    return true;
  }
+\f
+/* Case-counting casereader. */
+
+static bool casereader_counter_include (const struct ccase *, void *);
  
+/* Creates and returns a new casereader that counts the number of
+   cases that have been read from it.  *COUNTER is initially set
+   to INITIAL_VALUE, then incremented by 1 each time a case is read.
+
+   Counting casereaders must be used very cautiously: if a
+   counting casereader is cloned or if the casereader_peek
+   function is used on it, then the counter's value can be higher
+   than expected because of the buffering that goes on behind the
+   scenes.
+
+   The counter is only incremented as cases are actually read
+   from the casereader.  In particular, if the casereader is
+   destroyed before all cases have been read from the casereader,
+   cases never read will not be included in the count.
+
+   After this function is called, READER must not ever again
+   be referenced directly.  It will be destroyed automatically
+   when the filtering casereader is destroyed. */
  struct casereader *
  casereader_create_counter (struct casereader *reader, casenumber *counter,
                             casenumber initial_value)
@@ -242,3 +350,12 @@ casereader_create_counter (struct casereader *reader, casenumber *counter,
    return casereader_create_filter_func (reader, casereader_counter_include,
                                          NULL, counter, NULL);
  }
+
+/* Internal "include" function for counting casereader. */
+static bool
+casereader_counter_include (const struct ccase *c UNUSED, void *counter_)
+{
+  casenumber *counter = counter_;
+  ++*counter;
+  return true;
+}
diff --git a/src/data/casereader-translator.c b/src/data/casereader-translator.c

index b409beeb1c166541b7d4f98469303f904b6ad40c..28b9c180a18d7b433a8e8e20ae364f41ee8690bd 100644 (file)
--- a/src/data/casereader-translator.c
+++ b/src/data/casereader-translator.c
@@ -27,9 +27,13 @@
  
  #include "xalloc.h"
  
+/* Casereader that applies a user-supplied function to translate
+   each case into another in an arbitrary fashion. */
+
+/* A translating casereader. */
  struct casereader_translator
    {
-    struct casereader *subreader;
+    struct casereader *subreader; /* Source of input cases. */
  
      void (*translate) (const struct ccase *input, struct ccase *output,
                         void *aux);
@@ -39,6 +43,18 @@ struct casereader_translator
  
  static struct casereader_class casereader_translator_class;
  
+/* Creates and returns a new casereader whose cases are produced
+   by reading from SUBREADER and passing through TRANSLATE, which
+   must create case OUTPUT, with OUTPUT_VALUE_CNT values, and
+   populate it based on INPUT and auxiliary data AUX.  TRANSLATE
+   must also destroy INPUT.
+
+   When the translating casereader is destroyed, DESTROY will be
+   called to allow any state maintained by TRANSLATE to be freed.
+
+   After this function is called, SUBREADER must not ever again
+   be referenced directly.  It will be destroyed automatically
+   when the translating casereader is destroyed. */
  struct casereader *
  casereader_create_translator (struct casereader *subreader,
                                size_t output_value_cnt,
@@ -62,6 +78,7 @@ casereader_create_translator (struct casereader *subreader,
    return reader;
  }
  
+/* Internal read function for translating casereader. */
  static bool
  casereader_translator_read (struct casereader *reader UNUSED,
                              void *ct_, struct ccase *c)
@@ -78,6 +95,7 @@ casereader_translator_read (struct casereader *reader UNUSED,
      return false;
  }
  
+/* Internal destroy function for translating casereader. */
  static void
  casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_)
  {
@@ -87,6 +105,7 @@ casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_)
    free (ct);
  }
  
+/* Casereader class for translating casereader. */
  static struct casereader_class casereader_translator_class =
    {
      casereader_translator_read,
diff --git a/src/data/datasheet.c b/src/data/datasheet.c

index 4a3eda32cc7700a030adc96b624f1bd444935aa3..1fc1e98b08aba877620733f90a12a225a0be4f36 100644 (file)
--- a/src/data/datasheet.c
+++ b/src/data/datasheet.c
@@ -229,20 +229,23 @@ datasheet_rename (struct datasheet *ds)
    return new;
  }
  
-/* Returns true if a I/O error has occurred while processing a
-   datasheet operation. */
+/* Returns true if datasheet DS is tainted.
+   A datasheet is tainted by an I/O error or by taint
+   propagation to the datasheet. */
  bool
  datasheet_error (const struct datasheet *ds)
  {
    return taint_is_tainted (ds->taint);
  }
  
+/* Marks datasheet DS tainted. */
  void
  datasheet_force_error (struct datasheet *ds)
  {
    taint_set_taint (ds->taint);
  }
  
+/* Returns datasheet DS's taint object. */
  const struct taint *
  datasheet_get_taint (const struct datasheet *ds)
  {
@@ -535,6 +538,7 @@ datasheet_make_reader (struct datasheet *ds)
    return reader;
  }
  
+/* "read" function for the datasheet random casereader. */
  static bool
  datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
                         casenumber case_idx, struct ccase *c)
@@ -551,6 +555,7 @@ datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
      }
  }
  
+/* "destroy" function for the datasheet random casereader. */
  static void
  datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
  {
@@ -558,6 +563,7 @@ datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
    datasheet_destroy (ds);
  }
  
+/* "advance" function for the datasheet random casereader. */
  static void
  datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
                            casenumber case_cnt)
@@ -566,6 +572,7 @@ datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
    datasheet_delete_rows (ds, 0, case_cnt);
  }
  
+/* Random casereader class for a datasheet. */
  static const struct casereader_random_class datasheet_reader_class =
    {
      datasheet_reader_read,
diff --git a/src/data/procedure.c b/src/data/procedure.c

index 55fe5a48ad8b8ea69d66f5234549af61444325df..0741b1cc944ce479b6214aba0933d624cb8611c8 100644 (file)
--- a/src/data/procedure.c
+++ b/src/data/procedure.c
@@ -86,9 +86,10 @@ struct dataset {
    /* Procedure data. */
    enum
      {
-      PROC_COMMITTED,
-      PROC_OPEN,
-      PROC_CLOSED
+      PROC_COMMITTED,           /* No procedure in progress. */
+      PROC_OPEN,                /* proc_open called, casereader still open. */
+      PROC_CLOSED               /* casereader from proc_open destroyed,
+                                   but proc_commit not yet called. */
      }
    proc_state;
    size_t cases_written;       /* Cases output so far. */
@@ -193,18 +194,15 @@ proc_open (struct dataset *ds)
                                         &proc_casereader_class, ds);
  }
  
+/* Returns true if a procedure is in progress, that is, if
+   proc_open has been called but proc_commit has not. */
  bool
  proc_is_open (const struct dataset *ds)
  {
    return ds->proc_state != PROC_COMMITTED;
  }
  
-/* Reads the next case from dataset DS, which must have been
-   opened for reading with proc_open.
-   Returns true if successful, in which case a pointer to the
-   case is stored in *C.
-   Return false at end of file or if a read error occurs.  In
-   this case a null pointer is stored in *C. */
+/* "read" function for procedure casereader. */
  static bool
  proc_casereader_read (struct casereader *reader UNUSED, void *ds_,
                        struct ccase *c)
@@ -227,8 +225,7 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_,
        if (!casereader_read (ds->source, c))
          return false;
        case_resize (c, dict_get_next_value_idx (ds->dict));
-      caseinit_init_reinit_vars (ds->caseinit, c);
-      caseinit_init_left_vars (ds->caseinit, c);
+      caseinit_init_vars (ds->caseinit, c);
  
        /* Execute permanent transformations.  */
        case_nr = ds->cases_written + 1;
@@ -280,11 +277,7 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_,
      }
  }
  
-/* Closes dataset DS for reading.
-   Returns true if successful, false if an I/O error occurred
-   while reading or closing the data set.
-   If DS has not been opened, returns true without doing
-   anything else. */
+/* "destroy" function for procedure casereader. */
  static void
  proc_casereader_destroy (struct casereader *reader, void *ds_)
  {
@@ -352,6 +345,7 @@ proc_commit (struct dataset *ds)
    return proc_cancel_all_transformations (ds) && ds->ok;
  }
  
+/* Casereader class for procedure execution. */
  static struct casereader_class proc_casereader_class =
    {
      proc_casereader_read,
author	Ben Pfaff <blp@gnu.org>
	Mon, 11 Jun 2007 04:03:19 +0000 (04:03 +0000)
committer	Ben Pfaff <blp@gnu.org>
	Mon, 11 Jun 2007 04:03:19 +0000 (04:03 +0000)
src/data/ChangeLog		patch \| blob \| history
src/data/case-ordering.c		patch \| blob \| history
src/data/case-ordering.h		patch \| blob \| history
src/data/casegrouper.c		patch \| blob \| history
src/data/casegrouper.h		patch \| blob \| history
src/data/caseinit.c		patch \| blob \| history
src/data/caseinit.h		patch \| blob \| history
src/data/casereader-filter.c		patch \| blob \| history
src/data/casereader-translator.c		patch \| blob \| history
src/data/datasheet.c		patch \| blob \| history
src/data/procedure.c		patch \| blob \| history