From 5da5a98055ad574120c3e3922af097411a0dcf3a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 11 Jun 2007 04:03:19 +0000 Subject: [PATCH] Add lots of comments. Some minor substantive changes too: * casereader-filter.c (casereader_filter_destroy): Make sure to write all the remaining excluded cases to the casewriter, if any. * caseinit.c (init_list_destroy): Rewrite. (init_list_clear): Ditto. * casegrouper.c (casegrouper_get_next_group): Always set *reader to null when returning false. --- src/data/ChangeLog | 11 ++ src/data/case-ordering.c | 25 ++++ src/data/case-ordering.h | 16 ++- src/data/casegrouper.c | 94 ++++++++++--- src/data/casegrouper.h | 7 + src/data/caseinit.c | 65 +++++++-- src/data/caseinit.h | 10 +- src/data/casereader-filter.c | 231 +++++++++++++++++++++++-------- src/data/casereader-translator.c | 21 ++- src/data/datasheet.c | 11 +- src/data/procedure.c | 26 ++-- 11 files changed, 398 insertions(+), 119 deletions(-) diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 913249f3..45564a95 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,14 @@ +2007-06-10 Ben Pfaff + + * casereader-filter.c (casereader_filter_destroy): Make sure to + write all the remaining excluded cases to the casewriter, if any. + + * caseinit.c (init_list_destroy): Rewrite. + (init_list_clear): Ditto. + + * casegrouper.c (casegrouper_get_next_group): Always set *reader + to null when returning false. + 2007-06-06 Ben Pfaff Actually implement the new procedure code and adapt all of its diff --git a/src/data/case-ordering.c b/src/data/case-ordering.c index 910a9ea6..e9e7095e 100644 --- a/src/data/case-ordering.c +++ b/src/data/case-ordering.c @@ -46,6 +46,10 @@ struct case_ordering size_t key_cnt; }; +/* Creates and returns a new case ordering for comparing cases + that represent dictionary DICT. The case ordering initially + contains no variables, so that all cases will compare as + equal. 
*/ struct case_ordering * case_ordering_create (const struct dictionary *dict) { @@ -56,6 +60,7 @@ case_ordering_create (const struct dictionary *dict) return co; } +/* Creates and returns a clone of case ordering ORIG. */ struct case_ordering * case_ordering_clone (const struct case_ordering *orig) { @@ -66,6 +71,7 @@ case_ordering_clone (const struct case_ordering *orig) return co; } +/* Destroys case ordering CO. */ void case_ordering_destroy (struct case_ordering *co) { @@ -76,12 +82,17 @@ case_ordering_destroy (struct case_ordering *co) } } +/* Returns the number of `union value's in the cases that case + ordering CO compares (taken from the dictionary used to + construct it). */ size_t case_ordering_get_value_cnt (const struct case_ordering *co) { return co->value_cnt; } +/* Compares cases A and B given case ordering CO and returns a + strcmp()-type result. */ int case_ordering_compare_cases (const struct ccase *a, const struct ccase *b, const struct case_ordering *co) @@ -116,6 +127,9 @@ case_ordering_compare_cases (const struct ccase *a, const struct ccase *b, return 0; } +/* Adds VAR to case ordering CO as an additional sort key in sort + direction DIR. Returns true if successful, false if VAR was + already part of the ordering for CO. */ bool case_ordering_add_var (struct case_ordering *co, const struct variable *var, enum sort_direction dir) @@ -134,12 +148,18 @@ case_ordering_add_var (struct case_ordering *co, return true; } +/* Returns the number of variables used for ordering within + CO. */ size_t case_ordering_get_var_cnt (const struct case_ordering *co) { return co->key_cnt; } +/* Returns sort variable IDX within CO. An IDX of 0 returns the + primary sort key (the one added first), an IDX of 1 returns + the secondary sort key, and so on. IDX must be less than the + number of sort variables. 
*/ const struct variable * case_ordering_get_var (const struct case_ordering *co, size_t idx) { @@ -147,6 +167,7 @@ case_ordering_get_var (const struct case_ordering *co, size_t idx) return co->keys[idx].var; } +/* Returns the sort direction for sort variable IDX within CO. */ enum sort_direction case_ordering_get_direction (const struct case_ordering *co, size_t idx) { @@ -154,6 +175,10 @@ case_ordering_get_direction (const struct case_ordering *co, size_t idx) return co->keys[idx].dir; } +/* Stores an array listing all of the variables used for sorting + within CO into *VARS and the number of variables into + *VAR_CNT. The caller is responsible for freeing *VARS when it + is no longer needed. */ void case_ordering_get_vars (const struct case_ordering *co, const struct variable ***vars, size_t *var_cnt) diff --git a/src/data/case-ordering.h b/src/data/case-ordering.h index f537829d..841d943f 100644 --- a/src/data/case-ordering.h +++ b/src/data/case-ordering.h @@ -16,6 +16,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +/* Sort order for comparing cases. */ + #ifndef DATA_CASE_ORDERING_H #define DATA_CASE_ORDERING_H 1 @@ -31,18 +33,24 @@ enum sort_direction SRT_DESCEND /* Z, Y, X, ..., C, B, A. */ }; +/* Creation and destruction. */ struct case_ordering *case_ordering_create (const struct dictionary *); struct case_ordering *case_ordering_clone (const struct case_ordering *); void case_ordering_destroy (struct case_ordering *); -size_t case_ordering_get_value_cnt (const struct case_ordering *); +/* Modification. */ +bool case_ordering_add_var (struct case_ordering *, + const struct variable *, enum sort_direction); + +/* Comparing cases. */ int case_ordering_compare_cases (const struct ccase *, const struct ccase *, const struct case_ordering *); -bool case_ordering_add_var (struct case_ordering *, - const struct variable *, enum sort_direction); +/* Inspection. 
*/ +size_t case_ordering_get_value_cnt (const struct case_ordering *); size_t case_ordering_get_var_cnt (const struct case_ordering *); -const struct variable *case_ordering_get_var (const struct case_ordering *, size_t); +const struct variable *case_ordering_get_var (const struct case_ordering *, + size_t); enum sort_direction case_ordering_get_direction (const struct case_ordering *, size_t); void case_ordering_get_vars (const struct case_ordering *, diff --git a/src/data/casegrouper.c b/src/data/casegrouper.c index f2815106..8a392afb 100644 --- a/src/data/casegrouper.c +++ b/src/data/casegrouper.c @@ -30,16 +30,27 @@ #include "xalloc.h" +/* A casegrouper. */ struct casegrouper { - struct casereader *reader; - struct taint *taint; + struct casereader *reader; /* Source of input cases. */ + struct taint *taint; /* Error status for casegrouper. */ + /* Functions for grouping cases. */ bool (*same_group) (const struct ccase *, const struct ccase *, void *aux); void (*destroy) (void *aux); void *aux; }; +/* Creates and returns a new casegrouper that takes its input + from READER. SAME_GROUP is used to decide which cases are in + a group: it returns true if the pair of cases provided are in + the same group, false otherwise. DESTROY will be called when + the casegrouper is destroyed and should free any storage + needed by SAME_GROUP. + + SAME_GROUP may be a null pointer. If so, READER's entire + contents is considered to be a single group. */ struct casegrouper * casegrouper_create_func (struct casereader *reader, bool (*same_group) (const struct ccase *, @@ -57,13 +68,17 @@ casegrouper_create_func (struct casereader *reader, return grouper; } -/* FIXME: we really shouldn't need a temporary casewriter for the - common case where we read an entire group's data before going - on to the next. */ +/* Obtains the next group of cases from GROUPER. Returns true if + successful, false if no groups remain. 
If successful, *READER + is set to the casereader for the new group; otherwise, it is + set to NULL. */ bool casegrouper_get_next_group (struct casegrouper *grouper, struct casereader **reader) { + /* FIXME: we really shouldn't need a temporary casewriter for + the common case where we read an entire group's data before + going on to the next. */ if (grouper->same_group != NULL) { struct casewriter *writer; @@ -102,10 +117,17 @@ casegrouper_get_next_group (struct casegrouper *grouper, return true; } else - return false; + { + *reader = NULL; + return false; + } } } +/* Destroys GROUPER. Returns false if GROUPER's input casereader + or any state derived from it had become tainted, which means + that an I/O error or other serious error occurred in + processing data derived from GROUPER; otherwise, returns true. */ bool casegrouper_destroy (struct casegrouper *grouper) { @@ -126,29 +148,26 @@ casegrouper_destroy (struct casegrouper *grouper) else return true; } + +/* Casegrouper based on equal values of variables from case to + case. */ +/* Casegrouper based on equal variables. */ struct casegrouper_vars { - const struct variable **vars; - size_t var_cnt; + const struct variable **vars; /* Variables to compare. */ + size_t var_cnt; /* Number of variables. */ }; -static bool -casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b, - void *cv_) -{ - struct casegrouper_vars *cv = cv_; - return case_compare (a, b, cv->vars, cv->var_cnt) == 0; -} - -static void -casegrouper_vars_destroy (void *cv_) -{ - struct casegrouper_vars *cv = cv_; - free (cv->vars); - free (cv); -} +static bool casegrouper_vars_same_group (const struct ccase *, + const struct ccase *, + void *); +static void casegrouper_vars_destroy (void *); +/* Creates and returns a casegrouper that reads data from READER + and breaks it into contiguous groups of cases that have equal + values for the VAR_CNT variables in VARS. If VAR_CNT is 0, + then all the cases will be put in a single group. 
*/ struct casegrouper * casegrouper_create_vars (struct casereader *reader, const struct variable *const *vars, @@ -168,6 +187,11 @@ casegrouper_create_vars (struct casereader *reader, return casegrouper_create_func (reader, NULL, NULL, NULL); } +/* Creates and returns a casegrouper that reads data from READER + and breaks it into contiguous groups of cases that have equal + values for the SPLIT FILE variables in DICT. If DICT has no + SPLIT FILE variables, then all the cases will be put into a + single group. */ struct casegrouper * casegrouper_create_splits (struct casereader *reader, const struct dictionary *dict) @@ -177,6 +201,11 @@ casegrouper_create_splits (struct casereader *reader, dict_get_split_cnt (dict)); } +/* Creates and returns a casegrouper that reads data from READER + and breaks it into contiguous groups of cases that have equal + values for the variables used for sorting in CO. If CO is + empty (contains no sort keys), then all the cases will be put + into a single group. */ struct casegrouper * casegrouper_create_case_ordering (struct casereader *reader, const struct case_ordering *co) @@ -191,3 +220,22 @@ casegrouper_create_case_ordering (struct casereader *reader, return grouper; } + +/* "same_group" function for an equal-variables casegrouper. */ +static bool +casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b, + void *cv_) +{ + struct casegrouper_vars *cv = cv_; + return case_compare (a, b, cv->vars, cv->var_cnt) == 0; +} + +/* "destroy" for an equal-variables casegrouper. */ +static void +casegrouper_vars_destroy (void *cv_) +{ + struct casegrouper_vars *cv = cv_; + free (cv->vars); + free (cv); +} + diff --git a/src/data/casegrouper.h b/src/data/casegrouper.h index 3d9c6a89..5f686ff9 100644 --- a/src/data/casegrouper.h +++ b/src/data/casegrouper.h @@ -16,6 +16,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +/* Casegrouper. 
+ + Breaks up the cases from a casereader into sets of contiguous + cases based on some criteria, e.g. sets of cases that all have + the same values for some subset of variables. Each set of + cases is made available to the client as a casereader. */ + #ifndef DATA_CASEGROUPER_H #define DATA_CASEGROUPER_H 1 diff --git a/src/data/caseinit.c b/src/data/caseinit.c index 564d3534..496ea9b2 100644 --- a/src/data/caseinit.c +++ b/src/data/caseinit.c @@ -33,25 +33,32 @@ #include #include "xalloc.h" + +/* Initializer list: a set of values to write to locations within + a case. */ +/* Binds a value with a place to put it. */ struct init_value { union value value; size_t case_index; }; +/* A set of values to initialize in a case. */ struct init_list { struct init_value *values; size_t cnt; }; +/* A bitmap of the "left" status of variables. */ enum leave_class { - LEAVE_REINIT = 0x001, - LEAVE_LEFT = 0x002 + LEAVE_REINIT = 0x001, /* Reinitialize for every case. */ + LEAVE_LEFT = 0x002 /* Keep the value from one case to the next. */ }; +/* Initializes LIST as an empty initializer list. */ static void init_list_create (struct init_list *list) { @@ -59,19 +66,23 @@ init_list_create (struct init_list *list) list->cnt = 0; } +/* Frees the storage associated with LIST. */ static void -init_list_clear (struct init_list *list) +init_list_destroy (struct init_list *list) { free (list->values); - init_list_create (list); } +/* Clears LIST, making it an empty list. */ static void -init_list_destroy (struct init_list *list) +init_list_clear (struct init_list *list) { - init_list_clear (list); + init_list_destroy (list); + init_list_create (list); } +/* Compares `struct init_value's A and B by case_index and + returns a strcmp()-type result. */ static int compare_init_values (const void *a_, const void *b_, const void *aux UNUSED) { @@ -81,6 +92,7 @@ compare_init_values (const void *a_, const void *b_, const void *aux UNUSED) return a->case_index < b->case_index ? 
-1 : a->case_index > b->case_index; } +/* Returns true if LIST includes CASE_INDEX, false otherwise. */ static bool init_list_includes (const struct init_list *list, size_t case_index) { @@ -90,6 +102,9 @@ init_list_includes (const struct init_list *list, size_t case_index) &value, compare_init_values, NULL) != NULL; } +/* Marks LIST to initialize the `union value's for the variables + in dictionary D that both (1) fall in the leave class or + classes designated by INCLUDE and (2) are not in EXCLUDE. */ static void init_list_mark (struct init_list *list, const struct init_list *exclude, enum leave_class include, const struct dictionary *d) @@ -133,9 +148,10 @@ init_list_mark (struct init_list *list, const struct init_list *exclude, /* Drop duplicates. */ list->cnt = sort_unique (list->values, list->cnt, sizeof *list->values, compare_init_values, NULL); - } +/* Initializes data in case C to the values in the initializer + LIST. */ static void init_list_init (const struct init_list *list, struct ccase *c) { @@ -148,6 +164,8 @@ init_list_init (const struct init_list *list, struct ccase *c) } } +/* Updates the values in the initializer LIST from the data in + case C. */ static void init_list_update (const struct init_list *list, const struct ccase *c) { @@ -159,14 +177,26 @@ init_list_update (const struct init_list *list, const struct ccase *c) value->value = *case_data_idx (c, value->case_index); } } - + +/* A case initializer. */ struct caseinit { + /* Values that do not need to be initialized by the + procedure, because they are initialized by the data + source. */ struct init_list preinited_values; + + /* Values that need to be initialized to SYSMIS or spaces in + each case. */ struct init_list reinit_values; + + /* Values that need to be initialized to 0 or spaces in the + first case and thereafter retain their values from case to + case. */ struct init_list left_values; }; +/* Creates and returns a new case initializer. 
*/ struct caseinit * caseinit_create (void) { @@ -177,6 +207,7 @@ caseinit_create (void) return ci; } +/* Clears the contents of case initializer CI. */ void caseinit_clear (struct caseinit *ci) { @@ -185,6 +216,7 @@ caseinit_clear (struct caseinit *ci) init_list_clear (&ci->left_values); } +/* Destroys case initializer CI. */ void caseinit_destroy (struct caseinit *ci) { @@ -197,12 +229,19 @@ caseinit_destroy (struct caseinit *ci) } } +/* Marks the variables from dictionary D in CI as being + initialized by the data source, so that the case initializer + need not initialize them itself. */ void caseinit_mark_as_preinited (struct caseinit *ci, const struct dictionary *d) { init_list_mark (&ci->preinited_values, NULL, LEAVE_REINIT | LEAVE_LEFT, d); } +/* Marks in CI the variables from dictionary D, except for any + variables that were already marked with + caseinit_mark_as_preinited, as needing initialization + according to their leave status. */ void caseinit_mark_for_init (struct caseinit *ci, const struct dictionary *d) { @@ -210,17 +249,17 @@ caseinit_mark_for_init (struct caseinit *ci, const struct dictionary *d) init_list_mark (&ci->left_values, &ci->preinited_values, LEAVE_LEFT, d); } +/* Initializes variables in C as described by CI. */ void -caseinit_init_reinit_vars (const struct caseinit *ci, struct ccase *c) +caseinit_init_vars (const struct caseinit *ci, struct ccase *c) { init_list_init (&ci->reinit_values, c); -} - -void caseinit_init_left_vars (const struct caseinit *ci, struct ccase *c) -{ init_list_init (&ci->left_values, c); } +/* Updates the left vars in CI from the data in C, so that the + next call to caseinit_init_vars will store those values in the + next case. 
*/ void caseinit_update_left_vars (struct caseinit *ci, const struct ccase *c) { diff --git a/src/data/caseinit.h b/src/data/caseinit.h index 7c7f1c69..3c849805 100644 --- a/src/data/caseinit.h +++ b/src/data/caseinit.h @@ -26,7 +26,9 @@ save the values of "left" variables to copy into the next case read from the active file. - The caseinit code helps with this. */ + The caseinit data structure provides a little help for + tracking what data to initialize or to copy from case to + case. */ #ifndef DATA_CASEINIT_H #define DATA_CASEINIT_H 1 @@ -34,15 +36,17 @@ struct dictionary; struct ccase; +/* Creation and destruction. */ struct caseinit *caseinit_create (void); void caseinit_clear (struct caseinit *); void caseinit_destroy (struct caseinit *); +/* Track data to be initialized. */ void caseinit_mark_as_preinited (struct caseinit *, const struct dictionary *); void caseinit_mark_for_init (struct caseinit *, const struct dictionary *); -void caseinit_init_reinit_vars (const struct caseinit *, struct ccase *); -void caseinit_init_left_vars (const struct caseinit *, struct ccase *); +/* Initialize data and copy data from case to case. */ +void caseinit_init_vars (const struct caseinit *, struct ccase *); void caseinit_update_left_vars (struct caseinit *, const struct ccase *); #endif /* data/caseinit.h */ diff --git a/src/data/casereader-filter.c b/src/data/casereader-filter.c index 36ff62ac..afb72cca 100644 --- a/src/data/casereader-filter.c +++ b/src/data/casereader-filter.c @@ -34,17 +34,36 @@ #include "gettext.h" #define _(msgid) gettext (msgid) +/* A casereader that filters data coming from another + casereader. */ struct casereader_filter { - struct casereader *subreader; + struct casereader *subreader; /* The reader to filter. */ bool (*include) (const struct ccase *, void *aux); bool (*destroy) (void *aux); void *aux; - struct casewriter *exclude; + struct casewriter *exclude; /* Writer that gets filtered cases, or NULL. 
*/ }; static struct casereader_class casereader_filter_class; +/* Creates and returns a casereader whose content is a filtered + version of the data in SUBREADER. Only the cases for which + INCLUDE returns true will appear in the returned casereader, + in the original order. + + If EXCLUDE is non-null, then cases for which INCLUDE returns + false are written to EXCLUDE. These cases will not + necessarily be fully written to EXCLUDE until the filtering casereader's + cases have been fully read or, if that never occurs, until the + filtering casereader is destroyed. + + When the filtering casereader is destroyed, DESTROY will be + called to allow any state maintained by INCLUDE to be freed. + + After this function is called, SUBREADER must not ever again + be referenced directly. It will be destroyed automatically + when the filtering casereader is destroyed. */ struct casereader * casereader_create_filter_func (struct casereader *subreader, bool (*include) (const struct ccase *, @@ -68,6 +87,7 @@ casereader_create_filter_func (struct casereader *subreader, return reader; } +/* Internal read function for filtering casereader. */ static bool casereader_filter_read (struct casereader *reader UNUSED, void *filter_, struct ccase *c) @@ -87,16 +107,31 @@ casereader_filter_read (struct casereader *reader UNUSED, void *filter_, } } +/* Internal destruction function for filtering casereader. */ static void casereader_filter_destroy (struct casereader *reader, void *filter_) { struct casereader_filter *filter = filter_; + + /* Make sure we've written everything to the excluded cases + casewriter, if there is one. 
*/ + if (filter->exclude != NULL) + { + struct ccase c; + while (casereader_read (filter->subreader, &c)) + if (filter->include (&c, filter->aux)) + case_destroy (&c); + else + casewriter_write (filter->exclude, &c); + } + casereader_destroy (filter->subreader); if (filter->destroy != NULL && !filter->destroy (filter->aux)) casereader_force_error (reader); free (filter); } +/* Filtering casereader class. */ static struct casereader_class casereader_filter_class = { casereader_filter_read, @@ -111,41 +146,42 @@ static struct casereader_class casereader_filter_class = NULL, }; + +/* Casereader for filtering valid weights. */ + +/* Weight-filtering data. */ struct casereader_filter_weight { - const struct variable *weight_var; - bool *warn_on_invalid; - bool local_warn_on_invalid; + const struct variable *weight_var; /* Weight variable. */ + bool *warn_on_invalid; /* Have we already issued an error? */ + bool local_warn_on_invalid; /* warn_on_invalid might point here. */ }; -static bool -casereader_filter_weight_include (const struct ccase *c, void *cfw_) -{ - struct casereader_filter_weight *cfw = cfw_; - double value = case_num (c, cfw->weight_var); - if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY)) - return true; - else - { - if (*cfw->warn_on_invalid) - { - msg (SW, _("At least one case in the data read had a weight value " - "that was user-missing, system-missing, zero, or " - "negative. These case(s) were ignored.")); - *cfw->warn_on_invalid = false; - } - return false; - } -} +static bool casereader_filter_weight_include (const struct ccase *, void *); +static bool casereader_filter_weight_destroy (void *); -static bool -casereader_filter_weight_destroy (void *cfw_) -{ - struct casereader_filter_weight *cfw = cfw_; - free (cfw); - return true; -} +/* Creates and returns a casereader that filters cases from + READER by valid weights, that is, any cases with user- or + system-missing, zero, or negative weights are dropped. 
The + weight variable's information is taken from DICT. If DICT + does not have a weight variable, then no cases are filtered + out. + + When a case with an invalid weight is encountered, + *WARN_ON_INVALID is checked. If it is true, then an error + message is issued and *WARN_ON_INVALID is set false. If + WARN_ON_INVALID is a null pointer, then an internal bool that + is initially true is used instead of a caller-supplied bool. + If EXCLUDE is non-null, then dropped cases are written to + EXCLUDE. These cases will not necessarily be fully written to + EXCLUDE until the filtering casereader's cases have been fully + read or, if that never occurs, until the filtering casereader + is destroyed. + + After this function is called, READER must not ever again be + referenced directly. It will be destroyed automatically when + the filtering casereader is destroyed. */ struct casereader * casereader_create_filter_weight (struct casereader *reader, const struct dictionary *dict, @@ -170,39 +206,69 @@ casereader_create_filter_weight (struct casereader *reader, reader = casereader_rename (reader); return reader; } - -struct casereader_filter_missing - { - struct variable **vars; - size_t var_cnt; - enum mv_class class; - }; +/* Internal "include" function for weight-filtering + casereader. 
*/ static bool -casereader_filter_missing_include (const struct ccase *c, void *cfm_) +casereader_filter_weight_include (const struct ccase *c, void *cfw_) { - const struct casereader_filter_missing *cfm = cfm_; - size_t i; - - for (i = 0; i < cfm->var_cnt; i++) + struct casereader_filter_weight *cfw = cfw_; + double value = case_num (c, cfw->weight_var); + if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY)) + return true; + else { - struct variable *var = cfm->vars[i]; - const union value *value = case_data (c, var); - if (var_is_value_missing (var, value, cfm->class)) - return false; + if (*cfw->warn_on_invalid) + { + msg (SW, _("At least one case in the data read had a weight value " + "that was user-missing, system-missing, zero, or " + "negative. These case(s) were ignored.")); + *cfw->warn_on_invalid = false; + } + return false; } - return true; } +/* Internal "destroy" function for weight-filtering + casereader. */ static bool -casereader_filter_missing_destroy (void *cfm_) +casereader_filter_weight_destroy (void *cfw_) { - struct casereader_filter_missing *cfm = cfm_; - free (cfm->vars); - free (cfm); + struct casereader_filter_weight *cfw = cfw_; + free (cfw); return true; } + +/* Casereader for filtering missing values. */ + +/* Missing-value filtering data. */ +struct casereader_filter_missing + { + struct variable **vars; /* Variables whose values to filter. */ + size_t var_cnt; /* Number of variables. */ + enum mv_class class; /* Types of missing values to filter. */ + }; + +static bool casereader_filter_missing_include (const struct ccase *, void *); +static bool casereader_filter_missing_destroy (void *); + +/* Creates and returns a casereader that filters out cases from + READER that have a missing value in the given CLASS for any of + the VAR_CNT variables in VARS. Only cases that have + non-missing values for all of these variables are passed + through. + Ownership of VARS is retained by the caller. 
+ + If EXCLUDE is non-null, then dropped cases are written to + EXCLUDE. These cases will not necessarily be fully written to + EXCLUDE until the filtering casereader's cases have been fully + read or, if that never occurs, until the filtering casereader + is destroyed. + + After this function is called, READER must not ever again + be referenced directly. It will be destroyed automatically + when the filtering casereader is destroyed. */ struct casereader * casereader_create_filter_missing (struct casereader *reader, const struct variable **vars, size_t var_cnt, @@ -224,16 +290,58 @@ casereader_create_filter_missing (struct casereader *reader, else return casereader_rename (reader); } - - + +/* Internal "include" function for missing value-filtering + casereader. */ static bool -casereader_counter_include (const struct ccase *c UNUSED, void *counter_) +casereader_filter_missing_include (const struct ccase *c, void *cfm_) { - casenumber *counter = counter_; - ++*counter; + const struct casereader_filter_missing *cfm = cfm_; + size_t i; + + for (i = 0; i < cfm->var_cnt; i++) + { + struct variable *var = cfm->vars[i]; + const union value *value = case_data (c, var); + if (var_is_value_missing (var, value, cfm->class)) + return false; + } + return true; +} + +/* Internal "destroy" function for missing value-filtering + casereader. */ +static bool +casereader_filter_missing_destroy (void *cfm_) +{ + struct casereader_filter_missing *cfm = cfm_; + free (cfm->vars); + free (cfm); return true; } + +/* Case-counting casereader. */ + +static bool casereader_counter_include (const struct ccase *, void *); +/* Creates and returns a new casereader that counts the number of + cases that have been read from it. *COUNTER is initially set + to INITIAL_VALUE, then incremented by 1 each time a case is read. 
+ + Counting casereaders must be used very cautiously: if a + counting casereader is cloned or if the casereader_peek + function is used on it, then the counter's value can be higher + than expected because of the buffering that goes on behind the + scenes. + + The counter is only incremented as cases are actually read + from the casereader. In particular, if the casereader is + destroyed before all cases have been read from the casereader, + cases never read will not be included in the count. + + After this function is called, READER must not ever again + be referenced directly. It will be destroyed automatically + when the filtering casereader is destroyed. */ struct casereader * casereader_create_counter (struct casereader *reader, casenumber *counter, casenumber initial_value) @@ -242,3 +350,12 @@ casereader_create_counter (struct casereader *reader, casenumber *counter, return casereader_create_filter_func (reader, casereader_counter_include, NULL, counter, NULL); } + +/* Internal "include" function for counting casereader. */ +static bool +casereader_counter_include (const struct ccase *c UNUSED, void *counter_) +{ + casenumber *counter = counter_; + ++*counter; + return true; +} diff --git a/src/data/casereader-translator.c b/src/data/casereader-translator.c index b409beeb..28b9c180 100644 --- a/src/data/casereader-translator.c +++ b/src/data/casereader-translator.c @@ -27,9 +27,13 @@ #include "xalloc.h" +/* Casereader that applies a user-supplied function to translate + each case into another in an arbitrary fashion. */ + +/* A translating casereader. */ struct casereader_translator { - struct casereader *subreader; + struct casereader *subreader; /* Source of input cases. 
*/ void (*translate) (const struct ccase *input, struct ccase *output, void *aux); @@ -39,6 +43,18 @@ struct casereader_translator static struct casereader_class casereader_translator_class; +/* Creates and returns a new casereader whose cases are produced + by reading from SUBREADER and passing through TRANSLATE, which + must create case OUTPUT, with OUTPUT_VALUE_CNT values, and + populate it based on INPUT and auxiliary data AUX. TRANSLATE + must also destroy INPUT. + + When the translating casereader is destroyed, DESTROY will be + called to allow any state maintained by TRANSLATE to be freed. + + After this function is called, SUBREADER must not ever again + be referenced directly. It will be destroyed automatically + when the translating casereader is destroyed. */ struct casereader * casereader_create_translator (struct casereader *subreader, size_t output_value_cnt, @@ -62,6 +78,7 @@ casereader_create_translator (struct casereader *subreader, return reader; } +/* Internal read function for translating casereader. */ static bool casereader_translator_read (struct casereader *reader UNUSED, void *ct_, struct ccase *c) @@ -78,6 +95,7 @@ casereader_translator_read (struct casereader *reader UNUSED, return false; } +/* Internal destroy function for translating casereader. */ static void casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_) { @@ -87,6 +105,7 @@ casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_) free (ct); } +/* Casereader class for translating casereader. */ static struct casereader_class casereader_translator_class = { casereader_translator_read, diff --git a/src/data/datasheet.c b/src/data/datasheet.c index 4a3eda32..1fc1e98b 100644 --- a/src/data/datasheet.c +++ b/src/data/datasheet.c @@ -229,20 +229,23 @@ datasheet_rename (struct datasheet *ds) return new; } -/* Returns true if a I/O error has occurred while processing a - datasheet operation. */ +/* Returns true if datasheet DS is tainted. 
+ A datasheet is tainted by an I/O error or by taint + propagation to the datasheet. */ bool datasheet_error (const struct datasheet *ds) { return taint_is_tainted (ds->taint); } +/* Marks datasheet DS tainted. */ void datasheet_force_error (struct datasheet *ds) { taint_set_taint (ds->taint); } +/* Returns datasheet DS's taint object. */ const struct taint * datasheet_get_taint (const struct datasheet *ds) { @@ -535,6 +538,7 @@ datasheet_make_reader (struct datasheet *ds) return reader; } +/* "read" function for the datasheet random casereader. */ static bool datasheet_reader_read (struct casereader *reader UNUSED, void *ds_, casenumber case_idx, struct ccase *c) @@ -551,6 +555,7 @@ datasheet_reader_read (struct casereader *reader UNUSED, void *ds_, } } +/* "destroy" function for the datasheet random casereader. */ static void datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_) { @@ -558,6 +563,7 @@ datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_) datasheet_destroy (ds); } +/* "advance" function for the datasheet random casereader. */ static void datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_, casenumber case_cnt) @@ -566,6 +572,7 @@ datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_, datasheet_delete_rows (ds, 0, case_cnt); } +/* Random casereader class for a datasheet. */ static const struct casereader_random_class datasheet_reader_class = { datasheet_reader_read, diff --git a/src/data/procedure.c b/src/data/procedure.c index 55fe5a48..0741b1cc 100644 --- a/src/data/procedure.c +++ b/src/data/procedure.c @@ -86,9 +86,10 @@ struct dataset { /* Procedure data. */ enum { - PROC_COMMITTED, - PROC_OPEN, - PROC_CLOSED + PROC_COMMITTED, /* No procedure in progress. */ + PROC_OPEN, /* proc_open called, casereader still open. */ + PROC_CLOSED /* casereader from proc_open destroyed, + but proc_commit not yet called. */ } proc_state; size_t cases_written; /* Cases output so far. 
*/ @@ -193,18 +194,15 @@ proc_open (struct dataset *ds) &proc_casereader_class, ds); } +/* Returns true if a procedure is in progress, that is, if + proc_open has been called but proc_commit has not. */ bool proc_is_open (const struct dataset *ds) { return ds->proc_state != PROC_COMMITTED; } -/* Reads the next case from dataset DS, which must have been - opened for reading with proc_open. - Returns true if successful, in which case a pointer to the - case is stored in *C. - Return false at end of file or if a read error occurs. In - this case a null pointer is stored in *C. */ +/* "read" function for procedure casereader. */ static bool proc_casereader_read (struct casereader *reader UNUSED, void *ds_, struct ccase *c) @@ -227,8 +225,7 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_, if (!casereader_read (ds->source, c)) return false; case_resize (c, dict_get_next_value_idx (ds->dict)); - caseinit_init_reinit_vars (ds->caseinit, c); - caseinit_init_left_vars (ds->caseinit, c); + caseinit_init_vars (ds->caseinit, c); /* Execute permanent transformations. */ case_nr = ds->cases_written + 1; @@ -280,11 +277,7 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_, } } -/* Closes dataset DS for reading. - Returns true if successful, false if an I/O error occurred - while reading or closing the data set. - If DS has not been opened, returns true without doing - anything else. */ +/* "destroy" function for procedure casereader. */ static void proc_casereader_destroy (struct casereader *reader, void *ds_) { @@ -352,6 +345,7 @@ proc_commit (struct dataset *ds) return proc_cancel_all_transformations (ds) && ds->ok; } +/* Casereader class for procedure execution. */ static struct casereader_class proc_casereader_class = { proc_casereader_read, -- 2.30.2