From: John Darrington Date: Sun, 5 Nov 2006 00:35:43 +0000 (+0000) Subject: Added casefilter structure to assist with missing values. Changed T-TEST X-Git-Tag: v0.6.0~702 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f43378497b8400e9c22a3485c534693dc1bc9554;p=pspp-builds.git Added casefilter structure to assist with missing values. Changed T-TEST and ONEWAY commands to use it. --- diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 54368b4a..60076a0f 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,9 @@ +Sun Nov 5 08:29:34 WST 2006 John Darrington + + * casefilter.c casefilter.h (new files), casefile.c casefile.h + casefile-private.h: Added casefilter to assist commands with missing + values. + Sat Nov 4 11:47:09 2006 Ben Pfaff Implement SET ERRORS, SHOW ERRORS. Fixes bug #17609. diff --git a/src/data/automake.mk b/src/data/automake.mk index 9cbe39e6..c01f9dd1 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -13,6 +13,8 @@ src_data_libdata_a_SOURCES = \ src/data/case-source.c \ src/data/case-source.h \ src/data/case.c \ + src/data/casefilter.c \ + src/data/casefilter.h \ src/data/casefile.h \ src/data/casefile.c \ src/data/casefile-private.h \ diff --git a/src/data/casefile-private.h b/src/data/casefile-private.h index c5902765..eddf6aba 100644 --- a/src/data/casefile-private.h +++ b/src/data/casefile-private.h @@ -27,6 +27,7 @@ struct ccase; struct casereader; struct casefile; +struct casefilter; struct class_casefile { @@ -78,6 +79,8 @@ struct casereader struct casefile *cf; /* The casefile to which this reader belongs */ struct ll ll; /* Element in the casefile's list of readers */ + + struct casefilter *filter; /* The filter to be used */ bool destructive; /* True if this reader is destructive */ }; diff --git a/src/data/casefile.c b/src/data/casefile.c index e6c6fa67..ba450633 100644 --- a/src/data/casefile.c +++ b/src/data/casefile.c @@ -26,6 +26,7 @@ #include "case.h" #include "casefile.h" #include "casefile-private.h" +#include "casefilter.h" struct ccase; @@ -116,6 +117,24 @@ casereader_cnum(const struct casereader *r) return r->class->cnum(r); } +static struct ccase * +get_next_case(struct casereader *reader) +{ + struct ccase *read_case = NULL; + struct casefile *cf = casereader_get_casefile (reader); + + do + { + if ( casefile_error (cf) ) + return NULL; + + read_case = reader->class->get_next_case (reader); + } + while ( read_case && reader->filter + && casefilter_skip_case (reader->filter, read_case) ) ; + + return read_case; +} /* Reads a copy of the next case from READER into C. Caller is responsible for destroying C. @@ -123,16 +142,11 @@ casereader_cnum(const struct casereader *r) bool casereader_read (struct casereader *reader, struct ccase *c) { - struct casefile *cf = casereader_get_casefile (reader); - - struct ccase *read_case = NULL; + struct ccase * read_case = get_next_case (reader) ; - if ( casefile_error (cf) ) + if ( NULL == read_case ) return false; - read_case = reader->class->get_next_case (reader); - if ( ! read_case ) return false; - case_clone (c, read_case ); return true; @@ -144,20 +158,18 @@ casereader_read (struct casereader *reader, struct ccase *c) Returns true if successful, false at end of file or on I/O error. */ bool -casereader_read_xfer (struct casereader *ffr, struct ccase *c) +casereader_read_xfer (struct casereader *reader, struct ccase *c) { - struct casefile *cf = casereader_get_casefile (ffr); + struct casefile *cf = casereader_get_casefile (reader); + struct ccase *read_case ; + case_nullify (c); - struct ccase *read_case = NULL ; + read_case = get_next_case (reader) ; - case_nullify (c); - if ( casefile_error (cf) ) + if ( NULL == read_case ) return false; - read_case = ffr->class->get_next_case (ffr); - if ( ! read_case ) return false; - - if ( ffr->destructive && casefile_in_core (cf) ) + if ( reader->destructive && casefile_in_core (cf) ) case_move (c, read_case); else case_clone (c, read_case); @@ -178,10 +190,16 @@ casereader_destroy (struct casereader *r) struct casereader * casereader_clone(const struct casereader *r) { + struct casereader *r2; + /* Would we ever want to clone a destructive reader ?? */ assert ( ! r->destructive ) ; - return r->class->clone (r); + r2 = r->class->clone (r); + + r2->filter = r->filter; + + return r2; } /* Destroys casefile CF. */ @@ -224,10 +242,11 @@ casefile_get_value_cnt (const struct casefile *cf) /* Creates and returns a casereader for CF. A casereader can be used to sequentially read the cases in a casefile. */ struct casereader * -casefile_get_reader (const struct casefile *cf) +casefile_get_reader (const struct casefile *cf, struct casefilter *filter) { struct casereader *r = cf->class->get_reader(cf); r->cf = (struct casefile *) cf; + r->filter = filter; assert (r->class); diff --git a/src/data/casefile.h b/src/data/casefile.h index 9c985001..2a4cfa85 100644 --- a/src/data/casefile.h +++ b/src/data/casefile.h @@ -28,7 +28,7 @@ struct ccase; struct casereader; struct casefile; - +struct casefilter; /* Casereader functions */ @@ -55,7 +55,7 @@ unsigned long casefile_get_case_cnt (const struct casefile *cf); size_t casefile_get_value_cnt (const struct casefile *cf); -struct casereader *casefile_get_reader (const struct casefile *cf); +struct casereader *casefile_get_reader (const struct casefile *cf, struct casefilter *filter); struct casereader *casefile_get_destructive_reader (struct casefile *cf); diff --git a/src/data/casefilter.c b/src/data/casefilter.c new file mode 100644 index 00000000..d88af50a --- /dev/null +++ b/src/data/casefilter.c @@ -0,0 +1,122 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2006 Free Software Foundation, Inc. + Written by John Darrington + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#include +#include +#include +#include "casefilter.h" +#include + +#include +#include +#include +#include + +struct casefilter + { + bool exclude_user_missing; + + const struct variable **vars; + int n_vars; + }; + + +/* Returns true iff the entire case should be skipped */ +bool +casefilter_skip_case (const struct casefilter *filter, const struct ccase *c) +{ + int i; + + for (i = 0; i < filter->n_vars; ++i) + { + if ( casefilter_variable_missing (filter, c, filter->vars[i])) + return true; + } + + return false; +} + +/* Returns true iff the variable V in case C is missing */ +bool +casefilter_variable_missing (const struct casefilter *filter, + const struct ccase *c, + const struct variable *var) +{ + const union value *val = case_data (c, var->fv) ; + + if ( val->f == SYSMIS ) + return true; + + if ( filter->exclude_user_missing && + mv_is_value_user_missing (&var->miss, val) ) + return true; + + return false; +} + +/* Create a new casefilter. + If EXCL is true, then the filter user missing values to be missing, + otherwise they are considered at their face value. + VARS is an array of variables which if *any* of them are missing. + N_VARS is the size of VARS. + */ +struct casefilter * +casefilter_create (bool excl, struct variable **vars, int n_vars) +{ + int i; + struct casefilter * filter = xmalloc (sizeof (*filter)) ; + + filter->exclude_user_missing = excl ; + filter->vars = xnmalloc (n_vars, sizeof (*filter->vars) ); + + for ( i = 0 ; i < n_vars ; ++i ) + filter->vars[i] = vars[i]; + + filter->n_vars = n_vars ; + + return filter ; +} + + +/* Add the variables in VARS to the list of variables for which the + filter considers. N_VARS is the size of VARS */ +void +casefilter_add_variables (struct casefilter *filter, + struct variable **vars, int n_vars) +{ + int i; + + filter->vars = xnrealloc (filter->vars, filter->n_vars + n_vars, + sizeof (*filter->vars) ); + + for ( i = 0 ; i < n_vars ; ++i ) + filter->vars[i + filter->n_vars] = vars[i]; + + filter->n_vars += n_vars ; +} + +/* Destroy the filter FILTER */ +void +casefilter_destroy (struct casefilter *filter) +{ + free (filter->vars); + free (filter); +} + + diff --git a/src/data/casefilter.h b/src/data/casefilter.h new file mode 100644 index 00000000..00fac3bb --- /dev/null +++ b/src/data/casefilter.h @@ -0,0 +1,52 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2006 Free Software Foundation, Inc. + Written by John Darrington + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#if !casefilter_h +#define casefilter_h 1 + +#include + +struct ccase; +struct casefilter; +struct variable ; + +/* Create a new casefilter. + If EXCL is true, then the filter user missing values to be missing, + otherwise they are considered at their face value. + VARS is an array of variables which if *any* of them are missing. + N_VARS is the size of VARS. + */ +struct casefilter * casefilter_create (bool, struct variable **, int); + +/* Add the variables in VARS to the list of variables for which the + filter considers. N_VARS is the size of VARS */ +void casefilter_add_variables (struct casefilter *, struct variable **, int); + +/* Destroy the filter FILTER */ +void casefilter_destroy (struct casefilter *); + +/* Returns true iff the entire case should be skipped */ +bool casefilter_skip_case (const struct casefilter *, const struct ccase *); + +/* Returns true iff the variable V in case C is missing */ +bool casefilter_variable_missing (const struct casefilter *f, + const struct ccase *c, + const struct variable *v); + +#endif diff --git a/src/data/scratch-reader.c b/src/data/scratch-reader.c index cb94e245..8cce42f2 100644 --- a/src/data/scratch-reader.c +++ b/src/data/scratch-reader.c @@ -71,7 +71,7 @@ scratch_reader_open (struct file_handle *fh, struct dictionary **dict) *dict = dict_clone (sh->dictionary); reader = xmalloc (sizeof *reader); reader->fh = fh; - reader->casereader = casefile_get_reader (sh->casefile); + reader->casereader = casefile_get_reader (sh->casefile, NULL); return reader; } diff --git a/src/data/storage-stream.c b/src/data/storage-stream.c index 74769023..912a1039 100644 --- a/src/data/storage-stream.c +++ b/src/data/storage-stream.c @@ -123,7 +123,7 @@ storage_source_read (struct case_source *source, struct casereader *reader; bool ok = true; - for (reader = casefile_get_reader (info->casefile); + for (reader = casefile_get_reader (info->casefile, NULL); ok && casereader_read (reader, &casefile_case); case_destroy (&casefile_case)) { diff --git a/src/language/stats/ChangeLog b/src/language/stats/ChangeLog index e891f6d5..26a0472e 100644 --- a/src/language/stats/ChangeLog +++ b/src/language/stats/ChangeLog @@ -1,3 +1,7 @@ +Sun Nov 5 08:31:42 WST 2006 John Darrington + + * t-test.q, oneway.q: Changed to use the new casefilter structure. + Sat Oct 14 16:52:28 2006 Ben Pfaff * rank.q: (rank_sorted_casefile) Add some missing case_destroy() diff --git a/src/language/stats/descriptives.c b/src/language/stats/descriptives.c index f07baece..de0e417c 100644 --- a/src/language/stats/descriptives.c +++ b/src/language/stats/descriptives.c @@ -730,7 +730,7 @@ calc_descriptives (const struct ccase *first, dsc->valid = 0.; /* First pass to handle most of the work. */ - for (reader = casefile_get_reader (cf); + for (reader = casefile_get_reader (cf, NULL); casereader_read (reader, &c); case_destroy (&c)) { @@ -775,7 +775,7 @@ calc_descriptives (const struct ccase *first, /* Second pass for higher-order moments. */ if (dsc->max_moment > MOMENT_MEAN) { - for (reader = casefile_get_reader (cf); + for (reader = casefile_get_reader (cf, NULL); casereader_read (reader, &c); case_destroy (&c)) { diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index 6f34eee9..c88fc90b 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -718,7 +718,7 @@ run_examine(const struct ccase *first, const struct casefile *cf, for ( v = 0 ; v < n_dependent_vars ; ++v ) metrics_precalc(&totals[v]); - for(r = casefile_get_reader (cf); + for(r = casefile_get_reader (cf, NULL); casereader_read (r, &c) ; case_destroy (&c) ) { diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.q index 86506710..afce1efd 100644 --- a/src/language/stats/oneway.q +++ b/src/language/stats/oneway.q @@ -31,6 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include #include #include +#include #include #include #include @@ -65,8 +66,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA /* (declarations) */ /* (functions) */ - - static bool bad_weight_warn = true; @@ -91,12 +90,9 @@ static struct hsh_table *global_group_hash ; static int ostensible_number_of_groups = -1; -/* Function to use for testing for missing values */ -static is_missing_func *value_is_missing; - - static bool run_oneway(const struct ccase *first, - const struct casefile *cf, void *_mode, const struct dataset *); + const struct casefile *cf, + void *_mode, const struct dataset *); /* Routines to show the output tables */ @@ -124,12 +120,6 @@ cmd_oneway (struct dataset *ds) if ( !parse_oneway (ds, &cmd, NULL) ) return CMD_FAILURE; - /* If /MISSING=INCLUDE is set, then user missing values are ignored */ - if (cmd.incl == ONEWAY_INCLUDE ) - value_is_missing = mv_is_value_system_missing; - else - value_is_missing = mv_is_value_missing; - /* What statistics were requested */ if ( cmd.sbc_statistics ) { @@ -223,7 +213,8 @@ output_oneway(void) /* Parser for the variables sub command */ static int -oneway_custom_variables(struct dataset *ds, struct cmd_oneway *cmd UNUSED, void *aux UNUSED) +oneway_custom_variables(struct dataset *ds, struct cmd_oneway *cmd UNUSED, + void *aux UNUSED) { struct dictionary *dict = dataset_dict (ds); @@ -360,10 +351,9 @@ show_anova_table(void) tab_title (t, _("ANOVA")); tab_submit (t); - - } + /* Show the descriptives table */ static void show_descriptives(void) @@ -570,14 +560,12 @@ show_homogeneity(void) } tab_submit (t); - - } /* Show the contrast coefficients table */ static void -show_contrast_coeffs(short *bad_contrast) +show_contrast_coeffs (short *bad_contrast) { int n_cols = 2 + ostensible_number_of_groups; int n_rows = 2 + cmd.sbc_contrast; @@ -892,10 +880,12 @@ precalc ( struct cmd_oneway *cmd UNUSED ) static bool -run_oneway(const struct ccase *first, const struct casefile *cf, void *cmd_, const struct dataset *ds) +run_oneway(const struct ccase *first, const struct casefile *cf, + void *cmd_, const struct dataset *ds) { struct casereader *r; struct ccase c; + struct casefilter *filter = NULL; struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_; @@ -906,9 +896,13 @@ run_oneway(const struct ccase *first, const struct casefile *cf, void *cmd_, con (hsh_hash_func *) hash_value, 0, (void *) indep_var->width ); + precalc(cmd); - for(r = casefile_get_reader (cf); + filter = casefilter_create ( (cmd->incl != ONEWAY_INCLUDE), + vars, n_vars ); + + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { @@ -917,28 +911,12 @@ run_oneway(const struct ccase *first, const struct casefile *cf, void *cmd_, con const double weight = dict_get_case_weight (dataset_dict (ds), &c, &bad_weight_warn); - const union value *indep_val = case_data (&c, indep_var->fv); + const union value *indep_val; - /* Deal with missing values */ - if ( value_is_missing(&indep_var->miss, indep_val) ) + if ( casefilter_variable_missing (filter, &c, indep_var)) continue; - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( cmd->miss == ONEWAY_LISTWISE ) - { - for(i = 0; i < n_vars ; ++i) - { - const struct variable *v = vars[i]; - const union value *val = case_data (&c, v->fv); - - if (value_is_missing(&v->miss, val) ) - break; - } - if ( i != n_vars ) - continue; - - } - + indep_val = case_data (&c, indep_var->fv); hsh_insert ( global_group_hash, (void *) indep_val ); @@ -968,8 +946,8 @@ run_oneway(const struct ccase *first, const struct casefile *cf, void *cmd_, con hsh_insert ( group_hash, (void *) gs ); } - - if (! value_is_missing(&v->miss, val) ) + + if (! casefilter_variable_missing (filter, &c, v)) { struct group_statistics *totals = &gp->ugs; @@ -998,6 +976,7 @@ run_oneway(const struct ccase *first, const struct casefile *cf, void *cmd_, con } } + casereader_destroy (r); postcalc(cmd); @@ -1005,8 +984,9 @@ run_oneway(const struct ccase *first, const struct casefile *cf, void *cmd_, con if ( stat_tables & STAT_HOMO ) levene (dataset_dict (ds), cf, indep_var, n_vars, vars, - (cmd->miss == ONEWAY_LISTWISE) ? LEV_LISTWISE : LEV_ANALYSIS , - value_is_missing); + filter); + + casefilter_destroy (filter); ostensible_number_of_groups = hsh_count (global_group_hash); diff --git a/src/language/stats/rank.q b/src/language/stats/rank.q index fec2ee7f..7e882ab0 100644 --- a/src/language/stats/rank.q +++ b/src/language/stats/rank.q @@ -599,7 +599,7 @@ rank_sorted_casefile (struct casefile *cf, const struct missing_values *mv) { struct casefile *dest = fastfile_create (casefile_get_value_cnt (cf)); - struct casereader *lookahead = casefile_get_reader (cf); + struct casereader *lookahead = casefile_get_reader (cf, NULL); struct casereader *pos = casereader_clone (lookahead); struct ccase group_case; bool warn = true; diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q index cbb1031d..afe4e77a 100644 --- a/src/language/stats/regression.q +++ b/src/language/stats/regression.q @@ -980,7 +980,7 @@ mark_missing_cases (const struct casefile *cf, struct variable *v, size_t row; const union value *val; - for (r = casefile_get_reader (cf); + for (r = casefile_get_reader (cf, NULL); casereader_read (r, &c); case_destroy (&c)) { row = casereader_cnum (r) - 1; @@ -1186,7 +1186,7 @@ run_regression (const struct ccase *first, The second pass fills the design matrix. */ row = 0; - for (r = casefile_get_reader (cf); casereader_read (r, &c); + for (r = casefile_get_reader (cf, NULL); casereader_read (r, &c); case_destroy (&c)) /* Iterate over the cases. */ { diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q index 3fbb6d88..570be931 100644 --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@ -32,6 +32,8 @@ #include #include #include +#include + #include #include #include @@ -71,11 +73,6 @@ /* (functions) */ - - -/* Function to use for testing for missing values */ -static is_missing_func *value_is_missing; - /* Variable for the GROUPS subcommand, if given. */ static struct variable *indep_var; @@ -219,20 +216,23 @@ enum { static int common_calc (const struct dictionary *dict, - const struct ccase *, void *); + const struct ccase *, void *, + const struct casefilter *filter); static void common_precalc (struct cmd_t_test *); static void common_postcalc (struct cmd_t_test *); -static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *); +static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, const struct casefilter *); static void one_sample_precalc (struct cmd_t_test *); static void one_sample_postcalc (struct cmd_t_test *); -static int paired_calc (const struct dictionary *dict, const struct ccase *, void *); +static int paired_calc (const struct dictionary *dict, const struct ccase *, + struct cmd_t_test*, const struct casefilter *); static void paired_precalc (struct cmd_t_test *); static void paired_postcalc (struct cmd_t_test *); static void group_precalc (struct cmd_t_test *); -static int group_calc (const struct dictionary *dict, const struct ccase *, struct cmd_t_test *); +static int group_calc (const struct dictionary *dict, const struct ccase *, + struct cmd_t_test *, const struct casefilter *); static void group_postcalc (struct cmd_t_test *); @@ -338,13 +338,6 @@ cmd_t_test (struct dataset *ds) return CMD_FAILURE; } - - /* If /MISSING=INCLUDE is set, then user missing values are ignored */ - if (cmd.incl == TTS_INCLUDE ) - value_is_missing = mv_is_value_system_missing; - else - value_is_missing = mv_is_value_missing; - bad_weight_warn = true; ok = multipass_procedure_with_splits (ds, calculate, &cmd); @@ -1413,7 +1406,10 @@ pscbox(void) /* Per case calculations common to all variants of the T test */ static int -common_calc (const struct dictionary *dict, const struct ccase *c, void *_cmd) +common_calc (const struct dictionary *dict, + const struct ccase *c, + void *_cmd, + const struct casefilter *filter) { int i; struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd; @@ -1421,45 +1417,26 @@ common_calc (const struct dictionary *dict, const struct ccase *c, void *_cmd) double weight = dict_get_case_weight (dict, c, &bad_weight_warn); - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( cmd->miss == TTS_LISTWISE ) - { - for(i=0; i< cmd->n_variables ; ++i) - { - struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); - - if (value_is_missing(&v->miss, val) ) - { - return 0; - } - } - } - /* Listwise has to be implicit if the independent variable is missing ?? */ if ( cmd->sbc_groups ) { - const union value *gv = case_data (c, indep_var->fv); - if ( value_is_missing(&indep_var->miss, gv) ) - { - return 0; - } + if ( casefilter_variable_missing (filter, c, indep_var) ) + return 0; } - - for(i=0; i< cmd->n_variables ; ++i) + for(i = 0; i < cmd->n_variables ; ++i) { - struct group_statistics *gs; struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - if (! value_is_missing(&v->miss, val) ) + if (! casefilter_variable_missing (filter, c, v) ) { - gs->n+=weight; - gs->sum+=weight * val->f; - gs->ssq+=weight * val->f * val->f; + struct group_statistics *gs; + const union value *val = case_data (c, v->fv); + gs = &group_proc_get (cmd->v_variables[i])->ugs; + + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * val->f * val->f; } } return 0; @@ -1485,11 +1462,10 @@ common_precalc ( struct cmd_t_test *cmd ) /* Post calculations common to all variants of the T test */ void -common_postcalc ( struct cmd_t_test *cmd ) +common_postcalc (struct cmd_t_test *cmd) { int i=0; - for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; @@ -1513,28 +1489,15 @@ common_postcalc ( struct cmd_t_test *cmd ) /* Per case calculations for one sample t test */ static int one_sample_calc (const struct dictionary *dict, - const struct ccase *c, void *cmd_) + const struct ccase *c, void *cmd_, + const struct casefilter *filter) { int i; - struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; + struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; double weight = dict_get_case_weight (dict, c, &bad_weight_warn); - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( cmd->miss == TTS_LISTWISE ) - { - for(i=0; i< cmd->n_variables ; ++i) - { - struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); - - if (value_is_missing(&v->miss, val) ) - { - return 0; - } - } - } for(i=0; i< cmd->n_variables ; ++i) { @@ -1543,8 +1506,8 @@ one_sample_calc (const struct dictionary *dict, const union value *val = case_data (c, v->fv); gs= &group_proc_get (cmd->v_variables[i])->ugs; - - if ( ! value_is_missing(&v->miss, val)) + + if ( ! casefilter_variable_missing (filter, c, v)) gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } @@ -1603,34 +1566,13 @@ paired_precalc (struct cmd_t_test *cmd UNUSED) static int -paired_calc (const struct dictionary *dict, const struct ccase *c, void *cmd_) +paired_calc (const struct dictionary *dict, const struct ccase *c, + struct cmd_t_test *cmd UNUSED, const struct casefilter *filter) { int i; - struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; - double weight = dict_get_case_weight (dict, c, &bad_weight_warn); - /* Skip the entire case if /MISSING=LISTWISE is set , - AND one member of a pair is missing */ - if ( cmd->miss == TTS_LISTWISE ) - { - for(i=0; i < n_pairs ; ++i ) - { - struct variable *v0 = pairs[i].v[0]; - struct variable *v1 = pairs[i].v[1]; - - const union value *val0 = case_data (c, v0->fv); - const union value *val1 = case_data (c, v1->fv); - - if ( value_is_missing(&v0->miss, val0) || - value_is_missing(&v1->miss, val1) ) - { - return 0; - } - } - } - for(i=0; i < n_pairs ; ++i ) { struct variable *v0 = pairs[i].v[0]; @@ -1639,21 +1581,21 @@ paired_calc (const struct dictionary *dict, const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( ( !value_is_missing(&v0->miss, val0) - && !value_is_missing(&v1->miss, val1) ) ) - { - pairs[i].n += weight; - pairs[i].sum[0] += weight * val0->f; - pairs[i].sum[1] += weight * val1->f; + if ( ! casefilter_variable_missing (filter, c, v0) && + ! casefilter_variable_missing (filter, c, v1) ) + { + pairs[i].n += weight; + pairs[i].sum[0] += weight * val0->f; + pairs[i].sum[1] += weight * val1->f; - pairs[i].ssq[0] += weight * pow2(val0->f); - pairs[i].ssq[1] += weight * pow2(val1->f); + pairs[i].ssq[0] += weight * pow2(val0->f); + pairs[i].ssq[1] += weight * pow2(val1->f); - pairs[i].sum_of_prod += weight * val0->f * val1->f ; + pairs[i].sum_of_prod += weight * val0->f * val1->f ; - pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ; - pairs[i].ssq_diffs += weight * pow2(val0->f - val1->f); - } + pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ; + pairs[i].ssq_diffs += weight * pow2(val0->f - val1->f); + } } return 0; @@ -1749,33 +1691,18 @@ group_precalc (struct cmd_t_test *cmd ) static int group_calc (const struct dictionary *dict, - const struct ccase *c, struct cmd_t_test *cmd) + const struct ccase *c, struct cmd_t_test *cmd, + const struct casefilter *filter) { int i; - const union value *gv = case_data (c, indep_var->fv); - const double weight = dict_get_case_weight (dict, c, &bad_weight_warn); - if ( value_is_missing(&indep_var->miss, gv) ) - { - return 0; - } - - if ( cmd->miss == TTS_LISTWISE ) - { - for(i=0; i< cmd->n_variables ; ++i) - { - struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); + const union value *gv; - if (value_is_missing(&v->miss, val) ) - { - return 0; - } - } - } + if ( casefilter_variable_missing (filter, c, indep_var)) + return 0; gv = case_data (c, indep_var->fv); @@ -1793,11 +1720,11 @@ group_calc (const struct dictionary *dict, if ( ! gs ) return 0; - if ( !value_is_missing(&var->miss, val) ) + if ( ! casefilter_variable_missing (filter, c, var) ) { - gs->n+=weight; - gs->sum+=weight * val->f; - gs->ssq+=weight * pow2(val->f); + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2(val->f); } } @@ -1810,7 +1737,7 @@ group_postcalc ( struct cmd_t_test *cmd ) { int i; - for(i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < cmd->n_variables ; ++i) { struct variable *var = cmd->v_variables[i]; struct hsh_table *grp_hash = group_proc_get (var)->group_hash; @@ -1855,38 +1782,45 @@ calculate(const struct ccase *first, const struct casefile *cf, struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; + struct casefilter *filter = casefilter_create (cmd->miss != TTS_INCLUDE, + NULL, 0); + + if ( cmd->miss == TTS_LISTWISE ) + casefilter_add_variables (filter, + cmd->v_variables, cmd->n_variables); + output_split_file_values (ds, first); - common_precalc(cmd); - for(r = casefile_get_reader (cf); + common_precalc (cmd); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - common_calc(dict, &c,cmd); + common_calc (dict, &c, cmd, filter); } + casereader_destroy (r); - common_postcalc(cmd); + common_postcalc (cmd); switch(mode) { case T_1_SAMPLE: - one_sample_precalc(cmd); - for(r = casefile_get_reader (cf); + one_sample_precalc (cmd); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - one_sample_calc (dict, &c,cmd); + one_sample_calc (dict, &c, cmd, filter); } casereader_destroy (r); - one_sample_postcalc(cmd); - + one_sample_postcalc (cmd); break; case T_PAIRED: paired_precalc(cmd); - for(r = casefile_get_reader (cf); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - paired_calc (dict, &c,cmd); + paired_calc (dict, &c, cmd, filter); } casereader_destroy (r); paired_postcalc (cmd); @@ -1895,21 +1829,22 @@ calculate(const struct ccase *first, const struct casefile *cf, case T_IND_SAMPLES: group_precalc(cmd); - for(r = casefile_get_reader (cf); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - group_calc (dict, &c, cmd); + group_calc (dict, &c, cmd, filter); } casereader_destroy (r); group_postcalc(cmd); levene (dict, cf, indep_var, cmd->n_variables, cmd->v_variables, - (cmd->miss == TTS_LISTWISE)?LEV_LISTWISE:LEV_ANALYSIS , - value_is_missing); + filter); break; } + casefilter_destroy (filter); + ssbox_create(&stat_summary_box,cmd,mode); ssbox_populate(&stat_summary_box,cmd); ssbox_finalize(&stat_summary_box); diff --git a/src/language/tests/casefile-test.c b/src/language/tests/casefile-test.c index 6f5a5d2e..c6885c61 100644 --- a/src/language/tests/casefile-test.c +++ b/src/language/tests/casefile-test.c @@ -101,8 +101,8 @@ test_casefile (int pattern, size_t value_cnt, size_t case_cnt) write_random_case (cf, i); if (pattern == 5) casefile_sleep (cf); - r1 = casefile_get_reader (cf); - r2 = casefile_get_reader (cf); + r1 = casefile_get_reader (cf, NULL); + r2 = casefile_get_reader (cf, NULL); switch (pattern) { case 0: @@ -246,7 +246,7 @@ test_casereader_clone (struct casereader *reader1, size_t case_cnt) cases ++; } - newreader = casefile_get_reader (newfile); + newreader = casefile_get_reader (newfile, NULL); /* Make sure that the new file's are identical to those returned from the cloned reader */ diff --git a/src/math/ChangeLog b/src/math/ChangeLog index 2d1b2e96..5b05c817 100644 --- a/src/math/ChangeLog +++ b/src/math/ChangeLog @@ -1,3 +1,7 @@ +Sun Nov 5 08:30:32 WST 2006 John Darrington + + * levene.c levene.h Changed to use the new casefilter structure. + 2006-07-15 Jason Stover * coefficient.c (pspp_coeff_init): Make design_matrix arg const. diff --git a/src/math/levene.c b/src/math/levene.c index 8fb41fed..f5ed0568 100644 --- a/src/math/levene.c +++ b/src/math/levene.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include "group.h" @@ -73,22 +74,21 @@ struct levene_info /* The dependent variables */ struct variable **v_dep; - /* How to treat missing values */ - enum lev_missing missing; - - /* Function to test for missing values */ - is_missing_func *is_missing; + /* Filter for missing values */ + struct casefilter *filter; }; /* First pass */ static void levene_precalc (const struct levene_info *l); -static int levene_calc (const struct dictionary *dict, const struct ccase *, void *); +static int levene_calc (const struct dictionary *dict, const struct ccase *, + const struct levene_info *l); static void levene_postcalc (void *); /* Second pass */ -static void levene2_precalc (void *); -static int levene2_calc (const struct dictionary *,const struct ccase *, void *); +static void levene2_precalc (struct levene_info *l); +static int levene2_calc (const struct dictionary *, const struct ccase *, + struct levene_info *l); static void levene2_postcalc (void *); @@ -96,7 +96,7 @@ void levene(const struct dictionary *dict, const struct casefile *cf, struct variable *v_indep, size_t n_dep, struct variable **v_dep, - enum lev_missing missing, is_missing_func value_is_missing) + struct casefilter *filter) { struct casereader *r; struct ccase c; @@ -105,31 +105,28 @@ levene(const struct dictionary *dict, l.n_dep = n_dep; l.v_indep = v_indep; l.v_dep = v_dep; - l.missing = missing; - l.is_missing = value_is_missing; - + l.filter = filter; - levene_precalc(&l); - for(r = casefile_get_reader (cf); + levene_precalc (&l); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - levene_calc (dict, &c,&l); + levene_calc (dict, &c, &l); } casereader_destroy (r); - levene_postcalc(&l); + levene_postcalc (&l); levene2_precalc(&l); - for(r = casefile_get_reader (cf); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { levene2_calc (dict, &c,&l); } casereader_destroy (r); - levene2_postcalc(&l); - + levene2_postcalc (&l); } /* Internal variables used in calculating the Levene statistic */ @@ -185,30 +182,15 @@ levene_precalc (const struct levene_info *l) } static int -levene_calc (const struct dictionary *dict, const struct ccase *c, void *_l) +levene_calc (const struct dictionary *dict, const struct ccase *c, + const struct levene_info *l) { size_t i; bool warn = false; - struct levene_info *l = (struct levene_info *) _l; const union value *gv = case_data (c, l->v_indep->fv); struct group_statistics key; double weight = dict_get_case_weight (dict, c, &warn); - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( l->missing == LEV_LISTWISE ) - { - for (i = 0; i < l->n_dep; ++i) - { - struct variable *v = l->v_dep[i]; - const union value *val = case_data (c, v->fv); - - if (l->is_missing (&v->miss, val) ) - { - return 0; - } - } - } - key.id = *gv; for (i = 0; i < l->n_dep; ++i) @@ -224,7 +206,7 @@ levene_calc (const struct dictionary *dict, const struct ccase *c, void *_l) if ( 0 == gs ) continue ; - if ( ! l->is_missing(&var->miss, v)) + if ( ! casefilter_variable_missing (l->filter, c, var)) { levene_z= fabs(v->f - gs->mean); lz[i].grand_total += levene_z * weight; @@ -232,7 +214,6 @@ levene_calc (const struct dictionary *dict, const struct ccase *c, void *_l) gs->lz_total += levene_z * weight; } - } return 0; } @@ -255,20 +236,21 @@ levene_postcalc (void *_l) } + /* The denominator for the expression for the Levene */ -static double *lz_denominator; +static double *lz_denominator = 0; static void -levene2_precalc (void *_l) +levene2_precalc (struct levene_info *l) { size_t v; - struct levene_info *l = (struct levene_info *) _l; - lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator); /* This stuff could go in the first post calc . . . */ - for (v = 0; v < l->n_dep; ++v) + for (v = 0; + v < l->n_dep; + ++v) { struct hsh_iterator hi; struct group_statistics *g; @@ -288,33 +270,17 @@ levene2_precalc (void *_l) } static int -levene2_calc (const struct dictionary *dict, const struct ccase *c, void *_l) +levene2_calc (const struct dictionary *dict, const struct ccase *c, + struct levene_info *l) { size_t i; bool warn = false; - struct levene_info *l = (struct levene_info *) _l; - double weight = dict_get_case_weight (dict, c, &warn); const union value *gv = case_data (c, l->v_indep->fv); struct group_statistics key; - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( l->missing == LEV_LISTWISE ) - { - for (i = 0; i < l->n_dep; ++i) - { - struct variable *v = l->v_dep[i]; - const union value *val = case_data (c, v->fv); - - if (l->is_missing(&v->miss, val) ) - { - return 0; - } - } - } - key.id = *gv; for (i = 0; i < l->n_dep; ++i) @@ -329,10 +295,11 @@ levene2_calc (const struct dictionary *dict, const struct ccase *c, void *_l) if ( 0 == gs ) continue; - if ( ! l->is_missing (&var->miss, v) ) + if ( ! casefilter_variable_missing (l->filter, c, var)) + { levene_z = fabs(v->f - gs->mean); - lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean); + lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean); } } diff --git a/src/math/levene.h b/src/math/levene.h index c94c3d61..ba7dea5d 100644 --- a/src/math/levene.h +++ b/src/math/levene.h @@ -26,9 +26,6 @@ #include #include -/* What to do with missing values */ -enum lev_missing { LEV_ANALYSIS, LEV_LISTWISE }; - /* Calculate the Levene statistic The independent variable : v_indep; @@ -41,10 +38,11 @@ The dependent variables : v_dep; struct dictionary ; +struct casefilter ; void levene(const struct dictionary *dict, const struct casefile *cf, struct variable *v_indep, size_t n_dep, struct variable **v_dep, - enum lev_missing, is_missing_func); + struct casefilter *filter); diff --git a/src/math/sort.c b/src/math/sort.c index 0d32d5cc..9f5e5fb9 100644 --- a/src/math/sort.c +++ b/src/math/sort.c @@ -93,7 +93,7 @@ static bool sort_to_casefile_callback (const struct casefile *cf, void *cb_data_) { struct sort_to_casefile_cb_data *cb_data = cb_data_; - cb_data->output = sort_execute (casefile_get_reader (cf), cb_data->criteria); + cb_data->output = sort_execute (casefile_get_reader (cf, NULL), cb_data->criteria); return cb_data->output != NULL; } diff --git a/src/ui/gui/psppire-case-file.c b/src/ui/gui/psppire-case-file.c index b16fd7ca..6b0f2111 100644 --- a/src/ui/gui/psppire-case-file.c +++ b/src/ui/gui/psppire-case-file.c @@ -326,7 +326,7 @@ psppire_case_file_sort(PsppireCaseFile *cf, const struct sort_criteria *sc) struct ccase cc; gint c; struct casefile *cfile; - struct casereader *reader = casefile_get_reader(cf->flexifile); + struct casereader *reader = casefile_get_reader (cf->flexifile, NULL); const int value_cnt = casefile_get_value_cnt(cf->flexifile); cfile = sort_execute(reader, sc);