X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ft-test.q;h=b593ebc4bc3df675f99a01dfe9e5d109c8b5a946;hb=3e2cb3dda617f6d2a97e5df635d592a691d3c9b0;hp=d0fff3fd0cb4505ecdaa801bf248fbde829c6415;hpb=dcf9b154cbcaa35c3d8459a201b77eec8bcb30bd;p=pspp diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q index d0fff3fd0c..b593ebc4bc 100644 --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@ -1,8 +1,6 @@ /* PSPP - computes sample statistics. -*-c-*- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by John Williams . - Almost completly re-written by John Darrington 2004 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -20,30 +18,38 @@ 02110-1301, USA. */ #include + #include -#include "message.h" +#include #include #include -#include -#include "alloc.h" -#include "case.h" -#include "casefile.h" -#include "command.h" -#include "dictionary.h" -#include "message.h" -#include "group-proc.h" -#include "hash.h" -#include "levene.h" -#include "lexer.h" -#include "magic.h" -#include "misc.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "size_max.h" -#include "manager.h" -#include "str.h" -#include "table.h" -#include "value-labels.h" -#include "variable.h" -#include "procedure.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -54,22 +60,17 @@ "T-TEST" (tts_): +groups=custom; testval=double; - variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); - pairs=custom; - +missing=miss:!analysis/listwise, - incl:include/!exclude; - format=fmt:!labels/nolabels; + +variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); + +pairs=custom; + missing=miss:!analysis/listwise, + incl:include/!exclude; + +format=fmt:!labels/nolabels; criteria=:cin(d:criteria,"%s > 0. && %s < 1."). */ /* (declarations) */ /* (functions) */ - - -/* Function to use for testing for missing values */ -static is_missing_func *value_is_missing; - /* Variable for the GROUPS subcommand, if given. */ static struct variable *indep_var; @@ -109,7 +110,7 @@ static int n_pairs = 0 ; struct pair { /* The variables comprising the pair */ - struct variable *v[2]; + const struct variable *v[2]; /* The number of valid variable pairs */ double n; @@ -151,7 +152,7 @@ struct pair static struct pair *pairs=0; -static int parse_value (union value * v, int type) ; +static int parse_value (struct lexer *lexer, union value * v, enum var_type); /* Structures and Functions for the Statistics Summary Box */ struct ssbox; @@ -212,30 +213,36 @@ enum { }; -static int common_calc (const struct ccase *, void *); +static int common_calc (const struct dictionary *dict, + const struct ccase *, void *, + const struct casefilter *filter); static void common_precalc (struct cmd_t_test *); static void common_postcalc (struct cmd_t_test *); -static int one_sample_calc (const struct ccase *, void *); +static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, const struct casefilter *); static void one_sample_precalc (struct cmd_t_test *); static void one_sample_postcalc (struct cmd_t_test *); -static int paired_calc (const struct ccase *, void *); +static int paired_calc (const struct dictionary *dict, const struct ccase *, + struct cmd_t_test*, const struct casefilter *); static void paired_precalc (struct cmd_t_test *); static void paired_postcalc (struct cmd_t_test *); static void group_precalc (struct cmd_t_test *); -static int group_calc (const struct ccase *, struct cmd_t_test *); +static int group_calc (const struct dictionary *dict, const struct ccase *, + struct cmd_t_test *, const struct casefilter *); static void group_postcalc (struct cmd_t_test *); -static bool calculate(const struct casefile *cf, void *_mode); +static bool calculate(const struct ccase *first, + const struct casefile *cf, void *_mode, + const struct dataset *ds); static int mode; static struct cmd_t_test cmd; -static int bad_weight_warn; +static bool bad_weight_warn = false; static int compare_group_binary(const struct group_statistics *a, @@ -249,11 +256,11 @@ static unsigned hash_group_binary(const struct group_statistics *g, int -cmd_t_test(void) +cmd_t_test (struct lexer *lexer, struct dataset *ds) { bool ok; - if ( !parse_t_test(&cmd) ) + if ( !parse_t_test (lexer, ds, &cmd, NULL) ) return CMD_FAILURE; if (! cmd.sbc_criteria) @@ -297,29 +304,29 @@ cmd_t_test(void) int i; struct hsh_iterator hi; - struct hsh_table *hash; - struct variable *v; + struct const_hsh_table *hash; + const struct variable *v; - hash = hsh_create (n_pairs, compare_var_names, hash_var_name, 0, 0); + hash = const_hsh_create (n_pairs, compare_vars_by_name, hash_var_by_name, + 0, 0); for (i=0; i < n_pairs; ++i) { - hsh_insert(hash,pairs[i].v[0]); - hsh_insert(hash,pairs[i].v[1]); + const_hsh_insert (hash, pairs[i].v[0]); + const_hsh_insert (hash, pairs[i].v[1]); } assert(cmd.n_variables == 0); - cmd.n_variables = hsh_count(hash); + cmd.n_variables = const_hsh_count (hash); cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables, sizeof *cmd.v_variables); /* Iterate through the hash */ - for (i=0,v = (struct variable *) hsh_first(hash,&hi); + for (i=0,v = const_hsh_first (hash, &hi); v != 0; - v=hsh_next(hash,&hi) ) + v = const_hsh_next (hash, &hi) ) cmd.v_variables[i++]=v; - - hsh_destroy(hash); + const_hsh_destroy(hash); } } else if ( !cmd.sbc_variables) @@ -329,16 +336,9 @@ cmd_t_test(void) return CMD_FAILURE; } + bad_weight_warn = true; - /* If /MISSING=INCLUDE is set, then user missing values are ignored */ - if (cmd.incl == TTS_INCLUDE ) - value_is_missing = mv_is_value_system_missing; - else - value_is_missing = mv_is_value_missing; - - bad_weight_warn = 1; - - ok = multipass_procedure_with_splits (calculate, &cmd); + ok = multipass_procedure_with_splits (ds, calculate, &cmd); n_pairs=0; free(pairs); @@ -360,29 +360,29 @@ cmd_t_test(void) } static int -tts_custom_groups (struct cmd_t_test *cmd UNUSED) +tts_custom_groups (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED, void *aux UNUSED) { int n_group_values=0; - lex_match('='); + lex_match (lexer, '='); - indep_var = parse_variable (); + indep_var = parse_variable (lexer, dataset_dict (ds)); if (!indep_var) { - lex_error ("expecting variable name in GROUPS subcommand"); + lex_error (lexer, "expecting variable name in GROUPS subcommand"); return 0; } - if (indep_var->type == T_STRING && indep_var->width > MAX_SHORT_STRING) + if (var_is_long_string (indep_var)) { msg (SE, _("Long string variable %s is not valid here."), - indep_var->name); + var_get_name (indep_var)); return 0; } - if (!lex_match ('(')) + if (!lex_match (lexer, '(')) { - if (indep_var->type == NUMERIC) + if (var_is_numeric (indep_var)) { gp.v.g_value[0].f = 1; gp.v.g_value[1].f = 2; @@ -401,15 +401,14 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) } } - if (!parse_value (&gp.v.g_value[0], indep_var->type)) + if (!parse_value (lexer, &gp.v.g_value[0], var_get_type (indep_var))) return 0; - lex_match (','); - if (lex_match (')')) + lex_match (lexer, ','); + if (lex_match (lexer, ')')) { - if (indep_var->type != NUMERIC) + if (var_is_alpha (indep_var)) { - msg (SE, _("When applying GROUPS to a string variable, two " "values must be specified.")); return 0; @@ -421,11 +420,11 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) return 1; } - if (!parse_value (&gp.v.g_value[1], indep_var->type)) + if (!parse_value (lexer, &gp.v.g_value[1], var_get_type (indep_var))) return 0; n_group_values = 2; - if (!lex_force_match (')')) + if (!lex_force_match (lexer, ')')) return 0; if ( n_group_values == 2 ) @@ -439,9 +438,9 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) static int -tts_custom_pairs (struct cmd_t_test *cmd UNUSED) +tts_custom_pairs (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED, void *aux UNUSED) { - struct variable **vars; + const struct variable **vars; size_t n_vars; size_t n_pairs_local; @@ -449,10 +448,10 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) size_t n_after_WITH = SIZE_MAX; int paired ; /* Was the PAIRED keyword given ? */ - lex_match('='); + lex_match (lexer, '='); n_vars=0; - if (!parse_variables (default_dict, &vars, &n_vars, + if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars, PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) { free (vars); @@ -461,10 +460,10 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) assert (n_vars); n_before_WITH = 0; - if (lex_match (T_WITH)) + if (lex_match (lexer, T_WITH)) { n_before_WITH = n_vars; - if (!parse_variables (default_dict, &vars, &n_vars, + if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars, PV_DUPLICATE | PV_APPEND | PV_NUMERIC | PV_NO_SCRATCH)) { @@ -474,7 +473,7 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) n_after_WITH = n_vars - n_before_WITH; } - paired = (lex_match ('(') && lex_match_id ("PAIRED") && lex_match (')')); + paired = (lex_match (lexer, '(') && lex_match_id (lexer, "PAIRED") && lex_match (lexer, ')')); /* Determine the number of pairs needed */ if (paired) @@ -485,7 +484,7 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) msg (SE, _("PAIRED was specified but the number of variables " "preceding WITH (%d) did not match the number " "following (%d)."), - n_before_WITH, n_after_WITH ); + (int) n_before_WITH, (int) n_after_WITH ); return 0; } n_pairs_local = n_before_WITH; @@ -564,22 +563,22 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) /* Parses the current token (numeric or string, depending on type) value v and returns success. */ static int -parse_value (union value * v, int type ) +parse_value (struct lexer *lexer, union value * v, enum var_type type) { - if (type == NUMERIC) + if (type == VAR_NUMERIC) { - if (!lex_force_num ()) + if (!lex_force_num (lexer)) return 0; - v->f = tokval; + v->f = lex_tokval (lexer); } else { - if (!lex_force_string ()) + if (!lex_force_string (lexer)) return 0; - strncpy (v->s, ds_c_str (&tokstr), ds_length (&tokstr)); + strncpy (v->s, ds_cstr (lex_tokstr (lexer)), ds_length (lex_tokstr (lexer))); } - lex_get (); + lex_get (lexer); return 1; } @@ -617,7 +616,7 @@ ssbox_create(struct ssbox *ssb, struct cmd_t_test *cmd, int mode) ssbox_paired_init(ssb,cmd); break; default: - assert(0); + NOT_REACHED (); } } @@ -676,7 +675,7 @@ ssbox_one_sample_init(struct ssbox *this, this->populate = ssbox_one_sample_populate; ssbox_base_init(this, hsize,vsize); - tab_title (this->t, 0, _("One-Sample Statistics")); + tab_title (this->t, _("One-Sample Statistics")); tab_vline(this->t, TAL_2, 1,0,vsize - 1); tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); @@ -698,9 +697,9 @@ ssbox_independent_samples_init(struct ssbox *this, this->populate = ssbox_independent_samples_populate; ssbox_base_init(this, hsize,vsize); - tab_title (this->t, 0, _("Group Statistics")); - tab_vline(this->t,0,1,0,vsize - 1); - tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, indep_var->name); + tab_vline (this->t, TAL_GAP, 1, 0,vsize - 1); + tab_title (this->t, _("Group Statistics")); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, var_get_name (indep_var)); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -715,16 +714,16 @@ ssbox_independent_samples_populate(struct ssbox *ssb, { int i; - char *val_lab0=0; - char *val_lab1=0; + const char *val_lab0; + const char *val_lab1; double indep_value[2]; char prefix[2][3]={"",""}; - if ( indep_var->type == NUMERIC ) + if ( var_is_numeric (indep_var) ) { - val_lab0 = val_labs_find( indep_var->val_labs,gp.v.g_value[0]); - val_lab1 = val_labs_find( indep_var->val_labs,gp.v.g_value[1]); + val_lab0 = var_lookup_value_label (indep_var, &gp.v.g_value[0]); + val_lab1 = var_lookup_value_label (indep_var, &gp.v.g_value[1]); } else { @@ -749,11 +748,12 @@ ssbox_independent_samples_populate(struct ssbox *ssb, for (i=0; i < cmd->n_variables; ++i) { - struct variable *var = cmd->v_variables[i]; + const struct variable *var = cmd->v_variables[i]; struct hsh_table *grp_hash = group_proc_get (var)->group_hash; int count=0; - tab_text (ssb->t, 0, i*2+1, TAB_LEFT, cmd->v_variables[i]->name); + tab_text (ssb->t, 0, i*2+1, TAB_LEFT, + var_get_name (cmd->v_variables[i])); if (val_lab0) tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, @@ -799,7 +799,7 @@ ssbox_independent_samples_populate(struct ssbox *ssb, gs = hsh_find(grp_hash, (void *) &search_val); assert(gs); - tab_float(ssb->t, 2 ,i*2+count+1, TAB_RIGHT, gs->n, 2, 0); + tab_float(ssb->t, 2 ,i*2+count+1, TAB_RIGHT, gs->n, 10, 0); tab_float(ssb->t, 3 ,i*2+count+1, TAB_RIGHT, gs->mean, 8, 2); tab_float(ssb->t, 4 ,i*2+count+1, TAB_RIGHT, gs->std_dev, 8, 3); tab_float(ssb->t, 5 ,i*2+count+1, TAB_RIGHT, gs->se_mean, 8, 3); @@ -822,8 +822,8 @@ ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd UNUSED) this->populate = ssbox_paired_populate; ssbox_base_init(this, hsize,vsize); - tab_title (this->t, 0, _("Paired Sample Statistics")); - tab_vline(this->t,TAL_0,1,0,vsize-1); + tab_title (this->t, _("Paired Sample Statistics")); + tab_vline(this->t,TAL_GAP,1,0,vsize-1); tab_vline(this->t,TAL_2,2,0,vsize-1); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N")); @@ -854,11 +854,12 @@ ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd UNUSED) /* Titles */ - tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, pairs[i].v[j]->name); + tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, + var_get_name (pairs[i].v[j])); /* Values */ tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], 8, 2); - tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 2, 0); + tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 10, 0); tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], 8, 3); tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j]/sqrt(pairs[i].n), 8, 3); @@ -878,8 +879,8 @@ ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd) { struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; - tab_text (ssb->t, 0, i+1, TAB_LEFT, cmd->v_variables[i]->name); - tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 2, 0); + tab_text (ssb->t, 0, i+1, TAB_LEFT, var_get_name (cmd->v_variables[i])); + tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 10, 0); tab_float (ssb->t,2, i+1, TAB_RIGHT, gs->mean, 8, 2); tab_float (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, 8, 2); tab_float (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, 8, 3); @@ -931,7 +932,7 @@ trbox_create(struct trbox *trb, trbox_paired_init(trb,cmd); break; default: - assert(0); + NOT_REACHED (); } } @@ -961,7 +962,7 @@ trbox_independent_samples_init(struct trbox *self, self->populate = trbox_independent_samples_populate; trbox_base_init(self,cmd->n_variables*2,hsize); - tab_title(self->t,0,_("Independent Samples Test")); + tab_title(self->t,_("Independent Samples Test")); tab_hline(self->t,TAL_1,2,hsize-1,1); tab_vline(self->t,TAL_2,2,0,vsize-1); tab_vline(self->t,TAL_1,4,0,vsize-1); @@ -1010,7 +1011,7 @@ trbox_independent_samples_populate(struct trbox *self, double std_err_diff; double mean_diff; - struct variable *var = cmd->v_variables[i]; + const struct variable *var = cmd->v_variables[i]; struct group_proc *grp_data = group_proc_get (var); struct hsh_table *grp_hash = grp_data->group_hash; @@ -1037,7 +1038,7 @@ trbox_independent_samples_populate(struct trbox *self, assert(gs1); - tab_text (self->t, 0, i*2+3, TAB_LEFT, cmd->v_variables[i]->name); + tab_text (self->t, 0, i*2+3, TAB_LEFT, var_get_name (cmd->v_variables[i])); tab_text (self->t, 1, i*2+3, TAB_LEFT, _("Equal variances assumed")); @@ -1051,7 +1052,7 @@ trbox_independent_samples_populate(struct trbox *self, tab_float(self->t, 3, i*2+3, TAB_CENTER, q, 8,3 ); df = gs0->n + gs1->n - 2.0 ; - tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 2, 0); + tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 10, 0); pooled_variance = ( (gs0->n )*pow2(gs0->s_std_dev) + @@ -1149,14 +1150,14 @@ trbox_paired_init(struct trbox *self, self->populate = trbox_paired_populate; trbox_base_init(self,n_pairs,hsize); - tab_title (self->t, 0, _("Paired Samples Test")); + tab_title (self->t, _("Paired Samples Test")); tab_hline(self->t,TAL_1,2,6,1); tab_vline(self->t,TAL_2,2,0,vsize - 1); tab_joint_text(self->t,2,0,6,0,TAB_CENTER,_("Paired Differences")); tab_box(self->t,-1,-1,-1,TAL_1, 2,1,6,vsize-1); tab_box(self->t,-1,-1,-1,TAL_1, 6,0,hsize-1,vsize-1); tab_hline(self->t,TAL_1,5,6, 2); - tab_vline(self->t,TAL_0,6,0,1); + tab_vline(self->t,TAL_GAP,6,0,1); tab_joint_text(self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, _("%g%% Confidence Interval of the Difference"), @@ -1191,7 +1192,8 @@ trbox_paired_populate(struct trbox *trb, tab_text (trb->t, 0, i+3, TAB_LEFT | TAT_PRINTF, _("Pair %d"),i); tab_text (trb->t, 1, i+3, TAB_LEFT | TAT_PRINTF, "%s - %s", - pairs[i].v[0]->name, pairs[i].v[1]->name); + var_get_name (pairs[i].v[0]), + var_get_name (pairs[i].v[1])); tab_float(trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, 8, 4); @@ -1223,7 +1225,7 @@ trbox_paired_populate(struct trbox *trb, tab_float(trb->t, 7, i+3, TAB_RIGHT, t , 8,3 ); /* Degrees of freedom */ - tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 2, 0 ); + tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 10, 0 ); p = gsl_cdf_tdist_P(t,df); q = gsl_cdf_tdist_P(t,df); @@ -1243,7 +1245,7 @@ trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) self->populate = trbox_one_sample_populate; trbox_base_init(self, cmd->n_variables,hsize); - tab_title (self->t, 0, _("One-Sample Test")); + tab_title (self->t, _("One-Sample Test")); tab_hline(self->t, TAL_1, 1, hsize - 1, 1); tab_vline(self->t, TAL_2, 1, 0, vsize - 1); @@ -1257,7 +1259,7 @@ trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) _("%g%% Confidence Interval of the Difference"), cmd->criteria*100.0); - tab_vline(self->t,TAL_0,6,1,1); + tab_vline(self->t,TAL_GAP,6,1,1); tab_hline(self->t,TAL_1,5,6,2); tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _("t")); tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("df")); @@ -1285,7 +1287,7 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; - tab_text (trb->t, 0, i+3, TAB_LEFT, cmd->v_variables[i]->name); + tab_text (trb->t, 0, i+3, TAB_LEFT, var_get_name (cmd->v_variables[i])); t = (gs->mean - cmd->n_testval[0] ) * sqrt(gs->n) / gs->std_dev ; @@ -1358,7 +1360,7 @@ pscbox(void) tab_hline(table, TAL_2, 0, cols - 1, 1); tab_vline(table, TAL_2, 2, 0, rows - 1); tab_dim(table, tab_natural_dimensions); - tab_title(table, 0, _("Paired Samples Correlations")); + tab_title(table, _("Paired Samples Correlations")); /* column headings */ tab_text(table, 2,0, TAB_CENTER | TAT_TITLE, _("N")); @@ -1381,7 +1383,9 @@ pscbox(void) _("Pair %d"), i); tab_text(table, 1,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _("%s & %s"), pairs[i].v[0]->name, pairs[i].v[1]->name); + _("%s & %s"), + var_get_name (pairs[i].v[0]), + var_get_name (pairs[i].v[1])); /* row data */ @@ -1404,53 +1408,37 @@ pscbox(void) /* Per case calculations common to all variants of the T test */ static int -common_calc (const struct ccase *c, void *_cmd) +common_calc (const struct dictionary *dict, + const struct ccase *c, + void *_cmd, + const struct casefilter *filter) { int i; struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd; - double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); - - - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( cmd->miss == TTS_LISTWISE ) - { - for(i=0; i< cmd->n_variables ; ++i) - { - struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); + double weight = dict_get_case_weight (dict, c, &bad_weight_warn); - if (value_is_missing(&v->miss, val) ) - { - return 0; - } - } - } /* Listwise has to be implicit if the independent variable is missing ?? */ if ( cmd->sbc_groups ) { - const union value *gv = case_data (c, indep_var->fv); - if ( value_is_missing(&indep_var->miss, gv) ) - { - return 0; - } + if ( casefilter_variable_missing (filter, c, indep_var) ) + return 0; } - - for(i=0; i< cmd->n_variables ; ++i) + for(i = 0; i < cmd->n_variables ; ++i) { - struct group_statistics *gs; - struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); - - gs= &group_proc_get (cmd->v_variables[i])->ugs; + const struct variable *v = cmd->v_variables[i]; - if (! value_is_missing(&v->miss, val) ) + if (! casefilter_variable_missing (filter, c, v) ) { - gs->n+=weight; - gs->sum+=weight * val->f; - gs->ssq+=weight * val->f * val->f; + struct group_statistics *gs; + const union value *val = case_data (c, v); + gs = &group_proc_get (cmd->v_variables[i])->ugs; + + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * val->f * val->f; } } return 0; @@ -1476,11 +1464,10 @@ common_precalc ( struct cmd_t_test *cmd ) /* Post calculations common to all variants of the T test */ void -common_postcalc ( struct cmd_t_test *cmd ) +common_postcalc (struct cmd_t_test *cmd) { int i=0; - for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; @@ -1503,38 +1490,26 @@ common_postcalc ( struct cmd_t_test *cmd ) /* Per case calculations for one sample t test */ static int -one_sample_calc (const struct ccase *c, void *cmd_) +one_sample_calc (const struct dictionary *dict, + const struct ccase *c, void *cmd_, + const struct casefilter *filter) { int i; - struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; + struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; - double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); + double weight = dict_get_case_weight (dict, c, &bad_weight_warn); - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( cmd->miss == TTS_LISTWISE ) - { - for(i=0; i< cmd->n_variables ; ++i) - { - struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); - - if (value_is_missing(&v->miss, val) ) - { - return 0; - } - } - } for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); + const struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v); gs= &group_proc_get (cmd->v_variables[i])->ugs; - - if ( ! value_is_missing(&v->miss, val)) + + if ( ! casefilter_variable_missing (filter, c, v)) gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } @@ -1593,57 +1568,36 @@ paired_precalc (struct cmd_t_test *cmd UNUSED) static int -paired_calc (const struct ccase *c, void *cmd_) +paired_calc (const struct dictionary *dict, const struct ccase *c, + struct cmd_t_test *cmd UNUSED, const struct casefilter *filter) { int i; - struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; - - double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); - - /* Skip the entire case if /MISSING=LISTWISE is set , - AND one member of a pair is missing */ - if ( cmd->miss == TTS_LISTWISE ) - { - for(i=0; i < n_pairs ; ++i ) - { - struct variable *v0 = pairs[i].v[0]; - struct variable *v1 = pairs[i].v[1]; - - const union value *val0 = case_data (c, v0->fv); - const union value *val1 = case_data (c, v1->fv); - - if ( value_is_missing(&v0->miss, val0) || - value_is_missing(&v1->miss, val1) ) - { - return 0; - } - } - } + double weight = dict_get_case_weight (dict, c, &bad_weight_warn); for(i=0; i < n_pairs ; ++i ) { - struct variable *v0 = pairs[i].v[0]; - struct variable *v1 = pairs[i].v[1]; + const struct variable *v0 = pairs[i].v[0]; + const struct variable *v1 = pairs[i].v[1]; - const union value *val0 = case_data (c, v0->fv); - const union value *val1 = case_data (c, v1->fv); + const union value *val0 = case_data (c, v0); + const union value *val1 = case_data (c, v1); - if ( ( !value_is_missing(&v0->miss, val0) - && !value_is_missing(&v1->miss, val1) ) ) - { - pairs[i].n += weight; - pairs[i].sum[0] += weight * val0->f; - pairs[i].sum[1] += weight * val1->f; + if ( ! casefilter_variable_missing (filter, c, v0) && + ! casefilter_variable_missing (filter, c, v1) ) + { + pairs[i].n += weight; + pairs[i].sum[0] += weight * val0->f; + pairs[i].sum[1] += weight * val1->f; - pairs[i].ssq[0] += weight * pow2(val0->f); - pairs[i].ssq[1] += weight * pow2(val1->f); + pairs[i].ssq[0] += weight * pow2(val0->f); + pairs[i].ssq[1] += weight * pow2(val1->f); - pairs[i].sum_of_prod += weight * val0->f * val1->f ; + pairs[i].sum_of_prod += weight * val0->f * val1->f ; - pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ; - pairs[i].ssq_diffs += weight * pow2(val0->f - val1->f); - } + pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ; + pairs[i].ssq_diffs += weight * pow2(val0->f - val1->f); + } } return 0; @@ -1701,7 +1655,7 @@ group_precalc (struct cmd_t_test *cmd ) /* There's always 2 groups for a T - TEST */ ttpr->n_groups = 2; - gp.indep_width = indep_var->width; + gp.indep_width = var_get_width (indep_var); ttpr->group_hash = hsh_create(2, (hsh_compare_func *) compare_group_binary, @@ -1738,39 +1692,26 @@ group_precalc (struct cmd_t_test *cmd ) } static int -group_calc (const struct ccase *c, struct cmd_t_test *cmd) +group_calc (const struct dictionary *dict, + const struct ccase *c, struct cmd_t_test *cmd, + const struct casefilter *filter) { int i; - const union value *gv = case_data (c, indep_var->fv); + const double weight = + dict_get_case_weight (dict, c, &bad_weight_warn); - const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); + const union value *gv; - if ( value_is_missing(&indep_var->miss, gv) ) - { - return 0; - } - - if ( cmd->miss == TTS_LISTWISE ) - { - for(i=0; i< cmd->n_variables ; ++i) - { - struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v->fv); - - if (value_is_missing(&v->miss, val) ) - { - return 0; - } - } - } + if ( casefilter_variable_missing (filter, c, indep_var)) + return 0; - gv = case_data (c, indep_var->fv); + gv = case_data (c, indep_var); for(i=0; i< cmd->n_variables ; ++i) { - struct variable *var = cmd->v_variables[i]; - const union value *val = case_data (c, var->fv); + const struct variable *var = cmd->v_variables[i]; + const union value *val = case_data (c, var); struct hsh_table *grp_hash = group_proc_get (var)->group_hash; struct group_statistics *gs; @@ -1781,11 +1722,11 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) if ( ! gs ) return 0; - if ( !value_is_missing(&var->miss, val) ) + if ( ! casefilter_variable_missing (filter, c, var) ) { - gs->n+=weight; - gs->sum+=weight * val->f; - gs->ssq+=weight * pow2(val->f); + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2(val->f); } } @@ -1798,9 +1739,9 @@ group_postcalc ( struct cmd_t_test *cmd ) { int i; - for(i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < cmd->n_variables ; ++i) { - struct variable *var = cmd->v_variables[i]; + const struct variable *var = cmd->v_variables[i]; struct hsh_table *grp_hash = group_proc_get (var)->group_hash; struct hsh_iterator g; struct group_statistics *gs; @@ -1831,8 +1772,10 @@ group_postcalc ( struct cmd_t_test *cmd ) static bool -calculate(const struct casefile *cf, void *cmd_) +calculate(const struct ccase *first, const struct casefile *cf, + void *cmd_, const struct dataset *ds) { + const struct dictionary *dict = dataset_dict (ds); struct ssbox stat_summary_box; struct trbox test_results_box; @@ -1841,60 +1784,70 @@ calculate(const struct casefile *cf, void *cmd_) struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; - common_precalc(cmd); - for(r = casefile_get_reader (cf); + struct casefilter *filter = casefilter_create ((cmd->miss != TTS_INCLUDE + ? MV_ANY : MV_SYSTEM), + NULL, 0); + + if ( cmd->miss == TTS_LISTWISE ) + casefilter_add_variables (filter, + cmd->v_variables, cmd->n_variables); + + output_split_file_values (ds, first); + common_precalc (cmd); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - common_calc(&c,cmd); + common_calc (dict, &c, cmd, filter); } + casereader_destroy (r); - common_postcalc(cmd); + common_postcalc (cmd); switch(mode) { case T_1_SAMPLE: - one_sample_precalc(cmd); - for(r = casefile_get_reader (cf); + one_sample_precalc (cmd); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - one_sample_calc(&c,cmd); + one_sample_calc (dict, &c, cmd, filter); } casereader_destroy (r); - one_sample_postcalc(cmd); - + one_sample_postcalc (cmd); break; case T_PAIRED: paired_precalc(cmd); - for(r = casefile_get_reader (cf); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - paired_calc(&c,cmd); + paired_calc (dict, &c, cmd, filter); } casereader_destroy (r); - paired_postcalc(cmd); + paired_postcalc (cmd); break; case T_IND_SAMPLES: group_precalc(cmd); - for(r = casefile_get_reader (cf); + for(r = casefile_get_reader (cf, filter); casereader_read (r, &c) ; case_destroy (&c)) { - group_calc(&c,cmd); + group_calc (dict, &c, cmd, filter); } casereader_destroy (r); group_postcalc(cmd); - levene(cf, indep_var, cmd->n_variables, cmd->v_variables, - (cmd->miss == TTS_LISTWISE)?LEV_LISTWISE:LEV_ANALYSIS , - value_is_missing); + levene (dict, cf, indep_var, cmd->n_variables, cmd->v_variables, + filter); break; } + casefilter_destroy (filter); + ssbox_create(&stat_summary_box,cmd,mode); ssbox_populate(&stat_summary_box,cmd); ssbox_finalize(&stat_summary_box); @@ -1963,7 +1916,7 @@ hash_group_binary(const struct group_statistics *g, flag = which_group(g,p); } else - assert(0); + NOT_REACHED (); return flag; }