X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ft-test.q;h=d5349dddf6f1a9b0431ef70c59266f9de0a757e5;hb=ef05645131b8163a4bb9839e609bb76d553b80e9;hp=0f4133a6c70577588637f88e7ac0ceb9cc776390;hpb=b9e28aa5614a079548c616bcf97aa804024ad647;p=pspp-builds.git diff --git a/src/t-test.q b/src/t-test.q index 0f4133a6..d5349ddd 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -27,7 +27,9 @@ #include #include "alloc.h" #include "str.h" +#include "case.h" #include "command.h" +#include "dictionary.h" #include "lexer.h" #include "error.h" #include "magic.h" @@ -38,13 +40,15 @@ #include "var.h" #include "vfm.h" #include "hash.h" -#include "t-test.h" +#include "group_proc.h" +#include "casefile.h" #include "levene.h" +/* (headers) */ /* (specification) "T-TEST" (tts_): +groups=custom; - +testval=double; + testval=double; variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); pairs=custom; +missing=miss:!analysis/listwise, @@ -56,7 +60,7 @@ /* (functions) */ -static struct cmd_t_test cmd; + /* Function to use for testing for missing values */ static is_missing_func value_is_missing; @@ -64,12 +68,34 @@ static is_missing_func value_is_missing; /* Variable for the GROUPS subcommand, if given. */ static struct variable *indep_var; -/* GROUPS: Number of values specified by the user; the values - specified if any. */ +enum comparison + { + CMP_LE = -2, + CMP_EQ = 0, + }; -static int n_group_values; -static union value groups_values[2]; -static enum comparison criteria[2]; +struct group_properties +{ + /* The comparison criterion */ + enum comparison criterion; + + /* The width of the independent variable */ + int indep_width ; + + union { + /* The value of the independent variable at which groups are determined to + belong to one group or the other */ + double critical_value; + + + /* The values of the independent variable for each group */ + union value g_value[2]; + } v ; + +}; + + +static struct group_properties gp ; @@ -181,35 +207,48 @@ enum { }; -static int common_calc (struct ccase *, void *); -static void common_precalc (void *); -static void common_postcalc (void *); +static int common_calc (const struct ccase *, void *); +static void common_precalc (struct cmd_t_test *); +static void common_postcalc (struct cmd_t_test *); -static int one_sample_calc (struct ccase *, void *); -static void one_sample_precalc (void *); -static void one_sample_postcalc (void *); +static int one_sample_calc (const struct ccase *, void *); +static void one_sample_precalc (struct cmd_t_test *); +static void one_sample_postcalc (struct cmd_t_test *); -static int paired_calc (struct ccase *, void *); -static void paired_precalc (void *); -static void paired_postcalc (void *); +static int paired_calc (const struct ccase *, void *); +static void paired_precalc (struct cmd_t_test *); +static void paired_postcalc (struct cmd_t_test *); -static void group_precalc (void *); -static int group_calc (struct ccase *, void *); -static void group_postcalc (void *); +static void group_precalc (struct cmd_t_test *); +static int group_calc (const struct ccase *, struct cmd_t_test *); +static void group_postcalc (struct cmd_t_test *); static int compare_var_name (const void *a_, const void *b_, void *v_ UNUSED); static unsigned hash_var_name (const void *a_, void *v_ UNUSED); +static void calculate(const struct casefile *cf, void *_mode); + +static int mode; + +static struct cmd_t_test cmd; + +static int bad_weight_warn; + + +static int compare_group_binary(const struct group_statistics *a, + const struct group_statistics *b, + struct group_properties *p); + + +static unsigned hash_group_binary(const struct group_statistics *g, + struct group_properties *p); + int cmd_t_test(void) { - int mode; - - struct ssbox stat_summary_box; - struct trbox test_results_box; if ( !parse_t_test(&cmd) ) return CMD_FAILURE; @@ -291,36 +330,9 @@ cmd_t_test(void) else value_is_missing = is_missing; - procedure_with_splits (common_precalc, common_calc, common_postcalc, NULL); - - switch(mode) - { - case T_1_SAMPLE: - procedure_with_splits (one_sample_precalc, one_sample_calc, - one_sample_postcalc, NULL); - break; - case T_PAIRED: - procedure_with_splits (paired_precalc, paired_calc, paired_postcalc, - NULL); - break; - case T_IND_SAMPLES: - procedure_with_splits(group_precalc,group_calc,group_postcalc, NULL); - levene(indep_var, cmd.n_variables, cmd.v_variables, - (cmd.miss == TTS_LISTWISE)?LEV_LISTWISE:LEV_ANALYSIS , - value_is_missing); - break; - } + bad_weight_warn = 1; - ssbox_create(&stat_summary_box,&cmd,mode); - ssbox_populate(&stat_summary_box,&cmd); - ssbox_finalize(&stat_summary_box); - - if ( mode == T_PAIRED) - pscbox(); - - trbox_create(&test_results_box,&cmd,mode); - trbox_populate(&test_results_box,&cmd); - trbox_finalize(&test_results_box); + multipass_procedure_with_splits (calculate, &cmd); n_pairs=0; free(pairs); @@ -328,11 +340,12 @@ cmd_t_test(void) if ( mode == T_IND_SAMPLES) { - int i; + int v; /* Destroy any group statistics we created */ - for (i= 0 ; i < cmd.n_variables ; ++i ) + for (v = 0 ; v < cmd.n_variables ; ++v ) { - free(cmd.v_variables[i]->p.t_t.gs); + struct group_proc *grpp = group_proc_get (cmd.v_variables[v]); + free(grpp->group_hash); } } @@ -342,6 +355,7 @@ cmd_t_test(void) static int tts_custom_groups (struct cmd_t_test *cmd UNUSED) { + int n_group_values=0; lex_match('='); @@ -371,10 +385,13 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) { if (indep_var->type == NUMERIC) { - groups_values[0].f = 1; - groups_values[1].f = 2; - criteria[0] = criteria[1] = CMP_EQ; + gp.v.g_value[0].f = 1; + gp.v.g_value[1].f = 2; + + gp.criterion = CMP_EQ; + n_group_values = 2; + return 1; } else @@ -385,27 +402,32 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) } } - if (!parse_value (&groups_values[0],indep_var->type)) + if (!parse_value (&gp.v.g_value[0],indep_var->type)) return 0; lex_match (','); if (lex_match (')')) { - criteria[0] = CMP_LE; - criteria[1] = CMP_GT; - groups_values[1] = groups_values[0]; + gp.criterion = CMP_LE; + gp.v.critical_value = gp.v.g_value[0].f; + n_group_values = 1; return 1; } - if (!parse_value (&groups_values[1],indep_var->type)) + if (!parse_value (&gp.v.g_value[1],indep_var->type)) return 0; - + n_group_values = 2; if (!lex_force_match (')')) return 0; - criteria[0] = criteria[1] = CMP_EQ; + if ( n_group_values == 2 ) + gp.criterion = CMP_EQ ; + else + gp.criterion = CMP_LE ; + + return 1; } @@ -536,6 +558,7 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) n_pairs+=n_pairs_local; + free (vars); return 1; } @@ -554,7 +577,7 @@ parse_value (union value * v, int type ) { if (!lex_force_string ()) return 0; - strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr)); + strncpy (v->s, ds_c_str (&tokstr), ds_length (&tokstr)); } lex_get (); @@ -578,6 +601,7 @@ void ssbox_independent_samples_init(struct ssbox *this, void ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd); + /* Factory to create an ssbox */ void ssbox_create(struct ssbox *ssb, struct cmd_t_test *cmd, int mode) @@ -599,6 +623,7 @@ ssbox_create(struct ssbox *ssb, struct cmd_t_test *cmd, int mode) } + /* Despatcher for the populate method */ void ssbox_populate(struct ssbox *ssb,struct cmd_t_test *cmd) @@ -622,6 +647,8 @@ ssbox_base_finalize(struct ssbox *ssb) tab_submit(ssb->t); } + + /* Initialize a ssbox struct */ void ssbox_base_init(struct ssbox *this, int cols,int rows) @@ -651,7 +678,7 @@ ssbox_one_sample_init(struct ssbox *this, ssbox_base_init(this, hsize,vsize); tab_title (this->t, 0, _("One-Sample Statistics")); - tab_vline(this->t, TAL_2, 1,0,vsize); + tab_vline(this->t, TAL_2, 1,0,vsize - 1); tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -673,7 +700,7 @@ ssbox_independent_samples_init(struct ssbox *this, ssbox_base_init(this, hsize,vsize); tab_title (this->t, 0, _("Group Statistics")); - tab_vline(this->t,0,1,0,vsize); + tab_vline(this->t,0,1,0,vsize - 1); tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, indep_var->name); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); @@ -691,31 +718,41 @@ ssbox_independent_samples_populate(struct ssbox *ssb, char *val_lab0=0; char *val_lab1=0; + double indep_value[2]; char prefix[2][3]={"",""}; if ( indep_var->type == NUMERIC ) { - val_lab0 = val_labs_find( indep_var->val_labs,groups_values[0]); - val_lab1 = val_labs_find( indep_var->val_labs,groups_values[1]); + val_lab0 = val_labs_find( indep_var->val_labs,gp.v.g_value[0]); + val_lab1 = val_labs_find( indep_var->val_labs,gp.v.g_value[1]); } else { - val_lab0 = groups_values[0].s; - val_lab1 = groups_values[1].s; + val_lab0 = gp.v.g_value[0].s; + val_lab1 = gp.v.g_value[1].s; } - if (n_group_values == 1) + if (gp.criterion == CMP_LE ) { strcpy(prefix[0],"< "); strcpy(prefix[1],">="); + indep_value[0] = gp.v.critical_value; + indep_value[1] = gp.v.critical_value; + } + else + { + indep_value[0] = gp.v.g_value[0].f; + indep_value[1] = gp.v.g_value[1].f; } assert(ssb->t); for (i=0; i < cmd->n_variables; ++i) { - int g; + struct variable *var = cmd->v_variables[i]; + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; + int count=0; tab_text (ssb->t, 0, i*2+1, TAB_LEFT, cmd->v_variables[i]->name); @@ -723,26 +760,50 @@ ssbox_independent_samples_populate(struct ssbox *ssb, tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, "%s%s", prefix[0], val_lab0); else - tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, - "%s%g", prefix[0], groups_values[0].f); + tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, + "%s%g", prefix[0], indep_value[0]); if (val_lab1) tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, "%s%s", prefix[1], val_lab1); else - tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, - "%s%g", prefix[1], groups_values[1].f); + tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, + "%s%g", prefix[1], indep_value[1]); + /* Fill in the group statistics */ - for ( g=0; g < 2 ; ++g ) + for ( count = 0 ; count < 2 ; ++count ) { - struct group_statistics *gs = &cmd->v_variables[i]->p.t_t.gs[g]; + union value search_val; + + struct group_statistics *gs; - tab_float(ssb->t, 2 ,i*2+g+1, TAB_RIGHT, gs->n, 2, 0); - tab_float(ssb->t, 3 ,i*2+g+1, TAB_RIGHT, gs->mean, 8, 2); - tab_float(ssb->t, 4 ,i*2+g+1, TAB_RIGHT, gs->std_dev, 8, 3); - tab_float(ssb->t, 5 ,i*2+g+1, TAB_RIGHT, gs->se_mean, 8, 3); + if ( gp.criterion == CMP_LE ) + { + if ( count == 0 ) + { + /* less than ( < ) case */ + search_val.f = gp.v.critical_value - 1.0; + } + else + { + /* >= case */ + search_val.f = gp.v.critical_value + 1.0; + } + } + else + { + search_val = gp.v.g_value[count]; + } + + gs = hsh_find(grp_hash, (void *) &search_val); + assert(gs); + + tab_float(ssb->t, 2 ,i*2+count+1, TAB_RIGHT, gs->n, 2, 0); + tab_float(ssb->t, 3 ,i*2+count+1, TAB_RIGHT, gs->mean, 8, 2); + tab_float(ssb->t, 4 ,i*2+count+1, TAB_RIGHT, gs->std_dev, 8, 3); + tab_float(ssb->t, 5 ,i*2+count+1, TAB_RIGHT, gs->se_mean, 8, 3); } } } @@ -790,7 +851,7 @@ ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd UNUSED) { struct group_statistics *gs; - gs=&pairs[i].v[j]->p.t_t.ugs; + gs = &group_proc_get (pairs[i].v[j])->ugs; /* Titles */ @@ -816,8 +877,7 @@ ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd) for (i=0; i < cmd->n_variables; ++i) { - struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.t_t.ugs; + struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; tab_text (ssb->t, 0, i+1, TAB_LEFT, cmd->v_variables[i]->name); tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 2, 0); @@ -925,8 +985,8 @@ trbox_independent_samples_init(struct trbox *self, tab_text(self->t,10,2, TAB_CENTER | TAT_TITLE,_("Upper")); tab_joint_text(self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); } @@ -951,21 +1011,43 @@ trbox_independent_samples_populate(struct trbox *self, double std_err_diff; double mean_diff; - struct group_statistics *gs0 = &cmd->v_variables[i]->p.t_t.gs[0]; - struct group_statistics *gs1 = &cmd->v_variables[i]->p.t_t.gs[1]; + struct variable *var = cmd->v_variables[i]; + struct group_proc *grp_data = group_proc_get (var); + + struct hsh_table *grp_hash = grp_data->group_hash; + + struct group_statistics *gs0 ; + struct group_statistics *gs1 ; + + union value search_val; + + if ( gp.criterion == CMP_LE ) + search_val.f = gp.v.critical_value - 1.0; + else + search_val = gp.v.g_value[0]; + + gs0 = hsh_find(grp_hash, (void *) &search_val); + assert(gs0); + + if ( gp.criterion == CMP_LE ) + search_val.f = gp.v.critical_value + 1.0; + else + search_val = gp.v.g_value[1]; + + gs1 = hsh_find(grp_hash, (void *) &search_val); + assert(gs1); + tab_text (self->t, 0, i*2+3, TAB_LEFT, cmd->v_variables[i]->name); tab_text (self->t, 1, i*2+3, TAB_LEFT, _("Equal variances assumed")); - tab_float(self->t, 2, i*2+3, TAB_CENTER, - cmd->v_variables[i]->p.t_t.levene, 8,3); - + tab_float(self->t, 2, i*2+3, TAB_CENTER, grp_data->levene, 8,3); /* Now work out the significance of the Levene test */ - df1 = 1; df2 = cmd->v_variables[i]->p.t_t.ugs.n - 2; - q = gsl_cdf_fdist_Q(cmd->v_variables[i]->p.t_t.levene, df1, df2); + df1 = 1; df2 = grp_data->ugs.n - 2; + q = gsl_cdf_fdist_Q(grp_data->levene, df1, df2); tab_float(self->t, 3, i*2+3, TAB_CENTER, q, 8,3 ); @@ -999,7 +1081,6 @@ trbox_independent_samples_populate(struct trbox *self, q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv(q,df); - tab_float(self->t, 9, i*2+3, TAB_RIGHT, mean_diff - t * std_err_diff, 8, 3); @@ -1071,7 +1152,7 @@ trbox_paired_init(struct trbox *self, trbox_base_init(self,n_pairs,hsize); tab_title (self->t, 0, _("Paired Samples Test")); tab_hline(self->t,TAL_1,2,6,1); - tab_vline(self->t,TAL_2,2,0,vsize); + tab_vline(self->t,TAL_2,2,0,vsize - 1); tab_joint_text(self->t,2,0,6,0,TAB_CENTER,_("Paired Differences")); tab_box(self->t,-1,-1,-1,TAL_1, 2,1,6,vsize-1); tab_box(self->t,-1,-1,-1,TAL_1, 6,0,hsize-1,vsize-1); @@ -1079,8 +1160,8 @@ trbox_paired_init(struct trbox *self, tab_vline(self->t,TAL_0,6,0,1); tab_joint_text(self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -1165,17 +1246,17 @@ trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) trbox_base_init(self, cmd->n_variables,hsize); tab_title (self->t, 0, _("One-Sample Test")); tab_hline(self->t, TAL_1, 1, hsize - 1, 1); - tab_vline(self->t, TAL_2, 1, 0, vsize); + tab_vline(self->t, TAL_2, 1, 0, vsize - 1); tab_joint_text(self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF, - _("Test Value = %f"),cmd->n_testval); + _("Test Value = %f"), cmd->n_testval[0]); tab_box(self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1); tab_joint_text(self->t,5,1,6,1,TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); tab_vline(self->t,TAL_0,6,1,1); tab_hline(self->t,TAL_1,5,6,2); @@ -1202,13 +1283,12 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) double t; double p,q; double df; - struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.t_t.ugs; + struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; tab_text (trb->t, 0, i+3, TAB_LEFT, cmd->v_variables[i]->name); - t = (gs->mean - cmd->n_testval ) * sqrt(gs->n) / gs->std_dev ; + t = (gs->mean - cmd->n_testval[0] ) * sqrt(gs->n) / gs->std_dev ; tab_float (trb->t, 1, i+3, TAB_RIGHT, t, 8,3); @@ -1320,24 +1400,26 @@ pscbox(void) + /* Calculation Implementation */ /* Per case calculations common to all variants of the T test */ static int -common_calc (struct ccase *c, void *aux UNUSED) +common_calc (const struct ccase *c, void *_cmd) { int i; + struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd; - double weight = dict_get_case_weight(default_dict,c); + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( cmd.miss == TTS_LISTWISE ) + if ( cmd->miss == TTS_LISTWISE ) { - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); if (value_is_missing(val,v) ) { @@ -1347,9 +1429,9 @@ common_calc (struct ccase *c, void *aux UNUSED) } /* Listwise has to be implicit if the independent variable is missing ?? */ - if ( cmd.sbc_groups ) + if ( cmd->sbc_groups ) { - union value *gv = &c->data[indep_var->fv]; + const union value *gv = case_data (c, indep_var->fv); if ( value_is_missing(gv,indep_var) ) { return 0; @@ -1357,13 +1439,13 @@ common_calc (struct ccase *c, void *aux UNUSED) } - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; if (! value_is_missing(val,v) ) { @@ -1377,14 +1459,14 @@ common_calc (struct ccase *c, void *aux UNUSED) /* Pre calculations common to all variants of the T test */ static void -common_precalc (void *aux UNUSED) +common_precalc ( struct cmd_t_test *cmd ) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->sum=0; gs->n=0; @@ -1395,14 +1477,15 @@ common_precalc (void *aux UNUSED) /* Post calculations common to all variants of the T test */ void -common_postcalc (void *aux UNUSED) +common_postcalc ( struct cmd_t_test *cmd ) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->mean=gs->sum / gs->n; gs->s_std_dev= sqrt( @@ -1421,19 +1504,21 @@ common_postcalc (void *aux UNUSED) /* Per case calculations for one sample t test */ static int -one_sample_calc (struct ccase *c, void *aux UNUSED) +one_sample_calc (const struct ccase *c, void *cmd_) { int i; + struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; + - double weight = dict_get_case_weight(default_dict,c); + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( cmd.miss == TTS_LISTWISE ) + if ( cmd->miss == TTS_LISTWISE ) { - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); if (value_is_missing(val,v) ) { @@ -1442,16 +1527,16 @@ one_sample_calc (struct ccase *c, void *aux UNUSED) } } - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; if ( ! value_is_missing(val,v)) - gs->sum_diff += weight * (val->f - cmd.n_testval); + gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } return 0; @@ -1459,14 +1544,14 @@ one_sample_calc (struct ccase *c, void *aux UNUSED) /* Pre calculations for one sample t test */ static void -one_sample_precalc (void *aux UNUSED) +one_sample_precalc ( struct cmd_t_test *cmd ) { - int i=0; - - for(i=0; i< cmd.n_variables ; ++i) + int i=0; + + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->sum_diff=0; } @@ -1474,16 +1559,15 @@ one_sample_precalc (void *aux UNUSED) /* Post calculations for one sample t test */ static void -one_sample_postcalc (void *aux UNUSED) +one_sample_postcalc (struct cmd_t_test *cmd) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; - gs->mean_diff = gs->sum_diff / gs->n ; } } @@ -1510,7 +1594,7 @@ hash_var_name (const void *a_, void *v_ UNUSED) static void -paired_precalc (void *aux UNUSED) +paired_precalc (struct cmd_t_test *cmd UNUSED) { int i; @@ -1529,23 +1613,25 @@ paired_precalc (void *aux UNUSED) static int -paired_calc (struct ccase *c, void *aux UNUSED) +paired_calc (const struct ccase *c, void *cmd_) { int i; - double weight = dict_get_case_weight(default_dict,c); + struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; + + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); /* Skip the entire case if /MISSING=LISTWISE is set , AND one member of a pair is missing */ - if ( cmd.miss == TTS_LISTWISE ) + if ( cmd->miss == TTS_LISTWISE ) { for(i=0; i < n_pairs ; ++i ) { struct variable *v0 = pairs[i].v[0]; struct variable *v1 = pairs[i].v[1]; - union value *val0 = &c->data[v0->fv]; - union value *val1 = &c->data[v1->fv]; + const union value *val0 = case_data (c, v0->fv); + const union value *val1 = case_data (c, v1->fv); if ( value_is_missing(val0,v0) || value_is_missing(val1,v1) ) @@ -1560,8 +1646,8 @@ paired_calc (struct ccase *c, void *aux UNUSED) struct variable *v0 = pairs[i].v[0]; struct variable *v1 = pairs[i].v[1]; - union value *val0 = &c->data[v0->fv]; - union value *val1 = &c->data[v1->fv]; + const union value *val0 = case_data (c, v0->fv); + const union value *val1 = case_data (c, v1->fv); if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) ) { @@ -1583,7 +1669,7 @@ paired_calc (struct ccase *c, void *aux UNUSED) } static void -paired_postcalc (void *aux UNUSED) +paired_postcalc (struct cmd_t_test *cmd UNUSED) { int i; @@ -1621,94 +1707,76 @@ paired_postcalc (void *aux UNUSED) } } -/* Return the group # corresponding to the - independent variable with the value val -*/ -static int -get_group(const union value *val, struct variable *indep) -{ - int i; - - for (i = 0; i < 2 ; ++i ) - { - const int cmp = compare_values(val,&groups_values[i],indep->width) ; - switch ( criteria[i]) - { - case CMP_EQ: - if ( 0 == cmp ) return i; - break; - case CMP_LT: - if ( 0 > cmp ) return i; - break; - case CMP_LE: - if ( cmp <= 0 ) return i; - break; - case CMP_GT: - if ( cmp > 0 ) return i; - break; - case CMP_GE: - if ( cmp >= 0 ) return i; - break; - default: - assert(0); - }; - } - - /* No groups matched */ - return -1; -} - - static void -group_precalc (void *aux UNUSED) +group_precalc (struct cmd_t_test *cmd ) { int i; int j; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct t_test_proc *ttpr = &cmd.v_variables[i]->p.t_t; + struct group_proc *ttpr = group_proc_get (cmd->v_variables[i]); /* There's always 2 groups for a T - TEST */ ttpr->n_groups = 2; - ttpr->gs = xmalloc(sizeof(struct group_statistics) * 2) ; + + gp.indep_width = indep_var->width; + + ttpr->group_hash = hsh_create(2, + (hsh_compare_func *) compare_group_binary, + (hsh_hash_func *) hash_group_binary, + (hsh_free_func *) free_group, + (void *) &gp ); for (j=0 ; j < 2 ; ++j) { - ttpr->gs[j].sum = 0; - ttpr->gs[j].n = 0; - ttpr->gs[j].ssq = 0; + + struct group_statistics *gs = (struct group_statistics *) + xmalloc (sizeof(struct group_statistics)); + + gs->sum = 0; + gs->n = 0; + gs->ssq = 0; - if ( n_group_values == 2 ) - ttpr->gs[j].id = groups_values[j]; + if ( gp.criterion == CMP_EQ ) + { + gs->id = gp.v.g_value[j]; + } else - ttpr->gs[j].id = groups_values[0]; - ttpr->gs[j].criterion = criteria[j]; + { + if ( j == 0 ) + gs->id.f = gp.v.critical_value - 1.0 ; + else + gs->id.f = gp.v.critical_value + 1.0 ; + } + + hsh_insert ( ttpr->group_hash, (void *) gs ); + } } } static int -group_calc (struct ccase *c, void *aux UNUSED) +group_calc (const struct ccase *c, struct cmd_t_test *cmd) { int i; - int g; - union value *gv = &c->data[indep_var->fv]; - double weight = dict_get_case_weight(default_dict,c); + const union value *gv = case_data (c, indep_var->fv); + + const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); if ( value_is_missing(gv,indep_var) ) { return 0; } - if ( cmd.miss == TTS_LISTWISE ) + if ( cmd->miss == TTS_LISTWISE ) { - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); if (value_is_missing(val,v) ) { @@ -1717,23 +1785,21 @@ group_calc (struct ccase *c, void *aux UNUSED) } } + gv = case_data (c, indep_var->fv); - gv = &c->data[indep_var->fv]; - - g = get_group(gv,indep_var); - - /* If the independent variable doesn't match either of the values - for this case then move on to the next case */ - if (g == -1 ) - return 0; - - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct variable *var = cmd.v_variables[i]; + struct variable *var = cmd->v_variables[i]; + const union value *val = case_data (c, var->fv); + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; + struct group_statistics *gs; - struct group_statistics *gs = &var->p.t_t.gs[g]; + gs = hsh_find(grp_hash, (void *) gv); - union value *val=&c->data[var->fv]; + /* If the independent variable doesn't match either of the values + for this case then move on to the next case */ + if ( ! gs ) + return 0; if ( !value_is_missing(val,var) ) { @@ -1748,31 +1814,175 @@ group_calc (struct ccase *c, void *aux UNUSED) static void -group_postcalc (void *aux UNUSED) +group_postcalc ( struct cmd_t_test *cmd ) { int i; - int j; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - for (j=0 ; j < 2 ; ++j) - { - struct group_statistics *gs; - gs=&cmd.v_variables[i]->p.t_t.gs[j]; + struct variable *var = cmd->v_variables[i]; + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; + struct hsh_iterator g; + struct group_statistics *gs; + int count=0; + for (gs = hsh_first (grp_hash,&g); + gs != 0; + gs = hsh_next(grp_hash,&g)) + { gs->mean = gs->sum / gs->n; gs->s_std_dev= sqrt( - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; + ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ) ; gs->std_dev= sqrt( - gs->n/(gs->n-1) * - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; + gs->n/(gs->n-1) * + ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ) ; gs->se_mean = gs->std_dev / sqrt(gs->n); + count ++; + } + assert(count == 2); + } +} + + + +static void +calculate(const struct casefile *cf, void *cmd_) +{ + struct ssbox stat_summary_box; + struct trbox test_results_box; + + struct casereader *r; + struct ccase c; + + struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; + + common_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + common_calc(&c,cmd); + } + casereader_destroy (r); + common_postcalc(cmd); + + switch(mode) + { + case T_1_SAMPLE: + one_sample_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + one_sample_calc(&c,cmd); } + casereader_destroy (r); + one_sample_postcalc(cmd); + + break; + case T_PAIRED: + paired_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + paired_calc(&c,cmd); + } + casereader_destroy (r); + paired_postcalc(cmd); + + break; + case T_IND_SAMPLES: + + group_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + group_calc(&c,cmd); + } + casereader_destroy (r); + group_postcalc(cmd); + + levene(cf, indep_var, cmd->n_variables, cmd->v_variables, + (cmd->miss == TTS_LISTWISE)?LEV_LISTWISE:LEV_ANALYSIS , + value_is_missing); + break; } + + ssbox_create(&stat_summary_box,cmd,mode); + ssbox_populate(&stat_summary_box,cmd); + ssbox_finalize(&stat_summary_box); + + if ( mode == T_PAIRED) + pscbox(); + + trbox_create(&test_results_box,cmd,mode); + trbox_populate(&test_results_box,cmd); + trbox_finalize(&test_results_box); + } + +/* Return -1 if the id of a is less than b; +1 if greater than and + 0 if equal */ +static int +compare_group_binary(const struct group_statistics *a, + const struct group_statistics *b, + struct group_properties *p) +{ + + short flag_a; + short flag_b; + + assert(p->indep_width == 0 ) ; + + if ( p->criterion == CMP_LE ) + { + flag_a = ( a->id.f < p->v.critical_value ) ; + flag_b = ( b->id.f < p->v.critical_value ) ; + } + else + { + flag_a = ( a->id.f == p->v.critical_value ) ; + flag_b = ( b->id.f == p->v.critical_value ) ; + } + + + if ( flag_a == flag_b) + return 0 ; + + return ( flag_a < flag_b); +} + +static unsigned +hash_group_binary(const struct group_statistics *g, struct group_properties *p) +{ + short flag = -1; + + assert(p->indep_width == 0 ) ; + + /* FIXME: should compare union values */ + if ( p->criterion == CMP_LE ) + { + flag = ( g->id.f < p->v.critical_value ) ; + } + else if ( p->criterion == CMP_EQ) + { + if ( g->id.f == p->v.g_value[0].f ) + flag = 0 ; + else if ( g->id.f == p->v.g_value[1].f ) + flag = 1; + else + flag = 2; + } + else + assert(0); + + return flag; +}