X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ft-test.q;h=4c4135ba732d97f4f5f75c095bd03c7a3bf8da54;hb=fa33e52b61bc3cb1018ad2759880f5ecaf65653e;hp=c19a932798129f0ccfc46869e370293a5b87fcfd;hpb=41134129b55e49cbaa9713273f7f2f572872a31a;p=pspp-builds.git diff --git a/src/t-test.q b/src/t-test.q index c19a9327..4c4135ba 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -20,25 +20,27 @@ 02111-1307, USA. */ #include -#include +#include +#include "error.h" #include #include #include #include "alloc.h" #include "str.h" -#include "dcdflib/cdflib.h" #include "command.h" #include "lexer.h" #include "error.h" #include "magic.h" +#include "misc.h" #include "tab.h" #include "som.h" #include "value-labels.h" #include "var.h" #include "vfm.h" -#include "pool.h" #include "hash.h" -#include "stats.h" +#include "t-test.h" +#include "casefile.h" +#include "levene.h" /* (specification) "T-TEST" (tts_): @@ -54,19 +56,23 @@ /* (declarations) */ /* (functions) */ -static struct cmd_t_test cmd; -static struct pool *t_test_pool ; +/* Function to use for testing for missing values */ +static is_missing_func value_is_missing; /* Variable for the GROUPS subcommand, if given. */ -static struct variable *groups; +static struct variable *indep_var; /* GROUPS: Number of values specified by the user; the values specified if any. */ -static int n_groups_values; + +static int n_group_values; static union value groups_values[2]; +static enum comparison criteria[2]; + + /* PAIRS: Number of pairs to be compared ; each pair. */ static int n_pairs = 0 ; @@ -75,12 +81,34 @@ struct pair /* The variables comprising the pair */ struct variable *v[2]; + /* The number of valid variable pairs */ + double n; + + /* The sum of the members */ + double sum[2]; + + /* sum of squares of the members */ + double ssq[2]; + + /* Std deviation of the members */ + double std_dev[2]; + + + /* Sample Std deviation of the members */ + double s_std_dev[2]; + + /* The means of the members */ + double mean[2]; + /* The correlation coefficient between the variables */ double correlation; /* The sum of the differences */ double sum_of_diffs; + /* The sum of the products */ + double sum_of_prod; + /* The mean of the differences */ double mean_diff; @@ -90,12 +118,11 @@ struct pair /* The std deviation of the differences */ double std_dev_diff; }; -static struct pair *pairs=0; +static struct pair *pairs=0; static int parse_value (union value * v, int type) ; - /* Structures and Functions for the Statistics Summary Box */ struct ssbox; typedef void populate_ssbox_func(struct ssbox *ssb, @@ -122,7 +149,7 @@ void ssbox_finalize(struct ssbox *ssb); /* A function to create, populate and submit the Paired Samples Correlation box */ -void pscbox(struct cmd_t_test *cmd); +void pscbox(void); /* Structures and Functions for the Test Results Box */ @@ -155,35 +182,39 @@ enum { }; -static int common_calc (struct ccase *); -static void common_precalc (void); -static void common_postcalc (void); +static int common_calc (const struct ccase *, void *); +static void common_precalc (struct cmd_t_test *); +static void common_postcalc (struct cmd_t_test *); + +static int one_sample_calc (const struct ccase *, void *); +static void one_sample_precalc (struct cmd_t_test *); +static void one_sample_postcalc (struct cmd_t_test *); + +static int paired_calc (const struct ccase *, void *); +static void paired_precalc (struct cmd_t_test *); +static void paired_postcalc (struct cmd_t_test *); -static int one_sample_calc (struct ccase *); -static void one_sample_precalc (void); -static void one_sample_postcalc (void); +static void group_precalc (struct cmd_t_test *); +static int group_calc (const struct ccase *, struct cmd_t_test *); +static void group_postcalc (struct cmd_t_test *); -static int paired_calc (struct ccase *); -static void paired_precalc (void); -static void paired_postcalc (void); -static int compare_var_name (const void *a_, const void *b_, void *v_ unused); -static unsigned hash_var_name (const void *a_, void *v_ unused); +static int compare_var_name (const void *a_, const void *b_, void *v_ UNUSED); +static unsigned hash_var_name (const void *a_, void *v_ UNUSED); +static void calculate(const struct casefile *cf, void *_mode); + +static int mode; + +static struct cmd_t_test cmd; + +static int bad_weight_warn; int cmd_t_test(void) { - int mode; - - struct ssbox stat_summary_box; - struct trbox test_results_box; - if (!lex_force_match_id ("T")) - return CMD_FAILURE; - lex_match ('-'); - lex_match_id ("TEST"); if ( !parse_t_test(&cmd) ) return CMD_FAILURE; @@ -252,51 +283,44 @@ cmd_t_test(void) hsh_destroy(hash); } } - - - procedure(common_precalc,common_calc,common_postcalc); - - switch(mode) - { - case T_1_SAMPLE: - procedure(one_sample_precalc,one_sample_calc,one_sample_postcalc); - break; - case T_PAIRED: - procedure(paired_precalc,paired_calc,paired_postcalc); - break; - } - - - t_test_pool = pool_create (); - - ssbox_create(&stat_summary_box,&cmd,mode); - ssbox_populate(&stat_summary_box,&cmd); - ssbox_finalize(&stat_summary_box); - - if ( mode == T_PAIRED) + else if ( !cmd.sbc_variables) { - pscbox(&cmd); + msg(SE, _("One or more VARIABLES must be specified.")); + return CMD_FAILURE; } - trbox_create(&test_results_box,&cmd,mode); - trbox_populate(&test_results_box,&cmd); - trbox_finalize(&test_results_box); - pool_destroy (t_test_pool); + /* If /MISSING=INCLUDE is set, then user missing values are ignored */ + if (cmd.incl == TTS_INCLUDE ) + value_is_missing = is_system_missing; + else + value_is_missing = is_missing; - t_test_pool=0; + bad_weight_warn = 1; + multipass_procedure_with_splits (calculate, &cmd); n_pairs=0; free(pairs); pairs=0; + + if ( mode == T_IND_SAMPLES) + { + int i; + /* Destroy any group statistics we created */ + for (i= 0 ; i < cmd.n_variables ; ++i ) + { + free(cmd.v_variables[i]->p.t_t.gs); + } + } return CMD_SUCCESS; } static int -tts_custom_groups (struct cmd_t_test *cmd unused) +tts_custom_groups (struct cmd_t_test *cmd UNUSED) { + lex_match('='); if (token != T_ALL && @@ -307,27 +331,28 @@ tts_custom_groups (struct cmd_t_test *cmd unused) return 0; } - groups = parse_variable (); - if (!groups) + indep_var = parse_variable (); + if (!indep_var) { lex_error ("expecting variable name in GROUPS subcommand"); return 0; } - if (groups->type == T_STRING && groups->width > MAX_SHORT_STRING) + if (indep_var->type == T_STRING && indep_var->width > MAX_SHORT_STRING) { msg (SE, _("Long string variable %s is not valid here."), - groups->name); + indep_var->name); return 0; } if (!lex_match ('(')) { - if (groups->type == NUMERIC) + if (indep_var->type == NUMERIC) { - n_groups_values = 2; groups_values[0].f = 1; groups_values[1].f = 2; + criteria[0] = criteria[1] = CMP_EQ; + n_group_values = 2; return 1; } else @@ -338,29 +363,33 @@ tts_custom_groups (struct cmd_t_test *cmd unused) } } - if (!parse_value (&groups_values[0],groups->type)) - return 0; - n_groups_values = 1; + if (!parse_value (&groups_values[0],indep_var->type)) + return 0; lex_match (','); if (lex_match (')')) - return 1; + { + criteria[0] = CMP_LE; + criteria[1] = CMP_GT; + groups_values[1] = groups_values[0]; + n_group_values = 1; + return 1; + } - if (!parse_value (&groups_values[1],groups->type)) + if (!parse_value (&groups_values[1],indep_var->type)) return 0; - n_groups_values = 2; - + + n_group_values = 2; if (!lex_force_match (')')) return 0; + criteria[0] = criteria[1] = CMP_EQ; return 1; } - - static int -tts_custom_pairs (struct cmd_t_test *cmd unused) +tts_custom_pairs (struct cmd_t_test *cmd UNUSED) { struct variable **vars; int n_vars; @@ -623,7 +652,7 @@ ssbox_independent_samples_init(struct ssbox *this, ssbox_base_init(this, hsize,vsize); tab_title (this->t, 0, _("Group Statistics")); tab_vline(this->t,0,1,0,vsize); - tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, groups->name); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, indep_var->name); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -638,35 +667,61 @@ ssbox_independent_samples_populate(struct ssbox *ssb, { int i; + char *val_lab0=0; char *val_lab1=0; - char *val_lab2=0; - if ( groups->type == NUMERIC ) + char prefix[2][3]={"",""}; + + if ( indep_var->type == NUMERIC ) { - val_lab1 = val_labs_find( groups->val_labs,groups_values[0]); - val_lab2 = val_labs_find( groups->val_labs,groups_values[1]); + val_lab0 = val_labs_find( indep_var->val_labs,groups_values[0]); + val_lab1 = val_labs_find( indep_var->val_labs,groups_values[1]); } else { - val_lab1 = groups_values[0].s; - val_lab2 = groups_values[1].s; + val_lab0 = groups_values[0].s; + val_lab1 = groups_values[1].s; + } + + if (n_group_values == 1) + { + strcpy(prefix[0],"< "); + strcpy(prefix[1],">="); } assert(ssb->t); for (i=0; i < cmd->n_variables; ++i) { + int g; + tab_text (ssb->t, 0, i*2+1, TAB_LEFT, cmd->v_variables[i]->name); - if (val_lab1) - tab_text (ssb->t, 1, i*2+1, TAB_LEFT, val_lab1); + if (val_lab0) + tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, + "%s%s", prefix[0], val_lab0); else - tab_float(ssb->t, 1 ,i*2+1, TAB_LEFT, groups_values[0].f, 2,0); + tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, + "%s%g", prefix[0], groups_values[0].f); + - if (val_lab2) - tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT, val_lab2); + if (val_lab1) + tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, + "%s%s", prefix[1], val_lab1); else - tab_float(ssb->t, 1 ,i*2+1+1, TAB_LEFT, groups_values[1].f,2,0); + tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, + "%s%g", prefix[1], groups_values[1].f); + + /* Fill in the group statistics */ + for ( g=0; g < 2 ; ++g ) + { + struct group_statistics *gs = &cmd->v_variables[i]->p.t_t.gs[g]; + + tab_float(ssb->t, 2 ,i*2+g+1, TAB_RIGHT, gs->n, 2, 0); + tab_float(ssb->t, 3 ,i*2+g+1, TAB_RIGHT, gs->mean, 8, 2); + tab_float(ssb->t, 4 ,i*2+g+1, TAB_RIGHT, gs->std_dev, 8, 3); + tab_float(ssb->t, 5 ,i*2+g+1, TAB_RIGHT, gs->se_mean, 8, 3); + } } } @@ -676,7 +731,7 @@ void ssbox_paired_populate(struct ssbox *ssb, /* Initialize the paired values ssbox */ void -ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd unused) +ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd UNUSED) { int hsize=6; @@ -697,7 +752,7 @@ ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd unused) /* Populate the ssbox for paired values */ void -ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd unused) +ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd UNUSED) { int i; @@ -711,24 +766,22 @@ ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd unused) for (j=0 ; j < 2 ; ++j) { - struct t_test_proc *ttp; + struct group_statistics *gs; - ttp=&pairs[i].v[j]->p.t_t; + gs=&pairs[i].v[j]->p.t_t.ugs; /* Titles */ tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, pairs[i].v[j]->name); /* Values */ - tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, ttp->mean, 8, 2); - tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, ttp->n, 2, 0); - tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, ttp->std_dev, 8, 3); - tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, ttp->se_mean, 8, 3); + tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], 8, 2); + tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 2, 0); + tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], 8, 3); + tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j]/sqrt(pairs[i].n), 8, 3); } - } - } /* Populate the one sample ssbox */ @@ -741,14 +794,14 @@ ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd) for (i=0; i < cmd->n_variables; ++i) { - struct t_test_proc *ttp; - ttp= &cmd->v_variables[i]->p.t_t; + struct group_statistics *gs; + gs= &cmd->v_variables[i]->p.t_t.ugs; tab_text (ssb->t, 0, i+1, TAB_LEFT, cmd->v_variables[i]->name); - tab_float (ssb->t,1, i+1, TAB_RIGHT, ttp->n, 2, 0); - tab_float (ssb->t,2, i+1, TAB_RIGHT, ttp->mean, 8, 2); - tab_float (ssb->t,3, i+1, TAB_RIGHT, ttp->std_dev, 8, 2); - tab_float (ssb->t,4, i+1, TAB_RIGHT, ttp->se_mean, 8, 3); + tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 2, 0); + tab_float (ssb->t,2, i+1, TAB_RIGHT, gs->mean, 8, 2); + tab_float (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, 8, 2); + tab_float (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, 8, 3); } } @@ -818,7 +871,7 @@ trbox_finalize(struct trbox *trb) /* Initialize the independent samples trbox */ void trbox_independent_samples_init(struct trbox *self, - struct cmd_t_test *cmd unused) + struct cmd_t_test *cmd UNUSED) { const int hsize=11; const int vsize=cmd->n_variables*2+3; @@ -835,7 +888,7 @@ trbox_independent_samples_init(struct trbox *self, tab_hline(self->t,TAL_1, hsize-2,hsize-1,2); tab_box(self->t,-1,-1,-1,TAL_1, hsize-2,2,hsize-1,vsize-1); tab_joint_text(self->t, 2, 0, 3, 0, - TAB_CENTER,_("Levine's Test for Equality of Variances")); + TAB_CENTER,_("Levene's Test for Equality of Variances")); tab_joint_text(self->t, 4,0,hsize-1,0, TAB_CENTER,_("t-test for Equality of Means")); @@ -850,8 +903,8 @@ trbox_independent_samples_init(struct trbox *self, tab_text(self->t,10,2, TAB_CENTER | TAT_TITLE,_("Upper")); tab_joint_text(self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); } @@ -865,19 +918,125 @@ trbox_independent_samples_populate(struct trbox *self, assert(self); for (i=0; i < cmd->n_variables; ++i) { + double p,q; + + double t; + double df; + + double df1, df2; + + double pooled_variance; + double std_err_diff; + double mean_diff; + + struct group_statistics *gs0 = &cmd->v_variables[i]->p.t_t.gs[0]; + struct group_statistics *gs1 = &cmd->v_variables[i]->p.t_t.gs[1]; + tab_text (self->t, 0, i*2+3, TAB_LEFT, cmd->v_variables[i]->name); tab_text (self->t, 1, i*2+3, TAB_LEFT, _("Equal variances assumed")); + + tab_float(self->t, 2, i*2+3, TAB_CENTER, + cmd->v_variables[i]->p.t_t.levene, 8,3); + + /* Now work out the significance of the Levene test */ + df1 = 1; df2 = cmd->v_variables[i]->p.t_t.ugs.n - 2; + q = gsl_cdf_fdist_Q(cmd->v_variables[i]->p.t_t.levene, df1, df2); + + tab_float(self->t, 3, i*2+3, TAB_CENTER, q, 8,3 ); + + df = gs0->n + gs1->n - 2.0 ; + tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 2, 0); + + pooled_variance = ( (gs0->n )*pow2(gs0->s_std_dev) + + + (gs1->n )*pow2(gs1->s_std_dev) + ) / df ; + + t = (gs0->mean - gs1->mean) / sqrt(pooled_variance) ; + t /= sqrt((gs0->n + gs1->n)/(gs0->n*gs1->n)); + + tab_float (self->t, 4, i*2+3, TAB_RIGHT, t, 8, 3); + + p = gsl_cdf_tdist_P(t, df); + q = gsl_cdf_tdist_Q(t, df); + + tab_float(self->t, 6, i*2+3, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); + + mean_diff = gs0->mean - gs1->mean; + tab_float(self->t, 7, i*2+3, TAB_RIGHT, mean_diff, 8, 3); + + + std_err_diff = sqrt( pow2(gs0->se_mean) + pow2(gs1->se_mean)); + tab_float(self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, 8, 3); + + + /* Now work out the confidence interval */ + q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + + t = gsl_cdf_tdist_Qinv(q,df); + tab_float(self->t, 9, i*2+3, TAB_RIGHT, + mean_diff - t * std_err_diff, 8, 3); + + tab_float(self->t, 10, i*2+3, TAB_RIGHT, + mean_diff + t * std_err_diff, 8, 3); + + + { + double se2; + /* Now for the \sigma_1 != \sigma_2 case */ tab_text (self->t, 1, i*2+3+1, TAB_LEFT, _("Equal variances not assumed")); + + + se2 = (pow2(gs0->s_std_dev)/(gs0->n -1) ) + + (pow2(gs1->s_std_dev)/(gs1->n -1) ); + + t = mean_diff / sqrt(se2) ; + tab_float (self->t, 4, i*2+3+1, TAB_RIGHT, t, 8, 3); + + df = pow2(se2) / ( + (pow2(pow2(gs0->s_std_dev)/(gs0->n - 1 )) + /(gs0->n -1 ) + ) + + + (pow2(pow2(gs1->s_std_dev)/(gs1->n - 1 )) + /(gs1->n -1 ) + ) + ) ; + tab_float (self->t, 5, i*2+3+1, TAB_RIGHT, df, 8, 3); + + p = gsl_cdf_tdist_P(t, df); + q = gsl_cdf_tdist_Q(t, df); + + tab_float(self->t, 6, i*2+3+1, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); + + /* Now work out the confidence interval */ + q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + + t = gsl_cdf_tdist_Qinv(q, df); + + tab_float(self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, 8, 3); + + + tab_float(self->t, 8, i*2+3+1, TAB_RIGHT, std_err_diff, 8, 3); + + + tab_float(self->t, 9, i*2+3+1, TAB_RIGHT, + mean_diff - t * std_err_diff, 8, 3); + + tab_float(self->t, 10, i*2+3+1, TAB_RIGHT, + mean_diff + t * std_err_diff, 8, 3); + + } } } /* Initialize the paired samples trbox */ void trbox_paired_init(struct trbox *self, - struct cmd_t_test *cmd unused) + struct cmd_t_test *cmd UNUSED) { const int hsize=10; @@ -896,8 +1055,8 @@ trbox_paired_init(struct trbox *self, tab_vline(self->t,TAL_0,6,0,1); tab_joint_text(self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -912,25 +1071,16 @@ trbox_paired_init(struct trbox *self, /* Populate the paired samples trbox */ void trbox_paired_populate(struct trbox *trb, - struct cmd_t_test *cmd unused) + struct cmd_t_test *cmd UNUSED) { int i; for (i=0; i < n_pairs; ++i) { - int which =1; double p,q; - int status; - double bound; double se_mean; - struct variable *v0 = pairs[i].v[0]; - struct variable *v1 = pairs[i].v[1]; - - struct t_test_proc *ttp0 = &v0->p.t_t; - struct t_test_proc *ttp1 = &v1->p.t_t; - - double n = ttp0->n; + double n = pairs[i].n; double t; double df = n - 1; @@ -949,14 +1099,8 @@ trbox_paired_populate(struct trbox *trb, /* Now work out the confidence interval */ q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ - p = 1 - q ; - which=2; /* Calc T from p,q and df */ - cdft(&which, &p, &q, &t, &df, &status, &bound); - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + t = gsl_cdf_tdist_Qinv(q, df); tab_float(trb->t, 5, i+3, TAB_RIGHT, pairs[i].mean_diff - t * se_mean , 8, 4); @@ -964,32 +1108,25 @@ trbox_paired_populate(struct trbox *trb, tab_float(trb->t, 6, i+3, TAB_RIGHT, pairs[i].mean_diff + t * se_mean , 8, 4); - t = ( ttp0->mean - ttp1->mean) - / sqrt ( - ( sqr(ttp0->s_std_dev) + sqr(ttp1->s_std_dev) - - 2 * pairs[i].correlation * ttp0->s_std_dev * ttp1->s_std_dev ) - / (n-1) ) - ; + t = (pairs[i].mean[0] - pairs[i].mean[1]) + / sqrt ( + ( pow2 (pairs[i].s_std_dev[0]) + pow2 (pairs[i].s_std_dev[1]) - + 2 * pairs[i].correlation * + pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] ) + / (n - 1) + ); tab_float(trb->t, 7, i+3, TAB_RIGHT, t , 8,3 ); /* Degrees of freedom */ tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 2, 0 ); - which=1; - cdft(&which, &p, &q, &t, &df, &status, &bound); - - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } - - - tab_float(trb->t, 9, i+3, TAB_RIGHT, p*2.0 , 8, 3); + p = gsl_cdf_tdist_P(t,df); + q = gsl_cdf_tdist_P(t,df); + tab_float(trb->t, 9, i+3, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); } - } /* Initialize the one sample trbox */ @@ -1013,8 +1150,8 @@ trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) tab_joint_text(self->t,5,1,6,1,TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); tab_vline(self->t,TAL_0,6,1,1); tab_hline(self->t,TAL_1,5,6,2); @@ -1038,55 +1175,42 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) for (i=0; i < cmd->n_variables; ++i) { - int which =1; double t; double p,q; double df; - int status; - double bound; - struct t_test_proc *ttp; - ttp= &cmd->v_variables[i]->p.t_t; + struct group_statistics *gs; + gs= &cmd->v_variables[i]->p.t_t.ugs; tab_text (trb->t, 0, i+3, TAB_LEFT, cmd->v_variables[i]->name); - t = (ttp->mean - cmd->n_testval ) * sqrt(ttp->n) / ttp->std_dev ; + t = (gs->mean - cmd->n_testval ) * sqrt(gs->n) / gs->std_dev ; tab_float (trb->t, 1, i+3, TAB_RIGHT, t, 8,3); /* degrees of freedom */ - df = ttp->n - 1; + df = gs->n - 1; tab_float (trb->t, 2, i+3, TAB_RIGHT, df, 8,0); - cdft(&which, &p, &q, &t, &df, &status, &bound); - - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } - + p = gsl_cdf_tdist_P(t, df); + q = gsl_cdf_tdist_Q(t, df); - /* Multiply by 2 to get 2-tailed significance */ - tab_float (trb->t, 3, i+3, TAB_RIGHT, q*2.0, 8,3); + /* Multiply by 2 to get 2-tailed significance, makeing sure we've got + the correct tail*/ + tab_float (trb->t, 3, i+3, TAB_RIGHT, 2.0*(t>0?q:p), 8,3); - tab_float (trb->t, 4, i+3, TAB_RIGHT, ttp->mean_diff, 8,3); + tab_float (trb->t, 4, i+3, TAB_RIGHT, gs->mean_diff, 8,3); q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ - p = 1 - q ; - which=2; /* Calc T from p,q and df */ - cdft(&which, &p, &q, &t, &df, &status, &bound); - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + t = gsl_cdf_tdist_Qinv(q, df); tab_float (trb->t, 5, i+3, TAB_RIGHT, - ttp->mean_diff - t * ttp->se_mean, 8,4); + gs->mean_diff - t * gs->se_mean, 8,4); tab_float (trb->t, 6, i+3, TAB_RIGHT, - ttp->mean_diff + t * ttp->se_mean, 8,4); + gs->mean_diff + t * gs->se_mean, 8,4); } } @@ -1115,7 +1239,7 @@ trbox_base_finalize(struct trbox *trb) /* Create , populate and submit the Paired Samples Correlation box */ void -pscbox(struct cmd_t_test *cmd) +pscbox(void) { const int rows=1+n_pairs; const int cols=5; @@ -1138,20 +1262,15 @@ pscbox(struct cmd_t_test *cmd) tab_text(table, 3,0, TAB_CENTER | TAT_TITLE, _("Correlation")); tab_text(table, 4,0, TAB_CENTER | TAT_TITLE, _("Sig.")); - for (i=0; i < n_pairs; ++i) { - int which =1; double p,q; - int status; - double bound; - - double df = pairs[i].v[0]->p.t_t.n -2; + double df = pairs[i].n -2; double correlation_t = pairs[i].correlation * sqrt(df) / - sqrt(1 - sqr(pairs[i].correlation)); + sqrt(1 - pow2(pairs[i].correlation)); /* row headings */ @@ -1163,23 +1282,13 @@ pscbox(struct cmd_t_test *cmd) /* row data */ + tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].n, 4, 0); tab_float(table, 3, i+1, TAB_RIGHT, pairs[i].correlation, 8, 3); - tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].v[0]->p.t_t.n , 4, 0); - - - cdft(&which, &p, &q, &correlation_t, &df, &status, &bound); - - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } - - - tab_float(table, 4, i+1, TAB_RIGHT, q*2.0, 8, 3); + p = gsl_cdf_tdist_P(correlation_t, df); + q = gsl_cdf_tdist_Q(correlation_t, df); - - + tab_float(table, 4, i+1, TAB_RIGHT, 2.0*(correlation_t>0?q:p), 8, 3); } tab_submit(table); @@ -1191,25 +1300,53 @@ pscbox(struct cmd_t_test *cmd) /* Per case calculations common to all variants of the T test */ static int -common_calc (struct ccase *c) +common_calc (const struct ccase *c, void *_cmd) { int i; + struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd; + + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); + + + /* Skip the entire case if /MISSING=LISTWISE is set */ + if ( cmd->miss == TTS_LISTWISE ) + { + for(i=0; i< cmd->n_variables ; ++i) + { + struct variable *v = cmd->v_variables[i]; + const union value *val = &c->data[v->fv]; + + if (value_is_missing(val,v) ) + { + return 0; + } + } + } + + /* Listwise has to be implicit if the independent variable is missing ?? */ + if ( cmd->sbc_groups ) + { + const union value *gv = &c->data[indep_var->fv]; + if ( value_is_missing(gv,indep_var) ) + { + return 0; + } + } - double weight = dict_get_case_weight(default_dict,c); - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct t_test_proc *ttp; - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct group_statistics *gs; + struct variable *v = cmd->v_variables[i]; + const union value *val = &c->data[v->fv]; - ttp= &cmd.v_variables[i]->p.t_t; + gs= &cmd->v_variables[i]->p.t_t.ugs; - if (val->f != SYSMIS) + if (! value_is_missing(val,v) ) { - ttp->n+=weight; - ttp->sum+=weight * val->f; - ttp->ssq+=weight * val->f * val->f; + gs->n+=weight; + gs->sum+=weight * val->f; + gs->ssq+=weight * val->f * val->f; } } return 0; @@ -1217,66 +1354,84 @@ common_calc (struct ccase *c) /* Pre calculations common to all variants of the T test */ static void -common_precalc (void) +common_precalc ( struct cmd_t_test *cmd ) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct t_test_proc *ttp; - ttp= &cmd.v_variables[i]->p.t_t; + struct group_statistics *gs; + gs= &cmd->v_variables[i]->p.t_t.ugs; - ttp->sum=0; - ttp->n=0; - ttp->ssq=0; - ttp->sum_diff=0; + gs->sum=0; + gs->n=0; + gs->ssq=0; + gs->sum_diff=0; } } /* Post calculations common to all variants of the T test */ void -common_postcalc (void) +common_postcalc ( struct cmd_t_test *cmd ) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + + for(i=0; i< cmd->n_variables ; ++i) { - struct t_test_proc *ttp; - ttp= &cmd.v_variables[i]->p.t_t; + struct group_statistics *gs; + gs= &cmd->v_variables[i]->p.t_t.ugs; - ttp->mean=ttp->sum / ttp->n; - ttp->s_std_dev= sqrt( - ( (ttp->ssq / ttp->n ) - ttp->mean * ttp->mean ) + gs->mean=gs->sum / gs->n; + gs->s_std_dev= sqrt( + ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) ) ; - ttp->std_dev= sqrt( - ttp->n/(ttp->n-1) * - ( (ttp->ssq / ttp->n ) - ttp->mean * ttp->mean ) + gs->std_dev= sqrt( + gs->n/(gs->n-1) * + ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) ) ; - ttp->se_mean = ttp->std_dev / sqrt(ttp->n); - ttp->mean_diff= ttp->sum_diff / ttp->n; + gs->se_mean = gs->std_dev / sqrt(gs->n); + gs->mean_diff= gs->sum_diff / gs->n; } } /* Per case calculations for one sample t test */ static int -one_sample_calc (struct ccase *c) +one_sample_calc (const struct ccase *c, void *cmd_) { int i; + struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; + + + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); - double weight = dict_get_case_weight(default_dict,c); + /* Skip the entire case if /MISSING=LISTWISE is set */ + if ( cmd->miss == TTS_LISTWISE ) + { + for(i=0; i< cmd->n_variables ; ++i) + { + struct variable *v = cmd->v_variables[i]; + const union value *val = &c->data[v->fv]; + + if (value_is_missing(val,v) ) + { + return 0; + } + } + } - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct t_test_proc *ttp; - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct group_statistics *gs; + struct variable *v = cmd->v_variables[i]; + const union value *val = &c->data[v->fv]; - ttp= &cmd.v_variables[i]->p.t_t; + gs= &cmd->v_variables[i]->p.t_t.ugs; - if (val->f != SYSMIS) - ttp->sum_diff += weight * (val->f - cmd.n_testval); + if ( ! value_is_missing(val,v)) + gs->sum_diff += weight * (val->f - cmd->n_testval); } return 0; @@ -1284,39 +1439,38 @@ one_sample_calc (struct ccase *c) /* Pre calculations for one sample t test */ static void -one_sample_precalc (void) +one_sample_precalc ( struct cmd_t_test *cmd ) { - int i=0; - - for(i=0; i< cmd.n_variables ; ++i) + int i=0; + + for(i=0; i< cmd->n_variables ; ++i) { - struct t_test_proc *ttp; - ttp= &cmd.v_variables[i]->p.t_t; + struct group_statistics *gs; + gs= &cmd->v_variables[i]->p.t_t.ugs; - ttp->sum_diff=0; + gs->sum_diff=0; } } /* Post calculations for one sample t test */ static void -one_sample_postcalc (void) +one_sample_postcalc (struct cmd_t_test *cmd) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct t_test_proc *ttp; - ttp= &cmd.v_variables[i]->p.t_t; + struct group_statistics *gs; + gs= &cmd->v_variables[i]->p.t_t.ugs; - - ttp->mean_diff = ttp->sum_diff / ttp->n ; + gs->mean_diff = gs->sum_diff / gs->n ; } } static int -compare_var_name (const void *a_, const void *b_, void *v_ unused) +compare_var_name (const void *a_, const void *b_, void *v_ UNUSED) { const struct variable *a = a_; const struct variable *b = b_; @@ -1325,7 +1479,7 @@ compare_var_name (const void *a_, const void *b_, void *v_ unused) } static unsigned -hash_var_name (const void *a_, void *v_ unused) +hash_var_name (const void *a_, void *v_ UNUSED) { const struct variable *a = a_; @@ -1333,68 +1487,351 @@ hash_var_name (const void *a_, void *v_ unused) } + static void -paired_precalc (void) +paired_precalc (struct cmd_t_test *cmd UNUSED) { int i; for(i=0; i < n_pairs ; ++i ) { - pairs[i].correlation=0; - pairs[i].sum_of_diffs=0; - pairs[i].ssq_diffs=0; + pairs[i].n = 0; + pairs[i].sum[0] = 0; pairs[i].sum[1] = 0; + pairs[i].ssq[0] = 0; pairs[i].ssq[1] = 0; + pairs[i].sum_of_prod = 0; + pairs[i].correlation = 0; + pairs[i].sum_of_diffs = 0; + pairs[i].ssq_diffs = 0; } } + static int -paired_calc (struct ccase *c) +paired_calc (const struct ccase *c, void *cmd_) { int i; + struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; + + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); + + /* Skip the entire case if /MISSING=LISTWISE is set , + AND one member of a pair is missing */ + if ( cmd->miss == TTS_LISTWISE ) + { + for(i=0; i < n_pairs ; ++i ) + { + struct variable *v0 = pairs[i].v[0]; + struct variable *v1 = pairs[i].v[1]; + + const union value *val0 = &c->data[v0->fv]; + const union value *val1 = &c->data[v1->fv]; + + if ( value_is_missing(val0,v0) || + value_is_missing(val1,v1) ) + { + return 0; + } + } + } + for(i=0; i < n_pairs ; ++i ) { struct variable *v0 = pairs[i].v[0]; struct variable *v1 = pairs[i].v[1]; - union value *val0 = &c->data[v0->fv]; - union value *val1 = &c->data[v1->fv]; + const union value *val0 = &c->data[v0->fv]; + const union value *val1 = &c->data[v1->fv]; - pairs[i].correlation += ( val0->f - pairs[i].v[0]->p.t_t.mean ) - * - ( val1->f - pairs[i].v[1]->p.t_t.mean ); + if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) ) + { + pairs[i].n += weight; + pairs[i].sum[0] += weight * val0->f; + pairs[i].sum[1] += weight * val1->f; - pairs[i].sum_of_diffs += val0->f - val1->f ; - pairs[i].ssq_diffs += sqr(val0->f - val1->f); + pairs[i].ssq[0] += weight * pow2(val0->f); + pairs[i].ssq[1] += weight * pow2(val1->f); - } + pairs[i].sum_of_prod += weight * val0->f * val1->f ; + pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ; + pairs[i].ssq_diffs += weight * pow2(val0->f - val1->f); + } + } return 0; } static void -paired_postcalc (void) +paired_postcalc (struct cmd_t_test *cmd UNUSED) { int i; for(i=0; i < n_pairs ; ++i ) { - const double n = pairs[i].v[0]->p.t_t.n ; - - pairs[i].correlation /= pairs[i].v[0]->p.t_t.std_dev * - pairs[i].v[1]->p.t_t.std_dev ; - pairs[i].correlation /= pairs[i].v[0]->p.t_t.n -1; - + int j; + const double n = pairs[i].n; + for (j=0; j < 2 ; ++j) + { + pairs[i].mean[j] = pairs[i].sum[j] / n ; + pairs[i].s_std_dev[j] = sqrt((pairs[i].ssq[j] / n - + pow2(pairs[i].mean[j])) + ); + + pairs[i].std_dev[j] = sqrt(n/(n-1)*(pairs[i].ssq[j] / n - + pow2(pairs[i].mean[j])) + ); + } + + pairs[i].correlation = pairs[i].sum_of_prod / pairs[i].n - + pairs[i].mean[0] * pairs[i].mean[1] ; + /* correlation now actually contains the covariance */ + + pairs[i].correlation /= pairs[i].std_dev[0] * pairs[i].std_dev[1]; + pairs[i].correlation *= pairs[i].n / ( pairs[i].n - 1 ); + pairs[i].mean_diff = pairs[i].sum_of_diffs / n ; - pairs[i].std_dev_diff = sqrt ( n / (n - 1) * ( ( pairs[i].ssq_diffs / n ) - - sqr(pairs[i].mean_diff ) + pow2(pairs[i].mean_diff ) ) ); + } +} + +/* Return the group # corresponding to the + independent variable with the value val +*/ +static int +get_group(const union value *val, struct variable *indep) +{ + int i; + + for (i = 0; i < 2 ; ++i ) + { + const int cmp = compare_values(val,&groups_values[i],indep->width) ; + switch ( criteria[i]) + { + case CMP_EQ: + if ( 0 == cmp ) return i; + break; + case CMP_LT: + if ( 0 > cmp ) return i; + break; + case CMP_LE: + if ( cmp <= 0 ) return i; + break; + case CMP_GT: + if ( cmp > 0 ) return i; + break; + case CMP_GE: + if ( cmp >= 0 ) return i; + break; + default: + assert(0); + }; + } + + /* No groups matched */ + return -1; +} + + +static void +group_precalc (struct cmd_t_test *cmd ) +{ + int i; + int j; + + for(i=0; i< cmd->n_variables ; ++i) + { + struct t_test_proc *ttpr = &cmd->v_variables[i]->p.t_t; + + /* There's always 2 groups for a T - TEST */ + ttpr->n_groups = 2; + ttpr->gs = xmalloc(sizeof(struct group_statistics) * 2) ; + + for (j=0 ; j < 2 ; ++j) + { + ttpr->gs[j].sum = 0; + ttpr->gs[j].n = 0; + ttpr->gs[j].ssq = 0; + + if ( n_group_values == 2 ) + ttpr->gs[j].id = groups_values[j]; + else + ttpr->gs[j].id = groups_values[0]; + ttpr->gs[j].criterion = criteria[j]; + } + } + +} + +static int +group_calc (const struct ccase *c, struct cmd_t_test *cmd) +{ + int i; + int g; + + const union value *gv = &c->data[indep_var->fv]; + + const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); + + if ( value_is_missing(gv,indep_var) ) + { + return 0; + } + + if ( cmd->miss == TTS_LISTWISE ) + { + for(i=0; i< cmd->n_variables ; ++i) + { + struct variable *v = cmd->v_variables[i]; + const union value *val = &c->data[v->fv]; + + if (value_is_missing(val,v) ) + { + return 0; + } + } + } + + + gv = &c->data[indep_var->fv]; + + g = get_group(gv,indep_var); + + + /* If the independent variable doesn't match either of the values + for this case then move on to the next case */ + if (g == -1 ) + return 0; + + for(i=0; i< cmd->n_variables ; ++i) + { + struct variable *var = cmd->v_variables[i]; + + struct group_statistics *gs = &var->p.t_t.gs[g]; + + const union value *val=&c->data[var->fv]; + + if ( !value_is_missing(val,var) ) + { + gs->n+=weight; + gs->sum+=weight * val->f; + gs->ssq+=weight * pow2(val->f); + } + } + + return 0; +} + + +static void +group_postcalc ( struct cmd_t_test *cmd ) +{ + int i; + int j; + + for(i=0; i< cmd->n_variables ; ++i) + { + for (j=0 ; j < 2 ; ++j) + { + struct group_statistics *gs; + gs=&cmd->v_variables[i]->p.t_t.gs[j]; + + gs->mean = gs->sum / gs->n; + + gs->s_std_dev= sqrt( + ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ) ; + + gs->std_dev= sqrt( + gs->n/(gs->n-1) * + ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ) ; + + gs->se_mean = gs->std_dev / sqrt(gs->n); + } + } +} + + + +static void +calculate(const struct casefile *cf, void *cmd_) +{ + struct ssbox stat_summary_box; + struct trbox test_results_box; + + struct casereader *r; + const struct ccase *c; + struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; + + common_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; ) + { + common_calc(c,cmd); } + casereader_destroy (r); + common_postcalc(cmd); + + switch(mode) + { + case T_1_SAMPLE: + one_sample_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; ) + { + one_sample_calc(c,cmd); + } + casereader_destroy (r); + one_sample_postcalc(cmd); + + break; + case T_PAIRED: + paired_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; ) + { + paired_calc(c,cmd); + } + casereader_destroy (r); + paired_postcalc(cmd); + + break; + case T_IND_SAMPLES: + + group_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; ) + { + group_calc(c,cmd); + } + casereader_destroy (r); + group_postcalc(cmd); + + + levene(cf, indep_var, cmd->n_variables, cmd->v_variables, + (cmd->miss == TTS_LISTWISE)?LEV_LISTWISE:LEV_ANALYSIS , + value_is_missing); + break; + } + + ssbox_create(&stat_summary_box,cmd,mode); + ssbox_populate(&stat_summary_box,cmd); + ssbox_finalize(&stat_summary_box); + + if ( mode == T_PAIRED) + pscbox(); + + trbox_create(&test_results_box,cmd,mode); + trbox_populate(&test_results_box,cmd); + trbox_finalize(&test_results_box); + }