X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ft-test.q;h=1e3150fa572e1fac7b84e28060098eac725de4cb;hb=7b98b3a4f58f6dc5a8e9cbc188b627966d5e652d;hp=c2fc85d7f774bdf5308cd458f5684efc65518536;hpb=d69833d7d425cd98c3793a89945153306dfa7ab3;p=pspp diff --git a/src/t-test.q b/src/t-test.q index c2fc85d7f7..1e3150fa57 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -37,11 +37,13 @@ #include "var.h" #include "vfm.h" #include "pool.h" +#include "hash.h" +#include "stats.h" /* (specification) "T-TEST" (tts_): - groups=custom; - testval=double; + +groups=custom; + +testval=double; variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); pairs=custom; +missing=miss:!analysis/listwise, @@ -67,9 +69,28 @@ static int n_groups_values; static union value groups_values[2]; /* PAIRS: Number of pairs to be compared ; each pair. */ -static int n_pairs ; -typedef struct variable *pair_t[2] ; -static pair_t *pairs; +static int n_pairs = 0 ; +struct pair +{ + /* The variables comprising the pair */ + struct variable *v[2]; + + /* The correlation coefficient between the variables */ + double correlation; + + /* The sum of the differences */ + double sum_of_diffs; + + /* The mean of the differences */ + double mean_diff; + + /* The sum of the squares of the differences */ + double ssq_diffs; + + /* The std deviation of the differences */ + double std_dev_diff; +}; +static struct pair *pairs=0; static int parse_value (union value * v, int type) ; @@ -142,6 +163,13 @@ static int one_sample_calc (struct ccase *); static void one_sample_precalc (void); static void one_sample_postcalc (void); +static int paired_calc (struct ccase *); +static void paired_precalc (void); +static void paired_postcalc (void); + +static int compare_var_name (const void *a_, const void *b_, void *v_ UNUSED); +static unsigned hash_var_name (const void *a_, void *v_ UNUSED); + int cmd_t_test(void) @@ -163,13 +191,20 @@ cmd_t_test(void) if (! cmd.sbc_criteria) cmd.criteria=0.95; - if ( cmd.sbc_testval + cmd.sbc_groups + cmd.sbc_pairs != 1 ) - { - msg(SE, - _("Exactly one of TESTVAL, GROUPS or PAIRS subcommands is required") - ); - return CMD_FAILURE; - } + { + int m=0; + if (cmd.sbc_testval) ++m; + if (cmd.sbc_groups) ++m; + if (cmd.sbc_pairs) ++m; + + if ( m != 1) + { + msg(SE, + _("TESTVAL, GROUPS and PAIRS subcommands are mutually exclusive.") + ); + return CMD_FAILURE; + } + } if (cmd.sbc_testval) mode=T_1_SAMPLE; @@ -178,16 +213,59 @@ cmd_t_test(void) else mode=T_PAIRED; - if ( mode == T_PAIRED && cmd.sbc_variables) + if ( mode == T_PAIRED) { - msg(SE, _("VARIABLES subcommand is not appropriate with PAIRS")); - return CMD_FAILURE; + if (cmd.sbc_variables) + { + msg(SE, _("VARIABLES subcommand is not appropriate with PAIRS")); + return CMD_FAILURE; + } + else + { + /* Iterate through the pairs and put each variable that is a + member of a pair into cmd.v_variables */ + + int i; + struct hsh_iterator hi; + struct hsh_table *hash; + struct variable *v; + + hash=hsh_create(n_pairs,compare_var_name,hash_var_name,0,0); + + for (i=0; i < n_pairs; ++i) + { + hsh_insert(hash,pairs[i].v[0]); + hsh_insert(hash,pairs[i].v[1]); + } + + assert(cmd.n_variables == 0); + cmd.n_variables = hsh_count(hash); + + cmd.v_variables = xrealloc(cmd.v_variables, + sizeof(struct variable) * cmd.n_variables); + /* Iterate through the hash */ + for (i=0,v = (struct variable *) hsh_first(hash,&hi); + v != 0; + v=hsh_next(hash,&hi) ) + cmd.v_variables[i++]=v; + + hsh_destroy(hash); + } } + procedure(common_precalc,common_calc,common_postcalc); - if (mode == T_1_SAMPLE) - procedure(one_sample_precalc,one_sample_calc,one_sample_postcalc); + switch(mode) + { + case T_1_SAMPLE: + procedure(one_sample_precalc,one_sample_calc,one_sample_postcalc); + break; + case T_PAIRED: + procedure(paired_precalc,paired_calc,paired_postcalc); + break; + } + t_test_pool = pool_create (); @@ -207,12 +285,17 @@ cmd_t_test(void) pool_destroy (t_test_pool); t_test_pool=0; + + + n_pairs=0; + free(pairs); + pairs=0; return CMD_SUCCESS; } static int -tts_custom_groups (struct cmd_t_test *cmd unused) +tts_custom_groups (struct cmd_t_test *cmd UNUSED) { lex_match('='); @@ -277,10 +360,11 @@ tts_custom_groups (struct cmd_t_test *cmd unused) static int -tts_custom_pairs (struct cmd_t_test *cmd unused) +tts_custom_pairs (struct cmd_t_test *cmd UNUSED) { struct variable **vars; int n_vars; + int n_pairs_local; int n_before_WITH ; int n_after_WITH = -1; @@ -332,11 +416,11 @@ tts_custom_pairs (struct cmd_t_test *cmd unused) n_before_WITH, n_after_WITH ); return 0; } - n_pairs=n_before_WITH; + n_pairs_local=n_before_WITH; } else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ { - n_pairs=n_before_WITH * n_after_WITH ; + n_pairs_local=n_before_WITH * n_after_WITH ; } else /* Neither WITH nor PAIRED keyword given */ { @@ -349,35 +433,36 @@ tts_custom_pairs (struct cmd_t_test *cmd unused) } /* how many ways can you pick 2 from n_vars ? */ - n_pairs = n_vars * (n_vars -1 ) /2 ; + n_pairs_local = n_vars * (n_vars -1 ) /2 ; } + /* Allocate storage for the pairs */ - pairs = xrealloc(pairs,sizeof(pair_t) *n_pairs); + pairs = xrealloc(pairs, sizeof(struct pair) * (n_pairs + n_pairs_local) ); /* Populate the pairs with the appropriate variables */ if ( paired ) { int i; - assert(n_pairs == n_vars/2); - for (i = 0; i < n_pairs ; ++i) + assert(n_pairs_local == n_vars/2); + for (i = 0; i < n_pairs_local ; ++i) { - pairs[i][0] = vars[i]; - pairs[i][1] = vars[i+n_pairs]; + pairs[i].v[n_pairs+0] = vars[i]; + pairs[i].v[n_pairs+1] = vars[i+n_pairs_local]; } } else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ { int i,j; - int p=0; + int p=n_pairs; for(i=0 ; i < n_before_WITH ; ++i ) { for(j=0 ; j < n_after_WITH ; ++j) { - pairs[p][0] = vars[i]; - pairs[p][1] = vars[j+n_before_WITH]; + pairs[p].v[0] = vars[i]; + pairs[p].v[1] = vars[j+n_before_WITH]; ++p; } } @@ -385,19 +470,21 @@ tts_custom_pairs (struct cmd_t_test *cmd unused) else /* Neither WITH nor PAIRED given */ { int i,j; - int p=0; + int p=n_pairs; for(i=0 ; i < n_vars ; ++i ) { for(j=i+1 ; j < n_vars ; ++j) { - pairs[p][0] = vars[i]; - pairs[p][1] = vars[j]; + pairs[p].v[0] = vars[i]; + pairs[p].v[1] = vars[j]; ++p; } } } + n_pairs+=n_pairs_local; + return 1; } @@ -589,7 +676,7 @@ void ssbox_paired_populate(struct ssbox *ssb, /* Initialize the paired values ssbox */ void -ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd unused) +ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd UNUSED) { int hsize=6; @@ -610,26 +697,38 @@ ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd unused) /* Populate the ssbox for paired values */ void -ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd unused) +ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd UNUSED) { int i; - struct string ds; assert(ssb->t); - ds_init(t_test_pool,&ds,15); for (i=0; i < n_pairs; ++i) { - ds_clear(&ds); + int j; + + tab_text (ssb->t, 0, i*2+1, TAB_LEFT | TAT_PRINTF , _("Pair %d"),i); - ds_printf(&ds,_("Pair %d"),i); + for (j=0 ; j < 2 ; ++j) + { + struct t_test_proc *ttp; + + ttp=&pairs[i].v[j]->p.t_t; + + /* Titles */ + + tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, pairs[i].v[j]->name); + + /* Values */ + tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, ttp->mean, 8, 2); + tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, ttp->n, 2, 0); + tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, ttp->std_dev, 8, 3); + tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, ttp->se_mean, 8, 3); + + } - tab_text (ssb->t, 0, i*2+1, TAB_LEFT, ds.string); - tab_text (ssb->t, 1, i*2+1, TAB_LEFT, pairs[i][0]->name); - tab_text (ssb->t, 1, i*2+2, TAB_LEFT, pairs[i][1]->name); } - ds_destroy(&ds); } /* Populate the one sample ssbox */ @@ -719,13 +818,11 @@ trbox_finalize(struct trbox *trb) /* Initialize the independent samples trbox */ void trbox_independent_samples_init(struct trbox *self, - struct cmd_t_test *cmd unused) + struct cmd_t_test *cmd UNUSED) { const int hsize=11; const int vsize=cmd->n_variables*2+3; - struct string ds; - assert(self); self->populate = trbox_independent_samples_populate; @@ -752,14 +849,10 @@ trbox_independent_samples_init(struct trbox *self, tab_text(self->t,9,2, TAB_CENTER | TAT_TITLE,_("Lower")); tab_text(self->t,10,2, TAB_CENTER | TAT_TITLE,_("Upper")); - ds_init(t_test_pool,&ds,80); - - ds_printf(&ds,_("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); - - tab_joint_text(self->t,9,1,10,1,TAB_CENTER, ds.string); + tab_joint_text(self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF, + _("%d%% Confidence Interval of the Difference"), + (int)round(cmd->criteria*100.0)); - ds_destroy(&ds); } /* Populate the independent samples trbox */ @@ -784,17 +877,15 @@ trbox_independent_samples_populate(struct trbox *self, /* Initialize the paired samples trbox */ void trbox_paired_init(struct trbox *self, - struct cmd_t_test *cmd unused) + struct cmd_t_test *cmd UNUSED) { const int hsize=10; - const int vsize=n_pairs*2+3; - - struct string ds; + const int vsize=n_pairs+3; self->populate = trbox_paired_populate; - trbox_base_init(self,n_pairs*2,hsize); + trbox_base_init(self,n_pairs,hsize); tab_title (self->t, 0, _("Paired Samples Test")); tab_hline(self->t,TAL_1,2,6,1); tab_vline(self->t,TAL_2,2,0,vsize); @@ -804,14 +895,9 @@ trbox_paired_init(struct trbox *self, tab_hline(self->t,TAL_1,5,6, 2); tab_vline(self->t,TAL_0,6,0,1); - ds_init(t_test_pool,&ds,80); - - ds_printf(&ds,_("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); - - tab_joint_text(self->t,5,1,6,1,TAB_CENTER, ds.string); - - ds_destroy(&ds); + tab_joint_text(self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, + _("%d%% Confidence Interval of the Difference"), + (int)round(cmd->criteria*100.0)); tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -826,23 +912,84 @@ trbox_paired_init(struct trbox *self, /* Populate the paired samples trbox */ void trbox_paired_populate(struct trbox *trb, - struct cmd_t_test *cmd unused) + struct cmd_t_test *cmd UNUSED) { int i; - struct string ds; - - ds_init(t_test_pool,&ds,15); for (i=0; i < n_pairs; ++i) { - ds_clear(&ds); - ds_printf(&ds,_("Pair %d"),i); + int which =1; + double p,q; + int status; + double bound; + double se_mean; + + struct variable *v0 = pairs[i].v[0]; + struct variable *v1 = pairs[i].v[1]; + + struct t_test_proc *ttp0 = &v0->p.t_t; + struct t_test_proc *ttp1 = &v1->p.t_t; + + double n = ttp0->n; + double t; + double df = n - 1; + + tab_text (trb->t, 0, i+3, TAB_LEFT | TAT_PRINTF, _("Pair %d"),i); + + tab_text (trb->t, 1, i+3, TAB_LEFT | TAT_PRINTF, "%s - %s", + pairs[i].v[0]->name, pairs[i].v[1]->name); + + tab_float(trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, 8, 4); + + tab_float(trb->t, 3, i+3, TAB_RIGHT, pairs[i].std_dev_diff, 8, 5); + + /* SE Mean */ + se_mean = pairs[i].std_dev_diff / sqrt(n) ; + tab_float(trb->t, 4, i+3, TAB_RIGHT, se_mean, 8,5 ); + + /* Now work out the confidence interval */ + q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + p = 1 - q ; + which=2; /* Calc T from p,q and df */ + cdft(&which, &p, &q, &t, &df, &status, &bound); + + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + tab_float(trb->t, 5, i+3, TAB_RIGHT, + pairs[i].mean_diff - t * se_mean , 8, 4); + + tab_float(trb->t, 6, i+3, TAB_RIGHT, + pairs[i].mean_diff + t * se_mean , 8, 4); + + t = ( ttp0->mean - ttp1->mean) + / sqrt ( + ( sqr(ttp0->s_std_dev) + sqr(ttp1->s_std_dev) - + 2 * pairs[i].correlation * ttp0->s_std_dev * ttp1->s_std_dev ) + / (n-1) ) + ; + + tab_float(trb->t, 7, i+3, TAB_RIGHT, t , 8,3 ); + + /* Degrees of freedom */ + tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 2, 0 ); + + which=1; + cdft(&which, &p, &q, &t, &df, &status, &bound); + + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + + tab_float(trb->t, 9, i+3, TAB_RIGHT, p*2.0 , 8, 3); + - tab_text (trb->t, 0, i*2+3, TAB_LEFT, ds.string); - tab_text (trb->t, 1, i*2+3, TAB_LEFT, pairs[i][0]->name); - tab_text (trb->t, 1, i*2+4, TAB_LEFT, pairs[i][1]->name); } - ds_destroy(&ds); + } /* Initialize the one sample trbox */ @@ -852,24 +999,23 @@ trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) const int hsize=7; const int vsize=cmd->n_variables+3; - struct string ds; - self->populate = trbox_one_sample_populate; trbox_base_init(self, cmd->n_variables,hsize); tab_title (self->t, 0, _("One-Sample Test")); tab_hline(self->t, TAL_1, 1, hsize - 1, 1); tab_vline(self->t, TAL_2, 1, 0, vsize); - ds_init(t_test_pool, &ds, 80); - ds_printf(&ds,_("Test Value = %f"),cmd->n_testval); - tab_joint_text(self->t, 1, 0, hsize-1,0, TAB_CENTER,ds.string); + + tab_joint_text(self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF, + _("Test Value = %f"),cmd->n_testval); + tab_box(self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1); - ds_clear(&ds); - ds_printf(&ds,_("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); - tab_joint_text(self->t,5,1,6,1,TAB_CENTER, ds.string); - ds_destroy(&ds); + + tab_joint_text(self->t,5,1,6,1,TAB_CENTER | TAT_PRINTF, + _("%d%% Confidence Interval of the Difference"), + (int)round(cmd->criteria*100.0)); + tab_vline(self->t,TAL_0,6,1,1); tab_hline(self->t,TAL_1,5,6,2); tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _("t")); @@ -992,26 +1138,49 @@ pscbox(struct cmd_t_test *cmd) tab_text(table, 3,0, TAB_CENTER | TAT_TITLE, _("Correlation")); tab_text(table, 4,0, TAB_CENTER | TAT_TITLE, _("Sig.")); - /* row headings */ - { - struct string ds; - - ds_init(t_test_pool,&ds,15); for (i=0; i < n_pairs; ++i) { - ds_clear(&ds); - ds_printf(&ds,_("Pair %d"),i); - tab_text(table, 0,i+1, TAB_LEFT | TAT_TITLE, ds.string); + int which =1; + double p,q; - ds_clear(&ds); - ds_printf(&ds,_("%s & %s"),pairs[i][0]->name,pairs[i][1]->name); - tab_text(table, 1,i+1, TAB_LEFT | TAT_TITLE, ds.string); - } + int status; + double bound; - ds_destroy(&ds); - } + double df = pairs[i].v[0]->p.t_t.n -2; + double correlation_t = + pairs[i].correlation * sqrt(df) / + sqrt(1 - sqr(pairs[i].correlation)); + + + /* row headings */ + tab_text(table, 0,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, + _("Pair %d"), i); + + tab_text(table, 1,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, + _("%s & %s"), pairs[i].v[0]->name, pairs[i].v[1]->name); + + + /* row data */ + tab_float(table, 3, i+1, TAB_RIGHT, pairs[i].correlation, 8, 3); + tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].v[0]->p.t_t.n , 4, 0); + + + cdft(&which, &p, &q, &correlation_t, &df, &status, &bound); + + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + + tab_float(table, 4, i+1, TAB_RIGHT, q*2.0, 8, 3); + + + + + } tab_submit(table); } @@ -1051,7 +1220,7 @@ static void common_precalc (void) { int i=0; - + for(i=0; i< cmd.n_variables ; ++i) { struct t_test_proc *ttp; @@ -1076,13 +1245,16 @@ common_postcalc (void) ttp= &cmd.v_variables[i]->p.t_t; ttp->mean=ttp->sum / ttp->n; + ttp->s_std_dev= sqrt( + ( (ttp->ssq / ttp->n ) - ttp->mean * ttp->mean ) + ) ; + ttp->std_dev= sqrt( ttp->n/(ttp->n-1) * ( (ttp->ssq / ttp->n ) - ttp->mean * ttp->mean ) ) ; ttp->se_mean = ttp->std_dev / sqrt(ttp->n); - ttp->mean_diff= ttp->sum_diff / ttp->n; } } @@ -1140,3 +1312,89 @@ one_sample_postcalc (void) ttp->mean_diff = ttp->sum_diff / ttp->n ; } } + + + +static int +compare_var_name (const void *a_, const void *b_, void *v_ UNUSED) +{ + const struct variable *a = a_; + const struct variable *b = b_; + + return strcmp(a->name,b->name); +} + +static unsigned +hash_var_name (const void *a_, void *v_ UNUSED) +{ + const struct variable *a = a_; + + return hsh_hash_bytes (a->name, strlen(a->name)); +} + + +static void +paired_precalc (void) +{ + int i; + + for(i=0; i < n_pairs ; ++i ) + { + pairs[i].correlation=0; + pairs[i].sum_of_diffs=0; + pairs[i].ssq_diffs=0; + } + +} + +static int +paired_calc (struct ccase *c) +{ + int i; + + for(i=0; i < n_pairs ; ++i ) + { + struct variable *v0 = pairs[i].v[0]; + struct variable *v1 = pairs[i].v[1]; + + union value *val0 = &c->data[v0->fv]; + union value *val1 = &c->data[v1->fv]; + + pairs[i].correlation += ( val0->f - pairs[i].v[0]->p.t_t.mean ) + * + ( val1->f - pairs[i].v[1]->p.t_t.mean ); + + pairs[i].sum_of_diffs += val0->f - val1->f ; + pairs[i].ssq_diffs += sqr(val0->f - val1->f); + + } + + + return 0; +} + +static void +paired_postcalc (void) +{ + int i; + + for(i=0; i < n_pairs ; ++i ) + { + const double n = pairs[i].v[0]->p.t_t.n ; + + pairs[i].correlation /= pairs[i].v[0]->p.t_t.std_dev * + pairs[i].v[1]->p.t_t.std_dev ; + pairs[i].correlation /= pairs[i].v[0]->p.t_t.n -1; + + + pairs[i].mean_diff = pairs[i].sum_of_diffs / n ; + + + pairs[i].std_dev_diff = sqrt ( n / (n - 1) * ( + ( pairs[i].ssq_diffs / n ) + - + sqr(pairs[i].mean_diff ) + ) ); + + } +}