X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ft-test.q;h=5fdb802d633be7246b49c2f639c130e63905cf41;hb=e210b20bf6f405637c8c03dd280b5a4a627191b8;hp=3f80214494bde1013c85f05713accbe7fe6df559;hpb=ccf2f45c091ce1555b4e2a36186c501675c18a59;p=pspp-builds.git diff --git a/src/t-test.q b/src/t-test.q index 3f802144..5fdb802d 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -16,36 +16,44 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include -#include +#include +#include "error.h" #include #include #include #include "alloc.h" -#include "str.h" -#include "dcdflib/cdflib.h" +#include "case.h" +#include "casefile.h" #include "command.h" -#include "lexer.h" +#include "dictionary.h" #include "error.h" +#include "group_proc.h" +#include "hash.h" +#include "levene.h" +#include "lexer.h" #include "magic.h" -#include "tab.h" +#include "misc.h" +#include "size_max.h" #include "som.h" +#include "str.h" +#include "tab.h" #include "value-labels.h" #include "var.h" #include "vfm.h" -#include "pool.h" -#include "hash.h" -#include "stats.h" -#include "t-test.h" -#include "levene.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +/* (headers) */ /* (specification) "T-TEST" (tts_): +groups=custom; - +testval=double; + testval=double; variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); pairs=custom; +missing=miss:!analysis/listwise, @@ -56,19 +64,44 @@ /* (declarations) */ /* (functions) */ -static struct cmd_t_test cmd; -static struct pool *t_test_pool ; + +/* Function to use for testing for missing values */ +static is_missing_func *value_is_missing; /* Variable for the GROUPS subcommand, if given. */ -static struct variable *groups; +static struct variable *indep_var; -/* GROUPS: Number of values specified by the user; the values - specified if any. */ +enum comparison + { + CMP_LE = -2, + CMP_EQ = 0, + }; + +struct group_properties +{ + /* The comparison criterion */ + enum comparison criterion; + + /* The width of the independent variable */ + int indep_width ; + + union { + /* The value of the independent variable at which groups are determined to + belong to one group or the other */ + double critical_value; + + + /* The values of the independent variable for each group */ + union value g_value[2]; + } v ; + +}; + + +static struct group_properties gp ; -static int n_groups_values; -static union value groups_values[2]; /* PAIRS: Number of pairs to be compared ; each pair. */ @@ -78,12 +111,34 @@ struct pair /* The variables comprising the pair */ struct variable *v[2]; + /* The number of valid variable pairs */ + double n; + + /* The sum of the members */ + double sum[2]; + + /* sum of squares of the members */ + double ssq[2]; + + /* Std deviation of the members */ + double std_dev[2]; + + + /* Sample Std deviation of the members */ + double s_std_dev[2]; + + /* The means of the members */ + double mean[2]; + /* The correlation coefficient between the variables */ double correlation; /* The sum of the differences */ double sum_of_diffs; + /* The sum of the products */ + double sum_of_prod; + /* The mean of the differences */ double mean_diff; @@ -93,12 +148,11 @@ struct pair /* The std deviation of the differences */ double std_dev_diff; }; -static struct pair *pairs=0; +static struct pair *pairs=0; static int parse_value (union value * v, int type) ; - /* Structures and Functions for the Statistics Summary Box */ struct ssbox; typedef void populate_ssbox_func(struct ssbox *ssb, @@ -158,41 +212,45 @@ enum { }; -static int common_calc (struct ccase *, void *); -static void common_precalc (void *); -static void common_postcalc (void *); +static int common_calc (const struct ccase *, void *); +static void common_precalc (struct cmd_t_test *); +static void common_postcalc (struct cmd_t_test *); -static int one_sample_calc (struct ccase *, void *); -static void one_sample_precalc (void *); -static void one_sample_postcalc (void *); +static int one_sample_calc (const struct ccase *, void *); +static void one_sample_precalc (struct cmd_t_test *); +static void one_sample_postcalc (struct cmd_t_test *); -static int paired_calc (struct ccase *, void *); -static void paired_precalc (void *); -static void paired_postcalc (void *); +static int paired_calc (const struct ccase *, void *); +static void paired_precalc (struct cmd_t_test *); +static void paired_postcalc (struct cmd_t_test *); -static void group_precalc (void *); -static int group_calc (struct ccase *, void *); -static void group_postcalc (void *); +static void group_precalc (struct cmd_t_test *); +static int group_calc (const struct ccase *, struct cmd_t_test *); +static void group_postcalc (struct cmd_t_test *); -static int compare_var_name (const void *a_, const void *b_, void *v_ UNUSED); -static unsigned hash_var_name (const void *a_, void *v_ UNUSED); +static void calculate(const struct casefile *cf, void *_mode); +static int mode; +static struct cmd_t_test cmd; -int -cmd_t_test(void) -{ - int mode; +static int bad_weight_warn; - struct ssbox stat_summary_box; - struct trbox test_results_box; - if (!lex_force_match_id ("T")) - return CMD_FAILURE; +static int compare_group_binary(const struct group_statistics *a, + const struct group_statistics *b, + const struct group_properties *p); + - lex_match ('-'); - lex_match_id ("TEST"); +static unsigned hash_group_binary(const struct group_statistics *g, + const struct group_properties *p); + + + +int +cmd_t_test(void) +{ if ( !parse_t_test(&cmd) ) return CMD_FAILURE; @@ -211,6 +269,7 @@ cmd_t_test(void) msg(SE, _("TESTVAL, GROUPS and PAIRS subcommands are mutually exclusive.") ); + free_t_test(&cmd); return CMD_FAILURE; } } @@ -227,6 +286,7 @@ cmd_t_test(void) if (cmd.sbc_variables) { msg(SE, _("VARIABLES subcommand is not appropriate with PAIRS")); + free_t_test(&cmd); return CMD_FAILURE; } else @@ -239,7 +299,7 @@ cmd_t_test(void) struct hsh_table *hash; struct variable *v; - hash=hsh_create(n_pairs,compare_var_name,hash_var_name,0,0); + hash = hsh_create (n_pairs, compare_var_names, hash_var_name, 0, 0); for (i=0; i < n_pairs; ++i) { @@ -250,8 +310,8 @@ cmd_t_test(void) assert(cmd.n_variables == 0); cmd.n_variables = hsh_count(hash); - cmd.v_variables = xrealloc(cmd.v_variables, - sizeof(struct variable) * cmd.n_variables); + cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables, + sizeof *cmd.v_variables); /* Iterate through the hash */ for (i=0,v = (struct variable *) hsh_first(hash,&hi); v != 0; @@ -261,145 +321,135 @@ cmd_t_test(void) hsh_destroy(hash); } } - - - procedure(common_precalc,common_calc,common_postcalc, NULL); - - switch(mode) + else if ( !cmd.sbc_variables) { - case T_1_SAMPLE: - procedure(one_sample_precalc,one_sample_calc,one_sample_postcalc, NULL); - break; - case T_PAIRED: - procedure(paired_precalc,paired_calc,paired_postcalc, NULL); - break; - case T_IND_SAMPLES: - procedure(group_precalc,group_calc,group_postcalc, NULL); - levene(groups, cmd.n_variables, cmd.v_variables); - break; + msg(SE, _("One or more VARIABLES must be specified.")); + free_t_test(&cmd); + return CMD_FAILURE; } - t_test_pool = pool_create (); - - ssbox_create(&stat_summary_box,&cmd,mode); - ssbox_populate(&stat_summary_box,&cmd); - ssbox_finalize(&stat_summary_box); - - if ( mode == T_PAIRED) - pscbox(); - - trbox_create(&test_results_box,&cmd,mode); - trbox_populate(&test_results_box,&cmd); - trbox_finalize(&test_results_box); - pool_destroy (t_test_pool); + /* If /MISSING=INCLUDE is set, then user missing values are ignored */ + if (cmd.incl == TTS_INCLUDE ) + value_is_missing = mv_is_value_system_missing; + else + value_is_missing = mv_is_value_missing; - t_test_pool=0; + bad_weight_warn = 1; + multipass_procedure_with_splits (calculate, &cmd); n_pairs=0; free(pairs); pairs=0; - if ( mode == T_IND_SAMPLES) { - int i; + int v; /* Destroy any group statistics we created */ - for (i= 0 ; i < cmd.n_variables ; ++i ) + for (v = 0 ; v < cmd.n_variables ; ++v ) { - free(cmd.v_variables[i]->p.t_t.gs); + struct group_proc *grpp = group_proc_get (cmd.v_variables[v]); + hsh_destroy (grpp->group_hash); } } + free_t_test(&cmd); return CMD_SUCCESS; } static int tts_custom_groups (struct cmd_t_test *cmd UNUSED) { - lex_match('='); + int n_group_values=0; - if (token != T_ALL && - (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - ) - { - msg(SE,_("`%s' is not a variable name"),tokid); - return 0; - } + lex_match('='); - groups = parse_variable (); - if (!groups) + indep_var = parse_variable (); + if (!indep_var) { lex_error ("expecting variable name in GROUPS subcommand"); return 0; } - if (groups->type == T_STRING && groups->width > MAX_SHORT_STRING) + if (indep_var->type == T_STRING && indep_var->width > MAX_SHORT_STRING) { msg (SE, _("Long string variable %s is not valid here."), - groups->name); + indep_var->name); return 0; } if (!lex_match ('(')) { - if (groups->type == NUMERIC) + if (indep_var->type == NUMERIC) { - n_groups_values = 2; - groups_values[0].f = 1; - groups_values[1].f = 2; + gp.v.g_value[0].f = 1; + gp.v.g_value[1].f = 2; + + gp.criterion = CMP_EQ; + + n_group_values = 2; + return 1; } else { - msg (SE, _("When applying GROUPS to a string variable, at " - "least one value must be specified.")); + msg (SE, _("When applying GROUPS to a string variable, two " + "values must be specified.")); return 0; } } - if (!parse_value (&groups_values[0],groups->type)) - return 0; - n_groups_values = 1; + if (!parse_value (&gp.v.g_value[0], indep_var->type)) + return 0; lex_match (','); if (lex_match (')')) - return 1; + { + if (indep_var->type != NUMERIC) + { + + msg (SE, _("When applying GROUPS to a string variable, two " + "values must be specified.")); + return 0; + } + gp.criterion = CMP_LE; + gp.v.critical_value = gp.v.g_value[0].f; + + n_group_values = 1; + return 1; + } - if (!parse_value (&groups_values[1],groups->type)) + if (!parse_value (&gp.v.g_value[1], indep_var->type)) return 0; - n_groups_values = 2; + n_group_values = 2; if (!lex_force_match (')')) return 0; - return 1; -} + if ( n_group_values == 2 ) + gp.criterion = CMP_EQ ; + else + gp.criterion = CMP_LE ; + return 1; +} static int tts_custom_pairs (struct cmd_t_test *cmd UNUSED) { struct variable **vars; - int n_vars; - int n_pairs_local; + size_t n_vars; + size_t n_pairs_local; - int n_before_WITH ; - int n_after_WITH = -1; + size_t n_before_WITH; + size_t n_after_WITH = SIZE_MAX; int paired ; /* Was the PAIRED keyword given ? */ lex_match('='); - if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - && token != T_ALL) - { - msg(SE,_("`%s' is not a variable name"),tokid); - return 0; - } - n_vars=0; if (!parse_variables (default_dict, &vars, &n_vars, PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) @@ -409,7 +459,7 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) } assert (n_vars); - n_before_WITH=0; + n_before_WITH = 0; if (lex_match (T_WITH)) { n_before_WITH = n_vars; @@ -437,11 +487,11 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) n_before_WITH, n_after_WITH ); return 0; } - n_pairs_local=n_before_WITH; + n_pairs_local = n_before_WITH; } else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ { - n_pairs_local=n_before_WITH * n_after_WITH ; + n_pairs_local = n_before_WITH * n_after_WITH ; } else /* Neither WITH nor PAIRED keyword given */ { @@ -454,29 +504,29 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) } /* how many ways can you pick 2 from n_vars ? */ - n_pairs_local = n_vars * (n_vars -1 ) /2 ; + n_pairs_local = n_vars * (n_vars - 1) / 2; } /* Allocate storage for the pairs */ - pairs = xrealloc(pairs, sizeof(struct pair) * (n_pairs + n_pairs_local) ); + pairs = xnrealloc (pairs, n_pairs + n_pairs_local, sizeof *pairs); /* Populate the pairs with the appropriate variables */ if ( paired ) { int i; - assert(n_pairs_local == n_vars/2); - for (i = 0; i < n_pairs_local ; ++i) + assert(n_pairs_local == n_vars / 2); + for (i = 0; i < n_pairs_local; ++i) { - pairs[i].v[n_pairs+0] = vars[i]; - pairs[i].v[n_pairs+1] = vars[i+n_pairs_local]; + pairs[i].v[n_pairs] = vars[i]; + pairs[i].v[n_pairs + 1] = vars[i + n_pairs_local]; } } else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ { int i,j; - int p=n_pairs; + size_t p = n_pairs; for(i=0 ; i < n_before_WITH ; ++i ) { @@ -490,8 +540,8 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) } else /* Neither WITH nor PAIRED given */ { - int i,j; - int p=n_pairs; + size_t i,j; + size_t p=n_pairs; for(i=0 ; i < n_vars ; ++i ) { @@ -506,6 +556,7 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) n_pairs+=n_pairs_local; + free (vars); return 1; } @@ -524,7 +575,7 @@ parse_value (union value * v, int type ) { if (!lex_force_string ()) return 0; - strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr)); + strncpy (v->s, ds_c_str (&tokstr), ds_length (&tokstr)); } lex_get (); @@ -548,6 +599,7 @@ void ssbox_independent_samples_init(struct ssbox *this, void ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd); + /* Factory to create an ssbox */ void ssbox_create(struct ssbox *ssb, struct cmd_t_test *cmd, int mode) @@ -569,6 +621,7 @@ ssbox_create(struct ssbox *ssb, struct cmd_t_test *cmd, int mode) } + /* Despatcher for the populate method */ void ssbox_populate(struct ssbox *ssb,struct cmd_t_test *cmd) @@ -592,6 +645,8 @@ ssbox_base_finalize(struct ssbox *ssb) tab_submit(ssb->t); } + + /* Initialize a ssbox struct */ void ssbox_base_init(struct ssbox *this, int cols,int rows) @@ -621,7 +676,7 @@ ssbox_one_sample_init(struct ssbox *this, ssbox_base_init(this, hsize,vsize); tab_title (this->t, 0, _("One-Sample Statistics")); - tab_vline(this->t, TAL_2, 1,0,vsize); + tab_vline(this->t, TAL_2, 1,0,vsize - 1); tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -643,8 +698,8 @@ ssbox_independent_samples_init(struct ssbox *this, ssbox_base_init(this, hsize,vsize); tab_title (this->t, 0, _("Group Statistics")); - tab_vline(this->t,0,1,0,vsize); - tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, groups->name); + tab_vline(this->t,0,1,0,vsize - 1); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, indep_var->name); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -659,48 +714,94 @@ ssbox_independent_samples_populate(struct ssbox *ssb, { int i; + char *val_lab0=0; char *val_lab1=0; - char *val_lab2=0; + double indep_value[2]; - if ( groups->type == NUMERIC ) + char prefix[2][3]={"",""}; + + if ( indep_var->type == NUMERIC ) { - val_lab1 = val_labs_find( groups->val_labs,groups_values[0]); - val_lab2 = val_labs_find( groups->val_labs,groups_values[1]); + val_lab0 = val_labs_find( indep_var->val_labs,gp.v.g_value[0]); + val_lab1 = val_labs_find( indep_var->val_labs,gp.v.g_value[1]); } else { - val_lab1 = groups_values[0].s; - val_lab2 = groups_values[1].s; + val_lab0 = gp.v.g_value[0].s; + val_lab1 = gp.v.g_value[1].s; + } + + if (gp.criterion == CMP_LE ) + { + strcpy(prefix[0],"< "); + strcpy(prefix[1],">="); + indep_value[0] = gp.v.critical_value; + indep_value[1] = gp.v.critical_value; + } + else + { + indep_value[0] = gp.v.g_value[0].f; + indep_value[1] = gp.v.g_value[1].f; } assert(ssb->t); for (i=0; i < cmd->n_variables; ++i) { - int g; + struct variable *var = cmd->v_variables[i]; + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; + int count=0; tab_text (ssb->t, 0, i*2+1, TAB_LEFT, cmd->v_variables[i]->name); - if (val_lab1) - tab_text (ssb->t, 1, i*2+1, TAB_LEFT, val_lab1); + if (val_lab0) + tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, + "%s%s", prefix[0], val_lab0); else - tab_float(ssb->t, 1 ,i*2+1, TAB_LEFT, groups_values[0].f, 2,0); + tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, + "%s%g", prefix[0], indep_value[0]); - if (val_lab2) - tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT, val_lab2); + if (val_lab1) + tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, + "%s%s", prefix[1], val_lab1); else - tab_float(ssb->t, 1 ,i*2+1+1, TAB_LEFT, groups_values[1].f,2,0); + tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, + "%s%g", prefix[1], indep_value[1]); + /* Fill in the group statistics */ - for ( g=0; g < 2 ; ++g ) + for ( count = 0 ; count < 2 ; ++count ) { - struct group_statistics *gs = &cmd->v_variables[i]->p.t_t.gs[g]; + union value search_val; + + struct group_statistics *gs; - tab_float(ssb->t, 2 ,i*2+g+1, TAB_RIGHT, gs->n, 2, 0); - tab_float(ssb->t, 3 ,i*2+g+1, TAB_RIGHT, gs->mean, 8, 2); - tab_float(ssb->t, 4 ,i*2+g+1, TAB_RIGHT, gs->std_dev, 8, 3); - tab_float(ssb->t, 5 ,i*2+g+1, TAB_RIGHT, gs->se_mean, 8, 3); + if ( gp.criterion == CMP_LE ) + { + if ( count == 0 ) + { + /* less than ( < ) case */ + search_val.f = gp.v.critical_value - 1.0; + } + else + { + /* >= case */ + search_val.f = gp.v.critical_value + 1.0; + } + } + else + { + search_val = gp.v.g_value[count]; + } + + gs = hsh_find(grp_hash, (void *) &search_val); + assert(gs); + + tab_float(ssb->t, 2 ,i*2+count+1, TAB_RIGHT, gs->n, 2, 0); + tab_float(ssb->t, 3 ,i*2+count+1, TAB_RIGHT, gs->mean, 8, 2); + tab_float(ssb->t, 4 ,i*2+count+1, TAB_RIGHT, gs->std_dev, 8, 3); + tab_float(ssb->t, 5 ,i*2+count+1, TAB_RIGHT, gs->se_mean, 8, 3); } } } @@ -748,22 +849,20 @@ ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd UNUSED) { struct group_statistics *gs; - gs=&pairs[i].v[j]->p.t_t.ugs; + gs = &group_proc_get (pairs[i].v[j])->ugs; /* Titles */ tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, pairs[i].v[j]->name); /* Values */ - tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, gs->mean, 8, 2); - tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, gs->n, 2, 0); - tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, gs->std_dev, 8, 3); - tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, gs->se_mean, 8, 3); + tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], 8, 2); + tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 2, 0); + tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], 8, 3); + tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j]/sqrt(pairs[i].n), 8, 3); } - } - } /* Populate the one sample ssbox */ @@ -776,8 +875,7 @@ ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd) for (i=0; i < cmd->n_variables; ++i) { - struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.t_t.ugs; + struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; tab_text (ssb->t, 0, i+1, TAB_LEFT, cmd->v_variables[i]->name); tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 2, 0); @@ -792,7 +890,7 @@ ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd) /* Implementation of the Test Results box struct */ -void trbox_base_init(struct trbox *self,int n_vars, int cols); +void trbox_base_init(struct trbox *self,size_t n_vars, int cols); void trbox_base_finalize(struct trbox *trb); void trbox_independent_samples_init(struct trbox *trb, @@ -885,8 +983,8 @@ trbox_independent_samples_init(struct trbox *self, tab_text(self->t,10,2, TAB_CENTER | TAT_TITLE,_("Upper")); tab_joint_text(self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); } @@ -900,10 +998,7 @@ trbox_independent_samples_populate(struct trbox *self, assert(self); for (i=0; i < cmd->n_variables; ++i) { - int which =1; double p,q; - int status; - double bound; double t; double df; @@ -914,37 +1009,52 @@ trbox_independent_samples_populate(struct trbox *self, double std_err_diff; double mean_diff; - struct group_statistics *gs0 = &cmd->v_variables[i]->p.t_t.gs[0]; - struct group_statistics *gs1 = &cmd->v_variables[i]->p.t_t.gs[1]; + struct variable *var = cmd->v_variables[i]; + struct group_proc *grp_data = group_proc_get (var); + + struct hsh_table *grp_hash = grp_data->group_hash; + + struct group_statistics *gs0 ; + struct group_statistics *gs1 ; - tab_text (self->t, 0, i*2+3, TAB_LEFT, cmd->v_variables[i]->name); + union value search_val; + + if ( gp.criterion == CMP_LE ) + search_val.f = gp.v.critical_value - 1.0; + else + search_val = gp.v.g_value[0]; - tab_text (self->t, 1, i*2+3, TAB_LEFT, _("Equal variances assumed")); + gs0 = hsh_find(grp_hash, (void *) &search_val); + assert(gs0); + if ( gp.criterion == CMP_LE ) + search_val.f = gp.v.critical_value + 1.0; + else + search_val = gp.v.g_value[1]; - tab_float(self->t, 2, i*2+3, TAB_CENTER, - cmd->v_variables[i]->p.t_t.levene, 8,3); + gs1 = hsh_find(grp_hash, (void *) &search_val); + assert(gs1); + + + tab_text (self->t, 0, i*2+3, TAB_LEFT, cmd->v_variables[i]->name); + tab_text (self->t, 1, i*2+3, TAB_LEFT, _("Equal variances assumed")); - /* Now work out the significance of the Levene test */ - which=1; df1 = 1; df2 = cmd->v_variables[i]->p.t_t.ugs.n - 2; - cdff(&which,&p,&q,&cmd->v_variables[i]->p.t_t.levene, - &df1,&df2,&status,&bound); + tab_float(self->t, 2, i*2+3, TAB_CENTER, grp_data->levene, 8,3); - if ( 0 != status ) - { - msg( SE, _("Error calculating F statistic (cdff returned %d)."),status); - } + /* Now work out the significance of the Levene test */ + df1 = 1; df2 = grp_data->ugs.n - 2; + q = gsl_cdf_fdist_Q(grp_data->levene, df1, df2); tab_float(self->t, 3, i*2+3, TAB_CENTER, q, 8,3 ); df = gs0->n + gs1->n - 2.0 ; tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 2, 0); - pooled_variance = ( (gs0->n )*sqr(gs0->s_std_dev) + pooled_variance = ( (gs0->n )*pow2(gs0->s_std_dev) + - (gs1->n )*sqr(gs1->s_std_dev) + (gs1->n )*pow2(gs1->s_std_dev) ) / df ; t = (gs0->mean - gs1->mean) / sqrt(pooled_variance) ; @@ -952,13 +1062,8 @@ trbox_independent_samples_populate(struct trbox *self, tab_float (self->t, 4, i*2+3, TAB_RIGHT, t, 8, 3); - - which=1; /* get p & q from t & df */ - cdft(&which, &p, &q, &t, &df, &status, &bound); - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + p = gsl_cdf_tdist_P(t, df); + q = gsl_cdf_tdist_Q(t, df); tab_float(self->t, 6, i*2+3, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); @@ -966,20 +1071,14 @@ trbox_independent_samples_populate(struct trbox *self, tab_float(self->t, 7, i*2+3, TAB_RIGHT, mean_diff, 8, 3); - std_err_diff = sqrt( sqr(gs0->se_mean) + sqr(gs1->se_mean)); + std_err_diff = sqrt( pow2(gs0->se_mean) + pow2(gs1->se_mean)); tab_float(self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, 8, 3); /* Now work out the confidence interval */ q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ - p = 1 - q ; - which=2; /* Calc T from p,q and df */ - cdft(&which, &p, &q, &t, &df, &status, &bound); - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + t = gsl_cdf_tdist_Qinv(q,df); tab_float(self->t, 9, i*2+3, TAB_RIGHT, mean_diff - t * std_err_diff, 8, 3); @@ -994,42 +1093,32 @@ trbox_independent_samples_populate(struct trbox *self, TAB_LEFT, _("Equal variances not assumed")); - se2 = (sqr(gs0->s_std_dev)/(gs0->n -1) ) + - (sqr(gs1->s_std_dev)/(gs1->n -1) ); + se2 = (pow2(gs0->s_std_dev)/(gs0->n -1) ) + + (pow2(gs1->s_std_dev)/(gs1->n -1) ); t = mean_diff / sqrt(se2) ; tab_float (self->t, 4, i*2+3+1, TAB_RIGHT, t, 8, 3); - df = sqr(se2) / ( - (sqr(sqr(gs0->s_std_dev)/(gs0->n - 1 )) + df = pow2(se2) / ( + (pow2(pow2(gs0->s_std_dev)/(gs0->n - 1 )) /(gs0->n -1 ) ) + - (sqr(sqr(gs1->s_std_dev)/(gs1->n - 1 )) + (pow2(pow2(gs1->s_std_dev)/(gs1->n - 1 )) /(gs1->n -1 ) ) ) ; tab_float (self->t, 5, i*2+3+1, TAB_RIGHT, df, 8, 3); - which=1; /* get p & q from t & df */ - cdft(&which, &p, &q, &t, &df, &status, &bound); - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + p = gsl_cdf_tdist_P(t, df); + q = gsl_cdf_tdist_Q(t, df); tab_float(self->t, 6, i*2+3+1, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); /* Now work out the confidence interval */ q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ - p = 1 - q ; - which=2; /* Calc T from p,q and df */ - cdft(&which, &p, &q, &t, &df, &status, &bound); - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + t = gsl_cdf_tdist_Qinv(q, df); tab_float(self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, 8, 3); @@ -1061,7 +1150,7 @@ trbox_paired_init(struct trbox *self, trbox_base_init(self,n_pairs,hsize); tab_title (self->t, 0, _("Paired Samples Test")); tab_hline(self->t,TAL_1,2,6,1); - tab_vline(self->t,TAL_2,2,0,vsize); + tab_vline(self->t,TAL_2,2,0,vsize - 1); tab_joint_text(self->t,2,0,6,0,TAB_CENTER,_("Paired Differences")); tab_box(self->t,-1,-1,-1,TAL_1, 2,1,6,vsize-1); tab_box(self->t,-1,-1,-1,TAL_1, 6,0,hsize-1,vsize-1); @@ -1069,8 +1158,8 @@ trbox_paired_init(struct trbox *self, tab_vline(self->t,TAL_0,6,0,1); tab_joint_text(self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -1091,19 +1180,10 @@ trbox_paired_populate(struct trbox *trb, for (i=0; i < n_pairs; ++i) { - int which =1; double p,q; - int status; - double bound; double se_mean; - struct variable *v0 = pairs[i].v[0]; - struct variable *v1 = pairs[i].v[1]; - - struct group_statistics *gs0 = &v0->p.t_t.ugs; - struct group_statistics *gs1 = &v1->p.t_t.ugs; - - double n = gs0->n; + double n = pairs[i].n; double t; double df = n - 1; @@ -1122,14 +1202,8 @@ trbox_paired_populate(struct trbox *trb, /* Now work out the confidence interval */ q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ - p = 1 - q ; - which=2; /* Calc T from p,q and df */ - cdft(&which, &p, &q, &t, &df, &status, &bound); - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + t = gsl_cdf_tdist_Qinv(q, df); tab_float(trb->t, 5, i+3, TAB_RIGHT, pairs[i].mean_diff - t * se_mean , 8, 4); @@ -1137,26 +1211,21 @@ trbox_paired_populate(struct trbox *trb, tab_float(trb->t, 6, i+3, TAB_RIGHT, pairs[i].mean_diff + t * se_mean , 8, 4); - t = ( gs0->mean - gs1->mean) - / sqrt ( - ( sqr(gs0->s_std_dev) + sqr(gs1->s_std_dev) - - 2 * pairs[i].correlation * gs0->s_std_dev * gs1->s_std_dev ) - / (n-1) ) - ; + t = (pairs[i].mean[0] - pairs[i].mean[1]) + / sqrt ( + ( pow2 (pairs[i].s_std_dev[0]) + pow2 (pairs[i].s_std_dev[1]) - + 2 * pairs[i].correlation * + pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] ) + / (n - 1) + ); tab_float(trb->t, 7, i+3, TAB_RIGHT, t , 8,3 ); /* Degrees of freedom */ tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 2, 0 ); - which=1; - cdft(&which, &p, &q, &t, &df, &status, &bound); - - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } - + p = gsl_cdf_tdist_P(t,df); + q = gsl_cdf_tdist_P(t,df); tab_float(trb->t, 9, i+3, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); @@ -1175,17 +1244,17 @@ trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) trbox_base_init(self, cmd->n_variables,hsize); tab_title (self->t, 0, _("One-Sample Test")); tab_hline(self->t, TAL_1, 1, hsize - 1, 1); - tab_vline(self->t, TAL_2, 1, 0, vsize); + tab_vline(self->t, TAL_2, 1, 0, vsize - 1); tab_joint_text(self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF, - _("Test Value = %f"),cmd->n_testval); + _("Test Value = %f"), cmd->n_testval[0]); tab_box(self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1); tab_joint_text(self->t,5,1,6,1,TAB_CENTER | TAT_PRINTF, - _("%d%% Confidence Interval of the Difference"), - (int)round(cmd->criteria*100.0)); + _("%g%% Confidence Interval of the Difference"), + cmd->criteria*100.0); tab_vline(self->t,TAL_0,6,1,1); tab_hline(self->t,TAL_1,5,6,2); @@ -1209,19 +1278,15 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) for (i=0; i < cmd->n_variables; ++i) { - int which =1; double t; double p,q; double df; - int status; - double bound; - struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.t_t.ugs; + struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; tab_text (trb->t, 0, i+3, TAB_LEFT, cmd->v_variables[i]->name); - t = (gs->mean - cmd->n_testval ) * sqrt(gs->n) / gs->std_dev ; + t = (gs->mean - cmd->n_testval[0] ) * sqrt(gs->n) / gs->std_dev ; tab_float (trb->t, 1, i+3, TAB_RIGHT, t, 8,3); @@ -1230,13 +1295,8 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) tab_float (trb->t, 2, i+3, TAB_RIGHT, df, 8,0); - cdft(&which, &p, &q, &t, &df, &status, &bound); - - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } - + p = gsl_cdf_tdist_P(t, df); + q = gsl_cdf_tdist_Q(t, df); /* Multiply by 2 to get 2-tailed significance, makeing sure we've got the correct tail*/ @@ -1246,13 +1306,7 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ - p = 1 - q ; - which=2; /* Calc T from p,q and df */ - cdft(&which, &p, &q, &t, &df, &status, &bound); - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + t = gsl_cdf_tdist_Qinv(q, df); tab_float (trb->t, 5, i+3, TAB_RIGHT, gs->mean_diff - t * gs->se_mean, 8,4); @@ -1264,9 +1318,9 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) /* Base initializer for the generalized trbox */ void -trbox_base_init(struct trbox *self, int data_rows, int cols) +trbox_base_init(struct trbox *self, size_t data_rows, int cols) { - const int rows = 3 + data_rows; + const size_t rows = 3 + data_rows; self->finalize = trbox_base_finalize; self->t = tab_create (cols, rows, 0); @@ -1312,17 +1366,13 @@ pscbox(void) for (i=0; i < n_pairs; ++i) { - int which =1; double p,q; - int status; - double bound; - - double df = pairs[i].v[0]->p.t_t.ugs.n -2; + double df = pairs[i].n -2; double correlation_t = pairs[i].correlation * sqrt(df) / - sqrt(1 - sqr(pairs[i].correlation)); + sqrt(1 - pow2(pairs[i].correlation)); /* row headings */ @@ -1334,20 +1384,13 @@ pscbox(void) /* row data */ + tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].n, 4, 0); tab_float(table, 3, i+1, TAB_RIGHT, pairs[i].correlation, 8, 3); - tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].v[0]->p.t_t.ugs.n , 4, 0); - - - cdft(&which, &p, &q, &correlation_t, &df, &status, &bound); - - if ( 0 != status ) - { - msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); - } + p = gsl_cdf_tdist_P(correlation_t, df); + q = gsl_cdf_tdist_Q(correlation_t, df); tab_float(table, 4, i+1, TAB_RIGHT, 2.0*(correlation_t>0?q:p), 8, 3); - } tab_submit(table); @@ -1355,25 +1398,54 @@ pscbox(void) + /* Calculation Implementation */ /* Per case calculations common to all variants of the T test */ static int -common_calc (struct ccase *c, void *aux UNUSED) +common_calc (const struct ccase *c, void *_cmd) { int i; + struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd; + + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); + + + /* Skip the entire case if /MISSING=LISTWISE is set */ + if ( cmd->miss == TTS_LISTWISE ) + { + for(i=0; i< cmd->n_variables ; ++i) + { + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); + + if (value_is_missing(&v->miss, val) ) + { + return 0; + } + } + } + + /* Listwise has to be implicit if the independent variable is missing ?? */ + if ( cmd->sbc_groups ) + { + const union value *gv = case_data (c, indep_var->fv); + if ( value_is_missing(&indep_var->miss, gv) ) + { + return 0; + } + } - double weight = dict_get_case_weight(default_dict,c); - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; - if (val->f != SYSMIS) + if (! value_is_missing(&v->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; @@ -1385,14 +1457,14 @@ common_calc (struct ccase *c, void *aux UNUSED) /* Pre calculations common to all variants of the T test */ static void -common_precalc (void *aux UNUSED) +common_precalc ( struct cmd_t_test *cmd ) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->sum=0; gs->n=0; @@ -1403,14 +1475,15 @@ common_precalc (void *aux UNUSED) /* Post calculations common to all variants of the T test */ void -common_postcalc (void *aux UNUSED) +common_postcalc ( struct cmd_t_test *cmd ) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->mean=gs->sum / gs->n; gs->s_std_dev= sqrt( @@ -1429,22 +1502,39 @@ common_postcalc (void *aux UNUSED) /* Per case calculations for one sample t test */ static int -one_sample_calc (struct ccase *c, void *aux UNUSED) +one_sample_calc (const struct ccase *c, void *cmd_) { int i; + struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; + - double weight = dict_get_case_weight(default_dict,c); + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); - for(i=0; i< cmd.n_variables ; ++i) + /* Skip the entire case if /MISSING=LISTWISE is set */ + if ( cmd->miss == TTS_LISTWISE ) + { + for(i=0; i< cmd->n_variables ; ++i) + { + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); + + if (value_is_missing(&v->miss, val) ) + { + return 0; + } + } + } + + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - struct variable *v = cmd.v_variables[i]; - union value *val = &c->data[v->fv]; + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; - if (val->f != SYSMIS) - gs->sum_diff += weight * (val->f - cmd.n_testval); + if ( ! value_is_missing(&v->miss, val)) + gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } return 0; @@ -1452,14 +1542,14 @@ one_sample_calc (struct ccase *c, void *aux UNUSED) /* Pre calculations for one sample t test */ static void -one_sample_precalc (void *aux UNUSED) +one_sample_precalc ( struct cmd_t_test *cmd ) { - int i=0; - - for(i=0; i< cmd.n_variables ; ++i) + int i=0; + + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->sum_diff=0; } @@ -1467,166 +1557,235 @@ one_sample_precalc (void *aux UNUSED) /* Post calculations for one sample t test */ static void -one_sample_postcalc (void *aux UNUSED) +one_sample_postcalc (struct cmd_t_test *cmd) { int i=0; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd.v_variables[i]->p.t_t.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; - gs->mean_diff = gs->sum_diff / gs->n ; } } -static int -compare_var_name (const void *a_, const void *b_, void *v_ UNUSED) -{ - const struct variable *a = a_; - const struct variable *b = b_; - - return strcmp(a->name,b->name); -} - -static unsigned -hash_var_name (const void *a_, void *v_ UNUSED) -{ - const struct variable *a = a_; - - return hsh_hash_bytes (a->name, strlen(a->name)); -} - - - static void -paired_precalc (void *aux UNUSED) +paired_precalc (struct cmd_t_test *cmd UNUSED) { int i; for(i=0; i < n_pairs ; ++i ) { - pairs[i].correlation=0; - pairs[i].sum_of_diffs=0; - pairs[i].ssq_diffs=0; + pairs[i].n = 0; + pairs[i].sum[0] = 0; pairs[i].sum[1] = 0; + pairs[i].ssq[0] = 0; pairs[i].ssq[1] = 0; + pairs[i].sum_of_prod = 0; + pairs[i].correlation = 0; + pairs[i].sum_of_diffs = 0; + pairs[i].ssq_diffs = 0; } } static int -paired_calc (struct ccase *c, void *aux UNUSED) +paired_calc (const struct ccase *c, void *cmd_) { int i; + struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; + + double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); + + /* Skip the entire case if /MISSING=LISTWISE is set , + AND one member of a pair is missing */ + if ( cmd->miss == TTS_LISTWISE ) + { + for(i=0; i < n_pairs ; ++i ) + { + struct variable *v0 = pairs[i].v[0]; + struct variable *v1 = pairs[i].v[1]; + + const union value *val0 = case_data (c, v0->fv); + const union value *val1 = case_data (c, v1->fv); + + if ( value_is_missing(&v0->miss, val0) || + value_is_missing(&v1->miss, val1) ) + { + return 0; + } + } + } + for(i=0; i < n_pairs ; ++i ) { struct variable *v0 = pairs[i].v[0]; struct variable *v1 = pairs[i].v[1]; - union value *val0 = &c->data[v0->fv]; - union value *val1 = &c->data[v1->fv]; + const union value *val0 = case_data (c, v0->fv); + const union value *val1 = case_data (c, v1->fv); + + if ( ( !value_is_missing(&v0->miss, val0) + && !value_is_missing(&v1->miss, val1) ) ) + { + pairs[i].n += weight; + pairs[i].sum[0] += weight * val0->f; + pairs[i].sum[1] += weight * val1->f; - pairs[i].correlation += ( val0->f - pairs[i].v[0]->p.t_t.ugs.mean ) - * - ( val1->f - pairs[i].v[1]->p.t_t.ugs.mean ); + pairs[i].ssq[0] += weight * pow2(val0->f); + pairs[i].ssq[1] += weight * pow2(val1->f); - pairs[i].sum_of_diffs += val0->f - val1->f ; - pairs[i].ssq_diffs += sqr(val0->f - val1->f); + pairs[i].sum_of_prod += weight * val0->f * val1->f ; + pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ; + pairs[i].ssq_diffs += weight * pow2(val0->f - val1->f); + } } return 0; } static void -paired_postcalc (void *aux UNUSED) +paired_postcalc (struct cmd_t_test *cmd UNUSED) { int i; for(i=0; i < n_pairs ; ++i ) { - const double n = pairs[i].v[0]->p.t_t.ugs.n ; - - pairs[i].correlation /= pairs[i].v[0]->p.t_t.ugs.std_dev * - pairs[i].v[1]->p.t_t.ugs.std_dev ; - pairs[i].correlation /= pairs[i].v[0]->p.t_t.ugs.n -1; - + int j; + const double n = pairs[i].n; + for (j=0; j < 2 ; ++j) + { + pairs[i].mean[j] = pairs[i].sum[j] / n ; + pairs[i].s_std_dev[j] = sqrt((pairs[i].ssq[j] / n - + pow2(pairs[i].mean[j])) + ); + + pairs[i].std_dev[j] = sqrt(n/(n-1)*(pairs[i].ssq[j] / n - + pow2(pairs[i].mean[j])) + ); + } + + pairs[i].correlation = pairs[i].sum_of_prod / pairs[i].n - + pairs[i].mean[0] * pairs[i].mean[1] ; + /* correlation now actually contains the covariance */ + + pairs[i].correlation /= pairs[i].std_dev[0] * pairs[i].std_dev[1]; + pairs[i].correlation *= pairs[i].n / ( pairs[i].n - 1 ); + pairs[i].mean_diff = pairs[i].sum_of_diffs / n ; - pairs[i].std_dev_diff = sqrt ( n / (n - 1) * ( ( pairs[i].ssq_diffs / n ) - - sqr(pairs[i].mean_diff ) + pow2(pairs[i].mean_diff ) ) ); } } -static int -get_group(const union value *val, struct variable *var) -{ - if ( 0 == compare_values(val,&groups_values[0],var->width) ) - return 0; - else if (0 == compare_values(val,&groups_values[1],var->width) ) - return 1; - - /* Never reached */ - assert(0); - return -1; -} - - static void -group_precalc (void *aux UNUSED) +group_precalc (struct cmd_t_test *cmd ) { int i; int j; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - struct t_test_proc *ttpr = &cmd.v_variables[i]->p.t_t; + struct group_proc *ttpr = group_proc_get (cmd->v_variables[i]); /* There's always 2 groups for a T - TEST */ ttpr->n_groups = 2; - ttpr->gs = xmalloc(sizeof(struct group_statistics) * 2) ; + + gp.indep_width = indep_var->width; + + ttpr->group_hash = hsh_create(2, + (hsh_compare_func *) compare_group_binary, + (hsh_hash_func *) hash_group_binary, + (hsh_free_func *) free_group, + (void *) &gp ); for (j=0 ; j < 2 ; ++j) { - ttpr->gs[j].sum=0; - ttpr->gs[j].n=0; - ttpr->gs[j].ssq=0; - ttpr->gs[j].id = groups_values[j]; + + struct group_statistics *gs = xmalloc (sizeof *gs); + + gs->sum = 0; + gs->n = 0; + gs->ssq = 0; + + if ( gp.criterion == CMP_EQ ) + { + gs->id = gp.v.g_value[j]; + } + else + { + if ( j == 0 ) + gs->id.f = gp.v.critical_value - 1.0 ; + else + gs->id.f = gp.v.critical_value + 1.0 ; + } + + hsh_insert ( ttpr->group_hash, (void *) gs ); + } } } static int -group_calc (struct ccase *c, void *aux UNUSED) +group_calc (const struct ccase *c, struct cmd_t_test *cmd) { int i; - union value *gv = &c->data[groups->fv]; - double weight = dict_get_case_weight(default_dict,c); + const union value *gv = case_data (c, indep_var->fv); - gv = &c->data[groups->fv]; + const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); + + if ( value_is_missing(&indep_var->miss, gv) ) + { + return 0; + } - for(i=0; i< cmd.n_variables ; ++i) + if ( cmd->miss == TTS_LISTWISE ) { - int g = get_group(gv,groups); + for(i=0; i< cmd->n_variables ; ++i) + { + struct variable *v = cmd->v_variables[i]; + const union value *val = case_data (c, v->fv); - struct group_statistics *gs = &cmd.v_variables[i]->p.t_t.gs[g]; + if (value_is_missing(&v->miss, val) ) + { + return 0; + } + } + } + + gv = case_data (c, indep_var->fv); + + for(i=0; i< cmd->n_variables ; ++i) + { + struct variable *var = cmd->v_variables[i]; + const union value *val = case_data (c, var->fv); + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; + struct group_statistics *gs; - union value *val=&c->data[cmd.v_variables[i]->fv]; + gs = hsh_find(grp_hash, (void *) gv); - gs->n+=weight; - gs->sum+=weight * val->f; - gs->ssq+=weight * sqr(val->f); + /* If the independent variable doesn't match either of the values + for this case then move on to the next case */ + if ( ! gs ) + return 0; + + if ( !value_is_missing(&var->miss, val) ) + { + gs->n+=weight; + gs->sum+=weight * val->f; + gs->ssq+=weight * pow2(val->f); + } } return 0; @@ -1634,31 +1793,193 @@ group_calc (struct ccase *c, void *aux UNUSED) static void -group_postcalc (void *aux UNUSED) +group_postcalc ( struct cmd_t_test *cmd ) { int i; - int j; - for(i=0; i< cmd.n_variables ; ++i) + for(i=0; i< cmd->n_variables ; ++i) { - for (j=0 ; j < 2 ; ++j) - { - struct group_statistics *gs; - gs=&cmd.v_variables[i]->p.t_t.gs[j]; + struct variable *var = cmd->v_variables[i]; + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; + struct hsh_iterator g; + struct group_statistics *gs; + int count=0; + for (gs = hsh_first (grp_hash,&g); + gs != 0; + gs = hsh_next(grp_hash,&g)) + { gs->mean = gs->sum / gs->n; gs->s_std_dev= sqrt( - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; + ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ) ; gs->std_dev= sqrt( - gs->n/(gs->n-1) * - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; + gs->n/(gs->n-1) * + ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ) ; gs->se_mean = gs->std_dev / sqrt(gs->n); + count ++; + } + assert(count == 2); + } +} + + + +static void +calculate(const struct casefile *cf, void *cmd_) +{ + struct ssbox stat_summary_box; + struct trbox test_results_box; + + struct casereader *r; + struct ccase c; + + struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_; + + common_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + common_calc(&c,cmd); + } + casereader_destroy (r); + common_postcalc(cmd); + + switch(mode) + { + case T_1_SAMPLE: + one_sample_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + one_sample_calc(&c,cmd); + } + casereader_destroy (r); + one_sample_postcalc(cmd); + + break; + case T_PAIRED: + paired_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + paired_calc(&c,cmd); + } + casereader_destroy (r); + paired_postcalc(cmd); + + break; + case T_IND_SAMPLES: + + group_precalc(cmd); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + group_calc(&c,cmd); } + casereader_destroy (r); + group_postcalc(cmd); + + levene(cf, indep_var, cmd->n_variables, cmd->v_variables, + (cmd->miss == TTS_LISTWISE)?LEV_LISTWISE:LEV_ANALYSIS , + value_is_missing); + break; + } + + ssbox_create(&stat_summary_box,cmd,mode); + ssbox_populate(&stat_summary_box,cmd); + ssbox_finalize(&stat_summary_box); + + if ( mode == T_PAIRED) + pscbox(); + + trbox_create(&test_results_box,cmd,mode); + trbox_populate(&test_results_box,cmd); + trbox_finalize(&test_results_box); + +} + +short which_group(const struct group_statistics *g, + const struct group_properties *p); + +/* Return -1 if the id of a is less than b; +1 if greater than and + 0 if equal */ +static int +compare_group_binary(const struct group_statistics *a, + const struct group_statistics *b, + const struct group_properties *p) +{ + short flag_a; + short flag_b; + + if ( p->criterion == CMP_LE ) + { + /* less-than-or-equal comparision is not meaningfull for + alpha variables, so we shouldn't ever arrive here */ + assert(p->indep_width == 0 ) ; + + flag_a = ( a->id.f < p->v.critical_value ) ; + flag_b = ( b->id.f < p->v.critical_value ) ; } + else + { + flag_a = which_group(a, p); + flag_b = which_group(b, p); + } + + if (flag_a < flag_b ) + return -1; + + return (flag_a > flag_b); +} + +/* This is a degenerate case of a hash, since it can only return three possible + values. It's really a comparison, being used as a hash function */ + +static unsigned +hash_group_binary(const struct group_statistics *g, + const struct group_properties *p) +{ + short flag = -1; + + if ( p->criterion == CMP_LE ) + { + /* Not meaningfull to do a less than compare for alpha values ? */ + assert(p->indep_width == 0 ) ; + flag = ( g->id.f < p->v.critical_value ) ; + } + else if ( p->criterion == CMP_EQ) + { + flag = which_group(g,p); + } + else + assert(0); + + return flag; } +/* return 0 if G belongs to group 0, + 1 if it belongs to group 1, + 2 if it belongs to neither group */ +short +which_group(const struct group_statistics *g, + const struct group_properties *p) +{ + + if ( 0 == compare_values (&g->id, &p->v.g_value[0], p->indep_width)) + return 0; + + if ( 0 == compare_values (&g->id, &p->v.g_value[1], p->indep_width)) + return 1; + + return 2; +} +