X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ft-test.q;h=e673395b6ac39f979fd43d27c3a0cc6dad7b52e2;hb=6429e116f1cacfe4be0ec6aae341cd712ca58463;hp=6b65d0ec07b842ff03d33581b9d2956f950f8c8e;hpb=4944c86a9318bc5b5578ab145a95c116ffd2c9fd;p=pspp diff --git a/src/t-test.q b/src/t-test.q index 6b65d0ec07..e673395b6a 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -1,6 +1,8 @@ -/* PSPP - computes sample statistics. +/* PSPP - computes sample statistics. -*-c-*- + Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . + Written by John Williams . + Almost completly re-written by John Darrington 2004 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -29,29 +31,37 @@ #include "lexer.h" #include "error.h" #include "magic.h" +#include "tab.h" +#include "som.h" +#include "value-labels.h" #include "var.h" #include "vfm.h" +#include "pool.h" +#include "hash.h" +#include "stats.h" +#include "t-test.h" /* (specification) "T-TEST" (tts_): - groups=custom; + +groups=custom; + +testval=double; variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); - *+pairs=custom; + pairs=custom; +missing=miss:!analysis/listwise, incl:include/!exclude; - +format=fmt:!labels/nolabels; - +criteria=:ci(d:criteria,"%s > 0. && %s < 1."). + format=fmt:!labels/nolabels; + criteria=:cin(d:criteria,"%s > 0. && %s < 1."). */ /* (declarations) */ /* (functions) */ -#undef DEBUGGING -#define DEBUGGING 1 -#include "debug-print.h" - -/* Command parsing information. */ static struct cmd_t_test cmd; +int value_compare(const union value *a, const union value *b, int width); + + +static struct pool *t_test_pool ; + /* Variable for the GROUPS subcommand, if given. */ static struct variable *groups; @@ -60,416 +70,269 @@ static struct variable *groups; static int n_groups_values; static union value groups_values[2]; -/* PAIRED: Number of pairs; each pair. */ -static int n_pairs; -static struct variable *(*pairs)[2]; - -/* Routines to scan data and perform t-tests */ -static void precalc (void); -static void postcalc (void); -static void g_postcalc (void); -static void t_pairs (void); -static void t_groups (void); -static int groups_calc (struct ccase *); -static int pairs_calc (struct ccase *); -static int z_calc (struct ccase *); - -struct value_list - { - double sum; - double ss; - double n; - struct value_list *next; - }; - -/* general workhorses - should move these to a separate library... */ -double variance (double n, double ss, double sum); - -double covariance (double x_sum, double x_n, - double y_sum, double y_n, double ss); - -double pooled_variance (double n_1, double var_1, - double n_2, double var_2); - -double oneway (double *f, double *p, struct value_list *list); - -double pearson_r (double c_xy, double c_xx, double c_yy); - -double f_sig (double f, double dfn, double dfd); -double t_crt (double df, double q); -double t_sig (double t, double df); - -/* massive function simply to remove any responsibility for output - from the function which does the actual t-test calculations */ -void print_t_groups (struct variable * grps, union value * g1, union value * g2, - double n1, double n2, double mean1, double mean2, - double sd1, double sd2, double se1, double se2, - double diff, double l_f, double l_p, - double p_t, double p_sig, double p_df, double p_sed, - double p_l, double p_h, - double s_t, double s_sig, double s_df, double s_sed, - double s_l, double s_h); - -/* Global variables to communicate between calc() and postcalc() - should move to a structure in the p union of variable... */ -static double v1_n, v1_ss, v1_sum, v1_se, v1_var, v1_mean; -static double v2_n, v2_ss, v2_sum, v2_se, v2_var, v2_mean; -static double v1_z_sum, v1_z_ss; -static double v2_z_sum, v2_z_ss; -static double diff, se_diff, sp, xy_sum, xy_diff, xy_ss; -static int cur_var; - -/* some defines for CDFlib */ -#define FIND_P 1 -#define FIND_CRITICAL_VALUE 2 -#define ERROR_SIG -1 - -#ifdef DEBUGGING -static void debug_print (void); -#endif - -/* Parses and executes the T-TEST procedure. */ -int -cmd_t_test (void) +/* Array of statistics for each group */ +typedef struct t_test_proc group_stats_t[2]; +static group_stats_t *groups_stats; + + + + +/* PAIRS: Number of pairs to be compared ; each pair. */ +static int n_pairs = 0 ; +struct pair { - struct cmd_t_test cmd; - - if (!lex_force_match_id ("T")) - return CMD_FAILURE; - lex_match ('-'); - lex_match_id ("TEST"); + /* The variables comprising the pair */ + struct variable *v[2]; - if (!parse_t_test (&cmd)) - return CMD_FAILURE; + /* The correlation coefficient between the variables */ + double correlation; -#if DEBUGGING - debug_print (); -#endif + /* The sum of the differences */ + double sum_of_diffs; - if (n_pairs > 0) - procedure (precalc, pairs_calc, postcalc); - else - /* probably groups then... */ - { - printf ("\n\n t-tests for independent samples of %s %s\n", - groups->name, groups->label); + /* The mean of the differences */ + double mean_diff; - for (cur_var = 0; cur_var < cmd.n_variables; cur_var++) - { - v1_n = v1_ss = v1_sum = v1_se = v1_var = v1_mean = 0.0; - v2_n = v2_ss = v2_sum = v2_se = v2_var = v2_mean = 0.0; - v1_z_sum = v1_z_ss = v2_z_sum = v2_z_ss = 0.0; - diff = se_diff = sp = xy_diff = xy_ss = xy_sum = 0.0; + /* The sum of the squares of the differences */ + double ssq_diffs; - procedure (precalc, groups_calc, g_postcalc); - procedure (precalc, z_calc, postcalc); - } - } + /* The std deviation of the differences */ + double std_dev_diff; +}; +static struct pair *pairs=0; - return CMD_SUCCESS; -} -void -precalc (void) -{ - return; /* rilly void... */ -} +static int parse_value (union value * v, int type) ; -int -groups_calc (struct ccase * c) + +/* Structures and Functions for the Statistics Summary Box */ +struct ssbox; +typedef void populate_ssbox_func(struct ssbox *ssb, + struct cmd_t_test *cmd); +typedef void finalize_ssbox_func(struct ssbox *ssb); + +struct ssbox { - int bad_weight; - double group, w; - struct variable *v = cmd.v_variables[cur_var]; - double X = c->data[v->fv].f; - - /* Get the weight for this case. */ - if (default_dict.weight_index == -1) - w = 1.0; - else - { - w = c->data[default_dict.weight_index].f; - if (w <= 0.0 || w == SYSMIS) - { - w = 0.0; - bad_weight = 1; - printf ("Bad weight\n"); - } - } + struct tab_table *t; - if (X == SYSMIS || X == 0.0) /* FIXME: should be USER_MISSING? */ - { - /* printf("Missing value\n"); */ - return 1; - } - else - { - X = X * w; - group = c->data[groups->fv].f; + populate_ssbox_func *populate; + finalize_ssbox_func *finalize; - if (group == groups_values[0].f) - { - v1_sum += X; - v1_ss += X * X; - v1_n += w; - } - else if (group == groups_values[1].f) - { - v2_sum += X; - v2_ss += X * X; - v2_n += w; - } - } +}; - return 1; -} +/* Create a ssbox */ +void ssbox_create(struct ssbox *ssb, struct cmd_t_test *cmd, int mode); -void -g_postcalc (void) -{ - v1_mean = v1_sum / v1_n; - v2_mean = v2_sum / v2_n; - return; -} +/* Populate a ssbox according to cmd */ +void ssbox_populate(struct ssbox *ssb, struct cmd_t_test *cmd); -int /* this pass generates the z-zcores */ -z_calc (struct ccase * c) -{ - int bad_weight; - double group, z, w; - struct variable *v = cmd.v_variables[cur_var]; - double X = c->data[v->fv].f; +/* Submit and destroy a ssbox */ +void ssbox_finalize(struct ssbox *ssb); - z = 0.0; +/* A function to create, populate and submit the Paired Samples Correlation + box */ +void pscbox(void); - /* Get the weight for this case. */ - if (default_dict.weight_index == -1) - w = 1.0; - else - { - w = c->data[default_dict.weight_index].f; - if (w <= 0.0 || w == SYSMIS) - { - w = 0.0; - bad_weight = 1; - } - } - if (X == SYSMIS || X == 0.0) /* FIXME: how to specify user missing? */ - { - return 1; - } - else - { - group = c->data[groups->fv].f; - X = w * X; +/* Structures and Functions for the Test Results Box */ +struct trbox; - if (group == groups_values[0].f) - { - z = fabs (X - v1_mean); - v1_z_sum += z; - v1_z_ss += pow (z, 2); - } - else if (group == groups_values[1].f) - { - z = fabs (X - v2_mean); - v2_z_ss += pow (z, 2); - v2_z_sum += z; - } - } +typedef void populate_trbox_func(struct trbox *trb, + struct cmd_t_test *cmd); +typedef void finalize_trbox_func(struct trbox *trb); + +struct trbox { + struct tab_table *t; + populate_trbox_func *populate; + finalize_trbox_func *finalize; +}; + +/* Create a trbox */ +void trbox_create(struct trbox *trb, struct cmd_t_test *cmd, int mode); + +/* Populate a ssbox according to cmd */ +void trbox_populate(struct trbox *trb, struct cmd_t_test *cmd); + +/* Submit and destroy a ssbox */ +void trbox_finalize(struct trbox *trb); + +/* Which mode was T-TEST invoked */ +enum { + T_1_SAMPLE = 0 , + T_IND_SAMPLES, + T_PAIRED +}; + + +static int common_calc (struct ccase *); +static void common_precalc (void); +static void common_postcalc (void); + +static int one_sample_calc (struct ccase *); +static void one_sample_precalc (void); +static void one_sample_postcalc (void); + +static int paired_calc (struct ccase *); +static void paired_precalc (void); +static void paired_postcalc (void); + +static void group_precalc (void); +static int group_calc (struct ccase *); +static void group_postcalc (void); + + +static int compare_var_name (const void *a_, const void *b_, void *v_ UNUSED); +static unsigned hash_var_name (const void *a_, void *v_ UNUSED); - return 1; -} int -pairs_calc (struct ccase * c) +cmd_t_test(void) { - int i; - struct variable *v1, *v2; - double X, Y; + int mode; - for (i = 0; i < n_pairs; i++) - { + struct ssbox stat_summary_box; + struct trbox test_results_box; - v1 = pairs[i][0]; - v2 = pairs[i][1]; - X = c->data[v1->fv].f; - Y = c->data[v2->fv].f; + if (!lex_force_match_id ("T")) + return CMD_FAILURE; + + lex_match ('-'); + lex_match_id ("TEST"); + + if ( !parse_t_test(&cmd) ) + return CMD_FAILURE; + + if (! cmd.sbc_criteria) + cmd.criteria=0.95; - if (X == SYSMIS || Y == SYSMIS) + { + int m=0; + if (cmd.sbc_testval) ++m; + if (cmd.sbc_groups) ++m; + if (cmd.sbc_pairs) ++m; + + if ( m != 1) + { + msg(SE, + _("TESTVAL, GROUPS and PAIRS subcommands are mutually exclusive.") + ); + return CMD_FAILURE; + } + } + + if (cmd.sbc_testval) + mode=T_1_SAMPLE; + else if (cmd.sbc_groups) + mode=T_IND_SAMPLES; + else + mode=T_PAIRED; + + if ( mode == T_PAIRED) + { + if (cmd.sbc_variables) { - printf ("Missing value\n"); + msg(SE, _("VARIABLES subcommand is not appropriate with PAIRS")); + return CMD_FAILURE; } else { - xy_sum += X * Y; - xy_diff += (X - Y); - xy_ss += pow ((X - Y), 2); - v1_sum += X; - v2_sum += Y; - v1_n++; - v2_n++; - v1_ss += (X * X); - v2_ss += (Y * Y); + /* Iterate through the pairs and put each variable that is a + member of a pair into cmd.v_variables */ + + int i; + struct hsh_iterator hi; + struct hsh_table *hash; + struct variable *v; + + hash=hsh_create(n_pairs,compare_var_name,hash_var_name,0,0); + + for (i=0; i < n_pairs; ++i) + { + hsh_insert(hash,pairs[i].v[0]); + hsh_insert(hash,pairs[i].v[1]); + } + + assert(cmd.n_variables == 0); + cmd.n_variables = hsh_count(hash); + + cmd.v_variables = xrealloc(cmd.v_variables, + sizeof(struct variable) * cmd.n_variables); + /* Iterate through the hash */ + for (i=0,v = (struct variable *) hsh_first(hash,&hi); + v != 0; + v=hsh_next(hash,&hi) ) + cmd.v_variables[i++]=v; + + hsh_destroy(hash); } } - return 1; -} -void -postcalc (void) -{ - /* Calculate basic statistics */ - v1_var = variance (v1_n, v1_ss, v1_sum); /* variances */ - v2_var = variance (v2_n, v2_ss, v2_sum); - v1_se = sqrt (v1_var / v1_n); /* standard errors */ - v2_se = sqrt (v2_var / v2_n); - diff = v1_mean - v2_mean; - - if (n_pairs > 0) + procedure(common_precalc,common_calc,common_postcalc); + + switch(mode) { - t_pairs (); + case T_1_SAMPLE: + procedure(one_sample_precalc,one_sample_calc,one_sample_postcalc); + break; + case T_PAIRED: + procedure(paired_precalc,paired_calc,paired_postcalc); + break; + case T_IND_SAMPLES: + procedure(group_precalc,group_calc,group_postcalc); + break; + } - else + + + t_test_pool = pool_create (); + + ssbox_create(&stat_summary_box,&cmd,mode); + ssbox_populate(&stat_summary_box,&cmd); + ssbox_finalize(&stat_summary_box); + + if ( mode == T_PAIRED) { - t_groups (); + pscbox(); } - return; -} + trbox_create(&test_results_box,&cmd,mode); + trbox_populate(&test_results_box,&cmd); + trbox_finalize(&test_results_box); -void -t_groups (void) -{ - double df_pooled, t_pooled, t_sep, p_pooled, p_sep; - double crt_t_p, crt_t_s, tmp, v1_z, v2_z, f_levene, p_levene; - double df_sep, se_diff_s, se_diff_p; - struct value_list *val_1, *val_2; - - /* Levene's test */ - val_1 = malloc (sizeof (struct value_list)); - val_1->sum = v1_z_sum; - val_1->ss = v1_z_ss; - val_1->n = v1_n; - val_2 = malloc (sizeof (struct value_list)); - val_2->sum = v2_z_sum; - val_2->ss = v2_z_ss; - val_2->n = v2_n; - - val_1->next = val_2; - val_2->next = NULL; - - f_levene = oneway (&f_levene, &p_levene, val_1); - - /* T test results for pooled variances */ - se_diff_p = sqrt (pooled_variance (v1_n, v1_var, v2_n, v2_var)); - df_pooled = v1_n + v2_n - 2.0; - t_pooled = diff / se_diff_p; - p_pooled = t_sig (t_pooled, df_pooled); - crt_t_p = t_crt (df_pooled, 0.025); - - if ((2.0 * p_pooled) >= 1.0) - p_pooled = 1.0 - p_pooled; - - /* oh god, the separate variance calculations... */ - t_sep = diff / sqrt ((v1_var / v1_n) + (v2_var / v2_n)); - - tmp = (v1_var / v1_n) + (v2_var / v2_n); - tmp = (v1_var / v1_n) / tmp; - tmp = pow (tmp, 2); - tmp = tmp / (v1_n - 1.0); - v1_z = tmp; - - tmp = (v1_var / v1_n) + (v2_var / v2_n); - tmp = (v2_var / v2_n) / tmp; - tmp = pow (tmp, 2); - tmp = tmp / (v2_n - 1.0); - v2_z = tmp; - - tmp = 1.0 / (v1_z + v2_z); - - df_sep = tmp; - p_sep = t_sig (t_sep, df_sep); - if ((2.0 * p_sep) >= 1.0) - p_sep = 1.0 - p_sep; - crt_t_s = t_crt (df_sep, 0.025); - se_diff_s = sqrt ((v1_var / v1_n) + (v2_var / v2_n)); - - /* FIXME: convert to a proper PSPP output call */ - print_t_groups (groups, &groups_values[0], &groups_values[1], - v1_n, v2_n, v1_mean, v2_mean, - sqrt (v1_var), sqrt (v2_var), v1_se, v2_se, - diff, f_levene, p_levene, - t_pooled, 2.0 * p_pooled, df_pooled, se_diff_p, - diff - (crt_t_p * se_diff_p), diff + (crt_t_p * se_diff_p), - t_sep, 2.0 * p_sep, df_sep, se_diff_s, - diff - (crt_t_s * se_diff_s), diff + (crt_t_s * se_diff_s)); - return; + pool_destroy (t_test_pool); + + t_test_pool=0; + + + n_pairs=0; + free(pairs); + pairs=0; + + return CMD_SUCCESS; } -void -t_pairs (void) +static int +tts_custom_groups (struct cmd_t_test *cmd UNUSED) { - double cov12, cov11, cov22, r, t, p, crt_t, sp, r_t, r_p; - struct variable *v1, *v2; - - v1 = pairs[0][0]; - v2 = pairs[0][1]; - cov12 = covariance (v1_sum, v1_n, v2_sum, v2_n, xy_sum); - cov11 = covariance (v1_sum, v1_n, v1_sum, v1_n, v1_ss); - cov22 = covariance (v2_sum, v2_n, v2_sum, v2_n, v2_ss); - r = pearson_r (cov12, cov11, cov22); - /* this t and it's associated p is a significance test for the pearson's r */ - r_t = r * sqrt ((v1_n - 2.0) / (1.0 - (r * r))); - r_p = t_sig (r_t, v1_n - 2.0); - - /* now we move to the t test for the difference in means */ - diff = xy_diff / v1_n; - sp = sqrt (variance (v1_n, xy_ss, xy_diff)); - se_diff = sp / sqrt (v1_n); - t = diff / se_diff; - crt_t = t_crt (v1_n - 1.0, 0.025); - p = t_sig (t, v1_n - 1.0); - - - printf (" Number of 2-tail\n"); - printf (" Variable pairs Corr Sig Mean SD SE of Mean\n"); - printf ("---------------------------------------------------------------\n"); - printf ("%s %8.4f %8.4f %8.4f\n", - v1->name, v1_mean, sqrt (v1_var), v1_se); - printf (" %8.4f %0.4f %0.4f\n", v1_n, r, r_p); - printf ("%s %8.4f %8.4f %8.4f\n", - v2->name, v2_mean, sqrt (v2_var), v2_se); - printf ("---------------------------------------------------------------\n"); - - printf ("\n\n\n"); - printf (" Paired Differences |\n"); - printf (" Mean SD SE of Mean | t-value df 2-tail Sig\n"); - printf ("--------------------------------------|---------------------------\n"); - - printf ("%8.4f %8.4f %8.4f | %8.4f %8.4f %8.4f\n", - diff, sp, se_diff, t, v1_n - 1.0, 2.0 * (1.0 - p)); - - printf ("95pc CI (%8.4f, %8.4f) |\n\n", - diff - (se_diff * crt_t), diff + (se_diff * crt_t)); - - return; -} + lex_match('='); -static int parse_value (union value *); + if (token != T_ALL && + (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) + ) + { + msg(SE,_("`%s' is not a variable name"),tokid); + return 0; + } -/* Parses the GROUPS subcommand. */ -int -tts_custom_groups (struct cmd_t_test *cmd unused) -{ groups = parse_variable (); if (!groups) { - lex_error (_("expecting variable name in GROUPS subcommand")); + lex_error ("expecting variable name in GROUPS subcommand"); return 0; } + if (groups->type == T_STRING && groups->width > MAX_SHORT_STRING) { msg (SE, _("Long string variable %s is not valid here."), @@ -493,17 +356,16 @@ tts_custom_groups (struct cmd_t_test *cmd unused) return 0; } } - - if (!parse_value (&groups_values[0])) + + if (!parse_value (&groups_values[0],groups->type)) return 0; n_groups_values = 1; lex_match (','); - if (lex_match (')')) return 1; - if (!parse_value (&groups_values[1])) + if (!parse_value (&groups_values[1],groups->type)) return 0; n_groups_values = 2; @@ -513,83 +375,73 @@ tts_custom_groups (struct cmd_t_test *cmd unused) return 1; } -/* Parses the current token (numeric or string, depending on the - variable in `groups') into value V and returns success. */ -static int -parse_value (union value * v) -{ - if (groups->type == NUMERIC) - { - if (!lex_force_num ()) - return 0; - v->f = tokval; - } - else - { - if (!lex_force_string ()) - return 0; - strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr)); - } - lex_get (); - return 1; -} -/* Parses the PAIRS subcommand. */ static int -tts_custom_pairs (struct cmd_t_test *cmd unused) +tts_custom_pairs (struct cmd_t_test *cmd UNUSED) { struct variable **vars; - int n_before_WITH; int n_vars; - int paired; - int extra; -#if DEBUGGING - int n_predicted; -#endif - - if ((token != T_ID || !is_varname (tokid)) && token != T_ALL) - return 2; - if (!parse_variables (&default_dict, &vars, &n_vars, - PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) - return 0; + int n_pairs_local; + int n_before_WITH ; + int n_after_WITH = -1; + int paired ; /* Was the PAIRED keyword given ? */ + + lex_match('='); + + if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) + && token != T_ALL) + { + msg(SE,_("`%s' is not a variable name"),tokid); + return 0; + } + + n_vars=0; + if (!parse_variables (default_dict, &vars, &n_vars, + PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) + { + free (vars); + return 0; + } assert (n_vars); + + n_before_WITH=0; if (lex_match (T_WITH)) { n_before_WITH = n_vars; - - if (!parse_variables (&default_dict, &vars, &n_vars, + if (!parse_variables (default_dict, &vars, &n_vars, PV_DUPLICATE | PV_APPEND | PV_NUMERIC | PV_NO_SCRATCH)) { free (vars); return 0; } + n_after_WITH = n_vars - n_before_WITH; } - else - n_before_WITH = 0; paired = (lex_match ('(') && lex_match_id ("PAIRED") && lex_match (')')); + /* Determine the number of pairs needed */ if (paired) { - if (n_before_WITH * 2 != n_vars) + if (n_before_WITH != n_after_WITH) { free (vars); msg (SE, _("PAIRED was specified but the number of variables " "preceding WITH (%d) did not match the number " "following (%d)."), - n_before_WITH, n_vars - n_before_WITH); + n_before_WITH, n_after_WITH ); return 0; } - - extra = n_before_WITH; + n_pairs_local=n_before_WITH; } - else if (n_before_WITH) - extra = n_before_WITH * (n_vars - n_before_WITH); - else + else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ + { + n_pairs_local=n_before_WITH * n_after_WITH ; + } + else /* Neither WITH nor PAIRED keyword given */ { if (n_vars < 2) { @@ -599,489 +451,1206 @@ tts_custom_pairs (struct cmd_t_test *cmd unused) return 0; } - extra = n_vars * (n_vars - 1) / 2; + /* how many ways can you pick 2 from n_vars ? */ + n_pairs_local = n_vars * (n_vars -1 ) /2 ; } -#if DEBUGGING - n_predicted = n_pairs + extra; -#endif - pairs = xrealloc (pairs, sizeof (struct variable *[2]) * (n_pairs + extra)); + /* Allocate storage for the pairs */ + pairs = xrealloc(pairs, sizeof(struct pair) * (n_pairs + n_pairs_local) ); - if (paired) + /* Populate the pairs with the appropriate variables */ + if ( paired ) { int i; - for (i = 0; i < extra; i++) + assert(n_pairs_local == n_vars/2); + for (i = 0; i < n_pairs_local ; ++i) { - pairs[n_pairs][0] = vars[i]; - pairs[n_pairs++][1] = vars[i + extra]; + pairs[i].v[n_pairs+0] = vars[i]; + pairs[i].v[n_pairs+1] = vars[i+n_pairs_local]; } } - else if (n_before_WITH) + else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ { - int i; + int i,j; + int p=n_pairs; - for (i = 0; i < n_before_WITH; i++) + for(i=0 ; i < n_before_WITH ; ++i ) { - int j; - - for (j = n_before_WITH; j < n_vars; j++) + for(j=0 ; j < n_after_WITH ; ++j) { - pairs[n_pairs][0] = vars[i]; - pairs[n_pairs++][1] = vars[j]; + pairs[p].v[0] = vars[i]; + pairs[p].v[1] = vars[j+n_before_WITH]; + ++p; } } } - else + else /* Neither WITH nor PAIRED given */ { - int i; - - for (i = 0; i < n_vars; i++) + int i,j; + int p=n_pairs; + + for(i=0 ; i < n_vars ; ++i ) { - int j; - - for (j = i + 1; j < n_vars; j++) + for(j=i+1 ; j < n_vars ; ++j) { - pairs[n_pairs][0] = vars[i]; - pairs[n_pairs++][1] = vars[j]; + pairs[p].v[0] = vars[i]; + pairs[p].v[1] = vars[j]; + ++p; } } } -#if DEBUGGING - assert (n_pairs == n_predicted); -#endif + n_pairs+=n_pairs_local; - free (vars); return 1; } -#if DEBUGGING -static void -debug_print (void) +/* Parses the current token (numeric or string, depending on type) + value v and returns success. */ +static int +parse_value (union value * v, int type ) { - printf ("T-TEST\n"); - if (groups) + if (type == NUMERIC) { - printf (" GROUPS=%s", groups->name); - if (n_groups_values) - { - int i; - - printf (" ("); - for (i = 0; i < n_groups_values; i++) - if (groups->type == NUMERIC) - printf ("%g%s", groups_values[i].f, i ? " " : ""); - else - printf ("%.*s%s", groups->width, groups_values[i].s, - i ? " " : ""); - printf (")"); - } - printf ("\n"); + if (!lex_force_num ()) + return 0; + v->f = tokval; } - if (cmd.n_variables) + else { - int i; - - printf (" VARIABLES="); - for (i = 0; i < cmd.n_variables; i++) - printf ("%s ", cmd.v_variables[i]->name); - printf ("\n"); + if (!lex_force_string ()) + return 0; + strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr)); } - if (cmd.sbc_pairs) - { - int i; - printf (" PAIRS="); - for (i = 0; i < n_pairs; i++) - printf ("%s ", pairs[i][0]->name); - printf ("WITH"); - for (i = 0; i < n_pairs; i++) - printf (" %s", pairs[i][1]->name); - printf (" (PAIRED)\n"); - } - printf (" MISSING=%s %s\n", - cmd.miss == TTS_ANALYSIS ? "ANALYSIS" : "LISTWISE", - cmd.miss == TTS_INCLUDE ? "INCLUDE" : "EXCLUDE"); - printf (" FORMAT=%s\n", - cmd.fmt == TTS_LABELS ? "LABELS" : "NOLABELS"); - if (cmd.criteria != NOT_LONG) - printf (" CRITERIA=%f\n", cmd.criteria); + lex_get (); + + return 1; +} + + +/* Implementation of the SSBOX object */ + +void ssbox_base_init(struct ssbox *this, int cols,int rows); + +void ssbox_base_finalize(struct ssbox *ssb); + +void ssbox_one_sample_init(struct ssbox *this, + struct cmd_t_test *cmd ); + +void ssbox_independent_samples_init(struct ssbox *this, + struct cmd_t_test *cmd); + +void ssbox_paired_init(struct ssbox *this, + struct cmd_t_test *cmd); + +/* Factory to create an ssbox */ +void +ssbox_create(struct ssbox *ssb, struct cmd_t_test *cmd, int mode) +{ + switch (mode) + { + case T_1_SAMPLE: + ssbox_one_sample_init(ssb,cmd); + break; + case T_IND_SAMPLES: + ssbox_independent_samples_init(ssb,cmd); + break; + case T_PAIRED: + ssbox_paired_init(ssb,cmd); + break; + default: + assert(0); + } +} + + +/* Despatcher for the populate method */ +void +ssbox_populate(struct ssbox *ssb,struct cmd_t_test *cmd) +{ + ssb->populate(ssb,cmd); +} + + +/* Despatcher for finalize */ +void +ssbox_finalize(struct ssbox *ssb) +{ + ssb->finalize(ssb); } -#endif /* DEBUGGING */ -/* Here are some general routines tha should probably be moved into - a separate library and documented as part of the PSPP "API" */ -double -variance (double n, double ss, double sum) +/* Submit the box and clear up */ +void +ssbox_base_finalize(struct ssbox *ssb) +{ + tab_submit(ssb->t); +} + +/* Initialize a ssbox struct */ +void +ssbox_base_init(struct ssbox *this, int cols,int rows) { - return ((ss - ((sum * sum) / n)) / (n - 1.0)); + this->finalize = ssbox_base_finalize; + this->t = tab_create (cols, rows, 0); + + tab_columns (this->t, SOM_COL_DOWN, 1); + tab_headers (this->t,0,0,1,0); + tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 ); + tab_hline(this->t, TAL_2,0,cols-1,1); + tab_dim (this->t, tab_natural_dimensions); } -double -pooled_variance (double n_1, double var_1, double n_2, double var_2) +void ssbox_one_sample_populate(struct ssbox *ssb, + struct cmd_t_test *cmd); + +/* Initialize the one_sample ssbox */ +void +ssbox_one_sample_init(struct ssbox *this, + struct cmd_t_test *cmd ) { - double tmp; + const int hsize=5; + const int vsize=cmd->n_variables+1; + + this->populate = ssbox_one_sample_populate; + + ssbox_base_init(this, hsize,vsize); + tab_title (this->t, 0, _("One-Sample Statistics")); + tab_vline(this->t, TAL_2, 1,0,vsize); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); +} - tmp = n_1 + n_2 - 2.0; - tmp = (((n_1 - 1.0) * var_1) + ((n_2 - 1.0) * var_2)) / tmp; - tmp = tmp * ((n_1 + n_2) / (n_1 * n_2)); - return tmp; +void ssbox_independent_samples_populate(struct ssbox *ssb, + struct cmd_t_test *cmd); + +/* Initialize the independent samples ssbox */ +void +ssbox_independent_samples_init(struct ssbox *this, + struct cmd_t_test *cmd) +{ + int hsize=6; + int vsize = cmd->n_variables*2 +1; + + this->populate = ssbox_independent_samples_populate; + + ssbox_base_init(this, hsize,vsize); + tab_title (this->t, 0, _("Group Statistics")); + tab_vline(this->t,0,1,0,vsize); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, groups->name); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); } -double -oneway (double *f, double *p, struct value_list *levels) + +/* Populate the ssbox for independent samples */ +void +ssbox_independent_samples_populate(struct ssbox *ssb, + struct cmd_t_test *cmd) { - double k, SSTR, SSE, SSTO, N, MSTR, MSE, sum, dftr, dfe, print; - struct value_list *g; + int i; - k = 0.0; + char *val_lab1=0; + char *val_lab2=0; - for (g = levels; g != NULL; g = g->next) + if ( groups->type == NUMERIC ) { - k++; - sum += g->sum; - N += g->n; - SSTR += g->ss - (pow (g->sum, 2) / g->n); - SSTO += g->ss; + val_lab1 = val_labs_find( groups->val_labs,groups_values[0]); + val_lab2 = val_labs_find( groups->val_labs,groups_values[1]); } + else + { + val_lab1 = groups_values[0].s; + val_lab2 = groups_values[1].s; + } + + assert(ssb->t); - SSTO = SSTO - (pow (sum, 2) / N); - SSE = SSTO - SSTR; + for (i=0; i < cmd->n_variables; ++i) + { + int g; + + tab_text (ssb->t, 0, i*2+1, TAB_LEFT, cmd->v_variables[i]->name); - dftr = N - k; - dfe = k - 1.0; - MSTR = SSTR / dftr; - MSE = SSE / dfe; + if (val_lab1) + tab_text (ssb->t, 1, i*2+1, TAB_LEFT, val_lab1); + else + tab_float(ssb->t, 1 ,i*2+1, TAB_LEFT, groups_values[0].f, 2,0); - *f = (MSE / MSTR); - *p = f_sig (*f, dfe, dftr); - print = 1.0; - if (print == 1.0) - { - printf ("sum1 %f, sum2 %f, ss1 %f, ss2 %f\n", - levels->sum, levels->next->sum, levels->ss, levels->next->ss); - printf (" - - - - - - O N E W A Y - - - - - -\n\n"); - printf (" Variable %s %s\n", - cmd.v_variables[0]->name, cmd.v_variables[0]->label); - printf ("By Variable %s %s\n", groups->name, groups->label); - printf ("\n Analysis of Variance\n\n"); - printf (" Sum of Mean F F\n"); - printf ("Source D.F. Squares Squares Ratio Prob\n\n"); - printf ("Between %8.0f %8.4f %8.4f %8.4f %8.4f\n", - dfe, SSE, MSE, *f, *p); - printf ("Within %8.0f %8.4f %8.4f\n", dftr, SSTR, MSTR); - printf ("Total %8.0f %8.4f\n\n\n", N - 1.0, SSTO); + if (val_lab2) + tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT, val_lab2); + else + tab_float(ssb->t, 1 ,i*2+1+1, TAB_LEFT, groups_values[1].f,2,0); + + /* Fill in the group statistics */ + for ( g=0; g < 2 ; ++g ) + { + struct t_test_proc *ttp = &groups_stats[i][g]; + + tab_float(ssb->t, 2 ,i*2+g+1, TAB_RIGHT, ttp->n, 2, 0); + tab_float(ssb->t, 3 ,i*2+g+1, TAB_RIGHT, ttp->mean, 8, 2); + tab_float(ssb->t, 4 ,i*2+g+1, TAB_RIGHT, ttp->std_dev, 8, 3); + tab_float(ssb->t, 5 ,i*2+g+1, TAB_RIGHT, ttp->se_mean, 8, 3); + } + } - return (*f); } -double -f_sig (double f, double dfn, double dfd) + +void ssbox_paired_populate(struct ssbox *ssb, + struct cmd_t_test *cmd); + +/* Initialize the paired values ssbox */ +void +ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd UNUSED) { - int which, status; - double p, q, bound; + int hsize=6; - which = FIND_P; - status = 1; - p = q = bound = 0.0; - cdff (&which, &p, &q, &f, &dfn, &dfd, &status, &bound); + int vsize = n_pairs*2+1; - switch (status) - { - case -1: - { - printf ("Parameter 1 is out of range\n"); - break; - } - case -2: - { - printf ("Parameter 2 is out of range\n"); - break; - } - case -3: - { - printf ("Parameter 3 is out of range\n"); - break; - } - case -4: - { - printf ("Parameter 4 is out of range\n"); - break; - } - case -5: - { - printf ("Parameter 5 is out of range\n"); - break; - } - case -6: - { - printf ("Parameter 6 is out of range\n"); - break; - } - case -7: - { - printf ("Parameter 7 is out of range\n"); - break; - } - case -8: - { - printf ("Parameter 8 is out of range\n"); - break; - } - case 0: + this->populate = ssbox_paired_populate; + + ssbox_base_init(this, hsize,vsize); + tab_title (this->t, 0, _("Paired Sample Statistics")); + tab_vline(this->t,TAL_0,1,0,vsize-1); + tab_vline(this->t,TAL_2,2,0,vsize-1); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); +} + + +/* Populate the ssbox for paired values */ +void +ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd UNUSED) +{ + int i; + + assert(ssb->t); + + for (i=0; i < n_pairs; ++i) + { + int j; + + tab_text (ssb->t, 0, i*2+1, TAB_LEFT | TAT_PRINTF , _("Pair %d"),i); + + for (j=0 ; j < 2 ; ++j) + { + struct t_test_proc *ttp; + + ttp=&pairs[i].v[j]->p.t_t; + + /* Titles */ + + tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, pairs[i].v[j]->name); + + /* Values */ + tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, ttp->mean, 8, 2); + tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, ttp->n, 2, 0); + tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, ttp->std_dev, 8, 3); + tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, ttp->se_mean, 8, 3); + + } + + } + +} + +/* Populate the one sample ssbox */ +void +ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd) +{ + int i; + + assert(ssb->t); + + for (i=0; i < cmd->n_variables; ++i) + { + struct t_test_proc *ttp; + ttp= &cmd->v_variables[i]->p.t_t; + + tab_text (ssb->t, 0, i+1, TAB_LEFT, cmd->v_variables[i]->name); + tab_float (ssb->t,1, i+1, TAB_RIGHT, ttp->n, 2, 0); + tab_float (ssb->t,2, i+1, TAB_RIGHT, ttp->mean, 8, 2); + tab_float (ssb->t,3, i+1, TAB_RIGHT, ttp->std_dev, 8, 2); + tab_float (ssb->t,4, i+1, TAB_RIGHT, ttp->se_mean, 8, 3); + } + +} + + + +/* Implementation of the Test Results box struct */ + +void trbox_base_init(struct trbox *self,int n_vars, int cols); +void trbox_base_finalize(struct trbox *trb); + +void trbox_independent_samples_init(struct trbox *trb, + struct cmd_t_test *cmd ); + +void trbox_independent_samples_populate(struct trbox *trb, + struct cmd_t_test *cmd); + +void trbox_one_sample_init(struct trbox *self, + struct cmd_t_test *cmd ); + +void trbox_one_sample_populate(struct trbox *trb, + struct cmd_t_test *cmd); + +void trbox_paired_init(struct trbox *self, + struct cmd_t_test *cmd ); + +void trbox_paired_populate(struct trbox *trb, + struct cmd_t_test *cmd); + + + +/* Create a trbox according to mode*/ +void +trbox_create(struct trbox *trb, + struct cmd_t_test *cmd, int mode) +{ + switch (mode) { - /* printf( "Command completed successfully\n" ); */ + case T_1_SAMPLE: + trbox_one_sample_init(trb,cmd); break; - } - case 1: - { - printf ("Answer appears to be lower than the lowest search bound\n"); + case T_IND_SAMPLES: + trbox_independent_samples_init(trb,cmd); break; - } - case 2: - { - printf ("Answer appears to be higher than the greatest search bound\n"); + case T_PAIRED: + trbox_paired_init(trb,cmd); break; + default: + assert(0); } - case 3: +} + +/* Populate a trbox according to cmd */ +void +trbox_populate(struct trbox *trb, struct cmd_t_test *cmd) +{ + trb->populate(trb,cmd); +} + +/* Submit and destroy a trbox */ +void +trbox_finalize(struct trbox *trb) +{ + trb->finalize(trb); +} + +/* Initialize the independent samples trbox */ +void +trbox_independent_samples_init(struct trbox *self, + struct cmd_t_test *cmd UNUSED) +{ + const int hsize=11; + const int vsize=cmd->n_variables*2+3; + + assert(self); + self->populate = trbox_independent_samples_populate; + + trbox_base_init(self,cmd->n_variables*2,hsize); + tab_title(self->t,0,_("Independent Samples Test")); + tab_hline(self->t,TAL_1,2,hsize-1,1); + tab_vline(self->t,TAL_2,2,0,vsize-1); + tab_vline(self->t,TAL_1,4,0,vsize-1); + tab_box(self->t,-1,-1,-1,TAL_1, 2,1,hsize-2,vsize-1); + tab_hline(self->t,TAL_1, hsize-2,hsize-1,2); + tab_box(self->t,-1,-1,-1,TAL_1, hsize-2,2,hsize-1,vsize-1); + tab_joint_text(self->t, 2, 0, 3, 0, + TAB_CENTER,_("Levene's Test for Equality of Variances")); + tab_joint_text(self->t, 4,0,hsize-1,0, + TAB_CENTER,_("t-test for Equality of Means")); + + tab_text(self->t,2,2, TAB_CENTER | TAT_TITLE,_("F")); + tab_text(self->t,3,2, TAB_CENTER | TAT_TITLE,_("Sig.")); + tab_text(self->t,4,2, TAB_CENTER | TAT_TITLE,_("t")); + tab_text(self->t,5,2, TAB_CENTER | TAT_TITLE,_("df")); + tab_text(self->t,6,2, TAB_CENTER | TAT_TITLE,_("Sig. (2-tailed)")); + tab_text(self->t,7,2, TAB_CENTER | TAT_TITLE,_("Mean Difference")); + tab_text(self->t,8,2, TAB_CENTER | TAT_TITLE,_("Std. Error Difference")); + tab_text(self->t,9,2, TAB_CENTER | TAT_TITLE,_("Lower")); + tab_text(self->t,10,2, TAB_CENTER | TAT_TITLE,_("Upper")); + + tab_joint_text(self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF, + _("%d%% Confidence Interval of the Difference"), + (int)round(cmd->criteria*100.0)); + +} + +/* Populate the independent samples trbox */ +void +trbox_independent_samples_populate(struct trbox *self, + struct cmd_t_test *cmd ) +{ + int i; + + assert(self); + for (i=0; i < cmd->n_variables; ++i) + { + int which =1; + double p,q; + int status; + double bound; + + double t; + double df; + + double pooled_variance; + double std_err_diff; + double mean_diff; + + struct t_test_proc *ttp0; + struct t_test_proc *ttp1; + ttp0=&groups_stats[i][0]; + ttp1=&groups_stats[i][1]; + + + tab_text (self->t, 0, i*2+3, TAB_LEFT, cmd->v_variables[i]->name); + + tab_text (self->t, 1, i*2+3, TAB_LEFT, _("Equal variances assumed")); + + df = ttp0->n + ttp1->n - 2.0 ; + tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 2, 0); + + pooled_variance = ( (ttp0->n )*sqr(ttp0->s_std_dev) + + + (ttp1->n )*sqr(ttp1->s_std_dev) + ) / df ; + + t = (ttp0->mean - ttp1->mean) / sqrt(pooled_variance) ; + t /= sqrt((ttp0->n + ttp1->n)/(ttp0->n*ttp1->n)); + + tab_float (self->t, 4, i*2+3, TAB_RIGHT, t, 8, 3); + + + which=1; /* get p & q from t & df */ + cdft(&which, &p, &q, &t, &df, &status, &bound); + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + tab_float(self->t, 6, i*2+3, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); + + mean_diff = ttp0->mean - ttp1->mean; + tab_float(self->t, 7, i*2+3, TAB_RIGHT, mean_diff, 8, 3); + + + std_err_diff = sqrt( sqr(ttp0->se_mean) + sqr(ttp1->se_mean)); + tab_float(self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, 8, 3); + + + /* Now work out the confidence interval */ + q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + p = 1 - q ; + which=2; /* Calc T from p,q and df */ + cdft(&which, &p, &q, &t, &df, &status, &bound); + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + tab_float(self->t, 9, i*2+3, TAB_RIGHT, + mean_diff - t * std_err_diff, 8, 3); + + tab_float(self->t, 10, i*2+3, TAB_RIGHT, + mean_diff + t * std_err_diff, 8, 3); + + { - printf ("P - Q NE 1\n"); - break; + double se2; + /* Now for the \sigma_1 != \sigma_2 case */ + tab_text (self->t, 1, i*2+3+1, + TAB_LEFT, _("Equal variances not assumed")); + + + se2 = (sqr(ttp0->s_std_dev)/(ttp0->n -1) ) + + (sqr(ttp1->s_std_dev)/(ttp1->n -1) ); + + t = mean_diff / sqrt(se2) ; + tab_float (self->t, 4, i*2+3+1, TAB_RIGHT, t, 8, 3); + + df = sqr(se2) / ( + (sqr(sqr(ttp0->s_std_dev)/(ttp0->n - 1 )) + /(ttp0->n -1 ) + ) + + + (sqr(sqr(ttp1->s_std_dev)/(ttp1->n - 1 )) + /(ttp1->n -1 ) + ) + ) ; + tab_float (self->t, 5, i*2+3+1, TAB_RIGHT, df, 8, 3); + + which=1; /* get p & q from t & df */ + cdft(&which, &p, &q, &t, &df, &status, &bound); + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + tab_float(self->t, 6, i*2+3+1, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); + + /* Now work out the confidence interval */ + q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + p = 1 - q ; + which=2; /* Calc T from p,q and df */ + cdft(&which, &p, &q, &t, &df, &status, &bound); + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + + tab_float(self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, 8, 3); + + + tab_float(self->t, 8, i*2+3+1, TAB_RIGHT, std_err_diff, 8, 3); + + + tab_float(self->t, 9, i*2+3+1, TAB_RIGHT, + mean_diff - t * std_err_diff, 8, 3); + + tab_float(self->t, 10, i*2+3+1, TAB_RIGHT, + mean_diff + t * std_err_diff, 8, 3); + + + + } + } +} + +/* Initialize the paired samples trbox */ +void +trbox_paired_init(struct trbox *self, + struct cmd_t_test *cmd UNUSED) +{ + + const int hsize=10; + const int vsize=n_pairs+3; + + self->populate = trbox_paired_populate; + + trbox_base_init(self,n_pairs,hsize); + tab_title (self->t, 0, _("Paired Samples Test")); + tab_hline(self->t,TAL_1,2,6,1); + tab_vline(self->t,TAL_2,2,0,vsize); + tab_joint_text(self->t,2,0,6,0,TAB_CENTER,_("Paired Differences")); + tab_box(self->t,-1,-1,-1,TAL_1, 2,1,6,vsize-1); + tab_box(self->t,-1,-1,-1,TAL_1, 6,0,hsize-1,vsize-1); + tab_hline(self->t,TAL_1,5,6, 2); + tab_vline(self->t,TAL_0,6,0,1); + + tab_joint_text(self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, + _("%d%% Confidence Interval of the Difference"), + (int)round(cmd->criteria*100.0)); + + tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean")); + tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper")); + tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("t")); + tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); +} + +/* Populate the paired samples trbox */ +void +trbox_paired_populate(struct trbox *trb, + struct cmd_t_test *cmd UNUSED) +{ + int i; - if (status) + for (i=0; i < n_pairs; ++i) { - return (double) ERROR_SIG; + int which =1; + double p,q; + int status; + double bound; + double se_mean; + + struct variable *v0 = pairs[i].v[0]; + struct variable *v1 = pairs[i].v[1]; + + struct t_test_proc *ttp0 = &v0->p.t_t; + struct t_test_proc *ttp1 = &v1->p.t_t; + + double n = ttp0->n; + double t; + double df = n - 1; + + tab_text (trb->t, 0, i+3, TAB_LEFT | TAT_PRINTF, _("Pair %d"),i); + + tab_text (trb->t, 1, i+3, TAB_LEFT | TAT_PRINTF, "%s - %s", + pairs[i].v[0]->name, pairs[i].v[1]->name); + + tab_float(trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, 8, 4); + + tab_float(trb->t, 3, i+3, TAB_RIGHT, pairs[i].std_dev_diff, 8, 5); + + /* SE Mean */ + se_mean = pairs[i].std_dev_diff / sqrt(n) ; + tab_float(trb->t, 4, i+3, TAB_RIGHT, se_mean, 8,5 ); + + /* Now work out the confidence interval */ + q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + p = 1 - q ; + which=2; /* Calc T from p,q and df */ + cdft(&which, &p, &q, &t, &df, &status, &bound); + + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + tab_float(trb->t, 5, i+3, TAB_RIGHT, + pairs[i].mean_diff - t * se_mean , 8, 4); + + tab_float(trb->t, 6, i+3, TAB_RIGHT, + pairs[i].mean_diff + t * se_mean , 8, 4); + + t = ( ttp0->mean - ttp1->mean) + / sqrt ( + ( sqr(ttp0->s_std_dev) + sqr(ttp1->s_std_dev) - + 2 * pairs[i].correlation * ttp0->s_std_dev * ttp1->s_std_dev ) + / (n-1) ) + ; + + tab_float(trb->t, 7, i+3, TAB_RIGHT, t , 8,3 ); + + /* Degrees of freedom */ + tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 2, 0 ); + + which=1; + cdft(&which, &p, &q, &t, &df, &status, &bound); + + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + + tab_float(trb->t, 9, i+3, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3); + + } - else + +} + +/* Initialize the one sample trbox */ +void +trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) +{ + const int hsize=7; + const int vsize=cmd->n_variables+3; + + self->populate = trbox_one_sample_populate; + + trbox_base_init(self, cmd->n_variables,hsize); + tab_title (self->t, 0, _("One-Sample Test")); + tab_hline(self->t, TAL_1, 1, hsize - 1, 1); + tab_vline(self->t, TAL_2, 1, 0, vsize); + + tab_joint_text(self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF, + _("Test Value = %f"),cmd->n_testval); + + tab_box(self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1); + + + tab_joint_text(self->t,5,1,6,1,TAB_CENTER | TAT_PRINTF, + _("%d%% Confidence Interval of the Difference"), + (int)round(cmd->criteria*100.0)); + + tab_vline(self->t,TAL_0,6,1,1); + tab_hline(self->t,TAL_1,5,6,2); + tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _("t")); + tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); + tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference")); + tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper")); + +} + + +/* Populate the one sample trbox */ +void +trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) +{ + int i; + + assert(trb->t); + + for (i=0; i < cmd->n_variables; ++i) + { + int which =1; + double t; + double p,q; + double df; + int status; + double bound; + struct t_test_proc *ttp; + ttp= &cmd->v_variables[i]->p.t_t; + + + tab_text (trb->t, 0, i+3, TAB_LEFT, cmd->v_variables[i]->name); + + t = (ttp->mean - cmd->n_testval ) * sqrt(ttp->n) / ttp->std_dev ; + + tab_float (trb->t, 1, i+3, TAB_RIGHT, t, 8,3); + + /* degrees of freedom */ + df = ttp->n - 1; + + tab_float (trb->t, 2, i+3, TAB_RIGHT, df, 8,0); + + cdft(&which, &p, &q, &t, &df, &status, &bound); + + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + + /* Multiply by 2 to get 2-tailed significance, makeing sure we've got + the correct tail*/ + tab_float (trb->t, 3, i+3, TAB_RIGHT, 2.0*(t>0?q:p), 8,3); + + tab_float (trb->t, 4, i+3, TAB_RIGHT, ttp->mean_diff, 8,3); + + + q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + p = 1 - q ; + which=2; /* Calc T from p,q and df */ + cdft(&which, &p, &q, &t, &df, &status, &bound); + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + tab_float (trb->t, 5, i+3, TAB_RIGHT, + ttp->mean_diff - t * ttp->se_mean, 8,4); + + tab_float (trb->t, 6, i+3, TAB_RIGHT, + ttp->mean_diff + t * ttp->se_mean, 8,4); + } +} + +/* Base initializer for the generalized trbox */ +void +trbox_base_init(struct trbox *self, int data_rows, int cols) +{ + const int rows = 3 + data_rows; + + self->finalize = trbox_base_finalize; + self->t = tab_create (cols, rows, 0); + tab_headers (self->t,0,0,3,0); + tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols -1, rows -1); + tab_hline(self->t, TAL_2,0,cols-1,3); + tab_dim (self->t, tab_natural_dimensions); +} + + +/* Base finalizer for the trbox */ +void +trbox_base_finalize(struct trbox *trb) +{ + tab_submit(trb->t); +} + + +/* Create , populate and submit the Paired Samples Correlation box */ +void +pscbox(void) +{ + const int rows=1+n_pairs; + const int cols=5; + int i; + + struct tab_table *table; + + table = tab_create (cols,rows,0); + + tab_columns (table, SOM_COL_DOWN, 1); + tab_headers (table,0,0,1,0); + tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 ); + tab_hline(table, TAL_2, 0, cols - 1, 1); + tab_vline(table, TAL_2, 2, 0, rows - 1); + tab_dim(table, tab_natural_dimensions); + tab_title(table, 0, _("Paired Samples Correlations")); + + /* column headings */ + tab_text(table, 2,0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text(table, 3,0, TAB_CENTER | TAT_TITLE, _("Correlation")); + tab_text(table, 4,0, TAB_CENTER | TAT_TITLE, _("Sig.")); + + for (i=0; i < n_pairs; ++i) { - return q; + int which =1; + double p,q; + + int status; + double bound; + + double df = pairs[i].v[0]->p.t_t.n -2; + + double correlation_t = + pairs[i].correlation * sqrt(df) / + sqrt(1 - sqr(pairs[i].correlation)); + + + /* row headings */ + tab_text(table, 0,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, + _("Pair %d"), i); + + tab_text(table, 1,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, + _("%s & %s"), pairs[i].v[0]->name, pairs[i].v[1]->name); + + + /* row data */ + tab_float(table, 3, i+1, TAB_RIGHT, pairs[i].correlation, 8, 3); + tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].v[0]->p.t_t.n , 4, 0); + + + cdft(&which, &p, &q, &correlation_t, &df, &status, &bound); + + if ( 0 != status ) + { + msg( SE, _("Error calculating T statistic (cdft returned %d)."),status); + } + + + tab_float(table, 4, i+1, TAB_RIGHT, 2.0*(correlation_t>0?q:p), 8, 3); + } + + tab_submit(table); } -double -t_crt (double df, double q) + + +/* Calculation Implementation */ + +/* Per case calculations common to all variants of the T test */ +static int +common_calc (struct ccase *c) { - int which, status; - double p, bound, t; + int i; + + double weight = dict_get_case_weight(default_dict,c); - which = FIND_CRITICAL_VALUE; - bound = 0.0; - p = 1.0 - q; - t = 0.0; + for(i=0; i< cmd.n_variables ; ++i) + { + struct t_test_proc *ttp; + struct variable *v = cmd.v_variables[i]; + union value *val = &c->data[v->fv]; - cdft (&which, &p, &q, &t, &df, &status, &bound); + ttp= &cmd.v_variables[i]->p.t_t; - switch (status) + if (val->f != SYSMIS) + { + ttp->n+=weight; + ttp->sum+=weight * val->f; + ttp->ssq+=weight * val->f * val->f; + } + } + return 0; +} + +/* Pre calculations common to all variants of the T test */ +static void +common_precalc (void) +{ + int i=0; + + for(i=0; i< cmd.n_variables ; ++i) { - case -1: - { - printf ("t_crt: Parameter 1 is out of range\n"); - break; - } - case -2: - { - printf ("t_crt: value of p (%f) is out of range\n", p); - break; - } - case -3: - { - printf ("t_crt: value of q (%f) is out of range\n", q); - break; - } - case -4: - { - printf ("t_crt: value of df (%f) is out of range\n", df); - break; - } - case -5: - { - printf ("t_crt: Parameter 5 is out of range\n"); - break; - } - case -6: - { - printf ("t_crt: Parameter 6 is out of range\n"); - break; - } - case -7: - { - printf ("t_crt: Parameter 7 is out of range\n"); - break; - } - case 0: - { - /* printf( "Command completed successfully\n" ); */ - break; - } - case 1: - { - printf ("t_crt: Answer appears to be lower than the lowest search bound\n"); - break; - } - case 2: - { - printf ("t_crt: Answer appears to be higher than the greatest search bound\n"); - break; - } - case 3: - { - printf ("t_crt: P - Q NE 1\n"); - break; - } + struct t_test_proc *ttp; + ttp= &cmd.v_variables[i]->p.t_t; + + ttp->sum=0; + ttp->n=0; + ttp->ssq=0; + ttp->sum_diff=0; } +} - if (status) +/* Post calculations common to all variants of the T test */ +void +common_postcalc (void) +{ + int i=0; + + for(i=0; i< cmd.n_variables ; ++i) { - return (double) ERROR_SIG; + struct t_test_proc *ttp; + ttp= &cmd.v_variables[i]->p.t_t; + + ttp->mean=ttp->sum / ttp->n; + ttp->s_std_dev= sqrt( + ( (ttp->ssq / ttp->n ) - ttp->mean * ttp->mean ) + ) ; + + ttp->std_dev= sqrt( + ttp->n/(ttp->n-1) * + ( (ttp->ssq / ttp->n ) - ttp->mean * ttp->mean ) + ) ; + + ttp->se_mean = ttp->std_dev / sqrt(ttp->n); + ttp->mean_diff= ttp->sum_diff / ttp->n; } - else +} + +/* Per case calculations for one sample t test */ +static int +one_sample_calc (struct ccase *c) +{ + int i; + + double weight = dict_get_case_weight(default_dict,c); + + for(i=0; i< cmd.n_variables ; ++i) + { + struct t_test_proc *ttp; + struct variable *v = cmd.v_variables[i]; + union value *val = &c->data[v->fv]; + + ttp= &cmd.v_variables[i]->p.t_t; + + if (val->f != SYSMIS) + ttp->sum_diff += weight * (val->f - cmd.n_testval); + } + + return 0; +} + +/* Pre calculations for one sample t test */ +static void +one_sample_precalc (void) +{ + int i=0; + + for(i=0; i< cmd.n_variables ; ++i) + { + struct t_test_proc *ttp; + ttp= &cmd.v_variables[i]->p.t_t; + + ttp->sum_diff=0; + } +} + +/* Post calculations for one sample t test */ +static void +one_sample_postcalc (void) +{ + int i=0; + + for(i=0; i< cmd.n_variables ; ++i) { - return t; + struct t_test_proc *ttp; + ttp= &cmd.v_variables[i]->p.t_t; + + + ttp->mean_diff = ttp->sum_diff / ttp->n ; } } -double -t_sig (double t, double df) + + +static int +compare_var_name (const void *a_, const void *b_, void *v_ UNUSED) +{ + const struct variable *a = a_; + const struct variable *b = b_; + + return strcmp(a->name,b->name); +} + +static unsigned +hash_var_name (const void *a_, void *v_ UNUSED) { - int which, status; - double p, q, bound; + const struct variable *a = a_; - which = FIND_P; - q = 0.0; - p = 0.0; - bound = 0.0; + return hsh_hash_bytes (a->name, strlen(a->name)); +} - cdft (&which, &p, &q, &t, &df, &status, &bound); - switch (status) +static void +paired_precalc (void) +{ + int i; + + for(i=0; i < n_pairs ; ++i ) { - case -1: - { - printf ("t-sig: Parameter 1 is out of range\n"); - break; - } - case -2: - { - printf ("t-sig: Parameter 2 is out of range\n"); - break; - } - case -3: - { - printf ("t-sig: Parameter 3 is out of range\n"); - break; - } - case -4: - { - printf ("t-sig: Parameter 4 is out of range\n"); - break; - } - case -5: - { - printf ("t-sig: Parameter 5 is out of range\n"); - break; - } - case -6: - { - printf ("t-sig: Parameter 6 is out of range\n"); - break; - } - case -7: - { - printf ("t-sig: Parameter 7 is out of range\n"); - break; - } - case 0: - { - /* printf( "Command completed successfully\n" ); */ - break; - } - case 1: - { - printf ("t-sig: Answer appears to be lower than the lowest search bound\n"); - break; - } - case 2: - { - printf ("t-sig: Answer appears to be higher than the greatest search bound\n"); - break; - } - case 3: - { - printf ("t-sig: P - Q NE 1\n"); - break; - } + pairs[i].correlation=0; + pairs[i].sum_of_diffs=0; + pairs[i].ssq_diffs=0; } - if (status) +} + +static int +paired_calc (struct ccase *c) +{ + int i; + + for(i=0; i < n_pairs ; ++i ) { - return (double) ERROR_SIG; + struct variable *v0 = pairs[i].v[0]; + struct variable *v1 = pairs[i].v[1]; + + union value *val0 = &c->data[v0->fv]; + union value *val1 = &c->data[v1->fv]; + + pairs[i].correlation += ( val0->f - pairs[i].v[0]->p.t_t.mean ) + * + ( val1->f - pairs[i].v[1]->p.t_t.mean ); + + pairs[i].sum_of_diffs += val0->f - val1->f ; + pairs[i].ssq_diffs += sqr(val0->f - val1->f); + } - else + + + return 0; +} + +static void +paired_postcalc (void) +{ + int i; + + for(i=0; i < n_pairs ; ++i ) { - return q; + const double n = pairs[i].v[0]->p.t_t.n ; + + pairs[i].correlation /= pairs[i].v[0]->p.t_t.std_dev * + pairs[i].v[1]->p.t_t.std_dev ; + pairs[i].correlation /= pairs[i].v[0]->p.t_t.n -1; + + + pairs[i].mean_diff = pairs[i].sum_of_diffs / n ; + + + pairs[i].std_dev_diff = sqrt ( n / (n - 1) * ( + ( pairs[i].ssq_diffs / n ) + - + sqr(pairs[i].mean_diff ) + ) ); + } } -double -covariance (double x_sum, double x_n, double y_sum, double y_n, double ss) + +/* Compare two (union value)s */ +int +value_compare(const union value *a, const union value *b, int width) { - double tmp; + if (width == 0) + return (a->f < b->f) ? -1 : ( a->f > b->f ) ; + else + return memcmp (a->s, b->s, width); +} + + +static int +get_group(const union value *val, struct variable *var) +{ + if ( 0 == value_compare(val,&groups_values[0],var->width) ) + return 0; + else if (0 == value_compare(val,&groups_values[1],var->width) ) + return 1; - tmp = x_sum * y_sum; - tmp = tmp / x_n; - tmp = ss - tmp; - tmp = (tmp / (x_n + y_n - 1.0)); - return tmp; + /* Never reached */ + assert(0); + return -1; } -double -pearson_r (double c_xy, double c_xx, double c_yy) + +static void +group_precalc (void) { - return (c_xy / (sqrt (c_xx * c_yy))); + int i; + int j; + + groups_stats = xmalloc(sizeof(group_stats_t) * cmd.n_variables); + + for(i=0; i< cmd.n_variables ; ++i) + { + for (j=0 ; j < 2 ; ++j) + { + groups_stats[i][j].sum=0; + groups_stats[i][j].n=0; + groups_stats[i][j].ssq=0; + } + } + } -void -print_t_groups (struct variable * grps, union value * g1, union value * g2, - double n1, double n2, double mean1, double mean2, - double sd1, double sd2, double se1, double se2, - double diff, double l_f, double l_p, - double p_t, double p_sig, double p_df, double p_sed, - double p_l, double p_h, - double s_t, double s_sig, double s_df, double s_sed, - double s_l, double s_h) +static int +group_calc (struct ccase *c) { + int i; + union value *gv = &c->data[groups->fv]; + + double weight = dict_get_case_weight(default_dict,c); + + gv = &c->data[groups->fv]; - /* Display all this shit as SPSS 6.0 does (roughly) */ - printf ("\n\n Number \n"); - printf (" Variable of Cases Mean SD SE of Mean\n"); - printf ("-----------------------------------------------------------\n"); - printf (" %s %s\n\n", cmd.v_variables[cur_var]->name, cmd.v_variables[cur_var]->label); - printf ("%s %8.4f %8.0f %8.4f %8.3f %8.3f\n", - get_val_lab (grps, *g1, 0), g1->f, n1, mean1, sd1, se1); - printf ("%s %8.4f %8.0f %8.4f %8.3f %8.3f\n", - get_val_lab (grps, *g2, 0), g2->f, n2, mean2, sd2, se2); - printf ("-----------------------------------------------------------\n"); - printf ("\n Mean Difference = %8.4f\n", diff); - printf ("\n Levene's Test for Equality of Variances: F= %.3f P= %.3f\n", - l_f, l_p); - printf ("\n\n t-test for Equality of Means 95pc \n"); - printf ("Variances t-value df 2-Tail Sig SE of Diff CI for Diff \n"); - printf ("-----------------------------------------------------------------\n"); - printf ("Equal %8.2f %8.0f %8.3f %8.3f (%8.3f, %8.3f)\n", - p_t, p_df, p_sig, p_sed, p_l, p_h); - printf ("Unequal %8.2f %8.2f %8.3f %8.3f (%8.3f, %8.3f)\n", - s_t, s_df, s_sig, s_sed, s_l, s_h); - printf ("-----------------------------------------------------------------\n"); + for(i=0; i< cmd.n_variables ; ++i) + { + int g = get_group(gv,groups); + struct t_test_proc *ttp=&groups_stats[i][g]; + union value *val=&c->data[cmd.v_variables[i]->fv]; + + ttp->n+=weight; + ttp->sum+=weight * val->f; + ttp->ssq+=weight * sqr(val->f); + } + + return 0; } -/* - Local Variables: - mode: c - End: -*/ +static void +group_postcalc (void) +{ + int i; + int j; + + for(i=0; i< cmd.n_variables ; ++i) + { + for (j=0 ; j < 2 ; ++j) + { + struct t_test_proc *ttp; + ttp=&groups_stats[i][j]; + + ttp->mean = ttp->sum / ttp->n; + + ttp->s_std_dev= sqrt( + ( (ttp->ssq / ttp->n ) - ttp->mean * ttp->mean ) + ) ; + + ttp->std_dev= sqrt( + ttp->n/(ttp->n-1) * + ( (ttp->ssq / ttp->n ) - ttp->mean * ttp->mean ) + ) ; + + ttp->se_mean = ttp->std_dev / sqrt(ttp->n); + } + + } +}