+
+
+/* Build the "Paired Samples Correlations" box -- one heading row plus
+   one row for every entry of pairs[] -- fill it in and submit it. */
+void
+pscbox(void)
+{
+  const int n_cols = 5;
+  const int n_rows = n_pairs + 1;
+  struct tab_table *box = tab_create (n_cols, n_rows, 0);
+  int pair;
+
+  /* Frame, rules and title */
+  tab_columns (box, SOM_COL_DOWN, 1);
+  tab_headers (box, 0, 0, 1, 0);
+  tab_box (box, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
+  tab_hline (box, TAL_2, 0, n_cols - 1, 1);
+  tab_vline (box, TAL_2, 2, 0, n_rows - 1);
+  tab_dim (box, tab_natural_dimensions);
+  tab_title (box, 0, _("Paired Samples Correlations"));
+
+  /* Column headings (columns 0 and 1 hold the row headings) */
+  tab_text (box, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
+  tab_text (box, 3, 0, TAB_CENTER | TAT_TITLE, _("Correlation"));
+  tab_text (box, 4, 0, TAB_CENTER | TAT_TITLE, _("Sig."));
+
+  for (pair = 0; pair < n_pairs; ++pair)
+    {
+      const int row = pair + 1;
+      double lower_tail;
+      double upper_tail;
+      double sig;
+
+      double df = pairs[pair].n - 2;
+
+      /* The correlation expressed as a t statistic on DF degrees of
+         freedom */
+      double correlation_t =
+        pairs[pair].correlation * sqrt (df) /
+        sqrt (1 - pow2 (pairs[pair].correlation));
+
+      /* Row headings: pair number, then the two variable names */
+      tab_text (box, 0, row, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
+                _("Pair %d"), pair);
+
+      tab_text (box, 1, row, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
+                _("%s & %s"),
+                pairs[pair].v[0]->name, pairs[pair].v[1]->name);
+
+      /* Row data */
+      tab_float (box, 2, row, TAB_RIGHT, pairs[pair].n, 4, 0);
+      tab_float (box, 3, row, TAB_RIGHT, pairs[pair].correlation, 8, 3);
+
+      lower_tail = gsl_cdf_tdist_P (correlation_t, df);
+      upper_tail = gsl_cdf_tdist_Q (correlation_t, df);
+
+      /* Two-tailed value: double the tail on the far side of the
+         statistic */
+      if (correlation_t > 0)
+        sig = 2.0 * upper_tail;
+      else
+        sig = 2.0 * lower_tail;
+
+      tab_float (box, 4, row, TAB_RIGHT, sig, 8, 3);
+    }
+
+  tab_submit (box);
+}
+
+
+
+
+/* Calculation Implementation */
+
+/* Per-case accumulation shared by every variant of the T test.
+   Adds the weighted contribution of case C to the n, sum and ssq
+   totals of each test variable's ungrouped statistics.  _CMD points to
+   the struct cmd_t_test of the running command.  Always returns 0. */
+static int
+common_calc (const struct ccase *c, void *_cmd)
+{
+  struct cmd_t_test *cmd = _cmd;
+  const double weight =
+    dict_get_case_weight (default_dict, c, &bad_weight_warn);
+  int idx;
+
+  /* Under /MISSING=LISTWISE a single missing test variable drops the
+     whole case */
+  if (cmd->miss == TTS_LISTWISE)
+    for (idx = 0; idx < cmd->n_variables; ++idx)
+      {
+        struct variable *var = cmd->v_variables[idx];
+        const union value *datum = case_data (c, var->fv);
+
+        if (value_is_missing (&var->miss, datum))
+          return 0;
+      }
+
+  /* Listwise has to be implicit if the independent variable is
+     missing ?? */
+  if (cmd->sbc_groups)
+    {
+      const union value *group_value = case_data (c, indep_var->fv);
+
+      if (value_is_missing (&indep_var->miss, group_value))
+        return 0;
+    }
+
+  for (idx = 0; idx < cmd->n_variables; ++idx)
+    {
+      struct variable *var = cmd->v_variables[idx];
+      const union value *datum = case_data (c, var->fv);
+      struct group_statistics *totals = &group_proc_get (var)->ugs;
+
+      /* A missing value only excludes this one variable */
+      if (value_is_missing (&var->miss, datum))
+        continue;
+
+      totals->n += weight;
+      totals->sum += weight * datum->f;
+      totals->ssq += weight * datum->f * datum->f;
+    }
+
+  return 0;
+}
+
+/* Pre calculations common to all variants of the T test: zero the
+   running totals in each test variable's ungrouped statistics. */
+static void
+common_precalc ( struct cmd_t_test *cmd )
+{
+  int idx;
+
+  for (idx = 0; idx < cmd->n_variables; ++idx)
+    {
+      struct group_statistics *totals =
+        &group_proc_get (cmd->v_variables[idx])->ugs;
+
+      totals->n = 0;
+      totals->sum = 0;
+      totals->ssq = 0;
+      totals->sum_diff = 0;
+    }
+}
+
+/* Post calculations common to all variants of the T test.
+   From the n, sum, ssq and sum_diff totals of each test variable's
+   ungrouped statistics, derive the mean, the standard deviation with
+   divisor n (s_std_dev) and with divisor n-1 (std_dev), the standard
+   error of the mean, and the mean difference. */
+void
+common_postcalc ( struct cmd_t_test *cmd )
+{
+  int i;
+
+  for (i = 0; i < cmd->n_variables; ++i)
+    {
+      struct group_statistics *gs =
+        &group_proc_get (cmd->v_variables[i])->ugs;
+      double variance;
+
+      gs->mean = gs->sum / gs->n;
+
+      /* E[x^2] - E[x]^2 cannot be negative mathematically, but rounding
+         can leave it a hair below zero when the values are (nearly)
+         constant.  Clamp it so that sqrt() yields 0 rather than NaN.
+         A NaN (e.g. from n == 0) fails the comparison and passes
+         through untouched. */
+      variance = (gs->ssq / gs->n) - gs->mean * gs->mean;
+      if (variance < 0)
+        variance = 0;
+
+      gs->s_std_dev = sqrt (variance);
+      gs->std_dev = sqrt (gs->n / (gs->n - 1) * variance);
+
+      gs->se_mean = gs->std_dev / sqrt (gs->n);
+      gs->mean_diff = gs->sum_diff / gs->n;
+    }
+}
+
+/* Per-case accumulation for the one sample T test.
+   For each test variable, adds the weighted difference between its
+   value in case C and the test value cmd->n_testval[0] to that
+   variable's sum_diff.  CMD_ points to the struct cmd_t_test of the
+   running command.  Always returns 0. */
+static int
+one_sample_calc (const struct ccase *c, void *cmd_)
+{
+  struct cmd_t_test *cmd = cmd_;
+  const double weight =
+    dict_get_case_weight (default_dict, c, &bad_weight_warn);
+  int idx;
+
+  /* Under /MISSING=LISTWISE a single missing test variable drops the
+     whole case */
+  if (cmd->miss == TTS_LISTWISE)
+    for (idx = 0; idx < cmd->n_variables; ++idx)
+      {
+        struct variable *var = cmd->v_variables[idx];
+        const union value *datum = case_data (c, var->fv);
+
+        if (value_is_missing (&var->miss, datum))
+          return 0;
+      }
+
+  for (idx = 0; idx < cmd->n_variables; ++idx)
+    {
+      struct variable *var = cmd->v_variables[idx];
+      const union value *datum = case_data (c, var->fv);
+      struct group_statistics *totals = &group_proc_get (var)->ugs;
+
+      /* A missing value only excludes this one variable */
+      if (value_is_missing (&var->miss, datum))
+        continue;
+
+      totals->sum_diff += weight * (datum->f - cmd->n_testval[0]);
+    }
+
+  return 0;
+}
+
+/* Pre calculations for the one sample T test: zero the running sum of
+   differences of each test variable. */
+static void
+one_sample_precalc ( struct cmd_t_test *cmd )
+{
+  int idx;
+
+  for (idx = 0; idx < cmd->n_variables; ++idx)
+    group_proc_get (cmd->v_variables[idx])->ugs.sum_diff = 0;
+}
+
+/* Post calculations for the one sample T test: turn each test
+   variable's sum of differences into a mean difference by dividing it
+   by that variable's n. */
+static void
+one_sample_postcalc (struct cmd_t_test *cmd)
+{
+  int idx;
+
+  for (idx = 0; idx < cmd->n_variables; ++idx)
+    {
+      struct group_statistics *totals =
+        &group_proc_get (cmd->v_variables[idx])->ugs;
+
+      totals->mean_diff = totals->sum_diff / totals->n;
+    }
+}
+
+
+
+/* Pre calculations for the paired samples T test: zero every running
+   total, and the correlation, of each entry in pairs[]. */
+static void
+paired_precalc (struct cmd_t_test *cmd UNUSED)
+{
+  int p;
+
+  for (p = 0; p < n_pairs; ++p)
+    {
+      int member;
+
+      pairs[p].n = 0;
+
+      for (member = 0; member < 2; ++member)
+        {
+          pairs[p].sum[member] = 0;
+          pairs[p].ssq[member] = 0;
+        }
+
+      pairs[p].sum_of_prod = 0;
+      pairs[p].correlation = 0;
+      pairs[p].sum_of_diffs = 0;
+      pairs[p].ssq_diffs = 0;
+    }
+}
+
+
+/* Per-case accumulation for the paired samples T test.
+   For every pair whose two members are both non-missing in case C,
+   adds the case's weighted contribution to the pair's n, sums, sums of
+   squares, sum of products, and sum and sum of squares of the
+   differences.  CMD_ points to the struct cmd_t_test of the running
+   command.  Always returns 0. */
+static int
+paired_calc (const struct ccase *c, void *cmd_)
+{
+  struct cmd_t_test *cmd = cmd_;
+  const double weight =
+    dict_get_case_weight (default_dict, c, &bad_weight_warn);
+  int p;
+
+  /* Under /MISSING=LISTWISE the whole case is skipped as soon as either
+     member of any pair is missing */
+  if (cmd->miss == TTS_LISTWISE)
+    for (p = 0; p < n_pairs; ++p)
+      {
+        struct variable *first = pairs[p].v[0];
+        struct variable *second = pairs[p].v[1];
+
+        const union value *x = case_data (c, first->fv);
+        const union value *y = case_data (c, second->fv);
+
+        if (value_is_missing (&first->miss, x)
+            || value_is_missing (&second->miss, y))
+          return 0;
+      }
+
+  for (p = 0; p < n_pairs; ++p)
+    {
+      struct variable *first = pairs[p].v[0];
+      struct variable *second = pairs[p].v[1];
+
+      const union value *x = case_data (c, first->fv);
+      const union value *y = case_data (c, second->fv);
+
+      /* A pair with a missing member gets nothing from this case */
+      if (value_is_missing (&first->miss, x)
+          || value_is_missing (&second->miss, y))
+        continue;
+
+      pairs[p].n += weight;
+
+      pairs[p].sum[0] += weight * x->f;
+      pairs[p].sum[1] += weight * y->f;
+
+      pairs[p].ssq[0] += weight * pow2 (x->f);
+      pairs[p].ssq[1] += weight * pow2 (y->f);
+
+      pairs[p].sum_of_prod += weight * x->f * y->f;
+
+      pairs[p].sum_of_diffs += weight * (x->f - y->f);
+      pairs[p].ssq_diffs += weight * pow2 (x->f - y->f);
+    }
+
+  return 0;
+}
+
+/* Post calculations for the paired samples T test.
+   Turns the totals accumulated in pairs[] into, for each pair: the
+   mean and the two standard deviations (divisor n and divisor n-1) of
+   each member, the correlation between the members, and the mean and
+   standard deviation of the differences. */
+static void
+paired_postcalc (struct cmd_t_test *cmd UNUSED)
+{
+  int i;
+
+  for (i = 0; i < n_pairs; ++i)
+    {
+      int j;
+      const double n = pairs[i].n;
+      double diff_variance;
+
+      for (j = 0; j < 2; ++j)
+        {
+          double variance;
+
+          pairs[i].mean[j] = pairs[i].sum[j] / n;
+
+          /* E[x^2] - E[x]^2 cannot be negative mathematically, but
+             rounding can leave it a hair below zero for (nearly)
+             constant data.  Clamp it so that sqrt() yields 0 rather
+             than NaN; a NaN fails the comparison and passes through. */
+          variance = pairs[i].ssq[j] / n - pow2 (pairs[i].mean[j]);
+          if (variance < 0)
+            variance = 0;
+
+          pairs[i].s_std_dev[j] = sqrt (variance);
+          pairs[i].std_dev[j] = sqrt (n / (n - 1) * variance);
+        }
+
+      pairs[i].correlation = pairs[i].sum_of_prod / pairs[i].n -
+        pairs[i].mean[0] * pairs[i].mean[1];
+      /* correlation now actually contains the covariance (divisor n) */
+
+      pairs[i].correlation /= pairs[i].std_dev[0] * pairs[i].std_dev[1];
+      /* The standard deviations above use divisor n-1, so rescale the
+         covariance to match */
+      pairs[i].correlation *= pairs[i].n / (pairs[i].n - 1);
+
+      pairs[i].mean_diff = pairs[i].sum_of_diffs / n;
+
+      /* Same rounding guard as for the per-member variances */
+      diff_variance = pairs[i].ssq_diffs / n - pow2 (pairs[i].mean_diff);
+      if (diff_variance < 0)
+        diff_variance = 0;
+
+      pairs[i].std_dev_diff = sqrt (n / (n - 1) * diff_variance);
+    }
+}
+
+/* Pre calculations for the independent samples T test.
+   Gives every test variable a fresh two-entry hash of group
+   statistics, keyed through the file-level group properties gp, with
+   zeroed n, sum and ssq. */
+static void
+group_precalc (struct cmd_t_test *cmd )
+{
+  int var_idx;
+
+  for (var_idx = 0; var_idx < cmd->n_variables; ++var_idx)
+    {
+      struct group_proc *proc = group_proc_get (cmd->v_variables[var_idx]);
+      int grp;
+
+      /* There's always 2 groups for a T - TEST */
+      proc->n_groups = 2;
+
+      gp.indep_width = indep_var->width;
+
+      proc->group_hash = hsh_create (2,
+                                     (hsh_compare_func *) compare_group_binary,
+                                     (hsh_hash_func *) hash_group_binary,
+                                     (hsh_free_func *) free_group,
+                                     (void *) &gp);
+
+      for (grp = 0; grp < 2; ++grp)
+        {
+          struct group_statistics *stats = xmalloc (sizeof *stats);
+
+          stats->n = 0;
+          stats->sum = 0;
+          stats->ssq = 0;
+
+          if (gp.criterion == CMP_EQ)
+            stats->id = gp.v.g_value[grp];
+          else if (grp == 0)
+            /* Representative id on the low side of the critical value */
+            stats->id.f = gp.v.critical_value - 1.0;
+          else
+            /* Representative id on the high side of the critical value */
+            stats->id.f = gp.v.critical_value + 1.0;
+
+          hsh_insert (proc->group_hash, (void *) stats);
+        }
+    }
+}
+
+/* Per-case accumulation for the independent samples T test.
+   Looks up the group that case C's independent-variable value belongs
+   to and adds the case's weighted contribution to that group's n, sum
+   and ssq for every test variable that is not missing.  Cases whose
+   independent variable is missing, or matches neither group, add
+   nothing.  Always returns 0. */
+static int
+group_calc (const struct ccase *c, struct cmd_t_test *cmd)
+{
+  int i;
+
+  /* Fetched once; nothing below modifies the case */
+  const union value *const gv = case_data (c, indep_var->fv);
+
+  const double weight =
+    dict_get_case_weight (default_dict, c, &bad_weight_warn);
+
+  if (value_is_missing (&indep_var->miss, gv))
+    return 0;
+
+  /* Under /MISSING=LISTWISE a single missing test variable drops the
+     whole case */
+  if (cmd->miss == TTS_LISTWISE)
+    {
+      for (i = 0; i < cmd->n_variables; ++i)
+        {
+          struct variable *v = cmd->v_variables[i];
+          const union value *val = case_data (c, v->fv);
+
+          if (value_is_missing (&v->miss, val))
+            return 0;
+        }
+    }
+
+  for (i = 0; i < cmd->n_variables; ++i)
+    {
+      struct variable *var = cmd->v_variables[i];
+      const union value *val = case_data (c, var->fv);
+      struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
+      struct group_statistics *gs;
+
+      /* NOTE(review): the key handed to hsh_find is a union value,
+         whereas the hash callbacks take a struct group_statistics;
+         this presumably relies on `id' being the first member of
+         struct group_statistics -- confirm. */
+      gs = hsh_find (grp_hash, (void *) gv);
+
+      /* If the independent variable doesn't match either of the values
+         for this case then move on to the next case */
+      if (!gs)
+        return 0;
+
+      if (!value_is_missing (&var->miss, val))
+        {
+          gs->n += weight;
+          gs->sum += weight * val->f;
+          gs->ssq += weight * pow2 (val->f);
+        }
+    }
+
+  return 0;
+}
+
+
+/* Post calculations for the independent samples T test.
+   For each test variable, walks its hash of group statistics and
+   derives every group's mean, standard deviation with divisor n
+   (s_std_dev) and with divisor n-1 (std_dev), and standard error of
+   the mean.  Asserts that each hash holds exactly two groups. */
+static void
+group_postcalc ( struct cmd_t_test *cmd )
+{
+  int i;
+
+  for (i = 0; i < cmd->n_variables; ++i)
+    {
+      struct variable *var = cmd->v_variables[i];
+      struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
+      struct hsh_iterator g;
+      struct group_statistics *gs;
+      int count = 0;
+
+      for (gs = hsh_first (grp_hash, &g);
+           gs != 0;
+           gs = hsh_next (grp_hash, &g))
+        {
+          double variance;
+
+          gs->mean = gs->sum / gs->n;
+
+          /* E[x^2] - E[x]^2 cannot be negative mathematically, but
+             rounding can leave it a hair below zero for (nearly)
+             constant data.  Clamp it so that sqrt() yields 0 rather
+             than NaN; a NaN (e.g. from an empty group) fails the
+             comparison and passes through untouched. */
+          variance = (gs->ssq / gs->n) - gs->mean * gs->mean;
+          if (variance < 0)
+            variance = 0;
+
+          gs->s_std_dev = sqrt (variance);
+          gs->std_dev = sqrt (gs->n / (gs->n - 1) * variance);
+
+          gs->se_mean = gs->std_dev / sqrt (gs->n);
+          count++;
+        }
+      assert (count == 2);
+    }
+}
+
+
+
+/* Run the whole T test over the cases in CF.
+   A first pass gathers the statistics common to all variants, a second
+   pass gathers those specific to the current mode, and then the
+   summary box, the paired correlations box (paired mode only) and the
+   test results box are produced.  CMD_ points to the struct cmd_t_test
+   of the running command. */
+static void
+calculate(const struct casefile *cf, void *cmd_)
+{
+  struct cmd_t_test *cmd = cmd_;
+
+  struct ssbox stat_summary_box;
+  struct trbox test_results_box;
+
+  struct casereader *reader;
+  struct ccase c;
+
+  /* Pass 1: statistics common to all variants */
+  common_precalc (cmd);
+  reader = casefile_get_reader (cf);
+  while (casereader_read (reader, &c))
+    {
+      common_calc (&c, cmd);
+      case_destroy (&c);
+    }
+  casereader_destroy (reader);
+  common_postcalc (cmd);
+
+  /* Pass 2: statistics specific to the selected variant */
+  switch (mode)
+    {
+    case T_1_SAMPLE:
+      one_sample_precalc (cmd);
+      reader = casefile_get_reader (cf);
+      while (casereader_read (reader, &c))
+        {
+          one_sample_calc (&c, cmd);
+          case_destroy (&c);
+        }
+      casereader_destroy (reader);
+      one_sample_postcalc (cmd);
+      break;
+
+    case T_PAIRED:
+      paired_precalc (cmd);
+      reader = casefile_get_reader (cf);
+      while (casereader_read (reader, &c))
+        {
+          paired_calc (&c, cmd);
+          case_destroy (&c);
+        }
+      casereader_destroy (reader);
+      paired_postcalc (cmd);
+      break;
+
+    case T_IND_SAMPLES:
+      group_precalc (cmd);
+      reader = casefile_get_reader (cf);
+      while (casereader_read (reader, &c))
+        {
+          group_calc (&c, cmd);
+          case_destroy (&c);
+        }
+      casereader_destroy (reader);
+      group_postcalc (cmd);
+
+      levene (cf, indep_var, cmd->n_variables, cmd->v_variables,
+              (cmd->miss == TTS_LISTWISE) ? LEV_LISTWISE : LEV_ANALYSIS,
+              value_is_missing);
+      break;
+    }
+
+  /* Output */
+  ssbox_create (&stat_summary_box, cmd, mode);
+  ssbox_populate (&stat_summary_box, cmd);
+  ssbox_finalize (&stat_summary_box);
+
+  if (mode == T_PAIRED)
+    pscbox ();
+
+  trbox_create (&test_results_box, cmd, mode);
+  trbox_populate (&test_results_box, cmd);
+  trbox_finalize (&test_results_box);
+}
+
+short which_group(const struct group_statistics *g,
+                  const struct group_properties *p);
+
+/* Three-way comparison of the groups that A and B fall into under the
+   properties P.  Each is reduced to a small group flag; the result is
+   -1 if A's flag is less than B's, +1 if it is greater and 0 if the
+   two are equal. */
+static int
+compare_group_binary(const struct group_statistics *a,
+                     const struct group_statistics *b,
+                     const struct group_properties *p)
+{
+  int side_a;
+  int side_b;
+
+  if (p->criterion != CMP_LE)
+    {
+      side_a = which_group (a, p);
+      side_b = which_group (b, p);
+    }
+  else
+    {
+      /* A less-than-or-equal comparison is not meaningful for alpha
+         variables, so we shouldn't ever arrive here with one */
+      assert (p->indep_width == 0);
+
+      side_a = (a->id.f < p->v.critical_value);
+      side_b = (b->id.f < p->v.critical_value);
+    }
+
+  return (side_a > side_b) - (side_a < side_b);
+}
+
+/* Hash function for the group tables.  It is degenerate as hashes go:
+   it is really a classification of G under the properties P, and so
+   it can only yield a handful of distinct values. */
+
+static unsigned
+hash_group_binary(const struct group_statistics *g,
+                  const struct group_properties *p)
+{
+  if (p->criterion == CMP_LE)
+    {
+      /* Not meaningful to do a less than compare for alpha values ? */
+      assert (p->indep_width == 0);
+      return (g->id.f < p->v.critical_value);
+    }
+
+  if (p->criterion == CMP_EQ)
+    return which_group (g, p);
+
+  /* No other criterion is expected */
+  assert (0);
+
+  return (unsigned) -1;
+}
+
+/* Classify G against the two group values held in P:
+   returns 0 if G's id equals the first value,
+   1 if it equals the second,
+   2 if it equals neither. */
+short
+which_group(const struct group_statistics *g,
+            const struct group_properties *p)
+{
+  short grp;
+
+  for (grp = 0; grp < 2; ++grp)
+    {
+      if (compare_values (&g->id, &p->v.g_value[grp], p->indep_width) == 0)
+        return grp;
+    }
+
+  return 2;
+}
+