#include "value-labels.h"
#include "var.h"
#include "vfm.h"
-#include "pool.h"
#include "hash.h"
#include "stats.h"
#include "t-test.h"
/* (declarations) */
/* (functions) */
-static struct cmd_t_test cmd;
+static struct cmd_t_test cmd;
-static struct pool *t_test_pool ;
+/* Function to use for testing for missing values */
+static is_missing_func value_is_missing;
/* Variable for the GROUPS subcommand, if given. */
-static struct variable *groups;
+static struct variable *indep_var;
/* GROUPS: Number of values specified by the user; the values
specified if any. */
-static int n_groups_values;
+static int n_group_values;
static union value groups_values[2];
+static enum comparison criteria[2];
+
/* PAIRS: Number of pairs to be compared ; each pair. */
static int n_pairs = 0 ;
struct pair
{
+#if 1
/* The variables comprising the pair */
struct variable *v[2];
+#endif
+
+ /* Total case weight of the cases in which neither member of the
+ pair is missing (the case weight is accumulated, not a count of 1) */
+ double n;
+
+ /* Weighted sum of each member's values over those cases */
+ double sum[2];
+
+ /* Weighted sum of the squares of each member's values */
+ double ssq[2];
+
+ /* Std deviation of each member including the n/(n-1) factor:
+ sqrt (n/(n-1) * (ssq/n - mean^2)) */
+ double std_dev[2];
+
+
+ /* Std deviation of each member computed without the n/(n-1)
+ factor: sqrt (ssq/n - mean^2) */
+ double s_std_dev[2];
+
+ /* The mean of each member: sum / n */
+ double mean[2];
/* The correlation coefficient between the variables */
double correlation;
/* The sum of the differences */
double sum_of_diffs;
+ /* Weighted sum of the products of the two members' values; used to
+ derive the covariance and hence the correlation */
+ double sum_of_prod;
+
/* The mean of the differences */
double mean_diff;
/* The std deviation of the differences */
double std_dev_diff;
};
-static struct pair *pairs=0;
+static struct pair *pairs=0;
static int parse_value (union value * v, int type) ;
-
/* Structures and Functions for the Statistics Summary Box */
struct ssbox;
typedef void populate_ssbox_func(struct ssbox *ssb,
}
}
+ /* If /MISSING=INCLUDE is set, test values with is_system_missing only, i.e. user-missing declarations are disregarded and such values are analysed as valid data */
+ if (cmd.incl == TTS_INCLUDE )
+ value_is_missing = is_system_missing;
+ else
+ value_is_missing = is_missing;
- procedure(common_precalc,common_calc,common_postcalc, NULL);
+ procedure_with_splits (common_precalc, common_calc, common_postcalc, NULL);
switch(mode)
{
case T_1_SAMPLE:
- procedure(one_sample_precalc,one_sample_calc,one_sample_postcalc, NULL);
+ procedure_with_splits (one_sample_precalc, one_sample_calc,
+ one_sample_postcalc, NULL);
break;
case T_PAIRED:
- procedure(paired_precalc,paired_calc,paired_postcalc, NULL);
+ procedure_with_splits (paired_precalc, paired_calc, paired_postcalc,
+ NULL);
break;
case T_IND_SAMPLES:
- procedure(group_precalc,group_calc,group_postcalc, NULL);
- levene(groups, cmd.n_variables, cmd.v_variables);
+ procedure_with_splits(group_precalc,group_calc,group_postcalc, NULL);
+ levene(indep_var, cmd.n_variables, cmd.v_variables,
+ (cmd.miss == TTS_LISTWISE)?LEV_LISTWISE:LEV_ANALYSIS ,
+ value_is_missing);
break;
}
- t_test_pool = pool_create ();
-
ssbox_create(&stat_summary_box,&cmd,mode);
ssbox_populate(&stat_summary_box,&cmd);
ssbox_finalize(&stat_summary_box);
trbox_populate(&test_results_box,&cmd);
trbox_finalize(&test_results_box);
- pool_destroy (t_test_pool);
-
- t_test_pool=0;
-
-
n_pairs=0;
free(pairs);
pairs=0;
-
if ( mode == T_IND_SAMPLES)
{
int i;
static int
tts_custom_groups (struct cmd_t_test *cmd UNUSED)
{
+
lex_match('=');
if (token != T_ALL &&
return 0;
}
- groups = parse_variable ();
- if (!groups)
+ indep_var = parse_variable ();
+ if (!indep_var)
{
lex_error ("expecting variable name in GROUPS subcommand");
return 0;
}
- if (groups->type == T_STRING && groups->width > MAX_SHORT_STRING)
+ if (indep_var->type == T_STRING && indep_var->width > MAX_SHORT_STRING)
{
msg (SE, _("Long string variable %s is not valid here."),
- groups->name);
+ indep_var->name);
return 0;
}
if (!lex_match ('('))
{
- if (groups->type == NUMERIC)
+ if (indep_var->type == NUMERIC)
{
- n_groups_values = 2;
groups_values[0].f = 1;
groups_values[1].f = 2;
+ criteria[0] = criteria[1] = CMP_EQ;
+ n_group_values = 2;
return 1;
}
else
}
}
- if (!parse_value (&groups_values[0],groups->type))
- return 0;
- n_groups_values = 1;
+ if (!parse_value (&groups_values[0],indep_var->type))
+ return 0;
lex_match (',');
if (lex_match (')'))
- return 1;
+ {
+ criteria[0] = CMP_LE;
+ criteria[1] = CMP_GT;
+ groups_values[1] = groups_values[0];
+ n_group_values = 1;
+ return 1;
+ }
- if (!parse_value (&groups_values[1],groups->type))
+ if (!parse_value (&groups_values[1],indep_var->type))
return 0;
- n_groups_values = 2;
-
+
+ n_group_values = 2;
if (!lex_force_match (')'))
return 0;
+ criteria[0] = criteria[1] = CMP_EQ;
return 1;
}
-
-
static int
tts_custom_pairs (struct cmd_t_test *cmd UNUSED)
{
ssbox_base_init(this, hsize,vsize);
tab_title (this->t, 0, _("Group Statistics"));
tab_vline(this->t,0,1,0,vsize);
- tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, groups->name);
+ tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, indep_var->name);
tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
{
int i;
+ char *val_lab0=0;
char *val_lab1=0;
- char *val_lab2=0;
- if ( groups->type == NUMERIC )
+ char prefix[2][3]={"",""};
+
+ if ( indep_var->type == NUMERIC )
{
- val_lab1 = val_labs_find( groups->val_labs,groups_values[0]);
- val_lab2 = val_labs_find( groups->val_labs,groups_values[1]);
+ val_lab0 = val_labs_find( indep_var->val_labs,groups_values[0]);
+ val_lab1 = val_labs_find( indep_var->val_labs,groups_values[1]);
}
else
{
- val_lab1 = groups_values[0].s;
- val_lab2 = groups_values[1].s;
+ val_lab0 = groups_values[0].s;
+ val_lab1 = groups_values[1].s;
+ }
+
+ if (n_group_values == 1)
+ {
+ strcpy(prefix[0],"< ");
+ strcpy(prefix[1],">=");
}
assert(ssb->t);
tab_text (ssb->t, 0, i*2+1, TAB_LEFT, cmd->v_variables[i]->name);
- if (val_lab1)
- tab_text (ssb->t, 1, i*2+1, TAB_LEFT, val_lab1);
+ if (val_lab0)
+ tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF,
+ "%s%s", prefix[0], val_lab0);
else
- tab_float(ssb->t, 1 ,i*2+1, TAB_LEFT, groups_values[0].f, 2,0);
+ tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF,
+ "%s%g", prefix[0], groups_values[0].f);
- if (val_lab2)
- tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT, val_lab2);
+ if (val_lab1)
+ tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF,
+ "%s%s", prefix[1], val_lab1);
else
- tab_float(ssb->t, 1 ,i*2+1+1, TAB_LEFT, groups_values[1].f,2,0);
+ tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF,
+ "%s%g", prefix[1], groups_values[1].f);
/* Fill in the group statistics */
for ( g=0; g < 2 ; ++g )
tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, pairs[i].v[j]->name);
/* Values */
- tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, gs->mean, 8, 2);
- tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, gs->n, 2, 0);
- tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, gs->std_dev, 8, 3);
- tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, gs->se_mean, 8, 3);
+ tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], 8, 2);
+ tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 2, 0);
+ tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], 8, 3);
+ tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j]/sqrt(pairs[i].n), 8, 3);
}
-
}
-
}
/* Populate the one sample ssbox */
double bound;
double se_mean;
- struct variable *v0 = pairs[i].v[0];
- struct variable *v1 = pairs[i].v[1];
-
- struct group_statistics *gs0 = &v0->p.t_t.ugs;
- struct group_statistics *gs1 = &v1->p.t_t.ugs;
-
- double n = gs0->n;
+ double n = pairs[i].n;
double t;
double df = n - 1;
tab_float(trb->t, 6, i+3, TAB_RIGHT,
pairs[i].mean_diff + t * se_mean , 8, 4);
- t = ( gs0->mean - gs1->mean)
- / sqrt (
- ( sqr(gs0->s_std_dev) + sqr(gs1->s_std_dev) -
- 2 * pairs[i].correlation * gs0->s_std_dev * gs1->s_std_dev )
- / (n-1) )
- ;
+ t = (pairs[i].mean[0] - pairs[i].mean[1])
+ / sqrt (
+ ( sqr (pairs[i].s_std_dev[0]) + sqr (pairs[i].s_std_dev[1]) -
+ 2 * pairs[i].correlation *
+ pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] )
+ / (n - 1)
+ );
tab_float(trb->t, 7, i+3, TAB_RIGHT, t , 8,3 );
which=1;
cdft(&which, &p, &q, &t, &df, &status, &bound);
-
if ( 0 != status )
{
msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
int status;
double bound;
- double df = pairs[i].v[0]->p.t_t.ugs.n -2;
+ double df = pairs[i].n -2;
double correlation_t =
pairs[i].correlation * sqrt(df) /
/* row data */
+ tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].n, 4, 0);
tab_float(table, 3, i+1, TAB_RIGHT, pairs[i].correlation, 8, 3);
- tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].v[0]->p.t_t.ugs.n , 4, 0);
-
cdft(&which, &p, &q, &correlation_t, &df, &status, &bound);
-
if ( 0 != status )
{
msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
}
-
tab_float(table, 4, i+1, TAB_RIGHT, 2.0*(correlation_t>0?q:p), 8, 3);
-
}
tab_submit(table);
double weight = dict_get_case_weight(default_dict,c);
+
+ /* Skip the entire case if /MISSING=LISTWISE is set */
+ if ( cmd.miss == TTS_LISTWISE )
+ {
+ for(i=0; i< cmd.n_variables ; ++i)
+ {
+ struct variable *v = cmd.v_variables[i];
+ union value *val = &c->data[v->fv];
+
+ if (value_is_missing(val,v) )
+ {
+ return 0;
+ }
+ }
+ }
+
+ /* Listwise has to be implicit if the independent variable is missing ?? */
+ if ( cmd.sbc_groups )
+ {
+ union value *gv = &c->data[indep_var->fv];
+ if ( value_is_missing(gv,indep_var) )
+ {
+ return 0;
+ }
+ }
+
+
for(i=0; i< cmd.n_variables ; ++i)
{
struct group_statistics *gs;
gs= &cmd.v_variables[i]->p.t_t.ugs;
- if (val->f != SYSMIS)
+ if (! value_is_missing(val,v) )
{
gs->n+=weight;
gs->sum+=weight * val->f;
double weight = dict_get_case_weight(default_dict,c);
+ /* Skip the entire case if /MISSING=LISTWISE is set */
+ if ( cmd.miss == TTS_LISTWISE )
+ {
+ for(i=0; i< cmd.n_variables ; ++i)
+ {
+ struct variable *v = cmd.v_variables[i];
+ union value *val = &c->data[v->fv];
+
+ if (value_is_missing(val,v) )
+ {
+ return 0;
+ }
+ }
+ }
+
for(i=0; i< cmd.n_variables ; ++i)
{
struct group_statistics *gs;
gs= &cmd.v_variables[i]->p.t_t.ugs;
- if (val->f != SYSMIS)
+ if ( ! value_is_missing(val,v))
gs->sum_diff += weight * (val->f - cmd.n_testval);
}
for(i=0; i < n_pairs ; ++i )
{
- pairs[i].correlation=0;
- pairs[i].sum_of_diffs=0;
- pairs[i].ssq_diffs=0;
+ pairs[i].n = 0;
+ pairs[i].sum[0] = 0; pairs[i].sum[1] = 0;
+ pairs[i].ssq[0] = 0; pairs[i].ssq[1] = 0;
+ pairs[i].sum_of_prod = 0;
+ pairs[i].correlation = 0;
+ pairs[i].sum_of_diffs = 0;
+ pairs[i].ssq_diffs = 0;
}
}
{
int i;
+ double weight = dict_get_case_weight(default_dict,c);
+
+ /* Skip the entire case if /MISSING=LISTWISE is set
+ and either member of any pair is missing */
+ if ( cmd.miss == TTS_LISTWISE )
+ {
+ for(i=0; i < n_pairs ; ++i )
+ {
+ struct variable *v0 = pairs[i].v[0];
+ struct variable *v1 = pairs[i].v[1];
+
+ union value *val0 = &c->data[v0->fv];
+ union value *val1 = &c->data[v1->fv];
+
+ if ( value_is_missing(val0,v0) ||
+ value_is_missing(val1,v1) )
+ {
+ return 0;
+ }
+ }
+ }
+
for(i=0; i < n_pairs ; ++i )
{
struct variable *v0 = pairs[i].v[0];
union value *val0 = &c->data[v0->fv];
union value *val1 = &c->data[v1->fv];
- pairs[i].correlation += ( val0->f - pairs[i].v[0]->p.t_t.ugs.mean )
- *
- ( val1->f - pairs[i].v[1]->p.t_t.ugs.mean );
+ if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) )
+ {
+ pairs[i].n += weight;
+ pairs[i].sum[0] += weight * val0->f;
+ pairs[i].sum[1] += weight * val1->f;
+
+ pairs[i].ssq[0] += weight * sqr(val0->f);
+ pairs[i].ssq[1] += weight * sqr(val1->f);
+
+#if 0
+ pairs[i].correlation += weight *
+ ( val0->f - pairs[i].v[0]->p.t_t.ugs.mean )
+ *
+ ( val1->f - pairs[i].v[1]->p.t_t.ugs.mean );
+#endif
- pairs[i].sum_of_diffs += val0->f - val1->f ;
- pairs[i].ssq_diffs += sqr(val0->f - val1->f);
+ pairs[i].sum_of_prod += weight * val0->f * val1->f ;
+
+ pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ;
+ pairs[i].ssq_diffs += weight * sqr(val0->f - val1->f);
+ }
}
return 0;
for(i=0; i < n_pairs ; ++i )
{
- const double n = pairs[i].v[0]->p.t_t.ugs.n ;
+ int j;
+ const double n = pairs[i].n;
+
+ for (j=0; j < 2 ; ++j)
+ {
+ pairs[i].mean[j] = pairs[i].sum[j] / n ;
+ pairs[i].s_std_dev[j] = sqrt((pairs[i].ssq[j] / n -
+ sqr(pairs[i].mean[j]))
+ );
+
+ pairs[i].std_dev[j] = sqrt(n/(n-1)*(pairs[i].ssq[j] / n -
+ sqr(pairs[i].mean[j]))
+ );
+ }
+
+ pairs[i].correlation = pairs[i].sum_of_prod / pairs[i].n -
+ pairs[i].mean[0] * pairs[i].mean[1] ;
+ /* correlation now actually contains the covariance */
+ pairs[i].correlation /= pairs[i].std_dev[0] * pairs[i].std_dev[1];
+ pairs[i].correlation *= pairs[i].n / ( pairs[i].n - 1 );
+
+#if 0
pairs[i].correlation /= pairs[i].v[0]->p.t_t.ugs.std_dev *
pairs[i].v[1]->p.t_t.ugs.std_dev ;
- pairs[i].correlation /= pairs[i].v[0]->p.t_t.ugs.n -1;
+ pairs[i].correlation /= n - 1;
+#endif
pairs[i].mean_diff = pairs[i].sum_of_diffs / n ;
}
}
+/* Return the index (0 or 1) of the group that the independent
+ variable value VAL falls into, or -1 if it falls into neither.
+ For each group i, VAL is compared with groups_values[i] by
+ compare_values (using INDEP's width) and the result is tested
+ against criteria[i].  Group 0 is tried first, so it is returned
+ when both criteria would be satisfied.
+ NOTE(review): for a single cut-point the GROUPS parser stores
+ CMP_LE / CMP_GT, while the Group Statistics box labels the groups
+ "< " and ">=" -- confirm which side the cut-point value belongs to.
+*/
static int
-get_group(const union value *val, struct variable *var)
+get_group(const union value *val, struct variable *indep)
{
- if ( 0 == compare_values(val,&groups_values[0],var->width) )
- return 0;
- else if (0 == compare_values(val,&groups_values[1],var->width) )
- return 1;
+ int i;
- /* Never reached */
- assert(0);
+ for (i = 0; i < 2 ; ++i )
+ {
+ const int cmp = compare_values(val,&groups_values[i],indep->width) ;
+ switch ( criteria[i])
+ {
+ case CMP_EQ:
+ if ( 0 == cmp ) return i;
+ break;
+ case CMP_LT:
+ if ( 0 > cmp ) return i;
+ break;
+ case CMP_LE:
+ if ( cmp <= 0 ) return i;
+ break;
+ case CMP_GT:
+ if ( cmp > 0 ) return i;
+ break;
+ case CMP_GE:
+ if ( cmp >= 0 ) return i;
+ break;
+ default:
+ assert(0); /* criteria[i] holds a value not handled above */
+ };
+ }
+
+ /* VAL satisfied neither group's criterion */
+ return -1;
}
for (j=0 ; j < 2 ; ++j)
{
- ttpr->gs[j].sum=0;
- ttpr->gs[j].n=0;
- ttpr->gs[j].ssq=0;
- ttpr->gs[j].id = groups_values[j];
+ ttpr->gs[j].sum = 0;
+ ttpr->gs[j].n = 0;
+ ttpr->gs[j].ssq = 0;
+
+ if ( n_group_values == 2 )
+ ttpr->gs[j].id = groups_values[j];
+ else
+ ttpr->gs[j].id = groups_values[0];
+ ttpr->gs[j].criterion = criteria[j];
}
}
group_calc (struct ccase *c, void *aux UNUSED)
{
int i;
- union value *gv = &c->data[groups->fv];
+ int g;
+ union value *gv = &c->data[indep_var->fv];
double weight = dict_get_case_weight(default_dict,c);
- gv = &c->data[groups->fv];
+ if ( value_is_missing(gv,indep_var) )
+ {
+ return 0;
+ }
+
+ if ( cmd.miss == TTS_LISTWISE )
+ {
+ for(i=0; i< cmd.n_variables ; ++i)
+ {
+ struct variable *v = cmd.v_variables[i];
+ union value *val = &c->data[v->fv];
+
+ if (value_is_missing(val,v) )
+ {
+ return 0;
+ }
+ }
+ }
+
+
+ gv = &c->data[indep_var->fv];
+
+ g = get_group(gv,indep_var);
+
+ /* If the independent variable doesn't match either of the values
+ for this case then move on to the next case */
+ if (g == -1 )
+ return 0;
for(i=0; i< cmd.n_variables ; ++i)
{
- int g = get_group(gv,groups);
+ struct variable *var = cmd.v_variables[i];
- struct group_statistics *gs = &cmd.v_variables[i]->p.t_t.gs[g];
+ struct group_statistics *gs = &var->p.t_t.gs[g];
- union value *val=&c->data[cmd.v_variables[i]->fv];
+ union value *val=&c->data[var->fv];
- gs->n+=weight;
- gs->sum+=weight * val->f;
- gs->ssq+=weight * sqr(val->f);
+ if ( !value_is_missing(val,var) )
+ {
+ gs->n+=weight;
+ gs->sum+=weight * val->f;
+ gs->ssq+=weight * sqr(val->f);
+ }
}
return 0;