#include "vfm.h"
#include "hash.h"
#include "casefile.h"
-#include "oneway.h"
+#include "group_proc.h"
#include "group.h"
#include "levene.h"
static is_missing_func value_is_missing;
-static void calculate(const struct casefile *cf, void *_mode);
+static void run_oneway(const struct casefile *cf, void *_mode);
/* Routines to show the output tables */
static void show_anova_table(void);
static void show_descriptives(void);
static void show_homogeneity(void);
-static void show_contrast_coeffs(void);
-static void show_contrast_tests(void);
+static void show_contrast_coeffs(short *);
+static void show_contrast_tests(short *);
+
+
+enum stat_table_t {STAT_DESC = 1, STAT_HOMO = 2}; /* Bit flags: descriptives table, homogeneity-of-variance table. */
+/* Flags are OR-ed in from cmd.a_statistics and tested with & before each optional table is shown. NOTE(review): never cleared in the visible code -- confirm flags cannot carry over between runs. */
+static enum stat_table_t stat_tables ;
+
+void output_oneway(void);
int
else
value_is_missing = is_missing;
- multipass_procedure_with_splits (calculate, &cmd);
+ /* Record which optional statistics tables were requested */
+ if ( cmd.sbc_statistics )
+ {
+
+ for (i = 0 ; i < ONEWAY_ST_count ; ++i )
+ {
+ if ( ! cmd.a_statistics[i] ) continue;
+
+ switch (i) {
+ case ONEWAY_ST_DESCRIPTIVES:
+ stat_tables |= STAT_DESC;
+ break;
+ case ONEWAY_ST_HOMOGENEITY:
+ stat_tables |= STAT_HOMO;
+ break;
+ }
+ }
+ }
+
+ multipass_procedure_with_splits (run_oneway, &cmd);
+
+
+ return CMD_SUCCESS;
+}
+
+
+void
+output_oneway(void)
+{
+
+ int i;
+ short *bad_contrast ;
+
+ bad_contrast = xmalloc ( sizeof (short) * cmd.sbc_contrast );
/* Check the sanity of the given contrast values */
for (i = 0 ; i < cmd.sbc_contrast ; ++i )
int j;
double sum = 0;
+ bad_contrast[i] = 0;
if ( subc_list_double_count(&cmd.dl_contrast[i]) !=
ostensible_number_of_groups )
{
- msg(SE,
+ msg(SW,
_("Number of contrast coefficients must equal the number of groups"));
- return CMD_FAILURE;
+ bad_contrast[i] = 1;
+ continue;
}
for (j=0; j < ostensible_number_of_groups ; ++j )
msg(SW,_("Coefficients for contrast %d do not total zero"),i + 1);
}
+ if ( stat_tables & STAT_DESC )
+ show_descriptives();
-
- /* Show the statistics tables */
- if ( cmd.sbc_statistics )
- {
- for (i = 0 ; i < ONEWAY_ST_count ; ++i )
- {
- if ( ! cmd.a_statistics[i] ) continue;
-
- switch (i) {
- case ONEWAY_ST_DESCRIPTIVES:
- show_descriptives();
- break;
- case ONEWAY_ST_HOMOGENEITY:
- show_homogeneity();
- break;
- }
- }
- }
-
+ if ( stat_tables & STAT_HOMO )
+ show_homogeneity();
show_anova_table();
- if (cmd.sbc_contrast)
+ if (cmd.sbc_contrast )
{
- show_contrast_coeffs();
- show_contrast_tests();
+ show_contrast_coeffs(bad_contrast);
+ show_contrast_tests(bad_contrast);
}
+ free(bad_contrast);
+
/* Clean up */
for (i = 0 ; i < n_vars ; ++i )
{
- struct hsh_table *group_hash = vars[i]->p.ww.group_hash;
+ struct hsh_table *group_hash = vars[i]->p.grp_data.group_hash;
hsh_destroy(group_hash);
}
hsh_destroy(global_group_hash);
- return CMD_SUCCESS;
}
-
/* Parser for the variables sub command */
static int
oneway_custom_variables(struct cmd_oneway *cmd UNUSED)
for ( i=0 ; i < n_vars ; ++i )
{
- struct group_statistics *totals = &vars[i]->p.ww.ugs;
- struct hsh_table *group_hash = vars[i]->p.ww.group_hash;
+ struct group_statistics *totals = &vars[i]->p.grp_data.ugs;
+ struct hsh_table *group_hash = vars[i]->p.grp_data.group_hash;
struct hsh_iterator g;
struct group_statistics *gs;
double ssa=0;
{
const double sst = totals->ssq - ( totals->sum * totals->sum) / totals->n ;
- const double df1 = vars[i]->p.ww.n_groups - 1;
- const double df2 = totals->n - vars[i]->p.ww.n_groups ;
+ const double df1 = vars[i]->p.grp_data.n_groups - 1;
+ const double df2 = totals->n - vars[i]->p.grp_data.n_groups ;
const double msa = ssa / df1;
- vars[i]->p.ww.mse = (sst - ssa) / df2;
+ vars[i]->p.grp_data.mse = (sst - ssa) / df2;
/* Sums of Squares */
/* Mean Squares */
tab_float (t, 4, i * 3 + 1, TAB_RIGHT, msa, 8, 3);
- tab_float (t, 4, i * 3 + 2, TAB_RIGHT, vars[i]->p.ww.mse, 8, 3);
+ tab_float (t, 4, i * 3 + 2, TAB_RIGHT, vars[i]->p.grp_data.mse, 8, 3);
{
- const double F = msa/vars[i]->p.ww.mse ;
+ const double F = msa/vars[i]->p.grp_data.mse ;
/* The F value */
tab_float (t, 5, i * 3 + 1, 0, F, 8, 3);
for ( v = 0 ; v < n_vars ; ++v )
- n_rows += vars[v]->p.ww.n_groups + 1;
+ n_rows += vars[v]->p.grp_data.n_groups + 1;
t = tab_create (n_cols,n_rows,0);
tab_headers (t, 2, 0, 2, 0);
struct hsh_iterator g;
struct group_statistics *gs;
- struct group_statistics *totals = &vars[v]->p.ww.ugs;
+ struct group_statistics *totals = &vars[v]->p.grp_data.ugs;
int count = 0 ;
char *s = (vars[v]->label) ? vars[v]->label : vars[v]->name;
- struct hsh_table *group_hash = vars[v]->p.ww.group_hash;
+ struct hsh_table *group_hash = vars[v]->p.grp_data.group_hash;
tab_text (t, 0, row, TAB_LEFT | TAT_TITLE, s);
tab_float(t, 8, row + count, 0, totals->minimum, 8, 2);
tab_float(t, 9, row + count, 0, totals->maximum, 8, 2);
- row += vars[v]->p.ww.n_groups + 1;
+ row += vars[v]->p.grp_data.n_groups + 1;
}
for ( v=0 ; v < n_vars ; ++v )
{
- char *s = (vars[v]->label) ? vars[v]->label : vars[v]->name;
+ double F;
+ const struct variable *var = vars[v];
+ const char *s = (var->label) ? var->label : var->name;
+ const struct group_statistics *totals = &var->p.grp_data.ugs;
+
+ const double df1 = var->p.grp_data.n_groups - 1;
+ const double df2 = totals->n - var->p.grp_data.n_groups ;
tab_text (t, 0, v + 1, TAB_LEFT | TAT_TITLE, s);
+
+ F = var->p.grp_data.levene;
+ tab_float (t, 1, v + 1, TAB_RIGHT, F, 8,3);
+ tab_float (t, 2, v + 1, TAB_RIGHT, df1 ,8,0);
+ tab_float (t, 3, v + 1, TAB_RIGHT, df2 ,8,0);
+
+ /* Now the significance */
+ tab_float (t, 4, v + 1, TAB_RIGHT,gsl_cdf_fdist_Q(F,df1,df2), 8, 3);
}
tab_submit (t);
/* Show the contrast coefficients table */
static void
-show_contrast_coeffs(void)
+show_contrast_coeffs(short *bad_contrast)
{
char *s;
int n_cols = 2 + ostensible_number_of_groups;
int i;
char *lab;
+
lab = val_labs_find(indep_var->val_labs,*group_value);
if ( lab )
for (i = 0 ; i < cmd.sbc_contrast ; ++i )
{
+
tab_text(t, 1, i + 2, TAB_CENTER | TAT_PRINTF, "%d", i + 1);
- tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g",
- subc_list_double_at(&cmd.dl_contrast[i],count)
- );
+
+ if ( bad_contrast[i] )
+ tab_text(t, count + 2, i + 2, TAB_RIGHT, "?" );
+ else
+ tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g",
+ subc_list_double_at(&cmd.dl_contrast[i],count)
+ );
}
count++ ;
/* Show the results of the contrast tests */
static void
-show_contrast_tests(void)
+show_contrast_tests(short *bad_contrast)
{
int v;
int n_cols = 8;
int ci;
double contrast_value = 0.0;
double coef_msq = 0.0;
- struct oneway_proc *ww = &vars[v]->p.ww ;
- struct hsh_table *group_hash = ww->group_hash;
+ struct group_proc *grp_data = &vars[v]->p.grp_data ;
+ struct hsh_table *group_hash = grp_data->group_hash;
struct hsh_iterator g;
struct group_statistics *gs;
double T;
double std_error_contrast ;
double df;
+ double sec_vneq=0.0;
+
+
+ /* Note: The calculation of the degrees of freedom in the variances
+ not equal case is painful.
+ The following formula may help to understand it:
+ df = \frac{\left(\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2}
+ {
+ \sum_{i=1}^k\left(
+ \frac{\left(c_i^2\frac{s_i^2}{n_i}\right)^2} {n_i-1}
+ \right)
+ }
+ */
+
+ double df_denominator = 0.0;
+ double df_numerator = 0.0;
if ( i == 0 )
tab_text (t, 2, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1);
+
+ if ( bad_contrast[i])
+ continue;
+
/* FIXME: Potential danger here.
We're ASSUMING THE array is in the order corresponding to the
hash order. */
gs != 0;
++ci, gs = hsh_next(group_hash,&g))
{
+
const double coef = subc_list_double_at(&cmd.dl_contrast[i],ci);
+ const double winv = (gs->std_dev * gs->std_dev) / gs->n;
contrast_value += coef * gs->mean;
coef_msq += (coef * coef) / gs->n ;
+
+ sec_vneq += (coef * coef) * (gs->std_dev * gs->std_dev ) /gs->n ;
+
+ df_numerator += (coef * coef) * winv;
+ df_denominator += pow2((coef * coef) * winv) / (gs->n - 1);
+
}
+ sec_vneq = sqrt(sec_vneq);
+
+ df_numerator = pow2(df_numerator);
tab_float (t, 3, (v * lines_per_variable) + i + 1,
TAB_RIGHT, contrast_value, 8,2);
- tab_float (t, 3, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ tab_float (t, 3, (v * lines_per_variable) + i + 1 +
+ cmd.sbc_contrast,
TAB_RIGHT, contrast_value, 8,2);
-
- std_error_contrast = sqrt(vars[v]->p.ww.mse * coef_msq);
+ std_error_contrast = sqrt(vars[v]->p.grp_data.mse * coef_msq);
/* Std. Error */
tab_float (t, 4, (v * lines_per_variable) + i + 1,
TAB_RIGHT, T,
8,3);
- df = ww->ugs.n - ww->n_groups;
+ df = grp_data->ugs.n - grp_data->n_groups;
/* Degrees of Freedom */
tab_float (t, 6, (v * lines_per_variable) + i + 1,
TAB_RIGHT, 2 * gsl_cdf_tdist_Q(T,df),
8,3);
- }
- if ( v > 0 )
- tab_hline(t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1);
- }
+ /* Now for the Variances NOT Equal case */
- tab_submit (t);
+ /* Std. Error */
+ tab_float (t, 4,
+ (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ TAB_RIGHT, sec_vneq,
+ 8,3);
-}
+ T = contrast_value / sec_vneq;
+ tab_float (t, 5,
+ (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ TAB_RIGHT, T,
+ 8,3);
-/* ONEWAY ANOVA Calculations */
-static void postcalc ( struct cmd_oneway *cmd UNUSED );
+ df = df_numerator / df_denominator;
-static void precalc ( struct cmd_oneway *cmd UNUSED );
+ tab_float (t, 6,
+ (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ TAB_RIGHT, df,
+ 8,3);
-int compare_group_id (const struct group_statistics *a,
- const struct group_statistics *b, int width);
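+ /* The Significance. NOTE(review): 2 * gsl_cdf_tdist_Q(T,df) exceeds 1 when T is negative; presumably fabs(T) is intended -- confirm. */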
-unsigned int hash_group_id(const struct group_statistics *v, int width);
+ tab_float (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ TAB_RIGHT, 2 * gsl_cdf_tdist_Q(T,df),
+ 8,3);
-void free_group_id(struct group_statistics *v, void *aux UNUSED);
+ }
+ if ( v > 0 )
+ tab_hline(t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1);
+ }
+ tab_submit (t);
-int
-compare_group_id (const struct group_statistics *a,
- const struct group_statistics *b, int width)
-{
- return compare_values(&a->id, &b->id, width);
}
-unsigned int
-hash_group_id(const struct group_statistics *v, int width)
-{
- return hash_value ( &v->id, width);
-}
-void
-free_group_id(struct group_statistics *v, void *aux UNUSED)
-{
- free(v);
-}
+/* ONEWAY ANOVA Calculations */
+
+static void postcalc ( struct cmd_oneway *cmd UNUSED );
+
+static void precalc ( struct cmd_oneway *cmd UNUSED );
+
/* Pre calculations */
for(i=0; i< n_vars ; ++i)
{
- struct group_statistics *totals = &vars[i]->p.ww.ugs;
+ struct group_statistics *totals = &vars[i]->p.grp_data.ugs;
/* Create a hash for each of the dependent variables.
The hash contains a group_statistics structure,
and is keyed by value of the independent variable */
- vars[i]->p.ww.group_hash =
+ vars[i]->p.grp_data.group_hash =
hsh_create(4,
- (hsh_compare_func *) compare_group_id,
- (hsh_hash_func *) hash_group_id,
- (hsh_free_func *) free_group_id,
+ (hsh_compare_func *) compare_group,
+ (hsh_hash_func *) hash_group,
+ (hsh_free_func *) free_group,
(void *) indep_var->width );
static void
-calculate(const struct casefile *cf, void *cmd_)
+run_oneway(const struct casefile *cf, void *cmd_)
{
struct casereader *r;
struct ccase c;
-
struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_;
global_group_hash = hsh_create(4,
(hsh_hash_func *) hash_value,
0,
(void *) indep_var->width );
-
precalc(cmd);
for(r = casefile_get_reader (cf);
dict_get_case_weight(default_dict,&c,&bad_weight_warn);
const union value *indep_val = case_data (&c, indep_var->fv);
+
+ /* Skip the case when its value of the independent variable is missing */
+ if ( value_is_missing(indep_val,indep_var) )
+ continue;
+
+ /* With /MISSING=LISTWISE, skip the entire case if any dependent variable is missing */
+ if ( cmd->miss == ONEWAY_LISTWISE )
+ {
+ for(i = 0; i < n_vars ; ++i)
+ {
+ const struct variable *v = vars[i];
+ const union value *val = case_data (&c, v->fv);
+
+ if (value_is_missing(val,v) )
+ break;
+ }
+ if ( i != n_vars )
+ continue;
+
+ }
+
hsh_insert ( global_group_hash, (void *) indep_val );
-
for ( i = 0 ; i < n_vars ; ++i )
{
const struct variable *v = vars[i];
const union value *val = case_data (&c, v->fv);
- struct hsh_table *group_hash = vars[i]->p.ww.group_hash;
+ struct hsh_table *group_hash = vars[i]->p.grp_data.group_hash;
struct group_statistics *gs;
if (! value_is_missing(val,v) )
{
- struct group_statistics *totals = &vars[i]->p.ww.ugs;
+ struct group_statistics *totals = &vars[i]->p.grp_data.ugs;
totals->n+=weight;
totals->sum+=weight * val->f;
gs->maximum = val->f * weight;
}
- vars[i]->p.ww.n_groups = hsh_count ( group_hash );
+ vars[i]->p.grp_data.n_groups = hsh_count ( group_hash );
}
}
postcalc(cmd);
+
+ if ( stat_tables & STAT_HOMO )
+ levene(cf, indep_var, n_vars, vars,
+ (cmd->miss == ONEWAY_LISTWISE) ? LEV_LISTWISE : LEV_ANALYSIS ,
+ value_is_missing);
+
ostensible_number_of_groups = hsh_count (global_group_hash);
+
+ output_oneway();
+
+
}
for(i = 0; i < n_vars ; ++i)
{
- struct hsh_table *group_hash = vars[i]->p.ww.group_hash;
- struct group_statistics *totals = &vars[i]->p.ww.ugs;
+ struct hsh_table *group_hash = vars[i]->p.grp_data.group_hash;
+ struct group_statistics *totals = &vars[i]->p.grp_data.ugs;
struct hsh_iterator g;
struct group_statistics *gs;
totals->se_mean = totals->std_dev / sqrt(totals->n);
-
-
-
}
}