From c759a7875e72ad8f1523f1c8980ca5a9cb8d20c1 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 5 Aug 2010 17:07:42 +0200 Subject: [PATCH] Replace oneway.q with a manually crafted oneway.c file --- src/language/stats/.gitignore | 1 - src/language/stats/automake.mk | 2 +- src/language/stats/{oneway.q => oneway.c} | 1008 +++++++++++---------- 3 files changed, 543 insertions(+), 468 deletions(-) rename src/language/stats/{oneway.q => oneway.c} (61%) diff --git a/src/language/stats/.gitignore b/src/language/stats/.gitignore index 03d63ca9..c52a3ab9 100644 --- a/src/language/stats/.gitignore +++ b/src/language/stats/.gitignore @@ -3,7 +3,6 @@ examine.c frequencies.c glm.c npar.c -oneway.c rank.c regression.c t-test.c diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk index 97c67002..6242d9c0 100644 --- a/src/language/stats/automake.mk +++ b/src/language/stats/automake.mk @@ -8,7 +8,6 @@ src_language_stats_built_sources = \ src/language/stats/frequencies.c \ src/language/stats/glm.c \ src/language/stats/npar.c \ - src/language/stats/oneway.c \ src/language/stats/rank.c \ src/language/stats/regression.c \ src/language/stats/t-test.c @@ -33,6 +32,7 @@ language_stats_sources = \ src/language/stats/freq.h \ src/language/stats/npar-summary.c \ src/language/stats/npar-summary.h \ + src/language/stats/oneway.c \ src/language/stats/reliability.c \ src/language/stats/roc.c \ src/language/stats/roc.h \ diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.c similarity index 61% rename from src/language/stats/oneway.q rename to src/language/stats/oneway.c index 2d55edff..3995a1a4 100644 --- a/src/language/stats/oneway.q +++ b/src/language/stats/oneway.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,249 +16,535 @@ #include -#include -#include -#include -#include - #include #include #include -#include -#include -#include -#include + +#include + +#include +#include +#include #include + +#include +#include + + #include -#include -#include #include -#include -#include -#include #include #include -#include #include +#include + #include -#include "sort-criteria.h" + +#include +#include #include -#include "xalloc.h" +#include #include "gettext.h" #define _(msgid) gettext (msgid) -/* (headers) */ - -/* (specification) - "ONEWAY" (oneway_): - *^variables=custom; - missing=miss:!analysis/listwise, - incl:include/!exclude; - +contrast= double list; - +statistics[st_]=descriptives,homogeneity. -*/ -/* (declarations) */ -/* (functions) */ +enum missing_type + { + MISS_LISTWISE, + MISS_ANALYSIS, + }; -static struct cmd_oneway cmd; - -/* The independent variable */ -static const struct variable *indep_var; - -/* Number of dependent variables */ -static size_t n_vars; - -/* The dependent variables */ -static const struct variable **vars; +enum statistics + { + STATS_DESCRIPTIVES = 0x0001, + STATS_HOMOGENEITY = 0x0002 + }; +struct coeff_node +{ + struct ll ll; + double coeff; +}; -/* A hash table containing all the distinct values of the independent - variables */ -static struct hsh_table *global_group_hash; -/* The number of distinct values of the independent variable, when all - missing values are disregarded */ -static int ostensible_number_of_groups = -1; +struct contrasts_node +{ + struct ll ll; + struct ll_list coefficient_list; + bool bad_count; /* True if the number of coefficients does not equal the number of groups */ +}; -static void run_oneway (struct cmd_oneway *, struct casereader *, - const struct dataset *); +struct oneway +{ + size_t n_vars; + const struct variable **vars; + const struct variable *indep_var; -/* Routines to show the output tables */ -static void show_anova_table(void); -static void show_descriptives (const struct dictionary *dict); -static void show_homogeneity(void); + enum statistics stats; -static void show_contrast_coeffs (short *); -static void show_contrast_tests (short *); + enum missing_type missing_type; + enum mv_class exclude; + /* The number of distinct values of the independent variable, when all + missing values are disregarded */ + int actual_number_of_groups; -enum stat_table_t {STAT_DESC = 1, STAT_HOMO = 2}; + /* A hash table containing all the distinct values of the independent + variable */ + struct hsh_table *group_hash; -static enum stat_table_t stat_tables; + /* List of contrasts */ + struct ll_list contrast_list; +}; -static void output_oneway (const struct dictionary *dict); +/* Routines to show the output tables */ +static void show_anova_table (const struct oneway *); +static void show_descriptives (const struct oneway *, const struct dictionary *dict); +static void show_homogeneity (const struct oneway *); +static void output_oneway (const struct oneway *, const struct dictionary *dict); +static void run_oneway (struct oneway *cmd, struct casereader *input, const struct dataset *ds); int cmd_oneway (struct lexer *lexer, struct dataset *ds) { - struct casegrouper *grouper; - struct casereader *group; - int i; - bool ok; + const struct dictionary *dict = dataset_dict (ds); + + struct oneway oneway ; + oneway.n_vars = 0; + oneway.vars = NULL; + oneway.indep_var = NULL; + oneway.stats = 0; + oneway.missing_type = MISS_ANALYSIS; + oneway.exclude = MV_ANY; + oneway.actual_number_of_groups = 0; + oneway.group_hash = NULL; + + ll_init (&oneway.contrast_list); + + + if ( lex_match (lexer, '/')) + { + if (!lex_force_match_id (lexer, "VARIABLES")) + { + goto error; + } + lex_match (lexer, '='); + } + + if (!parse_variables_const (lexer, dict, + &oneway.vars, &oneway.n_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + goto error; + + lex_force_match (lexer, T_BY); - if ( !parse_oneway (lexer, ds, &cmd, NULL)) - return CMD_FAILURE; + oneway.indep_var = parse_variable_const (lexer, dict); - /* What statistics were requested */ - if ( cmd.sbc_statistics) + while (lex_token (lexer) != '.') { + lex_match (lexer, '/'); - for (i = 0; i < ONEWAY_ST_count; ++i) + if (lex_match_id (lexer, "STATISTICS")) { - if (! cmd.a_statistics[i]) continue; + lex_match (lexer, '='); + while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + { + if (lex_match_id (lexer, "DESCRIPTIVES")) + { + oneway.stats |= STATS_DESCRIPTIVES; + } + else if (lex_match_id (lexer, "HOMOGENEITY")) + { + oneway.stats |= STATS_HOMOGENEITY; + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + } + else if (lex_match_id (lexer, "CONTRAST")) + { + struct contrasts_node *cl = xzalloc (sizeof *cl); - switch (i) + struct ll_list *coefficient_list = &cl->coefficient_list; + lex_match (lexer, '='); + + ll_init (coefficient_list); + + while (lex_token (lexer) != '.' && lex_token (lexer) != '/') { - case ONEWAY_ST_DESCRIPTIVES: - stat_tables |= STAT_DESC; - break; - case ONEWAY_ST_HOMOGENEITY: - stat_tables |= STAT_HOMO; - break; + union value val; + if ( parse_value (lexer, &val, 0)) + { + struct coeff_node *cc = xmalloc (sizeof *cc); + cc->coeff = val.f; + + ll_push_tail (coefficient_list, &cc->ll); + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + + ll_push_tail (&oneway.contrast_list, &cl->ll); + } + else if (lex_match_id (lexer, "MISSING")) + { + lex_match (lexer, '='); + while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + { + if (lex_match_id (lexer, "INCLUDE")) + { + oneway.exclude = MV_SYSTEM; + } + else if (lex_match_id (lexer, "EXCLUDE")) + { + oneway.exclude = MV_ANY; + } + else if (lex_match_id (lexer, "LISTWISE")) + { + oneway.missing_type = MISS_LISTWISE; + } + else if (lex_match_id (lexer, "ANALYSIS")) + { + oneway.missing_type = MISS_ANALYSIS; + } + else + { + lex_error (lexer, NULL); + goto error; + } } } + else + { + lex_error (lexer, NULL); + goto error; + } } - /* Data pass. FIXME: error handling. */ - grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); - while (casegrouper_get_next_group (grouper, &group)) - run_oneway (&cmd, group, ds); - ok = casegrouper_destroy (grouper); - ok = proc_commit (ds) && ok; - free (vars); - free_oneway (&cmd); + { + struct casegrouper *grouper; + struct casereader *group; + bool ok; + + grouper = casegrouper_create_splits (proc_open (ds), dict); + while (casegrouper_get_next_group (grouper, &group)) + run_oneway (&oneway, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + } - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; + free (oneway.vars); + return CMD_SUCCESS; + + error: + free (oneway.vars); + return CMD_FAILURE; } + + +static int +compare_double_3way (const void *a_, const void *b_, const void *aux UNUSED) +{ + const double *a = a_; + const double *b = b_; + return *a < *b ? -1 : *a > *b; +} + +static unsigned +do_hash_double (const void *value_, const void *aux UNUSED) +{ + const double *value = value_; + return hash_double (*value, 0); +} + static void -output_oneway (const struct dictionary *dict) +free_double (void *value_, const void *aux UNUSED) { - size_t i; - short *bad_contrast; + double *value = value_; + free (value); +} - bad_contrast = xnmalloc (cmd.sbc_contrast, sizeof *bad_contrast); +static void postcalc (const struct oneway *cmd); +static void precalc (const struct oneway *cmd); - /* Check the sanity of the given contrast values */ - for (i = 0; i < cmd.sbc_contrast; ++i) + +static void +run_oneway (struct oneway *cmd, + struct casereader *input, + const struct dataset *ds) +{ + struct taint *taint; + struct dictionary *dict = dataset_dict (ds); + struct casereader *reader; + struct ccase *c; + + c = casereader_peek (input, 0); + if (c == NULL) { - int j; - double sum = 0; + casereader_destroy (input); + return; + } + output_split_file_values (ds, c); + case_unref (c); + + taint = taint_clone (casereader_get_taint (input)); + + cmd->group_hash = hsh_create (4, + compare_double_3way, + do_hash_double, + free_double, + cmd->indep_var); - bad_contrast[i] = 0; - if (subc_list_double_count (&cmd.dl_contrast[i]) != - ostensible_number_of_groups) + precalc (cmd); + + input = casereader_create_filter_missing (input, &cmd->indep_var, 1, + cmd->exclude, NULL, NULL); + if (cmd->missing_type == MISS_LISTWISE) + input = casereader_create_filter_missing (input, cmd->vars, cmd->n_vars, + cmd->exclude, NULL, NULL); + input = casereader_create_filter_weight (input, dict, NULL, NULL); + + reader = casereader_clone (input); + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) + { + size_t i; + + const double weight = dict_get_case_weight (dict, c, NULL); + + const union value *indep_val = case_data (c, cmd->indep_var); + void **p = hsh_probe (cmd->group_hash, &indep_val->f); + if (*p == NULL) + { + double *value = *p = xmalloc (sizeof *value); + *value = indep_val->f; + } + + for (i = 0; i < cmd->n_vars; ++i) { - msg (SW, - _("Number of contrast coefficients must equal the number of groups")); - bad_contrast[i] = 1; - continue; - } + const struct variable *v = cmd->vars[i]; - for (j = 0; j < ostensible_number_of_groups; ++j) - sum += subc_list_double_at (&cmd.dl_contrast[i], j); + const union value *val = case_data (c, v); + + struct group_proc *gp = group_proc_get (cmd->vars[i]); + struct hsh_table *group_hash = gp->group_hash; + + struct group_statistics *gs; + + gs = hsh_find (group_hash, indep_val ); + + if ( ! gs ) + { + gs = xmalloc (sizeof *gs); + gs->id = *indep_val; + gs->sum = 0; + gs->n = 0; + gs->ssq = 0; + gs->sum_diff = 0; + gs->minimum = DBL_MAX; + gs->maximum = -DBL_MAX; + + hsh_insert ( group_hash, gs ); + } + + if (!var_is_value_missing (v, val, cmd->exclude)) + { + struct group_statistics *totals = &gp->ugs; + + totals->n += weight; + totals->sum += weight * val->f; + totals->ssq += weight * pow2 (val->f); + + if ( val->f * weight < totals->minimum ) + totals->minimum = val->f * weight; + + if ( val->f * weight > totals->maximum ) + totals->maximum = val->f * weight; + + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2 (val->f); + + if ( val->f * weight < gs->minimum ) + gs->minimum = val->f * weight; + + if ( val->f * weight > gs->maximum ) + gs->maximum = val->f * weight; + } + + gp->n_groups = hsh_count (group_hash ); + } - if ( sum != 0.0 ) - msg (SW, _("Coefficients for contrast %zu do not total zero"), i + 1); } + casereader_destroy (reader); + + postcalc (cmd); + + if ( cmd->stats & STATS_HOMOGENEITY ) + levene (dict, casereader_clone (input), cmd->indep_var, + cmd->n_vars, cmd->vars, cmd->exclude); - if ( stat_tables & STAT_DESC ) - show_descriptives (dict); + casereader_destroy (input); + + cmd->actual_number_of_groups = hsh_count (cmd->group_hash); - if ( stat_tables & STAT_HOMO ) - show_homogeneity (); + if (!taint_has_tainted_successor (taint)) + output_oneway (cmd, dict); - show_anova_table (); + taint_destroy (taint); +} + +/* Pre calculations */ +static void +precalc (const struct oneway *cmd) +{ + size_t i = 0; - if (cmd.sbc_contrast ) + for (i = 0; i < cmd->n_vars; ++i) { - show_contrast_coeffs (bad_contrast); - show_contrast_tests (bad_contrast); + struct group_proc *gp = group_proc_get (cmd->vars[i]); + struct group_statistics *totals = &gp->ugs; + + /* Create a hash for each of the dependent variables. + The hash contains a group_statistics structure, + and is keyed by value of the independent variable */ + + gp->group_hash = hsh_create (4, compare_group, hash_group, + (hsh_free_func *) free_group, + cmd->indep_var); + + totals->sum = 0; + totals->n = 0; + totals->ssq = 0; + totals->sum_diff = 0; + totals->maximum = -DBL_MAX; + totals->minimum = DBL_MAX; } +} - free (bad_contrast); +/* Post calculations for the ONEWAY command */ +static void +postcalc (const struct oneway *cmd) +{ + size_t i = 0; - /* Clean up */ - for (i = 0; i < n_vars; ++i ) + for (i = 0; i < cmd->n_vars; ++i) { - struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash; + struct group_proc *gp = group_proc_get (cmd->vars[i]); + struct hsh_table *group_hash = gp->group_hash; + struct group_statistics *totals = &gp->ugs; - hsh_destroy (group_hash); - } + struct hsh_iterator g; + struct group_statistics *gs; - hsh_destroy (global_group_hash); + for (gs = hsh_first (group_hash, &g); + gs != 0; + gs = hsh_next (group_hash, &g)) + { + gs->mean = gs->sum / gs->n; + gs->s_std_dev = sqrt (gs->ssq / gs->n - pow2 (gs->mean)); + + gs->std_dev = sqrt ( + gs->n / (gs->n - 1) * + ( gs->ssq / gs->n - pow2 (gs->mean)) + ); + + gs->se_mean = gs->std_dev / sqrt (gs->n); + gs->mean_diff = gs->sum_diff / gs->n; + } + + totals->mean = totals->sum / totals->n; + totals->std_dev = sqrt ( + totals->n / (totals->n - 1) * + (totals->ssq / totals->n - pow2 (totals->mean)) + ); + + totals->se_mean = totals->std_dev / sqrt (totals->n); + } } +static void show_contrast_coeffs (const struct oneway *cmd); +static void show_contrast_tests (const struct oneway *cmd); -/* Parser for the variables sub command */ -static int -oneway_custom_variables (struct lexer *lexer, - struct dataset *ds, struct cmd_oneway *cmd UNUSED, - void *aux UNUSED) +static void +output_oneway (const struct oneway *cmd, const struct dictionary *dict) { - struct dictionary *dict = dataset_dict (ds); + size_t i = 0; - lex_match (lexer, '='); + /* Check the sanity of the given contrast values */ + struct contrasts_node *coeff_list = NULL; + ll_for_each (coeff_list, struct contrasts_node, ll, &cmd->contrast_list) + { + struct coeff_node *cn = NULL; + double sum = 0; + struct ll_list *cl = &coeff_list->coefficient_list; + ++i; - if ((lex_token (lexer) != T_ID || - dict_lookup_var (dict, lex_tokid (lexer)) == NULL) - && lex_token (lexer) != T_ALL) - return 2; + if (ll_count (cl) != cmd->actual_number_of_groups) + { + msg (SW, + _("Number of contrast coefficients must equal the number of groups")); + coeff_list->bad_count = true; + continue; + } - if (!parse_variables_const (lexer, dict, &vars, &n_vars, - PV_DUPLICATE - | PV_NUMERIC | PV_NO_SCRATCH) ) - { - free (vars); - return 0; + ll_for_each (cn, struct coeff_node, ll, cl) + sum += cn->coeff; + + if ( sum != 0.0 ) + msg (SW, _("Coefficients for contrast %zu do not total zero"), i); } - assert (n_vars); + if (cmd->stats & STATS_DESCRIPTIVES) + show_descriptives (cmd, dict); + + if (cmd->stats & STATS_HOMOGENEITY) + show_homogeneity (cmd); + + show_anova_table (cmd); - if ( ! lex_match (lexer, T_BY)) - return 2; - indep_var = parse_variable (lexer, dict); + if (ll_count (&cmd->contrast_list) > 0) + { + show_contrast_coeffs (cmd); + show_contrast_tests (cmd); + } + - if ( !indep_var ) + /* Clean up */ + for (i = 0; i < cmd->n_vars; ++i ) { - msg (SE, _("`%s' is not a variable name"), lex_tokid (lexer)); - return 0; + struct hsh_table *group_hash = group_proc_get (cmd->vars[i])->group_hash; + + hsh_destroy (group_hash); } - return 1; + hsh_destroy (cmd->group_hash); } /* Show the ANOVA table */ static void -show_anova_table (void) +show_anova_table (const struct oneway *cmd) { size_t i; int n_cols =7; - size_t n_rows = n_vars * 3 + 1; - - struct tab_table *t; + size_t n_rows = cmd->n_vars * 3 + 1; + struct tab_table *t = tab_create (n_cols, n_rows); - t = tab_create (n_cols, n_rows); tab_headers (t, 2, 0, 1, 0); tab_box (t, @@ -278,14 +564,14 @@ show_anova_table (void) tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance")); - for (i = 0; i < n_vars; ++i) + for (i = 0; i < cmd->n_vars; ++i) { - struct group_statistics *totals = &group_proc_get (vars[i])->ugs; - struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash; + struct group_statistics *totals = &group_proc_get (cmd->vars[i])->ugs; + struct hsh_table *group_hash = group_proc_get (cmd->vars[i])->group_hash; struct hsh_iterator g; struct group_statistics *gs; double ssa = 0; - const char *s = var_to_string (vars[i]); + const char *s = var_to_string (cmd->vars[i]); for (gs = hsh_first (group_hash, &g); gs != 0; @@ -305,7 +591,7 @@ show_anova_table (void) tab_hline (t, TAL_1, 0, n_cols - 1, i * 3 + 1); { - struct group_proc *gp = group_proc_get (vars[i]); + struct group_proc *gp = group_proc_get (cmd->vars[i]); const double sst = totals->ssq - pow2 (totals->sum) / totals->n; const double df1 = gp->n_groups - 1; const double df2 = totals->n - gp->n_groups; @@ -349,7 +635,7 @@ show_anova_table (void) /* Show the descriptives table */ static void -show_descriptives (const struct dictionary *dict) +show_descriptives (const struct oneway *cmd, const struct dictionary *dict) { size_t v; int n_cols = 10; @@ -364,8 +650,8 @@ show_descriptives (const struct dictionary *dict) int n_rows = 2; - for ( v = 0; v < n_vars; ++v ) - n_rows += group_proc_get (vars[v])->n_groups + 1; + for ( v = 0; v < cmd->n_vars; ++v ) + n_rows += group_proc_get (cmd->vars[v])->n_groups + 1; t = tab_create (n_cols, n_rows); tab_headers (t, 2, 0, 2, 0); @@ -405,18 +691,18 @@ show_descriptives (const struct dictionary *dict) row = 2; - for (v = 0; v < n_vars; ++v) + for (v = 0; v < cmd->n_vars; ++v) { double T; double std_error; - struct group_proc *gp = group_proc_get (vars[v]); + struct group_proc *gp = group_proc_get (cmd->vars[v]); struct group_statistics *gs; struct group_statistics *totals = &gp->ugs; - const char *s = var_to_string (vars[v]); - const struct fmt_spec *fmt = var_get_print_format (vars[v]); + const char *s = var_to_string (cmd->vars[v]); + const struct fmt_spec *fmt = var_get_print_format (cmd->vars[v]); struct group_statistics *const *gs_array = (struct group_statistics *const *) hsh_sort (gp->group_hash); @@ -432,7 +718,7 @@ show_descriptives (const struct dictionary *dict) ds_init_empty (&vstr); gs = gs_array[count]; - var_append_value_name (indep_var, &gs->id, &vstr); + var_append_value_name (cmd->indep_var, &gs->id, &vstr); tab_text (t, 1, row + count, TAB_LEFT | TAT_TITLE, @@ -450,17 +736,17 @@ show_descriptives (const struct dictionary *dict) std_error = gs->std_dev / sqrt (gs->n) ; tab_double (t, 5, row + count, 0, - std_error, NULL); + std_error, NULL); /* Now the confidence interval */ T = gsl_cdf_tdist_Qinv (q, gs->n - 1); tab_double (t, 6, row + count, 0, - gs->mean - T * std_error, NULL); + gs->mean - T * std_error, NULL); tab_double (t, 7, row + count, 0, - gs->mean + T * std_error, NULL); + gs->mean + T * std_error, NULL); /* Min and Max */ @@ -504,11 +790,11 @@ show_descriptives (const struct dictionary *dict) /* Show the homogeneity table */ static void -show_homogeneity (void) +show_homogeneity (const struct oneway *cmd) { size_t v; int n_cols = 5; - size_t n_rows = n_vars + 1; + size_t n_rows = cmd->n_vars + 1; struct tab_table *t; @@ -536,11 +822,11 @@ show_homogeneity (void) tab_title (t, _("Test of Homogeneity of Variances")); - for (v = 0; v < n_vars; ++v) + for (v = 0; v < cmd->n_vars; ++v) { double F; - const struct variable *var = vars[v]; - const struct group_proc *gp = group_proc_get (vars[v]); + const struct variable *var = cmd->vars[v]; + const struct group_proc *gp = group_proc_get (cmd->vars[v]); const char *s = var_to_string (var); const struct group_statistics *totals = &gp->ugs; @@ -564,11 +850,15 @@ show_homogeneity (void) /* Show the contrast coefficients table */ static void -show_contrast_coeffs (short *bad_contrast) +show_contrast_coeffs (const struct oneway *cmd) { - int n_cols = 2 + ostensible_number_of_groups; - int n_rows = 2 + cmd.sbc_contrast; - int count = 0; + int c_num = 0; + struct ll *cli; + + int n_contrasts = ll_count (&cmd->contrast_list); + int n_cols = 2 + cmd->actual_number_of_groups; + int n_rows = 2 + n_contrasts; + void *const *group_values; struct tab_table *t; @@ -606,40 +896,51 @@ show_contrast_coeffs (short *bad_contrast) tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, - var_to_string (indep_var)); + var_to_string (cmd->indep_var)); - group_values = hsh_sort (global_group_hash); - for (count = 0; - count < hsh_count (global_group_hash); - ++count) + group_values = hsh_sort (cmd->group_hash); + + for ( cli = ll_head (&cmd->contrast_list); + cli != ll_null (&cmd->contrast_list); + cli = ll_next (cli)) { - double *group_value_p; - union value group_value; - int i; - struct string vstr; + int count = 0; + struct contrasts_node *cn = ll_data (cli, struct contrasts_node, ll); + struct ll *coeffi = ll_head (&cn->coefficient_list); - ds_init_empty (&vstr); + tab_text_format (t, 1, c_num + 2, TAB_CENTER, "%d", c_num + 1); - group_value_p = group_values[count]; - group_value.f = *group_value_p; - var_append_value_name (indep_var, &group_value, &vstr); + for (count = 0; + count < hsh_count (cmd->group_hash) && coeffi != ll_null (&cn->coefficient_list); + ++count) + { + double *group_value_p; + union value group_value; + struct string vstr; - tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, - ds_cstr (&vstr)); + ds_init_empty (&vstr); - ds_destroy (&vstr); + group_value_p = group_values[count]; + group_value.f = *group_value_p; + var_append_value_name (cmd->indep_var, &group_value, &vstr); + tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, + ds_cstr (&vstr)); - for (i = 0; i < cmd.sbc_contrast; ++i ) - { - tab_text_format (t, 1, i + 2, TAB_CENTER, "%d", i + 1); + ds_destroy (&vstr); - if ( bad_contrast[i] ) - tab_text (t, count + 2, i + 2, TAB_RIGHT, "?" ); + if (cn->bad_count) + tab_text (t, count + 2, c_num + 2, TAB_RIGHT, "?" ); else - tab_text_format (t, count + 2, i + 2, TAB_RIGHT, "%g", - subc_list_double_at (&cmd.dl_contrast[i], count)); + { + struct coeff_node *coeffn = ll_data (coeffi, struct coeff_node, ll); + + tab_text_format (t, count + 2, c_num + 2, TAB_RIGHT, "%g", coeffn->coeff); + } + + coeffi = ll_next (coeffi); } + ++c_num; } tab_submit (t); @@ -648,11 +949,12 @@ show_contrast_coeffs (short *bad_contrast) /* Show the results of the contrast tests */ static void -show_contrast_tests (short *bad_contrast) +show_contrast_tests (const struct oneway *cmd) { + int n_contrasts = ll_count (&cmd->contrast_list); size_t v; int n_cols = 8; - size_t n_rows = 1 + n_vars * 2 * cmd.sbc_contrast; + size_t n_rows = 1 + cmd->n_vars * 2 * n_contrasts; struct tab_table *t; @@ -685,21 +987,25 @@ show_contrast_tests (short *bad_contrast) tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("df")); tab_text (t, 7, 0, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); - for (v = 0; v < n_vars; ++v) + for (v = 0; v < cmd->n_vars; ++v) { - int i; - int lines_per_variable = 2 * cmd.sbc_contrast; - + struct ll *cli; + int i = 0; + int lines_per_variable = 2 * n_contrasts; tab_text (t, 0, (v * lines_per_variable) + 1, TAB_LEFT | TAT_TITLE, - var_to_string (vars[v])); + var_to_string (cmd->vars[v])); - for (i = 0; i < cmd.sbc_contrast; ++i) + for ( cli = ll_head (&cmd->contrast_list); + cli != ll_null (&cmd->contrast_list); + ++i, cli = ll_next (cli)) { + struct contrasts_node *cn = ll_data (cli, struct contrasts_node, ll); + struct ll *coeffi = ll_head (&cn->coefficient_list); int ci; double contrast_value = 0.0; double coef_msq = 0.0; - struct group_proc *grp_data = group_proc_get (vars[v]); + struct group_proc *grp_data = group_proc_get (cmd->vars[v]); struct hsh_table *group_hash = grp_data->group_hash; void *const *group_stat_array; @@ -709,7 +1015,6 @@ show_contrast_tests (short *bad_contrast) double df; double sec_vneq = 0.0; - /* Note: The calculation of the degrees of freedom in the "variances not equal" case is painfull!! The following formula may help to understand it: @@ -729,7 +1034,7 @@ show_contrast_tests (short *bad_contrast) TAB_LEFT | TAT_TITLE, _("Assume equal variances")); - tab_text (t, 1, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + tab_text (t, 1, (v * lines_per_variable) + i + 1 + n_contrasts, TAB_LEFT | TAT_TITLE, _("Does not assume equal")); } @@ -739,18 +1044,21 @@ show_contrast_tests (short *bad_contrast) tab_text_format (t, 2, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + (v * lines_per_variable) + i + 1 + n_contrasts, TAB_CENTER | TAT_TITLE, "%d", i + 1); - - if ( bad_contrast[i]) + if (cn->bad_count) continue; group_stat_array = hsh_sort (group_hash); - for (ci = 0; ci < hsh_count (group_hash); ++ci) + for (ci = 0; + coeffi != ll_null (&cn->coefficient_list) && + ci < hsh_count (group_hash); + ++ci, coeffi = ll_next (coeffi)) { - const double coef = subc_list_double_at (&cmd.dl_contrast[i], ci); + struct coeff_node *cn = ll_data (coeffi, struct coeff_node, ll); + const double coef = cn->coeff; struct group_statistics *gs = group_stat_array[ci]; const double winv = pow2 (gs->std_dev) / gs->n; @@ -764,31 +1072,32 @@ show_contrast_tests (short *bad_contrast) df_numerator += (coef * coef) * winv; df_denominator += pow2((coef * coef) * winv) / (gs->n - 1); } + sec_vneq = sqrt (sec_vneq); df_numerator = pow2 (df_numerator); tab_double (t, 3, (v * lines_per_variable) + i + 1, - TAB_RIGHT, contrast_value, NULL); + TAB_RIGHT, contrast_value, NULL); tab_double (t, 3, (v * lines_per_variable) + i + 1 + - cmd.sbc_contrast, - TAB_RIGHT, contrast_value, NULL); + n_contrasts, + TAB_RIGHT, contrast_value, NULL); std_error_contrast = sqrt (grp_data->mse * coef_msq); /* Std. Error */ tab_double (t, 4, (v * lines_per_variable) + i + 1, - TAB_RIGHT, std_error_contrast, - NULL); + TAB_RIGHT, std_error_contrast, + NULL); T = fabs (contrast_value / std_error_contrast); /* T Statistic */ tab_double (t, 5, (v * lines_per_variable) + i + 1, - TAB_RIGHT, T, - NULL); + TAB_RIGHT, T, + NULL); df = grp_data->ugs.n - grp_data->n_groups; @@ -800,35 +1109,34 @@ show_contrast_tests (short *bad_contrast) /* Significance TWO TAILED !!*/ tab_double (t, 7, (v * lines_per_variable) + i + 1, - TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T, df), - NULL); + TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T, df), + NULL); /* Now for the Variances NOT Equal case */ /* Std. Error */ tab_double (t, 4, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, sec_vneq, - NULL); + (v * lines_per_variable) + i + 1 + n_contrasts, + TAB_RIGHT, sec_vneq, + NULL); T = contrast_value / sec_vneq; tab_double (t, 5, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, T, - NULL); + (v * lines_per_variable) + i + 1 + n_contrasts, + TAB_RIGHT, T, + NULL); df = df_numerator / df_denominator; tab_double (t, 6, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, df, - NULL); + (v * lines_per_variable) + i + 1 + n_contrasts, + TAB_RIGHT, df, + NULL); /* The Significance */ - - tab_double (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T,df), - NULL); + tab_double (t, 7, (v * lines_per_variable) + i + 1 + n_contrasts, + TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T,df), + NULL); } if ( v > 0 ) @@ -839,235 +1147,3 @@ show_contrast_tests (short *bad_contrast) } -/* ONEWAY ANOVA Calculations */ - -static void postcalc (struct cmd_oneway *cmd UNUSED); - -static void precalc (struct cmd_oneway *cmd UNUSED); - - - -/* Pre calculations */ -static void -precalc (struct cmd_oneway *cmd UNUSED) -{ - size_t i = 0; - - for (i = 0; i < n_vars; ++i) - { - struct group_proc *gp = group_proc_get (vars[i]); - struct group_statistics *totals = &gp->ugs; - - /* Create a hash for each of the dependent variables. - The hash contains a group_statistics structure, - and is keyed by value of the independent variable */ - - gp->group_hash = hsh_create (4, compare_group, hash_group, - (hsh_free_func *) free_group, - indep_var); - - totals->sum = 0; - totals->n = 0; - totals->ssq = 0; - totals->sum_diff = 0; - totals->maximum = -DBL_MAX; - totals->minimum = DBL_MAX; - } -} - -static int -compare_double_3way (const void *a_, const void *b_, const void *aux UNUSED) -{ - const double *a = a_; - const double *b = b_; - return *a < *b ? -1 : *a > *b; -} - -static unsigned -do_hash_double (const void *value_, const void *aux UNUSED) -{ - const double *value = value_; - return hash_double (*value, 0); -} - -static void -free_double (void *value_, const void *aux UNUSED) -{ - double *value = value_; - free (value); -} - -static void -run_oneway (struct cmd_oneway *cmd, - struct casereader *input, - const struct dataset *ds) -{ - struct taint *taint; - struct dictionary *dict = dataset_dict (ds); - enum mv_class exclude; - struct casereader *reader; - struct ccase *c; - - c = casereader_peek (input, 0); - if (c == NULL) - { - casereader_destroy (input); - return; - } - output_split_file_values (ds, c); - case_unref (c); - - taint = taint_clone (casereader_get_taint (input)); - - global_group_hash = hsh_create (4, - compare_double_3way, - do_hash_double, - free_double, - indep_var); - - precalc (cmd); - - exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM; - input = casereader_create_filter_missing (input, &indep_var, 1, - exclude, NULL, NULL); - if (cmd->miss == ONEWAY_LISTWISE) - input = casereader_create_filter_missing (input, vars, n_vars, - exclude, NULL, NULL); - input = casereader_create_filter_weight (input, dict, NULL, NULL); - - reader = casereader_clone (input); - for (; (c = casereader_read (reader)) != NULL; case_unref (c)) - { - size_t i; - - const double weight = dict_get_case_weight (dict, c, NULL); - - const union value *indep_val = case_data (c, indep_var); - void **p = hsh_probe (global_group_hash, &indep_val->f); - if (*p == NULL) - { - double *value = *p = xmalloc (sizeof *value); - *value = indep_val->f; - } - - for (i = 0; i < n_vars; ++i) - { - const struct variable *v = vars[i]; - - const union value *val = case_data (c, v); - - struct group_proc *gp = group_proc_get (vars[i]); - struct hsh_table *group_hash = gp->group_hash; - - struct group_statistics *gs; - - gs = hsh_find (group_hash, indep_val ); - - if ( ! gs ) - { - gs = xmalloc (sizeof *gs); - gs->id = *indep_val; - gs->sum = 0; - gs->n = 0; - gs->ssq = 0; - gs->sum_diff = 0; - gs->minimum = DBL_MAX; - gs->maximum = -DBL_MAX; - - hsh_insert ( group_hash, gs ); - } - - if (!var_is_value_missing (v, val, exclude)) - { - struct group_statistics *totals = &gp->ugs; - - totals->n += weight; - totals->sum += weight * val->f; - totals->ssq += weight * pow2 (val->f); - - if ( val->f * weight < totals->minimum ) - totals->minimum = val->f * weight; - - if ( val->f * weight > totals->maximum ) - totals->maximum = val->f * weight; - - gs->n += weight; - gs->sum += weight * val->f; - gs->ssq += weight * pow2 (val->f); - - if ( val->f * weight < gs->minimum ) - gs->minimum = val->f * weight; - - if ( val->f * weight > gs->maximum ) - gs->maximum = val->f * weight; - } - - gp->n_groups = hsh_count ( group_hash ); - } - - } - casereader_destroy (reader); - - postcalc (cmd); - - - if ( stat_tables & STAT_HOMO ) - levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude); - - casereader_destroy (input); - - ostensible_number_of_groups = hsh_count (global_group_hash); - - if (!taint_has_tainted_successor (taint)) - output_oneway (dict); - - taint_destroy (taint); -} - - -/* Post calculations for the ONEWAY command */ -void -postcalc ( struct cmd_oneway *cmd UNUSED ) -{ - size_t i = 0; - - for (i = 0; i < n_vars; ++i) - { - struct group_proc *gp = group_proc_get (vars[i]); - struct hsh_table *group_hash = gp->group_hash; - struct group_statistics *totals = &gp->ugs; - - struct hsh_iterator g; - struct group_statistics *gs; - - for (gs = hsh_first (group_hash, &g); - gs != 0; - gs = hsh_next (group_hash, &g)) - { - gs->mean = gs->sum / gs->n; - gs->s_std_dev = sqrt (gs->ssq / gs->n - pow2 (gs->mean)); - - gs->std_dev = sqrt ( - gs->n / (gs->n - 1) * - ( gs->ssq / gs->n - pow2 (gs->mean)) - ); - - gs->se_mean = gs->std_dev / sqrt (gs->n); - gs->mean_diff = gs->sum_diff / gs->n; - } - - totals->mean = totals->sum / totals->n; - totals->std_dev = sqrt ( - totals->n / (totals->n - 1) * - (totals->ssq / totals->n - pow2 (totals->mean)) - ); - - totals->se_mean = totals->std_dev / sqrt (totals->n); - } -} - -/* - Local Variables: - mode: c - End: -*/ -- 2.30.2