X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Foneway.q;h=0f6b20a1db04cdf9503e4a21cd67cb2f22ecf6bf;hb=2cf38ce51a9f34961d68a75e0b312a591b5c9abf;hp=9b45119f8664f1ae948a5bfc838479fa8115f921;hpb=48386ee68a5283653435d05a9ea4e449710fd370;p=pspp-builds.git diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.q index 9b45119f..0f6b20a1 100644 --- a/src/language/stats/oneway.q +++ b/src/language/stats/oneway.q @@ -1,21 +1,18 @@ -/* PSPP - One way ANOVA. -*-c-*- +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc. -Copyright (C) 1997-9, 2000, 2007 Free Software Foundation, Inc. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. */ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ #include @@ -25,29 +22,30 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include #include -#include +#include +#include #include #include #include #include -#include #include #include #include -#include #include #include -#include -#include #include #include #include +#include #include #include #include #include #include #include "sort-criteria.h" +#include + +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -58,16 +56,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA "ONEWAY" (oneway_): *^variables=custom; missing=miss:!analysis/listwise, - incl:include/!exclude; + incl:include/!exclude; +contrast= double list; +statistics[st_]=descriptives,homogeneity. */ /* (declarations) */ /* (functions) */ -static bool bad_weight_warn = true; - - static struct cmd_oneway cmd; /* The independent variable */ @@ -82,63 +77,70 @@ static const struct variable **vars; /* A hash table containing all the distinct values of the independent variables */ -static struct hsh_table *global_group_hash ; +static struct hsh_table *global_group_hash; -/* The number of distinct values of the independent variable, when all +/* The number of distinct values of the independent variable, when all missing values are disregarded */ static int ostensible_number_of_groups = -1; -static bool run_oneway(const struct ccase *first, - const struct casefile *cf, - void *_mode, const struct dataset *); +static void run_oneway (struct cmd_oneway *, struct casereader *, + const struct dataset *); /* Routines to show the output tables */ static void show_anova_table(void); -static void show_descriptives(void); +static void show_descriptives (const struct dictionary *dict); static void show_homogeneity(void); -static void show_contrast_coeffs(short *); -static void show_contrast_tests(short *); +static void show_contrast_coeffs (short *); +static void show_contrast_tests (short *); enum stat_table_t {STAT_DESC = 1, STAT_HOMO = 2}; -static enum stat_table_t stat_tables ; +static enum stat_table_t stat_tables; -void output_oneway(void); +static void output_oneway (const struct dictionary *dict); int cmd_oneway (struct lexer *lexer, struct dataset *ds) { + struct casegrouper *grouper; + struct casereader *group; int i; bool ok; - if ( !parse_oneway (lexer, ds, &cmd, NULL) ) + if ( !parse_oneway (lexer, ds, &cmd, NULL)) return CMD_FAILURE; /* What statistics were requested */ - if ( cmd.sbc_statistics ) + if ( cmd.sbc_statistics) { - for (i = 0 ; i < ONEWAY_ST_count ; ++i ) + for (i = 0; i < ONEWAY_ST_count; ++i) { - if ( ! cmd.a_statistics[i] ) continue; - - switch (i) { - case ONEWAY_ST_DESCRIPTIVES: - stat_tables |= STAT_DESC; - break; - case ONEWAY_ST_HOMOGENEITY: - stat_tables |= STAT_HOMO; - break; - } + if (! cmd.a_statistics[i]) continue; + + switch (i) + { + case ONEWAY_ST_DESCRIPTIVES: + stat_tables |= STAT_DESC; + break; + case ONEWAY_ST_HOMOGENEITY: + stat_tables |= STAT_HOMO; + break; + } } } - ok = multipass_procedure_with_splits (ds, run_oneway, &cmd); + /* Data pass. FIXME: error handling. */ + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + run_oneway (&cmd, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; free (vars); free_oneway (&cmd); @@ -147,102 +149,98 @@ cmd_oneway (struct lexer *lexer, struct dataset *ds) } -void -output_oneway(void) +static void +output_oneway (const struct dictionary *dict) { size_t i; - short *bad_contrast ; + short *bad_contrast; bad_contrast = xnmalloc (cmd.sbc_contrast, sizeof *bad_contrast); /* Check the sanity of the given contrast values */ - for (i = 0 ; i < cmd.sbc_contrast ; ++i ) + for (i = 0; i < cmd.sbc_contrast; ++i) { int j; double sum = 0; bad_contrast[i] = 0; - if ( subc_list_double_count(&cmd.dl_contrast[i]) != - ostensible_number_of_groups ) + if (subc_list_double_count (&cmd.dl_contrast[i]) != + ostensible_number_of_groups) { - msg(SW, - _("Number of contrast coefficients must equal the number of groups")); + msg (SW, + _("Number of contrast coefficients must equal the number of groups")); bad_contrast[i] = 1; continue; } - for (j=0; j < ostensible_number_of_groups ; ++j ) - sum += subc_list_double_at(&cmd.dl_contrast[i],j); + for (j = 0; j < ostensible_number_of_groups; ++j) + sum += subc_list_double_at (&cmd.dl_contrast[i], j); - if ( sum != 0.0 ) - msg(SW,_("Coefficients for contrast %d do not total zero"), - (int) i + 1); + if ( sum != 0.0 ) + msg (SW, _("Coefficients for contrast %zu do not total zero"), i + 1); } - if ( stat_tables & STAT_DESC ) - show_descriptives(); + if ( stat_tables & STAT_DESC ) + show_descriptives (dict); if ( stat_tables & STAT_HOMO ) - show_homogeneity(); + show_homogeneity (); + + show_anova_table (); - show_anova_table(); - if (cmd.sbc_contrast ) { - show_contrast_coeffs(bad_contrast); - show_contrast_tests(bad_contrast); + show_contrast_coeffs (bad_contrast); + show_contrast_tests (bad_contrast); } - - free(bad_contrast); + free (bad_contrast); /* Clean up */ - for (i = 0 ; i < n_vars ; ++i ) + for (i = 0; i < n_vars; ++i ) { struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash; - hsh_destroy(group_hash); + hsh_destroy (group_hash); } - hsh_destroy(global_group_hash); - + hsh_destroy (global_group_hash); } - - /* Parser for the variables sub command */ static int -oneway_custom_variables (struct lexer *lexer, - struct dataset *ds, struct cmd_oneway *cmd UNUSED, - void *aux UNUSED) +oneway_custom_variables (struct lexer *lexer, + struct dataset *ds, struct cmd_oneway *cmd UNUSED, + void *aux UNUSED) { struct dictionary *dict = dataset_dict (ds); lex_match (lexer, '='); - if ((lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + if ((lex_token (lexer) != T_ID || + dict_lookup_var (dict, lex_tokid (lexer)) == NULL) && lex_token (lexer) != T_ALL) return 2; if (!parse_variables_const (lexer, dict, &vars, &n_vars, - PV_DUPLICATE - | PV_NUMERIC | PV_NO_SCRATCH) ) + PV_DUPLICATE + | PV_NUMERIC | PV_NO_SCRATCH) ) { free (vars); return 0; } - assert(n_vars); + assert (n_vars); if ( ! lex_match (lexer, T_BY)) return 2; indep_var = parse_variable (lexer, dict); - if ( !indep_var ) + if ( !indep_var ) { - msg(SE,_("`%s' is not a variable name"),lex_tokid (lexer)); + msg (SE, _("`%s' is not a variable name"), lex_tokid (lexer)); return 0; } @@ -251,8 +249,8 @@ oneway_custom_variables (struct lexer *lexer, /* Show the ANOVA table */ -static void -show_anova_table(void) +static void +show_anova_table (void) { size_t i; int n_cols =7; @@ -261,12 +259,12 @@ show_anova_table(void) struct tab_table *t; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 1, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, @@ -275,7 +273,7 @@ show_anova_table(void) tab_hline (t, TAL_2, 0, n_cols - 1, 1 ); tab_vline (t, TAL_2, 2, 0, n_rows - 1); tab_vline (t, TAL_0, 1, 0, 0); - + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df")); tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square")); @@ -283,70 +281,67 @@ show_anova_table(void) tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance")); - for ( i=0 ; i < n_vars ; ++i ) + for (i = 0; i < n_vars; ++i) { struct group_statistics *totals = &group_proc_get (vars[i])->ugs; struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash; struct hsh_iterator g; struct group_statistics *gs; - double ssa=0; - const char *s = var_to_string(vars[i]); + double ssa = 0; + const char *s = var_to_string (vars[i]); - for (gs = hsh_first (group_hash,&g); - gs != 0; - gs = hsh_next(group_hash,&g)) + for (gs = hsh_first (group_hash, &g); + gs != 0; + gs = hsh_next (group_hash, &g)) { - ssa += (gs->sum * gs->sum)/gs->n; + ssa += pow2 (gs->sum) / gs->n; } - - ssa -= ( totals->sum * totals->sum ) / totals->n ; + + ssa -= pow2 (totals->sum) / totals->n; tab_text (t, 0, i * 3 + 1, TAB_LEFT | TAT_TITLE, s); tab_text (t, 1, i * 3 + 1, TAB_LEFT | TAT_TITLE, _("Between Groups")); tab_text (t, 1, i * 3 + 2, TAB_LEFT | TAT_TITLE, _("Within Groups")); tab_text (t, 1, i * 3 + 3, TAB_LEFT | TAT_TITLE, _("Total")); - + if (i > 0) - tab_hline(t, TAL_1, 0, n_cols - 1 , i * 3 + 1); + tab_hline (t, TAL_1, 0, n_cols - 1, i * 3 + 1); { struct group_proc *gp = group_proc_get (vars[i]); - const double sst = totals->ssq - ( totals->sum * totals->sum) / totals->n ; + const double sst = totals->ssq - pow2 (totals->sum) / totals->n; const double df1 = gp->n_groups - 1; - const double df2 = totals->n - gp->n_groups ; + const double df2 = totals->n - gp->n_groups; const double msa = ssa / df1; - + gp->mse = (sst - ssa) / df2; - - + + /* Sums of Squares */ - tab_float (t, 2, i * 3 + 1, 0, ssa, 10, 2); - tab_float (t, 2, i * 3 + 3, 0, sst, 10, 2); - tab_float (t, 2, i * 3 + 2, 0, sst - ssa, 10, 2); + tab_double (t, 2, i * 3 + 1, 0, ssa, NULL); + tab_double (t, 2, i * 3 + 3, 0, sst, NULL); + tab_double (t, 2, i * 3 + 2, 0, sst - ssa, NULL); /* Degrees of freedom */ - tab_float (t, 3, i * 3 + 1, 0, df1, 4, 0); - tab_float (t, 3, i * 3 + 2, 0, df2, 4, 0); - tab_float (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0); + tab_fixed (t, 3, i * 3 + 1, 0, df1, 4, 0); + tab_fixed (t, 3, i * 3 + 2, 0, df2, 4, 0); + tab_fixed (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0); /* Mean Squares */ - tab_float (t, 4, i * 3 + 1, TAB_RIGHT, msa, 8, 3); - tab_float (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, 8, 3); - + tab_double (t, 4, i * 3 + 1, TAB_RIGHT, msa, NULL); + tab_double (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, NULL); - { - const double F = msa/gp->mse ; + { + const double F = msa / gp->mse ; /* The F value */ - tab_float (t, 5, i * 3 + 1, 0, F, 8, 3); - + tab_double (t, 5, i * 3 + 1, 0, F, NULL); + /* The significance */ - tab_float (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q(F,df1,df2), 8, 3); + tab_double (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1, df2), NULL); } - } - } @@ -356,48 +351,52 @@ show_anova_table(void) /* Show the descriptives table */ -static void -show_descriptives(void) +static void +show_descriptives (const struct dictionary *dict) { size_t v; - int n_cols =10; + int n_cols = 10; struct tab_table *t; int row; - const double confidence=0.95; + const double confidence = 0.95; const double q = (1.0 - confidence) / 2.0; - - int n_rows = 2 ; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; + + int n_rows = 2; - for ( v = 0 ; v < n_vars ; ++v ) + for ( v = 0; v < n_vars; ++v ) n_rows += group_proc_get (vars[v])->n_groups + 1; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 2, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); /* Put a frame around the entire box, and vertical lines inside */ - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); /* Underline headers */ - tab_hline (t, TAL_2, 0, n_cols - 1, 2 ); + tab_hline (t, TAL_2, 0, n_cols - 1, 2); tab_vline (t, TAL_2, 2, 0, n_rows - 1); - + tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("N")); tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); tab_text (t, 5, 1, TAB_CENTER | TAT_TITLE, _("Std. Error")); - tab_vline(t, TAL_0, 7, 0, 0); - tab_hline(t, TAL_1, 6, 7, 1); - tab_joint_text (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, _("%g%% Confidence Interval for Mean"),confidence*100.0); + tab_vline (t, TAL_0, 7, 0, 0); + tab_hline (t, TAL_1, 6, 7, 1); + tab_joint_text_format (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE, + _("%g%% Confidence Interval for Mean"), + confidence*100.0); tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound")); tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound")); @@ -410,103 +409,106 @@ show_descriptives(void) row = 2; - for ( v=0 ; v < n_vars ; ++v ) + for (v = 0; v < n_vars; ++v) { double T; double std_error; - + struct group_proc *gp = group_proc_get (vars[v]); struct group_statistics *gs; - struct group_statistics *totals = &gp->ugs; + struct group_statistics *totals = &gp->ugs; - const char *s = var_to_string(vars[v]); + const char *s = var_to_string (vars[v]); + const struct fmt_spec *fmt = var_get_print_format (vars[v]); struct group_statistics *const *gs_array = - (struct group_statistics *const *) hsh_sort(gp->group_hash); + (struct group_statistics *const *) hsh_sort (gp->group_hash); int count = 0; tab_text (t, 0, row, TAB_LEFT | TAT_TITLE, s); - if ( v > 0) - tab_hline(t, TAL_1, 0, n_cols - 1 , row); + if ( v > 0) + tab_hline (t, TAL_1, 0, n_cols - 1, row); - for (count = 0 ; count < hsh_count(gp->group_hash) ; ++count) + for (count = 0; count < hsh_count (gp->group_hash); ++count) { + struct string vstr; + ds_init_empty (&vstr); gs = gs_array[count]; - tab_text (t, 1, row + count, - TAB_LEFT | TAT_TITLE, var_get_value_name(indep_var, - &gs->id)); + var_append_value_name (indep_var, &gs->id, &vstr); + + tab_text (t, 1, row + count, + TAB_LEFT | TAT_TITLE, + ds_cstr (&vstr)); + + ds_destroy (&vstr); /* Now fill in the numbers ... */ - tab_float (t, 2, row + count, 0, gs->n, 8,0); + tab_fixed (t, 2, row + count, 0, gs->n, 8, 0); + + tab_double (t, 3, row + count, 0, gs->mean, NULL); - tab_float (t, 3, row + count, 0, gs->mean,8,2); - - tab_float (t, 4, row + count, 0, gs->std_dev,8,2); + tab_double (t, 4, row + count, 0, gs->std_dev, NULL); - std_error = gs->std_dev/sqrt(gs->n) ; - tab_float (t, 5, row + count, 0, - std_error, 8,2); + std_error = gs->std_dev / sqrt (gs->n) ; + tab_double (t, 5, row + count, 0, + std_error, NULL); /* Now the confidence interval */ - - T = gsl_cdf_tdist_Qinv(q,gs->n - 1); - tab_float(t, 6, row + count, 0, - gs->mean - T * std_error, 8, 2); + T = gsl_cdf_tdist_Qinv (q, gs->n - 1); - tab_float(t, 7, row + count, 0, - gs->mean + T * std_error, 8, 2); + tab_double (t, 6, row + count, 0, + gs->mean - T * std_error, NULL); - /* Min and Max */ + tab_double (t, 7, row + count, 0, + gs->mean + T * std_error, NULL); - tab_float(t, 8, row + count, 0, gs->minimum, 8, 2); - tab_float(t, 9, row + count, 0, gs->maximum, 8, 2); + /* Min and Max */ + tab_double (t, 8, row + count, 0, gs->minimum, fmt); + tab_double (t, 9, row + count, 0, gs->maximum, fmt); } - tab_text (t, 1, row + count, - TAB_LEFT | TAT_TITLE ,_("Total")); + tab_text (t, 1, row + count, + TAB_LEFT | TAT_TITLE, _("Total")); - tab_float (t, 2, row + count, 0, totals->n, 8,0); + tab_double (t, 2, row + count, 0, totals->n, wfmt); - tab_float (t, 3, row + count, 0, totals->mean, 8,2); + tab_double (t, 3, row + count, 0, totals->mean, NULL); - tab_float (t, 4, row + count, 0, totals->std_dev,8,2); + tab_double (t, 4, row + count, 0, totals->std_dev, NULL); - std_error = totals->std_dev/sqrt(totals->n) ; + std_error = totals->std_dev / sqrt (totals->n) ; - tab_float (t, 5, row + count, 0, std_error, 8,2); + tab_double (t, 5, row + count, 0, std_error, NULL); /* Now the confidence interval */ - - T = gsl_cdf_tdist_Qinv(q,totals->n - 1); - tab_float(t, 6, row + count, 0, - totals->mean - T * std_error, 8, 2); + T = gsl_cdf_tdist_Qinv (q, totals->n - 1); + + tab_double (t, 6, row + count, 0, + totals->mean - T * std_error, NULL); - tab_float(t, 7, row + count, 0, - totals->mean + T * std_error, 8, 2); + tab_double (t, 7, row + count, 0, + totals->mean + T * std_error, NULL); /* Min and Max */ - tab_float(t, 8, row + count, 0, totals->minimum, 8, 2); - tab_float(t, 9, row + count, 0, totals->maximum, 8, 2); + tab_double (t, 8, row + count, 0, totals->minimum, fmt); + tab_double (t, 9, row + count, 0, totals->maximum, fmt); row += gp->n_groups + 1; } - tab_submit (t); - - } /* Show the homogeneity table */ -static void -show_homogeneity(void) +static void +show_homogeneity (void) { size_t v; int n_cols = 5; @@ -515,50 +517,49 @@ show_homogeneity(void) struct tab_table *t; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 1, 0, 1, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); /* Put a frame around the entire box, and vertical lines inside */ - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline(t, TAL_2, 0, n_cols - 1, 1); - tab_vline(t, TAL_2, 1, 0, n_rows - 1); + tab_hline (t, TAL_2, 0, n_cols - 1, 1); + tab_vline (t, TAL_2, 1, 0, n_rows - 1); - tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Levene Statistic")); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df1")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df2")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Significance")); - + tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Levene Statistic")); + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df1")); + tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df2")); + tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Significance")); tab_title (t, _("Test of Homogeneity of Variances")); - for ( v=0 ; v < n_vars ; ++v ) + for (v = 0; v < n_vars; ++v) { double F; const struct variable *var = vars[v]; const struct group_proc *gp = group_proc_get (vars[v]); - const char *s = var_to_string(var); + const char *s = var_to_string (var); const struct group_statistics *totals = &gp->ugs; const double df1 = gp->n_groups - 1; - const double df2 = totals->n - gp->n_groups ; + const double df2 = totals->n - gp->n_groups; tab_text (t, 0, v + 1, TAB_LEFT | TAT_TITLE, s); F = gp->levene; - tab_float (t, 1, v + 1, TAB_RIGHT, F, 8,3); - tab_float (t, 2, v + 1, TAB_RIGHT, df1 ,8,0); - tab_float (t, 3, v + 1, TAB_RIGHT, df2 ,8,0); + tab_double (t, 1, v + 1, TAB_RIGHT, F, NULL); + tab_fixed (t, 2, v + 1, TAB_RIGHT, df1, 8, 0); + tab_fixed (t, 3, v + 1, TAB_RIGHT, df2, 8, 0); /* Now the significance */ - tab_float (t, 4, v + 1, TAB_RIGHT,gsl_cdf_fdist_Q(F,df1,df2), 8, 3); + tab_double (t, 4, v + 1, TAB_RIGHT,gsl_cdf_fdist_Q (F, df1, df2), NULL); } tab_submit (t); @@ -566,84 +567,93 @@ show_homogeneity(void) /* Show the contrast coefficients table */ -static void +static void show_contrast_coeffs (short *bad_contrast) { int n_cols = 2 + ostensible_number_of_groups; int n_rows = 2 + cmd.sbc_contrast; - union value *group_value; - int count = 0 ; - void *const *group_values ; + int count = 0; + void *const *group_values; struct tab_table *t; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 2, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); /* Put a frame around the entire box, and vertical lines inside */ - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_box (t, - -1,-1, + tab_box (t, + -1, -1, TAL_0, TAL_0, 2, 0, n_cols - 1, 0); tab_box (t, - -1,-1, + -1, -1, TAL_0, TAL_0, - 0,0, - 1,1); + 0, 0, + 1, 1); - tab_hline(t, TAL_1, 2, n_cols - 1, 1); - tab_hline(t, TAL_2, 0, n_cols - 1, 2); + tab_hline (t, TAL_1, 2, n_cols - 1, 1); + tab_hline (t, TAL_2, 0, n_cols - 1, 2); - tab_vline(t, TAL_2, 2, 0, n_rows - 1); + tab_vline (t, TAL_2, 2, 0, n_rows - 1); tab_title (t, _("Contrast Coefficients")); tab_text (t, 0, 2, TAB_LEFT | TAT_TITLE, _("Contrast")); - tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, - var_to_string(indep_var)); + tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, + var_to_string (indep_var)); - group_values = hsh_sort(global_group_hash); - for (count = 0 ; - count < hsh_count(global_group_hash) ; + group_values = hsh_sort (global_group_hash); + for (count = 0; + count < hsh_count (global_group_hash); ++count) { + double *group_value_p; + union value group_value; int i; - group_value = group_values[count]; + struct string vstr; + + ds_init_empty (&vstr); + + group_value_p = group_values[count]; + group_value.f = *group_value_p; + var_append_value_name (indep_var, &group_value, &vstr); - tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, - var_get_value_name (indep_var, group_value)); + tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, + ds_cstr (&vstr)); - for (i = 0 ; i < cmd.sbc_contrast ; ++i ) + ds_destroy (&vstr); + + + for (i = 0; i < cmd.sbc_contrast; ++i ) { - tab_text(t, 1, i + 2, TAB_CENTER | TAT_PRINTF, "%d", i + 1); + tab_text_format (t, 1, i + 2, TAB_CENTER, "%d", i + 1); - if ( bad_contrast[i] ) - tab_text(t, count + 2, i + 2, TAB_RIGHT, "?" ); + if ( bad_contrast[i] ) + tab_text (t, count + 2, i + 2, TAB_RIGHT, "?" ); else - tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g", - subc_list_double_at(&cmd.dl_contrast[i], count) - ); + tab_text_format (t, count + 2, i + 2, TAB_RIGHT, "%g", + subc_list_double_at (&cmd.dl_contrast[i], count)); } } - + tab_submit (t); } /* Show the results of the contrast tests */ -static void -show_contrast_tests(short *bad_contrast) +static void +show_contrast_tests (short *bad_contrast) { size_t v; int n_cols = 8; @@ -651,46 +661,46 @@ show_contrast_tests(short *bad_contrast) struct tab_table *t; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 3, 0, 1, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); /* Put a frame around the entire box, and vertical lines inside */ - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_box (t, - -1,-1, + tab_box (t, + -1, -1, TAL_0, TAL_0, 0, 0, 2, 0); - tab_hline(t, TAL_2, 0, n_cols - 1, 1); - tab_vline(t, TAL_2, 3, 0, n_rows - 1); + tab_hline (t, TAL_2, 0, n_cols - 1, 1); + tab_vline (t, TAL_2, 3, 0, n_rows - 1); tab_title (t, _("Contrast Tests")); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Contrast")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Value of Contrast")); + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Contrast")); + tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Value of Contrast")); tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error")); tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("t")); tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("df")); tab_text (t, 7, 0, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); - for ( v = 0 ; v < n_vars ; ++v ) + for (v = 0; v < n_vars; ++v) { int i; int lines_per_variable = 2 * cmd.sbc_contrast; tab_text (t, 0, (v * lines_per_variable) + 1, TAB_LEFT | TAT_TITLE, - var_to_string(vars[v])); + var_to_string (vars[v])); - for ( i = 0 ; i < cmd.sbc_contrast ; ++i ) + for (i = 0; i < cmd.sbc_contrast; ++i) { int ci; double contrast_value = 0.0; @@ -701,323 +711,333 @@ show_contrast_tests(short *bad_contrast) void *const *group_stat_array; double T; - double std_error_contrast ; + double std_error_contrast; double df; - double sec_vneq=0.0; + double sec_vneq = 0.0; - /* Note: The calculation of the degrees of freedom in the + /* Note: The calculation of the degrees of freedom in the "variances not equal" case is painfull!! The following formula may help to understand it: - \frac{\left(\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2} + \frac{\left (\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2} { - \sum_{i=1}^k\left( - \frac{\left(c_i^2\frac{s_i^2}{n_i}\right)^2} {n_i-1} + \sum_{i=1}^k\left ( + \frac{\left (c_i^2\frac{s_i^2}{n_i}\right)^2} {n_i-1} \right) } */ double df_denominator = 0.0; double df_numerator = 0.0; - if ( i == 0 ) + if ( i == 0 ) { - tab_text (t, 1, (v * lines_per_variable) + i + 1, + tab_text (t, 1, (v * lines_per_variable) + i + 1, TAB_LEFT | TAT_TITLE, _("Assume equal variances")); - tab_text (t, 1, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_LEFT | TAT_TITLE, + tab_text (t, 1, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + TAB_LEFT | TAT_TITLE, _("Does not assume equal")); } - tab_text (t, 2, (v * lines_per_variable) + i + 1, - TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1); + tab_text_format (t, 2, (v * lines_per_variable) + i + 1, + TAB_CENTER | TAT_TITLE, "%d", i + 1); - tab_text (t, 2, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1); + tab_text_format (t, 2, + (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + TAB_CENTER | TAT_TITLE, "%d", i + 1); - if ( bad_contrast[i]) + if ( bad_contrast[i]) continue; - group_stat_array = hsh_sort(group_hash); - - for (ci = 0 ; ci < hsh_count(group_hash) ; ++ci) + group_stat_array = hsh_sort (group_hash); + + for (ci = 0; ci < hsh_count (group_hash); ++ci) { - const double coef = subc_list_double_at(&cmd.dl_contrast[i], ci); + const double coef = subc_list_double_at (&cmd.dl_contrast[i], ci); struct group_statistics *gs = group_stat_array[ci]; - const double winv = (gs->std_dev * gs->std_dev) / gs->n; + const double winv = pow2 (gs->std_dev) / gs->n; contrast_value += coef * gs->mean; - coef_msq += (coef * coef) / gs->n ; + coef_msq += (coef * coef) / gs->n; - sec_vneq += (coef * coef) * (gs->std_dev * gs->std_dev ) /gs->n ; + sec_vneq += (coef * coef) * pow2 (gs->std_dev) /gs->n; df_numerator += (coef * coef) * winv; df_denominator += pow2((coef * coef) * winv) / (gs->n - 1); } - sec_vneq = sqrt(sec_vneq); + sec_vneq = sqrt (sec_vneq); - df_numerator = pow2(df_numerator); + df_numerator = pow2 (df_numerator); - tab_float (t, 3, (v * lines_per_variable) + i + 1, - TAB_RIGHT, contrast_value, 8,2); + tab_double (t, 3, (v * lines_per_variable) + i + 1, + TAB_RIGHT, contrast_value, NULL); - tab_float (t, 3, (v * lines_per_variable) + i + 1 + + tab_double (t, 3, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, contrast_value, 8,2); + TAB_RIGHT, contrast_value, NULL); - std_error_contrast = sqrt(grp_data->mse * coef_msq); + std_error_contrast = sqrt (grp_data->mse * coef_msq); /* Std. Error */ - tab_float (t, 4, (v * lines_per_variable) + i + 1, + tab_double (t, 4, (v * lines_per_variable) + i + 1, TAB_RIGHT, std_error_contrast, - 8,3); + NULL); - T = fabs(contrast_value / std_error_contrast) ; + T = fabs (contrast_value / std_error_contrast); /* T Statistic */ - tab_float (t, 5, (v * lines_per_variable) + i + 1, + tab_double (t, 5, (v * lines_per_variable) + i + 1, TAB_RIGHT, T, - 8,3); + NULL); df = grp_data->ugs.n - grp_data->n_groups; /* Degrees of Freedom */ - tab_float (t, 6, (v * lines_per_variable) + i + 1, + tab_fixed (t, 6, (v * lines_per_variable) + i + 1, TAB_RIGHT, df, - 8,0); + 8, 0); /* Significance TWO TAILED !!*/ - tab_float (t, 7, (v * lines_per_variable) + i + 1, - TAB_RIGHT, 2 * gsl_cdf_tdist_Q(T,df), - 8,3); - + tab_double (t, 7, (v * lines_per_variable) + i + 1, + TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T, df), + NULL); /* Now for the Variances NOT Equal case */ /* Std. Error */ - tab_float (t, 4, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + tab_double (t, 4, + (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, sec_vneq, - 8,3); - + NULL); T = contrast_value / sec_vneq; - tab_float (t, 5, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + tab_double (t, 5, + (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, T, - 8,3); - + NULL); df = df_numerator / df_denominator; - tab_float (t, 6, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + tab_double (t, 6, + (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, df, - 8,3); + NULL); /* The Significance */ - tab_float (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, 2 * gsl_cdf_tdist_Q(T,df), - 8,3); - - + tab_double (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T,df), + NULL); } - if ( v > 0 ) - tab_hline(t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1); + if ( v > 0 ) + tab_hline (t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1); } tab_submit (t); - } /* ONEWAY ANOVA Calculations */ -static void postcalc ( struct cmd_oneway *cmd UNUSED ); +static void postcalc (struct cmd_oneway *cmd UNUSED); -static void precalc ( struct cmd_oneway *cmd UNUSED ); +static void precalc (struct cmd_oneway *cmd UNUSED); /* Pre calculations */ -static void -precalc ( struct cmd_oneway *cmd UNUSED ) +static void +precalc (struct cmd_oneway *cmd UNUSED) { - size_t i=0; + size_t i = 0; - for(i=0; i< n_vars ; ++i) + for (i = 0; i < n_vars; ++i) { struct group_proc *gp = group_proc_get (vars[i]); struct group_statistics *totals = &gp->ugs; - + /* Create a hash for each of the dependent variables. - The hash contains a group_statistics structure, + The hash contains a group_statistics structure, and is keyed by value of the independent variable */ - gp->group_hash = - hsh_create(4, - (hsh_compare_func *) compare_group, - (hsh_hash_func *) hash_group, - (hsh_free_func *) free_group, - (void *) var_get_width (indep_var) ); - + gp->group_hash = hsh_create (4, compare_group, hash_group, + (hsh_free_func *) free_group, + indep_var); - totals->sum=0; - totals->n=0; - totals->ssq=0; - totals->sum_diff=0; - totals->maximum = - DBL_MAX; + totals->sum = 0; + totals->n = 0; + totals->ssq = 0; + totals->sum_diff = 0; + totals->maximum = -DBL_MAX; totals->minimum = DBL_MAX; } } +static int +compare_double_3way (const void *a_, const void *b_, const void *aux UNUSED) +{ + const double *a = a_; + const double *b = b_; + return *a < *b ? -1 : *a > *b; +} + +static unsigned +do_hash_double (const void *value_, const void *aux UNUSED) +{ + const double *value = value_; + return hash_double (*value, 0); +} + static void -free_value (void *value_, const void *aux UNUSED) +free_double (void *value_, const void *aux UNUSED) { - union value *value = value_; + double *value = value_; free (value); } -static bool -run_oneway(const struct ccase *first, const struct casefile *cf, - void *cmd_, const struct dataset *ds) +static void +run_oneway (struct cmd_oneway *cmd, + struct casereader *input, + const struct dataset *ds) { - struct casereader *r; - struct ccase c; - struct casefilter *filter = NULL; + struct taint *taint; + struct dictionary *dict = dataset_dict (ds); + enum mv_class exclude; + struct casereader *reader; + struct ccase *c; - struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_; + c = casereader_peek (input, 0); + if (c == NULL) + { + casereader_destroy (input); + return; + } + output_split_file_values (ds, c); + case_unref (c); - output_split_file_values (ds, first); + taint = taint_clone (casereader_get_taint (input)); - global_group_hash = hsh_create(4, - (hsh_compare_func *) compare_values, - (hsh_hash_func *) hash_value, - free_value, - (void *) var_get_width (indep_var) ); + global_group_hash = hsh_create (4, + compare_double_3way, + do_hash_double, + free_double, + indep_var); - precalc(cmd); + precalc (cmd); - filter = casefilter_create ( (cmd->incl != ONEWAY_INCLUDE - ? MV_ANY : MV_SYSTEM), - vars, n_vars ); + exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM; + input = casereader_create_filter_missing (input, &indep_var, 1, + exclude, NULL, NULL); + if (cmd->miss == ONEWAY_LISTWISE) + input = casereader_create_filter_missing (input, vars, n_vars, + exclude, NULL, NULL); + input = casereader_create_filter_weight (input, dict, NULL, NULL); - for(r = casefile_get_reader (cf, filter); - casereader_read (r, &c) ; - case_destroy (&c)) + reader = casereader_clone (input); + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { size_t i; - const double weight = - dict_get_case_weight (dataset_dict (ds), &c, &bad_weight_warn); + const double weight = dict_get_case_weight (dict, c, NULL); - const union value *indep_val; - void **p; - - if ( casefilter_variable_missing (filter, &c, indep_var)) - continue; - - indep_val = case_data (&c, indep_var); - p = hsh_probe (global_group_hash, indep_val); + const union value *indep_val = case_data (c, indep_var); + void **p = hsh_probe (global_group_hash, &indep_val->f); if (*p == NULL) - *p = value_dup (indep_val, var_get_width (indep_var)); - - hsh_insert ( global_group_hash, (void *) indep_val ); + { + double *value = *p = xmalloc (sizeof *value); + *value = indep_val->f; + } - for ( i = 0 ; i < n_vars ; ++i ) + for (i = 0; i < n_vars; ++i) { const struct variable *v = vars[i]; - const union value *val = case_data (&c, v); + const union value *val = case_data (c, v); struct group_proc *gp = group_proc_get (vars[i]); struct hsh_table *group_hash = gp->group_hash; struct group_statistics *gs; - gs = hsh_find(group_hash, (void *) indep_val ); + gs = hsh_find (group_hash, indep_val ); - if ( ! gs ) + if ( ! gs ) { gs = xmalloc (sizeof *gs); gs->id = *indep_val; - gs->sum=0; - gs->n=0; - gs->ssq=0; - gs->sum_diff=0; + gs->sum = 0; + gs->n = 0; + gs->ssq = 0; + gs->sum_diff = 0; gs->minimum = DBL_MAX; gs->maximum = -DBL_MAX; - hsh_insert ( group_hash, (void *) gs ); + hsh_insert ( group_hash, gs ); } - if (! casefilter_variable_missing (filter, &c, v)) + if (!var_is_value_missing (v, val, exclude)) { struct group_statistics *totals = &gp->ugs; - totals->n+=weight; - totals->sum+=weight * val->f; - totals->ssq+=weight * val->f * val->f; + totals->n += weight; + totals->sum += weight * val->f; + totals->ssq += weight * pow2 (val->f); - if ( val->f * weight < totals->minimum ) + if ( val->f * weight < totals->minimum ) totals->minimum = val->f * weight; - if ( val->f * weight > totals->maximum ) + if ( val->f * weight > totals->maximum ) totals->maximum = val->f * weight; - gs->n+=weight; - gs->sum+=weight * val->f; - gs->ssq+=weight * val->f * val->f; + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2 (val->f); - if ( val->f * weight < gs->minimum ) + if ( val->f * weight < gs->minimum ) gs->minimum = val->f * weight; - if ( val->f * weight > gs->maximum ) + if ( val->f * weight > gs->maximum ) gs->maximum = val->f * weight; } gp->n_groups = hsh_count ( group_hash ); } - + } + casereader_destroy (reader); - casereader_destroy (r); + postcalc (cmd); - postcalc(cmd); - - if ( stat_tables & STAT_HOMO ) - levene (dataset_dict (ds), cf, indep_var, n_vars, vars, - filter); + if ( stat_tables & STAT_HOMO ) + levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude); - casefilter_destroy (filter); + casereader_destroy (input); ostensible_number_of_groups = hsh_count (global_group_hash); + if (!taint_has_tainted_successor (taint)) + output_oneway (dict); - output_oneway(); - - return true; + taint_destroy (taint); } /* Post calculations for the ONEWAY command */ -void +void postcalc ( struct cmd_oneway *cmd UNUSED ) { - size_t i=0; - + size_t i = 0; - for(i = 0; i < n_vars ; ++i) + for (i = 0; i < n_vars; ++i) { struct group_proc *gp = group_proc_get (vars[i]); struct hsh_table *group_hash = gp->group_hash; @@ -1026,34 +1046,34 @@ postcalc ( struct cmd_oneway *cmd UNUSED ) struct hsh_iterator g; struct group_statistics *gs; - for (gs = hsh_first (group_hash,&g); - gs != 0; - gs = hsh_next(group_hash,&g)) + for (gs = hsh_first (group_hash, &g); + gs != 0; + gs = hsh_next (group_hash, &g)) { - gs->mean=gs->sum / gs->n; - gs->s_std_dev= sqrt( - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; + gs->mean = gs->sum / gs->n; + gs->s_std_dev = sqrt (gs->ssq / gs->n - pow2 (gs->mean)); - gs->std_dev= sqrt( - gs->n/(gs->n-1) * - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; - - gs->se_mean = gs->std_dev / sqrt(gs->n); - gs->mean_diff= gs->sum_diff / gs->n; + gs->std_dev = sqrt ( + gs->n / (gs->n - 1) * + ( gs->ssq / gs->n - pow2 (gs->mean)) + ); + gs->se_mean = gs->std_dev / sqrt (gs->n); + gs->mean_diff = gs->sum_diff / gs->n; } - - totals->mean = totals->sum / totals->n; - totals->std_dev= sqrt( - totals->n/(totals->n-1) * - ( (totals->ssq / totals->n ) - totals->mean * totals->mean ) - ) ; + totals->std_dev = sqrt ( + totals->n / (totals->n - 1) * + (totals->ssq / totals->n - pow2 (totals->mean)) + ); - totals->se_mean = totals->std_dev / sqrt(totals->n); - + totals->se_mean = totals->std_dev / sqrt (totals->n); } } + +/* + Local Variables: + mode: c + End: +*/