X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flanguage%2Fstats%2Foneway.q;h=7da992274ea999c8ad488a877264e6dba0aa5327;hb=c41f14854e73ad44824b54933ae96eb52f781fc2;hp=6c745cd15367fdab19724bc6e63a57b77ed25a2e;hpb=2322678e8fddbbf158b01b2720db2636404bba3b;p=pspp-builds.git diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.q index 6c745cd1..7da99227 100644 --- a/src/language/stats/oneway.q +++ b/src/language/stats/oneway.q @@ -1,49 +1,50 @@ -/* PSPP - One way ANOVA. -*-c-*- +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2007 Free Software Foundation, Inc. -Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. -Author: John Darrington 2004 + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. */ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ #include + #include -#include "message.h" +#include #include #include -#include -#include "alloc.h" -#include "str.h" -#include "case.h" -#include "dictionary.h" -#include "command.h" -#include "compiler.h" -#include "lexer.h" -#include "message.h" -#include "magic.h" -#include "misc.h" -#include "table.h" -#include "manager.h" -#include "value-labels.h" -#include "variable.h" -#include "procedure.h" -#include "hash.h" -#include "casefile.h" -#include "group-proc.h" -#include "group.h" -#include "levene.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sort-criteria.h" + +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -53,45 +54,37 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA /* (specification) "ONEWAY" (oneway_): *^variables=custom; - +missing=miss:!analysis/listwise, - incl:include/!exclude; - contrast= double list; - statistics[st_]=descriptives,homogeneity. + missing=miss:!analysis/listwise, + incl:include/!exclude; + +contrast= double list; + +statistics[st_]=descriptives,homogeneity. */ /* (declarations) */ /* (functions) */ - - -static int bad_weight_warn = 1; - - static struct cmd_oneway cmd; /* The independent variable */ -static struct variable *indep_var; +static const struct variable *indep_var; /* Number of dependent variables */ static size_t n_vars; /* The dependent variables */ -static struct variable **vars; +static const struct variable **vars; /* A hash table containing all the distinct values of the independent variables */ static struct hsh_table *global_group_hash ; -/* The number of distinct values of the independent variable, when all +/* The number of distinct values of the independent variable, when all missing values are disregarded */ -static int ostensible_number_of_groups=-1; - +static int ostensible_number_of_groups = -1; -/* Function to use for testing for missing values */ -static is_missing_func *value_is_missing; - -static bool run_oneway(const struct casefile *cf, void *_mode); +static void run_oneway (struct cmd_oneway *, struct casereader *, + const struct dataset *); /* Routines to show the output tables */ @@ -111,25 +104,21 @@ void output_oneway(void); int -cmd_oneway(void) +cmd_oneway (struct lexer *lexer, struct dataset *ds) { + struct casegrouper *grouper; + struct casereader *group; int i; bool ok; - if ( !parse_oneway(&cmd) ) + if ( !parse_oneway (lexer, ds, &cmd, NULL) ) return CMD_FAILURE; - /* If /MISSING=INCLUDE is set, then user missing values are ignored */ - if (cmd.incl == ONEWAY_INCLUDE ) - value_is_missing = mv_is_value_system_missing; - else - value_is_missing = mv_is_value_missing; - /* What statistics were requested */ - if ( cmd.sbc_statistics ) + if ( cmd.sbc_statistics ) { - for (i = 0 ; i < ONEWAY_ST_count ; ++i ) + for (i = 0 ; i < ONEWAY_ST_count ; ++i ) { if ( ! cmd.a_statistics[i] ) continue; @@ -144,7 +133,12 @@ cmd_oneway(void) } } - ok = multipass_procedure_with_splits (run_oneway, &cmd); + /* Data pass. FIXME: error handling. */ + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + run_oneway (&cmd, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; free (vars); free_oneway (&cmd); @@ -157,21 +151,21 @@ void output_oneway(void) { size_t i; - short *bad_contrast ; + short *bad_contrast ; bad_contrast = xnmalloc (cmd.sbc_contrast, sizeof *bad_contrast); /* Check the sanity of the given contrast values */ - for (i = 0 ; i < cmd.sbc_contrast ; ++i ) + for (i = 0 ; i < cmd.sbc_contrast ; ++i ) { int j; double sum = 0; bad_contrast[i] = 0; - if ( subc_list_double_count(&cmd.dl_contrast[i]) != + if ( subc_list_double_count(&cmd.dl_contrast[i]) != ostensible_number_of_groups ) { - msg(SW, + msg(SW, _("Number of contrast coefficients must equal the number of groups")); bad_contrast[i] = 1; continue; @@ -180,18 +174,18 @@ output_oneway(void) for (j=0; j < ostensible_number_of_groups ; ++j ) sum += subc_list_double_at(&cmd.dl_contrast[i],j); - if ( sum != 0.0 ) - msg(SW,_("Coefficients for contrast %d do not total zero"),i + 1); + if ( sum != 0.0 ) + msg(SW,_("Coefficients for contrast %zu do not total zero"), i + 1); } - if ( stat_tables & STAT_DESC ) + if ( stat_tables & STAT_DESC ) show_descriptives(); if ( stat_tables & STAT_HOMO ) show_homogeneity(); show_anova_table(); - + if (cmd.sbc_contrast ) { show_contrast_coeffs(bad_contrast); @@ -202,7 +196,7 @@ output_oneway(void) free(bad_contrast); /* Clean up */ - for (i = 0 ; i < n_vars ; ++i ) + for (i = 0 ; i < n_vars ; ++i ) { struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash; @@ -218,18 +212,20 @@ output_oneway(void) /* Parser for the variables sub command */ static int -oneway_custom_variables(struct cmd_oneway *cmd UNUSED) +oneway_custom_variables (struct lexer *lexer, + struct dataset *ds, struct cmd_oneway *cmd UNUSED, + void *aux UNUSED) { + struct dictionary *dict = dataset_dict (ds); - lex_match('='); + lex_match (lexer, '='); - if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - && token != T_ALL) + if ((lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) return 2; - - if (!parse_variables (default_dict, &vars, &n_vars, - PV_DUPLICATE + if (!parse_variables_const (lexer, dict, &vars, &n_vars, + PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) { free (vars); @@ -238,25 +234,23 @@ oneway_custom_variables(struct cmd_oneway *cmd UNUSED) assert(n_vars); - if ( ! lex_match(T_BY)) + if ( ! lex_match (lexer, T_BY)) return 2; + indep_var = parse_variable (lexer, dict); - indep_var = parse_variable(); - - if ( !indep_var ) + if ( !indep_var ) { - msg(SE,_("`%s' is not a variable name"),tokid); + msg(SE,_("`%s' is not a variable name"),lex_tokid (lexer)); return 0; } - return 1; } /* Show the ANOVA table */ -static void +static void show_anova_table(void) { size_t i; @@ -271,7 +265,7 @@ show_anova_table(void) tab_dim (t, tab_natural_dimensions); - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, @@ -280,7 +274,7 @@ show_anova_table(void) tab_hline (t, TAL_2, 0, n_cols - 1, 1 ); tab_vline (t, TAL_2, 2, 0, n_rows - 1); tab_vline (t, TAL_0, 1, 0, 0); - + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df")); tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square")); @@ -288,7 +282,7 @@ show_anova_table(void) tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance")); - for ( i=0 ; i < n_vars ; ++i ) + for ( i=0 ; i < n_vars ; ++i ) { struct group_statistics *totals = &group_proc_get (vars[i])->ugs; struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash; @@ -297,20 +291,20 @@ show_anova_table(void) double ssa=0; const char *s = var_to_string(vars[i]); - for (gs = hsh_first (group_hash,&g); - gs != 0; + for (gs = hsh_first (group_hash,&g); + gs != 0; gs = hsh_next(group_hash,&g)) { ssa += (gs->sum * gs->sum)/gs->n; } - + ssa -= ( totals->sum * totals->sum ) / totals->n ; tab_text (t, 0, i * 3 + 1, TAB_LEFT | TAT_TITLE, s); tab_text (t, 1, i * 3 + 1, TAB_LEFT | TAT_TITLE, _("Between Groups")); tab_text (t, 1, i * 3 + 2, TAB_LEFT | TAT_TITLE, _("Within Groups")); tab_text (t, 1, i * 3 + 3, TAB_LEFT | TAT_TITLE, _("Total")); - + if (i > 0) tab_hline(t, TAL_1, 0, n_cols - 1 , i * 3 + 1); @@ -320,10 +314,10 @@ show_anova_table(void) const double df1 = gp->n_groups - 1; const double df2 = totals->n - gp->n_groups ; const double msa = ssa / df1; - + gp->mse = (sst - ssa) / df2; - - + + /* Sums of Squares */ tab_float (t, 2, i * 3 + 1, 0, ssa, 10, 2); tab_float (t, 2, i * 3 + 3, 0, sst, 10, 2); @@ -338,14 +332,14 @@ show_anova_table(void) /* Mean Squares */ tab_float (t, 4, i * 3 + 1, TAB_RIGHT, msa, 8, 3); tab_float (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, 8, 3); - - { + + { const double F = msa/gp->mse ; /* The F value */ tab_float (t, 5, i * 3 + 1, 0, F, 8, 3); - + /* The significance */ tab_float (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q(F,df1,df2), 8, 3); } @@ -355,14 +349,13 @@ show_anova_table(void) } - tab_title (t, 0, _("ANOVA")); + tab_title (t, _("ANOVA")); tab_submit (t); - - } + /* Show the descriptives table */ -static void +static void show_descriptives(void) { size_t v; @@ -373,10 +366,10 @@ show_descriptives(void) const double confidence=0.95; const double q = (1.0 - confidence) / 2.0; - - int n_rows = 2 ; - for ( v = 0 ; v < n_vars ; ++v ) + int n_rows = 2 ; + + for ( v = 0 ; v < n_vars ; ++v ) n_rows += group_proc_get (vars[v])->n_groups + 1; t = tab_create (n_cols,n_rows,0); @@ -385,7 +378,7 @@ show_descriptives(void) /* Put a frame around the entire box, and vertical lines inside */ - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, @@ -394,7 +387,7 @@ show_descriptives(void) /* Underline headers */ tab_hline (t, TAL_2, 0, n_cols - 1, 2 ); tab_vline (t, TAL_2, 2, 0, n_rows - 1); - + tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("N")); tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -412,66 +405,74 @@ show_descriptives(void) tab_text (t, 9, 1, TAB_CENTER | TAT_TITLE, _("Maximum")); - tab_title (t, 0, _("Descriptives")); + tab_title (t, _("Descriptives")); row = 2; - for ( v=0 ; v < n_vars ; ++v ) + for ( v=0 ; v < n_vars ; ++v ) { double T; double std_error; - + struct group_proc *gp = group_proc_get (vars[v]); struct group_statistics *gs; - struct group_statistics *totals = &gp->ugs; + struct group_statistics *totals = &gp->ugs; const char *s = var_to_string(vars[v]); - struct group_statistics *const *gs_array = hsh_sort(gp->group_hash); + struct group_statistics *const *gs_array = + (struct group_statistics *const *) hsh_sort(gp->group_hash); int count = 0; tab_text (t, 0, row, TAB_LEFT | TAT_TITLE, s); - if ( v > 0) + if ( v > 0) tab_hline(t, TAL_1, 0, n_cols - 1 , row); for (count = 0 ; count < hsh_count(gp->group_hash) ; ++count) { + struct string vstr; + ds_init_empty (&vstr); gs = gs_array[count]; - tab_text (t, 1, row + count, - TAB_LEFT | TAT_TITLE ,value_to_string(&gs->id,indep_var)); + var_append_value_name (indep_var, &gs->id, &vstr); + + tab_text (t, 1, row + count, + TAB_LEFT | TAT_TITLE, + ds_cstr (&vstr)); + + ds_destroy (&vstr); /* Now fill in the numbers ... */ tab_float (t, 2, row + count, 0, gs->n, 8,0); tab_float (t, 3, row + count, 0, gs->mean,8,2); - + tab_float (t, 4, row + count, 0, gs->std_dev,8,2); std_error = gs->std_dev/sqrt(gs->n) ; - tab_float (t, 5, row + count, 0, + tab_float (t, 5, row + count, 0, std_error, 8,2); /* Now the confidence interval */ - + T = gsl_cdf_tdist_Qinv(q,gs->n - 1); tab_float(t, 6, row + count, 0, - gs->mean - T * std_error, 8, 2); + gs->mean - T * std_error, 8, 2); tab_float(t, 7, row + count, 0, - gs->mean + T * std_error, 8, 2); + gs->mean + T * std_error, 8, 2); /* Min and Max */ - tab_float(t, 8, row + count, 0, gs->minimum, 8, 2); - tab_float(t, 9, row + count, 0, gs->maximum, 8, 2); + tab_float(t, 8, row + count, 0, gs->minimum, 8, 2); + tab_float(t, 9, row + count, 0, gs->maximum, 8, 2); } - tab_text (t, 1, row + count, + tab_text (t, 1, row + count, TAB_LEFT | TAT_TITLE ,_("Total")); tab_float (t, 2, row + count, 0, totals->n, 8,0); @@ -485,19 +486,19 @@ show_descriptives(void) tab_float (t, 5, row + count, 0, std_error, 8,2); /* Now the confidence interval */ - + T = gsl_cdf_tdist_Qinv(q,totals->n - 1); tab_float(t, 6, row + count, 0, - totals->mean - T * std_error, 8, 2); + totals->mean - T * std_error, 8, 2); tab_float(t, 7, row + count, 0, - totals->mean + T * std_error, 8, 2); + totals->mean + T * std_error, 8, 2); /* Min and Max */ - tab_float(t, 8, row + count, 0, totals->minimum, 8, 2); - tab_float(t, 9, row + count, 0, totals->maximum, 8, 2); + tab_float(t, 8, row + count, 0, totals->minimum, 8, 2); + tab_float(t, 9, row + count, 0, totals->maximum, 8, 2); row += gp->n_groups + 1; } @@ -509,7 +510,7 @@ show_descriptives(void) } /* Show the homogeneity table */ -static void +static void show_homogeneity(void) { size_t v; @@ -524,7 +525,7 @@ show_homogeneity(void) tab_dim (t, tab_natural_dimensions); /* Put a frame around the entire box, and vertical lines inside */ - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, @@ -539,11 +540,11 @@ show_homogeneity(void) tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df1")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df2")); tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Significance")); - - tab_title (t, 0, _("Test of Homogeneity of Variances")); - for ( v=0 ; v < n_vars ; ++v ) + tab_title (t, _("Test of Homogeneity of Variances")); + + for ( v=0 ; v < n_vars ; ++v ) { double F; const struct variable *var = vars[v]; @@ -566,19 +567,17 @@ show_homogeneity(void) } tab_submit (t); - - } /* Show the contrast coefficients table */ -static void -show_contrast_coeffs(short *bad_contrast) +static void +show_contrast_coeffs (short *bad_contrast) { int n_cols = 2 + ostensible_number_of_groups; int n_rows = 2 + cmd.sbc_contrast; union value *group_value; - int count = 0 ; + int count = 0 ; void *const *group_values ; struct tab_table *t; @@ -588,13 +587,13 @@ show_contrast_coeffs(short *bad_contrast) tab_dim (t, tab_natural_dimensions); /* Put a frame around the entire box, and vertical lines inside */ - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_box (t, + tab_box (t, -1,-1, TAL_0, TAL_0, 2, 0, @@ -611,44 +610,52 @@ show_contrast_coeffs(short *bad_contrast) tab_vline(t, TAL_2, 2, 0, n_rows - 1); - tab_title (t, 0, _("Contrast Coefficients")); + tab_title (t, _("Contrast Coefficients")); tab_text (t, 0, 2, TAB_LEFT | TAT_TITLE, _("Contrast")); - tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, + tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, var_to_string(indep_var)); group_values = hsh_sort(global_group_hash); - for (count = 0 ; - count < hsh_count(global_group_hash) ; + for (count = 0 ; + count < hsh_count(global_group_hash) ; ++count) { int i; + struct string vstr; group_value = group_values[count]; - tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, - value_to_string(group_value, indep_var)); + ds_init_empty (&vstr); - for (i = 0 ; i < cmd.sbc_contrast ; ++i ) + var_append_value_name (indep_var, group_value, &vstr); + + tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, + ds_cstr (&vstr)); + + ds_destroy (&vstr); + + + for (i = 0 ; i < cmd.sbc_contrast ; ++i ) { tab_text(t, 1, i + 2, TAB_CENTER | TAT_PRINTF, "%d", i + 1); - if ( bad_contrast[i] ) + if ( bad_contrast[i] ) tab_text(t, count + 2, i + 2, TAB_RIGHT, "?" ); else - tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g", + tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g", subc_list_double_at(&cmd.dl_contrast[i], count) ); } } - + tab_submit (t); } /* Show the results of the contrast tests */ -static void +static void show_contrast_tests(short *bad_contrast) { size_t v; @@ -662,13 +669,13 @@ show_contrast_tests(short *bad_contrast) tab_dim (t, tab_natural_dimensions); /* Put a frame around the entire box, and vertical lines inside */ - tab_box (t, + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_box (t, + tab_box (t, -1,-1, TAL_0, TAL_0, 0, 0, @@ -678,7 +685,7 @@ show_contrast_tests(short *bad_contrast) tab_vline(t, TAL_2, 3, 0, n_rows - 1); - tab_title (t, 0, _("Contrast Tests")); + tab_title (t, _("Contrast Tests")); tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Contrast")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Value of Contrast")); @@ -687,7 +694,7 @@ show_contrast_tests(short *bad_contrast) tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("df")); tab_text (t, 7, 0, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); - for ( v = 0 ; v < n_vars ; ++v ) + for ( v = 0 ; v < n_vars ; ++v ) { int i; int lines_per_variable = 2 * cmd.sbc_contrast; @@ -696,7 +703,7 @@ show_contrast_tests(short *bad_contrast) tab_text (t, 0, (v * lines_per_variable) + 1, TAB_LEFT | TAT_TITLE, var_to_string(vars[v])); - for ( i = 0 ; i < cmd.sbc_contrast ; ++i ) + for ( i = 0 ; i < cmd.sbc_contrast ; ++i ) { int ci; double contrast_value = 0.0; @@ -712,7 +719,7 @@ show_contrast_tests(short *bad_contrast) double sec_vneq=0.0; - /* Note: The calculation of the degrees of freedom in the + /* Note: The calculation of the degrees of freedom in the "variances not equal" case is painfull!! The following formula may help to understand it: \frac{\left(\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2} @@ -725,18 +732,18 @@ show_contrast_tests(short *bad_contrast) double df_denominator = 0.0; double df_numerator = 0.0; - if ( i == 0 ) + if ( i == 0 ) { - tab_text (t, 1, (v * lines_per_variable) + i + 1, + tab_text (t, 1, (v * lines_per_variable) + i + 1, TAB_LEFT | TAT_TITLE, _("Assume equal variances")); - tab_text (t, 1, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_LEFT | TAT_TITLE, + tab_text (t, 1, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + TAB_LEFT | TAT_TITLE, _("Does not assume equal")); } - tab_text (t, 2, (v * lines_per_variable) + i + 1, + tab_text (t, 2, (v * lines_per_variable) + i + 1, TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1); @@ -744,11 +751,11 @@ show_contrast_tests(short *bad_contrast) TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1); - if ( bad_contrast[i]) + if ( bad_contrast[i]) continue; group_stat_array = hsh_sort(group_hash); - + for (ci = 0 ; ci < hsh_count(group_hash) ; ++ci) { const double coef = subc_list_double_at(&cmd.dl_contrast[i], ci); @@ -758,7 +765,7 @@ show_contrast_tests(short *bad_contrast) contrast_value += coef * gs->mean; - coef_msq += (coef * coef) / gs->n ; + coef_msq += (coef * coef) / gs->n ; sec_vneq += (coef * coef) * (gs->std_dev * gs->std_dev ) /gs->n ; @@ -769,17 +776,17 @@ show_contrast_tests(short *bad_contrast) df_numerator = pow2(df_numerator); - tab_float (t, 3, (v * lines_per_variable) + i + 1, + tab_float (t, 3, (v * lines_per_variable) + i + 1, TAB_RIGHT, contrast_value, 8,2); - tab_float (t, 3, (v * lines_per_variable) + i + 1 + + tab_float (t, 3, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, contrast_value, 8,2); std_error_contrast = sqrt(grp_data->mse * coef_msq); /* Std. Error */ - tab_float (t, 4, (v * lines_per_variable) + i + 1, + tab_float (t, 4, (v * lines_per_variable) + i + 1, TAB_RIGHT, std_error_contrast, 8,3); @@ -787,20 +794,20 @@ show_contrast_tests(short *bad_contrast) /* T Statistic */ - tab_float (t, 5, (v * lines_per_variable) + i + 1, + tab_float (t, 5, (v * lines_per_variable) + i + 1, TAB_RIGHT, T, 8,3); df = grp_data->ugs.n - grp_data->n_groups; /* Degrees of Freedom */ - tab_float (t, 6, (v * lines_per_variable) + i + 1, + tab_float (t, 6, (v * lines_per_variable) + i + 1, TAB_RIGHT, df, 8,0); /* Significance TWO TAILED !!*/ - tab_float (t, 7, (v * lines_per_variable) + i + 1, + tab_float (t, 7, (v * lines_per_variable) + i + 1, TAB_RIGHT, 2 * gsl_cdf_tdist_Q(T,df), 8,3); @@ -808,23 +815,23 @@ show_contrast_tests(short *bad_contrast) /* Now for the Variances NOT Equal case */ /* Std. Error */ - tab_float (t, 4, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + tab_float (t, 4, + (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, sec_vneq, 8,3); T = contrast_value / sec_vneq; - tab_float (t, 5, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + tab_float (t, 5, + (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, T, 8,3); df = df_numerator / df_denominator; - tab_float (t, 6, - (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + tab_float (t, 6, + (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, df, 8,3); @@ -837,7 +844,7 @@ show_contrast_tests(short *bad_contrast) } - if ( v > 0 ) + if ( v > 0 ) tab_hline(t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1); } @@ -855,26 +862,26 @@ static void precalc ( struct cmd_oneway *cmd UNUSED ); /* Pre calculations */ -static void +static void precalc ( struct cmd_oneway *cmd UNUSED ) { size_t i=0; - for(i=0; i< n_vars ; ++i) + for(i=0; i< n_vars ; ++i) { struct group_proc *gp = group_proc_get (vars[i]); struct group_statistics *totals = &gp->ugs; - + /* Create a hash for each of the dependent variables. - The hash contains a group_statistics structure, + The hash contains a group_statistics structure, and is keyed by value of the independent variable */ - gp->group_hash = - hsh_create(4, + gp->group_hash = + hsh_create(4, (hsh_compare_func *) compare_group, (hsh_hash_func *) hash_group, (hsh_free_func *) free_group, - (void *) indep_var->width ); + (void *) var_get_width (indep_var) ); totals->sum=0; @@ -886,61 +893,67 @@ precalc ( struct cmd_oneway *cmd UNUSED ) } } +static void +free_value (void *value_, const void *aux UNUSED) +{ + union value *value = value_; + free (value); +} -static bool -run_oneway(const struct casefile *cf, void *cmd_) +static void +run_oneway (struct cmd_oneway *cmd, + struct casereader *input, + const struct dataset *ds) { - struct casereader *r; + struct taint *taint; + struct dictionary *dict = dataset_dict (ds); + enum mv_class exclude; + struct casereader *reader; struct ccase c; - struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_; + if (!casereader_peek (input, 0, &c)) + { + casereader_destroy (input); + return; + } + output_split_file_values (ds, &c); + case_destroy (&c); + + taint = taint_clone (casereader_get_taint (input)); - global_group_hash = hsh_create(4, + global_group_hash = hsh_create(4, (hsh_compare_func *) compare_values, (hsh_hash_func *) hash_value, - 0, - (void *) indep_var->width ); + free_value, + (void *) var_get_width (indep_var) ); + precalc(cmd); - for(r = casefile_get_reader (cf); - casereader_read (r, &c) ; - case_destroy (&c)) + exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM; + input = casereader_create_filter_missing (input, &indep_var, 1, + exclude, NULL); + if (cmd->miss == ONEWAY_LISTWISE) + input = casereader_create_filter_missing (input, vars, n_vars, + exclude, NULL); + input = casereader_create_filter_weight (input, dict, NULL, NULL); + + reader = casereader_clone (input); + for (; casereader_read (reader, &c); case_destroy (&c)) { size_t i; - const double weight = - dict_get_case_weight(default_dict,&c,&bad_weight_warn); - - const union value *indep_val = case_data (&c, indep_var->fv); - - /* Deal with missing values */ - if ( value_is_missing(&indep_var->miss, indep_val) ) - continue; - - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( cmd->miss == ONEWAY_LISTWISE ) - { - for(i = 0; i < n_vars ; ++i) - { - const struct variable *v = vars[i]; - const union value *val = case_data (&c, v->fv); - - if (value_is_missing(&v->miss, val) ) - break; - } - if ( i != n_vars ) - continue; + const double weight = dict_get_case_weight (dict, &c, NULL); - } - - - hsh_insert ( global_group_hash, (void *) indep_val ); + const union value *indep_val = case_data (&c, indep_var); + void **p = hsh_probe (global_group_hash, indep_val); + if (*p == NULL) + *p = value_dup (indep_val, var_get_width (indep_var)); - for ( i = 0 ; i < n_vars ; ++i ) + for ( i = 0 ; i < n_vars ; ++i ) { const struct variable *v = vars[i]; - const union value *val = case_data (&c, v->fv); + const union value *val = case_data (&c, v); struct group_proc *gp = group_proc_get (vars[i]); struct hsh_table *group_hash = gp->group_hash; @@ -949,7 +962,7 @@ run_oneway(const struct casefile *cf, void *cmd_) gs = hsh_find(group_hash, (void *) indep_val ); - if ( ! gs ) + if ( ! gs ) { gs = xmalloc (sizeof *gs); gs->id = *indep_val; @@ -962,8 +975,8 @@ run_oneway(const struct casefile *cf, void *cmd_) hsh_insert ( group_hash, (void *) gs ); } - - if (! value_is_missing(&v->miss, val) ) + + if (!var_is_value_missing (v, val, exclude)) { struct group_statistics *totals = &gp->ugs; @@ -971,54 +984,53 @@ run_oneway(const struct casefile *cf, void *cmd_) totals->sum+=weight * val->f; totals->ssq+=weight * val->f * val->f; - if ( val->f * weight < totals->minimum ) + if ( val->f * weight < totals->minimum ) totals->minimum = val->f * weight; - if ( val->f * weight > totals->maximum ) + if ( val->f * weight > totals->maximum ) totals->maximum = val->f * weight; gs->n+=weight; gs->sum+=weight * val->f; gs->ssq+=weight * val->f * val->f; - if ( val->f * weight < gs->minimum ) + if ( val->f * weight < gs->minimum ) gs->minimum = val->f * weight; - if ( val->f * weight > gs->maximum ) + if ( val->f * weight > gs->maximum ) gs->maximum = val->f * weight; } gp->n_groups = hsh_count ( group_hash ); } - + } - casereader_destroy (r); + casereader_destroy (reader); postcalc(cmd); - - if ( stat_tables & STAT_HOMO ) - levene(cf, indep_var, n_vars, vars, - (cmd->miss == ONEWAY_LISTWISE) ? LEV_LISTWISE : LEV_ANALYSIS , - value_is_missing); - ostensible_number_of_groups = hsh_count (global_group_hash); + if ( stat_tables & STAT_HOMO ) + levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude); + casereader_destroy (input); - output_oneway(); + ostensible_number_of_groups = hsh_count (global_group_hash); - return true; + if (!taint_has_tainted_successor (taint)) + output_oneway(); + taint_destroy (taint); } /* Post calculations for the ONEWAY command */ -void +void postcalc ( struct cmd_oneway *cmd UNUSED ) { size_t i=0; - for(i = 0; i < n_vars ; ++i) + for(i = 0; i < n_vars ; ++i) { struct group_proc *gp = group_proc_get (vars[i]); struct hsh_table *group_hash = gp->group_hash; @@ -1027,8 +1039,8 @@ postcalc ( struct cmd_oneway *cmd UNUSED ) struct hsh_iterator g; struct group_statistics *gs; - for (gs = hsh_first (group_hash,&g); - gs != 0; + for (gs = hsh_first (group_hash,&g); + gs != 0; gs = hsh_next(group_hash,&g)) { gs->mean=gs->sum / gs->n; @@ -1055,6 +1067,12 @@ postcalc ( struct cmd_oneway *cmd UNUSED ) ) ; totals->se_mean = totals->std_dev / sqrt(totals->n); - + } } + +/* + Local Variables: + mode: c + End: +*/