-/* PSPP - One way ANOVA. -*-c-*-
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2007 Free Software Foundation, Inc.
-Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
-Author: John Darrington 2004
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <stdlib.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/value-labels.h>
#include <language/command.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
-#include <libpspp/alloc.h>
#include <libpspp/compiler.h>
#include <libpspp/hash.h>
-#include <libpspp/magic.h>
-#include <libpspp/message.h>
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/taint.h>
#include <math/group-proc.h>
#include <math/group.h>
#include <math/levene.h>
#include <output/manager.h>
#include <output/table.h>
#include "sort-criteria.h"
+#include <data/format.h>
+
+#include "xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
/* (declarations) */
/* (functions) */
-
-
-static bool bad_weight_warn = true;
-
-
static struct cmd_oneway cmd;
/* The independent variable */
-static struct variable *indep_var;
+static const struct variable *indep_var;
/* Number of dependent variables */
static size_t n_vars;
/* The dependent variables */
-static struct variable **vars;
+static const struct variable **vars;
/* A hash table containing all the distinct values of the independent
variable */
static struct hsh_table *global_group_hash ;
-/* The number of distinct values of the independent variable, when all
+/* The number of distinct values of the independent variable, when all
missing values are disregarded */
-static int ostensible_number_of_groups=-1;
-
-
-/* Function to use for testing for missing values */
-static is_missing_func *value_is_missing;
+static int ostensible_number_of_groups = -1;
-static bool run_oneway(const struct ccase *first,
- const struct casefile *cf, void *_mode);
+static void run_oneway (struct cmd_oneway *, struct casereader *,
+ const struct dataset *);
/* Routines to show the output tables */
static void show_anova_table(void);
-static void show_descriptives(void);
+static void show_descriptives (const struct dictionary *dict);
static void show_homogeneity(void);
static void show_contrast_coeffs(short *);
static enum stat_table_t stat_tables ;
-void output_oneway(void);
+static void output_oneway (const struct dictionary *dict);
int
-cmd_oneway(void)
+cmd_oneway (struct lexer *lexer, struct dataset *ds)
{
+ struct casegrouper *grouper;
+ struct casereader *group;
int i;
bool ok;
- if ( !parse_oneway(&cmd, NULL) )
+ if ( !parse_oneway (lexer, ds, &cmd, NULL) )
return CMD_FAILURE;
- /* If /MISSING=INCLUDE is set, then user missing values are ignored */
- if (cmd.incl == ONEWAY_INCLUDE )
- value_is_missing = mv_is_value_system_missing;
- else
- value_is_missing = mv_is_value_missing;
-
/* What statistics were requested */
- if ( cmd.sbc_statistics )
+ if ( cmd.sbc_statistics )
{
- for (i = 0 ; i < ONEWAY_ST_count ; ++i )
+ for (i = 0 ; i < ONEWAY_ST_count ; ++i )
{
if ( ! cmd.a_statistics[i] ) continue;
}
}
- ok = multipass_procedure_with_splits (current_dataset, run_oneway, &cmd);
+ /* Data pass. FIXME: error handling. */
+ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ run_oneway (&cmd, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
free (vars);
free_oneway (&cmd);
}
-void
-output_oneway(void)
+static void
+output_oneway (const struct dictionary *dict)
{
size_t i;
- short *bad_contrast ;
+ short *bad_contrast ;
bad_contrast = xnmalloc (cmd.sbc_contrast, sizeof *bad_contrast);
/* Check the sanity of the given contrast values */
- for (i = 0 ; i < cmd.sbc_contrast ; ++i )
+ for (i = 0 ; i < cmd.sbc_contrast ; ++i )
{
int j;
double sum = 0;
bad_contrast[i] = 0;
- if ( subc_list_double_count(&cmd.dl_contrast[i]) !=
+ if ( subc_list_double_count(&cmd.dl_contrast[i]) !=
ostensible_number_of_groups )
{
- msg(SW,
+ msg(SW,
_("Number of contrast coefficients must equal the number of groups"));
bad_contrast[i] = 1;
continue;
for (j=0; j < ostensible_number_of_groups ; ++j )
sum += subc_list_double_at(&cmd.dl_contrast[i],j);
- if ( sum != 0.0 )
- msg(SW,_("Coefficients for contrast %d do not total zero"),i + 1);
+ if ( sum != 0.0 )
+ msg(SW,_("Coefficients for contrast %zu do not total zero"), i + 1);
}
- if ( stat_tables & STAT_DESC )
- show_descriptives();
+ if ( stat_tables & STAT_DESC )
+ show_descriptives (dict);
if ( stat_tables & STAT_HOMO )
show_homogeneity();
show_anova_table();
-
+
if (cmd.sbc_contrast )
{
show_contrast_coeffs(bad_contrast);
free(bad_contrast);
/* Clean up */
- for (i = 0 ; i < n_vars ; ++i )
+ for (i = 0 ; i < n_vars ; ++i )
{
struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash;
/* Parser for the variables subcommand */
static int
-oneway_custom_variables(struct cmd_oneway *cmd UNUSED, void *aux UNUSED)
+oneway_custom_variables (struct lexer *lexer,
+ struct dataset *ds, struct cmd_oneway *cmd UNUSED,
+ void *aux UNUSED)
{
+ struct dictionary *dict = dataset_dict (ds);
- lex_match('=');
+ lex_match (lexer, '=');
- if ((token != T_ID || dict_lookup_var (dataset_dict (current_dataset), tokid) == NULL)
- && token != T_ALL)
+ if ((lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
+ && lex_token (lexer) != T_ALL)
return 2;
-
- if (!parse_variables (dataset_dict (current_dataset), &vars, &n_vars,
- PV_DUPLICATE
+ if (!parse_variables_const (lexer, dict, &vars, &n_vars,
+ PV_DUPLICATE
| PV_NUMERIC | PV_NO_SCRATCH) )
{
free (vars);
assert(n_vars);
- if ( ! lex_match(T_BY))
+ if ( ! lex_match (lexer, T_BY))
return 2;
+ indep_var = parse_variable (lexer, dict);
- indep_var = parse_variable();
-
- if ( !indep_var )
+ if ( !indep_var )
{
- msg(SE,_("`%s' is not a variable name"),tokid);
+ msg(SE,_("`%s' is not a variable name"),lex_tokid (lexer));
return 0;
}
-
return 1;
}
/* Show the ANOVA table */
-static void
+static void
show_anova_table(void)
{
size_t i;
tab_dim (t, tab_natural_dimensions);
- tab_box (t,
+ tab_box (t,
TAL_2, TAL_2,
-1, TAL_1,
0, 0,
tab_hline (t, TAL_2, 0, n_cols - 1, 1 );
tab_vline (t, TAL_2, 2, 0, n_rows - 1);
tab_vline (t, TAL_0, 1, 0, 0);
-
+
tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares"));
tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df"));
tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square"));
tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
- for ( i=0 ; i < n_vars ; ++i )
+ for ( i=0 ; i < n_vars ; ++i )
{
struct group_statistics *totals = &group_proc_get (vars[i])->ugs;
struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash;
double ssa=0;
const char *s = var_to_string(vars[i]);
- for (gs = hsh_first (group_hash,&g);
- gs != 0;
+ for (gs = hsh_first (group_hash,&g);
+ gs != 0;
gs = hsh_next(group_hash,&g))
{
ssa += (gs->sum * gs->sum)/gs->n;
}
-
+
ssa -= ( totals->sum * totals->sum ) / totals->n ;
tab_text (t, 0, i * 3 + 1, TAB_LEFT | TAT_TITLE, s);
tab_text (t, 1, i * 3 + 1, TAB_LEFT | TAT_TITLE, _("Between Groups"));
tab_text (t, 1, i * 3 + 2, TAB_LEFT | TAT_TITLE, _("Within Groups"));
tab_text (t, 1, i * 3 + 3, TAB_LEFT | TAT_TITLE, _("Total"));
-
+
if (i > 0)
tab_hline(t, TAL_1, 0, n_cols - 1 , i * 3 + 1);
const double df1 = gp->n_groups - 1;
const double df2 = totals->n - gp->n_groups ;
const double msa = ssa / df1;
-
+
gp->mse = (sst - ssa) / df2;
-
-
+
+
/* Sums of Squares */
- tab_float (t, 2, i * 3 + 1, 0, ssa, 10, 2);
- tab_float (t, 2, i * 3 + 3, 0, sst, 10, 2);
- tab_float (t, 2, i * 3 + 2, 0, sst - ssa, 10, 2);
+ tab_double (t, 2, i * 3 + 1, 0, ssa, NULL);
+ tab_double (t, 2, i * 3 + 3, 0, sst, NULL);
+ tab_double (t, 2, i * 3 + 2, 0, sst - ssa, NULL);
/* Degrees of freedom */
- tab_float (t, 3, i * 3 + 1, 0, df1, 4, 0);
- tab_float (t, 3, i * 3 + 2, 0, df2, 4, 0);
- tab_float (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0);
+ tab_fixed (t, 3, i * 3 + 1, 0, df1, 4, 0);
+ tab_fixed (t, 3, i * 3 + 2, 0, df2, 4, 0);
+ tab_fixed (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0);
/* Mean Squares */
- tab_float (t, 4, i * 3 + 1, TAB_RIGHT, msa, 8, 3);
- tab_float (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, 8, 3);
-
+ tab_double (t, 4, i * 3 + 1, TAB_RIGHT, msa, NULL);
+ tab_double (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, NULL);
- {
- const double F = msa/gp->mse ;
+
+ {
+ const double F = msa / gp->mse ;
/* The F value */
- tab_float (t, 5, i * 3 + 1, 0, F, 8, 3);
-
+ tab_double (t, 5, i * 3 + 1, 0, F, NULL);
+
/* The significance */
- tab_float (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q(F,df1,df2), 8, 3);
+ tab_double (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1,df2), NULL);
}
}
tab_title (t, _("ANOVA"));
tab_submit (t);
-
-
}
+
/* Show the descriptives table */
-static void
-show_descriptives(void)
+static void
+show_descriptives (const struct dictionary *dict)
{
size_t v;
- int n_cols =10;
+ int n_cols = 10;
struct tab_table *t;
int row;
- const double confidence=0.95;
+ const double confidence = 0.95;
const double q = (1.0 - confidence) / 2.0;
-
- int n_rows = 2 ;
+ const struct variable *wv = dict_get_weight (dict);
+ const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
+
+ int n_rows = 2 ;
- for ( v = 0 ; v < n_vars ; ++v )
+ for ( v = 0 ; v < n_vars ; ++v )
n_rows += group_proc_get (vars[v])->n_groups + 1;
t = tab_create (n_cols,n_rows,0);
/* Put a frame around the entire box, and vertical lines inside */
- tab_box (t,
+ tab_box (t,
TAL_2, TAL_2,
-1, TAL_1,
0, 0,
/* Underline headers */
tab_hline (t, TAL_2, 0, n_cols - 1, 2 );
tab_vline (t, TAL_2, 2, 0, n_rows - 1);
-
+
tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("N"));
tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Mean"));
tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
row = 2;
- for ( v=0 ; v < n_vars ; ++v )
+ for ( v=0 ; v < n_vars ; ++v )
{
double T;
double std_error;
-
+
struct group_proc *gp = group_proc_get (vars[v]);
struct group_statistics *gs;
- struct group_statistics *totals = &gp->ugs;
+ struct group_statistics *totals = &gp->ugs;
- const char *s = var_to_string(vars[v]);
+ const char *s = var_to_string (vars[v]);
+ const struct fmt_spec *fmt = var_get_print_format (vars[v]);
struct group_statistics *const *gs_array =
(struct group_statistics *const *) hsh_sort(gp->group_hash);
int count = 0;
tab_text (t, 0, row, TAB_LEFT | TAT_TITLE, s);
- if ( v > 0)
+ if ( v > 0)
tab_hline(t, TAL_1, 0, n_cols - 1 , row);
- for (count = 0 ; count < hsh_count(gp->group_hash) ; ++count)
+ for (count = 0; count < hsh_count (gp->group_hash); ++count)
{
+ struct string vstr;
+ ds_init_empty (&vstr);
gs = gs_array[count];
- tab_text (t, 1, row + count,
- TAB_LEFT | TAT_TITLE ,value_to_string(&gs->id,indep_var));
+ var_append_value_name (indep_var, &gs->id, &vstr);
+
+ tab_text (t, 1, row + count,
+ TAB_LEFT | TAT_TITLE,
+ ds_cstr (&vstr));
+
+ ds_destroy (&vstr);
/* Now fill in the numbers ... */
- tab_float (t, 2, row + count, 0, gs->n, 8,0);
+ tab_fixed (t, 2, row + count, 0, gs->n, 8, 0);
- tab_float (t, 3, row + count, 0, gs->mean,8,2);
-
- tab_float (t, 4, row + count, 0, gs->std_dev,8,2);
+ tab_double (t, 3, row + count, 0, gs->mean, NULL);
- std_error = gs->std_dev/sqrt(gs->n) ;
- tab_float (t, 5, row + count, 0,
- std_error, 8,2);
+ tab_double (t, 4, row + count, 0, gs->std_dev, NULL);
+
+ std_error = gs->std_dev / sqrt (gs->n) ;
+ tab_double (t, 5, row + count, 0,
+ std_error, NULL);
/* Now the confidence interval */
-
- T = gsl_cdf_tdist_Qinv(q,gs->n - 1);
- tab_float(t, 6, row + count, 0,
- gs->mean - T * std_error, 8, 2);
+ T = gsl_cdf_tdist_Qinv (q, gs->n - 1);
- tab_float(t, 7, row + count, 0,
- gs->mean + T * std_error, 8, 2);
+ tab_double (t, 6, row + count, 0,
+ gs->mean - T * std_error, NULL);
- /* Min and Max */
+ tab_double (t, 7, row + count, 0,
+ gs->mean + T * std_error, NULL);
- tab_float(t, 8, row + count, 0, gs->minimum, 8, 2);
- tab_float(t, 9, row + count, 0, gs->maximum, 8, 2);
+ /* Min and Max */
+ tab_double (t, 8, row + count, 0, gs->minimum, fmt);
+ tab_double (t, 9, row + count, 0, gs->maximum, fmt);
}
- tab_text (t, 1, row + count,
+ tab_text (t, 1, row + count,
TAB_LEFT | TAT_TITLE ,_("Total"));
- tab_float (t, 2, row + count, 0, totals->n, 8,0);
+ tab_double (t, 2, row + count, 0, totals->n, wfmt);
- tab_float (t, 3, row + count, 0, totals->mean, 8,2);
+ tab_double (t, 3, row + count, 0, totals->mean, NULL);
- tab_float (t, 4, row + count, 0, totals->std_dev,8,2);
+ tab_double (t, 4, row + count, 0, totals->std_dev, NULL);
- std_error = totals->std_dev/sqrt(totals->n) ;
+ std_error = totals->std_dev / sqrt (totals->n) ;
- tab_float (t, 5, row + count, 0, std_error, 8,2);
+ tab_double (t, 5, row + count, 0, std_error, NULL);
/* Now the confidence interval */
-
- T = gsl_cdf_tdist_Qinv(q,totals->n - 1);
- tab_float(t, 6, row + count, 0,
- totals->mean - T * std_error, 8, 2);
+ T = gsl_cdf_tdist_Qinv (q, totals->n - 1);
+
+ tab_double (t, 6, row + count, 0,
+ totals->mean - T * std_error, NULL);
- tab_float(t, 7, row + count, 0,
- totals->mean + T * std_error, 8, 2);
+ tab_double (t, 7, row + count, 0,
+ totals->mean + T * std_error, NULL);
/* Min and Max */
- tab_float(t, 8, row + count, 0, totals->minimum, 8, 2);
- tab_float(t, 9, row + count, 0, totals->maximum, 8, 2);
+ tab_double (t, 8, row + count, 0, totals->minimum, fmt);
+ tab_double (t, 9, row + count, 0, totals->maximum, fmt);
row += gp->n_groups + 1;
}
tab_submit (t);
-
-
}
/* Show the homogeneity table */
-static void
+static void
show_homogeneity(void)
{
size_t v;
tab_dim (t, tab_natural_dimensions);
/* Put a frame around the entire box, and vertical lines inside */
- tab_box (t,
+ tab_box (t,
TAL_2, TAL_2,
-1, TAL_1,
0, 0,
tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df1"));
tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df2"));
tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
-
+
tab_title (t, _("Test of Homogeneity of Variances"));
- for ( v=0 ; v < n_vars ; ++v )
+ for ( v=0 ; v < n_vars ; ++v )
{
double F;
const struct variable *var = vars[v];
tab_text (t, 0, v + 1, TAB_LEFT | TAT_TITLE, s);
F = gp->levene;
- tab_float (t, 1, v + 1, TAB_RIGHT, F, 8,3);
- tab_float (t, 2, v + 1, TAB_RIGHT, df1 ,8,0);
- tab_float (t, 3, v + 1, TAB_RIGHT, df2 ,8,0);
+ tab_double (t, 1, v + 1, TAB_RIGHT, F, NULL);
+ tab_fixed (t, 2, v + 1, TAB_RIGHT, df1, 8, 0);
+ tab_fixed (t, 3, v + 1, TAB_RIGHT, df2, 8, 0);
/* Now the significance */
- tab_float (t, 4, v + 1, TAB_RIGHT,gsl_cdf_fdist_Q(F,df1,df2), 8, 3);
+ tab_double (t, 4, v + 1, TAB_RIGHT,gsl_cdf_fdist_Q (F, df1, df2), NULL);
}
tab_submit (t);
-
-
}
/* Show the contrast coefficients table */
-static void
-show_contrast_coeffs(short *bad_contrast)
+static void
+show_contrast_coeffs (short *bad_contrast)
{
int n_cols = 2 + ostensible_number_of_groups;
int n_rows = 2 + cmd.sbc_contrast;
union value *group_value;
- int count = 0 ;
+ int count = 0 ;
void *const *group_values ;
struct tab_table *t;
tab_dim (t, tab_natural_dimensions);
/* Put a frame around the entire box, and vertical lines inside */
- tab_box (t,
+ tab_box (t,
TAL_2, TAL_2,
-1, TAL_1,
0, 0,
n_cols - 1, n_rows - 1);
- tab_box (t,
+ tab_box (t,
-1,-1,
TAL_0, TAL_0,
2, 0,
tab_text (t, 0, 2, TAB_LEFT | TAT_TITLE, _("Contrast"));
- tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
+ tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
var_to_string(indep_var));
group_values = hsh_sort(global_group_hash);
- for (count = 0 ;
- count < hsh_count(global_group_hash) ;
+ for (count = 0 ;
+ count < hsh_count(global_group_hash) ;
++count)
{
int i;
+ struct string vstr;
group_value = group_values[count];
- tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE,
- value_to_string(group_value, indep_var));
+ ds_init_empty (&vstr);
- for (i = 0 ; i < cmd.sbc_contrast ; ++i )
+ var_append_value_name (indep_var, group_value, &vstr);
+
+ tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE,
+ ds_cstr (&vstr));
+
+ ds_destroy (&vstr);
+
+
+ for (i = 0 ; i < cmd.sbc_contrast ; ++i )
{
tab_text(t, 1, i + 2, TAB_CENTER | TAT_PRINTF, "%d", i + 1);
- if ( bad_contrast[i] )
+ if ( bad_contrast[i] )
tab_text(t, count + 2, i + 2, TAB_RIGHT, "?" );
else
- tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g",
+ tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g",
subc_list_double_at(&cmd.dl_contrast[i], count)
);
}
}
-
+
tab_submit (t);
}
/* Show the results of the contrast tests */
-static void
+static void
show_contrast_tests(short *bad_contrast)
{
size_t v;
tab_dim (t, tab_natural_dimensions);
/* Put a frame around the entire box, and vertical lines inside */
- tab_box (t,
+ tab_box (t,
TAL_2, TAL_2,
-1, TAL_1,
0, 0,
n_cols - 1, n_rows - 1);
- tab_box (t,
+ tab_box (t,
-1,-1,
TAL_0, TAL_0,
0, 0,
tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("df"));
tab_text (t, 7, 0, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
- for ( v = 0 ; v < n_vars ; ++v )
+ for ( v = 0 ; v < n_vars ; ++v )
{
int i;
int lines_per_variable = 2 * cmd.sbc_contrast;
tab_text (t, 0, (v * lines_per_variable) + 1, TAB_LEFT | TAT_TITLE,
var_to_string(vars[v]));
- for ( i = 0 ; i < cmd.sbc_contrast ; ++i )
+ for ( i = 0 ; i < cmd.sbc_contrast ; ++i )
{
int ci;
double contrast_value = 0.0;
double sec_vneq=0.0;
- /* Note: The calculation of the degrees of freedom in the
+ /* Note: The calculation of the degrees of freedom in the
"variances not equal" case is painful!!
The following formula may help to understand it:
\frac{\left(\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2}
double df_denominator = 0.0;
double df_numerator = 0.0;
- if ( i == 0 )
+ if ( i == 0 )
{
- tab_text (t, 1, (v * lines_per_variable) + i + 1,
+ tab_text (t, 1, (v * lines_per_variable) + i + 1,
TAB_LEFT | TAT_TITLE,
_("Assume equal variances"));
- tab_text (t, 1, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
- TAB_LEFT | TAT_TITLE,
+ tab_text (t, 1, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ TAB_LEFT | TAT_TITLE,
_("Does not assume equal"));
}
- tab_text (t, 2, (v * lines_per_variable) + i + 1,
+ tab_text (t, 2, (v * lines_per_variable) + i + 1,
TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1);
TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1);
- if ( bad_contrast[i])
+ if ( bad_contrast[i])
continue;
group_stat_array = hsh_sort(group_hash);
-
+
for (ci = 0 ; ci < hsh_count(group_hash) ; ++ci)
{
const double coef = subc_list_double_at(&cmd.dl_contrast[i], ci);
contrast_value += coef * gs->mean;
- coef_msq += (coef * coef) / gs->n ;
+ coef_msq += (coef * coef) / gs->n ;
sec_vneq += (coef * coef) * (gs->std_dev * gs->std_dev ) /gs->n ;
}
sec_vneq = sqrt(sec_vneq);
- df_numerator = pow2(df_numerator);
+ df_numerator = pow2 (df_numerator);
- tab_float (t, 3, (v * lines_per_variable) + i + 1,
- TAB_RIGHT, contrast_value, 8,2);
+ tab_double (t, 3, (v * lines_per_variable) + i + 1,
+ TAB_RIGHT, contrast_value, NULL);
- tab_float (t, 3, (v * lines_per_variable) + i + 1 +
+ tab_double (t, 3, (v * lines_per_variable) + i + 1 +
cmd.sbc_contrast,
- TAB_RIGHT, contrast_value, 8,2);
+ TAB_RIGHT, contrast_value, NULL);
- std_error_contrast = sqrt(grp_data->mse * coef_msq);
+ std_error_contrast = sqrt (grp_data->mse * coef_msq);
/* Std. Error */
- tab_float (t, 4, (v * lines_per_variable) + i + 1,
+ tab_double (t, 4, (v * lines_per_variable) + i + 1,
TAB_RIGHT, std_error_contrast,
- 8,3);
+ NULL);
T = fabs(contrast_value / std_error_contrast) ;
/* T Statistic */
- tab_float (t, 5, (v * lines_per_variable) + i + 1,
+ tab_double (t, 5, (v * lines_per_variable) + i + 1,
TAB_RIGHT, T,
- 8,3);
+ NULL);
df = grp_data->ugs.n - grp_data->n_groups;
/* Degrees of Freedom */
- tab_float (t, 6, (v * lines_per_variable) + i + 1,
+ tab_fixed (t, 6, (v * lines_per_variable) + i + 1,
TAB_RIGHT, df,
- 8,0);
+ 8, 0);
/* Significance TWO TAILED !!*/
- tab_float (t, 7, (v * lines_per_variable) + i + 1,
- TAB_RIGHT, 2 * gsl_cdf_tdist_Q(T,df),
- 8,3);
+ tab_double (t, 7, (v * lines_per_variable) + i + 1,
+ TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T, df),
+ NULL);
/* Now for the Variances NOT Equal case */
/* Std. Error */
- tab_float (t, 4,
- (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ tab_double (t, 4,
+ (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
TAB_RIGHT, sec_vneq,
- 8,3);
+ NULL);
T = contrast_value / sec_vneq;
- tab_float (t, 5,
- (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ tab_double (t, 5,
+ (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
TAB_RIGHT, T,
- 8,3);
+ NULL);
df = df_numerator / df_denominator;
- tab_float (t, 6,
- (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ tab_double (t, 6,
+ (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
TAB_RIGHT, df,
- 8,3);
+ NULL);
/* The Significance */
- tab_float (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
- TAB_RIGHT, 2 * gsl_cdf_tdist_Q(T,df),
- 8,3);
+ tab_double (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
+ TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T,df),
+ NULL);
}
- if ( v > 0 )
+ if ( v > 0 )
tab_hline(t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1);
}
tab_submit (t);
-
}
/* Pre calculations */
-static void
+static void
precalc ( struct cmd_oneway *cmd UNUSED )
{
size_t i=0;
- for(i=0; i< n_vars ; ++i)
+ for(i=0; i< n_vars ; ++i)
{
struct group_proc *gp = group_proc_get (vars[i]);
struct group_statistics *totals = &gp->ugs;
-
+
/* Create a hash for each of the dependent variables.
- The hash contains a group_statistics structure,
+ The hash contains a group_statistics structure,
and is keyed by value of the independent variable */
- gp->group_hash =
- hsh_create(4,
+ gp->group_hash =
+ hsh_create(4,
(hsh_compare_func *) compare_group,
(hsh_hash_func *) hash_group,
(hsh_free_func *) free_group,
- (void *) indep_var->width );
+ (void *) var_get_width (indep_var) );
totals->sum=0;
}
}
+/* Free callback handed to hsh_create for global_group_hash:
+ releases one heap-allocated value stored in the table.
+ AUX is not used. */
+static void
+free_value (void *value_, const void *aux UNUSED)
+{
+ free (value_);
+}
-static bool
-run_oneway(const struct ccase *first, const struct casefile *cf, void *cmd_)
+static void
+run_oneway (struct cmd_oneway *cmd,
+ struct casereader *input,
+ const struct dataset *ds)
{
- struct casereader *r;
+ struct taint *taint;
+ struct dictionary *dict = dataset_dict (ds);
+ enum mv_class exclude;
+ struct casereader *reader;
struct ccase c;
- struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_;
+ if (!casereader_peek (input, 0, &c))
+ {
+ casereader_destroy (input);
+ return;
+ }
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
- output_split_file_values (first);
+ taint = taint_clone (casereader_get_taint (input));
- global_group_hash = hsh_create(4,
+ global_group_hash = hsh_create(4,
(hsh_compare_func *) compare_values,
(hsh_hash_func *) hash_value,
- 0,
- (void *) indep_var->width );
+ free_value,
+ (void *) var_get_width (indep_var) );
+
precalc(cmd);
- for(r = casefile_get_reader (cf);
- casereader_read (r, &c) ;
- case_destroy (&c))
+ exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM;
+ input = casereader_create_filter_missing (input, &indep_var, 1,
+ exclude, NULL);
+ if (cmd->miss == ONEWAY_LISTWISE)
+ input = casereader_create_filter_missing (input, vars, n_vars,
+ exclude, NULL);
+ input = casereader_create_filter_weight (input, dict, NULL, NULL);
+
+ reader = casereader_clone (input);
+ for (; casereader_read (reader, &c); case_destroy (&c))
{
size_t i;
- const double weight =
- dict_get_case_weight (dataset_dict (current_dataset), &c, &bad_weight_warn);
-
- const union value *indep_val = case_data (&c, indep_var->fv);
-
- /* Deal with missing values */
- if ( value_is_missing(&indep_var->miss, indep_val) )
- continue;
-
- /* Skip the entire case if /MISSING=LISTWISE is set */
- if ( cmd->miss == ONEWAY_LISTWISE )
- {
- for(i = 0; i < n_vars ; ++i)
- {
- const struct variable *v = vars[i];
- const union value *val = case_data (&c, v->fv);
-
- if (value_is_missing(&v->miss, val) )
- break;
- }
- if ( i != n_vars )
- continue;
+ const double weight = dict_get_case_weight (dict, &c, NULL);
- }
-
-
- hsh_insert ( global_group_hash, (void *) indep_val );
+ const union value *indep_val = case_data (&c, indep_var);
+ void **p = hsh_probe (global_group_hash, indep_val);
+ if (*p == NULL)
+ *p = value_dup (indep_val, var_get_width (indep_var));
- for ( i = 0 ; i < n_vars ; ++i )
+ for ( i = 0 ; i < n_vars ; ++i )
{
const struct variable *v = vars[i];
- const union value *val = case_data (&c, v->fv);
+ const union value *val = case_data (&c, v);
struct group_proc *gp = group_proc_get (vars[i]);
struct hsh_table *group_hash = gp->group_hash;
gs = hsh_find(group_hash, (void *) indep_val );
- if ( ! gs )
+ if ( ! gs )
{
gs = xmalloc (sizeof *gs);
gs->id = *indep_val;
hsh_insert ( group_hash, (void *) gs );
}
-
- if (! value_is_missing(&v->miss, val) )
+
+ if (!var_is_value_missing (v, val, exclude))
{
struct group_statistics *totals = &gp->ugs;
totals->sum+=weight * val->f;
totals->ssq+=weight * val->f * val->f;
- if ( val->f * weight < totals->minimum )
+ if ( val->f * weight < totals->minimum )
totals->minimum = val->f * weight;
- if ( val->f * weight > totals->maximum )
+ if ( val->f * weight > totals->maximum )
totals->maximum = val->f * weight;
gs->n+=weight;
gs->sum+=weight * val->f;
gs->ssq+=weight * val->f * val->f;
- if ( val->f * weight < gs->minimum )
+ if ( val->f * weight < gs->minimum )
gs->minimum = val->f * weight;
- if ( val->f * weight > gs->maximum )
+ if ( val->f * weight > gs->maximum )
gs->maximum = val->f * weight;
}
gp->n_groups = hsh_count ( group_hash );
}
-
+
}
- casereader_destroy (r);
+ casereader_destroy (reader);
postcalc(cmd);
-
- if ( stat_tables & STAT_HOMO )
- levene(cf, indep_var, n_vars, vars,
- (cmd->miss == ONEWAY_LISTWISE) ? LEV_LISTWISE : LEV_ANALYSIS ,
- value_is_missing);
- ostensible_number_of_groups = hsh_count (global_group_hash);
+ if ( stat_tables & STAT_HOMO )
+ levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude);
+
+ casereader_destroy (input);
+ ostensible_number_of_groups = hsh_count (global_group_hash);
- output_oneway();
+ if (!taint_has_tainted_successor (taint))
+ output_oneway (dict);
- return true;
+ taint_destroy (taint);
}
/* Post calculations for the ONEWAY command */
-void
+void
postcalc ( struct cmd_oneway *cmd UNUSED )
{
size_t i=0;
- for(i = 0; i < n_vars ; ++i)
+ for(i = 0; i < n_vars ; ++i)
{
struct group_proc *gp = group_proc_get (vars[i]);
struct hsh_table *group_hash = gp->group_hash;
struct hsh_iterator g;
struct group_statistics *gs;
- for (gs = hsh_first (group_hash,&g);
- gs != 0;
+ for (gs = hsh_first (group_hash,&g);
+ gs != 0;
gs = hsh_next(group_hash,&g))
{
gs->mean=gs->sum / gs->n;
) ;
totals->se_mean = totals->std_dev / sqrt(totals->n);
-
+
}
}
+
+/*
+ Local Variables:
+ mode: c
+ End:
+*/