/* PSPP - computes sample statistics. -*-c-*-
Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by John Williams <johnr.williams@stonebow.otago.ac.nz>.
- Almost completely re-written by John Darrington 2004
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
#include <stdlib.h>
#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/value-labels.h>
#include <data/variable.h>
-#include <data/casefilter.h>
-
#include <language/command.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
#include <libpspp/hash.h>
#include <libpspp/magic.h>
#include <libpspp/message.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/taint.h>
#include <math/group-proc.h>
#include <math/levene.h>
#include <output/manager.h>
struct pair
{
/* The variables comprising the pair */
- struct variable *v[2];
+ const struct variable *v[2];
/* The number of valid variable pairs */
double n;
static int common_calc (const struct dictionary *dict,
const struct ccase *, void *,
- const struct casefilter *filter);
+ enum mv_class);
static void common_precalc (struct cmd_t_test *);
static void common_postcalc (struct cmd_t_test *);
-static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, const struct casefilter *);
+static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, enum mv_class);
static void one_sample_precalc (struct cmd_t_test *);
static void one_sample_postcalc (struct cmd_t_test *);
static int paired_calc (const struct dictionary *dict, const struct ccase *,
- struct cmd_t_test*, const struct casefilter *);
+ struct cmd_t_test*, enum mv_class);
static void paired_precalc (struct cmd_t_test *);
static void paired_postcalc (struct cmd_t_test *);
static void group_precalc (struct cmd_t_test *);
static int group_calc (const struct dictionary *dict, const struct ccase *,
- struct cmd_t_test *, const struct casefilter *);
+ struct cmd_t_test *, enum mv_class);
static void group_postcalc (struct cmd_t_test *);
-static bool calculate(const struct ccase *first,
- const struct casefile *cf, void *_mode,
- const struct dataset *ds);
+static void calculate(struct cmd_t_test *,
+ struct casereader *,
+ const struct dataset *);
static int mode;
int
cmd_t_test (struct lexer *lexer, struct dataset *ds)
{
+ struct casegrouper *grouper;
+ struct casereader *group;
bool ok;
if ( !parse_t_test (lexer, ds, &cmd, NULL) )
int i;
struct hsh_iterator hi;
- struct hsh_table *hash;
- struct variable *v;
+ struct const_hsh_table *hash;
+ const struct variable *v;
- hash = hsh_create (n_pairs, compare_vars_by_name, hash_var_by_name,
+ hash = const_hsh_create (n_pairs, compare_vars_by_name, hash_var_by_name,
0, 0);
for (i=0; i < n_pairs; ++i)
{
- hsh_insert(hash,pairs[i].v[0]);
- hsh_insert(hash,pairs[i].v[1]);
+ const_hsh_insert (hash, pairs[i].v[0]);
+ const_hsh_insert (hash, pairs[i].v[1]);
}
assert(cmd.n_variables == 0);
- cmd.n_variables = hsh_count(hash);
+ cmd.n_variables = const_hsh_count (hash);
cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables,
sizeof *cmd.v_variables);
/* Iterate through the hash */
- for (i=0,v = (struct variable *) hsh_first(hash,&hi);
+ for (i=0,v = const_hsh_first (hash, &hi);
v != 0;
- v=hsh_next(hash,&hi) )
+ v = const_hsh_next (hash, &hi) )
cmd.v_variables[i++]=v;
-
- hsh_destroy(hash);
+ const_hsh_destroy(hash);
}
}
else if ( !cmd.sbc_variables)
bad_weight_warn = true;
- ok = multipass_procedure_with_splits (ds, calculate, &cmd);
+ /* Data pass. */
+ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ calculate (&cmd, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
n_pairs=0;
free(pairs);
static int
tts_custom_pairs (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED, void *aux UNUSED)
{
- struct variable **vars;
+ const struct variable **vars;
size_t n_vars;
size_t n_pairs_local;
lex_match (lexer, '=');
n_vars=0;
- if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
+ if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars,
PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
{
free (vars);
if (lex_match (lexer, T_WITH))
{
n_before_WITH = n_vars;
- if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
+ if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars,
PV_DUPLICATE | PV_APPEND
| PV_NUMERIC | PV_NO_SCRATCH))
{
msg (SE, _("PAIRED was specified but the number of variables "
"preceding WITH (%d) did not match the number "
"following (%d)."),
- n_before_WITH, n_after_WITH );
+ (int) n_before_WITH, (int) n_after_WITH );
return 0;
}
n_pairs_local = n_before_WITH;
for (i=0; i < cmd->n_variables; ++i)
{
- struct variable *var = cmd->v_variables[i];
+ const struct variable *var = cmd->v_variables[i];
struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
int count=0;
gs = hsh_find(grp_hash, (void *) &search_val);
assert(gs);
- tab_float(ssb->t, 2 ,i*2+count+1, TAB_RIGHT, gs->n, 2, 0);
+ tab_float(ssb->t, 2 ,i*2+count+1, TAB_RIGHT, gs->n, 10, 0);
tab_float(ssb->t, 3 ,i*2+count+1, TAB_RIGHT, gs->mean, 8, 2);
tab_float(ssb->t, 4 ,i*2+count+1, TAB_RIGHT, gs->std_dev, 8, 3);
tab_float(ssb->t, 5 ,i*2+count+1, TAB_RIGHT, gs->se_mean, 8, 3);
/* Values */
tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], 8, 2);
- tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 2, 0);
+ tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 10, 0);
tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], 8, 3);
tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j]/sqrt(pairs[i].n), 8, 3);
struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs;
tab_text (ssb->t, 0, i+1, TAB_LEFT, var_get_name (cmd->v_variables[i]));
- tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 2, 0);
+ tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 10, 0);
tab_float (ssb->t,2, i+1, TAB_RIGHT, gs->mean, 8, 2);
tab_float (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, 8, 2);
tab_float (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, 8, 3);
double std_err_diff;
double mean_diff;
- struct variable *var = cmd->v_variables[i];
+ const struct variable *var = cmd->v_variables[i];
struct group_proc *grp_data = group_proc_get (var);
struct hsh_table *grp_hash = grp_data->group_hash;
tab_float(self->t, 3, i*2+3, TAB_CENTER, q, 8,3 );
df = gs0->n + gs1->n - 2.0 ;
- tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 2, 0);
+ tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 10, 0);
pooled_variance = ( (gs0->n )*pow2(gs0->s_std_dev)
+
tab_float(trb->t, 7, i+3, TAB_RIGHT, t , 8,3 );
/* Degrees of freedom */
- tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 2, 0 );
+ tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 10, 0 );
/* p is the lower-tail probability P(T <= t) and q the upper-tail
   probability P(T > t) for DF degrees of freedom.  q must come from
   the _Q function; calling _P twice makes both tails identical. */
p = gsl_cdf_tdist_P(t,df);
q = gsl_cdf_tdist_Q(t,df);
common_calc (const struct dictionary *dict,
const struct ccase *c,
void *_cmd,
- const struct casefilter *filter)
+ enum mv_class exclude)
{
int i;
struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd;
- double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+ double weight = dict_get_case_weight (dict, c, NULL);
/* Listwise has to be implicit if the independent variable is missing ?? */
if ( cmd->sbc_groups )
{
- if ( casefilter_variable_missing (filter, c, indep_var) )
+ if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
return 0;
}
for(i = 0; i < cmd->n_variables ; ++i)
{
- struct variable *v = cmd->v_variables[i];
-
- if (! casefilter_variable_missing (filter, c, v) )
+ const struct variable *v = cmd->v_variables[i];
+ const union value *val = case_data (c, v);
+
+ if (!var_is_value_missing (v, val, exclude))
{
struct group_statistics *gs;
- const union value *val = case_data (c, v);
- gs = &group_proc_get (cmd->v_variables[i])->ugs;
+ gs = &group_proc_get (v)->ugs;
gs->n += weight;
gs->sum += weight * val->f;
static int
one_sample_calc (const struct dictionary *dict,
const struct ccase *c, void *cmd_,
- const struct casefilter *filter)
+ enum mv_class exclude)
{
int i;
struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_;
- double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+ double weight = dict_get_case_weight (dict, c, NULL);
for(i=0; i< cmd->n_variables ; ++i)
{
struct group_statistics *gs;
- struct variable *v = cmd->v_variables[i];
+ const struct variable *v = cmd->v_variables[i];
const union value *val = case_data (c, v);
gs= &group_proc_get (cmd->v_variables[i])->ugs;
- if ( ! casefilter_variable_missing (filter, c, v))
+ if (!var_is_value_missing (v, val, exclude))
gs->sum_diff += weight * (val->f - cmd->n_testval[0]);
}
static int
paired_calc (const struct dictionary *dict, const struct ccase *c,
- struct cmd_t_test *cmd UNUSED, const struct casefilter *filter)
+ struct cmd_t_test *cmd UNUSED, enum mv_class exclude)
{
int i;
- double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+ double weight = dict_get_case_weight (dict, c, NULL);
for(i=0; i < n_pairs ; ++i )
{
- struct variable *v0 = pairs[i].v[0];
- struct variable *v1 = pairs[i].v[1];
+ const struct variable *v0 = pairs[i].v[0];
+ const struct variable *v1 = pairs[i].v[1];
const union value *val0 = case_data (c, v0);
const union value *val1 = case_data (c, v1);
- if ( ! casefilter_variable_missing (filter, c, v0) &&
- ! casefilter_variable_missing (filter, c, v1) )
+ if (!var_is_value_missing (v0, val0, exclude) &&
+ !var_is_value_missing (v1, val1, exclude))
{
pairs[i].n += weight;
pairs[i].sum[0] += weight * val0->f;
static int
group_calc (const struct dictionary *dict,
const struct ccase *c, struct cmd_t_test *cmd,
- const struct casefilter *filter)
+ enum mv_class exclude)
{
int i;
- const double weight =
- dict_get_case_weight (dict, c, &bad_weight_warn);
+ const double weight = dict_get_case_weight (dict, c, NULL);
const union value *gv;
- if ( casefilter_variable_missing (filter, c, indep_var))
+ if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
return 0;
gv = case_data (c, indep_var);
for(i=0; i< cmd->n_variables ; ++i)
{
- struct variable *var = cmd->v_variables[i];
+ const struct variable *var = cmd->v_variables[i];
const union value *val = case_data (c, var);
struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
struct group_statistics *gs;
if ( ! gs )
return 0;
- if ( ! casefilter_variable_missing (filter, c, var) )
+ if (!var_is_value_missing (var, val, exclude))
{
gs->n += weight;
gs->sum += weight * val->f;
for (i = 0; i < cmd->n_variables ; ++i)
{
- struct variable *var = cmd->v_variables[i];
+ const struct variable *var = cmd->v_variables[i];
struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
struct hsh_iterator g;
struct group_statistics *gs;
-static bool
-calculate(const struct ccase *first, const struct casefile *cf,
- void *cmd_, const struct dataset *ds)
+/* Runs the T-TEST computations for one group of cases and emits the
+   summary and test-result tables.
+
+   CMD is the parsed command, INPUT is the reader for the current
+   split-file group and DS is the dataset supplying the dictionary.
+
+   The caller in this file never destroys the group reader it hands
+   over, so this function must dispose of INPUT on every exit path,
+   including the early return for an empty group.
+
+   The data are read in several passes: one for the statistics common
+   to all modes, one for the mode selected by the file-level `mode',
+   and, for independent samples, a further clone handed to levene().
+   Output tables are suppressed if the taint cloned from INPUT reports
+   a tainted successor after the passes. */
+static void
+calculate(struct cmd_t_test *cmd,
+ struct casereader *input, const struct dataset *ds)
{
const struct dictionary *dict = dataset_dict (ds);
struct ssbox stat_summary_box;
struct trbox test_results_box;
- struct casereader *r;
+ struct casereader *pass1, *pass2, *pass3;
+ struct taint *taint;
struct ccase c;
- struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_;
+ enum mv_class exclude = cmd->miss != TTS_INCLUDE ? MV_ANY : MV_SYSTEM;
- struct casefilter *filter = casefilter_create (cmd->miss != TTS_INCLUDE,
- NULL, 0);
+ /* Peek at the first case only to print the split-file values.
+    An empty group produces no output, but the reader still has to
+    be destroyed here or it is leaked. */
+ if (!casereader_peek (input, 0, &c))
+ {
+ casereader_destroy (input);
+ return;
+ }
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
if ( cmd->miss == TTS_LISTWISE )
- casefilter_add_variables (filter,
- cmd->v_variables, cmd->n_variables);
+ input = casereader_create_filter_missing (input,
+ cmd->v_variables,
+ cmd->n_variables,
+ exclude, NULL);
+
+ input = casereader_create_filter_weight (input, dict, NULL, NULL);
+
+ /* Keep our own handle on the taint so it can still be queried
+    after the readers derived from INPUT have been destroyed. */
+ taint = taint_clone (casereader_get_taint (input));
+ casereader_split (input, &pass1, &pass2);
- output_split_file_values (ds, first);
common_precalc (cmd);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- common_calc (dict, &c, cmd, filter);
- }
-
- casereader_destroy (r);
+ for (; casereader_read (pass1, &c); case_destroy (&c))
+ common_calc (dict, &c, cmd, exclude);
+ casereader_destroy (pass1);
common_postcalc (cmd);
switch(mode)
{
case T_1_SAMPLE:
one_sample_precalc (cmd);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- one_sample_calc (dict, &c, cmd, filter);
- }
- casereader_destroy (r);
+ for (; casereader_read (pass2, &c); case_destroy (&c))
+ one_sample_calc (dict, &c, cmd, exclude);
one_sample_postcalc (cmd);
break;
case T_PAIRED:
paired_precalc(cmd);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- paired_calc (dict, &c, cmd, filter);
- }
- casereader_destroy (r);
+ for (; casereader_read (pass2, &c); case_destroy (&c))
+ paired_calc (dict, &c, cmd, exclude);
paired_postcalc (cmd);
-
break;
case T_IND_SAMPLES:
+ /* Clone before pass2 is consumed: levene() needs its own reader.
+    NOTE(review): presumably levene() destroys pass3 -- confirm. */
+ pass3 = casereader_clone (pass2);
group_precalc(cmd);
- for(r = casefile_get_reader (cf, filter);
- casereader_read (r, &c) ;
- case_destroy (&c))
- {
- group_calc (dict, &c, cmd, filter);
- }
- casereader_destroy (r);
+ for(; casereader_read (pass2, &c); case_destroy (&c))
+ group_calc (dict, &c, cmd, exclude);
group_postcalc(cmd);
- levene (dict, cf, indep_var, cmd->n_variables, cmd->v_variables,
- filter);
+ levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables,
+ exclude);
break;
}
+ casereader_destroy (pass2);
+
+ /* Only produce output if the data passes were not tainted. */
+ if (!taint_has_tainted_successor (taint))
+ {
+ ssbox_create(&stat_summary_box,cmd,mode);
+ ssbox_populate(&stat_summary_box,cmd);
+ ssbox_finalize(&stat_summary_box);
- casefilter_destroy (filter);
-
- ssbox_create(&stat_summary_box,cmd,mode);
- ssbox_populate(&stat_summary_box,cmd);
- ssbox_finalize(&stat_summary_box);
-
- if ( mode == T_PAIRED)
- pscbox();
-
- trbox_create(&test_results_box,cmd,mode);
- trbox_populate(&test_results_box,cmd);
- trbox_finalize(&test_results_box);
-
- return true;
+ if ( mode == T_PAIRED )
+ pscbox();
+
+ trbox_create(&test_results_box,cmd,mode);
+ trbox_populate(&test_results_box,cmd);
+ trbox_finalize(&test_results_box);
+ }
+
+ /* Release the reference obtained from taint_clone above. */
+ taint_destroy (taint);
}
short which_group(const struct group_statistics *g,