/* PSPP - computes sample statistics.
Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
-/* FIXME: Many possible optimizations. */
-
#include <config.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
-#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/transformations.h>
calculating a Z-score. */
struct dsc_z_score
{
- int src_idx; /* Source index into case data. */
- int dst_idx; /* Destination index into case data. */
+ const struct variable *src_var; /* Variable on which z-score is based. */
+ struct variable *z_var; /* New z-score variable. */
double mean; /* Distribution mean. */
double std_dev; /* Distribution standard deviation. */
- struct variable *v; /* Variable on which z-score is based. */
};
/* DESCRIPTIVES transformation (for calculating Z-scores). */
{
struct dsc_z_score *z_scores; /* Array of Z-scores. */
int z_score_cnt; /* Number of Z-scores. */
- struct variable **vars; /* Variables for listwise missing checks. */
+ const struct variable **vars; /* Variables for listwise missing checks. */
size_t var_cnt; /* Number of variables. */
enum dsc_missing_type missing_type; /* Treatment of missing values. */
- int include_user_missing; /* Nonzero to include user-missing values. */
+ enum mv_class exclude; /* Classes of missing values to exclude. */
};
/* Statistics. Used as bit indexes, so must be 32 or fewer. */
/* A variable specified on DESCRIPTIVES. */
struct dsc_var
{
- struct variable *v; /* Variable to calculate on. */
+ const struct variable *v; /* Variable to calculate on. */
char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
double valid, missing; /* Valid, missing counts. */
struct moments *moments; /* Moments. */
/* User options. */
enum dsc_missing_type missing_type; /* Treatment of missing values. */
- int include_user_missing; /* Nonzero to include user-missing values. */
+ enum mv_class exclude; /* Classes of missing values to exclude. */
int show_var_labels; /* Nonzero to show variable labels. */
int show_index; /* Nonzero to show variable index. */
enum dsc_format format; /* Output format. */
struct dsc_proc *dsc, const char *name);
static bool generate_z_varname (const struct dictionary *dict,
struct dsc_proc *dsc, char *z_name,
- const char *name, size_t *z_cnt);
+ const char *name, int *z_cnt);
static void dump_z_table (struct dsc_proc *);
static void setup_z_trns (struct dsc_proc *, struct dataset *);
/* Procedure execution functions. */
-static bool calc_descriptives (const struct ccase *first,
- const struct casefile *, void *dsc_,
- const struct dataset *);
+static void calc_descriptives (struct dsc_proc *, struct casereader *,
+ struct dataset *);
static void display (struct dsc_proc *dsc);
\f
/* Parser and outline. */
{
struct dictionary *dict = dataset_dict (ds);
struct dsc_proc *dsc;
- struct variable **vars = NULL;
+ const struct variable **vars = NULL;
size_t var_cnt = 0;
int save_z_scores = 0;
- size_t z_cnt = 0;
+ int z_cnt = 0;
size_t i;
bool ok;
+ struct casegrouper *grouper;
+ struct casereader *group;
+
/* Create and initialize dsc. */
dsc = xmalloc (sizeof *dsc);
dsc->vars = NULL;
dsc->var_cnt = 0;
dsc->missing_type = DSC_VARIABLE;
- dsc->include_user_missing = 0;
+ dsc->exclude = MV_ANY;
dsc->show_var_labels = 1;
dsc->show_index = 0;
dsc->format = DSC_LINE;
else if (lex_match_id (lexer, "LISTWISE"))
dsc->missing_type = DSC_LISTWISE;
else if (lex_match_id (lexer, "INCLUDE"))
- dsc->include_user_missing = 1;
+ dsc->exclude = MV_SYSTEM;
else
{
lex_error (lexer, NULL);
{
int i;
- if (!parse_variables (lexer, dataset_dict (ds), &vars, &var_cnt,
+ if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
goto error;
- dsc->vars = xnrealloc (dsc->vars, var_cnt, sizeof *dsc->vars);
+ dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
for (i = dsc->var_cnt; i < var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
{
if (save_z_scores)
{
- size_t gen_cnt = 0;
+ int gen_cnt = 0;
for (i = 0; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] == 0)
for (i = 0; i < dsc->var_cnt; i++)
dsc->vars[i].moments = moments_create (dsc->max_moment);
- /* Data pass. */
- ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
+ /* Data pass. FIXME: error handling. */
+ grouper = casegrouper_create_splits (proc_open (ds), dict);
+ while (casegrouper_get_next_group (grouper, &group))
+ calc_descriptives (dsc, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
/* Z-scoring! */
if (ok && z_cnt)
copies the new name into Z_NAME. On failure, returns false. */
static bool
generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
- const char *var_name, size_t *z_cnt)
+ const char *var_name, int *z_cnt)
{
char name[LONG_NAME_LEN + 1];
{
struct dsc_trns *t = trns_;
struct dsc_z_score *z;
- struct variable **vars;
+ const struct variable **vars;
int all_sysmis = 0;
if (t->missing_type == DSC_LISTWISE)
assert(t->vars);
for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
{
- double score = case_num (c, (*vars)->fv);
- if ( score == SYSMIS
- || (!t->include_user_missing
- && var_is_num_user_missing (*vars, score)))
+ double score = case_num (c, *vars);
+ if (var_is_num_missing (*vars, score, t->exclude))
{
all_sysmis = 1;
break;
for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
{
- double input = case_num (c, z->src_idx);
- double *output = &case_data_rw (c, z->dst_idx)->f;
+ double input = case_num (c, z->src_var);
+ double *output = &case_data_rw (c, z->z_var)->f;
- if (z->mean == SYSMIS || z->std_dev == SYSMIS
- || all_sysmis || input == SYSMIS
- || (!t->include_user_missing
- && var_is_num_user_missing (z->v, input)))
+ if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
+ || var_is_num_missing (z->src_var, input, t->exclude))
*output = SYSMIS;
else
*output = (input - z->mean) / z->std_dev;
t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
t->z_score_cnt = cnt;
t->missing_type = dsc->missing_type;
- t->include_user_missing = dsc->include_user_missing;
+ t->exclude = dsc->exclude;
if ( t->missing_type == DSC_LISTWISE )
{
t->var_cnt = dsc->var_cnt;
var_to_string (dv->v)));
z = &t->z_scores[cnt++];
- z->src_idx = dv->v->fv;
- z->dst_idx = dst_var->fv;
+ z->src_var = dv->v;
+ z->z_var = dst_var;
z->mean = dv->stats[DSC_MEAN];
z->std_dev = dv->stats[DSC_STDDEV];
- z->v = dv->v;
}
}
/* Calculates and displays descriptive statistics for the cases
in CF. */
-static bool
-calc_descriptives (const struct ccase *first,
- const struct casefile *cf, void *dsc_,
- const struct dataset *ds)
+static void
+calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
+ struct dataset *ds)
{
- struct dsc_proc *dsc = dsc_;
- struct casereader *reader;
+ struct casereader *pass1, *pass2;
struct ccase c;
size_t i;
- output_split_file_values (ds, first);
+ if (!casereader_peek (group, 0, &c))
+ return;
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
+
+ group = casereader_create_filter_weight (group, dataset_dict (ds),
+ NULL, NULL);
+
+ casereader_split (group, &pass1, &pass2);
+ if (dsc->max_moment <= MOMENT_MEAN)
+ casereader_destroy (pass2);
for (i = 0; i < dsc->var_cnt; i++)
{
dsc->valid = 0.;
/* First pass to handle most of the work. */
- for (reader = casefile_get_reader (cf, NULL);
- casereader_read (reader, &c);
- case_destroy (&c))
+ for (; casereader_read (pass1, &c); case_destroy (&c))
{
- double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
- if (weight <= 0.0)
- continue;
+ double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
/* Check for missing values. */
if (listwise_missing (dsc, &c))
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = case_num (&c, dv->v->fv);
+ double x = case_num (&c, dv->v);
- if (dsc->missing_type != DSC_LISTWISE
- && (x == SYSMIS
- || (!dsc->include_user_missing
- && var_is_num_user_missing (dv->v, x))))
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
{
dv->missing += weight;
continue;
dv->max = x;
}
}
- casereader_destroy (reader);
+ if (!casereader_destroy (pass1))
+ return;
/* Second pass for higher-order moments. */
if (dsc->max_moment > MOMENT_MEAN)
{
- for (reader = casefile_get_reader (cf, NULL);
- casereader_read (reader, &c);
- case_destroy (&c))
+ for (; casereader_read (pass2, &c); case_destroy (&c))
{
- double weight = dict_get_case_weight (dataset_dict (ds), &c,
- &dsc->bad_warn);
- if (weight <= 0.0)
- continue;
+ double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
/* Check for missing values. */
- if (listwise_missing (dsc, &c)
- && dsc->missing_type == DSC_LISTWISE)
+ if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
continue;
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = case_num (&c, dv->v->fv);
+ double x = case_num (&c, dv->v);
- if (dsc->missing_type != DSC_LISTWISE
- && (x == SYSMIS
- || (!dsc->include_user_missing
- && var_is_num_user_missing (dv->v, x))))
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
continue;
if (dv->moments != NULL)
moments_pass_two (dv->moments, x, weight);
}
}
- casereader_destroy (reader);
+ if (!casereader_destroy (pass2))
+ return;
}
-
+
/* Calculate results. */
for (i = 0; i < dsc->var_cnt; i++)
{
/* Output results. */
display (dsc);
-
- return true;
}
/* Returns true if any of the descriptives variables in DSC's
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = case_num (c, dv->v->fv);
+ double x = case_num (c, dv->v);
- if (x == SYSMIS
- || (!dsc->include_user_missing
- && var_is_num_user_missing (dv->v, x)))
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
return true;
}
return false;