-/* PSPP - computes sample statistics.
+/* PSPP - a program for statistical analysis.
Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-/* FIXME: Many possible optimizations. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <math.h>
#include <stdlib.h>
-#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/transformations.h>
#include <data/variable.h>
#include <language/command.h>
+#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
-#include <libpspp/alloc.h>
+#include <language/lexer/variable-parser.h>
#include <libpspp/array.h>
#include <libpspp/compiler.h>
-#include <libpspp/magic.h>
#include <libpspp/message.h>
+#include <libpspp/assertion.h>
#include <math/moments.h>
#include <output/manager.h>
#include <output/table.h>
+#include "xalloc.h"
+
#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
calculating a Z-score. */
struct dsc_z_score
{
- int src_idx; /* Source index into case data. */
- int dst_idx; /* Destination index into case data. */
+ const struct variable *src_var; /* Variable on which z-score is based. */
+ struct variable *z_var; /* New z-score variable. */
double mean; /* Distribution mean. */
double std_dev; /* Distribution standard deviation. */
- struct variable *v; /* Variable on which z-score is based. */
};
/* DESCRIPTIVES transformation (for calculating Z-scores). */
{
struct dsc_z_score *z_scores; /* Array of Z-scores. */
int z_score_cnt; /* Number of Z-scores. */
- struct variable **vars; /* Variables for listwise missing checks. */
+ const struct variable **vars; /* Variables for listwise missing checks. */
size_t var_cnt; /* Number of variables. */
enum dsc_missing_type missing_type; /* Treatment of missing values. */
- int include_user_missing; /* Nonzero to include user-missing values. */
+ enum mv_class exclude; /* Classes of missing values to exclude. */
};
/* Statistics. Used as bit indexes, so must be 32 or fewer. */
#define DEFAULT_STATS \
((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \
| (1ul << DSC_MAX))
-
+
/* A variable specified on DESCRIPTIVES. */
struct dsc_var
{
- struct variable *v; /* Variable to calculate on. */
- char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */
+ const struct variable *v; /* Variable to calculate on. */
+ char z_name[VAR_NAME_LEN + 1]; /* Name for z-score variable. */
double valid, missing; /* Valid, missing counts. */
struct moments *moments; /* Moments. */
double min, max; /* Minimum and maximum values. */
};
/* Output format. */
-enum dsc_format
+enum dsc_format
{
DSC_LINE, /* Abbreviated format. */
DSC_SERIAL /* Long format. */
};
/* A DESCRIPTIVES procedure. */
-struct dsc_proc
+struct dsc_proc
{
/* Per-variable info. */
struct dsc_var *vars; /* Variables. */
/* User options. */
enum dsc_missing_type missing_type; /* Treatment of missing values. */
- int include_user_missing; /* Nonzero to include user-missing values. */
+ enum mv_class exclude; /* Classes of missing values to exclude. */
int show_var_labels; /* Nonzero to show variable labels. */
int show_index; /* Nonzero to show variable index. */
enum dsc_format format; /* Output format. */
/* Accumulated results. */
double missing_listwise; /* Sum of weights of cases missing listwise. */
double valid; /* Sum of weights of valid cases. */
- int bad_warn; /* Warn if bad weight found. */
+ bool bad_warn; /* Warn if bad weight found. */
enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */
int sort_ascending; /* !0: ascending order; 0: descending. */
unsigned long show_stats; /* Statistics to display. */
};
/* Parsing. */
-static enum dsc_statistic match_statistic (void);
+static enum dsc_statistic match_statistic (struct lexer *);
static void free_dsc_proc (struct dsc_proc *);
/* Z-score functions. */
-static int try_name (struct dsc_proc *dsc, char *name);
-static int generate_z_varname (struct dsc_proc *dsc, char *z_name,
- const char *name, size_t *z_cnt);
+static bool try_name (const struct dictionary *dict,
+ struct dsc_proc *dsc, const char *name);
+static bool generate_z_varname (const struct dictionary *dict,
+ struct dsc_proc *dsc, char *z_name,
+ const char *name, int *z_cnt);
static void dump_z_table (struct dsc_proc *);
-static void setup_z_trns (struct dsc_proc *);
+static void setup_z_trns (struct dsc_proc *, struct dataset *);
/* Procedure execution functions. */
-static bool calc_descriptives (const struct casefile *, void *dsc_);
+static void calc_descriptives (struct dsc_proc *, struct casereader *,
+ struct dataset *);
static void display (struct dsc_proc *dsc);
\f
/* Parser and outline. */
/* Handles DESCRIPTIVES. */
int
-cmd_descriptives (void)
+cmd_descriptives (struct lexer *lexer, struct dataset *ds)
{
+ struct dictionary *dict = dataset_dict (ds);
struct dsc_proc *dsc;
- struct variable **vars = NULL;
+ const struct variable **vars = NULL;
size_t var_cnt = 0;
int save_z_scores = 0;
- size_t z_cnt = 0;
+ int z_cnt = 0;
size_t i;
bool ok;
+ struct casegrouper *grouper;
+ struct casereader *group;
+
/* Create and initialize dsc. */
dsc = xmalloc (sizeof *dsc);
dsc->vars = NULL;
dsc->var_cnt = 0;
dsc->missing_type = DSC_VARIABLE;
- dsc->include_user_missing = 0;
+ dsc->exclude = MV_ANY;
dsc->show_var_labels = 1;
dsc->show_index = 0;
dsc->format = DSC_LINE;
dsc->show_stats = dsc->calc_stats = DEFAULT_STATS;
/* Parse DESCRIPTIVES. */
- while (token != '.')
+ while (lex_token (lexer) != '.')
{
- if (lex_match_id ("MISSING"))
+ if (lex_match_id (lexer, "MISSING"))
{
- lex_match ('=');
- while (token != '.' && token != '/')
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
{
- if (lex_match_id ("VARIABLE"))
+ if (lex_match_id (lexer, "VARIABLE"))
dsc->missing_type = DSC_VARIABLE;
- else if (lex_match_id ("LISTWISE"))
+ else if (lex_match_id (lexer, "LISTWISE"))
dsc->missing_type = DSC_LISTWISE;
- else if (lex_match_id ("INCLUDE"))
- dsc->include_user_missing = 1;
+ else if (lex_match_id (lexer, "INCLUDE"))
+ dsc->exclude = MV_SYSTEM;
else
{
- lex_error (NULL);
+ lex_error (lexer, NULL);
goto error;
}
- lex_match (',');
+ lex_match (lexer, ',');
}
}
- else if (lex_match_id ("SAVE"))
+ else if (lex_match_id (lexer, "SAVE"))
save_z_scores = 1;
- else if (lex_match_id ("FORMAT"))
+ else if (lex_match_id (lexer, "FORMAT"))
{
- lex_match ('=');
- while (token != '.' && token != '/')
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
{
- if (lex_match_id ("LABELS"))
+ if (lex_match_id (lexer, "LABELS"))
dsc->show_var_labels = 1;
- else if (lex_match_id ("NOLABELS"))
+ else if (lex_match_id (lexer, "NOLABELS"))
dsc->show_var_labels = 0;
- else if (lex_match_id ("INDEX"))
+ else if (lex_match_id (lexer, "INDEX"))
dsc->show_index = 1;
- else if (lex_match_id ("NOINDEX"))
+ else if (lex_match_id (lexer, "NOINDEX"))
dsc->show_index = 0;
- else if (lex_match_id ("LINE"))
+ else if (lex_match_id (lexer, "LINE"))
dsc->format = DSC_LINE;
- else if (lex_match_id ("SERIAL"))
+ else if (lex_match_id (lexer, "SERIAL"))
dsc->format = DSC_SERIAL;
else
{
- lex_error (NULL);
+ lex_error (lexer, NULL);
goto error;
}
- lex_match (',');
+ lex_match (lexer, ',');
}
}
- else if (lex_match_id ("STATISTICS"))
+ else if (lex_match_id (lexer, "STATISTICS"))
{
- lex_match ('=');
+ lex_match (lexer, '=');
dsc->show_stats = 0;
- while (token != '.' && token != '/')
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
{
- if (lex_match (T_ALL))
+ if (lex_match (lexer, T_ALL))
dsc->show_stats |= (1ul << DSC_N_STATS) - 1;
- else if (lex_match_id ("DEFAULT"))
+ else if (lex_match_id (lexer, "DEFAULT"))
dsc->show_stats |= DEFAULT_STATS;
else
- dsc->show_stats |= 1ul << (match_statistic ());
- lex_match (',');
+ dsc->show_stats |= 1ul << (match_statistic (lexer));
+ lex_match (lexer, ',');
}
if (dsc->show_stats == 0)
dsc->show_stats = DEFAULT_STATS;
}
- else if (lex_match_id ("SORT"))
+ else if (lex_match_id (lexer, "SORT"))
{
- lex_match ('=');
- if (lex_match_id ("NAME"))
+ lex_match (lexer, '=');
+ if (lex_match_id (lexer, "NAME"))
dsc->sort_by_stat = DSC_NAME;
- else
+ else
{
- dsc->sort_by_stat = match_statistic ();
+ dsc->sort_by_stat = match_statistic (lexer);
if (dsc->sort_by_stat == DSC_NONE )
dsc->sort_by_stat = DSC_MEAN;
}
- if (lex_match ('('))
+ if (lex_match (lexer, '('))
{
- if (lex_match_id ("A"))
+ if (lex_match_id (lexer, "A"))
dsc->sort_ascending = 1;
- else if (lex_match_id ("D"))
+ else if (lex_match_id (lexer, "D"))
dsc->sort_ascending = 0;
else
- lex_error (NULL);
- lex_force_match (')');
+ lex_error (lexer, NULL);
+ lex_force_match (lexer, ')');
}
}
else if (var_cnt == 0)
{
- if (lex_look_ahead () == '=')
+ if (lex_look_ahead (lexer) == '=')
{
- lex_match_id ("VARIABLES");
- lex_match ('=');
+ lex_match_id (lexer, "VARIABLES");
+ lex_match (lexer, '=');
}
- while (token != '.' && token != '/')
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
{
int i;
-
- if (!parse_variables (default_dict, &vars, &var_cnt,
+
+ if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
goto error;
- dsc->vars = xnrealloc (dsc->vars, var_cnt, sizeof *dsc->vars);
+ dsc->vars = xnrealloc ((void *)dsc->vars, var_cnt, sizeof *dsc->vars);
for (i = dsc->var_cnt; i < var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
}
dsc->var_cnt = var_cnt;
- if (lex_match ('('))
+ if (lex_match (lexer, '('))
{
- if (token != T_ID)
+ if (lex_token (lexer) != T_ID)
{
- lex_error (NULL);
+ lex_error (lexer, NULL);
goto error;
}
- if (try_name (dsc, tokid))
+ if (try_name (dict, dsc, lex_tokid (lexer)))
{
- strcpy (dsc->vars[dsc->var_cnt - 1].z_name, tokid);
+ strcpy (dsc->vars[dsc->var_cnt - 1].z_name, lex_tokid (lexer));
z_cnt++;
}
else
msg (SE, _("Z-score variable name %s would be"
- " a duplicate variable name."), tokid);
- lex_get ();
- if (!lex_force_match (')'))
+ " a duplicate variable name."), lex_tokid (lexer));
+ lex_get (lexer);
+ if (!lex_force_match (lexer, ')'))
goto error;
}
}
}
- else
+ else
{
- lex_error (NULL);
- goto error;
+ lex_error (lexer, NULL);
+ goto error;
}
- lex_match ('/');
+ lex_match (lexer, '/');
}
if (var_cnt == 0)
{
/* Construct z-score varnames, show translation table. */
if (z_cnt || save_z_scores)
{
- if (save_z_scores)
+ if (save_z_scores)
{
- size_t gen_cnt = 0;
+ int gen_cnt = 0;
for (i = 0; i < dsc->var_cnt; i++)
- if (dsc->vars[i].z_name[0] == 0)
+ if (dsc->vars[i].z_name[0] == 0)
{
- if (!generate_z_varname (dsc, dsc->vars[i].z_name,
- dsc->vars[i].v->name, &gen_cnt))
+ if (!generate_z_varname (dict, dsc, dsc->vars[i].z_name,
+ var_get_name (dsc->vars[i].v),
+ &gen_cnt))
goto error;
z_cnt++;
- }
+ }
}
dump_z_table (dsc);
}
/* Figure out maximum moment needed and allocate moments for
the variables. */
dsc->max_moment = MOMENT_NONE;
- for (i = 0; i < DSC_N_STATS; i++)
+ for (i = 0; i < DSC_N_STATS; i++)
if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment)
dsc->max_moment = dsc_info[i].moment;
if (dsc->max_moment != MOMENT_NONE)
dsc->vars[i].moments = moments_create (dsc->max_moment);
/* Data pass. */
- ok = multipass_procedure_with_splits (calc_descriptives, dsc);
+ grouper = casegrouper_create_splits (proc_open (ds), dict);
+ while (casegrouper_get_next_group (grouper, &group))
+ calc_descriptives (dsc, group, ds);
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
/* Z-scoring! */
if (ok && z_cnt)
- setup_z_trns (dsc);
+ setup_z_trns (dsc, ds);
/* Done. */
free (vars);
specifiers). Emits an error if the current token ID does not name a
statistic. */
static enum dsc_statistic
-match_statistic (void)
+match_statistic (struct lexer *lexer)
{
- if (token == T_ID)
+ if (lex_token (lexer) == T_ID)
{
enum dsc_statistic stat;
for (stat = 0; stat < DSC_N_STATS; stat++)
- if (lex_match_id (dsc_info[stat].identifier))
+ if (lex_match_id (lexer, dsc_info[stat].identifier))
return stat;
- lex_get();
- lex_error (_("expecting statistic name: reverting to default"));
+ lex_get (lexer);
+ lex_error (lexer, _("expecting statistic name: reverting to default"));
}
return DSC_NONE;
if (dsc == NULL)
return;
-
+
for (i = 0; i < dsc->var_cnt; i++)
moments_destroy (dsc->vars[i].moments);
free (dsc->vars);
\f
/* Z scores. */
-/* Returns 0 if NAME is a duplicate of any existing variable name or
- of any previously-declared z-var name; otherwise returns 1. */
-static int
-try_name (struct dsc_proc *dsc, char *name)
+/* Returns false if NAME is a duplicate of any existing variable name or
+ of any previously-declared z-var name; otherwise returns true. */
+static bool
+try_name (const struct dictionary *dict, struct dsc_proc *dsc,
+ const char *name)
{
size_t i;
- if (dict_lookup_var (default_dict, name) != NULL)
- return 0;
+ if (dict_lookup_var (dict, name) != NULL)
+ return false;
for (i = 0; i < dsc->var_cnt; i++)
if (!strcasecmp (dsc->vars[i].z_name, name))
- return 0;
- return 1;
+ return false;
+ return true;
}
/* Generates a name for a Z-score variable based on a variable
named VAR_NAME, given that *Z_CNT generated variable names are
- known to already exist. If successful, returns nonzero and
- copies the new name into Z_NAME. On failure, returns zero. */
-static int
-generate_z_varname (struct dsc_proc *dsc, char *z_name,
- const char *var_name, size_t *z_cnt)
+ known to already exist. If successful, returns true and
+ copies the new name into Z_NAME. On failure, returns false. */
+static bool
+generate_z_varname (const struct dictionary *dict, struct dsc_proc *dsc, char *z_name,
+ const char *var_name, int *z_cnt)
{
- char name[LONG_NAME_LEN + 1];
+ char name[VAR_NAME_LEN + 1];
/* Try a name based on the original variable name. */
name[0] = 'Z';
str_copy_trunc (name + 1, sizeof name - 1, var_name);
- if (try_name (dsc, name))
+ if (try_name (dict, dsc, name))
{
strcpy (z_name, name);
- return 1;
+ return true;
}
/* Generate a synthetic name. */
msg (SE, _("Ran out of generic names for Z-score variables. "
"There are only 126 generic names: ZSC001-ZSC0999, "
"STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
- return 0;
+ return false;
}
-
- if (try_name (dsc, name))
+
+ if (try_name (dict, dsc, name))
{
strcpy (z_name, name);
- return 1;
+ return true;
}
}
+ NOT_REACHED();
}
/* Outputs a table describing the mapping between source
{
size_t cnt = 0;
struct tab_table *t;
-
+
{
size_t i;
-
+
for (i = 0; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
cnt++;
}
-
+
t = tab_create (2, cnt + 1, 0);
tab_title (t, _("Mapping of variables to corresponding Z-scores."));
tab_columns (t, SOM_COL_DOWN, 1);
{
size_t i, y;
-
+
for (i = 0, y = 1; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
{
- tab_text (t, 0, y, TAB_LEFT, dsc->vars[i].v->name);
+ tab_text (t, 0, y, TAB_LEFT, var_get_name (dsc->vars[i].v));
tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name);
}
}
-
+
tab_submit (t);
}
*/
static int
descriptives_trns_proc (void *trns_, struct ccase * c,
- int case_idx UNUSED)
+ casenumber case_idx UNUSED)
{
struct dsc_trns *t = trns_;
struct dsc_z_score *z;
- struct variable **vars;
+ const struct variable **vars;
int all_sysmis = 0;
if (t->missing_type == DSC_LISTWISE)
assert(t->vars);
for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
{
- double score = case_num (c, (*vars)->fv);
- if ( score == SYSMIS
- || (!t->include_user_missing
- && mv_is_num_user_missing (&(*vars)->miss, score)))
+ double score = case_num (c, *vars);
+ if (var_is_num_missing (*vars, score, t->exclude))
{
all_sysmis = 1;
break;
}
}
}
-
+
for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
{
- double input = case_num (c, z->src_idx);
- double *output = &case_data_rw (c, z->dst_idx)->f;
+ double input = case_num (c, z->src_var);
+ double *output = &case_data_rw (c, z->z_var)->f;
- if (z->mean == SYSMIS || z->std_dev == SYSMIS
- || all_sysmis || input == SYSMIS
- || (!t->include_user_missing
- && mv_is_num_user_missing (&z->v->miss, input)))
+ if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis
+ || var_is_num_missing (z->src_var, input, t->exclude))
*output = SYSMIS;
else
*output = (input - z->mean) / z->std_dev;
/* Sets up a transformation to calculate Z scores. */
static void
-setup_z_trns (struct dsc_proc *dsc)
+setup_z_trns (struct dsc_proc *dsc, struct dataset *ds)
{
struct dsc_trns *t;
size_t cnt, i;
t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
t->z_score_cnt = cnt;
t->missing_type = dsc->missing_type;
- t->include_user_missing = dsc->include_user_missing;
+ t->exclude = dsc->exclude;
if ( t->missing_type == DSC_LISTWISE )
{
t->var_cnt = dsc->var_cnt;
if (dv->z_name[0] != '\0')
{
struct dsc_z_score *z;
- char *cp;
struct variable *dst_var;
- dst_var = dict_create_var_assert (default_dict, dv->z_name, 0);
- if (dv->v->label)
- {
- dst_var->label = xmalloc (strlen (dv->v->label) + 12);
- cp = stpcpy (dst_var->label, _("Z-score of "));
- strcpy (cp, dv->v->label);
- }
- else
- {
- dst_var->label = xmalloc (strlen (dv->v->name) + 12);
- cp = stpcpy (dst_var->label, _("Z-score of "));
- strcpy (cp, dv->v->name);
- }
+ dst_var = dict_create_var_assert (dataset_dict (ds), dv->z_name, 0);
+ var_set_label (dst_var, xasprintf (_("Z-score of %s"),
+ var_to_string (dv->v)));
z = &t->z_scores[cnt++];
- z->src_idx = dv->v->fv;
- z->dst_idx = dst_var->fv;
+ z->src_var = dv->v;
+ z->z_var = dst_var;
z->mean = dv->stats[DSC_MEAN];
z->std_dev = dv->stats[DSC_STDDEV];
- z->v = dv->v;
}
}
- add_transformation (descriptives_trns_proc, descriptives_trns_free, t);
+ add_transformation (ds,
+ descriptives_trns_proc, descriptives_trns_free, t);
}
\f
/* Statistical calculation. */
-static int listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
+static bool listwise_missing (struct dsc_proc *dsc, const struct ccase *c);
/* Calculates and displays descriptive statistics for the cases
read from GROUP, the casereader for one split-file group. */
-static bool
-calc_descriptives (const struct casefile *cf, void *dsc_)
+static void
+calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
+ struct dataset *ds)
{
- struct dsc_proc *dsc = dsc_;
- struct casereader *reader;
+ struct casereader *pass1, *pass2;
struct ccase c;
size_t i;
+ if (!casereader_peek (group, 0, &c))
+ {
+ casereader_destroy (group);
+ return;
+ }
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
+
+ group = casereader_create_filter_weight (group, dataset_dict (ds),
+ NULL, NULL);
+
+ pass1 = group;
+ pass2 = dsc->max_moment <= MOMENT_MEAN ? NULL : casereader_clone (pass1);
+
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
-
+
dv->valid = dv->missing = 0.0;
if (dv->moments != NULL)
moments_clear (dv->moments);
dsc->valid = 0.;
/* First pass to handle most of the work. */
- for (reader = casefile_get_reader (cf);
- casereader_read (reader, &c);
- case_destroy (&c))
+ for (; casereader_read (pass1, &c); case_destroy (&c))
{
- double weight = dict_get_case_weight (default_dict, &c, &dsc->bad_warn);
- if (weight <= 0.0)
- continue;
-
+ double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
+
/* Check for missing values. */
- if (listwise_missing (dsc, &c))
+ if (listwise_missing (dsc, &c))
{
dsc->missing_listwise += weight;
if (dsc->missing_type == DSC_LISTWISE)
- continue;
+ continue;
}
dsc->valid += weight;
- for (i = 0; i < dsc->var_cnt; i++)
+ for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = case_num (&c, dv->v->fv);
-
- if (dsc->missing_type != DSC_LISTWISE
- && (x == SYSMIS
- || (!dsc->include_user_missing
- && mv_is_num_user_missing (&dv->v->miss, x))))
+ double x = case_num (&c, dv->v);
+
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
{
dv->missing += weight;
continue;
}
- if (dv->moments != NULL)
+ if (dv->moments != NULL)
moments_pass_one (dv->moments, x, weight);
if (x < dv->min)
dv->max = x;
}
}
- casereader_destroy (reader);
+ if (!casereader_destroy (pass1))
+ {
+ casereader_destroy (pass2);
+ return;
+ }
/* Second pass for higher-order moments. */
- if (dsc->max_moment > MOMENT_MEAN)
+ if (dsc->max_moment > MOMENT_MEAN)
{
- for (reader = casefile_get_reader (cf);
- casereader_read (reader, &c);
- case_destroy (&c))
+ for (; casereader_read (pass2, &c); case_destroy (&c))
{
- double weight = dict_get_case_weight (default_dict, &c,
- &dsc->bad_warn);
- if (weight <= 0.0)
- continue;
-
+ double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
+
/* Check for missing values. */
- if (listwise_missing (dsc, &c)
- && dsc->missing_type == DSC_LISTWISE)
- continue;
+ if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
+ continue;
- for (i = 0; i < dsc->var_cnt; i++)
+ for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = case_num (&c, dv->v->fv);
-
- if (dsc->missing_type != DSC_LISTWISE
- && (x == SYSMIS
- || (!dsc->include_user_missing
- && mv_is_num_user_missing (&dv->v->miss, x))))
+ double x = case_num (&c, dv->v);
+
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
continue;
if (dv->moments != NULL)
moments_pass_two (dv->moments, x, weight);
}
}
- casereader_destroy (reader);
+ if (!casereader_destroy (pass2))
+ return;
}
-
+
/* Calculate results. */
for (i = 0; i < dsc->var_cnt; i++)
{
if (dsc->calc_stats & (1ul << DSC_STDDEV)
&& dv->stats[DSC_VARIANCE] != SYSMIS)
dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]);
- if (dsc->calc_stats & (1ul << DSC_SEKURT))
+ if (dsc->calc_stats & (1ul << DSC_SEKURT))
if (dv->stats[DSC_KURTOSIS] != SYSMIS)
dv->stats[DSC_SEKURT] = calc_sekurt (W);
if (dsc->calc_stats & (1ul << DSC_SESKEW)
/* Output results. */
display (dsc);
-
- return true;
}
-/* Returns nonzero if any of the descriptives variables in DSC's
- variable list have missing values in case C, zero otherwise. */
-static int
-listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
+/* Returns true if any of the descriptives variables in DSC's
+ variable list have missing values in case C, false otherwise. */
+static bool
+listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
{
size_t i;
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = case_num (c, dv->v->fv);
+ double x = case_num (c, dv->v);
- if (x == SYSMIS
- || (!dsc->include_user_missing
- && mv_is_num_user_missing (&dv->v->miss, x)))
- return 1;
+ if (var_is_num_missing (dv->v, x, dsc->exclude))
+ return true;
}
- return 0;
+ return false;
}
\f
/* Statistical display. */
size_t j;
nc = 0;
- tab_text (t, nc++, i + 1, TAB_LEFT, dv->v->name);
+ tab_text (t, nc++, i + 1, TAB_LEFT, var_get_name (dv->v));
tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid);
if (dsc->format == DSC_SERIAL)
tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing);
+
for (j = 0; j < DSC_N_STATS; j++)
if (dsc->show_stats & (1ul << j))
- tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3);
+ tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL);
}
tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."),
/* Compares `struct dsc_var's A and B according to the ordering
specified by DSC_. */
static int
-descriptives_compare_dsc_vars (const void *a_, const void *b_, void *dsc_)
+descriptives_compare_dsc_vars (const void *a_, const void *b_, const void *dsc_)
{
const struct dsc_var *a = a_;
const struct dsc_var *b = b_;
- struct dsc_proc *dsc = dsc_;
+ const struct dsc_proc *dsc = dsc_;
int result;
if (dsc->sort_by_stat == DSC_NAME)
- result = strcasecmp (a->v->name, b->v->name);
- else
+ result = strcasecmp (var_get_name (a->v), var_get_name (b->v));
+ else
{
double as = a->stats[dsc->sort_by_stat];
double bs = b->stats[dsc->sort_by_stat];