From: Ben Pfaff Date: Tue, 30 Mar 2004 00:52:36 +0000 (+0000) Subject: Add multipass procedures. Add two-pass moments calculation. Rewrite X-Git-Tag: v0.4.0~299 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=18de82b59d34f4936aa5557da9f35ea189f2d561;p=pspp-builds.git Add multipass procedures. Add two-pass moments calculation. Rewrite DESCRIPTIVES as a multipass procedure to use two-pass moments. Fix some memory leaks. --- diff --git a/src/descript.c b/src/descript.c new file mode 100644 index 00000000..55b1d37c --- /dev/null +++ b/src/descript.c @@ -0,0 +1,885 @@ +/* PSPP - computes sample statistics. + Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ + +/* FIXME: Many possible optimizations. */ + +#include +#include "error.h" +#include +#include +#include +#include "algorithm.h" +#include "alloc.h" +#include "casefile.h" +#include "command.h" +#include "lexer.h" +#include "error.h" +#include "magic.h" +#include "moments.h" +#include "som.h" +#include "tab.h" +#include "var.h" +#include "vfm.h" + +/* DESCRIPTIVES private data. */ + +/* Describes properties of a distribution for the purpose of + calculating a Z-score. */ +struct dsc_z_score + { + int src_idx; /* Source index into case data. */ + int dst_idx; /* Destination index into case data. */ + double mean; /* Distribution mean. */ + double std_dev; /* Distribution standard deviation. */ + }; + +/* DESCRIPTIVES transformation (for calculating Z-scores). */ +struct dsc_trns + { + struct trns_header h; + struct dsc_z_score *z_scores; /* Array of Z-scores. */ + int z_score_cnt; /* Number of Z-scores. */ + }; + +/* Statistics. Used as bit indexes, so must be 32 or fewer. */ +enum dsc_statistic + { + DSC_MEAN = 0, DSC_SEMEAN, DSC_STDDEV, DSC_VARIANCE, DSC_KURTOSIS, + DSC_SEKURT, DSC_SKEWNESS, DSC_SESKEW, DSC_RANGE, DSC_MIN, + DSC_MAX, DSC_SUM, DSC_N_STATS, + + /* Only valid as sort criteria. */ + DSC_NAME = -2, /* Sort by name. */ + DSC_NONE = -1 /* Unsorted. */ + }; + +/* Describes one statistic. */ +struct dsc_statistic_info + { + const char *identifier; /* Identifier. */ + const char *name; /* Full name. */ + enum moment moment; /* Highest moment needed to calculate. */ + }; + +/* Table of statistics, indexed by DSC_*. */ +static const struct dsc_statistic_info dsc_info[DSC_N_STATS] = + { + {"MEAN", N_("Mean"), MOMENT_MEAN}, + {"SEMEAN", N_("S E Mean"), MOMENT_VARIANCE}, + {"STDDEV", N_("Std Dev"), MOMENT_VARIANCE}, + {"VARIANCE", N_("Variance"), MOMENT_VARIANCE}, + {"KURTOSIS", N_("Kurtosis"), MOMENT_KURTOSIS}, + {"SEKURTOSIS", N_("S E Kurt"), MOMENT_NONE}, + {"SKEWNESS", N_("Skewness"), MOMENT_SKEWNESS}, + {"SESKEWNESS", N_("S E Skew"), MOMENT_NONE}, + {"RANGE", N_("Range"), MOMENT_NONE}, + {"MINIMUM", N_("Minimum"), MOMENT_NONE}, + {"MAXIMUM", N_("Maximum"), MOMENT_NONE}, + {"SUM", N_("Sum"), MOMENT_MEAN}, + }; + +/* Statistics calculated by default if none are explicitly + requested. */ +#define DEFAULT_STATS \ + ((1ul << DSC_MEAN) | (1ul << DSC_STDDEV) | (1ul << DSC_MIN) \ + | (1ul << DSC_MAX)) + +/* A variable specified on DESCRIPTIVES. */ +struct dsc_var + { + struct variable *v; /* Variable to calculate on. */ + char z_name[9]; /* Name for z-score variable. */ + double valid, missing; /* Valid, missing counts. */ + struct moments *moments; /* Moments. */ + double min, max; /* Maximum and mimimum values. */ + double stats[DSC_N_STATS]; /* All the stats' values. */ + }; + +/* Handling of missing values. */ +enum dsc_missing_type + { + DSC_VARIABLE, /* Handle missing values on a per-variable basis. */ + DSC_LISTWISE /* Discard entire case if any variable is missing. */ + }; + +/* Output format. */ +enum dsc_format + { + DSC_LINE, /* Abbreviated format. */ + DSC_SERIAL /* Long format. */ + }; + +/* A DESCRIPTIVES procedure. */ +struct dsc_proc + { + /* Per-variable info. */ + struct dsc_var *vars; /* Variables. */ + size_t var_cnt; /* Number of variables. */ + + /* User options. */ + enum dsc_missing_type missing_type; /* Treatment of missing values. */ + int include_user_missing; /* Nonzero to include user-missing values. */ + int show_var_labels; /* Nonzero to show variable labels. */ + int show_index; /* Nonzero to show variable index. */ + enum dsc_format format; /* Output format. */ + + /* Accumulated results. */ + double missing_listwise; /* Sum of weights of cases missing listwise. */ + double valid; /* Sum of weights of valid cases. */ + int bad_weight; /* Nonzero if a bad weight has been found. */ + enum dsc_statistic sort_by_stat; /* Statistic to sort by; -1: name. */ + int sort_ascending; /* !0: ascending order; 0: descending. */ + unsigned long show_stats; /* Statistics to display. */ + unsigned long calc_stats; /* Statistics to calculate. */ + enum moment max_moment; /* Highest moment needed for stats. */ + }; + +/* Parsing. */ +static enum dsc_statistic match_statistic (void); +static void free_dsc_proc (struct dsc_proc *); + +/* Z-score functions. */ +static int try_name (struct dsc_proc *dsc, char *name); +static int generate_z_varname (struct dsc_proc *dsc, char *z_name, + const char *name, int *z_cnt); +static void dump_z_table (struct dsc_proc *); +static void setup_z_trns (struct dsc_proc *); + +/* Procedure execution functions. */ +static void calc_descriptives (const struct casefile *, void *dsc_); +static void display (struct dsc_proc *dsc); + +/* Parser and outline. */ + +/* Handles DESCRIPTIVES. */ +int +cmd_descriptives (void) +{ + struct dsc_proc *dsc; + struct variable **vars = NULL; + int var_cnt = 0; + int save_z_scores = 0; + int z_cnt = 0; + int i; + + /* Create and initialize dsc. */ + dsc = xmalloc (sizeof *dsc); + dsc->vars = NULL; + dsc->var_cnt = 0; + dsc->missing_type = DSC_VARIABLE; + dsc->include_user_missing = 0; + dsc->show_var_labels = 1; + dsc->show_index = 0; + dsc->format = DSC_LINE; + dsc->missing_listwise = 0.; + dsc->valid = 0.; + dsc->bad_weight = 0; + dsc->sort_by_stat = DSC_NONE; + dsc->sort_ascending = 1; + dsc->show_stats = dsc->calc_stats = DEFAULT_STATS; + + /* Parse DESCRIPTIVES. */ + while (token != '.') + { + if (lex_match_id ("MISSING")) + { + lex_match ('='); + while (token != '.' && token != '/') + { + if (lex_match_id ("VARIABLE")) + dsc->missing_type = DSC_VARIABLE; + else if (lex_match_id ("LISTWISE")) + dsc->missing_type = DSC_LISTWISE; + else if (lex_match_id ("INCLUDE")) + dsc->include_user_missing = 1; + else + { + lex_error (NULL); + goto error; + } + lex_match (','); + } + } + else if (lex_match_id ("SAVE")) + save_z_scores = 1; + else if (lex_match_id ("FORMAT")) + { + lex_match ('='); + while (token != '.' && token != '/') + { + if (lex_match_id ("LABELS")) + dsc->show_var_labels = 1; + else if (lex_match_id ("NOLABELS")) + dsc->show_var_labels = 0; + else if (lex_match_id ("INDEX")) + dsc->show_index = 1; + else if (lex_match_id ("NOINDEX")) + dsc->show_index = 0; + else if (lex_match_id ("LINE")) + dsc->format = DSC_LINE; + else if (lex_match_id ("SERIAL")) + dsc->format = DSC_SERIAL; + else + { + lex_error (NULL); + goto error; + } + lex_match (','); + } + } + else if (lex_match_id ("STATISTICS")) + { + lex_match ('='); + dsc->show_stats = 0; + while (token != '.' && token != '/') + { + if (lex_match (T_ALL)) + dsc->show_stats |= (1ul << DSC_N_STATS) - 1; + else if (lex_match_id ("DEFAULT")) + dsc->show_stats |= DEFAULT_STATS; + else + dsc->show_stats |= 1ul << (match_statistic ()); + lex_match (','); + } + if (dsc->show_stats == 0) + dsc->show_stats = DEFAULT_STATS; + } + else if (lex_match_id ("SORT")) + { + lex_match ('='); + if (lex_match_id ("NAME")) + dsc->sort_by_stat = DSC_NAME; + else + dsc->sort_by_stat = match_statistic (); + if (lex_match ('(')) + { + if (lex_match_id ("A")) + dsc->sort_ascending = 1; + else if (lex_match_id ("D")) + dsc->sort_ascending = 0; + else + lex_error (NULL); + lex_force_match (')'); + } + } + else if (var_cnt == 0) + { + if (lex_look_ahead () == '=') + { + lex_match_id ("VARIABLES"); + lex_match ('='); + } + + while (token != '.' && token != '/') + { + int i; + + if (!parse_variables (default_dict, &vars, &var_cnt, + PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC)) + break; + + dsc->vars = xrealloc (dsc->vars, sizeof *dsc->vars * var_cnt); + for (i = dsc->var_cnt; i < var_cnt; i++) + { + struct dsc_var *dv = &dsc->vars[i]; + dv->v = vars[i]; + dv->z_name[0] = '\0'; + dv->moments = NULL; + } + dsc->var_cnt = var_cnt; + + if (lex_match ('(')) + { + if (token != T_ID) + { + lex_error (NULL); + break; + } + if (try_name (dsc, tokid)) + { + strcpy (dsc->vars[dsc->var_cnt - 1].z_name, tokid); + z_cnt++; + } + else + msg (SE, _("Z-score variable name %s would be" + "a duplicate variable name."), tokid); + lex_get (); + lex_force_match (')'); + } + } + } + else + { + lex_error (NULL); + break; + } + + lex_match ('/'); + } + if (var_cnt == 0) + { + msg (SE, _("No variables specified.")); + goto error; + } + + /* Construct z-score varnames, show translation table. */ + if (z_cnt || save_z_scores) + { + if (save_z_scores) + { + int gen_cnt = 0; + + for (i = 0; i < dsc->var_cnt; i++) + if (dsc->vars[i].z_name[0] == 0) + { + if (!generate_z_varname (dsc, dsc->vars[i].z_name, + dsc->vars[i].v->name, &gen_cnt)) + goto error; + z_cnt++; + } + } + dump_z_table (dsc); + } + + /* Figure out statistics to display. */ + if (dsc->show_stats & (1ul << DSC_SKEWNESS)) + dsc->show_stats |= 1ul << DSC_SESKEW; + if (dsc->show_stats & (1ul << DSC_KURTOSIS)) + dsc->show_stats |= 1ul << DSC_SEKURT; + + /* Figure out which statistics to calculate. */ + dsc->calc_stats = dsc->show_stats; + if (z_cnt > 0) + dsc->calc_stats |= (1ul << DSC_MEAN) | (1ul << DSC_STDDEV); + if (dsc->sort_by_stat >= 0) + dsc->calc_stats |= 1ul << dsc->sort_by_stat; + if (dsc->show_stats & (1ul << DSC_SESKEW)) + dsc->calc_stats |= 1ul << DSC_SKEWNESS; + if (dsc->show_stats & (1ul << DSC_SEKURT)) + dsc->calc_stats |= 1ul << DSC_KURTOSIS; + + /* Figure out maximum moment needed and allocate moments for + the variables. */ + dsc->max_moment = MOMENT_NONE; + for (i = 0; i < DSC_N_STATS; i++) + if (dsc->calc_stats & (1ul << i) && dsc_info[i].moment > dsc->max_moment) + dsc->max_moment = dsc_info[i].moment; + if (dsc->max_moment != MOMENT_NONE) + for (i = 0; i < dsc->var_cnt; i++) + dsc->vars[i].moments = moments_create (dsc->max_moment); + + /* Data pass. */ + multipass_procedure_with_splits (calc_descriptives, dsc); + if (dsc->bad_weight) + msg (SW, _("At least one case in the data file had a weight value " + "that was system-missing, zero, or negative. These case(s) " + "were ignored.")); + + /* Z-scoring! */ + if (z_cnt) + setup_z_trns (dsc); + + /* Done. */ + free_dsc_proc (dsc); + return CMD_SUCCESS; + + error: + free_dsc_proc (dsc); + return CMD_FAILURE; +} + +/* Returns the statistic named by the current token and skips + past the token. Emits an error if the current token does not + name a statistic. */ +static enum dsc_statistic +match_statistic (void) +{ + if (token == T_ID) + { + enum dsc_statistic stat; + + for (stat = 0; stat < DSC_N_STATS; stat++) + if (lex_match_id (dsc_info[stat].identifier)) + { + lex_get (); + return stat; + } + } + + lex_error (_("expecting statistic name")); + return DSC_MEAN; +} + +/* Frees DSC. */ +static void +free_dsc_proc (struct dsc_proc *dsc) +{ + size_t i; + + if (dsc == NULL) + return; + + for (i = 0; i < dsc->var_cnt; i++) + moments_destroy (dsc->vars[i].moments); + free (dsc->vars); + free (dsc); +} + +/* Z scores. */ + +/* Returns 0 if NAME is a duplicate of any existing variable name or + of any previously-declared z-var name; otherwise returns 1. */ +static int +try_name (struct dsc_proc *dsc, char *name) +{ + int i; + + if (dict_lookup_var (default_dict, name) != NULL) + return 0; + for (i = 0; i < dsc->var_cnt; i++) + if (!strcmp (dsc->vars[i].z_name, name)) + return 0; + return 1; +} + +/* Generates a name for a Z-score variable based on a variable + named VAR_NAME, given that *Z_CNT generated variable names are + known to already exist. If successful, returns nonzero and + copies the new name into Z_NAME. On failure, returns zero. */ +static int +generate_z_varname (struct dsc_proc *dsc, char *z_name, + const char *var_name, int *z_cnt) +{ + char name[10]; + + /* Try a name based on the original variable name. */ + name[0] = 'Z'; + strcpy (name + 1, var_name); + name[8] = '\0'; + if (try_name (dsc, name)) + { + strcpy (z_name, name); + return 1; + } + + /* Generate a synthetic name. */ + for (;;) + { + (*z_cnt)++; + + if (*z_cnt <= 99) + sprintf (name, "ZSC%03d", *z_cnt); + else if (*z_cnt <= 108) + sprintf (name, "STDZ%02d", *z_cnt - 99); + else if (*z_cnt <= 117) + sprintf (name, "ZZZZ%02d", *z_cnt - 108); + else if (*z_cnt <= 126) + sprintf (name, "ZQZQ%02d", *z_cnt - 117); + else + { + msg (SE, _("Ran out of generic names for Z-score variables. " + "There are only 126 generic names: ZSC001-ZSC0999, " + "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09.")); + return 0; + } + + if (try_name (dsc, name)) + { + strcpy (z_name, name); + return 1; + } + } +} + +/* Outputs a table describing the mapping between source + variables and Z-score variables. */ +static void +dump_z_table (struct dsc_proc *dsc) +{ + int cnt = 0; + struct tab_table *t; + + { + int i; + + for (i = 0; i < dsc->var_cnt; i++) + if (dsc->vars[i].z_name[0] != '\0') + cnt++; + } + + t = tab_create (2, cnt + 1, 0); + tab_title (t, 0, _("Mapping of variables to corresponding Z-scores.")); + tab_columns (t, SOM_COL_DOWN, 1); + tab_headers (t, 0, 0, 1, 0); + tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, cnt); + tab_hline (t, TAL_2, 0, 1, 1); + tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source")); + tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target")); + tab_dim (t, tab_natural_dimensions); + + { + int i, y; + + for (i = 0, y = 1; i < dsc->var_cnt; i++) + if (dsc->vars[i].z_name[0] != '\0') + { + tab_text (t, 0, y, TAB_LEFT, dsc->vars[i].v->name); + tab_text (t, 1, y++, TAB_LEFT, dsc->vars[i].z_name); + } + } + + tab_submit (t); +} + +/* Transformation function to calculate Z-scores. */ +static int +descriptives_trns_proc (struct trns_header *trns, struct ccase * c, + int case_num UNUSED) +{ + struct dsc_trns *t = (struct dsc_trns *) trns; + struct dsc_z_score *z; + + for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++) + { + double score = c->data[z->src_idx].f; + + if (z->mean == SYSMIS || score == SYSMIS) + c->data[z->dst_idx].f = SYSMIS; + else + c->data[z->dst_idx].f = (score - z->mean) / z->std_dev; + } + return -1; +} + +/* Frees a descriptives_trns struct. */ +static void +descriptives_trns_free (struct trns_header * trns) +{ + struct dsc_trns *t = (struct dsc_trns *) trns; + + free (t->z_scores); +} + +/* Sets up a transformation to calculate Z scores. */ +static void +setup_z_trns (struct dsc_proc *dsc) +{ + struct dsc_trns *t; + int cnt, i; + + for (cnt = i = 0; i < dsc->var_cnt; i++) + if (dsc->vars[i].z_name[0] != '\0') + cnt++; + + t = xmalloc (sizeof *t); + t->h.proc = descriptives_trns_proc; + t->h.free = descriptives_trns_free; + t->z_scores = xmalloc (cnt * sizeof *t->z_scores); + t->z_score_cnt = cnt; + + for (cnt = i = 0; i < dsc->var_cnt; i++) + { + struct dsc_var *dv = &dsc->vars[i]; + if (dv->z_name[0] != '\0') + { + struct dsc_z_score *z; + char *cp; + struct variable *dst_var; + + dst_var = dict_create_var_assert (default_dict, dv->z_name, 0); + dst_var->init = 0; + if (dv->v->label) + { + dst_var->label = xmalloc (strlen (dv->v->label) + 12); + cp = stpcpy (dst_var->label, _("Z-score of ")); + strcpy (cp, dv->v->label); + } + else + { + dst_var->label = xmalloc (strlen (dv->v->name) + 12); + cp = stpcpy (dst_var->label, _("Z-score of ")); + strcpy (cp, dv->v->name); + } + + z = &t->z_scores[cnt++]; + z->src_idx = dv->v->fv; + z->dst_idx = dst_var->fv; + z->mean = dv->stats[DSC_MEAN]; + z->std_dev = dv->stats[DSC_STDDEV]; + } + } + + add_transformation ((struct trns_header *) t); +} + +/* Statistical calculation. */ + +static int listwise_missing (struct dsc_proc *dsc, const struct ccase *c); + +/* Calculates and displays descriptive statistics for the cases + in CF. */ +static void +calc_descriptives (const struct casefile *cf, void *dsc_) +{ + struct dsc_proc *dsc = dsc_; + struct casereader *reader; + const struct ccase *c; + int i; + + for (i = 0; i < dsc->var_cnt; i++) + { + struct dsc_var *dv = &dsc->vars[i]; + + dv->valid = dv->missing = 0.0; + if (dv->moments != NULL) + moments_clear (dv->moments); + dv->min = DBL_MAX; + dv->max = -DBL_MAX; + } + dsc->missing_listwise = 0.; + dsc->valid = 0.; + + /* First pass to handle most of the work. */ + reader = casefile_get_reader (cf); + while (casereader_read (reader, &c)) + { + double weight = dict_get_case_weight (default_dict, c); + if (weight <= 0.0) + { + dsc->bad_weight = 1; + continue; + } + + /* Check for missing values. */ + if (listwise_missing (dsc, c)) + { + dsc->missing_listwise += weight; + if (dsc->missing_type == DSC_LISTWISE) + continue; + } + dsc->valid += weight; + + for (i = 0; i < dsc->var_cnt; i++) + { + struct dsc_var *dv = &dsc->vars[i]; + double x = c->data[dv->v->fv].f; + + if (dsc->missing_type != DSC_LISTWISE + && (x == SYSMIS + || (!dsc->include_user_missing + && is_num_user_missing (x, dv->v)))) + { + dv->missing += weight; + continue; + } + + if (dv->moments != NULL) + moments_pass_one (dv->moments, x, weight); + if (x < dv->min) + dv->min = x; + if (x > dv->max) + dv->max = x; + } + } + casereader_destroy (reader); + + /* Second pass for higher-order moments. */ + if (dsc->max_moment > MOMENT_MEAN) + { + reader = casefile_get_reader (cf); + while (casereader_read (reader, &c)) + { + double weight = dict_get_case_weight (default_dict, c); + if (weight <= 0.0) + continue; + + /* Check for missing values. */ + if (listwise_missing (dsc, c) + && dsc->missing_type == DSC_LISTWISE) + continue; + + for (i = 0; i < dsc->var_cnt; i++) + { + struct dsc_var *dv = &dsc->vars[i]; + double x = c->data[dv->v->fv].f; + + if (dsc->missing_type != DSC_LISTWISE + && (x == SYSMIS + || (!dsc->include_user_missing + && is_num_user_missing (x, dv->v)))) + continue; + + if (dv->moments != NULL) + moments_pass_two (dv->moments, x, weight); + } + } + casereader_destroy (reader); + } + + /* Calculate results. */ + for (i = 0; i < dsc->var_cnt; i++) + { + struct dsc_var *dv = &dsc->vars[i]; + double W; + int j; + + for (j = 0; j < DSC_N_STATS; j++) + dv->stats[j] = SYSMIS; + + dv->valid = W = dsc->valid - dv->missing; + + if (dv->moments != NULL) + moments_calculate (dv->moments, NULL, + &dv->stats[DSC_MEAN], &dv->stats[DSC_VARIANCE], + &dv->stats[DSC_SKEWNESS], &dv->stats[DSC_KURTOSIS]); + if (dsc->calc_stats & (1ul << DSC_SEMEAN) + && dv->stats[DSC_VARIANCE] != SYSMIS && W > 0.) + dv->stats[DSC_SEMEAN] = sqrt (dv->stats[DSC_VARIANCE]) / sqrt (W); + if (dsc->calc_stats & (1ul << DSC_STDDEV) + && dv->stats[DSC_VARIANCE] != SYSMIS) + dv->stats[DSC_STDDEV] = sqrt (dv->stats[DSC_VARIANCE]); + if (dsc->calc_stats & (1ul << DSC_SEKURT)) + if (dv->stats[DSC_KURTOSIS] != SYSMIS) + dv->stats[DSC_SEKURT] = calc_sekurt (W); + if (dsc->calc_stats & (1ul << DSC_SESKEW) + && dv->stats[DSC_SKEWNESS] != SYSMIS) + dv->stats[DSC_SESKEW] = calc_seskew (W); + dv->stats[DSC_RANGE] = ((dv->min == DBL_MAX || dv->max == -DBL_MAX) + ? SYSMIS : dv->max - dv->min); + dv->stats[DSC_MIN] = dv->min == DBL_MAX ? SYSMIS : dv->min; + dv->stats[DSC_MAX] = dv->max == -DBL_MAX ? SYSMIS : dv->max; + if (dsc->calc_stats & (1ul << DSC_SUM)) + dv->stats[DSC_SUM] = W * dv->stats[DSC_MEAN]; + } + + /* Output results. */ + display (dsc); +} + +/* Returns nonzero if any of the descriptives variables in DSC's + variable list have missing values in case C, zero otherwise. */ +static int +listwise_missing (struct dsc_proc *dsc, const struct ccase *c) +{ + int i; + + for (i = 0; i < dsc->var_cnt; i++) + { + struct dsc_var *dv = &dsc->vars[i]; + double x = c->data[dv->v->fv].f; + + if (x == SYSMIS + || (!dsc->include_user_missing && is_num_user_missing (x, dv->v))) + return 1; + } + return 0; +} + +/* Statistical display. */ + +static algo_compare_func descriptives_compare_dsc_vars; + +/* Displays a table of descriptive statistics for DSC. */ +static void +display (struct dsc_proc *dsc) +{ + int i, j; + int nc; + struct tab_table *t; + + nc = 1 + (dsc->format == DSC_SERIAL ? 2 : 1); + for (i = 0; i < DSC_N_STATS; i++) + if (dsc->show_stats & (1ul << i)) + nc++; + + if (dsc->sort_by_stat != DSC_NONE) + sort (dsc->vars, dsc->var_cnt, sizeof *dsc->vars, + descriptives_compare_dsc_vars, dsc); + + t = tab_create (nc, dsc->var_cnt + 1, 0); + tab_headers (t, 1, 0, 1, 0); + tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, dsc->var_cnt); + tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, dsc->var_cnt); + tab_hline (t, TAL_2, 0, nc - 1, 1); + tab_vline (t, TAL_2, 1, 0, dsc->var_cnt); + tab_dim (t, tab_natural_dimensions); + + nc = 0; + tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable")); + if (dsc->format == DSC_SERIAL) + { + tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N")); + tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N")); + } + else + tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N"); + + for (i = 0; i < DSC_N_STATS; i++) + if (dsc->show_stats & (1ul << i)) + { + const char *title = gettext (dsc_info[i].name); + tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title); + } + + for (i = 0; i < dsc->var_cnt; i++) + { + struct dsc_var *dv = &dsc->vars[i]; + + nc = 0; + tab_text (t, nc++, i + 1, TAB_LEFT, dv->v->name); + tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid); + if (dsc->format == DSC_SERIAL) + tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing); + for (j = 0; j < DSC_N_STATS; j++) + if (dsc->show_stats & (1ul << j)) + tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3); + } + + tab_title (t, 1, _("Valid cases = %g; cases with missing value(s) = %g."), + dsc->valid, dsc->missing_listwise); + + tab_submit (t); +} + +/* Compares `struct dsc_var's A and B according to the ordering + specified by CMD. */ +static int +descriptives_compare_dsc_vars (const void *a_, const void *b_, void *dsc_) +{ + const struct dsc_var *a = a_; + const struct dsc_var *b = b_; + struct dsc_proc *dsc = dsc_; + + int result; + + if (dsc->sort_by_stat == DSC_NAME) + result = strcmp (a->v->name, b->v->name); + else + { + double as = a->stats[dsc->sort_by_stat]; + double bs = b->stats[dsc->sort_by_stat]; + + result = as < bs ? -1 : as > bs; + } + + if (!dsc->sort_ascending) + result = -result; + + return result; +}