From 6a8d0d2d8bf801c21d71d6c3317ab97d110175cf Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 20 Jun 2019 11:10:00 +0200 Subject: [PATCH] Re-implement MEANS. Fixes bug #44264. --- NEWS | 2 + Smake | 1 + doc/statistics.texi | 6 +- src/language/stats/automake.mk | 3 + src/language/stats/means-calc.c | 451 +++++++ src/language/stats/means-parser.c | 305 +++++ src/language/stats/means.c | 1812 ++++++++++++++--------------- src/language/stats/means.h | 163 +++ tests/language/stats/means.at | 1205 +++++++++++++------ 9 files changed, 2619 insertions(+), 1329 deletions(-) create mode 100644 src/language/stats/means-calc.c create mode 100644 src/language/stats/means-parser.c create mode 100644 src/language/stats/means.h diff --git a/NEWS b/NEWS index b2fbdc60c7..800ff8f1e7 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,8 @@ Changes from 1.2.0 to 1.3.0: be used to save the cases' cluster membership and/or their distance from the cluster centre to the active file. + * The MEANS command has been re-implemented. + * A bug where the GUI would crash when T-TEST was executed whilst a filter was set has been fixed. diff --git a/Smake b/Smake index 5669ad079f..abe67b3db1 100644 --- a/Smake +++ b/Smake @@ -37,6 +37,7 @@ GNULIB_MODULES = \ close \ configmake \ count-one-bits \ + count-leading-zeros \ crc \ crypto/md4 \ crypto/rijndael \ diff --git a/doc/statistics.texi b/doc/statistics.texi index 89ab28e1a6..522ef4d310 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -1097,7 +1097,7 @@ MEANS [TABLES =] [ALL] [NONE] ] - [/MISSING = [TABLE] [INCLUDE] [DEPENDENT]] + [/MISSING = [INCLUDE] [DEPENDENT]] @end display You can use the @cmd{MEANS} command to calculate the arithmetic mean and similar @@ -1201,10 +1201,6 @@ encountered. This behaviour can be modified with the @subcmd{/MISSING} subcommand. Three options are possible: @subcmd{TABLE}, @subcmd{INCLUDE} and @subcmd{DEPENDENT}. -@subcmd{/MISSING = TABLE} causes cases to be dropped if any variable is missing -in the table specification currently being processed, regardless of -whether it is needed to calculate the statistic. - @subcmd{/MISSING = INCLUDE} says that user missing values, either in the dependent variables or in the categorical variables should be taken at their face value, and not excluded. diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk index 33d9cd4c62..aa385529eb 100644 --- a/src/language/stats/automake.mk +++ b/src/language/stats/automake.mk @@ -54,6 +54,9 @@ language_stats_sources = \ src/language/stats/mann-whitney.c \ src/language/stats/mann-whitney.h \ src/language/stats/means.c \ + src/language/stats/means.h \ + src/language/stats/means-calc.c \ + src/language/stats/means-parser.c \ src/language/stats/mcnemar.c \ src/language/stats/mcnemar.h \ src/language/stats/median.c \ diff --git a/src/language/stats/means-calc.c b/src/language/stats/means-calc.c new file mode 100644 index 0000000000..7e5142fd78 --- /dev/null +++ b/src/language/stats/means-calc.c @@ -0,0 +1,451 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "data/case.h" +#include "data/format.h" +#include "data/variable.h" + +#include "libpspp/bt.h" +#include "libpspp/hmap.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" + +#include "math/moments.h" +#include "output/pivot-table.h" + +#include + +#include "means.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) (msgid) + +struct statistic +{ +}; + +struct statistic_simple +{ + struct statistic parent; + double acc; +}; + +struct statistic_moment +{ + struct statistic parent; + struct moments1 *mom; +}; + +static struct statistic * +default_create (struct pool *pool) +{ + struct statistic_moment *pvd = pool_alloc (pool, sizeof *pvd); + + pvd->mom = moments1_create (MOMENT_KURTOSIS); + + return (struct statistic *) pvd; +} + +static void +default_update (struct statistic *stat, double w, double x) +{ + struct statistic_moment *pvd = (struct statistic_moment *) stat; + + moments1_add (pvd->mom, x, w); +} + +static void +default_destroy (struct statistic *stat) +{ + struct statistic_moment *pvd = (struct statistic_moment *) stat; + moments1_destroy (pvd->mom); +} + +static void +simple_destroy (struct statistic *stat UNUSED) +{ +} + + + +/* HARMONIC MEAN: The reciprocal of the sum of the reciprocals: + 1 / ( 1/(x_0) + 1/(x_1) + ... + 1/(x_{n-1}) ) */ + +struct harmonic_mean +{ + struct statistic parent; + double rsum; + double n; +}; + +static struct statistic * +harmonic_create (struct pool *pool) +{ + struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm); + + hm->rsum = 0; + hm->n = 0; + + return (struct statistic *) hm; +} + + +static void +harmonic_update (struct statistic *stat, double w, double x) +{ + struct harmonic_mean *hm = (struct harmonic_mean *) stat; + hm->rsum += w / x; + hm->n += w; +} + + +static double +harmonic_get (const struct statistic *pvd) +{ + const struct harmonic_mean *hm = (const struct harmonic_mean *) pvd; + + return hm->n / hm->rsum; +} + + + +/* GEOMETRIC MEAN: The nth root of the product of all n observations + pow ((x_0 * x_1 * ... x_{n - 1}), 1/n) */ +struct geometric_mean +{ + struct statistic parent; + double prod; + double n; +}; + +static struct statistic * +geometric_create (struct pool *pool) +{ + struct geometric_mean *gm = pool_alloc (pool, sizeof *gm); + + gm->prod = 1.0; + gm->n = 0; + + return (struct statistic *) gm; +} + +static void +geometric_update (struct statistic *pvd, double w, double x) +{ + struct geometric_mean *gm = (struct geometric_mean *)pvd; + gm->prod *= pow (x, w); + gm->n += w; +} + + +static double +geometric_get (const struct statistic *pvd) +{ + const struct geometric_mean *gm = (const struct geometric_mean *)pvd; + return pow (gm->prod, 1.0 / gm->n); +} + + + +static double +sum_get (const struct statistic *pvd) +{ + double n, mean; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, 0, 0, 0); + + return mean * n; +} + + +static double +n_get (const struct statistic *pvd) +{ + double n; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, 0, 0, 0, 0); + + return n; +} + +static double +arithmean_get (const struct statistic *pvd) +{ + double n, mean; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, 0, 0, 0); + + return mean; +} + +static double +variance_get (const struct statistic *pvd) +{ + double n, mean, variance; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, &variance, 0, 0); + + return variance; +} + + +static double +stddev_get (const struct statistic *pvd) +{ + return sqrt (variance_get (pvd)); +} + + + + +static double +skew_get (const struct statistic *pvd) +{ + double skew; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, NULL, NULL, NULL, &skew, 0); + + return skew; +} + +static double +sekurt_get (const struct statistic *pvd) +{ + double n; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL); + + return calc_sekurt (n); +} + +static double +seskew_get (const struct statistic *pvd) +{ + double n; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL); + + return calc_seskew (n); +} + +static double +kurt_get (const struct statistic *pvd) +{ + double kurt; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, NULL, NULL, NULL, NULL, &kurt); + + return kurt; +} + +static double +semean_get (const struct statistic *pvd) +{ + double n, var; + + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, &var, NULL, NULL); + + return sqrt (var / n); +} + + + +/* MIN: The smallest (closest to minus infinity) value. */ + +static struct statistic * +min_create (struct pool *pool) +{ + struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd); + + pvd->acc = DBL_MAX; + + return (struct statistic *) pvd; +} + +static void +min_update (struct statistic *pvd, double w UNUSED, double x) +{ + double *r = &((struct statistic_simple *)pvd)->acc; + + if (x < *r) + *r = x; +} + +static double +min_get (const struct statistic *pvd) +{ + double *r = &((struct statistic_simple *)pvd)->acc; + + return *r; +} + +/* MAX: The largest (closest to plus infinity) value. */ + +static struct statistic * +max_create (struct pool *pool) +{ + struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd); + + pvd->acc = -DBL_MAX; + + return (struct statistic *) pvd; +} + +static void +max_update (struct statistic *pvd, double w UNUSED, double x) +{ + double *r = &((struct statistic_simple *)pvd)->acc; + + if (x > *r) + *r = x; +} + +static double +max_get (const struct statistic *pvd) +{ + double *r = &((struct statistic_simple *)pvd)->acc; + + return *r; +} + + + +struct range +{ + struct statistic parent; + double min; + double max; +}; + +static struct statistic * +range_create (struct pool *pool) +{ + struct range *r = pool_alloc (pool, sizeof *r); + + r->min = DBL_MAX; + r->max = -DBL_MAX; + + return (struct statistic *) r; +} + +static void +range_update (struct statistic *pvd, double w UNUSED, double x) +{ + struct range *r = (struct range *) pvd; + + if (x > r->max) + r->max = x; + + if (x < r->min) + r->min = x; +} + +static double +range_get (const struct statistic *pvd) +{ + const struct range *r = (struct range *) pvd; + + return r->max - r->min; +} + + + +/* LAST: The last value (the one closest to the end of the file). */ + +static struct statistic * +last_create (struct pool *pool) +{ + struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd); + + return (struct statistic *) pvd; +} + +static void +last_update (struct statistic *pvd, double w UNUSED, double x) +{ + struct statistic_simple *stat = (struct statistic_simple *) pvd; + + stat->acc = x; +} + +static double +last_get (const struct statistic *pvd) +{ + const struct statistic_simple *stat = (struct statistic_simple *) pvd; + + return stat->acc; +} + +/* FIRST: The first value (the one closest to the start of the file). */ + +static struct statistic * +first_create (struct pool *pool) +{ + struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd); + + pvd->acc = SYSMIS; + + return (struct statistic *) pvd; +} + +static void +first_update (struct statistic *pvd, double w UNUSED, double x) +{ + struct statistic_simple *stat = (struct statistic_simple *) pvd; + + if (stat->acc == SYSMIS) + stat->acc = x; +} + +static double +first_get (const struct statistic *pvd) +{ + const struct statistic_simple *stat = (struct statistic_simple *) pvd; + + return stat->acc; +} + +/* Table of cell_specs */ +const struct cell_spec cell_spec[n_MEANS_STATISTICS] = { + {N_("Mean"), "MEAN", PIVOT_RC_OTHER, default_create, default_update, arithmean_get, default_destroy}, + {N_("N"), "COUNT", PIVOT_RC_COUNT, default_create, default_update, n_get, default_destroy}, + {N_("Std. Deviation"), "STDDEV", PIVOT_RC_OTHER, default_create, default_update, stddev_get, default_destroy}, +#if 0 + {N_("Median"), "MEDIAN", PIVOT_RC_OTHER, default_create, default_update, NULL, default_destroy}, + {N_("Group Median"), "GMEDIAN", PIVOT_RC_OTHER, default_create, default_update, NULL, default_destroy}, +#endif + {N_("S.E. Mean"), "SEMEAN", PIVOT_RC_OTHER, default_create, default_update, semean_get, default_destroy}, + {N_("Sum"), "SUM", PIVOT_RC_OTHER, default_create, default_update, sum_get, default_destroy}, + {N_("Minimum"), "MIN", PIVOT_RC_OTHER, min_create, min_update, min_get, simple_destroy}, + {N_("Maximum"), "MAX", PIVOT_RC_OTHER, max_create, max_update, max_get, simple_destroy}, + {N_("Range"), "RANGE", PIVOT_RC_OTHER, range_create, range_update, range_get, simple_destroy}, + {N_("Variance"), "VARIANCE", PIVOT_RC_OTHER, default_create, default_update, variance_get, default_destroy}, + {N_("Kurtosis"), "KURT", PIVOT_RC_OTHER, default_create, default_update, kurt_get, default_destroy}, + {N_("S.E. Kurt"), "SEKURT", PIVOT_RC_OTHER, default_create, default_update, sekurt_get, default_destroy}, + {N_("Skewness"), "SKEW", PIVOT_RC_OTHER, default_create, default_update, skew_get, default_destroy}, + {N_("S.E. Skew"), "SESKEW", PIVOT_RC_OTHER, default_create, default_update, seskew_get, default_destroy}, + {N_("First"), "FIRST", PIVOT_RC_OTHER, first_create, first_update, first_get, simple_destroy}, + {N_("Last"), "LAST", PIVOT_RC_OTHER, last_create, last_update, last_get, simple_destroy}, +#if 0 + {N_("Percent N"), "NPCT", PIVOT_RC_PERCENT, default_create, default_update, NULL, default_destroy}, + {N_("Percent Sum"), "SPCT", PIVOT_RC_PERCENT, default_create, default_update, NULL, default_destroy}, +#endif + {N_("Harmonic Mean"), "HARMONIC", PIVOT_RC_OTHER, harmonic_create, harmonic_update, harmonic_get, simple_destroy}, + {N_("Geom. Mean"), "GEOMETRIC", PIVOT_RC_OTHER, geometric_create, geometric_update, geometric_get, simple_destroy} +}; diff --git a/src/language/stats/means-parser.c b/src/language/stats/means-parser.c new file mode 100644 index 0000000000..50e25d9c34 --- /dev/null +++ b/src/language/stats/means-parser.c @@ -0,0 +1,305 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "data/case.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/variable.h" + +#include "language/command.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" + +#include "libpspp/hmap.h" +#include "libpspp/bt.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" + +#include "means.h" + +/* Parse the /TABLES stanza of the command. */ +static bool +parse_means_table_syntax (struct lexer *lexer, const struct means *cmd, + struct mtable *table) +{ + memset (table, 0, sizeof *table); + + /* Dependent variable (s) */ + if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict, + &table->dep_vars, &table->n_dep_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + return false; + + /* Factor variable (s) */ + while (lex_match (lexer, T_BY)) + { + struct layer *layer = pool_zalloc (cmd->pool, sizeof *layer); + + table->layers = + pool_nrealloc (cmd->pool, table->layers, table->n_layers + 1, + sizeof *table->layers); + table->layers[table->n_layers] = layer; + table->n_layers++; + + if (!parse_variables_const_pool + (lexer, cmd->pool, cmd->dict, + &layer->factor_vars, + &layer->n_factor_vars, + PV_NO_DUPLICATE)) + return false; + } + + return true; +} + +/* Match a variable. + If the match succeeds, the variable will be placed in VAR. + Returns true if successful */ +static bool +lex_is_variable (struct lexer *lexer, const struct dictionary *dict, + int n) +{ + const char *tstr; + if (lex_next_token (lexer, n) != T_ID) + return false; + + tstr = lex_next_tokcstr (lexer, n); + + if (NULL == dict_lookup_var (dict, tstr) ) + return false; + + return true; +} + +static bool +means_parse (struct lexer *lexer, struct means *means) +{ + /* Optional TABLES = */ + if (lex_match_id (lexer, "TABLES")) + { + if (! lex_force_match (lexer, T_EQUALS)) + return false; + } + + bool more_tables = true; + /* Parse the "tables" */ + while (more_tables) + { + means->table = pool_realloc (means->pool, means->table, + (means->n_tables + 1) * sizeof (*means->table)); + + if (! parse_means_table_syntax (lexer, means, + &means->table[means->n_tables])) + { + return false; + } + means->n_tables ++; + + /* Look ahead to see if there are more tables to be parsed */ + more_tables = false; + if ( T_SLASH == lex_next_token (lexer, 0) ) + { + if (lex_is_variable (lexer, means->dict, 1) ) + { + more_tables = true; + lex_match (lexer, T_SLASH); + } + } + } + + /* /MISSING subcommand */ + while (lex_token (lexer) != T_ENDCMD) + { + lex_match (lexer, T_SLASH); + + if (lex_match_id (lexer, "MISSING")) + { + /* + If no MISSING subcommand is specified, each combination of + a dependent variable and categorical variables is handled + separately. + */ + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "INCLUDE")) + { + /* + Use the subcommand "/MISSING=INCLUDE" to include user-missing + values in the analysis. + */ + + means->ctrl_exclude = MV_SYSTEM; + means->dep_exclude = MV_SYSTEM; + } + else if (lex_match_id (lexer, "DEPENDENT")) + /* + Use the command "/MISSING=DEPENDENT" to + include user-missing values for the categorical variables, + while excluding them for the dependent variables. + + Cases are dropped only when user-missing values + appear in dependent variables. User-missing + values for categorical variables are treated according to + their face value. + + Cases are ALWAYS dropped when System Missing values appear + in the categorical variables. + */ + { + means->dep_exclude = MV_ANY; + means->ctrl_exclude = MV_SYSTEM; + } + else + { + lex_error (lexer, NULL); + return false; + } + } + else if (lex_match_id (lexer, "CELLS")) + { + lex_match (lexer, T_EQUALS); + + /* The default values become overwritten */ + means->n_statistics = 0; + pool_free (means->pool, means->statistics); + means->statistics = 0; + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { + if (lex_match (lexer, T_ALL)) + { + pool_free (means->pool, means->statistics); + means->statistics = pool_calloc (means->pool, + n_MEANS_STATISTICS, + sizeof (*means->statistics)); + means->n_statistics = n_MEANS_STATISTICS; + int i; + for (i = 0; i < n_MEANS_STATISTICS; ++i) + { + means->statistics[i] = i; + } + } + else if (lex_match_id (lexer, "NONE")) + { + means->n_statistics = 0; + pool_free (means->pool, means->statistics); + means->statistics = 0; + } + else if (lex_match_id (lexer, "DEFAULT")) + { + pool_free (means->pool, means->statistics); + means->statistics = pool_calloc (means->pool, + 3, + sizeof *means->statistics); + means->statistics[0] = MEANS_MEAN; + means->statistics[1] = MEANS_N; + means->statistics[2] = MEANS_STDDEV; + } + else + { + int i; + for (i = 0; i < n_MEANS_STATISTICS; ++i) + { + const struct cell_spec *cs = cell_spec + i; + if (lex_match_id (lexer, cs->keyword)) + { + means->statistics + = pool_realloc (means->pool, + means->statistics, + (means->n_statistics + 1) + * sizeof (*means->statistics)); + + means->statistics[means->n_statistics] = i; + means->n_statistics++; + break; + } + } + + if (i >= n_MEANS_STATISTICS) + { + lex_error (lexer, NULL); + return false; + } + } + } + } + else + { + lex_error (lexer, NULL); + return false; + } + } + return true; +} + + +int +cmd_means (struct lexer *lexer, struct dataset *ds) +{ + struct means means; + means.pool = pool_create (); + + means.ctrl_exclude = MV_ANY; + means.dep_exclude = MV_ANY; + means.table = NULL; + means.n_tables = 0; + + means.dict = dataset_dict (ds); + + means.n_statistics = 3; + means.statistics = pool_calloc (means.pool, 3, sizeof *means.statistics); + means.statistics[0] = MEANS_MEAN; + means.statistics[1] = MEANS_N; + means.statistics[2] = MEANS_STDDEV; + + if (! means_parse (lexer, &means)) + goto error; + + { + struct casegrouper *grouper; + struct casereader *group; + bool ok; + + grouper = casegrouper_create_splits (proc_open (ds), means.dict); + while (casegrouper_get_next_group (grouper, &group)) + { + run_means (&means, group, ds); + } + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + } + + for (int t = 0; t < means.n_tables; ++t) + { + const struct mtable *table = means.table + t; + + means_case_processing_summary (table); + means_shipout (table, &means); + } + destroy_means (&means); + pool_destroy (means.pool); + return CMD_SUCCESS; + + error: + + destroy_means (&means); + pool_destroy (means.pool); + return CMD_FAILURE; +} diff --git a/src/language/stats/means.c b/src/language/stats/means.c index 7e7642adf1..f0174c4ea3 100644 --- a/src/language/stats/means.c +++ b/src/language/stats/means.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc. + Copyright (C) 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,1167 +24,1103 @@ #include "data/format.h" #include "data/variable.h" -#include "language/command.h" -#include "language/lexer/lexer.h" -#include "language/lexer/variable-parser.h" +#include "libpspp/hmap.h" +#include "libpspp/bt.h" +#include "libpspp/hash-functions.h" -#include "libpspp/misc.h" -#include "libpspp/pool.h" - -#include "math/categoricals.h" -#include "math/interaction.h" -#include "math/moments.h" +#include "count-one-bits.h" +#include "count-leading-zeros.h" #include "output/pivot-table.h" -#include +#include "means.h" + #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) (msgid) -struct means; +/* A "cell" in this procedure represents a distinct value of the + procedure's categorical variables, and a set of summary statistics + of all cases which whose categorical variables have that set of + values. For example, the dataset -struct per_var_data -{ - void **cell_stats; - struct moments1 *mom; -}; + v1 v2 cat1 cat2 + 100 202 0 1 + 100 202 0 2 + 100 202 1 0 + 100 202 0 1 -typedef void *stat_create (struct pool *pool); -typedef void stat_update (void *stat, double w, double x); -typedef double stat_get (const struct per_var_data *, void *aux); + has three cells in layer 0 and two cells in layer 1 in addition + to a "grand summary" cell to which all (non-missing) cases + contribute. -struct cell_spec + The cells form a n-ary tree structure with the "grand summary" + cell at the root. +*/ +struct cell { - /* Printable title for output */ - const char *title; - - /* Keyword for syntax */ - const char *keyword; - - stat_create *sc; - stat_update *su; - stat_get *sd; -}; - -struct harmonic_mean -{ - double rsum; - double n; -}; - -static void * -harmonic_create (struct pool *pool) -{ - struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm); - - hm->rsum = 0; - hm->n = 0; + struct hmap_node hmap_node; /* Element in hash table. */ + struct bt_node bt_node; /* Element in binary tree */ - return hm; -} + int n_children; + struct cell_container *children; + /* The statistics to be calculated for the cell. */ + struct statistic **stat; -static void -harmonic_update (void *stat, double w, double x) -{ - struct harmonic_mean *hm = stat; - hm->rsum += w / x; - hm->n += w; -} + /* The parent of this cell, or NULL if this is the root cell. */ + const struct cell *parent_cell; + /* A bit-field variable which indicates which control variables + are allocated a fixed value (for this cell), and which are + "wildcards". -static double -harmonic_get (const struct per_var_data *pvd UNUSED, void *stat) -{ - struct harmonic_mean *hm = stat; - - return hm->n / hm->rsum; -} + A one indicates a fixed value. A zero indicates a wildcard. + Wildcard values are used to calculate totals and sub-totals. + */ + unsigned int not_wild; - + /* The value(s). */ + union value *values; -struct geometric_mean -{ - double prod; - double n; + /* The variables corresponding to the above values. */ + const struct variable **vars; }; - -static void * -geometric_create (struct pool *pool) +/* A structure used to find the union of all values used + within a layer, and to sort those values. */ +struct instance { - struct geometric_mean *gm = pool_alloc (pool, sizeof *gm); + struct hmap_node hmap_node; /* Element in hash table. */ + struct bt_node bt_node; /* Element in binary tree */ - gm->prod = 1.0; - gm->n = 0; + /* A unique, consecutive, zero based index identifying this + instance. */ + int index; - return gm; -} + /* The top level value of this instance. */ + union value value; + const struct variable *var; +}; static void -geometric_update (void *stat, double w, double x) +destroy_workspace (const struct mtable *mt, struct workspace *ws) { - struct geometric_mean *gm = stat; - gm->prod *= pow (x, w); - gm->n += w; + for (int l = 0; l < mt->n_layers; ++l) + { + struct cell_container *instances = ws->instances + l; + struct instance *inst; + struct instance *next; + HMAP_FOR_EACH_SAFE (inst, next, struct instance, hmap_node, + &instances->map) + { + free (inst); + } + hmap_destroy (&instances->map); + } + free (ws->control_idx); + free (ws->instances); } - -static double -geometric_get (const struct per_var_data *pvd UNUSED, void *stat) +/* Destroy CELL. */ +static void +destroy_cell (const struct means *means, + const struct mtable *mt, struct cell *cell) { - struct geometric_mean *gm = stat; - - return pow (gm->prod, 1.0 / gm->n); -} + int idx = 0; + for (int i = 0; i < mt->n_layers; ++i) + { + if (0 == ((cell->not_wild >> i) & 0x1)) + continue; - + const struct layer *layer = mt->layers[i]; + for (int cmb = 0; cmb < mt->n_combinations; ++cmb) + { + struct workspace *ws = mt->ws + cmb; + const struct variable *var + = layer->factor_vars[ws->control_idx[i]]; -static double -sum_get (const struct per_var_data *pvd, void *stat UNUSED) -{ - double n, mean; + int width = var_get_width (var); + value_destroy (&cell->values[idx++], width); + } + } + for (int i = 0; i < cell->n_children; ++i) + { + struct cell_container *container = cell->children + i; + hmap_destroy (&container->map); + } - moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0); + for (int v = 0; v < mt->n_dep_vars; ++v) + { + for (int s = 0; s < means->n_statistics; ++s) + { + stat_destroy *des = cell_spec[means->statistics[s]].sf; + des (cell->stat[s + v * means->n_statistics]); + } + } + free (cell->stat); - return mean * n; + free (cell->children); + free (cell->values); + free (cell->vars); + free (cell); } -static double -n_get (const struct per_var_data *pvd, void *stat UNUSED) -{ - double n; - - moments1_calculate (pvd->mom, &n, 0, 0, 0, 0); - - return n; -} - -static double -arithmean_get (const struct per_var_data *pvd, void *stat UNUSED) +/* Walk the tree in postorder starting from CELL and destroy all the + cells. */ +static void +means_destroy_cells (const struct means *means, struct cell *cell, + const struct mtable *table) { - double n, mean; - - moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0); + for (int i = 0; i < cell->n_children; ++i) + { + struct cell_container *container = cell->children + i; + struct cell *sub_cell; + struct cell *next; + HMAP_FOR_EACH_SAFE (sub_cell, next, struct cell, hmap_node, + &container->map) + { + means_destroy_cells (means, sub_cell, table); + } + } - return mean; + destroy_cell (means, table, cell); } -static double -variance_get (const struct per_var_data *pvd, void *stat UNUSED) +static void +dump_cell (const struct cell *cell, const struct mtable *mt, int level) { - double n, mean, variance; - - moments1_calculate (pvd->mom, &n, &mean, &variance, 0, 0); + for (int l = 0; l < level; ++l) + putchar (' '); + printf ("%p: ", cell); + for (int i = 0; i < mt->n_layers; ++i) + { + putchar (((cell->not_wild >> i) & 0x1) ? 'w' : '.'); + } + printf (" - "); + int x = 0; + for (int i = 0; i < mt->n_layers; ++i) + { + if ((cell->not_wild >> i) & 0x1) + { + printf ("%s: ", var_get_name (cell->vars[x])); + printf ("%g ", cell->values[x++].f); + } + else + printf ("x "); + } + stat_get *sg = cell_spec[MEANS_N].sd; + printf ("--- S1: %g", sg (cell->stat[0])); - return variance; + printf ("--- N Children: %d", cell->n_children); + // printf ("--- Level: %d", level); + printf ("--- Parent: %p", cell->parent_cell); + printf ("\n"); } - -static double -stddev_get (const struct per_var_data *pvd, void *stat) +static void +dump_indeces (const size_t *indexes, int n) { - return sqrt (variance_get (pvd, stat)); + for (int i = 0 ; i < n; ++i) + { + printf ("%ld; ", indexes[i]); + } + printf ("\n"); } - - - -static double -skew_get (const struct per_var_data *pvd, void *stat UNUSED) +/* Dump the tree in pre-order. */ +static void +dump_tree (const struct cell *cell, const struct mtable *table, + int level, const struct cell *parent) { - double skew; - - moments1_calculate (pvd->mom, NULL, NULL, NULL, &skew, 0); + assert (cell->parent_cell == parent); + dump_cell (cell, table, level); - return skew; + for (int i = 0; i < cell->n_children; ++i) + { + struct cell_container *container = cell->children + i; + struct cell *sub_cell; + BT_FOR_EACH (sub_cell, struct cell, bt_node, &container->bt) + { + dump_tree (sub_cell, table, level + 1, cell); + } + } } -static double -sekurt_get (const struct per_var_data *pvd, void *stat UNUSED) +/* Generate a hash based on the values of the N variables in + the array VARS which are taken from the case C. */ +static unsigned int +generate_hash (const struct mtable *mt, + const struct ccase *c, + unsigned int not_wild, + const struct workspace *ws) { - double n; - - moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL); + unsigned int hash = 0; + for (int i = 0; i < mt->n_layers; ++i) + { + if (0 == ((not_wild >> i) & 0x1)) + continue; + + const struct layer *layer = mt->layers[i]; + const struct variable *var = layer->factor_vars[ws->control_idx[i]]; + const union value *vv = case_data (c, var); + int width = var_get_width (var); + hash = hash_int (i, hash); + hash = value_hash (vv, width, hash); + } - return calc_sekurt (n); + return hash; } -static double -seskew_get (const struct per_var_data *pvd, void *stat UNUSED) +/* Create a cell based on the N variables in the array VARS, + which are indeces into the case C. + The caller is responsible for destroying this cell when + no longer needed. */ +static struct cell * +generate_cell (const struct means *means, + const struct mtable *mt, + const struct ccase *c, + unsigned int not_wild, + const struct cell *pcell, + const struct workspace *ws) { - double n; - - moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL); - - return calc_seskew (n); -} + int n_vars = count_one_bits (not_wild); + struct cell *cell = xzalloc ((sizeof *cell)); + cell->values = xcalloc (n_vars, sizeof *cell->values); + cell->vars = xcalloc (n_vars, sizeof *cell->vars); + cell->not_wild = not_wild; + + cell->parent_cell = pcell; + cell->n_children = mt->n_layers - + (sizeof (cell->not_wild) * CHAR_BIT) + + count_leading_zeros (cell->not_wild); + + int idx = 0; + for (int i = 0; i < mt->n_layers; ++i) + { + if (0 == ((not_wild >> i) & 0x1)) + continue; + + const struct layer *layer = mt->layers[i]; + const struct variable *var = layer->factor_vars[ws->control_idx[i]]; + const union value *vv = case_data (c, var); + int width = var_get_width (var); + cell->vars[idx] = var; + value_clone (&cell->values[idx++], vv, width); + } + assert (idx == n_vars); -static double -kurt_get (const struct per_var_data *pvd, void *stat UNUSED) -{ - double kurt; + cell->children = xcalloc (cell->n_children, sizeof *cell->children); + for (int i = 0; i < cell->n_children; ++i) + { + struct cell_container *container = cell->children + i; + hmap_init (&container->map); + } - moments1_calculate (pvd->mom, NULL, NULL, NULL, NULL, &kurt); + cell->stat = xcalloc (means->n_statistics * mt->n_dep_vars, sizeof *cell->stat); + for (int v = 0; v < mt->n_dep_vars; ++v) + { + for (int stat = 0; stat < means->n_statistics; ++stat) + { + stat_create *sc = cell_spec[means->statistics[stat]].sc; - return kurt; + cell->stat[stat + v * means->n_statistics] = sc (means->pool); + } + } + return cell; } -static double -semean_get (const struct per_var_data *pvd, void *stat UNUSED) -{ - double n, var; - moments1_calculate (pvd->mom, &n, NULL, &var, NULL, NULL); +/* If a cell based on the N variables in the array VARS, + which are indeces into the case C and whose hash is HASH, + exists in HMAP, then return that cell. + Otherwise, return NULL. */ +static struct cell * +lookup_cell (const struct mtable *mt, + struct hmap *hmap, unsigned int hash, + const struct ccase *c, + unsigned int not_wild, + const struct workspace *ws) +{ + struct cell *cell = NULL; + HMAP_FOR_EACH_WITH_HASH (cell, struct cell, hmap_node, hash, hmap) + { + bool match = true; + int idx = 0; + if (cell->not_wild != not_wild) + continue; + for (int i = 0; i < mt->n_layers; ++i) + { + if (0 == ((cell->not_wild >> i) & 0x1)) + continue; - return sqrt (var / n); + const struct layer *layer = mt->layers[i]; + const struct variable *var = layer->factor_vars[ws->control_idx[i]]; + const union value *vv = case_data (c, var); + int width = var_get_width (var); + assert (var == cell->vars[idx]); + if (!value_equal (vv, &cell->values[idx++], width)) + { + match = false; + break; + } + } + if (match) + return cell; + } + return NULL; } - -static void * -min_create (struct pool *pool) +/* A comparison function used to sort cells in a binary tree. + Only the innermost value needs to be compared, because no + two cells with similar outer values will appear in the same + tree/map. */ +static int +cell_compare_3way (const struct bt_node *a, + const struct bt_node *b, + const void *aux UNUSED) { - double *r = pool_alloc (pool, sizeof *r); + const struct cell *fa = BT_DATA (a, struct cell, bt_node); + const struct cell *fb = BT_DATA (b, struct cell, bt_node); - *r = DBL_MAX; + assert (fa->not_wild == fb->not_wild); + int vidx = count_one_bits (fa->not_wild) - 1; + assert (fa->vars[vidx] == fb->vars[vidx]); - return r; + return value_compare_3way (&fa->values[vidx], + &fb->values[vidx], + var_get_width (fa->vars[vidx])); } -static void -min_update (void *stat, double w UNUSED, double x) +/* A comparison function used to sort cells in a binary tree. */ +static int +compare_instance_3way (const struct bt_node *a, + const struct bt_node *b, + const void *aux UNUSED) { - double *r = stat; + const struct instance *fa = BT_DATA (a, struct instance, bt_node); + const struct instance *fb = BT_DATA (b, struct instance, bt_node); - if (x < *r) - *r = x; -} + assert (fa->var == fb->var); -static double -min_get (const struct per_var_data *pvd UNUSED, void *stat) -{ - double *r = stat; - - return *r; + return value_compare_3way (&fa->value, + &fb->value, + var_get_width (fa->var)); } -static void * -max_create (struct pool *pool) -{ - double *r = pool_alloc (pool, sizeof *r); - *r = -DBL_MAX; +static void arrange_cells (struct workspace *ws, + struct cell *cell, const struct mtable *table); - return r; -} +/* Iterate CONTAINER's map inserting a copy of its elements into + CONTAINER's binary tree. Also, for each layer in TABLE, create + an instance container, containing the union of all elements in + CONTAINER. */ static void -max_update (void *stat, double w UNUSED, double x) +arrange_cell (struct workspace *ws, struct cell_container *container, + const struct mtable *mt) { - double *r = stat; + struct bt *bt = &container->bt; + struct hmap *map = &container->map; + bt_init (bt, cell_compare_3way, NULL); - if (x > *r) - *r = x; -} - -static double -max_get (const struct per_var_data *pvd UNUSED, void *stat) -{ - double *r = stat; - - return *r; -} + struct cell *cell; + HMAP_FOR_EACH (cell, struct cell, hmap_node, map) + { + bt_insert (bt, &cell->bt_node); - + int idx = 0; + for (int i = 0; i < mt->n_layers; ++i) + { + if (0 == ((cell->not_wild >> i) & 0x1)) + continue; -struct range -{ - double min; - double max; -}; + struct cell_container *instances = ws->instances + i; + const struct variable *var = cell->vars[idx]; + int width = var_get_width (var); + unsigned int hash + = value_hash (&cell->values[idx], width, 0); + + struct instance *inst = NULL; + struct instance *next = NULL; + HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance, + hmap_node, + hash, &instances->map) + { + assert (cell->vars[idx] == var); + if (value_equal (&inst->value, + &cell->values[idx], + width)) + { + break; + } + } -static void * -range_create (struct pool *pool) -{ - struct range *r = pool_alloc (pool, sizeof *r); + if (!inst) + { + inst = xzalloc (sizeof *inst); + inst->index = -1; + inst->var = var; + value_clone (&inst->value, &cell->values[idx], + width); + hmap_insert (&instances->map, &inst->hmap_node, hash); + } - r->min = DBL_MAX; - r->max = -DBL_MAX; + idx++; + } - return r; + arrange_cells (ws, cell, mt); + } } +/* Arrange the children and then all the subtotals. */ static void -range_update (void *stat, double w UNUSED, double x) +arrange_cells (struct workspace *ws, struct cell *cell, + const struct mtable *table) { - struct range *r = stat; - - if (x > r->max) - r->max = x; - - if (x < r->min) - r->min = x; + for (int i = 0; i < cell->n_children; ++i) + { + struct cell_container *container = cell->children + i; + arrange_cell (ws, container, table); + } } -static double -range_get (const struct per_var_data *pvd UNUSED, void *stat) -{ - struct range *r = stat; - - return r->max - r->min; -} -static void * -last_create (struct pool *pool) +/* If the top level value in CELL, has an instance in the L_IDX'th layer, + then return that instance. Otherwise return NULL. */ +static const struct instance * +lookup_instance (const struct mtable *mt, const struct workspace *ws, + int l_idx, const struct cell *cell) { - double *l = pool_alloc (pool, sizeof *l); - - return l; + const struct layer *layer = mt->layers[l_idx]; + int n_vals = count_one_bits (cell->not_wild); + const struct variable *var = layer->factor_vars[ws->control_idx[l_idx]]; + const union value *val = cell->values + n_vals - 1; + int width = var_get_width (var); + unsigned int hash = value_hash (val, width, 0); + const struct cell_container *instances = ws->instances + l_idx; + struct instance *inst = NULL; + struct instance *next; + HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, + struct instance, hmap_node, + hash, &instances->map) + { + if (value_equal (val, &inst->value, width)) + break; + } + return inst; } +/* Enter the values into PT. */ static void -last_update (void *stat, double w UNUSED, double x) +populate_table (const struct means *means, const struct mtable *mt, + const struct workspace *ws, + const struct cell *cell, + struct pivot_table *pt) { - double *l = stat; + size_t *indexes = xcalloc (pt->n_dimensions, sizeof *indexes); + for (int v = 0; v < mt->n_dep_vars; ++v) + { + for (int s = 0; s < means->n_statistics; ++s) + { + int i = 0; + if (mt->n_dep_vars > 1) + indexes[i++] = v; + indexes[i++] = s; + int stat = means->statistics[s]; + stat_get *sg = cell_spec[stat].sd; + { + const struct cell *pc = cell; + for (; i < pt->n_dimensions; ++i) + { + int l_idx = pt->n_dimensions - i - 1; + const struct cell_container *instances = ws->instances + l_idx; + if (0 == (cell->not_wild >> l_idx & 0x1U)) + { + indexes [i] = hmap_count (&instances->map); + } + else + { + assert (pc); + const struct instance *inst + = lookup_instance (mt, ws, l_idx, pc); + assert (inst); + indexes [i] = inst->index; + pc = pc->parent_cell; + } + } + } + + int idx = s + v * means->n_statistics; + pivot_table_put (pt, indexes, pt->n_dimensions, + pivot_value_new_number (sg (cell->stat[idx]))); + } + } + free (indexes); - *l = x; + for (int i = 0; i < cell->n_children; ++i) + { + struct cell_container *container = cell->children + i; + struct cell *child = NULL; + BT_FOR_EACH (child, struct cell, bt_node, &container->bt) + { + populate_table (means, mt, ws, child, pt); + } + } } -static double -last_get (const struct per_var_data *pvd UNUSED, void *stat) +static void +create_table_structure (const struct mtable *mt, struct pivot_table *pt, + const struct workspace *ws) { - double *l = stat; + int * lindexes = ws->control_idx; + /* The inner layers are situated rightmost in the table. + So this iteration is in reverse order. */ + for (int l = mt->n_layers -1; l >=0 ; --l) + { + const struct layer *layer = mt->layers[l]; + const struct cell_container *instances = ws->instances + l; + const struct variable *var = layer->factor_vars[lindexes[l]]; + struct pivot_dimension *dim_layer + = pivot_dimension_create (pt, PIVOT_AXIS_ROW, + var_to_string (var)); + dim_layer->root->show_label = true; + + /* Place the values of the control variables as table headings. */ + { + struct instance *inst = NULL; + BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt) + { + struct substring space = SS_LITERAL_INITIALIZER ("\t "); + struct string str; + ds_init_empty (&str); + var_append_value_name (var, + &inst->value, + &str); - return *l; -} + ds_ltrim (&str, space); + pivot_category_create_leaf (dim_layer->root, + pivot_value_new_text (ds_cstr (&str))); -static void * -first_create (struct pool *pool) -{ - double *f = pool_alloc (pool, sizeof *f); - - *f = SYSMIS; + ds_destroy (&str); + } + } - return f; + pivot_category_create_leaf (dim_layer->root, + pivot_value_new_text ("Total")); + } } +/* Initialise C_DES with a string describing the control variable + relating to MT, LINDEXES. */ static void -first_update (void *stat, double w UNUSED, double x) +layers_to_string (const struct mtable *mt, const int *lindexes, + struct string *c_des) { - double *f = stat; - - if (*f == SYSMIS) - *f = x; + for (int l = 0; l < mt->n_layers; ++l) + { + const struct layer *layer = mt->layers[l]; + const struct variable *ctrl_var = layer->factor_vars[lindexes[l]]; + if (l > 0) + ds_put_cstr (c_des, " * "); + ds_put_cstr (c_des, var_get_name (ctrl_var)); + } } -static double -first_get (const struct per_var_data *pvd UNUSED, void *stat) +static void +populate_case_processing_summary (struct pivot_category *pc, + const struct mtable *mt, + const int *lindexes) { - double *f = stat; + struct string ds; + ds_init_empty (&ds); + int l = 0; + for (l = 0; l < mt->n_layers; ++l) + { + const struct layer *layer = mt->layers[l]; + const struct variable *ctrl_var = layer->factor_vars[lindexes[l]]; + if (l > 0) + ds_put_cstr (&ds, " * "); + ds_put_cstr (&ds, var_get_name (ctrl_var)); + } + for (int dv = 0; dv < mt->n_dep_vars; ++dv) + { + struct string dss; + ds_init_empty (&dss); + ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv])); + if (mt->n_layers > 0) + { + ds_put_cstr (&dss, " * "); + ds_put_substring (&dss, ds.ss); + } + pivot_category_create_leaf (pc, + pivot_value_new_text (ds_cstr (&dss))); + ds_destroy (&dss); + } - return *f; + ds_destroy (&ds); } -enum - { - MEANS_MEAN = 0, - MEANS_N, - MEANS_STDDEV - }; - -/* Table of cell_specs */ -static const struct cell_spec cell_spec[] = { - {N_("Mean"), "MEAN", NULL, NULL, arithmean_get}, - {N_("N"), "COUNT", NULL, NULL, n_get}, - {N_("Std. Deviation"), "STDDEV", NULL, NULL, stddev_get}, -#if 0 - {N_("Median"), "MEDIAN", NULL, NULL, NULL}, - {N_("Group Median"), "GMEDIAN", NULL, NULL, NULL}, -#endif - {N_("S.E. Mean"), "SEMEAN", NULL, NULL, semean_get}, - {N_("Sum"), "SUM", NULL, NULL, sum_get}, - {N_("Min"), "MIN", min_create, min_update, min_get}, - {N_("Max"), "MAX", max_create, max_update, max_get}, - {N_("Range"), "RANGE", range_create, range_update, range_get}, - {N_("Variance"), "VARIANCE", NULL, NULL, variance_get}, - {N_("Kurtosis"), "KURT", NULL, NULL, kurt_get}, - {N_("S.E. Kurt"), "SEKURT", NULL, NULL, sekurt_get}, - {N_("Skewness"), "SKEW", NULL, NULL, skew_get}, - {N_("S.E. Skew"), "SESKEW", NULL, NULL, seskew_get}, - {N_("First"), "FIRST", first_create, first_update, first_get}, - {N_("Last"), "LAST", last_create, last_update, last_get}, -#if 0 - {N_("Percent N"), "NPCT", NULL, NULL, NULL}, - {N_("Percent Sum"), "SPCT", NULL, NULL, NULL}, -#endif - {N_("Harmonic Mean"), "HARMONIC", harmonic_create, harmonic_update, harmonic_get}, - {N_("Geom. Mean"), "GEOMETRIC", geometric_create, geometric_update, geometric_get} -}; - -#define n_C (sizeof (cell_spec) / sizeof (struct cell_spec)) - - -struct summary +/* Create the "Case Processing Summary" table. */ +void +means_case_processing_summary (const struct mtable *mt) { - casenumber missing; - casenumber non_missing; -}; - - -struct layer -{ - size_t n_factor_vars; - const struct variable **factor_vars; -}; - -/* The thing parsed after TABLES= */ -struct mtable -{ - size_t n_dep_vars; - const struct variable **dep_vars; - - int n_layers; - struct layer *layers; - - struct interaction **interactions; - struct summary *summary; - - int ii; - - struct categoricals *cats; -}; - -struct means -{ - const struct dictionary *dict; - - struct mtable *table; - size_t n_tables; - - /* Missing value class for categorical variables */ - enum mv_class exclude; - - /* Missing value class for dependent variables */ - enum mv_class dep_exclude; - - bool listwise_exclude; - - /* an array indicating which statistics are to be calculated */ - int *cells; - - /* Size of cells */ - int n_cells; - - /* Pool on which cell functions may allocate data */ - struct pool *pool; -}; - - -static void -run_means (struct means *cmd, struct casereader *input, - const struct dataset *ds); - - - -static bool -parse_means_table_syntax (struct lexer *lexer, const struct means *cmd, struct mtable *table) -{ - table->ii = 0; - table->n_layers = 0; - table->layers = NULL; - table->interactions = NULL; - - /* Dependent variable (s) */ - if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict, - &table->dep_vars, &table->n_dep_vars, - PV_NO_DUPLICATE | PV_NUMERIC)) - return false; - - /* Factor variable (s) */ - while (lex_match (lexer, T_BY)) + struct pivot_table *pt = pivot_table_create (N_("Case Processing Summary")); + + struct pivot_dimension *dim_cases = + pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Cases")); + dim_cases->root->show_label = true; + + struct pivot_category *cats[3]; + cats[0] = pivot_category_create_group (dim_cases->root, + N_("Included"), NULL); + cats[1] = pivot_category_create_group (dim_cases->root, + N_("Excluded"), NULL); + cats[2] = pivot_category_create_group (dim_cases->root, + N_("Total"), NULL); + for (int i = 0; i < 3; ++i) { - table->n_layers++; - table->layers = - pool_realloc (cmd->pool, table->layers, - sizeof (*table->layers) * table->n_layers); - - if (!parse_variables_const_pool - (lexer, cmd->pool, cmd->dict, - &table->layers[table->n_layers - 1].factor_vars, - &table->layers[table->n_layers - 1].n_factor_vars, - PV_NO_DUPLICATE)) - return false; + pivot_category_create_leaf_rc (cats[i], + pivot_value_new_text (N_("N")), + PIVOT_RC_COUNT); + pivot_category_create_leaf_rc (cats[i], + pivot_value_new_text (N_("Percent")), + PIVOT_RC_PERCENT); } - /* There is always at least one layer. - However the final layer is the total, and not - normally considered by the user as a - layer. - */ + struct pivot_dimension *rows = + pivot_dimension_create (pt, PIVOT_AXIS_ROW, N_("Variables")); - table->n_layers++; - table->layers = - pool_realloc (cmd->pool, table->layers, - sizeof (*table->layers) * table->n_layers); - table->layers[table->n_layers - 1].factor_vars = NULL; - table->layers[table->n_layers - 1].n_factor_vars = 0; + for (int cmb = 0; cmb < mt->n_combinations; ++cmb) + { + const struct workspace *ws = mt->ws + cmb; + populate_case_processing_summary (rows->root, mt, ws->control_idx); + for (int dv = 0; dv < mt->n_dep_vars; ++dv) + { + int idx = cmb * mt->n_dep_vars + dv; + const struct summary *summ = mt->summ + idx; + double n_included = summ->n_total - summ->n_missing; + pivot_table_put2 (pt, 5, idx, + pivot_value_new_number (100.0 * summ->n_total / summ->n_total)); + pivot_table_put2 (pt, 4, idx, + pivot_value_new_number (summ->n_total)); + + pivot_table_put2 (pt, 3, idx, + pivot_value_new_number (100.0 * summ->n_missing / summ->n_total)); + pivot_table_put2 (pt, 2, idx, + pivot_value_new_number (summ->n_missing)); + + pivot_table_put2 (pt, 1, idx, + pivot_value_new_number (100.0 * n_included / summ->n_total)); + pivot_table_put2 (pt, 0, idx, + pivot_value_new_number (n_included)); + } + } - return true; + pivot_table_submit (pt); } -/* Match a variable. - If the match succeeds, the variable will be placed in VAR. - Returns true if successful */ -static bool -lex_is_variable (struct lexer *lexer, const struct dictionary *dict, - int n) +static void +means_shipout_single (const struct mtable *mt, const struct means *means, + const struct workspace *ws) { - const char *tstr; - if (lex_next_token (lexer, n) != T_ID) - return false; + struct pivot_table *pt = pivot_table_create (N_("Report")); + pt->omit_empty = true; - tstr = lex_next_tokcstr (lexer, n); + struct pivot_dimension *dim_cells = + pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Statistics")); - if (NULL == dict_lookup_var (dict, tstr) ) - return false; + /* Set the statistics headings, eg "Mean", "Std. Dev" etc. */ + for (int i = 0; i < means->n_statistics; ++i) + { + const struct cell_spec *cs = cell_spec + means->statistics[i]; + pivot_category_create_leaf_rc + (dim_cells->root, + pivot_value_new_text (gettext (cs->title)), cs->rc); + } - return true; + create_table_structure (mt, pt, ws); + populate_table (means, mt, ws, ws->root_cell, pt); + pivot_table_submit (pt); } -int -cmd_means (struct lexer *lexer, struct dataset *ds) +static void +means_shipout_multivar (const struct mtable *mt, const struct means *means, + const struct workspace *ws) { - int t; - int i; - int l; - struct means means; - bool more_tables = true; - - means.pool = pool_create (); - - means.exclude = MV_ANY; - means.dep_exclude = MV_ANY; - means.listwise_exclude = false; - means.table = NULL; - means.n_tables = 0; - - means.dict = dataset_dict (ds); - - means.n_cells = 3; - means.cells = pool_calloc (means.pool, means.n_cells, sizeof (*means.cells)); + struct string dss; + ds_init_empty (&dss); + for (int dv = 0; dv < mt->n_dep_vars; ++dv) + { + ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv])); + if (mt->n_layers > 0) + ds_put_cstr (&dss, " * "); + } + for (int l = 0; l < mt->n_layers; ++l) + { + const struct layer *layer = mt->layers[l]; + const struct variable *var = layer->factor_vars[ws->control_idx[l]]; + ds_put_cstr (&dss, var_get_name (var)); + if (l < mt->n_layers - 1) + ds_put_cstr (&dss, " * "); + } - /* The first three items (MEAN, COUNT, STDDEV) are the default */ - for (i = 0; i < 3; ++i) - means.cells[i] = i; + struct pivot_table *pt = pivot_table_create (ds_cstr (&dss)); + pt->omit_empty = true; + ds_destroy (&dss); + struct pivot_dimension *dim_cells = + pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Variables")); - /* Optional TABLES = */ - if (lex_match_id (lexer, "TABLES")) + for (int i = 0; i < mt->n_dep_vars; ++i) { - if (! lex_force_match (lexer, T_EQUALS)) - goto error; + pivot_category_create_leaf + (dim_cells->root, + pivot_value_new_variable (mt->dep_vars[i])); } + struct pivot_dimension *dim_stats + = pivot_dimension_create (pt, PIVOT_AXIS_ROW, + N_ ("Statistics")); + dim_stats->root->show_label = false; - more_tables = true; - /* Parse the "tables" */ - while (more_tables) + for (int i = 0; i < means->n_statistics; ++i) { - means.n_tables ++; - means.table = pool_realloc (means.pool, means.table, means.n_tables * sizeof (*means.table)); - - if (! parse_means_table_syntax (lexer, &means, - &means.table[means.n_tables - 1])) - { - goto error; - } - - /* Look ahead to see if there are more tables to be parsed */ - more_tables = false; - if ( T_SLASH == lex_next_token (lexer, 0) ) - { - if (lex_is_variable (lexer, means.dict, 1) ) - { - more_tables = true; - lex_match (lexer, T_SLASH); - } - } + const struct cell_spec *cs = cell_spec + means->statistics[i]; + pivot_category_create_leaf_rc + (dim_stats->root, + pivot_value_new_text (gettext (cs->title)), cs->rc); } - /* /MISSING subcommand */ - while (lex_token (lexer) != T_ENDCMD) - { - lex_match (lexer, T_SLASH); + create_table_structure (mt, pt, ws); + populate_table (means, mt, ws, ws->root_cell, pt); + pivot_table_submit (pt); +} - if (lex_match_id (lexer, "MISSING")) +void +means_shipout (const struct mtable *mt, const struct means *means) +{ + for (int cmb = 0; cmb < mt->n_combinations; ++cmb) + { + const struct workspace *ws = mt->ws + cmb; + if (ws->root_cell == NULL) { - /* - If no MISSING subcommand is specified, each combination of - a dependent variable and categorical variables is handled - separately. - */ - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "INCLUDE")) - { - /* - Use the subcommand "/MISSING=INCLUDE" to include user-missing - values in the analysis. - */ - - means.exclude = MV_SYSTEM; - means.dep_exclude = MV_SYSTEM; - } - else if (lex_match_id (lexer, "TABLE")) - /* - This is the default. (I think). - Every case containing a complete set of variables for a given - table. If any variable, categorical or dependent for in a table - is missing (as defined by what?), then that variable will - be dropped FOR THAT TABLE ONLY. - */ - { - means.listwise_exclude = true; - } - else if (lex_match_id (lexer, "DEPENDENT")) - /* - Use the command "/MISSING=DEPENDENT" to - include user-missing values for the categorical variables, - while excluding them for the dependent variables. - - Cases are dropped only when user-missing values - appear in dependent variables. User-missing - values for categorical variables are treated according to - their face value. - - Cases are ALWAYS dropped when System Missing values appear - in the categorical variables. - */ - { - means.dep_exclude = MV_ANY; - means.exclude = MV_SYSTEM; - } - else - { - lex_error (lexer, NULL); - goto error; - } - } - else if (lex_match_id (lexer, "CELLS")) - { - lex_match (lexer, T_EQUALS); - - /* The default values become overwritten */ - means.n_cells = 0; - while (lex_token (lexer) != T_ENDCMD - && lex_token (lexer) != T_SLASH) - { - int k = 0; - if (lex_match (lexer, T_ALL)) - { - int x; - means.cells = - pool_realloc (means.pool, means.cells, - (means.n_cells += n_C) * sizeof (*means.cells)); - - for (x = 0; x < n_C; ++x) - means.cells[means.n_cells - (n_C - 1 - x) - 1] = x; - } - else if (lex_match_id (lexer, "NONE")) - { - /* Do nothing */ - } - else if (lex_match_id (lexer, "DEFAULT")) - { - means.cells = - pool_realloc (means.pool, means.cells, - (means.n_cells += 3) * sizeof (*means.cells)); - - means.cells[means.n_cells - 2 - 1] = MEANS_MEAN; - means.cells[means.n_cells - 1 - 1] = MEANS_N; - means.cells[means.n_cells - 0 - 1] = MEANS_STDDEV; - } - else - { - for (; k < n_C; ++k) - { - if (lex_match_id (lexer, cell_spec[k].keyword)) - { - means.cells = - pool_realloc (means.pool, means.cells, - ++means.n_cells * sizeof (*means.cells)); - - means.cells[means.n_cells - 1] = k; - break; - } - } - } - if (k >= n_C) - { - lex_error (lexer, NULL); - goto error; - } - } + struct string des; + ds_init_empty (&des); + layers_to_string (mt, ws->control_idx, &des); + msg (MW, _("The table \"%s\" has no non-empty control variables." + " No result for this table will be displayed."), + ds_cstr (&des)); + ds_destroy (&des); + continue; } + if (mt->n_dep_vars > 1) + means_shipout_multivar (mt, means, ws); else - { - lex_error (lexer, NULL); - goto error; - } + means_shipout_single (mt, means, ws); } +} + - for (t = 0; t < means.n_tables; ++t) +static bool +control_var_missing (const struct means *means, + const struct mtable *mt, + unsigned int not_wild UNUSED, + const struct ccase *c, + const struct workspace *ws) +{ + bool miss = false; + for (int l = 0; l < mt->n_layers; ++l) { - struct mtable *table = &means.table[t]; - - table->interactions = - pool_calloc (means.pool, table->n_layers, sizeof (*table->interactions)); + /* if (0 == ((not_wild >> l) & 0x1)) */ + /* { */ + /* continue; */ + /* } */ + + const struct layer *layer = mt->layers[l]; + const struct variable *var = layer->factor_vars[ws->control_idx[l]]; + const union value *vv = case_data (c, var); + + miss = var_is_value_missing (var, vv, means->ctrl_exclude); + if (miss) + break; + } - table->summary = - pool_calloc (means.pool, table->n_dep_vars * table->n_layers, sizeof (*table->summary)); + return miss; +} - for (l = 0; l < table->n_layers; ++l) +/* Lookup the set of control variables described by MT, C and NOT_WILD, + in the hash table MAP. If there is no such entry, then create a + cell with these paremeters and add is to MAP. + If the generated cell has childen, repeat for all the children. + Returns the root cell. +*/ +static struct cell * +service_cell_map (const struct means *means, const struct mtable *mt, + const struct ccase *c, + unsigned int not_wild, + struct hmap *map, + const struct cell *pcell, + int level, + const struct workspace *ws) +{ + struct cell *cell = NULL; + if (map) + { + if (!control_var_missing (means, mt, not_wild, c, ws)) { - int v; - const struct layer *lyr = &table->layers[l]; - const int n_vars = lyr->n_factor_vars; - table->interactions[l] = interaction_create (NULL); - for (v = 0; v < n_vars ; ++v) + /* Lookup this set of values in the cell's hash table. */ + unsigned int hash = generate_hash (mt, c, not_wild, ws); + cell = lookup_cell (mt, map, hash, c, not_wild, ws); + + /* If it has not been seen before, then create a new + subcell, with this set of values, and insert it + into the table. */ + if (cell == NULL) { - interaction_add_variable (table->interactions[l], - lyr->factor_vars[v]); + cell = generate_cell (means, mt, c, not_wild, pcell, ws); + hmap_insert (map, &cell->hmap_node, hash); } } } - - { - struct casegrouper *grouper; - struct casereader *group; - bool ok; - - grouper = casegrouper_create_splits (proc_open (ds), means.dict); - while (casegrouper_get_next_group (grouper, &group)) - { - run_means (&means, group, ds); - } - ok = casegrouper_destroy (grouper); - ok = proc_commit (ds) && ok; - } - - for (t = 0; t < means.n_tables; ++t) + else { - int l; - struct mtable *table = &means.table[t]; - if (table->interactions) - for (l = 0; l < table->n_layers; ++l) - { - interaction_destroy (table->interactions[l]); - } + /* This condition should only happen in the root node case. */ + cell = ws->root_cell; + if (cell == NULL && + !control_var_missing (means, mt, not_wild, c, ws)) + cell = generate_cell (means, mt, c, not_wild, pcell, ws); } - pool_destroy (means.pool); - return CMD_SUCCESS; - - error: - - for (t = 0; t < means.n_tables; ++t) + if (cell) { - int l; - struct mtable *table = &means.table[t]; - if (table->interactions) - for (l = 0; l < table->n_layers; ++l) - { - interaction_destroy (table->interactions[l]); - } - } - - pool_destroy (means.pool); - return CMD_FAILURE; -} - - -static bool -is_missing (const struct means *cmd, - const struct variable *dvar, - const struct interaction *iact, - const struct ccase *c) -{ - if ( interaction_case_is_missing (iact, c, cmd->exclude) ) - return true; - + /* Here is where the business really happens! After + testing for missing values, the cell's statistics + are accumulated. */ + if (!control_var_missing (means, mt, not_wild, c, ws)) + { + for (int v = 0; v < mt->n_dep_vars; ++v) + { + const struct variable *dep_var = mt->dep_vars[v]; + const union value *vv = case_data (c, dep_var); + if (var_is_value_missing (dep_var, vv, means->dep_exclude)) + continue; + + for (int stat = 0; stat < means->n_statistics; ++stat) + { + const double weight = dict_get_case_weight (means->dict, c, + NULL); + stat_update *su = cell_spec[means->statistics[stat]].su; + su (cell->stat[stat + v * means->n_statistics], weight, + case_data (c, dep_var)->f); + } + } + } - if (var_is_value_missing (dvar, - case_data (c, dvar), - cmd->dep_exclude)) - return true; + /* Recurse into all the children (if there are any). */ + for (int i = 0; i < cell->n_children; ++i) + { + struct cell_container *cc = cell->children + i; + service_cell_map (means, mt, c, + not_wild | (0x1U << (i + level)), + &cc->map, cell, level + i + 1, ws); + } + } - return false; + return cell; } -static void output_case_processing_summary (const struct mtable *, - const struct variable *wv); - -static void output_report (const struct means *, int, const struct mtable *); - - -struct per_cat_data -{ - struct per_var_data *pvd; - - bool warn; -}; - - +/* Do all the necessary preparation and pre-calculation that + needs to be done before iterating the data. */ static void -destroy_n (const void *aux1 UNUSED, void *aux2, void *user_data) -{ - struct mtable *table = aux2; - int v; - struct per_cat_data *per_cat_data = user_data; - struct per_var_data *pvd = per_cat_data->pvd; - - for (v = 0; v < table->n_dep_vars; ++v) - { - struct per_var_data *pp = &pvd[v]; - moments1_destroy (pp->mom); - } -} - -static void * -create_n (const void *aux1, void *aux2) +prepare_means (struct means *cmd) { - int i, v; - const struct means *means = aux1; - struct mtable *table = aux2; - struct per_cat_data *per_cat_data = pool_malloc (means->pool, sizeof *per_cat_data); - - struct per_var_data *pvd = pool_calloc (means->pool, table->n_dep_vars, sizeof *pvd); - - for (v = 0; v < table->n_dep_vars; ++v) + for (int t = 0; t < cmd->n_tables; ++t) { - enum moment maxmom = MOMENT_KURTOSIS; - struct per_var_data *pp = &pvd[v]; - - pp->cell_stats = pool_calloc (means->pool, means->n_cells, sizeof *pp->cell_stats); + struct mtable *mt = cmd->table + t; + mt->n_combinations = 1; + for (int l = 0; l < mt->n_layers; ++l) + mt->n_combinations *= mt->layers[l]->n_factor_vars; - for (i = 0; i < means->n_cells; ++i) - { - int csi = means->cells[i]; - const struct cell_spec *cs = &cell_spec[csi]; - if (cs->sc) - { - pp->cell_stats[i] = cs->sc (means->pool); - } - } - pp->mom = moments1_create (maxmom); + mt->ws = xzalloc (mt->n_combinations * sizeof (*mt->ws)); + mt->summ = xzalloc (mt->n_combinations * mt->n_dep_vars + * sizeof (*mt->summ)); + for (int i = 0; i < mt->n_combinations; ++i) + { + struct workspace *ws = mt->ws + i; + ws->root_cell = NULL; + ws->control_idx = xzalloc (mt->n_layers + * sizeof *ws->control_idx); + ws->instances = xzalloc (mt->n_layers + * sizeof *ws->instances); + int cmb = i; + for (int l = mt->n_layers - 1; l >= 0; --l) + { + struct cell_container *instances = ws->instances + l; + const struct layer *layer = mt->layers[l]; + ws->control_idx[l] = cmb % layer->n_factor_vars; + cmb /= layer->n_factor_vars; + hmap_init (&instances->map); + } + } } - - - per_cat_data->pvd = pvd; - per_cat_data->warn = true; - return per_cat_data; } + +/* Do all the necessary calculations that occur AFTER iterating + the data. */ static void -update_n (const void *aux1, void *aux2, void *user_data, const struct ccase *c, double weight) +post_means (struct means *cmd) { - int i; - int v = 0; - const struct means *means = aux1; - struct mtable *table = aux2; - struct per_cat_data *per_cat_data = user_data; - - for (v = 0; v < table->n_dep_vars; ++v) + for (int t = 0; t < cmd->n_tables; ++t) { - struct per_var_data *pvd = &per_cat_data->pvd[v]; - - const double x = case_data (c, table->dep_vars[v])->f; - - for (i = 0; i < table->n_layers; ++i) + struct mtable *mt = cmd->table + t; + for (int cmb = 0; cmb < mt->n_combinations; ++cmb) { - if ( is_missing (means, table->dep_vars[v], - table->interactions[i], c)) - goto end; - } - - for (i = 0; i < means->n_cells; ++i) - { - const int csi = means->cells[i]; - const struct cell_spec *cs = &cell_spec[csi]; + struct workspace *ws = mt->ws + cmb; + if (ws->root_cell == NULL) + continue; + arrange_cells (ws, ws->root_cell, mt); + /* The root cell should have no parent. */ + assert (ws->root_cell->parent_cell == 0); + for (int l = 0; l < mt->n_layers; ++l) + { + struct cell_container *instances = ws->instances + l; + bt_init (&instances->bt, compare_instance_3way, NULL); + + /* Iterate the instance hash table, and insert each instance + into the binary tree BT. */ + struct instance *inst; + HMAP_FOR_EACH (inst, struct instance, hmap_node, + &instances->map) + { + bt_insert (&instances->bt, &inst->bt_node); + } - if (cs->su) - cs->su (pvd->cell_stats[i], - weight, x); + /* Iterate the binary tree (in order) and assign the index + member accordingly. */ + int index = 0; + BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt) + { + inst->index = index++; + } + } } - - moments1_add (pvd->mom, x, weight); - - end: - continue; } } + +/* Update the summary information (the missings and the totals). */ static void -calculate_n (const void *aux1, void *aux2, void *user_data) +update_summaries (const struct means *means, struct mtable *mt, + const struct ccase *c, double weight) { - int i; - int v = 0; - struct per_cat_data *per_cat_data = user_data; - const struct means *means = aux1; - struct mtable *table = aux2; - - for (v = 0; v < table->n_dep_vars; ++v) + for (int dv = 0; dv < mt->n_dep_vars; ++dv) { - struct per_var_data *pvd = &per_cat_data->pvd[v]; - for (i = 0; i < means->n_cells; ++i) + for (int cmb = 0; cmb < mt->n_combinations; ++cmb) { - int csi = means->cells[i]; - const struct cell_spec *cs = &cell_spec[csi]; - - if (cs->su) - cs->sd (pvd, pvd->cell_stats[i]); + struct workspace *ws = mt->ws + cmb; + struct summary *summ = mt->summ + + cmb * mt->n_dep_vars + dv; + + summ->n_total += weight; + const struct variable *var = mt->dep_vars[dv]; + const union value *vv = case_data (c, var); + /* First check if the dependent variable is missing. */ + if (var_is_value_missing (var, vv, means->dep_exclude)) + summ->n_missing += weight; + /* If the dep var is not missing, then check each + control variable. */ + else + for (int l = 0; l < mt->n_layers; ++l) + { + const struct layer *layer = mt->layers [l]; + const struct variable *var + = layer->factor_vars[ws->control_idx[l]]; + const union value *vv = case_data (c, var); + if (var_is_value_missing (var, vv, means->ctrl_exclude)) + { + summ->n_missing += weight; + break; + } + } } } } -static void + +void run_means (struct means *cmd, struct casereader *input, const struct dataset *ds UNUSED) { - int t; - const struct variable *wv = dict_get_weight (cmd->dict); - struct ccase *c; + struct ccase *c = NULL; struct casereader *reader; - struct payload payload; - payload.create = create_n; - payload.update = update_n; - payload.calculate = calculate_n; - payload.destroy = destroy_n; - - for (t = 0; t < cmd->n_tables; ++t) - { - struct mtable *table = &cmd->table[t]; - table->cats - = categoricals_create (table->interactions, - table->n_layers, wv, cmd->exclude); - - categoricals_set_payload (table->cats, &payload, cmd, table); - } + prepare_means (cmd); for (reader = input; (c = casereader_read (reader)) != NULL; case_unref (c)) { - for (t = 0; t < cmd->n_tables; ++t) + const double weight + = dict_get_case_weight (cmd->dict, c, NULL); + for (int t = 0; t < cmd->n_tables; ++t) { - bool something_missing = false; - int v; - struct mtable *table = &cmd->table[t]; + struct mtable *mt = cmd->table + t; + update_summaries (cmd, mt, c, weight); - for (v = 0; v < table->n_dep_vars; ++v) + for (int cmb = 0; cmb < mt->n_combinations; ++cmb) { - int i; - for (i = 0; i < table->n_layers; ++i) - { - const bool missing = - is_missing (cmd, table->dep_vars[v], - table->interactions[i], c); - if (missing) - { - something_missing = true; - table->summary[v * table->n_layers + i].missing++; - } - else - table->summary[v * table->n_layers + i].non_missing++; - } - } - if ( something_missing && cmd->listwise_exclude) - continue; + struct workspace *ws = mt->ws + cmb; - categoricals_update (table->cats, c); + ws->root_cell = service_cell_map (cmd, mt, c, + 0U, NULL, NULL, 0, ws); + } } } casereader_destroy (reader); - for (t = 0; t < cmd->n_tables; ++t) - { - struct mtable *table = &cmd->table[t]; - - categoricals_done (table->cats); - } - - - for (t = 0; t < cmd->n_tables; ++t) - { - int i; - const struct mtable *table = &cmd->table[t]; - - output_case_processing_summary (table, wv); - - for (i = 0; i < table->n_layers; ++i) - { - output_report (cmd, i, table); - } - categoricals_destroy (table->cats); - } - + post_means (cmd); } - -static void -output_case_processing_summary (const struct mtable *mt, - const struct variable *wv) +/* Release all resources allocated by this routine. + This does not include those allocated by the parser, + which exclusively use MEANS->pool. */ +void +destroy_means (struct means *means) { - struct pivot_table *table = pivot_table_create ( - N_("Case Processing Summary")); - pivot_table_set_weight_var (table, wv); - - pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), - N_("N"), PIVOT_RC_COUNT, - N_("Percent"), PIVOT_RC_PERCENT); - pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Cases"), - N_("Included"), N_("Excluded"), N_("Total")) - ->root->show_label = true; - - struct pivot_dimension *tables = pivot_dimension_create ( - table, PIVOT_AXIS_ROW, N_("Tables")); - - for (size_t v = 0; v < mt->n_dep_vars; ++v) + for (int t = 0; t < means->n_tables; ++t) { - const struct variable *var = mt->dep_vars[v]; - for (size_t i = 0; i < mt->n_layers; ++i) - { - const int row = v * mt->n_layers + i; - const struct interaction *iact = mt->interactions[i]; - - struct string str = DS_EMPTY_INITIALIZER; - ds_put_format (&str, "%s: ", var_to_string (var)); - interaction_to_string (iact, &str); - int table_idx = pivot_category_create_leaf ( - tables->root, pivot_value_new_user_text_nocopy ( - ds_steal_cstr (&str))); - - const struct summary *s = &mt->summary[row]; - double n_total = s->missing + s->non_missing; - struct entry - { - int stat_idx; - int case_idx; - double x; - } - entries[] = - { - { 0, 0, s->non_missing }, - { 1, 0, s->non_missing / n_total * 100.0 }, - { 0, 1, s->missing }, - { 1, 1, s->missing / n_total * 100.0 }, - { 0, 2, n_total }, - { 1, 2, 100.0 }, - }; - - for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) - { - const struct entry *e = &entries[j]; - pivot_table_put3 (table, e->stat_idx, e->case_idx, table_idx, - pivot_value_new_number (e->x)); - } + const struct mtable *table = means->table + t; + for (int i = 0; i < table->n_combinations; ++i) + { + struct workspace *ws = table->ws + i; + if (ws->root_cell == NULL) + continue; + means_destroy_cells (means, ws->root_cell, table); } - } - - pivot_table_submit (table); -} - -static void -create_interaction_dimensions (struct pivot_table *table, - const struct categoricals *cats, - const struct interaction *iact) -{ - for (size_t i = iact->n_vars; i-- > 0; ) - { - const struct variable *var = iact->vars[i]; - struct pivot_dimension *d = pivot_dimension_create__ ( - table, PIVOT_AXIS_ROW, pivot_value_new_variable (var)); - d->root->show_label = true; - - size_t n; - union value *values = categoricals_get_var_values (cats, var, &n); - for (size_t j = 0; j < n; j++) - pivot_category_create_leaf ( - d->root, pivot_value_new_var_value (var, &values[j])); - } -} - -static void -output_report (const struct means *cmd, int iact_idx, - const struct mtable *mt) -{ - struct pivot_table *table = pivot_table_create (N_("Report")); - table->omit_empty = true; - - struct pivot_dimension *statistics = pivot_dimension_create ( - table, PIVOT_AXIS_COLUMN, N_("Statistics")); - for (int i = 0; i < cmd->n_cells; ++i) - pivot_category_create_leaf ( - statistics->root, pivot_value_new_text (cell_spec[cmd->cells[i]].title)); - - const struct interaction *iact = mt->interactions[iact_idx]; - create_interaction_dimensions (table, mt->cats, iact); - - struct pivot_dimension *dep_dim = pivot_dimension_create ( - table, PIVOT_AXIS_ROW, N_("Dependent Variables")); - - size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); - - size_t n_cats = categoricals_n_count (mt->cats, iact_idx); - for (size_t v = 0; v < mt->n_dep_vars; ++v) - { - indexes[table->n_dimensions - 1] = pivot_category_create_leaf ( - dep_dim->root, pivot_value_new_variable (mt->dep_vars[v])); - - for (size_t i = 0; i < n_cats; ++i) + for (int i = 0; i < table->n_combinations; ++i) { - for (size_t j = 0; j < iact->n_vars; j++) - { - int idx = categoricals_get_value_index_by_category_real ( - mt->cats, iact_idx, i, j); - indexes[table->n_dimensions - 2 - j] = idx; - } - - struct per_cat_data *per_cat_data - = categoricals_get_user_data_by_category_real ( - mt->cats, iact_idx, i); - - const struct per_var_data *pvd = &per_cat_data->pvd[v]; - for (int stat_idx = 0; stat_idx < cmd->n_cells; ++stat_idx) - { - indexes[0] = stat_idx; - const int csi = cmd->cells[stat_idx]; - const struct cell_spec *cs = &cell_spec[csi]; - - double result = cs->sd (pvd, pvd->cell_stats[stat_idx]); - pivot_table_put (table, indexes, table->n_dimensions, - pivot_value_new_number (result)); - } - } + struct workspace *ws = table->ws + i; + destroy_workspace (table, ws); + } + free (table->ws); + free (table->summ); } - free (indexes); - - pivot_table_submit (table); } - diff --git a/src/language/stats/means.h b/src/language/stats/means.h new file mode 100644 index 0000000000..9d93a4f07a --- /dev/null +++ b/src/language/stats/means.h @@ -0,0 +1,163 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef MEANS_H +#define MEANS_H + +#include "libpspp/compiler.h" + +struct cell_container +{ + /* A hash table containing the cells. The table is indexed by a hash + based on the cell's categorical value. */ + struct hmap map; + + /* A binary tree containing the cells. This is + used to sort the elements in order of their categorical + values. */ + struct bt bt; +}; + + + +struct layer +{ + size_t n_factor_vars; + const struct variable **factor_vars; +}; + + +struct statistic; + +typedef struct statistic *stat_create (struct pool *pool); +typedef void stat_update (struct statistic *stat, double w, double x); +typedef double stat_get (const struct statistic *); +typedef void stat_destroy (struct statistic *); + + +struct cell_spec +{ + /* Printable title for output */ + const char *title; + + /* Keyword for syntax */ + const char *keyword; + + /* The result class for the datum. */ + const char *rc; + + stat_create *sc; + stat_update *su; + stat_get *sd; + stat_destroy *sf; +}; + +struct summary +{ + double n_total; + double n_missing; +}; + +/* Intermediate data per table. */ +struct workspace +{ + /* An array of n_layers integers which are used + to permute access into the factor_vars of each layer. */ + int *control_idx; + + /* An array of n_layers cell_containers which hold the union + of instances used respectively by each layer. */ + struct cell_container *instances; + + struct cell *root_cell; +}; + +/* The thing parsed after TABLES= */ +struct mtable +{ + size_t n_dep_vars; + const struct variable **dep_vars; + + struct layer **layers; + int n_layers; + + int n_combinations; + + /* An array of n_combinations workspaces. */ + struct workspace *ws; + + /* An array of n_combinations * n_dep_vars summaries. + These are displayed in the Case Processing + Summary box. */ + struct summary *summ; +}; + +/* A structure created by the parser. Contains the definition of the + what the procedure should calculate. */ +struct means +{ + const struct dictionary *dict; + + /* The "tables" (ie, a definition of how the data should + be broken down). */ + struct mtable *table; + size_t n_tables; + + /* Missing value class for categorical variables. */ + enum mv_class ctrl_exclude; + + /* Missing value class for dependent variables */ + enum mv_class dep_exclude; + + /* The statistics to be calculated for each cell. */ + int *statistics; + int n_statistics; + + /* Pool on which cell functions may allocate data. */ + struct pool *pool; +}; + + + +#define n_MEANS_STATISTICS 17 +extern const struct cell_spec cell_spec[n_MEANS_STATISTICS]; + +/* This enum must be consistent with the array cell_spec (in means-calc.c). + A bitfield instead of enums would in my opinion be + more elegent. However we want the order of the specified + statistics to be retained in the output. */ +enum + { + MEANS_MEAN = 0, + MEANS_N, + MEANS_STDDEV + }; + + + +struct dataset; +struct casereader; +void run_means (struct means *cmd, struct casereader *input, const struct dataset *ds UNUSED); + +void means_shipout (const struct mtable *mt, const struct means *means); + +void means_case_processing_summary (const struct mtable *mt); + + +void destroy_means (struct means *means); + + +#endif diff --git a/tests/language/stats/means.at b/tests/language/stats/means.at index 93ef68ac70..c6e298b5af 100644 --- a/tests/language/stats/means.at +++ b/tests/language/stats/means.at @@ -1,614 +1,1047 @@ dnl PSPP - a program for statistical analysis. -dnl Copyright (C) 2017 Free Software Foundation, Inc. -dnl +dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc. +dnl dnl This program is free software: you can redistribute it and/or modify dnl it under the terms of the GNU General Public License as published by dnl the Free Software Foundation, either version 3 of the License, or dnl (at your option) any later version. -dnl +dnl dnl This program is distributed in the hope that it will be useful, dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the dnl GNU General Public License for more details. -dnl +dnl dnl You should have received a copy of the GNU General Public License dnl along with this program. If not, see . dnl AT_BANNER([MEANS procedure]) -AT_SETUP([MEANS simple example]) +AT_SETUP([MEANS simple]) AT_KEYWORDS([categorical categoricals]) AT_DATA([means-simple.sps], [dnl -SET FORMAT=F12.5. - -data list notable list /score * factor *. -BEGIN DATA. -22 01 -22 01 -29 01 -16 01 -24 02 -21 02 -22 01 -24 01 -19 01 -17 01 -22 01 -17 02 -23 02 -25 02 -20 01 -15 01 -18 01 -26 01 -23 02 -35 02 -20 01 -16 01 -19 01 -14 01 -14 01 -21 01 -END DATA. - -MEANS TABLES = score BY factor. +data list notable list /hand * score * w *. +begin data. +1 17 4 +1 16 5 +2 21 1 +2 22 1 +2 20 8 +end data. + +weight by w. + +means tables = score by hand + /cells = mean count. ]) -AT_CHECK([pspp -O format=csv means-simple.sps], [0], - [dnl +AT_CHECK([pspp -O format=csv means-simple.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -score: factor,26,100.0%,0,.0%,26,100.0% -score: ,26,100.0%,0,.0%,26,100.0% +score * hand,19,100.0%,0,.0%,19,100.0% Table: Report -,factor,Mean,N,Std. Deviation -score,1.00000,19.78947,19.00000,4.03566 -,2.00000,24.00000,7.00000,5.50757 - -Table: Report -,Mean,N,Std. Deviation -score,20.92308,26.00000,4.75750 +hand,Mean,N +1.00,16.44,9 +2.00,20.30,10 +Total,18.47,19 ]) AT_CLEANUP - - -AT_SETUP([MEANS very simple example]) +AT_SETUP([MEANS very simple]) AT_KEYWORDS([categorical categoricals]) -AT_DATA([means-vsimple.sps], [dnl -SET FORMAT=F12.5. - -data list notable list /score. +AT_DATA([very-simple.sps], [dnl +data list notable list /score *. begin data. -1 -1 -2 -2 +17 +17 +17 +16 +17 +16 +16 +16 +16 +21 +22 +20 +20 +20 +20 +20 +20 +20 +20 end data. -means tables = score. +means tables = score + /cells = mean count. ]) -AT_CHECK([pspp -O format=csv means-vsimple.sps], [0], - [dnl +AT_CHECK([pspp -O format=csv very-simple.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -score: ,4,100.0%,0,.0%,4,100.0% +score,19,100.0%,0,.0%,19,100.0% Table: Report -,Mean,N,Std. Deviation -score,1.50000,4.00000,.57735 +Mean,N +18.47,19 ]) AT_CLEANUP - - -AT_SETUP([MEANS default missing]) +AT_SETUP([MEANS empty factor spec]) AT_KEYWORDS([categorical categoricals]) -AT_DATA([means-dmiss.sps], [dnl -SET FORMAT=F12.2. -data list notable list /a * g1 * g2 *. +AT_DATA([means-bad.sps], [dnl +data list list /outcome *. begin data. -3 1 . -4 1 11 -3 1 21 -6 2 21 -2 2 31 -. 2 31 -8 2 31 -7 2 31 +1 +2 +3 end data. -MEANS TABLES = - a BY g1 g2 - BY g2 - /cells = MEAN COUNT - . +MEANS TABLES = outcome + BY. ]) -AT_CHECK([pspp -O format=csv means-dmiss.sps], [0], - [dnl -Table: Case Processing Summary -,Cases,,,,, -,Included,,Excluded,,Total, -,N,Percent,N,Percent,N,Percent -a: g1 × g2,6,75.0%,2,25.0%,8,100.0% -a: g2,6,75.0%,2,25.0%,8,100.0% -a: ,7,87.5%,1,12.5%,8,100.0% +AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore]) -Table: Report -,g1,g2,Mean,N -a,1.00,11.00,4.00,1.00 -,,21.00,3.00,1.00 -,2.00,,6.00,1.00 -,,31.00,5.67,3.00 +AT_CLEANUP -Table: Report -,g2,Mean,N -a,11.00,4.00,1.00 -,21.00,4.50,2.00 -,31.00,5.67,3.00 -Table: Report -,Mean,N -a,5.00,6.00 + +AT_SETUP([MEANS parser bug]) +AT_KEYWORDS([categorical categoricals]) + +dnl This bug caused an infinite loop +AT_DATA([means-bad.sps], [dnl +DATA LIST notable LIST /a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 fylo *. +begin data. +1 2 3 4 5 6 7 8 9 0 11 +end data. + +MEANS TABLES = a1 a2 a3 a4 a5 a6 a7 a8 a9 a10a BY fylo. ]) +AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore]) + AT_CLEANUP -AT_SETUP([MEANS linear stats]) +dnl This example is based upon info from https://libguides.library.kent.edu/SPSS/CompareMeans +AT_SETUP([MEANS default missing behaviour]) AT_KEYWORDS([categorical categoricals]) -dnl Slightly more involved example to test the linear statistics -AT_DATA([means-linear.sps], [dnl -set format F12.4. -data list notable list /id * group * test1 * -begin data. -1 1 85 -2 1 90 -3 1 82 -4 1 75 -5 1 99 -6 2 70 -7 2 66 -8 2 52 -9 2 71 -10 2 50 +AT_DATA([means-missing.sps], [dnl +data list notable list /w * score * a * b *. +begin data + 12 . 0 0 + 13 . 0 1 + 11 . 1 0 + 7 . 1 1 + 5 1 0 . + 91 1 0 0 + 130 1 0 1 + 4 1 1 . + 90 1 1 0 + 72 1 1 1 end data. -add value labels /group 1 "experimental group" 2 "control group". +weight by w. -means test1 by group - /cells = mean count stddev sum min max range variance kurt skew - . +MEANS tables=score + /cells = count. + +MEANS tables=score by a + /cells = count. + +MEANS tables=score by a by b + /cells = count. ]) +AT_CHECK([pspp -O format=csv means-missing.sps], [0], [dnl +Table: Case Processing Summary +,Cases,,,,, +,Included,,Excluded,,Total, +,N,Percent,N,Percent,N,Percent +score,392,90.1%,43,9.9%,435,100.0% + +Table: Report +N +392 -AT_CHECK([pspp -O format=csv means-linear.sps], [0], - [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -test1: group,10,100.0%,0,.0%,10,100.0% -test1: ,10,100.0%,0,.0%,10,100.0% +score * a,392,90.1%,43,9.9%,435,100.0% Table: Report -,group,Mean,N,Std. Deviation,Sum,Min,Max,Range,Variance,Kurtosis,Skewness -test1,experimental group,86.2000,5.0000,8.9833,431.0000,75.0000,99.0000,24.0000,80.7000,.2727,.3858 -,control group,61.8000,5.0000,10.0598,309.0000,50.0000,71.0000,21.0000,101.2000,-3.0437,-.4830 +a,N +.00,226 +1.00,166 +Total,392 + +Table: Case Processing Summary +,Cases,,,,, +,Included,,Excluded,,Total, +,N,Percent,N,Percent,N,Percent +score * a * b,383,88.0%,52,12.0%,435,100.0% Table: Report -,Mean,N,Std. Deviation,Sum,Min,Max,Range,Variance,Kurtosis,Skewness -test1,74.0000,10.0000,15.6915,740.0000,50.0000,99.0000,49.0000,246.2222,-.5759,-.1262 +a,b,N +.00,.00,91 +,1.00,130 +,Total,221 +1.00,.00,90 +,1.00,72 +,Total,162 +Total,.00,181 +,1.00,202 +,Total,383 ]) AT_CLEANUP -AT_SETUP([MEANS standard errors]) +dnl This example from https://www.spss-tutorials.com/spss-means-command/ +AT_SETUP([MEANS two way]) AT_KEYWORDS([categorical categoricals]) -AT_DATA([means-stderr.sps], [dnl -set format F12.4. -data list notable list /id * group * test1 * -begin data. -1 1 85 -2 1 90 -3 1 82 -4 1 75 -5 1 99 -6 1 70 -7 2 66 -8 2 52 -9 2 71 -10 2 50 +AT_DATA([means-freelancer.sps], [dnl +data list notable list /income_2010 * gender sector_2010. +begin data +6072.40 0 5 +12706.65 1 4 +14912.82 0 2 +16338.36 1 5 +22606.99 0 . +23544.95 1 1 +24985.21 0 2 +26586.48 0 1 +29076.24 1 3 +31010.18 0 2 +33190.63 1 1 +35570.67 1 4 +36202.60 1 4 +36205.85 1 2 +36262.56 1 . +38283.56 0 1 +38569.91 1 5 +39057.56 1 4 +39594.68 1 5 +42087.38 0 1 +42370.92 0 2 +42931.32 1 2 +45907.58 0 4 +45911.32 1 . +47227.09 1 3 +50440.71 1 5 +57440.17 1 3 +58918.86 0 5 +59430.07 1 2 +61135.95 0 4 +64193.85 0 4 +64857.02 0 3 +65903.42 0 4 +66592.38 1 3 +70986.10 0 3 +71229.94 0 4 +74663.05 1 4 +76676.14 1 4 +79260.80 0 4 +80311.71 0 4 end data. -means test1 by group - /cells = mean count semean seskew sekurt. +means income_2010 by gender by sector_2010 + /cells count min mean stddev. ]) - -AT_CHECK([pspp -O format=csv means-stderr.sps], [0], - [dnl +AT_CHECK([pspp -O format=csv means-freelancer.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -test1: group,10,100.0%,0,.0%,10,100.0% -test1: ,10,100.0%,0,.0%,10,100.0% +income_2010 * gender * sector_2010,37,92.5%,3,7.5%,40,100.0% Table: Report -,group,Mean,N,S.E. Mean,S.E. Skew,S.E. Kurt -test1,1.0000,83.5000,6.0000,4.2485,.8452,1.7408 -,2.0000,59.7500,4.0000,5.1700,1.0142,2.6186 - -Table: Report -,Mean,N,S.E. Mean,S.E. Skew,S.E. Kurt -test1,74.0000,10.0000,4.9621,.6870,1.3342 +gender,sector_2010,N,Minimum,Mean,Std. Deviation +.00,1.00,3,26586.48,35652.47,8078.46 +,2.00,4,14912.82,28319.78,11482.43 +,3.00,2,64857.02,67921.56,4333.91 +,4.00,7,45907.58,66849.04,11787.11 +,5.00,2,6072.40,32495.63,37368.09 +,Total,18,6072.40,49389.68,22371.48 +1.00,1.00,2,23544.95,28367.79,6820.53 +,2.00,3,36205.85,46189.08,11949.93 +,3.00,4,29076.24,50083.97,16084.44 +,4.00,6,12706.65,45812.78,24995.16 +,5.00,4,16338.36,36235.92,14311.04 +,Total,19,12706.65,42918.90,17851.64 +Total,1.00,5,23544.95,32738.60,7757.62 +,2.00,7,14912.82,35978.05,14309.27 +,3.00,6,29076.24,56029.83,15615.06 +,4.00,13,12706.65,57139.99,21187.85 +,5.00,6,6072.40,34989.15,20146.69 +,Total,37,6072.40,46066.84,20160.12 ]) AT_CLEANUP - -AT_SETUP([MEANS harmonic and geometric means]) +dnl Check that rows are suppressed and that things generally work ok +dnl when there are a 2 way instance contains an unbalanced set of +dnl categorical values. +AT_SETUP([MEANS unbalanced]) AT_KEYWORDS([categorical categoricals]) -AT_DATA([means-hg.sps], [dnl -set format F12.4. -data list notable list /x * y *. +AT_DATA([means-unbalanced.sps], [dnl +data list notable list /b c x *. begin data. -1 3 -2 3 -3 3 -4 3 -5 3 +4 1 123 +3 1 123 +5 0 246 +4 0 246 +3 0 246 end data. +* The data above lack a 5 1 case. -means x y - /cells = mean harmonic geometric -. +means + table=x by b by c + /cells = mean count + . ]) - -AT_CHECK([pspp -O format=csv means-hg.sps], [0], - [dnl +AT_CHECK([pspp -O format=csv means-unbalanced.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -x: ,5,100.0%,0,.0%,5,100.0% -y: ,5,100.0%,0,.0%,5,100.0% +x * b * c,5,100.0%,0,.0%,5,100.0% Table: Report -,Mean,Harmonic Mean,Geom. Mean -x,3.0000,2.1898,2.6052 -y,3.0000,3.0000,3.0000 +b,c,Mean,N +3.00,.00,246.00,1 +,1.00,123.00,1 +,Total,184.50,2 +4.00,.00,246.00,1 +,1.00,123.00,1 +,Total,184.50,2 +5.00,.00,246.00,1 +,Total,246.00,1 +Total,.00,246.00,3 +,1.00,123.00,2 +,Total,196.80,5 ]) AT_CLEANUP - - - - - -AT_SETUP([MEANS all/none/default]) +dnl This example kindly provided by Dana Williams +AT_SETUP([MEANS three way]) AT_KEYWORDS([categorical categoricals]) -dnl Make sure that /CELLS = {ALL,NONE,DEFAULT} work properly -AT_DATA([means-stat-keywords.sps], [dnl -SET FORMAT=F12.2. -SET DECIMAL=DOT. - -DATA LIST NOTABLE LIST /score *. -BEGIN DATA. -22 -22 -29 -16 -23 -END DATA. +AT_DATA([means-threeway.sps], [dnl +data list notable list /score a b c. +begin data. +3 0 0 0 +4 0 0 1 +41 0 0 2 +5 0 1 0 +6 0 1 1 +7 1 0 0 +8 1 0 1 +9 1 1 0 +10 1 1 1 +end data. -MEANS score /CELLS = ALL. -MEANS score /CELLS = DEFAULT. -MEANS score /CELLS = NONE. +means score by a by b by c. ]) - -AT_CHECK([pspp -O format=csv means-stat-keywords.sps], [0], - [dnl +AT_CHECK([pspp -O format=csv means-threeway.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -score: ,5,100.0%,0,.0%,5,100.0% +score * a * b * c,9,100.0%,0,.0%,9,100.0% Table: Report -,Mean,N,Std. Deviation,S.E. Mean,Sum,Min,Max,Range,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,First,Last,Harmonic Mean,Geom. Mean -score,22.40,5.00,4.62,2.06,112.00,16.00,29.00,13.00,21.30,1.85,2.00,.11,.91,22.00,23.00,21.61,22.01 +a,b,c,Mean,N,Std. Deviation +.00,.00,.00,3.00,1,NaN +,,1.00,4.00,1,NaN +,,2.00,41.00,1,NaN +,,Total,16.00,3,21.66 +,1.00,.00,5.00,1,NaN +,,1.00,6.00,1,NaN +,,Total,5.50,2,.71 +,Total,.00,4.00,2,1.41 +,,1.00,5.00,2,1.41 +,,2.00,41.00,1,NaN +,,Total,11.80,5,16.36 +1.00,.00,.00,7.00,1,NaN +,,1.00,8.00,1,NaN +,,Total,7.50,2,.71 +,1.00,.00,9.00,1,NaN +,,1.00,10.00,1,NaN +,,Total,9.50,2,.71 +,Total,.00,8.00,2,1.41 +,,1.00,9.00,2,1.41 +,,Total,8.50,4,1.29 +Total,.00,.00,5.00,2,2.83 +,,1.00,6.00,2,2.83 +,,2.00,41.00,1,NaN +,,Total,12.60,5,16.01 +,1.00,.00,7.00,2,2.83 +,,1.00,8.00,2,2.83 +,,Total,7.50,4,2.38 +,Total,.00,6.00,4,2.58 +,,1.00,7.00,4,2.58 +,,2.00,41.00,1,NaN +,,Total,10.33,9,11.73 +]) -Table: Case Processing Summary -,Cases,,,,, -,Included,,Excluded,,Total, -,N,Percent,N,Percent,N,Percent -score: ,5,100.0%,0,.0%,5,100.0% +AT_CLEANUP -Table: Report -,Mean,N,Std. Deviation -score,22.40,5.00,4.62 +dnl The above example again, but with string variables for +dnl the control vars. +AT_SETUP([MEANS three way string]) +AT_KEYWORDS([categorical categoricals]) + +AT_DATA([means-threeway-string.sps], [dnl +data list notable list /score (f22.2) a (a24) b (a16) c (a8). +begin data. +3 fooberrycrumblexzaQ fosilationwereqd zero +4 fooberrycrumblexzaQ fosilationwereqd one +41 fooberrycrumblexzaQ fosilationwereqd two +5 fooberrycrumblexzaQ onlyonekonboys zero +6 fooberrycrumblexzaQ onlyonekonboys one +7 wontledingbatsXASDF fosilationwereqd zero +8 wontledingbatsXASDF fosilationwereqd one +9 wontledingbatsXASDF onlyonekonboys zero +10 wontledingbatsXASDF onlyonekonboys one +end data. + +means score by a by b by c. +]) +AT_CHECK([pspp -O format=csv means-threeway-string.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -score: ,5,100.0%,0,.0%,5,100.0% +score * a * b * c,9,100.0%,0,.0%,9,100.0% Table: Report +a,b,c,Mean,N,Std. Deviation +fooberrycrumblexzaQ ,fosilationwereqd,one ,4.00,1,NaN +,,two ,41.00,1,NaN +,,zero ,3.00,1,NaN +,,Total,16.00,3,21.66 +,onlyonekonboys ,one ,6.00,1,NaN +,,zero ,5.00,1,NaN +,,Total,5.50,2,.71 +,Total,one ,5.00,2,1.41 +,,two ,41.00,1,NaN +,,zero ,4.00,2,1.41 +,,Total,11.80,5,16.36 +wontledingbatsXASDF ,fosilationwereqd,one ,8.00,1,NaN +,,zero ,7.00,1,NaN +,,Total,7.50,2,.71 +,onlyonekonboys ,one ,10.00,1,NaN +,,zero ,9.00,1,NaN +,,Total,9.50,2,.71 +,Total,one ,9.00,2,1.41 +,,zero ,8.00,2,1.41 +,,Total,8.50,4,1.29 +Total,fosilationwereqd,one ,6.00,2,2.83 +,,two ,41.00,1,NaN +,,zero ,5.00,2,2.83 +,,Total,12.60,5,16.01 +,onlyonekonboys ,one ,8.00,2,2.83 +,,zero ,7.00,2,2.83 +,,Total,7.50,4,2.38 +,Total,one ,7.00,4,2.58 +,,two ,41.00,1,NaN +,,zero ,6.00,4,2.58 +,,Total,10.33,9,11.73 ]) AT_CLEANUP - -AT_SETUP([MEANS missing=table ]) +dnl An example with multiple tables +AT_SETUP([MEANS multiple tables]) AT_KEYWORDS([categorical categoricals]) -AT_DATA([means-miss-table.sps], [dnl -data list notable list /a * b * g1. +AT_DATA([means-multi-table.sps], [dnl +data list notable list /a * b * c * x * y *. begin data. -1 9 1 -2 9 1 -3 9 1 -4 9 2 -5 9 2 -6 9 2 -7 . 2 +6 3 0 123 456 +6 3 1 123 456 +6 4 0 123 456 +6 4 1 123 456 +6 5 0 123 456 +6 5 1 123 456 +7 3 0 123 456 +7 3 1 123 456 +7 4 0 123 456 +7 4 1 123 456 +7 5 0 123 456 +7 5 1 123 456 +8 3 0 123 456 +8 3 1 123 456 +8 4 0 123 456 +8 4 1 123 456 +8 5 0 123 456 +8 5 1 123 456 +9 3 0 123 456 +9 3 1 123 456 +9 4 0 123 456 +9 4 1 123 456 +9 5 0 123 456 +9 5 1 123 456 end data. -MEANS a b BY g1 - /a BY g1 - /cells = COUNT - /missing = TABLE - . -MEANS a b BY g1 - /a BY g1 - /cells = COUNT - . +means table = x by b by c + /x by b + /y by a by b + cells = min count . ]) - -AT_CHECK([pspp -o pspp.csv -o pspp.txt means-miss-table.sps]) -AT_CHECK([cat pspp.csv], [0], [dnl +AT_CHECK([pspp -O format=csv means-multi-table.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -a: g1,7,100.0%,0,.0%,7,100.0% -a: ,7,100.0%,0,.0%,7,100.0% -b: g1,6,85.7%,1,14.3%,7,100.0% -b: ,6,85.7%,1,14.3%,7,100.0% +x * b * c,24,100.0%,0,.0%,24,100.0% Table: Report -,g1,N -a,1.00,3.00 -,2.00,3.00 -b,1.00,3.00 -,2.00,3.00 - -Table: Report -,N -a,6.00 -b,6.00 +b,c,Minimum,N +3.00,.00,123.00,4 +,1.00,123.00,4 +,Total,123.00,8 +4.00,.00,123.00,4 +,1.00,123.00,4 +,Total,123.00,8 +5.00,.00,123.00,4 +,1.00,123.00,4 +,Total,123.00,8 +Total,.00,123.00,12 +,1.00,123.00,12 +,Total,123.00,24 Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -a: g1,7,100.0%,0,.0%,7,100.0% -a: ,7,100.0%,0,.0%,7,100.0% - -Table: Report -,g1,N -a,1.00,3.00 -,2.00,4.00 +x * b,24,100.0%,0,.0%,24,100.0% Table: Report -,N -a,7.00 +b,Minimum,N +3.00,123.00,8 +4.00,123.00,8 +5.00,123.00,8 +Total,123.00,24 Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -a: g1,7,100.0%,0,.0%,7,100.0% -a: ,7,100.0%,0,.0%,7,100.0% -b: g1,6,85.7%,1,14.3%,7,100.0% -b: ,6,85.7%,1,14.3%,7,100.0% +y * a * b,24,100.0%,0,.0%,24,100.0% Table: Report -,g1,N -a,1.00,3.00 -,2.00,4.00 -b,1.00,3.00 -,2.00,3.00 +a,b,Minimum,N +6.00,3.00,456.00,2 +,4.00,456.00,2 +,5.00,456.00,2 +,Total,456.00,6 +7.00,3.00,456.00,2 +,4.00,456.00,2 +,5.00,456.00,2 +,Total,456.00,6 +8.00,3.00,456.00,2 +,4.00,456.00,2 +,5.00,456.00,2 +,Total,456.00,6 +9.00,3.00,456.00,2 +,4.00,456.00,2 +,5.00,456.00,2 +,Total,456.00,6 +Total,3.00,456.00,8 +,4.00,456.00,8 +,5.00,456.00,8 +,Total,456.00,24 +]) -Table: Report -,N -a,7.00 -b,6.00 +AT_CLEANUP + + +dnl An example with more than one dependent variable. +dnl This case uses a somewhat different table layout. +AT_SETUP([MEANS multi variable]) +AT_KEYWORDS([categorical categoricals]) + +AT_DATA([means-multi-variable.sps], [dnl +data list notable list /b c x y. +begin data. +5 1 123 55 +5 1 123 55 +5 1 123 55 +5 1 123 55 +4 1 456 44 +4 1 456 44 +4 1 456 44 +4 1 456 44 +3 1 789 55 +3 1 789 55 +3 1 789 55 +3 1 789 55 +5 0 246 99 +5 0 246 99 +5 0 246 99 +5 0 246 . +4 0 987 99 +4 0 987 99 +4 0 987 99 +4 0 987 99 +3 0 654 11 +3 0 654 11 +3 0 654 11 +3 0 654 11 +end data. + +means + table = x y by b by c + . +]) + +AT_CHECK([pspp -O format=csv means-multi-variable.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -a: g1,7,100.0%,0,.0%,7,100.0% -a: ,7,100.0%,0,.0%,7,100.0% +x * b * c,24,100.0%,0,.0%,24,100.0% +y * b * c,23,95.8%,1,4.2%,24,100.0% + +Table: x * y * b * c +b,c,,x,y +3.00,.00,Mean,654.00,11.00 +,,N,4,4 +,,Std. Deviation,.00,.00 +,1.00,Mean,789.00,55.00 +,,N,4,4 +,,Std. Deviation,.00,.00 +,Total,Mean,721.50,33.00 +,,N,8,8 +,,Std. Deviation,72.16,23.52 +4.00,.00,Mean,987.00,99.00 +,,N,4,4 +,,Std. Deviation,.00,.00 +,1.00,Mean,456.00,44.00 +,,N,4,4 +,,Std. Deviation,.00,.00 +,Total,Mean,721.50,71.50 +,,N,8,8 +,,Std. Deviation,283.83,29.40 +5.00,.00,Mean,246.00,99.00 +,,N,4,3 +,,Std. Deviation,.00,.00 +,1.00,Mean,123.00,55.00 +,,N,4,4 +,,Std. Deviation,.00,.00 +,Total,Mean,184.50,73.86 +,,N,8,7 +,,Std. Deviation,65.75,23.52 +Total,.00,Mean,629.00,67.00 +,,N,12,11 +,,Std. Deviation,316.50,44.40 +,1.00,Mean,456.00,51.33 +,,N,12,12 +,,Std. Deviation,283.98,5.42 +,Total,Mean,542.50,58.83 +,,N,24,23 +,,Std. Deviation,307.06,31.22 +]) -Table: Report -,g1,N -a,1.00,3.00 -,2.00,4.00 -Table: Report -,N -a,7.00 -]) AT_CLEANUP -AT_SETUP([MEANS user missing values]) + +dnl This example is based upon one kindly provided by Dana Williams +dnl It exercises the most complex case where there are multiple +dnl dependent variables AND multiple control variables in each layer. +AT_SETUP([MEANS multi combination]) AT_KEYWORDS([categorical categoricals]) -AT_DATA([means-missing.sps], [dnl -data list notable list /a * b * g1. -begin data. -1 2 9 -2 2 1 -3 2 1 -4 2 2 -5 2 2 -6 2 2 -7 9 2 +AT_DATA([means-multi-combination.sps], [dnl +data list notable list /one two three four five six. +begin data +1 1 1 1 1 1 +2 1 1 1 1 1 +1 2 1 1 1 1 +2 2 1 1 1 1 +1 1 2 1 1 1 +2 1 2 1 1 1 +1 2 2 1 1 1 +2 2 2 1 1 1 +1 1 1 2 1 1 +2 1 1 2 1 1 +1 2 1 2 1 1 +2 2 1 2 1 1 +1 1 2 2 1 1 +2 1 2 2 1 1 +1 2 2 2 1 1 +2 2 2 2 1 1 +1 1 1 1 2 1 +2 1 1 1 2 1 +1 2 1 1 2 1 +2 2 1 1 2 1 +1 1 2 1 2 1 +2 1 2 1 2 1 +1 2 2 1 2 1 +2 2 2 1 2 1 +1 1 1 2 2 1 +2 1 1 2 2 1 +1 2 1 2 2 1 +2 2 1 2 2 1 +1 1 2 2 2 1 +2 1 2 2 2 1 +1 2 2 2 2 1 +2 2 2 2 2 1 +1 1 1 1 1 2 +2 1 1 1 1 2 +1 2 1 1 1 2 +2 2 1 1 1 2 +1 1 2 1 1 2 +2 1 2 1 1 2 +1 2 2 1 1 2 +2 2 2 1 1 2 +1 1 1 2 1 2 +2 1 1 2 1 2 +1 2 1 2 1 2 +2 2 1 2 1 2 +1 1 2 2 1 2 +2 1 2 2 1 2 +1 2 2 2 1 2 +2 2 2 2 1 2 +1 1 1 1 2 2 +2 1 1 1 2 2 +1 2 1 1 2 2 +2 2 1 1 2 2 +1 1 2 1 2 2 +2 1 2 1 2 2 +1 2 2 1 2 2 +2 2 2 1 2 2 +1 1 1 2 2 2 +2 1 1 2 2 2 +1 2 1 2 2 2 +2 2 1 2 2 2 +1 1 2 2 2 2 +2 1 2 2 2 2 +1 2 2 2 2 2 +2 2 2 2 2 2 end data. -MISSING VALUES a b g1 (9). - -MEANS a b BY g1 /cells = COUNT . +recode six (2 = 62) (1 = 61). +recode five (2 = 52) (1 = 51). +recode four (2 = 42) (1 = 41). +recode three (2 = 32) (1 = 31). -MEANS a b BY g1 /cells = COUNT /missing = include . +set format F22.5. -MEANS a b BY g1 /cells = COUNT /missing = dependent . +means tables = one two BY three four BY five six. ]) - -AT_CHECK([pspp -o pspp.csv -o pspp.txt means-missing.sps]) -AT_CHECK([cat pspp.csv], [0], [dnl +AT_CHECK([pspp -O format=csv means-multi-combination.sps], [0], [dnl Table: Case Processing Summary ,Cases,,,,, ,Included,,Excluded,,Total, ,N,Percent,N,Percent,N,Percent -a: g1,6,85.7%,1,14.3%,7,100.0% -a: ,7,100.0%,0,.0%,7,100.0% -b: g1,5,71.4%,2,28.6%,7,100.0% -b: ,6,85.7%,1,14.3%,7,100.0% - -Table: Report -,g1,N -a,1.00,2.00 -,2.00,4.00 -b,1.00,2.00 -,2.00,3.00 +one * three * five,64,100.0%,0,.0%,64,100.0% +two * three * five,64,100.0%,0,.0%,64,100.0% +one * three * six,64,100.0%,0,.0%,64,100.0% +two * three * six,64,100.0%,0,.0%,64,100.0% +one * four * five,64,100.0%,0,.0%,64,100.0% +two * four * five,64,100.0%,0,.0%,64,100.0% +one * four * six,64,100.0%,0,.0%,64,100.0% +two * four * six,64,100.0%,0,.0%,64,100.0% + +Table: one * two * three * five +three,five,,one,two +31.00,51.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,52.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,Total,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +32.00,51.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,52.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,Total,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +Total,51.00,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +,52.00,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +,Total,Mean,1.50000,1.50000 +,,N,64,64 +,,Std. Deviation,.50395,.50395 + +Table: one * two * three * six +three,six,,one,two +31.00,61.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,62.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,Total,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +32.00,61.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,62.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,Total,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +Total,61.00,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +,62.00,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +,Total,Mean,1.50000,1.50000 +,,N,64,64 +,,Std. Deviation,.50395,.50395 + +Table: one * two * four * five +four,five,,one,two +41.00,51.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,52.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,Total,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +42.00,51.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,52.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,Total,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +Total,51.00,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +,52.00,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +,Total,Mean,1.50000,1.50000 +,,N,64,64 +,,Std. Deviation,.50395,.50395 + +Table: one * two * four * six +four,six,,one,two +41.00,61.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,62.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,Total,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +42.00,61.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,62.00,Mean,1.50000,1.50000 +,,N,16,16 +,,Std. Deviation,.51640,.51640 +,Total,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +Total,61.00,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +,62.00,Mean,1.50000,1.50000 +,,N,32,32 +,,Std. Deviation,.50800,.50800 +,Total,Mean,1.50000,1.50000 +,,N,64,64 +,,Std. Deviation,.50395,.50395 +]) -Table: Report -,N -a,6.00 -b,5.00 +AT_CLEANUP -Table: Case Processing Summary -,Cases,,,,, -,Included,,Excluded,,Total, -,N,Percent,N,Percent,N,Percent -a: g1,7,100.0%,0,.0%,7,100.0% -a: ,7,100.0%,0,.0%,7,100.0% -b: g1,7,100.0%,0,.0%,7,100.0% -b: ,7,100.0%,0,.0%,7,100.0% -Table: Report -,g1,N -a,1.00,2.00 -,2.00,4.00 -,9.00,1.00 -b,1.00,2.00 -,2.00,4.00 -,9.00,1.00 +dnl This example was observed to cause a crash in the +dnl destructor. Found by zzuf. +AT_SETUP([MEANS clean up]) +AT_KEYWORDS([categorical categoricals]) -Table: Report -,N -a,7.00 -b,7.00 +AT_DATA([means-bad.sps], [dnl +data list notable list /one two three four five six. +begin data +1 1 1 1 1 1 +2 1 1 1 1 ! +1 2 2 2 2 2 +2 2 2 2 2 2 +end data. -Table: Case Processing Summary -,Cases,,,,, -,Included,,Excluded,,Total, -,N,Percent,N,Percent,N,Percent -a: g1,7,100.0%,0,.0%,7,100.0% -a: ,7,100.0%,0,.0%,7,100.0% -b: g1,6,85.7%,1,14.3%,7,100.0% -b: ,6,85.7%,1,14.3%,7,100.0% +means tables = one two BY thsee four BY five six. +]) -Table: Report -,g1,N -a,1.00,2.00 -,2.00,4.00 -,9.00,1.00 -b,1.00,2.00 -,2.00,3.00 -,9.00,1.00 +AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore]) -Table: Report -,N -a,7.00 -b,6.00 -]) AT_CLEANUP - -AT_SETUP([MEANS empty factor spec]) +dnl Another example which caused a crash. +dnl Found by zzuf. +AT_SETUP([MEANS control all missing]) AT_KEYWORDS([categorical categoricals]) AT_DATA([means-bad.sps], [dnl -data list list /outcome *. +data list notable list /a * b * y * uu *. begin data. -1 -2 -3 +6 3 . 5 +6 3 . 5 +6 4 . 5 end data. -MEANS TABLES = outcome - BY. +means table = b by a by y by uu + . ]) -AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore]) +AT_CHECK([pspp -O format=csv means-bad.sps], [0], [dnl +Table: Case Processing Summary +,Cases,,,,, +,Included,,Excluded,,Total, +,N,Percent,N,Percent,N,Percent +b * a * y * uu,0,.0%,3,100.0%,3,100.0% -AT_CLEANUP +"warning: The table ""a * y * uu"" has no non-empty control variables. No result for this table will be displayed." +]) +AT_CLEANUP -AT_SETUP([MEANS parser bug]) +dnl Do some tests on the MISSING keyword. +AT_SETUP([MEANS missing classes]) AT_KEYWORDS([categorical categoricals]) -dnl This bug caused an infinite loop -AT_DATA([means-bad.sps], [dnl -DATA LIST notable LIST /a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 fylo *. +AT_DATA([means-missing-classes.sps], [dnl +data list notable list /hand * score *. begin data. -1 2 3 4 5 6 7 8 9 0 11 +1 17 +1 17 +1 17 +1 16 +1 17 +1 16 +1 16 +1 . +1 99 +2 21 +2 22 +2 20 +2 20 +2 20 +2 20 +2 20 +2 20 +2 20 +2 20 +9 55 end data. -MEANS TABLES = a1 a2 a3 a4 a5 a6 a7 a8 a9 a10a BY fylo. +missing values score (99). +missing values hand (9). + +means tables=score by hand + /cells = count max + /missing = dependent + . + +means tables=score by hand + /cells = count max + /missing = include + . + +means tables=score by hand + /cells = count max + . + ]) -AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore]) +AT_CHECK([pspp -O format=csv means-missing-classes.sps], [0], [dnl +Table: Case Processing Summary +,Cases,,,,, +,Included,,Excluded,,Total, +,N,Percent,N,Percent,N,Percent +score * hand,18,90.0%,2,10.0%,20,100.0% -AT_CLEANUP +Table: Report +hand,N,Maximum +1.00,7,17.00 +2.00,10,22.00 +9.00,1,55.00 +Total,18,55.00 +Table: Case Processing Summary +,Cases,,,,, +,Included,,Excluded,,Total, +,N,Percent,N,Percent,N,Percent +score * hand,19,95.0%,1,5.0%,20,100.0% + +Table: Report +hand,N,Maximum +1.00,8,99.00 +2.00,10,22.00 +9.00,1,55.00 +Total,19,99.00 + +Table: Case Processing Summary +,Cases,,,,, +,Included,,Excluded,,Total, +,N,Percent,N,Percent,N,Percent +score * hand,17,85.0%,3,15.0%,20,100.0% + +Table: Report +hand,N,Maximum +1.00,7,17.00 +2.00,10,22.00 +Total,17,22.00 +]) + +AT_CLEANUP -- 2.30.2