From 6a8d0d2d8bf801c21d71d6c3317ab97d110175cf Mon Sep 17 00:00:00 2001
From: John Darrington <john@darrington.wattle.id.au>
Date: Thu, 20 Jun 2019 11:10:00 +0200
Subject: [PATCH] Re-implement MEANS.

Fixes bug #44264.
---
 NEWS                              |    2 +
 Smake                             |    1 +
 doc/statistics.texi               |    6 +-
 src/language/stats/automake.mk    |    3 +
 src/language/stats/means-calc.c   |  451 +++++++
 src/language/stats/means-parser.c |  305 +++++
 src/language/stats/means.c        | 1812 ++++++++++++++---------------
 src/language/stats/means.h        |  163 +++
 tests/language/stats/means.at     | 1205 +++++++++++++------
 9 files changed, 2619 insertions(+), 1329 deletions(-)
 create mode 100644 src/language/stats/means-calc.c
 create mode 100644 src/language/stats/means-parser.c
 create mode 100644 src/language/stats/means.h

diff --git a/NEWS b/NEWS
index b2fbdc60c7..800ff8f1e7 100644
--- a/NEWS
+++ b/NEWS
@@ -28,6 +28,8 @@ Changes from 1.2.0 to 1.3.0:
    be used to save the cases' cluster membership and/or their distance
    from the cluster centre to the active file.
 
+ * The MEANS command has been re-implemented.
+
  * A bug where the GUI would crash when T-TEST was executed whilst
    a filter was set has been fixed.
 
diff --git a/Smake b/Smake
index 5669ad079f..abe67b3db1 100644
--- a/Smake
+++ b/Smake
@@ -37,6 +37,7 @@ GNULIB_MODULES = \
 	close \
 	configmake \
 	count-one-bits \
+	count-leading-zeros \
 	crc \
 	crypto/md4 \
 	crypto/rijndael \
diff --git a/doc/statistics.texi b/doc/statistics.texi
index 89ab28e1a6..522ef4d310 100644
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -1097,7 +1097,7 @@ MEANS [TABLES =]
         [ALL]
         [NONE] ]
 
-      [/MISSING = [TABLE] [INCLUDE] [DEPENDENT]]
+      [/MISSING = [INCLUDE] [DEPENDENT]]
 @end display 
 
 You can use the @cmd{MEANS} command to calculate the arithmetic mean and similar
@@ -1201,10 +1201,6 @@ encountered.
 This behaviour can be modified with the  @subcmd{/MISSING} subcommand.
 Three options are possible: @subcmd{TABLE}, @subcmd{INCLUDE} and @subcmd{DEPENDENT}.
 
-@subcmd{/MISSING = TABLE} causes cases to be dropped if any variable is missing 
-in the table specification currently being processed, regardless of 
-whether it is needed to calculate the statistic.
-
 @subcmd{/MISSING = INCLUDE} says that user missing values, either in the dependent
 variables or in the categorical variables should be taken at their face
 value, and not excluded.
diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk
index 33d9cd4c62..aa385529eb 100644
--- a/src/language/stats/automake.mk
+++ b/src/language/stats/automake.mk
@@ -54,6 +54,9 @@ language_stats_sources = \
 	src/language/stats/mann-whitney.c \
 	src/language/stats/mann-whitney.h \
 	src/language/stats/means.c \
+	src/language/stats/means.h \
+	src/language/stats/means-calc.c \
+	src/language/stats/means-parser.c \
 	src/language/stats/mcnemar.c \
 	src/language/stats/mcnemar.h \
 	src/language/stats/median.c \
diff --git a/src/language/stats/means-calc.c b/src/language/stats/means-calc.c
new file mode 100644
index 0000000000..7e5142fd78
--- /dev/null
+++ b/src/language/stats/means-calc.c
@@ -0,0 +1,451 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "data/case.h"
+#include "data/format.h"
+#include "data/variable.h"
+
+#include "libpspp/bt.h"
+#include "libpspp/hmap.h"
+#include "libpspp/misc.h"
+#include "libpspp/pool.h"
+
+#include "math/moments.h"
+#include "output/pivot-table.h"
+
+#include <math.h>
+
+#include "means.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) (msgid)
+
+struct statistic
+{
+};
+
+struct statistic_simple
+{
+  struct statistic parent;
+  double acc;
+};
+
+struct statistic_moment
+{
+  struct statistic parent;
+  struct moments1 *mom;
+};
+
+static struct statistic *
+default_create (struct pool *pool)
+{
+  struct statistic_moment *pvd = pool_alloc (pool, sizeof *pvd);
+
+  pvd->mom = moments1_create (MOMENT_KURTOSIS);
+
+  return (struct statistic *) pvd;
+}
+
+static void
+default_update (struct statistic *stat, double w, double x)
+{
+  struct statistic_moment *pvd = (struct statistic_moment *) stat;
+
+  moments1_add (pvd->mom, x, w);
+}
+
+static void
+default_destroy (struct statistic *stat)
+{
+  struct statistic_moment *pvd = (struct statistic_moment *) stat;
+  moments1_destroy (pvd->mom);
+}
+
+static void
+simple_destroy (struct statistic *stat UNUSED)
+{
+}
+
+
+
+/* HARMONIC MEAN: The reciprocal of the sum of the reciprocals:
+   1 / ( 1/(x_0) + 1/(x_1) + ... + 1/(x_{n-1}) ) */
+
+struct harmonic_mean
+{
+  struct statistic parent;
+  double rsum;
+  double n;
+};
+
+static struct statistic *
+harmonic_create (struct pool *pool)
+{
+  struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm);
+
+  hm->rsum = 0;
+  hm->n = 0;
+
+  return (struct statistic *) hm;
+}
+
+
+static void
+harmonic_update (struct statistic *stat, double w, double x)
+{
+  struct harmonic_mean *hm = (struct harmonic_mean *) stat;
+  hm->rsum  += w / x;
+  hm->n += w;
+}
+
+
+static double
+harmonic_get (const struct statistic *pvd)
+{
+  const struct harmonic_mean *hm = (const struct harmonic_mean *) pvd;
+
+  return hm->n / hm->rsum;
+}
+
+
+
+/* GEOMETRIC MEAN:  The nth root of the product of all n observations
+   pow ((x_0 * x_1 * ... x_{n - 1}), 1/n)  */
+struct geometric_mean
+{
+  struct statistic parent;
+  double prod;
+  double n;
+};
+
+static struct statistic *
+geometric_create (struct pool *pool)
+{
+  struct geometric_mean *gm = pool_alloc (pool, sizeof *gm);
+
+  gm->prod = 1.0;
+  gm->n = 0;
+
+  return (struct statistic *) gm;
+}
+
+static void
+geometric_update (struct statistic  *pvd, double w, double x)
+{
+  struct geometric_mean *gm = (struct geometric_mean *)pvd;
+  gm->prod  *=  pow (x, w);
+  gm->n += w;
+}
+
+
+static double
+geometric_get (const struct statistic *pvd)
+{
+  const struct geometric_mean *gm = (const struct geometric_mean *)pvd;
+  return pow (gm->prod, 1.0 / gm->n);
+}
+
+
+
+static double
+sum_get (const struct statistic *pvd)
+{
+  double n, mean;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, 0, 0, 0);
+
+  return mean * n;
+}
+
+
+static double
+n_get (const struct statistic *pvd)
+{
+  double n;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, 0, 0, 0, 0);
+
+  return n;
+}
+
+static double
+arithmean_get (const struct statistic *pvd)
+{
+  double n, mean;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, 0, 0, 0);
+
+  return mean;
+}
+
+static double
+variance_get (const struct statistic *pvd)
+{
+  double n, mean, variance;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, &variance, 0, 0);
+
+  return variance;
+}
+
+
+static double
+stddev_get (const struct statistic *pvd)
+{
+  return sqrt (variance_get (pvd));
+}
+
+
+
+
+static double
+skew_get (const struct statistic *pvd)
+{
+  double skew;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, NULL, NULL, NULL, &skew, 0);
+
+  return skew;
+}
+
+static double
+sekurt_get (const struct statistic *pvd)
+{
+  double n;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL);
+
+  return calc_sekurt (n);
+}
+
+static double
+seskew_get (const struct statistic *pvd)
+{
+  double n;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL);
+
+  return calc_seskew (n);
+}
+
+static double
+kurt_get (const struct statistic *pvd)
+{
+  double kurt;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, NULL, NULL, NULL, NULL, &kurt);
+
+  return kurt;
+}
+
+static double
+semean_get (const struct statistic *pvd)
+{
+  double n, var;
+
+  moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, &var, NULL, NULL);
+
+  return sqrt (var / n);
+}
+
+
+
+/* MIN: The smallest (closest to minus infinity) value. */
+
+static struct statistic *
+min_create (struct pool *pool)
+{
+  struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd);
+
+  pvd->acc = DBL_MAX;
+
+  return (struct statistic *) pvd;
+}
+
+static void
+min_update (struct statistic *pvd, double w UNUSED, double x)
+{
+  double *r = &((struct statistic_simple *)pvd)->acc;
+
+  if (x < *r)
+    *r = x;
+}
+
+static double
+min_get (const struct statistic *pvd)
+{
+  double *r = &((struct statistic_simple *)pvd)->acc;
+
+  return *r;
+}
+
+/* MAX: The largest (closest to plus infinity) value. */
+
+static struct statistic *
+max_create (struct pool *pool)
+{
+  struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd);
+
+  pvd->acc = -DBL_MAX;
+
+  return (struct statistic *) pvd;
+}
+
+static void
+max_update (struct statistic *pvd, double w UNUSED, double x)
+{
+  double *r = &((struct statistic_simple *)pvd)->acc;
+
+  if (x > *r)
+    *r = x;
+}
+
+static double
+max_get (const struct statistic *pvd)
+{
+  double *r = &((struct statistic_simple *)pvd)->acc;
+
+  return *r;
+}
+
+
+
+struct range
+{
+  struct statistic parent;
+  double min;
+  double max;
+};
+
+static struct statistic *
+range_create (struct pool *pool)
+{
+  struct range *r = pool_alloc (pool, sizeof *r);
+
+  r->min = DBL_MAX;
+  r->max = -DBL_MAX;
+
+  return (struct statistic *) r;
+}
+
+static void
+range_update (struct statistic *pvd, double w UNUSED, double x)
+{
+  struct range *r = (struct range *) pvd;
+
+  if (x > r->max)
+    r->max = x;
+
+  if (x < r->min)
+    r->min = x;
+}
+
+static double
+range_get (const struct statistic *pvd)
+{
+  const struct range *r = (struct range *) pvd;
+
+  return r->max - r->min;
+}
+
+
+
+/* LAST: The last value (the one closest to the end of the file).  */
+
+static struct statistic *
+last_create (struct pool *pool)
+{
+  struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd);
+
+  return (struct statistic *) pvd;
+}
+
+static void
+last_update (struct statistic *pvd, double w UNUSED, double x)
+{
+  struct statistic_simple *stat = (struct statistic_simple *) pvd;
+
+  stat->acc = x;
+}
+
+static double
+last_get (const struct statistic *pvd)
+{
+  const struct statistic_simple *stat = (struct statistic_simple *) pvd;
+
+  return stat->acc;
+}
+
+/* FIRST: The first value (the one closest to the start of the file).  */
+
+static struct statistic *
+first_create (struct pool *pool)
+{
+  struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd);
+
+  pvd->acc = SYSMIS;
+
+  return (struct statistic *) pvd;
+}
+
+static void
+first_update (struct statistic *pvd, double w UNUSED, double x)
+{
+  struct statistic_simple *stat = (struct statistic_simple *) pvd;
+
+  if (stat->acc == SYSMIS)
+    stat->acc = x;
+}
+
+static double
+first_get (const struct statistic *pvd)
+{
+  const struct statistic_simple *stat = (struct statistic_simple *) pvd;
+
+  return stat->acc;
+}
+
+/* Table of cell_specs */
+const struct cell_spec cell_spec[n_MEANS_STATISTICS] = {
+  {N_("Mean"),           "MEAN",      PIVOT_RC_OTHER,   default_create,   default_update,   arithmean_get, default_destroy},
+  {N_("N"),              "COUNT",     PIVOT_RC_COUNT,   default_create,   default_update,   n_get,         default_destroy},
+  {N_("Std. Deviation"), "STDDEV",    PIVOT_RC_OTHER,   default_create,   default_update,   stddev_get,    default_destroy},
+#if 0
+  {N_("Median"),         "MEDIAN",    PIVOT_RC_OTHER,   default_create,   default_update,   NULL,          default_destroy},
+  {N_("Group Median"),   "GMEDIAN",   PIVOT_RC_OTHER,   default_create,   default_update,   NULL,          default_destroy},
+#endif
+  {N_("S.E. Mean"),      "SEMEAN",    PIVOT_RC_OTHER,   default_create,   default_update,   semean_get,    default_destroy},
+  {N_("Sum"),            "SUM",       PIVOT_RC_OTHER,   default_create,   default_update,   sum_get,       default_destroy},
+  {N_("Minimum"),        "MIN",       PIVOT_RC_OTHER,   min_create,       min_update,       min_get,       simple_destroy},
+  {N_("Maximum"),        "MAX",       PIVOT_RC_OTHER,   max_create,       max_update,       max_get,       simple_destroy},
+  {N_("Range"),          "RANGE",     PIVOT_RC_OTHER,   range_create,     range_update,     range_get,     simple_destroy},
+  {N_("Variance"),       "VARIANCE",  PIVOT_RC_OTHER,   default_create,   default_update,   variance_get,  default_destroy},
+  {N_("Kurtosis"),       "KURT",      PIVOT_RC_OTHER,   default_create,   default_update,   kurt_get,      default_destroy},
+  {N_("S.E. Kurt"),      "SEKURT",    PIVOT_RC_OTHER,   default_create,   default_update,   sekurt_get,    default_destroy},
+  {N_("Skewness"),       "SKEW",      PIVOT_RC_OTHER,   default_create,   default_update,   skew_get,      default_destroy},
+  {N_("S.E. Skew"),      "SESKEW",    PIVOT_RC_OTHER,   default_create,   default_update,   seskew_get,    default_destroy},
+  {N_("First"),          "FIRST",     PIVOT_RC_OTHER,   first_create,     first_update,     first_get,     simple_destroy},
+  {N_("Last"),           "LAST",      PIVOT_RC_OTHER,   last_create,      last_update,      last_get,      simple_destroy},
+#if 0
+  {N_("Percent N"),      "NPCT",      PIVOT_RC_PERCENT, default_create,   default_update,   NULL,          default_destroy},
+  {N_("Percent Sum"),    "SPCT",      PIVOT_RC_PERCENT, default_create,   default_update,   NULL,          default_destroy},
+#endif
+  {N_("Harmonic Mean"),  "HARMONIC",  PIVOT_RC_OTHER,   harmonic_create,  harmonic_update,  harmonic_get,  simple_destroy},
+  {N_("Geom. Mean"),     "GEOMETRIC", PIVOT_RC_OTHER,   geometric_create, geometric_update, geometric_get, simple_destroy}
+};
diff --git a/src/language/stats/means-parser.c b/src/language/stats/means-parser.c
new file mode 100644
index 0000000000..50e25d9c34
--- /dev/null
+++ b/src/language/stats/means-parser.c
@@ -0,0 +1,305 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "data/case.h"
+#include "data/casegrouper.h"
+#include "data/casereader.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/variable.h"
+
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
+
+#include "libpspp/hmap.h"
+#include "libpspp/bt.h"
+#include "libpspp/misc.h"
+#include "libpspp/pool.h"
+
+#include "means.h"
+
+/* Parse the /TABLES stanza of the command.  */
+static bool
+parse_means_table_syntax (struct lexer *lexer, const struct means *cmd,
+			  struct mtable *table)
+{
+  memset (table, 0, sizeof *table);
+
+  /* Dependent variable (s) */
+  if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict,
+				   &table->dep_vars, &table->n_dep_vars,
+				   PV_NO_DUPLICATE | PV_NUMERIC))
+    return false;
+
+  /* Factor variable (s) */
+  while (lex_match (lexer, T_BY))
+    {
+      struct layer *layer = pool_zalloc (cmd->pool, sizeof *layer);
+
+      table->layers =
+	pool_nrealloc (cmd->pool, table->layers, table->n_layers + 1,
+		       sizeof *table->layers);
+      table->layers[table->n_layers] = layer;
+      table->n_layers++;
+
+      if (!parse_variables_const_pool
+	  (lexer, cmd->pool, cmd->dict,
+	   &layer->factor_vars,
+	   &layer->n_factor_vars,
+	   PV_NO_DUPLICATE))
+	return false;
+    }
+
+  return true;
+}
+
+/* Match a variable.
+   If the match succeeds, the variable will be placed in VAR.
+   Returns true if successful */
+static bool
+lex_is_variable (struct lexer *lexer, const struct dictionary *dict,
+		 int n)
+{
+  const char *tstr;
+  if (lex_next_token (lexer, n) !=  T_ID)
+    return false;
+
+  tstr = lex_next_tokcstr (lexer, n);
+
+  if (NULL == dict_lookup_var (dict, tstr) )
+    return false;
+
+  return true;
+}
+
+static bool
+means_parse (struct lexer *lexer, struct means *means)
+{
+  /*   Optional TABLES =   */
+  if (lex_match_id (lexer, "TABLES"))
+    {
+      if (! lex_force_match (lexer, T_EQUALS))
+	return false;
+    }
+
+  bool more_tables = true;
+  /* Parse the "tables" */
+  while (more_tables)
+    {
+      means->table = pool_realloc (means->pool, means->table,
+				   (means->n_tables + 1) * sizeof (*means->table));
+
+      if (! parse_means_table_syntax (lexer, means,
+				      &means->table[means->n_tables]))
+	{
+	  return false;
+	}
+      means->n_tables ++;
+
+      /* Look ahead to see if there are more tables to be parsed */
+      more_tables = false;
+      if ( T_SLASH == lex_next_token (lexer, 0) )
+	{
+	  if (lex_is_variable (lexer, means->dict, 1) )
+	    {
+	      more_tables = true;
+	      lex_match (lexer, T_SLASH);
+	    }
+	}
+    }
+
+  /* /MISSING subcommand */
+  while (lex_token (lexer) != T_ENDCMD)
+    {
+      lex_match (lexer, T_SLASH);
+
+      if (lex_match_id (lexer, "MISSING"))
+	{
+	  /*
+	    If no MISSING subcommand is specified, each combination of
+	    a dependent variable and categorical variables is handled
+	    separately.
+	  */
+	  lex_match (lexer, T_EQUALS);
+	  if (lex_match_id (lexer, "INCLUDE"))
+	    {
+	      /*
+		Use the subcommand  "/MISSING=INCLUDE" to include user-missing
+		values in the analysis.
+	      */
+
+	      means->ctrl_exclude = MV_SYSTEM;
+	      means->dep_exclude = MV_SYSTEM;
+	    }
+	  else if (lex_match_id (lexer, "DEPENDENT"))
+	    /*
+	      Use the command "/MISSING=DEPENDENT" to
+	      include user-missing values for the categorical variables,
+	      while excluding them for the dependent variables.
+
+	      Cases are dropped only when user-missing values
+	      appear in dependent  variables.  User-missing
+	      values for categorical variables are treated according to
+	      their face value.
+
+	      Cases are ALWAYS dropped when System Missing values appear
+	      in the categorical variables.
+	    */
+	    {
+	      means->dep_exclude = MV_ANY;
+	      means->ctrl_exclude = MV_SYSTEM;
+	    }
+	  else
+	    {
+	      lex_error (lexer, NULL);
+	      return false;
+	    }
+	}
+      else if (lex_match_id (lexer, "CELLS"))
+	{
+	  lex_match (lexer, T_EQUALS);
+
+	  /* The default values become overwritten */
+	  means->n_statistics = 0;
+	  pool_free (means->pool, means->statistics);
+	  means->statistics = 0;
+	  while (lex_token (lexer) != T_ENDCMD
+		 && lex_token (lexer) != T_SLASH)
+	    {
+	      if (lex_match (lexer, T_ALL))
+	      	{
+		  pool_free (means->pool, means->statistics);
+		  means->statistics = pool_calloc (means->pool,
+						   n_MEANS_STATISTICS,
+						   sizeof (*means->statistics));
+		  means->n_statistics = n_MEANS_STATISTICS;
+		  int i;
+		  for (i = 0; i < n_MEANS_STATISTICS; ++i)
+		    {
+		      means->statistics[i] = i;
+		    }
+	      	}
+	      else if (lex_match_id (lexer, "NONE"))
+		{
+		  means->n_statistics = 0;
+		  pool_free (means->pool, means->statistics);
+		  means->statistics = 0;
+		}
+	      else if (lex_match_id (lexer, "DEFAULT"))
+		{
+		  pool_free (means->pool, means->statistics);
+		  means->statistics = pool_calloc (means->pool,
+						   3,
+						   sizeof *means->statistics);
+		  means->statistics[0] = MEANS_MEAN;
+		  means->statistics[1] = MEANS_N;
+		  means->statistics[2] = MEANS_STDDEV;
+		}
+	      else
+		{
+		  int i;
+		  for (i = 0; i < n_MEANS_STATISTICS; ++i)
+		    {
+		      const struct cell_spec *cs = cell_spec + i;
+		      if (lex_match_id (lexer, cs->keyword))
+			{
+			  means->statistics
+			    = pool_realloc (means->pool,
+					   means->statistics,
+					   (means->n_statistics + 1)
+					   * sizeof (*means->statistics));
+
+			  means->statistics[means->n_statistics] = i;
+			  means->n_statistics++;
+			  break;
+			}
+		    }
+
+		  if (i >= n_MEANS_STATISTICS)
+		    {
+		      lex_error (lexer, NULL);
+		      return false;
+		    }
+		}
+	    }
+	}
+      else
+	{
+	  lex_error (lexer, NULL);
+	  return false;
+	}
+    }
+  return true;
+}
+
+
+int
+cmd_means (struct lexer *lexer, struct dataset *ds)
+{
+  struct means means;
+  means.pool = pool_create ();
+
+  means.ctrl_exclude = MV_ANY;
+  means.dep_exclude = MV_ANY;
+  means.table = NULL;
+  means.n_tables = 0;
+
+  means.dict = dataset_dict (ds);
+
+  means.n_statistics = 3;
+  means.statistics = pool_calloc (means.pool, 3, sizeof *means.statistics);
+  means.statistics[0] = MEANS_MEAN;
+  means.statistics[1] = MEANS_N;
+  means.statistics[2] = MEANS_STDDEV;
+
+  if (! means_parse (lexer, &means))
+    goto error;
+
+  {
+    struct casegrouper *grouper;
+    struct casereader *group;
+    bool ok;
+
+    grouper = casegrouper_create_splits (proc_open (ds), means.dict);
+    while (casegrouper_get_next_group (grouper, &group))
+      {
+      	run_means (&means, group, ds);
+      }
+    ok = casegrouper_destroy (grouper);
+    ok = proc_commit (ds) && ok;
+  }
+
+  for (int t = 0; t < means.n_tables; ++t)
+    {
+      const struct mtable *table = means.table + t;
+
+      means_case_processing_summary (table);
+      means_shipout (table, &means);
+    }
+  destroy_means (&means);
+  pool_destroy (means.pool);
+  return CMD_SUCCESS;
+
+ error:
+
+  destroy_means (&means);
+  pool_destroy (means.pool);
+  return CMD_FAILURE;
+}
diff --git a/src/language/stats/means.c b/src/language/stats/means.c
index 7e7642adf1..f0174c4ea3 100644
--- a/src/language/stats/means.c
+++ b/src/language/stats/means.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
+   Copyright (C) 2019 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -24,1167 +24,1103 @@
 #include "data/format.h"
 #include "data/variable.h"
 
-#include "language/command.h"
-#include "language/lexer/lexer.h"
-#include "language/lexer/variable-parser.h"
+#include "libpspp/hmap.h"
+#include "libpspp/bt.h"
+#include "libpspp/hash-functions.h"
 
-#include "libpspp/misc.h"
-#include "libpspp/pool.h"
-
-#include "math/categoricals.h"
-#include "math/interaction.h"
-#include "math/moments.h"
+#include "count-one-bits.h"
+#include "count-leading-zeros.h"
 
 #include "output/pivot-table.h"
 
-#include <math.h>
+#include "means.h"
+
 
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 #define N_(msgid) (msgid)
 
 
-struct means;
+/* A "cell" in this procedure represents a distinct value of the
+   procedure's categorical variables,  and a set of summary statistics
+   of all cases which whose categorical variables have that set of
+   values.   For example,  the dataset
 
-struct per_var_data
-{
-  void **cell_stats;
-  struct moments1 *mom;
-};
+   v1    v2    cat1     cat2
+   100   202      0     1
+   100   202      0     2
+   100   202      1     0
+   100   202      0     1
 
 
-typedef void *stat_create (struct pool *pool);
-typedef void stat_update  (void *stat, double w, double x);
-typedef double stat_get   (const struct per_var_data *, void *aux);
+   has three cells in layer 0 and two cells in layer 1  in addition
+   to a "grand summary" cell to which all (non-missing) cases
+   contribute.
 
-struct cell_spec
+   The cells form a n-ary tree structure with the "grand summary"
+   cell at the root.
+*/
+struct cell
 {
-  /* Printable title for output */
-  const char *title;
-
-  /* Keyword for syntax */
-  const char *keyword;
-
-  stat_create *sc;
-  stat_update *su;
-  stat_get *sd;
-};
-
-struct harmonic_mean
-{
-  double rsum;
-  double n;
-};
-
-static void *
-harmonic_create (struct pool *pool)
-{
-  struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm);
-
-  hm->rsum = 0;
-  hm->n = 0;
+  struct hmap_node hmap_node; /* Element in hash table. */
+  struct bt_node  bt_node;    /* Element in binary tree */
 
-  return hm;
-}
+  int n_children;
+  struct cell_container *children;
 
+  /* The statistics to be calculated for the cell.  */
+  struct statistic **stat;
 
-static void
-harmonic_update (void *stat, double w, double x)
-{
-  struct harmonic_mean *hm = stat;
-  hm->rsum  += w / x;
-  hm->n += w;
-}
+  /* The parent of this cell, or NULL if this is the root cell.  */
+  const struct cell *parent_cell;
 
+  /* A bit-field variable which indicates which control variables
+     are allocated a fixed value (for this cell),  and which are
+     "wildcards".
 
-static double
-harmonic_get (const struct per_var_data *pvd UNUSED, void *stat)
-{
-  struct harmonic_mean *hm = stat;
-
-  return hm->n / hm->rsum;
-}
+     A one indicates a fixed value.  A zero indicates a wildcard.
+     Wildcard values are used to calculate totals and sub-totals.
+  */
+  unsigned int not_wild;
 
-
+  /* The value(s). */
+  union value *values;
 
-struct geometric_mean
-{
-  double prod;
-  double n;
+  /* The variables corresponding to the above values.  */
+  const struct variable **vars;
 };
 
-
-static void *
-geometric_create (struct pool *pool)
+/*  A structure used to find the union of all values used
+    within a layer, and to sort those values.  */
+struct instance
 {
-  struct geometric_mean *gm = pool_alloc (pool, sizeof *gm);
+  struct hmap_node hmap_node; /* Element in hash table. */
+  struct bt_node  bt_node;    /* Element in binary tree */
 
-  gm->prod = 1.0;
-  gm->n = 0;
+  /* A unique, consecutive, zero based index identifying this
+     instance.  */
+  int index;
 
-  return gm;
-}
+  /* The top level value of this instance.  */
+  union value value;
+  const struct variable *var;
+};
 
 
 static void
-geometric_update (void *stat, double w, double x)
+destroy_workspace (const struct mtable *mt, struct workspace *ws)
 {
-  struct geometric_mean *gm = stat;
-  gm->prod  *=  pow (x, w);
-  gm->n += w;
+  for (int l = 0; l < mt->n_layers; ++l)
+    {
+      struct cell_container *instances = ws->instances + l;
+      struct instance *inst;
+      struct instance *next;
+      HMAP_FOR_EACH_SAFE (inst, next, struct instance, hmap_node,
+		     &instances->map)
+	{
+	  free (inst);
+	}
+      hmap_destroy (&instances->map);
+    }
+  free (ws->control_idx);
+  free (ws->instances);
 }
 
-
-static double
-geometric_get (const struct per_var_data *pvd UNUSED, void *stat)
+/* Destroy CELL.  */
+static void
+destroy_cell (const struct means *means,
+	      const struct mtable *mt, struct cell *cell)
 {
-  struct geometric_mean *gm = stat;
-
-  return pow (gm->prod, 1.0 / gm->n);
-}
+  int idx = 0;
+  for (int i = 0; i < mt->n_layers; ++i)
+    {
+      if (0 == ((cell->not_wild >> i) & 0x1))
+	continue;
 
-
+      const struct layer *layer = mt->layers[i];
+      for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
+      {
+        struct workspace *ws = mt->ws + cmb;
+        const struct variable *var
+          = layer->factor_vars[ws->control_idx[i]];
 
-static double
-sum_get (const struct per_var_data *pvd, void *stat UNUSED)
-{
-  double n, mean;
+        int width = var_get_width (var);
+        value_destroy (&cell->values[idx++], width);
+      }
+    }
+  for (int i = 0; i < cell->n_children; ++i)
+    {
+      struct cell_container *container = cell->children + i;
+      hmap_destroy (&container->map);
+    }
 
-  moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0);
+  for (int v = 0; v < mt->n_dep_vars; ++v)
+    {
+      for (int s = 0; s < means->n_statistics; ++s)
+        {
+          stat_destroy *des = cell_spec[means->statistics[s]].sf;
+          des (cell->stat[s + v * means->n_statistics]);
+        }
+    }
+  free (cell->stat);
 
-  return mean * n;
+  free (cell->children);
+  free (cell->values);
+  free (cell->vars);
+  free (cell);
 }
 
 
-static double
-n_get (const struct per_var_data *pvd, void *stat UNUSED)
-{
-  double n;
-
-  moments1_calculate (pvd->mom, &n, 0, 0, 0, 0);
-
-  return n;
-}
-
-static double
-arithmean_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* Walk the tree in postorder starting from CELL and destroy all the
+   cells.  */
+static void
+means_destroy_cells (const struct means *means, struct cell *cell,
+		     const struct mtable *table)
 {
-  double n, mean;
-
-  moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0);
+  for (int i = 0; i < cell->n_children; ++i)
+    {
+      struct cell_container *container = cell->children + i;
+      struct cell *sub_cell;
+      struct cell *next;
+      HMAP_FOR_EACH_SAFE (sub_cell,  next, struct cell, hmap_node,
+			  &container->map)
+	{
+	  means_destroy_cells (means, sub_cell, table);
+	}
+    }
 
-  return mean;
+  destroy_cell (means, table, cell);
 }
 
-static double
-variance_get (const struct per_var_data *pvd, void *stat UNUSED)
+static void
+dump_cell (const struct cell *cell, const struct mtable *mt, int level)
 {
-  double n, mean, variance;
-
-  moments1_calculate (pvd->mom, &n, &mean, &variance, 0, 0);
+  for (int l = 0; l < level; ++l)
+    putchar (' ');
+  printf ("%p: ", cell);
+  for (int i = 0; i < mt->n_layers; ++i)
+    {
+      putchar (((cell->not_wild >> i) & 0x1) ? 'w' : '.');
+    }
+  printf (" - ");
+  int x = 0;
+  for (int i = 0; i < mt->n_layers; ++i)
+    {
+      if ((cell->not_wild >> i) & 0x1)
+	{
+	  printf ("%s: ", var_get_name (cell->vars[x]));
+	  printf ("%g ", cell->values[x++].f);
+	}
+      else
+	printf ("x ");
+    }
+  stat_get *sg = cell_spec[MEANS_N].sd;
+  printf ("--- S1: %g", sg (cell->stat[0]));
 
-  return variance;
+  printf ("--- N Children: %d", cell->n_children);
+  //  printf ("--- Level: %d", level);
+  printf ("--- Parent: %p", cell->parent_cell);
+  printf ("\n");
 }
 
-
-static double
-stddev_get (const struct per_var_data *pvd, void *stat)
+static void
+dump_indeces (const size_t *indexes, int n)
 {
-  return sqrt (variance_get (pvd, stat));
+  for (int i = 0 ; i < n; ++i)
+    {
+      printf ("%ld; ", indexes[i]);
+    }
+  printf ("\n");
 }
 
-
-
-
-static double
-skew_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* Dump the tree in pre-order.  */
+static void
+dump_tree (const struct cell *cell, const struct mtable *table,
+	   int level, const struct cell *parent)
 {
-  double skew;
-
-  moments1_calculate (pvd->mom, NULL, NULL, NULL, &skew, 0);
+  assert (cell->parent_cell == parent);
+  dump_cell (cell, table, level);
 
-  return skew;
+  for (int i = 0; i < cell->n_children; ++i)
+    {
+      struct cell_container *container = cell->children + i;
+      struct cell *sub_cell;
+      BT_FOR_EACH (sub_cell, struct cell, bt_node, &container->bt)
+	{
+	  dump_tree (sub_cell, table, level + 1, cell);
+	}
+    }
 }
 
-static double
-sekurt_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* Generate a hash based on the values of the N variables in
+   the array VARS which are taken from the case C.  */
+static unsigned int
+generate_hash (const struct mtable *mt,
+	       const struct ccase *c,
+	       unsigned int not_wild,
+	       const struct workspace *ws)
 {
-  double n;
-
-  moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL);
+  unsigned int hash = 0;
+  for (int i = 0; i < mt->n_layers; ++i)
+    {
+      if (0 == ((not_wild >> i) & 0x1))
+	continue;
+
+      const struct layer *layer = mt->layers[i];
+      const struct variable *var = layer->factor_vars[ws->control_idx[i]];
+      const union value *vv = case_data (c, var);
+      int width = var_get_width (var);
+      hash = hash_int (i, hash);
+      hash = value_hash (vv, width, hash);
+    }
 
-  return calc_sekurt (n);
+  return hash;
 }
 
-static double
-seskew_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* Create a cell based on the N variables in the array VARS,
+   which are indeces into the case C.
+   The caller is responsible for destroying this cell when
+   no longer needed. */
+static struct cell *
+generate_cell (const struct means *means,
+	       const struct mtable *mt,
+	       const struct ccase *c,
+               unsigned int not_wild,
+	       const struct cell *pcell,
+	       const struct workspace *ws)
 {
-  double n;
-
-  moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL);
-
-  return calc_seskew (n);
-}
+  int n_vars = count_one_bits (not_wild);
+  struct cell *cell = xzalloc ((sizeof *cell));
+  cell->values = xcalloc (n_vars, sizeof *cell->values);
+  cell->vars = xcalloc (n_vars, sizeof *cell->vars);
+  cell->not_wild = not_wild;
+
+  cell->parent_cell = pcell;
+  cell->n_children = mt->n_layers -
+    (sizeof (cell->not_wild) * CHAR_BIT) +
+    count_leading_zeros (cell->not_wild);
+
+  int idx = 0;
+  for (int i = 0; i < mt->n_layers; ++i)
+    {
+      if (0 == ((not_wild >> i) & 0x1))
+	continue;
+
+      const struct layer *layer = mt->layers[i];
+      const struct variable *var = layer->factor_vars[ws->control_idx[i]];
+      const union value *vv = case_data (c, var);
+      int width = var_get_width (var);
+      cell->vars[idx] = var;
+      value_clone (&cell->values[idx++], vv, width);
+    }
+  assert (idx == n_vars);
 
-static double
-kurt_get (const struct per_var_data *pvd, void *stat UNUSED)
-{
-  double kurt;
+  cell->children = xcalloc (cell->n_children, sizeof *cell->children);
+  for (int i = 0; i < cell->n_children; ++i)
+    {
+      struct cell_container *container = cell->children + i;
+      hmap_init (&container->map);
+    }
 
-  moments1_calculate (pvd->mom, NULL, NULL, NULL, NULL, &kurt);
+  cell->stat = xcalloc (means->n_statistics * mt->n_dep_vars, sizeof *cell->stat);
+  for (int v = 0; v < mt->n_dep_vars; ++v)
+    {
+      for (int stat = 0; stat < means->n_statistics; ++stat)
+        {
+          stat_create *sc = cell_spec[means->statistics[stat]].sc;
 
-  return kurt;
+          cell->stat[stat + v * means->n_statistics] = sc (means->pool);
+        }
+    }
+  return cell;
 }
 
-static double
-semean_get (const struct per_var_data *pvd, void *stat UNUSED)
-{
-  double n, var;
 
-  moments1_calculate (pvd->mom, &n, NULL, &var, NULL, NULL);
+/* If a  cell based on the N variables in the array VARS,
+   which are indeces into the case C and whose hash is HASH,
+   exists in HMAP, then return that cell.
+   Otherwise, return NULL.  */
+static struct cell *
+lookup_cell (const struct mtable *mt,
+	     struct hmap *hmap,  unsigned int hash,
+	     const struct ccase *c,
+	     unsigned int not_wild,
+	     const struct workspace *ws)
+{
+  struct cell *cell = NULL;
+  HMAP_FOR_EACH_WITH_HASH (cell, struct cell, hmap_node, hash, hmap)
+    {
+      bool match = true;
+      int idx = 0;
+      if (cell->not_wild != not_wild)
+      	continue;
+      for (int i = 0; i < mt->n_layers; ++i)
+	{
+	  if (0 == ((cell->not_wild >> i) & 0x1))
+	    continue;
 
-  return sqrt (var / n);
+	  const struct layer *layer = mt->layers[i];
+	  const struct variable *var = layer->factor_vars[ws->control_idx[i]];
+	  const union value *vv = case_data (c, var);
+	  int width = var_get_width (var);
+	  assert (var == cell->vars[idx]);
+	  if (!value_equal (vv, &cell->values[idx++], width))
+	    {
+	      match = false;
+	      break;
+	    }
+	}
+      if (match)
+	return cell;
+    }
+  return NULL;
 }
 
-
 
-static void *
-min_create (struct pool *pool)
+/*  A comparison function used to sort cells in a binary tree.
+    Only the innermost value needs to be compared, because no
+    two cells with similar outer values will appear in the same
+    tree/map.   */
+static int
+cell_compare_3way (const struct bt_node *a,
+		   const struct bt_node *b,
+		   const void *aux UNUSED)
 {
-  double *r = pool_alloc (pool, sizeof *r);
+  const struct cell *fa = BT_DATA (a, struct cell, bt_node);
+  const struct cell *fb = BT_DATA (b, struct cell, bt_node);
 
-  *r = DBL_MAX;
+  assert (fa->not_wild == fb->not_wild);
+  int vidx = count_one_bits (fa->not_wild) - 1;
+  assert (fa->vars[vidx] == fb->vars[vidx]);
 
-  return r;
+  return value_compare_3way (&fa->values[vidx],
+			     &fb->values[vidx],
+			     var_get_width (fa->vars[vidx]));
 }
 
-static void
-min_update (void *stat, double w UNUSED, double x)
+/*  A comparison function used to sort cells in a binary tree.  */
+static int
+compare_instance_3way (const struct bt_node *a,
+		       const struct bt_node *b,
+		       const void *aux UNUSED)
 {
-  double *r = stat;
+  const struct instance *fa = BT_DATA (a, struct instance, bt_node);
+  const struct instance *fb = BT_DATA (b, struct instance, bt_node);
 
-  if (x < *r)
-    *r = x;
-}
+  assert (fa->var == fb->var);
 
-static double
-min_get (const struct per_var_data *pvd UNUSED, void *stat)
-{
-  double *r = stat;
-
-  return *r;
+  return  value_compare_3way (&fa->value,
+			      &fb->value,
+			      var_get_width (fa->var));
 }
 
-static void *
-max_create (struct pool *pool)
-{
-  double *r = pool_alloc (pool, sizeof *r);
 
-  *r = -DBL_MAX;
+static void arrange_cells (struct workspace *ws,
+			   struct cell *cell, const struct mtable *table);
 
-  return r;
-}
 
+/* Iterate CONTAINER's map inserting a copy of its elements into
+   CONTAINER's binary tree.    Also, for each layer in TABLE, create
+   an instance container, containing the union of all elements in
+   CONTAINER.  */
 static void
-max_update (void *stat, double w UNUSED, double x)
+arrange_cell (struct workspace *ws, struct cell_container *container,
+	      const struct mtable *mt)
 {
-  double *r = stat;
+  struct bt *bt = &container->bt;
+  struct hmap *map = &container->map;
+  bt_init (bt, cell_compare_3way, NULL);
 
-  if (x > *r)
-    *r = x;
-}
-
-static double
-max_get (const struct per_var_data *pvd UNUSED, void *stat)
-{
-  double *r = stat;
-
-  return *r;
-}
+  struct cell *cell;
+  HMAP_FOR_EACH (cell, struct cell, hmap_node, map)
+    {
+      bt_insert (bt, &cell->bt_node);
 
-
+      int idx = 0;
+      for (int i = 0; i < mt->n_layers; ++i)
+	{
+	  if (0 == ((cell->not_wild >> i) & 0x1))
+	    continue;
 
-struct range
-{
-  double min;
-  double max;
-};
+	  struct cell_container *instances = ws->instances + i;
+	  const struct variable *var = cell->vars[idx];
+	  int width = var_get_width (var);
+	  unsigned int hash
+	    = value_hash (&cell->values[idx], width, 0);
+
+	  struct instance *inst = NULL;
+	  struct instance *next = NULL;
+	  HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance,
+					hmap_node,
+					hash, &instances->map)
+	    {
+	      assert (cell->vars[idx] == var);
+	      if (value_equal (&inst->value,
+			       &cell->values[idx],
+			       width))
+		{
+		  break;
+		}
+	    }
 
-static void *
-range_create (struct pool *pool)
-{
-  struct range *r = pool_alloc (pool, sizeof *r);
+	  if (!inst)
+	    {
+	      inst = xzalloc (sizeof *inst);
+	      inst->index = -1;
+	      inst->var = var;
+	      value_clone (&inst->value, &cell->values[idx],
+			   width);
+	      hmap_insert (&instances->map, &inst->hmap_node, hash);
+	    }
 
-  r->min = DBL_MAX;
-  r->max = -DBL_MAX;
+	  idx++;
+	}
 
-  return r;
+      arrange_cells (ws, cell, mt);
+    }
 }
 
+/* Arrange the children and then all the subtotals.  */
 static void
-range_update (void *stat, double w UNUSED, double x)
+arrange_cells (struct workspace *ws, struct cell *cell,
+	       const struct mtable *table)
 {
-  struct range *r = stat;
-
-  if (x > r->max)
-    r->max = x;
-
-  if (x < r->min)
-    r->min = x;
+  for (int i = 0; i < cell->n_children; ++i)
+    {
+      struct cell_container *container = cell->children + i;
+      arrange_cell (ws, container, table);
+    }
 }
 
-static double
-range_get (const struct per_var_data *pvd UNUSED, void *stat)
-{
-  struct range *r = stat;
-
-  return r->max - r->min;
-}
 
 
 
-static void *
-last_create (struct pool *pool)
+/*  If the top level value in CELL, has an instance in the L_IDX'th layer,
+    then return that instance.  Otherwise return NULL.  */
+static const struct instance *
+lookup_instance (const struct mtable *mt, const struct workspace *ws,
+		 int l_idx, const struct cell *cell)
 {
-  double *l = pool_alloc (pool, sizeof *l);
-
-  return l;
+  const struct layer *layer = mt->layers[l_idx];
+  int n_vals = count_one_bits (cell->not_wild);
+  const struct variable *var = layer->factor_vars[ws->control_idx[l_idx]];
+  const union value *val = cell->values + n_vals - 1;
+  int width = var_get_width (var);
+  unsigned int hash = value_hash (val, width, 0);
+  const struct cell_container *instances = ws->instances + l_idx;
+  struct instance *inst = NULL;
+  struct instance *next;
+  HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next,
+				struct instance, hmap_node,
+				hash, &instances->map)
+    {
+      if (value_equal (val, &inst->value, width))
+	break;
+    }
+  return inst;
 }
 
+/* Enter the values into PT.  */
 static void
-last_update (void *stat, double w UNUSED, double x)
+populate_table (const struct means *means, const struct mtable *mt,
+		const struct workspace *ws,
+                const struct cell *cell,
+                struct pivot_table *pt)
 {
-  double *l = stat;
+  size_t *indexes = xcalloc (pt->n_dimensions, sizeof *indexes);
+  for (int v = 0; v < mt->n_dep_vars; ++v)
+    {
+      for (int s = 0; s < means->n_statistics; ++s)
+        {
+          int i = 0;
+          if (mt->n_dep_vars > 1)
+            indexes[i++] = v;
+          indexes[i++] = s;
+          int stat = means->statistics[s];
+          stat_get *sg = cell_spec[stat].sd;
+          {
+            const struct cell *pc = cell;
+            for (; i < pt->n_dimensions; ++i)
+              {
+                int l_idx = pt->n_dimensions - i - 1;
+		const struct cell_container *instances = ws->instances + l_idx;
+                if (0 == (cell->not_wild >> l_idx & 0x1U))
+                  {
+                    indexes [i] = hmap_count (&instances->map);
+                  }
+                else
+                  {
+                    assert (pc);
+                    const struct instance *inst
+		      = lookup_instance (mt, ws, l_idx, pc);
+                    assert (inst);
+                    indexes [i] = inst->index;
+                    pc = pc->parent_cell;
+                  }
+              }
+          }
+
+	  int idx = s + v * means->n_statistics;
+          pivot_table_put (pt, indexes, pt->n_dimensions,
+                           pivot_value_new_number (sg (cell->stat[idx])));
+        }
+    }
+  free (indexes);
 
-  *l = x;
+  for (int i = 0; i < cell->n_children; ++i)
+    {
+      struct cell_container *container = cell->children + i;
+      struct cell *child = NULL;
+      BT_FOR_EACH (child, struct cell, bt_node, &container->bt)
+	{
+          populate_table (means, mt, ws, child, pt);
+	}
+    }
 }
 
-static double
-last_get (const struct per_var_data *pvd UNUSED, void *stat)
+static void
+create_table_structure (const struct mtable *mt, struct pivot_table *pt,
+			const struct workspace *ws)
 {
-  double *l = stat;
+  int * lindexes = ws->control_idx;
+  /* The inner layers are situated rightmost in the table.
+     So this iteration is in reverse order.  */
+  for (int l = mt->n_layers -1; l >=0 ; --l)
+    {
+      const struct layer *layer = mt->layers[l];
+      const struct cell_container *instances = ws->instances + l;
+      const struct variable *var = layer->factor_vars[lindexes[l]];
+      struct pivot_dimension *dim_layer
+	= pivot_dimension_create (pt, PIVOT_AXIS_ROW,
+				  var_to_string (var));
+      dim_layer->root->show_label = true;
+
+      /* Place the values of the control variables as table headings.  */
+      {
+	struct instance *inst = NULL;
+	BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
+	  {
+	    struct substring space = SS_LITERAL_INITIALIZER ("\t ");
+	    struct string str;
+	    ds_init_empty (&str);
+	    var_append_value_name (var,
+				   &inst->value,
+				   &str);
 
-  return *l;
-}
+	    ds_ltrim (&str, space);
 
+	    pivot_category_create_leaf (dim_layer->root,
+                                        pivot_value_new_text (ds_cstr (&str)));
 
-static void *
-first_create (struct pool *pool)
-{
-  double *f = pool_alloc (pool, sizeof *f);
-
-  *f = SYSMIS;
+	    ds_destroy (&str);
+	  }
+      }
 
-  return f;
+      pivot_category_create_leaf (dim_layer->root,
+                                  pivot_value_new_text ("Total"));
+    }
 }
 
+/* Initialise C_DES with a string describing the control variable
+   relating to MT, LINDEXES.  */
 static void
-first_update (void *stat, double w UNUSED, double x)
+layers_to_string (const struct mtable *mt, const int *lindexes,
+		  struct string *c_des)
 {
-  double *f = stat;
-
-  if (*f == SYSMIS)
-    *f = x;
+  for (int l = 0; l < mt->n_layers; ++l)
+    {
+      const struct layer *layer = mt->layers[l];
+      const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
+      if (l > 0)
+	ds_put_cstr (c_des, " * ");
+      ds_put_cstr (c_des, var_get_name (ctrl_var));
+    }
 }
 
-static double
-first_get (const struct per_var_data *pvd UNUSED,  void *stat)
+static void
+populate_case_processing_summary (struct pivot_category *pc,
+				  const struct mtable *mt,
+				  const int *lindexes)
 {
-  double *f = stat;
+  struct string ds;
+  ds_init_empty (&ds);
+  int l = 0;
+  for (l = 0; l < mt->n_layers; ++l)
+    {
+      const struct layer *layer = mt->layers[l];
+      const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
+      if (l > 0)
+	ds_put_cstr (&ds, " * ");
+      ds_put_cstr (&ds, var_get_name (ctrl_var));
+    }
+  for (int dv = 0; dv < mt->n_dep_vars; ++dv)
+    {
+      struct string dss;
+      ds_init_empty (&dss);
+      ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
+      if (mt->n_layers > 0)
+	{
+	  ds_put_cstr (&dss, " * ");
+	  ds_put_substring (&dss, ds.ss);
+	}
+      pivot_category_create_leaf (pc,
+				  pivot_value_new_text (ds_cstr (&dss)));
+      ds_destroy (&dss);
+    }
 
-  return *f;
+  ds_destroy (&ds);
 }
 
-enum
-  {
-    MEANS_MEAN = 0,
-    MEANS_N,
-    MEANS_STDDEV
-  };
-
-/* Table of cell_specs */
-static const struct cell_spec cell_spec[] = {
-  {N_("Mean"), "MEAN", NULL, NULL, arithmean_get},
-  {N_("N"), "COUNT", NULL, NULL, n_get},
-  {N_("Std. Deviation"), "STDDEV", NULL, NULL, stddev_get},
-#if 0
-  {N_("Median"), "MEDIAN", NULL, NULL, NULL},
-  {N_("Group Median"), "GMEDIAN", NULL, NULL, NULL},
-#endif
-  {N_("S.E. Mean"), "SEMEAN", NULL, NULL, semean_get},
-  {N_("Sum"), "SUM", NULL, NULL, sum_get},
-  {N_("Min"), "MIN", min_create, min_update, min_get},
-  {N_("Max"), "MAX", max_create, max_update, max_get},
-  {N_("Range"), "RANGE", range_create, range_update, range_get},
-  {N_("Variance"), "VARIANCE", NULL, NULL, variance_get},
-  {N_("Kurtosis"), "KURT", NULL, NULL, kurt_get},
-  {N_("S.E. Kurt"), "SEKURT", NULL, NULL, sekurt_get},
-  {N_("Skewness"), "SKEW", NULL, NULL, skew_get},
-  {N_("S.E. Skew"), "SESKEW", NULL, NULL, seskew_get},
-  {N_("First"), "FIRST", first_create, first_update, first_get},
-  {N_("Last"), "LAST", last_create, last_update, last_get},
-#if 0
-  {N_("Percent N"), "NPCT", NULL, NULL, NULL},
-  {N_("Percent Sum"), "SPCT", NULL, NULL, NULL},
-#endif
-  {N_("Harmonic Mean"), "HARMONIC", harmonic_create, harmonic_update, harmonic_get},
-  {N_("Geom. Mean"), "GEOMETRIC", geometric_create, geometric_update, geometric_get}
-};
-
-#define n_C (sizeof (cell_spec) / sizeof (struct cell_spec))
-
-
-struct summary
+/* Create the "Case Processing Summary" table.  */
+void
+means_case_processing_summary (const struct mtable *mt)
 {
-  casenumber missing;
-  casenumber non_missing;
-};
-
-
-struct layer
-{
-  size_t n_factor_vars;
-  const struct variable **factor_vars;
-};
-
-/* The thing parsed after TABLES= */
-struct mtable
-{
-  size_t n_dep_vars;
-  const struct variable **dep_vars;
-
-  int n_layers;
-  struct layer *layers;
-
-  struct interaction **interactions;
-  struct summary *summary;
-
-  int ii;
-
-  struct categoricals *cats;
-};
-
-struct means
-{
-  const struct dictionary *dict;
-
-  struct mtable *table;
-  size_t n_tables;
-
-  /* Missing value class for categorical variables */
-  enum mv_class exclude;
-
-  /* Missing value class for dependent variables */
-  enum mv_class dep_exclude;
-
-  bool listwise_exclude;
-
-  /* an array  indicating which statistics are to be calculated */
-  int *cells;
-
-  /* Size of cells */
-  int n_cells;
-
-  /* Pool on which cell functions may allocate data */
-  struct pool *pool;
-};
-
-
-static void
-run_means (struct means *cmd, struct casereader *input,
-	   const struct dataset *ds);
-
-
-
-static bool
-parse_means_table_syntax (struct lexer *lexer, const struct means *cmd, struct mtable *table)
-{
-  table->ii = 0;
-  table->n_layers = 0;
-  table->layers = NULL;
-  table->interactions = NULL;
-
-  /* Dependent variable (s) */
-  if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict,
-				   &table->dep_vars, &table->n_dep_vars,
-				   PV_NO_DUPLICATE | PV_NUMERIC))
-    return false;
-
-  /* Factor variable (s) */
-  while (lex_match (lexer, T_BY))
+  struct pivot_table *pt = pivot_table_create (N_("Case Processing Summary"));
+
+  struct pivot_dimension *dim_cases =
+    pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Cases"));
+  dim_cases->root->show_label = true;
+
+  struct pivot_category *cats[3];
+  cats[0] = pivot_category_create_group (dim_cases->root,
+					 N_("Included"), NULL);
+  cats[1] = pivot_category_create_group (dim_cases->root,
+					 N_("Excluded"), NULL);
+  cats[2] = pivot_category_create_group (dim_cases->root,
+					 N_("Total"), NULL);
+  for (int i = 0; i < 3; ++i)
     {
-      table->n_layers++;
-      table->layers =
-	pool_realloc (cmd->pool, table->layers,
-		      sizeof (*table->layers) * table->n_layers);
-
-      if (!parse_variables_const_pool
-	  (lexer, cmd->pool, cmd->dict,
-	   &table->layers[table->n_layers - 1].factor_vars,
-	   &table->layers[table->n_layers - 1].n_factor_vars,
-	   PV_NO_DUPLICATE))
-	return false;
+      pivot_category_create_leaf_rc (cats[i],
+                                     pivot_value_new_text (N_("N")),
+				     PIVOT_RC_COUNT);
+      pivot_category_create_leaf_rc (cats[i],
+                                     pivot_value_new_text (N_("Percent")),
+				     PIVOT_RC_PERCENT);
     }
 
-  /* There is always at least one layer.
-     However the final layer is the total, and not
-     normally considered by the user as a
-     layer.
-  */
+  struct pivot_dimension *rows =
+    pivot_dimension_create (pt, PIVOT_AXIS_ROW, N_("Variables"));
 
-  table->n_layers++;
-  table->layers =
-    pool_realloc (cmd->pool, table->layers,
-		  sizeof (*table->layers) * table->n_layers);
-  table->layers[table->n_layers - 1].factor_vars = NULL;
-  table->layers[table->n_layers - 1].n_factor_vars = 0;
+  for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
+    {
+      const struct workspace *ws = mt->ws + cmb;
+      populate_case_processing_summary (rows->root, mt, ws->control_idx);
+      for (int dv = 0; dv < mt->n_dep_vars; ++dv)
+        {
+          int idx = cmb * mt->n_dep_vars + dv;
+          const struct summary *summ = mt->summ + idx;
+          double n_included = summ->n_total - summ->n_missing;
+          pivot_table_put2 (pt, 5, idx,
+                            pivot_value_new_number (100.0 * summ->n_total / summ->n_total));
+          pivot_table_put2 (pt, 4, idx,
+                            pivot_value_new_number (summ->n_total));
+
+          pivot_table_put2 (pt, 3, idx,
+                            pivot_value_new_number (100.0 * summ->n_missing / summ->n_total));
+          pivot_table_put2 (pt, 2, idx,
+                            pivot_value_new_number (summ->n_missing));
+
+          pivot_table_put2 (pt, 1, idx,
+                            pivot_value_new_number (100.0 * n_included / summ->n_total));
+          pivot_table_put2 (pt, 0, idx,
+                            pivot_value_new_number (n_included));
+        }
+    }
 
-  return true;
+  pivot_table_submit (pt);
 }
 
-/* Match a variable.
-   If the match succeeds, the variable will be placed in VAR.
-   Returns true if successful */
-static bool
-lex_is_variable (struct lexer *lexer, const struct dictionary *dict,
-		 int n)
+static void
+means_shipout_single (const struct mtable *mt, const struct means *means,
+		      const struct workspace *ws)
 {
-  const char *tstr;
-  if (lex_next_token (lexer, n) !=  T_ID)
-    return false;
+  struct pivot_table *pt = pivot_table_create (N_("Report"));
+  pt->omit_empty = true;
 
-  tstr = lex_next_tokcstr (lexer, n);
+  struct pivot_dimension *dim_cells =
+    pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Statistics"));
 
-  if (NULL == dict_lookup_var (dict, tstr) )
-    return false;
+  /* Set the statistics headings, eg "Mean", "Std. Dev" etc.  */
+  for (int i = 0; i < means->n_statistics; ++i)
+    {
+      const struct cell_spec *cs = cell_spec + means->statistics[i];
+      pivot_category_create_leaf_rc
+	(dim_cells->root,
+	 pivot_value_new_text (gettext (cs->title)), cs->rc);
+    }
 
-  return true;
+  create_table_structure (mt, pt, ws);
+  populate_table (means, mt, ws, ws->root_cell, pt);
+  pivot_table_submit (pt);
 }
 
 
-int
-cmd_means (struct lexer *lexer, struct dataset *ds)
+static void
+means_shipout_multivar (const struct mtable *mt, const struct means *means,
+			const struct workspace *ws)
 {
-  int t;
-  int i;
-  int l;
-  struct means means;
-  bool more_tables = true;
-
-  means.pool = pool_create ();
-
-  means.exclude = MV_ANY;
-  means.dep_exclude = MV_ANY;
-  means.listwise_exclude = false;
-  means.table = NULL;
-  means.n_tables = 0;
-
-  means.dict = dataset_dict (ds);
-
-  means.n_cells = 3;
-  means.cells = pool_calloc (means.pool, means.n_cells, sizeof (*means.cells));
+  struct string dss;
+  ds_init_empty (&dss);
+  for (int dv = 0; dv < mt->n_dep_vars; ++dv)
+    {
+      ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
+      if (mt->n_layers > 0)
+	ds_put_cstr (&dss, " * ");
+    }
 
+  for (int l = 0; l < mt->n_layers; ++l)
+    {
+      const struct layer *layer = mt->layers[l];
+      const struct variable *var = layer->factor_vars[ws->control_idx[l]];
+      ds_put_cstr (&dss, var_get_name (var));
+      if (l < mt->n_layers - 1)
+	ds_put_cstr (&dss, " * ");
+    }
 
-  /* The first three items (MEAN, COUNT, STDDEV) are the default */
-  for (i = 0; i < 3; ++i)
-    means.cells[i] = i;
+  struct pivot_table *pt = pivot_table_create (ds_cstr (&dss));
+  pt->omit_empty = true;
+  ds_destroy (&dss);
 
+  struct pivot_dimension *dim_cells =
+    pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Variables"));
 
-  /*   Optional TABLES =   */
-  if (lex_match_id (lexer, "TABLES"))
+  for (int i = 0; i < mt->n_dep_vars; ++i)
     {
-      if (! lex_force_match (lexer, T_EQUALS))
-	goto error;
+      pivot_category_create_leaf
+	(dim_cells->root,
+	 pivot_value_new_variable (mt->dep_vars[i]));
     }
 
+  struct pivot_dimension *dim_stats
+    = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
+  			      N_ ("Statistics"));
+  dim_stats->root->show_label = false;
 
-  more_tables = true;
-  /* Parse the "tables" */
-  while (more_tables)
+  for (int i = 0; i < means->n_statistics; ++i)
     {
-      means.n_tables ++;
-      means.table = pool_realloc (means.pool, means.table, means.n_tables * sizeof (*means.table));
-
-      if (! parse_means_table_syntax (lexer, &means,
-				      &means.table[means.n_tables - 1]))
-	{
-	  goto error;
-	}
-
-      /* Look ahead to see if there are more tables to be parsed */
-      more_tables = false;
-      if ( T_SLASH == lex_next_token (lexer, 0) )
-	{
-	  if (lex_is_variable (lexer, means.dict, 1) )
-	    {
-	      more_tables = true;
-	      lex_match (lexer, T_SLASH);
-	    }
-	}
+      const struct cell_spec *cs = cell_spec + means->statistics[i];
+      pivot_category_create_leaf_rc
+	(dim_stats->root,
+	 pivot_value_new_text (gettext (cs->title)), cs->rc);
     }
 
-  /* /MISSING subcommand */
-  while (lex_token (lexer) != T_ENDCMD)
-    {
-      lex_match (lexer, T_SLASH);
+  create_table_structure (mt, pt, ws);
+  populate_table (means, mt, ws, ws->root_cell, pt);
+  pivot_table_submit (pt);
+}
 
-      if (lex_match_id (lexer, "MISSING"))
+void
+means_shipout (const struct mtable *mt, const struct means *means)
+{
+  for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
+    {
+      const struct workspace *ws = mt->ws + cmb;
+      if (ws->root_cell == NULL)
 	{
-	  /*
-	    If no MISSING subcommand is specified, each combination of
-	    a dependent variable and categorical variables is handled
-	    separately.
-	  */
-	  lex_match (lexer, T_EQUALS);
-	  if (lex_match_id (lexer, "INCLUDE"))
-	    {
-	      /*
-		Use the subcommand  "/MISSING=INCLUDE" to include user-missing
-		values in the analysis.
-	      */
-
-	      means.exclude = MV_SYSTEM;
-	      means.dep_exclude = MV_SYSTEM;
-	    }
-	  else if (lex_match_id (lexer, "TABLE"))
-	    /*
-	      This is the default. (I think).
-	      Every case containing a complete set of variables for a given
-	      table. If any variable, categorical or dependent for in a table
-	      is missing (as defined by what?), then that variable will
-	      be dropped FOR THAT TABLE ONLY.
-	    */
-	    {
-	      means.listwise_exclude = true;
-	    }
-	  else if (lex_match_id (lexer, "DEPENDENT"))
-	    /*
-	      Use the command "/MISSING=DEPENDENT" to
-	      include user-missing values for the categorical variables,
-	      while excluding them for the dependent variables.
-
-	      Cases are dropped only when user-missing values
-	      appear in dependent  variables.  User-missing
-	      values for categorical variables are treated according to
-	      their face value.
-
-	      Cases are ALWAYS dropped when System Missing values appear
-	      in the categorical variables.
-	    */
-	    {
-	      means.dep_exclude = MV_ANY;
-	      means.exclude = MV_SYSTEM;
-	    }
-	  else
-	    {
-	      lex_error (lexer, NULL);
-	      goto error;
-	    }
-	}
-      else if (lex_match_id (lexer, "CELLS"))
-	{
-	  lex_match (lexer, T_EQUALS);
-
-	  /* The default values become overwritten */
-	  means.n_cells = 0;
-	  while (lex_token (lexer) != T_ENDCMD
-		 && lex_token (lexer) != T_SLASH)
-	    {
-	      int k = 0;
-	      if (lex_match (lexer, T_ALL))
-		{
-		  int x;
-		  means.cells =
-		    pool_realloc (means.pool, means.cells,
-				  (means.n_cells += n_C) * sizeof (*means.cells));
-
-		  for (x = 0; x < n_C; ++x)
-		    means.cells[means.n_cells - (n_C - 1 - x) - 1] = x;
-		}
-	      else if (lex_match_id (lexer, "NONE"))
-		{
-		  /* Do nothing */
-		}
-	      else if (lex_match_id (lexer, "DEFAULT"))
-		{
-		  means.cells =
-		    pool_realloc (means.pool, means.cells,
-				  (means.n_cells += 3) * sizeof (*means.cells));
-
-		  means.cells[means.n_cells - 2 - 1] = MEANS_MEAN;
-		  means.cells[means.n_cells - 1 - 1] = MEANS_N;
-		  means.cells[means.n_cells - 0 - 1] = MEANS_STDDEV;
-		}
-	      else
-		{
-		  for (; k < n_C; ++k)
-		    {
-		      if (lex_match_id (lexer, cell_spec[k].keyword))
-			{
-			  means.cells =
-			    pool_realloc (means.pool, means.cells,
-					  ++means.n_cells * sizeof (*means.cells));
-
-			  means.cells[means.n_cells - 1] = k;
-			  break;
-			}
-		    }
-		}
-	      if (k >= n_C)
-		{
-		  lex_error (lexer, NULL);
-		  goto error;
-		}
-	    }
+	  struct string des;
+	  ds_init_empty (&des);
+	  layers_to_string (mt, ws->control_idx, &des);
+	  msg (MW, _("The table \"%s\" has no non-empty control variables."
+		     "  No result for this table will be displayed."),
+	       ds_cstr (&des));
+	  ds_destroy (&des);
+	  continue;
 	}
+      if (mt->n_dep_vars > 1)
+	means_shipout_multivar (mt, means, ws);
       else
-	{
-	  lex_error (lexer, NULL);
-	  goto error;
-	}
+	means_shipout_single (mt, means, ws);
     }
+}
 
 
+
 
-  for (t = 0; t < means.n_tables; ++t)
+static bool
+control_var_missing (const struct means *means,
+		     const struct mtable *mt,
+		     unsigned int not_wild UNUSED,
+		     const struct ccase *c,
+		     const struct workspace *ws)
+{
+  bool miss = false;
+  for (int l = 0; l < mt->n_layers; ++l)
     {
-      struct mtable *table = &means.table[t];
-
-      table->interactions =
-	pool_calloc (means.pool, table->n_layers, sizeof (*table->interactions));
+      /* if (0 == ((not_wild >> l) & 0x1)) */
+      /* { */
+      /*   continue; */
+      /* } */
+
+      const struct layer *layer = mt->layers[l];
+      const struct variable *var = layer->factor_vars[ws->control_idx[l]];
+      const union value *vv = case_data (c, var);
+
+      miss = var_is_value_missing (var, vv, means->ctrl_exclude);
+      if (miss)
+	break;
+    }
 
-      table->summary =
-	pool_calloc (means.pool, table->n_dep_vars * table->n_layers, sizeof (*table->summary));
+  return miss;
+}
 
-      for (l = 0; l < table->n_layers; ++l)
+/* Lookup the set of control variables described by MT, C and NOT_WILD,
+   in the hash table MAP.  If there is no such entry, then create a
+   cell with these paremeters and add is to MAP.
+   If the generated cell has childen, repeat for all the children.
+   Returns the root cell.
+*/
+static struct cell *
+service_cell_map (const struct means *means, const struct mtable *mt,
+		 const struct ccase *c,
+                 unsigned int not_wild,
+		 struct hmap *map,
+		 const struct cell *pcell,
+                 int level,
+		 const struct workspace *ws)
+{
+  struct cell *cell = NULL;
+  if (map)
+    {
+      if (!control_var_missing (means, mt, not_wild, c, ws))
 	{
-	  int v;
-	  const struct layer *lyr = &table->layers[l];
-	  const int n_vars = lyr->n_factor_vars;
-	  table->interactions[l] = interaction_create (NULL);
-	  for (v = 0; v < n_vars ; ++v)
+	  /* Lookup this set of values in the cell's hash table.  */
+	  unsigned int hash = generate_hash (mt, c, not_wild, ws);
+	  cell = lookup_cell (mt, map, hash, c, not_wild, ws);
+
+	  /* If it has not been seen before, then create a new
+	     subcell, with this set of values, and insert it
+	     into the table.  */
+	  if (cell == NULL)
 	    {
-	      interaction_add_variable (table->interactions[l],
-					lyr->factor_vars[v]);
+              cell = generate_cell (means, mt, c, not_wild, pcell, ws);
+	      hmap_insert (map, &cell->hmap_node, hash);
 	    }
 	}
     }
-
-  {
-    struct casegrouper *grouper;
-    struct casereader *group;
-    bool ok;
-
-    grouper = casegrouper_create_splits (proc_open (ds), means.dict);
-    while (casegrouper_get_next_group (grouper, &group))
-      {
-	run_means (&means, group, ds);
-      }
-    ok = casegrouper_destroy (grouper);
-    ok = proc_commit (ds) && ok;
-  }
-
-  for (t = 0; t < means.n_tables; ++t)
+  else
     {
-      int l;
-      struct mtable *table = &means.table[t];
-      if (table->interactions)
-	for (l = 0; l < table->n_layers; ++l)
-	  {
-	    interaction_destroy (table->interactions[l]);
-	  }
+      /* This condition should only happen in the root node case. */
+      cell = ws->root_cell;
+      if (cell == NULL &&
+	  !control_var_missing (means, mt, not_wild, c, ws))
+	cell = generate_cell (means, mt, c, not_wild, pcell, ws);
     }
 
-  pool_destroy (means.pool);
-  return CMD_SUCCESS;
-
- error:
-
-  for (t = 0; t < means.n_tables; ++t)
+  if (cell)
     {
-      int l;
-      struct mtable *table = &means.table[t];
-      if (table->interactions)
-	for (l = 0; l < table->n_layers; ++l)
-	  {
-	    interaction_destroy (table->interactions[l]);
-	  }
-    }
-
-  pool_destroy (means.pool);
-  return CMD_FAILURE;
-}
-
-
-static bool
-is_missing (const struct means *cmd,
-	    const struct variable *dvar,
-	    const struct interaction *iact,
-	    const struct ccase *c)
-{
-  if ( interaction_case_is_missing (iact, c, cmd->exclude) )
-    return true;
-
+      /* Here is where the business really happens!   After
+	 testing for missing values, the cell's statistics
+	 are accumulated.  */
+      if (!control_var_missing (means, mt, not_wild, c, ws))
+        {
+          for (int v = 0; v < mt->n_dep_vars; ++v)
+            {
+              const struct variable *dep_var = mt->dep_vars[v];
+	      const union value *vv = case_data (c, dep_var);
+	      if (var_is_value_missing (dep_var, vv, means->dep_exclude))
+		continue;
+
+              for (int stat = 0; stat < means->n_statistics; ++stat)
+                {
+                  const double weight = dict_get_case_weight (means->dict, c,
+                                                              NULL);
+                  stat_update *su = cell_spec[means->statistics[stat]].su;
+                  su (cell->stat[stat + v * means->n_statistics], weight,
+		      case_data (c, dep_var)->f);
+                }
+            }
+        }
 
-  if (var_is_value_missing (dvar,
-			    case_data (c, dvar),
-			    cmd->dep_exclude))
-    return true;
+      /* Recurse into all the children (if there are any).  */
+      for (int i = 0; i < cell->n_children; ++i)
+	{
+	  struct cell_container *cc = cell->children + i;
+	  service_cell_map (means, mt, c,
+                           not_wild | (0x1U << (i + level)),
+			   &cc->map, cell, level + i + 1, ws);
+	}
+    }
 
-  return false;
+  return cell;
 }
 
-static void output_case_processing_summary (const struct mtable *,
-                                            const struct variable *wv);
-
-static void output_report (const struct means *, int, const struct mtable *);
-
-
-struct per_cat_data
-{
-  struct per_var_data *pvd;
-
-  bool warn;
-};
-
-
+/*  Do all the necessary preparation and pre-calculation that
+    needs to be done before iterating the data.  */
 static void
-destroy_n (const void *aux1 UNUSED, void *aux2, void *user_data)
-{
-  struct mtable *table = aux2;
-  int v;
-  struct per_cat_data *per_cat_data = user_data;
-  struct per_var_data *pvd = per_cat_data->pvd;
-
-  for (v = 0; v < table->n_dep_vars; ++v)
-    {
-      struct per_var_data *pp = &pvd[v];
-      moments1_destroy (pp->mom);
-    }
-}
-
-static void *
-create_n (const void *aux1, void *aux2)
+prepare_means (struct means *cmd)
 {
-  int i, v;
-  const struct means *means = aux1;
-  struct mtable *table = aux2;
-  struct per_cat_data *per_cat_data = pool_malloc (means->pool, sizeof *per_cat_data);
-
-  struct per_var_data *pvd = pool_calloc (means->pool, table->n_dep_vars, sizeof *pvd);
-
-  for (v = 0; v < table->n_dep_vars; ++v)
+  for (int t = 0; t < cmd->n_tables; ++t)
     {
-      enum moment maxmom = MOMENT_KURTOSIS;
-      struct per_var_data *pp = &pvd[v];
-
-      pp->cell_stats = pool_calloc (means->pool, means->n_cells, sizeof *pp->cell_stats);
+      struct mtable *mt = cmd->table + t;
 
+      mt->n_combinations = 1;
+      for (int l = 0; l < mt->n_layers; ++l)
+        mt->n_combinations *= mt->layers[l]->n_factor_vars;
 
-      for (i = 0; i < means->n_cells; ++i)
-	{
-	  int csi = means->cells[i];
-	  const struct cell_spec *cs = &cell_spec[csi];
-	  if (cs->sc)
-	    {
-	      pp->cell_stats[i] = cs->sc (means->pool);
-	    }
-	}
-      pp->mom = moments1_create (maxmom);
+      mt->ws = xzalloc (mt->n_combinations * sizeof (*mt->ws));
+      mt->summ = xzalloc (mt->n_combinations * mt->n_dep_vars
+			     * sizeof (*mt->summ));
+      for (int i = 0; i < mt->n_combinations; ++i)
+        {
+          struct workspace *ws = mt->ws + i;
+	  ws->root_cell = NULL;
+          ws->control_idx = xzalloc (mt->n_layers
+					 * sizeof *ws->control_idx);
+          ws->instances = xzalloc (mt->n_layers
+					 * sizeof *ws->instances);
+          int cmb = i;
+          for (int l = mt->n_layers - 1; l >= 0; --l)
+            {
+	      struct cell_container *instances = ws->instances + l;
+              const struct layer *layer = mt->layers[l];
+              ws->control_idx[l] = cmb % layer->n_factor_vars;
+              cmb /= layer->n_factor_vars;
+	      hmap_init (&instances->map);
+            }
+        }
     }
-
-
-  per_cat_data->pvd = pvd;
-  per_cat_data->warn = true;
-  return per_cat_data;
 }
 
+
+/* Do all the necessary calculations that occur AFTER iterating
+   the data.  */
 static void
-update_n (const void *aux1, void *aux2, void *user_data, const struct ccase *c, double weight)
+post_means (struct means *cmd)
 {
-  int i;
-  int v = 0;
-  const struct means *means = aux1;
-  struct mtable *table = aux2;
-  struct per_cat_data *per_cat_data = user_data;
-
-  for (v = 0; v < table->n_dep_vars; ++v)
+  for (int t = 0; t < cmd->n_tables; ++t)
     {
-      struct per_var_data *pvd = &per_cat_data->pvd[v];
-
-      const double x = case_data (c, table->dep_vars[v])->f;
-
-      for (i = 0; i < table->n_layers; ++i)
+      struct mtable *mt = cmd->table + t;
+      for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
 	{
-	  if ( is_missing (means, table->dep_vars[v],
-			   table->interactions[i], c))
-	    goto end;
-	}
-
-      for (i = 0; i < means->n_cells; ++i)
-	{
-	  const int csi = means->cells[i];
-	  const struct cell_spec *cs = &cell_spec[csi];
+	  struct workspace *ws = mt->ws + cmb;
+	  if (ws->root_cell == NULL)
+	    continue;
+	  arrange_cells (ws, ws->root_cell, mt);
+	  /*  The root cell should have no parent.  */
+	  assert (ws->root_cell->parent_cell == 0);
 
+	  for (int l = 0; l < mt->n_layers; ++l)
+	    {
+	      struct cell_container *instances = ws->instances + l;
+	      bt_init (&instances->bt, compare_instance_3way, NULL);
+
+	      /* Iterate the instance hash table, and insert each instance
+		 into the binary tree BT.  */
+	      struct instance *inst;
+	      HMAP_FOR_EACH (inst, struct instance, hmap_node,
+			     &instances->map)
+		{
+		  bt_insert (&instances->bt, &inst->bt_node);
+		}
 
-	  if (cs->su)
-	    cs->su (pvd->cell_stats[i],
-		    weight, x);
+	      /* Iterate the binary tree (in order) and assign the index
+		 member accordingly.  */
+	      int index = 0;
+	      BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
+		{
+		  inst->index = index++;
+		}
+	    }
 	}
-
-      moments1_add (pvd->mom, x, weight);
-
-    end:
-      continue;
     }
 }
 
+
+/* Update the summary information (the missings and the totals).  */
 static void
-calculate_n (const void *aux1, void *aux2, void *user_data)
+update_summaries (const struct means *means, struct mtable *mt,
+		  const struct ccase *c, double weight)
 {
-  int i;
-  int v = 0;
-  struct per_cat_data *per_cat_data = user_data;
-  const struct means *means = aux1;
-  struct mtable *table = aux2;
-
-  for (v = 0; v < table->n_dep_vars; ++v)
+  for (int dv = 0; dv < mt->n_dep_vars; ++dv)
     {
-      struct per_var_data *pvd = &per_cat_data->pvd[v];
-      for (i = 0; i < means->n_cells; ++i)
+      for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
 	{
-	  int csi = means->cells[i];
-	  const struct cell_spec *cs = &cell_spec[csi];
-
-	  if (cs->su)
-	    cs->sd (pvd, pvd->cell_stats[i]);
+	  struct workspace *ws = mt->ws + cmb;
+	  struct summary *summ = mt->summ
+	    + cmb * mt->n_dep_vars + dv;
+
+	  summ->n_total += weight;
+	  const struct variable *var = mt->dep_vars[dv];
+	  const union value *vv = case_data (c, var);
+	  /* First check if the dependent variable is missing.  */
+	  if (var_is_value_missing (var, vv, means->dep_exclude))
+	    summ->n_missing += weight;
+	  /* If the dep var is not missing, then check each
+	     control variable.  */
+	  else
+	    for (int l = 0; l < mt->n_layers; ++l)
+	      {
+		const struct layer *layer = mt->layers [l];
+		const struct variable *var
+		  = layer->factor_vars[ws->control_idx[l]];
+		const union value *vv = case_data (c, var);
+		if (var_is_value_missing (var, vv, means->ctrl_exclude))
+		  {
+		    summ->n_missing += weight;
+		    break;
+		  }
+	      }
 	}
     }
 }
 
-static void
+
+void
 run_means (struct means *cmd, struct casereader *input,
 	   const struct dataset *ds UNUSED)
 {
-  int t;
-  const struct variable *wv = dict_get_weight (cmd->dict);
-  struct ccase *c;
+  struct ccase *c = NULL;
   struct casereader *reader;
 
-  struct payload payload;
-  payload.create = create_n;
-  payload.update = update_n;
-  payload.calculate = calculate_n;
-  payload.destroy = destroy_n;
-
-  for (t = 0; t < cmd->n_tables; ++t)
-    {
-      struct mtable *table = &cmd->table[t];
-      table->cats
-	= categoricals_create (table->interactions,
-			       table->n_layers, wv, cmd->exclude);
-
-      categoricals_set_payload (table->cats, &payload, cmd, table);
-    }
+  prepare_means (cmd);
 
   for (reader = input;
        (c = casereader_read (reader)) != NULL; case_unref (c))
     {
-      for (t = 0; t < cmd->n_tables; ++t)
+      const double weight
+	= dict_get_case_weight (cmd->dict, c, NULL);
+      for (int t = 0; t < cmd->n_tables; ++t)
 	{
-	  bool something_missing = false;
-	  int  v;
-	  struct mtable *table = &cmd->table[t];
+	  struct mtable *mt = cmd->table + t;
+	  update_summaries (cmd, mt, c, weight);
 
-	  for (v = 0; v < table->n_dep_vars; ++v)
+	  for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
 	    {
-	      int i;
-	      for (i = 0; i < table->n_layers; ++i)
-		{
-		  const bool missing =
-		    is_missing (cmd, table->dep_vars[v],
-				table->interactions[i], c);
-		  if (missing)
-		    {
-		      something_missing = true;
-		      table->summary[v * table->n_layers + i].missing++;
-		    }
-		  else
-		    table->summary[v * table->n_layers  + i].non_missing++;
-		}
-	    }
-	  if ( something_missing && cmd->listwise_exclude)
-	    continue;
+	      struct workspace *ws = mt->ws + cmb;
 
-	  categoricals_update (table->cats, c);
+	      ws->root_cell = service_cell_map (cmd, mt, c,
+						0U, NULL, NULL, 0, ws);
+	    }
 	}
     }
   casereader_destroy (reader);
 
-  for (t = 0; t < cmd->n_tables; ++t)
-    {
-      struct mtable *table = &cmd->table[t];
-
-      categoricals_done (table->cats);
-    }
-
-
-  for (t = 0; t < cmd->n_tables; ++t)
-    {
-      int i;
-      const struct mtable *table = &cmd->table[t];
-
-      output_case_processing_summary (table, wv);
-
-      for (i = 0; i < table->n_layers; ++i)
-	{
-	  output_report (cmd, i, table);
-	}
-      categoricals_destroy (table->cats);
-    }
-
+  post_means (cmd);
 }
 
 
-
-static void
-output_case_processing_summary (const struct mtable *mt,
-                                const struct variable *wv)
+/* Release all resources allocated by this routine.
+   This does not include those allocated by the parser,
+   which exclusively use MEANS->pool.  */
+void
+destroy_means (struct means *means)
 {
-  struct pivot_table *table = pivot_table_create (
-    N_("Case Processing Summary"));
-  pivot_table_set_weight_var (table, wv);
-
-  pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
-                          N_("N"), PIVOT_RC_COUNT,
-                          N_("Percent"), PIVOT_RC_PERCENT);
-  pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Cases"),
-                          N_("Included"), N_("Excluded"), N_("Total"))
-    ->root->show_label = true;
-
-  struct pivot_dimension *tables = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Tables"));
-
-  for (size_t v = 0; v < mt->n_dep_vars; ++v)
+  for (int t = 0; t < means->n_tables; ++t)
     {
-      const struct variable *var = mt->dep_vars[v];
-      for (size_t i = 0; i < mt->n_layers; ++i)
-	{
-	  const int row = v * mt->n_layers + i;
-	  const struct interaction *iact = mt->interactions[i];
-
-	  struct string str = DS_EMPTY_INITIALIZER;
-          ds_put_format (&str, "%s: ", var_to_string (var));
-	  interaction_to_string (iact, &str);
-          int table_idx = pivot_category_create_leaf (
-            tables->root, pivot_value_new_user_text_nocopy (
-              ds_steal_cstr (&str)));
-
-          const struct summary *s = &mt->summary[row];
-	  double n_total = s->missing + s->non_missing;
-          struct entry
-            {
-              int stat_idx;
-              int case_idx;
-              double x;
-            }
-          entries[] =
-            {
-              { 0, 0, s->non_missing },
-              { 1, 0, s->non_missing / n_total * 100.0 },
-              { 0, 1, s->missing },
-              { 1, 1, s->missing / n_total * 100.0 },
-              { 0, 2, n_total },
-              { 1, 2, 100.0 },
-            };
-
-          for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
-            {
-              const struct entry *e = &entries[j];
-              pivot_table_put3 (table, e->stat_idx, e->case_idx, table_idx,
-                                pivot_value_new_number (e->x));
-            }
+      const struct mtable *table = means->table + t;
+      for (int i = 0; i < table->n_combinations; ++i)
+        {
+	  struct workspace *ws = table->ws + i;
+	  if (ws->root_cell == NULL)
+	    continue;
+	  means_destroy_cells (means, ws->root_cell, table);
 	}
-    }
-
-  pivot_table_submit (table);
-}
-
-static void
-create_interaction_dimensions (struct pivot_table *table,
-                               const struct categoricals *cats,
-                               const struct interaction *iact)
-{
-  for (size_t i = iact->n_vars; i-- > 0; )
-    {
-      const struct variable *var = iact->vars[i];
-      struct pivot_dimension *d = pivot_dimension_create__ (
-        table, PIVOT_AXIS_ROW, pivot_value_new_variable (var));
-      d->root->show_label = true;
-
-      size_t n;
-      union value *values = categoricals_get_var_values (cats, var, &n);
-      for (size_t j = 0; j < n; j++)
-        pivot_category_create_leaf (
-          d->root, pivot_value_new_var_value (var, &values[j]));
-    }
-}
-
-static void
-output_report (const struct means *cmd,  int iact_idx,
-	       const struct mtable *mt)
-{
-  struct pivot_table *table = pivot_table_create (N_("Report"));
-  table->omit_empty = true;
-
-  struct pivot_dimension *statistics = pivot_dimension_create (
-    table, PIVOT_AXIS_COLUMN, N_("Statistics"));
-  for (int i = 0; i < cmd->n_cells; ++i)
-    pivot_category_create_leaf (
-      statistics->root, pivot_value_new_text (cell_spec[cmd->cells[i]].title));
-
-  const struct interaction *iact = mt->interactions[iact_idx];
-  create_interaction_dimensions (table, mt->cats, iact);
-
-  struct pivot_dimension *dep_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
-
-  size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
-
-  size_t n_cats = categoricals_n_count (mt->cats, iact_idx);
-  for (size_t v = 0; v < mt->n_dep_vars; ++v)
-    {
-      indexes[table->n_dimensions - 1] = pivot_category_create_leaf (
-        dep_dim->root, pivot_value_new_variable (mt->dep_vars[v]));
-
-      for (size_t i = 0; i < n_cats; ++i)
+      for (int i = 0; i < table->n_combinations; ++i)
         {
-          for (size_t j = 0; j < iact->n_vars; j++)
-            {
-              int idx = categoricals_get_value_index_by_category_real (
-                mt->cats, iact_idx, i, j);
-              indexes[table->n_dimensions - 2 - j] = idx;
-            }
-
-          struct per_cat_data *per_cat_data
-            = categoricals_get_user_data_by_category_real (
-              mt->cats, iact_idx, i);
-
-          const struct per_var_data *pvd = &per_cat_data->pvd[v];
-          for (int stat_idx = 0; stat_idx < cmd->n_cells; ++stat_idx)
-            {
-              indexes[0] = stat_idx;
-              const int csi = cmd->cells[stat_idx];
-              const struct cell_spec *cs = &cell_spec[csi];
-
-              double result = cs->sd (pvd, pvd->cell_stats[stat_idx]);
-              pivot_table_put (table, indexes, table->n_dimensions,
-                               pivot_value_new_number (result));
-            }
-        }
+	  struct workspace *ws = table->ws + i;
+	  destroy_workspace (table, ws);
+	}
+      free (table->ws);
+      free (table->summ);
     }
-  free (indexes);
-
-  pivot_table_submit (table);
 }
-
diff --git a/src/language/stats/means.h b/src/language/stats/means.h
new file mode 100644
index 0000000000..9d93a4f07a
--- /dev/null
+++ b/src/language/stats/means.h
@@ -0,0 +1,163 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef MEANS_H
+#define MEANS_H
+
+#include "libpspp/compiler.h"
+
+struct cell_container
+{
+  /* A hash table containing the cells.  The table is indexed by a hash
+     based on the cell's categorical value.  */
+  struct hmap map;
+
+  /* A binary tree containing the cells.  This  is
+   used to sort the elements in order of their categorical
+   values.  */
+  struct bt bt;
+};
+
+
+
+struct layer
+{
+  size_t n_factor_vars;
+  const struct variable **factor_vars;
+};
+
+
+struct statistic;
+
+typedef struct statistic *stat_create (struct pool *pool);
+typedef void stat_update  (struct statistic *stat, double w, double x);
+typedef double stat_get   (const struct statistic *);
+typedef void stat_destroy (struct statistic *);
+
+
+struct cell_spec
+{
+  /* Printable title for output */
+  const char *title;
+
+  /* Keyword for syntax */
+  const char *keyword;
+
+  /* The result class for the datum.  */
+  const char *rc;
+
+  stat_create *sc;
+  stat_update *su;
+  stat_get *sd;
+  stat_destroy *sf;
+};
+
+struct summary
+{
+  double n_total;
+  double n_missing;
+};
+
+/* Intermediate data per table.  */
+struct workspace
+{
+  /* An array of n_layers integers which are used
+     to permute access into the factor_vars of each layer.  */
+  int *control_idx;
+
+  /* An array of n_layers cell_containers which hold the union
+     of instances used respectively by each layer.  */
+  struct cell_container *instances;
+
+  struct cell *root_cell;
+};
+
+/* The thing parsed after TABLES= */
+struct mtable
+{
+  size_t n_dep_vars;
+  const struct variable **dep_vars;
+
+  struct layer **layers;
+  int n_layers;
+
+  int n_combinations;
+
+  /* An array of n_combinations workspaces.  */
+  struct workspace *ws;
+
+  /* An array of n_combinations * n_dep_vars summaries.
+     These are displayed in the Case Processing
+     Summary box.  */
+  struct summary *summ;
+};
+
+/* A structure created by the parser.  Contains the definition of the
+   what the procedure should calculate.  */
+struct means
+{
+  const struct dictionary *dict;
+
+  /* The "tables" (ie, a definition of how the data should
+     be broken down).  */
+  struct mtable *table;
+  size_t n_tables;
+
+  /* Missing value class for categorical variables.  */
+  enum mv_class ctrl_exclude;
+
+  /* Missing value class for dependent variables */
+  enum mv_class dep_exclude;
+
+  /* The statistics to be calculated for each cell.  */
+  int *statistics;
+  int n_statistics;
+
+  /* Pool on which cell functions may allocate data.  */
+  struct pool *pool;
+};
+
+
+
+#define n_MEANS_STATISTICS 17
+extern const struct cell_spec cell_spec[n_MEANS_STATISTICS];
+
+/* This enum must be consistent with the array cell_spec (in means-calc.c).
+   A bitfield instead of enums would in my opinion be
+   more elegent.  However we want the order of the specified
+   statistics to be retained in the output.  */
+enum
+  {
+    MEANS_MEAN = 0,
+    MEANS_N,
+    MEANS_STDDEV
+  };
+
+
+
+struct dataset;
+struct casereader;
+void run_means (struct means *cmd, struct casereader *input, const struct dataset *ds UNUSED);
+
+void means_shipout (const struct mtable *mt, const struct means *means);
+
+void means_case_processing_summary (const struct mtable *mt);
+
+
+void destroy_means (struct means *means);
+
+
+#endif
diff --git a/tests/language/stats/means.at b/tests/language/stats/means.at
index 93ef68ac70..c6e298b5af 100644
--- a/tests/language/stats/means.at
+++ b/tests/language/stats/means.at
@@ -1,614 +1,1047 @@
 dnl PSPP - a program for statistical analysis.
-dnl Copyright (C) 2017 Free Software Foundation, Inc.
-dnl 
+dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
+dnl
 dnl This program is free software: you can redistribute it and/or modify
 dnl it under the terms of the GNU General Public License as published by
 dnl the Free Software Foundation, either version 3 of the License, or
 dnl (at your option) any later version.
-dnl 
+dnl
 dnl This program is distributed in the hope that it will be useful,
 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 dnl GNU General Public License for more details.
-dnl 
+dnl
 dnl You should have received a copy of the GNU General Public License
 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
 dnl
 AT_BANNER([MEANS procedure])
 
-AT_SETUP([MEANS simple example])
+AT_SETUP([MEANS simple])
 AT_KEYWORDS([categorical categoricals])
 
 AT_DATA([means-simple.sps], [dnl
-SET FORMAT=F12.5.
-
-data list notable list /score * factor *.
-BEGIN DATA.
-22 01
-22 01
-29 01
-16 01
-24 02
-21 02
-22 01
-24 01
-19 01
-17 01
-22 01
-17 02
-23 02
-25 02
-20 01
-15 01
-18 01
-26 01
-23 02
-35 02
-20 01
-16 01
-19 01
-14 01
-14 01
-21 01
-END DATA.
-
-MEANS TABLES = score BY factor.
+data list notable list /hand * score * w *.
+begin data.
+1 17 4
+1 16 5
+2 21 1
+2 22 1
+2 20 8
+end data.
+
+weight by w.
+
+means tables = score by hand
+ /cells = mean count.
 ])
 
-AT_CHECK([pspp -O format=csv means-simple.sps], [0],
-  [dnl
+AT_CHECK([pspp -O format=csv means-simple.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-score: factor,26,100.0%,0,.0%,26,100.0%
-score: ,26,100.0%,0,.0%,26,100.0%
+score * hand,19,100.0%,0,.0%,19,100.0%
 
 Table: Report
-,factor,Mean,N,Std. Deviation
-score,1.00000,19.78947,19.00000,4.03566
-,2.00000,24.00000,7.00000,5.50757
-
-Table: Report
-,Mean,N,Std. Deviation
-score,20.92308,26.00000,4.75750
+hand,Mean,N
+1.00,16.44,9
+2.00,20.30,10
+Total,18.47,19
 ])
 
 AT_CLEANUP
 
-
-
-AT_SETUP([MEANS very simple example])
+AT_SETUP([MEANS very simple])
 AT_KEYWORDS([categorical categoricals])
 
-AT_DATA([means-vsimple.sps], [dnl
-SET FORMAT=F12.5.
-
-data list notable list /score.
+AT_DATA([very-simple.sps], [dnl
+data list notable list /score *.
 begin data.
-1
-1
-2
-2
+17
+17
+17
+16
+17
+16
+16
+16
+16
+21
+22
+20
+20
+20
+20
+20
+20
+20
+20
 end data.
 
-means tables = score.
+means tables = score
+ /cells = mean count.
 ])
 
-AT_CHECK([pspp -O format=csv means-vsimple.sps], [0],
-  [dnl
+AT_CHECK([pspp -O format=csv very-simple.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-score: ,4,100.0%,0,.0%,4,100.0%
+score,19,100.0%,0,.0%,19,100.0%
 
 Table: Report
-,Mean,N,Std. Deviation
-score,1.50000,4.00000,.57735
+Mean,N
+18.47,19
 ])
 
 AT_CLEANUP
 
 
-
-
-AT_SETUP([MEANS default missing])
+AT_SETUP([MEANS empty factor spec])
 AT_KEYWORDS([categorical categoricals])
 
-AT_DATA([means-dmiss.sps], [dnl
-SET FORMAT=F12.2.
-data list notable list /a * g1 * g2 *.
+AT_DATA([means-bad.sps], [dnl
+data list list /outcome *.
 begin data.
-3 1 . 
-4 1 11
-3 1 21
-6 2 21
-2 2 31
-. 2 31
-8 2 31
-7 2 31
+1
+2
+3
 end data.
 
-MEANS TABLES = 
-      a BY g1 g2
-      BY g2
-      /cells = MEAN COUNT
-      .
+MEANS TABLES =  outcome
+	BY.
 ])
 
-AT_CHECK([pspp -O format=csv means-dmiss.sps], [0],
-  [dnl
-Table: Case Processing Summary
-,Cases,,,,,
-,Included,,Excluded,,Total,
-,N,Percent,N,Percent,N,Percent
-a: g1 Ã g2,6,75.0%,2,25.0%,8,100.0%
-a: g2,6,75.0%,2,25.0%,8,100.0%
-a: ,7,87.5%,1,12.5%,8,100.0%
+AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
 
-Table: Report
-,g1,g2,Mean,N
-a,1.00,11.00,4.00,1.00
-,,21.00,3.00,1.00
-,2.00,,6.00,1.00
-,,31.00,5.67,3.00
+AT_CLEANUP
 
-Table: Report
-,g2,Mean,N
-a,11.00,4.00,1.00
-,21.00,4.50,2.00
-,31.00,5.67,3.00
 
-Table: Report
-,Mean,N
-a,5.00,6.00
+
+AT_SETUP([MEANS parser bug])
+AT_KEYWORDS([categorical categoricals])
+
+dnl This bug caused an infinite loop
+AT_DATA([means-bad.sps], [dnl
+DATA LIST notable LIST /a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 fylo *.
+begin data.
+1 2 3 4 5 6 7 8 9 0 11
+end data.
+
+MEANS TABLES = a1 a2 a3 a4 a5 a6 a7 a8 a9 a10a BY fylo.
 ])
 
+AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
+
 AT_CLEANUP
 
 
-AT_SETUP([MEANS linear stats])
+dnl This example is based upon info from https://libguides.library.kent.edu/SPSS/CompareMeans
+AT_SETUP([MEANS default missing behaviour])
 AT_KEYWORDS([categorical categoricals])
 
-dnl Slightly more involved example to test the linear statistics
-AT_DATA([means-linear.sps], [dnl
-set format F12.4.
-data list notable list /id * group * test1 *
-begin data.
-1 1 85
-2 1 90
-3 1 82
-4 1 75
-5 1 99
-6 2 70
-7 2 66
-8 2 52
-9 2 71
-10 2 50
+AT_DATA([means-missing.sps], [dnl
+data list notable list /w * score * a * b *.
+begin data
+     12      . 0 0
+     13      . 0 1
+     11      . 1 0
+      7      . 1 1
+      5      1 0 .
+     91      1 0 0
+    130      1 0 1
+      4      1 1 .
+     90      1 1 0
+     72      1 1 1
 end data.
 
-add value labels /group 1 "experimental group" 2 "control group".
+weight by w.
 
-means test1 by group
-  /cells = mean count stddev sum min max range variance kurt skew
-  .
+MEANS tables=score
+	/cells = count.
+
+MEANS tables=score by a
+	/cells = count.
+
+MEANS tables=score by a by b
+      /cells = count.
 ])
 
+AT_CHECK([pspp -O format=csv means-missing.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Included,,Excluded,,Total,
+,N,Percent,N,Percent,N,Percent
+score,392,90.1%,43,9.9%,435,100.0%
+
+Table: Report
+N
+392
 
-AT_CHECK([pspp -O format=csv means-linear.sps], [0],
-  [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-test1: group,10,100.0%,0,.0%,10,100.0%
-test1: ,10,100.0%,0,.0%,10,100.0%
+score * a,392,90.1%,43,9.9%,435,100.0%
 
 Table: Report
-,group,Mean,N,Std. Deviation,Sum,Min,Max,Range,Variance,Kurtosis,Skewness
-test1,experimental group,86.2000,5.0000,8.9833,431.0000,75.0000,99.0000,24.0000,80.7000,.2727,.3858
-,control group,61.8000,5.0000,10.0598,309.0000,50.0000,71.0000,21.0000,101.2000,-3.0437,-.4830
+a,N
+.00,226
+1.00,166
+Total,392
+
+Table: Case Processing Summary
+,Cases,,,,,
+,Included,,Excluded,,Total,
+,N,Percent,N,Percent,N,Percent
+score * a * b,383,88.0%,52,12.0%,435,100.0%
 
 Table: Report
-,Mean,N,Std. Deviation,Sum,Min,Max,Range,Variance,Kurtosis,Skewness
-test1,74.0000,10.0000,15.6915,740.0000,50.0000,99.0000,49.0000,246.2222,-.5759,-.1262
+a,b,N
+.00,.00,91
+,1.00,130
+,Total,221
+1.00,.00,90
+,1.00,72
+,Total,162
+Total,.00,181
+,1.00,202
+,Total,383
 ])
 
 AT_CLEANUP
 
 
-AT_SETUP([MEANS standard errors])
+dnl This example from https://www.spss-tutorials.com/spss-means-command/
+AT_SETUP([MEANS two way])
 AT_KEYWORDS([categorical categoricals])
 
-AT_DATA([means-stderr.sps], [dnl
-set format F12.4.
-data list notable list /id * group * test1 *
-begin data.
-1 1 85
-2 1 90
-3 1 82
-4 1 75
-5 1 99
-6 1 70
-7 2 66
-8 2 52
-9 2 71
-10 2 50
+AT_DATA([means-freelancer.sps], [dnl
+data list notable list /income_2010 * gender  sector_2010.
+begin data
+6072.40 0 5
+12706.65 1 4
+14912.82 0 2
+16338.36 1 5
+22606.99 0 .
+23544.95 1 1
+24985.21 0 2
+26586.48 0 1
+29076.24 1 3
+31010.18 0 2
+33190.63 1 1
+35570.67 1 4
+36202.60 1 4
+36205.85 1 2
+36262.56 1 .
+38283.56 0 1
+38569.91 1 5
+39057.56 1 4
+39594.68 1 5
+42087.38 0 1
+42370.92 0 2
+42931.32 1 2
+45907.58 0 4
+45911.32 1 .
+47227.09 1 3
+50440.71 1 5
+57440.17 1 3
+58918.86 0 5
+59430.07 1 2
+61135.95 0 4
+64193.85 0 4
+64857.02 0 3
+65903.42 0 4
+66592.38 1 3
+70986.10 0 3
+71229.94 0 4
+74663.05 1 4
+76676.14 1 4
+79260.80 0 4
+80311.71 0 4
 end data.
 
-means test1 by group 
-	/cells = mean count semean seskew sekurt.
+means income_2010 by gender by sector_2010
+	/cells count min mean stddev.
 ])
 
-
-AT_CHECK([pspp -O format=csv means-stderr.sps], [0],
-  [dnl
+AT_CHECK([pspp -O format=csv means-freelancer.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-test1: group,10,100.0%,0,.0%,10,100.0%
-test1: ,10,100.0%,0,.0%,10,100.0%
+income_2010 * gender * sector_2010,37,92.5%,3,7.5%,40,100.0%
 
 Table: Report
-,group,Mean,N,S.E. Mean,S.E. Skew,S.E. Kurt
-test1,1.0000,83.5000,6.0000,4.2485,.8452,1.7408
-,2.0000,59.7500,4.0000,5.1700,1.0142,2.6186
-
-Table: Report
-,Mean,N,S.E. Mean,S.E. Skew,S.E. Kurt
-test1,74.0000,10.0000,4.9621,.6870,1.3342
+gender,sector_2010,N,Minimum,Mean,Std. Deviation
+.00,1.00,3,26586.48,35652.47,8078.46
+,2.00,4,14912.82,28319.78,11482.43
+,3.00,2,64857.02,67921.56,4333.91
+,4.00,7,45907.58,66849.04,11787.11
+,5.00,2,6072.40,32495.63,37368.09
+,Total,18,6072.40,49389.68,22371.48
+1.00,1.00,2,23544.95,28367.79,6820.53
+,2.00,3,36205.85,46189.08,11949.93
+,3.00,4,29076.24,50083.97,16084.44
+,4.00,6,12706.65,45812.78,24995.16
+,5.00,4,16338.36,36235.92,14311.04
+,Total,19,12706.65,42918.90,17851.64
+Total,1.00,5,23544.95,32738.60,7757.62
+,2.00,7,14912.82,35978.05,14309.27
+,3.00,6,29076.24,56029.83,15615.06
+,4.00,13,12706.65,57139.99,21187.85
+,5.00,6,6072.40,34989.15,20146.69
+,Total,37,6072.40,46066.84,20160.12
 ])
 
 AT_CLEANUP
 
 
-
-AT_SETUP([MEANS harmonic and geometric means])
+dnl Check that rows are suppressed and that things generally work ok
+dnl when there are a 2 way instance contains an unbalanced set of
+dnl categorical values.
+AT_SETUP([MEANS unbalanced])
 AT_KEYWORDS([categorical categoricals])
 
-AT_DATA([means-hg.sps], [dnl
-set format F12.4.
-data list notable list /x * y *.
+AT_DATA([means-unbalanced.sps], [dnl
+data list notable list /b c x *.
 begin data.
-1 3
-2 3
-3 3
-4 3
-5 3
+4 1 123
+3 1 123
+5 0 246
+4 0 246
+3 0 246
 end data.
 
+* The data above lack a 5 1 case.
 
-means x y
-	/cells = mean harmonic geometric
-.
+means
+	table=x by b by c
+	/cells = mean count
+	.
 ])
 
-
-AT_CHECK([pspp -O format=csv means-hg.sps], [0],
-  [dnl
+AT_CHECK([pspp -O format=csv means-unbalanced.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-x: ,5,100.0%,0,.0%,5,100.0%
-y: ,5,100.0%,0,.0%,5,100.0%
+x * b * c,5,100.0%,0,.0%,5,100.0%
 
 Table: Report
-,Mean,Harmonic Mean,Geom. Mean
-x,3.0000,2.1898,2.6052
-y,3.0000,3.0000,3.0000
+b,c,Mean,N
+3.00,.00,246.00,1
+,1.00,123.00,1
+,Total,184.50,2
+4.00,.00,246.00,1
+,1.00,123.00,1
+,Total,184.50,2
+5.00,.00,246.00,1
+,Total,246.00,1
+Total,.00,246.00,3
+,1.00,123.00,2
+,Total,196.80,5
 ])
 
 AT_CLEANUP
 
-
-
-
-
-
-AT_SETUP([MEANS all/none/default])
+dnl This example kindly provided by Dana Williams
+AT_SETUP([MEANS three way])
 AT_KEYWORDS([categorical categoricals])
 
-dnl Make sure that /CELLS = {ALL,NONE,DEFAULT} work properly
-AT_DATA([means-stat-keywords.sps], [dnl
-SET FORMAT=F12.2.
-SET DECIMAL=DOT.
-
-DATA LIST NOTABLE LIST /score *.
-BEGIN DATA.
-22
-22
-29
-16
-23
-END DATA.
+AT_DATA([means-threeway.sps], [dnl
+data list notable list /score a b c.
+begin data.
+3  0 0 0
+4  0 0 1
+41 0 0 2
+5  0 1 0
+6  0 1 1
+7  1 0 0
+8  1 0 1
+9  1 1 0
+10 1 1 1
+end data.
 
-MEANS score /CELLS = ALL.
-MEANS score /CELLS = DEFAULT.
-MEANS score /CELLS = NONE.
+means score by a by b by c.
 ])
 
-
-AT_CHECK([pspp -O format=csv means-stat-keywords.sps], [0],
-  [dnl
+AT_CHECK([pspp -O format=csv means-threeway.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-score: ,5,100.0%,0,.0%,5,100.0%
+score * a * b * c,9,100.0%,0,.0%,9,100.0%
 
 Table: Report
-,Mean,N,Std. Deviation,S.E. Mean,Sum,Min,Max,Range,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,First,Last,Harmonic Mean,Geom. Mean
-score,22.40,5.00,4.62,2.06,112.00,16.00,29.00,13.00,21.30,1.85,2.00,.11,.91,22.00,23.00,21.61,22.01
+a,b,c,Mean,N,Std. Deviation
+.00,.00,.00,3.00,1,NaN
+,,1.00,4.00,1,NaN
+,,2.00,41.00,1,NaN
+,,Total,16.00,3,21.66
+,1.00,.00,5.00,1,NaN
+,,1.00,6.00,1,NaN
+,,Total,5.50,2,.71
+,Total,.00,4.00,2,1.41
+,,1.00,5.00,2,1.41
+,,2.00,41.00,1,NaN
+,,Total,11.80,5,16.36
+1.00,.00,.00,7.00,1,NaN
+,,1.00,8.00,1,NaN
+,,Total,7.50,2,.71
+,1.00,.00,9.00,1,NaN
+,,1.00,10.00,1,NaN
+,,Total,9.50,2,.71
+,Total,.00,8.00,2,1.41
+,,1.00,9.00,2,1.41
+,,Total,8.50,4,1.29
+Total,.00,.00,5.00,2,2.83
+,,1.00,6.00,2,2.83
+,,2.00,41.00,1,NaN
+,,Total,12.60,5,16.01
+,1.00,.00,7.00,2,2.83
+,,1.00,8.00,2,2.83
+,,Total,7.50,4,2.38
+,Total,.00,6.00,4,2.58
+,,1.00,7.00,4,2.58
+,,2.00,41.00,1,NaN
+,,Total,10.33,9,11.73
+])
 
-Table: Case Processing Summary
-,Cases,,,,,
-,Included,,Excluded,,Total,
-,N,Percent,N,Percent,N,Percent
-score: ,5,100.0%,0,.0%,5,100.0%
+AT_CLEANUP
 
-Table: Report
-,Mean,N,Std. Deviation
-score,22.40,5.00,4.62
+dnl The above example again, but with string variables for
+dnl the control vars.
+AT_SETUP([MEANS three way string])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([means-threeway-string.sps], [dnl
+data list notable list /score (f22.2) a (a24) b (a16) c (a8).
+begin data.
+3  fooberrycrumblexzaQ  fosilationwereqd  zero
+4  fooberrycrumblexzaQ  fosilationwereqd  one
+41 fooberrycrumblexzaQ  fosilationwereqd  two
+5  fooberrycrumblexzaQ  onlyonekonboys    zero
+6  fooberrycrumblexzaQ  onlyonekonboys    one
+7  wontledingbatsXASDF  fosilationwereqd  zero
+8  wontledingbatsXASDF  fosilationwereqd  one
+9  wontledingbatsXASDF  onlyonekonboys    zero
+10 wontledingbatsXASDF  onlyonekonboys    one
+end data.
+
+means score by a by b by c.
+])
 
+AT_CHECK([pspp -O format=csv means-threeway-string.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-score: ,5,100.0%,0,.0%,5,100.0%
+score * a * b * c,9,100.0%,0,.0%,9,100.0%
 
 Table: Report
+a,b,c,Mean,N,Std. Deviation
+fooberrycrumblexzaQ     ,fosilationwereqd,one     ,4.00,1,NaN
+,,two     ,41.00,1,NaN
+,,zero    ,3.00,1,NaN
+,,Total,16.00,3,21.66
+,onlyonekonboys  ,one     ,6.00,1,NaN
+,,zero    ,5.00,1,NaN
+,,Total,5.50,2,.71
+,Total,one     ,5.00,2,1.41
+,,two     ,41.00,1,NaN
+,,zero    ,4.00,2,1.41
+,,Total,11.80,5,16.36
+wontledingbatsXASDF     ,fosilationwereqd,one     ,8.00,1,NaN
+,,zero    ,7.00,1,NaN
+,,Total,7.50,2,.71
+,onlyonekonboys  ,one     ,10.00,1,NaN
+,,zero    ,9.00,1,NaN
+,,Total,9.50,2,.71
+,Total,one     ,9.00,2,1.41
+,,zero    ,8.00,2,1.41
+,,Total,8.50,4,1.29
+Total,fosilationwereqd,one     ,6.00,2,2.83
+,,two     ,41.00,1,NaN
+,,zero    ,5.00,2,2.83
+,,Total,12.60,5,16.01
+,onlyonekonboys  ,one     ,8.00,2,2.83
+,,zero    ,7.00,2,2.83
+,,Total,7.50,4,2.38
+,Total,one     ,7.00,4,2.58
+,,two     ,41.00,1,NaN
+,,zero    ,6.00,4,2.58
+,,Total,10.33,9,11.73
 ])
 
 AT_CLEANUP
 
 
 
-
-AT_SETUP([MEANS missing=table ])
+dnl An example with multiple tables
+AT_SETUP([MEANS multiple tables])
 AT_KEYWORDS([categorical categoricals])
 
-AT_DATA([means-miss-table.sps], [dnl
-data list notable list /a * b * g1.
+AT_DATA([means-multi-table.sps], [dnl
+data list notable list /a * b * c * x * y *.
 begin data.
-1 9 1  
-2 9 1 
-3 9 1 
-4 9 2 
-5 9 2 
-6 9 2 
-7 . 2 
+6 3 0 123 456
+6 3 1 123 456
+6 4 0 123 456
+6 4 1 123 456
+6 5 0 123 456
+6 5 1 123 456
+7 3 0 123 456
+7 3 1 123 456
+7 4 0 123 456
+7 4 1 123 456
+7 5 0 123 456
+7 5 1 123 456
+8 3 0 123 456
+8 3 1 123 456
+8 4 0 123 456
+8 4 1 123 456
+8 5 0 123 456
+8 5 1 123 456
+9 3 0 123 456
+9 3 1 123 456
+9 4 0 123 456
+9 4 1 123 456
+9 5 0 123 456
+9 5 1 123 456
 end data.
 
-MEANS a b BY g1
-      /a  BY g1
-      /cells =  COUNT
-      /missing = TABLE
-      .
 
-MEANS a b BY g1
-      /a  BY g1
-      /cells =  COUNT
-      .
+means table = x by b by c
+	/x by b
+	/y by a by b
+  cells = min count  .
 ])
 
-
-AT_CHECK([pspp -o pspp.csv -o pspp.txt means-miss-table.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
+AT_CHECK([pspp -O format=csv means-multi-table.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-a: g1,7,100.0%,0,.0%,7,100.0%
-a: ,7,100.0%,0,.0%,7,100.0%
-b: g1,6,85.7%,1,14.3%,7,100.0%
-b: ,6,85.7%,1,14.3%,7,100.0%
+x * b * c,24,100.0%,0,.0%,24,100.0%
 
 Table: Report
-,g1,N
-a,1.00,3.00
-,2.00,3.00
-b,1.00,3.00
-,2.00,3.00
-
-Table: Report
-,N
-a,6.00
-b,6.00
+b,c,Minimum,N
+3.00,.00,123.00,4
+,1.00,123.00,4
+,Total,123.00,8
+4.00,.00,123.00,4
+,1.00,123.00,4
+,Total,123.00,8
+5.00,.00,123.00,4
+,1.00,123.00,4
+,Total,123.00,8
+Total,.00,123.00,12
+,1.00,123.00,12
+,Total,123.00,24
 
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-a: g1,7,100.0%,0,.0%,7,100.0%
-a: ,7,100.0%,0,.0%,7,100.0%
-
-Table: Report
-,g1,N
-a,1.00,3.00
-,2.00,4.00
+x * b,24,100.0%,0,.0%,24,100.0%
 
 Table: Report
-,N
-a,7.00
+b,Minimum,N
+3.00,123.00,8
+4.00,123.00,8
+5.00,123.00,8
+Total,123.00,24
 
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-a: g1,7,100.0%,0,.0%,7,100.0%
-a: ,7,100.0%,0,.0%,7,100.0%
-b: g1,6,85.7%,1,14.3%,7,100.0%
-b: ,6,85.7%,1,14.3%,7,100.0%
+y * a * b,24,100.0%,0,.0%,24,100.0%
 
 Table: Report
-,g1,N
-a,1.00,3.00
-,2.00,4.00
-b,1.00,3.00
-,2.00,3.00
+a,b,Minimum,N
+6.00,3.00,456.00,2
+,4.00,456.00,2
+,5.00,456.00,2
+,Total,456.00,6
+7.00,3.00,456.00,2
+,4.00,456.00,2
+,5.00,456.00,2
+,Total,456.00,6
+8.00,3.00,456.00,2
+,4.00,456.00,2
+,5.00,456.00,2
+,Total,456.00,6
+9.00,3.00,456.00,2
+,4.00,456.00,2
+,5.00,456.00,2
+,Total,456.00,6
+Total,3.00,456.00,8
+,4.00,456.00,8
+,5.00,456.00,8
+,Total,456.00,24
+])
 
-Table: Report
-,N
-a,7.00
-b,6.00
+AT_CLEANUP
 
+
+
+dnl An example with more than one dependent variable.
+dnl This case uses a somewhat different table layout.
+AT_SETUP([MEANS multi variable])
+AT_KEYWORDS([categorical categoricals])
+
+AT_DATA([means-multi-variable.sps], [dnl
+data list notable list /b c x y.
+begin data.
+5 1 123 55
+5 1 123 55
+5 1 123 55
+5 1 123 55
+4 1 456 44
+4 1 456 44
+4 1 456 44
+4 1 456 44
+3 1 789 55
+3 1 789 55
+3 1 789 55
+3 1 789 55
+5 0 246 99
+5 0 246 99
+5 0 246 99
+5 0 246 .
+4 0 987 99
+4 0 987 99
+4 0 987 99
+4 0 987 99
+3 0 654 11
+3 0 654 11
+3 0 654 11
+3 0 654 11
+end data.
+
+means
+	table = x y by b by c
+	.
+])
+
+AT_CHECK([pspp -O format=csv means-multi-variable.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-a: g1,7,100.0%,0,.0%,7,100.0%
-a: ,7,100.0%,0,.0%,7,100.0%
+x * b * c,24,100.0%,0,.0%,24,100.0%
+y * b * c,23,95.8%,1,4.2%,24,100.0%
+
+Table: x * y * b * c
+b,c,,x,y
+3.00,.00,Mean,654.00,11.00
+,,N,4,4
+,,Std. Deviation,.00,.00
+,1.00,Mean,789.00,55.00
+,,N,4,4
+,,Std. Deviation,.00,.00
+,Total,Mean,721.50,33.00
+,,N,8,8
+,,Std. Deviation,72.16,23.52
+4.00,.00,Mean,987.00,99.00
+,,N,4,4
+,,Std. Deviation,.00,.00
+,1.00,Mean,456.00,44.00
+,,N,4,4
+,,Std. Deviation,.00,.00
+,Total,Mean,721.50,71.50
+,,N,8,8
+,,Std. Deviation,283.83,29.40
+5.00,.00,Mean,246.00,99.00
+,,N,4,3
+,,Std. Deviation,.00,.00
+,1.00,Mean,123.00,55.00
+,,N,4,4
+,,Std. Deviation,.00,.00
+,Total,Mean,184.50,73.86
+,,N,8,7
+,,Std. Deviation,65.75,23.52
+Total,.00,Mean,629.00,67.00
+,,N,12,11
+,,Std. Deviation,316.50,44.40
+,1.00,Mean,456.00,51.33
+,,N,12,12
+,,Std. Deviation,283.98,5.42
+,Total,Mean,542.50,58.83
+,,N,24,23
+,,Std. Deviation,307.06,31.22
+])
 
-Table: Report
-,g1,N
-a,1.00,3.00
-,2.00,4.00
 
-Table: Report
-,N
-a,7.00
-])
 AT_CLEANUP
 
-AT_SETUP([MEANS user missing values])
+
+dnl This example is based upon one kindly provided by Dana Williams
+dnl It exercises the most complex case where there are multiple
+dnl dependent variables AND multiple control variables in each layer.
+AT_SETUP([MEANS multi combination])
 AT_KEYWORDS([categorical categoricals])
 
-AT_DATA([means-missing.sps], [dnl
-data list notable list /a * b * g1.
-begin data.
-1 2 9  
-2 2 1 
-3 2 1 
-4 2 2 
-5 2 2 
-6 2 2 
-7 9 2 
+AT_DATA([means-multi-combination.sps], [dnl
+data list notable list /one two three four five six.
+begin data
+1 1 1 1 1 1
+2 1 1 1 1 1
+1 2 1 1 1 1
+2 2 1 1 1 1
+1 1 2 1 1 1
+2 1 2 1 1 1
+1 2 2 1 1 1
+2 2 2 1 1 1
+1 1 1 2 1 1
+2 1 1 2 1 1
+1 2 1 2 1 1
+2 2 1 2 1 1
+1 1 2 2 1 1
+2 1 2 2 1 1
+1 2 2 2 1 1
+2 2 2 2 1 1
+1 1 1 1 2 1
+2 1 1 1 2 1
+1 2 1 1 2 1
+2 2 1 1 2 1
+1 1 2 1 2 1
+2 1 2 1 2 1
+1 2 2 1 2 1
+2 2 2 1 2 1
+1 1 1 2 2 1
+2 1 1 2 2 1
+1 2 1 2 2 1
+2 2 1 2 2 1
+1 1 2 2 2 1
+2 1 2 2 2 1
+1 2 2 2 2 1
+2 2 2 2 2 1
+1 1 1 1 1 2
+2 1 1 1 1 2
+1 2 1 1 1 2
+2 2 1 1 1 2
+1 1 2 1 1 2
+2 1 2 1 1 2
+1 2 2 1 1 2
+2 2 2 1 1 2
+1 1 1 2 1 2
+2 1 1 2 1 2
+1 2 1 2 1 2
+2 2 1 2 1 2
+1 1 2 2 1 2
+2 1 2 2 1 2
+1 2 2 2 1 2
+2 2 2 2 1 2
+1 1 1 1 2 2
+2 1 1 1 2 2
+1 2 1 1 2 2
+2 2 1 1 2 2
+1 1 2 1 2 2
+2 1 2 1 2 2
+1 2 2 1 2 2
+2 2 2 1 2 2
+1 1 1 2 2 2
+2 1 1 2 2 2
+1 2 1 2 2 2
+2 2 1 2 2 2
+1 1 2 2 2 2
+2 1 2 2 2 2
+1 2 2 2 2 2
+2 2 2 2 2 2
 end data.
 
-MISSING VALUES a b g1 (9).
-
-MEANS a b BY g1 /cells =  COUNT .
+recode six  (2 = 62) (1 = 61).
+recode five (2 = 52) (1 = 51).
+recode four (2 = 42) (1 = 41).
+recode three (2 = 32) (1 = 31).
 
-MEANS a b BY g1 /cells =  COUNT /missing = include .
+set format F22.5.
 
-MEANS a b BY g1 /cells =  COUNT /missing = dependent .
+means tables = one two BY three four BY five six.
 ])
 
-
-AT_CHECK([pspp -o pspp.csv -o pspp.txt means-missing.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
+AT_CHECK([pspp -O format=csv means-multi-combination.sps], [0], [dnl
 Table: Case Processing Summary
 ,Cases,,,,,
 ,Included,,Excluded,,Total,
 ,N,Percent,N,Percent,N,Percent
-a: g1,6,85.7%,1,14.3%,7,100.0%
-a: ,7,100.0%,0,.0%,7,100.0%
-b: g1,5,71.4%,2,28.6%,7,100.0%
-b: ,6,85.7%,1,14.3%,7,100.0%
-
-Table: Report
-,g1,N
-a,1.00,2.00
-,2.00,4.00
-b,1.00,2.00
-,2.00,3.00
+one * three * five,64,100.0%,0,.0%,64,100.0%
+two * three * five,64,100.0%,0,.0%,64,100.0%
+one * three * six,64,100.0%,0,.0%,64,100.0%
+two * three * six,64,100.0%,0,.0%,64,100.0%
+one * four * five,64,100.0%,0,.0%,64,100.0%
+two * four * five,64,100.0%,0,.0%,64,100.0%
+one * four * six,64,100.0%,0,.0%,64,100.0%
+two * four * six,64,100.0%,0,.0%,64,100.0%
+
+Table: one * two * three * five
+three,five,,one,two
+31.00,51.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,52.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,Total,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+32.00,51.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,52.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,Total,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+Total,51.00,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+,52.00,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+,Total,Mean,1.50000,1.50000
+,,N,64,64
+,,Std. Deviation,.50395,.50395
+
+Table: one * two * three * six
+three,six,,one,two
+31.00,61.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,62.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,Total,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+32.00,61.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,62.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,Total,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+Total,61.00,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+,62.00,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+,Total,Mean,1.50000,1.50000
+,,N,64,64
+,,Std. Deviation,.50395,.50395
+
+Table: one * two * four * five
+four,five,,one,two
+41.00,51.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,52.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,Total,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+42.00,51.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,52.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,Total,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+Total,51.00,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+,52.00,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+,Total,Mean,1.50000,1.50000
+,,N,64,64
+,,Std. Deviation,.50395,.50395
+
+Table: one * two * four * six
+four,six,,one,two
+41.00,61.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,62.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,Total,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+42.00,61.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,62.00,Mean,1.50000,1.50000
+,,N,16,16
+,,Std. Deviation,.51640,.51640
+,Total,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+Total,61.00,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+,62.00,Mean,1.50000,1.50000
+,,N,32,32
+,,Std. Deviation,.50800,.50800
+,Total,Mean,1.50000,1.50000
+,,N,64,64
+,,Std. Deviation,.50395,.50395
+])
 
-Table: Report
-,N
-a,6.00
-b,5.00
+AT_CLEANUP
 
-Table: Case Processing Summary
-,Cases,,,,,
-,Included,,Excluded,,Total,
-,N,Percent,N,Percent,N,Percent
-a: g1,7,100.0%,0,.0%,7,100.0%
-a: ,7,100.0%,0,.0%,7,100.0%
-b: g1,7,100.0%,0,.0%,7,100.0%
-b: ,7,100.0%,0,.0%,7,100.0%
 
-Table: Report
-,g1,N
-a,1.00,2.00
-,2.00,4.00
-,9.00,1.00
-b,1.00,2.00
-,2.00,4.00
-,9.00,1.00
+dnl This example was observed to cause a crash in the
+dnl destructor.  Found by zzuf.
+AT_SETUP([MEANS clean up])
+AT_KEYWORDS([categorical categoricals])
 
-Table: Report
-,N
-a,7.00
-b,7.00
+AT_DATA([means-bad.sps], [dnl
+data list notable list /one two three four five six.
+begin data
+1 1 1 1 1 1
+2 1 1 1 1 !
+1 2 2 2 2 2
+2 2 2 2 2 2
+end data.
 
-Table: Case Processing Summary
-,Cases,,,,,
-,Included,,Excluded,,Total,
-,N,Percent,N,Percent,N,Percent
-a: g1,7,100.0%,0,.0%,7,100.0%
-a: ,7,100.0%,0,.0%,7,100.0%
-b: g1,6,85.7%,1,14.3%,7,100.0%
-b: ,6,85.7%,1,14.3%,7,100.0%
+means tables = one two BY thsee four BY five six.
+])
 
-Table: Report
-,g1,N
-a,1.00,2.00
-,2.00,4.00
-,9.00,1.00
-b,1.00,2.00
-,2.00,3.00
-,9.00,1.00
+AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
 
-Table: Report
-,N
-a,7.00
-b,6.00
-])
 AT_CLEANUP
 
 
-
-AT_SETUP([MEANS empty factor spec])
+dnl Another example which caused a crash.
+dnl Found by zzuf.
+AT_SETUP([MEANS control all missing])
 AT_KEYWORDS([categorical categoricals])
 
 AT_DATA([means-bad.sps], [dnl
-data list list /outcome *.
+data list notable list /a * b *  y * uu *.
 begin data.
-1
-2
-3
+6 3 . 5
+6 3 . 5
+6 4 . 5
 end data.
 
-MEANS TABLES =  outcome 
-	BY.
+means table = b by a by y by uu
+  .
 ])
 
-AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
+AT_CHECK([pspp -O format=csv means-bad.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Included,,Excluded,,Total,
+,N,Percent,N,Percent,N,Percent
+b * a * y * uu,0,.0%,3,100.0%,3,100.0%
 
-AT_CLEANUP
+"warning: The table ""a * y * uu"" has no non-empty control variables.  No result for this table will be displayed."
+])
 
+AT_CLEANUP
 
 
-AT_SETUP([MEANS parser bug])
+dnl Do some tests on the MISSING keyword.
+AT_SETUP([MEANS missing classes])
 AT_KEYWORDS([categorical categoricals])
 
-dnl This bug caused an infinite loop
-AT_DATA([means-bad.sps], [dnl
-DATA LIST notable LIST /a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 fylo *.
+AT_DATA([means-missing-classes.sps], [dnl
+data list notable list /hand * score *.
 begin data.
-1 2 3 4 5 6 7 8 9 0 11
+1 17
+1 17
+1 17
+1 16
+1 17
+1 16
+1 16
+1 .
+1 99
+2 21
+2 22
+2 20
+2 20
+2 20
+2 20
+2 20
+2 20
+2 20
+2 20
+9 55
 end data.
 
-MEANS TABLES = a1 a2 a3 a4 a5 a6 a7 a8 a9 a10a BY fylo.
+missing values score (99).
+missing values hand (9).
+
+means tables=score  by hand
+	/cells = count max
+	/missing = dependent
+	.
+
+means tables=score  by hand
+	/cells = count max
+	/missing = include
+	.
+
+means tables=score  by hand
+	/cells = count max
+	.
+
 ])
 
-AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
+AT_CHECK([pspp -O format=csv means-missing-classes.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Included,,Excluded,,Total,
+,N,Percent,N,Percent,N,Percent
+score * hand,18,90.0%,2,10.0%,20,100.0%
 
-AT_CLEANUP
+Table: Report
+hand,N,Maximum
+1.00,7,17.00
+2.00,10,22.00
+9.00,1,55.00
+Total,18,55.00
 
+Table: Case Processing Summary
+,Cases,,,,,
+,Included,,Excluded,,Total,
+,N,Percent,N,Percent,N,Percent
+score * hand,19,95.0%,1,5.0%,20,100.0%
+
+Table: Report
+hand,N,Maximum
+1.00,8,99.00
+2.00,10,22.00
+9.00,1,55.00
+Total,19,99.00
+
+Table: Case Processing Summary
+,Cases,,,,,
+,Included,,Excluded,,Total,
+,N,Percent,N,Percent,N,Percent
+score * hand,17,85.0%,3,15.0%,20,100.0%
+
+Table: Report
+hand,N,Maximum
+1.00,7,17.00
+2.00,10,22.00
+Total,17,22.00
+])
+
+AT_CLEANUP
 
 
 
-- 
2.30.2