+++ /dev/null
-/* PSPP - a program for statistical analysis.
- Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-#include "data/case.h"
-#include "data/format.h"
-#include "data/variable.h"
-
-#include "libpspp/bt.h"
-#include "libpspp/hmap.h"
-#include "libpspp/misc.h"
-#include "libpspp/pool.h"
-
-#include "math/moments.h"
-
-#include <math.h>
-
-#include "means.h"
-
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
-#define N_(msgid) (msgid)
-
-struct per_var_data
-{
-};
-
-struct per_var_data_simple
-{
- struct per_var_data parent;
- double acc;
-};
-
-struct per_var_data_moment
-{
- struct per_var_data parent;
- struct moments1 *mom;
-};
-
-static struct per_var_data *
-default_create (struct pool *pool)
-{
- struct per_var_data_moment *pvd = pool_alloc (pool, sizeof *pvd);
-
- pvd->mom = moments1_create (MOMENT_KURTOSIS);
-
- return (struct per_var_data *) pvd;
-}
-
-static void
-default_update (struct per_var_data *stat, double w, double x)
-{
- struct per_var_data_moment *pvd = (struct per_var_data_moment *)stat;
-
- moments1_add (pvd->mom, x, w);
-}
-
-\f
-
-/* HARMONIC MEAN: The reciprocal of the sum of the reciprocals:
- 1 / ( 1/(x_0) + 1/(x_1) + ... + 1/(x_{n-1}) ) */
-
-struct harmonic_mean
-{
- struct per_var_data parent;
- double rsum;
- double n;
-};
-
-static struct per_var_data *
-harmonic_create (struct pool *pool)
-{
- struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm);
-
- hm->rsum = 0;
- hm->n = 0;
-
- return (struct per_var_data *) hm;
-}
-
-
-static void
-harmonic_update (struct per_var_data *stat, double w, double x)
-{
- struct harmonic_mean *hm = (struct harmonic_mean *)stat;
- hm->rsum += w / x;
- hm->n += w;
-}
-
-
-static double
-harmonic_get (const struct per_var_data *pvd)
-{
- const struct harmonic_mean *hm = (const struct harmonic_mean *) pvd;
-
- return hm->n / hm->rsum;
-}
-
-\f
-
-/* GEOMETRIC MEAN: The nth root of the product of all n observations
- pow ((x_0 * x_1 * ... x_{n - 1}), 1/n) */
-struct geometric_mean
-{
- struct per_var_data parent;
- double prod;
- double n;
-};
-
-static struct per_var_data *
-geometric_create (struct pool *pool)
-{
- struct geometric_mean *gm = pool_alloc (pool, sizeof *gm);
-
- gm->prod = 1.0;
- gm->n = 0;
-
- return (struct per_var_data *) gm;
-}
-
-static void
-geometric_update (struct per_var_data *pvd, double w, double x)
-{
- struct geometric_mean *gm = (struct geometric_mean *)pvd;
- gm->prod *= pow (x, w);
- gm->n += w;
-}
-
-
-static double
-geometric_get (const struct per_var_data *pvd)
-{
- const struct geometric_mean *gm = (const struct geometric_mean *)pvd;
- return pow (gm->prod, 1.0 / gm->n);
-}
-
-\f
-
-static double
-sum_get (const struct per_var_data *pvd)
-{
- double n, mean;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, &mean, 0, 0, 0);
-
- return mean * n;
-}
-
-
-static double
-n_get (const struct per_var_data *pvd)
-{
- double n;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, 0, 0, 0, 0);
-
- return n;
-}
-
-static double
-arithmean_get (const struct per_var_data *pvd)
-{
- double n, mean;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, &mean, 0, 0, 0);
-
- return mean;
-}
-
-static double
-variance_get (const struct per_var_data *pvd)
-{
- double n, mean, variance;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, &mean, &variance, 0, 0);
-
- return variance;
-}
-
-
-static double
-stddev_get (const struct per_var_data *pvd)
-{
- return sqrt (variance_get (pvd));
-}
-
-
-\f
-
-static double
-skew_get (const struct per_var_data *pvd)
-{
- double skew;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, NULL, NULL, NULL, &skew, 0);
-
- return skew;
-}
-
-static double
-sekurt_get (const struct per_var_data *pvd)
-{
- double n;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL);
-
- return calc_sekurt (n);
-}
-
-static double
-seskew_get (const struct per_var_data *pvd)
-{
- double n;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL);
-
- return calc_seskew (n);
-}
-
-static double
-kurt_get (const struct per_var_data *pvd)
-{
- double kurt;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, NULL, NULL, NULL, NULL, &kurt);
-
- return kurt;
-}
-
-static double
-semean_get (const struct per_var_data *pvd)
-{
- double n, var;
-
- moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, NULL, &var, NULL, NULL);
-
- return sqrt (var / n);
-}
-
-\f
-
-/* MIN: The smallest (closest to minus infinity) value. */
-
-static struct per_var_data *
-min_create (struct pool *pool)
-{
- struct per_var_data_simple *pvd = pool_alloc (pool, sizeof *pvd);
-
- pvd->acc = DBL_MAX;
-
- return (struct per_var_data *) pvd;
-}
-
-static void
-min_update (struct per_var_data *pvd, double w UNUSED, double x)
-{
- double *r = &((struct per_var_data_simple *)pvd)->acc;
-
- if (x < *r)
- *r = x;
-}
-
-static double
-min_get (const struct per_var_data *pvd)
-{
- double *r = &((struct per_var_data_simple *)pvd)->acc;
-
- return *r;
-}
-
-/* MAX: The largest (closest to plus infinity) value. */
-
-static struct per_var_data *
-max_create (struct pool *pool)
-{
- struct per_var_data_simple *pvd = pool_alloc (pool, sizeof *pvd);
-
- pvd->acc = -DBL_MAX;
-
- return (struct per_var_data *) pvd;
-}
-
-static void
-max_update (struct per_var_data *pvd, double w UNUSED, double x)
-{
- double *r = &((struct per_var_data_simple *)pvd)->acc;
-
- if (x > *r)
- *r = x;
-}
-
-static double
-max_get (const struct per_var_data *pvd)
-{
- double *r = &((struct per_var_data_simple *)pvd)->acc;
-
- return *r;
-}
-
-\f
-
-struct range
-{
- struct per_var_data parent;
- double min;
- double max;
-};
-
-static struct per_var_data *
-range_create (struct pool *pool)
-{
- struct range *r = pool_alloc (pool, sizeof *r);
-
- r->min = DBL_MAX;
- r->max = -DBL_MAX;
-
- return (struct per_var_data *) r;
-}
-
-static void
-range_update (struct per_var_data *pvd, double w UNUSED, double x)
-{
- struct range *r = (struct range *) pvd;
-
- if (x > r->max)
- r->max = x;
-
- if (x < r->min)
- r->min = x;
-}
-
-static double
-range_get (const struct per_var_data *pvd)
-{
- const struct range *r = (struct range *) pvd;
-
- return r->max - r->min;
-}
-
-\f
-
-/* LAST: The last value (the one closest to the end of the file). */
-
-static struct per_var_data *
-last_create (struct pool *pool)
-{
- struct per_var_data_simple *pvd = pool_alloc (pool, sizeof *pvd);
-
- return (struct per_var_data *) pvd;
-}
-
-static void
-last_update (struct per_var_data *pvd, double w UNUSED, double x)
-{
- struct per_var_data_simple *stat = (struct per_var_data_simple *) pvd;
-
- stat->acc = x;
-}
-
-static double
-last_get (const struct per_var_data *pvd)
-{
- const struct per_var_data_simple *stat = (struct per_var_data_simple *) pvd;
-
- return stat->acc;
-}
-
-/* FIRST: The first value (the one closest to the start of the file). */
-
-static struct per_var_data *
-first_create (struct pool *pool)
-{
- struct per_var_data_simple *pvd = pool_alloc (pool, sizeof *pvd);
-
- pvd->acc = SYSMIS;
-
- return (struct per_var_data *) pvd;
-}
-
-static void
-first_update (struct per_var_data *pvd, double w UNUSED, double x)
-{
- struct per_var_data_simple *stat = (struct per_var_data_simple *) pvd;
-
- if (stat->acc == SYSMIS)
- stat->acc = x;
-}
-
-static double
-first_get (const struct per_var_data *pvd)
-{
- const struct per_var_data_simple *stat = (struct per_var_data_simple *) pvd;
-
- return stat->acc;
-}
-
-/* Table of cell_specs */
-const struct cell_spec cell_spec[n_MEANS_STATISTICS] = {
- {N_("Mean"), "MEAN", default_create, default_update, arithmean_get},
- {N_("N"), "COUNT", default_create, default_update, n_get},
- {N_("Std. Deviation"), "STDDEV", default_create, default_update, stddev_get},
-#if 0
- {N_("Median"), "MEDIAN", default_create, default_update, NULL},
- {N_("Group Median"), "GMEDIAN", default_create, default_update, NULL},
-#endif
- {N_("S.E. Mean"), "SEMEAN", default_create, default_update, semean_get},
- {N_("Sum"), "SUM", default_create, default_update, sum_get},
- {N_("Minimum"), "MIN", min_create, min_update, min_get},
- {N_("Maximum"), "MAX", max_create, max_update, max_get},
- {N_("Range"), "RANGE", range_create, range_update, range_get},
- {N_("Variance"), "VARIANCE", default_create, default_update, variance_get},
- {N_("Kurtosis"), "KURT", default_create, default_update, kurt_get},
- {N_("S.E. Kurt"), "SEKURT", default_create, default_update, sekurt_get},
- {N_("Skewness"), "SKEW", default_create, default_update, skew_get},
- {N_("S.E. Skew"), "SESKEW", default_create, default_update, seskew_get},
- {N_("First"), "FIRST", first_create, first_update, first_get},
- {N_("Last"), "LAST", last_create, last_update, last_get},
-#if 0
- {N_("Percent N"), "NPCT", default_create, default_update, NULL},
- {N_("Percent Sum"), "SPCT", default_create, default_update, NULL},
-#endif
- {N_("Harmonic Mean"), "HARMONIC", harmonic_create, harmonic_update, harmonic_get},
- {N_("Geom. Mean"), "GEOMETRIC", geometric_create, geometric_update, geometric_get}
-};
/* PSPP - a program for statistical analysis.
- Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc.
+ Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "data/format.h"
#include "data/variable.h"
-#include "libpspp/hmap.h"
-#include "libpspp/bt.h"
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
-#include "output/pivot-table.h"
+#include "libpspp/misc.h"
+#include "libpspp/pool.h"
+
+#include "math/categoricals.h"
+#include "math/interaction.h"
+#include "math/moments.h"
-#include "means.h"
+#include "output/pivot-table.h"
+#include <math.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) (msgid)
-/* A "cell" in this procedure represents a distinct value of the
- procedure's categorical variables, and a set of summary statistics
- of all cases which whose categorical variables have that set of
- values. For example, the dataset
- v1 v2 cat1 cat2
- 100 202 0 1
- 100 202 0 2
- 100 202 1 0
- 100 202 0 1
+struct means;
+struct per_var_data
+{
+ void **cell_stats;
+ struct moments1 *mom;
+};
- has three cells in layer 0 and two cells in layer 1 in addition
- to a "grand summary" cell to which all (non-missing) cases
- contribute.
- The cells form a n-ary tree structure with the "grand summary"
- cell at the root.
- */
-struct cell
+typedef void *stat_create (struct pool *pool);
+typedef void stat_update (void *stat, double w, double x);
+typedef double stat_get (const struct per_var_data *, void *aux);
+
+struct cell_spec
{
- struct hmap_node hmap_node; /* Element in hash table. */
- struct bt_node bt_node; /* Element in binary tree */
+ /* Printable title for output */
+ const char *title;
- struct cell_container children;
+ /* Keyword for syntax */
+ const char *keyword;
- /* The level of the subtotal to which this cell pertains, or
- -1 if it does not pertain to a subtotal. */
- int subtotal;
+ stat_create *sc;
+ stat_update *su;
+ stat_get *sd;
+};
- /* The statistics to be calculated for the cell. */
- struct per_var_data **stat;
+struct harmonic_mean
+{
+ double rsum;
+ double n;
+};
- /* The parent of this cell, or NULL if this is the root cell. */
- struct cell *parent_cell;
+static void *
+harmonic_create (struct pool *pool)
+{
+ struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm);
- int n_subtotals;
- struct cell_container *subtotals;
+ hm->rsum = 0;
+ hm->n = 0;
+
+ return hm;
+}
- int n_values;
- union value values[1]; /* The value(s). */
-};
static void
-dump_cell (const struct cell *cell, int level)
+harmonic_update (void *stat, double w, double x)
+{
+ struct harmonic_mean *hm = stat;
+ hm->rsum += w / x;
+ hm->n += w;
+}
+
+
+static double
+harmonic_get (const struct per_var_data *pvd UNUSED, void *stat)
+{
+ struct harmonic_mean *hm = stat;
+
+ return hm->n / hm->rsum;
+}
+
+\f
+
+struct geometric_mean
+{
+ double prod;
+ double n;
+};
+
+
+static void *
+geometric_create (struct pool *pool)
{
- printf ("%p: ", cell);
- for (int i = 0; i < cell->n_values; ++i)
- printf ("%g; ", cell->values[i].f);
- // printf ("--- Count: %g", cell->count);
- // printf ("--- N Subtotals: %d", cell->n_subtotals);
- printf ("--- Level: %d", level);
- printf ("--- Subtotal: %d", cell->subtotal);
- printf ("--- Parent: %p", cell->parent_cell);
- printf ("\n");
+ struct geometric_mean *gm = pool_alloc (pool, sizeof *gm);
+
+ gm->prod = 1.0;
+ gm->n = 0;
+
+ return gm;
}
+
static void
-dump_tree (const struct cell *cell, int level)
+geometric_update (void *stat, double w, double x)
{
- struct cell *sub_cell;
- BT_FOR_EACH (sub_cell, struct cell, bt_node, &cell->children.bt)
- {
- dump_tree (sub_cell, level + 1);
- }
+ struct geometric_mean *gm = stat;
+ gm->prod *= pow (x, w);
+ gm->n += w;
+}
- for (int i = 0; i < cell->n_subtotals; ++i)
- {
- struct cell_container *container = cell->subtotals + i;
- struct cell *scell;
- BT_FOR_EACH (scell, struct cell, bt_node, &container->bt)
- {
- dump_cell (scell, level);
- }
- }
- dump_cell (cell, level);
+static double
+geometric_get (const struct per_var_data *pvd UNUSED, void *stat)
+{
+ struct geometric_mean *gm = stat;
+
+ return pow (gm->prod, 1.0 / gm->n);
}
-struct instance
+\f
+
+static double
+sum_get (const struct per_var_data *pvd, void *stat UNUSED)
{
- struct hmap_node hmap_node; /* Element in hash table. */
- struct bt_node bt_node; /* Element in binary tree */
+ double n, mean;
- /* A unique, consecutive, zero based index identifying this
- instance. */
- int index;
+ moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0);
- /* The top level value of this instance. */
- union value value;
-};
+ return mean * n;
+}
-static void
-dump_layer (const struct layer *layer)
+
+static double
+n_get (const struct per_var_data *pvd, void *stat UNUSED)
{
- printf ("Layer: %p; fv0: %s; N %ld\n", layer,
- layer->n_factor_vars
- ? var_get_name (layer->factor_vars[0])
- : "(null)",
- hmap_count (&layer->instances.map)
- );
+ double n;
- struct instance *inst;
- BT_FOR_EACH (inst, struct instance, bt_node, &layer->instances.bt)
- {
- printf ("Val %g; Index %d\n", inst->value.f, inst->index);
- }
- printf ("\n");
+ moments1_calculate (pvd->mom, &n, 0, 0, 0, 0);
+
+ return n;
}
+static double
+arithmean_get (const struct per_var_data *pvd, void *stat UNUSED)
+{
+ double n, mean;
+
+ moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0);
+
+ return mean;
+}
-/* Generate a hash based on the values of the N variables in
- the array VARS which are taken from the case C. */
-static size_t
-generate_hash (const struct ccase *c, int n,
- const struct variable * const * vars)
+static double
+variance_get (const struct per_var_data *pvd, void *stat UNUSED)
{
- size_t hash = 0;
- for (int i = 0; i < n; ++i)
- {
- const struct variable *var = vars[i];
- const union value *vv = case_data (c, var);
- int width = var_get_width (var);
- hash = value_hash (vv, width, hash);
- }
+ double n, mean, variance;
- return hash;
+ moments1_calculate (pvd->mom, &n, &mean, &variance, 0, 0);
+
+ return variance;
}
-/* Create a cell based on the N variables in the array VARS,
- which are indeces into the case C.
- The caller is responsible for destroying this cell when
- no longer needed. */
-static struct cell *
-generate_cell (const struct means *means, const struct ccase *c, int n,
- const struct variable * const * vars)
+
+static double
+stddev_get (const struct per_var_data *pvd, void *stat)
{
- struct cell *cell = xzalloc ((sizeof *cell)
- + (n - 1) * sizeof (union value));
- cell->subtotal = -1;
- cell->n_values = n;
- for (int i = 0; i < n; ++i)
- {
- const struct variable *var = vars[i];
- const union value *vv = case_data (c, var);
- int width = var_get_width (var);
- value_clone (&cell->values[i], vv, width);
- }
+ return sqrt (variance_get (pvd, stat));
+}
- hmap_init (&cell->children.map);
- cell->stat = xcalloc (means->n_statistics, sizeof *cell->stat);
- for (int stat = 0; stat < means->n_statistics; ++stat)
- {
- stat_create *sc = cell_spec[means->statistics[stat]].sc;
+\f
- cell->stat[stat] = sc (means->pool);
- }
+static double
+skew_get (const struct per_var_data *pvd, void *stat UNUSED)
+{
+ double skew;
- return cell;
+ moments1_calculate (pvd->mom, NULL, NULL, NULL, &skew, 0);
+
+ return skew;
}
+static double
+sekurt_get (const struct per_var_data *pvd, void *stat UNUSED)
+{
+ double n;
+
+ moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL);
+
+ return calc_sekurt (n);
+}
-/* If a cell based on the N variables in the array VARS,
- which are indeces into the case C and whose hash is HASH,
- exists in HMAP, then return that cell.
- Otherwise, return NULL. */
-static struct cell *
-lookup_cell (struct hmap *hmap, size_t hash,
- const struct ccase *c,
- int n, const struct variable * const * vars)
+static double
+seskew_get (const struct per_var_data *pvd, void *stat UNUSED)
{
- struct cell *fcol = NULL;
- HMAP_FOR_EACH_WITH_HASH (fcol, struct cell, hmap_node, hash, hmap)
- {
- bool match = true;
- for (int i = 0; i < n; ++i)
- {
- const struct variable *var = vars[i];
- const union value *vv = case_data (c, var);
- int width = var_get_width (var);
- if (!value_equal (vv, &fcol->values[i], width))
- {
- match = false;
- break;
- }
- }
- if (match)
- return fcol;
- }
- return NULL;
+ double n;
+
+ moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL);
+
+ return calc_seskew (n);
}
+static double
+kurt_get (const struct per_var_data *pvd, void *stat UNUSED)
+{
+ double kurt;
+
+ moments1_calculate (pvd->mom, NULL, NULL, NULL, NULL, &kurt);
+
+ return kurt;
+}
-/* A comparison function used to sort cells in a binary tree. */
-static int
-my_compare_func (const struct bt_node *a,
- const struct bt_node *b,
- const void *aux)
+static double
+semean_get (const struct per_var_data *pvd, void *stat UNUSED)
{
- const struct cell *fa = BT_DATA (a, struct cell, bt_node);
- const struct cell *fb = BT_DATA (b, struct cell, bt_node);
+ double n, var;
- const struct variable *const *cv = aux;
+ moments1_calculate (pvd->mom, &n, NULL, &var, NULL, NULL);
- int vidx = fa->n_values - 1;
- assert (fa->n_values == fb->n_values);
+ return sqrt (var / n);
+}
+
+\f
+
+static void *
+min_create (struct pool *pool)
+{
+ double *r = pool_alloc (pool, sizeof *r);
+
+ *r = DBL_MAX;
- // FIXME: Consider whether other layers need to be compared.
- int r = value_compare_3way (&fa->values[vidx],
- &fb->values[vidx],
- var_get_width (cv[vidx]));
return r;
}
-/* A comparison function used to sort cells in a binary tree. */
-static int
-my_other_compare_func (const struct bt_node *a,
- const struct bt_node *b,
- const void *aux)
+static void
+min_update (void *stat, double w UNUSED, double x)
{
- const struct instance *fa = BT_DATA (a, struct instance, bt_node);
- const struct instance *fb = BT_DATA (b, struct instance, bt_node);
+ double *r = stat;
- const struct variable *var = aux;
+ if (x < *r)
+ *r = x;
+}
- int r = value_compare_3way (&fa->value,
- &fb->value,
- var_get_width (var));
- return r;
+static double
+min_get (const struct per_var_data *pvd UNUSED, void *stat)
+{
+ double *r = stat;
+
+ return *r;
}
+static void *
+max_create (struct pool *pool)
+{
+ double *r = pool_alloc (pool, sizeof *r);
-static void arrange_cells (struct cell *cell, const struct mtable *table);
+ *r = -DBL_MAX;
+ return r;
+}
-/* Walk the tree starting at CELL, creating and populating the BT for
- each cell. Also assigns the "rank", "parent_cell" and "subtotal" members
- of each cell.*/
static void
-arrange_cell (struct cell_container *container,
- struct cell *cell, const struct mtable *table,
- int subtotal)
+max_update (void *stat, double w UNUSED, double x)
{
- const struct variable **control_vars = table->control_vars;
- struct bt *bt = &container->bt;
- struct hmap *map = &container->map;
- bt_init (bt, my_compare_func, control_vars);
+ double *r = stat;
- struct cell *scell;
- HMAP_FOR_EACH (scell, struct cell, hmap_node, map)
- {
- scell->parent_cell = cell;
- scell->subtotal = subtotal;
- bt_insert (bt, &scell->bt_node);
- arrange_cells (scell, table);
- }
+ if (x > *r)
+ *r = x;
+}
- if (cell->n_values > 0 && cell->subtotal == -1)
- {
- struct layer *layer = table->layers[cell->n_values - 1];
-
- const struct variable *var = control_vars[cell->n_values - 1];
- int width = var_get_width (var);
- unsigned int hash
- = value_hash (&cell->values[cell->n_values - 1], width, 0);
-
-
- struct instance *inst = NULL;
- struct instance *next = NULL;
- HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance, hmap_node,
- hash, &layer->instances.map)
- {
- if (value_equal (&inst->value,
- &cell->values[cell->n_values - 1],
- width))
- {
- break;
- }
- }
-
- if (!inst)
- {
- inst = xzalloc (sizeof *inst);
- inst->index = -1;
- value_copy (&inst->value, &cell->values[cell->n_values -1],
- width);
- hmap_insert (&layer->instances.map, &inst->hmap_node, hash);
- }
- }
+static double
+max_get (const struct per_var_data *pvd UNUSED, void *stat)
+{
+ double *r = stat;
+
+ return *r;
+}
+
+\f
+
+struct range
+{
+ double min;
+ double max;
+};
+
+static void *
+range_create (struct pool *pool)
+{
+ struct range *r = pool_alloc (pool, sizeof *r);
+
+ r->min = DBL_MAX;
+ r->max = -DBL_MAX;
+
+ return r;
}
-/* Arrange the children and then all the subtotals. */
static void
-arrange_cells (struct cell *cell, const struct mtable *table)
+range_update (void *stat, double w UNUSED, double x)
{
- arrange_cell (&cell->children, cell, table, -1);
+ struct range *r = stat;
- for (int s = 0; s < cell->n_subtotals; ++ s)
- {
- arrange_cell (cell->subtotals + s, cell, table, s);
- }
+ if (x > r->max)
+ r->max = x;
+
+ if (x < r->min)
+ r->min = x;
}
+static double
+range_get (const struct per_var_data *pvd UNUSED, void *stat)
+{
+ struct range *r = stat;
+
+ return r->max - r->min;
+}
\f
-/* If the top level value in CELL, has an instance in LAYER, then
- return that instance. Otherwise return NULL. */
-static const struct instance *
-lookup_instance (const struct layer *layer, const struct cell *cell)
-{
- const struct variable *var = layer->factor_vars[0];
- const union value *val = cell->values + cell->n_values - 1;
- int width = var_get_width (var);
- unsigned int hash = value_hash (val, width, 0);
- struct instance *inst = NULL;
- struct instance *next;
- HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next,
- struct instance, hmap_node,
- hash, &layer->instances.map)
+static void *
+last_create (struct pool *pool)
+{
+ double *l = pool_alloc (pool, sizeof *l);
+
+ return l;
+}
+
+static void
+last_update (void *stat, double w UNUSED, double x)
+{
+ double *l = stat;
+
+ *l = x;
+}
+
+static double
+last_get (const struct per_var_data *pvd UNUSED, void *stat)
+{
+ double *l = stat;
+
+ return *l;
+}
+
+
+static void *
+first_create (struct pool *pool)
+{
+ double *f = pool_alloc (pool, sizeof *f);
+
+ *f = SYSMIS;
+
+ return f;
+}
+
+static void
+first_update (void *stat, double w UNUSED, double x)
+{
+ double *f = stat;
+
+ if (*f == SYSMIS)
+ *f = x;
+}
+
+static double
+first_get (const struct per_var_data *pvd UNUSED, void *stat)
+{
+ double *f = stat;
+
+ return *f;
+}
+
+enum
+ {
+ MEANS_MEAN = 0,
+ MEANS_N,
+ MEANS_STDDEV
+ };
+
+/* Table of cell_specs */
+static const struct cell_spec cell_spec[] = {
+ {N_("Mean"), "MEAN", NULL, NULL, arithmean_get},
+ {N_("N"), "COUNT", NULL, NULL, n_get},
+ {N_("Std. Deviation"), "STDDEV", NULL, NULL, stddev_get},
+#if 0
+ {N_("Median"), "MEDIAN", NULL, NULL, NULL},
+ {N_("Group Median"), "GMEDIAN", NULL, NULL, NULL},
+#endif
+ {N_("S.E. Mean"), "SEMEAN", NULL, NULL, semean_get},
+ {N_("Sum"), "SUM", NULL, NULL, sum_get},
+ {N_("Min"), "MIN", min_create, min_update, min_get},
+ {N_("Max"), "MAX", max_create, max_update, max_get},
+ {N_("Range"), "RANGE", range_create, range_update, range_get},
+ {N_("Variance"), "VARIANCE", NULL, NULL, variance_get},
+ {N_("Kurtosis"), "KURT", NULL, NULL, kurt_get},
+ {N_("S.E. Kurt"), "SEKURT", NULL, NULL, sekurt_get},
+ {N_("Skewness"), "SKEW", NULL, NULL, skew_get},
+ {N_("S.E. Skew"), "SESKEW", NULL, NULL, seskew_get},
+ {N_("First"), "FIRST", first_create, first_update, first_get},
+ {N_("Last"), "LAST", last_create, last_update, last_get},
+#if 0
+ {N_("Percent N"), "NPCT", NULL, NULL, NULL},
+ {N_("Percent Sum"), "SPCT", NULL, NULL, NULL},
+#endif
+ {N_("Harmonic Mean"), "HARMONIC", harmonic_create, harmonic_update, harmonic_get},
+ {N_("Geom. Mean"), "GEOMETRIC", geometric_create, geometric_update, geometric_get}
+};
+
+#define n_C (sizeof (cell_spec) / sizeof (struct cell_spec))
+
+
+struct summary
+{
+ casenumber missing;
+ casenumber non_missing;
+};
+
+
+struct layer
+{
+ size_t n_factor_vars;
+ const struct variable **factor_vars;
+};
+
+/* The thing parsed after TABLES= */
+struct mtable
+{
+ size_t n_dep_vars;
+ const struct variable **dep_vars;
+
+ int n_layers;
+ struct layer *layers;
+
+ struct interaction **interactions;
+ struct summary *summary;
+
+ int ii;
+
+ struct categoricals *cats;
+};
+
+struct means
+{
+ const struct dictionary *dict;
+
+ struct mtable *table;
+ size_t n_tables;
+
+ /* Missing value class for categorical variables */
+ enum mv_class exclude;
+
+ /* Missing value class for dependent variables */
+ enum mv_class dep_exclude;
+
+ bool listwise_exclude;
+
+ /* an array indicating which statistics are to be calculated */
+ int *cells;
+
+ /* Size of cells */
+ int n_cells;
+
+ /* Pool on which cell functions may allocate data */
+ struct pool *pool;
+};
+
+
+static void
+run_means (struct means *cmd, struct casereader *input,
+ const struct dataset *ds);
+
+
+
+static bool
+parse_means_table_syntax (struct lexer *lexer, const struct means *cmd, struct mtable *table)
+{
+ table->ii = 0;
+ table->n_layers = 0;
+ table->layers = NULL;
+ table->interactions = NULL;
+
+ /* Dependent variable (s) */
+ if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict,
+ &table->dep_vars, &table->n_dep_vars,
+ PV_NO_DUPLICATE | PV_NUMERIC))
+ return false;
+
+ /* Factor variable (s) */
+ while (lex_match (lexer, T_BY))
{
- if (value_equal (val, &inst->value, width))
- break;
+ table->n_layers++;
+ table->layers =
+ pool_realloc (cmd->pool, table->layers,
+ sizeof (*table->layers) * table->n_layers);
+
+ if (!parse_variables_const_pool
+ (lexer, cmd->pool, cmd->dict,
+ &table->layers[table->n_layers - 1].factor_vars,
+ &table->layers[table->n_layers - 1].n_factor_vars,
+ PV_NO_DUPLICATE))
+ return false;
}
- return inst;
+
+ /* There is always at least one layer.
+ However the final layer is the total, and not
+ normally considered by the user as a
+ layer.
+ */
+
+ table->n_layers++;
+ table->layers =
+ pool_realloc (cmd->pool, table->layers,
+ sizeof (*table->layers) * table->n_layers);
+ table->layers[table->n_layers - 1].factor_vars = NULL;
+ table->layers[table->n_layers - 1].n_factor_vars = 0;
+
+ return true;
}
-static void
-populate_table (const struct cell *cell, const struct means *means,
- struct pivot_table *table, int level)
+/* Match a variable.
+ If the match succeeds, the variable will be placed in VAR.
+ Returns true if successful */
+static bool
+lex_is_variable (struct lexer *lexer, const struct dictionary *dict,
+ int n)
{
- /* It is easier to overallocate this table by 2, than to adjust the
- logic when assigning its contents, and it is cheap to do so. */
- size_t *indexes = xcalloc (table->n_dimensions + 2, sizeof *indexes);
- for (int s = 0; s < means->n_statistics; ++s)
+ const char *tstr;
+ if (lex_next_token (lexer, n) != T_ID)
+ return false;
+
+ tstr = lex_next_tokcstr (lexer, n);
+
+ if (NULL == dict_lookup_var (dict, tstr) )
+ return false;
+
+ return true;
+}
+
+
+int
+cmd_means (struct lexer *lexer, struct dataset *ds)
+{
+ int t;
+ int i;
+ int l;
+ struct means means;
+ bool more_tables = true;
+
+ means.pool = pool_create ();
+
+ means.exclude = MV_ANY;
+ means.dep_exclude = MV_ANY;
+ means.listwise_exclude = false;
+ means.table = NULL;
+ means.n_tables = 0;
+
+ means.dict = dataset_dict (ds);
+
+ means.n_cells = 3;
+ means.cells = pool_calloc (means.pool, means.n_cells, sizeof (*means.cells));
+
+
+ /* The first three items (MEAN, COUNT, STDDEV) are the default */
+ for (i = 0; i < 3; ++i)
+ means.cells[i] = i;
+
+
+ /* Optional TABLES = */
+ if (lex_match_id (lexer, "TABLES"))
+ {
+ if (! lex_force_match (lexer, T_EQUALS))
+ goto error;
+ }
+
+
+ more_tables = true;
+ /* Parse the "tables" */
+ while (more_tables)
{
- bool kludge = false; // FIXME: Remove this for production.
- indexes[0] = s;
- int stat = means->statistics[s];
- if (cell->subtotal != -1)
+ means.n_tables ++;
+ means.table = pool_realloc (means.pool, means.table, means.n_tables * sizeof (*means.table));
+
+ if (! parse_means_table_syntax (lexer, &means,
+ &means.table[means.n_tables - 1]))
{
- // for (int i = 1; i < table->n_dimensions; ++i)
- {
- int i = 1;
- const struct layer *layer
- = means->table->layers[table->n_dimensions - 1 - i];
- assert (layer);
- const struct instance *inst = lookup_instance (layer, cell);
- assert (inst);
- indexes[i] = inst->index;
- }
- int i = 2;
- const struct layer *layer
- = means->table->layers[table->n_dimensions - 1 - i];
- indexes[i] = hmap_count (&layer->instances.map);
- kludge = true;
+ goto error;
}
- else if (hmap_is_empty (&cell->children.map))
+
+ /* Look ahead to see if there are more tables to be parsed */
+ more_tables = false;
+ if ( T_SLASH == lex_next_token (lexer, 0) )
{
- const struct cell *pc = cell;
- for (int i = 1; i < table->n_dimensions; ++i)
+ if (lex_is_variable (lexer, means.dict, 1) )
{
- const struct layer *layer
- = means->table->layers[table->n_dimensions - 1 - i];
-
- const struct instance *inst = lookup_instance (layer, pc);
- assert (inst);
- indexes[i] = inst->index;
- pc = pc->parent_cell;
+ more_tables = true;
+ lex_match (lexer, T_SLASH);
}
- kludge = true;
}
- else
+ }
+
+ /* /MISSING subcommand */
+ while (lex_token (lexer) != T_ENDCMD)
+ {
+ lex_match (lexer, T_SLASH);
+
+ if (lex_match_id (lexer, "MISSING"))
{
- const struct layer *layer;
- int i = 0;
- for (int st = 0; st < cell->n_subtotals; ++st)
+ /*
+ If no MISSING subcommand is specified, each combination of
+ a dependent variable and categorical variables is handled
+ separately.
+ */
+ lex_match (lexer, T_EQUALS);
+ if (lex_match_id (lexer, "INCLUDE"))
{
- layer = means->table->layers[table->n_dimensions - i - 2];
- indexes[++i] = hmap_count (&layer->instances.map);
+ /*
+ Use the subcommand "/MISSING=INCLUDE" to include user-missing
+ values in the analysis.
+ */
+
+ means.exclude = MV_SYSTEM;
+ means.dep_exclude = MV_SYSTEM;
}
- layer = means->table->layers[table->n_dimensions - i - 2];
- indexes[++i] = hmap_count (&layer->instances.map);
- if (++i < table->n_dimensions)
+ else if (lex_match_id (lexer, "TABLE"))
+ /*
+ This is the default. (I think).
+ Every case containing a complete set of variables for a given
+ table. If any variable, categorical or dependent for in a table
+ is missing (as defined by what?), then that variable will
+ be dropped FOR THAT TABLE ONLY.
+ */
{
- layer = means->table->layers[table->n_dimensions - i - 1];
- const struct instance *inst = lookup_instance (layer, cell);
- assert (inst);
- indexes[i] = inst->index;
+ means.listwise_exclude = true;
+ }
+ else if (lex_match_id (lexer, "DEPENDENT"))
+ /*
+ Use the command "/MISSING=DEPENDENT" to
+ include user-missing values for the categorical variables,
+ while excluding them for the dependent variables.
+
+ Cases are dropped only when user-missing values
+ appear in dependent variables. User-missing
+ values for categorical variables are treated according to
+ their face value.
+
+ Cases are ALWAYS dropped when System Missing values appear
+ in the categorical variables.
+ */
+ {
+ means.dep_exclude = MV_ANY;
+ means.exclude = MV_SYSTEM;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
}
- kludge = true;
}
+ else if (lex_match_id (lexer, "CELLS"))
+ {
+ lex_match (lexer, T_EQUALS);
- if (kludge)
+ /* The default values become overwritten */
+ means.n_cells = 0;
+ while (lex_token (lexer) != T_ENDCMD
+ && lex_token (lexer) != T_SLASH)
+ {
+ int k = 0;
+ if (lex_match (lexer, T_ALL))
+ {
+ int x;
+ means.cells =
+ pool_realloc (means.pool, means.cells,
+ (means.n_cells += n_C) * sizeof (*means.cells));
+
+ for (x = 0; x < n_C; ++x)
+ means.cells[means.n_cells - (n_C - 1 - x) - 1] = x;
+ }
+ else if (lex_match_id (lexer, "NONE"))
+ {
+ /* Do nothing */
+ }
+ else if (lex_match_id (lexer, "DEFAULT"))
+ {
+ means.cells =
+ pool_realloc (means.pool, means.cells,
+ (means.n_cells += 3) * sizeof (*means.cells));
+
+ means.cells[means.n_cells - 2 - 1] = MEANS_MEAN;
+ means.cells[means.n_cells - 1 - 1] = MEANS_N;
+ means.cells[means.n_cells - 0 - 1] = MEANS_STDDEV;
+ }
+ else
+ {
+ for (; k < n_C; ++k)
+ {
+ if (lex_match_id (lexer, cell_spec[k].keyword))
+ {
+ means.cells =
+ pool_realloc (means.pool, means.cells,
+ ++means.n_cells * sizeof (*means.cells));
+
+ means.cells[means.n_cells - 1] = k;
+ break;
+ }
+ }
+ }
+ if (k >= n_C)
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+ else
{
- stat_get *sg = cell_spec[stat].sd;
- pivot_table_put (table, indexes, table->n_dimensions,
- pivot_value_new_number (sg (cell->stat[s])));
+ lex_error (lexer, NULL);
+ goto error;
}
}
- free (indexes);
- const struct bt *bt = &cell->children.bt;
- struct cell *child = NULL;
- BT_FOR_EACH (child, struct cell, bt_node, bt)
- {
- populate_table (child, means, table, level + 1);
- }
- // printf ("There are %d subtotals\n", cell->n_subtotals);
- for (int i = 0; i < cell->n_subtotals; ++i)
+
+ for (t = 0; t < means.n_tables; ++t)
{
- // printf ("aa\n");
- const struct cell_container *st = cell->subtotals + i;
- struct cell *scell;
- HMAP_FOR_EACH (scell, struct cell, hmap_node, &st->map)
- {
- // printf ("%s:%d xxx\n", __FILE__, __LINE__);
- /* dump_cell (scell, 0); */
- populate_table (scell, means, table, level + 1);
- }
- // printf ("zz\n");
- }
- // printf ("ooo\n");
-}
+ struct mtable *table = &means.table[t];
-static void
-ann_dim (struct pivot_table *t, int d, size_t *indeces)
-{
- if (d < 0)
- return;
- char label[10];
+ table->interactions =
+ pool_calloc (means.pool, table->n_layers, sizeof (*table->interactions));
- const struct pivot_dimension *dim = t->dimensions[d];
- for (int l = 0; l < dim->n_leaves; ++l)
- {
- indeces[d] = l;
- int x;
- for (x = 0; x < t->n_dimensions; ++x)
+ table->summary =
+ pool_calloc (means.pool, table->n_dep_vars * table->n_layers, sizeof (*table->summary));
+
+ for (l = 0; l < table->n_layers; ++l)
{
- label[x] = '0' + indeces[x];
+ int v;
+ const struct layer *lyr = &table->layers[l];
+ const int n_vars = lyr->n_factor_vars;
+ table->interactions[l] = interaction_create (NULL);
+ for (v = 0; v < n_vars ; ++v)
+ {
+ interaction_add_variable (table->interactions[l],
+ lyr->factor_vars[v]);
+ }
}
- label[x] = '\0';
- pivot_table_put (t, indeces, t->n_dimensions,
- pivot_value_new_user_text (label, x));
- ann_dim (t, d - 1, indeces);
}
-}
-static void
-annotate_table (struct pivot_table *t)
-{
- size_t *indeces = xcalloc (t->n_dimensions, sizeof *indeces);
+ {
+ struct casegrouper *grouper;
+ struct casereader *group;
+ bool ok;
- for (int d = 0; d < t->n_dimensions; ++d)
- {
- ann_dim (t, d, indeces);
- }
- free (indeces);
-}
+ grouper = casegrouper_create_splits (proc_open (ds), means.dict);
+ while (casegrouper_get_next_group (grouper, &group))
+ {
+ run_means (&means, group, ds);
+ }
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
+ }
-static void
-create_table_structure (const struct mtable *mt, struct pivot_table *table)
-{
- for (int l = mt->n_layers -1; l >=0 ; --l)
+ for (t = 0; t < means.n_tables; ++t)
{
- const struct layer *layer = mt->layers[l];
- assert (layer->n_factor_vars > 0);
- const struct variable *var = layer->factor_vars[0];
- struct pivot_dimension *dim_layer
- = pivot_dimension_create (table, PIVOT_AXIS_ROW,
- var_to_string (var));
- dim_layer->root->show_label = true;
-
- {
- struct instance *inst = NULL;
- BT_FOR_EACH (inst, struct instance, bt_node, &layer->instances.bt)
+ int l;
+ struct mtable *table = &means.table[t];
+ if (table->interactions)
+ for (l = 0; l < table->n_layers; ++l)
{
- struct substring space = SS_LITERAL_INITIALIZER ("\t ");
- struct string str;
- ds_init_empty (&str);
- var_append_value_name (var,
- &inst->value,
- &str);
+ interaction_destroy (table->interactions[l]);
+ }
+ }
- ds_ltrim (&str, space);
+ pool_destroy (means.pool);
+ return CMD_SUCCESS;
- pivot_category_create_leaf
- (dim_layer->root,
- pivot_value_new_text (ds_cstr (&str)));
+ error:
- ds_destroy (&str);
+ for (t = 0; t < means.n_tables; ++t)
+ {
+ int l;
+ struct mtable *table = &means.table[t];
+ if (table->interactions)
+ for (l = 0; l < table->n_layers; ++l)
+ {
+ interaction_destroy (table->interactions[l]);
}
- }
-
- pivot_category_create_leaf
- (dim_layer->root,
- pivot_value_new_text ("Total"));
}
+
+ pool_destroy (means.pool);
+ return CMD_FAILURE;
}
-void
-means_shipout (const struct mtable *mt, const struct means *means)
+
+static bool
+is_missing (const struct means *cmd,
+ const struct variable *dvar,
+ const struct interaction *iact,
+ const struct ccase *c)
{
- struct pivot_table *table = pivot_table_create (N_("Report"));
+ if ( interaction_case_is_missing (iact, c, cmd->exclude) )
+ return true;
- struct pivot_dimension *dim_cells =
- pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"));
- for (int i = 0; i < means->n_statistics; ++i)
- {
- const struct cell_spec *cs = cell_spec + means->statistics[i];
- pivot_category_create_leaf
- (dim_cells->root,
- pivot_value_new_text (gettext (cs->title)));
- }
+ if (var_is_value_missing (dvar,
+ case_data (c, dvar),
+ cmd->dep_exclude))
+ return true;
- create_table_structure (mt, table);
+ return false;
+}
- populate_table (mt->root_cell, means, table, 0);
- // pivot_table_dump (table, 0);
- // annotate_table (table);
+static void output_case_processing_summary (const struct mtable *,
+ const struct variable *wv);
- pivot_table_submit (table);
-}
+static void output_report (const struct means *, int, const struct mtable *);
-\f
-static bool
-missing_for_layer (const struct layer *layer, const struct ccase *c)
+struct per_cat_data
{
- if (layer->n_factor_vars == 0)
- return false;
-
- const struct variable *var = layer->factor_vars[0];
- const union value *vv = case_data (c, var);
+ struct per_var_data *pvd;
- return var_is_value_missing (var, vv, MV_ANY);
-}
+ bool warn;
+};
-static bool
-missing_for_any_layer (const struct mtable *table,
- int startx,
- const struct ccase *c)
+static void
+destroy_n (const void *aux1 UNUSED, void *aux2, void *user_data)
{
- bool miss = false;
- for (int l = startx; l < table->n_layers; ++l)
+ struct mtable *table = aux2;
+ int v;
+ struct per_cat_data *per_cat_data = user_data;
+ struct per_var_data *pvd = per_cat_data->pvd;
+
+ for (v = 0; v < table->n_dep_vars; ++v)
{
- miss = missing_for_layer (table->layers[l], c);
+ struct per_var_data *pp = &pvd[v];
+ moments1_destroy (pp->mom);
}
-
- if (miss)
- return true;
-
- return false;
}
-static struct cell *
-update_map_from_data (const struct means *means,
- const struct mtable *table,
- int startx,
- struct hmap *map,
- const struct ccase *c,
- int start_var,
- int n_vars,
- double weight)
+static void *
+create_n (const void *aux1, void *aux2)
{
- const struct variable **cv = table->control_vars + start_var;
- if (! missing_for_any_layer (table, startx, c))
+ int i, v;
+ const struct means *means = aux1;
+ struct mtable *table = aux2;
+ struct per_cat_data *per_cat_data = pool_malloc (means->pool, sizeof *per_cat_data);
+
+ struct per_var_data *pvd = pool_calloc (means->pool, table->n_dep_vars, sizeof *pvd);
+
+ for (v = 0; v < table->n_dep_vars; ++v)
{
- const struct variable *dep_var = table->dep_vars[0];
+ enum moment maxmom = MOMENT_KURTOSIS;
+ struct per_var_data *pp = &pvd[v];
+
+ pp->cell_stats = pool_calloc (means->pool, means->n_cells, sizeof *pp->cell_stats);
- size_t hash = generate_hash (c, n_vars, cv);
- struct cell *fcol = lookup_cell (map, hash,
- c, n_vars, cv);
- if (!fcol)
+ for (i = 0; i < means->n_cells; ++i)
{
- fcol = generate_cell (means, c, n_vars, cv);
- fcol->n_subtotals = table->n_layers - 2 - start_var;
- if (fcol->n_subtotals < 0)
- fcol->n_subtotals = 0;
- fcol->subtotals = xcalloc (fcol->n_subtotals,
- sizeof *fcol->subtotals);
- for (int i = 0; i < fcol->n_subtotals; ++i)
+ int csi = means->cells[i];
+ const struct cell_spec *cs = &cell_spec[csi];
+ if (cs->sc)
{
- struct cell_container *c = fcol->subtotals + i;
- hmap_init (&c->map);
+ pp->cell_stats[i] = cs->sc (means->pool);
}
+ }
+ pp->mom = moments1_create (maxmom);
+ }
+
+
+ per_cat_data->pvd = pvd;
+ per_cat_data->warn = true;
+ return per_cat_data;
+}
- hmap_insert (map, &fcol->hmap_node, hash);
+static void
+update_n (const void *aux1, void *aux2, void *user_data, const struct ccase *c, double weight)
+{
+ int i;
+ int v = 0;
+ const struct means *means = aux1;
+ struct mtable *table = aux2;
+ struct per_cat_data *per_cat_data = user_data;
+
+ for (v = 0; v < table->n_dep_vars; ++v)
+ {
+ struct per_var_data *pvd = &per_cat_data->pvd[v];
+
+ const double x = case_data (c, table->dep_vars[v])->f;
+
+ for (i = 0; i < table->n_layers; ++i)
+ {
+ if ( is_missing (means, table->dep_vars[v],
+ table->interactions[i], c))
+ goto end;
}
- for (int stat = 0; stat < means->n_statistics; ++stat)
+ for (i = 0; i < means->n_cells; ++i)
{
- stat_update *su = cell_spec[means->statistics[stat]].su;
- su (fcol->stat[stat], weight, case_data (c, dep_var)->f);
+ const int csi = means->cells[i];
+ const struct cell_spec *cs = &cell_spec[csi];
+
+
+ if (cs->su)
+ cs->su (pvd->cell_stats[i],
+ weight, x);
}
- return fcol;
+ moments1_add (pvd->mom, x, weight);
+
+ end:
+ continue;
}
- return NULL;
}
+static void
+calculate_n (const void *aux1, void *aux2, void *user_data)
+{
+ int i;
+ int v = 0;
+ struct per_cat_data *per_cat_data = user_data;
+ const struct means *means = aux1;
+ struct mtable *table = aux2;
-void
+ for (v = 0; v < table->n_dep_vars; ++v)
+ {
+ struct per_var_data *pvd = &per_cat_data->pvd[v];
+ for (i = 0; i < means->n_cells; ++i)
+ {
+ int csi = means->cells[i];
+ const struct cell_spec *cs = &cell_spec[csi];
+
+ if (cs->su)
+ cs->sd (pvd, pvd->cell_stats[i]);
+ }
+ }
+}
+
+static void
run_means (struct means *cmd, struct casereader *input,
const struct dataset *ds UNUSED)
{
- struct mtable *table = cmd->table + 0;
+ int t;
+ const struct variable *wv = dict_get_weight (cmd->dict);
struct ccase *c;
struct casereader *reader;
- table->root_cell = generate_cell (cmd, 0, 0, 0);
- table->root_cell->n_subtotals = table->n_layers - 1;
- if (table->root_cell->n_subtotals < 0)
- table->root_cell->n_subtotals = 0;
- table->root_cell->subtotals
- = xcalloc (table->root_cell->n_subtotals,
- sizeof *table->root_cell->subtotals);
- for (int i = 0; i < table->root_cell->n_subtotals; ++i)
- {
- struct cell_container *c = table->root_cell->subtotals + i;
- hmap_init (&c->map);
- }
+ struct payload payload;
+ payload.create = create_n;
+ payload.update = update_n;
+ payload.calculate = calculate_n;
+ payload.destroy = destroy_n;
- table->control_vars
- = calloc (table->n_layers, sizeof *table->control_vars);
- for (int l = 0; l < table->n_layers; ++l)
+ for (t = 0; t < cmd->n_tables; ++t)
{
- const struct layer *layer = table->layers[l];
- if (layer->n_factor_vars > 0)
- table->control_vars[l] = layer->factor_vars[0];
+ struct mtable *table = &cmd->table[t];
+ table->cats
+ = categoricals_create (table->interactions,
+ table->n_layers, wv, cmd->exclude);
+
+ categoricals_set_payload (table->cats, &payload, cmd, table);
}
- struct cell *cell = table->root_cell;
- const struct variable *dep_var = table->dep_vars[0];
for (reader = input;
(c = casereader_read (reader)) != NULL; case_unref (c))
{
- const double weight = dict_get_case_weight (cmd->dict, c, NULL);
- if (! missing_for_any_layer (table, 0, c))
+ for (t = 0; t < cmd->n_tables; ++t)
{
- for (int stat = 0; stat < cmd->n_statistics; ++stat)
+ bool something_missing = false;
+ int v;
+ struct mtable *table = &cmd->table[t];
+
+ for (v = 0; v < table->n_dep_vars; ++v)
{
- stat_update *su = cell_spec[cmd->statistics[stat]].su;
- su (cell->stat[stat], weight, case_data (c, dep_var)->f);
+ int i;
+ for (i = 0; i < table->n_layers; ++i)
+ {
+ const bool missing =
+ is_missing (cmd, table->dep_vars[v],
+ table->interactions[i], c);
+ if (missing)
+ {
+ something_missing = true;
+ table->summary[v * table->n_layers + i].missing++;
+ }
+ else
+ table->summary[v * table->n_layers + i].non_missing++;
+ }
}
+ if ( something_missing && cmd->listwise_exclude)
+ continue;
+
+ categoricals_update (table->cats, c);
}
+ }
+ casereader_destroy (reader);
+
+ for (t = 0; t < cmd->n_tables; ++t)
+ {
+ struct mtable *table = &cmd->table[t];
+
+ categoricals_done (table->cats);
+ }
+
+
+ for (t = 0; t < cmd->n_tables; ++t)
+ {
+ int i;
+ const struct mtable *table = &cmd->table[t];
+
+ output_case_processing_summary (table, wv);
- for (int i = 0; i < cell->n_subtotals; ++i)
+ for (i = 0; i < table->n_layers; ++i)
{
- struct cell_container *container = cell->subtotals + i;
- update_map_from_data (cmd, table, 1, &container->map, c, 1, 1,
- weight);
+ output_report (cmd, i, table);
}
+ categoricals_destroy (table->cats);
+ }
- struct hmap *map = &cell->children.map;
- for (int l = 0; l < table->n_layers; ++l)
- {
- struct cell *cell
- = update_map_from_data (cmd, table, l, map, c,
- 0, l + 1, weight);
+}
- if (cell)
- map = &cell->children.map;
+
+
+static void
+output_case_processing_summary (const struct mtable *mt,
+ const struct variable *wv)
+{
+ struct pivot_table *table = pivot_table_create (
+ N_("Case Processing Summary"));
+ pivot_table_set_weight_var (table, wv);
+
+ pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
+ N_("N"), PIVOT_RC_COUNT,
+ N_("Percent"), PIVOT_RC_PERCENT);
+ pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Cases"),
+ N_("Included"), N_("Excluded"), N_("Total"))
+ ->root->show_label = true;
+
+ struct pivot_dimension *tables = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, N_("Tables"));
+
+ for (size_t v = 0; v < mt->n_dep_vars; ++v)
+ {
+ const struct variable *var = mt->dep_vars[v];
+ for (size_t i = 0; i < mt->n_layers; ++i)
+ {
+ const int row = v * mt->n_layers + i;
+ const struct interaction *iact = mt->interactions[i];
+
+ struct string str = DS_EMPTY_INITIALIZER;
+ ds_put_format (&str, "%s: ", var_to_string (var));
+ interaction_to_string (iact, &str);
+ int table_idx = pivot_category_create_leaf (
+ tables->root, pivot_value_new_user_text_nocopy (
+ ds_steal_cstr (&str)));
+
+ const struct summary *s = &mt->summary[row];
+ double n_total = s->missing + s->non_missing;
+ struct entry
+ {
+ int stat_idx;
+ int case_idx;
+ double x;
+ }
+ entries[] =
+ {
+ { 0, 0, s->non_missing },
+ { 1, 0, s->non_missing / n_total * 100.0 },
+ { 0, 1, s->missing },
+ { 1, 1, s->missing / n_total * 100.0 },
+ { 0, 2, n_total },
+ { 1, 2, 100.0 },
+ };
+
+ for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
+ {
+ const struct entry *e = &entries[j];
+ pivot_table_put3 (table, e->stat_idx, e->case_idx, table_idx,
+ pivot_value_new_number (e->x));
+ }
}
}
- casereader_destroy (reader);
- arrange_cells (table->root_cell, table);
- for (int l = 0; l < table->n_layers; ++l)
+ pivot_table_submit (table);
+}
+
+static void
+create_interaction_dimensions (struct pivot_table *table,
+ const struct categoricals *cats,
+ const struct interaction *iact)
+{
+ for (size_t i = iact->n_vars; i-- > 0; )
+ {
+ const struct variable *var = iact->vars[i];
+ struct pivot_dimension *d = pivot_dimension_create__ (
+ table, PIVOT_AXIS_ROW, pivot_value_new_variable (var));
+ d->root->show_label = true;
+
+ size_t n;
+ union value *values = categoricals_get_var_values (cats, var, &n);
+ for (size_t j = 0; j < n; j++)
+ pivot_category_create_leaf (
+ d->root, pivot_value_new_var_value (var, &values[j]));
+ }
+}
+
+static void
+output_report (const struct means *cmd, int iact_idx,
+ const struct mtable *mt)
+{
+ struct pivot_table *table = pivot_table_create (N_("Report"));
+ table->omit_empty = true;
+
+ struct pivot_dimension *statistics = pivot_dimension_create (
+ table, PIVOT_AXIS_COLUMN, N_("Statistics"));
+ for (int i = 0; i < cmd->n_cells; ++i)
+ pivot_category_create_leaf (
+ statistics->root, pivot_value_new_text (cell_spec[cmd->cells[i]].title));
+
+ const struct interaction *iact = mt->interactions[iact_idx];
+ create_interaction_dimensions (table, mt->cats, iact);
+
+ struct pivot_dimension *dep_dim = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
+
+ size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
+
+ size_t n_cats = categoricals_n_count (mt->cats, iact_idx);
+ for (size_t v = 0; v < mt->n_dep_vars; ++v)
{
- struct layer *layer = table->layers[l];
- bt_init (&layer->instances.bt, my_other_compare_func,
- table->control_vars[l]);
-
- /* Iterate the instance hash table, and insert each instance
- into the binary tree BT. */
- struct instance *inst;
- HMAP_FOR_EACH (inst, struct instance, hmap_node,
- &layer->instances.map)
- {
- bt_insert (&layer->instances.bt, &inst->bt_node);
- }
-
- /* Iterate the binary tree (in order) and assign the index
- member accordingly. */
- int index = 0;
- BT_FOR_EACH (inst, struct instance, bt_node, &layer->instances.bt)
- {
- inst->index = index++;
- }
+ indexes[table->n_dimensions - 1] = pivot_category_create_leaf (
+ dep_dim->root, pivot_value_new_variable (mt->dep_vars[v]));
+
+ for (size_t i = 0; i < n_cats; ++i)
+ {
+ for (size_t j = 0; j < iact->n_vars; j++)
+ {
+ int idx = categoricals_get_value_index_by_category_real (
+ mt->cats, iact_idx, i, j);
+ indexes[table->n_dimensions - 2 - j] = idx;
+ }
+
+ struct per_cat_data *per_cat_data
+ = categoricals_get_user_data_by_category_real (
+ mt->cats, iact_idx, i);
+
+ const struct per_var_data *pvd = &per_cat_data->pvd[v];
+ for (int stat_idx = 0; stat_idx < cmd->n_cells; ++stat_idx)
+ {
+ indexes[0] = stat_idx;
+ const int csi = cmd->cells[stat_idx];
+ const struct cell_spec *cs = &cell_spec[csi];
+
+ double result = cs->sd (pvd, pvd->cell_stats[stat_idx]);
+ pivot_table_put (table, indexes, table->n_dimensions,
+ pivot_value_new_number (result));
+ }
+ }
}
+ free (indexes);
- /* The root cell should have no parent. */
- assert (table->root_cell->parent_cell == 0);
- dump_tree (table->root_cell, 0);
+ pivot_table_submit (table);
}
+