/* PSPP - a program for statistical analysis.
- Copyright (C) 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2019 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "data/format.h"
#include "data/variable.h"
-#include "language/command.h"
-#include "language/lexer/lexer.h"
-#include "language/lexer/variable-parser.h"
-
+#include "libpspp/hmap.h"
+#include "libpspp/bt.h"
+#include "libpspp/hash-functions.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
-#include "math/categoricals.h"
-#include "math/interaction.h"
-#include "math/moments.h"
+#include "language/command.h"
+
+#include "count-one-bits.h"
+#include "count-leading-zeros.h"
-#include "output/tab.h"
+#include "output/pivot-table.h"
+
+#include "means.h"
-#include <math.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) (msgid)
-struct means;
+/* A "cell" in this procedure represents a distinct value of the
+ procedure's categorical variables, and a set of summary statistics
+ of all cases which whose categorical variables have that set of
+ values. For example, the dataset
-struct per_var_data
-{
- void **cell_stats;
- struct moments1 *mom;
-};
+ v1 v2 cat1 cat2
+ 100 202 0 1
+ 100 202 0 2
+ 100 202 1 0
+ 100 202 0 1
-typedef void *stat_create (struct pool *pool);
-typedef void stat_update (void *stat, double w, double x);
-typedef double stat_get (const struct per_var_data *, void *aux);
+ has three cells in layer 0 and two cells in layer 1 in addition
+ to a "grand summary" cell to which all (non-missing) cases
+ contribute.
-struct cell_spec
+ The cells form a n-ary tree structure with the "grand summary"
+ cell at the root.
+*/
+struct cell
{
- /* Printable title for output */
- const char *title;
+ struct hmap_node hmap_node; /* Element in hash table. */
+ struct bt_node bt_node; /* Element in binary tree */
- /* Keyword for syntax */
- const char *keyword;
+ int n_children;
+ struct cell_container *children;
- stat_create *sc;
- stat_update *su;
- stat_get *sd;
-};
+ /* The statistics to be calculated for the cell. */
+ struct statistic **stat;
-struct harmonic_mean
-{
- double rsum;
- double n;
-};
+ /* The parent of this cell, or NULL if this is the root cell. */
+ const struct cell *parent_cell;
-static void *
-harmonic_create (struct pool *pool)
-{
- struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm);
+ /* A bit-field variable which indicates which control variables
+ are allocated a fixed value (for this cell), and which are
+ "wildcards".
- hm->rsum = 0;
- hm->n = 0;
+ A one indicates a fixed value. A zero indicates a wildcard.
+ Wildcard values are used to calculate totals and sub-totals.
+ */
+ unsigned int not_wild;
- return hm;
-}
-
-
-static void
-harmonic_update (void *stat, double w, double x)
-{
- struct harmonic_mean *hm = stat;
- hm->rsum += w / x;
- hm->n += w;
-}
+ /* The value(s). */
+ union value *values;
+ /* The variables corresponding to the above values. */
+ const struct variable **vars;
+};
-static double
-harmonic_get (const struct per_var_data *pvd UNUSED, void *stat)
+/* A structure used to find the union of all values used
+ within a layer, and to sort those values. */
+struct instance
{
- struct harmonic_mean *hm = stat;
+ struct hmap_node hmap_node; /* Element in hash table. */
+ struct bt_node bt_node; /* Element in binary tree */
- return hm->n / hm->rsum;
-}
+ /* A unique, consecutive, zero based index identifying this
+ instance. */
+ int index;
-\f
-
-struct geometric_mean
-{
- double prod;
- double n;
+ /* The top level value of this instance. */
+ union value value;
+ const struct variable *var;
};
-static void *
-geometric_create (struct pool *pool)
+static void
+destroy_workspace (const struct mtable *mt, struct workspace *ws)
{
- struct geometric_mean *gm = pool_alloc (pool, sizeof *gm);
-
- gm->prod = 1.0;
- gm->n = 0;
-
- return gm;
+ for (int l = 0; l < mt->n_layers; ++l)
+ {
+ struct cell_container *instances = ws->instances + l;
+ struct instance *inst;
+ struct instance *next;
+ HMAP_FOR_EACH_SAFE (inst, next, struct instance, hmap_node,
+ &instances->map)
+ {
+ int width = var_get_width (inst->var);
+ value_destroy (&inst->value, width);
+ free (inst);
+ }
+ hmap_destroy (&instances->map);
+ }
+ free (ws->control_idx);
+ free (ws->instances);
}
-
+/* Destroy CELL. */
static void
-geometric_update (void *stat, double w, double x)
+destroy_cell (const struct means *means,
+ const struct mtable *mt, struct cell *cell)
{
- struct geometric_mean *gm = stat;
- gm->prod *= pow (x, w);
- gm->n += w;
-}
+ int idx = 0;
+ for (int i = 0; i < mt->n_layers; ++i)
+ {
+ if (0 == ((cell->not_wild >> i) & 0x1))
+ continue;
+ const struct layer *layer = mt->layers[i];
+ for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
+ {
+ struct workspace *ws = mt->ws + cmb;
+ const struct variable *var
+ = layer->factor_vars[ws->control_idx[i]];
-static double
-geometric_get (const struct per_var_data *pvd UNUSED, void *stat)
-{
- struct geometric_mean *gm = stat;
+ int width = var_get_width (var);
+ value_destroy (&cell->values[idx++], width);
+ }
+ }
+ for (int i = 0; i < cell->n_children; ++i)
+ {
+ struct cell_container *container = cell->children + i;
+ hmap_destroy (&container->map);
+ }
- return pow (gm->prod, 1.0 / gm->n);
+ for (int v = 0; v < mt->n_dep_vars; ++v)
+ {
+ for (int s = 0; s < means->n_statistics; ++s)
+ {
+ stat_destroy *des = cell_spec[means->statistics[s]].sf;
+ des (cell->stat[s + v * means->n_statistics]);
+ }
+ }
+ free (cell->stat);
+
+ free (cell->children);
+ free (cell->values);
+ free (cell->vars);
+ free (cell);
}
-\f
-static double
-sum_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* Walk the tree in postorder starting from CELL and destroy all the
+ cells. */
+static void
+means_destroy_cells (const struct means *means, struct cell *cell,
+ const struct mtable *table)
{
- double n, mean;
-
- moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0);
+ for (int i = 0; i < cell->n_children; ++i)
+ {
+ struct cell_container *container = cell->children + i;
+ struct cell *sub_cell;
+ struct cell *next;
+ HMAP_FOR_EACH_SAFE (sub_cell, next, struct cell, hmap_node,
+ &container->map)
+ {
+ means_destroy_cells (means, sub_cell, table);
+ }
+ }
- return mean * n;
+ destroy_cell (means, table, cell);
}
+#if 0
-static double
-n_get (const struct per_var_data *pvd, void *stat UNUSED)
+static void
+dump_cell (const struct cell *cell, const struct mtable *mt, int level)
{
- double n;
-
- moments1_calculate (pvd->mom, &n, 0, 0, 0, 0);
+ for (int l = 0; l < level; ++l)
+ putchar (' ');
+ printf ("%p: ", cell);
+ for (int i = 0; i < mt->n_layers; ++i)
+ {
+ putchar (((cell->not_wild >> i) & 0x1) ? 'w' : '.');
+ }
+ printf (" - ");
+ int x = 0;
+ for (int i = 0; i < mt->n_layers; ++i)
+ {
+ if ((cell->not_wild >> i) & 0x1)
+ {
+ printf ("%s: ", var_get_name (cell->vars[x]));
+ printf ("%g ", cell->values[x++].f);
+ }
+ else
+ printf ("x ");
+ }
+ stat_get *sg = cell_spec[MEANS_N].sd;
+ printf ("--- S1: %g", sg (cell->stat[0]));
- return n;
+ printf ("--- N Children: %d", cell->n_children);
+ // printf ("--- Level: %d", level);
+ printf ("--- Parent: %p", cell->parent_cell);
+ printf ("\n");
}
-static double
-arithmean_get (const struct per_var_data *pvd, void *stat UNUSED)
+static void
+dump_indeces (const size_t *indexes, int n)
{
- double n, mean;
-
- moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0);
-
- return mean;
+ for (int i = 0 ; i < n; ++i)
+ {
+ printf ("%ld; ", indexes[i]);
+ }
+ printf ("\n");
}
-static double
-variance_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* Dump the tree in pre-order. */
+static void
+dump_tree (const struct cell *cell, const struct mtable *table,
+ int level, const struct cell *parent)
{
- double n, mean, variance;
-
- moments1_calculate (pvd->mom, &n, &mean, &variance, 0, 0);
-
- return variance;
-}
+ assert (cell->parent_cell == parent);
+ dump_cell (cell, table, level);
-
-static double
-stddev_get (const struct per_var_data *pvd, void *stat)
-{
- return sqrt (variance_get (pvd, stat));
+ for (int i = 0; i < cell->n_children; ++i)
+ {
+ struct cell_container *container = cell->children + i;
+ struct cell *sub_cell;
+ BT_FOR_EACH (sub_cell, struct cell, bt_node, &container->bt)
+ {
+ dump_tree (sub_cell, table, level + 1, cell);
+ }
+ }
}
+#endif
-\f
-
-static double
-skew_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* Generate a hash based on the values of the N variables in
+ the array VARS which are taken from the case C. */
+static unsigned int
+generate_hash (const struct mtable *mt,
+ const struct ccase *c,
+ unsigned int not_wild,
+ const struct workspace *ws)
{
- double skew;
-
- moments1_calculate (pvd->mom, NULL, NULL, NULL, &skew, 0);
+ unsigned int hash = 0;
+ for (int i = 0; i < mt->n_layers; ++i)
+ {
+ if (0 == ((not_wild >> i) & 0x1))
+ continue;
+
+ const struct layer *layer = mt->layers[i];
+ const struct variable *var = layer->factor_vars[ws->control_idx[i]];
+ const union value *vv = case_data (c, var);
+ int width = var_get_width (var);
+ hash = hash_int (i, hash);
+ hash = value_hash (vv, width, hash);
+ }
- return skew;
+ return hash;
}
-static double
-sekurt_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* Create a cell based on the N variables in the array VARS,
+ which are indeces into the case C.
+ The caller is responsible for destroying this cell when
+ no longer needed. */
+static struct cell *
+generate_cell (const struct means *means,
+ const struct mtable *mt,
+ const struct ccase *c,
+ unsigned int not_wild,
+ const struct cell *pcell,
+ const struct workspace *ws)
{
- double n;
-
- moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL);
-
- return calc_sekurt (n);
-}
+ int n_vars = count_one_bits (not_wild);
+ struct cell *cell = XZALLOC (struct cell);
+ cell->values = xcalloc (n_vars, sizeof *cell->values);
+ cell->vars = xcalloc (n_vars, sizeof *cell->vars);
+ cell->not_wild = not_wild;
+
+ cell->parent_cell = pcell;
+ cell->n_children = mt->n_layers -
+ (sizeof (cell->not_wild) * CHAR_BIT) +
+ count_leading_zeros (cell->not_wild);
+
+ int idx = 0;
+ for (int i = 0; i < mt->n_layers; ++i)
+ {
+ if (0 == ((not_wild >> i) & 0x1))
+ continue;
+
+ const struct layer *layer = mt->layers[i];
+ const struct variable *var = layer->factor_vars[ws->control_idx[i]];
+ const union value *vv = case_data (c, var);
+ int width = var_get_width (var);
+ cell->vars[idx] = var;
+ value_clone (&cell->values[idx++], vv, width);
+ }
+ assert (idx == n_vars);
-static double
-seskew_get (const struct per_var_data *pvd, void *stat UNUSED)
-{
- double n;
+ cell->children = xcalloc (cell->n_children, sizeof *cell->children);
+ for (int i = 0; i < cell->n_children; ++i)
+ {
+ struct cell_container *container = cell->children + i;
+ hmap_init (&container->map);
+ }
- moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL);
+ cell->stat = xcalloc (means->n_statistics * mt->n_dep_vars, sizeof *cell->stat);
+ for (int v = 0; v < mt->n_dep_vars; ++v)
+ {
+ for (int stat = 0; stat < means->n_statistics; ++stat)
+ {
+ stat_create *sc = cell_spec[means->statistics[stat]].sc;
- return calc_seskew (n);
+ cell->stat[stat + v * means->n_statistics] = sc (means->pool);
+ }
+ }
+ return cell;
}
-static double
-kurt_get (const struct per_var_data *pvd, void *stat UNUSED)
-{
- double kurt;
-
- moments1_calculate (pvd->mom, NULL, NULL, NULL, NULL, &kurt);
- return kurt;
-}
-
-static double
-semean_get (const struct per_var_data *pvd, void *stat UNUSED)
+/* If a cell based on the N variables in the array VARS,
+ which are indeces into the case C and whose hash is HASH,
+ exists in HMAP, then return that cell.
+ Otherwise, return NULL. */
+static struct cell *
+lookup_cell (const struct mtable *mt,
+ struct hmap *hmap, unsigned int hash,
+ const struct ccase *c,
+ unsigned int not_wild,
+ const struct workspace *ws)
{
- double n, var;
-
- moments1_calculate (pvd->mom, &n, NULL, &var, NULL, NULL);
+ struct cell *cell = NULL;
+ HMAP_FOR_EACH_WITH_HASH (cell, struct cell, hmap_node, hash, hmap)
+ {
+ bool match = true;
+ int idx = 0;
+ if (cell->not_wild != not_wild)
+ continue;
+ for (int i = 0; i < mt->n_layers; ++i)
+ {
+ if (0 == ((cell->not_wild >> i) & 0x1))
+ continue;
- return sqrt (var / n);
+ const struct layer *layer = mt->layers[i];
+ const struct variable *var = layer->factor_vars[ws->control_idx[i]];
+ const union value *vv = case_data (c, var);
+ int width = var_get_width (var);
+ assert (var == cell->vars[idx]);
+ if (!value_equal (vv, &cell->values[idx++], width))
+ {
+ match = false;
+ break;
+ }
+ }
+ if (match)
+ return cell;
+ }
+ return NULL;
}
-\f
-static void *
-min_create (struct pool *pool)
+/* A comparison function used to sort cells in a binary tree.
+ Only the innermost value needs to be compared, because no
+ two cells with similar outer values will appear in the same
+ tree/map. */
+static int
+cell_compare_3way (const struct bt_node *a,
+ const struct bt_node *b,
+ const void *aux UNUSED)
{
- double *r = pool_alloc (pool, sizeof *r);
+ const struct cell *fa = BT_DATA (a, struct cell, bt_node);
+ const struct cell *fb = BT_DATA (b, struct cell, bt_node);
- *r = DBL_MAX;
+ assert (fa->not_wild == fb->not_wild);
+ int vidx = count_one_bits (fa->not_wild) - 1;
+ assert (fa->vars[vidx] == fb->vars[vidx]);
- return r;
+ return value_compare_3way (&fa->values[vidx],
+ &fb->values[vidx],
+ var_get_width (fa->vars[vidx]));
}
-static void
-min_update (void *stat, double w UNUSED, double x)
+/* A comparison function used to sort cells in a binary tree. */
+static int
+compare_instance_3way (const struct bt_node *a,
+ const struct bt_node *b,
+ const void *aux UNUSED)
{
- double *r = stat;
-
- if (x < *r)
- *r = x;
-}
+ const struct instance *fa = BT_DATA (a, struct instance, bt_node);
+ const struct instance *fb = BT_DATA (b, struct instance, bt_node);
-static double
-min_get (const struct per_var_data *pvd UNUSED, void *stat)
-{
- double *r = stat;
+ assert (fa->var == fb->var);
- return *r;
+ return value_compare_3way (&fa->value,
+ &fb->value,
+ var_get_width (fa->var));
}
-static void *
-max_create (struct pool *pool)
-{
- double *r = pool_alloc (pool, sizeof *r);
- *r = -DBL_MAX;
+static void arrange_cells (struct workspace *ws,
+ struct cell *cell, const struct mtable *table);
- return r;
-}
+/* Iterate CONTAINER's map inserting a copy of its elements into
+ CONTAINER's binary tree. Also, for each layer in TABLE, create
+ an instance container, containing the union of all elements in
+ CONTAINER. */
static void
-max_update (void *stat, double w UNUSED, double x)
-{
- double *r = stat;
-
- if (x > *r)
- *r = x;
-}
-
-static double
-max_get (const struct per_var_data *pvd UNUSED, void *stat)
+arrange_cell (struct workspace *ws, struct cell_container *container,
+ const struct mtable *mt)
{
- double *r = stat;
+ struct bt *bt = &container->bt;
+ struct hmap *map = &container->map;
+ bt_init (bt, cell_compare_3way, NULL);
- return *r;
-}
+ struct cell *cell;
+ HMAP_FOR_EACH (cell, struct cell, hmap_node, map)
+ {
+ bt_insert (bt, &cell->bt_node);
-\f
+ int idx = 0;
+ for (int i = 0; i < mt->n_layers; ++i)
+ {
+ if (0 == ((cell->not_wild >> i) & 0x1))
+ continue;
-struct range
-{
- double min;
- double max;
-};
+ struct cell_container *instances = ws->instances + i;
+ const struct variable *var = cell->vars[idx];
+ int width = var_get_width (var);
+ unsigned int hash
+ = value_hash (&cell->values[idx], width, 0);
+
+ struct instance *inst = NULL;
+ struct instance *next = NULL;
+ HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance,
+ hmap_node,
+ hash, &instances->map)
+ {
+ assert (cell->vars[idx] == var);
+ if (value_equal (&inst->value,
+ &cell->values[idx],
+ width))
+ {
+ break;
+ }
+ }
-static void *
-range_create (struct pool *pool)
-{
- struct range *r = pool_alloc (pool, sizeof *r);
+ if (!inst)
+ {
+ inst = xzalloc (sizeof *inst);
+ inst->index = -1;
+ inst->var = var;
+ value_clone (&inst->value, &cell->values[idx],
+ width);
+ hmap_insert (&instances->map, &inst->hmap_node, hash);
+ }
- r->min = DBL_MAX;
- r->max = -DBL_MAX;
+ idx++;
+ }
- return r;
+ arrange_cells (ws, cell, mt);
+ }
}
+/* Arrange the children and then all the subtotals. */
static void
-range_update (void *stat, double w UNUSED, double x)
+arrange_cells (struct workspace *ws, struct cell *cell,
+ const struct mtable *table)
{
- struct range *r = stat;
-
- if (x > r->max)
- r->max = x;
-
- if (x < r->min)
- r->min = x;
+ for (int i = 0; i < cell->n_children; ++i)
+ {
+ struct cell_container *container = cell->children + i;
+ arrange_cell (ws, container, table);
+ }
}
-static double
-range_get (const struct per_var_data *pvd UNUSED, void *stat)
-{
- struct range *r = stat;
-
- return r->max - r->min;
-}
\f
-static void *
-last_create (struct pool *pool)
+/* If the top level value in CELL, has an instance in the L_IDX'th layer,
+ then return that instance. Otherwise return NULL. */
+static const struct instance *
+lookup_instance (const struct mtable *mt, const struct workspace *ws,
+ int l_idx, const struct cell *cell)
{
- double *l = pool_alloc (pool, sizeof *l);
-
- return l;
+ const struct layer *layer = mt->layers[l_idx];
+ int n_vals = count_one_bits (cell->not_wild);
+ const struct variable *var = layer->factor_vars[ws->control_idx[l_idx]];
+ const union value *val = cell->values + n_vals - 1;
+ int width = var_get_width (var);
+ unsigned int hash = value_hash (val, width, 0);
+ const struct cell_container *instances = ws->instances + l_idx;
+ struct instance *inst = NULL;
+ struct instance *next;
+ HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next,
+ struct instance, hmap_node,
+ hash, &instances->map)
+ {
+ if (value_equal (val, &inst->value, width))
+ break;
+ }
+ return inst;
}
+/* Enter the values into PT. */
static void
-last_update (void *stat, double w UNUSED, double x)
+populate_table (const struct means *means, const struct mtable *mt,
+ const struct workspace *ws,
+ const struct cell *cell,
+ struct pivot_table *pt)
{
- double *l = stat;
-
- *l = x;
-}
-
-static double
-last_get (const struct per_var_data *pvd UNUSED, void *stat)
-{
- double *l = stat;
-
- return *l;
-}
-
-
-static void *
-first_create (struct pool *pool)
-{
- double *f = pool_alloc (pool, sizeof *f);
-
- *f = SYSMIS;
+ size_t *indexes = XCALLOC (pt->n_dimensions, size_t);
+ for (int v = 0; v < mt->n_dep_vars; ++v)
+ {
+ for (int s = 0; s < means->n_statistics; ++s)
+ {
+ int i = 0;
+ if (mt->n_dep_vars > 1)
+ indexes[i++] = v;
+ indexes[i++] = s;
+ int stat = means->statistics[s];
+ stat_get *sg = cell_spec[stat].sd;
+ {
+ const struct cell *pc = cell;
+ for (; i < pt->n_dimensions; ++i)
+ {
+ int l_idx = pt->n_dimensions - i - 1;
+ const struct cell_container *instances = ws->instances + l_idx;
+ if (0 == (cell->not_wild >> l_idx & 0x1U))
+ {
+ indexes [i] = hmap_count (&instances->map);
+ }
+ else
+ {
+ assert (pc);
+ const struct instance *inst
+ = lookup_instance (mt, ws, l_idx, pc);
+ assert (inst);
+ indexes [i] = inst->index;
+ pc = pc->parent_cell;
+ }
+ }
+ }
+
+ int idx = s + v * means->n_statistics;
+ struct pivot_value *pv
+ = pivot_value_new_number (sg (cell->stat[idx]));
+ if (NULL == cell_spec[stat].rc)
+ {
+ const struct variable *dv = mt->dep_vars[v];
+ pv->numeric.format = * var_get_print_format (dv);
+ }
+ pivot_table_put (pt, indexes, pt->n_dimensions, pv);
+ }
+ }
+ free (indexes);
- return f;
+ for (int i = 0; i < cell->n_children; ++i)
+ {
+ struct cell_container *container = cell->children + i;
+ struct cell *child = NULL;
+ BT_FOR_EACH (child, struct cell, bt_node, &container->bt)
+ {
+ populate_table (means, mt, ws, child, pt);
+ }
+ }
}
static void
-first_update (void *stat, double w UNUSED, double x)
+create_table_structure (const struct mtable *mt, struct pivot_table *pt,
+ const struct workspace *ws)
{
- double *f = stat;
+ int * lindexes = ws->control_idx;
+ /* The inner layers are situated rightmost in the table.
+ So this iteration is in reverse order. */
+ for (int l = mt->n_layers -1; l >=0 ; --l)
+ {
+ const struct layer *layer = mt->layers[l];
+ const struct cell_container *instances = ws->instances + l;
+ const struct variable *var = layer->factor_vars[lindexes[l]];
+ struct pivot_dimension *dim_layer
+ = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
+ var_to_string (var));
+ dim_layer->root->show_label = true;
+
+ /* Place the values of the control variables as table headings. */
+ {
+ struct instance *inst = NULL;
+ BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
+ {
+ struct substring space = SS_LITERAL_INITIALIZER ("\t ");
+ struct string str;
+ ds_init_empty (&str);
+ var_append_value_name (var,
+ &inst->value,
+ &str);
+
+ ds_ltrim (&str, space);
+
+ pivot_category_create_leaf (dim_layer->root,
+ pivot_value_new_text (ds_cstr (&str)));
+
+ ds_destroy (&str);
+ }
+ }
- if (*f == SYSMIS)
- *f = x;
+ pivot_category_create_leaf (dim_layer->root,
+ pivot_value_new_text ("Total"));
+ }
}
-static double
-first_get (const struct per_var_data *pvd UNUSED, void *stat)
+/* Initialise C_DES with a string describing the control variable
+ relating to MT, LINDEXES. */
+static void
+layers_to_string (const struct mtable *mt, const int *lindexes,
+ struct string *c_des)
{
- double *f = stat;
-
- return *f;
+ for (int l = 0; l < mt->n_layers; ++l)
+ {
+ const struct layer *layer = mt->layers[l];
+ const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
+ if (l > 0)
+ ds_put_cstr (c_des, " * ");
+ ds_put_cstr (c_des, var_get_name (ctrl_var));
+ }
}
-enum
- {
- MEANS_MEAN = 0,
- MEANS_N,
- MEANS_STDDEV
- };
-
-/* Table of cell_specs */
-static const struct cell_spec cell_spec[] = {
- {N_("Mean"), "MEAN", NULL, NULL, arithmean_get},
- {N_("N"), "COUNT", NULL, NULL, n_get},
- {N_("Std. Deviation"), "STDDEV", NULL, NULL, stddev_get},
-#if 0
- {N_("Median"), "MEDIAN", NULL, NULL, NULL},
- {N_("Group Median"), "GMEDIAN", NULL, NULL, NULL},
-#endif
- {N_("S.E. Mean"), "SEMEAN", NULL, NULL, semean_get},
- {N_("Sum"), "SUM", NULL, NULL, sum_get},
- {N_("Min"), "MIN", min_create, min_update, min_get},
- {N_("Max"), "MAX", max_create, max_update, max_get},
- {N_("Range"), "RANGE", range_create, range_update, range_get},
- {N_("Variance"), "VARIANCE", NULL, NULL, variance_get},
- {N_("Kurtosis"), "KURT", NULL, NULL, kurt_get},
- {N_("S.E. Kurt"), "SEKURT", NULL, NULL, sekurt_get},
- {N_("Skewness"), "SKEW", NULL, NULL, skew_get},
- {N_("S.E. Skew"), "SESKEW", NULL, NULL, seskew_get},
- {N_("First"), "FIRST", first_create, first_update, first_get},
- {N_("Last"), "LAST", last_create, last_update, last_get},
-#if 0
- {N_("Percent N"), "NPCT", NULL, NULL, NULL},
- {N_("Percent Sum"), "SPCT", NULL, NULL, NULL},
-#endif
- {N_("Harmonic Mean"), "HARMONIC", harmonic_create, harmonic_update, harmonic_get},
- {N_("Geom. Mean"), "GEOMETRIC", geometric_create, geometric_update, geometric_get}
-};
-
-#define n_C (sizeof (cell_spec) / sizeof (struct cell_spec))
-
-
-struct summary
-{
- casenumber missing;
- casenumber non_missing;
-};
-
-
-/* The thing parsed after TABLES= */
-struct mtable
-{
- size_t n_dep_vars;
- const struct variable **dep_vars;
-
- size_t n_interactions;
- struct interaction **interactions;
- struct summary *summary;
-
- size_t *n_factor_vars;
- const struct variable ***factor_vars;
-
- int ii;
-
- int n_layers;
-
- struct categoricals *cats;
-};
-
-struct means
-{
- const struct dictionary *dict;
-
- struct mtable *table;
- size_t n_tables;
-
- /* Missing value class for categorical variables */
- enum mv_class exclude;
-
- /* Missing value class for dependent variables */
- enum mv_class dep_exclude;
-
- bool listwise_exclude;
-
- /* an array indicating which statistics are to be calculated */
- int *cells;
-
- /* Size of cells */
- int n_cells;
-
- /* Pool on which cell functions may allocate data */
- struct pool *pool;
-};
-
-
static void
-run_means (struct means *cmd, struct casereader *input,
- const struct dataset *ds);
-
-/* Append all the variables belonging to layer and all subsequent layers
- to iact. And then append iact to the means->interaction.
- This is a recursive function.
- */
-static void
-iact_append_factor (struct mtable *means, int layer,
- const struct interaction *iact)
+populate_case_processing_summary (struct pivot_category *pc,
+ const struct mtable *mt,
+ const int *lindexes)
{
- int v;
- const struct variable **fv;
-
- if (layer >= means->n_layers)
- return;
-
- fv = means->factor_vars[layer];
-
- for (v = 0; v < means->n_factor_vars[layer]; ++v)
+ struct string ds;
+ ds_init_empty (&ds);
+ int l = 0;
+ for (l = 0; l < mt->n_layers; ++l)
{
- struct interaction *nexti = interaction_clone (iact);
-
- interaction_add_variable (nexti, fv[v]);
-
- iact_append_factor (means, layer + 1, nexti);
-
- if (layer == means->n_layers - 1)
+ const struct layer *layer = mt->layers[l];
+ const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
+ if (l > 0)
+ ds_put_cstr (&ds, " * ");
+ ds_put_cstr (&ds, var_get_name (ctrl_var));
+ }
+ for (int dv = 0; dv < mt->n_dep_vars; ++dv)
+ {
+ struct string dss;
+ ds_init_empty (&dss);
+ ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
+ if (mt->n_layers > 0)
{
- means->interactions[means->ii++] = nexti;
+ ds_put_cstr (&dss, " * ");
+ ds_put_substring (&dss, ds.ss);
}
+ pivot_category_create_leaf (pc,
+ pivot_value_new_text (ds_cstr (&dss)));
+ ds_destroy (&dss);
}
+
+ ds_destroy (&ds);
}
-static bool
-parse_means_table_syntax (struct lexer *lexer, const struct means *cmd, struct mtable *table)
+/* Create the "Case Processing Summary" table. */
+static void
+means_case_processing_summary (const struct mtable *mt)
{
- table->ii = 0;
- table->n_layers = 0;
- table->factor_vars = NULL;
- table->n_factor_vars = NULL;
-
- /* Dependent variable (s) */
- if (!parse_variables_const (lexer, cmd->dict,
- &table->dep_vars, &table->n_dep_vars,
- PV_NO_DUPLICATE | PV_NUMERIC))
- return false;
-
- /* Factor variable (s) */
- while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
+ struct pivot_table *pt = pivot_table_create (N_("Case Processing Summary"));
+
+ struct pivot_dimension *dim_cases =
+ pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Cases"));
+ dim_cases->root->show_label = true;
+
+ struct pivot_category *cats[3];
+ cats[0] = pivot_category_create_group (dim_cases->root,
+ N_("Included"), NULL);
+ cats[1] = pivot_category_create_group (dim_cases->root,
+ N_("Excluded"), NULL);
+ cats[2] = pivot_category_create_group (dim_cases->root,
+ N_("Total"), NULL);
+ for (int i = 0; i < 3; ++i)
{
- if (lex_match (lexer, T_BY))
- {
- table->n_layers++;
- table->factor_vars =
- xrealloc (table->factor_vars,
- sizeof (*table->factor_vars) * table->n_layers);
-
- table->n_factor_vars =
- xrealloc (table->n_factor_vars,
- sizeof (*table->n_factor_vars) * table->n_layers);
-
- if (!parse_variables_const (lexer, cmd->dict,
- &table->factor_vars[table->n_layers - 1],
- &table->n_factor_vars[table->n_layers -
- 1],
- PV_NO_DUPLICATE))
- return false;
+ pivot_category_create_leaf_rc (cats[i],
+ pivot_value_new_text (N_("N")),
+ PIVOT_RC_COUNT);
+ pivot_category_create_leaf_rc (cats[i],
+ pivot_value_new_text (N_("Percent")),
+ PIVOT_RC_PERCENT);
+ }
- }
+ struct pivot_dimension *rows =
+ pivot_dimension_create (pt, PIVOT_AXIS_ROW, N_("Variables"));
+
+ for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
+ {
+ const struct workspace *ws = mt->ws + cmb;
+ populate_case_processing_summary (rows->root, mt, ws->control_idx);
+ for (int dv = 0; dv < mt->n_dep_vars; ++dv)
+ {
+ int idx = cmb * mt->n_dep_vars + dv;
+ const struct summary *summ = mt->summ + idx;
+ double n_included = summ->n_total - summ->n_missing;
+ pivot_table_put2 (pt, 5, idx,
+ pivot_value_new_number (100.0 * summ->n_total / summ->n_total));
+ pivot_table_put2 (pt, 4, idx,
+ pivot_value_new_number (summ->n_total));
+
+ pivot_table_put2 (pt, 3, idx,
+ pivot_value_new_number (100.0 * summ->n_missing / summ->n_total));
+ pivot_table_put2 (pt, 2, idx,
+ pivot_value_new_number (summ->n_missing));
+
+ pivot_table_put2 (pt, 1, idx,
+ pivot_value_new_number (100.0 * n_included / summ->n_total));
+ pivot_table_put2 (pt, 0, idx,
+ pivot_value_new_number (n_included));
+ }
}
- return true;
+ pivot_table_submit (pt);
}
-/* Match a variable.
- If the match succeeds, the variable will be placed in VAR.
- Returns true if successful */
-static bool
-lex_is_variable (struct lexer *lexer, const struct dictionary *dict,
- int n)
+static void
+means_shipout_single (const struct mtable *mt, const struct means *means,
+ const struct workspace *ws)
{
- const char *tstr;
- if (lex_next_token (lexer, n) != T_ID)
- return false;
+ struct pivot_table *pt = pivot_table_create (N_("Report"));
- tstr = lex_next_tokcstr (lexer, n);
+ struct pivot_dimension *dim_cells =
+ pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Statistics"));
- if (NULL == dict_lookup_var (dict, tstr) )
- return false;
+ /* Set the statistics headings, eg "Mean", "Std. Dev" etc. */
+ for (int i = 0; i < means->n_statistics; ++i)
+ {
+ const struct cell_spec *cs = cell_spec + means->statistics[i];
+ pivot_category_create_leaf_rc
+ (dim_cells->root,
+ pivot_value_new_text (gettext (cs->title)), cs->rc);
+ }
- return true;
+ create_table_structure (mt, pt, ws);
+ populate_table (means, mt, ws, ws->root_cell, pt);
+ pivot_table_submit (pt);
}
-int
-cmd_means (struct lexer *lexer, struct dataset *ds)
+static void
+means_shipout_multivar (const struct mtable *mt, const struct means *means,
+ const struct workspace *ws)
{
- int t;
- int i;
- int l;
- struct means means;
- bool more_tables = true;
-
- means.exclude = MV_ANY;
- means.dep_exclude = MV_ANY;
- means.listwise_exclude = false;
- means.table = NULL;
- means.n_tables = 0;
-
- means.dict = dataset_dict (ds);
-
- means.n_cells = 3;
- means.cells = xcalloc (means.n_cells, sizeof (*means.cells));
+ struct string dss;
+ ds_init_empty (&dss);
+ for (int dv = 0; dv < mt->n_dep_vars; ++dv)
+ {
+ if (dv > 0)
+ ds_put_cstr (&dss, " * ");
+ ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
+ }
+ for (int l = 0; l < mt->n_layers; ++l)
+ {
+ ds_put_cstr (&dss, " * ");
+ const struct layer *layer = mt->layers[l];
+ const struct variable *var = layer->factor_vars[ws->control_idx[l]];
+ ds_put_cstr (&dss, var_get_name (var));
+ }
- /* The first three items (MEAN, COUNT, STDDEV) are the default */
- for (i = 0; i < 3; ++i)
- means.cells[i] = i;
+ struct pivot_table *pt = pivot_table_create (ds_cstr (&dss));
+ ds_destroy (&dss);
+ struct pivot_dimension *dim_cells =
+ pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Variables"));
- /* Optional TABLES = */
- if (lex_match_id (lexer, "TABLES"))
+ for (int i = 0; i < mt->n_dep_vars; ++i)
{
- lex_force_match (lexer, T_EQUALS);
+ pivot_category_create_leaf
+ (dim_cells->root,
+ pivot_value_new_variable (mt->dep_vars[i]));
}
+ struct pivot_dimension *dim_stats
+ = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
+ N_ ("Statistics"));
+ dim_stats->root->show_label = false;
- more_tables = true;
- /* Parse the "tables" */
- while (more_tables)
+ for (int i = 0; i < means->n_statistics; ++i)
{
- means.n_tables ++;
- means.table = xrealloc (means.table, means.n_tables * sizeof (*means.table));
-
- if (! parse_means_table_syntax (lexer, &means,
- &means.table[means.n_tables - 1]))
- {
- goto error;
- }
-
- /* Look ahead to see if there are more tables to be parsed */
- more_tables = false;
- if ( T_SLASH == lex_next_token (lexer, 0) )
- {
- if (lex_is_variable (lexer, means.dict, 1) )
- {
- more_tables = true;
- lex_force_match (lexer, T_SLASH);
- }
- }
+ const struct cell_spec *cs = cell_spec + means->statistics[i];
+ pivot_category_create_leaf_rc
+ (dim_stats->root,
+ pivot_value_new_text (gettext (cs->title)), cs->rc);
}
- /* /MISSING subcommand */
- while (lex_token (lexer) != T_ENDCMD)
- {
- lex_match (lexer, T_SLASH);
-
- if (lex_match_id (lexer, "MISSING"))
- {
- /*
- If no MISSING subcommand is specified, each combination of
- a dependent variable and categorical variables is handled
- separately.
- */
- lex_match (lexer, T_EQUALS);
- if (lex_match_id (lexer, "INCLUDE"))
- {
- /*
- Use the subcommand "/MISSING=INCLUDE" to include user-missing
- values in the analysis.
- */
+ create_table_structure (mt, pt, ws);
+ populate_table (means, mt, ws, ws->root_cell, pt);
+ pivot_table_submit (pt);
+}
- means.exclude = MV_SYSTEM;
- means.dep_exclude = MV_SYSTEM;
- }
- else if (lex_match_id (lexer, "TABLE"))
- /*
- This is the default. (I think).
- Every case containing a complete set of variables for a given
- table. If any variable, categorical or dependent for in a table
- is missing (as defined by what?), then that variable will
- be dropped FOR THAT TABLE ONLY.
- */
- {
- means.listwise_exclude = true;
- }
- else if (lex_match_id (lexer, "DEPENDENT"))
- /*
- Use the command "/MISSING=DEPENDENT" to
- include user-missing values for the categorical variables,
- while excluding them for the dependent variables.
-
- Cases are dropped only when user-missing values
- appear in dependent variables. User-missing
- values for categorical variables are treated according to
- their face value.
-
- Cases are ALWAYS dropped when System Missing values appear
- in the categorical variables.
- */
- {
- means.dep_exclude = MV_ANY;
- means.exclude = MV_SYSTEM;
- }
- else
- {
- lex_error (lexer, NULL);
- goto error;
- }
- }
- else if (lex_match_id (lexer, "CELLS"))
+static void
+means_shipout (const struct mtable *mt, const struct means *means)
+{
+ for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
+ {
+ const struct workspace *ws = mt->ws + cmb;
+ if (ws->root_cell == NULL)
{
- lex_match (lexer, T_EQUALS);
-
- /* The default values become overwritten */
- means.n_cells = 0;
- while (lex_token (lexer) != T_ENDCMD
- && lex_token (lexer) != T_SLASH)
- {
- int k = 0;
- if (lex_match (lexer, T_ALL))
- {
- int x;
- means.cells =
- xrealloc (means.cells,
- (means.n_cells += n_C) * sizeof (*means.cells));
-
- for (x = 0; x < n_C; ++x)
- means.cells[means.n_cells - (n_C - 1 - x) - 1] = x;
- }
- else if (lex_match_id (lexer, "NONE"))
- {
- /* Do nothing */
- }
- else if (lex_match_id (lexer, "DEFAULT"))
- {
- means.cells =
- xrealloc (means.cells,
- (means.n_cells += 3) * sizeof (*means.cells));
-
- means.cells[means.n_cells - 2 - 1] = MEANS_MEAN;
- means.cells[means.n_cells - 1 - 1] = MEANS_N;
- means.cells[means.n_cells - 0 - 1] = MEANS_STDDEV;
- }
- else
- {
- for (; k < n_C; ++k)
- {
- if (lex_match_id (lexer, cell_spec[k].keyword))
- {
- means.cells =
- xrealloc (means.cells,
- ++means.n_cells * sizeof (*means.cells));
-
- means.cells[means.n_cells - 1] = k;
- break;
- }
- }
- }
- if (k >= n_C)
- {
- lex_error (lexer, NULL);
- goto error;
- }
- }
+ struct string des;
+ ds_init_empty (&des);
+ layers_to_string (mt, ws->control_idx, &des);
+ msg (MW, _("The table \"%s\" has no non-empty control variables."
+ " No result for this table will be displayed."),
+ ds_cstr (&des));
+ ds_destroy (&des);
+ continue;
}
+ if (mt->n_dep_vars > 1)
+ means_shipout_multivar (mt, means, ws);
else
- {
- lex_error (lexer, NULL);
- goto error;
- }
+ means_shipout_single (mt, means, ws);
}
-
- means.pool = pool_create ();
-
-
- for (t = 0; t < means.n_tables; ++t)
- {
- struct mtable *table = &means.table[t];
- table->n_interactions = 1;
- for (l = 0; l < table->n_layers; ++l)
- {
- const int n_vars = table->n_factor_vars[l];
- table->n_interactions *= n_vars;
- }
-
- table->interactions =
- xcalloc (table->n_interactions, sizeof (*table->interactions));
-
- table->summary =
- xcalloc (table->n_dep_vars * table->n_interactions, sizeof (*table->summary));
-
-
- if (table->n_layers > 0)
- iact_append_factor (table, 0, interaction_create (NULL));
- else
- table->interactions[0] = interaction_create (NULL);
-
- }
-
-
- {
- struct casegrouper *grouper;
- struct casereader *group;
- bool ok;
-
- grouper = casegrouper_create_splits (proc_open (ds), means.dict);
- while (casegrouper_get_next_group (grouper, &group))
- {
- run_means (&means, group, ds);
- }
- ok = casegrouper_destroy (grouper);
- ok = proc_commit (ds) && ok;
- }
-
-
- return CMD_SUCCESS;
-
-error:
-
- return CMD_FAILURE;
}
+\f
+
static bool
-is_missing (const struct means *cmd,
- const struct variable *dvar,
- const struct interaction *iact,
- const struct ccase *c)
+control_var_missing (const struct means *means,
+ const struct mtable *mt,
+ unsigned int not_wild UNUSED,
+ const struct ccase *c,
+ const struct workspace *ws)
{
- if ( interaction_case_is_missing (iact, c, cmd->exclude) )
- return true;
-
-
- if (var_is_value_missing (dvar,
- case_data (c, dvar),
- cmd->dep_exclude))
- return true;
+ bool miss = false;
+ for (int l = 0; l < mt->n_layers; ++l)
+ {
+ /* if (0 == ((not_wild >> l) & 0x1)) */
+ /* { */
+ /* continue; */
+ /* } */
+
+ const struct layer *layer = mt->layers[l];
+ const struct variable *var = layer->factor_vars[ws->control_idx[l]];
+ const union value *vv = case_data (c, var);
+
+ miss = (var_is_value_missing (var, vv) & means->ctrl_exclude) != 0;
+ if (miss)
+ break;
+ }
- return false;
+ return miss;
}
-static void output_case_processing_summary (const struct mtable *);
-
-static void output_report (const struct means *, int, const struct mtable *);
-
-
-struct per_cat_data
+/* Lookup the set of control variables described by MT, C and NOT_WILD,
+ in the hash table MAP. If there is no such entry, then create a
+ cell with these paremeters and add is to MAP.
+ If the generated cell has childen, repeat for all the children.
+ Returns the root cell.
+*/
+static struct cell *
+service_cell_map (const struct means *means, const struct mtable *mt,
+ const struct ccase *c,
+ unsigned int not_wild,
+ struct hmap *map,
+ const struct cell *pcell,
+ int level,
+ const struct workspace *ws)
{
- struct per_var_data *pvd;
-
- bool warn;
-};
-
-static void *
-create_n (const void *aux1, void *aux2)
-{
- int i, v;
- const struct means *means = aux1;
- struct mtable *table = aux2;
- struct per_cat_data *per_cat_data = xmalloc (sizeof *per_cat_data);
-
- struct per_var_data *pvd = xcalloc (table->n_dep_vars, sizeof *pvd);
-
- for (v = 0; v < table->n_dep_vars; ++v)
+ struct cell *cell = NULL;
+ if (map)
{
- enum moment maxmom = MOMENT_KURTOSIS;
- struct per_var_data *pp = &pvd[v];
-
- pp->cell_stats = xcalloc (means->n_cells, sizeof *pp->cell_stats);
-
-
- for (i = 0; i < means->n_cells; ++i)
+ if (!control_var_missing (means, mt, not_wild, c, ws))
{
- int csi = means->cells[i];
- const struct cell_spec *cs = &cell_spec[csi];
- if (cs->sc)
+ /* Lookup this set of values in the cell's hash table. */
+ unsigned int hash = generate_hash (mt, c, not_wild, ws);
+ cell = lookup_cell (mt, map, hash, c, not_wild, ws);
+
+ /* If it has not been seen before, then create a new
+ subcell, with this set of values, and insert it
+ into the table. */
+ if (cell == NULL)
{
- pp->cell_stats[i] = cs->sc (means->pool);
+ cell = generate_cell (means, mt, c, not_wild, pcell, ws);
+ hmap_insert (map, &cell->hmap_node, hash);
}
}
- pp->mom = moments1_create (maxmom);
}
-
-
- per_cat_data->pvd = pvd;
- per_cat_data->warn = true;
- return per_cat_data;
-}
-
-static void
-update_n (const void *aux1, void *aux2, void *user_data, const struct ccase *c, double weight)
-{
- int i;
- int v = 0;
- const struct means *means = aux1;
- struct mtable *table = aux2;
- struct per_cat_data *per_cat_data = user_data;
-
- for (v = 0; v < table->n_dep_vars; ++v)
+ else
{
- struct per_var_data *pvd = &per_cat_data->pvd[v];
-
- const double x = case_data (c, table->dep_vars[v])->f;
-
- for (i = 0; i < table->n_interactions; ++i)
- {
- if ( is_missing (means, table->dep_vars[v],
- table->interactions[i], c))
- goto end;
- }
+ /* This condition should only happen in the root node case. */
+ cell = ws->root_cell;
+ if (cell == NULL &&
+ !control_var_missing (means, mt, not_wild, c, ws))
+ cell = generate_cell (means, mt, c, not_wild, pcell, ws);
+ }
- for (i = 0; i < means->n_cells; ++i)
+ if (cell)
+ {
+ /* Here is where the business really happens! After
+ testing for missing values, the cell's statistics
+ are accumulated. */
+ if (!control_var_missing (means, mt, not_wild, c, ws))
+ {
+ for (int v = 0; v < mt->n_dep_vars; ++v)
+ {
+ const struct variable *dep_var = mt->dep_vars[v];
+ const union value *vv = case_data (c, dep_var);
+ if (var_is_value_missing (dep_var, vv) & means->dep_exclude)
+ continue;
+
+ for (int stat = 0; stat < means->n_statistics; ++stat)
+ {
+ const double weight = dict_get_case_weight (means->dict, c,
+ NULL);
+ stat_update *su = cell_spec[means->statistics[stat]].su;
+ su (cell->stat[stat + v * means->n_statistics], weight,
+ case_num (c, dep_var));
+ }
+ }
+ }
+
+ /* Recurse into all the children (if there are any). */
+ for (int i = 0; i < cell->n_children; ++i)
{
- const int csi = means->cells[i];
- const struct cell_spec *cs = &cell_spec[csi];
-
-
- if (cs->su)
- cs->su (pvd->cell_stats[i],
- weight, x);
+ struct cell_container *cc = cell->children + i;
+ service_cell_map (means, mt, c,
+ not_wild | (0x1U << (i + level)),
+ &cc->map, cell, level + i + 1, ws);
}
-
- moments1_add (pvd->mom, x, weight);
-
- end:
- continue;
}
+
+ return cell;
}
+/* Do all the necessary preparation and pre-calculation that
+ needs to be done before iterating the data. */
static void
-calculate_n (const void *aux1, void *aux2, void *user_data)
+prepare_means (struct means *cmd)
{
- int i;
- int v = 0;
- struct per_cat_data *per_cat_data = user_data;
- const struct means *means = aux1;
- struct mtable *table = aux2;
-
- for (v = 0; v < table->n_dep_vars; ++v)
+ for (int t = 0; t < cmd->n_tables; ++t)
{
- struct per_var_data *pvd = &per_cat_data->pvd[v];
- for (i = 0; i < means->n_cells; ++i)
- {
- int csi = means->cells[i];
- const struct cell_spec *cs = &cell_spec[csi];
-
- if (cs->su)
- cs->sd (pvd, pvd->cell_stats[i]);
- }
+ struct mtable *mt = cmd->table + t;
+
+ for (int i = 0; i < mt->n_combinations; ++i)
+ {
+ struct workspace *ws = mt->ws + i;
+ ws->root_cell = NULL;
+ ws->control_idx = xcalloc (mt->n_layers, sizeof *ws->control_idx);
+ ws->instances = xcalloc (mt->n_layers, sizeof *ws->instances);
+ int cmb = i;
+ for (int l = mt->n_layers - 1; l >= 0; --l)
+ {
+ struct cell_container *instances = ws->instances + l;
+ const struct layer *layer = mt->layers[l];
+ ws->control_idx[l] = cmb % layer->n_factor_vars;
+ cmb /= layer->n_factor_vars;
+ hmap_init (&instances->map);
+ }
+ }
}
}
+/* Do all the necessary calculations that occur AFTER iterating
+ the data. */
static void
-run_means (struct means *cmd, struct casereader *input,
- const struct dataset *ds UNUSED)
+post_means (struct means *cmd)
{
- int i,t;
- const struct variable *wv = dict_get_weight (cmd->dict);
- struct ccase *c;
- struct casereader *reader;
-
- struct payload payload;
- payload.create = create_n;
- payload.update = update_n;
- payload.destroy = calculate_n;
-
- for (t = 0; t < cmd->n_tables; ++t)
- {
- struct mtable *table = &cmd->table[t];
- table->cats
- = categoricals_create (table->interactions,
- table->n_interactions, wv, cmd->exclude);
-
- categoricals_set_payload (table->cats, &payload, cmd, table);
- }
-
- for (reader = casereader_clone (input);
- (c = casereader_read (reader)) != NULL; case_unref (c))
+ for (int t = 0; t < cmd->n_tables; ++t)
{
- for (t = 0; t < cmd->n_tables; ++t)
+ struct mtable *mt = cmd->table + t;
+ for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
{
- bool something_missing = false;
- int v;
- struct mtable *table = &cmd->table[t];
+ struct workspace *ws = mt->ws + cmb;
+ if (ws->root_cell == NULL)
+ continue;
+ arrange_cells (ws, ws->root_cell, mt);
+ /* The root cell should have no parent. */
+ assert (ws->root_cell->parent_cell == 0);
- for (v = 0; v < table->n_dep_vars; ++v)
+ for (int l = 0; l < mt->n_layers; ++l)
{
- int i;
- for (i = 0; i < table->n_interactions; ++i)
+ struct cell_container *instances = ws->instances + l;
+ bt_init (&instances->bt, compare_instance_3way, NULL);
+
+ /* Iterate the instance hash table, and insert each instance
+ into the binary tree BT. */
+ struct instance *inst;
+ HMAP_FOR_EACH (inst, struct instance, hmap_node,
+ &instances->map)
{
- const bool missing =
- is_missing (cmd, table->dep_vars[v],
- table->interactions[i], c);
- if (missing)
- {
- something_missing = true;
- table->summary[v * table->n_interactions + i].missing++;
- }
- else
- table->summary[v * table->n_interactions + i].non_missing++;
+ bt_insert (&instances->bt, &inst->bt_node);
}
- }
- if ( something_missing && cmd->listwise_exclude)
- continue;
- categoricals_update (table->cats, c);
+ /* Iterate the binary tree (in order) and assign the index
+ member accordingly. */
+ int index = 0;
+ BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
+ {
+ inst->index = index++;
+ }
+ }
}
}
- casereader_destroy (reader);
-
- for (t = 0; t < cmd->n_tables; ++t)
- {
- struct mtable *table = &cmd->table[t];
-
- categoricals_done (table->cats);
- }
+}
- for (t = 0; t < cmd->n_tables; ++t)
+/* Update the summary information (the missings and the totals). */
+static void
+update_summaries (const struct means *means, struct mtable *mt,
+ const struct ccase *c, double weight)
+{
+ for (int dv = 0; dv < mt->n_dep_vars; ++dv)
{
- const struct mtable *table = &cmd->table[t];
-
- output_case_processing_summary (table);
-
- for (i = 0; i < table->n_interactions; ++i)
+ for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
{
- output_report (cmd, i, table);
+ struct workspace *ws = mt->ws + cmb;
+ struct summary *summ = mt->summ
+ + cmb * mt->n_dep_vars + dv;
+
+ summ->n_total += weight;
+ const struct variable *var = mt->dep_vars[dv];
+ const union value *vv = case_data (c, var);
+ /* First check if the dependent variable is missing. */
+ if (var_is_value_missing (var, vv) & means->dep_exclude)
+ summ->n_missing += weight;
+ /* If the dep var is not missing, then check each
+ control variable. */
+ else
+ for (int l = 0; l < mt->n_layers; ++l)
+ {
+ const struct layer *layer = mt->layers [l];
+ const struct variable *var
+ = layer->factor_vars[ws->control_idx[l]];
+ const union value *vv = case_data (c, var);
+ if (var_is_value_missing (var, vv) & means->ctrl_exclude)
+ {
+ summ->n_missing += weight;
+ break;
+ }
+ }
}
-
- categoricals_destroy (table->cats);
}
}
-static void
-output_case_processing_summary (const struct mtable *table)
+void
+run_means (struct means *cmd, struct casereader *input,
+ const struct dataset *ds UNUSED)
{
- int i, v;
- const int heading_columns = 1;
- const int heading_rows = 3;
- struct tab_table *t;
-
- const int nr = heading_rows + table->n_interactions * table->n_dep_vars;
- const int nc = 7;
-
- t = tab_create (nc, nr);
- tab_title (t, _("Case Processing Summary"));
-
- tab_headers (t, heading_columns, 0, heading_rows, 0);
-
- tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
-
- tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
- tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
-
-
- tab_joint_text (t, heading_columns, 0,
- nc - 1, 0, TAB_CENTER | TAT_TITLE, _("Cases"));
-
- tab_joint_text (t, 1, 1, 2, 1, TAB_CENTER | TAT_TITLE, _("Included"));
- tab_joint_text (t, 3, 1, 4, 1, TAB_CENTER | TAT_TITLE, _("Excluded"));
- tab_joint_text (t, 5, 1, 6, 1, TAB_CENTER | TAT_TITLE, _("Total"));
-
- tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
- tab_hline (t, TAL_1, heading_columns, nc - 1, 2);
-
+ struct ccase *c = NULL;
+ struct casereader *reader;
- for (i = 0; i < 3; ++i)
- {
- tab_text (t, heading_columns + i * 2, 2, TAB_CENTER | TAT_TITLE,
- _("N"));
- tab_text (t, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
- _("Percent"));
- }
+ prepare_means (cmd);
- for (v = 0; v < table->n_dep_vars; ++v)
+ for (reader = input;
+ (c = casereader_read (reader)) != NULL; case_unref (c))
{
- const struct variable *var = table->dep_vars[v];
- const char *dv_name = var_to_string (var);
- for (i = 0; i < table->n_interactions; ++i)
+ const double weight
+ = dict_get_case_weight (cmd->dict, c, NULL);
+ for (int t = 0; t < cmd->n_tables; ++t)
{
- const int row = v * table->n_interactions + i;
- const struct interaction *iact = table->interactions[i];
- casenumber n_total;
-
- struct string str;
- ds_init_cstr (&str, dv_name);
- ds_put_cstr (&str, ": ");
-
- interaction_to_string (iact, &str);
-
- tab_text (t, 0, row + heading_rows,
- TAB_LEFT | TAT_TITLE, ds_cstr (&str));
-
-
- n_total = table->summary[row].missing +
- table->summary[row].non_missing;
-
- tab_double (t, 1, row + heading_rows,
- 0, table->summary[row].non_missing, &F_8_0);
-
- tab_text_format (t, 2, row + heading_rows,
- 0, _("%g%%"),
- table->summary[row].non_missing / (double) n_total * 100.0);
-
-
- tab_double (t, 3, row + heading_rows,
- 0, table->summary[row].missing, &F_8_0);
-
-
- tab_text_format (t, 4, row + heading_rows,
- 0, _("%g%%"),
- table->summary[row].missing / (double) n_total * 100.0);
-
-
- tab_double (t, 5, row + heading_rows,
- 0, table->summary[row].missing +
- table->summary[row].non_missing, &F_8_0);
-
- tab_text_format (t, 6, row + heading_rows,
- 0, _("%g%%"),
- n_total / (double) n_total * 100.0);
+ struct mtable *mt = cmd->table + t;
+ update_summaries (cmd, mt, c, weight);
+ for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
+ {
+ struct workspace *ws = mt->ws + cmb;
- ds_destroy (&str);
+ ws->root_cell = service_cell_map (cmd, mt, c,
+ 0U, NULL, NULL, 0, ws);
+ }
}
}
+ casereader_destroy (reader);
- tab_submit (t);
+ post_means (cmd);
}
+struct lexer;
-
-static void
-output_report (const struct means *cmd, int iact_idx,
- const struct mtable *table)
+int
+cmd_means (struct lexer *lexer, struct dataset *ds)
{
- int grp;
- int i;
-
- const struct interaction *iact = table->interactions[iact_idx];
-
- const int heading_columns = 1 + iact->n_vars;
- const int heading_rows = 1;
- struct tab_table *t;
-
- const int n_cats = categoricals_n_count (table->cats, iact_idx);
-
- const int nr = n_cats * table->n_dep_vars + heading_rows;
-
- const int nc = heading_columns + cmd->n_cells;
-
- t = tab_create (nc, nr);
- tab_title (t, _("Report"));
+ struct means means;
+ means.pool = pool_create ();
- tab_headers (t, heading_columns, 0, heading_rows, 0);
+ means.ctrl_exclude = MV_ANY;
+ means.dep_exclude = MV_ANY;
+ means.table = NULL;
+ means.n_tables = 0;
- tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
+ means.dict = dataset_dict (ds);
- tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
- tab_vline (t, TAL_2, iact->n_vars, 0, nr - 1);
+ means.n_statistics = 3;
+ means.statistics = pool_calloc (means.pool, 3, sizeof *means.statistics);
+ means.statistics[0] = MEANS_MEAN;
+ means.statistics[1] = MEANS_N;
+ means.statistics[2] = MEANS_STDDEV;
- for (i = 0; i < iact->n_vars; ++i)
- {
- tab_text (t, 1 + i, 0, TAB_CENTER | TAT_TITLE,
- var_to_string (iact->vars[i]));
- }
+ if (! means_parse (lexer, &means))
+ goto error;
- for (i = 0; i < cmd->n_cells; ++i)
+ /* Calculate some constant data for each table. */
+ for (int t = 0; t < means.n_tables; ++t)
{
- tab_text (t, heading_columns + i, 0,
- TAB_CENTER | TAT_TITLE,
- gettext (cell_spec[cmd->cells[i]].title));
+ struct mtable *mt = means.table + t;
+ mt->n_combinations = 1;
+ for (int l = 0; l < mt->n_layers; ++l)
+ mt->n_combinations *= mt->layers[l]->n_factor_vars;
}
+ {
+ struct casegrouper *grouper;
+ struct casereader *group;
+ bool ok;
- for (i = 0; i < n_cats; ++i)
- {
- int v, dv;
- const struct ccase *c =
- categoricals_get_case_by_category_real (table->cats, iact_idx, i);
-
- for (dv = 0; dv < table->n_dep_vars; ++dv)
+ grouper = casegrouper_create_splits (proc_open (ds), means.dict);
+ while (casegrouper_get_next_group (grouper, &group))
+ {
+ /* Allocate the workspaces. */
+ for (int t = 0; t < means.n_tables; ++t)
{
- tab_text (t, 0,
- heading_rows + dv * n_cats,
- TAB_RIGHT | TAT_TITLE,
- var_get_name (table->dep_vars[dv])
- );
-
- if ( dv > 0)
- tab_hline (t, TAL_1, 0, nc - 1, heading_rows + dv * n_cats);
-
- for (v = 0; v < iact->n_vars; ++v)
- {
- const struct variable *var = iact->vars[v];
- const union value *val = case_data (c, var);
- struct string str;
- ds_init_empty (&str);
- var_append_value_name (var, val, &str);
-
- tab_text (t, 1 + v, heading_rows + dv * n_cats + i,
- TAB_RIGHT | TAT_TITLE, ds_cstr (&str));
-
- ds_destroy (&str);
- }
+ struct mtable *mt = means.table + t;
+ mt->summ = xcalloc (mt->n_combinations * mt->n_dep_vars,
+ sizeof (*mt->summ));
+ mt->ws = xcalloc (mt->n_combinations, sizeof (*mt->ws));
}
- }
-
- for (grp = 0; grp < n_cats; ++grp)
- {
- int dv;
- struct per_cat_data *per_cat_data =
- categoricals_get_user_data_by_category_real (table->cats, iact_idx, grp);
-
- for (dv = 0; dv < table->n_dep_vars; ++dv)
- {
- const struct per_var_data *pvd = &per_cat_data->pvd[dv];
- for (i = 0; i < cmd->n_cells; ++i)
- {
- const int csi = cmd->cells[i];
- const struct cell_spec *cs = &cell_spec[csi];
+ run_means (&means, group, ds);
+ for (int t = 0; t < means.n_tables; ++t)
+ {
+ const struct mtable *mt = means.table + t;
+
+ means_case_processing_summary (mt);
+ means_shipout (mt, &means);
+
+ for (int i = 0; i < mt->n_combinations; ++i)
+ {
+ struct workspace *ws = mt->ws + i;
+ if (ws->root_cell == NULL)
+ continue;
+
+ means_destroy_cells (&means, ws->root_cell, mt);
+ }
+ }
+
+ /* Destroy the workspaces. */
+ for (int t = 0; t < means.n_tables; ++t)
+ {
+ struct mtable *mt = means.table + t;
+ free (mt->summ);
+ for (int i = 0; i < mt->n_combinations; ++i)
+ {
+ struct workspace *ws = mt->ws + i;
+ destroy_workspace (mt, ws);
+ }
+ free (mt->ws);
+ }
+ }
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
+ }
- double result = cs->sd (pvd, pvd->cell_stats[i]);
+ pool_destroy (means.pool);
+ return CMD_SUCCESS;
- tab_double (t, heading_columns + i,
- heading_rows + grp + dv * n_cats,
- 0, result, 0);
- }
- }
- }
+ error:
- tab_submit (t);
+ pool_destroy (means.pool);
+ return CMD_FAILURE;
}