1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case.h"
20 #include "data/casegrouper.h"
21 #include "data/casereader.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/format.h"
25 #include "data/variable.h"
27 #include "libpspp/hmap.h"
28 #include "libpspp/bt.h"
29 #include "libpspp/hash-functions.h"
31 #include "count-one-bits.h"
32 #include "count-leading-zeros.h"
34 #include "output/pivot-table.h"
40 #define _(msgid) gettext (msgid)
41 #define N_(msgid) (msgid)
44 /* A "cell" in this procedure represents a distinct value of the
45 procedure's categorical variables, and a set of summary statistics
46 of all cases which whose categorical variables have that set of
47 values. For example, the dataset
56 has three cells in layer 0 and two cells in layer 1 in addition
57 to a "grand summary" cell to which all (non-missing) cases
60 The cells form a n-ary tree structure with the "grand summary"
65 struct hmap_node hmap_node; /* Element in hash table. */
66 struct bt_node bt_node; /* Element in binary tree */
69 struct cell_container *children;
71 /* The statistics to be calculated for the cell. */
72 struct statistic **stat;
74 /* The parent of this cell, or NULL if this is the root cell. */
75 const struct cell *parent_cell;
77 /* A bit-field variable which indicates which control variables
78 are allocated a fixed value (for this cell), and which are
81 A one indicates a fixed value. A zero indicates a wildcard.
82 Wildcard values are used to calculate totals and sub-totals.
84 unsigned int not_wild;
89 /* The variables corresponding to the above values. */
90 const struct variable **vars;
93 /* A structure used to find the union of all values used
94 within a layer, and to sort those values. */
97 struct hmap_node hmap_node; /* Element in hash table. */
98 struct bt_node bt_node; /* Element in binary tree */
100 /* A unique, consecutive, zero based index identifying this
104 /* The top level value of this instance. */
106 const struct variable *var;
111 destroy_workspace (const struct mtable *mt, struct workspace *ws)
113 for (int l = 0; l < mt->n_layers; ++l)
115 struct cell_container *instances = ws->instances + l;
116 struct instance *inst;
117 struct instance *next;
118 HMAP_FOR_EACH_SAFE (inst, next, struct instance, hmap_node,
123 hmap_destroy (&instances->map);
125 free (ws->control_idx);
126 free (ws->instances);
131 destroy_cell (const struct means *means,
132 const struct mtable *mt, struct cell *cell)
135 for (int i = 0; i < mt->n_layers; ++i)
137 if (0 == ((cell->not_wild >> i) & 0x1))
140 const struct layer *layer = mt->layers[i];
141 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
143 struct workspace *ws = mt->ws + cmb;
144 const struct variable *var
145 = layer->factor_vars[ws->control_idx[i]];
147 int width = var_get_width (var);
148 value_destroy (&cell->values[idx++], width);
151 for (int i = 0; i < cell->n_children; ++i)
153 struct cell_container *container = cell->children + i;
154 hmap_destroy (&container->map);
157 for (int v = 0; v < mt->n_dep_vars; ++v)
159 for (int s = 0; s < means->n_statistics; ++s)
161 stat_destroy *des = cell_spec[means->statistics[s]].sf;
162 des (cell->stat[s + v * means->n_statistics]);
167 free (cell->children);
174 /* Walk the tree in postorder starting from CELL and destroy all the
177 means_destroy_cells (const struct means *means, struct cell *cell,
178 const struct mtable *table)
180 for (int i = 0; i < cell->n_children; ++i)
182 struct cell_container *container = cell->children + i;
183 struct cell *sub_cell;
185 HMAP_FOR_EACH_SAFE (sub_cell, next, struct cell, hmap_node,
188 means_destroy_cells (means, sub_cell, table);
192 destroy_cell (means, table, cell);
196 dump_cell (const struct cell *cell, const struct mtable *mt, int level)
198 for (int l = 0; l < level; ++l)
200 printf ("%p: ", cell);
201 for (int i = 0; i < mt->n_layers; ++i)
203 putchar (((cell->not_wild >> i) & 0x1) ? 'w' : '.');
207 for (int i = 0; i < mt->n_layers; ++i)
209 if ((cell->not_wild >> i) & 0x1)
211 printf ("%s: ", var_get_name (cell->vars[x]));
212 printf ("%g ", cell->values[x++].f);
217 stat_get *sg = cell_spec[MEANS_N].sd;
218 printf ("--- S1: %g", sg (cell->stat[0]));
220 printf ("--- N Children: %d", cell->n_children);
221 // printf ("--- Level: %d", level);
222 printf ("--- Parent: %p", cell->parent_cell);
227 dump_indeces (const size_t *indexes, int n)
229 for (int i = 0 ; i < n; ++i)
231 printf ("%ld; ", indexes[i]);
236 /* Dump the tree in pre-order. */
238 dump_tree (const struct cell *cell, const struct mtable *table,
239 int level, const struct cell *parent)
241 assert (cell->parent_cell == parent);
242 dump_cell (cell, table, level);
244 for (int i = 0; i < cell->n_children; ++i)
246 struct cell_container *container = cell->children + i;
247 struct cell *sub_cell;
248 BT_FOR_EACH (sub_cell, struct cell, bt_node, &container->bt)
250 dump_tree (sub_cell, table, level + 1, cell);
255 /* Generate a hash based on the values of the N variables in
256 the array VARS which are taken from the case C. */
258 generate_hash (const struct mtable *mt,
259 const struct ccase *c,
260 unsigned int not_wild,
261 const struct workspace *ws)
263 unsigned int hash = 0;
264 for (int i = 0; i < mt->n_layers; ++i)
266 if (0 == ((not_wild >> i) & 0x1))
269 const struct layer *layer = mt->layers[i];
270 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
271 const union value *vv = case_data (c, var);
272 int width = var_get_width (var);
273 hash = hash_int (i, hash);
274 hash = value_hash (vv, width, hash);
280 /* Create a cell based on the N variables in the array VARS,
281 which are indeces into the case C.
282 The caller is responsible for destroying this cell when
285 generate_cell (const struct means *means,
286 const struct mtable *mt,
287 const struct ccase *c,
288 unsigned int not_wild,
289 const struct cell *pcell,
290 const struct workspace *ws)
292 int n_vars = count_one_bits (not_wild);
293 struct cell *cell = xzalloc ((sizeof *cell));
294 cell->values = xcalloc (n_vars, sizeof *cell->values);
295 cell->vars = xcalloc (n_vars, sizeof *cell->vars);
296 cell->not_wild = not_wild;
298 cell->parent_cell = pcell;
299 cell->n_children = mt->n_layers -
300 (sizeof (cell->not_wild) * CHAR_BIT) +
301 count_leading_zeros (cell->not_wild);
304 for (int i = 0; i < mt->n_layers; ++i)
306 if (0 == ((not_wild >> i) & 0x1))
309 const struct layer *layer = mt->layers[i];
310 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
311 const union value *vv = case_data (c, var);
312 int width = var_get_width (var);
313 cell->vars[idx] = var;
314 value_clone (&cell->values[idx++], vv, width);
316 assert (idx == n_vars);
318 cell->children = xcalloc (cell->n_children, sizeof *cell->children);
319 for (int i = 0; i < cell->n_children; ++i)
321 struct cell_container *container = cell->children + i;
322 hmap_init (&container->map);
325 cell->stat = xcalloc (means->n_statistics * mt->n_dep_vars, sizeof *cell->stat);
326 for (int v = 0; v < mt->n_dep_vars; ++v)
328 for (int stat = 0; stat < means->n_statistics; ++stat)
330 stat_create *sc = cell_spec[means->statistics[stat]].sc;
332 cell->stat[stat + v * means->n_statistics] = sc (means->pool);
339 /* If a cell based on the N variables in the array VARS,
340 which are indeces into the case C and whose hash is HASH,
341 exists in HMAP, then return that cell.
342 Otherwise, return NULL. */
344 lookup_cell (const struct mtable *mt,
345 struct hmap *hmap, unsigned int hash,
346 const struct ccase *c,
347 unsigned int not_wild,
348 const struct workspace *ws)
350 struct cell *cell = NULL;
351 HMAP_FOR_EACH_WITH_HASH (cell, struct cell, hmap_node, hash, hmap)
355 if (cell->not_wild != not_wild)
357 for (int i = 0; i < mt->n_layers; ++i)
359 if (0 == ((cell->not_wild >> i) & 0x1))
362 const struct layer *layer = mt->layers[i];
363 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
364 const union value *vv = case_data (c, var);
365 int width = var_get_width (var);
366 assert (var == cell->vars[idx]);
367 if (!value_equal (vv, &cell->values[idx++], width))
380 /* A comparison function used to sort cells in a binary tree.
381 Only the innermost value needs to be compared, because no
382 two cells with similar outer values will appear in the same
385 cell_compare_3way (const struct bt_node *a,
386 const struct bt_node *b,
387 const void *aux UNUSED)
389 const struct cell *fa = BT_DATA (a, struct cell, bt_node);
390 const struct cell *fb = BT_DATA (b, struct cell, bt_node);
392 assert (fa->not_wild == fb->not_wild);
393 int vidx = count_one_bits (fa->not_wild) - 1;
394 assert (fa->vars[vidx] == fb->vars[vidx]);
396 return value_compare_3way (&fa->values[vidx],
398 var_get_width (fa->vars[vidx]));
401 /* A comparison function used to sort cells in a binary tree. */
403 compare_instance_3way (const struct bt_node *a,
404 const struct bt_node *b,
405 const void *aux UNUSED)
407 const struct instance *fa = BT_DATA (a, struct instance, bt_node);
408 const struct instance *fb = BT_DATA (b, struct instance, bt_node);
410 assert (fa->var == fb->var);
412 return value_compare_3way (&fa->value,
414 var_get_width (fa->var));
418 static void arrange_cells (struct workspace *ws,
419 struct cell *cell, const struct mtable *table);
422 /* Iterate CONTAINER's map inserting a copy of its elements into
423 CONTAINER's binary tree. Also, for each layer in TABLE, create
424 an instance container, containing the union of all elements in
427 arrange_cell (struct workspace *ws, struct cell_container *container,
428 const struct mtable *mt)
430 struct bt *bt = &container->bt;
431 struct hmap *map = &container->map;
432 bt_init (bt, cell_compare_3way, NULL);
435 HMAP_FOR_EACH (cell, struct cell, hmap_node, map)
437 bt_insert (bt, &cell->bt_node);
440 for (int i = 0; i < mt->n_layers; ++i)
442 if (0 == ((cell->not_wild >> i) & 0x1))
445 struct cell_container *instances = ws->instances + i;
446 const struct variable *var = cell->vars[idx];
447 int width = var_get_width (var);
449 = value_hash (&cell->values[idx], width, 0);
451 struct instance *inst = NULL;
452 struct instance *next = NULL;
453 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance,
455 hash, &instances->map)
457 assert (cell->vars[idx] == var);
458 if (value_equal (&inst->value,
468 inst = xzalloc (sizeof *inst);
471 value_clone (&inst->value, &cell->values[idx],
473 hmap_insert (&instances->map, &inst->hmap_node, hash);
479 arrange_cells (ws, cell, mt);
483 /* Arrange the children and then all the subtotals. */
485 arrange_cells (struct workspace *ws, struct cell *cell,
486 const struct mtable *table)
488 for (int i = 0; i < cell->n_children; ++i)
490 struct cell_container *container = cell->children + i;
491 arrange_cell (ws, container, table);
498 /* If the top level value in CELL, has an instance in the L_IDX'th layer,
499 then return that instance. Otherwise return NULL. */
500 static const struct instance *
501 lookup_instance (const struct mtable *mt, const struct workspace *ws,
502 int l_idx, const struct cell *cell)
504 const struct layer *layer = mt->layers[l_idx];
505 int n_vals = count_one_bits (cell->not_wild);
506 const struct variable *var = layer->factor_vars[ws->control_idx[l_idx]];
507 const union value *val = cell->values + n_vals - 1;
508 int width = var_get_width (var);
509 unsigned int hash = value_hash (val, width, 0);
510 const struct cell_container *instances = ws->instances + l_idx;
511 struct instance *inst = NULL;
512 struct instance *next;
513 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next,
514 struct instance, hmap_node,
515 hash, &instances->map)
517 if (value_equal (val, &inst->value, width))
523 /* Enter the values into PT. */
525 populate_table (const struct means *means, const struct mtable *mt,
526 const struct workspace *ws,
527 const struct cell *cell,
528 struct pivot_table *pt)
530 size_t *indexes = xcalloc (pt->n_dimensions, sizeof *indexes);
531 for (int v = 0; v < mt->n_dep_vars; ++v)
533 for (int s = 0; s < means->n_statistics; ++s)
536 if (mt->n_dep_vars > 1)
539 int stat = means->statistics[s];
540 stat_get *sg = cell_spec[stat].sd;
542 const struct cell *pc = cell;
543 for (; i < pt->n_dimensions; ++i)
545 int l_idx = pt->n_dimensions - i - 1;
546 const struct cell_container *instances = ws->instances + l_idx;
547 if (0 == (cell->not_wild >> l_idx & 0x1U))
549 indexes [i] = hmap_count (&instances->map);
554 const struct instance *inst
555 = lookup_instance (mt, ws, l_idx, pc);
557 indexes [i] = inst->index;
558 pc = pc->parent_cell;
563 int idx = s + v * means->n_statistics;
564 pivot_table_put (pt, indexes, pt->n_dimensions,
565 pivot_value_new_number (sg (cell->stat[idx])));
570 for (int i = 0; i < cell->n_children; ++i)
572 struct cell_container *container = cell->children + i;
573 struct cell *child = NULL;
574 BT_FOR_EACH (child, struct cell, bt_node, &container->bt)
576 populate_table (means, mt, ws, child, pt);
582 create_table_structure (const struct mtable *mt, struct pivot_table *pt,
583 const struct workspace *ws)
585 int * lindexes = ws->control_idx;
586 /* The inner layers are situated rightmost in the table.
587 So this iteration is in reverse order. */
588 for (int l = mt->n_layers -1; l >=0 ; --l)
590 const struct layer *layer = mt->layers[l];
591 const struct cell_container *instances = ws->instances + l;
592 const struct variable *var = layer->factor_vars[lindexes[l]];
593 struct pivot_dimension *dim_layer
594 = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
595 var_to_string (var));
596 dim_layer->root->show_label = true;
598 /* Place the values of the control variables as table headings. */
600 struct instance *inst = NULL;
601 BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
603 struct substring space = SS_LITERAL_INITIALIZER ("\t ");
605 ds_init_empty (&str);
606 var_append_value_name (var,
610 ds_ltrim (&str, space);
612 pivot_category_create_leaf (dim_layer->root,
613 pivot_value_new_text (ds_cstr (&str)));
619 pivot_category_create_leaf (dim_layer->root,
620 pivot_value_new_text ("Total"));
624 /* Initialise C_DES with a string describing the control variable
625 relating to MT, LINDEXES. */
627 layers_to_string (const struct mtable *mt, const int *lindexes,
628 struct string *c_des)
630 for (int l = 0; l < mt->n_layers; ++l)
632 const struct layer *layer = mt->layers[l];
633 const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
635 ds_put_cstr (c_des, " * ");
636 ds_put_cstr (c_des, var_get_name (ctrl_var));
641 populate_case_processing_summary (struct pivot_category *pc,
642 const struct mtable *mt,
648 for (l = 0; l < mt->n_layers; ++l)
650 const struct layer *layer = mt->layers[l];
651 const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
653 ds_put_cstr (&ds, " * ");
654 ds_put_cstr (&ds, var_get_name (ctrl_var));
656 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
659 ds_init_empty (&dss);
660 ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
661 if (mt->n_layers > 0)
663 ds_put_cstr (&dss, " * ");
664 ds_put_substring (&dss, ds.ss);
666 pivot_category_create_leaf (pc,
667 pivot_value_new_text (ds_cstr (&dss)));
674 /* Create the "Case Processing Summary" table. */
676 means_case_processing_summary (const struct mtable *mt)
678 struct pivot_table *pt = pivot_table_create (N_("Case Processing Summary"));
680 struct pivot_dimension *dim_cases =
681 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Cases"));
682 dim_cases->root->show_label = true;
684 struct pivot_category *cats[3];
685 cats[0] = pivot_category_create_group (dim_cases->root,
686 N_("Included"), NULL);
687 cats[1] = pivot_category_create_group (dim_cases->root,
688 N_("Excluded"), NULL);
689 cats[2] = pivot_category_create_group (dim_cases->root,
691 for (int i = 0; i < 3; ++i)
693 pivot_category_create_leaf_rc (cats[i],
694 pivot_value_new_text (N_("N")),
696 pivot_category_create_leaf_rc (cats[i],
697 pivot_value_new_text (N_("Percent")),
701 struct pivot_dimension *rows =
702 pivot_dimension_create (pt, PIVOT_AXIS_ROW, N_("Variables"));
704 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
706 const struct workspace *ws = mt->ws + cmb;
707 populate_case_processing_summary (rows->root, mt, ws->control_idx);
708 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
710 int idx = cmb * mt->n_dep_vars + dv;
711 const struct summary *summ = mt->summ + idx;
712 double n_included = summ->n_total - summ->n_missing;
713 pivot_table_put2 (pt, 5, idx,
714 pivot_value_new_number (100.0 * summ->n_total / summ->n_total));
715 pivot_table_put2 (pt, 4, idx,
716 pivot_value_new_number (summ->n_total));
718 pivot_table_put2 (pt, 3, idx,
719 pivot_value_new_number (100.0 * summ->n_missing / summ->n_total));
720 pivot_table_put2 (pt, 2, idx,
721 pivot_value_new_number (summ->n_missing));
723 pivot_table_put2 (pt, 1, idx,
724 pivot_value_new_number (100.0 * n_included / summ->n_total));
725 pivot_table_put2 (pt, 0, idx,
726 pivot_value_new_number (n_included));
730 pivot_table_submit (pt);
734 means_shipout_single (const struct mtable *mt, const struct means *means,
735 const struct workspace *ws)
737 struct pivot_table *pt = pivot_table_create (N_("Report"));
738 pt->omit_empty = true;
740 struct pivot_dimension *dim_cells =
741 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Statistics"));
743 /* Set the statistics headings, eg "Mean", "Std. Dev" etc. */
744 for (int i = 0; i < means->n_statistics; ++i)
746 const struct cell_spec *cs = cell_spec + means->statistics[i];
747 pivot_category_create_leaf_rc
749 pivot_value_new_text (gettext (cs->title)), cs->rc);
752 create_table_structure (mt, pt, ws);
753 populate_table (means, mt, ws, ws->root_cell, pt);
754 pivot_table_submit (pt);
759 means_shipout_multivar (const struct mtable *mt, const struct means *means,
760 const struct workspace *ws)
763 ds_init_empty (&dss);
764 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
766 ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
767 if (mt->n_layers > 0)
768 ds_put_cstr (&dss, " * ");
771 for (int l = 0; l < mt->n_layers; ++l)
773 const struct layer *layer = mt->layers[l];
774 const struct variable *var = layer->factor_vars[ws->control_idx[l]];
775 ds_put_cstr (&dss, var_get_name (var));
776 if (l < mt->n_layers - 1)
777 ds_put_cstr (&dss, " * ");
780 struct pivot_table *pt = pivot_table_create (ds_cstr (&dss));
781 pt->omit_empty = true;
784 struct pivot_dimension *dim_cells =
785 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Variables"));
787 for (int i = 0; i < mt->n_dep_vars; ++i)
789 pivot_category_create_leaf
791 pivot_value_new_variable (mt->dep_vars[i]));
794 struct pivot_dimension *dim_stats
795 = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
797 dim_stats->root->show_label = false;
799 for (int i = 0; i < means->n_statistics; ++i)
801 const struct cell_spec *cs = cell_spec + means->statistics[i];
802 pivot_category_create_leaf_rc
804 pivot_value_new_text (gettext (cs->title)), cs->rc);
807 create_table_structure (mt, pt, ws);
808 populate_table (means, mt, ws, ws->root_cell, pt);
809 pivot_table_submit (pt);
813 means_shipout (const struct mtable *mt, const struct means *means)
815 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
817 const struct workspace *ws = mt->ws + cmb;
818 if (ws->root_cell == NULL)
821 ds_init_empty (&des);
822 layers_to_string (mt, ws->control_idx, &des);
823 msg (MW, _("The table \"%s\" has no non-empty control variables."
824 " No result for this table will be displayed."),
829 if (mt->n_dep_vars > 1)
830 means_shipout_multivar (mt, means, ws);
832 means_shipout_single (mt, means, ws);
840 control_var_missing (const struct means *means,
841 const struct mtable *mt,
842 unsigned int not_wild UNUSED,
843 const struct ccase *c,
844 const struct workspace *ws)
847 for (int l = 0; l < mt->n_layers; ++l)
849 /* if (0 == ((not_wild >> l) & 0x1)) */
854 const struct layer *layer = mt->layers[l];
855 const struct variable *var = layer->factor_vars[ws->control_idx[l]];
856 const union value *vv = case_data (c, var);
858 miss = var_is_value_missing (var, vv, means->ctrl_exclude);
866 /* Lookup the set of control variables described by MT, C and NOT_WILD,
867 in the hash table MAP. If there is no such entry, then create a
868 cell with these paremeters and add is to MAP.
869 If the generated cell has childen, repeat for all the children.
870 Returns the root cell.
873 service_cell_map (const struct means *means, const struct mtable *mt,
874 const struct ccase *c,
875 unsigned int not_wild,
877 const struct cell *pcell,
879 const struct workspace *ws)
881 struct cell *cell = NULL;
884 if (!control_var_missing (means, mt, not_wild, c, ws))
886 /* Lookup this set of values in the cell's hash table. */
887 unsigned int hash = generate_hash (mt, c, not_wild, ws);
888 cell = lookup_cell (mt, map, hash, c, not_wild, ws);
890 /* If it has not been seen before, then create a new
891 subcell, with this set of values, and insert it
895 cell = generate_cell (means, mt, c, not_wild, pcell, ws);
896 hmap_insert (map, &cell->hmap_node, hash);
902 /* This condition should only happen in the root node case. */
903 cell = ws->root_cell;
905 !control_var_missing (means, mt, not_wild, c, ws))
906 cell = generate_cell (means, mt, c, not_wild, pcell, ws);
911 /* Here is where the business really happens! After
912 testing for missing values, the cell's statistics
914 if (!control_var_missing (means, mt, not_wild, c, ws))
916 for (int v = 0; v < mt->n_dep_vars; ++v)
918 const struct variable *dep_var = mt->dep_vars[v];
919 const union value *vv = case_data (c, dep_var);
920 if (var_is_value_missing (dep_var, vv, means->dep_exclude))
923 for (int stat = 0; stat < means->n_statistics; ++stat)
925 const double weight = dict_get_case_weight (means->dict, c,
927 stat_update *su = cell_spec[means->statistics[stat]].su;
928 su (cell->stat[stat + v * means->n_statistics], weight,
929 case_data (c, dep_var)->f);
934 /* Recurse into all the children (if there are any). */
935 for (int i = 0; i < cell->n_children; ++i)
937 struct cell_container *cc = cell->children + i;
938 service_cell_map (means, mt, c,
939 not_wild | (0x1U << (i + level)),
940 &cc->map, cell, level + i + 1, ws);
947 /* Do all the necessary preparation and pre-calculation that
948 needs to be done before iterating the data. */
950 prepare_means (struct means *cmd)
952 for (int t = 0; t < cmd->n_tables; ++t)
954 struct mtable *mt = cmd->table + t;
956 mt->n_combinations = 1;
957 for (int l = 0; l < mt->n_layers; ++l)
958 mt->n_combinations *= mt->layers[l]->n_factor_vars;
960 mt->ws = xzalloc (mt->n_combinations * sizeof (*mt->ws));
961 mt->summ = xzalloc (mt->n_combinations * mt->n_dep_vars
962 * sizeof (*mt->summ));
963 for (int i = 0; i < mt->n_combinations; ++i)
965 struct workspace *ws = mt->ws + i;
966 ws->root_cell = NULL;
967 ws->control_idx = xzalloc (mt->n_layers
968 * sizeof *ws->control_idx);
969 ws->instances = xzalloc (mt->n_layers
970 * sizeof *ws->instances);
972 for (int l = mt->n_layers - 1; l >= 0; --l)
974 struct cell_container *instances = ws->instances + l;
975 const struct layer *layer = mt->layers[l];
976 ws->control_idx[l] = cmb % layer->n_factor_vars;
977 cmb /= layer->n_factor_vars;
978 hmap_init (&instances->map);
985 /* Do all the necessary calculations that occur AFTER iterating
988 post_means (struct means *cmd)
990 for (int t = 0; t < cmd->n_tables; ++t)
992 struct mtable *mt = cmd->table + t;
993 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
995 struct workspace *ws = mt->ws + cmb;
996 if (ws->root_cell == NULL)
998 arrange_cells (ws, ws->root_cell, mt);
999 /* The root cell should have no parent. */
1000 assert (ws->root_cell->parent_cell == 0);
1002 for (int l = 0; l < mt->n_layers; ++l)
1004 struct cell_container *instances = ws->instances + l;
1005 bt_init (&instances->bt, compare_instance_3way, NULL);
1007 /* Iterate the instance hash table, and insert each instance
1008 into the binary tree BT. */
1009 struct instance *inst;
1010 HMAP_FOR_EACH (inst, struct instance, hmap_node,
1013 bt_insert (&instances->bt, &inst->bt_node);
1016 /* Iterate the binary tree (in order) and assign the index
1017 member accordingly. */
1019 BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
1021 inst->index = index++;
1029 /* Update the summary information (the missings and the totals). */
1031 update_summaries (const struct means *means, struct mtable *mt,
1032 const struct ccase *c, double weight)
1034 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
1036 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1038 struct workspace *ws = mt->ws + cmb;
1039 struct summary *summ = mt->summ
1040 + cmb * mt->n_dep_vars + dv;
1042 summ->n_total += weight;
1043 const struct variable *var = mt->dep_vars[dv];
1044 const union value *vv = case_data (c, var);
1045 /* First check if the dependent variable is missing. */
1046 if (var_is_value_missing (var, vv, means->dep_exclude))
1047 summ->n_missing += weight;
1048 /* If the dep var is not missing, then check each
1049 control variable. */
1051 for (int l = 0; l < mt->n_layers; ++l)
1053 const struct layer *layer = mt->layers [l];
1054 const struct variable *var
1055 = layer->factor_vars[ws->control_idx[l]];
1056 const union value *vv = case_data (c, var);
1057 if (var_is_value_missing (var, vv, means->ctrl_exclude))
1059 summ->n_missing += weight;
1069 run_means (struct means *cmd, struct casereader *input,
1070 const struct dataset *ds UNUSED)
1072 struct ccase *c = NULL;
1073 struct casereader *reader;
1075 prepare_means (cmd);
1077 for (reader = input;
1078 (c = casereader_read (reader)) != NULL; case_unref (c))
1081 = dict_get_case_weight (cmd->dict, c, NULL);
1082 for (int t = 0; t < cmd->n_tables; ++t)
1084 struct mtable *mt = cmd->table + t;
1085 update_summaries (cmd, mt, c, weight);
1087 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1089 struct workspace *ws = mt->ws + cmb;
1091 ws->root_cell = service_cell_map (cmd, mt, c,
1092 0U, NULL, NULL, 0, ws);
1096 casereader_destroy (reader);
1102 /* Release all resources allocated by this routine.
1103 This does not include those allocated by the parser,
1104 which exclusively use MEANS->pool. */
1106 destroy_means (struct means *means)
1108 for (int t = 0; t < means->n_tables; ++t)
1110 const struct mtable *table = means->table + t;
1111 for (int i = 0; i < table->n_combinations; ++i)
1113 struct workspace *ws = table->ws + i;
1114 if (ws->root_cell == NULL)
1116 means_destroy_cells (means, ws->root_cell, table);
1118 for (int i = 0; i < table->n_combinations; ++i)
1120 struct workspace *ws = table->ws + i;
1121 destroy_workspace (table, ws);