1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case.h"
20 #include "data/casegrouper.h"
21 #include "data/casereader.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/format.h"
25 #include "data/variable.h"
27 #include "libpspp/hmap.h"
28 #include "libpspp/bt.h"
29 #include "libpspp/hash-functions.h"
31 #include "count-one-bits.h"
32 #include "count-leading-zeros.h"
34 #include "output/pivot-table.h"
40 #define _(msgid) gettext (msgid)
41 #define N_(msgid) (msgid)
44 /* A "cell" in this procedure represents a distinct value of the
45 procedure's categorical variables, and a set of summary statistics
46 of all cases which whose categorical variables have that set of
47 values. For example, the dataset
56 has three cells in layer 0 and two cells in layer 1 in addition
57 to a "grand summary" cell to which all (non-missing) cases
60 The cells form a n-ary tree structure with the "grand summary"
65 struct hmap_node hmap_node; /* Element in hash table. */
66 struct bt_node bt_node; /* Element in binary tree */
69 struct cell_container *children;
71 /* The statistics to be calculated for the cell. */
72 struct statistic **stat;
74 /* The parent of this cell, or NULL if this is the root cell. */
75 const struct cell *parent_cell;
77 /* A bit-field variable which indicates which control variables
78 are allocated a fixed value (for this cell), and which are
81 A one indicates a fixed value. A zero indicates a wildcard.
82 Wildcard values are used to calculate totals and sub-totals.
84 unsigned int not_wild;
89 /* The variables corresponding to the above values. */
90 const struct variable **vars;
93 /* A structure used to find the union of all values used
94 within a layer, and to sort those values. */
97 struct hmap_node hmap_node; /* Element in hash table. */
98 struct bt_node bt_node; /* Element in binary tree */
100 /* A unique, consecutive, zero based index identifying this
104 /* The top level value of this instance. */
106 const struct variable *var;
111 destroy_workspace (const struct mtable *mt, struct workspace *ws)
113 for (int l = 0; l < mt->n_layers; ++l)
115 struct cell_container *instances = ws->instances + l;
116 struct instance *inst;
117 struct instance *next;
118 HMAP_FOR_EACH_SAFE (inst, next, struct instance, hmap_node,
121 int width = var_get_width (inst->var);
122 value_destroy (&inst->value, width);
125 hmap_destroy (&instances->map);
127 free (ws->control_idx);
128 free (ws->instances);
133 destroy_cell (const struct means *means,
134 const struct mtable *mt, struct cell *cell)
137 for (int i = 0; i < mt->n_layers; ++i)
139 if (0 == ((cell->not_wild >> i) & 0x1))
142 const struct layer *layer = mt->layers[i];
143 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
145 struct workspace *ws = mt->ws + cmb;
146 const struct variable *var
147 = layer->factor_vars[ws->control_idx[i]];
149 int width = var_get_width (var);
150 value_destroy (&cell->values[idx++], width);
153 for (int i = 0; i < cell->n_children; ++i)
155 struct cell_container *container = cell->children + i;
156 hmap_destroy (&container->map);
159 for (int v = 0; v < mt->n_dep_vars; ++v)
161 for (int s = 0; s < means->n_statistics; ++s)
163 stat_destroy *des = cell_spec[means->statistics[s]].sf;
164 des (cell->stat[s + v * means->n_statistics]);
169 free (cell->children);
176 /* Walk the tree in postorder starting from CELL and destroy all the
179 means_destroy_cells (const struct means *means, struct cell *cell,
180 const struct mtable *table)
182 for (int i = 0; i < cell->n_children; ++i)
184 struct cell_container *container = cell->children + i;
185 struct cell *sub_cell;
187 HMAP_FOR_EACH_SAFE (sub_cell, next, struct cell, hmap_node,
190 means_destroy_cells (means, sub_cell, table);
194 destroy_cell (means, table, cell);
198 dump_cell (const struct cell *cell, const struct mtable *mt, int level)
200 for (int l = 0; l < level; ++l)
202 printf ("%p: ", cell);
203 for (int i = 0; i < mt->n_layers; ++i)
205 putchar (((cell->not_wild >> i) & 0x1) ? 'w' : '.');
209 for (int i = 0; i < mt->n_layers; ++i)
211 if ((cell->not_wild >> i) & 0x1)
213 printf ("%s: ", var_get_name (cell->vars[x]));
214 printf ("%g ", cell->values[x++].f);
219 stat_get *sg = cell_spec[MEANS_N].sd;
220 printf ("--- S1: %g", sg (cell->stat[0]));
222 printf ("--- N Children: %d", cell->n_children);
223 // printf ("--- Level: %d", level);
224 printf ("--- Parent: %p", cell->parent_cell);
229 dump_indeces (const size_t *indexes, int n)
231 for (int i = 0 ; i < n; ++i)
233 printf ("%ld; ", indexes[i]);
238 /* Dump the tree in pre-order. */
240 dump_tree (const struct cell *cell, const struct mtable *table,
241 int level, const struct cell *parent)
243 assert (cell->parent_cell == parent);
244 dump_cell (cell, table, level);
246 for (int i = 0; i < cell->n_children; ++i)
248 struct cell_container *container = cell->children + i;
249 struct cell *sub_cell;
250 BT_FOR_EACH (sub_cell, struct cell, bt_node, &container->bt)
252 dump_tree (sub_cell, table, level + 1, cell);
257 /* Generate a hash based on the values of the N variables in
258 the array VARS which are taken from the case C. */
260 generate_hash (const struct mtable *mt,
261 const struct ccase *c,
262 unsigned int not_wild,
263 const struct workspace *ws)
265 unsigned int hash = 0;
266 for (int i = 0; i < mt->n_layers; ++i)
268 if (0 == ((not_wild >> i) & 0x1))
271 const struct layer *layer = mt->layers[i];
272 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
273 const union value *vv = case_data (c, var);
274 int width = var_get_width (var);
275 hash = hash_int (i, hash);
276 hash = value_hash (vv, width, hash);
282 /* Create a cell based on the N variables in the array VARS,
283 which are indeces into the case C.
284 The caller is responsible for destroying this cell when
287 generate_cell (const struct means *means,
288 const struct mtable *mt,
289 const struct ccase *c,
290 unsigned int not_wild,
291 const struct cell *pcell,
292 const struct workspace *ws)
294 int n_vars = count_one_bits (not_wild);
295 struct cell *cell = xzalloc ((sizeof *cell));
296 cell->values = xcalloc (n_vars, sizeof *cell->values);
297 cell->vars = xcalloc (n_vars, sizeof *cell->vars);
298 cell->not_wild = not_wild;
300 cell->parent_cell = pcell;
301 cell->n_children = mt->n_layers -
302 (sizeof (cell->not_wild) * CHAR_BIT) +
303 count_leading_zeros (cell->not_wild);
306 for (int i = 0; i < mt->n_layers; ++i)
308 if (0 == ((not_wild >> i) & 0x1))
311 const struct layer *layer = mt->layers[i];
312 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
313 const union value *vv = case_data (c, var);
314 int width = var_get_width (var);
315 cell->vars[idx] = var;
316 value_clone (&cell->values[idx++], vv, width);
318 assert (idx == n_vars);
320 cell->children = xcalloc (cell->n_children, sizeof *cell->children);
321 for (int i = 0; i < cell->n_children; ++i)
323 struct cell_container *container = cell->children + i;
324 hmap_init (&container->map);
327 cell->stat = xcalloc (means->n_statistics * mt->n_dep_vars, sizeof *cell->stat);
328 for (int v = 0; v < mt->n_dep_vars; ++v)
330 for (int stat = 0; stat < means->n_statistics; ++stat)
332 stat_create *sc = cell_spec[means->statistics[stat]].sc;
334 cell->stat[stat + v * means->n_statistics] = sc (means->pool);
341 /* If a cell based on the N variables in the array VARS,
342 which are indeces into the case C and whose hash is HASH,
343 exists in HMAP, then return that cell.
344 Otherwise, return NULL. */
346 lookup_cell (const struct mtable *mt,
347 struct hmap *hmap, unsigned int hash,
348 const struct ccase *c,
349 unsigned int not_wild,
350 const struct workspace *ws)
352 struct cell *cell = NULL;
353 HMAP_FOR_EACH_WITH_HASH (cell, struct cell, hmap_node, hash, hmap)
357 if (cell->not_wild != not_wild)
359 for (int i = 0; i < mt->n_layers; ++i)
361 if (0 == ((cell->not_wild >> i) & 0x1))
364 const struct layer *layer = mt->layers[i];
365 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
366 const union value *vv = case_data (c, var);
367 int width = var_get_width (var);
368 assert (var == cell->vars[idx]);
369 if (!value_equal (vv, &cell->values[idx++], width))
382 /* A comparison function used to sort cells in a binary tree.
383 Only the innermost value needs to be compared, because no
384 two cells with similar outer values will appear in the same
387 cell_compare_3way (const struct bt_node *a,
388 const struct bt_node *b,
389 const void *aux UNUSED)
391 const struct cell *fa = BT_DATA (a, struct cell, bt_node);
392 const struct cell *fb = BT_DATA (b, struct cell, bt_node);
394 assert (fa->not_wild == fb->not_wild);
395 int vidx = count_one_bits (fa->not_wild) - 1;
396 assert (fa->vars[vidx] == fb->vars[vidx]);
398 return value_compare_3way (&fa->values[vidx],
400 var_get_width (fa->vars[vidx]));
403 /* A comparison function used to sort cells in a binary tree. */
405 compare_instance_3way (const struct bt_node *a,
406 const struct bt_node *b,
407 const void *aux UNUSED)
409 const struct instance *fa = BT_DATA (a, struct instance, bt_node);
410 const struct instance *fb = BT_DATA (b, struct instance, bt_node);
412 assert (fa->var == fb->var);
414 return value_compare_3way (&fa->value,
416 var_get_width (fa->var));
420 static void arrange_cells (struct workspace *ws,
421 struct cell *cell, const struct mtable *table);
424 /* Iterate CONTAINER's map inserting a copy of its elements into
425 CONTAINER's binary tree. Also, for each layer in TABLE, create
426 an instance container, containing the union of all elements in
429 arrange_cell (struct workspace *ws, struct cell_container *container,
430 const struct mtable *mt)
432 struct bt *bt = &container->bt;
433 struct hmap *map = &container->map;
434 bt_init (bt, cell_compare_3way, NULL);
437 HMAP_FOR_EACH (cell, struct cell, hmap_node, map)
439 bt_insert (bt, &cell->bt_node);
442 for (int i = 0; i < mt->n_layers; ++i)
444 if (0 == ((cell->not_wild >> i) & 0x1))
447 struct cell_container *instances = ws->instances + i;
448 const struct variable *var = cell->vars[idx];
449 int width = var_get_width (var);
451 = value_hash (&cell->values[idx], width, 0);
453 struct instance *inst = NULL;
454 struct instance *next = NULL;
455 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance,
457 hash, &instances->map)
459 assert (cell->vars[idx] == var);
460 if (value_equal (&inst->value,
470 inst = xzalloc (sizeof *inst);
473 value_clone (&inst->value, &cell->values[idx],
475 hmap_insert (&instances->map, &inst->hmap_node, hash);
481 arrange_cells (ws, cell, mt);
485 /* Arrange the children and then all the subtotals. */
487 arrange_cells (struct workspace *ws, struct cell *cell,
488 const struct mtable *table)
490 for (int i = 0; i < cell->n_children; ++i)
492 struct cell_container *container = cell->children + i;
493 arrange_cell (ws, container, table);
500 /* If the top level value in CELL, has an instance in the L_IDX'th layer,
501 then return that instance. Otherwise return NULL. */
502 static const struct instance *
503 lookup_instance (const struct mtable *mt, const struct workspace *ws,
504 int l_idx, const struct cell *cell)
506 const struct layer *layer = mt->layers[l_idx];
507 int n_vals = count_one_bits (cell->not_wild);
508 const struct variable *var = layer->factor_vars[ws->control_idx[l_idx]];
509 const union value *val = cell->values + n_vals - 1;
510 int width = var_get_width (var);
511 unsigned int hash = value_hash (val, width, 0);
512 const struct cell_container *instances = ws->instances + l_idx;
513 struct instance *inst = NULL;
514 struct instance *next;
515 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next,
516 struct instance, hmap_node,
517 hash, &instances->map)
519 if (value_equal (val, &inst->value, width))
525 /* Enter the values into PT. */
527 populate_table (const struct means *means, const struct mtable *mt,
528 const struct workspace *ws,
529 const struct cell *cell,
530 struct pivot_table *pt)
532 size_t *indexes = xcalloc (pt->n_dimensions, sizeof *indexes);
533 for (int v = 0; v < mt->n_dep_vars; ++v)
535 for (int s = 0; s < means->n_statistics; ++s)
538 if (mt->n_dep_vars > 1)
541 int stat = means->statistics[s];
542 stat_get *sg = cell_spec[stat].sd;
544 const struct cell *pc = cell;
545 for (; i < pt->n_dimensions; ++i)
547 int l_idx = pt->n_dimensions - i - 1;
548 const struct cell_container *instances = ws->instances + l_idx;
549 if (0 == (cell->not_wild >> l_idx & 0x1U))
551 indexes [i] = hmap_count (&instances->map);
556 const struct instance *inst
557 = lookup_instance (mt, ws, l_idx, pc);
559 indexes [i] = inst->index;
560 pc = pc->parent_cell;
565 int idx = s + v * means->n_statistics;
566 pivot_table_put (pt, indexes, pt->n_dimensions,
567 pivot_value_new_number (sg (cell->stat[idx])));
572 for (int i = 0; i < cell->n_children; ++i)
574 struct cell_container *container = cell->children + i;
575 struct cell *child = NULL;
576 BT_FOR_EACH (child, struct cell, bt_node, &container->bt)
578 populate_table (means, mt, ws, child, pt);
584 create_table_structure (const struct mtable *mt, struct pivot_table *pt,
585 const struct workspace *ws)
587 int * lindexes = ws->control_idx;
588 /* The inner layers are situated rightmost in the table.
589 So this iteration is in reverse order. */
590 for (int l = mt->n_layers -1; l >=0 ; --l)
592 const struct layer *layer = mt->layers[l];
593 const struct cell_container *instances = ws->instances + l;
594 const struct variable *var = layer->factor_vars[lindexes[l]];
595 struct pivot_dimension *dim_layer
596 = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
597 var_to_string (var));
598 dim_layer->root->show_label = true;
600 /* Place the values of the control variables as table headings. */
602 struct instance *inst = NULL;
603 BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
605 struct substring space = SS_LITERAL_INITIALIZER ("\t ");
607 ds_init_empty (&str);
608 var_append_value_name (var,
612 ds_ltrim (&str, space);
614 pivot_category_create_leaf (dim_layer->root,
615 pivot_value_new_text (ds_cstr (&str)));
621 pivot_category_create_leaf (dim_layer->root,
622 pivot_value_new_text ("Total"));
626 /* Initialise C_DES with a string describing the control variable
627 relating to MT, LINDEXES. */
629 layers_to_string (const struct mtable *mt, const int *lindexes,
630 struct string *c_des)
632 for (int l = 0; l < mt->n_layers; ++l)
634 const struct layer *layer = mt->layers[l];
635 const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
637 ds_put_cstr (c_des, " * ");
638 ds_put_cstr (c_des, var_get_name (ctrl_var));
643 populate_case_processing_summary (struct pivot_category *pc,
644 const struct mtable *mt,
650 for (l = 0; l < mt->n_layers; ++l)
652 const struct layer *layer = mt->layers[l];
653 const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
655 ds_put_cstr (&ds, " * ");
656 ds_put_cstr (&ds, var_get_name (ctrl_var));
658 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
661 ds_init_empty (&dss);
662 ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
663 if (mt->n_layers > 0)
665 ds_put_cstr (&dss, " * ");
666 ds_put_substring (&dss, ds.ss);
668 pivot_category_create_leaf (pc,
669 pivot_value_new_text (ds_cstr (&dss)));
676 /* Create the "Case Processing Summary" table. */
678 means_case_processing_summary (const struct mtable *mt)
680 struct pivot_table *pt = pivot_table_create (N_("Case Processing Summary"));
682 struct pivot_dimension *dim_cases =
683 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Cases"));
684 dim_cases->root->show_label = true;
686 struct pivot_category *cats[3];
687 cats[0] = pivot_category_create_group (dim_cases->root,
688 N_("Included"), NULL);
689 cats[1] = pivot_category_create_group (dim_cases->root,
690 N_("Excluded"), NULL);
691 cats[2] = pivot_category_create_group (dim_cases->root,
693 for (int i = 0; i < 3; ++i)
695 pivot_category_create_leaf_rc (cats[i],
696 pivot_value_new_text (N_("N")),
698 pivot_category_create_leaf_rc (cats[i],
699 pivot_value_new_text (N_("Percent")),
703 struct pivot_dimension *rows =
704 pivot_dimension_create (pt, PIVOT_AXIS_ROW, N_("Variables"));
706 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
708 const struct workspace *ws = mt->ws + cmb;
709 populate_case_processing_summary (rows->root, mt, ws->control_idx);
710 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
712 int idx = cmb * mt->n_dep_vars + dv;
713 const struct summary *summ = mt->summ + idx;
714 double n_included = summ->n_total - summ->n_missing;
715 pivot_table_put2 (pt, 5, idx,
716 pivot_value_new_number (100.0 * summ->n_total / summ->n_total));
717 pivot_table_put2 (pt, 4, idx,
718 pivot_value_new_number (summ->n_total));
720 pivot_table_put2 (pt, 3, idx,
721 pivot_value_new_number (100.0 * summ->n_missing / summ->n_total));
722 pivot_table_put2 (pt, 2, idx,
723 pivot_value_new_number (summ->n_missing));
725 pivot_table_put2 (pt, 1, idx,
726 pivot_value_new_number (100.0 * n_included / summ->n_total));
727 pivot_table_put2 (pt, 0, idx,
728 pivot_value_new_number (n_included));
732 pivot_table_submit (pt);
736 means_shipout_single (const struct mtable *mt, const struct means *means,
737 const struct workspace *ws)
739 struct pivot_table *pt = pivot_table_create (N_("Report"));
740 pt->omit_empty = true;
742 struct pivot_dimension *dim_cells =
743 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Statistics"));
745 /* Set the statistics headings, eg "Mean", "Std. Dev" etc. */
746 for (int i = 0; i < means->n_statistics; ++i)
748 const struct cell_spec *cs = cell_spec + means->statistics[i];
749 pivot_category_create_leaf_rc
751 pivot_value_new_text (gettext (cs->title)), cs->rc);
754 create_table_structure (mt, pt, ws);
755 populate_table (means, mt, ws, ws->root_cell, pt);
756 pivot_table_submit (pt);
761 means_shipout_multivar (const struct mtable *mt, const struct means *means,
762 const struct workspace *ws)
765 ds_init_empty (&dss);
766 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
768 ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
769 if (mt->n_layers > 0)
770 ds_put_cstr (&dss, " * ");
773 for (int l = 0; l < mt->n_layers; ++l)
775 const struct layer *layer = mt->layers[l];
776 const struct variable *var = layer->factor_vars[ws->control_idx[l]];
777 ds_put_cstr (&dss, var_get_name (var));
778 if (l < mt->n_layers - 1)
779 ds_put_cstr (&dss, " * ");
782 struct pivot_table *pt = pivot_table_create (ds_cstr (&dss));
783 pt->omit_empty = true;
786 struct pivot_dimension *dim_cells =
787 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Variables"));
789 for (int i = 0; i < mt->n_dep_vars; ++i)
791 pivot_category_create_leaf
793 pivot_value_new_variable (mt->dep_vars[i]));
796 struct pivot_dimension *dim_stats
797 = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
799 dim_stats->root->show_label = false;
801 for (int i = 0; i < means->n_statistics; ++i)
803 const struct cell_spec *cs = cell_spec + means->statistics[i];
804 pivot_category_create_leaf_rc
806 pivot_value_new_text (gettext (cs->title)), cs->rc);
809 create_table_structure (mt, pt, ws);
810 populate_table (means, mt, ws, ws->root_cell, pt);
811 pivot_table_submit (pt);
815 means_shipout (const struct mtable *mt, const struct means *means)
817 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
819 const struct workspace *ws = mt->ws + cmb;
820 if (ws->root_cell == NULL)
823 ds_init_empty (&des);
824 layers_to_string (mt, ws->control_idx, &des);
825 msg (MW, _("The table \"%s\" has no non-empty control variables."
826 " No result for this table will be displayed."),
831 if (mt->n_dep_vars > 1)
832 means_shipout_multivar (mt, means, ws);
834 means_shipout_single (mt, means, ws);
842 control_var_missing (const struct means *means,
843 const struct mtable *mt,
844 unsigned int not_wild UNUSED,
845 const struct ccase *c,
846 const struct workspace *ws)
849 for (int l = 0; l < mt->n_layers; ++l)
851 /* if (0 == ((not_wild >> l) & 0x1)) */
856 const struct layer *layer = mt->layers[l];
857 const struct variable *var = layer->factor_vars[ws->control_idx[l]];
858 const union value *vv = case_data (c, var);
860 miss = var_is_value_missing (var, vv, means->ctrl_exclude);
868 /* Lookup the set of control variables described by MT, C and NOT_WILD,
869 in the hash table MAP. If there is no such entry, then create a
870 cell with these paremeters and add is to MAP.
871 If the generated cell has childen, repeat for all the children.
872 Returns the root cell.
875 service_cell_map (const struct means *means, const struct mtable *mt,
876 const struct ccase *c,
877 unsigned int not_wild,
879 const struct cell *pcell,
881 const struct workspace *ws)
883 struct cell *cell = NULL;
886 if (!control_var_missing (means, mt, not_wild, c, ws))
888 /* Lookup this set of values in the cell's hash table. */
889 unsigned int hash = generate_hash (mt, c, not_wild, ws);
890 cell = lookup_cell (mt, map, hash, c, not_wild, ws);
892 /* If it has not been seen before, then create a new
893 subcell, with this set of values, and insert it
897 cell = generate_cell (means, mt, c, not_wild, pcell, ws);
898 hmap_insert (map, &cell->hmap_node, hash);
904 /* This condition should only happen in the root node case. */
905 cell = ws->root_cell;
907 !control_var_missing (means, mt, not_wild, c, ws))
908 cell = generate_cell (means, mt, c, not_wild, pcell, ws);
913 /* Here is where the business really happens! After
914 testing for missing values, the cell's statistics
916 if (!control_var_missing (means, mt, not_wild, c, ws))
918 for (int v = 0; v < mt->n_dep_vars; ++v)
920 const struct variable *dep_var = mt->dep_vars[v];
921 const union value *vv = case_data (c, dep_var);
922 if (var_is_value_missing (dep_var, vv, means->dep_exclude))
925 for (int stat = 0; stat < means->n_statistics; ++stat)
927 const double weight = dict_get_case_weight (means->dict, c,
929 stat_update *su = cell_spec[means->statistics[stat]].su;
930 su (cell->stat[stat + v * means->n_statistics], weight,
931 case_data (c, dep_var)->f);
936 /* Recurse into all the children (if there are any). */
937 for (int i = 0; i < cell->n_children; ++i)
939 struct cell_container *cc = cell->children + i;
940 service_cell_map (means, mt, c,
941 not_wild | (0x1U << (i + level)),
942 &cc->map, cell, level + i + 1, ws);
949 /* Do all the necessary preparation and pre-calculation that
950 needs to be done before iterating the data. */
952 prepare_means (struct means *cmd)
954 for (int t = 0; t < cmd->n_tables; ++t)
956 struct mtable *mt = cmd->table + t;
958 mt->n_combinations = 1;
959 for (int l = 0; l < mt->n_layers; ++l)
960 mt->n_combinations *= mt->layers[l]->n_factor_vars;
962 mt->ws = xzalloc (mt->n_combinations * sizeof (*mt->ws));
963 mt->summ = xzalloc (mt->n_combinations * mt->n_dep_vars
964 * sizeof (*mt->summ));
965 for (int i = 0; i < mt->n_combinations; ++i)
967 struct workspace *ws = mt->ws + i;
968 ws->root_cell = NULL;
969 ws->control_idx = xzalloc (mt->n_layers
970 * sizeof *ws->control_idx);
971 ws->instances = xzalloc (mt->n_layers
972 * sizeof *ws->instances);
974 for (int l = mt->n_layers - 1; l >= 0; --l)
976 struct cell_container *instances = ws->instances + l;
977 const struct layer *layer = mt->layers[l];
978 ws->control_idx[l] = cmb % layer->n_factor_vars;
979 cmb /= layer->n_factor_vars;
980 hmap_init (&instances->map);
987 /* Do all the necessary calculations that occur AFTER iterating
990 post_means (struct means *cmd)
992 for (int t = 0; t < cmd->n_tables; ++t)
994 struct mtable *mt = cmd->table + t;
995 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
997 struct workspace *ws = mt->ws + cmb;
998 if (ws->root_cell == NULL)
1000 arrange_cells (ws, ws->root_cell, mt);
1001 /* The root cell should have no parent. */
1002 assert (ws->root_cell->parent_cell == 0);
1004 for (int l = 0; l < mt->n_layers; ++l)
1006 struct cell_container *instances = ws->instances + l;
1007 bt_init (&instances->bt, compare_instance_3way, NULL);
1009 /* Iterate the instance hash table, and insert each instance
1010 into the binary tree BT. */
1011 struct instance *inst;
1012 HMAP_FOR_EACH (inst, struct instance, hmap_node,
1015 bt_insert (&instances->bt, &inst->bt_node);
1018 /* Iterate the binary tree (in order) and assign the index
1019 member accordingly. */
1021 BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
1023 inst->index = index++;
1031 /* Update the summary information (the missings and the totals). */
1033 update_summaries (const struct means *means, struct mtable *mt,
1034 const struct ccase *c, double weight)
1036 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
1038 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1040 struct workspace *ws = mt->ws + cmb;
1041 struct summary *summ = mt->summ
1042 + cmb * mt->n_dep_vars + dv;
1044 summ->n_total += weight;
1045 const struct variable *var = mt->dep_vars[dv];
1046 const union value *vv = case_data (c, var);
1047 /* First check if the dependent variable is missing. */
1048 if (var_is_value_missing (var, vv, means->dep_exclude))
1049 summ->n_missing += weight;
1050 /* If the dep var is not missing, then check each
1051 control variable. */
1053 for (int l = 0; l < mt->n_layers; ++l)
1055 const struct layer *layer = mt->layers [l];
1056 const struct variable *var
1057 = layer->factor_vars[ws->control_idx[l]];
1058 const union value *vv = case_data (c, var);
1059 if (var_is_value_missing (var, vv, means->ctrl_exclude))
1061 summ->n_missing += weight;
1071 run_means (struct means *cmd, struct casereader *input,
1072 const struct dataset *ds UNUSED)
1074 struct ccase *c = NULL;
1075 struct casereader *reader;
1077 prepare_means (cmd);
1079 for (reader = input;
1080 (c = casereader_read (reader)) != NULL; case_unref (c))
1083 = dict_get_case_weight (cmd->dict, c, NULL);
1084 for (int t = 0; t < cmd->n_tables; ++t)
1086 struct mtable *mt = cmd->table + t;
1087 update_summaries (cmd, mt, c, weight);
1089 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1091 struct workspace *ws = mt->ws + cmb;
1093 ws->root_cell = service_cell_map (cmd, mt, c,
1094 0U, NULL, NULL, 0, ws);
1098 casereader_destroy (reader);
1104 /* Release all resources allocated by this routine.
1105 This does not include those allocated by the parser,
1106 which exclusively use MEANS->pool. */
1108 destroy_means (struct means *means)
1110 for (int t = 0; t < means->n_tables; ++t)
1112 const struct mtable *table = means->table + t;
1113 for (int i = 0; i < table->n_combinations; ++i)
1115 struct workspace *ws = table->ws + i;
1116 if (ws->root_cell == NULL)
1118 means_destroy_cells (means, ws->root_cell, table);
1120 for (int i = 0; i < table->n_combinations; ++i)
1122 struct workspace *ws = table->ws + i;
1123 destroy_workspace (table, ws);