1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case.h"
20 #include "data/casegrouper.h"
21 #include "data/casereader.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/format.h"
25 #include "data/variable.h"
27 #include "libpspp/hmap.h"
28 #include "libpspp/bt.h"
29 #include "libpspp/hash-functions.h"
30 #include "libpspp/misc.h"
31 #include "libpspp/pool.h"
33 #include "language/command.h"
35 #include "count-one-bits.h"
36 #include "count-leading-zeros.h"
38 #include "output/pivot-table.h"
44 #define _(msgid) gettext (msgid)
45 #define N_(msgid) (msgid)
48 /* A "cell" in this procedure represents a distinct value of the
49 procedure's categorical variables, and a set of summary statistics
50 of all cases whose categorical variables have that set of
51 values. For example, the dataset
60 has three cells in layer 0 and two cells in layer 1 in addition
61 to a "grand summary" cell to which all (non-missing) cases
64 The cells form a n-ary tree structure with the "grand summary"
69 struct hmap_node hmap_node; /* Element in hash table. */
70 struct bt_node bt_node; /* Element in binary tree */
73 struct cell_container *children;
75 /* The statistics to be calculated for the cell. */
76 struct statistic **stat;
78 /* The parent of this cell, or NULL if this is the root cell. */
79 const struct cell *parent_cell;
81 /* A bit-field variable which indicates which control variables
82 are allocated a fixed value (for this cell), and which are
85 A one indicates a fixed value. A zero indicates a wildcard.
86 Wildcard values are used to calculate totals and sub-totals.
88 unsigned int not_wild;
93 /* The variables corresponding to the above values. */
94 const struct variable **vars;
97 /* A structure used to find the union of all values used
98 within a layer, and to sort those values. */
101 struct hmap_node hmap_node; /* Element in hash table. */
102 struct bt_node bt_node; /* Element in binary tree */
104 /* A unique, consecutive, zero based index identifying this
108 /* The top level value of this instance. */
110 const struct variable *var;
/* Releases the resources held by workspace WS of table MT.

   For each of MT's layers, the layer's instance container is walked:
   the value stored in every instance is destroyed using the width of
   that instance's variable, and the container's hash map is destroyed.
   Afterwards the CONTROL_IDX and INSTANCES arrays of WS are freed.

   NOTE(review): the statement that releases each instance structure
   itself is not visible in this listing -- confirm against the full
   source. */
115 destroy_workspace (const struct mtable *mt, struct workspace *ws)
117 for (int l = 0; l < mt->n_layers; ++l)
119 struct cell_container *instances = ws->instances + l;
120 struct instance *inst;
121 struct instance *next;
122 HMAP_FOR_EACH_SAFE (inst, next, struct instance, hmap_node,
125 int width = var_get_width (inst->var);
126 value_destroy (&inst->value, width);
129 hmap_destroy (&instances->map);
/* These two arrays are allocated per workspace in prepare_means. */
131 free (ws->control_idx);
132 free (ws->instances);
137 destroy_cell (const struct means *means,
138 const struct mtable *mt, struct cell *cell)
141 for (int i = 0; i < mt->n_layers; ++i)
143 if (0 == ((cell->not_wild >> i) & 0x1))
146 const struct layer *layer = mt->layers[i];
147 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
149 struct workspace *ws = mt->ws + cmb;
150 const struct variable *var
151 = layer->factor_vars[ws->control_idx[i]];
153 int width = var_get_width (var);
154 value_destroy (&cell->values[idx++], width);
157 for (int i = 0; i < cell->n_children; ++i)
159 struct cell_container *container = cell->children + i;
160 hmap_destroy (&container->map);
163 for (int v = 0; v < mt->n_dep_vars; ++v)
165 for (int s = 0; s < means->n_statistics; ++s)
167 stat_destroy *des = cell_spec[means->statistics[s]].sf;
168 des (cell->stat[s + v * means->n_statistics]);
173 free (cell->children);
180 /* Walk the tree in postorder starting from CELL and destroy all the cells. */
/* Destroys the subtree of cells rooted at CELL.

   For every child container of CELL, each sub-cell found in the
   container's hash map is destroyed by a recursive call.  Once the
   descendants have been dealt with, CELL itself is released with
   destroy_cell.  MEANS and TABLE are passed through unchanged. */
183 means_destroy_cells (const struct means *means, struct cell *cell,
184 const struct mtable *table)
186 for (int i = 0; i < cell->n_children; ++i)
188 struct cell_container *container = cell->children + i;
189 struct cell *sub_cell;
191 HMAP_FOR_EACH_SAFE (sub_cell, next, struct cell, hmap_node,
194 means_destroy_cells (means, sub_cell, table);
/* Descendants first, then the cell itself (postorder). */
198 destroy_cell (means, table, cell);
/* Debugging aid: prints a description of CELL to stdout.

   The output contains CELL's address, one character per layer of MT
   chosen from the corresponding bit of CELL's NOT_WILD mask, the
   variable name and numeric value for each layer whose bit is set,
   the result of applying the MEANS_N getter to CELL's first statistic
   object, the number of children and the parent's address.

   LEVEL bounds the first loop; its body is not visible in this
   listing (presumably indentation -- confirm).

   NOTE(review): 'w' is printed for a SET bit, although a set bit in
   NOT_WILD denotes a fixed (non-wildcard) value.
   NOTE(review): "%p" is given struct pointers without a cast to
   void *. */
202 dump_cell (const struct cell *cell, const struct mtable *mt, int level)
204 for (int l = 0; l < level; ++l)
206 printf ("%p: ", cell);
207 for (int i = 0; i < mt->n_layers; ++i)
209 putchar (((cell->not_wild >> i) & 0x1) ? 'w' : '.');
/* X indexes the compact VARS/VALUES arrays, which only have entries
   for layers whose bit is set. */
213 for (int i = 0; i < mt->n_layers; ++i)
215 if ((cell->not_wild >> i) & 0x1)
217 printf ("%s: ", var_get_name (cell->vars[x]));
218 printf ("%g ", cell->values[x++].f);
223 stat_get *sg = cell_spec[MEANS_N].sd;
224 printf ("--- S1: %g", sg (cell->stat[0]));
226 printf ("--- N Children: %d", cell->n_children);
227 // printf ("--- Level: %d", level);
228 printf ("--- Parent: %p", cell->parent_cell);
/* Debugging aid: prints the N entries of INDEXES to stdout, each
   followed by "; ", and then a newline. */
static void
dump_indeces (const size_t *indexes, int n)
{
  for (int i = 0; i < n; ++i)
    {
      /* INDEXES holds size_t values, so the conversion must be %zu.
         %ld expects a long and misbehaves where the two types differ
         in size (e.g. 64-bit Windows). */
      printf ("%zu; ", indexes[i]);
    }
  printf ("\n");
}
242 /* Dump the tree in pre-order. */
/* Debugging aid: dumps CELL with dump_cell, then recurses into every
   sub-cell of each of CELL's child containers, visiting sub-cells in
   the order given by the container's binary tree.  Each recursive
   call passes LEVEL + 1 and CELL as the expected parent.

   Asserts that the parent recorded in CELL is PARENT. */
244 dump_tree (const struct cell *cell, const struct mtable *table,
245 int level, const struct cell *parent)
247 assert (cell->parent_cell == parent);
248 dump_cell (cell, table, level);
250 for (int i = 0; i < cell->n_children; ++i)
252 struct cell_container *container = cell->children + i;
253 struct cell *sub_cell;
254 BT_FOR_EACH (sub_cell, struct cell, bt_node, &container->bt)
256 dump_tree (sub_cell, table, level + 1, cell);
261 /* Generate a hash based on the values of the N variables in
262 the array VARS which are taken from the case C. */
/* Computes a hash for case C over the control variables of table MT
   selected by the bit mask NOT_WILD.

   The hash starts at 0.  Layers are considered in ascending order;
   for a contributing layer the factor variable is the one selected by
   WS->CONTROL_IDX for that layer, and both the layer index and the
   case's value of that variable are folded into the hash.

   NOTE(review): the statement controlled by the test of bit I
   (presumably skipping wildcard layers) and the return statement are
   not visible in this listing. */
264 generate_hash (const struct mtable *mt,
265 const struct ccase *c,
266 unsigned int not_wild,
267 const struct workspace *ws)
269 unsigned int hash = 0;
270 for (int i = 0; i < mt->n_layers; ++i)
272 if (0 == ((not_wild >> i) & 0x1))
275 const struct layer *layer = mt->layers[i];
276 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
277 const union value *vv = case_data (c, var);
278 int width = var_get_width (var);
/* The layer index is mixed in as well as the value, so the layer a
   value belongs to influences the result. */
279 hash = hash_int (i, hash);
280 hash = value_hash (vv, width, hash);
286 /* Create a cell based on the N variables in the array VARS,
287 which are indices into the case C.
288 The caller is responsible for destroying this cell when
/* Allocates and initialises a new cell for case C.

   MEANS supplies the statistics to create and the pool handed to
   their constructors.  MT supplies the layers and dependent
   variables.  NOT_WILD is the mask of layers with a fixed value; it is
   stored in the cell and determines how many VALUES/VARS slots are
   allocated (one per set bit).  PCELL is recorded as the parent.  WS
   selects, per layer, which factor variable is in use.

   For each layer that contributes, the factor variable and a clone of
   the case's value for it are stored in the next free slot.  The child
   containers are allocated with their hash maps initialised, and one
   statistic object is created per (dependent variable, statistic)
   pair.

   NOTE(review): the statement controlled by the test of bit I
   (presumably skipping wildcard layers) and the return statement are
   not visible in this listing. */
291 generate_cell (const struct means *means,
292 const struct mtable *mt,
293 const struct ccase *c,
294 unsigned int not_wild,
295 const struct cell *pcell,
296 const struct workspace *ws)
298 int n_vars = count_one_bits (not_wild);
299 struct cell *cell = xzalloc ((sizeof *cell));
300 cell->values = xcalloc (n_vars, sizeof *cell->values);
301 cell->vars = xcalloc (n_vars, sizeof *cell->vars);
302 cell->not_wild = not_wild;
304 cell->parent_cell = pcell;
/* N_LAYERS minus (bit width minus leading zeros): the number of
   layers above the highest set bit of NOT_WILD.  Presumably
   count_leading_zeros yields the full bit width for 0, giving N_LAYERS
   children for the root -- confirm. */
305 cell->n_children = mt->n_layers -
306 (sizeof (cell->not_wild) * CHAR_BIT) +
307 count_leading_zeros (cell->not_wild);
310 for (int i = 0; i < mt->n_layers; ++i)
312 if (0 == ((not_wild >> i) & 0x1))
315 const struct layer *layer = mt->layers[i];
316 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
317 const union value *vv = case_data (c, var);
318 int width = var_get_width (var);
319 cell->vars[idx] = var;
320 value_clone (&cell->values[idx++], vv, width);
/* Every allocated slot must have been filled. */
322 assert (idx == n_vars);
324 cell->children = xcalloc (cell->n_children, sizeof *cell->children);
325 for (int i = 0; i < cell->n_children; ++i)
327 struct cell_container *container = cell->children + i;
328 hmap_init (&container->map);
/* Statistic objects are laid out as stat + v * n_statistics. */
331 cell->stat = xcalloc (means->n_statistics * mt->n_dep_vars, sizeof *cell->stat);
332 for (int v = 0; v < mt->n_dep_vars; ++v)
334 for (int stat = 0; stat < means->n_statistics; ++stat)
336 stat_create *sc = cell_spec[means->statistics[stat]].sc;
338 cell->stat[stat + v * means->n_statistics] = sc (means->pool);
345 /* If a cell based on the N variables in the array VARS,
346 which are indices into the case C and whose hash is HASH,
347 exists in HMAP, then return that cell.
348 Otherwise, return NULL. */
/* Searches HMAP for a cell matching case C.

   Only entries stored under HASH are examined.  A candidate's NOT_WILD
   mask is compared with NOT_WILD, and for the layers of MT that
   contribute, the candidate's stored value is compared with the value
   case C has for the factor variable selected by WS->CONTROL_IDX.
   The variable stored in the candidate is asserted to be that same
   variable.

   NOTE(review): the statements taken on mismatch and the return
   statements are not visible in this listing. */
350 lookup_cell (const struct mtable *mt,
351 struct hmap *hmap, unsigned int hash,
352 const struct ccase *c,
353 unsigned int not_wild,
354 const struct workspace *ws)
356 struct cell *cell = NULL;
357 HMAP_FOR_EACH_WITH_HASH (cell, struct cell, hmap_node, hash, hmap)
/* The wildcard masks are compared before any values. */
361 if (cell->not_wild != not_wild)
363 for (int i = 0; i < mt->n_layers; ++i)
365 if (0 == ((cell->not_wild >> i) & 0x1))
368 const struct layer *layer = mt->layers[i];
369 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
370 const union value *vv = case_data (c, var);
371 int width = var_get_width (var);
372 assert (var == cell->vars[idx]);
373 if (!value_equal (vv, &cell->values[idx++], width))
386 /* A comparison function used to sort cells in a binary tree.
387 Only the innermost value needs to be compared, because no
388 two cells with similar outer values will appear in the same
/* Three-way comparison of the cells that contain binary tree nodes A
   and B.  AUX is unused.

   Both cells are asserted to have the same NOT_WILD mask and the same
   variable in their last value slot (index count_one_bits (not_wild)
   - 1).  The result comes from value_compare_3way on that slot, using
   the width of that variable. */
391 cell_compare_3way (const struct bt_node *a,
392 const struct bt_node *b,
393 const void *aux UNUSED)
395 const struct cell *fa = BT_DATA (a, struct cell, bt_node);
396 const struct cell *fb = BT_DATA (b, struct cell, bt_node);
398 assert (fa->not_wild == fb->not_wild);
/* Index of the last (innermost) value held by the cell. */
399 int vidx = count_one_bits (fa->not_wild) - 1;
400 assert (fa->vars[vidx] == fb->vars[vidx]);
402 return value_compare_3way (&fa->values[vidx],
404 var_get_width (fa->vars[vidx]));
407 /* A comparison function used to sort instances in a binary tree. */
/* Three-way comparison of the instances that contain binary tree
   nodes A and B.  AUX is unused.

   Both instances are asserted to refer to the same variable.  The
   result comes from value_compare_3way on their values, using that
   variable's width. */
409 compare_instance_3way (const struct bt_node *a,
410 const struct bt_node *b,
411 const void *aux UNUSED)
413 const struct instance *fa = BT_DATA (a, struct instance, bt_node);
414 const struct instance *fb = BT_DATA (b, struct instance, bt_node);
416 assert (fa->var == fb->var);
418 return value_compare_3way (&fa->value,
420 var_get_width (fa->var));
/* Forward declaration: arrange_cell and arrange_cells call each
   other. */
424 static void arrange_cells (struct workspace *ws,
425 struct cell *cell, const struct mtable *table);
428 /* Iterate CONTAINER's map inserting a copy of its elements into
429 CONTAINER's binary tree. Also, for each layer in TABLE, create
430 an instance container, containing the union of all elements in
/* Sorts the cells of CONTAINER and records their values as instances.

   CONTAINER's binary tree is initialised with cell_compare_3way and
   every cell in CONTAINER's hash map is inserted into it.  For each
   such cell, the per-layer instance containers of WS are consulted:
   the cell's value is hashed and compared against the instances
   already stored under that hash.  A new instance is allocated, given
   a clone of the cell's value and inserted into the layer's instance
   map.  Finally arrange_cells is called on the cell so that its own
   children are processed too.

   NOTE(review): the conditions deciding when a new instance is
   created (presumably only when no equal one exists) and the
   statement under the bit test are not visible in this listing. */
433 arrange_cell (struct workspace *ws, struct cell_container *container,
434 const struct mtable *mt)
436 struct bt *bt = &container->bt;
437 struct hmap *map = &container->map;
438 bt_init (bt, cell_compare_3way, NULL);
441 HMAP_FOR_EACH (cell, struct cell, hmap_node, map)
443 bt_insert (bt, &cell->bt_node);
446 for (int i = 0; i < mt->n_layers; ++i)
448 if (0 == ((cell->not_wild >> i) & 0x1))
451 struct cell_container *instances = ws->instances + i;
452 const struct variable *var = cell->vars[idx];
453 int width = var_get_width (var);
455 = value_hash (&cell->values[idx], width, 0);
457 struct instance *inst = NULL;
458 struct instance *next = NULL;
459 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance,
461 hash, &instances->map)
463 assert (cell->vars[idx] == var);
464 if (value_equal (&inst->value,
474 inst = xzalloc (sizeof *inst);
477 value_clone (&inst->value, &cell->values[idx],
479 hmap_insert (&instances->map, &inst->hmap_node, hash);
/* Descend: arrange this cell's own child containers. */
485 arrange_cells (ws, cell, mt);
489 /* Arrange the children and then all the subtotals. */
/* Calls arrange_cell on each of CELL's child containers, passing WS
   and TABLE through.  Since arrange_cell calls back into this function
   for every cell it visits, the whole subtree below CELL is
   processed. */
491 arrange_cells (struct workspace *ws, struct cell *cell,
492 const struct mtable *table)
494 for (int i = 0; i < cell->n_children; ++i)
496 struct cell_container *container = cell->children + i;
497 arrange_cell (ws, container, table);
504 /* If the top level value in CELL has an instance in the L_IDX'th layer,
505 then return that instance. Otherwise return NULL. */
/* Searches the instance container of layer L_IDX in WS for an
   instance whose value equals the last value held by CELL (the slot at
   index count_one_bits (not_wild) - 1).

   The width used for hashing and comparison is that of the factor
   variable selected for layer L_IDX by WS->CONTROL_IDX.

   NOTE(review): the return statements are not visible in this
   listing. */
506 static const struct instance *
507 lookup_instance (const struct mtable *mt, const struct workspace *ws,
508 int l_idx, const struct cell *cell)
510 const struct layer *layer = mt->layers[l_idx];
511 int n_vals = count_one_bits (cell->not_wild);
512 const struct variable *var = layer->factor_vars[ws->control_idx[l_idx]];
513 const union value *val = cell->values + n_vals - 1;
514 int width = var_get_width (var);
/* Seed 0 matches the seed used when instances are inserted in
   arrange_cell. */
515 unsigned int hash = value_hash (val, width, 0);
516 const struct cell_container *instances = ws->instances + l_idx;
517 struct instance *inst = NULL;
518 struct instance *next;
519 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next,
520 struct instance, hmap_node,
521 hash, &instances->map)
523 if (value_equal (val, &inst->value, width))
529 /* Enter the values into PT. */
/* Enters into pivot table PT the statistics held by CELL and,
   recursively, by every cell below it.

   For each dependent variable V and each statistic S, an index vector
   with PT->N_DIMENSIONS entries is filled in.  The entries handled by
   the visible loop correspond to layers, with layer index
   N_DIMENSIONS - I - 1:
     - when the layer's bit in CELL's NOT_WILD mask is clear, the index
       is the number of instances in that layer (presumably the
       position of the trailing "Total" category -- confirm);
     - otherwise the index is that of the instance found by
       lookup_instance for the ancestor PC, and PC then moves to its
       parent.
   The statistic's value is read through its getter and put into PT.
   When the statistic's cell_spec has no RC, the value takes the print
   format of the dependent variable.

   Afterwards every child cell, in binary tree order, is processed by
   a recursive call.

   NOTE(review): the initialisation of I and of the leading index
   entries, and the release of INDEXES, are not visible in this
   listing. */
531 populate_table (const struct means *means, const struct mtable *mt,
532 const struct workspace *ws,
533 const struct cell *cell,
534 struct pivot_table *pt)
536 size_t *indexes = xcalloc (pt->n_dimensions, sizeof *indexes);
537 for (int v = 0; v < mt->n_dep_vars; ++v)
539 for (int s = 0; s < means->n_statistics; ++s)
542 if (mt->n_dep_vars > 1)
545 int stat = means->statistics[s];
546 stat_get *sg = cell_spec[stat].sd;
548 const struct cell *pc = cell;
549 for (; i < pt->n_dimensions; ++i)
551 int l_idx = pt->n_dimensions - i - 1;
552 const struct cell_container *instances = ws->instances + l_idx;
553 if (0 == (cell->not_wild >> l_idx & 0x1U))
555 indexes [i] = hmap_count (&instances->map);
560 const struct instance *inst
561 = lookup_instance (mt, ws, l_idx, pc);
563 indexes [i] = inst->index;
564 pc = pc->parent_cell;
/* Same slot layout as used when the statistics were created. */
569 int idx = s + v * means->n_statistics;
570 struct pivot_value *pv
571 = pivot_value_new_number (sg (cell->stat[idx]));
572 if (NULL == cell_spec[stat].rc)
574 const struct variable *dv = mt->dep_vars[v];
575 pv->numeric.format = * var_get_print_format (dv);
577 pivot_table_put (pt, indexes, pt->n_dimensions, pv);
582 for (int i = 0; i < cell->n_children; ++i)
584 struct cell_container *container = cell->children + i;
585 struct cell *child = NULL;
586 BT_FOR_EACH (child, struct cell, bt_node, &container->bt)
588 populate_table (means, mt, ws, child, pt);
/* Adds to PT one row dimension per layer of MT, using workspace WS.

   Layers are visited from last to first.  Each dimension is named
   after the layer's current factor variable (selected through
   WS->CONTROL_IDX) and has its root label shown.  For every instance
   in the layer's binary tree a leaf category is created from a string
   built with var_append_value_name and left-trimmed of tabs and
   spaces.  A leaf with the text "Total" is also created in each
   dimension. */
594 create_table_structure (const struct mtable *mt, struct pivot_table *pt,
595 const struct workspace *ws)
597 int * lindexes = ws->control_idx;
598 /* The inner layers are situated rightmost in the table.
599 So this iteration is in reverse order. */
600 for (int l = mt->n_layers -1; l >=0 ; --l)
602 const struct layer *layer = mt->layers[l];
603 const struct cell_container *instances = ws->instances + l;
604 const struct variable *var = layer->factor_vars[lindexes[l]];
605 struct pivot_dimension *dim_layer
606 = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
607 var_to_string (var));
608 dim_layer->root->show_label = true;
610 /* Place the values of the control variables as table headings. */
612 struct instance *inst = NULL;
613 BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
615 struct substring space = SS_LITERAL_INITIALIZER ("\t ");
617 ds_init_empty (&str);
618 var_append_value_name (var,
622 ds_ltrim (&str, space);
624 pivot_category_create_leaf (dim_layer->root,
625 pivot_value_new_text (ds_cstr (&str)));
/* populate_table addresses this leaf with an index equal to the
   number of instances in the layer. */
631 pivot_category_create_leaf (dim_layer->root,
632 pivot_value_new_text ("Total"));
636 /* Initialise C_DES with a string describing the control variable
637 relating to MT, LINDEXES. */
/* Appends to C_DES the names of the control variables of MT selected
   by LINDEXES (one index per layer), using " * " as separator text.

   NOTE(review): the condition governing when the separator is written
   is not visible in this listing (presumably only between names). */
639 layers_to_string (const struct mtable *mt, const int *lindexes,
640 struct string *c_des)
642 for (int l = 0; l < mt->n_layers; ++l)
644 const struct layer *layer = mt->layers[l];
645 const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
647 ds_put_cstr (c_des, " * ");
648 ds_put_cstr (c_des, var_get_name (ctrl_var));
/* Creates, under category PC, one leaf per dependent variable of MT.

   A string of the control variable names selected by LINDEXES, with
   " * " as separator text, is built first.  Each leaf's text is the
   dependent variable's name, followed by " * " and that string when
   MT has at least one layer.

   NOTE(review): the remaining parameter(s), the declaration of DS and
   the release of the strings are on lines not visible in this
   listing. */
653 populate_case_processing_summary (struct pivot_category *pc,
654 const struct mtable *mt,
660 for (l = 0; l < mt->n_layers; ++l)
662 const struct layer *layer = mt->layers[l];
663 const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
665 ds_put_cstr (&ds, " * ");
666 ds_put_cstr (&ds, var_get_name (ctrl_var));
668 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
671 ds_init_empty (&dss);
672 ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
673 if (mt->n_layers > 0)
675 ds_put_cstr (&dss, " * ");
676 ds_put_substring (&dss, ds.ss);
678 pivot_category_create_leaf (pc,
679 pivot_value_new_text (ds_cstr (&dss)));
686 /* Create the "Case Processing Summary" table. */
/* Builds and submits the "Case Processing Summary" pivot table for
   MT.

   Columns: a "Cases" dimension with three groups ("Included",
   "Excluded" and a third whose label is on a line not visible in this
   listing), each holding the leaves "N" and "Percent".
   Rows: a "Variables" dimension, filled by
   populate_case_processing_summary for each combination.

   For combination CMB and dependent variable DV, the row index is
   CMB * n_dep_vars + DV and the columns are:
     0, 1: included count (n_total - n_missing) and its percentage;
     2, 3: missing count and its percentage;
     4, 5: total count and its percentage.

   NOTE(review): all percentages divide by n_total and no guard
   against a zero total is visible.  The total percentage is computed
   as 100.0 * n_total / n_total. */
688 means_case_processing_summary (const struct mtable *mt)
690 struct pivot_table *pt = pivot_table_create (N_("Case Processing Summary"));
692 struct pivot_dimension *dim_cases =
693 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Cases"));
694 dim_cases->root->show_label = true;
696 struct pivot_category *cats[3];
697 cats[0] = pivot_category_create_group (dim_cases->root,
698 N_("Included"), NULL);
699 cats[1] = pivot_category_create_group (dim_cases->root,
700 N_("Excluded"), NULL);
701 cats[2] = pivot_category_create_group (dim_cases->root,
703 for (int i = 0; i < 3; ++i)
705 pivot_category_create_leaf_rc (cats[i],
706 pivot_value_new_text (N_("N")),
708 pivot_category_create_leaf_rc (cats[i],
709 pivot_value_new_text (N_("Percent")),
713 struct pivot_dimension *rows =
714 pivot_dimension_create (pt, PIVOT_AXIS_ROW, N_("Variables"));
716 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
718 const struct workspace *ws = mt->ws + cmb;
719 populate_case_processing_summary (rows->root, mt, ws->control_idx);
720 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
/* Same indexing as update_summaries uses for MT->SUMM. */
722 int idx = cmb * mt->n_dep_vars + dv;
723 const struct summary *summ = mt->summ + idx;
724 double n_included = summ->n_total - summ->n_missing;
725 pivot_table_put2 (pt, 5, idx,
726 pivot_value_new_number (100.0 * summ->n_total / summ->n_total));
727 pivot_table_put2 (pt, 4, idx,
728 pivot_value_new_number (summ->n_total));
730 pivot_table_put2 (pt, 3, idx,
731 pivot_value_new_number (100.0 * summ->n_missing / summ->n_total));
732 pivot_table_put2 (pt, 2, idx,
733 pivot_value_new_number (summ->n_missing));
735 pivot_table_put2 (pt, 1, idx,
736 pivot_value_new_number (100.0 * n_included / summ->n_total));
737 pivot_table_put2 (pt, 0, idx,
738 pivot_value_new_number (n_included));
742 pivot_table_submit (pt);
/* Builds and submits the "Report" pivot table for table MT and
   workspace WS.  means_shipout calls this when MT does not have more
   than one dependent variable.

   The table omits empty rows/columns (omit_empty).  It has a
   "Statistics" column dimension with one leaf per statistic selected
   in MEANS, titled with the translated statistic title and created
   with the statistic's RC.  The layer dimensions are then added by
   create_table_structure and the data entered by populate_table
   starting at the root cell of WS. */
746 means_shipout_single (const struct mtable *mt, const struct means *means,
747 const struct workspace *ws)
749 struct pivot_table *pt = pivot_table_create (N_("Report"));
750 pt->omit_empty = true;
752 struct pivot_dimension *dim_cells =
753 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Statistics"));
755 /* Set the statistics headings, eg "Mean", "Std. Dev" etc. */
756 for (int i = 0; i < means->n_statistics; ++i)
758 const struct cell_spec *cs = cell_spec + means->statistics[i];
759 pivot_category_create_leaf_rc
761 pivot_value_new_text (gettext (cs->title)), cs->rc);
764 create_table_structure (mt, pt, ws);
765 populate_table (means, mt, ws, ws->root_cell, pt);
766 pivot_table_submit (pt);
/* Builds and submits the report pivot table for table MT and
   workspace WS.  means_shipout calls this when MT has more than one
   dependent variable.

   The table title is assembled from the dependent variable names and
   the names of the control variables selected by WS->CONTROL_IDX,
   with " * " as separator text.  The table omits empty rows/columns,
   has a "Variables" column dimension with one leaf per dependent
   variable, and a row dimension (label hidden) with one leaf per
   selected statistic.  The layer dimensions are then added by
   create_table_structure and the data entered by populate_table
   starting at the root cell of WS.

   NOTE(review): the condition governing the separator between
   dependent variable names, the name of the statistics dimension and
   the release of DSS are not visible in this listing. */
771 means_shipout_multivar (const struct mtable *mt, const struct means *means,
772 const struct workspace *ws)
775 ds_init_empty (&dss);
776 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
779 ds_put_cstr (&dss, " * ");
780 ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
783 for (int l = 0; l < mt->n_layers; ++l)
785 ds_put_cstr (&dss, " * ");
786 const struct layer *layer = mt->layers[l];
787 const struct variable *var = layer->factor_vars[ws->control_idx[l]];
788 ds_put_cstr (&dss, var_get_name (var));
791 struct pivot_table *pt = pivot_table_create (ds_cstr (&dss));
792 pt->omit_empty = true;
795 struct pivot_dimension *dim_cells =
796 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Variables"));
798 for (int i = 0; i < mt->n_dep_vars; ++i)
800 pivot_category_create_leaf
802 pivot_value_new_variable (mt->dep_vars[i]));
805 struct pivot_dimension *dim_stats
806 = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
808 dim_stats->root->show_label = false;
810 for (int i = 0; i < means->n_statistics; ++i)
812 const struct cell_spec *cs = cell_spec + means->statistics[i];
813 pivot_category_create_leaf_rc
815 pivot_value_new_text (gettext (cs->title)), cs->rc);
818 create_table_structure (mt, pt, ws);
819 populate_table (means, mt, ws, ws->root_cell, pt);
820 pivot_table_submit (pt);
/* Outputs the report tables for every combination of MT.

   A combination whose workspace has no root cell produces a warning
   naming the combination's control variables (built with
   layers_to_string).  Otherwise means_shipout_multivar is used when MT
   has more than one dependent variable, and means_shipout_single when
   it does not.

   NOTE(review): the statements that end the no-root-cell branch are
   not visible in this listing. */
824 means_shipout (const struct mtable *mt, const struct means *means)
826 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
828 const struct workspace *ws = mt->ws + cmb;
829 if (ws->root_cell == NULL)
832 ds_init_empty (&des);
833 layers_to_string (mt, ws->control_idx, &des);
834 msg (MW, _("The table \"%s\" has no non-empty control variables."
835 " No result for this table will be displayed."),
840 if (mt->n_dep_vars > 1)
841 means_shipout_multivar (mt, means, ws);
843 means_shipout_single (mt, means, ws);
/* Tests the control variables of case C for missing values.

   For each layer of MT, the value that C has for the factor variable
   selected by WS->CONTROL_IDX is checked with var_is_value_missing
   against the class MEANS->CTRL_EXCLUDE.  NOT_WILD is not used: the
   test that would consult it is commented out, so every layer is
   examined.

   NOTE(review): how the per-layer results are combined and returned
   is not visible in this listing. */
851 control_var_missing (const struct means *means,
852 const struct mtable *mt,
853 unsigned int not_wild UNUSED,
854 const struct ccase *c,
855 const struct workspace *ws)
858 for (int l = 0; l < mt->n_layers; ++l)
860 /* if (0 == ((not_wild >> l) & 0x1)) */
865 const struct layer *layer = mt->layers[l];
866 const struct variable *var = layer->factor_vars[ws->control_idx[l]];
867 const union value *vv = case_data (c, var);
869 miss = var_is_value_missing (var, vv, means->ctrl_exclude);
877 /* Lookup the set of control variables described by MT, C and NOT_WILD,
878 in the hash table MAP. If there is no such entry, then create a
879 cell with these parameters and add it to MAP.
880 If the generated cell has children, repeat for all the children.
881 Returns the root cell.
/* Accumulates case C into the cell tree of workspace WS.

   Judging by the recursive call below, the parameters between
   NOT_WILD and WS are the hash map to search, the parent cell PCELL
   and the current LEVEL (two of these are declared on lines not
   visible in this listing).  run_means calls this with NOT_WILD 0, a
   null map, a null parent and level 0.

   NOTE(review): control_var_missing is called with identical
   arguments in three places.  The conditions selecting between the
   map lookup and the root-cell branch are not fully visible here. */
884 service_cell_map (const struct means *means, const struct mtable *mt,
885 const struct ccase *c,
886 unsigned int not_wild,
888 const struct cell *pcell,
890 const struct workspace *ws)
892 struct cell *cell = NULL;
895 if (!control_var_missing (means, mt, not_wild, c, ws))
897 /* Lookup this set of values in the cell's hash table. */
898 unsigned int hash = generate_hash (mt, c, not_wild, ws);
899 cell = lookup_cell (mt, map, hash, c, not_wild, ws);
901 /* If it has not been seen before, then create a new
902 subcell, with this set of values, and insert it
906 cell = generate_cell (means, mt, c, not_wild, pcell, ws);
907 hmap_insert (map, &cell->hmap_node, hash);
913 /* This condition should only happen in the root node case. */
914 cell = ws->root_cell;
916 !control_var_missing (means, mt, not_wild, c, ws))
917 cell = generate_cell (means, mt, c, not_wild, pcell, ws);
922 /* Here is where the business really happens! After
923 testing for missing values, the cell's statistics
925 if (!control_var_missing (means, mt, not_wild, c, ws))
927 for (int v = 0; v < mt->n_dep_vars; ++v)
929 const struct variable *dep_var = mt->dep_vars[v];
930 const union value *vv = case_data (c, dep_var);
931 if (var_is_value_missing (dep_var, vv, means->dep_exclude))
934 for (int stat = 0; stat < means->n_statistics; ++stat)
936 const double weight = dict_get_case_weight (means->dict, c,
938 stat_update *su = cell_spec[means->statistics[stat]].su;
939 su (cell->stat[stat + v * means->n_statistics], weight,
940 case_data (c, dep_var)->f);
945 /* Recurse into all the children (if there are any). */
/* Child I fixes the layer at bit I + LEVEL in addition to those
   already fixed, searches the I'th child container's map, and
   continues at level LEVEL + I + 1 with CELL as the parent. */
946 for (int i = 0; i < cell->n_children; ++i)
948 struct cell_container *cc = cell->children + i;
949 service_cell_map (means, mt, c,
950 not_wild | (0x1U << (i + level)),
951 &cc->map, cell, level + i + 1, ws);
958 /* Do all the necessary preparation and pre-calculation that
959 needs to be done before iterating the data. */
/* Initialises the workspaces of every table in CMD before any data
   is read.

   For each combination of each table, the root cell is reset to NULL
   and zeroed CONTROL_IDX and INSTANCES arrays with one entry per layer
   are allocated.  The combination number is then decoded into one
   factor-variable index per layer, mixed-radix fashion: layers are
   visited from last to first, each taking CMB modulo its number of
   factor variables before CMB is divided by that number.  Each layer's
   instance hash map is initialised in the same pass.

   NOTE(review): the declaration and initial value of CMB are not
   visible in this listing (presumably the combination number I). */
961 prepare_means (struct means *cmd)
963 for (int t = 0; t < cmd->n_tables; ++t)
965 struct mtable *mt = cmd->table + t;
967 for (int i = 0; i < mt->n_combinations; ++i)
969 struct workspace *ws = mt->ws + i;
970 ws->root_cell = NULL;
971 ws->control_idx = xzalloc (mt->n_layers
972 * sizeof *ws->control_idx);
973 ws->instances = xzalloc (mt->n_layers
974 * sizeof *ws->instances);
/* The last layer is the least significant digit of the combination
   number. */
976 for (int l = mt->n_layers - 1; l >= 0; --l)
978 struct cell_container *instances = ws->instances + l;
979 const struct layer *layer = mt->layers[l];
980 ws->control_idx[l] = cmb % layer->n_factor_vars;
981 cmb /= layer->n_factor_vars;
982 hmap_init (&instances->map);
989 /* Do all the necessary calculations that occur AFTER iterating
/* Sorts the collected cells and instances of every table in CMD.

   For each combination of each table, the cell tree is arranged with
   arrange_cells starting at the root cell, which is asserted to have
   no parent.  Then, for each layer, the instance binary tree is
   initialised with compare_instance_3way, every instance from the
   layer's hash map is inserted into it, and the instances are numbered
   consecutively in tree order through their INDEX member.

   NOTE(review): the statement taken when a workspace has no root cell,
   and the declaration of the INDEX counter, are not visible in this
   listing. */
992 post_means (struct means *cmd)
994 for (int t = 0; t < cmd->n_tables; ++t)
996 struct mtable *mt = cmd->table + t;
997 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
999 struct workspace *ws = mt->ws + cmb;
1000 if (ws->root_cell == NULL)
1002 arrange_cells (ws, ws->root_cell, mt);
1003 /* The root cell should have no parent. */
1004 assert (ws->root_cell->parent_cell == 0);
1006 for (int l = 0; l < mt->n_layers; ++l)
1008 struct cell_container *instances = ws->instances + l;
1009 bt_init (&instances->bt, compare_instance_3way, NULL);
1011 /* Iterate the instance hash table, and insert each instance
1012 into the binary tree BT. */
1013 struct instance *inst;
1014 HMAP_FOR_EACH (inst, struct instance, hmap_node,
1017 bt_insert (&instances->bt, &inst->bt_node);
1020 /* Iterate the binary tree (in order) and assign the index
1021 member accordingly. */
1023 BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
1025 inst->index = index++;
1033 /* Update the summary information (the missings and the totals). */
/* Adds case C, with weight WEIGHT, to the case processing summaries
   of table MT.

   For every dependent variable and combination, the summary at index
   CMB * n_dep_vars + DV has WEIGHT added to its total.  WEIGHT is
   added to its missing count when the dependent variable's value is
   missing according to MEANS->DEP_EXCLUDE.  The control variables
   selected by the combination are checked against
   MEANS->CTRL_EXCLUDE, and a missing control value also adds WEIGHT
   to the missing count.

   NOTE(review): the else/break statements that keep a case from being
   counted as missing more than once are not visible in this listing
   -- confirm against the full source. */
1035 update_summaries (const struct means *means, struct mtable *mt,
1036 const struct ccase *c, double weight)
1038 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
1040 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1042 struct workspace *ws = mt->ws + cmb;
1043 struct summary *summ = mt->summ
1044 + cmb * mt->n_dep_vars + dv;
1046 summ->n_total += weight;
1047 const struct variable *var = mt->dep_vars[dv];
1048 const union value *vv = case_data (c, var);
1049 /* First check if the dependent variable is missing. */
1050 if (var_is_value_missing (var, vv, means->dep_exclude))
1051 summ->n_missing += weight;
1052 /* If the dep var is not missing, then check each
1053 control variable. */
1055 for (int l = 0; l < mt->n_layers; ++l)
1057 const struct layer *layer = mt->layers [l];
1058 const struct variable *var
1059 = layer->factor_vars[ws->control_idx[l]];
1060 const union value *vv = case_data (c, var);
1061 if (var_is_value_missing (var, vv, means->ctrl_exclude))
1063 summ->n_missing += weight;
/* Processes every case of INPUT for the command CMD.  DS is unused.

   prepare_means is called first.  Each case is then read in turn and
   its weight obtained from CMD's dictionary.  For every table the
   summaries are updated, and for every combination the case is fed to
   service_cell_map starting at the root (NOT_WILD 0, no map, no
   parent, level 0); the result is stored as the workspace's root
   cell.  Each case is unreferenced after use and the reader is
   destroyed at the end.

   NOTE(review): any statements after the reader is destroyed are not
   visible in this listing. */
1073 run_means (struct means *cmd, struct casereader *input,
1074 const struct dataset *ds UNUSED)
1076 struct ccase *c = NULL;
1077 struct casereader *reader;
1079 prepare_means (cmd);
1081 for (reader = input;
1082 (c = casereader_read (reader)) != NULL; case_unref (c))
1085 = dict_get_case_weight (cmd->dict, c, NULL);
1086 for (int t = 0; t < cmd->n_tables; ++t)
1088 struct mtable *mt = cmd->table + t;
1089 update_summaries (cmd, mt, c, weight);
1091 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1093 struct workspace *ws = mt->ws + cmb;
1095 ws->root_cell = service_cell_map (cmd, mt, c,
1096 0U, NULL, NULL, 0, ws);
1100 casereader_destroy (reader);
/* Entry point of the command: parses it from LEXER and runs it on
   dataset DS.

   A pool is created for the command and the defaults are set: both
   missing-value classes are MV_ANY, the dictionary is DS's, and the
   statistics are MEANS_MEAN, MEANS_N and MEANS_STDDEV.  After a
   successful means_parse, the number of combinations of each table is
   computed as the product of the number of factor variables of its
   layers.

   The data is then processed one split group at a time.  For each
   group the summary and workspace arrays of every table are
   allocated, run_means is called, the case processing summary and the
   report tables are output, the cell trees are destroyed and the
   workspaces are torn down.

   The results of casegrouper_destroy and proc_commit are combined in
   OK, and the pool is destroyed on both visible exit paths.

   NOTE(review): the return statements, the handling of a failed parse
   and the release of MT->SUMM / MT->WS are not visible in this
   listing. */
1108 cmd_means (struct lexer *lexer, struct dataset *ds)
1111 means.pool = pool_create ();
1113 means.ctrl_exclude = MV_ANY;
1114 means.dep_exclude = MV_ANY;
1118 means.dict = dataset_dict (ds);
1120 means.n_statistics = 3;
1121 means.statistics = pool_calloc (means.pool, 3, sizeof *means.statistics);
1122 means.statistics[0] = MEANS_MEAN;
1123 means.statistics[1] = MEANS_N;
1124 means.statistics[2] = MEANS_STDDEV;
1126 if (! means_parse (lexer, &means))
1129 /* Calculate some constant data for each table. */
1130 for (int t = 0; t < means.n_tables; ++t)
1132 struct mtable *mt = means.table + t;
1133 mt->n_combinations = 1;
1134 for (int l = 0; l < mt->n_layers; ++l)
1135 mt->n_combinations *= mt->layers[l]->n_factor_vars;
1139 struct casegrouper *grouper;
1140 struct casereader *group;
1143 grouper = casegrouper_create_splits (proc_open (ds), means.dict);
1144 while (casegrouper_get_next_group (grouper, &group))
1146 /* Allocate the workspaces. */
1147 for (int t = 0; t < means.n_tables; ++t)
1149 struct mtable *mt = means.table + t;
1150 mt->summ = xzalloc (mt->n_combinations * mt->n_dep_vars
1151 * sizeof (*mt->summ));
1152 mt->ws = xzalloc (mt->n_combinations * sizeof (*mt->ws));
1154 run_means (&means, group, ds);
1155 for (int t = 0; t < means.n_tables; ++t)
1157 const struct mtable *mt = means.table + t;
1159 means_case_processing_summary (mt);
1160 means_shipout (mt, &means);
1162 for (int i = 0; i < mt->n_combinations; ++i)
1164 struct workspace *ws = mt->ws + i;
1165 if (ws->root_cell == NULL)
1168 means_destroy_cells (&means, ws->root_cell, mt);
1172 /* Destroy the workspaces. */
1173 for (int t = 0; t < means.n_tables; ++t)
1175 struct mtable *mt = means.table + t;
1177 for (int i = 0; i < mt->n_combinations; ++i)
1179 struct workspace *ws = mt->ws + i;
1180 destroy_workspace (mt, ws);
1185 ok = casegrouper_destroy (grouper);
1186 ok = proc_commit (ds) && ok;
1189 pool_destroy (means.pool);
1194 pool_destroy (means.pool);