1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case.h"
20 #include "data/casegrouper.h"
21 #include "data/casereader.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/format.h"
25 #include "data/variable.h"
27 #include "libpspp/hmap.h"
28 #include "libpspp/bt.h"
29 #include "libpspp/hash-functions.h"
30 #include "libpspp/misc.h"
31 #include "libpspp/pool.h"
33 #include "language/command.h"
34 #include "language/lexer/lexer.h"
36 #include "count-one-bits.h"
37 #include "count-leading-zeros.h"
39 #include "output/pivot-table.h"
45 #define _(msgid) gettext (msgid)
46 #define N_(msgid) (msgid)
49 /* A "cell" in this procedure represents a distinct value of the
50 procedure's categorical variables, and a set of summary statistics
51 of all cases whose categorical variables have that set of
52 values. For example, the dataset
61 has three cells in layer 0 and two cells in layer 1 in addition
62 to a "grand summary" cell to which all (non-missing) cases belong.
65 The cells form an n-ary tree structure with the "grand summary" cell at its root. */
70 struct hmap_node hmap_node; /* Element in hash table. */
71 struct bt_node bt_node; /* Element in binary tree */
/* The containers which hold this cell's child cells. */
74 struct cell_container *children;
76 /* The statistics to be calculated for the cell. There is one entry
   for each pairing of a dependent variable V with a requested
   statistic S, stored at index S + V * n_statistics. */
77 struct statistic **stat;
79 /* The parent of this cell, or NULL if this is the root cell. */
80 const struct cell *parent_cell;
82 /* A bit-field variable which indicates which control variables
83 are allocated a fixed value (for this cell), and which are
86 A one indicates a fixed value. A zero indicates a wildcard.
87 Wildcard values are used to calculate totals and sub-totals.
89 unsigned int not_wild;
94 /* The variables corresponding to the above values. */
95 const struct variable **vars;
98 /* A structure used to find the union of all values used
99 within a layer, and to sort those values. */
102 struct hmap_node hmap_node; /* Element in hash table. */
103 struct bt_node bt_node; /* Element in binary tree */
105 /* A unique, consecutive, zero based index identifying this
109 /* The top level value of this instance. */
/* The variable which the value belongs to. Its width is used whenever
   the value is compared or destroyed. */
111 const struct variable *var;
/* Release the resources held by workspace WS of table MT. For each
   layer of MT, every instance in that layer's hash table has its value
   destroyed, using the width of the instance's own variable, and the
   hash table itself is then destroyed. Finally the control_idx and
   instances arrays of WS are freed. */
116 destroy_workspace (const struct mtable *mt, struct workspace *ws)
118 for (int l = 0; l < mt->n_layers; ++l)
120 struct cell_container *instances = ws->instances + l;
121 struct instance *inst;
122 struct instance *next;
123 HMAP_FOR_EACH_SAFE (inst, next, struct instance, hmap_node,
126 int width = var_get_width (inst->var);
127 value_destroy (&inst->value, width);
130 hmap_destroy (&instances->map);
132 free (ws->control_idx);
133 free (ws->instances);
138 destroy_cell (const struct means *means,
139 const struct mtable *mt, struct cell *cell)
142 for (int i = 0; i < mt->n_layers; ++i)
144 if (0 == ((cell->not_wild >> i) & 0x1))
147 const struct layer *layer = mt->layers[i];
148 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
150 struct workspace *ws = mt->ws + cmb;
151 const struct variable *var
152 = layer->factor_vars[ws->control_idx[i]];
154 int width = var_get_width (var);
155 value_destroy (&cell->values[idx++], width);
158 for (int i = 0; i < cell->n_children; ++i)
160 struct cell_container *container = cell->children + i;
161 hmap_destroy (&container->map);
164 for (int v = 0; v < mt->n_dep_vars; ++v)
166 for (int s = 0; s < means->n_statistics; ++s)
168 stat_destroy *des = cell_spec[means->statistics[s]].sf;
169 des (cell->stat[s + v * means->n_statistics]);
174 free (cell->children);
181 /* Walk the tree in postorder starting from CELL and destroy all the cells. */
/* MEANS and TABLE are passed unchanged to destroy_cell. Every cell
   found in the hash tables of CELL's child containers is handled by a
   recursive call before destroy_cell is called on CELL itself. */
184 means_destroy_cells (const struct means *means, struct cell *cell,
185 const struct mtable *table)
187 for (int i = 0; i < cell->n_children; ++i)
189 struct cell_container *container = cell->children + i;
190 struct cell *sub_cell;
192 HMAP_FOR_EACH_SAFE (sub_cell, next, struct cell, hmap_node,
195 means_destroy_cells (means, sub_cell, table);
199 destroy_cell (means, table, cell);
/* Debugging aid: print a description of CELL to stdout. The output
   contains CELL's address, one character for each layer of MT showing
   whether that layer's bit is set in CELL->not_wild, the name and the
   numeric value of each control variable whose bit is set, the first
   statistic of the cell as read by the MEANS_N getter, the number of
   children, and the address of the parent cell.

   LEVEL bounds the loop at the start of the function. The body of that
   loop is not visible in this excerpt; presumably it indents the
   output according to the depth of CELL in the tree. */
205 dump_cell (const struct cell *cell, const struct mtable *mt, int level)
207 for (int l = 0; l < level; ++l)
209 printf ("%p: ", cell);
210 for (int i = 0; i < mt->n_layers; ++i)
/* Note that 'w' is printed when the bit is SET, that is, when the
   layer has a fixed value in this cell. */
212 putchar (((cell->not_wild >> i) & 0x1) ? 'w' : '.');
216 for (int i = 0; i < mt->n_layers; ++i)
218 if ((cell->not_wild >> i) & 0x1)
220 printf ("%s: ", var_get_name (cell->vars[x]));
221 printf ("%g ", cell->values[x++].f);
226 stat_get *sg = cell_spec[MEANS_N].sd;
227 printf ("--- S1: %g", sg (cell->stat[0]));
229 printf ("--- N Children: %d", cell->n_children);
230 // printf ("--- Level: %d", level);
231 printf ("--- Parent: %p", cell->parent_cell);
/* Debugging aid: print the N elements of INDEXES to stdout, each one
   followed by "; ", and finish the line with a newline. */
static void
dump_indeces (const size_t *indexes, int n)
{
  for (int i = 0; i < n; ++i)
    {
      /* A size_t must be printed with %zu. Using %ld is undefined
         behaviour wherever size_t and long are different types. */
      printf ("%zu; ", indexes[i]);
    }
  printf ("\n");
}
245 /* Dump the tree in pre-order: CELL itself is dumped first and
   then, for each of its child containers, every cell in that
   container's binary tree is dumped one LEVEL deeper. PARENT is the
   cell which is expected to be CELL's parent; this is checked with an
   assertion. TABLE is passed on to dump_cell. */
247 dump_tree (const struct cell *cell, const struct mtable *table,
248 int level, const struct cell *parent)
250 assert (cell->parent_cell == parent);
251 dump_cell (cell, table, level);
253 for (int i = 0; i < cell->n_children; ++i)
255 struct cell_container *container = cell->children + i;
256 struct cell *sub_cell;
257 BT_FOR_EACH (sub_cell, struct cell, bt_node, &container->bt)
259 dump_tree (sub_cell, table, level + 1, cell);
266 /* Generate a hash from the values which the case C holds for the
   control variables of MT. For each layer which contributes, the
   number of the layer and the value of the control variable that WS
   selects for it (through WS->control_idx) are folded into the hash.
   Layers whose bit in NOT_WILD is clear are tested for first; the
   statement under that test is not visible in this excerpt, but
   presumably it skips the layer. */
269 generate_hash (const struct mtable *mt,
270 const struct ccase *c,
271 unsigned int not_wild,
272 const struct workspace *ws)
274 unsigned int hash = 0;
275 for (int i = 0; i < mt->n_layers; ++i)
277 if (0 == ((not_wild >> i) & 0x1))
280 const struct layer *layer = mt->layers[i];
281 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
282 const union value *vv = case_data (c, var);
283 int width = var_get_width (var);
284 hash = hash_int (i, hash);
285 hash = value_hash (vv, width, hash);
291 /* Create a cell based on the N variables in the array VARS,
292 which are indices into the case C.
293 The caller is responsible for destroying this cell when it is no longer needed. */
/* MEANS supplies the statistics to create and the pool which is passed
   to their constructors. WS selects, for each layer of MT, which
   control variable is read from the case C. PCELL is stored as the
   parent_cell of the new cell. One value and one variable are recorded
   for each bit set in NOT_WILD (this is asserted), and one statistic
   is created for each pairing of a dependent variable with a requested
   statistic. */
296 generate_cell (const struct means *means,
297 const struct mtable *mt,
298 const struct ccase *c,
299 unsigned int not_wild,
300 const struct cell *pcell,
301 const struct workspace *ws)
303 int n_vars = count_one_bits (not_wild);
304 struct cell *cell = XZALLOC (struct cell);
305 cell->values = xcalloc (n_vars, sizeof *cell->values);
306 cell->vars = xcalloc (n_vars, sizeof *cell->vars);
307 cell->not_wild = not_wild;
309 cell->parent_cell = pcell;
/* The width of NOT_WILD in bits, less its count of leading zeros, is
   the position just above its highest set bit. The number of child
   containers is therefore the number of layers above that bit. */
310 cell->n_children = mt->n_layers -
311 (sizeof (cell->not_wild) * CHAR_BIT) +
312 count_leading_zeros (cell->not_wild);
315 for (int i = 0; i < mt->n_layers; ++i)
317 if (0 == ((not_wild >> i) & 0x1))
320 const struct layer *layer = mt->layers[i];
321 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
322 const union value *vv = case_data (c, var);
323 int width = var_get_width (var);
324 cell->vars[idx] = var;
325 value_clone (&cell->values[idx++], vv, width);
327 assert (idx == n_vars);
329 cell->children = xcalloc (cell->n_children, sizeof *cell->children);
330 for (int i = 0; i < cell->n_children; ++i)
332 struct cell_container *container = cell->children + i;
333 hmap_init (&container->map);
336 cell->stat = xcalloc (means->n_statistics * mt->n_dep_vars, sizeof *cell->stat);
337 for (int v = 0; v < mt->n_dep_vars; ++v)
339 for (int stat = 0; stat < means->n_statistics; ++stat)
341 stat_create *sc = cell_spec[means->statistics[stat]].sc;
/* Same layout as the one used when the statistics are read back:
   STAT + V * n_statistics. */
343 cell->stat[stat + v * means->n_statistics] = sc (means->pool);
350 /* Search HMAP, among the entries whose hash is HASH, for a cell
   which has the same NOT_WILD and whose stored values equal the values
   that the case C holds for the corresponding control variables (the
   ones which WS selects for the layers of MT).
   If such a cell exists in HMAP, then return that cell.
   Otherwise, return NULL. */
355 lookup_cell (const struct mtable *mt,
356 struct hmap *hmap, unsigned int hash,
357 const struct ccase *c,
358 unsigned int not_wild,
359 const struct workspace *ws)
361 struct cell *cell = NULL;
362 HMAP_FOR_EACH_WITH_HASH (cell, struct cell, hmap_node, hash, hmap)
366 if (cell->not_wild != not_wild)
368 for (int i = 0; i < mt->n_layers; ++i)
370 if (0 == ((cell->not_wild >> i) & 0x1))
373 const struct layer *layer = mt->layers[i];
374 const struct variable *var = layer->factor_vars[ws->control_idx[i]];
375 const union value *vv = case_data (c, var);
376 int width = var_get_width (var);
/* The variable recorded in the cell must be the one which WS selects
   for this layer. */
377 assert (var == cell->vars[idx]);
378 if (!value_equal (vv, &cell->values[idx++], width))
391 /* A comparison function used to sort cells in a binary tree.
392 Only the innermost value needs to be compared, because all the
393 cells which appear in the same container share the same outer values. */
/* A and B are the bt_node members of the two cells to be compared.
   Both cells must have the same NOT_WILD and the same innermost
   variable; both conditions are asserted. The values are compared
   using the width of that variable. AUX is unused. */
396 cell_compare_3way (const struct bt_node *a,
397 const struct bt_node *b,
398 const void *aux UNUSED)
400 const struct cell *fa = BT_DATA (a, struct cell, bt_node);
401 const struct cell *fb = BT_DATA (b, struct cell, bt_node);
403 assert (fa->not_wild == fb->not_wild);
/* The innermost value is the last one stored, so its index is one less
   than the number of bits set in NOT_WILD. */
404 int vidx = count_one_bits (fa->not_wild) - 1;
405 assert (fa->vars[vidx] == fb->vars[vidx]);
407 return value_compare_3way (&fa->values[vidx],
409 var_get_width (fa->vars[vidx]));
412 /* A comparison function used to sort instances in a binary tree.
   A and B are the bt_node members of the two instances, which must
   refer to the same variable (this is asserted). Their values are
   compared using the width of that variable. AUX is unused. */
414 compare_instance_3way (const struct bt_node *a,
415 const struct bt_node *b,
416 const void *aux UNUSED)
418 const struct instance *fa = BT_DATA (a, struct instance, bt_node);
419 const struct instance *fb = BT_DATA (b, struct instance, bt_node);
421 assert (fa->var == fb->var);
423 return value_compare_3way (&fa->value,
425 var_get_width (fa->var));
/* Declared ahead of its definition because arrange_cell, which is
   defined first, calls it. */
429 static void arrange_cells (struct workspace *ws,
430 struct cell *cell, const struct mtable *table);
433 /* Iterate CONTAINER's map, inserting each of its elements into
434 CONTAINER's binary tree. Also, for each layer in the table, maintain
435 an instance container, containing the union of all the values seen in that layer. */
/* WS is the workspace whose per-layer instance tables are updated, and
   MT is the table being processed. Each cell in CONTAINER's map is
   inserted into CONTAINER's binary tree, which is ordered by
   cell_compare_3way. The instance table of the layer concerned is
   then searched for an instance whose value equals the cell's value,
   and a new instance holding a clone of that value is allocated and
   inserted under the same hash. The test which decides whether to
   create the new instance is not visible in this excerpt; presumably
   it is created only when the search found no match. Finally
   arrange_cells is called on the cell, so that its own children are
   arranged too. */
438 arrange_cell (struct workspace *ws, struct cell_container *container,
439 const struct mtable *mt)
441 struct bt *bt = &container->bt;
442 struct hmap *map = &container->map;
443 bt_init (bt, cell_compare_3way, NULL);
446 HMAP_FOR_EACH (cell, struct cell, hmap_node, map)
448 bt_insert (bt, &cell->bt_node);
451 for (int i = 0; i < mt->n_layers; ++i)
453 if (0 == ((cell->not_wild >> i) & 0x1))
456 struct cell_container *instances = ws->instances + i;
457 const struct variable *var = cell->vars[idx];
458 int width = var_get_width (var);
460 = value_hash (&cell->values[idx], width, 0);
462 struct instance *inst = NULL;
463 struct instance *next = NULL;
464 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance,
466 hash, &instances->map)
468 assert (cell->vars[idx] == var);
469 if (value_equal (&inst->value,
479 inst = xzalloc (sizeof *inst);
482 value_clone (&inst->value, &cell->values[idx],
484 hmap_insert (&instances->map, &inst->hmap_node, hash);
490 arrange_cells (ws, cell, mt);
494 /* Arrange the children and then all the subtotals: arrange_cell
   is called on each of CELL's child containers in turn, and WS and
   TABLE are passed on to it unchanged. */
496 arrange_cells (struct workspace *ws, struct cell *cell,
497 const struct mtable *table)
499 for (int i = 0; i < cell->n_children; ++i)
501 struct cell_container *container = cell->children + i;
502 arrange_cell (ws, container, table);
509 /* If the top level value in CELL, has an instance in the L_IDX'th layer,
510 then return that instance. Otherwise return NULL. */
511 static const struct instance *
512 lookup_instance (const struct mtable *mt, const struct workspace *ws,
513 int l_idx, const struct cell *cell)
515 const struct layer *layer = mt->layers[l_idx];
516 int n_vals = count_one_bits (cell->not_wild);
517 const struct variable *var = layer->factor_vars[ws->control_idx[l_idx]];
518 const union value *val = cell->values + n_vals - 1;
519 int width = var_get_width (var);
520 unsigned int hash = value_hash (val, width, 0);
521 const struct cell_container *instances = ws->instances + l_idx;
522 struct instance *inst = NULL;
523 struct instance *next;
524 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next,
525 struct instance, hmap_node,
526 hash, &instances->map)
528 if (value_equal (val, &inst->value, width))
534 /* Enter the values into PT.
   For each pairing of a dependent variable of MT with a statistic
   requested in MEANS, the statistic of CELL is stored in PT as a
   number. The function then calls itself for every cell in the binary
   trees of CELL's child containers, so that the whole tree below CELL
   is entered. WS supplies the per-layer instance tables which are
   used to work out the position of CELL in each layer dimension. */
536 populate_table (const struct means *means, const struct mtable *mt,
537 const struct workspace *ws,
538 const struct cell *cell,
539 struct pivot_table *pt)
541 size_t *indexes = XCALLOC (pt->n_dimensions, size_t);
542 for (int v = 0; v < mt->n_dep_vars; ++v)
544 for (int s = 0; s < means->n_statistics; ++s)
547 if (mt->n_dep_vars > 1)
550 int stat = means->statistics[s];
551 stat_get *sg = cell_spec[stat].sd;
553 const struct cell *pc = cell;
554 for (; i < pt->n_dimensions; ++i)
/* The remaining dimensions correspond to the layers in reverse
   order. */
556 int l_idx = pt->n_dimensions - i - 1;
557 const struct cell_container *instances = ws->instances + l_idx;
558 if (0 == (cell->not_wild >> l_idx & 0x1U))
/* A wildcard layer uses the index one past the last instance of the
   layer. create_table_structure adds a "Total" leaf after the leaves
   for the instances. */
560 indexes [i] = hmap_count (&instances->map);
565 const struct instance *inst
566 = lookup_instance (mt, ws, l_idx, pc);
568 indexes [i] = inst->index;
569 pc = pc->parent_cell;
574 int idx = s + v * means->n_statistics;
575 struct pivot_value *pv
576 = pivot_value_new_number (sg (cell->stat[idx]));
/* A statistic which has no result class of its own is displayed in
   the print format of the dependent variable. */
577 if (NULL == cell_spec[stat].rc)
579 const struct variable *dv = mt->dep_vars[v];
580 pv->numeric.format = * var_get_print_format (dv);
582 pivot_table_put (pt, indexes, pt->n_dimensions, pv);
587 for (int i = 0; i < cell->n_children; ++i)
589 struct cell_container *container = cell->children + i;
590 struct cell *child = NULL;
591 BT_FOR_EACH (child, struct cell, bt_node, &container->bt)
593 populate_table (means, mt, ws, child, pt);
/* Add one row dimension to PT for each layer of MT. The dimension is
   labelled with the string form of the control variable which WS
   selects for the layer. It receives one leaf for each instance in
   the layer's binary tree, in tree order, and a leaf with the text
   "Total". The text of an instance's leaf is built with
   var_append_value_name and has leading tabs and spaces trimmed. */
599 create_table_structure (const struct mtable *mt, struct pivot_table *pt,
600 const struct workspace *ws)
602 int * lindexes = ws->control_idx;
603 /* The inner layers are situated rightmost in the table.
604 So this iteration is in reverse order. */
605 for (int l = mt->n_layers - 1; l >= 0; --l)
607 const struct layer *layer = mt->layers[l];
608 const struct cell_container *instances = ws->instances + l;
609 const struct variable *var = layer->factor_vars[lindexes[l]];
610 struct pivot_dimension *dim_layer
611 = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
612 var_to_string (var));
613 dim_layer->root->show_label = true;
615 /* Place the values of the control variables as table headings. */
617 struct instance *inst = NULL;
618 BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
620 struct substring space = SS_LITERAL_INITIALIZER ("\t ");
622 ds_init_empty (&str);
623 var_append_value_name (var,
627 ds_ltrim (&str, space);
629 pivot_category_create_leaf (dim_layer->root,
630 pivot_value_new_text (ds_cstr (&str)));
636 pivot_category_create_leaf (dim_layer->root,
637 pivot_value_new_text ("Total"));
641 /* Append to C_DES a string describing the control variables
   relating to MT, LINDEXES: for each layer of MT, the name of the
   control variable which LINDEXES selects for that layer, with the
   names separated by " * ". The visible code only appends to C_DES;
   means_shipout initialises the string before calling this
   function. */
644 layers_to_string (const struct mtable *mt, const int *lindexes,
645 struct string *c_des)
647 for (int l = 0; l < mt->n_layers; ++l)
649 const struct layer *layer = mt->layers[l];
650 const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
652 ds_put_cstr (c_des, " * ");
653 ds_put_cstr (c_des, var_get_name (ctrl_var));
658 populate_case_processing_summary (struct pivot_category *pc,
659 const struct mtable *mt,
665 for (l = 0; l < mt->n_layers; ++l)
667 const struct layer *layer = mt->layers[l];
668 const struct variable *ctrl_var = layer->factor_vars[lindexes[l]];
670 ds_put_cstr (&ds, " * ");
671 ds_put_cstr (&ds, var_get_name (ctrl_var));
673 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
676 ds_init_empty (&dss);
677 ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
678 if (mt->n_layers > 0)
680 ds_put_cstr (&dss, " * ");
681 ds_put_substring (&dss, ds.ss);
683 pivot_category_create_leaf (pc,
684 pivot_value_new_text (ds_cstr (&dss)));
691 /* Create the "Case Processing Summary" table for MT and submit it.
   The column dimension "Cases" holds three groups, each with the
   leaves "N" and "Percent". The row dimension "Variables" receives
   the leaves which populate_case_processing_summary creates for each
   workspace of MT. The values come from the summary kept for each
   pairing of a workspace with a dependent variable.

   The values are stored at column indexes 0 to 5. Presumably these
   follow the order in which the leaves are created: N and Percent of
   the first group, then of the second, then of the third.

   NOTE(review): every percentage divides by summ->n_total. Nothing
   visible here guards against a total of zero; confirm whether that
   can occur. */
693 means_case_processing_summary (const struct mtable *mt)
695 struct pivot_table *pt = pivot_table_create (N_("Case Processing Summary"));
697 struct pivot_dimension *dim_cases =
698 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Cases"));
699 dim_cases->root->show_label = true;
701 struct pivot_category *cats[3];
702 cats[0] = pivot_category_create_group (dim_cases->root,
703 N_("Included"), NULL);
704 cats[1] = pivot_category_create_group (dim_cases->root,
705 N_("Excluded"), NULL);
706 cats[2] = pivot_category_create_group (dim_cases->root,
708 for (int i = 0; i < 3; ++i)
710 pivot_category_create_leaf_rc (cats[i],
711 pivot_value_new_text (N_("N")),
713 pivot_category_create_leaf_rc (cats[i],
714 pivot_value_new_text (N_("Percent")),
718 struct pivot_dimension *rows =
719 pivot_dimension_create (pt, PIVOT_AXIS_ROW, N_("Variables"));
721 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
723 const struct workspace *ws = mt->ws + cmb;
724 populate_case_processing_summary (rows->root, mt, ws->control_idx);
725 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
/* The same index selects both the summary and the row of the table. */
727 int idx = cmb * mt->n_dep_vars + dv;
728 const struct summary *summ = mt->summ + idx;
729 double n_included = summ->n_total - summ->n_missing;
730 pivot_table_put2 (pt, 5, idx,
731 pivot_value_new_number (100.0 * summ->n_total / summ->n_total));
732 pivot_table_put2 (pt, 4, idx,
733 pivot_value_new_number (summ->n_total));
735 pivot_table_put2 (pt, 3, idx,
736 pivot_value_new_number (100.0 * summ->n_missing / summ->n_total));
737 pivot_table_put2 (pt, 2, idx,
738 pivot_value_new_number (summ->n_missing));
740 pivot_table_put2 (pt, 1, idx,
741 pivot_value_new_number (100.0 * n_included / summ->n_total));
742 pivot_table_put2 (pt, 0, idx,
743 pivot_value_new_number (n_included));
747 pivot_table_submit (pt);
/* Build and submit the "Report" table for workspace WS of table MT.
   The table has a "Statistics" column dimension with one leaf for
   each statistic requested in MEANS, titled with the translated title
   of the statistic and carrying its result class. The row dimensions
   are added by create_table_structure and the values are entered by
   populate_table, starting from the root cell of WS. */
751 means_shipout_single (const struct mtable *mt, const struct means *means,
752 const struct workspace *ws)
754 struct pivot_table *pt = pivot_table_create (N_("Report"));
756 struct pivot_dimension *dim_cells =
757 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Statistics"));
759 /* Set the statistics headings, eg "Mean", "Std. Dev" etc. */
760 for (int i = 0; i < means->n_statistics; ++i)
762 const struct cell_spec *cs = cell_spec + means->statistics[i];
763 pivot_category_create_leaf_rc
765 pivot_value_new_text (gettext (cs->title)), cs->rc);
768 create_table_structure (mt, pt, ws);
769 populate_table (means, mt, ws, ws->root_cell, pt);
770 pivot_table_submit (pt);
/* Build and submit the table for workspace WS of table MT, in the
   layout used when MT has more than one dependent variable. The title
   is made from the names of the dependent variables followed by the
   names of the control variables which WS selects, joined with " * ".
   The table has a "Variables" column dimension with one leaf for each
   dependent variable, and a row dimension whose label is hidden with
   one leaf for each statistic requested in MEANS. The layer dimensions
   are added by create_table_structure and the values are entered by
   populate_table, starting from the root cell of WS. */
775 means_shipout_multivar (const struct mtable *mt, const struct means *means,
776 const struct workspace *ws)
779 ds_init_empty (&dss);
780 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
783 ds_put_cstr (&dss, " * ");
784 ds_put_cstr (&dss, var_get_name (mt->dep_vars[dv]));
787 for (int l = 0; l < mt->n_layers; ++l)
789 ds_put_cstr (&dss, " * ");
790 const struct layer *layer = mt->layers[l];
791 const struct variable *var = layer->factor_vars[ws->control_idx[l]];
792 ds_put_cstr (&dss, var_get_name (var));
795 struct pivot_table *pt = pivot_table_create (ds_cstr (&dss));
798 struct pivot_dimension *dim_cells =
799 pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Variables"));
801 for (int i = 0; i < mt->n_dep_vars; ++i)
803 pivot_category_create_leaf
805 pivot_value_new_variable (mt->dep_vars[i]));
808 struct pivot_dimension *dim_stats
809 = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
811 dim_stats->root->show_label = false;
813 for (int i = 0; i < means->n_statistics; ++i)
815 const struct cell_spec *cs = cell_spec + means->statistics[i];
816 pivot_category_create_leaf_rc
818 pivot_value_new_text (gettext (cs->title)), cs->rc);
821 create_table_structure (mt, pt, ws);
822 populate_table (means, mt, ws, ws->root_cell, pt);
823 pivot_table_submit (pt);
/* Output the results for every workspace of table MT. A workspace
   which has no root cell produces a warning that names its control
   variables instead of a table. Otherwise the table is produced by
   means_shipout_multivar when MT has more than one dependent variable,
   and by means_shipout_single in the other case. */
827 means_shipout (const struct mtable *mt, const struct means *means)
829 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
831 const struct workspace *ws = mt->ws + cmb;
832 if (ws->root_cell == NULL)
835 ds_init_empty (&des);
836 layers_to_string (mt, ws->control_idx, &des);
837 msg (MW, _("The table \"%s\" has no non-empty control variables."
838 " No result for this table will be displayed."),
843 if (mt->n_dep_vars > 1)
844 means_shipout_multivar (mt, means, ws);
846 means_shipout_single (mt, means, ws);
/* Examine, for the layers of MT, the value which the case C holds for
   the control variable that WS selects, and record in MISS whether
   that value is missing in one of the ways selected by
   MEANS->ctrl_exclude. The statements which follow the assignment to
   MISS, and the return statement, are not visible in this excerpt.
   The callers in this file use the result as a truth value meaning
   "some control variable is missing". NOT_WILD is unused; the test
   which consulted it is commented out. */
854 control_var_missing (const struct means *means,
855 const struct mtable *mt,
856 unsigned int not_wild UNUSED,
857 const struct ccase *c,
858 const struct workspace *ws)
861 for (int l = 0; l < mt->n_layers; ++l)
863 /* if (0 == ((not_wild >> l) & 0x1)) */
868 const struct layer *layer = mt->layers[l];
869 const struct variable *var = layer->factor_vars[ws->control_idx[l]];
870 const union value *vv = case_data (c, var);
872 miss = (var_is_value_missing (var, vv) & means->ctrl_exclude) != 0;
880 /* Lookup the set of control variables described by MT, C and NOT_WILD,
881 in the hash table MAP. If there is no such entry, then create a
882 cell with these parameters and add it to MAP.
883 If the generated cell has children, repeat for all the children.
884 Returns the root cell. */
/* MEANS supplies the requested statistics, the dictionary used to
   obtain the case weight, and the exclusion settings. WS selects the
   control variable of each layer of MT and holds the root cell. PCELL
   is recorded as the parent of any cell created here.

   When no control variable is missing, each statistic of the cell is
   updated with the weight of the case C and with the numeric value of
   each dependent variable in C. A dependent variable is tested first
   for a value which is missing in one of the ways selected by
   MEANS->dep_exclude; the statement under that test is not visible in
   this excerpt, but presumably such a variable is skipped.

   For child number I of the cell, the function calls itself with bit
   I + LEVEL added to NOT_WILD, with the hash table of that child
   container, with the cell as the parent, and with LEVEL + I + 1 as
   the new level. */
887 service_cell_map (const struct means *means, const struct mtable *mt,
888 const struct ccase *c,
889 unsigned int not_wild,
891 const struct cell *pcell,
893 const struct workspace *ws)
895 struct cell *cell = NULL;
898 if (!control_var_missing (means, mt, not_wild, c, ws))
900 /* Lookup this set of values in the cell's hash table. */
901 unsigned int hash = generate_hash (mt, c, not_wild, ws);
902 cell = lookup_cell (mt, map, hash, c, not_wild, ws);
904 /* If it has not been seen before, then create a new
905 subcell, with this set of values, and insert it
909 cell = generate_cell (means, mt, c, not_wild, pcell, ws);
910 hmap_insert (map, &cell->hmap_node, hash);
916 /* This condition should only happen in the root node case. */
917 cell = ws->root_cell;
919 !control_var_missing (means, mt, not_wild, c, ws))
920 cell = generate_cell (means, mt, c, not_wild, pcell, ws);
925 /* Here is where the business really happens! After
926 testing for missing values, the cell's statistics
928 if (!control_var_missing (means, mt, not_wild, c, ws))
930 for (int v = 0; v < mt->n_dep_vars; ++v)
932 const struct variable *dep_var = mt->dep_vars[v];
933 const union value *vv = case_data (c, dep_var);
934 if (var_is_value_missing (dep_var, vv) & means->dep_exclude)
937 for (int stat = 0; stat < means->n_statistics; ++stat)
939 const double weight = dict_get_case_weight (means->dict, c,
941 stat_update *su = cell_spec[means->statistics[stat]].su;
942 su (cell->stat[stat + v * means->n_statistics], weight,
943 case_num (c, dep_var));
948 /* Recurse into all the children (if there are any). */
949 for (int i = 0; i < cell->n_children; ++i)
951 struct cell_container *cc = cell->children + i;
952 service_cell_map (means, mt, c,
953 not_wild | (0x1U << (i + level)),
954 &cc->map, cell, level + i + 1, ws);
961 /* Do all the necessary preparation and pre-calculation that
962 needs to be done before iterating the data.
   For every workspace of every table in CMD, the root cell is
   cleared, the control_idx and instances arrays are allocated with one
   entry for each layer, and the hash table of each layer's instances
   is initialised. */
964 prepare_means (struct means *cmd)
966 for (int t = 0; t < cmd->n_tables; ++t)
968 struct mtable *mt = cmd->table + t;
970 for (int i = 0; i < mt->n_combinations; ++i)
972 struct workspace *ws = mt->ws + i;
973 ws->root_cell = NULL;
974 ws->control_idx = xcalloc (mt->n_layers, sizeof *ws->control_idx);
975 ws->instances = xcalloc (mt->n_layers, sizeof *ws->instances);
/* CMB is split into one index for each layer, working from the last
   layer to the first: the remainder selects the control variable of
   the layer, and the quotient is carried to the next layer. The
   initialisation of CMB is not visible in this excerpt; presumably it
   starts as the workspace number I. */
977 for (int l = mt->n_layers - 1; l >= 0; --l)
979 struct cell_container *instances = ws->instances + l;
980 const struct layer *layer = mt->layers[l];
981 ws->control_idx[l] = cmb % layer->n_factor_vars;
982 cmb /= layer->n_factor_vars;
983 hmap_init (&instances->map);
990 /* Do all the necessary calculations that occur AFTER iterating the data. */
/* For every workspace of every table in CMD, the tree of cells below
   the root cell is arranged with arrange_cells. Then, for each layer,
   the instances in the layer's hash table are inserted into a binary
   tree ordered by compare_instance_3way, and are numbered in the order
   of that tree. A workspace whose root cell is null is tested for
   first; the statement under that test is not visible in this excerpt,
   but presumably such a workspace is skipped. */
993 post_means (struct means *cmd)
995 for (int t = 0; t < cmd->n_tables; ++t)
997 struct mtable *mt = cmd->table + t;
998 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1000 struct workspace *ws = mt->ws + cmb;
1001 if (ws->root_cell == NULL)
1003 arrange_cells (ws, ws->root_cell, mt);
1004 /* The root cell should have no parent. */
1005 assert (ws->root_cell->parent_cell == 0);
1007 for (int l = 0; l < mt->n_layers; ++l)
1009 struct cell_container *instances = ws->instances + l;
1010 bt_init (&instances->bt, compare_instance_3way, NULL);
1012 /* Iterate the instance hash table, and insert each instance
1013 into the binary tree BT. */
1014 struct instance *inst;
1015 HMAP_FOR_EACH (inst, struct instance, hmap_node,
1018 bt_insert (&instances->bt, &inst->bt_node);
1021 /* Iterate the binary tree (in order) and assign the index
1022 member accordingly. */
1024 BT_FOR_EACH (inst, struct instance, bt_node, &instances->bt)
1026 inst->index = index++;
1034 /* Update the summary information (the missings and the totals).
   There is one summary for each pairing of a workspace of MT with a
   dependent variable, stored at index CMB * n_dep_vars + DV. WEIGHT,
   the weight of the case C, is always added to the total of the
   summary. It is also added to the missing count when the dependent
   variable is missing in one of the ways selected by
   MEANS->dep_exclude, or when a control variable which the workspace
   selects is missing in one of the ways selected by
   MEANS->ctrl_exclude. */
1036 update_summaries (const struct means *means, struct mtable *mt,
1037 const struct ccase *c, double weight)
1039 for (int dv = 0; dv < mt->n_dep_vars; ++dv)
1041 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1043 struct workspace *ws = mt->ws + cmb;
1044 struct summary *summ = mt->summ
1045 + cmb * mt->n_dep_vars + dv;
1047 summ->n_total += weight;
1048 const struct variable *var = mt->dep_vars[dv];
1049 const union value *vv = case_data (c, var);
1050 /* First check if the dependent variable is missing. */
1051 if (var_is_value_missing (var, vv) & means->dep_exclude)
1052 summ->n_missing += weight;
1053 /* If the dep var is not missing, then check each
1054 control variable. */
1056 for (int l = 0; l < mt->n_layers; ++l)
1058 const struct layer *layer = mt->layers [l];
/* These declarations of VAR and VV hide the ones made above for the
   dependent variable. */
1059 const struct variable *var
1060 = layer->factor_vars[ws->control_idx[l]];
1061 const union value *vv = case_data (c, var);
1062 if (var_is_value_missing (var, vv) & means->ctrl_exclude)
1064 summ->n_missing += weight;
/* Process the cases of one group. prepare_means is called on CMD
   first. Then every case read from INPUT is used, for each table in
   CMD, to update the summaries of the table and to service the tree
   of cells of each workspace, starting at the root (no map, no parent,
   nothing fixed, level 0). The result of servicing the tree is stored
   as the root cell of the workspace. Each case is unreferenced after
   it has been used, and INPUT is destroyed once no more cases can be
   read. DS is unused. */
1074 run_means (struct means *cmd, struct casereader *input,
1075 const struct dataset *ds UNUSED)
1077 struct ccase *c = NULL;
1078 struct casereader *reader;
1080 prepare_means (cmd);
1082 for (reader = input;
1083 (c = casereader_read (reader)) != NULL; case_unref (c))
1086 = dict_get_case_weight (cmd->dict, c, NULL);
1087 for (int t = 0; t < cmd->n_tables; ++t)
1089 struct mtable *mt = cmd->table + t;
1090 update_summaries (cmd, mt, c, weight);
1092 for (int cmb = 0; cmb < mt->n_combinations; ++cmb)
1094 struct workspace *ws = mt->ws + cmb;
1096 ws->root_cell = service_cell_map (cmd, mt, c,
1097 0U, NULL, NULL, 0, ws);
1101 casereader_destroy (reader);
/* Entry point of the command. A struct means is set up with a new
   pool, with MV_ANY as the exclusion setting for both the control and
   the dependent variables, and with the dictionary of DS. The default
   statistics are selected and the command is parsed from LEXER. The
   number of combinations of each table is the product of the numbers
   of control variables in its layers.

   For each group of cases delivered by the case grouper, the summaries
   and workspaces of every table are allocated, the group is processed
   by run_means, the results of every table are output, and the cells
   and workspaces are destroyed.

   The pool is destroyed in two places. Presumably one belongs to the
   normal exit and the other to the exit taken when parsing fails; the
   return statements are not visible in this excerpt. */
1107 cmd_means (struct lexer *lexer, struct dataset *ds)
1109 struct means means = {
1110 .pool = pool_create (),
1111 .ctrl_exclude = MV_ANY,
1112 .dep_exclude = MV_ANY,
1113 .dict = dataset_dict (ds),
1115 means_set_default_statistics (&means);
1117 if (!means_parse (lexer, &means))
1120 /* Calculate some constant data for each table. */
1121 for (int t = 0; t < means.n_tables; ++t)
1123 struct mtable *mt = means.table + t;
1124 mt->n_combinations = 1;
1125 for (int l = 0; l < mt->n_layers; ++l)
1126 mt->n_combinations *= mt->layers[l]->n_factor_vars;
1129 struct casegrouper *grouper
1130 = casegrouper_create_splits (proc_open (ds), means.dict);
1131 struct casereader *group;
1132 while (casegrouper_get_next_group (grouper, &group))
1134 /* Allocate the workspaces. */
1135 for (int t = 0; t < means.n_tables; ++t)
1137 struct mtable *mt = means.table + t;
1138 mt->summ = xcalloc (mt->n_combinations * mt->n_dep_vars,
1140 mt->ws = xcalloc (mt->n_combinations, sizeof *mt->ws);
1142 run_means (&means, group, ds);
1143 for (int t = 0; t < means.n_tables; ++t)
1145 const struct mtable *mt = means.table + t;
1147 means_case_processing_summary (mt);
1148 means_shipout (mt, &means);
1150 for (int i = 0; i < mt->n_combinations; ++i)
1152 struct workspace *ws = mt->ws + i;
1154 means_destroy_cells (&means, ws->root_cell, mt);
1158 /* Destroy the workspaces. */
1159 for (int t = 0; t < means.n_tables; ++t)
1161 struct mtable *mt = means.table + t;
1163 for (int i = 0; i < mt->n_combinations; ++i)
1165 struct workspace *ws = mt->ws + i;
1166 destroy_workspace (mt, ws);
1172 bool ok = casegrouper_destroy (grouper);
1173 ok = proc_commit (ds) && ok;
1177 pool_destroy (means.pool);
1181 pool_destroy (means.pool);