1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case.h"
20 #include "data/casegrouper.h"
21 #include "data/casereader.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/format.h"
25 #include "data/variable.h"
27 #include "libpspp/hmap.h"
28 #include "libpspp/bt.h"
30 #include "output/pivot-table.h"
36 #define _(msgid) gettext (msgid)
37 #define N_(msgid) (msgid)
39 /* A "cell" in this procedure represents a distinct value of the
40 procedure's categorical variables, and a set of summary statistics
41 of all cases which whose categorical variables have that set of
42 values. For example, the dataset
51 has three cells in layer 0 and two cells in layer 1 in addition
52 to a "grand summary" cell to which all (non-missing) cases
55 The cells form a n-ary tree structure with the "grand summary"
60 struct hmap_node hmap_node; /* Element in hash table. */
61 struct bt_node bt_node; /* Element in binary tree */
63 struct cell_container children;
65 /* The level of the subtotal to which this cell pertains, or
66 -1 if it does not pertain to a subtotal. */
69 /* The statistics to be calculated for the cell. */
70 struct per_var_data **stat;
72 /* The parent of this cell, or NULL if this is the root cell. */
73 struct cell *parent_cell;
76 struct cell_container *subtotals;
79 union value values[1]; /* The value(s). */
83 dump_cell (const struct cell *cell, int level)
85 printf ("%p: ", cell);
86 for (int i = 0; i < cell->n_values; ++i)
87 printf ("%g; ", cell->values[i].f);
88 // printf ("--- Count: %g", cell->count);
89 // printf ("--- N Subtotals: %d", cell->n_subtotals);
90 printf ("--- Level: %d", level);
91 printf ("--- Subtotal: %d", cell->subtotal);
92 printf ("--- Parent: %p", cell->parent_cell);
97 dump_tree (const struct cell *cell, int level)
99 struct cell *sub_cell;
100 BT_FOR_EACH (sub_cell, struct cell, bt_node, &cell->children.bt)
102 dump_tree (sub_cell, level + 1);
105 for (int i = 0; i < cell->n_subtotals; ++i)
107 struct cell_container *container = cell->subtotals + i;
109 BT_FOR_EACH (scell, struct cell, bt_node, &container->bt)
111 dump_cell (scell, level);
115 dump_cell (cell, level);
120 struct hmap_node hmap_node; /* Element in hash table. */
121 struct bt_node bt_node; /* Element in binary tree */
123 /* A unique, consecutive, zero based index identifying this
127 /* The top level value of this instance. */
132 dump_layer (const struct layer *layer)
134 printf ("Layer: %p; fv0: %s; N %ld\n", layer,
136 ? var_get_name (layer->factor_vars[0])
138 hmap_count (&layer->instances.map)
141 struct instance *inst;
142 BT_FOR_EACH (inst, struct instance, bt_node, &layer->instances.bt)
144 printf ("Val %g; Index %d\n", inst->value.f, inst->index);
150 /* Generate a hash based on the values of the N variables in
151 the array VARS which are taken from the case C. */
153 generate_hash (const struct ccase *c, int n,
154 const struct variable * const * vars)
157 for (int i = 0; i < n; ++i)
159 const struct variable *var = vars[i];
160 const union value *vv = case_data (c, var);
161 int width = var_get_width (var);
162 hash = value_hash (vv, width, hash);
168 /* Create a cell based on the N variables in the array VARS,
169 which are indeces into the case C.
170 The caller is responsible for destroying this cell when
173 generate_cell (const struct means *means, const struct ccase *c, int n,
174 const struct variable * const * vars)
176 struct cell *cell = xzalloc ((sizeof *cell)
177 + (n - 1) * sizeof (union value));
180 for (int i = 0; i < n; ++i)
182 const struct variable *var = vars[i];
183 const union value *vv = case_data (c, var);
184 int width = var_get_width (var);
185 value_clone (&cell->values[i], vv, width);
188 hmap_init (&cell->children.map);
190 cell->stat = xcalloc (means->n_statistics, sizeof *cell->stat);
191 for (int stat = 0; stat < means->n_statistics; ++stat)
193 stat_create *sc = cell_spec[means->statistics[stat]].sc;
195 cell->stat[stat] = sc (means->pool);
202 /* If a cell based on the N variables in the array VARS,
203 which are indeces into the case C and whose hash is HASH,
204 exists in HMAP, then return that cell.
205 Otherwise, return NULL. */
207 lookup_cell (struct hmap *hmap, size_t hash,
208 const struct ccase *c,
209 int n, const struct variable * const * vars)
211 struct cell *fcol = NULL;
212 HMAP_FOR_EACH_WITH_HASH (fcol, struct cell, hmap_node, hash, hmap)
215 for (int i = 0; i < n; ++i)
217 const struct variable *var = vars[i];
218 const union value *vv = case_data (c, var);
219 int width = var_get_width (var);
220 if (!value_equal (vv, &fcol->values[i], width))
233 /* A comparison function used to sort cells in a binary tree. */
235 my_compare_func (const struct bt_node *a,
236 const struct bt_node *b,
239 const struct cell *fa = BT_DATA (a, struct cell, bt_node);
240 const struct cell *fb = BT_DATA (b, struct cell, bt_node);
242 const struct variable *const *cv = aux;
244 int vidx = fa->n_values - 1;
245 assert (fa->n_values == fb->n_values);
247 // FIXME: Consider whether other layers need to be compared.
248 int r = value_compare_3way (&fa->values[vidx],
250 var_get_width (cv[vidx]));
254 /* A comparison function used to sort cells in a binary tree. */
256 my_other_compare_func (const struct bt_node *a,
257 const struct bt_node *b,
260 const struct instance *fa = BT_DATA (a, struct instance, bt_node);
261 const struct instance *fb = BT_DATA (b, struct instance, bt_node);
263 const struct variable *var = aux;
265 int r = value_compare_3way (&fa->value,
267 var_get_width (var));
272 static void arrange_cells (struct cell *cell, const struct mtable *table);
275 /* Walk the tree starting at CELL, creating and populating the BT for
276 each cell. Also assigns the "rank", "parent_cell" and "subtotal" members
279 arrange_cell (struct cell_container *container,
280 struct cell *cell, const struct mtable *table,
283 const struct variable **control_vars = table->control_vars;
284 struct bt *bt = &container->bt;
285 struct hmap *map = &container->map;
286 bt_init (bt, my_compare_func, control_vars);
289 HMAP_FOR_EACH (scell, struct cell, hmap_node, map)
291 scell->parent_cell = cell;
292 scell->subtotal = subtotal;
293 bt_insert (bt, &scell->bt_node);
294 arrange_cells (scell, table);
297 if (cell->n_values > 0 && cell->subtotal == -1)
299 struct layer *layer = table->layers[cell->n_values - 1];
301 const struct variable *var = control_vars[cell->n_values - 1];
302 int width = var_get_width (var);
304 = value_hash (&cell->values[cell->n_values - 1], width, 0);
307 struct instance *inst = NULL;
308 struct instance *next = NULL;
309 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next, struct instance, hmap_node,
310 hash, &layer->instances.map)
312 if (value_equal (&inst->value,
313 &cell->values[cell->n_values - 1],
322 inst = xzalloc (sizeof *inst);
324 value_copy (&inst->value, &cell->values[cell->n_values -1],
326 hmap_insert (&layer->instances.map, &inst->hmap_node, hash);
331 /* Arrange the children and then all the subtotals. */
333 arrange_cells (struct cell *cell, const struct mtable *table)
335 arrange_cell (&cell->children, cell, table, -1);
337 for (int s = 0; s < cell->n_subtotals; ++ s)
339 arrange_cell (cell->subtotals + s, cell, table, s);
346 /* If the top level value in CELL, has an instance in LAYER, then
347 return that instance. Otherwise return NULL. */
348 static const struct instance *
349 lookup_instance (const struct layer *layer, const struct cell *cell)
351 const struct variable *var = layer->factor_vars[0];
352 const union value *val = cell->values + cell->n_values - 1;
353 int width = var_get_width (var);
354 unsigned int hash = value_hash (val, width, 0);
355 struct instance *inst = NULL;
356 struct instance *next;
357 HMAP_FOR_EACH_WITH_HASH_SAFE (inst, next,
358 struct instance, hmap_node,
359 hash, &layer->instances.map)
361 if (value_equal (val, &inst->value, width))
368 populate_table (const struct cell *cell, const struct means *means,
369 struct pivot_table *table, int level)
371 /* It is easier to overallocate this table by 2, than to adjust the
372 logic when assigning its contents, and it is cheap to do so. */
373 size_t *indexes = xcalloc (table->n_dimensions + 2, sizeof *indexes);
374 for (int s = 0; s < means->n_statistics; ++s)
376 bool kludge = false; // FIXME: Remove this for production.
378 int stat = means->statistics[s];
379 if (cell->subtotal != -1)
381 // for (int i = 1; i < table->n_dimensions; ++i)
384 const struct layer *layer
385 = means->table->layers[table->n_dimensions - 1 - i];
387 const struct instance *inst = lookup_instance (layer, cell);
389 indexes[i] = inst->index;
392 const struct layer *layer
393 = means->table->layers[table->n_dimensions - 1 - i];
394 indexes[i] = hmap_count (&layer->instances.map);
397 else if (hmap_is_empty (&cell->children.map))
399 const struct cell *pc = cell;
400 for (int i = 1; i < table->n_dimensions; ++i)
402 const struct layer *layer
403 = means->table->layers[table->n_dimensions - 1 - i];
405 const struct instance *inst = lookup_instance (layer, pc);
407 indexes[i] = inst->index;
408 pc = pc->parent_cell;
414 const struct layer *layer;
416 for (int st = 0; st < cell->n_subtotals; ++st)
418 layer = means->table->layers[table->n_dimensions - i - 2];
419 indexes[++i] = hmap_count (&layer->instances.map);
421 layer = means->table->layers[table->n_dimensions - i - 2];
422 indexes[++i] = hmap_count (&layer->instances.map);
423 if (++i < table->n_dimensions)
425 layer = means->table->layers[table->n_dimensions - i - 1];
426 const struct instance *inst = lookup_instance (layer, cell);
428 indexes[i] = inst->index;
435 stat_get *sg = cell_spec[stat].sd;
436 pivot_table_put (table, indexes, table->n_dimensions,
437 pivot_value_new_number (sg (cell->stat[s])));
442 const struct bt *bt = &cell->children.bt;
443 struct cell *child = NULL;
444 BT_FOR_EACH (child, struct cell, bt_node, bt)
446 populate_table (child, means, table, level + 1);
449 // printf ("There are %d subtotals\n", cell->n_subtotals);
450 for (int i = 0; i < cell->n_subtotals; ++i)
453 const struct cell_container *st = cell->subtotals + i;
455 HMAP_FOR_EACH (scell, struct cell, hmap_node, &st->map)
457 // printf ("%s:%d xxx\n", __FILE__, __LINE__);
458 /* dump_cell (scell, 0); */
459 populate_table (scell, means, table, level + 1);
467 ann_dim (struct pivot_table *t, int d, size_t *indeces)
473 const struct pivot_dimension *dim = t->dimensions[d];
474 for (int l = 0; l < dim->n_leaves; ++l)
478 for (x = 0; x < t->n_dimensions; ++x)
480 label[x] = '0' + indeces[x];
483 pivot_table_put (t, indeces, t->n_dimensions,
484 pivot_value_new_user_text (label, x));
485 ann_dim (t, d - 1, indeces);
490 annotate_table (struct pivot_table *t)
492 size_t *indeces = xcalloc (t->n_dimensions, sizeof *indeces);
494 for (int d = 0; d < t->n_dimensions; ++d)
496 ann_dim (t, d, indeces);
502 create_table_structure (const struct mtable *mt, struct pivot_table *table)
504 for (int l = mt->n_layers -1; l >=0 ; --l)
506 const struct layer *layer = mt->layers[l];
507 assert (layer->n_factor_vars > 0);
508 const struct variable *var = layer->factor_vars[0];
509 struct pivot_dimension *dim_layer
510 = pivot_dimension_create (table, PIVOT_AXIS_ROW,
511 var_to_string (var));
512 dim_layer->root->show_label = true;
515 struct instance *inst = NULL;
516 BT_FOR_EACH (inst, struct instance, bt_node, &layer->instances.bt)
518 struct substring space = SS_LITERAL_INITIALIZER ("\t ");
520 ds_init_empty (&str);
521 var_append_value_name (var,
525 ds_ltrim (&str, space);
527 pivot_category_create_leaf
529 pivot_value_new_text (ds_cstr (&str)));
535 pivot_category_create_leaf
537 pivot_value_new_text ("Total"));
542 means_shipout (const struct mtable *mt, const struct means *means)
544 struct pivot_table *table = pivot_table_create (N_("Report"));
546 struct pivot_dimension *dim_cells =
547 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"));
549 for (int i = 0; i < means->n_statistics; ++i)
551 const struct cell_spec *cs = cell_spec + means->statistics[i];
552 pivot_category_create_leaf
554 pivot_value_new_text (gettext (cs->title)));
557 create_table_structure (mt, table);
559 populate_table (mt->root_cell, means, table, 0);
560 // pivot_table_dump (table, 0);
561 // annotate_table (table);
563 pivot_table_submit (table);
569 missing_for_layer (const struct layer *layer, const struct ccase *c)
571 if (layer->n_factor_vars == 0)
574 const struct variable *var = layer->factor_vars[0];
575 const union value *vv = case_data (c, var);
577 return var_is_value_missing (var, vv, MV_ANY);
582 missing_for_any_layer (const struct mtable *table,
584 const struct ccase *c)
587 for (int l = startx; l < table->n_layers; ++l)
589 miss = missing_for_layer (table->layers[l], c);
599 update_map_from_data (const struct means *means,
600 const struct mtable *table,
603 const struct ccase *c,
608 const struct variable **cv = table->control_vars + start_var;
609 if (! missing_for_any_layer (table, startx, c))
611 const struct variable *dep_var = table->dep_vars[0];
613 size_t hash = generate_hash (c, n_vars, cv);
615 struct cell *fcol = lookup_cell (map, hash,
619 fcol = generate_cell (means, c, n_vars, cv);
620 fcol->n_subtotals = table->n_layers - 2 - start_var;
621 if (fcol->n_subtotals < 0)
622 fcol->n_subtotals = 0;
623 fcol->subtotals = xcalloc (fcol->n_subtotals,
624 sizeof *fcol->subtotals);
625 for (int i = 0; i < fcol->n_subtotals; ++i)
627 struct cell_container *c = fcol->subtotals + i;
631 hmap_insert (map, &fcol->hmap_node, hash);
634 for (int stat = 0; stat < means->n_statistics; ++stat)
636 stat_update *su = cell_spec[means->statistics[stat]].su;
637 su (fcol->stat[stat], weight, case_data (c, dep_var)->f);
647 run_means (struct means *cmd, struct casereader *input,
648 const struct dataset *ds UNUSED)
650 struct mtable *table = cmd->table + 0;
652 struct casereader *reader;
654 table->root_cell = generate_cell (cmd, 0, 0, 0);
655 table->root_cell->n_subtotals = table->n_layers - 1;
656 if (table->root_cell->n_subtotals < 0)
657 table->root_cell->n_subtotals = 0;
658 table->root_cell->subtotals
659 = xcalloc (table->root_cell->n_subtotals,
660 sizeof *table->root_cell->subtotals);
661 for (int i = 0; i < table->root_cell->n_subtotals; ++i)
663 struct cell_container *c = table->root_cell->subtotals + i;
668 = calloc (table->n_layers, sizeof *table->control_vars);
669 for (int l = 0; l < table->n_layers; ++l)
671 const struct layer *layer = table->layers[l];
672 if (layer->n_factor_vars > 0)
673 table->control_vars[l] = layer->factor_vars[0];
676 struct cell *cell = table->root_cell;
677 const struct variable *dep_var = table->dep_vars[0];
679 (c = casereader_read (reader)) != NULL; case_unref (c))
681 const double weight = dict_get_case_weight (cmd->dict, c, NULL);
682 if (! missing_for_any_layer (table, 0, c))
684 for (int stat = 0; stat < cmd->n_statistics; ++stat)
686 stat_update *su = cell_spec[cmd->statistics[stat]].su;
687 su (cell->stat[stat], weight, case_data (c, dep_var)->f);
691 for (int i = 0; i < cell->n_subtotals; ++i)
693 struct cell_container *container = cell->subtotals + i;
694 update_map_from_data (cmd, table, 1, &container->map, c, 1, 1,
698 struct hmap *map = &cell->children.map;
699 for (int l = 0; l < table->n_layers; ++l)
702 = update_map_from_data (cmd, table, l, map, c,
706 map = &cell->children.map;
709 casereader_destroy (reader);
711 arrange_cells (table->root_cell, table);
712 for (int l = 0; l < table->n_layers; ++l)
714 struct layer *layer = table->layers[l];
715 bt_init (&layer->instances.bt, my_other_compare_func,
716 table->control_vars[l]);
718 /* Iterate the instance hash table, and insert each instance
719 into the binary tree BT. */
720 struct instance *inst;
721 HMAP_FOR_EACH (inst, struct instance, hmap_node,
722 &layer->instances.map)
724 bt_insert (&layer->instances.bt, &inst->bt_node);
727 /* Iterate the binary tree (in order) and assign the index
728 member accordingly. */
730 BT_FOR_EACH (inst, struct instance, bt_node, &layer->instances.bt)
732 inst->index = index++;
736 /* The root cell should have no parent. */
737 assert (table->root_cell->parent_cell == 0);
738 dump_tree (table->root_cell, 0);