From 79438d764f7c0490daac9e234c47cf049d8284df Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 5 Feb 2022 15:03:27 -0800 Subject: [PATCH] work toward EMPTY=INCLUDE --- src/language/stats/ctables.c | 172 ++++++++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 5 deletions(-) diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 4ee180f518..0ff1a4d41d 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -188,8 +188,8 @@ enum ctables_summary_variant struct ctables_cell { - /* In struct ctables's 'cells' hmap. Indexed by all the values in all the - axes (except the scalar variable, if any). */ + /* In struct ctables_section's 'cells' hmap. Indexed by all the values in + all the axes (except the scalar variable, if any). */ struct hmap_node node; /* The domains that contain this cell. */ @@ -199,7 +199,7 @@ struct ctables_cell bool hide; enum ctables_summary_variant sv; - struct + struct ctables_cell_axis { struct ctables_cell_value { @@ -216,6 +216,7 @@ struct ctables_cell struct ctables { + const struct dictionary *dict; struct pivot_table_look *look; /* If this is NULL, zeros are displayed using the normal print format. @@ -324,16 +325,24 @@ struct ctables_value int leaf; }; +struct ctables_section_value + { + struct hmap_node node; + union value value; + }; + struct ctables_section { struct ctables_table *table; struct ctables_nest *nests[PIVOT_N_AXES]; + struct hmap *occurrences[PIVOT_N_AXES]; struct hmap cells; /* Contains "struct ctable_cell"s. */ struct hmap domains[N_CTDTS]; /* Contains "struct ctable_domain"s. */ }; struct ctables_table { + struct ctables *ctables; struct ctables_axis *axes[PIVOT_N_AXES]; struct ctables_stack stacks[PIVOT_N_AXES]; struct ctables_section *sections; @@ -411,16 +420,19 @@ struct ctables_category { enum ctables_category_type { + /* Explicit category lists. */ CCT_NUMBER, CCT_STRING, CCT_RANGE, CCT_MISSING, CCT_OTHERNM, + /* Totals and subtotals. 
*/ CCT_SUBTOTAL, CCT_HSUBTOTAL, CCT_TOTAL, + /* Implicit category lists. */ CCT_VALUE, CCT_LABEL, CCT_FUNCTION, @@ -2634,12 +2646,31 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c, } } +static void +ctables_add_occurrence (const struct variable *var, + const union value *value, + struct hmap *occurrences) +{ + int width = var_get_width (var); + unsigned int hash = value_hash (value, width, 0); + + struct ctables_section_value *sv; + HMAP_FOR_EACH_WITH_HASH (sv, struct ctables_section_value, node, hash, + occurrences) + if (value_equal (value, &sv->value, width)) + return; + + sv = xmalloc (sizeof *sv); + value_clone (&sv->value, value, width); + hmap_insert (occurrences, &sv->node, hash); +} + static void ctables_cell_insert (struct ctables_section *s, const struct ccase *c, double d_weight, double e_weight) { - const struct ctables_category *cats[PIVOT_N_AXES][10]; + const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; @@ -2661,6 +2692,18 @@ ctables_cell_insert (struct ctables_section *s, } } + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); + ctables_add_occurrence (var, value, &s->occurrences[a][i]); + } + } + ctables_cell_add__ (s, c, cats, d_weight, e_weight); recurse_totals (s, c, cats, d_weight, e_weight, 0, 0); @@ -2733,7 +2776,14 @@ ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, .cells = HMAP_INITIALIZER (s->cells), }; for (a = 0; a < PIVOT_N_AXES; a++) - s->nests[a] = t->stacks[a].n ? 
&t->stacks[a].nests[ix[a]] : NULL; + if (t->stacks[a].n) + { + struct ctables_nest *nest = &t->stacks[a].nests[ix[a]]; + s->nests[a] = nest; + s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]); + for (size_t i = 0; i < nest->n; i++) + hmap_init (&s->occurrences[a][i]); + } for (size_t i = 0; i < N_CTDTS; i++) hmap_init (&s->domains[i]); } @@ -3344,6 +3394,113 @@ ctables_sort_clabels_values (struct ctables_table *t) t->clabels_values[i]->leaf = i; } +static void +ctables_add_category_occurrences (const struct variable *var, + struct hmap *occurrences, + const struct ctables_categories *cats) +{ + const struct val_labs *val_labs = var_get_value_labels (var); + + for (size_t i = 0; i < cats->n_cats; i++) + { + const struct ctables_category *c = &cats->cats[i]; + switch (c->type) + { + case CCT_NUMBER: + ctables_add_occurrence (var, &(const union value) { .f = c->number }, + occurrences); + break; + + case CCT_STRING: + abort (); /* XXX */ + + case CCT_RANGE: + assert (var_is_numeric (var)); + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + if (vl->value.f >= c->range[0] && vl->value.f <= c->range[1]) + ctables_add_occurrence (var, &vl->value, occurrences); + break; + + case CCT_MISSING: + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + if (var_is_value_missing (var, &vl->value)) + ctables_add_occurrence (var, &vl->value, occurrences); + break; + + case CCT_OTHERNM: + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + ctables_add_occurrence (var, &vl->value, occurrences); + break; + + case CCT_SUBTOTAL: + case CCT_HSUBTOTAL: + case CCT_TOTAL: + break; + + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + if (c->include_missing || !var_is_value_missing (var, &vl->value)) + 
ctables_add_occurrence (var, &vl->value, occurrences); + break; + } + } +} + +static void +ctables_section_recurse_add_empty_categories ( + struct ctables_section *s, + const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c, + enum pivot_axis_type a, size_t a_idx) +{ + if (a >= PIVOT_N_AXES) + { + /* XXX every axis has been visited; nothing implemented yet. */ + } + else if (!s->nests[a] || a_idx >= s->nests[a]->n) + { + /* XXX no nest on axis A, or A_IDX is past its last variable. */ + + } + else + { + + for (size_t i = 0; i < s->nests[a]->n; i++) + ; /* XXX loop body not written yet. */ + } +} + +static void +ctables_section_add_empty_categories (struct ctables_section *s) +{ + bool show_empty = false; + for (size_t a = 0; a < PIVOT_N_AXES; a++) + if (s->nests[a]) + for (size_t k = 0; k < s->nests[a]->n; k++) + if (k != s->nests[a]->scale_idx) + { + const struct variable *var = s->nests[a]->vars[k]; + const struct ctables_categories *cats = s->table->categories[ + var_get_dict_index (var)]; + if (cats->show_empty) + { + show_empty = true; + ctables_add_category_occurrences (var, &s->occurrences[a][k], cats); + } + } + if (!show_empty) + return; + + const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ + struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict)); + ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0); +} + static bool ctables_execute (struct dataset *ds, struct ctables *ct) { @@ -3392,6 +3549,9 @@ ctables_execute (struct dataset *ds, struct ctables *ct) if (t->clabels_example) ctables_sort_clabels_values (t); + for (size_t j = 0; j < t->n_sections; j++) + ctables_section_add_empty_categories (&t->sections[j]); + ctables_table_output (ct, ct->tables[i]); } return proc_commit (ds); @@ -3408,6 +3568,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) struct ctables *ct = xmalloc (sizeof *ct); *ct = (struct ctables) { + .dict = dataset_dict (ds), .look = pivot_table_look_unshare (pivot_table_look_ref ( pivot_table_look_get_default ())), .vlabels = vlabels, @@ -3631,6 +3792,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) struct ctables_table *t =
xmalloc (sizeof *t); *t = (struct ctables_table) { + .ctables = ct, .slabels_axis = PIVOT_AXIS_COLUMN, .slabels_visible = true, .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map), -- 2.30.2