From 1539e422694afe290756782e400a99f83cf7de79 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Fri, 8 Jul 2022 17:25:51 -0700
Subject: [PATCH] CTABLES SPLIT FILE

---
 src/data/dictionary.c           |   6 +
 src/language/stats/ctables.c    | 195 ++++++++++++++++++++++++++------
 tests/language/stats/ctables.at |   2 +-
 3 files changed, 165 insertions(+), 38 deletions(-)

diff --git a/src/data/dictionary.c b/src/data/dictionary.c
index c331ea2345..9ce3c46505 100644
--- a/src/data/dictionary.c
+++ b/src/data/dictionary.c
@@ -413,6 +413,12 @@ dict_set_split_vars__ (struct dictionary *d,
     }
 }
 
+enum split_type
+dict_get_split_type (const struct dictionary *d)
+{
+  return d->split_type;
+}
+
 /* Sets N split vars SPLIT in dictionary D. */
 void
 dict_set_split_vars (struct dictionary *d,
diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c
index 1d7760f57e..a25205e48d 100644
--- a/src/language/stats/ctables.c
+++ b/src/language/stats/ctables.c
@@ -19,6 +19,7 @@
 #include
 #include
 
+#include "data/casegrouper.h"
 #include "data/casereader.h"
 #include "data/casewriter.h"
 #include "data/data-in.h"
@@ -29,6 +30,7 @@
 #include "data/subcase.h"
 #include "data/value-labels.h"
 #include "language/command.h"
+#include "language/dictionary/split-file.h"
 #include "language/lexer/format-parser.h"
 #include "language/lexer/lexer.h"
 #include "language/lexer/token.h"
@@ -458,11 +460,14 @@ struct ctables_occurrence
 
 struct ctables_section
   {
+    /* Settings. */
     struct ctables_table *table;
     struct ctables_nest *nests[PIVOT_N_AXES];
-    struct hmap *occurrences[PIVOT_N_AXES];
-    struct hmap cells; /* Contains "struct ctable_cell"s. */
-    struct hmap domains[N_CTDTS]; /* Contains "struct ctable_domain"s. */
+
+    /* Data. */
+    struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
+    struct hmap cells; /* Contains "struct ctables_cell"s. */
+    struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
   };
 
 struct ctables_table
@@ -477,11 +482,6 @@ struct ctables_table
     struct variable **sum_vars;
     size_t n_sum_vars;
 
-    const struct variable *clabels_example;
-    struct hmap clabels_values_map;
-    struct ctables_value **clabels_values;
-    size_t n_clabels_values;
-
     enum pivot_axis_type slabels_axis;
     bool slabels_visible;
 
@@ -493,9 +493,26 @@ struct ctables_table
        If ROWLABELS or COLLABELS is specified, then one of
        label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
        opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
+
+       If any category labels are moved, then 'clabels_example' is one of the
+       variables being moved (and it is otherwise NULL). All of the variables
+       being moved have the same width, value labels, and categories, so this
+       example variable can be used to find those out.
+
+       The remaining members in this group are relevant only if category labels
+       are moved.
+
+       'clabels_values_map' holds a "struct ctables_value" for all the values
+       that appear in all of the variables in the moved categories. It is
+       accumulated as the data is read. Once the data is fully read, its
+       sorted values are put into 'clabels_values' and 'n_clabels_values'.
     */
     enum pivot_axis_type label_axis[PIVOT_N_AXES];
     enum pivot_axis_type clabels_from_axis;
+    const struct variable *clabels_example;
+    struct hmap clabels_values_map;
+    struct ctables_value **clabels_values;
+    size_t n_clabels_values;
 
     /* Indexed by variable dictionary index. */
     struct ctables_categories **categories;
@@ -2503,7 +2520,7 @@ ctables_summary_init (union ctables_summary *s,
     }
 }
 
-static void UNUSED
+static void
 ctables_summary_uninit (union ctables_summary *s,
                         const struct ctables_summary_spec *ss)
 {
@@ -5154,6 +5171,92 @@ ctables_section_add_empty_categories (struct ctables_section *s)
   case_unref (c);
 }
 
+static void
+ctables_section_clear (struct ctables_section *s)
+{
+  for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+    {
+      const struct ctables_nest *nest = s->nests[a];
+      for (size_t i = 0; i < nest->n; i++)
+        if (i != nest->scale_idx)
+          {
+            const struct variable *var = nest->vars[i];
+            int width = var_get_width (var);
+            struct ctables_occurrence *o, *next;
+            struct hmap *map = &s->occurrences[a][i];
+            HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
+              {
+                value_destroy (&o->value, width);
+                hmap_delete (map, &o->node);
+                free (o);
+              }
+            hmap_shrink (map);
+          }
+    }
+
+  struct ctables_cell *cell, *next_cell;
+  HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
+    {
+      for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+        {
+          const struct ctables_nest *nest = s->nests[a];
+          for (size_t i = 0; i < nest->n; i++)
+            if (i != nest->scale_idx)
+              value_destroy (&cell->axes[a].cvs[i].value,
+                             var_get_width (nest->vars[i]));
+          free (cell->axes[a].cvs);
+        }
+
+      const struct ctables_nest *ss = s->nests[s->table->summary_axis];
+      const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
+      for (size_t i = 0; i < specs->n; i++)
+        ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
+      free (cell->summaries);
+
+      hmap_delete (&s->cells, &cell->node);
+      free (cell);
+    }
+  hmap_shrink (&s->cells);
+
+  for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
+    {
+      struct ctables_domain *domain, *next_domain;
+      HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
+                          &s->domains[dt])
+        {
+          free (domain->sums);
+          hmap_delete (&s->domains[dt], &domain->node);
+          free (domain);
+        }
+      hmap_shrink (&s->domains[dt]);
+    }
+}
+
+static void
+ctables_table_clear (struct ctables_table *t)
+{
+  for (size_t i = 0; i < t->n_sections; i++)
+    ctables_section_clear (&t->sections[i]);
+
+  if (t->clabels_example)
+    {
+      int width = var_get_width (t->clabels_example);
+      struct ctables_value *value, *next_value;
+      HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
+                          &t->clabels_values_map)
+        {
+          value_destroy (&value->value, width);
+          hmap_delete (&t->clabels_values_map, &value->node);
+          free (value);
+        }
+      hmap_shrink (&t->clabels_values_map);
+
+      free (t->clabels_values);
+      t->clabels_values = NULL;
+      t->n_clabels_values = 0;
+    }
+}
+
 static bool
 ctables_execute (struct dataset *ds, struct ctables *ct)
 {
@@ -5168,46 +5271,64 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
       ctables_table_add_section (t, 0, ix);
     }
 
+  struct dictionary *dict = dataset_dict (ds);
   struct casereader *input = proc_open (ds);
-  bool warn_on_invalid = true;
-  for (struct ccase *c = casereader_read (input); c;
-       case_unref (c), c = casereader_read (input))
+  struct casegrouper *grouper
+    = (dict_get_split_type (dict) == SPLIT_SEPARATE
+       ? casegrouper_create_splits (input, dict)
+       : casegrouper_create_vars (input, NULL, 0));
+  struct casereader *group;
+  while (casegrouper_get_next_group (grouper, &group))
     {
-      double d_weight = dict_get_case_weight (dataset_dict (ds), c,
-                                              &warn_on_invalid);
-      double e_weight = (ct->e_weight
-                         ? var_force_valid_weight (ct->e_weight,
-                                                   case_num (c, ct->e_weight),
-                                                   &warn_on_invalid)
-                         : d_weight);
+      /* Output SPLIT FILE variables. */
+      struct ccase *c = casereader_peek (group, 0);
+      if (c != NULL)
+        {
+          output_split_file_values (ds, c);
+          case_unref (c);
+        }
 
-      for (size_t i = 0; i < ct->n_tables; i++)
+      bool warn_on_invalid = true;
+      for (c = casereader_read (group); c;
+           case_unref (c), c = casereader_read (group))
         {
-          struct ctables_table *t = ct->tables[i];
+          double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
+          double e_weight = (ct->e_weight
+                             ? var_force_valid_weight (ct->e_weight,
+                                                       case_num (c, ct->e_weight),
+                                                       &warn_on_invalid)
+                             : d_weight);
+
+          for (size_t i = 0; i < ct->n_tables; i++)
+            {
+              struct ctables_table *t = ct->tables[i];
 
-          for (size_t j = 0; j < t->n_sections; j++)
-            ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
+              for (size_t j = 0; j < t->n_sections; j++)
+                ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
 
-          for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
-            if (t->label_axis[a] != a)
-              ctables_insert_clabels_values (t, c, a);
+              for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+                if (t->label_axis[a] != a)
+                  ctables_insert_clabels_values (t, c, a);
+            }
         }
-    }
-  casereader_destroy (input);
+      casereader_destroy (group);
 
-  for (size_t i = 0; i < ct->n_tables; i++)
-    {
-      struct ctables_table *t = ct->tables[i];
+      for (size_t i = 0; i < ct->n_tables; i++)
+        {
+          struct ctables_table *t = ct->tables[i];
 
-      if (t->clabels_example)
-        ctables_sort_clabels_values (t);
+          if (t->clabels_example)
+            ctables_sort_clabels_values (t);
 
-      for (size_t j = 0; j < t->n_sections; j++)
-        ctables_section_add_empty_categories (&t->sections[j]);
+          for (size_t j = 0; j < t->n_sections; j++)
+            ctables_section_add_empty_categories (&t->sections[j]);
 
-      ctables_table_output (ct, ct->tables[i]);
+          ctables_table_output (ct, t);
+          ctables_table_clear (t);
+        }
     }
-  return proc_commit (ds);
+  bool ok = casegrouper_destroy (grouper);
+  return proc_commit (ds) && ok;
 }
 
 /* Postcomputes. */
diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at
index d480df3431..65740d97b2 100644
--- a/tests/language/stats/ctables.at
+++ b/tests/language/stats/ctables.at
@@ -2,7 +2,6 @@ AT_BANNER([CTABLES])
 
 dnl Features not yet implemented:
 dnl
-dnl - SPLIT FILE with SEPARATE splits
 dnl - Definition of columns/rows when labels are rotated from one axis to another.
 dnl - Preprocessing to distinguish categorical from scale.
 dnl - PCOMPUTE:
@@ -47,6 +46,7 @@ dnl * )LABEL[N].
 dnl - Summary functions:
 dnl * U-prefix for unweighted summaries.
 dnl * areaPCT.SUM and UareaPCT.SUM functions.
+dnl - SPLIT FILE with SEPARATE splits
 dnl
 dnl Not for v1:
 dnl - Multiple response sets
-- 
2.30.2