From 8f5b28b2ee26edf1276cce0c0a2fa4a8761da50d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 15 Jan 2022 16:06:06 -0800 Subject: [PATCH] make rowlabels and collabels more regular --- src/language/stats/ctables.c | 180 ++++++++++++++++++++++++++--------- 1 file changed, 135 insertions(+), 45 deletions(-) diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 0ce4a333e9..87f7d23a05 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -278,26 +278,6 @@ struct ctables_postcompute_expr }; }; -enum ctables_label_position - { - CTLP_NORMAL, - CTLP_OPPOSITE, - CTLP_LAYER, - }; - -static const char * -ctables_label_position_to_string (enum ctables_label_position p) -{ - switch (p) - { - case CTLP_NORMAL: return "NORMAL"; - case CTLP_OPPOSITE: return "OPPOSITE"; - case CTLP_LAYER: return "LAYER"; - } - - NOT_REACHED (); -} - struct ctables_summary_spec_set { struct ctables_summary_spec *specs; @@ -330,6 +310,12 @@ struct ctables_stack size_t n; }; +struct ctables_value + { + struct hmap_node node; + union value value; + }; + struct ctables_table { struct ctables_axis *axes[PIVOT_N_AXES]; @@ -339,11 +325,23 @@ struct ctables_table struct hmap cells; struct hmap domains[N_CTDTS]; + const struct variable *clabels_example; + struct hmap clabels_values_map; + union value *clabels_values; + enum pivot_axis_type slabels_axis; bool slabels_visible; - enum ctables_label_position row_labels; - enum ctables_label_position col_labels; + /* The innermost category labels for axis 'a' appear on axis label_axis[a]. + + Most commonly, label_axis[a] == a, and in particular we always have + label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER. + + If ROWLABELS or COLLABELS is specified, then one of + label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the + opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ. + */ + enum pivot_axis_type label_axis[PIVOT_N_AXES]; /* Indexed by variable dictionary index. 
*/ struct ctables_categories **categories; @@ -2699,7 +2697,6 @@ ctables_table_output_same_axis (struct ctables *ct, struct ctables_table *t) pivot_table_submit (pt); } - static void ctables_table_output_different_axis (struct ctables *ct, struct ctables_table *t) { @@ -2757,6 +2754,27 @@ ctables_table_output_different_axis (struct ctables *ct, struct ctables_table *t struct ctables_cell *cell = sorted[j]; const struct ctables_nest *nest = &t->stacks[a].nests[cell->axes[a].stack_idx]; + /* Pivot categories: + + - variable label for nest->vars[0], if vlabel != CTVL_NONE + - category for nest->vars[0] + - variable label for nest->vars[1], if vlabel != CTVL_NONE + - category for nest->vars[1] + ... + - variable label for nest->vars[nest->n - 1], if vlabel != CTVL_NONE + - category for nest->vars[nest->n - 1], unless axis A's + labels are moved to another axis + - summary function, if 'a == t->summary_axis && t->slabels_axis == + t->summary_axis' + + Additional dimensions: + + - If 't->slabels_axis != t->summary_axis', add a summary dimension + to t->slabels_axis. 
+ - If 't->label_axis[b] == a' for some axis 'b != a', axis 'a' also carries the innermost category labels of axis 'b'. + + */ + size_t n_common = 0; bool new_subtable = false; if (j > 0) @@ -3003,6 +3021,60 @@ ctables_prepare_table (struct ctables_table *t) #endif } +static void +ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c, + enum pivot_axis_type a) +{ + struct ctables_stack *stack = &t->stacks[a]; + for (size_t i = 0; i < stack->n; i++) + { + const struct ctables_nest *nest = &stack->nests[i]; + const struct variable *v = nest->vars[nest->n - 1]; + int width = var_get_width (v); + const union value *value = case_data (c, v); + unsigned int hash = value_hash (value, width, 0); + + struct ctables_value *clv; + HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node, hash, + &t->clabels_values_map) + if (value_equal (value, &clv->value, width)) + goto next_stack; + + clv = xmalloc (sizeof *clv); + value_clone (&clv->value, value, width); + hmap_insert (&t->clabels_values_map, &clv->node, hash); + + next_stack: ; + } +} + +static int +compare_clabels_values_3way (const void *a_, const void *b_, const void *width_) +{ + const union value *a = a_; + const union value *b = b_; + const int *width = width_; + return value_compare_3way (a, b, *width); +} + +static void +ctables_sort_clabels_values (struct ctables_table *t) +{ + int width = var_get_width (t->clabels_example); + + size_t n = hmap_count (&t->clabels_values_map); + t->clabels_values = xnmalloc (n, sizeof *t->clabels_values); + + const struct ctables_value *clv; + size_t i = 0; + HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map) + t->clabels_values[i++] = clv->value; + assert (i == n); + + sort (t->clabels_values, n, sizeof *t->clabels_values, + compare_clabels_values_3way, &width); +} + static bool ctables_execute (struct dataset *ds, struct ctables *ct) { @@ -3026,6 +3098,10 @@ ctables_execute (struct dataset *ds, struct ctables *ct) for (size_t ic = 0; ic < t->stacks[PIVOT_AXIS_COLUMN].n; ic++) for (size_t il = 0; il < 
t->stacks[PIVOT_AXIS_LAYER].n; il++) ctables_cell_insert (t, c, ir, ic, il, weight); + + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (t->label_axis[a] != a) + ctables_insert_clabels_values (t, c, a); } } casereader_destroy (input); @@ -3033,6 +3109,10 @@ ctables_execute (struct dataset *ds, struct ctables *ct) for (size_t i = 0; i < ct->n_tables; i++) { struct ctables_table *t = ct->tables[i]; + + if (t->clabels_example) + ctables_sort_clabels_values (t); + if (t->summary_axis == t->slabels_axis) ctables_table_output_same_axis (ct, ct->tables[i]); else @@ -3042,34 +3122,38 @@ ctables_execute (struct dataset *ds, struct ctables *ct) } static bool -ctables_check_label_position (struct ctables_table *t, - enum pivot_axis_type axis, - enum ctables_label_position label_pos, - const char *subcommand_name) +ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a) { - if (label_pos == CTLP_NORMAL) + enum pivot_axis_type label_pos = t->label_axis[a]; + if (label_pos == a) return true; - const struct ctables_stack *stack = &t->stacks[axis]; + const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS"; + const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? 
"LAYER" : "OPPOSITE"; + + const struct ctables_stack *stack = &t->stacks[a]; if (!stack->n) return true; const struct ctables_nest *n0 = &stack->nests[0]; + assert (n0->n > 0); const struct variable *v0 = n0->vars[n0->n - 1]; struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)]; + t->clabels_example = v0; for (size_t i = 0; i < c0->n_cats; i++) if (c0->cats[i].type == CCT_FUNCTION) { msg (SE, _("%s=%s is not allowed with sorting based " "on a summary function."), - subcommand_name, ctables_label_position_to_string (label_pos)); + subcommand_name, pos_name); return false; } for (size_t i = 1; i < stack->n; i++) { const struct ctables_nest *ni = &stack->nests[i]; + assert (ni->n > 0); const struct variable *vi = ni->vars[ni->n - 1]; struct ctables_categories *ci = t->categories[var_get_dict_index (vi)]; @@ -3078,7 +3162,7 @@ ctables_check_label_position (struct ctables_table *t, msg (SE, _("%s=%s requires the variables to be " "moved to have the same width, but %s has " "width %d and %s has width %d."), - subcommand_name, ctables_label_position_to_string (label_pos), + subcommand_name, pos_name, var_get_name (v0), var_get_width (v0), var_get_name (vi), var_get_width (vi)); return false; @@ -3089,7 +3173,7 @@ ctables_check_label_position (struct ctables_table *t, msg (SE, _("%s=%s requires the variables to be " "moved to have the same value labels, but %s " "and %s have different value labels."), - subcommand_name, ctables_label_position_to_string (label_pos), + subcommand_name, pos_name, var_get_name (v0), var_get_name (vi)); return false; } @@ -3099,7 +3183,7 @@ ctables_check_label_position (struct ctables_table *t, "moved to have the same category " "specifications, but %s and %s have different " "category specifications."), - subcommand_name, ctables_label_position_to_string (label_pos), + subcommand_name, pos_name, var_get_name (v0), var_get_name (vi)); return false; } @@ -3345,8 +3429,12 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) 
.cells = HMAP_INITIALIZER (t->cells), .slabels_axis = PIVOT_AXIS_COLUMN, .slabels_visible = true, - .row_labels = CTLP_NORMAL, - .col_labels = CTLP_NORMAL, + .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map), + .label_axis = { + [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW, + [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN, + [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER, + }, .categories = categories, .n_categories = n_vars, .cilevel = 95, @@ -3477,14 +3565,17 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) { if (lex_match_id (lexer, "AUTO")) - t->row_labels = t->col_labels = CTLP_NORMAL; + { + t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW; + t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN; + } else if (lex_match_id (lexer, "ROWLABELS")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "OPPOSITE")) - t->row_labels = CTLP_OPPOSITE; + t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN; else if (lex_match_id (lexer, "LAYER")) - t->row_labels = CTLP_LAYER; + t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER; else { lex_error_expecting (lexer, "OPPOSITE", "LAYER"); @@ -3495,9 +3586,9 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "OPPOSITE")) - t->col_labels = CTLP_OPPOSITE; + t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW; else if (lex_match_id (lexer, "LAYER")) - t->col_labels = CTLP_LAYER; + t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER; else { lex_error_expecting (lexer, "OPPOSITE", "LAYER"); @@ -3772,7 +3863,8 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) break; } - if (t->row_labels != CTLP_NORMAL && t->col_labels != CTLP_NORMAL) + if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW + && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN) { msg (SE, _("ROWLABELS and COLLABELS may not both be specified.")); goto error; @@ -3780,10 +3872,8 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) 
ctables_prepare_table (t); - ctables_check_label_position (t, PIVOT_AXIS_ROW, t->row_labels, - "ROWLABELS"); - ctables_check_label_position (t, PIVOT_AXIS_COLUMN, t->col_labels, - "COLLABELS"); + ctables_check_label_position (t, PIVOT_AXIS_ROW); + ctables_check_label_position (t, PIVOT_AXIS_COLUMN); } while (lex_token (lexer) != T_ENDCMD); -- 2.30.2