From 0bf71a7711a7d3f86606c72a4fb037732e2a2bbf Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 1 Jan 2022 15:47:23 -0800
Subject: [PATCH] toward better summaries

---
 src/language/stats/ctables.c | 120 ++++++++++++++++++++++++++++-------
 1 file changed, 98 insertions(+), 22 deletions(-)

diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c
index bef413a879..22ca909d98 100644
--- a/src/language/stats/ctables.c
+++ b/src/language/stats/ctables.c
@@ -224,9 +224,12 @@ enum ctables_label_position
 
 struct var_array
   {
-    const struct ctables_axis *summary;
     struct variable **vars;
     size_t n;
+
+    struct ctables_summary_spec *summaries;
+    size_t n_summaries;
+    struct variable *summary_var;
   };
 
 struct var_array2
@@ -239,6 +242,7 @@ struct ctables_table
   {
     struct ctables_axis *axes[PIVOT_N_AXES];
     struct var_array2 vaas[PIVOT_N_AXES];
+    enum pivot_axis_type summary_axis;
     struct hmap ft;
 
     enum pivot_axis_type slabels_position;
@@ -616,6 +620,21 @@ ctables_summary_default_format (enum ctables_summary_function function,
     }
 }
 
+static char *
+ctables_summary_default_label (enum ctables_summary_function function,
+                               double percentile)
+{
+  static const char *default_labels[] = {
+#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
+    SUMMARIES
+#undef S
+  };
+
+  return (function == CTSF_PTILE
+          ? xasprintf (_("Percentile %.2f"), percentile)
+          : xstrdup (gettext (default_labels[function])));
+}
+
 static const char *
 ctables_summary_function_name (enum ctables_summary_function function)
 {
@@ -792,17 +811,8 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
           label = ss_xstrdup (lex_tokss (ctx->lexer));
           lex_get (ctx->lexer);
         }
-      else if (function == CTSF_PTILE)
-        label = xasprintf (_("Percentile %.2f"), percentile);
       else
-        {
-          static const char *default_labels[] = {
-#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
-            SUMMARIES
-#undef S
-          };
-          label = xstrdup (gettext (default_labels[function]));
-        }
+        label = ctables_summary_default_label (function, percentile);
 
       /* Parse format. */
       struct fmt_spec format;
@@ -1299,11 +1309,19 @@ nest_fts (struct var_array2 va0, struct var_array2 va1)
           vars[n++] = b->vars[k];
         assert (n == allocate);
 
-        assert (!(a->summary && b->summary));
+        const struct var_array *summary_src;
+        if (!a->summary_var)
+          summary_src = b;
+        else if (!b->summary_var)
+          summary_src = a;
+        else
+          NOT_REACHED ();
         vaa.vas[vaa.n++] = (struct var_array) {
-          .summary = a->summary ? a->summary : b->summary,
           .vars = vars,
-          .n = n
+          .n = n,
+          .summaries = summary_src->summaries,
+          .n_summaries = summary_src->n_summaries,
+          .summary_var = summary_src->summary_var,
         };
       }
   var_array2_uninit (&va0);
@@ -1346,7 +1364,12 @@ enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
           *axes = axis_type;
           *va = (struct var_array) { .vars = vars, .n = 1 };
         }
-      va->summary = a->scale || a->n_summaries ? a : NULL;
+      if (a->n_summaries || a->scale)
+        {
+          va->summaries = a->summaries;
+          va->n_summaries = a->n_summaries;
+          va->summary_var = a->var.var;
+        }
       return (struct var_array2) { .vas = va, .n = 1 };
 
     case CTAO_STACK:
@@ -1951,6 +1974,28 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
             *va = (struct var_array) { .n = 0 };
             t->vaas[a] = (struct var_array2) { .vas = va, .n = 1 };
           }
+
+      for (size_t i = 0; i < t->vaas[t->summary_axis].n; i++)
+        {
+          struct var_array *va = &t->vaas[t->summary_axis].vas[i];
+          if (!va->n_summaries)
+            {
+              va->summaries = xmalloc (sizeof *va->summaries);
+              va->n_summaries = 1;
+
+              enum ctables_summary_function function
+                = va->summary_var ? CTSF_MEAN : CTSF_COUNT;
+              struct ctables_var var = { .is_mrset = false, .var = va->summary_var };
+
+              *va->summaries = (struct ctables_summary_spec) {
+                .function = function,
+                .format = ctables_summary_default_format (function, &var),
+                .label = ctables_summary_default_label (function, 0),
+              };
+              if (!va->summary_var)
+                va->summary_var = va->vars[0];
+            }
+        }
     }
 
   struct casereader *input = casereader_create_filter_weight (proc_open (ds),
@@ -2363,26 +2408,57 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
 
       const struct ctables_axis *scales[PIVOT_N_AXES];
       size_t n_scales = 0;
-      for (size_t i = 0; i < 3; i++)
+      for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
         {
-          scales[i] = find_scale (t->axes[i]);
-          if (scales[i])
+          scales[a] = find_scale (t->axes[a]);
+          if (scales[a])
             n_scales++;
         }
       if (n_scales > 1)
         {
-          msg (SE, _("Scale variables may appear only on one dimension."));
+          msg (SE, _("Scale variables may appear only on one axis."));
           if (scales[PIVOT_AXIS_ROW])
             msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
-                    _("This scale variable appears in the rows dimension."));
+                    _("This scale variable appears on the rows axis."));
           if (scales[PIVOT_AXIS_COLUMN])
             msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
-                    _("This scale variable appears in the columns dimension."));
+                    _("This scale variable appears on the columns axis."));
           if (scales[PIVOT_AXIS_LAYER])
             msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
-                    _("This scale variable appears in the layer dimension."));
+                    _("This scale variable appears on the layer axis."));
+          goto error;
+        }
+
+      const struct ctables_axis *summaries[PIVOT_N_AXES];
+      size_t n_summaries = 0;
+      for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+        {
+          summaries[a] = (scales[a]
+                          ? scales[a]
+                          : find_categorical_summary_spec (t->axes[a]));
+          if (summaries[a])
+            n_summaries++;
+        }
+      if (n_summaries > 1)
+        {
+          msg (SE, _("Summaries may appear only on one axis."));
+          if (summaries[PIVOT_AXIS_ROW])
+            msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
+                    _("This variable on the rows axis has a summary."));
+          if (summaries[PIVOT_AXIS_COLUMN])
+            msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
+                    _("This variable on the columns axis has a summary."));
+          if (summaries[PIVOT_AXIS_LAYER])
+            msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
+                    _("This variable on the layers axis has a summary."));
           goto error;
         }
+      for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+        if (n_summaries ? summaries[a] : t->axes[a])
+          {
+            t->summary_axis = a;
+            break;
+          }
 
       if (lex_token (lexer) == T_ENDCMD)
         break;
-- 
2.30.2