From: Ben Pfaff Date: Thu, 13 Jan 2022 05:51:10 +0000 (-0800) Subject: work on configurable summaries for totals X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=778b0aa95a03dd8d69ee0ddce69eeb01ba851b3e work on configurable summaries for totals --- diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 74afa0136d..e5c1328f03 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -181,6 +181,7 @@ struct ctables_cell struct ctables_domain *domains[N_CTDTS]; bool hide; + bool total; struct { @@ -276,6 +277,17 @@ enum ctables_label_position CTLP_LAYER, }; +struct ctables_summary_spec_set + { + struct ctables_summary_spec *summaries; + size_t n; + size_t allocated; + + struct variable *var; + }; + +static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *); + struct var_array { struct variable **vars; @@ -284,9 +296,8 @@ struct var_array size_t *domains[N_CTDTS]; size_t n_domains[N_CTDTS]; - struct ctables_summary_spec *summaries; - size_t n_summaries; - struct variable *summary_var; + struct ctables_summary_spec_set cell_summaries; + struct ctables_summary_spec_set total_summaries; }; struct var_array2 @@ -487,9 +498,8 @@ struct ctables_axis { struct ctables_var var; bool scale; - struct ctables_summary_spec *summaries; - size_t n_summaries; - size_t allocated_summaries; + struct ctables_summary_spec_set cell_summaries; + struct ctables_summary_spec_set total_summaries; }; /* Nonterminals. */ @@ -530,6 +540,14 @@ ctables_summary_spec_uninit (struct ctables_summary_spec *s) free (s->label); } +static void +ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set) +{ + for (size_t i = 0; i < set->n; i++) + ctables_summary_spec_uninit (&set->summaries[i]); + free (set->summaries); +} + static bool parse_col_width (struct lexer *lexer, const char *name, double *width) { @@ -623,9 +641,8 @@ ctables_axis_destroy (struct ctables_axis *axis) switch (axis->op) { case CTAO_VAR: - for (size_t i = 0; i < axis->n_summaries; i++) - ctables_summary_spec_uninit (&axis->summaries[i]); - free (axis->summaries); + ctables_summary_spec_set_uninit (&axis->cell_summaries); + ctables_summary_spec_set_uninit (&axis->total_summaries); break; case CTAO_STACK: @@ -716,15 +733,10 @@ static bool add_summary_spec (struct ctables_axis *axis, enum ctables_summary_function function, double percentile, const char *label, const struct fmt_spec *format, - const struct msg_location *loc) + const struct msg_location *loc, bool totals) { if (axis->op == CTAO_VAR) { - if (axis->n_summaries >= axis->allocated_summaries) - axis->summaries = x2nrealloc (axis->summaries, - &axis->allocated_summaries, - sizeof *axis->summaries); - const char *function_name = ctables_summary_function_name (function); const char *var_name = ctables_var_name (&axis->var); switch (ctables_function_availability (function)) @@ -756,7 +768,13 @@ add_summary_spec (struct ctables_axis *axis, break; } - struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++]; + struct ctables_summary_spec_set *set = (totals ? &axis->total_summaries + : &axis->cell_summaries); + if (set->n >= set->allocated) + set->summaries = x2nrealloc (set->summaries, &set->allocated, + sizeof *set->summaries); + + struct ctables_summary_spec *dst = &set->summaries[set->n++]; *dst = (struct ctables_summary_spec) { .function = function, .percentile = percentile, @@ -770,7 +788,7 @@ add_summary_spec (struct ctables_axis *axis, { for (size_t i = 0; i < 2; i++) if (!add_summary_spec (axis->subs[i], function, percentile, label, - format, loc)) + format, loc, totals)) return false; return true; } @@ -851,7 +869,8 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) if (!sub || !lex_match (ctx->lexer, T_LBRACK)) return sub; - do + bool totals = false; + for (;;) { int start_ofs = lex_ofs (ctx->lexer); @@ -899,15 +918,28 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, lex_ofs (ctx->lexer) - 1); - add_summary_spec (sub, function, percentile, label, formatp, loc); + add_summary_spec (sub, function, percentile, label, formatp, loc, + totals); free (label); msg_location_destroy (loc); - lex_match (ctx->lexer, T_COMMA); + if (lex_match (ctx->lexer, T_COMMA)) + { + if (!totals && lex_match_id (ctx->lexer, "TOTALS")) + { + if (!lex_force_match (ctx->lexer, T_LBRACK)) + goto error; + } + } + else if (lex_force_match (ctx->lexer, T_RBRACK)) + { + if (totals && !lex_force_match (ctx->lexer, T_RBRACK)) + goto error; + return sub; + } + else + goto error; } - while (!lex_match (ctx->lexer, T_RBRACK)); - - return sub; error: ctables_axis_destroy (sub); @@ -947,7 +979,7 @@ find_categorical_summary_spec (const struct ctables_axis *axis) if (!axis) return NULL; else if (axis->op == CTAO_VAR) - return !axis->scale && axis->n_summaries ? axis : NULL; + return !axis->scale && axis->cell_summaries.n ? axis : NULL; else { for (size_t i = 0; i < 2; i++) @@ -1446,9 +1478,9 @@ nest_fts (struct var_array2 va0, struct var_array2 va1) assert (n == allocate); const struct var_array *summary_src; - if (!a->summary_var) + if (!a->cell_summaries.var) summary_src = b; - else if (!b->summary_var) + else if (!b->cell_summaries.var) summary_src = a; else NOT_REACHED (); @@ -1458,9 +1490,8 @@ nest_fts (struct var_array2 va0, struct var_array2 va1) : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx : SIZE_MAX), .n = n, - .summaries = summary_src->summaries, - .n_summaries = summary_src->n_summaries, - .summary_var = summary_src->summary_var, + .cell_summaries = summary_src->cell_summaries, + .total_summaries = summary_src->total_summaries, }; } var_array2_uninit (&va0); @@ -1502,11 +1533,12 @@ enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a) .n = 1, .scale_idx = a->scale ? 0 : SIZE_MAX, }; - if (a->n_summaries || a->scale) + if (a->cell_summaries.n || a->scale) { - va->summaries = a->summaries; - va->n_summaries = a->n_summaries; - va->summary_var = a->var.var; + va->cell_summaries = a->cell_summaries; + va->total_summaries = a->total_summaries; + va->cell_summaries.var = a->var.var; + va->total_summaries.var = a->var.var; } return (struct var_array2) { .vas = va, .n = 1 }; @@ -2180,6 +2212,7 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c, const struct var_array *ss = &t->vaas[t->summary_axis].vas[ix[t->summary_axis]]; size_t hash = 0; + bool total = false; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct var_array *va = &t->vaas[a].vas[ix[a]]; @@ -2193,6 +2226,8 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c, && cats[a][i]->type != CCT_HSUBTOTAL) hash = value_hash (case_data (c, va->vars[i]), var_get_width (va->vars[i]), hash); + else + total = true; } } @@ -2223,6 +2258,7 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c, cell = xmalloc (sizeof *cell); cell->hide = false; + cell->total = total; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct var_array *va = &t->vaas[a].vas[ix[a]]; @@ -2244,17 +2280,22 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c, var_get_width (va->vars[i])); } } - cell->summaries = xmalloc (ss->n_summaries * sizeof *cell->summaries); - for (size_t i = 0; i < ss->n_summaries; i++) - ctables_summary_init (&cell->summaries[i], &ss->summaries[i]); + + { + const struct ctables_summary_spec_set *sss = &ss->cell_summaries; + cell->summaries = xmalloc (sss->n * sizeof *cell->summaries); + for (size_t i = 0; i < sss->n; i++) + ctables_summary_init (&cell->summaries[i], &sss->summaries[i]); + } for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) cell->domains[dt] = ctables_domain_insert (t, cell, dt); hmap_insert (&t->cells, &cell->node, hash); -summarize: - for (size_t i = 0; i < ss->n_summaries; i++) - ctables_summary_add (&cell->summaries[i], &ss->summaries[i], ss->summary_var, - case_data (c, ss->summary_var), weight); +summarize: ; + const struct ctables_summary_spec_set *sss = &ss->cell_summaries; + for (size_t i = 0; i < sss->n; i++) + ctables_summary_add (&cell->summaries[i], &sss->summaries[i], sss->var, + case_data (c, sss->var), weight); for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) cell->domains[dt]->valid += weight; } @@ -2422,22 +2463,25 @@ ctables_execute (struct dataset *ds, struct ctables *ct) for (size_t i = 0; i < t->vaas[t->summary_axis].n; i++) { struct var_array *va = &t->vaas[t->summary_axis].vas[i]; - if (!va->n_summaries) + if (!va->cell_summaries.n) { - va->summaries = xmalloc (sizeof *va->summaries); - va->n_summaries = 1; + struct ctables_summary_spec_set *css = &va->cell_summaries; + css->summaries = xmalloc (sizeof *css->summaries); + css->n = 1; enum ctables_summary_function function - = va->summary_var ? CTSF_MEAN : CTSF_COUNT; - struct ctables_var var = { .is_mrset = false, .var = va->summary_var }; + = css->var ? CTSF_MEAN : CTSF_COUNT; + struct ctables_var var = { .is_mrset = false, .var = css->var }; - *va->summaries = (struct ctables_summary_spec) { + *css->summaries = (struct ctables_summary_spec) { .function = function, .format = ctables_summary_default_format (function, &var), .label = ctables_summary_default_label (function, 0), }; - if (!va->summary_var) - va->summary_var = va->vars[0]; + if (!css->var) + css->var = va->vars[0]; + + va->total_summaries = va->cell_summaries; } } } @@ -2579,10 +2623,10 @@ ctables_execute (struct dataset *ds, struct ctables *ct) { if (label) parent = pivot_category_create_group__ (parent, label); - for (size_t m = 0; m < va->n_summaries; m++) + for (size_t m = 0; m < va->cell_summaries.n; m++) { int leaf = pivot_category_create_leaf ( - parent, pivot_value_new_text (va->summaries[m].label)); + parent, pivot_value_new_text (va->cell_summaries.summaries[m].label)); if (m == 0) prev_leaf = leaf; } @@ -2619,8 +2663,8 @@ ctables_execute (struct dataset *ds, struct ctables *ct) if (cell->hide) continue; - const struct var_array *ss = &t->vaas[t->summary_axis].vas[cell->axes[t->summary_axis].vaa_idx]; - for (size_t j = 0; j < ss->n_summaries; j++) + const struct ctables_summary_spec_set *sss = &t->vaas[t->summary_axis].vas[cell->axes[t->summary_axis].vaa_idx].cell_summaries; + for (size_t j = 0; j < sss->n; j++) { size_t dindexes[3]; size_t n_dindexes = 0; @@ -2634,9 +2678,9 @@ ctables_execute (struct dataset *ds, struct ctables *ct) dindexes[n_dindexes++] = leaf; } - double d = ctables_summary_value (cell, &cell->summaries[j], &ss->summaries[j]); + double d = ctables_summary_value (cell, &cell->summaries[j], &sss->summaries[j]); struct pivot_value *value = pivot_value_new_number (d); - value->numeric.format = ss->summaries[j].format; + value->numeric.format = sss->summaries[j].format; pivot_table_put (pt, dindexes, n_dindexes, value); } }