From 1a3b4c93b578813e8a4c78bb4ea3c0f3fcd4884f Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Thu, 30 Dec 2021 21:34:47 -0800
Subject: [PATCH] some summary functions work

---
 src/language/stats/ctables.c | 617 ++++++++++++++++++++++++++++++++---
 1 file changed, 576 insertions(+), 41 deletions(-)

diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c
index 7a18ac5f13..7023c05adb 100644
--- a/src/language/stats/ctables.c
+++ b/src/language/stats/ctables.c
@@ -16,6 +16,8 @@
 
 #include <config.h>
 
+#include <math.h>
+
 #include "data/casereader.h"
 #include "data/dataset.h"
 #include "data/dictionary.h"
@@ -29,6 +31,7 @@
 #include "libpspp/hmap.h"
 #include "libpspp/message.h"
 #include "libpspp/string-array.h"
+#include "math/moments.h"
 #include "output/pivot-table.h"
 
 #include "gl/minmax.h"
@@ -400,7 +403,7 @@ struct ctables_axis
           {
             struct ctables_var var;
             bool scale;
-            struct ctables_summary *summaries;
+            struct ctables_summary_spec *summaries;
             size_t n_summaries;
             size_t allocated_summaries;
           };
@@ -428,7 +431,7 @@ enum ctables_function_availability
     CTFA_MRSETS,                /* Only multiple-response sets */
   };
 
-struct ctables_summary
+struct ctables_summary_spec
   {
     enum ctables_summary_function function;
     double percentile;          /* CTSF_PTILE only. */
@@ -437,7 +440,7 @@ struct ctables_summary
   };
 
 static void
-ctables_summary_uninit (struct ctables_summary *s)
+ctables_summary_spec_uninit (struct ctables_summary_spec *s)
 {
   if (s)
     free (s->label);
@@ -537,7 +540,7 @@ ctables_axis_destroy (struct ctables_axis *axis)
     {
     case CTAO_VAR:
       for (size_t i = 0; i < axis->n_summaries; i++)
-        ctables_summary_uninit (&axis->summaries[i]);
+        ctables_summary_spec_uninit (&axis->summaries[i]);
       free (axis->summaries);
       break;
 
@@ -611,10 +614,10 @@ ctables_summary_function_name (enum ctables_summary_function function)
 }
 
 static bool
-add_summary (struct ctables_axis *axis,
-             enum ctables_summary_function function, double percentile,
-             const char *label, const struct fmt_spec *format,
-             const struct msg_location *loc)
+add_summary_spec (struct ctables_axis *axis,
+                  enum ctables_summary_function function, double percentile,
+                  const char *label, const struct fmt_spec *format,
+                  const struct msg_location *loc)
 {
   if (axis->op == CTAO_VAR)
     {
@@ -654,8 +657,8 @@ add_summary (struct ctables_axis *axis,
           break;
         }
 
-      struct ctables_summary *dst = &axis->summaries[axis->n_summaries++];
-      *dst = (struct ctables_summary) {
+      struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++];
+      *dst = (struct ctables_summary_spec) {
         .function = function,
         .percentile = percentile,
         .label = xstrdup (label),
@@ -667,8 +670,8 @@ add_summary (struct ctables_axis *axis,
   else
     {
       for (size_t i = 0; i < 2; i++)
-        if (!add_summary (axis->subs[i], function, percentile, label, format,
-                          loc))
+        if (!add_summary_spec (axis->subs[i], function, percentile, label,
+                               format, loc))
           return false;
       return true;
     }
@@ -806,7 +809,7 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
 
       struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
                                                    lex_ofs (ctx->lexer) - 1);
-      add_summary (sub, function, percentile, label, formatp, loc);
+      add_summary_spec (sub, function, percentile, label, formatp, loc);
       free (label);
       msg_location_destroy (loc);
 
@@ -849,7 +852,7 @@ find_scale (const struct ctables_axis *axis)
 }
 
 static const struct ctables_axis *
-find_categorical_summary (const struct ctables_axis *axis)
+find_categorical_summary_spec (const struct ctables_axis *axis)
 {
   if (!axis)
     return NULL;
@@ -860,7 +863,7 @@ find_categorical_summary (const struct ctables_axis *axis)
       for (size_t i = 0; i < 2; i++)
         {
           const struct ctables_axis *sum
-            = find_categorical_summary (axis->subs[i]);
+            = find_categorical_summary_spec (axis->subs[i]);
           if (sum)
             return sum;
         }
@@ -896,7 +899,7 @@ ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
           return NULL;
         }
 
-      const struct ctables_axis *outer_sum = find_categorical_summary (lhs);
+      const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
       if (outer_sum)
         {
           msg_at (SE, nest->loc,
@@ -1240,6 +1243,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
 
 struct var_array
   {
+    const struct ctables_axis *summary; /* Axis that is summarized, or NULL. */
     struct variable **vars;
     size_t n;
   };
@@ -1280,16 +1284,24 @@ nest_fts (struct var_array2 va0, struct var_array2 va1)
   for (size_t i = 0; i < va0.n; i++)
     for (size_t j = 0; j < va1.n; j++)
       {
-        size_t allocate = va0.vas[i].n + va1.vas[j].n;
+        const struct var_array *a = &va0.vas[i];
+        const struct var_array *b = &va1.vas[j];
+
+        size_t allocate = a->n + b->n;
         struct variable **vars = xnmalloc (allocate, sizeof *vars);
         size_t n = 0;
-        for (size_t k = 0; k < va0.vas[i].n; k++)
-          vars[n++] = va0.vas[i].vars[k];
-        for (size_t k = 0; k < va1.vas[j].n; k++)
-          vars[n++] = va1.vas[j].vars[k];
+        for (size_t k = 0; k < a->n; k++)
+          vars[n++] = a->vars[k];
+        for (size_t k = 0; k < b->n; k++)
+          vars[n++] = b->vars[k];
         assert (n == allocate);
 
-        vaa.vas[vaa.n++] = (struct var_array) { .vars = vars, n = n };
+        assert (!(a->summary && b->summary));
+        vaa.vas[vaa.n++] = (struct var_array) {
+          .summary = a->summary ? a->summary : b->summary,
+          .vars = vars,
+          .n = n
+        };
       }
   var_array2_uninit (&va0);
   var_array2_uninit (&va1);
@@ -1320,10 +1332,16 @@ enumerate_fts (const struct ctables_axis *a)
     {
     case CTAO_VAR:
       assert (!a->var.is_mrset);
-      struct variable **v = xmalloc (sizeof *v);
-      *v = a->var.var;
       struct var_array *va = xmalloc (sizeof *va);
-      *va = (struct var_array) { .vars = v, .n = 1 };
+      if (a->scale)
+        *va = (struct var_array) { .n = 0 };
+      else
+        {
+          struct variable **v = xmalloc (sizeof *v);
+          *v = a->var.var;
+          *va = (struct var_array) { .vars = v, .n = 1 };
+        }
+      va->summary = a->scale || a->n_summaries ? a : NULL;
       return (struct var_array2) { .vas = va, .n = 1 };
 
     case CTAO_STACK:
@@ -1338,27 +1356,494 @@ enumerate_fts (const struct ctables_axis *a)
   NOT_REACHED ();
 }
 
-struct ctables_freq
+/* Running state for one summary function in one cell.  The summary
+   function in the matching struct ctables_summary_spec selects which
+   member is in use. */
+union ctables_summary
   {
-    struct hmap_node node;      /* Element in hash table. */
-    double count;
-    union value values[];       /* The value. */
+    /* COUNT, VALIDN, TOTALN. */
+    struct
+      {
+        double valid;
+        double missing;
+      };
+
+    /* MINIMUM, MAXIMUM, RANGE. */
+    struct
+      {
+        double min;
+        double max;
+      };
+
+    /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
+    struct moments1 *moments;
+
+    /* XXX percentiles, median, mode, multiple response */
   };
 
-static struct ctables_freq *
-ctables_freq_allocate (size_t n_values)
+/* Initializes S as an empty accumulator for the summary function in SS.
+   Functions that are not implemented yet (percentiles, median, mode,
+   MISSING, multiple response) hit NOT_REACHED. */
+static void
+ctables_summary_init (union ctables_summary *s,
+                      const struct ctables_summary_spec *ss)
+{
+  switch (ss->function)
+    {
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+      s->missing = s->valid = 0;
+      break;
+
+    case CTSF_MAXIMUM:
+    case CTSF_MINIMUM:
+    case CTSF_RANGE:
+      s->min = s->max = SYSMIS;
+      break;
+
+    case CTSF_MEAN:
+    case CTSF_SEMEAN:
+    case CTSF_STDDEV:
+    case CTSF_SUM:
+    case CTSF_VARIANCE:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      s->moments = moments1_create (MOMENT_VARIANCE);
+      break;
+
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+      NOT_REACHED ();
+
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      NOT_REACHED ();
+    }
+}
+
+/* Releases the resources that ctables_summary_init() allocated for S, which
+   must have been initialized for the summary function in SS. */
+static void
+ctables_summary_uninit (union ctables_summary *s,
+                        const struct ctables_summary_spec *ss)
 {
-  struct ctables_freq *f;
-  return xmalloc (sizeof *f + n_values * sizeof *f->values);
+  switch (ss->function)
+    {
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+      break;
+
+    case CTSF_MAXIMUM:
+    case CTSF_MINIMUM:
+    case CTSF_RANGE:
+      break;
+
+    case CTSF_MEAN:
+    case CTSF_SEMEAN:
+    case CTSF_STDDEV:
+    case CTSF_SUM:
+    case CTSF_VARIANCE:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      moments1_destroy (s->moments);
+      break;
+
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+      NOT_REACHED ();
+
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      NOT_REACHED ();
+    }
 }
 
+/* Adds VALUE, an observation of VAR with the given WEIGHT, to accumulator S
+   for the summary function in SS.  Counts track valid and missing weight
+   separately; MINIMUM, MAXIMUM, RANGE, and the moment-based functions
+   ignore missing values. */
+static void
+ctables_summary_add (union ctables_summary *s,
+                     const struct ctables_summary_spec *ss,
+                     const struct variable *var, const union value *value,
+                     double weight)
+{
+  switch (ss->function)
+    {
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+      if (var_is_value_missing (var, value))
+        s->missing += weight;
+      else
+        s->valid += weight;
+      break;
+
+    case CTSF_MAXIMUM:
+    case CTSF_MINIMUM:
+    case CTSF_RANGE:
+      if (!var_is_value_missing (var, value))
+        {
+          assert (!var_is_alpha (var)); /* XXX? */
+          if (s->min == SYSMIS || value->f < s->min)
+            s->min = value->f;
+          if (s->max == SYSMIS || value->f > s->max)
+            s->max = value->f;
+        }
+      break;
+
+    case CTSF_MEAN:
+    case CTSF_SEMEAN:
+    case CTSF_STDDEV:
+    case CTSF_SUM:
+    case CTSF_VARIANCE:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      if (!var_is_value_missing (var, value))
+        moments1_add (s->moments, value->f, weight);
+      break;
+
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+      NOT_REACHED ();
+
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      NOT_REACHED ();
+    }
+}
+
+
+/* Returns the value of the summary function in SS computed from accumulator
+   S.  Percentage functions are not implemented yet and hit NOT_REACHED.
+   RANGE, STDDEV, and SUM yield SYSMIS when their inputs are SYSMIS. */
+static double
+ctables_summary_value (union ctables_summary *s,
+                       const struct ctables_summary_spec *ss)
+{
+  switch (ss->function)
+    {
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+      return s->valid;
+
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+      NOT_REACHED ();
+
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+      return s->valid + s->missing;
+
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+      return s->valid;
+
+    case CTSF_MAXIMUM:
+      return s->max;
+
+    case CTSF_MINIMUM:
+      return s->min;
+
+    case CTSF_RANGE:
+      return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
+
+    case CTSF_MEAN:
+      {
+        double mean;
+        moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
+        return mean;
+      }
+
+    case CTSF_SEMEAN:
+      {
+        double weight, variance;
+        moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
+        return calc_semean (variance, weight);
+      }
+
+    case CTSF_STDDEV:
+      {
+        double variance;
+        moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
+        return variance != SYSMIS ? sqrt (variance) : SYSMIS;
+      }
+
+    case CTSF_SUM:
+      {
+        double weight, mean;
+        moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
+        return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
+      }
+
+    case CTSF_VARIANCE:
+      {
+        double variance;
+        moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
+        return variance;
+      }
+
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      NOT_REACHED ();
+
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+      NOT_REACHED ();
+
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      NOT_REACHED ();
+    }
+
+  NOT_REACHED ();
+}
+
+/* One cell in a frequency table: a unique combination of values for the
+   table's variables, plus one accumulator per summary function. */
+struct ctables_freq
+  {
+    struct hmap_node node;      /* Element in hash table. */
+    union ctables_summary *summaries;
+    union value values[];       /* The value. */
+  };
+
 struct ctables_freqtab
   {
     struct var_array vars;
     struct hmap data;           /* Contains "struct ctables_freq"s. */
+    const struct ctables_summary_spec *summaries; /* Functions to compute. */
+    size_t n_summaries;
+    const struct variable *summary_var; /* Variable that is summarized. */
     struct ctables_freq **sorted;
   };
 
+/* Allocates and returns a new cell for FT with every summary accumulator
+   initialized.  The caller must fill in the cell's values and insert it
+   into FT's hash table. */
+static struct ctables_freq *
+ctables_freq_create (struct ctables_freqtab *ft)
+{
+  struct ctables_freq *f = xmalloc (sizeof *f + ft->vars.n * sizeof *f->values);
+  f->summaries = xnmalloc (ft->n_summaries, sizeof *f->summaries);
+  for (size_t i = 0; i < ft->n_summaries; i++)
+    ctables_summary_init (&f->summaries[i], &ft->summaries[i]);
+  return f;
+}
+
+/* Adds VALUE, an observation of VAR with the given WEIGHT, to every summary
+   accumulator in cell F of table FT. */
+static void
+ctables_freq_add (struct ctables_freqtab *ft, struct ctables_freq *f,
+                  const struct variable *var, const union value *value,
+                  double weight)
+{
+  for (size_t i = 0; i < ft->n_summaries; i++)
+    ctables_summary_add (&f->summaries[i], &ft->summaries[i],
+                         var, value, weight);
+}
+
 static int
 ctables_freq_compare_3way (const void *a_, const void *b_, const void *vars_)
 {
@@ -1400,9 +1885,48 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
 
           for (size_t j = 0; j < vaa.n; j++)
             {
+              const struct var_array *va = &vaa.vas[j];
+              const struct ctables_summary_spec *summaries;
+              size_t n_summaries;
+              const struct variable *summary_var;
+              /* Choose what to summarize: COUNT of the first variable if no
+                 axis asks for a summary, the axis's own summaries if it has
+                 any, and otherwise MEAN of the scale variable. */
+              if (!va->summary)
+                {
+                  static const struct ctables_summary_spec count = {
+                    .function = CTSF_COUNT,
+                    .label = (char *) N_("Count"),
+                    .format = { .type = FMT_F, .w = 40 },
+                  };
+                  summaries = &count;
+                  n_summaries = 1;
+                  summary_var = va->vars[0];
+                }
+              else if (va->summary->n_summaries)
+                {
+                  summaries = va->summary->summaries;
+                  n_summaries = va->summary->n_summaries;
+                  summary_var = va->summary->var.var;
+                }
+              else
+                {
+                  static const struct ctables_summary_spec mean = {
+                    .function = CTSF_MEAN,
+                    .label = (char *) N_("Mean"),
+                    .format = { .type = FMT_F, .w = 40, .d = 2}, /* XXX */
+                  };
+                  summaries = &mean;
+                  n_summaries = 1;
+                  summary_var = va->summary->var.var;
+                }
+
               struct ctables_freqtab *ft = xmalloc (sizeof *ft);
               *ft = (struct ctables_freqtab) {
-                .vars = vaa.vas[j],
+                .vars = *va,
+                .summaries = summaries,
+                .n_summaries = n_summaries,
+                .summary_var = summary_var,
                 .data = HMAP_INITIALIZER (ft->data),
               };
 
@@ -1464,15 +1988,12 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
                                     var_get_width (var)))
                     goto next_hash_node;
                 }
-
-              f->count += weight;
-              goto next_ft;
+              goto found;
 
             next_hash_node: ;
             }
 
-          f = ctables_freq_allocate (ft->vars.n);
-          f->count = weight;
+          f = ctables_freq_create (ft);
           for (size_t k = 0; k < ft->vars.n; k++)
             {
               const struct variable *var = ft->vars.vars[k];
@@ -1481,6 +2002,10 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
             }
           hmap_insert (&ft->data, &f->node, hash);
 
+        found:
+          ctables_freq_add (ft, f, ft->summary_var,
+                            case_data (c, ft->summary_var), weight);
+
         next_ft: ;
         }
     }
@@ -1540,11 +2065,18 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
 
               if (m == ft->vars.n - 1)
                 {
-                  int leaf = pivot_category_create_leaf (
+                  struct pivot_category *c = pivot_category_create_group__ (
                     parent,
                     pivot_value_new_var_value (ft->vars.vars[ft->vars.n - 1],
                                                &f->values[ft->vars.n - 1]));
-                  pivot_table_put1 (pt, leaf, pivot_value_new_number (f->count));
+                  for (size_t p = 0; p < ft->n_summaries; p++)
+                    {
+                      double value = ctables_summary_value (
+                        &f->summaries[p], &ft->summaries[p]);
+                      int leaf = pivot_category_create_leaf (
+                        c, pivot_value_new_text (ft->summaries[p].label));
+                      pivot_table_put1 (pt, leaf, pivot_value_new_number (value));
+                    }
                 }
             }
         }
@@ -1564,6 +2096,9 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
           HMAP_FOR_EACH_SAFE (f, next, struct ctables_freq, node, &ft->data)
             {
               hmap_delete (&ft->data, &f->node);
+              for (size_t k = 0; k < ft->n_summaries; k++)
+                ctables_summary_uninit (&f->summaries[k], &ft->summaries[k]);
+              free (f->summaries);
               for (size_t k = 0; k < ft->vars.n; k++)
                 {
                   const struct variable *var = ft->vars.vars[k];
-- 
2.30.2