From: Ben Pfaff
Date: Thu, 7 Jul 2022 04:49:02 +0000 (-0700)
Subject: areapct_sum
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=48ff512fdcc7ebd38013837789eaf6b2a87f5251

areapct_sum
---

diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c
index 470179127b..930c09c633 100644
--- a/src/language/stats/ctables.c
+++ b/src/language/stats/ctables.c
@@ -234,6 +234,14 @@ struct ctables_domain
     double u_valid;             /* Unweighted. */
     double u_count;
     double u_total;
+    struct ctables_sum *sums;   /* One per table sum_var, or NULL if none. */
+  };
+
+/* Running sums of one sum variable's values over the cases in a domain. */
+struct ctables_sum
+  {
+    double e_sum;               /* Sum of value * e_weight. */
+    double u_sum;               /* Sum of value (unweighted). */
   };
 
 enum ctables_summary_variant
@@ -467,6 +475,8 @@ struct ctables_table
     size_t n_sections;
     enum pivot_axis_type summary_axis;
     struct ctables_summary_spec_set summary_specs;
+    struct variable **sum_vars; /* Variables used by *PCT_SUM specs. */
+    size_t n_sum_vars;          /* Number of elements in 'sum_vars'. */
 
     const struct variable *clabels_example;
     struct hmap clabels_values_map;
@@ -777,6 +787,7 @@ struct ctables_summary_spec
     bool is_ctables_format;       /* Is 'format' one of CTEF_*? */
 
     size_t axis_idx;
+    size_t sum_var_idx;           /* For *PCT_SUM: index into sum_vars. */
   };
 
 static void
@@ -2902,6 +2913,107 @@ ctables_function_domain (enum ctables_summary_function function)
   NOT_REACHED ();
 }
 
+/* Returns true if FUNCTION is one of the CTSF_*PCT_SUM summary functions
+   (weighted or unweighted), false otherwise. */
+static bool
+ctables_function_is_pctsum (enum ctables_summary_function function)
+{
+  switch (function)
+    {
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_MISSING:
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+    case CTSF_MAXIMUM:
+    case CTSF_MINIMUM:
+    case CTSF_RANGE:
+    case CTSF_MEAN:
+    case CTSF_SEMEAN:
+    case CTSF_STDDEV:
+    case CTSF_SUM:
+    case CTSF_VARIANCE:
+    case CTSF_MEDIAN:
+    case CTSF_PTILE:
+    case CTSF_MODE:
+    case CTSF_UCOUNT:
+    case CTSF_UMISSING:
+    case CSTF_UTOTALN:
+    case CTSF_UVALIDN:
+    case CTSF_UMEAN:
+    case CTSF_USEMEAN:
+    case CTSF_USTDDEV:
+    case CTSF_USUM:
+    case CTSF_UVARIANCE:
+    case CTSF_UMEDIAN:
+    case CTSF_UPTILE:
+    case CTSF_UMODE:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_UCOLPCT_COUNT:
+    case CTSF_UCOLPCT_TOTALN:
+    case CTSF_UCOLPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_LAYERCOLPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ULAYERCOLPCT_COUNT:
+    case CTSF_ULAYERCOLPCT_TOTALN:
+    case CTSF_ULAYERCOLPCT_VALIDN:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_ULAYERPCT_COUNT:
+    case CTSF_ULAYERPCT_TOTALN:
+    case CTSF_ULAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_ULAYERROWPCT_COUNT:
+    case CTSF_ULAYERROWPCT_TOTALN:
+    case CTSF_ULAYERROWPCT_VALIDN:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_UROWPCT_COUNT:
+    case CTSF_UROWPCT_TOTALN:
+    case CTSF_UROWPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_USUBTABLEPCT_COUNT:
+    case CTSF_USUBTABLEPCT_TOTALN:
+    case CTSF_USUBTABLEPCT_VALIDN:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_UTABLEPCT_COUNT:
+    case CTSF_UTABLEPCT_TOTALN:
+    case CTSF_UTABLEPCT_VALIDN:
+      return false;
+
+    case CTSF_COLPCT_SUM:
+    case CTSF_UCOLPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+    case CTSF_ULAYERCOLPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_ULAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_ULAYERROWPCT_SUM:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_UROWPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_USUBTABLEPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_UTABLEPCT_SUM:
+      return true;
+    }
+
+  NOT_REACHED ();
+}
+
 static double
 ctables_summary_value (const struct ctables_cell *cell,
                        union ctables_summary *s,
@@ -3064,6 +3176,16 @@ ctables_summary_value (const struct ctables_cell *cell,
     case CTSF_LAYERPCT_SUM:
     case CTSF_LAYERROWPCT_SUM:
     case CTSF_LAYERCOLPCT_SUM:
+      {
+        double weight, mean;
+        moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
+        if (weight == SYSMIS || mean == SYSMIS)
+          return SYSMIS;
+        enum ctables_domain_type d = ctables_function_domain (ss->function);
+        double num = weight * mean;
+        double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
+        return denom != 0 ? num / denom * 100 : SYSMIS;
+      }
     case CTSF_UROWPCT_SUM:
     case CTSF_UCOLPCT_SUM:
     case CTSF_UTABLEPCT_SUM:
@@ -3071,7 +3193,16 @@ ctables_summary_value (const struct ctables_cell *cell,
     case CTSF_ULAYERPCT_SUM:
     case CTSF_ULAYERROWPCT_SUM:
     case CTSF_ULAYERCOLPCT_SUM:
-      NOT_REACHED ();
+      {
+        double weight, mean;
+        moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
+        if (weight == SYSMIS || mean == SYSMIS)
+          return SYSMIS;
+        enum ctables_domain_type d = ctables_function_domain (ss->function);
+        double num = weight * mean;
+        double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
+        return denom != 0 ? num / denom * 100 : SYSMIS;
+      }
 
     case CTSF_MEDIAN:
     case CTSF_PTILE:
@@ -3252,8 +3383,14 @@ ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
     not_equal: ;
     }
 
+  /* NOTE(review): this patch adds no matching free() for 'sums'; confirm
+     that the code that destroys domains releases it. */
+  struct ctables_sum *sums = (s->table->n_sum_vars
+                              ? xzalloc (s->table->n_sum_vars * sizeof *sums)
+                              : NULL);
+
   d = xmalloc (sizeof *d);
-  *d = (struct ctables_domain) { .example = cell };
+  *d = (struct ctables_domain) { .example = cell, .sums = sums };
   hmap_insert (&s->domains[domain], &d->node, hash);
   return d;
 }
@@ -3543,6 +3680,19 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
                   d->d_valid += d_weight;
                   d->e_valid += e_weight;
                   d->u_count += 1.0;
+
+                  for (size_t i = 0; i < s->table->n_sum_vars; i++)
+                    {
+                      /* XXX listwise_missing??? */
+                      const struct variable *var = s->table->sum_vars[i];
+                      double addend = case_num (c, var);
+                      if (!var_is_num_missing (var, addend))
+                        {
+                          struct ctables_sum *sum = &d->sums[i];
+                          sum->e_sum += addend * e_weight;
+                          sum->u_sum += addend;
+                        }
+                    }
                 }
             }
         }
@@ -4560,6 +4710,53 @@ ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
   return true;
 }
 
+/* Returns the index of VAR within the *N elements of *SUM_VARS, first
+   appending VAR (growing the array, whose capacity is *ALLOCATED, as
+   needed) if it is not already present. */
+static size_t
+add_sum_var (struct variable *var,
+             struct variable ***sum_vars, size_t *n, size_t *allocated)
+{
+  for (size_t i = 0; i < *n; i++)
+    if (var == (*sum_vars)[i])
+      return i;
+
+  if (*n >= *allocated)
+    *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
+  (*sum_vars)[*n] = var;
+  return (*n)++;
+}
+
+/* Walks axis expression A, recursing into both operands of stack and nest
+   nodes.  For each *PCT_SUM summary specification attached to a variable,
+   adds that variable to *SUM_VARS and stores its index in 'sum_var_idx'. */
+static void
+enumerate_sum_vars (const struct ctables_axis *a,
+                    struct variable ***sum_vars, size_t *n, size_t *allocated)
+{
+  if (!a)
+    return;
+
+  switch (a->op)
+    {
+    case CTAO_VAR:
+      for (size_t i = 0; i < N_CSVS; i++)
+        for (size_t j = 0; j < a->specs[i].n; j++)
+          {
+            struct ctables_summary_spec *spec = &a->specs[i].specs[j];
+            if (ctables_function_is_pctsum (spec->function))
+              spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
+          }
+      break;
+
+    case CTAO_STACK:
+    case CTAO_NEST:
+      for (size_t i = 0; i < 2; i++)
+        enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
+      break;
+    }
+}
+
 static bool
 ctables_prepare_table (struct ctables_table *t)
 {
@@ -4684,7 +4881,7 @@ ctables_prepare_table (struct ctables_table *t)
         }
 
       struct ctables_summary_spec_set *merged = &t->summary_specs;
-      struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
+      struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
       size_t n_left = 0;
       for (size_t j = 0; j < stack->n; j++)
         {
@@ -4740,6 +4937,12 @@ ctables_prepare_table (struct ctables_table *t)
     }
 #endif
 
+  /* NOTE(review): 't->sum_vars' is allocated below; this patch shows no
+     matching free() -- confirm that table destruction releases it. */
+  size_t allocated_sum_vars = 0;
+  enumerate_sum_vars (t->axes[t->summary_axis],
+                      &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
+
   return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
           && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
 }
diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at
index 22ea358d1e..7a52b185de 100644
--- a/tests/language/stats/ctables.at
+++ b/tests/language/stats/ctables.at
@@ -5,8 +5,6 @@ dnl
 dnl - SPLIT FILE with SEPARATE splits
 dnl - Definition of columns/rows when labels are rotated from one axis to another.
 dnl - Preprocessing to distinguish categorical from scale.
-dnl - Summary functions:
-dnl   * areaPCT.SUM and UareaPCT.SUM functions.
 dnl - PCOMPUTE:
 dnl   * multi-dimensional (multiple CCT_POSTCOMPUTE in one cell)
 dnl   * dates
@@ -48,6 +46,7 @@ dnl - PPROPERTIES:
 dnl   * )LABEL[N].
 dnl - Summary functions:
 dnl   * U-prefix for unweighted summaries.
+dnl   * areaPCT.SUM and UareaPCT.SUM functions.
 dnl
 dnl Not for v1:
 dnl - Multiple response sets