From: Ben Pfaff Date: Thu, 25 Aug 2022 20:18:58 +0000 (-0700) Subject: simplify weighting X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=c4bc3574d974d3aaf4d291097c995a31515a308a simplify weighting --- diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index b5152ef636..7e899d1b66 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -63,18 +63,32 @@ enum ctables_vlabel CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, }; +enum ctables_weighting + { + CTW_EFFECTIVE, + CTW_DICTIONARY, + CTW_UNWEIGHTED +#define N_CTWS 3 + }; + enum ctables_function_type { - /* A function that operates on data in a single cell. The function does - not have an unweighted version. */ + /* A function that operates on data in a single cell. It operates on + effective weights. It does not have an unweighted version. */ CTFT_CELL, - /* A function that operates on data in a single cell. The function has an - unweighted version. */ + /* A function that operates on data in a single cell. The function + operates on effective weights and has a U-prefixed unweighted + version. */ CTFT_UCELL, - /* A function that operates on an area of cells. The function has an - unweighted version. */ + /* A function that operates on data in a single cell. It operates on + dictionary weights, and has U-prefixed unweighted version and an + E-prefixed effective weight version. */ + CTFT_UECELL, + + /* A function that operates on an area of cells. It operates on effective + weights and has a U-prefixed unweighted version. */ CTFT_AREA, }; @@ -113,8 +127,9 @@ struct ctables_function_info enum ctables_format format; enum ctables_function_availability availability; - bool may_be_unweighted; - bool is_area; + bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */ + bool e_prefix; /* Accepts an 'E' prefix (for effective)? */ + bool is_area; /* Needs an area prefix. */ }; static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = { #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \ @@ -123,15 +138,14 @@ static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS .type = TYPE, \ .format = FORMAT, \ .availability = AVAILABILITY, \ - .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \ + .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \ + .e_prefix = (TYPE) == CTFT_UECELL, \ .is_area = (TYPE) == CTFT_AREA \ }, #include "ctables.inc" #undef S }; -static bool ctables_summary_function_is_count (enum ctables_summary_function); - enum ctables_area_type { /* Within a section, where stacked variables divide one section from @@ -169,22 +183,15 @@ struct ctables_area const struct ctables_cell *example; size_t sequence; - double d_valid; /* Dictionary weight. */ - double d_count; - double d_total; - double e_valid; /* Effective weight */ - double e_count; - double e_total; - double u_valid; /* Unweighted. */ - double u_count; - double u_total; + double count[N_CTWS]; + double valid[N_CTWS]; + double total[N_CTWS]; struct ctables_sum *sums; }; struct ctables_sum { - double e_sum; - double u_sum; + double sum[N_CTWS]; }; enum ctables_summary_variant @@ -543,7 +550,7 @@ struct ctables_category /* CCT_FUNCTION. */ enum ctables_summary_function sort_function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; struct variable *sort_var; double percentile; @@ -746,7 +753,7 @@ struct ctables_summary_spec cell, it must be 0). For CTSF_PTILE only, 'percentile' is the percentile between 0 and 100 (for other functions it must be 0). */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type calc_area; double percentile; /* CTSF_PTILE only. */ @@ -856,23 +863,18 @@ ctables_function_availability (enum ctables_summary_function f) return availability[f]; } -static bool -ctables_summary_function_is_count (enum ctables_summary_function f) -{ - return f == CTSF_COUNT || f == CTSF_ECOUNT; -} - static bool parse_ctables_summary_function (struct lexer *lexer, enum ctables_summary_function *function, - bool *weighted, + enum ctables_weighting *weighting, enum ctables_area_type *area) { if (!lex_force_id (lexer)) return false; struct substring name = lex_tokss (lexer); - *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u')); + bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'); + bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e')); bool has_area = false; *area = 0; @@ -886,6 +888,7 @@ parse_ctables_summary_function (struct lexer *lexer, { /* Special case where .COUNT suffix is omitted. */ *function = CTSF_areaPCT_COUNT; + *weighting = CTW_EFFECTIVE; lex_get (lexer); return true; } @@ -898,11 +901,13 @@ parse_ctables_summary_function (struct lexer *lexer, if (ss_equals_case (cfi->basename, name)) { *function = f; - if (!*weighted && !cfi->may_be_unweighted) - break; - if (has_area != cfi->is_area) + if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area)) break; + *weighting = (e ? CTW_EFFECTIVE + : u ? CTW_UNWEIGHTED + : cfi->e_prefix ? CTW_DICTIONARY + : CTW_EFFECTIVE); lex_get (lexer); return true; } @@ -986,15 +991,15 @@ ctables_summary_default_format (enum ctables_summary_function function, static const char * ctables_summary_label__ (const struct ctables_summary_spec *spec) { - bool w = spec->weighted; + bool w = spec->weighting != CTW_UNWEIGHTED; + bool d = spec->weighting == CTW_DICTIONARY; enum ctables_area_type a = spec->user_area; switch (spec->function) { case CTSF_COUNT: - return w ? N_("Count") : N_("Unweighted Count"); - - case CTSF_ECOUNT: - return N_("Adjusted Count"); + return (d ? N_("Count") + : w ? N_("Adjusted Count") + : N_("Unweighted Count")); case CTSF_areaPCT_COUNT: switch (a) @@ -1037,20 +1042,22 @@ ctables_summary_label__ (const struct ctables_summary_spec *spec) case CTSF_MAXIMUM: return N_("Maximum"); case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean"); - case CTSF_MEDIAN: return N_("Median"); + case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median"); case CTSF_MINIMUM: return N_("Minimum"); - case CTSF_MISSING: return N_("Missing"); - case CTSF_MODE: return N_("Mode"); + case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing"); + case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode"); case CTSF_PTILE: NOT_REACHED (); case CTSF_RANGE: return N_("Range"); - case CTSF_SEMEAN: return N_("Std Error of Mean"); - case CTSF_STDDEV: return N_("Std Deviation"); - case CTSF_SUM: return N_("Sum"); - case CTSF_TOTALN: return N_("Total N"); - case CTSF_ETOTALN: return N_("Adjusted Total N"); - case CTSF_VALIDN: return N_("Valid N"); - case CTSF_EVALIDN: return N_("Adjusted Valid N"); - case CTSF_VARIANCE: return N_("Variance"); + case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean"); + case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation"); + case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum"); + case CTSF_TOTALN: return (d ? N_("Total N") + : w ? N_("Adjusted Total N") + : N_("Unweighted Total N")); + case CTSF_VALIDN: return (d ? N_("Valid N") + : w ? N_("Adjusted Valid N") + : N_("Unweighted Valid N")); + case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance"); case CTSF_areaPCT_SUM: switch (a) { @@ -1090,7 +1097,7 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) if (spec->function == CTSF_PTILE) { double p = spec->percentile; - char *s = (spec->weighted + char *s = (spec->weighting != CTW_UNWEIGHTED ? xasprintf (_("Percentile %.2f"), p) : xasprintf (_("Unweighted Percentile %.2f"), p)); return pivot_value_new_user_text_nocopy (s); @@ -1120,13 +1127,16 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) static const char * ctables_summary_function_name (enum ctables_summary_function function, - bool weighted, + enum ctables_weighting weighting, enum ctables_area_type area, char *buffer, size_t bufsize) { const struct ctables_function_info *cfi = &ctables_function_info[function]; snprintf (buffer, bufsize, "%s%s%s", - weighted ? "" : "U", + (weighting == CTW_UNWEIGHTED ? "U" + : weighting == CTW_DICTIONARY ? "" + : cfi->e_prefix ? "E" + : ""), cfi->is_area ? ctables_area_type_name[area] : "", cfi->basename.string); return buffer; @@ -1134,7 +1144,8 @@ ctables_summary_function_name (enum ctables_summary_function function, static bool add_summary_spec (struct ctables_axis *axis, - enum ctables_summary_function function, bool weighted, + enum ctables_summary_function function, + enum ctables_weighting weighting, enum ctables_area_type area, double percentile, const char *label, const struct fmt_spec *format, bool is_ctables_format, const struct msg_location *loc, @@ -1143,7 +1154,7 @@ add_summary_spec (struct ctables_axis *axis, if (axis->op == CTAO_VAR) { char function_name[128]; - ctables_summary_function_name (function, weighted, area, + ctables_summary_function_name (function, weighting, area, function_name, sizeof function_name); const char *var_name = var_get_name (axis->var); switch (ctables_function_availability (function)) @@ -1181,7 +1192,7 @@ add_summary_spec (struct ctables_axis *axis, struct ctables_summary_spec *dst = &set->specs[set->n++]; *dst = (struct ctables_summary_spec) { .function = function, - .weighted = weighted, + .weighting = weighting, .calc_area = area, .user_area = area, .percentile = percentile, @@ -1195,7 +1206,7 @@ add_summary_spec (struct ctables_axis *axis, else { for (size_t i = 0; i < 2; i++) - if (!add_summary_spec (axis->subs[i], function, weighted, area, + if (!add_summary_spec (axis->subs[i], function, weighting, area, percentile, label, format, is_ctables_format, loc, sv)) return false; @@ -1313,9 +1324,9 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) /* Parse function. */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; - if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted, + if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting, &area)) goto error; @@ -1357,7 +1368,7 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, lex_ofs (ctx->lexer) - 1); - add_summary_spec (sub, function, weighted, area, percentile, label, + add_summary_spec (sub, function, weighting, area, percentile, label, formatp, is_ctables_format, loc, sv); free (label); msg_location_destroy (loc); @@ -2107,7 +2118,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, { cat.type = CCT_FUNCTION; if (!parse_ctables_summary_function (lexer, &cat.sort_function, - &cat.weighted, &cat.area)) + &cat.weighting, &cat.area)) goto error; if (lex_match (lexer, T_LPAREN)) @@ -2535,15 +2546,12 @@ ctables_summary_init (union ctables_summary *s, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: case CTSF_areaPCT_COUNT: case CTSF_areaPCT_VALIDN: case CTSF_areaPCT_TOTALN: case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: s->count = 0; break; @@ -2596,15 +2604,12 @@ ctables_summary_uninit (union ctables_summary *s, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: case CTSF_areaPCT_COUNT: case CTSF_areaPCT_VALIDN: case CTSF_areaPCT_TOTALN: case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: break; case CTSF_areaID: @@ -2638,7 +2643,7 @@ ctables_summary_add (union ctables_summary *s, const struct variable *var, const union value *value, bool is_scale, bool is_scale_missing, bool is_missing, bool is_included, - double d_weight, double e_weight) + double weight) { /* To determine whether a case is included in a given table for a particular kind of summary, consider the following charts for each variable in the @@ -2661,35 +2666,35 @@ ctables_summary_add (union ctables_summary *s, switch (ss->function) { case CTSF_TOTALN: - s->count += ss->weighted ? d_weight : 1.0; + s->count += weight; break; case CTSF_areaPCT_TOTALN: - s->count += ss->weighted ? e_weight : 1.0; + s->count += weight; break; case CTSF_COUNT: if (is_scale || is_included) - s->count += ss->weighted ? d_weight : 1.0; + s->count += weight; break; case CTSF_areaPCT_COUNT: if (is_scale || is_included) - s->count += ss->weighted ? e_weight : 1.0; + s->count += weight; break; case CTSF_VALIDN: if (is_scale ? !is_scale_missing : !is_missing) - s->count += ss->weighted ? d_weight : 1.0; + s->count += weight; break; case CTSF_areaPCT_VALIDN: if (is_scale ? !is_scale_missing : !is_missing) - s->count += ss->weighted ? e_weight : 1.0; + s->count += weight; break; case CTSF_areaID: @@ -2699,23 +2704,7 @@ ctables_summary_add (union ctables_summary *s, if (is_scale ? is_scale_missing : is_missing) - s->count += ss->weighted ? e_weight : 1.0; - break; - - case CTSF_ECOUNT: - if (is_scale || is_included) - s->count += e_weight; - break; - - case CTSF_EVALIDN: - if (is_scale - ? !is_scale_missing - : !is_missing) - s->count += e_weight; - break; - - case CTSF_ETOTALN: - s->count += e_weight; + s->count += weight; break; case CTSF_MAXIMUM: @@ -2737,12 +2726,12 @@ ctables_summary_add (union ctables_summary *s, case CTSF_SUM: case CTSF_VARIANCE: if (!is_scale_missing) - moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0); + moments1_add (s->moments, value->f, weight); break; case CTSF_areaPCT_SUM: if (!is_missing && !is_scale_missing) - moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0); + moments1_add (s->moments, value->f, weight); break; case CTSF_MEDIAN: @@ -2750,12 +2739,11 @@ ctables_summary_add (union ctables_summary *s, case CTSF_PTILE: if (!is_scale_missing) { - double w = ss->weighted ? e_weight : 1.0; - s->ovalid += w; + s->ovalid += weight; struct ccase *c = case_create (casewriter_get_proto (s->writer)); *case_num_rw_idx (c, 0) = value->f; - *case_num_rw_idx (c, 1) = w; + *case_num_rw_idx (c, 1) = weight; casewriter_write (s->writer, c); } break; @@ -2770,7 +2758,6 @@ ctables_summary_value (const struct ctables_cell *cell, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: return s->count; case CTSF_areaID: @@ -2779,29 +2766,27 @@ ctables_summary_value (const struct ctables_cell *cell, case CTSF_areaPCT_COUNT: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_count = ss->weighted ? a->e_count : a->u_count; + double a_count = a->count[ss->weighting]; return a_count ? s->count / a_count * 100 : SYSMIS; } case CTSF_areaPCT_VALIDN: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_valid = ss->weighted ? a->e_valid : a->u_valid; + double a_valid = a->valid[ss->weighting]; return a_valid ? s->count / a_valid * 100 : SYSMIS; } case CTSF_areaPCT_TOTALN: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_total = ss->weighted ? a->e_total : a->u_total; + double a_total = a->total[ss->weighting]; return a_total ? s->count / a_total * 100 : SYSMIS; } case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: return s->count; case CTSF_MAXIMUM: @@ -2857,7 +2842,7 @@ ctables_summary_value (const struct ctables_cell *cell, const struct ctables_area *a = cell->areas[ss->calc_area]; const struct ctables_sum *sum = &a->sums[ss->sum_var_idx]; - double denom = ss->weighted ? sum->e_sum : sum->u_sum; + double denom = sum->sum[ss->weighting]; return denom != 0 ? weight * mean / denom * 100 : SYSMIS; } @@ -3319,10 +3304,17 @@ is_listwise_missing (const struct ctables_summary_spec_set *specs, return false; } +static void +add_weight (double dst[N_CTWS], const double src[N_CTWS]) +{ + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + dst[wt] += src[wt]; +} + static void ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_included, double d_weight, double e_weight) + bool is_included, double weight[N_CTWS]) { struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); const struct ctables_nest *ss = s->nests[s->table->summary_axis]; @@ -3334,27 +3326,20 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, for (size_t i = 0; i < specs->n; i++) ctables_summary_add (&cell->summaries[i], &specs->specs[i], - specs->var, value, specs->is_scale, - scale_missing, is_missing, is_included, - d_weight, e_weight); + specs->var, value, specs->is_scale, + scale_missing, is_missing, is_included, + weight[specs->specs[i].weighting]); for (enum ctables_area_type at = 0; at < N_CTATS; at++) if (!(cell->omit_areas && (1u << at))) { struct ctables_area *a = cell->areas[at]; - a->d_total += d_weight; - a->e_total += e_weight; - a->u_total += 1.0; + + add_weight (a->total, weight); if (is_included) - { - a->d_count += d_weight; - a->e_count += e_weight; - a->u_count += 1.0; - } + add_weight (a->count, weight); if (!is_missing) { - a->d_valid += d_weight; - a->e_valid += e_weight; - a->u_count += 1.0; + add_weight (a->valid, weight); for (size_t i = 0; i < s->table->n_sum_vars; i++) { @@ -3364,8 +3349,8 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, if (!var_is_num_missing (var, addend)) { struct ctables_sum *sum = &a->sums[i]; - sum->e_sum += addend * e_weight; - sum->u_sum += addend; + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + sum->sum[wt] += addend * weight[wt]; } } } @@ -3375,7 +3360,7 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, static void recurse_totals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_included, double d_weight, double e_weight, + bool is_included, double weight[N_CTWS], enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3394,8 +3379,8 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, { const struct ctables_category *save = cats[a][i]; cats[a][i] = total; - ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight); - recurse_totals (s, c, cats, is_included, d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, a, i + 1); cats[a][i] = save; } } @@ -3406,7 +3391,7 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, static void recurse_subtotals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_included, double d_weight, double e_weight, + bool is_included, double weight[N_CTWS], enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3421,8 +3406,8 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c, if (save->subtotal) { cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight); - recurse_subtotals (s, c, cats, is_included, d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_subtotals (s, c, cats, is_included, weight, a, i + 1); cats[a][i] = save; } } @@ -3450,9 +3435,8 @@ ctables_add_occurrence (const struct variable *var, } static void -ctables_cell_insert (struct ctables_section *s, - const struct ccase *c, - double d_weight, double e_weight) +ctables_cell_insert (struct ctables_section *s, const struct ccase *c, + double weight[N_CTWS]) { const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ @@ -3500,9 +3484,9 @@ ctables_cell_insert (struct ctables_section *s, } } - ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight); - recurse_totals (s, c, cats, is_included, d_weight, e_weight, 0, 0); - recurse_subtotals (s, c, cats, is_included, d_weight, e_weight, 0, 0); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, 0, 0); + recurse_subtotals (s, c, cats, is_included, weight, 0, 0); } struct merge_item @@ -3518,8 +3502,8 @@ merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) const struct ctables_summary_spec *bs = &b->set->specs[b->ofs]; if (as->function != bs->function) return as->function > bs->function ? 1 : -1; - else if (as->weighted != bs->weighted) - return as->weighted > bs->weighted ? 1 : -1; + else if (as->weighting != bs->weighting) + return as->weighting > bs->weighting ? 1 : -1; else if (as->calc_area != bs->calc_area) return as->calc_area > bs->calc_area ? 1 : -1; else if (as->percentile != bs->percentile) @@ -4016,7 +4000,7 @@ ctables_cell_calculate_postcompute (const struct ctables_section *s, { const struct ctables_summary_spec *ss2 = &pc->specs->specs[i]; if (ss->function == ss2->function - && ss->weighted == ss2->weighted + && ss->weighting == ss2->weighting && ss->calc_area == ss2->calc_area && ss->percentile == ss2->percentile) { @@ -4464,7 +4448,7 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) struct pivot_value *value; if (ct->hide_threshold != 0 && d < ct->hide_threshold - && ctables_summary_function_is_count (ss->function)) + && ss->function == CTSF_COUNT) { value = pivot_value_new_user_text_nocopy ( xasprintf ("<%d", ct->hide_threshold)); @@ -4786,7 +4770,7 @@ ctables_prepare_table (struct ctables_table *t) } *ss->specs = (struct ctables_summary_spec) { .function = function, - .weighted = true, + .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY, .format = ctables_summary_default_format (function, ss->var), }; @@ -5269,13 +5253,18 @@ ctables_execute (struct dataset *ds, struct casereader *input, case_num (c, ct->e_weight), &warn_on_invalid) : d_weight); + double weight[] = { + [CTW_DICTIONARY] = d_weight, + [CTW_EFFECTIVE] = e_weight, + [CTW_UNWEIGHTED] = 1.0, + }; for (size_t i = 0; i < ct->n_tables; i++) { struct ctables_table *t = ct->tables[i]; for (size_t j = 0; j < t->n_sections; j++) - ctables_cell_insert (&t->sections[j], c, d_weight, e_weight); + ctables_cell_insert (&t->sections[j], c, weight); for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) if (t->label_axis[a] != a) @@ -5773,9 +5762,9 @@ ctables_parse_pproperties_format (struct lexer *lexer, { /* Parse function. */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; - if (!parse_ctables_summary_function (lexer, &function, &weighted, &area)) + if (!parse_ctables_summary_function (lexer, &function, &weighting, &area)) goto error; /* Parse percentile. */ @@ -5799,7 +5788,7 @@ ctables_parse_pproperties_format (struct lexer *lexer, sizeof *sss->specs); sss->specs[sss->n++] = (struct ctables_summary_spec) { .function = function, - .weighted = weighted, + .weighting = weighting, .calc_area = area, .user_area = area, .percentile = percentile, diff --git a/src/language/stats/ctables.inc b/src/language/stats/ctables.inc index 1a418fe03d..27160997e0 100644 --- a/src/language/stats/ctables.inc +++ b/src/language/stats/ctables.inc @@ -1,30 +1,27 @@ /* -*- c -*- */ /* Summary functions for all variables. */ -S(CTSF_COUNT, "COUNT", CTFT_UCELL, CTF_COUNT, CTFA_ALL) -S(CTSF_ECOUNT, "ECOUNT", CTFT_CELL, CTF_COUNT, CTFA_ALL) -S(CTSF_areaPCT_COUNT, "PCT.COUNT", CTFT_AREA, CTF_PERCENT, CTFA_ALL) -S(CTSF_areaPCT_VALIDN, "PCT.VALIDN", CTFT_AREA, CTF_PERCENT, CTFA_ALL) -S(CTSF_areaPCT_TOTALN, "PCT.TOTALN", CTFT_AREA, CTF_PERCENT, CTFA_ALL) +S(CTSF_COUNT, "COUNT", CTFT_UECELL, CTF_COUNT, CTFA_ALL) +S(CTSF_areaPCT_COUNT, "PCT.COUNT", CTFT_AREA, CTF_PERCENT, CTFA_ALL) +S(CTSF_areaPCT_VALIDN, "PCT.VALIDN", CTFT_AREA, CTF_PERCENT, CTFA_ALL) +S(CTSF_areaPCT_TOTALN, "PCT.TOTALN", CTFT_AREA, CTF_PERCENT, CTFA_ALL) /* Scale variables, totals, and subtotals. */ -S(CTSF_MAXIMUM, "MAXIMUM", CTFT_CELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_MEAN, "MEAN", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_MEDIAN, "MEDIAN", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_MINIMUM, "MINIMUM", CTFT_CELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_MISSING, "MISSING", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_MODE, "MODE", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_PTILE, "PTILE", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_RANGE, "RANGE", CTFT_CELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_SEMEAN, "SEMEAN", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_STDDEV, "STDDEV", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_SUM, "SUM", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_TOTALN, "TOTALN", CTFT_UCELL, CTF_COUNT, CTFA_SCALE) -S(CTSF_ETOTALN, "ETOTALN", CTFT_CELL, CTF_COUNT, CTFA_SCALE) -S(CTSF_VALIDN, "VALIDN", CTFT_UCELL, CTF_COUNT, CTFA_SCALE) -S(CTSF_EVALIDN, "EVALIDN", CTFT_CELL, CTF_COUNT, CTFA_SCALE) -S(CTSF_VARIANCE, "VARIANCE", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) -S(CTSF_areaPCT_SUM, "PCT.SUM", CTFT_AREA, CTF_PERCENT, CTFA_SCALE) +S(CTSF_MAXIMUM, "MAXIMUM", CTFT_CELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_MEAN, "MEAN", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_MEDIAN, "MEDIAN", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_MINIMUM, "MINIMUM", CTFT_CELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_MISSING, "MISSING", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_MODE, "MODE", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_PTILE, "PTILE", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_RANGE, "RANGE", CTFT_CELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_SEMEAN, "SEMEAN", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_STDDEV, "STDDEV", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_SUM, "SUM", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_TOTALN, "TOTALN", CTFT_UECELL, CTF_COUNT, CTFA_SCALE) +S(CTSF_VALIDN, "VALIDN", CTFT_UECELL, CTF_COUNT, CTFA_SCALE) +S(CTSF_VARIANCE, "VARIANCE", CTFT_UCELL, CTF_GENERAL, CTFA_SCALE) +S(CTSF_areaPCT_SUM, "PCT.SUM", CTFT_AREA, CTF_PERCENT, CTFA_SCALE) /* Debugging and troubleshooting. */ -S(CTSF_areaID, "ID", CTFT_AREA, CTF_COUNT, CTFA_ALL) +S(CTSF_areaID, "ID", CTFT_AREA, CTF_COUNT, CTFA_ALL) diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at index 40be19464b..67d6db4b26 100644 --- a/tests/language/stats/ctables.at +++ b/tests/language/stats/ctables.at @@ -5051,29 +5051,29 @@ AT_CHECK([pspp ctables.sps -O box=unicode -O width=120], [0], [dnl │ Layer Row Sum % │ 13.3%│ 10.3%│ 11.3%│ 8.9%│ 12.6%│ 11.5%│ 9.8%│ 9.9%│ 12.4%│ ╰───────────────────┴───────┴──────┴──────┴──────┴───────┴──────┴──────┴──────┴───────╯ - Custom Tables -╭────────────────────────────┬──────────────────────────────────────────────────────────────╮ -│ │ a │ -│ ├────────────────────┬────────────────────┬────────────────────┤ -│ │ 1 │ 2 │ 9 │ -│ ├────────────────────┼────────────────────┼────────────────────┤ -│ │ b │ b │ b │ -│ ├──────┬──────┬──────┼──────┬──────┬──────┼──────┬──────┬──────┤ -│ │ 3 │ 4 │ 9 │ 3 │ 4 │ 9 │ 3 │ 4 │ 9 │ -├────────────────────────────┼──────┼──────┼──────┼──────┼──────┼──────┼──────┼──────┼──────┤ -│c Valid N │ 7│ 6│ 8│ 7│ 7│ 8│ 7│ 7│ 8│ -│ Missing │ 3│ 4│ 2│ 3│ 3│ 2│ 3│ 3│ 2│ -│ Unweighted Mean │ 25.86│ 24.50│ 24.63│ 25.86│ 25.71│ 24.25│ 25.43│ 25.29│ 23.88│ -│ Std Error of Mean │ 2.44│ 2.14│ 2.58│ 2.44│ 2.18│ 2.43│ 2.36│ 2.18│ 2.47│ -│ Median │ 25.00│ 24.50│ 25.00│ 25.00│ 27.00│ 25.00│ 25.00│ 24.00│ 23.50│ -│ Mode │ 16│ 18│ 15│ 16│ 18│ 15│ 16│ 18│ 15│ -│ Std Deviation │ 6.47│ 5.24│ 7.31│ 6.47│ 5.77│ 6.88│ 6.24│ 5.77│ 6.98│ -│ Variance │ 41.81│ 27.50│ 53.41│ 41.81│ 33.24│ 47.36│ 38.95│ 33.24│ 48.70│ -│ Sum │181.00│147.00│197.00│181.00│180.00│194.00│178.00│177.00│191.00│ -│ Unweighted Count │ 10│ 10│ 10│ 10│ 10│ 10│ 10│ 10│ 10│ -│ Total N │ 10│ 10│ 10│ 10│ 10│ 10│ 10│ 10│ 10│ -│ Unweighted Layer Row Sum %│ 11.1%│ 9.0%│ 12.1%│ 11.1%│ 11.1%│ 11.9%│ 10.9%│ 10.9%│ 11.7%│ -╰────────────────────────────┴──────┴──────┴──────┴──────┴──────┴──────┴──────┴──────┴──────╯ + Custom Tables +╭──────────────────────────────┬──────────────────────────────────────────────────────────────╮ +│ │ a │ +│ ├────────────────────┬────────────────────┬────────────────────┤ +│ │ 1 │ 2 │ 9 │ +│ ├────────────────────┼────────────────────┼────────────────────┤ +│ │ b │ b │ b │ +│ ├──────┬──────┬──────┼──────┬──────┬──────┼──────┬──────┬──────┤ +│ │ 3 │ 4 │ 9 │ 3 │ 4 │ 9 │ 3 │ 4 │ 9 │ +├──────────────────────────────┼──────┼──────┼──────┼──────┼──────┼──────┼──────┼──────┼──────┤ +│c Unweighted Valid N │ 7│ 6│ 8│ 7│ 7│ 8│ 7│ 7│ 8│ +│ Unweighted Missing │ 3│ 4│ 2│ 3│ 3│ 2│ 3│ 3│ 2│ +│ Unweighted Mean │ 25.86│ 24.50│ 24.63│ 25.86│ 25.71│ 24.25│ 25.43│ 25.29│ 23.88│ +│ Unweighted Std Error of Mean│ 2.44│ 2.14│ 2.58│ 2.44│ 2.18│ 2.43│ 2.36│ 2.18│ 2.47│ +│ Unweighted Median │ 25.00│ 24.50│ 25.00│ 25.00│ 27.00│ 25.00│ 25.00│ 24.00│ 23.50│ +│ Unweighted Mode │ 16│ 18│ 15│ 16│ 18│ 15│ 16│ 18│ 15│ +│ Unweighted Std Deviation │ 6.47│ 5.24│ 7.31│ 6.47│ 5.77│ 6.88│ 6.24│ 5.77│ 6.98│ +│ Unweighted Variance │ 41.81│ 27.50│ 53.41│ 41.81│ 33.24│ 47.36│ 38.95│ 33.24│ 48.70│ +│ Unweighted Sum │181.00│147.00│197.00│181.00│180.00│194.00│178.00│177.00│191.00│ +│ Unweighted Count │ 10│ 10│ 10│ 10│ 10│ 10│ 10│ 10│ 10│ +│ Unweighted Total N │ 10│ 10│ 10│ 10│ 10│ 10│ 10│ 10│ 10│ +│ Unweighted Layer Row Sum % │ 11.1%│ 9.0%│ 12.1%│ 11.1%│ 11.1%│ 11.9%│ 10.9%│ 10.9%│ 11.7%│ +╰──────────────────────────────┴──────┴──────┴──────┴──────┴──────┴──────┴──────┴──────┴──────╯ ]) AT_CLEANUP