From: Ben Pfaff Date: Wed, 29 Jun 2022 05:25:29 +0000 (-0700) Subject: CTABLES missing values start to make some minimal amount of sense X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=b73682f56dec8f99610aed9a6b34c53a02d09db6 CTABLES missing values start to make some minimal amount of sense --- diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index b64ea2b565..92e7a55fa5 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -206,12 +206,6 @@ struct ctables_cell bool hide; - /* Is at least one value missing, whether included or excluded? */ - bool is_missing; - - /* Is at least one value missing and excluded? */ - bool excluded_missing; - bool postcompute; enum ctables_summary_variant sv; @@ -2253,10 +2247,10 @@ ctables_summary_uninit (union ctables_summary *s, } static void -ctables_summary_add (const struct ctables_cell *cell, union ctables_summary *s, +ctables_summary_add (union ctables_summary *s, const struct ctables_summary_spec *ss, const struct variable *var, const union value *value, - bool is_scale, bool is_missing, + bool is_scale, bool is_missing, bool excluded_missing, double d_weight, double e_weight) { /* To determine whether a case is included in a given table for a particular @@ -2280,15 +2274,36 @@ ctables_summary_add (const struct ctables_cell *cell, union ctables_summary *s, switch (ss->function) { case CSTF_TOTALN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: s->count += d_weight; break; case CTSF_COUNT: - if (is_scale || !cell->excluded_missing) + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + if (is_scale || !excluded_missing) s->count += d_weight; break; case CTSF_VALIDN: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: if (is_scale ? !var_is_value_missing (var, value) : !is_missing) @@ -2301,28 +2316,8 @@ ctables_summary_add (const struct ctables_cell *cell, union ctables_summary *s, break; case CTSF_ECOUNT: - case CTSF_ROWPCT_COUNT: - case CTSF_COLPCT_COUNT: - case CTSF_TABLEPCT_COUNT: - case CTSF_SUBTABLEPCT_COUNT: - case CTSF_LAYERPCT_COUNT: - case CTSF_LAYERROWPCT_COUNT: - case CTSF_LAYERCOLPCT_COUNT: - case CTSF_ROWPCT_VALIDN: - case CTSF_COLPCT_VALIDN: - case CTSF_TABLEPCT_VALIDN: - case CTSF_SUBTABLEPCT_VALIDN: - case CTSF_LAYERPCT_VALIDN: - case CTSF_LAYERROWPCT_VALIDN: - case CTSF_LAYERCOLPCT_VALIDN: - case CTSF_ROWPCT_TOTALN: - case CTSF_COLPCT_TOTALN: - case CTSF_TABLEPCT_TOTALN: - case CTSF_SUBTABLEPCT_TOTALN: - case CTSF_LAYERPCT_TOTALN: - case CTSF_LAYERROWPCT_TOTALN: - case CTSF_LAYERCOLPCT_TOTALN: - s->count += d_weight; + if (is_scale || !excluded_missing) + s->count += e_weight; break; case CTSF_EVALIDN: @@ -2472,8 +2467,8 @@ ctables_summary_value (const struct ctables_cell *cell, case CTSF_LAYERCOLPCT_COUNT: { enum ctables_domain_type d = ctables_function_domain (ss->function); - return (cell->domains[d]->e_valid - ? s->count / cell->domains[d]->e_valid * 100 + return (cell->domains[d]->e_count + ? s->count / cell->domains[d]->e_count * 100 : SYSMIS); } @@ -2484,6 +2479,13 @@ ctables_summary_value (const struct ctables_cell *cell, case CTSF_LAYERPCT_VALIDN: case CTSF_LAYERROWPCT_VALIDN: case CTSF_LAYERCOLPCT_VALIDN: + { + enum ctables_domain_type d = ctables_function_domain (ss->function); + return (cell->domains[d]->e_valid + ? s->count / cell->domains[d]->e_valid * 100 + : SYSMIS); + } + case CTSF_ROWPCT_TOTALN: case CTSF_COLPCT_TOTALN: case CTSF_TABLEPCT_TOTALN: @@ -2491,7 +2493,12 @@ ctables_summary_value (const struct ctables_cell *cell, case CTSF_LAYERPCT_TOTALN: case CTSF_LAYERROWPCT_TOTALN: case CTSF_LAYERCOLPCT_TOTALN: - NOT_REACHED (); + { + enum ctables_domain_type d = ctables_function_domain (ss->function); + return (cell->domains[d]->e_total + ? s->count / cell->domains[d]->e_total * 100 + : SYSMIS); + } case CTSF_MISSING: return s->count; @@ -2846,8 +2853,6 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, cell = xmalloc (sizeof *cell); cell->hide = false; - cell->is_missing = false; - cell->excluded_missing = false; cell->sv = sv; cell->contributes_to_domains = true; cell->postcompute = false; @@ -2872,10 +2877,6 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, || cat->type == CCT_SUBTOTAL || cat->type == CCT_POSTCOMPUTE) cell->contributes_to_domains = false; - else if (var_is_value_missing (var, value)) - cell->is_missing = true; - if (cat->type == CCT_EXCLUDED_MISSING) - cell->excluded_missing = true; if (cat->type == CCT_POSTCOMPUTE) cell->postcompute = true; } @@ -2899,16 +2900,17 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, static void ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, double d_weight, double e_weight) + bool is_missing, bool excluded_missing, + double d_weight, double e_weight) { struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); const struct ctables_nest *ss = s->nests[s->table->summary_axis]; const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; for (size_t i = 0; i < specs->n; i++) - ctables_summary_add (cell, &cell->summaries[i], &specs->specs[i], + ctables_summary_add (&cell->summaries[i], &specs->specs[i], specs->var, case_data (c, specs->var), specs->is_scale, - is_missing, d_weight, e_weight); + is_missing, excluded_missing, d_weight, e_weight); if (cell->contributes_to_domains) { for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) @@ -2916,12 +2918,12 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, struct ctables_domain *d = cell->domains[dt]; d->d_total += d_weight; d->e_total += e_weight; - if (!cell->excluded_missing) + if (!excluded_missing) { d->d_count += d_weight; d->e_count += e_weight; } - if (!cell->is_missing) + if (!is_missing) { d->d_valid += d_weight; d->e_valid += e_weight; @@ -2933,7 +2935,8 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, static void recurse_totals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, double d_weight, double e_weight, + bool is_missing, bool excluded_missing, + double d_weight, double e_weight, enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -2952,8 +2955,9 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, { const struct ctables_category *save = cats[a][i]; cats[a][i] = total; - ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight); - recurse_totals (s, c, cats, is_missing, + ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, + d_weight, e_weight); + recurse_totals (s, c, cats, is_missing, excluded_missing, d_weight, e_weight, a, i + 1); cats[a][i] = save; } @@ -2965,7 +2969,8 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, static void recurse_subtotals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, double d_weight, double e_weight, + bool is_missing, bool excluded_missing, + double d_weight, double e_weight, enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -2980,8 +2985,9 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c, if (save->subtotal) { cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight); - recurse_subtotals (s, c, cats, is_missing, + ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, + d_weight, e_weight); + recurse_subtotals (s, c, cats, is_missing, excluded_missing, d_weight, e_weight, a, i + 1); cats[a][i] = save; } @@ -3015,8 +3021,15 @@ ctables_cell_insert (struct ctables_section *s, double d_weight, double e_weight) { const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ + + /* Does at least one categorical variable have a missing value in an included + or excluded category? */ bool is_missing = false; + + /* Does at least one categorical variable have a missing value in an excluded + category? */ bool excluded_missing = false; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; @@ -3036,7 +3049,7 @@ ctables_cell_insert (struct ctables_section *s, s->table->categories[var_get_dict_index (var)], value, var); if (!cats[a][i]) { - if (!is_missing) + if (!var_missing) return; static const struct ctables_category cct_excluded_missing = { @@ -3062,12 +3075,15 @@ ctables_cell_insert (struct ctables_section *s, } } - ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight); + ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, + d_weight, e_weight); - if (!excluded_missing) + //if (!excluded_missing) { - recurse_totals (s, c, cats, is_missing, d_weight, e_weight, 0, 0); - recurse_subtotals (s, c, cats, is_missing, d_weight, e_weight, 0, 0); + recurse_totals (s, c, cats, is_missing, excluded_missing, + d_weight, e_weight, 0, 0); + recurse_subtotals (s, c, cats, is_missing, excluded_missing, + d_weight, e_weight, 0, 0); } } diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at index 1b7021438e..8f038da121 100644 --- a/tests/language/stats/ctables.at +++ b/tests/language/stats/ctables.at @@ -978,17 +978,33 @@ BEGIN DATA. . 3 . 4 . 5 +. . END DATA. MISSING VALUES x (1, 2) y (2, 3). VARIABLE LEVEL ALL (NOMINAL). -CTABLES /TABLE x[COUNT,TOTALS[COUNT, VALIDN, TOTALN]] +*TABLES /TABLE x[COUNT, TOTALS[COUNT, VALIDN, TOTALN]] /CATEGORIES VARIABLES=ALL TOTAL=YES. -CTABLES /TABLE x[COUNT,TOTALS[COUNT, VALIDN, TOTALN]] +CTABLES /TABLE x[COUNT, COLPCT, COLPCT.VALIDN, COLPCT.TOTALN, TOTALS[COUNT, COLPCT, COLPCT.VALIDN, COLPCT.TOTALN, VALIDN, TOTALN]] /CATEGORIES VARIABLES=ALL TOTAL=YES MISSING=INCLUDE. -CTABLES /TABLE x BY y. -CTABLES /TABLE x BY y /CATEGORIES VARIABLES=ALL MISSING=INCLUDE. -CTABLES /TABLE y BY x /CATEGORIES VARIABLES=ALL MISSING=INCLUDE. +*CTABLES /TABLE x BY y. +*CTABLES /TABLE x BY y /CATEGORIES VARIABLES=ALL MISSING=INCLUDE. +*CTABLES /TABLE y BY x /CATEGORIES VARIABLES=ALL MISSING=INCLUDE. ]]) -AT_CHECK([pspp ctables.sps -O box=unicode -O width=120], [0], []) +AT_CHECK([pspp ctables.sps -O box=unicode -O width=120], [0], [dnl + Custom Tables +╭───────┬─────┬────────┬────────────────┬────────────────┬───────┬───────╮ +│ │Count│Column %│Column Valid N %│Column Total N %│Valid N│Total N│ +├───────┼─────┼────────┼────────────────┼────────────────┼───────┼───────┤ +│x 1.00 │ 6│ 20.0%│ .0%│ 16.7%│ │ │ +│ 2.00 │ 6│ 20.0%│ .0%│ 16.7%│ │ │ +│ 3.00 │ 6│ 20.0%│ 33.3%│ 16.7%│ │ │ +│ 4.00 │ 6│ 20.0%│ 33.3%│ 16.7%│ │ │ +│ 5.00 │ 6│ 20.0%│ 33.3%│ 16.7%│ │ │ +│ Total│ 30│ 100.0%│ 100.0%│ 100.0%│ 18│ 36│ +╰───────┴─────┴────────┴────────────────┴────────────────┴───────┴───────╯ +dnl Note that the Total N % doesn't add up to 100 because system-missing +dnl values are included in the total but not shown as a category and this +dnl is expected behavior. +]) AT_CLEANUP \ No newline at end of file