X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fctables.c;h=b64ea2b565de6412c5f84d41eee4c6d0839e83fe;hb=refs%2Fheads%2Fctables11;hp=fe46f10c9254cb8e556ff045e24792503af5771c;hpb=3d859a4d0fc88efa2f2bd946f621799ef73739bd;p=pspp diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index fe46f10c92..b64ea2b565 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -180,9 +180,11 @@ struct ctables_domain const struct ctables_cell *example; double d_valid; /* Dictionary weight. */ - double d_missing; + double d_count; + double d_total; double e_valid; /* Effective weight */ - double e_missing; + double e_count; + double e_total; }; enum ctables_summary_variant @@ -203,6 +205,13 @@ struct ctables_cell struct ctables_domain *domains[N_CTDTS]; bool hide; + + /* Is at least one value missing, whether included or excluded? */ + bool is_missing; + + /* Is at least one value missing and excluded? */ + bool excluded_missing; + bool postcompute; enum ctables_summary_variant sv; @@ -337,7 +346,15 @@ struct ctables_summary_spec_set size_t n; size_t allocated; + /* The variable to which the summary specs are applied. */ struct variable *var; + + /* Whether the variable to which the summary specs are applied is a scale + variable for the purpose of summarization. + + (VALIDN and TOTALN act differently for summarizing scale and categorical + variables.) */ + bool is_scale; }; static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *, @@ -481,6 +498,9 @@ struct ctables_category CCT_VALUE, CCT_LABEL, CCT_FUNCTION, + + /* For contributing to TOTALN. */ + CCT_EXCLUDED_MISSING, } type; @@ -516,7 +536,7 @@ struct ctables_category }; /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL, - CCT_FUNCTION. */ + CCT_FUNCTION, CCT_EXCLUDED_MISSING. */ struct msg_location *location; }; @@ -548,6 +568,9 @@ ctables_category_uninit (struct ctables_category *cat) case CCT_LABEL: case CCT_FUNCTION: break; + + case CCT_EXCLUDED_MISSING: + break; } } @@ -588,6 +611,9 @@ ctables_category_equal (const struct ctables_category *a, && a->sort_function == b->sort_function && a->sort_var == b->sort_var && a->percentile == b->percentile); + + case CCT_EXCLUDED_MISSING: + return true; } NOT_REACHED (); @@ -730,7 +756,8 @@ ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst, .specs = specs, .n = src->n, .allocated = src->n, - .var = src->var + .var = src->var, + .is_scale = src->is_scale, }; } @@ -1007,6 +1034,7 @@ add_summary_spec (struct ctables_axis *axis, break; case CTFA_SCALE: +#if 0 if (!axis->scale) { msg_at (SE, loc, @@ -1016,6 +1044,7 @@ add_summary_spec (struct ctables_axis *axis, var_name); return false; } +#endif break; case CTFA_ALL: @@ -1924,6 +1953,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, case CCT_VALUE: case CCT_LABEL: case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: break; } } @@ -2037,6 +2067,7 @@ enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a) { ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]); nest->specs[sv].var = a->var.var; + nest->specs[sv].is_scale = a->scale; } return (struct ctables_stack) { .nests = nest, .n = 1 }; @@ -2055,11 +2086,7 @@ enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a) union ctables_summary { /* COUNT, VALIDN, TOTALN. */ - struct - { - double valid; - double missing; - }; + double count; /* MINIMUM, MAXIMUM, RANGE. */ struct @@ -2116,7 +2143,7 @@ ctables_summary_init (union ctables_summary *s, case CTSF_ETOTALN: case CTSF_VALIDN: case CTSF_EVALIDN: - s->missing = s->valid = 0; + s->count = 0; break; case CTSF_MAXIMUM: @@ -2226,9 +2253,10 @@ ctables_summary_uninit (union ctables_summary *s, } static void -ctables_summary_add (union ctables_summary *s, +ctables_summary_add (const struct ctables_cell *cell, union ctables_summary *s, const struct ctables_summary_spec *ss, const struct variable *var, const union value *value, + bool is_scale, bool is_missing, double d_weight, double e_weight) { /* To determine whether a case is included in a given table for a particular @@ -2251,13 +2279,25 @@ ctables_summary_add (union ctables_summary *s, */ switch (ss->function) { - case CTSF_COUNT: case CSTF_TOTALN: + s->count += d_weight; + break; + + case CTSF_COUNT: + if (is_scale || !cell->excluded_missing) + s->count += d_weight; + break; + case CTSF_VALIDN: - if (var_is_value_missing (var, value)) - s->missing += d_weight; - else - s->valid += d_weight; + if (is_scale + ? !var_is_value_missing (var, value) + : !is_missing) + s->count += d_weight; + break; + + case CTSF_MISSING: + if (is_missing) + s->count += d_weight; break; case CTSF_ECOUNT: @@ -2282,13 +2322,18 @@ ctables_summary_add (union ctables_summary *s, case CTSF_LAYERPCT_TOTALN: case CTSF_LAYERROWPCT_TOTALN: case CTSF_LAYERCOLPCT_TOTALN: - case CTSF_MISSING: - case CTSF_ETOTALN: + s->count += d_weight; + break; + case CTSF_EVALIDN: - if (var_is_value_missing (var, value)) - s->missing += e_weight; - else - s->valid += e_weight; + if (is_scale + ? !var_is_value_missing (var, value) + : !is_missing) + s->count += e_weight; + break; + + case CTSF_ETOTALN: + s->count += e_weight; break; case CTSF_MAXIMUM: @@ -2416,7 +2461,7 @@ ctables_summary_value (const struct ctables_cell *cell, { case CTSF_COUNT: case CTSF_ECOUNT: - return s->valid; + return s->count; case CTSF_ROWPCT_COUNT: case CTSF_COLPCT_COUNT: @@ -2428,7 +2473,7 @@ ctables_summary_value (const struct ctables_cell *cell, { enum ctables_domain_type d = ctables_function_domain (ss->function); return (cell->domains[d]->e_valid - ? s->valid / cell->domains[d]->e_valid * 100 + ? s->count / cell->domains[d]->e_valid * 100 : SYSMIS); } @@ -2449,15 +2494,17 @@ ctables_summary_value (const struct ctables_cell *cell, NOT_REACHED (); case CTSF_MISSING: - return s->missing; + return s->count; case CSTF_TOTALN: case CTSF_ETOTALN: - return s->valid + s->missing; + return s->count; case CTSF_VALIDN: + return s->count; + case CTSF_EVALIDN: - return s->valid; + return s->count; case CTSF_MAXIMUM: return s->max; @@ -2580,6 +2627,7 @@ ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_) case CCT_SUBTOTAL: case CCT_TOTAL: case CCT_POSTCOMPUTE: + case CCT_EXCLUDED_MISSING: /* Must be equal. */ continue; @@ -2686,6 +2734,9 @@ static const struct ctables_category * ctables_categories_match (const struct ctables_categories *c, const union value *v, const struct variable *var) { + if (var_is_numeric (var) && v->f == SYSMIS) + return NULL; + const struct ctables_category *othernm = NULL; for (size_t i = c->n_cats; i-- > 0; ) { @@ -2728,6 +2779,9 @@ ctables_categories_match (const struct ctables_categories *c, case CCT_FUNCTION: return (cat->include_missing || !var_is_value_missing (var, v) ? cat : NULL); + + case CCT_EXCLUDED_MISSING: + break; } } @@ -2792,6 +2846,8 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, cell = xmalloc (sizeof *cell); cell->hide = false; + cell->is_missing = false; + cell->excluded_missing = false; cell->sv = sv; cell->contributes_to_domains = true; cell->postcompute = false; @@ -2804,6 +2860,8 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, for (size_t i = 0; i < nest->n; i++) { const struct ctables_category *cat = cats[a][i]; + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); if (i != nest->scale_idx) { const struct ctables_category *subtotal = cat->subtotal; @@ -2814,13 +2872,16 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, || cat->type == CCT_SUBTOTAL || cat->type == CCT_POSTCOMPUTE) cell->contributes_to_domains = false; + else if (var_is_value_missing (var, value)) + cell->is_missing = true; + if (cat->type == CCT_EXCLUDED_MISSING) + cell->excluded_missing = true; if (cat->type == CCT_POSTCOMPUTE) cell->postcompute = true; } cell->axes[a].cvs[i].category = cat; - value_clone (&cell->axes[a].cvs[i].value, case_data (c, nest->vars[i]), - var_get_width (nest->vars[i])); + value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var)); } } @@ -2838,21 +2899,33 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, static void ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - double d_weight, double e_weight) + bool is_missing, double d_weight, double e_weight) { struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); const struct ctables_nest *ss = s->nests[s->table->summary_axis]; const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; for (size_t i = 0; i < specs->n; i++) - ctables_summary_add (&cell->summaries[i], &specs->specs[i], specs->var, - case_data (c, specs->var), d_weight, e_weight); + ctables_summary_add (cell, &cell->summaries[i], &specs->specs[i], + specs->var, case_data (c, specs->var), specs->is_scale, + is_missing, d_weight, e_weight); if (cell->contributes_to_domains) { for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) { - cell->domains[dt]->d_valid += d_weight; - cell->domains[dt]->e_valid += e_weight; + struct ctables_domain *d = cell->domains[dt]; + d->d_total += d_weight; + d->e_total += e_weight; + if (!cell->excluded_missing) + { + d->d_count += d_weight; + d->e_count += e_weight; + } + if (!cell->is_missing) + { + d->d_valid += d_weight; + d->e_valid += e_weight; + } } } } @@ -2860,7 +2933,7 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, static void recurse_totals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - double d_weight, double e_weight, + bool is_missing, double d_weight, double e_weight, enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -2879,8 +2952,9 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, { const struct ctables_category *save = cats[a][i]; cats[a][i] = total; - ctables_cell_add__ (s, c, cats, d_weight, e_weight); - recurse_totals (s, c, cats, d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight); + recurse_totals (s, c, cats, is_missing, + d_weight, e_weight, a, i + 1); cats[a][i] = save; } } @@ -2891,7 +2965,7 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, static void recurse_subtotals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - double d_weight, double e_weight, + bool is_missing, double d_weight, double e_weight, enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -2906,8 +2980,9 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c, if (save->subtotal) { cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, d_weight, e_weight); - recurse_subtotals (s, c, cats, d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight); + recurse_subtotals (s, c, cats, is_missing, + d_weight, e_weight, a, i + 1); cats[a][i] = save; } } @@ -2940,6 +3015,8 @@ ctables_cell_insert (struct ctables_section *s, double d_weight, double e_weight) { const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ + bool is_missing = false; + bool excluded_missing = false; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; @@ -2951,32 +3028,47 @@ ctables_cell_insert (struct ctables_section *s, const struct variable *var = nest->vars[i]; const union value *value = case_data (c, var); - if (var_is_numeric (var) && value->f == SYSMIS) - return; + bool var_missing = var_is_value_missing (var, value) != 0; + if (var_missing) + is_missing = true; cats[a][i] = ctables_categories_match ( s->table->categories[var_get_dict_index (var)], value, var); if (!cats[a][i]) - return; + { + if (!is_missing) + return; + + static const struct ctables_category cct_excluded_missing = { + .type = CCT_EXCLUDED_MISSING, + .hide = true, + }; + cats[a][i] = &cct_excluded_missing; + excluded_missing = true; + } } } - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); - ctables_add_occurrence (var, value, &s->occurrences[a][i]); - } - } + if (!excluded_missing) + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); + ctables_add_occurrence (var, value, &s->occurrences[a][i]); + } + } - ctables_cell_add__ (s, c, cats, d_weight, e_weight); + ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight); - recurse_totals (s, c, cats, d_weight, e_weight, 0, 0); - recurse_subtotals (s, c, cats, d_weight, e_weight, 0, 0); + if (!excluded_missing) + { + recurse_totals (s, c, cats, is_missing, d_weight, e_weight, 0, 0); + recurse_subtotals (s, c, cats, is_missing, d_weight, e_weight, 0, 0); + } } struct merge_item @@ -3805,7 +3897,7 @@ ctables_prepare_table (struct ctables_table *t) specs->n = 1; enum ctables_summary_function function - = specs->var ? CTSF_MEAN : CTSF_COUNT; + = specs->is_scale ? CTSF_MEAN : CTSF_COUNT; struct ctables_var var = { .is_mrset = false, .var = specs->var }; *specs->specs = (struct ctables_summary_spec) { @@ -4005,6 +4097,9 @@ ctables_add_category_occurrences (const struct variable *var, if (c->include_missing || !var_is_value_missing (var, &vl->value)) ctables_add_occurrence (var, &vl->value, occurrences); break; + + case CCT_EXCLUDED_MISSING: + break; } } }