X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fctables.c;h=b5152ef6365bedfdc1791010ce9ada75793c6382;hb=eeae0e2599fc01e7917d79b22541c9cfe1d0db4d;hp=cd963e785feb0b83ba8ea1a4e4e3f36b5e6c77d6;hpb=a58399ea2ce9421f72cc5771cd215b121bd8f9dd;p=pspp diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index cd963e785f..b5152ef636 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -373,6 +373,7 @@ struct ctables_nest struct variable **vars; size_t n; size_t scale_idx; + size_t summary_idx; size_t *areas[N_CTATS]; size_t n_areas[N_CTATS]; size_t group_head; @@ -2421,6 +2422,9 @@ nest_fts (struct ctables_stack s0, struct ctables_stack s1) .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx : SIZE_MAX), + .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx + : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx + : SIZE_MAX), .n = n, }; for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) @@ -2455,13 +2459,15 @@ var_fts (const struct ctables_axis *a) struct variable **vars = xmalloc (sizeof *vars); *vars = a->var; + bool is_summary = a->specs[CSV_CELL].n || a->scale; struct ctables_nest *nest = xmalloc (sizeof *nest); *nest = (struct ctables_nest) { .vars = vars, .n = 1, .scale_idx = a->scale ? 0 : SIZE_MAX, + .summary_idx = is_summary ? 0 : SIZE_MAX, }; - if (a->specs[CSV_CELL].n || a->scale) + if (is_summary) for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) { ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]); @@ -2551,11 +2557,14 @@ ctables_summary_init (union ctables_summary *s, break; case CTSF_MEAN: + case CTSF_SUM: + case CTSF_areaPCT_SUM: + s->moments = moments1_create (MOMENT_MEAN); + break; + case CTSF_SEMEAN: case CTSF_STDDEV: - case CTSF_SUM: case CTSF_VARIANCE: - case CTSF_areaPCT_SUM: s->moments = moments1_create (MOMENT_VARIANCE); break; @@ -2628,7 +2637,7 @@ ctables_summary_add (union ctables_summary *s, const struct ctables_summary_spec *ss, const struct variable *var, const union value *value, bool is_scale, bool is_scale_missing, - bool is_missing, bool excluded_missing, + bool is_missing, bool is_included, double d_weight, double e_weight) { /* To determine whether a case is included in a given table for a particular @@ -2652,24 +2661,37 @@ ctables_summary_add (union ctables_summary *s, switch (ss->function) { case CTSF_TOTALN: - case CTSF_areaPCT_TOTALN: s->count += ss->weighted ? d_weight : 1.0; break; + case CTSF_areaPCT_TOTALN: + s->count += ss->weighted ? e_weight : 1.0; + break; + case CTSF_COUNT: - case CTSF_areaPCT_COUNT: - if (is_scale || !excluded_missing) + if (is_scale || is_included) s->count += ss->weighted ? d_weight : 1.0; break; + case CTSF_areaPCT_COUNT: + if (is_scale || is_included) + s->count += ss->weighted ? e_weight : 1.0; + break; + case CTSF_VALIDN: - case CTSF_areaPCT_VALIDN: if (is_scale ? !is_scale_missing : !is_missing) s->count += ss->weighted ? d_weight : 1.0; break; + case CTSF_areaPCT_VALIDN: + if (is_scale + ? !is_scale_missing + : !is_missing) + s->count += ss->weighted ? e_weight : 1.0; + break; + case CTSF_areaID: break; @@ -2677,11 +2699,11 @@ ctables_summary_add (union ctables_summary *s, if (is_scale ? is_scale_missing : is_missing) - s->count += ss->weighted ? d_weight : 1.0; + s->count += ss->weighted ? e_weight : 1.0; break; case CTSF_ECOUNT: - if (is_scale || !excluded_missing) + if (is_scale || is_included) s->count += e_weight; break; @@ -2714,11 +2736,15 @@ ctables_summary_add (union ctables_summary *s, case CTSF_STDDEV: case CTSF_SUM: case CTSF_VARIANCE: - case CTSF_areaPCT_SUM: if (!is_scale_missing) moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0); break; + case CTSF_areaPCT_SUM: + if (!is_missing && !is_scale_missing) + moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0); + break; + case CTSF_MEDIAN: case CTSF_MODE: case CTSF_PTILE: @@ -3280,15 +3306,9 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, } static bool -is_scale_missing (const struct ctables_summary_spec_set *specs, - const struct ccase *c) +is_listwise_missing (const struct ctables_summary_spec_set *specs, + const struct ccase *c) { - if (!specs->is_scale) - return false; - - if (var_is_num_missing (specs->var, case_num (c, specs->var))) - return true; - for (size_t i = 0; i < specs->n_listwise_vars; i++) { const struct variable *var = specs->listwise_vars[i]; @@ -3302,19 +3322,20 @@ is_scale_missing (const struct ctables_summary_spec_set *specs, static void ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight) + bool is_included, double d_weight, double e_weight) { struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); const struct ctables_nest *ss = s->nests[s->table->summary_axis]; const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; + const union value *value = case_data (c, specs->var); + bool is_missing = var_is_value_missing (specs->var, value); + bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c)); - bool scale_missing = is_scale_missing (specs, c); for (size_t i = 0; i < specs->n; i++) - ctables_summary_add (&cell->summaries[i], &specs->specs[i], - specs->var, case_data (c, specs->var), specs->is_scale, - scale_missing, is_missing, excluded_missing, + ctables_summary_add (&cell->summaries[i], &specs->specs[i], + specs->var, value, specs->is_scale, + scale_missing, is_missing, is_included, d_weight, e_weight); for (enum ctables_area_type at = 0; at < N_CTATS; at++) if (!(cell->omit_areas && (1u << at))) @@ -3323,7 +3344,7 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, a->d_total += d_weight; a->e_total += e_weight; a->u_total += 1.0; - if (!excluded_missing) + if (is_included) { a->d_count += d_weight; a->e_count += e_weight; @@ -3354,8 +3375,7 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, static void recurse_totals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight, + bool is_included, double d_weight, double e_weight, enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3374,10 +3394,8 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, { const struct ctables_category *save = cats[a][i]; cats[a][i] = total; - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - recurse_totals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight); + recurse_totals (s, c, cats, is_included, d_weight, e_weight, a, i + 1); cats[a][i] = save; } } @@ -3388,8 +3406,7 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, static void recurse_subtotals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight, + bool is_included, double d_weight, double e_weight, enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3404,10 +3421,8 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c, if (save->subtotal) { cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - recurse_subtotals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight); + recurse_subtotals (s, c, cats, is_included, d_weight, e_weight, a, i + 1); cats[a][i] = save; } } @@ -3441,47 +3456,38 @@ ctables_cell_insert (struct ctables_section *s, { const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ - /* Does at least one categorical variable have a missing value in an included - or excluded category? */ - bool is_missing = false; - - /* Does at least one categorical variable have a missing value in an excluded - category? */ - bool excluded_missing = false; + bool is_included = true; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; for (size_t i = 0; i < nest->n; i++) - { - if (i == nest->scale_idx) - continue; - - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); - - bool var_missing = var_is_value_missing (var, value) != 0; - if (var_missing) - is_missing = true; - - cats[a][i] = ctables_categories_match ( - s->table->categories[var_get_dict_index (var)], value, var); - if (!cats[a][i]) - { - if (!var_missing) - return; + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); - static const struct ctables_category cct_excluded_missing = { - .type = CCT_EXCLUDED_MISSING, - .hide = true, - }; - cats[a][i] = &cct_excluded_missing; - excluded_missing = true; - } + cats[a][i] = ctables_categories_match ( + s->table->categories[var_get_dict_index (var)], value, var); + if (!cats[a][i]) + { + if (i != nest->summary_idx) + return; + + if (!var_is_value_missing (var, value)) + return; + + static const struct ctables_category cct_excluded_missing = { + .type = CCT_EXCLUDED_MISSING, + .hide = true, + }; + cats[a][i] = &cct_excluded_missing; + is_included = false; + } } } - if (!excluded_missing) + if (is_included) for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; @@ -3494,16 +3500,9 @@ ctables_cell_insert (struct ctables_section *s, } } - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - - //if (!excluded_missing) - { - recurse_totals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, 0, 0); - recurse_subtotals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, 0, 0); - } + ctables_cell_add__ (s, c, cats, is_included, d_weight, e_weight); + recurse_totals (s, c, cats, is_included, d_weight, e_weight, 0, 0); + recurse_subtotals (s, c, cats, is_included, d_weight, e_weight, 0, 0); } struct merge_item @@ -3726,8 +3725,8 @@ ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, for (size_t i = 0; i < nest->n; i++) hmap_init (&s->occurrences[a][i]); } - for (size_t i = 0; i < N_CTATS; i++) - hmap_init (&s->areas[i]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_init (&s->areas[at]); } } @@ -4665,87 +4664,76 @@ ctables_prepare_table (struct ctables_table *t) nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]); nest->n_areas[at] = 0; - bool add_vars = (at == CTAT_LAYER ? a == PIVOT_AXIS_LAYER - : at == CTAT_LAYERROW ? a != PIVOT_AXIS_COLUMN - : at == CTAT_LAYERCOL ? a != PIVOT_AXIS_ROW - : at == CTAT_TABLE ? false - : true); - if (add_vars) - for (size_t k = 0; k < nest->n; k++) - { - if (k == nest->scale_idx) - continue; - nest->areas[at][nest->n_areas[at]++] = k; - } - else if ((at == CTAT_LAYERCOL && a == PIVOT_AXIS_ROW && t->label_axis[a] != a) - || (at == CTAT_LAYERROW && a == PIVOT_AXIS_COLUMN && t->label_axis[a] != a) - || (at == CTAT_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER)) - { - for (size_t k = nest->n - 1; k < nest->n; k--) - { - if (k == nest->scale_idx) - continue; - nest->areas[at][nest->n_areas[at]++] = k; - break; - } - continue; - } - - size_t n_drop; - bool drop_inner = false; - if (at == CTAT_SUBTABLE - && t->clabels_from_axis != PIVOT_AXIS_LAYER) - { - if (t->clabels_to_axis != PIVOT_AXIS_LAYER) - n_drop = a == t->clabels_from_axis ? 2 : 0; - else - { - drop_inner = a == t->clabels_from_axis; - n_drop = a != t->clabels_from_axis && a != PIVOT_AXIS_LAYER; - } - } - else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN) - n_drop = 0; - else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW) - { - drop_inner = true; - n_drop = 0; - } - else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER) + enum pivot_axis_type ata, atb; + if (at == CTAT_ROW || at == CTAT_LAYERROW) { - drop_inner = true; - n_drop = 0; + ata = PIVOT_AXIS_ROW; + atb = PIVOT_AXIS_COLUMN; } - else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN) + else if (at == CTAT_COL || at == CTAT_LAYERCOL) { - drop_inner = true; - n_drop = 0; + ata = PIVOT_AXIS_COLUMN; + atb = PIVOT_AXIS_ROW; } - else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW) - n_drop = 0; - else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER) + + if (at == CTAT_LAYER + ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER + : at == CTAT_LAYERCOL || at == CTAT_LAYERROW + ? a == atb && t->label_axis[a] != a + : false) { - drop_inner = true; - n_drop = 0; + for (size_t k = nest->n - 1; k < nest->n; k--) + if (k != nest->scale_idx) + { + nest->areas[at][nest->n_areas[at]++] = k; + break; + } + continue; } - else if (at == CTAT_LAYERROW) - n_drop = a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN; - else if (at == CTAT_LAYERCOL) - n_drop = a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW; - else + + if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER + : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb + : at == CTAT_TABLE ? true + : false) + continue; + + for (size_t k = 0; k < nest->n; k++) + if (k != nest->scale_idx) + nest->areas[at][nest->n_areas[at]++] = k; + + int n_drop; + switch (at) { + case CTAT_SUBTABLE: +#define L PIVOT_AXIS_LAYER + n_drop = (t->clabels_from_axis == L ? a != L + : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L) + : t->clabels_from_axis == a ? 2 + : 0); +#undef L + break; + + case CTAT_LAYERROW: + case CTAT_LAYERCOL: + n_drop = a == ata && t->label_axis[ata] == atb; + break; + + case CTAT_ROW: + case CTAT_COL: + n_drop = (a == ata ? t->label_axis[ata] == atb + : a != atb ? 0 + : t->clabels_from_axis == atb ? -1 + : t->clabels_to_axis != atb ? 1 + : 0); + break; + + case CTAT_LAYER: + case CTAT_TABLE: n_drop = 0; - if (at == CTAT_SUBTABLE ? a != PIVOT_AXIS_LAYER - : at == CTAT_ROW ? a == PIVOT_AXIS_COLUMN - : at == CTAT_COL ? a == PIVOT_AXIS_ROW - : false) - n_drop++; - if ((at == CTAT_ROW && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN) - || (at == CTAT_COL && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)) - n_drop++; + break; } - if (drop_inner) + if (n_drop < 0) { size_t n = nest->n_areas[at]; if (n > 1) @@ -4754,16 +4742,23 @@ ctables_prepare_table (struct ctables_table *t) nest->n_areas[at]--; } } - for (size_t i = 0; i < n_drop; i++) - if (nest->n_areas[at] > 0) - nest->n_areas[at]--; + else + { + for (int i = 0; i < n_drop; i++) + if (nest->n_areas[at] > 0) + nest->n_areas[at]--; + } } } } else { struct ctables_nest *nest = xmalloc (sizeof *nest); - *nest = (struct ctables_nest) { .n = 0 }; + *nest = (struct ctables_nest) { + .n = 0, + .scale_idx = SIZE_MAX, + .summary_idx = SIZE_MAX + }; t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 }; /* There's no point in moving labels away from an axis that has no @@ -4777,20 +4772,23 @@ ctables_prepare_table (struct ctables_table *t) struct ctables_nest *nest = &stack->nests[i]; if (!nest->specs[CSV_CELL].n) { - struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL]; - specs->specs = xmalloc (sizeof *specs->specs); - specs->n = 1; + struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL]; + ss->specs = xmalloc (sizeof *ss->specs); + ss->n = 1; enum ctables_summary_function function - = specs->is_scale ? CTSF_MEAN : CTSF_COUNT; + = ss->is_scale ? CTSF_MEAN : CTSF_COUNT; - *specs->specs = (struct ctables_summary_spec) { + if (!ss->var) + { + nest->summary_idx = nest->n - 1; + ss->var = nest->vars[nest->summary_idx]; + } + *ss->specs = (struct ctables_summary_spec) { .function = function, .weighted = true, - .format = ctables_summary_default_format (function, specs->var), + .format = ctables_summary_default_format (function, ss->var), }; - if (!specs->var) - specs->var = nest->vars[0]; ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL], &nest->specs[CSV_CELL]); @@ -5197,8 +5195,8 @@ ctables_section_uninit (struct ctables_section *s) } hmap_destroy (&s->cells); - for (size_t i = 0; i < N_CTATS; i++) - hmap_destroy (&s->areas[i]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_destroy (&s->areas[at]); } static void @@ -5265,7 +5263,7 @@ ctables_execute (struct dataset *ds, struct casereader *input, for (struct ccase *c = casereader_read (group); c; case_unref (c), c = casereader_read (group)) { - double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid); + double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid); double e_weight = (ct->e_weight ? var_force_valid_weight (ct->e_weight, case_num (c, ct->e_weight),