From c5d4a219c4f28f2a0adb223be62fe4bb8df02f8f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 27 Aug 2022 16:30:17 -0700 Subject: [PATCH] more reactoring --- src/language/stats/ctables.c | 2234 +++++++++++++++++----------------- 1 file changed, 1119 insertions(+), 1115 deletions(-) diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index ad9e453a9b..ff4a261f89 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -531,6 +531,20 @@ ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set) free (set->listwise_vars); free (set->specs); } + +static bool +is_listwise_missing (const struct ctables_summary_spec_set *specs, + const struct ccase *c) +{ + for (size_t i = 0; i < specs->n_listwise_vars; i++) + { + const struct variable *var = specs->listwise_vars[i]; + if (var_is_num_missing (var, case_num (c, var))) + return true; + } + + return false; +} /* CTABLES postcompute expressions. */ @@ -593,6 +607,10 @@ struct ctables_pcexpr struct msg_location *location; }; +struct ctables; +static struct ctables_postcompute *ctables_find_postcompute (struct ctables *, + const char *name); + static struct ctables_pcexpr *ctables_pcexpr_allocate_binary ( enum ctables_pcexpr_op, struct ctables_pcexpr *sub0, struct ctables_pcexpr *sub1); @@ -1696,119 +1714,411 @@ ctables_categories_equal (const struct ctables_categories *a, return true; } - -/* CTABLES variable nesting and stacking. */ - -/* A nested sequence of variables, e.g. a > b > c. */ -struct ctables_nest - { - struct variable **vars; - size_t n; - size_t scale_idx; - size_t summary_idx; - size_t *areas[N_CTATS]; - size_t n_areas[N_CTATS]; - size_t group_head; - - struct ctables_summary_spec_set specs[N_CSVS]; - }; -/* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */ -struct ctables_stack - { - struct ctables_nest *nests; - size_t n; +static struct ctables_category +cct_nrange (double low, double high) +{ + return (struct ctables_category) { + .type = CCT_NRANGE, + .nrange = { low, high } }; +} -static void -ctables_nest_uninit (struct ctables_nest *nest) +static struct ctables_category +cct_srange (struct substring low, struct substring high) { - free (nest->vars); - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - ctables_summary_spec_set_uninit (&nest->specs[sv]); - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - free (nest->areas[at]); + return (struct ctables_category) { + .type = CCT_SRANGE, + .srange = { low, high } + }; } -static void -ctables_stack_uninit (struct ctables_stack *stack) +static bool +ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories, + struct ctables_category *cat) { - if (stack) + char *total_label; + if (lex_match (lexer, T_EQUALS)) { - for (size_t i = 0; i < stack->n; i++) - ctables_nest_uninit (&stack->nests[i]); - free (stack->nests); + if (!lex_force_string (lexer)) + return false; + + total_label = ss_xstrdup (lex_tokss (lexer)); + lex_get (lexer); } + else + total_label = xstrdup (_("Subtotal")); + + *cat = (struct ctables_category) { + .type = CCT_SUBTOTAL, + .hide_subcategories = hide_subcategories, + .total_label = total_label + }; + return true; } -static struct ctables_stack -nest_fts (struct ctables_stack s0, struct ctables_stack s1) +static bool +ctables_table_parse_explicit_category (struct lexer *lexer, + struct dictionary *dict, + struct ctables *ct, + struct ctables_category *cat) { - if (!s0.n) - return s1; - else if (!s1.n) - return s0; - - struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) }; - for (size_t i = 0; i < s0.n; i++) - for (size_t j = 0; j < s1.n; j++) - { - const struct ctables_nest *a = &s0.nests[i]; - const struct ctables_nest *b = &s1.nests[j]; - - size_t allocate = a->n + b->n; - struct variable **vars = xnmalloc (allocate, sizeof *vars); - size_t n = 0; - for (size_t k = 0; k < a->n; k++) - vars[n++] = a->vars[k]; - for (size_t k = 0; k < b->n; k++) - vars[n++] = b->vars[k]; - assert (n == allocate); + if (lex_match_id (lexer, "OTHERNM")) + *cat = (struct ctables_category) { .type = CCT_OTHERNM }; + else if (lex_match_id (lexer, "MISSING")) + *cat = (struct ctables_category) { .type = CCT_MISSING }; + else if (lex_match_id (lexer, "SUBTOTAL")) + return ctables_table_parse_subtotal (lexer, false, cat); + else if (lex_match_id (lexer, "HSUBTOTAL")) + return ctables_table_parse_subtotal (lexer, true, cat); + else if (lex_match_id (lexer, "LO")) + { + if (!lex_force_match_id (lexer, "THRU")) + return false; + if (lex_is_string (lexer)) + { + struct substring sr0 = { .string = NULL }; + struct substring sr1 = parse_substring (lexer, dict); + *cat = cct_srange (sr0, sr1); + } + else if (lex_force_num (lexer)) + { + *cat = cct_nrange (-DBL_MAX, lex_number (lexer)); + lex_get (lexer); + } + else + return false; + } + else if (lex_is_number (lexer)) + { + double number = lex_number (lexer); + lex_get (lexer); + if (lex_match_id (lexer, "THRU")) + { + if (lex_match_id (lexer, "HI")) + *cat = cct_nrange (number, DBL_MAX); + else + { + if (!lex_force_num (lexer)) + return false; + *cat = cct_nrange (number, lex_number (lexer)); + lex_get (lexer); + } + } + else + *cat = (struct ctables_category) { + .type = CCT_NUMBER, + .number = number + }; + } + else if (lex_is_string (lexer)) + { + struct substring s = parse_substring (lexer, dict); + if (lex_match_id (lexer, "THRU")) + { + if (lex_match_id (lexer, "HI")) + { + struct substring sr1 = { .string = NULL }; + *cat = cct_srange (s, sr1); + } + else + { + if (!lex_force_string (lexer)) + { + ss_dealloc (&s); + return false; + } + struct substring sr1 = parse_substring (lexer, dict); + *cat = cct_srange (s, sr1); + } + } + else + *cat = (struct ctables_category) { .type = CCT_STRING, .string = s }; + } + else if (lex_match (lexer, T_AND)) + { + if (!lex_force_id (lexer)) + return false; + struct ctables_postcompute *pc = ctables_find_postcompute ( + ct, lex_tokcstr (lexer)); + if (!pc) + { + struct msg_location *loc = lex_get_location (lexer, -1, 0); + msg_at (SE, loc, _("Unknown postcompute &%s."), + lex_tokcstr (lexer)); + msg_location_destroy (loc); + return false; + } + lex_get (lexer); - const struct ctables_nest *summary_src; - if (!a->specs[CSV_CELL].var) - summary_src = b; - else if (!b->specs[CSV_CELL].var) - summary_src = a; - else - NOT_REACHED (); + *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc }; + } + else + { + lex_error (lexer, NULL); + return false; + } - struct ctables_nest *new = &stack.nests[stack.n++]; - *new = (struct ctables_nest) { - .vars = vars, - .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx - : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx - : SIZE_MAX), - .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx - : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx - : SIZE_MAX), - .n = n, - }; - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]); - } - ctables_stack_uninit (&s0); - ctables_stack_uninit (&s1); - return stack; + return true; } -static struct ctables_stack -stack_fts (struct ctables_stack s0, struct ctables_stack s1) +static bool +parse_category_string (struct msg_location *location, + struct substring s, const struct dictionary *dict, + enum fmt_type format, double *n) { - struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) }; - for (size_t i = 0; i < s0.n; i++) - stack.nests[stack.n++] = s0.nests[i]; - for (size_t i = 0; i < s1.n; i++) + union value v; + char *error = data_in (s, dict_get_encoding (dict), format, + settings_get_fmt_settings (), &v, 0, NULL); + if (error) { - stack.nests[stack.n] = s1.nests[i]; - stack.nests[stack.n].group_head += s0.n; - stack.n++; + msg_at (SE, location, + _("Failed to parse category specification as format %s: %s."), + fmt_name (format), error); + free (error); + return false; } - assert (stack.n == s0.n + s1.n); - free (s0.nests); - free (s1.nests); - return stack; -} + + *n = v.f; + return true; +} + +static struct ctables_category * +ctables_find_category_for_postcompute__ (const struct ctables_categories *cats, + const struct ctables_pcexpr *e) +{ + struct ctables_category *best = NULL; + size_t n_subtotals = 0; + for (size_t i = 0; i < cats->n_cats; i++) + { + struct ctables_category *cat = &cats->cats[i]; + switch (e->op) + { + case CTPO_CAT_NUMBER: + if (cat->type == CCT_NUMBER && cat->number == e->number) + best = cat; + break; + + case CTPO_CAT_STRING: + if (cat->type == CCT_STRING && ss_equals (cat->string, e->string)) + best = cat; + break; + + case CTPO_CAT_NRANGE: + if (cat->type == CCT_NRANGE + && cat->nrange[0] == e->nrange[0] + && cat->nrange[1] == e->nrange[1]) + best = cat; + break; + + case CTPO_CAT_SRANGE: + if (cat->type == CCT_SRANGE + && nullable_substring_equal (&cat->srange[0], &e->srange[0]) + && nullable_substring_equal (&cat->srange[1], &e->srange[1])) + best = cat; + break; + + case CTPO_CAT_MISSING: + if (cat->type == CCT_MISSING) + best = cat; + break; + + case CTPO_CAT_OTHERNM: + if (cat->type == CCT_OTHERNM) + best = cat; + break; + + case CTPO_CAT_SUBTOTAL: + if (cat->type == CCT_SUBTOTAL) + { + n_subtotals++; + if (e->subtotal_index == n_subtotals) + return cat; + else if (e->subtotal_index == 0) + best = cat; + } + break; + + case CTPO_CAT_TOTAL: + if (cat->type == CCT_TOTAL) + return cat; + break; + + case CTPO_CONSTANT: + case CTPO_ADD: + case CTPO_SUB: + case CTPO_MUL: + case CTPO_DIV: + case CTPO_POW: + case CTPO_NEG: + NOT_REACHED (); + } + } + if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1) + return NULL; + return best; +} + +static struct ctables_category * +ctables_find_category_for_postcompute (const struct dictionary *dict, + const struct ctables_categories *cats, + enum fmt_type parse_format, + const struct ctables_pcexpr *e) +{ + if (parse_format != FMT_F) + { + if (e->op == CTPO_CAT_STRING) + { + double number; + if (!parse_category_string (e->location, e->string, dict, + parse_format, &number)) + return NULL; + + struct ctables_pcexpr e2 = { + .op = CTPO_CAT_NUMBER, + .number = number, + .location = e->location, + }; + return ctables_find_category_for_postcompute__ (cats, &e2); + } + else if (e->op == CTPO_CAT_SRANGE) + { + double nrange[2]; + if (!e->srange[0].string) + nrange[0] = -DBL_MAX; + else if (!parse_category_string (e->location, e->srange[0], dict, + parse_format, &nrange[0])) + return NULL; + + if (!e->srange[1].string) + nrange[1] = DBL_MAX; + else if (!parse_category_string (e->location, e->srange[1], dict, + parse_format, &nrange[1])) + return NULL; + + struct ctables_pcexpr e2 = { + .op = CTPO_CAT_NRANGE, + .nrange = { nrange[0], nrange[1] }, + .location = e->location, + }; + return ctables_find_category_for_postcompute__ (cats, &e2); + } + } + return ctables_find_category_for_postcompute__ (cats, e); +} + +/* CTABLES variable nesting and stacking. */ + +/* A nested sequence of variables, e.g. a > b > c. */ +struct ctables_nest + { + struct variable **vars; + size_t n; + size_t scale_idx; + size_t summary_idx; + size_t *areas[N_CTATS]; + size_t n_areas[N_CTATS]; + size_t group_head; + + struct ctables_summary_spec_set specs[N_CSVS]; + }; + +/* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */ +struct ctables_stack + { + struct ctables_nest *nests; + size_t n; + }; + +static void +ctables_nest_uninit (struct ctables_nest *nest) +{ + free (nest->vars); + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + ctables_summary_spec_set_uninit (&nest->specs[sv]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + free (nest->areas[at]); +} + +static void +ctables_stack_uninit (struct ctables_stack *stack) +{ + if (stack) + { + for (size_t i = 0; i < stack->n; i++) + ctables_nest_uninit (&stack->nests[i]); + free (stack->nests); + } +} + +static struct ctables_stack +nest_fts (struct ctables_stack s0, struct ctables_stack s1) +{ + if (!s0.n) + return s1; + else if (!s1.n) + return s0; + + struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) }; + for (size_t i = 0; i < s0.n; i++) + for (size_t j = 0; j < s1.n; j++) + { + const struct ctables_nest *a = &s0.nests[i]; + const struct ctables_nest *b = &s1.nests[j]; + + size_t allocate = a->n + b->n; + struct variable **vars = xnmalloc (allocate, sizeof *vars); + size_t n = 0; + for (size_t k = 0; k < a->n; k++) + vars[n++] = a->vars[k]; + for (size_t k = 0; k < b->n; k++) + vars[n++] = b->vars[k]; + assert (n == allocate); + + const struct ctables_nest *summary_src; + if (!a->specs[CSV_CELL].var) + summary_src = b; + else if (!b->specs[CSV_CELL].var) + summary_src = a; + else + NOT_REACHED (); + + struct ctables_nest *new = &stack.nests[stack.n++]; + *new = (struct ctables_nest) { + .vars = vars, + .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx + : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx + : SIZE_MAX), + .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx + : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx + : SIZE_MAX), + .n = n, + }; + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]); + } + ctables_stack_uninit (&s0); + ctables_stack_uninit (&s1); + return stack; +} + +static struct ctables_stack +stack_fts (struct ctables_stack s0, struct ctables_stack s1) +{ + struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) }; + for (size_t i = 0; i < s0.n; i++) + stack.nests[stack.n++] = s0.nests[i]; + for (size_t i = 0; i < s1.n; i++) + { + stack.nests[stack.n] = s1.nests[i]; + stack.nests[stack.n].group_head += s0.n; + stack.n++; + } + assert (stack.n == s0.n + s1.n); + free (s0.nests); + free (s1.nests); + return stack; +} static struct ctables_stack var_fts (const struct ctables_axis *a) @@ -1859,619 +2169,675 @@ enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a) NOT_REACHED (); } -enum ctables_vlabel - { - CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT, - CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE, - CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL, - CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, - }; + +/* CTABLES summary calculation. */ -struct ctables_cell +union ctables_summary { - /* In struct ctables_section's 'cells' hmap. Indexed by all the values in - all the axes (except the scalar variable, if any). */ - struct hmap_node node; + /* COUNT, VALIDN, TOTALN. */ + double count; - /* The areas that contain this cell. */ - uint32_t omit_areas; - struct ctables_area *areas[N_CTATS]; - - bool hide; + /* MINIMUM, MAXIMUM, RANGE. */ + struct + { + double min; + double max; + }; - bool postcompute; - enum ctables_summary_variant sv; + /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */ + struct moments1 *moments; - struct ctables_cell_axis + /* MEDIAN, MODE, PTILE. */ + struct { - struct ctables_cell_value - { - const struct ctables_category *category; - union value value; - } - *cvs; - int leaf; - } - axes[PIVOT_N_AXES]; - - union ctables_summary *summaries; + struct casewriter *writer; + double ovalid; + double ovalue; + }; }; -struct ctables_postcompute - { - struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */ - char *name; /* Name, without leading &. */ +static void +ctables_summary_init (union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_areaPCT_COUNT: + case CTSF_areaPCT_VALIDN: + case CTSF_areaPCT_TOTALN: + case CTSF_MISSING: + case CTSF_TOTALN: + case CTSF_VALIDN: + s->count = 0; + break; - struct msg_location *location; /* Location of definition. */ - struct ctables_pcexpr *expr; - char *label; - struct ctables_summary_spec_set *specs; - bool hide_source_cats; - }; + case CTSF_areaID: + break; -struct ctables - { - const struct dictionary *dict; - struct pivot_table_look *look; + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + s->min = s->max = SYSMIS; + break; - /* For CTEF_* formats. */ - struct fmt_settings ctables_formats; + case CTSF_MEAN: + case CTSF_SUM: + case CTSF_areaPCT_SUM: + s->moments = moments1_create (MOMENT_MEAN); + break; - /* If this is NULL, zeros are displayed using the normal print format. - Otherwise, this string is displayed. */ - char *zero; + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_VARIANCE: + s->moments = moments1_create (MOMENT_VARIANCE); + break; - /* If this is NULL, missing values are displayed using the normal print - format. Otherwise, this string is displayed. */ - char *missing; + case CTSF_MEDIAN: + case CTSF_MODE: + case CTSF_PTILE: + { + struct caseproto *proto = caseproto_create (); + proto = caseproto_add_width (proto, 0); + proto = caseproto_add_width (proto, 0); - /* Indexed by variable dictionary index. */ - enum ctables_vlabel *vlabels; + struct subcase ordering; + subcase_init (&ordering, 0, 0, SC_ASCEND); + s->writer = sort_create_writer (&ordering, proto); + subcase_uninit (&ordering); + caseproto_unref (proto); - struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */ + s->ovalid = 0; + s->ovalue = SYSMIS; + } + break; + } +} - bool mrsets_count_duplicates; /* MRSETS. */ - bool smissing_listwise; /* SMISSING. */ - struct variable *e_weight; /* WEIGHT. */ - int hide_threshold; /* HIDESMALLCOUNTS. */ +static void +ctables_summary_uninit (union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_areaPCT_COUNT: + case CTSF_areaPCT_VALIDN: + case CTSF_areaPCT_TOTALN: + case CTSF_MISSING: + case CTSF_TOTALN: + case CTSF_VALIDN: + break; - struct ctables_table **tables; - size_t n_tables; - }; + case CTSF_areaID: + break; -static struct ctables_postcompute *ctables_find_postcompute (struct ctables *, - const char *name); + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + break; -struct ctables_value - { - struct hmap_node node; - union value value; - int leaf; - }; + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + case CTSF_areaPCT_SUM: + moments1_destroy (s->moments); + break; -struct ctables_occurrence - { - struct hmap_node node; - union value value; - }; + case CTSF_MEDIAN: + case CTSF_MODE: + case CTSF_PTILE: + casewriter_destroy (s->writer); + break; + } +} -struct ctables_section - { - /* Settings. */ - struct ctables_table *table; - struct ctables_nest *nests[PIVOT_N_AXES]; +static void +ctables_summary_add (union ctables_summary *s, + const struct ctables_summary_spec *ss, + const union value *value, + bool is_missing, bool is_included, + double weight) +{ + /* To determine whether a case is included in a given table for a particular + kind of summary, consider the following charts for the variable being + summarized. Only if "yes" appears is the case counted. - /* Data. */ - struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */ - struct hmap cells; /* Contains "struct ctables_cell"s. */ - struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */ - }; + Categorical variables: VALIDN other TOTALN + Valid values in included categories yes yes yes + Missing values in included categories --- yes yes + Missing values in excluded categories --- --- yes + Valid values in excluded categories --- --- --- -static void ctables_section_uninit (struct ctables_section *); + Scale variables: VALIDN other TOTALN + Valid value yes yes yes + Missing value --- yes yes -struct ctables_table - { - struct ctables *ctables; - struct ctables_axis *axes[PIVOT_N_AXES]; - struct ctables_stack stacks[PIVOT_N_AXES]; - struct ctables_section *sections; - size_t n_sections; - enum pivot_axis_type summary_axis; - struct ctables_summary_spec_set summary_specs; - struct variable **sum_vars; - size_t n_sum_vars; + Missing values include both user- and system-missing. (The system-missing + value is always in an excluded category.) - enum pivot_axis_type slabels_axis; - bool slabels_visible; + One way to interpret the above table is that scale variables are like + categorical variables in which all values are in included categories. + */ + switch (ss->function) + { + case CTSF_TOTALN: + case CTSF_areaPCT_TOTALN: + s->count += weight; + break; - /* The innermost category labels for axis 'a' appear on axis label_axis[a]. + case CTSF_COUNT: + case CTSF_areaPCT_COUNT: + if (is_included) + s->count += weight; + break; - Most commonly, label_axis[a] == a, and in particular we always have - label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER. + case CTSF_VALIDN: + case CTSF_areaPCT_VALIDN: + if (!is_missing) + s->count += weight; + break; - If ROWLABELS or COLLABELS is specified, then one of - label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the - opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ. + case CTSF_areaID: + break; - If any category labels are moved, then 'clabels_example' is one of the - variables being moved (and it is otherwise NULL). All of the variables - being moved have the same width, value labels, and categories, so this - example variable can be used to find those out. + case CTSF_MISSING: + if (is_missing) + s->count += weight; + break; - The remaining members in this group are relevant only if category labels - are moved. + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + if (!is_missing) + { + if (s->min == SYSMIS || value->f < s->min) + s->min = value->f; + if (s->max == SYSMIS || value->f > s->max) + s->max = value->f; + } + break; - 'clabels_values_map' holds a "struct ctables_value" for all the values - that appear in all of the variables in the moved categories. It is - accumulated as the data is read. Once the data is fully read, its - sorted values are put into 'clabels_values' and 'n_clabels_values'. - */ - enum pivot_axis_type label_axis[PIVOT_N_AXES]; - enum pivot_axis_type clabels_from_axis; - enum pivot_axis_type clabels_to_axis; - const struct variable *clabels_example; - struct hmap clabels_values_map; - struct ctables_value **clabels_values; - size_t n_clabels_values; - - /* Indexed by variable dictionary index. */ - struct ctables_categories **categories; - size_t n_categories; + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + if (!is_missing) + moments1_add (s->moments, value->f, weight); + break; - double cilevel; + case CTSF_areaPCT_SUM: + if (!is_missing) + moments1_add (s->moments, value->f, weight); + break; - char *caption; - char *corner; - char *title; + case CTSF_MEDIAN: + case CTSF_MODE: + case CTSF_PTILE: + if (!is_missing) + { + s->ovalid += weight; - struct ctables_chisq *chisq; - struct ctables_pairwise *pairwise; - }; + struct ccase *c = case_create (casewriter_get_proto (s->writer)); + *case_num_rw_idx (c, 0) = value->f; + *case_num_rw_idx (c, 1) = weight; + casewriter_write (s->writer, c); + } + break; + } +} -/* Chi-square test (SIGTEST). */ -struct ctables_chisq - { - double alpha; - bool include_mrsets; - bool all_visible; - }; +static double +ctables_summary_value (struct ctables_area *areas[N_CTATS], + union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + return s->count; -/* Pairwise comparison test (COMPARETEST). */ -struct ctables_pairwise - { - enum { PROP, MEAN } type; - double alpha[2]; - bool include_mrsets; - bool meansvariance_allcats; - bool all_visible; - enum { BONFERRONI = 1, BH } adjust; - bool merge; - bool apa_style; - bool show_sig; - }; + case CTSF_areaID: + return areas[ss->calc_area]->sequence; + case CTSF_areaPCT_COUNT: + { + const struct ctables_area *a = areas[ss->calc_area]; + double a_count = a->count[ss->weighting]; + return a_count ? s->count / a_count * 100 : SYSMIS; + } + case CTSF_areaPCT_VALIDN: + { + const struct ctables_area *a = areas[ss->calc_area]; + double a_valid = a->valid[ss->weighting]; + return a_valid ? s->count / a_valid * 100 : SYSMIS; + } -static bool -parse_col_width (struct lexer *lexer, const char *name, double *width) -{ - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "DEFAULT")) - *width = SYSMIS; - else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX)) - { - *width = lex_number (lexer); - lex_get (lexer); - } - else - return false; + case CTSF_areaPCT_TOTALN: + { + const struct ctables_area *a = areas[ss->calc_area]; + double a_total = a->total[ss->weighting]; + return a_total ? s->count / a_total * 100 : SYSMIS; + } - return true; -} + case CTSF_MISSING: + case CTSF_TOTALN: + case CTSF_VALIDN: + return s->count; -static bool -parse_bool (struct lexer *lexer, bool *b) -{ - if (lex_match_id (lexer, "NO")) - *b = false; - else if (lex_match_id (lexer, "YES")) - *b = true; - else - { - lex_error_expecting (lexer, "YES", "NO"); - return false; - } - return true; -} + case CTSF_MAXIMUM: + return s->max; -static void -ctables_chisq_destroy (struct ctables_chisq *chisq) -{ - free (chisq); -} + case CTSF_MINIMUM: + return s->min; -static void -ctables_pairwise_destroy (struct ctables_pairwise *pairwise) -{ - free (pairwise); -} + case CTSF_RANGE: + return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS; -static void -ctables_table_destroy (struct ctables_table *t) -{ - if (!t) - return; + case CTSF_MEAN: + { + double mean; + moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL); + return mean; + } - for (size_t i = 0; i < t->n_sections; i++) - ctables_section_uninit (&t->sections[i]); - free (t->sections); + case CTSF_SEMEAN: + { + double weight, variance; + moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL); + return calc_semean (variance, weight); + } - for (size_t i = 0; i < t->n_categories; i++) - ctables_categories_unref (t->categories[i]); - free (t->categories); + case CTSF_STDDEV: + { + double variance; + moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); + return variance != SYSMIS ? sqrt (variance) : SYSMIS; + } - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - ctables_axis_destroy (t->axes[a]); - ctables_stack_uninit (&t->stacks[a]); - } - free (t->summary_specs.specs); + case CTSF_SUM: + { + double weight, mean; + moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); + return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS; + } - struct ctables_value *ctv, *next_ctv; - HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node, - &t->clabels_values_map) - { - value_destroy (&ctv->value, var_get_width (t->clabels_example)); - hmap_delete (&t->clabels_values_map, &ctv->node); - free (ctv); - } - hmap_destroy (&t->clabels_values_map); - free (t->clabels_values); + case CTSF_VARIANCE: + { + double variance; + moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); + return variance; + } - free (t->sum_vars); - free (t->caption); - free (t->corner); - free (t->title); - ctables_chisq_destroy (t->chisq); - ctables_pairwise_destroy (t->pairwise); - free (t); -} + case CTSF_areaPCT_SUM: + { + double weight, mean; + moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); + if (weight == SYSMIS || mean == SYSMIS) + return SYSMIS; -static void -ctables_destroy (struct ctables *ct) -{ - if (!ct) - return; + const struct ctables_area *a = areas[ss->calc_area]; + const struct ctables_sum *sum = &a->sums[ss->sum_var_idx]; + double denom = sum->sum[ss->weighting]; + return denom != 0 ? weight * mean / denom * 100 : SYSMIS; + } - struct ctables_postcompute *pc, *next_pc; - HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node, - &ct->postcomputes) - { - free (pc->name); - msg_location_destroy (pc->location); - ctables_pcexpr_destroy (pc->expr); - free (pc->label); - if (pc->specs) + case CTSF_MEDIAN: + case CTSF_PTILE: + if (s->writer) { - ctables_summary_spec_set_uninit (pc->specs); - free (pc->specs); + struct casereader *reader = casewriter_make_reader (s->writer); + s->writer = NULL; + + struct percentile *ptile = percentile_create ( + ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid); + struct order_stats *os = &ptile->parent; + order_stats_accumulate_idx (&os, 1, reader, 1, 0); + s->ovalue = percentile_calculate (ptile, PC_HAVERAGE); + statistic_destroy (&ptile->parent.parent); } - hmap_delete (&ct->postcomputes, &pc->hmap_node); - free (pc); - } - hmap_destroy (&ct->postcomputes); + return s->ovalue; - fmt_settings_uninit (&ct->ctables_formats); - pivot_table_look_unref (ct->look); - free (ct->zero); - free (ct->missing); - free (ct->vlabels); - for (size_t i = 0; i < ct->n_tables; i++) - ctables_table_destroy (ct->tables[i]); - free (ct->tables); - free (ct); + case CTSF_MODE: + if (s->writer) + { + struct casereader *reader = casewriter_make_reader (s->writer); + s->writer = NULL; + + struct mode *mode = mode_create (); + struct order_stats *os = &mode->parent; + order_stats_accumulate_idx (&os, 1, reader, 1, 0); + s->ovalue = mode->mode; + statistic_destroy (&mode->parent.parent); + } + return s->ovalue; + } + + NOT_REACHED (); } + +enum ctables_vlabel + { + CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT, + CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE, + CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL, + CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, + }; -static struct ctables_category -cct_nrange (double low, double high) -{ - return (struct ctables_category) { - .type = CCT_NRANGE, - .nrange = { low, high } +struct ctables_cell + { + /* In struct ctables_section's 'cells' hmap. Indexed by all the values in + all the axes (except the scalar variable, if any). */ + struct hmap_node node; + + /* The areas that contain this cell. */ + uint32_t omit_areas; + struct ctables_area *areas[N_CTATS]; + + bool hide; + + bool postcompute; + enum ctables_summary_variant sv; + + struct ctables_cell_axis + { + struct ctables_cell_value + { + const struct ctables_category *category; + union value value; + } + *cvs; + int leaf; + } + axes[PIVOT_N_AXES]; + + union ctables_summary *summaries; }; -} -static struct ctables_category -cct_srange (struct substring low, struct substring high) -{ - return (struct ctables_category) { - .type = CCT_SRANGE, - .srange = { low, high } +struct ctables_postcompute + { + struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */ + char *name; /* Name, without leading &. */ + + struct msg_location *location; /* Location of definition. */ + struct ctables_pcexpr *expr; + char *label; + struct ctables_summary_spec_set *specs; + bool hide_source_cats; + }; + +struct ctables + { + const struct dictionary *dict; + struct pivot_table_look *look; + + /* For CTEF_* formats. */ + struct fmt_settings ctables_formats; + + /* If this is NULL, zeros are displayed using the normal print format. + Otherwise, this string is displayed. */ + char *zero; + + /* If this is NULL, missing values are displayed using the normal print + format. Otherwise, this string is displayed. */ + char *missing; + + /* Indexed by variable dictionary index. */ + enum ctables_vlabel *vlabels; + + struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */ + + bool mrsets_count_duplicates; /* MRSETS. */ + bool smissing_listwise; /* SMISSING. */ + struct variable *e_weight; /* WEIGHT. */ + int hide_threshold; /* HIDESMALLCOUNTS. */ + + struct ctables_table **tables; + size_t n_tables; + }; + +struct ctables_value + { + struct hmap_node node; + union value value; + int leaf; + }; + +struct ctables_occurrence + { + struct hmap_node node; + union value value; + }; + +struct ctables_section + { + /* Settings. */ + struct ctables_table *table; + struct ctables_nest *nests[PIVOT_N_AXES]; + + /* Data. */ + struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */ + struct hmap cells; /* Contains "struct ctables_cell"s. */ + struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */ + }; + +static void ctables_section_uninit (struct ctables_section *); + +struct ctables_table + { + struct ctables *ctables; + struct ctables_axis *axes[PIVOT_N_AXES]; + struct ctables_stack stacks[PIVOT_N_AXES]; + struct ctables_section *sections; + size_t n_sections; + enum pivot_axis_type summary_axis; + struct ctables_summary_spec_set summary_specs; + struct variable **sum_vars; + size_t n_sum_vars; + + enum pivot_axis_type slabels_axis; + bool slabels_visible; + + /* The innermost category labels for axis 'a' appear on axis label_axis[a]. + + Most commonly, label_axis[a] == a, and in particular we always have + label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER. + + If ROWLABELS or COLLABELS is specified, then one of + label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the + opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ. + + If any category labels are moved, then 'clabels_example' is one of the + variables being moved (and it is otherwise NULL). All of the variables + being moved have the same width, value labels, and categories, so this + example variable can be used to find those out. + + The remaining members in this group are relevant only if category labels + are moved. + + 'clabels_values_map' holds a "struct ctables_value" for all the values + that appear in all of the variables in the moved categories. It is + accumulated as the data is read. Once the data is fully read, its + sorted values are put into 'clabels_values' and 'n_clabels_values'. + */ + enum pivot_axis_type label_axis[PIVOT_N_AXES]; + enum pivot_axis_type clabels_from_axis; + enum pivot_axis_type clabels_to_axis; + const struct variable *clabels_example; + struct hmap clabels_values_map; + struct ctables_value **clabels_values; + size_t n_clabels_values; + + /* Indexed by variable dictionary index. */ + struct ctables_categories **categories; + size_t n_categories; + + double cilevel; + + char *caption; + char *corner; + char *title; + + struct ctables_chisq *chisq; + struct ctables_pairwise *pairwise; + }; + +/* Chi-square test (SIGTEST). */ +struct ctables_chisq + { + double alpha; + bool include_mrsets; + bool all_visible; + }; + +/* Pairwise comparison test (COMPARETEST). */ +struct ctables_pairwise + { + enum { PROP, MEAN } type; + double alpha[2]; + bool include_mrsets; + bool meansvariance_allcats; + bool all_visible; + enum { BONFERRONI = 1, BH } adjust; + bool merge; + bool apa_style; + bool show_sig; }; -} + + static bool -ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories, - struct ctables_category *cat) +parse_col_width (struct lexer *lexer, const char *name, double *width) { - char *total_label; - if (lex_match (lexer, T_EQUALS)) + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "DEFAULT")) + *width = SYSMIS; + else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX)) { - if (!lex_force_string (lexer)) - return false; - - total_label = ss_xstrdup (lex_tokss (lexer)); + *width = lex_number (lexer); lex_get (lexer); } else - total_label = xstrdup (_("Subtotal")); + return false; - *cat = (struct ctables_category) { - .type = CCT_SUBTOTAL, - .hide_subcategories = hide_subcategories, - .total_label = total_label - }; return true; } static bool -ctables_table_parse_explicit_category (struct lexer *lexer, - struct dictionary *dict, - struct ctables *ct, - struct ctables_category *cat) +parse_bool (struct lexer *lexer, bool *b) { - if (lex_match_id (lexer, "OTHERNM")) - *cat = (struct ctables_category) { .type = CCT_OTHERNM }; - else if (lex_match_id (lexer, "MISSING")) - *cat = (struct ctables_category) { .type = CCT_MISSING }; - else if (lex_match_id (lexer, "SUBTOTAL")) - return ctables_table_parse_subtotal (lexer, false, cat); - else if (lex_match_id (lexer, "HSUBTOTAL")) - return ctables_table_parse_subtotal (lexer, true, cat); - else if (lex_match_id (lexer, "LO")) - { - if (!lex_force_match_id (lexer, "THRU")) - return false; - if (lex_is_string (lexer)) - { - struct substring sr0 = { .string = NULL }; - struct substring sr1 = parse_substring (lexer, dict); - *cat = cct_srange (sr0, sr1); - } - else if (lex_force_num (lexer)) - { - *cat = cct_nrange (-DBL_MAX, lex_number (lexer)); - lex_get (lexer); - } - else - return false; - } - else if (lex_is_number (lexer)) - { - double number = lex_number (lexer); - lex_get (lexer); - if (lex_match_id (lexer, "THRU")) - { - if (lex_match_id (lexer, "HI")) - *cat = cct_nrange (number, DBL_MAX); - else - { - if (!lex_force_num (lexer)) - return false; - *cat = cct_nrange (number, lex_number (lexer)); - lex_get (lexer); - } - } - else - *cat = (struct ctables_category) { - .type = CCT_NUMBER, - .number = number - }; - } - else if (lex_is_string (lexer)) - { - struct substring s = parse_substring (lexer, dict); - if (lex_match_id (lexer, "THRU")) - { - if (lex_match_id (lexer, "HI")) - { - struct substring sr1 = { .string = NULL }; - *cat = cct_srange (s, sr1); - } - else - { - if (!lex_force_string (lexer)) - { - ss_dealloc (&s); - return false; - } - struct substring sr1 = parse_substring (lexer, dict); - *cat = cct_srange (s, sr1); - } - } - else - *cat = (struct ctables_category) { .type = CCT_STRING, .string = s }; - } - else if (lex_match (lexer, T_AND)) - { - if (!lex_force_id (lexer)) - return false; - struct ctables_postcompute *pc = ctables_find_postcompute ( - ct, lex_tokcstr (lexer)); - if (!pc) - { - struct msg_location *loc = lex_get_location (lexer, -1, 0); - msg_at (SE, loc, _("Unknown postcompute &%s."), - lex_tokcstr (lexer)); - msg_location_destroy (loc); - return false; - } - lex_get (lexer); - - *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc }; - } + if (lex_match_id (lexer, "NO")) + *b = false; + else if (lex_match_id (lexer, "YES")) + *b = true; else { - lex_error (lexer, NULL); + lex_error_expecting (lexer, "YES", "NO"); return false; } - return true; } -static bool -parse_category_string (struct msg_location *location, - struct substring s, const struct dictionary *dict, - enum fmt_type format, double *n) +static void +ctables_chisq_destroy (struct ctables_chisq *chisq) { - union value v; - char *error = data_in (s, dict_get_encoding (dict), format, - settings_get_fmt_settings (), &v, 0, NULL); - if (error) - { - msg_at (SE, location, - _("Failed to parse category specification as format %s: %s."), - fmt_name (format), error); - free (error); - return false; - } - - *n = v.f; - return true; + free (chisq); } -static struct ctables_category * -ctables_find_category_for_postcompute__ (const struct ctables_categories *cats, - const struct ctables_pcexpr *e) +static void +ctables_pairwise_destroy (struct ctables_pairwise *pairwise) { - struct ctables_category *best = NULL; - size_t n_subtotals = 0; - for (size_t i = 0; i < cats->n_cats; i++) - { - struct ctables_category *cat = &cats->cats[i]; - switch (e->op) - { - case CTPO_CAT_NUMBER: - if (cat->type == CCT_NUMBER && cat->number == e->number) - best = cat; - break; + free (pairwise); +} - case CTPO_CAT_STRING: - if (cat->type == CCT_STRING && ss_equals (cat->string, e->string)) - best = cat; - break; +static void +ctables_table_destroy (struct ctables_table *t) +{ + if (!t) + return; - case CTPO_CAT_NRANGE: - if (cat->type == CCT_NRANGE - && cat->nrange[0] == e->nrange[0] - && cat->nrange[1] == e->nrange[1]) - best = cat; - break; + for (size_t i = 0; i < t->n_sections; i++) + ctables_section_uninit (&t->sections[i]); + free (t->sections); - case CTPO_CAT_SRANGE: - if (cat->type == CCT_SRANGE - && nullable_substring_equal (&cat->srange[0], &e->srange[0]) - && nullable_substring_equal (&cat->srange[1], &e->srange[1])) - best = cat; - break; + for (size_t i = 0; i < t->n_categories; i++) + ctables_categories_unref (t->categories[i]); + free (t->categories); - case CTPO_CAT_MISSING: - if (cat->type == CCT_MISSING) - best = cat; - break; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + ctables_axis_destroy (t->axes[a]); + ctables_stack_uninit (&t->stacks[a]); + } + free (t->summary_specs.specs); - case CTPO_CAT_OTHERNM: - if (cat->type == CCT_OTHERNM) - best = cat; - break; + struct ctables_value *ctv, *next_ctv; + HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node, + &t->clabels_values_map) + { + value_destroy (&ctv->value, var_get_width (t->clabels_example)); + hmap_delete (&t->clabels_values_map, &ctv->node); + free (ctv); + } + hmap_destroy (&t->clabels_values_map); + free (t->clabels_values); - case CTPO_CAT_SUBTOTAL: - if (cat->type == CCT_SUBTOTAL) - { - n_subtotals++; - if (e->subtotal_index == n_subtotals) - return cat; - else if (e->subtotal_index == 0) - best = cat; - } - break; + free (t->sum_vars); + free (t->caption); + free (t->corner); + free (t->title); + ctables_chisq_destroy (t->chisq); + ctables_pairwise_destroy (t->pairwise); + free (t); +} - case CTPO_CAT_TOTAL: - if (cat->type == CCT_TOTAL) - return cat; - break; +static void +ctables_destroy (struct ctables *ct) +{ + if (!ct) + return; - case CTPO_CONSTANT: - case CTPO_ADD: - case CTPO_SUB: - case CTPO_MUL: - case CTPO_DIV: - case CTPO_POW: - case CTPO_NEG: - NOT_REACHED (); + struct ctables_postcompute *pc, *next_pc; + HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node, + &ct->postcomputes) + { + free (pc->name); + msg_location_destroy (pc->location); + ctables_pcexpr_destroy (pc->expr); + free (pc->label); + if (pc->specs) + { + ctables_summary_spec_set_uninit (pc->specs); + free (pc->specs); } + hmap_delete (&ct->postcomputes, &pc->hmap_node); + free (pc); } - if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1) - return NULL; - return best; + hmap_destroy (&ct->postcomputes); + + fmt_settings_uninit (&ct->ctables_formats); + pivot_table_look_unref (ct->look); + free (ct->zero); + free (ct->missing); + free (ct->vlabels); + for (size_t i = 0; i < ct->n_tables; i++) + ctables_table_destroy (ct->tables[i]); + free (ct->tables); + free (ct); } -static struct ctables_category * -ctables_find_category_for_postcompute (const struct dictionary *dict, - const struct ctables_categories *cats, - enum fmt_type parse_format, - const struct ctables_pcexpr *e) +static bool +ctables_recursive_check_postcompute (struct dictionary *dict, + const struct ctables_pcexpr *e, + struct ctables_category *pc_cat, + const struct ctables_categories *cats, + const struct msg_location *cats_location) { - if (parse_format != FMT_F) - { - if (e->op == CTPO_CAT_STRING) - { - double number; - if (!parse_category_string (e->location, e->string, dict, - parse_format, &number)) - return NULL; - - struct ctables_pcexpr e2 = { - .op = CTPO_CAT_NUMBER, - .number = number, - .location = e->location, - }; - return ctables_find_category_for_postcompute__ (cats, &e2); - } - else if (e->op == CTPO_CAT_SRANGE) - { - double nrange[2]; - if (!e->srange[0].string) - nrange[0] = -DBL_MAX; - else if (!parse_category_string (e->location, e->srange[0], dict, - parse_format, &nrange[0])) - return NULL; - - if (!e->srange[1].string) - nrange[1] = DBL_MAX; - else if (!parse_category_string (e->location, e->srange[1], dict, - parse_format, &nrange[1])) - return NULL; - - struct ctables_pcexpr e2 = { - .op = CTPO_CAT_NRANGE, - .nrange = { nrange[0], nrange[1] }, - .location = e->location, - }; - return ctables_find_category_for_postcompute__ (cats, &e2); - } - } - return ctables_find_category_for_postcompute__ (cats, e); -} - -static bool -ctables_recursive_check_postcompute (struct dictionary *dict, - const struct ctables_pcexpr *e, - struct ctables_category *pc_cat, - const struct ctables_categories *cats, - const struct msg_location *cats_location) -{ - switch (e->op) + switch (e->op) { case CTPO_CAT_NUMBER: case CTPO_CAT_STRING: @@ -2766,516 +3132,168 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, } } - if (!c->n_cats) - { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); - c->cats[c->n_cats++] = cat; - } - - if (show_totals) - { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); - - struct ctables_category *totals; - if (totals_before) - { - insert_element (c->cats, c->n_cats, sizeof *c->cats, 0); - totals = &c->cats[0]; - } - else - totals = &c->cats[c->n_cats]; - c->n_cats++; - - *totals = (struct ctables_category) { - .type = CCT_TOTAL, - .total_label = total_label ? total_label : xstrdup (_("Total")), - }; - } - - struct ctables_category *subtotal = NULL; - for (size_t i = totals_before ? 0 : c->n_cats; - totals_before ? i < c->n_cats : i-- > 0; - totals_before ? i++ : 0) - { - struct ctables_category *cat = &c->cats[i]; - switch (cat->type) - { - case CCT_NUMBER: - case CCT_STRING: - case CCT_NRANGE: - case CCT_SRANGE: - case CCT_MISSING: - case CCT_OTHERNM: - cat->subtotal = subtotal; - break; - - case CCT_POSTCOMPUTE: - break; - - case CCT_SUBTOTAL: - subtotal = cat; - break; - - case CCT_TOTAL: - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: - break; - } - } - - if (cats_start_ofs != -1) - { - for (size_t i = 0; i < c->n_cats; i++) - { - struct ctables_category *cat = &c->cats[i]; - switch (cat->type) - { - case CCT_POSTCOMPUTE: - cat->parse_format = parse_strings ? common_format->type : FMT_F; - struct msg_location *cats_location - = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs); - bool ok = ctables_recursive_check_postcompute ( - dict, cat->pc->expr, cat, c, cats_location); - msg_location_destroy (cats_location); - if (!ok) - goto error; - break; - - case CCT_NUMBER: - case CCT_NRANGE: - for (size_t j = 0; j < n_vars; j++) - if (var_is_alpha (vars[j])) - { - msg_at (SE, cat->location, - _("This category specification may be applied " - "only to numeric variables, but this " - "subcommand tries to apply it to string " - "variable %s."), - var_get_name (vars[j])); - goto error; - } - break; - - case CCT_STRING: - if (parse_strings) - { - double n; - if (!parse_category_string (cat->location, cat->string, dict, - common_format->type, &n)) - goto error; - - ss_dealloc (&cat->string); - - cat->type = CCT_NUMBER; - cat->number = n; - } - else if (!all_strings (vars, n_vars, cat)) - goto error; - break; - - case CCT_SRANGE: - if (parse_strings) - { - double n[2]; - - if (!cat->srange[0].string) - n[0] = -DBL_MAX; - else if (!parse_category_string (cat->location, - cat->srange[0], dict, - common_format->type, &n[0])) - goto error; - - if (!cat->srange[1].string) - n[1] = DBL_MAX; - else if (!parse_category_string (cat->location, - cat->srange[1], dict, - common_format->type, &n[1])) - goto error; - - ss_dealloc (&cat->srange[0]); - ss_dealloc (&cat->srange[1]); - - cat->type = CCT_NRANGE; - cat->nrange[0] = n[0]; - cat->nrange[1] = n[1]; - } - else if (!all_strings (vars, n_vars, cat)) - goto error; - break; - - case CCT_MISSING: - case CCT_OTHERNM: - case CCT_SUBTOTAL: - case CCT_TOTAL: - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: - break; - } - } - } - - free (vars); - return true; - -error: - free (vars); - return false; -} - -union ctables_summary - { - /* COUNT, VALIDN, TOTALN. */ - double count; - - /* MINIMUM, MAXIMUM, RANGE. */ - struct - { - double min; - double max; - }; - - /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */ - struct moments1 *moments; - - /* MEDIAN, MODE, PTILE. */ - struct - { - struct casewriter *writer; - double ovalid; - double ovalue; - }; - }; - -static void -ctables_summary_init (union ctables_summary *s, - const struct ctables_summary_spec *ss) -{ - switch (ss->function) - { - case CTSF_COUNT: - case CTSF_areaPCT_COUNT: - case CTSF_areaPCT_VALIDN: - case CTSF_areaPCT_TOTALN: - case CTSF_MISSING: - case CTSF_TOTALN: - case CTSF_VALIDN: - s->count = 0; - break; - - case CTSF_areaID: - break; - - case CTSF_MAXIMUM: - case CTSF_MINIMUM: - case CTSF_RANGE: - s->min = s->max = SYSMIS; - break; - - case CTSF_MEAN: - case CTSF_SUM: - case CTSF_areaPCT_SUM: - s->moments = moments1_create (MOMENT_MEAN); - break; - - case CTSF_SEMEAN: - case CTSF_STDDEV: - case CTSF_VARIANCE: - s->moments = moments1_create (MOMENT_VARIANCE); - break; - - case CTSF_MEDIAN: - case CTSF_MODE: - case CTSF_PTILE: - { - struct caseproto *proto = caseproto_create (); - proto = caseproto_add_width (proto, 0); - proto = caseproto_add_width (proto, 0); - - struct subcase ordering; - subcase_init (&ordering, 0, 0, SC_ASCEND); - s->writer = sort_create_writer (&ordering, proto); - subcase_uninit (&ordering); - caseproto_unref (proto); - - s->ovalid = 0; - s->ovalue = SYSMIS; - } - break; - } -} - -static void -ctables_summary_uninit (union ctables_summary *s, - const struct ctables_summary_spec *ss) -{ - switch (ss->function) - { - case CTSF_COUNT: - case CTSF_areaPCT_COUNT: - case CTSF_areaPCT_VALIDN: - case CTSF_areaPCT_TOTALN: - case CTSF_MISSING: - case CTSF_TOTALN: - case CTSF_VALIDN: - break; - - case CTSF_areaID: - break; - - case CTSF_MAXIMUM: - case CTSF_MINIMUM: - case CTSF_RANGE: - break; - - case CTSF_MEAN: - case CTSF_SEMEAN: - case CTSF_STDDEV: - case CTSF_SUM: - case CTSF_VARIANCE: - case CTSF_areaPCT_SUM: - moments1_destroy (s->moments); - break; - - case CTSF_MEDIAN: - case CTSF_MODE: - case CTSF_PTILE: - casewriter_destroy (s->writer); - break; - } -} - -static void -ctables_summary_add (union ctables_summary *s, - const struct ctables_summary_spec *ss, - const union value *value, - bool is_missing, bool is_included, - double weight) -{ - /* To determine whether a case is included in a given table for a particular - kind of summary, consider the following charts for the variable being - summarized. Only if "yes" appears is the case counted. - - Categorical variables: VALIDN other TOTALN - Valid values in included categories yes yes yes - Missing values in included categories --- yes yes - Missing values in excluded categories --- --- yes - Valid values in excluded categories --- --- --- - - Scale variables: VALIDN other TOTALN - Valid value yes yes yes - Missing value --- yes yes - - Missing values include both user- and system-missing. (The system-missing - value is always in an excluded category.) - - One way to interpret the above table is that scale variables are like - categorical variables in which all values are in included categories. - */ - switch (ss->function) - { - case CTSF_TOTALN: - case CTSF_areaPCT_TOTALN: - s->count += weight; - break; - - case CTSF_COUNT: - case CTSF_areaPCT_COUNT: - if (is_included) - s->count += weight; - break; - - case CTSF_VALIDN: - case CTSF_areaPCT_VALIDN: - if (!is_missing) - s->count += weight; - break; - - case CTSF_areaID: - break; - - case CTSF_MISSING: - if (is_missing) - s->count += weight; - break; - - case CTSF_MAXIMUM: - case CTSF_MINIMUM: - case CTSF_RANGE: - if (!is_missing) - { - if (s->min == SYSMIS || value->f < s->min) - s->min = value->f; - if (s->max == SYSMIS || value->f > s->max) - s->max = value->f; - } - break; - - case CTSF_MEAN: - case CTSF_SEMEAN: - case CTSF_STDDEV: - case CTSF_SUM: - case CTSF_VARIANCE: - if (!is_missing) - moments1_add (s->moments, value->f, weight); - break; - - case CTSF_areaPCT_SUM: - if (!is_missing) - moments1_add (s->moments, value->f, weight); - break; - - case CTSF_MEDIAN: - case CTSF_MODE: - case CTSF_PTILE: - if (!is_missing) - { - s->ovalid += weight; - - struct ccase *c = case_create (casewriter_get_proto (s->writer)); - *case_num_rw_idx (c, 0) = value->f; - *case_num_rw_idx (c, 1) = weight; - casewriter_write (s->writer, c); - } - break; - } -} - -static double -ctables_summary_value (const struct ctables_cell *cell, - union ctables_summary *s, - const struct ctables_summary_spec *ss) -{ - switch (ss->function) + if (!c->n_cats) { - case CTSF_COUNT: - return s->count; - - case CTSF_areaID: - return cell->areas[ss->calc_area]->sequence; + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + c->cats[c->n_cats++] = cat; + } - case CTSF_areaPCT_COUNT: - { - const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_count = a->count[ss->weighting]; - return a_count ? s->count / a_count * 100 : SYSMIS; - } + if (show_totals) + { + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); - case CTSF_areaPCT_VALIDN: - { - const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_valid = a->valid[ss->weighting]; - return a_valid ? s->count / a_valid * 100 : SYSMIS; - } + struct ctables_category *totals; + if (totals_before) + { + insert_element (c->cats, c->n_cats, sizeof *c->cats, 0); + totals = &c->cats[0]; + } + else + totals = &c->cats[c->n_cats]; + c->n_cats++; - case CTSF_areaPCT_TOTALN: - { - const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_total = a->total[ss->weighting]; - return a_total ? s->count / a_total * 100 : SYSMIS; - } + *totals = (struct ctables_category) { + .type = CCT_TOTAL, + .total_label = total_label ? total_label : xstrdup (_("Total")), + }; + } - case CTSF_MISSING: - case CTSF_TOTALN: - case CTSF_VALIDN: - return s->count; + struct ctables_category *subtotal = NULL; + for (size_t i = totals_before ? 0 : c->n_cats; + totals_before ? i < c->n_cats : i-- > 0; + totals_before ? i++ : 0) + { + struct ctables_category *cat = &c->cats[i]; + switch (cat->type) + { + case CCT_NUMBER: + case CCT_STRING: + case CCT_NRANGE: + case CCT_SRANGE: + case CCT_MISSING: + case CCT_OTHERNM: + cat->subtotal = subtotal; + break; - case CTSF_MAXIMUM: - return s->max; + case CCT_POSTCOMPUTE: + break; - case CTSF_MINIMUM: - return s->min; + case CCT_SUBTOTAL: + subtotal = cat; + break; - case CTSF_RANGE: - return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS; + case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + break; + } + } - case CTSF_MEAN: - { - double mean; - moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL); - return mean; - } + if (cats_start_ofs != -1) + { + for (size_t i = 0; i < c->n_cats; i++) + { + struct ctables_category *cat = &c->cats[i]; + switch (cat->type) + { + case CCT_POSTCOMPUTE: + cat->parse_format = parse_strings ? common_format->type : FMT_F; + struct msg_location *cats_location + = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs); + bool ok = ctables_recursive_check_postcompute ( + dict, cat->pc->expr, cat, c, cats_location); + msg_location_destroy (cats_location); + if (!ok) + goto error; + break; - case CTSF_SEMEAN: - { - double weight, variance; - moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL); - return calc_semean (variance, weight); - } + case CCT_NUMBER: + case CCT_NRANGE: + for (size_t j = 0; j < n_vars; j++) + if (var_is_alpha (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied " + "only to numeric variables, but this " + "subcommand tries to apply it to string " + "variable %s."), + var_get_name (vars[j])); + goto error; + } + break; - case CTSF_STDDEV: - { - double variance; - moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); - return variance != SYSMIS ? sqrt (variance) : SYSMIS; - } + case CCT_STRING: + if (parse_strings) + { + double n; + if (!parse_category_string (cat->location, cat->string, dict, + common_format->type, &n)) + goto error; - case CTSF_SUM: - { - double weight, mean; - moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); - return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS; - } + ss_dealloc (&cat->string); - case CTSF_VARIANCE: - { - double variance; - moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); - return variance; - } + cat->type = CCT_NUMBER; + cat->number = n; + } + else if (!all_strings (vars, n_vars, cat)) + goto error; + break; - case CTSF_areaPCT_SUM: - { - double weight, mean; - moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); - if (weight == SYSMIS || mean == SYSMIS) - return SYSMIS; + case CCT_SRANGE: + if (parse_strings) + { + double n[2]; - const struct ctables_area *a = cell->areas[ss->calc_area]; - const struct ctables_sum *sum = &a->sums[ss->sum_var_idx]; - double denom = sum->sum[ss->weighting]; - return denom != 0 ? weight * mean / denom * 100 : SYSMIS; - } + if (!cat->srange[0].string) + n[0] = -DBL_MAX; + else if (!parse_category_string (cat->location, + cat->srange[0], dict, + common_format->type, &n[0])) + goto error; - case CTSF_MEDIAN: - case CTSF_PTILE: - if (s->writer) - { - struct casereader *reader = casewriter_make_reader (s->writer); - s->writer = NULL; + if (!cat->srange[1].string) + n[1] = DBL_MAX; + else if (!parse_category_string (cat->location, + cat->srange[1], dict, + common_format->type, &n[1])) + goto error; - struct percentile *ptile = percentile_create ( - ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid); - struct order_stats *os = &ptile->parent; - order_stats_accumulate_idx (&os, 1, reader, 1, 0); - s->ovalue = percentile_calculate (ptile, PC_HAVERAGE); - statistic_destroy (&ptile->parent.parent); - } - return s->ovalue; + ss_dealloc (&cat->srange[0]); + ss_dealloc (&cat->srange[1]); - case CTSF_MODE: - if (s->writer) - { - struct casereader *reader = casewriter_make_reader (s->writer); - s->writer = NULL; + cat->type = CCT_NRANGE; + cat->nrange[0] = n[0]; + cat->nrange[1] = n[1]; + } + else if (!all_strings (vars, n_vars, cat)) + goto error; + break; - struct mode *mode = mode_create (); - struct order_stats *os = &mode->parent; - order_stats_accumulate_idx (&os, 1, reader, 1, 0); - s->ovalue = mode->mode; - statistic_destroy (&mode->parent.parent); + case CCT_MISSING: + case CCT_OTHERNM: + case CCT_SUBTOTAL: + case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + break; + } } - return s->ovalue; } - NOT_REACHED (); -} + free (vars); + return true; +error: + free (vars); + return false; +} + struct ctables_cell_sort_aux { const struct ctables_nest *nest; @@ -3645,20 +3663,6 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, return cell; } -static bool -is_listwise_missing (const struct ctables_summary_spec_set *specs, - const struct ccase *c) -{ - for (size_t i = 0; i < specs->n_listwise_vars; i++) - { - const struct variable *var = specs->listwise_vars[i]; - if (var_is_num_missing (var, case_num (c, var))) - return true; - } - - return false; -} - static void add_weight (double dst[N_CTWS], const double src[N_CTWS]) { @@ -4205,7 +4209,7 @@ found: ; const struct ctables_table *t = s->table; const struct ctables_nest *specs_nest = s->nests[t->summary_axis]; const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv]; - return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx], + return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx], &specs->specs[ctx->summary_idx]); } @@ -4776,8 +4780,8 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) double d = (cell->postcompute ? ctables_cell_calculate_postcompute ( s, cell, ss, &format, &is_ctables_format, j) - : ctables_summary_value (cell, &cell->summaries[j], - ss)); + : ctables_summary_value (cell->areas, + &cell->summaries[j], ss)); struct pivot_value *value; if (ct->hide_threshold != 0 -- 2.30.2