From e784ed9c161b6b7aec1402de566e6c09cb2031e0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 27 Aug 2022 17:23:29 -0700 Subject: [PATCH] refactor --- src/language/stats/ctables.c | 2109 +++++++++++++++++----------------- 1 file changed, 1056 insertions(+), 1053 deletions(-) diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 0e704be074..0024ac28ef 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -2189,6 +2189,153 @@ ctables_category_format_label (const struct ctables_category *cat, return false; } + +static bool +ctables_recursive_check_postcompute (struct dictionary *dict, + const struct ctables_pcexpr *e, + struct ctables_category *pc_cat, + const struct ctables_categories *cats, + const struct msg_location *cats_location) +{ + switch (e->op) + { + case CTPO_CAT_NUMBER: + case CTPO_CAT_STRING: + case CTPO_CAT_NRANGE: + case CTPO_CAT_SRANGE: + case CTPO_CAT_MISSING: + case CTPO_CAT_OTHERNM: + case CTPO_CAT_SUBTOTAL: + case CTPO_CAT_TOTAL: + { + struct ctables_category *cat = ctables_find_category_for_postcompute ( + dict, cats, pc_cat->parse_format, e); + if (!cat) + { + if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0) + { + size_t n_subtotals = 0; + for (size_t i = 0; i < cats->n_cats; i++) + n_subtotals += cats->cats[i].type == CCT_SUBTOTAL; + if (n_subtotals > 1) + { + msg_at (SE, cats_location, + ngettext ("These categories include %zu instance " + "of SUBTOTAL or HSUBTOTAL, so references " + "from computed categories must refer to " + "subtotals by position, " + "e.g. SUBTOTAL[1].", + "These categories include %zu instances " + "of SUBTOTAL or HSUBTOTAL, so references " + "from computed categories must refer to " + "subtotals by position, " + "e.g. 
SUBTOTAL[1].", + n_subtotals), + n_subtotals); + msg_at (SN, e->location, + _("This is the reference that lacks a position.")); + return false; /* bool function; NULL here only meant false through implicit conversion */ + } + } + + msg_at (SE, pc_cat->location, + _("Computed category &%s references a category not included " + "in the category list."), + pc_cat->pc->name); + msg_at (SN, e->location, _("This is the missing category.")); + if (e->op == CTPO_CAT_SUBTOTAL) + msg_at (SN, cats_location, + _("To fix the problem, add subtotals to the " + "list of categories here.")); + else if (e->op == CTPO_CAT_TOTAL) + msg (SN, _("To fix the problem, add TOTAL=YES to the variable's " + "CATEGORIES specification.")); + else + msg_at (SN, cats_location, + _("To fix the problem, add the missing category to the " + "list of categories here.")); + return false; + } + if (pc_cat->pc->hide_source_cats) + cat->hide = true; + return true; + } + + case CTPO_CONSTANT: + return true; + + case CTPO_ADD: + case CTPO_SUB: + case CTPO_MUL: + case CTPO_DIV: + case CTPO_POW: + case CTPO_NEG: + for (size_t i = 0; i < 2; i++) + if (e->subs[i] && !ctables_recursive_check_postcompute ( + dict, e->subs[i], pc_cat, cats, cats_location)) + return false; + return true; + } + + NOT_REACHED (); +} + +static struct pivot_value * +ctables_postcompute_label (const struct ctables_categories *cats, + const struct ctables_category *cat, + const struct variable *var) +{ + struct substring in = ss_cstr (cat->pc->label); + struct substring target = ss_cstr (")LABEL["); /* marker that precedes a 1-based category index and a closing ']' */ + + struct string out = DS_EMPTY_INITIALIZER; + for (;;) + { + size_t chunk = ss_find_substring (in, target); + if (chunk == SIZE_MAX) /* no further marker: emit whatever text remains */ + { + if (ds_is_empty (&out)) + return pivot_value_new_user_text (in.string, in.length); + else + { + ds_put_substring (&out, in); + return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out)); + } + } + + ds_put_substring (&out, ss_head (in, chunk)); + ss_advance (&in, chunk + target.length); + + struct substring idx_s; + if (!ss_get_until (&in, ']', &idx_s)) + goto error; + char 
*tail; + long int idx = strtol (idx_s.string, &tail, 10); + if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s)) + goto error; + + struct ctables_category *cat2 = &cats->cats[idx - 1]; + if (!ctables_category_format_label (cat2, var, &out)) + goto error; + } + +error: + ds_destroy (&out); + return pivot_value_new_user_text (cat->pc->label, SIZE_MAX); +} + +static struct pivot_value * +ctables_category_create_value_label (const struct ctables_categories *cats, + const struct ctables_category *cat, + const struct variable *var, + const union value *value) +{ + return (cat->type == CCT_POSTCOMPUTE && cat->pc->label + ? ctables_postcompute_label (cats, cat, var) + : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL + ? pivot_value_new_user_text (cat->total_label, SIZE_MAX) + : pivot_value_new_var_value (var, value)); +} /* CTABLES variable nesting and stacking. */ @@ -2702,6 +2849,33 @@ ctables_summary_value (struct ctables_area *areas[N_CTATS], NOT_REACHED (); } +/* CTABLES occurrences. */ + +struct ctables_occurrence + { + struct hmap_node node; + union value value; + }; + +static void +ctables_add_occurrence (const struct variable *var, + const union value *value, + struct hmap *occurrences) +{ + int width = var_get_width (var); + unsigned int hash = value_hash (value, width, 0); + + struct ctables_occurrence *o; + HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash, + occurrences) + if (value_equal (value, &o->value, width)) + return; + + o = xmalloc (sizeof *o); + value_clone (&o->value, value, width); + hmap_insert (occurrences, &o->node, hash); +} + enum ctables_vlabel { CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT, @@ -2740,49 +2914,6 @@ struct ctables_cell union ctables_summary *summaries; }; -struct ctables - { - const struct dictionary *dict; - struct pivot_table_look *look; - - /* For CTEF_* formats. */ - struct fmt_settings ctables_formats; - - /* If this is NULL, zeros are displayed using the normal print format. 
- Otherwise, this string is displayed. */ - char *zero; - - /* If this is NULL, missing values are displayed using the normal print - format. Otherwise, this string is displayed. */ - char *missing; - - /* Indexed by variable dictionary index. */ - enum ctables_vlabel *vlabels; - - struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */ - - bool mrsets_count_duplicates; /* MRSETS. */ - bool smissing_listwise; /* SMISSING. */ - struct variable *e_weight; /* WEIGHT. */ - int hide_threshold; /* HIDESMALLCOUNTS. */ - - struct ctables_table **tables; - size_t n_tables; - }; - -struct ctables_value - { - struct hmap_node node; - union value value; - int leaf; - }; - -struct ctables_occurrence - { - struct hmap_node node; - union value value; - }; - struct ctables_section { /* Settings. */ @@ -2856,1079 +2987,1042 @@ struct ctables_table struct ctables_pairwise *pairwise; }; -/* Chi-square test (SIGTEST). */ -struct ctables_chisq - { - double alpha; - bool include_mrsets; - bool all_visible; - }; - -/* Pairwise comparison test (COMPARETEST). 
*/ -struct ctables_pairwise +struct ctables_cell_sort_aux { - enum { PROP, MEAN } type; - double alpha[2]; - bool include_mrsets; - bool meansvariance_allcats; - bool all_visible; - enum { BONFERRONI = 1, BH } adjust; - bool merge; - bool apa_style; - bool show_sig; + const struct ctables_nest *nest; + enum pivot_axis_type a; }; +static int +ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_) +{ + const struct ctables_cell_sort_aux *aux = aux_; + struct ctables_cell *const *ap = a_; + struct ctables_cell *const *bp = b_; + const struct ctables_cell *a = *ap; + const struct ctables_cell *b = *bp; + const struct ctables_nest *nest = aux->nest; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i]; + const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i]; + if (a_cv->category != b_cv->category) + return a_cv->category > b_cv->category ? 1 : -1; -static bool -parse_col_width (struct lexer *lexer, const char *name, double *width) -{ - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "DEFAULT")) - *width = SYSMIS; - else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX)) - { - *width = lex_number (lexer); - lex_get (lexer); - } - else - return false; + const union value *a_val = &a_cv->value; + const union value *b_val = &b_cv->value; + switch (a_cv->category->type) + { + case CCT_NUMBER: + case CCT_STRING: + case CCT_SUBTOTAL: + case CCT_TOTAL: + case CCT_POSTCOMPUTE: + case CCT_EXCLUDED_MISSING: + /* Must be equal. 
*/ + continue; - return true; -} + case CCT_NRANGE: + case CCT_SRANGE: + case CCT_MISSING: + case CCT_OTHERNM: + { + int cmp = value_compare_3way (a_val, b_val, var_get_width (var)); + if (cmp) + return cmp; + } + break; -static bool -parse_bool (struct lexer *lexer, bool *b) -{ - if (lex_match_id (lexer, "NO")) - *b = false; - else if (lex_match_id (lexer, "YES")) - *b = true; - else - { - lex_error_expecting (lexer, "YES", "NO"); - return false; - } - return true; -} + case CCT_VALUE: + { + int cmp = value_compare_3way (a_val, b_val, var_get_width (var)); + if (cmp) + return a_cv->category->sort_ascending ? cmp : -cmp; + } + break; -static void -ctables_chisq_destroy (struct ctables_chisq *chisq) -{ - free (chisq); -} + case CCT_LABEL: + { + const char *a_label = var_lookup_value_label (var, a_val); + const char *b_label = var_lookup_value_label (var, b_val); + int cmp; + if (a_label) + { + if (!b_label) + return -1; + cmp = strcmp (a_label, b_label); + } + else + { + if (b_label) + return 1; + cmp = value_compare_3way (a_val, b_val, var_get_width (var)); + } + if (cmp) + return a_cv->category->sort_ascending ? 
cmp : -cmp; + } + break; -static void -ctables_pairwise_destroy (struct ctables_pairwise *pairwise) -{ - free (pairwise); + case CCT_FUNCTION: + NOT_REACHED (); + } + } + return 0; } -static void -ctables_table_destroy (struct ctables_table *t) +static int +ctables_cell_compare_leaf_3way (const void *a_, const void *b_, + const void *aux UNUSED) { - if (!t) - return; - - for (size_t i = 0; i < t->n_sections; i++) - ctables_section_uninit (&t->sections[i]); - free (t->sections); + struct ctables_cell *const *ap = a_; + struct ctables_cell *const *bp = b_; + const struct ctables_cell *a = *ap; + const struct ctables_cell *b = *bp; - for (size_t i = 0; i < t->n_categories; i++) - ctables_categories_unref (t->categories[i]); - free (t->categories); + for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++) + { + int al = a->axes[axis].leaf; + int bl = b->axes[axis].leaf; + if (al != bl) + return al > bl ? 1 : -1; + } + return 0; +} +static struct ctables_area * +ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell, + enum ctables_area_type area) +{ + size_t hash = 0; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - ctables_axis_destroy (t->axes[a]); - ctables_stack_uninit (&t->stacks[a]); + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n_areas[area]; i++) + { + size_t v_idx = nest->areas[area][i]; + struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx]; + hash = hash_pointer (cv->category, hash); + if (cv->category->type != CCT_TOTAL + && cv->category->type != CCT_SUBTOTAL + && cv->category->type != CCT_POSTCOMPUTE) + hash = value_hash (&cv->value, + var_get_width (nest->vars[v_idx]), hash); + } } - free (t->summary_specs.specs); - struct ctables_value *ctv, *next_ctv; - HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node, - &t->clabels_values_map) + struct ctables_area *a; + HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area]) { - value_destroy (&ctv->value, 
var_get_width (t->clabels_example)); - hmap_delete (&t->clabels_values_map, &ctv->node); - free (ctv); + const struct ctables_cell *df = a->example; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n_areas[area]; i++) + { + size_t v_idx = nest->areas[area][i]; + struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx]; + struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx]; + if (cv1->category != cv2->category + || (cv1->category->type != CCT_TOTAL + && cv1->category->type != CCT_SUBTOTAL + && cv1->category->type != CCT_POSTCOMPUTE + && !value_equal (&cv1->value, &cv2->value, + var_get_width (nest->vars[v_idx])))) + goto not_equal; + } + } + return a; + + not_equal: ; } - hmap_destroy (&t->clabels_values_map); - free (t->clabels_values); - free (t->sum_vars); - free (t->caption); - free (t->corner); - free (t->title); - ctables_chisq_destroy (t->chisq); - ctables_pairwise_destroy (t->pairwise); - free (t); + struct ctables_sum *sums = (s->table->n_sum_vars + ? 
xzalloc (s->table->n_sum_vars * sizeof *sums) + : NULL); + + a = xmalloc (sizeof *a); + *a = (struct ctables_area) { .example = cell, .sums = sums }; + hmap_insert (&s->areas[area], &a->node, hash); + return a; } -static void -ctables_destroy (struct ctables *ct) +static struct ctables_cell * +ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES]) { - if (!ct) - return; + size_t hash = 0; + enum ctables_summary_variant sv = CSV_CELL; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + hash = hash_pointer (cats[a][i], hash); + if (cats[a][i]->type != CCT_TOTAL + && cats[a][i]->type != CCT_SUBTOTAL + && cats[a][i]->type != CCT_POSTCOMPUTE) + hash = value_hash (case_data (c, nest->vars[i]), + var_get_width (nest->vars[i]), hash); + else + sv = CSV_TOTAL; + } + } - struct ctables_postcompute *pc, *next_pc; - HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node, - &ct->postcomputes) + struct ctables_cell *cell; + HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells) { - free (pc->name); - msg_location_destroy (pc->location); - ctables_pcexpr_destroy (pc->expr); - free (pc->label); - if (pc->specs) + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - ctables_summary_spec_set_uninit (pc->specs); - free (pc->specs); + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx + && (cats[a][i] != cell->axes[a].cvs[i].category + || (cats[a][i]->type != CCT_TOTAL + && cats[a][i]->type != CCT_SUBTOTAL + && cats[a][i]->type != CCT_POSTCOMPUTE + && !value_equal (case_data (c, nest->vars[i]), + &cell->axes[a].cvs[i].value, + var_get_width (nest->vars[i]))))) + goto not_equal; } - hmap_delete (&ct->postcomputes, &pc->hmap_node); - free (pc); - } - hmap_destroy (&ct->postcomputes); 
- fmt_settings_uninit (&ct->ctables_formats); - pivot_table_look_unref (ct->look); - free (ct->zero); - free (ct->missing); - free (ct->vlabels); - for (size_t i = 0; i < ct->n_tables; i++) - ctables_table_destroy (ct->tables[i]); - free (ct->tables); - free (ct); -} + return cell; -static bool -ctables_recursive_check_postcompute (struct dictionary *dict, - const struct ctables_pcexpr *e, - struct ctables_category *pc_cat, - const struct ctables_categories *cats, - const struct msg_location *cats_location) -{ - switch (e->op) - { - case CTPO_CAT_NUMBER: - case CTPO_CAT_STRING: - case CTPO_CAT_NRANGE: - case CTPO_CAT_SRANGE: - case CTPO_CAT_MISSING: - case CTPO_CAT_OTHERNM: - case CTPO_CAT_SUBTOTAL: - case CTPO_CAT_TOTAL: - { - struct ctables_category *cat = ctables_find_category_for_postcompute ( - dict, cats, pc_cat->parse_format, e); - if (!cat) - { - if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0) - { - size_t n_subtotals = 0; - for (size_t i = 0; i < cats->n_cats; i++) - n_subtotals += cats->cats[i].type == CCT_SUBTOTAL; - if (n_subtotals > 1) - { - msg_at (SE, cats_location, - ngettext ("These categories include %zu instance " - "of SUBTOTAL or HSUBTOTAL, so references " - "from computed categories must refer to " - "subtotals by position, " - "e.g. SUBTOTAL[1].", - "These categories include %zu instances " - "of SUBTOTAL or HSUBTOTAL, so references " - "from computed categories must refer to " - "subtotals by position, " - "e.g. 
SUBTOTAL[1].", - n_subtotals), - n_subtotals); - msg_at (SN, e->location, - _("This is the reference that lacks a position.")); - return NULL; - } - } + not_equal: ; + } - msg_at (SE, pc_cat->location, - _("Computed category &%s references a category not included " - "in the category list."), - pc_cat->pc->name); - msg_at (SN, e->location, _("This is the missing category.")); - if (e->op == CTPO_CAT_SUBTOTAL) - msg_at (SN, cats_location, - _("To fix the problem, add subtotals to the " - "list of categories here.")); - else if (e->op == CTPO_CAT_TOTAL) - msg (SN, _("To fix the problem, add TOTAL=YES to the variable's " - "CATEGORIES specification.")); - else - msg_at (SN, cats_location, - _("To fix the problem, add the missing category to the " - "list of categories here.")); - return false; - } - if (pc_cat->pc->hide_source_cats) - cat->hide = true; - return true; - } + cell = xmalloc (sizeof *cell); + cell->hide = false; + cell->sv = sv; + cell->omit_areas = 0; + cell->postcompute = false; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + cell->axes[a].cvs = (nest->n + ? 
xnmalloc (nest->n, sizeof *cell->axes[a].cvs) + : NULL); + for (size_t i = 0; i < nest->n; i++) + { + const struct ctables_category *cat = cats[a][i]; + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); + if (i != nest->scale_idx) + { + const struct ctables_category *subtotal = cat->subtotal; + if (cat->hide || (subtotal && subtotal->hide_subcategories)) + cell->hide = true; - case CTPO_CONSTANT: - return true; + if (cat->type == CCT_TOTAL + || cat->type == CCT_SUBTOTAL + || cat->type == CCT_POSTCOMPUTE) + { + switch (a) + { + case PIVOT_AXIS_COLUMN: + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER) | + (1u << CTAT_LAYERCOL) | + (1u << CTAT_SUBTABLE) | + (1u << CTAT_COL)); + break; + case PIVOT_AXIS_ROW: + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER) | + (1u << CTAT_LAYERROW) | + (1u << CTAT_SUBTABLE) | + (1u << CTAT_ROW)); + break; + case PIVOT_AXIS_LAYER: + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER)); + break; + } + } + if (cat->type == CCT_POSTCOMPUTE) + cell->postcompute = true; + } - case CTPO_ADD: - case CTPO_SUB: - case CTPO_MUL: - case CTPO_DIV: - case CTPO_POW: - case CTPO_NEG: - for (size_t i = 0; i < 2; i++) - if (e->subs[i] && !ctables_recursive_check_postcompute ( - dict, e->subs[i], pc_cat, cats, cats_location)) - return false; - return true; + cell->axes[a].cvs[i].category = cat; + value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var)); + } } - NOT_REACHED (); + const struct ctables_nest *ss = s->nests[s->table->summary_axis]; + const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; + cell->summaries = xmalloc (specs->n * sizeof *cell->summaries); + for (size_t i = 0; i < specs->n; i++) + ctables_summary_init (&cell->summaries[i], &specs->specs[i]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + cell->areas[at] = ctables_area_insert (s, cell, at); + hmap_insert (&s->cells, &cell->node, hash); + return cell; } 
-static bool -all_strings (struct variable **vars, size_t n_vars, - const struct ctables_category *cat) +static void +add_weight (double dst[N_CTWS], const double src[N_CTWS]) { - for (size_t j = 0; j < n_vars; j++) - if (var_is_numeric (vars[j])) - { - msg_at (SE, cat->location, - _("This category specification may be applied only to string " - "variables, but this subcommand tries to apply it to " - "numeric variable %s."), - var_get_name (vars[j])); - return false; - } - return true; + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + dst[wt] += src[wt]; } -static bool -ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, - struct ctables *ct, struct ctables_table *t) +static void +ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS]) { - if (!lex_match_id (lexer, "VARIABLES")) - return false; - lex_match (lexer, T_EQUALS); + struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); + const struct ctables_nest *ss = s->nests[s->table->summary_axis]; - struct variable **vars; - size_t n_vars; - if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH)) - return false; + const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; + const union value *value = case_data (c, specs->var); + bool is_missing = var_is_value_missing (specs->var, value); + bool is_scale_missing + = is_missing || (specs->is_scale && is_listwise_missing (specs, c)); - const struct fmt_spec *common_format = var_get_print_format (vars[0]); - for (size_t i = 1; i < n_vars; i++) - { - const struct fmt_spec *f = var_get_print_format (vars[i]); - if (f->type != common_format->type) - { - common_format = NULL; - break; - } - } - bool parse_strings - = (common_format - && (fmt_get_category (common_format->type) - & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))); + for (size_t i = 0; i < specs->n; i++) + ctables_summary_add 
(&cell->summaries[i], &specs->specs[i], value, + is_scale_missing, is_included, + weight[specs->specs[i].weighting]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + if (!(cell->omit_areas && (1u << at))) /* NOTE(review): "&&" is logical AND, so this is true only when omit_areas == 0 and a cell with any omitted area updates no area at all; a per-area test would be "&". Confirm the intent before changing, since the omit sets may not list every area a total cell shares with ordinary cells. */ + { + struct ctables_area *a = cell->areas[at]; - struct ctables_categories *c = xmalloc (sizeof *c); - *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true }; - for (size_t i = 0; i < n_vars; i++) - { - struct ctables_categories **cp - = &t->categories[var_get_dict_index (vars[i])]; - ctables_categories_unref (*cp); - *cp = c; - } + add_weight (a->total, weight); /* total is updated unconditionally; count and valid are gated below */ + if (is_included) + add_weight (a->count, weight); + if (!is_missing) + { + add_weight (a->valid, weight); - size_t allocated_cats = 0; - int cats_start_ofs = -1; - int cats_end_ofs = -1; - if (lex_match (lexer, T_LBRACK)) + if (!is_scale_missing) + for (size_t i = 0; i < s->table->n_sum_vars; i++) + { + const struct variable *var = s->table->sum_vars[i]; + double addend = case_num (c, var); + if (!var_is_num_missing (var, addend)) + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + a->sums[i].sum[wt] += addend * weight[wt]; + } + } + } +} + +static void +recurse_totals (struct ctables_section *s, const struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS], + enum pivot_axis_type start_axis, size_t start_nest) +{ + for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) { - cats_start_ofs = lex_ofs (lexer); - do + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = start_nest; i < nest->n; i++) { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + if (i == nest->scale_idx) + continue; - int start_ofs = lex_ofs (lexer); - struct ctables_category *cat = &c->cats[c->n_cats]; - if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat)) - goto error; - cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); - 
c->n_cats++; + const struct variable *var = nest->vars[i]; - lex_match (lexer, T_COMMA); + const struct ctables_category *total = ctables_categories_total ( + s->table->categories[var_get_dict_index (var)]); + if (total) + { + const struct ctables_category *save = cats[a][i]; + cats[a][i] = total; + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, a, i + 1); + cats[a][i] = save; + } } - while (!lex_match (lexer, T_RBRACK)); - cats_end_ofs = lex_ofs (lexer) - 1; + start_nest = 0; } +} - struct ctables_category cat = { - .type = CCT_VALUE, - .include_missing = false, - .sort_ascending = true, - }; - bool show_totals = false; - char *total_label = NULL; - bool totals_before = false; - while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) +static void +recurse_subtotals (struct ctables_section *s, const struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS], + enum pivot_axis_type start_axis, size_t start_nest) +{ + for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) { - if (!c->n_cats && lex_match_id (lexer, "ORDER")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "A")) - cat.sort_ascending = true; - else if (lex_match_id (lexer, "D")) - cat.sort_ascending = false; - else - { - lex_error_expecting (lexer, "A", "D"); - goto error; - } - } - else if (!c->n_cats && lex_match_id (lexer, "KEY")) + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = start_nest; i < nest->n; i++) { - int start_ofs = lex_ofs (lexer) - 1; - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "VALUE")) - cat.type = CCT_VALUE; - else if (lex_match_id (lexer, "LABEL")) - cat.type = CCT_LABEL; - else - { - cat.type = CCT_FUNCTION; - if (!parse_ctables_summary_function (lexer, &cat.sort_function, - &cat.weighting, &cat.area)) - goto error; - - if (lex_match (lexer, T_LPAREN)) - { - cat.sort_var = parse_variable (lexer, dict); 
- if (!cat.sort_var) - goto error; - - if (cat.sort_function == CTSF_PTILE) - { - lex_match (lexer, T_COMMA); - if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100)) - goto error; - cat.percentile = lex_number (lexer); - lex_get (lexer); - } - - if (!lex_force_match (lexer, T_RPAREN)) - goto error; - } - else if (ctables_function_availability (cat.sort_function) - == CTFA_SCALE) - { - bool UNUSED b = lex_force_match (lexer, T_LPAREN); - goto error; - } + if (i == nest->scale_idx) + continue; - lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, - _("Data-dependent sorting is not implemented.")); - goto error; - } - } - else if (!c->n_cats && lex_match_id (lexer, "MISSING")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "INCLUDE")) - cat.include_missing = true; - else if (lex_match_id (lexer, "EXCLUDE")) - cat.include_missing = false; - else - { - lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); - goto error; - } - } - else if (lex_match_id (lexer, "TOTAL")) - { - lex_match (lexer, T_EQUALS); - if (!parse_bool (lexer, &show_totals)) - goto error; - } - else if (lex_match_id (lexer, "LABEL")) - { - lex_match (lexer, T_EQUALS); - if (!lex_force_string (lexer)) - goto error; - free (total_label); - total_label = ss_xstrdup (lex_tokss (lexer)); - lex_get (lexer); - } - else if (lex_match_id (lexer, "POSITION")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "BEFORE")) - totals_before = true; - else if (lex_match_id (lexer, "AFTER")) - totals_before = false; - else - { - lex_error_expecting (lexer, "BEFORE", "AFTER"); - goto error; - } - } - else if (lex_match_id (lexer, "EMPTY")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "INCLUDE")) - c->show_empty = true; - else if (lex_match_id (lexer, "EXCLUDE")) - c->show_empty = false; - else + const struct ctables_category *save = cats[a][i]; + if (save->subtotal) { - lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); - goto error; + cats[a][i] = save->subtotal; + 
ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_subtotals (s, c, cats, is_included, weight, a, i + 1); + cats[a][i] = save; } } - else - { - if (!c->n_cats) - lex_error_expecting (lexer, "ORDER", "KEY", "MISSING", - "TOTAL", "LABEL", "POSITION", "EMPTY"); - else - lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY"); - goto error; - } - } - - if (!c->n_cats) - { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); - c->cats[c->n_cats++] = cat; + start_nest = 0; } +} - if (show_totals) +static void +ctables_cell_insert (struct ctables_section *s, const struct ccase *c, + double weight[N_CTWS]) +{ + const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; + const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; + const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; + const struct ctables_category **cats[PIVOT_N_AXES] = { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); - - struct ctables_category *totals; - if (totals_before) - { - insert_element (c->cats, c->n_cats, sizeof *c->cats, 0); - totals = &c->cats[0]; - } - else - totals = &c->cats[c->n_cats]; - c->n_cats++; + [PIVOT_AXIS_LAYER] = layer_cats, + [PIVOT_AXIS_ROW] = row_cats, + [PIVOT_AXIS_COLUMN] = column_cats, + }; - *totals = (struct ctables_category) { - .type = CCT_TOTAL, - .total_label = total_label ? total_label : xstrdup (_("Total")), - }; - } + bool is_included = true; - struct ctables_category *subtotal = NULL; - for (size_t i = totals_before ? 0 : c->n_cats; - totals_before ? i < c->n_cats : i-- > 0; - totals_before ? 
i++ : 0) + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - struct ctables_category *cat = &c->cats[i]; - switch (cat->type) - { - case CCT_NUMBER: - case CCT_STRING: - case CCT_NRANGE: - case CCT_SRANGE: - case CCT_MISSING: - case CCT_OTHERNM: - cat->subtotal = subtotal; - break; + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); - case CCT_POSTCOMPUTE: - break; + cats[a][i] = ctables_categories_match ( + s->table->categories[var_get_dict_index (var)], value, var); + if (!cats[a][i]) + { + if (i != nest->summary_idx) + return; - case CCT_SUBTOTAL: - subtotal = cat; - break; + if (!var_is_value_missing (var, value)) + return; - case CCT_TOTAL: - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: - break; + static const struct ctables_category cct_excluded_missing = { + .type = CCT_EXCLUDED_MISSING, + .hide = true, + }; + cats[a][i] = &cct_excluded_missing; + is_included = false; + } } } - if (cats_start_ofs != -1) - { - for (size_t i = 0; i < c->n_cats; i++) - { - struct ctables_category *cat = &c->cats[i]; - switch (cat->type) + if (is_included) + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) { - case CCT_POSTCOMPUTE: - cat->parse_format = parse_strings ? 
common_format->type : FMT_F; - struct msg_location *cats_location - = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs); - bool ok = ctables_recursive_check_postcompute ( - dict, cat->pc->expr, cat, c, cats_location); - msg_location_destroy (cats_location); - if (!ok) - goto error; - break; + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); + ctables_add_occurrence (var, value, &s->occurrences[a][i]); + } + } - case CCT_NUMBER: - case CCT_NRANGE: - for (size_t j = 0; j < n_vars; j++) - if (var_is_alpha (vars[j])) - { - msg_at (SE, cat->location, - _("This category specification may be applied " - "only to numeric variables, but this " - "subcommand tries to apply it to string " - "variable %s."), - var_get_name (vars[j])); - goto error; - } - break; + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, 0, 0); + recurse_subtotals (s, c, cats, is_included, weight, 0, 0); +} + +struct ctables_value + { + struct hmap_node node; + union value value; + int leaf; + }; - case CCT_STRING: - if (parse_strings) - { - double n; - if (!parse_category_string (cat->location, cat->string, dict, - common_format->type, &n)) - goto error; +static struct ctables_value * +ctables_value_find__ (struct ctables_table *t, const union value *value, + int width, unsigned int hash) +{ + struct ctables_value *clv; + HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node, + hash, &t->clabels_values_map) + if (value_equal (value, &clv->value, width)) + return clv; + return NULL; +} - ss_dealloc (&cat->string); +static void +ctables_value_insert (struct ctables_table *t, const union value *value, + int width) +{ + unsigned int hash = value_hash (value, width, 0); + struct ctables_value *clv = ctables_value_find__ (t, value, width, hash); + if (!clv) + { + clv = xmalloc (sizeof *clv); + value_clone (&clv->value, value, width); + hmap_insert (&t->clabels_values_map, &clv->node, hash); + } +} - 
cat->type = CCT_NUMBER; - cat->number = n; - } - else if (!all_strings (vars, n_vars, cat)) - goto error; - break; +static struct ctables_value * +ctables_value_find (struct ctables_table *t, + const union value *value, int width) +{ + return ctables_value_find__ (t, value, width, + value_hash (value, width, 0)); +} + +struct ctables + { + const struct dictionary *dict; + struct pivot_table_look *look; - case CCT_SRANGE: - if (parse_strings) - { - double n[2]; + /* For CTEF_* formats. */ + struct fmt_settings ctables_formats; - if (!cat->srange[0].string) - n[0] = -DBL_MAX; - else if (!parse_category_string (cat->location, - cat->srange[0], dict, - common_format->type, &n[0])) - goto error; + /* If this is NULL, zeros are displayed using the normal print format. + Otherwise, this string is displayed. */ + char *zero; - if (!cat->srange[1].string) - n[1] = DBL_MAX; - else if (!parse_category_string (cat->location, - cat->srange[1], dict, - common_format->type, &n[1])) - goto error; + /* If this is NULL, missing values are displayed using the normal print + format. Otherwise, this string is displayed. */ + char *missing; - ss_dealloc (&cat->srange[0]); - ss_dealloc (&cat->srange[1]); + /* Indexed by variable dictionary index. */ + enum ctables_vlabel *vlabels; - cat->type = CCT_NRANGE; - cat->nrange[0] = n[0]; - cat->nrange[1] = n[1]; - } - else if (!all_strings (vars, n_vars, cat)) - goto error; - break; + struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */ - case CCT_MISSING: - case CCT_OTHERNM: - case CCT_SUBTOTAL: - case CCT_TOTAL: - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: - break; - } - } - } + bool mrsets_count_duplicates; /* MRSETS. */ + bool smissing_listwise; /* SMISSING. */ + struct variable *e_weight; /* WEIGHT. */ + int hide_threshold; /* HIDESMALLCOUNTS. 
*/ - free (vars); - return true; + struct ctables_table **tables; + size_t n_tables; + }; -error: - free (vars); - return false; -} - -struct ctables_cell_sort_aux +/* Chi-square test (SIGTEST). */ +struct ctables_chisq { - const struct ctables_nest *nest; - enum pivot_axis_type a; + double alpha; + bool include_mrsets; + bool all_visible; }; -static int -ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_) -{ - const struct ctables_cell_sort_aux *aux = aux_; - struct ctables_cell *const *ap = a_; - struct ctables_cell *const *bp = b_; - const struct ctables_cell *a = *ap; - const struct ctables_cell *b = *bp; - - const struct ctables_nest *nest = aux->nest; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct variable *var = nest->vars[i]; - const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i]; - const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i]; - if (a_cv->category != b_cv->category) - return a_cv->category > b_cv->category ? 1 : -1; +/* Pairwise comparison test (COMPARETEST). */ +struct ctables_pairwise + { + enum { PROP, MEAN } type; + double alpha[2]; + bool include_mrsets; + bool meansvariance_allcats; + bool all_visible; + enum { BONFERRONI = 1, BH } adjust; + bool merge; + bool apa_style; + bool show_sig; + }; - const union value *a_val = &a_cv->value; - const union value *b_val = &b_cv->value; - switch (a_cv->category->type) - { - case CCT_NUMBER: - case CCT_STRING: - case CCT_SUBTOTAL: - case CCT_TOTAL: - case CCT_POSTCOMPUTE: - case CCT_EXCLUDED_MISSING: - /* Must be equal. */ - continue; - case CCT_NRANGE: - case CCT_SRANGE: - case CCT_MISSING: - case CCT_OTHERNM: - { - int cmp = value_compare_3way (a_val, b_val, var_get_width (var)); - if (cmp) - return cmp; - } - break; - case CCT_VALUE: - { - int cmp = value_compare_3way (a_val, b_val, var_get_width (var)); - if (cmp) - return a_cv->category->sort_ascending ? 
cmp : -cmp; - } - break; +static bool +parse_col_width (struct lexer *lexer, const char *name, double *width) +{ + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "DEFAULT")) + *width = SYSMIS; + else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX)) + { + *width = lex_number (lexer); + lex_get (lexer); + } + else + return false; - case CCT_LABEL: - { - const char *a_label = var_lookup_value_label (var, a_val); - const char *b_label = var_lookup_value_label (var, b_val); - int cmp; - if (a_label) - { - if (!b_label) - return -1; - cmp = strcmp (a_label, b_label); - } - else - { - if (b_label) - return 1; - cmp = value_compare_3way (a_val, b_val, var_get_width (var)); - } - if (cmp) - return a_cv->category->sort_ascending ? cmp : -cmp; - } - break; + return true; +} - case CCT_FUNCTION: - NOT_REACHED (); - } - } - return 0; +static bool +parse_bool (struct lexer *lexer, bool *b) +{ + if (lex_match_id (lexer, "NO")) + *b = false; + else if (lex_match_id (lexer, "YES")) + *b = true; + else + { + lex_error_expecting (lexer, "YES", "NO"); + return false; + } + return true; } -static int -ctables_cell_compare_leaf_3way (const void *a_, const void *b_, - const void *aux UNUSED) +static void +ctables_chisq_destroy (struct ctables_chisq *chisq) { - struct ctables_cell *const *ap = a_; - struct ctables_cell *const *bp = b_; - const struct ctables_cell *a = *ap; - const struct ctables_cell *b = *bp; + free (chisq); +} - for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++) - { - int al = a->axes[axis].leaf; - int bl = b->axes[axis].leaf; - if (al != bl) - return al > bl ? 
1 : -1; - } - return 0; +static void +ctables_pairwise_destroy (struct ctables_pairwise *pairwise) +{ + free (pairwise); } -static struct ctables_area * -ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell, - enum ctables_area_type area) +static void +ctables_table_destroy (struct ctables_table *t) { - size_t hash = 0; + if (!t) + return; + + for (size_t i = 0; i < t->n_sections; i++) + ctables_section_uninit (&t->sections[i]); + free (t->sections); + + for (size_t i = 0; i < t->n_categories; i++) + ctables_categories_unref (t->categories[i]); + free (t->categories); + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n_areas[area]; i++) - { - size_t v_idx = nest->areas[area][i]; - struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx]; - hash = hash_pointer (cv->category, hash); - if (cv->category->type != CCT_TOTAL - && cv->category->type != CCT_SUBTOTAL - && cv->category->type != CCT_POSTCOMPUTE) - hash = value_hash (&cv->value, - var_get_width (nest->vars[v_idx]), hash); - } + ctables_axis_destroy (t->axes[a]); + ctables_stack_uninit (&t->stacks[a]); } + free (t->summary_specs.specs); - struct ctables_area *a; - HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area]) + struct ctables_value *ctv, *next_ctv; + HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node, + &t->clabels_values_map) { - const struct ctables_cell *df = a->example; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + value_destroy (&ctv->value, var_get_width (t->clabels_example)); + hmap_delete (&t->clabels_values_map, &ctv->node); + free (ctv); + } + hmap_destroy (&t->clabels_values_map); + free (t->clabels_values); + + free (t->sum_vars); + free (t->caption); + free (t->corner); + free (t->title); + ctables_chisq_destroy (t->chisq); + ctables_pairwise_destroy (t->pairwise); + free (t); +} + +static void +ctables_destroy (struct ctables *ct) 
+{ + if (!ct) + return; + + struct ctables_postcompute *pc, *next_pc; + HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node, + &ct->postcomputes) + { + free (pc->name); + msg_location_destroy (pc->location); + ctables_pcexpr_destroy (pc->expr); + free (pc->label); + if (pc->specs) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n_areas[area]; i++) - { - size_t v_idx = nest->areas[area][i]; - struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx]; - struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx]; - if (cv1->category != cv2->category - || (cv1->category->type != CCT_TOTAL - && cv1->category->type != CCT_SUBTOTAL - && cv1->category->type != CCT_POSTCOMPUTE - && !value_equal (&cv1->value, &cv2->value, - var_get_width (nest->vars[v_idx])))) - goto not_equal; - } + ctables_summary_spec_set_uninit (pc->specs); + free (pc->specs); } - return a; - - not_equal: ; + hmap_delete (&ct->postcomputes, &pc->hmap_node); + free (pc); } + hmap_destroy (&ct->postcomputes); - struct ctables_sum *sums = (s->table->n_sum_vars - ? 
xzalloc (s->table->n_sum_vars * sizeof *sums) - : NULL); + fmt_settings_uninit (&ct->ctables_formats); + pivot_table_look_unref (ct->look); + free (ct->zero); + free (ct->missing); + free (ct->vlabels); + for (size_t i = 0; i < ct->n_tables; i++) + ctables_table_destroy (ct->tables[i]); + free (ct->tables); + free (ct); +} - a = xmalloc (sizeof *a); - *a = (struct ctables_area) { .example = cell, .sums = sums }; - hmap_insert (&s->areas[area], &a->node, hash); - return a; +static bool +all_strings (struct variable **vars, size_t n_vars, + const struct ctables_category *cat) +{ + for (size_t j = 0; j < n_vars; j++) + if (var_is_numeric (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied only to string " + "variables, but this subcommand tries to apply it to " + "numeric variable %s."), + var_get_name (vars[j])); + return false; + } + return true; } -static struct ctables_cell * -ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, - const struct ctables_category **cats[PIVOT_N_AXES]) +static bool +ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, + struct ctables *ct, struct ctables_table *t) { - size_t hash = 0; - enum ctables_summary_variant sv = CSV_CELL; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - hash = hash_pointer (cats[a][i], hash); - if (cats[a][i]->type != CCT_TOTAL - && cats[a][i]->type != CCT_SUBTOTAL - && cats[a][i]->type != CCT_POSTCOMPUTE) - hash = value_hash (case_data (c, nest->vars[i]), - var_get_width (nest->vars[i]), hash); - else - sv = CSV_TOTAL; - } - } + if (!lex_match_id (lexer, "VARIABLES")) + return false; + lex_match (lexer, T_EQUALS); - struct ctables_cell *cell; - HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells) + struct variable **vars; + size_t n_vars; + if (!parse_variables (lexer, 
dict, &vars, &n_vars, PV_NO_SCRATCH)) + return false; + + const struct fmt_spec *common_format = var_get_print_format (vars[0]); + for (size_t i = 1; i < n_vars; i++) { - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + const struct fmt_spec *f = var_get_print_format (vars[i]); + if (f->type != common_format->type) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx - && (cats[a][i] != cell->axes[a].cvs[i].category - || (cats[a][i]->type != CCT_TOTAL - && cats[a][i]->type != CCT_SUBTOTAL - && cats[a][i]->type != CCT_POSTCOMPUTE - && !value_equal (case_data (c, nest->vars[i]), - &cell->axes[a].cvs[i].value, - var_get_width (nest->vars[i]))))) - goto not_equal; + common_format = NULL; + break; } + } + bool parse_strings + = (common_format + && (fmt_get_category (common_format->type) + & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))); - return cell; - - not_equal: ; + struct ctables_categories *c = xmalloc (sizeof *c); + *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true }; + for (size_t i = 0; i < n_vars; i++) + { + struct ctables_categories **cp + = &t->categories[var_get_dict_index (vars[i])]; + ctables_categories_unref (*cp); + *cp = c; } - cell = xmalloc (sizeof *cell); - cell->hide = false; - cell->sv = sv; - cell->omit_areas = 0; - cell->postcompute = false; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + size_t allocated_cats = 0; + int cats_start_ofs = -1; + int cats_end_ofs = -1; + if (lex_match (lexer, T_LBRACK)) { - const struct ctables_nest *nest = s->nests[a]; - cell->axes[a].cvs = (nest->n - ? 
xnmalloc (nest->n, sizeof *cell->axes[a].cvs) - : NULL); - for (size_t i = 0; i < nest->n; i++) + cats_start_ofs = lex_ofs (lexer); + do { - const struct ctables_category *cat = cats[a][i]; - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); - if (i != nest->scale_idx) - { - const struct ctables_category *subtotal = cat->subtotal; - if (cat->hide || (subtotal && subtotal->hide_subcategories)) - cell->hide = true; + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); - if (cat->type == CCT_TOTAL - || cat->type == CCT_SUBTOTAL - || cat->type == CCT_POSTCOMPUTE) - { - switch (a) - { - case PIVOT_AXIS_COLUMN: - cell->omit_areas |= ((1u << CTAT_TABLE) | - (1u << CTAT_LAYER) | - (1u << CTAT_LAYERCOL) | - (1u << CTAT_SUBTABLE) | - (1u << CTAT_COL)); - break; - case PIVOT_AXIS_ROW: - cell->omit_areas |= ((1u << CTAT_TABLE) | - (1u << CTAT_LAYER) | - (1u << CTAT_LAYERROW) | - (1u << CTAT_SUBTABLE) | - (1u << CTAT_ROW)); - break; - case PIVOT_AXIS_LAYER: - cell->omit_areas |= ((1u << CTAT_TABLE) | - (1u << CTAT_LAYER)); - break; - } - } - if (cat->type == CCT_POSTCOMPUTE) - cell->postcompute = true; - } + int start_ofs = lex_ofs (lexer); + struct ctables_category *cat = &c->cats[c->n_cats]; + if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat)) + goto error; + cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); + c->n_cats++; - cell->axes[a].cvs[i].category = cat; - value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var)); + lex_match (lexer, T_COMMA); } + while (!lex_match (lexer, T_RBRACK)); + cats_end_ofs = lex_ofs (lexer) - 1; } - const struct ctables_nest *ss = s->nests[s->table->summary_axis]; - const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; - cell->summaries = xmalloc (specs->n * sizeof *cell->summaries); - for (size_t i = 0; i < specs->n; i++) - ctables_summary_init (&cell->summaries[i], &specs->specs[i]); 
- for (enum ctables_area_type at = 0; at < N_CTATS; at++) - cell->areas[at] = ctables_area_insert (s, cell, at); - hmap_insert (&s->cells, &cell->node, hash); - return cell; -} - -static void -add_weight (double dst[N_CTWS], const double src[N_CTWS]) -{ - for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) - dst[wt] += src[wt]; -} - -static void -ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, - const struct ctables_category **cats[PIVOT_N_AXES], - bool is_included, double weight[N_CTWS]) -{ - struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); - const struct ctables_nest *ss = s->nests[s->table->summary_axis]; - - const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; - const union value *value = case_data (c, specs->var); - bool is_missing = var_is_value_missing (specs->var, value); - bool is_scale_missing - = is_missing || (specs->is_scale && is_listwise_missing (specs, c)); + struct ctables_category cat = { + .type = CCT_VALUE, + .include_missing = false, + .sort_ascending = true, + }; + bool show_totals = false; + char *total_label = NULL; + bool totals_before = false; + while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) + { + if (!c->n_cats && lex_match_id (lexer, "ORDER")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "A")) + cat.sort_ascending = true; + else if (lex_match_id (lexer, "D")) + cat.sort_ascending = false; + else + { + lex_error_expecting (lexer, "A", "D"); + goto error; + } + } + else if (!c->n_cats && lex_match_id (lexer, "KEY")) + { + int start_ofs = lex_ofs (lexer) - 1; + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "VALUE")) + cat.type = CCT_VALUE; + else if (lex_match_id (lexer, "LABEL")) + cat.type = CCT_LABEL; + else + { + cat.type = CCT_FUNCTION; + if (!parse_ctables_summary_function (lexer, &cat.sort_function, + &cat.weighting, &cat.area)) + goto error; - for (size_t i = 0; i < specs->n; i++) - ctables_summary_add (&cell->summaries[i], 
&specs->specs[i], value, - is_scale_missing, is_included, - weight[specs->specs[i].weighting]); - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - if (!(cell->omit_areas && (1u << at))) - { - struct ctables_area *a = cell->areas[at]; + if (lex_match (lexer, T_LPAREN)) + { + cat.sort_var = parse_variable (lexer, dict); + if (!cat.sort_var) + goto error; - add_weight (a->total, weight); - if (is_included) - add_weight (a->count, weight); - if (!is_missing) - { - add_weight (a->valid, weight); + if (cat.sort_function == CTSF_PTILE) + { + lex_match (lexer, T_COMMA); + if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100)) + goto error; + cat.percentile = lex_number (lexer); + lex_get (lexer); + } - if (!is_scale_missing) - for (size_t i = 0; i < s->table->n_sum_vars; i++) + if (!lex_force_match (lexer, T_RPAREN)) + goto error; + } + else if (ctables_function_availability (cat.sort_function) + == CTFA_SCALE) { - const struct variable *var = s->table->sum_vars[i]; - double addend = case_num (c, var); - if (!var_is_num_missing (var, addend)) - for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) - a->sums[i].sum[wt] += addend * weight[wt]; + bool UNUSED b = lex_force_match (lexer, T_LPAREN); + goto error; } - } - } -} -static void -recurse_totals (struct ctables_section *s, const struct ccase *c, - const struct ctables_category **cats[PIVOT_N_AXES], - bool is_included, double weight[N_CTWS], - enum pivot_axis_type start_axis, size_t start_nest) -{ - for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = start_nest; i < nest->n; i++) + lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, + _("Data-dependent sorting is not implemented.")); + goto error; + } + } + else if (!c->n_cats && lex_match_id (lexer, "MISSING")) { - if (i == nest->scale_idx) - continue; - - const struct variable *var = nest->vars[i]; - - const struct ctables_category *total = ctables_categories_total ( - 
s->table->categories[var_get_dict_index (var)]); - if (total) + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "INCLUDE")) + cat.include_missing = true; + else if (lex_match_id (lexer, "EXCLUDE")) + cat.include_missing = false; + else { - const struct ctables_category *save = cats[a][i]; - cats[a][i] = total; - ctables_cell_add__ (s, c, cats, is_included, weight); - recurse_totals (s, c, cats, is_included, weight, a, i + 1); - cats[a][i] = save; + lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); + goto error; } } - start_nest = 0; + else if (lex_match_id (lexer, "TOTAL")) + { + lex_match (lexer, T_EQUALS); + if (!parse_bool (lexer, &show_totals)) + goto error; + } + else if (lex_match_id (lexer, "LABEL")) + { + lex_match (lexer, T_EQUALS); + if (!lex_force_string (lexer)) + goto error; + free (total_label); + total_label = ss_xstrdup (lex_tokss (lexer)); + lex_get (lexer); + } + else if (lex_match_id (lexer, "POSITION")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "BEFORE")) + totals_before = true; + else if (lex_match_id (lexer, "AFTER")) + totals_before = false; + else + { + lex_error_expecting (lexer, "BEFORE", "AFTER"); + goto error; + } + } + else if (lex_match_id (lexer, "EMPTY")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "INCLUDE")) + c->show_empty = true; + else if (lex_match_id (lexer, "EXCLUDE")) + c->show_empty = false; + else + { + lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); + goto error; + } + } + else + { + if (!c->n_cats) + lex_error_expecting (lexer, "ORDER", "KEY", "MISSING", + "TOTAL", "LABEL", "POSITION", "EMPTY"); + else + lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY"); + goto error; + } } -} -static void -recurse_subtotals (struct ctables_section *s, const struct ccase *c, - const struct ctables_category **cats[PIVOT_N_AXES], - bool is_included, double weight[N_CTWS], - enum pivot_axis_type start_axis, size_t start_nest) -{ - for (enum pivot_axis_type a = start_axis; a 
< PIVOT_N_AXES; a++) + if (!c->n_cats) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = start_nest; i < nest->n; i++) + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + c->cats[c->n_cats++] = cat; + } + + if (show_totals) + { + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + + struct ctables_category *totals; + if (totals_before) { - if (i == nest->scale_idx) - continue; + insert_element (c->cats, c->n_cats, sizeof *c->cats, 0); + totals = &c->cats[0]; + } + else + totals = &c->cats[c->n_cats]; + c->n_cats++; - const struct ctables_category *save = cats[a][i]; - if (save->subtotal) - { - cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, is_included, weight); - recurse_subtotals (s, c, cats, is_included, weight, a, i + 1); - cats[a][i] = save; - } + *totals = (struct ctables_category) { + .type = CCT_TOTAL, + .total_label = total_label ? total_label : xstrdup (_("Total")), + }; + } + + struct ctables_category *subtotal = NULL; + for (size_t i = totals_before ? 0 : c->n_cats; + totals_before ? i < c->n_cats : i-- > 0; + totals_before ? 
i++ : 0) + { + struct ctables_category *cat = &c->cats[i]; + switch (cat->type) + { + case CCT_NUMBER: + case CCT_STRING: + case CCT_NRANGE: + case CCT_SRANGE: + case CCT_MISSING: + case CCT_OTHERNM: + cat->subtotal = subtotal; + break; + + case CCT_POSTCOMPUTE: + break; + + case CCT_SUBTOTAL: + subtotal = cat; + break; + + case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + break; } - start_nest = 0; } -} -static void -ctables_add_occurrence (const struct variable *var, - const union value *value, - struct hmap *occurrences) -{ - int width = var_get_width (var); - unsigned int hash = value_hash (value, width, 0); + if (cats_start_ofs != -1) + { + for (size_t i = 0; i < c->n_cats; i++) + { + struct ctables_category *cat = &c->cats[i]; + switch (cat->type) + { + case CCT_POSTCOMPUTE: + cat->parse_format = parse_strings ? common_format->type : FMT_F; + struct msg_location *cats_location + = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs); + bool ok = ctables_recursive_check_postcompute ( + dict, cat->pc->expr, cat, c, cats_location); + msg_location_destroy (cats_location); + if (!ok) + goto error; + break; - struct ctables_occurrence *o; - HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash, - occurrences) - if (value_equal (value, &o->value, width)) - return; + case CCT_NUMBER: + case CCT_NRANGE: + for (size_t j = 0; j < n_vars; j++) + if (var_is_alpha (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied " + "only to numeric variables, but this " + "subcommand tries to apply it to string " + "variable %s."), + var_get_name (vars[j])); + goto error; + } + break; - o = xmalloc (sizeof *o); - value_clone (&o->value, value, width); - hmap_insert (occurrences, &o->node, hash); -} + case CCT_STRING: + if (parse_strings) + { + double n; + if (!parse_category_string (cat->location, cat->string, dict, + common_format->type, &n)) + goto error; -static void 
-ctables_cell_insert (struct ctables_section *s, const struct ccase *c, - double weight[N_CTWS]) -{ - const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; - const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; - const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; - const struct ctables_category **cats[PIVOT_N_AXES] = - { - [PIVOT_AXIS_LAYER] = layer_cats, - [PIVOT_AXIS_ROW] = row_cats, - [PIVOT_AXIS_COLUMN] = column_cats, - }; + ss_dealloc (&cat->string); - bool is_included = true; + cat->type = CCT_NUMBER; + cat->number = n; + } + else if (!all_strings (vars, n_vars, cat)) + goto error; + break; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); + case CCT_SRANGE: + if (parse_strings) + { + double n[2]; - cats[a][i] = ctables_categories_match ( - s->table->categories[var_get_dict_index (var)], value, var); - if (!cats[a][i]) - { - if (i != nest->summary_idx) - return; + if (!cat->srange[0].string) + n[0] = -DBL_MAX; + else if (!parse_category_string (cat->location, + cat->srange[0], dict, + common_format->type, &n[0])) + goto error; - if (!var_is_value_missing (var, value)) - return; + if (!cat->srange[1].string) + n[1] = DBL_MAX; + else if (!parse_category_string (cat->location, + cat->srange[1], dict, + common_format->type, &n[1])) + goto error; - static const struct ctables_category cct_excluded_missing = { - .type = CCT_EXCLUDED_MISSING, - .hide = true, - }; - cats[a][i] = &cct_excluded_missing; - is_included = false; - } + ss_dealloc (&cat->srange[0]); + ss_dealloc (&cat->srange[1]); + + cat->type = CCT_NRANGE; + cat->nrange[0] = n[0]; + cat->nrange[1] = n[1]; + } + else if (!all_strings (vars, n_vars, cat)) + goto error; + break; + + case CCT_MISSING: + case CCT_OTHERNM: + 
case CCT_SUBTOTAL: + case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + break; + } } } - if (is_included) - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); - ctables_add_occurrence (var, value, &s->occurrences[a][i]); - } - } + free (vars); + return true; - ctables_cell_add__ (s, c, cats, is_included, weight); - recurse_totals (s, c, cats, is_included, weight, 0, 0); - recurse_subtotals (s, c, cats, is_included, weight, 0, 0); +error: + free (vars); + return false; } + struct merge_item { @@ -3955,97 +4049,6 @@ merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) return strcmp (as_label, bs_label); } -static struct pivot_value * -ctables_postcompute_label (const struct ctables_categories *cats, - const struct ctables_category *cat, - const struct variable *var) -{ - struct substring in = ss_cstr (cat->pc->label); - struct substring target = ss_cstr (")LABEL["); - - struct string out = DS_EMPTY_INITIALIZER; - for (;;) - { - size_t chunk = ss_find_substring (in, target); - if (chunk == SIZE_MAX) - { - if (ds_is_empty (&out)) - return pivot_value_new_user_text (in.string, in.length); - else - { - ds_put_substring (&out, in); - return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out)); - } - } - - ds_put_substring (&out, ss_head (in, chunk)); - ss_advance (&in, chunk + target.length); - - struct substring idx_s; - if (!ss_get_until (&in, ']', &idx_s)) - goto error; - char *tail; - long int idx = strtol (idx_s.string, &tail, 10); - if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s)) - goto error; - - struct ctables_category *cat2 = &cats->cats[idx - 1]; - if (!ctables_category_format_label (cat2, var, &out)) - goto error; - } - -error: - ds_destroy 
(&out); - return pivot_value_new_user_text (cat->pc->label, SIZE_MAX); -} - -static struct pivot_value * -ctables_category_create_value_label (const struct ctables_categories *cats, - const struct ctables_category *cat, - const struct variable *var, - const union value *value) -{ - return (cat->type == CCT_POSTCOMPUTE && cat->pc->label - ? ctables_postcompute_label (cats, cat, var) - : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL - ? pivot_value_new_user_text (cat->total_label, SIZE_MAX) - : pivot_value_new_var_value (var, value)); -} - -static struct ctables_value * -ctables_value_find__ (struct ctables_table *t, const union value *value, - int width, unsigned int hash) -{ - struct ctables_value *clv; - HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node, - hash, &t->clabels_values_map) - if (value_equal (value, &clv->value, width)) - return clv; - return NULL; -} - -static void -ctables_value_insert (struct ctables_table *t, const union value *value, - int width) -{ - unsigned int hash = value_hash (value, width, 0); - struct ctables_value *clv = ctables_value_find__ (t, value, width, hash); - if (!clv) - { - clv = xmalloc (sizeof *clv); - value_clone (&clv->value, value, width); - hmap_insert (&t->clabels_values_map, &clv->node, hash); - } -} - -static struct ctables_value * -ctables_value_find (struct ctables_table *t, - const union value *value, int width) -{ - return ctables_value_find__ (t, value, width, - value_hash (value, width, 0)); -} - static void ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, size_t ix[PIVOT_N_AXES]) @@ -5232,7 +5235,7 @@ ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c, } static int -compare_clabels_values_3way (const void *a_, const void *b_, const void *width_) +compare_ctables_values_3way (const void *a_, const void *b_, const void *width_) { const struct ctables_value *const *ap = a_; const struct ctables_value *const *bp = b_; @@ -5269,7 +5272,7 @@ 
ctables_sort_clabels_values (struct ctables_table *t) assert (i == n); sort (t->clabels_values, n, sizeof *t->clabels_values, - compare_clabels_values_3way, &width); + compare_ctables_values_3way, &width); for (size_t i = 0; i < n; i++) t->clabels_values[i]->leaf = i; -- 2.30.2