From 2088d7438791ad96dda2037a6ac7e9b0f3998c8b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 27 Aug 2022 21:38:29 -0700 Subject: [PATCH] add examples --- doc/automake.mk | 4 + doc/pspp-figures/ctables27.sps | 3 + doc/pspp-figures/ctables28.sps | 3 + doc/pspp-figures/ctables29.sps | 6 + doc/pspp-figures/ctables30.sps | 3 + doc/statistics.texi | 45 +- src/language/stats/ctables.c | 1550 ++++++++++++++++--------------- tests/language/stats/ctables.at | 5 + 8 files changed, 839 insertions(+), 780 deletions(-) create mode 100644 doc/pspp-figures/ctables27.sps create mode 100644 doc/pspp-figures/ctables28.sps create mode 100644 doc/pspp-figures/ctables29.sps create mode 100644 doc/pspp-figures/ctables30.sps diff --git a/doc/automake.mk b/doc/automake.mk index cdb6843465..13fd8fdd15 100644 --- a/doc/automake.mk +++ b/doc/automake.mk @@ -143,6 +143,10 @@ FIGURE_SYNTAX = \ doc/pspp-figures/ctables24.sps \ doc/pspp-figures/ctables25.sps \ doc/pspp-figures/ctables26.sps \ + doc/pspp-figures/ctables27.sps \ + doc/pspp-figures/ctables28.sps \ + doc/pspp-figures/ctables29.sps \ + doc/pspp-figures/ctables30.sps \ doc/pspp-figures/crosstabs.sps \ doc/pspp-figures/descriptives.sps \ doc/pspp-figures/flip.sps \ diff --git a/doc/pspp-figures/ctables27.sps b/doc/pspp-figures/ctables27.sps new file mode 100644 index 0000000000..bc613c0e37 --- /dev/null +++ b/doc/pspp-figures/ctables27.sps @@ -0,0 +1,3 @@ +GET FILE='nhtsa.sav'. +CTABLES /TABLE qn1. +CTABLES /TABLE qn1 /CATEGORIES VARIABLES=qn1 [1, 2, 3]. diff --git a/doc/pspp-figures/ctables28.sps b/doc/pspp-figures/ctables28.sps new file mode 100644 index 0000000000..dd8ffb4deb --- /dev/null +++ b/doc/pspp-figures/ctables28.sps @@ -0,0 +1,3 @@ +GET FILE='nhtsa.sav'. +CTABLES /TABLE qn1. +CTABLES /TABLE qn1 /CATEGORIES VARIABLES=qn1 MISSING=INCLUDE. 
diff --git a/doc/pspp-figures/ctables29.sps b/doc/pspp-figures/ctables29.sps new file mode 100644 index 0000000000..944ab1e5db --- /dev/null +++ b/doc/pspp-figures/ctables29.sps @@ -0,0 +1,6 @@ +GET FILE='nhtsa.sav'. +CTABLES + /TABLE qn1 + /CATEGORIES VARIABLES=qn1 [OTHERNM, SUBTOTAL='Valid Total', + MISSING, SUBTOTAL='Missing Total'] + TOTAL=YES LABEL='Overall Total'. diff --git a/doc/pspp-figures/ctables30.sps b/doc/pspp-figures/ctables30.sps new file mode 100644 index 0000000000..569e4aed50 --- /dev/null +++ b/doc/pspp-figures/ctables30.sps @@ -0,0 +1,3 @@ +GET FILE='nhtsa.sav'. +CTABLES /TABLE=qn20 [MEAN F8.1, COUNT, VALIDN] > region + /CATEGORIES VARIABLES=region TOTAL=YES LABEL='All regions'. diff --git a/doc/statistics.texi b/doc/statistics.texi index 2e4c96c6a1..0cfceb225a 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -1658,7 +1658,14 @@ A subtotal (@pxref{CTABLES Totals and Subtotals}). If multiple elements of the list cover a given category, the last one in the list takes precedence. -@c TODO example +The following example syntax and output show how an explicit category +can limit the displayed categories: + +@example +CTABLES /TABLE qn1. +CTABLES /TABLE qn1 /CATEGORIES VARIABLES=qn1 [1, 2, 3]. +@end example +@psppoutput {ctables27} @node CTABLES Implicit Categories @subsubsection Implicit Categories @@ -1687,7 +1694,15 @@ User-missing values are excluded by default, or with @code{MISSING=EXCLUDE}. Specify @code{MISSING=INCLUDE} to include user-missing values. The system-missing value is always excluded. -@c TODO example +The following example syntax and output show how +@code{MISSING=INCLUDE} causes missing values to be included in a +category list. + +@example +CTABLES /TABLE qn1. +CTABLES /TABLE qn1 /CATEGORIES VARIABLES=qn1 MISSING=INCLUDE. +@end example +@psppoutput {ctables28} @node CTABLES Totals and Subtotals @subsubsection Totals and Subtotals @@ -1706,21 +1721,35 @@ subtotal. 
Either way, the default label is ``Subtotal'', use @code{SUBTOTAL="@i{label}"} or @code{HSUBTOTAL="@i{label}"} to specify a custom label. -@c TODO +The following example syntax and output show how to use +@code{TOTAL=YES} and @code{SUBTOTAL}: + +@example +CTABLES + /TABLE qn1 + /CATEGORIES VARIABLES=qn1 [OTHERNM, SUBTOTAL='Valid Total', + MISSING, SUBTOTAL='Missing Total'] + TOTAL=YES LABEL='Overall Total'. +@end example +@psppoutput {ctables29} By default, or with @code{POSITION=AFTER}, totals are displayed in the output after the last category and subtotals apply to categories that precede them. With @code{POSITION=BEFORE}, totals come before the first category and subtotals apply to categories that follow them. -@c TODO - Only categorical variables may have totals and subtotals. Scalar variables may be ``totaled'' indirectly by enabling totals and -subtotals on a categorical variable within which the scalar variable is -summarized. +subtotals on a categorical variable within which the scalar variable +is summarized. For example, the following syntax produces a mean, +count, and valid count across all data by adding a total on the +categorical @code{region} variable, as shown: -@c TODO +@example +CTABLES /TABLE=qn20 [MEAN F8.1, COUNT, VALIDN] > region + /CATEGORIES VARIABLES=region TOTAL=YES LABEL='All regions'. +@end example +@psppoutput {ctables30} By default, @pspp{} uses the same summary functions for totals and subtotals as other categories. To summarize totals and subtotals diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 938591dd47..7d2f15df5a 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -55,11 +55,14 @@ #define _(msgid) gettext (msgid) #define N_(msgid) (msgid) +struct ctables; + +/* The three forms of weighting supported by CTABLES. */ enum ctables_weighting { - CTW_EFFECTIVE, - CTW_DICTIONARY, - CTW_UNWEIGHTED + CTW_EFFECTIVE, /* Effective base weight (WEIGHT subcommand). 
*/ + CTW_DICTIONARY, /* Dictionary weight. */ + CTW_UNWEIGHTED /* No weight. */ #define N_CTWS 3 }; @@ -95,16 +98,22 @@ static const char *ctables_area_type_name[N_CTATS] = { [CTAT_COL] = "COL", }; +/* Summary statistics for an area. */ struct ctables_area { struct hmap_node node; - const struct ctables_cell *example; + /* Sequence number used for CTSF_ID. */ size_t sequence; + + /* Weights for CTSF_areaPCT_COUNT, CTSF_areaPCT_VALIDN, and + CTSF_areaPCT_TOTALN. */ double count[N_CTWS]; double valid[N_CTWS]; double total[N_CTWS]; + + /* Sums for CTSF_areaPCT_SUM. */ struct ctables_sum *sums; }; @@ -138,9 +147,9 @@ enum ctables_function_type enum ctables_format { - CTF_COUNT, - CTF_PERCENT, - CTF_GENERAL + CTF_COUNT, /* F40.0. */ + CTF_PERCENT, /* PCT40.1. */ + CTF_GENERAL /* Variable's print format. */ }; enum ctables_function_availability @@ -318,9 +327,7 @@ ctables_summary_function_label__ (enum ctables_summary_function function, switch (function) { case CTSF_COUNT: - return (d ? N_("Count") - : w ? N_("Adjusted Count") - : N_("Unweighted Count")); + return d ? N_("Count") : w ? N_("Adjusted Count") : N_("Unweighted Count"); case CTSF_areaPCT_COUNT: switch (a) @@ -461,8 +468,8 @@ struct ctables_summary_spec struct fmt_spec format; bool is_ctables_format; /* Is 'format' one of CTEF_*? */ - size_t axis_idx; - size_t sum_var_idx; + size_t axis_idx; /* Leaf index if summary dimension in use. */ + size_t sum_var_idx; /* Offset into 'sums' in ctables_area. */ }; static void @@ -619,7 +626,6 @@ struct ctables_pcexpr struct msg_location *location; }; -struct ctables; static struct ctables_postcompute *ctables_find_postcompute (struct ctables *, const char *name); @@ -3548,892 +3554,892 @@ struct ctables struct ctables_table **tables; size_t n_tables; }; - -/* Chi-square test (SIGTEST). */ -struct ctables_chisq - { - double alpha; - bool include_mrsets; - bool all_visible; - }; - -/* Pairwise comparison test (COMPARETEST). 
*/ -struct ctables_pairwise - { - enum { PROP, MEAN } type; - double alpha[2]; - bool include_mrsets; - bool meansvariance_allcats; - bool all_visible; - enum { BONFERRONI = 1, BH } adjust; - bool merge; - bool apa_style; - bool show_sig; - }; - - - -static bool -parse_col_width (struct lexer *lexer, const char *name, double *width) + +static double +ctpo_add (double a, double b) { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "DEFAULT")) - *width = SYSMIS; - else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX)) - { - *width = lex_number (lexer); - lex_get (lexer); - } - else - return false; - - return true; + return a + b; } -static bool -parse_bool (struct lexer *lexer, bool *b) +static double +ctpo_sub (double a, double b) { - if (lex_match_id (lexer, "NO")) - *b = false; - else if (lex_match_id (lexer, "YES")) - *b = true; - else - { - lex_error_expecting (lexer, "YES", "NO"); - return false; - } - return true; + return a - b; } -static void -ctables_chisq_destroy (struct ctables_chisq *chisq) +static double +ctpo_mul (double a, double b) { - free (chisq); + return a * b; } -static void -ctables_pairwise_destroy (struct ctables_pairwise *pairwise) +static double +ctpo_div (double a, double b) { - free (pairwise); + return b ? 
a / b : SYSMIS; } -static void -ctables_table_destroy (struct ctables_table *t) +static double +ctpo_pow (double a, double b) { - if (!t) - return; + int save_errno = errno; + errno = 0; + double result = pow (a, b); + if (errno) + result = SYSMIS; + errno = save_errno; + return result; +} - for (size_t i = 0; i < t->n_sections; i++) - ctables_section_uninit (&t->sections[i]); - free (t->sections); +static double +ctpo_neg (double a, double b UNUSED) +{ + return -a; +} - for (size_t i = 0; i < t->n_categories; i++) - ctables_categories_unref (t->categories[i]); - free (t->categories); +struct ctables_pcexpr_evaluate_ctx + { + const struct ctables_cell *cell; + const struct ctables_section *section; + const struct ctables_categories *cats; + enum pivot_axis_type pc_a; + size_t pc_a_idx; + size_t summary_idx; + enum fmt_type parse_format; + }; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - ctables_axis_destroy (t->axes[a]); - ctables_stack_uninit (&t->stacks[a]); - } - free (t->summary_specs.specs); +static double ctables_pcexpr_evaluate ( + const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *); - struct ctables_value *ctv, *next_ctv; - HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node, - &t->clabels_values_map) +static double +ctables_pcexpr_evaluate_nonterminal ( + const struct ctables_pcexpr_evaluate_ctx *ctx, + const struct ctables_pcexpr *e, size_t n_args, + double evaluate (double, double)) +{ + double args[2] = { 0, 0 }; + for (size_t i = 0; i < n_args; i++) { - value_destroy (&ctv->value, var_get_width (t->clabels_example)); - hmap_delete (&t->clabels_values_map, &ctv->node); - free (ctv); + args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]); + if (!isfinite (args[i]) || args[i] == SYSMIS) + return SYSMIS; } - hmap_destroy (&t->clabels_values_map); - free (t->clabels_values); - - free (t->sum_vars); - free (t->caption); - free (t->corner); - free (t->title); - ctables_chisq_destroy (t->chisq); - 
ctables_pairwise_destroy (t->pairwise); - free (t); + return evaluate (args[0], args[1]); } -static void -ctables_destroy (struct ctables *ct) +static double +ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx, + const struct ctables_cell_value *pc_cv) { - if (!ct) - return; + const struct ctables_section *s = ctx->section; - struct ctables_postcompute *pc, *next_pc; - HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node, - &ct->postcomputes) + size_t hash = 0; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - free (pc->name); - msg_location_destroy (pc->location); - ctables_pcexpr_destroy (pc->expr); - free (pc->label); - if (pc->specs) + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct ctables_cell_value *cv + = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv + : &ctx->cell->axes[a].cvs[i]); + hash = hash_pointer (cv->category, hash); + if (cv->category->type != CCT_TOTAL + && cv->category->type != CCT_SUBTOTAL + && cv->category->type != CCT_POSTCOMPUTE) + hash = value_hash (&cv->value, + var_get_width (nest->vars[i]), hash); + } + } + + struct ctables_cell *tc; + HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells) + { + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - ctables_summary_spec_set_uninit (pc->specs); - free (pc->specs); + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct ctables_cell_value *p_cv + = (a == ctx->pc_a && i == ctx->pc_a_idx ? 
pc_cv + : &ctx->cell->axes[a].cvs[i]); + const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i]; + if (p_cv->category != t_cv->category + || (p_cv->category->type != CCT_TOTAL + && p_cv->category->type != CCT_SUBTOTAL + && p_cv->category->type != CCT_POSTCOMPUTE + && !value_equal (&p_cv->value, + &t_cv->value, + var_get_width (nest->vars[i])))) + goto not_equal; + } } - hmap_delete (&ct->postcomputes, &pc->hmap_node); - free (pc); - } - hmap_destroy (&ct->postcomputes); - fmt_settings_uninit (&ct->ctables_formats); - pivot_table_look_unref (ct->look); - free (ct->zero); - free (ct->missing); - free (ct->vlabels); - for (size_t i = 0; i < ct->n_tables; i++) - ctables_table_destroy (ct->tables[i]); - free (ct->tables); - free (ct); -} + goto found; -static bool -all_strings (struct variable **vars, size_t n_vars, - const struct ctables_category *cat) -{ - for (size_t j = 0; j < n_vars; j++) - if (var_is_numeric (vars[j])) - { - msg_at (SE, cat->location, - _("This category specification may be applied only to string " - "variables, but this subcommand tries to apply it to " - "numeric variable %s."), - var_get_name (vars[j])); - return false; - } - return true; + not_equal: ; + } + return 0; + +found: ; + const struct ctables_table *t = s->table; + const struct ctables_nest *specs_nest = s->nests[t->summary_axis]; + const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv]; + return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx], + &specs->specs[ctx->summary_idx]); } -static bool -ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, - struct ctables *ct, struct ctables_table *t) +static double +ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx, + const struct ctables_pcexpr *e) { - if (!lex_match_id (lexer, "VARIABLES")) - return false; - lex_match (lexer, T_EQUALS); + switch (e->op) + { + case CTPO_CONSTANT: + return e->number; - struct variable **vars; - size_t n_vars; - if 
(!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH)) - return false; - - const struct fmt_spec *common_format = var_get_print_format (vars[0]); - for (size_t i = 1; i < n_vars; i++) - { - const struct fmt_spec *f = var_get_print_format (vars[i]); - if (f->type != common_format->type) - { - common_format = NULL; - break; - } - } - bool parse_strings - = (common_format - && (fmt_get_category (common_format->type) - & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))); + case CTPO_CAT_NRANGE: + case CTPO_CAT_SRANGE: + case CTPO_CAT_MISSING: + case CTPO_CAT_OTHERNM: + { + struct ctables_cell_value cv = { + .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e) + }; + assert (cv.category != NULL); - struct ctables_categories *c = xmalloc (sizeof *c); - *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true }; - for (size_t i = 0; i < n_vars; i++) - { - struct ctables_categories **cp - = &t->categories[var_get_dict_index (vars[i])]; - ctables_categories_unref (*cp); - *cp = c; - } + struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx]; + const struct ctables_occurrence *o; - size_t allocated_cats = 0; - int cats_start_ofs = -1; - int cats_end_ofs = -1; - if (lex_match (lexer, T_LBRACK)) - { - cats_start_ofs = lex_ofs (lexer); - do - { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + double sum = 0.0; + const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]; + HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences) + if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category) + { + cv.value = o->value; + sum += ctables_pcexpr_evaluate_category (ctx, &cv); + } + return sum; + } - int start_ofs = lex_ofs (lexer); - struct ctables_category *cat = &c->cats[c->n_cats]; - if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat)) - goto error; - 
cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); - c->n_cats++; + case CTPO_CAT_NUMBER: + case CTPO_CAT_SUBTOTAL: + case CTPO_CAT_TOTAL: + { + struct ctables_cell_value cv = { + .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e), + .value = { .f = e->number }, + }; + assert (cv.category != NULL); + return ctables_pcexpr_evaluate_category (ctx, &cv); + } - lex_match (lexer, T_COMMA); - } - while (!lex_match (lexer, T_RBRACK)); - cats_end_ofs = lex_ofs (lexer) - 1; - } + case CTPO_CAT_STRING: + { + int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]); + char *s = NULL; + if (width > e->string.length) + { + s = xmalloc (width); + buf_copy_rpad (s, width, e->string.string, e->string.length, ' '); + } - struct ctables_category cat = { - .type = CCT_VALUE, - .include_missing = false, - .sort_ascending = true, - }; - bool show_totals = false; - char *total_label = NULL; - bool totals_before = false; - while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) - { - if (!c->n_cats && lex_match_id (lexer, "ORDER")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "A")) - cat.sort_ascending = true; - else if (lex_match_id (lexer, "D")) - cat.sort_ascending = false; - else - { - lex_error_expecting (lexer, "A", "D"); - goto error; - } - } - else if (!c->n_cats && lex_match_id (lexer, "KEY")) - { - int start_ofs = lex_ofs (lexer) - 1; - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "VALUE")) - cat.type = CCT_VALUE; - else if (lex_match_id (lexer, "LABEL")) - cat.type = CCT_LABEL; - else - { - cat.type = CCT_FUNCTION; - if (!parse_ctables_summary_function (lexer, &cat.sort_function, - &cat.weighting, &cat.area)) - goto error; + const struct ctables_category *category + = ctables_find_category_for_postcompute ( + ctx->section->table->ctables->dict, + ctx->cats, ctx->parse_format, e); + assert (category != NULL); - if 
(lex_match (lexer, T_LPAREN)) - { - cat.sort_var = parse_variable (lexer, dict); - if (!cat.sort_var) - goto error; + struct ctables_cell_value cv = { .category = category }; + if (category->type == CCT_NUMBER) + cv.value.f = category->number; + else if (category->type == CCT_STRING) + cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string); + else + NOT_REACHED (); - if (cat.sort_function == CTSF_PTILE) - { - lex_match (lexer, T_COMMA); - if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100)) - goto error; - cat.percentile = lex_number (lexer); - lex_get (lexer); - } + double retval = ctables_pcexpr_evaluate_category (ctx, &cv); + free (s); + return retval; + } - if (!lex_force_match (lexer, T_RPAREN)) - goto error; - } - else if (ctables_function_availability (cat.sort_function) - == CTFA_SCALE) - { - bool UNUSED b = lex_force_match (lexer, T_LPAREN); - goto error; - } + case CTPO_ADD: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add); - lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, - _("Data-dependent sorting is not implemented.")); - goto error; - } - } - else if (!c->n_cats && lex_match_id (lexer, "MISSING")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "INCLUDE")) - cat.include_missing = true; - else if (lex_match_id (lexer, "EXCLUDE")) - cat.include_missing = false; - else - { - lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); - goto error; - } - } - else if (lex_match_id (lexer, "TOTAL")) - { - lex_match (lexer, T_EQUALS); - if (!parse_bool (lexer, &show_totals)) - goto error; - } - else if (lex_match_id (lexer, "LABEL")) - { - lex_match (lexer, T_EQUALS); - if (!lex_force_string (lexer)) - goto error; - free (total_label); - total_label = ss_xstrdup (lex_tokss (lexer)); - lex_get (lexer); - } - else if (lex_match_id (lexer, "POSITION")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "BEFORE")) - totals_before = true; - else if (lex_match_id (lexer, "AFTER")) - totals_before = false; - else 
- { - lex_error_expecting (lexer, "BEFORE", "AFTER"); - goto error; - } - } - else if (lex_match_id (lexer, "EMPTY")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "INCLUDE")) - c->show_empty = true; - else if (lex_match_id (lexer, "EXCLUDE")) - c->show_empty = false; - else - { - lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); - goto error; - } - } - else - { - if (!c->n_cats) - lex_error_expecting (lexer, "ORDER", "KEY", "MISSING", - "TOTAL", "LABEL", "POSITION", "EMPTY"); - else - lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY"); - goto error; - } - } + case CTPO_SUB: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub); - if (!c->n_cats) - { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); - c->cats[c->n_cats++] = cat; - } + case CTPO_MUL: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul); - if (show_totals) - { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + case CTPO_DIV: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div); - struct ctables_category *totals; - if (totals_before) - { - insert_element (c->cats, c->n_cats, sizeof *c->cats, 0); - totals = &c->cats[0]; - } - else - totals = &c->cats[c->n_cats]; - c->n_cats++; + case CTPO_POW: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow); - *totals = (struct ctables_category) { - .type = CCT_TOTAL, - .total_label = total_label ? total_label : xstrdup (_("Total")), - }; + case CTPO_NEG: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg); } - struct ctables_category *subtotal = NULL; - for (size_t i = totals_before ? 0 : c->n_cats; - totals_before ? i < c->n_cats : i-- > 0; - totals_before ? 
i++ : 0) - { - struct ctables_category *cat = &c->cats[i]; - switch (cat->type) - { - case CCT_NUMBER: - case CCT_STRING: - case CCT_NRANGE: - case CCT_SRANGE: - case CCT_MISSING: - case CCT_OTHERNM: - cat->subtotal = subtotal; - break; + NOT_REACHED (); +} - case CCT_POSTCOMPUTE: - break; +static const struct ctables_category * +ctables_cell_postcompute (const struct ctables_section *s, + const struct ctables_cell *cell, + enum pivot_axis_type *pc_a_p, + size_t *pc_a_idx_p) +{ + assert (cell->postcompute); + const struct ctables_category *pc_cat = NULL; + for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++) + for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++) + { + const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx]; + if (cv->category->type == CCT_POSTCOMPUTE) + { + if (pc_cat) + { + /* Multiple postcomputes cross each other. The value is + undefined. */ + return NULL; + } - case CCT_SUBTOTAL: - subtotal = cat; - break; + pc_cat = cv->category; + if (pc_a_p) + *pc_a_p = pc_a; + if (pc_a_idx_p) + *pc_a_idx_p = pc_a_idx; + } + } - case CCT_TOTAL: - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: - break; - } - } + assert (pc_cat != NULL); + return pc_cat; +} - if (cats_start_ofs != -1) +static double +ctables_cell_calculate_postcompute (const struct ctables_section *s, + const struct ctables_cell *cell, + const struct ctables_summary_spec *ss, + struct fmt_spec *format, + bool *is_ctables_format, + size_t summary_idx) +{ + enum pivot_axis_type pc_a = 0; + size_t pc_a_idx = 0; + const struct ctables_category *pc_cat = ctables_cell_postcompute ( + s, cell, &pc_a, &pc_a_idx); + if (!pc_cat) + return SYSMIS; + + const struct ctables_postcompute *pc = pc_cat->pc; + if (pc->specs) { - for (size_t i = 0; i < c->n_cats; i++) + for (size_t i = 0; i < pc->specs->n; i++) { - struct ctables_category *cat = &c->cats[i]; - switch (cat->type) + const struct ctables_summary_spec *ss2 = 
&pc->specs->specs[i]; + if (ss->function == ss2->function + && ss->weighting == ss2->weighting + && ss->calc_area == ss2->calc_area + && ss->percentile == ss2->percentile) { - case CCT_POSTCOMPUTE: - cat->parse_format = parse_strings ? common_format->type : FMT_F; - struct msg_location *cats_location - = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs); - bool ok = ctables_recursive_check_postcompute ( - dict, cat->pc->expr, cat, c, cats_location); - msg_location_destroy (cats_location); - if (!ok) - goto error; - break; - - case CCT_NUMBER: - case CCT_NRANGE: - for (size_t j = 0; j < n_vars; j++) - if (var_is_alpha (vars[j])) - { - msg_at (SE, cat->location, - _("This category specification may be applied " - "only to numeric variables, but this " - "subcommand tries to apply it to string " - "variable %s."), - var_get_name (vars[j])); - goto error; - } + *format = ss2->format; + *is_ctables_format = ss2->is_ctables_format; break; + } + } + } - case CCT_STRING: - if (parse_strings) - { - double n; - if (!parse_category_string (cat->location, cat->string, dict, - common_format->type, &n)) - goto error; - - ss_dealloc (&cat->string); - - cat->type = CCT_NUMBER; - cat->number = n; - } - else if (!all_strings (vars, n_vars, cat)) - goto error; - break; + const struct variable *var = s->nests[pc_a]->vars[pc_a_idx]; + const struct ctables_categories *cats = s->table->categories[ + var_get_dict_index (var)]; + struct ctables_pcexpr_evaluate_ctx ctx = { + .cell = cell, + .section = s, + .cats = cats, + .pc_a = pc_a, + .pc_a_idx = pc_a_idx, + .summary_idx = summary_idx, + .parse_format = pc_cat->parse_format, + }; + return ctables_pcexpr_evaluate (&ctx, pc->expr); +} + +/* Chi-square test (SIGTEST). */ +struct ctables_chisq + { + double alpha; + bool include_mrsets; + bool all_visible; + }; - case CCT_SRANGE: - if (parse_strings) - { - double n[2]; +/* Pairwise comparison test (COMPARETEST). 
*/ +struct ctables_pairwise + { + enum { PROP, MEAN } type; + double alpha[2]; + bool include_mrsets; + bool meansvariance_allcats; + bool all_visible; + enum { BONFERRONI = 1, BH } adjust; + bool merge; + bool apa_style; + bool show_sig; + }; - if (!cat->srange[0].string) - n[0] = -DBL_MAX; - else if (!parse_category_string (cat->location, - cat->srange[0], dict, - common_format->type, &n[0])) - goto error; - if (!cat->srange[1].string) - n[1] = DBL_MAX; - else if (!parse_category_string (cat->location, - cat->srange[1], dict, - common_format->type, &n[1])) - goto error; - ss_dealloc (&cat->srange[0]); - ss_dealloc (&cat->srange[1]); +static bool +parse_col_width (struct lexer *lexer, const char *name, double *width) +{ + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "DEFAULT")) + *width = SYSMIS; + else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX)) + { + *width = lex_number (lexer); + lex_get (lexer); + } + else + return false; - cat->type = CCT_NRANGE; - cat->nrange[0] = n[0]; - cat->nrange[1] = n[1]; - } - else if (!all_strings (vars, n_vars, cat)) - goto error; - break; + return true; +} - case CCT_MISSING: - case CCT_OTHERNM: - case CCT_SUBTOTAL: - case CCT_TOTAL: - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: - break; - } - } +static bool +parse_bool (struct lexer *lexer, bool *b) +{ + if (lex_match_id (lexer, "NO")) + *b = false; + else if (lex_match_id (lexer, "YES")) + *b = true; + else + { + lex_error_expecting (lexer, "YES", "NO"); + return false; } - - free (vars); return true; - -error: - free (vars); - return false; } - - -struct merge_item - { - const struct ctables_summary_spec_set *set; - size_t ofs; - }; -static int -merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) +static void +ctables_chisq_destroy (struct ctables_chisq *chisq) { - const struct ctables_summary_spec *as = &a->set->specs[a->ofs]; - const struct ctables_summary_spec *bs = 
&b->set->specs[b->ofs]; - if (as->function != bs->function) - return as->function > bs->function ? 1 : -1; - else if (as->weighting != bs->weighting) - return as->weighting > bs->weighting ? 1 : -1; - else if (as->calc_area != bs->calc_area) - return as->calc_area > bs->calc_area ? 1 : -1; - else if (as->percentile != bs->percentile) - return as->percentile < bs->percentile ? 1 : -1; + free (chisq); +} - const char *as_label = as->label ? as->label : ""; - const char *bs_label = bs->label ? bs->label : ""; - return strcmp (as_label, bs_label); +static void +ctables_pairwise_destroy (struct ctables_pairwise *pairwise) +{ + free (pairwise); } static void -ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, - size_t ix[PIVOT_N_AXES]) +ctables_table_destroy (struct ctables_table *t) { - if (a < PIVOT_N_AXES) + if (!t) + return; + + for (size_t i = 0; i < t->n_sections; i++) + ctables_section_uninit (&t->sections[i]); + free (t->sections); + + for (size_t i = 0; i < t->n_categories; i++) + ctables_categories_unref (t->categories[i]); + free (t->categories); + + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - size_t limit = MAX (t->stacks[a].n, 1); - for (ix[a] = 0; ix[a] < limit; ix[a]++) - ctables_table_add_section (t, a + 1, ix); + ctables_axis_destroy (t->axes[a]); + ctables_stack_uninit (&t->stacks[a]); } - else + free (t->summary_specs.specs); + + struct ctables_value *ctv, *next_ctv; + HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node, + &t->clabels_values_map) { - struct ctables_section *s = &t->sections[t->n_sections++]; - *s = (struct ctables_section) { - .table = t, - .cells = HMAP_INITIALIZER (s->cells), - }; - for (a = 0; a < PIVOT_N_AXES; a++) - if (t->stacks[a].n) - { - struct ctables_nest *nest = &t->stacks[a].nests[ix[a]]; - s->nests[a] = nest; - s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]); - for (size_t i = 0; i < nest->n; i++) - hmap_init (&s->occurrences[a][i]); - } - for (enum 
ctables_area_type at = 0; at < N_CTATS; at++) - hmap_init (&s->areas[at]); + value_destroy (&ctv->value, var_get_width (t->clabels_example)); + hmap_delete (&t->clabels_values_map, &ctv->node); + free (ctv); } -} + hmap_destroy (&t->clabels_values_map); + free (t->clabels_values); -static double -ctpo_add (double a, double b) -{ - return a + b; + free (t->sum_vars); + free (t->caption); + free (t->corner); + free (t->title); + ctables_chisq_destroy (t->chisq); + ctables_pairwise_destroy (t->pairwise); + free (t); } -static double -ctpo_sub (double a, double b) +static void +ctables_destroy (struct ctables *ct) { - return a - b; -} + if (!ct) + return; -static double -ctpo_mul (double a, double b) -{ - return a * b; + struct ctables_postcompute *pc, *next_pc; + HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node, + &ct->postcomputes) + { + free (pc->name); + msg_location_destroy (pc->location); + ctables_pcexpr_destroy (pc->expr); + free (pc->label); + if (pc->specs) + { + ctables_summary_spec_set_uninit (pc->specs); + free (pc->specs); + } + hmap_delete (&ct->postcomputes, &pc->hmap_node); + free (pc); + } + hmap_destroy (&ct->postcomputes); + + fmt_settings_uninit (&ct->ctables_formats); + pivot_table_look_unref (ct->look); + free (ct->zero); + free (ct->missing); + free (ct->vlabels); + for (size_t i = 0; i < ct->n_tables; i++) + ctables_table_destroy (ct->tables[i]); + free (ct->tables); + free (ct); } -static double -ctpo_div (double a, double b) +static bool +all_strings (struct variable **vars, size_t n_vars, + const struct ctables_category *cat) { - return b ? 
a / b : SYSMIS; + for (size_t j = 0; j < n_vars; j++) + if (var_is_numeric (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied only to string " + "variables, but this subcommand tries to apply it to " + "numeric variable %s."), + var_get_name (vars[j])); + return false; + } + return true; } -static double -ctpo_pow (double a, double b) +static bool +ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, + struct ctables *ct, struct ctables_table *t) { - int save_errno = errno; - errno = 0; - double result = pow (a, b); - if (errno) - result = SYSMIS; - errno = save_errno; - return result; -} + if (!lex_force_match_id (lexer, "VARIABLES")) + return false; + lex_match (lexer, T_EQUALS); + + struct variable **vars; + size_t n_vars; + if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH)) + return false; + + const struct fmt_spec *common_format = var_get_print_format (vars[0]); + for (size_t i = 1; i < n_vars; i++) + { + const struct fmt_spec *f = var_get_print_format (vars[i]); + if (f->type != common_format->type) + { + common_format = NULL; + break; + } + } + bool parse_strings + = (common_format + && (fmt_get_category (common_format->type) + & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))); + + struct ctables_categories *c = xmalloc (sizeof *c); + *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true }; + for (size_t i = 0; i < n_vars; i++) + { + struct ctables_categories **cp + = &t->categories[var_get_dict_index (vars[i])]; + ctables_categories_unref (*cp); + *cp = c; + } + + size_t allocated_cats = 0; + int cats_start_ofs = -1; + int cats_end_ofs = -1; + if (lex_match (lexer, T_LBRACK)) + { + cats_start_ofs = lex_ofs (lexer); + do + { + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + + int start_ofs = lex_ofs (lexer); + struct ctables_category *cat = &c->cats[c->n_cats]; + if 
(!ctables_table_parse_explicit_category (lexer, dict, ct, cat)) + goto error; + cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); + c->n_cats++; + + lex_match (lexer, T_COMMA); + } + while (!lex_match (lexer, T_RBRACK)); + cats_end_ofs = lex_ofs (lexer) - 1; + } + + struct ctables_category cat = { + .type = CCT_VALUE, + .include_missing = false, + .sort_ascending = true, + }; + bool show_totals = false; + char *total_label = NULL; + bool totals_before = false; + while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) + { + if (!c->n_cats && lex_match_id (lexer, "ORDER")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "A")) + cat.sort_ascending = true; + else if (lex_match_id (lexer, "D")) + cat.sort_ascending = false; + else + { + lex_error_expecting (lexer, "A", "D"); + goto error; + } + } + else if (!c->n_cats && lex_match_id (lexer, "KEY")) + { + int start_ofs = lex_ofs (lexer) - 1; + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "VALUE")) + cat.type = CCT_VALUE; + else if (lex_match_id (lexer, "LABEL")) + cat.type = CCT_LABEL; + else + { + cat.type = CCT_FUNCTION; + if (!parse_ctables_summary_function (lexer, &cat.sort_function, + &cat.weighting, &cat.area)) + goto error; -static double -ctpo_neg (double a, double b UNUSED) -{ - return -a; -} + if (lex_match (lexer, T_LPAREN)) + { + cat.sort_var = parse_variable (lexer, dict); + if (!cat.sort_var) + goto error; -struct ctables_pcexpr_evaluate_ctx - { - const struct ctables_cell *cell; - const struct ctables_section *section; - const struct ctables_categories *cats; - enum pivot_axis_type pc_a; - size_t pc_a_idx; - size_t summary_idx; - enum fmt_type parse_format; - }; + if (cat.sort_function == CTSF_PTILE) + { + lex_match (lexer, T_COMMA); + if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100)) + goto error; + cat.percentile = lex_number (lexer); + lex_get (lexer); + } -static double ctables_pcexpr_evaluate ( - const struct 
ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *); + if (!lex_force_match (lexer, T_RPAREN)) + goto error; + } + else if (ctables_function_availability (cat.sort_function) + == CTFA_SCALE) + { + bool UNUSED b = lex_force_match (lexer, T_LPAREN); + goto error; + } -static double -ctables_pcexpr_evaluate_nonterminal ( - const struct ctables_pcexpr_evaluate_ctx *ctx, - const struct ctables_pcexpr *e, size_t n_args, - double evaluate (double, double)) -{ - double args[2] = { 0, 0 }; - for (size_t i = 0; i < n_args; i++) - { - args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]); - if (!isfinite (args[i]) || args[i] == SYSMIS) - return SYSMIS; + lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, + _("Data-dependent sorting is not implemented.")); + goto error; + } + } + else if (!c->n_cats && lex_match_id (lexer, "MISSING")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "INCLUDE")) + cat.include_missing = true; + else if (lex_match_id (lexer, "EXCLUDE")) + cat.include_missing = false; + else + { + lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); + goto error; + } + } + else if (lex_match_id (lexer, "TOTAL")) + { + lex_match (lexer, T_EQUALS); + if (!parse_bool (lexer, &show_totals)) + goto error; + } + else if (lex_match_id (lexer, "LABEL")) + { + lex_match (lexer, T_EQUALS); + if (!lex_force_string (lexer)) + goto error; + free (total_label); + total_label = ss_xstrdup (lex_tokss (lexer)); + lex_get (lexer); + } + else if (lex_match_id (lexer, "POSITION")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "BEFORE")) + totals_before = true; + else if (lex_match_id (lexer, "AFTER")) + totals_before = false; + else + { + lex_error_expecting (lexer, "BEFORE", "AFTER"); + goto error; + } + } + else if (lex_match_id (lexer, "EMPTY")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "INCLUDE")) + c->show_empty = true; + else if (lex_match_id (lexer, "EXCLUDE")) + c->show_empty = false; + else + { + lex_error_expecting 
(lexer, "INCLUDE", "EXCLUDE"); + goto error; + } + } + else + { + if (!c->n_cats) + lex_error_expecting (lexer, "ORDER", "KEY", "MISSING", + "TOTAL", "LABEL", "POSITION", "EMPTY"); + else + lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY"); + goto error; + } } - return evaluate (args[0], args[1]); -} - -static double -ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx, - const struct ctables_cell_value *pc_cv) -{ - const struct ctables_section *s = ctx->section; - size_t hash = 0; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (!c->n_cats) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct ctables_cell_value *cv - = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv - : &ctx->cell->axes[a].cvs[i]); - hash = hash_pointer (cv->category, hash); - if (cv->category->type != CCT_TOTAL - && cv->category->type != CCT_SUBTOTAL - && cv->category->type != CCT_POSTCOMPUTE) - hash = value_hash (&cv->value, - var_get_width (nest->vars[i]), hash); - } + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + c->cats[c->n_cats++] = cat; } - struct ctables_cell *tc; - HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells) + if (show_totals) { - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + + struct ctables_category *totals; + if (totals_before) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct ctables_cell_value *p_cv - = (a == ctx->pc_a && i == ctx->pc_a_idx ? 
pc_cv - : &ctx->cell->axes[a].cvs[i]); - const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i]; - if (p_cv->category != t_cv->category - || (p_cv->category->type != CCT_TOTAL - && p_cv->category->type != CCT_SUBTOTAL - && p_cv->category->type != CCT_POSTCOMPUTE - && !value_equal (&p_cv->value, - &t_cv->value, - var_get_width (nest->vars[i])))) - goto not_equal; - } + insert_element (c->cats, c->n_cats, sizeof *c->cats, 0); + totals = &c->cats[0]; } + else + totals = &c->cats[c->n_cats]; + c->n_cats++; - goto found; - - not_equal: ; + *totals = (struct ctables_category) { + .type = CCT_TOTAL, + .total_label = total_label ? total_label : xstrdup (_("Total")), + }; } - return 0; - -found: ; - const struct ctables_table *t = s->table; - const struct ctables_nest *specs_nest = s->nests[t->summary_axis]; - const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv]; - return ctables_summary_value (tc->areas, &tc->summaries[ctx->summary_idx], - &specs->specs[ctx->summary_idx]); -} -static double -ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx, - const struct ctables_pcexpr *e) -{ - switch (e->op) + struct ctables_category *subtotal = NULL; + for (size_t i = totals_before ? 0 : c->n_cats; + totals_before ? i < c->n_cats : i-- > 0; + totals_before ? 
i++ : 0) { - case CTPO_CONSTANT: - return e->number; + struct ctables_category *cat = &c->cats[i]; + switch (cat->type) + { + case CCT_NUMBER: + case CCT_STRING: + case CCT_NRANGE: + case CCT_SRANGE: + case CCT_MISSING: + case CCT_OTHERNM: + cat->subtotal = subtotal; + break; - case CTPO_CAT_NRANGE: - case CTPO_CAT_SRANGE: - case CTPO_CAT_MISSING: - case CTPO_CAT_OTHERNM: - { - struct ctables_cell_value cv = { - .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e) - }; - assert (cv.category != NULL); + case CCT_POSTCOMPUTE: + break; - struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx]; - const struct ctables_occurrence *o; + case CCT_SUBTOTAL: + subtotal = cat; + break; - double sum = 0.0; - const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]; - HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences) - if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category) - { - cv.value = o->value; - sum += ctables_pcexpr_evaluate_category (ctx, &cv); - } - return sum; - } + case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + break; + } + } - case CTPO_CAT_NUMBER: - case CTPO_CAT_SUBTOTAL: - case CTPO_CAT_TOTAL: - { - struct ctables_cell_value cv = { - .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e), - .value = { .f = e->number }, - }; - assert (cv.category != NULL); - return ctables_pcexpr_evaluate_category (ctx, &cv); - } + if (cats_start_ofs != -1) + { + for (size_t i = 0; i < c->n_cats; i++) + { + struct ctables_category *cat = &c->cats[i]; + switch (cat->type) + { + case CCT_POSTCOMPUTE: + cat->parse_format = parse_strings ? 
common_format->type : FMT_F; + struct msg_location *cats_location + = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs); + bool ok = ctables_recursive_check_postcompute ( + dict, cat->pc->expr, cat, c, cats_location); + msg_location_destroy (cats_location); + if (!ok) + goto error; + break; - case CTPO_CAT_STRING: - { - int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]); - char *s = NULL; - if (width > e->string.length) - { - s = xmalloc (width); - buf_copy_rpad (s, width, e->string.string, e->string.length, ' '); - } + case CCT_NUMBER: + case CCT_NRANGE: + for (size_t j = 0; j < n_vars; j++) + if (var_is_alpha (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied " + "only to numeric variables, but this " + "subcommand tries to apply it to string " + "variable %s."), + var_get_name (vars[j])); + goto error; + } + break; - const struct ctables_category *category - = ctables_find_category_for_postcompute ( - ctx->section->table->ctables->dict, - ctx->cats, ctx->parse_format, e); - assert (category != NULL); + case CCT_STRING: + if (parse_strings) + { + double n; + if (!parse_category_string (cat->location, cat->string, dict, + common_format->type, &n)) + goto error; - struct ctables_cell_value cv = { .category = category }; - if (category->type == CCT_NUMBER) - cv.value.f = category->number; - else if (category->type == CCT_STRING) - cv.value.s = CHAR_CAST (uint8_t *, s ? 
s : e->string.string); - else - NOT_REACHED (); + ss_dealloc (&cat->string); - double retval = ctables_pcexpr_evaluate_category (ctx, &cv); - free (s); - return retval; - } + cat->type = CCT_NUMBER; + cat->number = n; + } + else if (!all_strings (vars, n_vars, cat)) + goto error; + break; - case CTPO_ADD: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add); + case CCT_SRANGE: + if (parse_strings) + { + double n[2]; - case CTPO_SUB: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub); + if (!cat->srange[0].string) + n[0] = -DBL_MAX; + else if (!parse_category_string (cat->location, + cat->srange[0], dict, + common_format->type, &n[0])) + goto error; - case CTPO_MUL: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul); + if (!cat->srange[1].string) + n[1] = DBL_MAX; + else if (!parse_category_string (cat->location, + cat->srange[1], dict, + common_format->type, &n[1])) + goto error; - case CTPO_DIV: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div); + ss_dealloc (&cat->srange[0]); + ss_dealloc (&cat->srange[1]); - case CTPO_POW: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow); + cat->type = CCT_NRANGE; + cat->nrange[0] = n[0]; + cat->nrange[1] = n[1]; + } + else if (!all_strings (vars, n_vars, cat)) + goto error; + break; - case CTPO_NEG: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg); + case CCT_MISSING: + case CCT_OTHERNM: + case CCT_SUBTOTAL: + case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + break; + } + } } - NOT_REACHED (); + free (vars); + return true; + +error: + free (vars); + return false; } + -static const struct ctables_category * -ctables_cell_postcompute (const struct ctables_section *s, - const struct ctables_cell *cell, - enum pivot_axis_type *pc_a_p, - size_t *pc_a_idx_p) -{ - assert (cell->postcompute); - const struct ctables_category *pc_cat = NULL; - for (enum pivot_axis_type pc_a = 0; pc_a < 
PIVOT_N_AXES; pc_a++) - for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++) - { - const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx]; - if (cv->category->type == CCT_POSTCOMPUTE) - { - if (pc_cat) - { - /* Multiple postcomputes cross each other. The value is - undefined. */ - return NULL; - } +struct merge_item + { + const struct ctables_summary_spec_set *set; + size_t ofs; + }; - pc_cat = cv->category; - if (pc_a_p) - *pc_a_p = pc_a; - if (pc_a_idx_p) - *pc_a_idx_p = pc_a_idx; - } - } +static int +merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) +{ + const struct ctables_summary_spec *as = &a->set->specs[a->ofs]; + const struct ctables_summary_spec *bs = &b->set->specs[b->ofs]; + if (as->function != bs->function) + return as->function > bs->function ? 1 : -1; + else if (as->weighting != bs->weighting) + return as->weighting > bs->weighting ? 1 : -1; + else if (as->calc_area != bs->calc_area) + return as->calc_area > bs->calc_area ? 1 : -1; + else if (as->percentile != bs->percentile) + return as->percentile < bs->percentile ? 1 : -1; - assert (pc_cat != NULL); - return pc_cat; + const char *as_label = as->label ? as->label : ""; + const char *bs_label = bs->label ? 
bs->label : ""; + return strcmp (as_label, bs_label); } -static double -ctables_cell_calculate_postcompute (const struct ctables_section *s, - const struct ctables_cell *cell, - const struct ctables_summary_spec *ss, - struct fmt_spec *format, - bool *is_ctables_format, - size_t summary_idx) +static void +ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, + size_t ix[PIVOT_N_AXES]) { - enum pivot_axis_type pc_a = 0; - size_t pc_a_idx = 0; - const struct ctables_category *pc_cat = ctables_cell_postcompute ( - s, cell, &pc_a, &pc_a_idx); - if (!pc_cat) - return SYSMIS; - - const struct ctables_postcompute *pc = pc_cat->pc; - if (pc->specs) + if (a < PIVOT_N_AXES) { - for (size_t i = 0; i < pc->specs->n; i++) - { - const struct ctables_summary_spec *ss2 = &pc->specs->specs[i]; - if (ss->function == ss2->function - && ss->weighting == ss2->weighting - && ss->calc_area == ss2->calc_area - && ss->percentile == ss2->percentile) - { - *format = ss2->format; - *is_ctables_format = ss2->is_ctables_format; - break; - } + size_t limit = MAX (t->stacks[a].n, 1); + for (ix[a] = 0; ix[a] < limit; ix[a]++) + ctables_table_add_section (t, a + 1, ix); + } + else + { + struct ctables_section *s = &t->sections[t->n_sections++]; + *s = (struct ctables_section) { + .table = t, + .cells = HMAP_INITIALIZER (s->cells), + }; + for (a = 0; a < PIVOT_N_AXES; a++) + if (t->stacks[a].n) + { + struct ctables_nest *nest = &t->stacks[a].nests[ix[a]]; + s->nests[a] = nest; + s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]); + for (size_t i = 0; i < nest->n; i++) + hmap_init (&s->occurrences[a][i]); } + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_init (&s->areas[at]); } - - const struct variable *var = s->nests[pc_a]->vars[pc_a_idx]; - const struct ctables_categories *cats = s->table->categories[ - var_get_dict_index (var)]; - struct ctables_pcexpr_evaluate_ctx ctx = { - .cell = cell, - .section = s, - .cats = cats, - .pc_a = pc_a, - 
.pc_a_idx = pc_a_idx, - .summary_idx = summary_idx, - .parse_format = pc_cat->parse_format, - }; - return ctables_pcexpr_evaluate (&ctx, pc->expr); } static char * @@ -5238,8 +5244,8 @@ ctables_prepare_table (struct ctables_table *t) if (merge_item_compare_3way (&items[j], &min) == 0) { struct merge_item *item = &items[j]; - item->set->specs[item->ofs].axis_idx = merged->n - 1; - if (++item->ofs >= item->set->n) + item->set->specs[item->ofs++].axis_idx = merged->n - 1; + if (item->ofs >= item->set->n) { items[j] = items[--n_left]; continue; diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at index 6885f935ea..3ca41fc249 100644 --- a/tests/language/stats/ctables.at +++ b/tests/language/stats/ctables.at @@ -535,6 +535,8 @@ CTABLES /TABLE qn113 /SIGTEST TYPE=CHISQUARE. CTABLES /TABLE qn113 /COMPARETEST TYPE=PROP. CTABLES /TABLE qn113 [COUNT.UCL]. + +CTABLES /TABLE qn1 /CATEGORIES **. ]]) AT_CHECK([pspp ctables.sps -O box=unicode -O width=80], [1], [[ctables.sps:2.76-2.78: error: CTABLES: Computed category &pc references a @@ -666,6 +668,9 @@ for COMPARETEST not yet implemented. ctables.sps:32.23-32.31: error: CTABLES: Syntax error at `COUNT.UCL': Support for LCL, UCL, and SE summary functions is not yet implemented. + +ctables.sps:34.32-34.33: error: CTABLES: Syntax error at `**': expecting +VARIABLES. ]]) AT_CLEANUP -- 2.30.2