From: Ben Pfaff Date: Sat, 27 Aug 2022 22:34:10 +0000 (-0700) Subject: more refactoring X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=7a7e6f880c486f3ede85d268644761295615d490 more refactoring --- diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index cb38a37dea..ad9e453a9b 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -55,14 +55,6 @@ #define _(msgid) gettext (msgid) #define N_(msgid) (msgid) -enum ctables_vlabel - { - CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT, - CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE, - CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL, - CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, - }; - enum ctables_weighting { CTW_EFFECTIVE, @@ -70,6 +62,58 @@ enum ctables_weighting CTW_UNWEIGHTED #define N_CTWS 3 }; + +/* CTABLES table areas. */ + +enum ctables_area_type + { + /* Within a section, where stacked variables divide one section from + another. + + Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that + parse_ctables_summary_function() parses correctly. */ + CTAT_TABLE, /* All layers of a whole section. */ + CTAT_LAYERROW, /* Row in one layer within a section. */ + CTAT_LAYERCOL, /* Column in one layer within a section. */ + CTAT_LAYER, /* One layer within a section. */ + + /* Within a subtable, where a subtable pairs an innermost row variable with + an innermost column variable within a single layer. */ + CTAT_SUBTABLE, /* Whole subtable. */ + CTAT_ROW, /* Row within a subtable. */ + CTAT_COL, /* Column within a subtable. 
*/ +#define N_CTATS 7 + }; + +static const char *ctables_area_type_name[N_CTATS] = { + [CTAT_TABLE] = "TABLE", + [CTAT_LAYER] = "LAYER", + [CTAT_LAYERROW] = "LAYERROW", + [CTAT_LAYERCOL] = "LAYERCOL", + [CTAT_SUBTABLE] = "SUBTABLE", + [CTAT_ROW] = "ROW", + [CTAT_COL] = "COL", +}; + +struct ctables_area + { + struct hmap_node node; + + const struct ctables_cell *example; + + size_t sequence; + double count[N_CTWS]; + double valid[N_CTWS]; + double total[N_CTWS]; + struct ctables_sum *sums; + }; + +struct ctables_sum + { + double sum[N_CTWS]; + }; + +/* CTABLES summary functions. */ enum ctables_function_type { @@ -146,140 +190,349 @@ static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS #undef S }; -enum ctables_area_type - { - /* Within a section, where stacked variables divide one section from - another. - - Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that - parse_ctables_summary_function() parses correctly. */ - CTAT_TABLE, /* All layers of a whole section. */ - CTAT_LAYERROW, /* Row in one layer within a section. */ - CTAT_LAYERCOL, /* Column in one layer within a section. */ - CTAT_LAYER, /* One layer within a section. */ - - /* Within a subtable, where a subtable pairs an innermost row variable with - an innermost column variable within a single layer. */ - CTAT_SUBTABLE, /* Whole subtable. */ - CTAT_ROW, /* Row within a subtable. */ - CTAT_COL, /* Column within a subtable. 
*/ -#define N_CTATS 7 +static struct fmt_spec +ctables_summary_default_format (enum ctables_summary_function function, + const struct variable *var) +{ + static const enum ctables_format default_formats[] = { +#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT, +#include "ctables.inc" +#undef S }; + switch (default_formats[function]) + { + case CTF_COUNT: + return (struct fmt_spec) { .type = FMT_F, .w = 40 }; -static const char *ctables_area_type_name[N_CTATS] = { - [CTAT_TABLE] = "TABLE", - [CTAT_LAYER] = "LAYER", - [CTAT_LAYERROW] = "LAYERROW", - [CTAT_LAYERCOL] = "LAYERCOL", - [CTAT_SUBTABLE] = "SUBTABLE", - [CTAT_ROW] = "ROW", - [CTAT_COL] = "COL", -}; - -struct ctables_area - { - struct hmap_node node; - - const struct ctables_cell *example; + case CTF_PERCENT: + return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 }; - size_t sequence; - double count[N_CTWS]; - double valid[N_CTWS]; - double total[N_CTWS]; - struct ctables_sum *sums; - }; + case CTF_GENERAL: + return *var_get_print_format (var); -struct ctables_sum - { - double sum[N_CTWS]; - }; + default: + NOT_REACHED (); + } +} -enum ctables_summary_variant - { - CSV_CELL, - CSV_TOTAL -#define N_CSVS 2 +static enum ctables_function_availability +ctables_function_availability (enum ctables_summary_function f) +{ + static enum ctables_function_availability availability[] = { +#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY, +#include "ctables.inc" +#undef S }; -struct ctables_cell - { - /* In struct ctables_section's 'cells' hmap. Indexed by all the values in - all the axes (except the scalar variable, if any). */ - struct hmap_node node; + return availability[f]; +} - /* The areas that contain this cell. 
*/ - uint32_t omit_areas; - struct ctables_area *areas[N_CTATS]; +static bool +parse_ctables_summary_function (struct lexer *lexer, + enum ctables_summary_function *function, + enum ctables_weighting *weighting, + enum ctables_area_type *area) +{ + if (!lex_force_id (lexer)) + return false; - bool hide; + struct substring name = lex_tokss (lexer); + if (ss_ends_with_case (name, ss_cstr (".LCL")) + || ss_ends_with_case (name, ss_cstr (".UCL")) + || ss_ends_with_case (name, ss_cstr (".SE"))) + { + lex_error (lexer, _("Support for LCL, UCL, and SE summary functions " + "is not yet implemented.")); + return false; + } - bool postcompute; - enum ctables_summary_variant sv; + bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'); + bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e')); - struct ctables_cell_axis + bool has_area = false; + *area = 0; + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at]))) { - struct ctables_cell_value + has_area = true; + *area = at; + + if (ss_equals_case (name, ss_cstr ("PCT"))) { - const struct ctables_category *category; - union value value; + /* Special case where .COUNT suffix is omitted. */ + *function = CTSF_areaPCT_COUNT; + *weighting = CTW_EFFECTIVE; + lex_get (lexer); + return true; } - *cvs; - int leaf; + break; } - axes[PIVOT_N_AXES]; - - union ctables_summary *summaries; - }; - -struct ctables - { - const struct dictionary *dict; - struct pivot_table_look *look; - - /* CTABLES has a number of extra formats that we implement via custom - currency specifications on an alternate fmt_settings. */ -#define CTEF_NEGPAREN FMT_CCA -#define CTEF_NEQUAL FMT_CCB -#define CTEF_PAREN FMT_CCC -#define CTEF_PCTPAREN FMT_CCD - struct fmt_settings ctables_formats; - /* If this is NULL, zeros are displayed using the normal print format. - Otherwise, this string is displayed. 
*/ - char *zero; + for (int f = 0; f < N_CTSF_FUNCTIONS; f++) + { + const struct ctables_function_info *cfi = &ctables_function_info[f]; + if (ss_equals_case (cfi->basename, name)) + { + *function = f; + if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area)) + break; - /* If this is NULL, missing values are displayed using the normal print - format. Otherwise, this string is displayed. */ - char *missing; + *weighting = (e ? CTW_EFFECTIVE + : u ? CTW_UNWEIGHTED + : cfi->e_prefix ? CTW_DICTIONARY + : CTW_EFFECTIVE); + lex_get (lexer); + return true; + } + } - /* Indexed by variable dictionary index. */ - enum ctables_vlabel *vlabels; + lex_error (lexer, _("Expecting summary function name.")); + return false; +} - struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */ +static const char * +ctables_summary_function_name (enum ctables_summary_function function, + enum ctables_weighting weighting, + enum ctables_area_type area, + char *buffer, size_t bufsize) +{ + const struct ctables_function_info *cfi = &ctables_function_info[function]; + snprintf (buffer, bufsize, "%s%s%s", + (weighting == CTW_UNWEIGHTED ? "U" + : weighting == CTW_DICTIONARY ? "" + : cfi->e_prefix ? "E" + : ""), + cfi->is_area ? ctables_area_type_name[area] : "", + cfi->basename.string); + return buffer; +} - bool mrsets_count_duplicates; /* MRSETS. */ - bool smissing_listwise; /* SMISSING. */ - struct variable *e_weight; /* WEIGHT. */ - int hide_threshold; /* HIDESMALLCOUNTS. */ +static const char * +ctables_summary_function_label__ (enum ctables_summary_function function, + enum ctables_weighting weighting, + enum ctables_area_type area) +{ + bool w = weighting != CTW_UNWEIGHTED; + bool d = weighting == CTW_DICTIONARY; + enum ctables_area_type a = area; + switch (function) + { + case CTSF_COUNT: + return (d ? N_("Count") + : w ? 
N_("Adjusted Count") + : N_("Unweighted Count")); - struct ctables_table **tables; - size_t n_tables; - }; + case CTSF_areaPCT_COUNT: + switch (a) + { + case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %"); + case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %"); + case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %"); + case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %"); + case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %"); + case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %"); + case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %"); + } + NOT_REACHED (); -static struct ctables_postcompute *ctables_find_postcompute (struct ctables *, - const char *name); + case CTSF_areaPCT_VALIDN: + switch (a) + { + case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %"); + case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %"); + case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %"); + case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %"); + case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %"); + case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %"); + case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %"); + } + NOT_REACHED (); -struct ctables_postcompute + case CTSF_areaPCT_TOTALN: + switch (a) + { + case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %"); + case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %"); + case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %"); + case CTAT_LAYERCOL: return w ? 
N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %"); + case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %"); + case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %"); + case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %"); + } + NOT_REACHED (); + + case CTSF_MAXIMUM: return N_("Maximum"); + case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean"); + case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median"); + case CTSF_MINIMUM: return N_("Minimum"); + case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing"); + case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode"); + case CTSF_PTILE: NOT_REACHED (); + case CTSF_RANGE: return N_("Range"); + case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean"); + case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation"); + case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum"); + case CTSF_TOTALN: return (d ? N_("Total N") + : w ? N_("Adjusted Total N") + : N_("Unweighted Total N")); + case CTSF_VALIDN: return (d ? N_("Valid N") + : w ? N_("Adjusted Valid N") + : N_("Unweighted Valid N")); + case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance"); + case CTSF_areaPCT_SUM: + switch (a) + { + case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %"); + case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %"); + case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %"); + case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %"); + case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %"); + case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %"); + case CTAT_COL: return w ? 
N_("Column Sum %") : N_("Unweighted Column Sum %"); + } + NOT_REACHED (); + + case CTSF_areaID: + switch (a) + { + /* Don't bother translating these: they are for developers only. */ + case CTAT_TABLE: return "Table ID"; + case CTAT_LAYER: return "Layer ID"; + case CTAT_LAYERROW: return "Layer Row ID"; + case CTAT_LAYERCOL: return "Layer Column ID"; + case CTAT_SUBTABLE: return "Subtable ID"; + case CTAT_ROW: return "Row ID"; + case CTAT_COL: return "Column ID"; + } + NOT_REACHED (); + } + + NOT_REACHED (); +} + +static struct pivot_value * +ctables_summary_function_label (enum ctables_summary_function function, + enum ctables_weighting weighting, + enum ctables_area_type area, + double percentile) +{ + if (function == CTSF_PTILE) + { + char *s = (weighting != CTW_UNWEIGHTED + ? xasprintf (_("Percentile %.2f"), percentile) + : xasprintf (_("Unweighted Percentile %.2f"), percentile)); + return pivot_value_new_user_text_nocopy (s); + } + else + return pivot_value_new_text (ctables_summary_function_label__ ( + function, weighting, area)); +} + +/* CTABLES summaries. */ + +struct ctables_summary_spec { - struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */ - char *name; /* Name, without leading &. */ + /* The calculation to be performed. - struct msg_location *location; /* Location of definition. */ - struct ctables_pcexpr *expr; + 'function' is the function to calculate. 'weighted' specifies whether + to use weighted or unweighted data (for functions that do not support a + choice, it must be true). 'calc_area' is the area over which the + calculation takes place (for functions that target only an individual + cell, it must be 0). For CTSF_PTILE only, 'percentile' is the + percentile between 0 and 100 (for other functions it must be 0). */ + enum ctables_summary_function function; + enum ctables_weighting weighting; + enum ctables_area_type calc_area; + double percentile; /* CTSF_PTILE only. */ + + /* How to display the result of the calculation. 
+ + 'label' is a user-specified label, NULL if the user didn't specify + one. + + 'user_area' is usually the same as 'calc_area', but when category labels + are rotated from one axis to another it swaps rows and columns. + + 'format' is the format for displaying the output. If + 'is_ctables_format' is true, then 'format.type' is one of the special + CTEF_* formats instead of the standard ones. */ char *label; - struct ctables_summary_spec_set *specs; - bool hide_source_cats; + enum ctables_area_type user_area; + struct fmt_spec format; + bool is_ctables_format; /* Is 'format' one of CTEF_*? */ + + size_t axis_idx; + size_t sum_var_idx; + }; + +static void +ctables_summary_spec_clone (struct ctables_summary_spec *dst, + const struct ctables_summary_spec *src) +{ + *dst = *src; + dst->label = xstrdup_if_nonnull (src->label); +} + +static void +ctables_summary_spec_uninit (struct ctables_summary_spec *s) +{ + if (s) + free (s->label); +} + +/* Collections of summary functions. */ + +struct ctables_summary_spec_set + { + struct ctables_summary_spec *specs; + size_t n; + size_t allocated; + + /* The variable to which the summary specs are applied. */ + struct variable *var; + + /* Whether the variable to which the summary specs are applied is a scale + variable for the purpose of summarization. + + (VALIDN and TOTALN act differently for summarizing scale and categorical + variables.) */ + bool is_scale; + + /* If any of these optional additional scale variables are missing, then + treat 'var' as if it's missing too. This is for implementing + SMISSING=LISTWISE. */ + struct variable **listwise_vars; + size_t n_listwise_vars; + }; + +static void +ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst, + const struct ctables_summary_spec_set *src) +{ + struct ctables_summary_spec *specs + = (src->n ? 
xnmalloc (src->n, sizeof *specs) : NULL); + for (size_t i = 0; i < src->n; i++) + ctables_summary_spec_clone (&specs[i], &src->specs[i]); + + *dst = (struct ctables_summary_spec_set) { + .specs = specs, + .n = src->n, + .allocated = src->n, + .var = src->var, + .is_scale = src->is_scale, }; +} + +static void +ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set) +{ + for (size_t i = 0; i < set->n; i++) + ctables_summary_spec_uninit (&set->specs[i]); + free (set->listwise_vars); + free (set->specs); +} + +/* CTABLES postcompute expressions. */ struct ctables_pcexpr { @@ -291,7 +544,7 @@ struct ctables_pcexpr * / - + */ - enum ctables_postcompute_op + enum ctables_pcexpr_op { /* Terminals. */ CTPO_CONSTANT, /* 5 */ @@ -340,372 +593,414 @@ struct ctables_pcexpr struct msg_location *location; }; -static void ctables_pcexpr_destroy (struct ctables_pcexpr *); static struct ctables_pcexpr *ctables_pcexpr_allocate_binary ( - enum ctables_postcompute_op, struct ctables_pcexpr *sub0, + enum ctables_pcexpr_op, struct ctables_pcexpr *sub0, struct ctables_pcexpr *sub1); -struct ctables_summary_spec_set - { - struct ctables_summary_spec *specs; - size_t n; - size_t allocated; - - /* The variable to which the summary specs are applied. */ - struct variable *var; +typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *, + struct dictionary *); - /* Whether the variable to which the summary specs are applied is a scale - variable for the purpose of summarization. +static void +ctables_pcexpr_destroy (struct ctables_pcexpr *e) +{ + if (e) + { + switch (e->op) + { + case CTPO_CAT_STRING: + ss_dealloc (&e->string); + break; - (VALIDN and TOTALN act differently for summarizing scale and categorical - variables.) */ - bool is_scale; + case CTPO_CAT_SRANGE: + for (size_t i = 0; i < 2; i++) + ss_dealloc (&e->srange[i]); + break; - /* If any of these optional additional scale variables are missing, then - treat 'var' as if it's missing too. 
This is for implementing - SMISSING=LISTWISE. */ - struct variable **listwise_vars; - size_t n_listwise_vars; - }; + case CTPO_ADD: + case CTPO_SUB: + case CTPO_MUL: + case CTPO_DIV: + case CTPO_POW: + case CTPO_NEG: + for (size_t i = 0; i < 2; i++) + ctables_pcexpr_destroy (e->subs[i]); + break; -static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *, - const struct ctables_summary_spec_set *); -static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *); + case CTPO_CONSTANT: + case CTPO_CAT_NUMBER: + case CTPO_CAT_NRANGE: + case CTPO_CAT_MISSING: + case CTPO_CAT_OTHERNM: + case CTPO_CAT_SUBTOTAL: + case CTPO_CAT_TOTAL: + break; + } -/* A nested sequence of variables, e.g. a > b > c. */ -struct ctables_nest - { - struct variable **vars; - size_t n; - size_t scale_idx; - size_t summary_idx; - size_t *areas[N_CTATS]; - size_t n_areas[N_CTATS]; - size_t group_head; - - struct ctables_summary_spec_set specs[N_CSVS]; - }; - -/* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */ -struct ctables_stack - { - struct ctables_nest *nests; - size_t n; - }; - -static void ctables_stack_uninit (struct ctables_stack *); - -struct ctables_value - { - struct hmap_node node; - union value value; - int leaf; - }; + msg_location_destroy (e->location); + free (e); + } +} -struct ctables_occurrence - { - struct hmap_node node; - union value value; +static struct ctables_pcexpr * +ctables_pcexpr_allocate_binary (enum ctables_pcexpr_op op, + struct ctables_pcexpr *sub0, + struct ctables_pcexpr *sub1) +{ + struct ctables_pcexpr *e = xmalloc (sizeof *e); + *e = (struct ctables_pcexpr) { + .op = op, + .subs = { sub0, sub1 }, + .location = msg_location_merged (sub0->location, sub1->location), }; + return e; +} -struct ctables_section +/* How to parse an operator. */ +struct operator { - /* Settings. */ - struct ctables_table *table; - struct ctables_nest *nests[PIVOT_N_AXES]; - - /* Data. 
*/ - struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */ - struct hmap cells; /* Contains "struct ctables_cell"s. */ - struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */ + enum token_type token; + enum ctables_pcexpr_op op; }; -static void ctables_section_uninit (struct ctables_section *); - -struct ctables_table - { - struct ctables *ctables; - struct ctables_axis *axes[PIVOT_N_AXES]; - struct ctables_stack stacks[PIVOT_N_AXES]; - struct ctables_section *sections; - size_t n_sections; - enum pivot_axis_type summary_axis; - struct ctables_summary_spec_set summary_specs; - struct variable **sum_vars; - size_t n_sum_vars; - - enum pivot_axis_type slabels_axis; - bool slabels_visible; +static const struct operator * +ctables_pcexpr_match_operator (struct lexer *lexer, + const struct operator ops[], size_t n_ops) +{ + for (const struct operator *op = ops; op < ops + n_ops; op++) + if (lex_token (lexer) == op->token) + { + if (op->token != T_NEG_NUM) + lex_get (lexer); - /* The innermost category labels for axis 'a' appear on axis label_axis[a]. + return op; + } - Most commonly, label_axis[a] == a, and in particular we always have - label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER. + return NULL; +} - If ROWLABELS or COLLABELS is specified, then one of - label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the - opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ. 
+static struct ctables_pcexpr * +ctables_pcexpr_parse_binary_operators__ ( + struct lexer *lexer, struct dictionary *dict, + const struct operator ops[], size_t n_ops, + parse_recursively_func *parse_next_level, + const char *chain_warning, struct ctables_pcexpr *lhs) +{ + for (int op_count = 0; ; op_count++) + { + const struct operator *op + = ctables_pcexpr_match_operator (lexer, ops, n_ops); + if (!op) + { + if (op_count > 1 && chain_warning) + msg_at (SW, lhs->location, "%s", chain_warning); - If any category labels are moved, then 'clabels_example' is one of the - variables being moved (and it is otherwise NULL). All of the variables - being moved have the same width, value labels, and categories, so this - example variable can be used to find those out. + return lhs; + } - The remaining members in this group are relevant only if category labels - are moved. + struct ctables_pcexpr *rhs = parse_next_level (lexer, dict); + if (!rhs) + { + ctables_pcexpr_destroy (lhs); + return NULL; + } - 'clabels_values_map' holds a "struct ctables_value" for all the values - that appear in all of the variables in the moved categories. It is - accumulated as the data is read. Once the data is fully read, its - sorted values are put into 'clabels_values' and 'n_clabels_values'. - */ - enum pivot_axis_type label_axis[PIVOT_N_AXES]; - enum pivot_axis_type clabels_from_axis; - enum pivot_axis_type clabels_to_axis; - const struct variable *clabels_example; - struct hmap clabels_values_map; - struct ctables_value **clabels_values; - size_t n_clabels_values; + lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs); + } +} - /* Indexed by variable dictionary index. 
*/ - struct ctables_categories **categories; - size_t n_categories; +static struct ctables_pcexpr * +ctables_pcexpr_parse_binary_operators ( + struct lexer *lexer, struct dictionary *dict, + const struct operator ops[], size_t n_ops, + parse_recursively_func *parse_next_level, const char *chain_warning) +{ + struct ctables_pcexpr *lhs = parse_next_level (lexer, dict); + if (!lhs) + return NULL; - double cilevel; + return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops, + parse_next_level, + chain_warning, lhs); +} - char *caption; - char *corner; - char *title; +static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *, + struct dictionary *); - struct ctables_chisq *chisq; - struct ctables_pairwise *pairwise; +static struct ctables_pcexpr +ctpo_cat_nrange (double low, double high) +{ + return (struct ctables_pcexpr) { + .op = CTPO_CAT_NRANGE, + .nrange = { low, high }, }; +} -struct ctables_categories - { - size_t n_refs; - struct ctables_category *cats; - size_t n_cats; - bool show_empty; +static struct ctables_pcexpr +ctpo_cat_srange (struct substring low, struct substring high) +{ + return (struct ctables_pcexpr) { + .op = CTPO_CAT_SRANGE, + .srange = { low, high }, }; +} -struct ctables_category - { - enum ctables_category_type - { - /* Explicit category lists. */ - CCT_NUMBER, - CCT_STRING, - CCT_NRANGE, /* Numerical range. */ - CCT_SRANGE, /* String range. */ - CCT_MISSING, - CCT_OTHERNM, - CCT_POSTCOMPUTE, - - /* Totals and subtotals. */ - CCT_SUBTOTAL, - CCT_TOTAL, - - /* Implicit category lists. */ - CCT_VALUE, - CCT_LABEL, - CCT_FUNCTION, - - /* For contributing to TOTALN. 
*/ - CCT_EXCLUDED_MISSING, - } - type; - - struct ctables_category *subtotal; +static struct substring +parse_substring (struct lexer *lexer, struct dictionary *dict) +{ + struct substring s = recode_substring_pool ( + dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL); + ss_rtrim (&s, ss_cstr (" ")); + lex_get (lexer); + return s; +} - bool hide; +static struct ctables_pcexpr * +ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict) +{ + int start_ofs = lex_ofs (lexer); + struct ctables_pcexpr e; + if (lex_is_number (lexer)) + { + e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT, + .number = lex_number (lexer) }; + lex_get (lexer); + } + else if (lex_match_id (lexer, "MISSING")) + e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING }; + else if (lex_match_id (lexer, "OTHERNM")) + e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM }; + else if (lex_match_id (lexer, "TOTAL")) + e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL }; + else if (lex_match_id (lexer, "SUBTOTAL")) + { + size_t subtotal_index = 0; + if (lex_match (lexer, T_LBRACK)) + { + if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX)) + return NULL; + subtotal_index = lex_integer (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RBRACK)) + return NULL; + } + e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL, + .subtotal_index = subtotal_index }; + } + else if (lex_match (lexer, T_LBRACK)) + { + if (lex_match_id (lexer, "LO")) + { + if (!lex_force_match_id (lexer, "THRU")) + return false; - union - { - double number; /* CCT_NUMBER. */ - struct substring string; /* CCT_STRING, in dictionary encoding. */ - double nrange[2]; /* CCT_NRANGE. */ - struct substring srange[2]; /* CCT_SRANGE. 
*/ + if (lex_is_string (lexer)) + { + struct substring low = { .string = NULL }; + struct substring high = parse_substring (lexer, dict); + e = ctpo_cat_srange (low, high); + } + else + { + if (!lex_force_num (lexer)) + return false; + e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer)); + lex_get (lexer); + } + } + else if (lex_is_number (lexer)) + { + double number = lex_number (lexer); + lex_get (lexer); + if (lex_match_id (lexer, "THRU")) + { + if (lex_match_id (lexer, "HI")) + e = ctpo_cat_nrange (number, DBL_MAX); + else + { + if (!lex_force_num (lexer)) + return false; + e = ctpo_cat_nrange (number, lex_number (lexer)); + lex_get (lexer); + } + } + else + e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER, + .number = number }; + } + else if (lex_is_string (lexer)) + { + struct substring s = parse_substring (lexer, dict); - struct - { - char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */ - bool hide_subcategories; /* CCT_SUBTOTAL. */ - }; + if (lex_match_id (lexer, "THRU")) + { + struct substring high; - /* CCT_POSTCOMPUTE. */ - struct - { - const struct ctables_postcompute *pc; - enum fmt_type parse_format; - }; + if (lex_match_id (lexer, "HI")) + high = (struct substring) { .string = NULL }; + else + { + if (!lex_force_string (lexer)) + { + ss_dealloc (&s); + return false; + } + high = parse_substring (lexer, dict); + } - /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */ - struct - { - bool include_missing; - bool sort_ascending; + e = ctpo_cat_srange (s, high); + } + else + e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s }; + } + else + { + lex_error (lexer, NULL); + return NULL; + } - /* CCT_FUNCTION. 
*/ - enum ctables_summary_function sort_function; - enum ctables_weighting weighting; - enum ctables_area_type area; - struct variable *sort_var; - double percentile; - }; - }; + if (!lex_force_match (lexer, T_RBRACK)) + { + if (e.op == CTPO_CAT_STRING) + ss_dealloc (&e.string); + else if (e.op == CTPO_CAT_SRANGE) + { + ss_dealloc (&e.srange[0]); + ss_dealloc (&e.srange[1]); + } + return NULL; + } + } + else if (lex_match (lexer, T_LPAREN)) + { + struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict); + if (!ep) + return NULL; + if (!lex_force_match (lexer, T_RPAREN)) + { + ctables_pcexpr_destroy (ep); + return NULL; + } + return ep; + } + else + { + lex_error (lexer, NULL); + return NULL; + } - /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL, - CCT_FUNCTION, CCT_EXCLUDED_MISSING. */ - struct msg_location *location; + e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); + return xmemdup (&e, sizeof e); +} + +static struct ctables_pcexpr * +ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub, + struct lexer *lexer, int start_ofs) +{ + struct ctables_pcexpr *e = xmalloc (sizeof *e); + *e = (struct ctables_pcexpr) { + .op = CTPO_NEG, + .subs = { sub }, + .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1), }; + return e; +} -static void -ctables_category_uninit (struct ctables_category *cat) +static struct ctables_pcexpr * +ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict) { - if (!cat) - return; + static const struct operator op = { T_EXP, CTPO_POW }; - msg_location_destroy (cat->location); - switch (cat->type) - { - case CCT_NUMBER: - case CCT_NRANGE: - case CCT_MISSING: - case CCT_OTHERNM: - case CCT_POSTCOMPUTE: - break; + const char *chain_warning = + _("The exponentiation operator (`**') is left-associative: " + "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. 
" + "To disable this warning, insert parentheses."); - case CCT_STRING: - ss_dealloc (&cat->string); - break; + if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP) + return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1, + ctables_pcexpr_parse_primary, + chain_warning); - case CCT_SRANGE: - ss_dealloc (&cat->srange[0]); - ss_dealloc (&cat->srange[1]); - break; + /* Special case for situations like "-5**6", which must be parsed as + -(5**6). */ - case CCT_SUBTOTAL: - case CCT_TOTAL: - free (cat->total_label); - break; + int start_ofs = lex_ofs (lexer); + struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs); + *lhs = (struct ctables_pcexpr) { + .op = CTPO_CONSTANT, + .number = -lex_tokval (lexer), + .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)), + }; + lex_get (lexer); - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - break; + struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ ( + lexer, dict, &op, 1, + ctables_pcexpr_parse_primary, chain_warning, lhs); + if (!node) + return NULL; - case CCT_EXCLUDED_MISSING: - break; - } + return ctables_pcexpr_allocate_neg (node, lexer, start_ofs); } -static bool -nullable_substring_equal (const struct substring *a, - const struct substring *b) +/* Parses the unary minus level. */ +static struct ctables_pcexpr * +ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict) { - return !a->string ? !b->string : b->string && ss_equals (*a, *b); + int start_ofs = lex_ofs (lexer); + if (!lex_match (lexer, T_DASH)) + return ctables_pcexpr_parse_exp (lexer, dict); + + struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict); + if (!inner) + return NULL; + + return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs); } -static bool -ctables_category_equal (const struct ctables_category *a, - const struct ctables_category *b) +/* Parses the multiplication and division level. 
*/ +static struct ctables_pcexpr * +ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict) { - if (a->type != b->type) - return false; - - switch (a->type) + static const struct operator ops[] = { - case CCT_NUMBER: - return a->number == b->number; + { T_ASTERISK, CTPO_MUL }, + { T_SLASH, CTPO_DIV }, + }; - case CCT_STRING: - return ss_equals (a->string, b->string); + return ctables_pcexpr_parse_binary_operators (lexer, dict, ops, + sizeof ops / sizeof *ops, + ctables_pcexpr_parse_neg, NULL); +} - case CCT_NRANGE: - return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1]; +/* Parses the addition and subtraction level. */ +static struct ctables_pcexpr * +ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict) +{ + static const struct operator ops[] = + { + { T_PLUS, CTPO_ADD }, + { T_DASH, CTPO_SUB }, + { T_NEG_NUM, CTPO_ADD }, + }; - case CCT_SRANGE: - return (nullable_substring_equal (&a->srange[0], &b->srange[0]) - && nullable_substring_equal (&a->srange[1], &b->srange[1])); + return ctables_pcexpr_parse_binary_operators (lexer, dict, + ops, sizeof ops / sizeof *ops, + ctables_pcexpr_parse_mul, NULL); +} + +/* CTABLES axis expressions. 
*/ - case CCT_MISSING: - case CCT_OTHERNM: - return true; - - case CCT_POSTCOMPUTE: - return a->pc == b->pc; - - case CCT_SUBTOTAL: - case CCT_TOTAL: - return !strcmp (a->total_label, b->total_label); - - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - return (a->include_missing == b->include_missing - && a->sort_ascending == b->sort_ascending - && a->sort_function == b->sort_function - && a->sort_var == b->sort_var - && a->percentile == b->percentile); - - case CCT_EXCLUDED_MISSING: - return true; - } - - NOT_REACHED (); -} - -static void -ctables_categories_unref (struct ctables_categories *c) -{ - if (!c) - return; - - assert (c->n_refs > 0); - if (--c->n_refs) - return; - - for (size_t i = 0; i < c->n_cats; i++) - ctables_category_uninit (&c->cats[i]); - free (c->cats); - free (c); -} - -static bool -ctables_categories_equal (const struct ctables_categories *a, - const struct ctables_categories *b) -{ - if (a->n_cats != b->n_cats || a->show_empty != b->show_empty) - return false; - - for (size_t i = 0; i < a->n_cats; i++) - if (!ctables_category_equal (&a->cats[i], &b->cats[i])) - return false; - - return true; -} - -/* Chi-square test (SIGTEST). */ -struct ctables_chisq - { - double alpha; - bool include_mrsets; - bool all_visible; - }; +/* CTABLES has a number of extra formats that we implement via custom + currency specifications on an alternate fmt_settings. */ +#define CTEF_NEGPAREN FMT_CCA +#define CTEF_NEQUAL FMT_CCB +#define CTEF_PAREN FMT_CCC +#define CTEF_PCTPAREN FMT_CCD -/* Pairwise comparison test (COMPARETEST). 
*/ -struct ctables_pairwise +enum ctables_summary_variant { - enum { PROP, MEAN } type; - double alpha[2]; - bool include_mrsets; - bool meansvariance_allcats; - bool all_visible; - enum { BONFERRONI = 1, BH } adjust; - bool merge; - bool apa_style; - bool show_sig; + CSV_CELL, + CSV_TOTAL +#define N_CSVS 2 }; struct ctables_axis @@ -738,4867 +1033,4587 @@ struct ctables_axis struct msg_location *loc; }; -static void ctables_axis_destroy (struct ctables_axis *); - -struct ctables_summary_spec - { - /* The calculation to be performed. - - 'function' is the function to calculate. 'weighted' specifies whether - to use weighted or unweighted data (for functions that do not support a - choice, it must be true). 'calc_area' is the area over which the - calculation takes place (for functions that target only an individual - cell, it must be 0). For CTSF_PTILE only, 'percentile' is the - percentile between 0 and 100 (for other functions it must be 0). */ - enum ctables_summary_function function; - enum ctables_weighting weighting; - enum ctables_area_type calc_area; - double percentile; /* CTSF_PTILE only. */ - - /* How to display the result of the calculation. - - 'label' is a user-specified label, NULL if the user didn't specify - one. - - 'user_area' is usually the same as 'calc_area', but when category labels - are rotated from one axis to another it swaps rows and columns. - - 'format' is the format for displaying the output. If - 'is_ctables_format' is true, then 'format.type' is one of the special - CTEF_* formats instead of the standard ones. */ - char *label; - enum ctables_area_type user_area; - struct fmt_spec format; - bool is_ctables_format; /* Is 'format' one of CTEF_*? 
*/ - - size_t axis_idx; - size_t sum_var_idx; - }; - -static void -ctables_summary_spec_clone (struct ctables_summary_spec *dst, - const struct ctables_summary_spec *src) -{ - *dst = *src; - dst->label = xstrdup_if_nonnull (src->label); -} - static void -ctables_summary_spec_uninit (struct ctables_summary_spec *s) +ctables_axis_destroy (struct ctables_axis *axis) { - if (s) - free (s->label); -} + if (!axis) + return; -static void -ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst, - const struct ctables_summary_spec_set *src) -{ - struct ctables_summary_spec *specs - = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL); - for (size_t i = 0; i < src->n; i++) - ctables_summary_spec_clone (&specs[i], &src->specs[i]); + switch (axis->op) + { + case CTAO_VAR: + for (size_t i = 0; i < N_CSVS; i++) + ctables_summary_spec_set_uninit (&axis->specs[i]); + break; - *dst = (struct ctables_summary_spec_set) { - .specs = specs, - .n = src->n, - .allocated = src->n, - .var = src->var, - .is_scale = src->is_scale, - }; + case CTAO_STACK: + case CTAO_NEST: + ctables_axis_destroy (axis->subs[0]); + ctables_axis_destroy (axis->subs[1]); + break; + } + msg_location_destroy (axis->loc); + free (axis); } -static void -ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set) +static struct ctables_axis * +ctables_axis_new_nonterminal (enum ctables_axis_op op, + struct ctables_axis *sub0, + struct ctables_axis *sub1, + struct lexer *lexer, int start_ofs) { - for (size_t i = 0; i < set->n; i++) - ctables_summary_spec_uninit (&set->specs[i]); - free (set->listwise_vars); - free (set->specs); + struct ctables_axis *axis = xmalloc (sizeof *axis); + *axis = (struct ctables_axis) { + .op = op, + .subs = { sub0, sub1 }, + .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1), + }; + return axis; } -static bool -parse_col_width (struct lexer *lexer, const char *name, double *width) -{ - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "DEFAULT")) 
- *width = SYSMIS; - else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX)) - { - *width = lex_number (lexer); - lex_get (lexer); - } - else - return false; - - return true; -} +struct ctables_axis_parse_ctx + { + struct lexer *lexer; + struct dictionary *dict; + }; -static bool -parse_bool (struct lexer *lexer, bool *b) +static struct pivot_value * +ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) { - if (lex_match_id (lexer, "NO")) - *b = false; - else if (lex_match_id (lexer, "YES")) - *b = true; + if (!spec->label) + return ctables_summary_function_label (spec->function, spec->weighting, + spec->user_area, spec->percentile); else { - lex_error_expecting (lexer, "YES", "NO"); - return false; - } - return true; -} + struct substring in = ss_cstr (spec->label); + struct substring target = ss_cstr (")CILEVEL"); -static enum ctables_function_availability -ctables_function_availability (enum ctables_summary_function f) -{ - static enum ctables_function_availability availability[] = { -#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY, -#include "ctables.inc" -#undef S - }; + struct string out = DS_EMPTY_INITIALIZER; + for (;;) + { + size_t chunk = ss_find_substring (in, target); + ds_put_substring (&out, ss_head (in, chunk)); + ss_advance (&in, chunk); + if (!in.length) + return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out)); - return availability[f]; + ss_advance (&in, target.length); + ds_put_format (&out, "%g", cilevel); + } + } } static bool -parse_ctables_summary_function (struct lexer *lexer, - enum ctables_summary_function *function, - enum ctables_weighting *weighting, - enum ctables_area_type *area) +add_summary_spec (struct ctables_axis *axis, + enum ctables_summary_function function, + enum ctables_weighting weighting, + enum ctables_area_type area, double percentile, + const char *label, const struct fmt_spec *format, + bool is_ctables_format, const struct msg_location *loc, + enum 
ctables_summary_variant sv) { - if (!lex_force_id (lexer)) - return false; - - struct substring name = lex_tokss (lexer); - if (ss_ends_with_case (name, ss_cstr (".LCL")) - || ss_ends_with_case (name, ss_cstr (".UCL")) - || ss_ends_with_case (name, ss_cstr (".SE"))) + if (axis->op == CTAO_VAR) { - lex_error (lexer, _("Support for LCL, UCL, and SE summary functions " - "is not yet implemented.")); - return false; - } + char function_name[128]; + ctables_summary_function_name (function, weighting, area, + function_name, sizeof function_name); + const char *var_name = var_get_name (axis->var); + switch (ctables_function_availability (function)) + { +#if 0 + case CTFA_MRSETS: + msg_at (SE, loc, _("Summary function %s applies only to multiple " + "response sets."), function_name); + msg_at (SN, axis->loc, _("'%s' is not a multiple response set."), + var_name); + return false; +#endif - bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'); - bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e')); + case CTFA_SCALE: + if (!axis->scale && sv != CSV_TOTAL) + { + msg_at (SE, loc, + _("Summary function %s applies only to scale variables."), + function_name); + msg_at (SN, axis->loc, _("'%s' is not a scale variable."), + var_name); + return false; + } + break; - bool has_area = false; - *area = 0; - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at]))) - { - has_area = true; - *area = at; + case CTFA_ALL: + break; + } - if (ss_equals_case (name, ss_cstr ("PCT"))) - { - /* Special case where .COUNT suffix is omitted. 
*/ - *function = CTSF_areaPCT_COUNT; - *weighting = CTW_EFFECTIVE; - lex_get (lexer); - return true; - } - break; - } + struct ctables_summary_spec_set *set = &axis->specs[sv]; + if (set->n >= set->allocated) + set->specs = x2nrealloc (set->specs, &set->allocated, + sizeof *set->specs); - for (int f = 0; f < N_CTSF_FUNCTIONS; f++) + struct ctables_summary_spec *dst = &set->specs[set->n++]; + *dst = (struct ctables_summary_spec) { + .function = function, + .weighting = weighting, + .calc_area = area, + .user_area = area, + .percentile = percentile, + .label = xstrdup_if_nonnull (label), + .format = (format ? *format + : ctables_summary_default_format (function, axis->var)), + .is_ctables_format = is_ctables_format, + }; + return true; + } + else { - const struct ctables_function_info *cfi = &ctables_function_info[f]; - if (ss_equals_case (cfi->basename, name)) - { - *function = f; - if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area)) - break; - - *weighting = (e ? CTW_EFFECTIVE - : u ? CTW_UNWEIGHTED - : cfi->e_prefix ? 
CTW_DICTIONARY - : CTW_EFFECTIVE); - lex_get (lexer); - return true; - } + for (size_t i = 0; i < 2; i++) + if (!add_summary_spec (axis->subs[i], function, weighting, area, + percentile, label, format, is_ctables_format, + loc, sv)) + return false; + return true; } - - lex_error (lexer, _("Expecting summary function name.")); - return false; } -static void -ctables_axis_destroy (struct ctables_axis *axis) -{ - if (!axis) - return; +static struct ctables_axis *ctables_axis_parse_stack ( + struct ctables_axis_parse_ctx *); - switch (axis->op) +static struct ctables_axis * +ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx) +{ + if (lex_match (ctx->lexer, T_LPAREN)) { - case CTAO_VAR: - for (size_t i = 0; i < N_CSVS; i++) - ctables_summary_spec_set_uninit (&axis->specs[i]); - break; + struct ctables_axis *sub = ctables_axis_parse_stack (ctx); + if (!sub || !lex_force_match (ctx->lexer, T_RPAREN)) + { + ctables_axis_destroy (sub); + return NULL; + } + return sub; + } - case CTAO_STACK: - case CTAO_NEST: - ctables_axis_destroy (axis->subs[0]); - ctables_axis_destroy (axis->subs[1]); - break; + if (!lex_force_id (ctx->lexer)) + return NULL; + + if (lex_tokcstr (ctx->lexer)[0] == '$') + { + lex_error (ctx->lexer, + _("Multiple response set support not implemented.")); + return NULL; } - msg_location_destroy (axis->loc); - free (axis); -} -static struct ctables_axis * -ctables_axis_new_nonterminal (enum ctables_axis_op op, - struct ctables_axis *sub0, - struct ctables_axis *sub1, - struct lexer *lexer, int start_ofs) -{ + int start_ofs = lex_ofs (ctx->lexer); + struct variable *var = parse_variable (ctx->lexer, ctx->dict); + if (!var) + return NULL; + struct ctables_axis *axis = xmalloc (sizeof *axis); - *axis = (struct ctables_axis) { - .op = op, - .subs = { sub0, sub1 }, - .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1), - }; + *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var }; + + axis->scale = (lex_match_phrase (ctx->lexer, 
"[S]") ? true + : lex_match_phrase (ctx->lexer, "[C]") ? false + : var_get_measure (var) == MEASURE_SCALE); + axis->loc = lex_ofs_location (ctx->lexer, start_ofs, + lex_ofs (ctx->lexer) - 1); + if (axis->scale && var_is_alpha (var)) + { + msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale " + "variable."), + var_get_name (var)); + ctables_axis_destroy (axis); + return NULL; + } + return axis; } -struct ctables_axis_parse_ctx - { - struct lexer *lexer; - struct dictionary *dict; - struct ctables *ct; - struct ctables_table *t; - }; - -static struct fmt_spec -ctables_summary_default_format (enum ctables_summary_function function, - const struct variable *var) +static bool +has_digit (const char *s) { - static const enum ctables_format default_formats[] = { -#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT, -#include "ctables.inc" -#undef S - }; - switch (default_formats[function]) - { - case CTF_COUNT: - return (struct fmt_spec) { .type = FMT_F, .w = 40 }; + return s[strcspn (s, "0123456789")] != '\0'; +} - case CTF_PERCENT: - return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 }; +static bool +parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format, + bool *is_ctables_format) +{ + char type[FMT_TYPE_LEN_MAX + 1]; + if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d)) + return false; - case CTF_GENERAL: - return *var_get_print_format (var); + if (!strcasecmp (type, "NEGPAREN")) + format->type = CTEF_NEGPAREN; + else if (!strcasecmp (type, "NEQUAL")) + format->type = CTEF_NEQUAL; + else if (!strcasecmp (type, "PAREN")) + format->type = CTEF_PAREN; + else if (!strcasecmp (type, "PCTPAREN")) + format->type = CTEF_PCTPAREN; + else + { + *is_ctables_format = false; + return (parse_format_specifier (lexer, format) + && fmt_check_output (format) + && fmt_check_type_compat (format, VAL_NUMERIC)); + } - default: - NOT_REACHED (); + lex_get (lexer); + if (format->w < 2) + { + lex_next_error 
(lexer, -1, -1, + _("Output format %s requires width 2 or greater."), type); + return false; + } + else if (format->d > format->w - 1) + { + lex_next_error (lexer, -1, -1, _("Output format %s requires width " + "greater than decimals."), type); + return false; + } + else + { + *is_ctables_format = true; + return true; } } -static const char * -ctables_summary_label__ (const struct ctables_summary_spec *spec) +static struct ctables_axis * +ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) { - bool w = spec->weighting != CTW_UNWEIGHTED; - bool d = spec->weighting == CTW_DICTIONARY; - enum ctables_area_type a = spec->user_area; - switch (spec->function) + struct ctables_axis *sub = ctables_axis_parse_primary (ctx); + if (!sub || !lex_match (ctx->lexer, T_LBRACK)) + return sub; + + enum ctables_summary_variant sv = CSV_CELL; + for (;;) { - case CTSF_COUNT: - return (d ? N_("Count") - : w ? N_("Adjusted Count") - : N_("Unweighted Count")); + int start_ofs = lex_ofs (ctx->lexer); - case CTSF_areaPCT_COUNT: - switch (a) - { - case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %"); - case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %"); - case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %"); - case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %"); - case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %"); - case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %"); - case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %"); - } - NOT_REACHED (); + /* Parse function. */ + enum ctables_summary_function function; + enum ctables_weighting weighting; + enum ctables_area_type area; + if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting, + &area)) + goto error; - case CTSF_areaPCT_VALIDN: - switch (a) + /* Parse percentile. */ + double percentile = 0; + if (function == CTSF_PTILE) { - case CTAT_TABLE: return w ? 
N_("Table Valid N %") : N_("Unweighted Table Valid N %"); - case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %"); - case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %"); - case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %"); - case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %"); - case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %"); - case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %"); + if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100)) + goto error; + percentile = lex_number (ctx->lexer); + lex_get (ctx->lexer); } - NOT_REACHED (); - case CTSF_areaPCT_TOTALN: - switch (a) + /* Parse label. */ + char *label = NULL; + if (lex_is_string (ctx->lexer)) { - case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %"); - case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %"); - case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %"); - case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %"); - case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %"); - case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %"); - case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %"); + label = ss_xstrdup (lex_tokss (ctx->lexer)); + lex_get (ctx->lexer); } - NOT_REACHED (); - case CTSF_MAXIMUM: return N_("Maximum"); - case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean"); - case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median"); - case CTSF_MINIMUM: return N_("Minimum"); - case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing"); - case CTSF_MODE: return w ? 
N_("Mode") : N_("Unweighted Mode"); - case CTSF_PTILE: NOT_REACHED (); - case CTSF_RANGE: return N_("Range"); - case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean"); - case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation"); - case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum"); - case CTSF_TOTALN: return (d ? N_("Total N") - : w ? N_("Adjusted Total N") - : N_("Unweighted Total N")); - case CTSF_VALIDN: return (d ? N_("Valid N") - : w ? N_("Adjusted Valid N") - : N_("Unweighted Valid N")); - case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance"); - case CTSF_areaPCT_SUM: - switch (a) + /* Parse format. */ + struct fmt_spec format; + const struct fmt_spec *formatp; + bool is_ctables_format = false; + if (lex_token (ctx->lexer) == T_ID + && has_digit (lex_tokcstr (ctx->lexer))) { - case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %"); - case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %"); - case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %"); - case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %"); - case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %"); - case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %"); - case CTAT_COL: return w ? 
N_("Column Sum %") : N_("Unweighted Column Sum %"); + if (!parse_ctables_format_specifier (ctx->lexer, &format, + &is_ctables_format)) + { + free (label); + goto error; + } + formatp = &format; } - NOT_REACHED (); + else + formatp = NULL; - case CTSF_areaID: - switch (a) + struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, + lex_ofs (ctx->lexer) - 1); + add_summary_spec (sub, function, weighting, area, percentile, label, + formatp, is_ctables_format, loc, sv); + free (label); + msg_location_destroy (loc); + + lex_match (ctx->lexer, T_COMMA); + if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS")) { - /* Don't bother translating these: they are for developers only. */ - case CTAT_TABLE: return "Table ID"; - case CTAT_LAYER: return "Layer ID"; - case CTAT_LAYERROW: return "Layer Row ID"; - case CTAT_LAYERCOL: return "Layer Column ID"; - case CTAT_SUBTABLE: return "Subtable ID"; - case CTAT_ROW: return "Row ID"; - case CTAT_COL: return "Column ID"; + if (!lex_force_match (ctx->lexer, T_LBRACK)) + goto error; + sv = CSV_TOTAL; + } + else if (lex_match (ctx->lexer, T_RBRACK)) + { + if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK)) + goto error; + return sub; } - NOT_REACHED (); } - NOT_REACHED (); +error: + ctables_axis_destroy (sub); + return NULL; } -static struct pivot_value * -ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) +static const struct ctables_axis * +find_scale (const struct ctables_axis *axis) { - if (!spec->label) + if (!axis) + return NULL; + else if (axis->op == CTAO_VAR) + return axis->scale ? axis : NULL; + else { - if (spec->function == CTSF_PTILE) + for (size_t i = 0; i < 2; i++) { - double p = spec->percentile; - char *s = (spec->weighting != CTW_UNWEIGHTED - ? 
xasprintf (_("Percentile %.2f"), p) - : xasprintf (_("Unweighted Percentile %.2f"), p)); - return pivot_value_new_user_text_nocopy (s); + const struct ctables_axis *scale = find_scale (axis->subs[i]); + if (scale) + return scale; } - else - return pivot_value_new_text (ctables_summary_label__ (spec)); + return NULL; } +} + +static const struct ctables_axis * +find_categorical_summary_spec (const struct ctables_axis *axis) +{ + if (!axis) + return NULL; + else if (axis->op == CTAO_VAR) + return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL; else { - struct substring in = ss_cstr (spec->label); - struct substring target = ss_cstr (")CILEVEL"); - - struct string out = DS_EMPTY_INITIALIZER; - for (;;) + for (size_t i = 0; i < 2; i++) { - size_t chunk = ss_find_substring (in, target); - ds_put_substring (&out, ss_head (in, chunk)); - ss_advance (&in, chunk); - if (!in.length) - return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out)); - - ss_advance (&in, target.length); - ds_put_format (&out, "%g", cilevel); + const struct ctables_axis *sum + = find_categorical_summary_spec (axis->subs[i]); + if (sum) + return sum; } + return NULL; } } -static const char * -ctables_summary_function_name (enum ctables_summary_function function, - enum ctables_weighting weighting, - enum ctables_area_type area, - char *buffer, size_t bufsize) +static struct ctables_axis * +ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx) { - const struct ctables_function_info *cfi = &ctables_function_info[function]; - snprintf (buffer, bufsize, "%s%s%s", - (weighting == CTW_UNWEIGHTED ? "U" - : weighting == CTW_DICTIONARY ? "" - : cfi->e_prefix ? "E" - : ""), - cfi->is_area ? 
ctables_area_type_name[area] : "", - cfi->basename.string); - return buffer; -} + int start_ofs = lex_ofs (ctx->lexer); + struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx); + if (!lhs) + return NULL; -static bool -add_summary_spec (struct ctables_axis *axis, - enum ctables_summary_function function, - enum ctables_weighting weighting, - enum ctables_area_type area, double percentile, - const char *label, const struct fmt_spec *format, - bool is_ctables_format, const struct msg_location *loc, - enum ctables_summary_variant sv) -{ - if (axis->op == CTAO_VAR) + while (lex_match (ctx->lexer, T_GT)) { - char function_name[128]; - ctables_summary_function_name (function, weighting, area, - function_name, sizeof function_name); - const char *var_name = var_get_name (axis->var); - switch (ctables_function_availability (function)) + struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx); + if (!rhs) { -#if 0 - case CTFA_MRSETS: - msg_at (SE, loc, _("Summary function %s applies only to multiple " - "response sets."), function_name); - msg_at (SN, axis->loc, _("'%s' is not a multiple response set."), - var_name); - return false; -#endif + ctables_axis_destroy (lhs); + return NULL; + } - case CTFA_SCALE: - if (!axis->scale && sv != CSV_TOTAL) - { - msg_at (SE, loc, - _("Summary function %s applies only to scale variables."), - function_name); - msg_at (SN, axis->loc, _("'%s' is not a scale variable."), - var_name); - return false; - } - break; + struct ctables_axis *nest = ctables_axis_new_nonterminal ( + CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs); - case CTFA_ALL: - break; + const struct ctables_axis *outer_scale = find_scale (lhs); + const struct ctables_axis *inner_scale = find_scale (rhs); + if (outer_scale && inner_scale) + { + msg_at (SE, nest->loc, _("Cannot nest scale variables.")); + msg_at (SN, outer_scale->loc, _("This is an outer scale variable.")); + msg_at (SN, inner_scale->loc, _("This is an inner scale variable.")); + ctables_axis_destroy 
(nest); + return NULL; } - struct ctables_summary_spec_set *set = &axis->specs[sv]; - if (set->n >= set->allocated) - set->specs = x2nrealloc (set->specs, &set->allocated, - sizeof *set->specs); + const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs); + if (outer_sum) + { + msg_at (SE, nest->loc, + _("Summaries may only be requested for categorical variables " + "at the innermost nesting level.")); + msg_at (SN, outer_sum->loc, + _("This outer categorical variable has a summary.")); + ctables_axis_destroy (nest); + return NULL; + } - struct ctables_summary_spec *dst = &set->specs[set->n++]; - *dst = (struct ctables_summary_spec) { - .function = function, - .weighting = weighting, - .calc_area = area, - .user_area = area, - .percentile = percentile, - .label = xstrdup_if_nonnull (label), - .format = (format ? *format - : ctables_summary_default_format (function, axis->var)), - .is_ctables_format = is_ctables_format, - }; - return true; - } - else - { - for (size_t i = 0; i < 2; i++) - if (!add_summary_spec (axis->subs[i], function, weighting, area, - percentile, label, format, is_ctables_format, - loc, sv)) - return false; - return true; + lhs = nest; } -} -static struct ctables_axis *ctables_axis_parse_stack ( - struct ctables_axis_parse_ctx *); + return lhs; +} static struct ctables_axis * -ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx) +ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx) { - if (lex_match (ctx->lexer, T_LPAREN)) + int start_ofs = lex_ofs (ctx->lexer); + struct ctables_axis *lhs = ctables_axis_parse_nest (ctx); + if (!lhs) + return NULL; + + while (lex_match (ctx->lexer, T_PLUS)) { - struct ctables_axis *sub = ctables_axis_parse_stack (ctx); - if (!sub || !lex_force_match (ctx->lexer, T_RPAREN)) + struct ctables_axis *rhs = ctables_axis_parse_nest (ctx); + if (!rhs) { - ctables_axis_destroy (sub); + ctables_axis_destroy (lhs); return NULL; } - return sub; + + lhs = ctables_axis_new_nonterminal 
(CTAO_STACK, lhs, rhs, + ctx->lexer, start_ofs); } - if (!lex_force_id (ctx->lexer)) - return NULL; + return lhs; +} - if (lex_tokcstr (ctx->lexer)[0] == '$') - { - lex_error (ctx->lexer, - _("Multiple response set support not implemented.")); - return NULL; - } +static bool +ctables_axis_parse (struct lexer *lexer, struct dictionary *dict, + struct ctables_axis **axisp) +{ + *axisp = NULL; + if (lex_token (lexer) == T_BY + || lex_token (lexer) == T_SLASH + || lex_token (lexer) == T_ENDCMD) + return true; - int start_ofs = lex_ofs (ctx->lexer); - struct variable *var = parse_variable (ctx->lexer, ctx->dict); - if (!var) - return NULL; + struct ctables_axis_parse_ctx ctx = { + .lexer = lexer, + .dict = dict, + }; + *axisp = ctables_axis_parse_stack (&ctx); + return *axisp; +} + +/* CTABLES categories. */ - struct ctables_axis *axis = xmalloc (sizeof *axis); - *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var }; +struct ctables_categories + { + size_t n_refs; + struct ctables_category *cats; + size_t n_cats; + bool show_empty; + }; - axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true - : lex_match_phrase (ctx->lexer, "[C]") ? false - : var_get_measure (var) == MEASURE_SCALE); - axis->loc = lex_ofs_location (ctx->lexer, start_ofs, - lex_ofs (ctx->lexer) - 1); - if (axis->scale && var_is_alpha (var)) +struct ctables_category + { + enum ctables_category_type + { + /* Explicit category lists. */ + CCT_NUMBER, + CCT_STRING, + CCT_NRANGE, /* Numerical range. */ + CCT_SRANGE, /* String range. */ + CCT_MISSING, + CCT_OTHERNM, + CCT_POSTCOMPUTE, + + /* Totals and subtotals. */ + CCT_SUBTOTAL, + CCT_TOTAL, + + /* Implicit category lists. */ + CCT_VALUE, + CCT_LABEL, + CCT_FUNCTION, + + /* For contributing to TOTALN. */ + CCT_EXCLUDED_MISSING, + } + type; + + struct ctables_category *subtotal; + + bool hide; + + union + { + double number; /* CCT_NUMBER. */ + struct substring string; /* CCT_STRING, in dictionary encoding. */ + double nrange[2]; /* CCT_NRANGE. 
*/ + struct substring srange[2]; /* CCT_SRANGE. */ + + struct + { + char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */ + bool hide_subcategories; /* CCT_SUBTOTAL. */ + }; + + /* CCT_POSTCOMPUTE. */ + struct + { + const struct ctables_postcompute *pc; + enum fmt_type parse_format; + }; + + /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */ + struct + { + bool include_missing; + bool sort_ascending; + + /* CCT_FUNCTION. */ + enum ctables_summary_function sort_function; + enum ctables_weighting weighting; + enum ctables_area_type area; + struct variable *sort_var; + double percentile; + }; + }; + + /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL, + CCT_FUNCTION, CCT_EXCLUDED_MISSING. */ + struct msg_location *location; + }; + +static void +ctables_category_uninit (struct ctables_category *cat) +{ + if (!cat) + return; + + msg_location_destroy (cat->location); + switch (cat->type) { - msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale " - "variable."), - var_get_name (var)); - ctables_axis_destroy (axis); - return NULL; - } + case CCT_NUMBER: + case CCT_NRANGE: + case CCT_MISSING: + case CCT_OTHERNM: + case CCT_POSTCOMPUTE: + break; - return axis; + case CCT_STRING: + ss_dealloc (&cat->string); + break; + + case CCT_SRANGE: + ss_dealloc (&cat->srange[0]); + ss_dealloc (&cat->srange[1]); + break; + + case CCT_SUBTOTAL: + case CCT_TOTAL: + free (cat->total_label); + break; + + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + break; + + case CCT_EXCLUDED_MISSING: + break; + } } static bool -has_digit (const char *s) +nullable_substring_equal (const struct substring *a, + const struct substring *b) { - return s[strcspn (s, "0123456789")] != '\0'; + return !a->string ? 
!b->string : b->string && ss_equals (*a, *b); } static bool -parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format, - bool *is_ctables_format) +ctables_category_equal (const struct ctables_category *a, + const struct ctables_category *b) { - char type[FMT_TYPE_LEN_MAX + 1]; - if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d)) + if (a->type != b->type) return false; - if (!strcasecmp (type, "NEGPAREN")) - format->type = CTEF_NEGPAREN; - else if (!strcasecmp (type, "NEQUAL")) - format->type = CTEF_NEQUAL; - else if (!strcasecmp (type, "PAREN")) - format->type = CTEF_PAREN; - else if (!strcasecmp (type, "PCTPAREN")) - format->type = CTEF_PCTPAREN; - else + switch (a->type) { - *is_ctables_format = false; - return (parse_format_specifier (lexer, format) - && fmt_check_output (format) - && fmt_check_type_compat (format, VAL_NUMERIC)); - } + case CCT_NUMBER: + return a->number == b->number; - lex_get (lexer); - if (format->w < 2) - { - lex_next_error (lexer, -1, -1, - _("Output format %s requires width 2 or greater."), type); - return false; - } - else if (format->d > format->w - 1) - { - lex_next_error (lexer, -1, -1, _("Output format %s requires width " - "greater than decimals."), type); - return false; - } - else - { - *is_ctables_format = true; - return true; - } -} + case CCT_STRING: + return ss_equals (a->string, b->string); -static struct ctables_axis * -ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) -{ - struct ctables_axis *sub = ctables_axis_parse_primary (ctx); - if (!sub || !lex_match (ctx->lexer, T_LBRACK)) - return sub; + case CCT_NRANGE: + return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1]; - enum ctables_summary_variant sv = CSV_CELL; - for (;;) - { - int start_ofs = lex_ofs (ctx->lexer); - - /* Parse function. 
*/ - enum ctables_summary_function function; - enum ctables_weighting weighting; - enum ctables_area_type area; - if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting, - &area)) - goto error; - - /* Parse percentile. */ - double percentile = 0; - if (function == CTSF_PTILE) - { - if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100)) - goto error; - percentile = lex_number (ctx->lexer); - lex_get (ctx->lexer); - } - - /* Parse label. */ - char *label = NULL; - if (lex_is_string (ctx->lexer)) - { - label = ss_xstrdup (lex_tokss (ctx->lexer)); - lex_get (ctx->lexer); - } + case CCT_SRANGE: + return (nullable_substring_equal (&a->srange[0], &b->srange[0]) + && nullable_substring_equal (&a->srange[1], &b->srange[1])); - /* Parse format. */ - struct fmt_spec format; - const struct fmt_spec *formatp; - bool is_ctables_format = false; - if (lex_token (ctx->lexer) == T_ID - && has_digit (lex_tokcstr (ctx->lexer))) - { - if (!parse_ctables_format_specifier (ctx->lexer, &format, - &is_ctables_format)) - { - free (label); - goto error; - } - formatp = &format; - } - else - formatp = NULL; + case CCT_MISSING: + case CCT_OTHERNM: + return true; - struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, - lex_ofs (ctx->lexer) - 1); - add_summary_spec (sub, function, weighting, area, percentile, label, - formatp, is_ctables_format, loc, sv); - free (label); - msg_location_destroy (loc); + case CCT_POSTCOMPUTE: + return a->pc == b->pc; - lex_match (ctx->lexer, T_COMMA); - if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS")) - { - if (!lex_force_match (ctx->lexer, T_LBRACK)) - goto error; - sv = CSV_TOTAL; - } - else if (lex_match (ctx->lexer, T_RBRACK)) - { - if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK)) - goto error; - return sub; - } - } + case CCT_SUBTOTAL: + case CCT_TOTAL: + return !strcmp (a->total_label, b->total_label); -error: - ctables_axis_destroy (sub); - return NULL; -} + case CCT_VALUE: + case CCT_LABEL: + 
case CCT_FUNCTION: + return (a->include_missing == b->include_missing + && a->sort_ascending == b->sort_ascending + && a->sort_function == b->sort_function + && a->sort_var == b->sort_var + && a->percentile == b->percentile); -static const struct ctables_axis * -find_scale (const struct ctables_axis *axis) -{ - if (!axis) - return NULL; - else if (axis->op == CTAO_VAR) - return axis->scale ? axis : NULL; - else - { - for (size_t i = 0; i < 2; i++) - { - const struct ctables_axis *scale = find_scale (axis->subs[i]); - if (scale) - return scale; - } - return NULL; + case CCT_EXCLUDED_MISSING: + return true; } -} -static const struct ctables_axis * -find_categorical_summary_spec (const struct ctables_axis *axis) -{ - if (!axis) - return NULL; - else if (axis->op == CTAO_VAR) - return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL; - else - { - for (size_t i = 0; i < 2; i++) - { - const struct ctables_axis *sum - = find_categorical_summary_spec (axis->subs[i]); - if (sum) - return sum; - } - return NULL; - } + NOT_REACHED (); } -static struct ctables_axis * -ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx) +static void +ctables_categories_unref (struct ctables_categories *c) { - int start_ofs = lex_ofs (ctx->lexer); - struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx); - if (!lhs) - return NULL; - - while (lex_match (ctx->lexer, T_GT)) - { - struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx); - if (!rhs) - { - ctables_axis_destroy (lhs); - return NULL; - } - - struct ctables_axis *nest = ctables_axis_new_nonterminal ( - CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs); - - const struct ctables_axis *outer_scale = find_scale (lhs); - const struct ctables_axis *inner_scale = find_scale (rhs); - if (outer_scale && inner_scale) - { - msg_at (SE, nest->loc, _("Cannot nest scale variables.")); - msg_at (SN, outer_scale->loc, _("This is an outer scale variable.")); - msg_at (SN, inner_scale->loc, _("This is an inner scale variable.")); - 
ctables_axis_destroy (nest); - return NULL; - } - - const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs); - if (outer_sum) - { - msg_at (SE, nest->loc, - _("Summaries may only be requested for categorical variables " - "at the innermost nesting level.")); - msg_at (SN, outer_sum->loc, - _("This outer categorical variable has a summary.")); - ctables_axis_destroy (nest); - return NULL; - } + if (!c) + return; - lhs = nest; - } + assert (c->n_refs > 0); + if (--c->n_refs) + return; - return lhs; + for (size_t i = 0; i < c->n_cats; i++) + ctables_category_uninit (&c->cats[i]); + free (c->cats); + free (c); } -static struct ctables_axis * -ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx) +static bool +ctables_categories_equal (const struct ctables_categories *a, + const struct ctables_categories *b) { - int start_ofs = lex_ofs (ctx->lexer); - struct ctables_axis *lhs = ctables_axis_parse_nest (ctx); - if (!lhs) - return NULL; - - while (lex_match (ctx->lexer, T_PLUS)) - { - struct ctables_axis *rhs = ctables_axis_parse_nest (ctx); - if (!rhs) - { - ctables_axis_destroy (lhs); - return NULL; - } + if (a->n_cats != b->n_cats || a->show_empty != b->show_empty) + return false; - lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs, - ctx->lexer, start_ofs); - } + for (size_t i = 0; i < a->n_cats; i++) + if (!ctables_category_equal (&a->cats[i], &b->cats[i])) + return false; - return lhs; + return true; } + +/* CTABLES variable nesting and stacking. */ -static bool -ctables_axis_parse (struct lexer *lexer, struct dictionary *dict, - struct ctables *ct, struct ctables_table *t, - enum pivot_axis_type a) -{ - if (lex_token (lexer) == T_BY - || lex_token (lexer) == T_SLASH - || lex_token (lexer) == T_ENDCMD) - return true; +/* A nested sequence of variables, e.g. a > b > c. 
*/ +struct ctables_nest + { + struct variable **vars; + size_t n; + size_t scale_idx; + size_t summary_idx; + size_t *areas[N_CTATS]; + size_t n_areas[N_CTATS]; + size_t group_head; - struct ctables_axis_parse_ctx ctx = { - .lexer = lexer, - .dict = dict, - .ct = ct, - .t = t + struct ctables_summary_spec_set specs[N_CSVS]; }; - t->axes[a] = ctables_axis_parse_stack (&ctx); - return t->axes[a] != NULL; -} -static void -ctables_chisq_destroy (struct ctables_chisq *chisq) -{ - free (chisq); -} +/* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */ +struct ctables_stack + { + struct ctables_nest *nests; + size_t n; + }; static void -ctables_pairwise_destroy (struct ctables_pairwise *pairwise) +ctables_nest_uninit (struct ctables_nest *nest) { - free (pairwise); + free (nest->vars); + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + ctables_summary_spec_set_uninit (&nest->specs[sv]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + free (nest->areas[at]); } static void -ctables_table_destroy (struct ctables_table *t) +ctables_stack_uninit (struct ctables_stack *stack) { - if (!t) - return; + if (stack) + { + for (size_t i = 0; i < stack->n; i++) + ctables_nest_uninit (&stack->nests[i]); + free (stack->nests); + } +} - for (size_t i = 0; i < t->n_sections; i++) - ctables_section_uninit (&t->sections[i]); - free (t->sections); +static struct ctables_stack +nest_fts (struct ctables_stack s0, struct ctables_stack s1) +{ + if (!s0.n) + return s1; + else if (!s1.n) + return s0; - for (size_t i = 0; i < t->n_categories; i++) - ctables_categories_unref (t->categories[i]); - free (t->categories); + struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) }; + for (size_t i = 0; i < s0.n; i++) + for (size_t j = 0; j < s1.n; j++) + { + const struct ctables_nest *a = &s0.nests[i]; + const struct ctables_nest *b = &s1.nests[j]; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - ctables_axis_destroy 
(t->axes[a]); - ctables_stack_uninit (&t->stacks[a]); - } - free (t->summary_specs.specs); + size_t allocate = a->n + b->n; + struct variable **vars = xnmalloc (allocate, sizeof *vars); + size_t n = 0; + for (size_t k = 0; k < a->n; k++) + vars[n++] = a->vars[k]; + for (size_t k = 0; k < b->n; k++) + vars[n++] = b->vars[k]; + assert (n == allocate); - struct ctables_value *ctv, *next_ctv; - HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node, - &t->clabels_values_map) - { - value_destroy (&ctv->value, var_get_width (t->clabels_example)); - hmap_delete (&t->clabels_values_map, &ctv->node); - free (ctv); - } - hmap_destroy (&t->clabels_values_map); - free (t->clabels_values); + const struct ctables_nest *summary_src; + if (!a->specs[CSV_CELL].var) + summary_src = b; + else if (!b->specs[CSV_CELL].var) + summary_src = a; + else + NOT_REACHED (); - free (t->sum_vars); - free (t->caption); - free (t->corner); - free (t->title); - ctables_chisq_destroy (t->chisq); - ctables_pairwise_destroy (t->pairwise); - free (t); + struct ctables_nest *new = &stack.nests[stack.n++]; + *new = (struct ctables_nest) { + .vars = vars, + .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx + : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx + : SIZE_MAX), + .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx + : b->summary_idx != SIZE_MAX ? 
a->n + b->summary_idx + : SIZE_MAX), + .n = n, + }; + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]); + } + ctables_stack_uninit (&s0); + ctables_stack_uninit (&s1); + return stack; } -static void -ctables_destroy (struct ctables *ct) +static struct ctables_stack +stack_fts (struct ctables_stack s0, struct ctables_stack s1) { - if (!ct) - return; - - struct ctables_postcompute *pc, *next_pc; - HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node, - &ct->postcomputes) + struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) }; + for (size_t i = 0; i < s0.n; i++) + stack.nests[stack.n++] = s0.nests[i]; + for (size_t i = 0; i < s1.n; i++) { - free (pc->name); - msg_location_destroy (pc->location); - ctables_pcexpr_destroy (pc->expr); - free (pc->label); - if (pc->specs) - { - ctables_summary_spec_set_uninit (pc->specs); - free (pc->specs); - } - hmap_delete (&ct->postcomputes, &pc->hmap_node); - free (pc); + stack.nests[stack.n] = s1.nests[i]; + stack.nests[stack.n].group_head += s0.n; + stack.n++; } - hmap_destroy (&ct->postcomputes); - - fmt_settings_uninit (&ct->ctables_formats); - pivot_table_look_unref (ct->look); - free (ct->zero); - free (ct->missing); - free (ct->vlabels); - for (size_t i = 0; i < ct->n_tables; i++) - ctables_table_destroy (ct->tables[i]); - free (ct->tables); - free (ct); + assert (stack.n == s0.n + s1.n); + free (s0.nests); + free (s1.nests); + return stack; } -static struct ctables_category -cct_nrange (double low, double high) +static struct ctables_stack +var_fts (const struct ctables_axis *a) { - return (struct ctables_category) { - .type = CCT_NRANGE, - .nrange = { low, high } - }; -} + struct variable **vars = xmalloc (sizeof *vars); + *vars = a->var; -static struct ctables_category -cct_srange (struct substring low, struct substring high) -{ - return (struct ctables_category) { - .type = CCT_SRANGE, 
- .srange = { low, high } + bool is_summary = a->specs[CSV_CELL].n || a->scale; + struct ctables_nest *nest = xmalloc (sizeof *nest); + *nest = (struct ctables_nest) { + .vars = vars, + .n = 1, + .scale_idx = a->scale ? 0 : SIZE_MAX, + .summary_idx = is_summary ? 0 : SIZE_MAX, }; + if (is_summary) + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + { + ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]); + nest->specs[sv].var = a->var; + nest->specs[sv].is_scale = a->scale; + } + return (struct ctables_stack) { .nests = nest, .n = 1 }; } -static bool -ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories, - struct ctables_category *cat) +static struct ctables_stack +enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a) { - char *total_label; - if (lex_match (lexer, T_EQUALS)) + if (!a) + return (struct ctables_stack) { .n = 0 }; + + switch (a->op) { - if (!lex_force_string (lexer)) - return false; + case CTAO_VAR: + return var_fts (a); - total_label = ss_xstrdup (lex_tokss (lexer)); - lex_get (lexer); + case CTAO_STACK: + return stack_fts (enumerate_fts (axis_type, a->subs[0]), + enumerate_fts (axis_type, a->subs[1])); + + case CTAO_NEST: + /* This should consider any of the scale variables found in the result to + be linked to each other listwise for SMISSING=LISTWISE. 
*/ + return nest_fts (enumerate_fts (axis_type, a->subs[0]), + enumerate_fts (axis_type, a->subs[1])); } - else - total_label = xstrdup (_("Subtotal")); - *cat = (struct ctables_category) { - .type = CCT_SUBTOTAL, - .hide_subcategories = hide_subcategories, - .total_label = total_label - }; - return true; + NOT_REACHED (); } + +enum ctables_vlabel + { + CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT, + CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE, + CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL, + CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, + }; -static struct substring -parse_substring (struct lexer *lexer, struct dictionary *dict) -{ - struct substring s = recode_substring_pool ( - dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL); - ss_rtrim (&s, ss_cstr (" ")); - lex_get (lexer); - return s; -} +struct ctables_cell + { + /* In struct ctables_section's 'cells' hmap. Indexed by all the values in + all the axes (except the scalar variable, if any). */ + struct hmap_node node; -static bool -ctables_table_parse_explicit_category (struct lexer *lexer, - struct dictionary *dict, - struct ctables *ct, - struct ctables_category *cat) -{ - if (lex_match_id (lexer, "OTHERNM")) - *cat = (struct ctables_category) { .type = CCT_OTHERNM }; - else if (lex_match_id (lexer, "MISSING")) - *cat = (struct ctables_category) { .type = CCT_MISSING }; - else if (lex_match_id (lexer, "SUBTOTAL")) - return ctables_table_parse_subtotal (lexer, false, cat); - else if (lex_match_id (lexer, "HSUBTOTAL")) - return ctables_table_parse_subtotal (lexer, true, cat); - else if (lex_match_id (lexer, "LO")) - { - if (!lex_force_match_id (lexer, "THRU")) - return false; - if (lex_is_string (lexer)) - { - struct substring sr0 = { .string = NULL }; - struct substring sr1 = parse_substring (lexer, dict); - *cat = cct_srange (sr0, sr1); - } - else if (lex_force_num (lexer)) - { - *cat = cct_nrange (-DBL_MAX, lex_number (lexer)); - lex_get (lexer); - } - else - return false; - } - else if (lex_is_number (lexer)) - { - 
double number = lex_number (lexer); - lex_get (lexer); - if (lex_match_id (lexer, "THRU")) - { - if (lex_match_id (lexer, "HI")) - *cat = cct_nrange (number, DBL_MAX); - else - { - if (!lex_force_num (lexer)) - return false; - *cat = cct_nrange (number, lex_number (lexer)); - lex_get (lexer); - } - } - else - *cat = (struct ctables_category) { - .type = CCT_NUMBER, - .number = number - }; - } - else if (lex_is_string (lexer)) - { - struct substring s = parse_substring (lexer, dict); - if (lex_match_id (lexer, "THRU")) - { - if (lex_match_id (lexer, "HI")) - { - struct substring sr1 = { .string = NULL }; - *cat = cct_srange (s, sr1); - } - else - { - if (!lex_force_string (lexer)) - { - ss_dealloc (&s); - return false; - } - struct substring sr1 = parse_substring (lexer, dict); - *cat = cct_srange (s, sr1); - } - } - else - *cat = (struct ctables_category) { .type = CCT_STRING, .string = s }; - } - else if (lex_match (lexer, T_AND)) - { - if (!lex_force_id (lexer)) - return false; - struct ctables_postcompute *pc = ctables_find_postcompute ( - ct, lex_tokcstr (lexer)); - if (!pc) - { - struct msg_location *loc = lex_get_location (lexer, -1, 0); - msg_at (SE, loc, _("Unknown postcompute &%s."), - lex_tokcstr (lexer)); - msg_location_destroy (loc); - return false; - } - lex_get (lexer); + /* The areas that contain this cell. 
*/ + uint32_t omit_areas; + struct ctables_area *areas[N_CTATS]; - *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc }; - } - else - { - lex_error (lexer, NULL); - return false; - } + bool hide; - return true; -} + bool postcompute; + enum ctables_summary_variant sv; -static bool -parse_category_string (struct msg_location *location, - struct substring s, const struct dictionary *dict, - enum fmt_type format, double *n) -{ - union value v; - char *error = data_in (s, dict_get_encoding (dict), format, - settings_get_fmt_settings (), &v, 0, NULL); - if (error) - { - msg_at (SE, location, - _("Failed to parse category specification as format %s: %s."), - fmt_name (format), error); - free (error); - return false; - } + struct ctables_cell_axis + { + struct ctables_cell_value + { + const struct ctables_category *category; + union value value; + } + *cvs; + int leaf; + } + axes[PIVOT_N_AXES]; - *n = v.f; - return true; -} + union ctables_summary *summaries; + }; -static struct ctables_category * -ctables_find_category_for_postcompute__ (const struct ctables_categories *cats, - const struct ctables_pcexpr *e) -{ - struct ctables_category *best = NULL; - size_t n_subtotals = 0; - for (size_t i = 0; i < cats->n_cats; i++) - { - struct ctables_category *cat = &cats->cats[i]; - switch (e->op) - { - case CTPO_CAT_NUMBER: - if (cat->type == CCT_NUMBER && cat->number == e->number) - best = cat; - break; +struct ctables_postcompute + { + struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */ + char *name; /* Name, without leading &. */ - case CTPO_CAT_STRING: - if (cat->type == CCT_STRING && ss_equals (cat->string, e->string)) - best = cat; - break; + struct msg_location *location; /* Location of definition. 
*/ + struct ctables_pcexpr *expr; + char *label; + struct ctables_summary_spec_set *specs; + bool hide_source_cats; + }; - case CTPO_CAT_NRANGE: - if (cat->type == CCT_NRANGE - && cat->nrange[0] == e->nrange[0] - && cat->nrange[1] == e->nrange[1]) - best = cat; - break; +struct ctables + { + const struct dictionary *dict; + struct pivot_table_look *look; - case CTPO_CAT_SRANGE: - if (cat->type == CCT_SRANGE - && nullable_substring_equal (&cat->srange[0], &e->srange[0]) - && nullable_substring_equal (&cat->srange[1], &e->srange[1])) - best = cat; - break; + /* For CTEF_* formats. */ + struct fmt_settings ctables_formats; - case CTPO_CAT_MISSING: - if (cat->type == CCT_MISSING) - best = cat; - break; + /* If this is NULL, zeros are displayed using the normal print format. + Otherwise, this string is displayed. */ + char *zero; - case CTPO_CAT_OTHERNM: - if (cat->type == CCT_OTHERNM) - best = cat; - break; + /* If this is NULL, missing values are displayed using the normal print + format. Otherwise, this string is displayed. */ + char *missing; - case CTPO_CAT_SUBTOTAL: - if (cat->type == CCT_SUBTOTAL) - { - n_subtotals++; - if (e->subtotal_index == n_subtotals) - return cat; - else if (e->subtotal_index == 0) - best = cat; - } - break; + /* Indexed by variable dictionary index. */ + enum ctables_vlabel *vlabels; - case CTPO_CAT_TOTAL: - if (cat->type == CCT_TOTAL) - return cat; - break; + struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */ - case CTPO_CONSTANT: - case CTPO_ADD: - case CTPO_SUB: - case CTPO_MUL: - case CTPO_DIV: - case CTPO_POW: - case CTPO_NEG: - NOT_REACHED (); - } - } - if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1) - return NULL; - return best; -} + bool mrsets_count_duplicates; /* MRSETS. */ + bool smissing_listwise; /* SMISSING. */ + struct variable *e_weight; /* WEIGHT. */ + int hide_threshold; /* HIDESMALLCOUNTS. 
*/ -static struct ctables_category * -ctables_find_category_for_postcompute (const struct dictionary *dict, - const struct ctables_categories *cats, - enum fmt_type parse_format, - const struct ctables_pcexpr *e) -{ - if (parse_format != FMT_F) - { - if (e->op == CTPO_CAT_STRING) - { - double number; - if (!parse_category_string (e->location, e->string, dict, - parse_format, &number)) - return NULL; + struct ctables_table **tables; + size_t n_tables; + }; - struct ctables_pcexpr e2 = { - .op = CTPO_CAT_NUMBER, - .number = number, - .location = e->location, - }; - return ctables_find_category_for_postcompute__ (cats, &e2); - } - else if (e->op == CTPO_CAT_SRANGE) - { - double nrange[2]; - if (!e->srange[0].string) - nrange[0] = -DBL_MAX; - else if (!parse_category_string (e->location, e->srange[0], dict, - parse_format, &nrange[0])) - return NULL; +static struct ctables_postcompute *ctables_find_postcompute (struct ctables *, + const char *name); - if (!e->srange[1].string) - nrange[1] = DBL_MAX; - else if (!parse_category_string (e->location, e->srange[1], dict, - parse_format, &nrange[1])) - return NULL; +struct ctables_value + { + struct hmap_node node; + union value value; + int leaf; + }; - struct ctables_pcexpr e2 = { - .op = CTPO_CAT_NRANGE, - .nrange = { nrange[0], nrange[1] }, - .location = e->location, - }; - return ctables_find_category_for_postcompute__ (cats, &e2); - } - } - return ctables_find_category_for_postcompute__ (cats, e); -} +struct ctables_occurrence + { + struct hmap_node node; + union value value; + }; -static bool -ctables_recursive_check_postcompute (struct dictionary *dict, - const struct ctables_pcexpr *e, - struct ctables_category *pc_cat, - const struct ctables_categories *cats, - const struct msg_location *cats_location) -{ - switch (e->op) - { - case CTPO_CAT_NUMBER: - case CTPO_CAT_STRING: - case CTPO_CAT_NRANGE: - case CTPO_CAT_SRANGE: - case CTPO_CAT_MISSING: - case CTPO_CAT_OTHERNM: - case CTPO_CAT_SUBTOTAL: - case 
CTPO_CAT_TOTAL: - { - struct ctables_category *cat = ctables_find_category_for_postcompute ( - dict, cats, pc_cat->parse_format, e); - if (!cat) - { - if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0) - { - size_t n_subtotals = 0; - for (size_t i = 0; i < cats->n_cats; i++) - n_subtotals += cats->cats[i].type == CCT_SUBTOTAL; - if (n_subtotals > 1) - { - msg_at (SE, cats_location, - ngettext ("These categories include %zu instance " - "of SUBTOTAL or HSUBTOTAL, so references " - "from computed categories must refer to " - "subtotals by position, " - "e.g. SUBTOTAL[1].", - "These categories include %zu instances " - "of SUBTOTAL or HSUBTOTAL, so references " - "from computed categories must refer to " - "subtotals by position, " - "e.g. SUBTOTAL[1].", - n_subtotals), - n_subtotals); - msg_at (SN, e->location, - _("This is the reference that lacks a position.")); - return NULL; - } - } +struct ctables_section + { + /* Settings. */ + struct ctables_table *table; + struct ctables_nest *nests[PIVOT_N_AXES]; - msg_at (SE, pc_cat->location, - _("Computed category &%s references a category not included " - "in the category list."), - pc_cat->pc->name); - msg_at (SN, e->location, _("This is the missing category.")); - if (e->op == CTPO_CAT_SUBTOTAL) - msg_at (SN, cats_location, - _("To fix the problem, add subtotals to the " - "list of categories here.")); - else if (e->op == CTPO_CAT_TOTAL) - msg (SN, _("To fix the problem, add TOTAL=YES to the variable's " - "CATEGORIES specification.")); - else - msg_at (SN, cats_location, - _("To fix the problem, add the missing category to the " - "list of categories here.")); - return false; - } - if (pc_cat->pc->hide_source_cats) - cat->hide = true; - return true; - } + /* Data. */ + struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */ + struct hmap cells; /* Contains "struct ctables_cell"s. */ + struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. 
*/ + }; - case CTPO_CONSTANT: - return true; +static void ctables_section_uninit (struct ctables_section *); - case CTPO_ADD: - case CTPO_SUB: - case CTPO_MUL: - case CTPO_DIV: - case CTPO_POW: - case CTPO_NEG: - for (size_t i = 0; i < 2; i++) - if (e->subs[i] && !ctables_recursive_check_postcompute ( - dict, e->subs[i], pc_cat, cats, cats_location)) - return false; - return true; +struct ctables_table + { + struct ctables *ctables; + struct ctables_axis *axes[PIVOT_N_AXES]; + struct ctables_stack stacks[PIVOT_N_AXES]; + struct ctables_section *sections; + size_t n_sections; + enum pivot_axis_type summary_axis; + struct ctables_summary_spec_set summary_specs; + struct variable **sum_vars; + size_t n_sum_vars; + + enum pivot_axis_type slabels_axis; + bool slabels_visible; + + /* The innermost category labels for axis 'a' appear on axis label_axis[a]. + + Most commonly, label_axis[a] == a, and in particular we always have + label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER. + + If ROWLABELS or COLLABELS is specified, then one of + label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the + opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ. + + If any category labels are moved, then 'clabels_example' is one of the + variables being moved (and it is otherwise NULL). All of the variables + being moved have the same width, value labels, and categories, so this + example variable can be used to find those out. + + The remaining members in this group are relevant only if category labels + are moved. + + 'clabels_values_map' holds a "struct ctables_value" for all the values + that appear in all of the variables in the moved categories. It is + accumulated as the data is read. Once the data is fully read, its + sorted values are put into 'clabels_values' and 'n_clabels_values'. 
+ */ + enum pivot_axis_type label_axis[PIVOT_N_AXES]; + enum pivot_axis_type clabels_from_axis; + enum pivot_axis_type clabels_to_axis; + const struct variable *clabels_example; + struct hmap clabels_values_map; + struct ctables_value **clabels_values; + size_t n_clabels_values; + + /* Indexed by variable dictionary index. */ + struct ctables_categories **categories; + size_t n_categories; + + double cilevel; + + char *caption; + char *corner; + char *title; + + struct ctables_chisq *chisq; + struct ctables_pairwise *pairwise; + }; + +/* Chi-square test (SIGTEST). */ +struct ctables_chisq + { + double alpha; + bool include_mrsets; + bool all_visible; + }; + +/* Pairwise comparison test (COMPARETEST). */ +struct ctables_pairwise + { + enum { PROP, MEAN } type; + double alpha[2]; + bool include_mrsets; + bool meansvariance_allcats; + bool all_visible; + enum { BONFERRONI = 1, BH } adjust; + bool merge; + bool apa_style; + bool show_sig; + }; + + + +static bool +parse_col_width (struct lexer *lexer, const char *name, double *width) +{ + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "DEFAULT")) + *width = SYSMIS; + else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX)) + { + *width = lex_number (lexer); + lex_get (lexer); } + else + return false; - NOT_REACHED (); + return true; } static bool -all_strings (struct variable **vars, size_t n_vars, - const struct ctables_category *cat) +parse_bool (struct lexer *lexer, bool *b) { - for (size_t j = 0; j < n_vars; j++) - if (var_is_numeric (vars[j])) - { - msg_at (SE, cat->location, - _("This category specification may be applied only to string " - "variables, but this subcommand tries to apply it to " - "numeric variable %s."), - var_get_name (vars[j])); - return false; - } + if (lex_match_id (lexer, "NO")) + *b = false; + else if (lex_match_id (lexer, "YES")) + *b = true; + else + { + lex_error_expecting (lexer, "YES", "NO"); + return false; + } return true; } -static bool -ctables_table_parse_categories 
(struct lexer *lexer, struct dictionary *dict, - struct ctables *ct, struct ctables_table *t) +static void +ctables_chisq_destroy (struct ctables_chisq *chisq) { - if (!lex_match_id (lexer, "VARIABLES")) - return false; - lex_match (lexer, T_EQUALS); + free (chisq); +} - struct variable **vars; - size_t n_vars; - if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH)) - return false; +static void +ctables_pairwise_destroy (struct ctables_pairwise *pairwise) +{ + free (pairwise); +} - const struct fmt_spec *common_format = var_get_print_format (vars[0]); - for (size_t i = 1; i < n_vars; i++) +static void +ctables_table_destroy (struct ctables_table *t) +{ + if (!t) + return; + + for (size_t i = 0; i < t->n_sections; i++) + ctables_section_uninit (&t->sections[i]); + free (t->sections); + + for (size_t i = 0; i < t->n_categories; i++) + ctables_categories_unref (t->categories[i]); + free (t->categories); + + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - const struct fmt_spec *f = var_get_print_format (vars[i]); - if (f->type != common_format->type) - { - common_format = NULL; - break; - } + ctables_axis_destroy (t->axes[a]); + ctables_stack_uninit (&t->stacks[a]); } - bool parse_strings - = (common_format - && (fmt_get_category (common_format->type) - & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))); + free (t->summary_specs.specs); - struct ctables_categories *c = xmalloc (sizeof *c); - *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true }; - for (size_t i = 0; i < n_vars; i++) + struct ctables_value *ctv, *next_ctv; + HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node, + &t->clabels_values_map) { - struct ctables_categories **cp - = &t->categories[var_get_dict_index (vars[i])]; - ctables_categories_unref (*cp); - *cp = c; + value_destroy (&ctv->value, var_get_width (t->clabels_example)); + hmap_delete (&t->clabels_values_map, &ctv->node); + free (ctv); } + hmap_destroy (&t->clabels_values_map); + 
free (t->clabels_values); - size_t allocated_cats = 0; - int cats_start_ofs = -1; - int cats_end_ofs = -1; - if (lex_match (lexer, T_LBRACK)) + free (t->sum_vars); + free (t->caption); + free (t->corner); + free (t->title); + ctables_chisq_destroy (t->chisq); + ctables_pairwise_destroy (t->pairwise); + free (t); +} + +static void +ctables_destroy (struct ctables *ct) +{ + if (!ct) + return; + + struct ctables_postcompute *pc, *next_pc; + HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node, + &ct->postcomputes) { - cats_start_ofs = lex_ofs (lexer); - do + free (pc->name); + msg_location_destroy (pc->location); + ctables_pcexpr_destroy (pc->expr); + free (pc->label); + if (pc->specs) { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + ctables_summary_spec_set_uninit (pc->specs); + free (pc->specs); + } + hmap_delete (&ct->postcomputes, &pc->hmap_node); + free (pc); + } + hmap_destroy (&ct->postcomputes); - int start_ofs = lex_ofs (lexer); - struct ctables_category *cat = &c->cats[c->n_cats]; - if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat)) - goto error; - cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); - c->n_cats++; + fmt_settings_uninit (&ct->ctables_formats); + pivot_table_look_unref (ct->look); + free (ct->zero); + free (ct->missing); + free (ct->vlabels); + for (size_t i = 0; i < ct->n_tables; i++) + ctables_table_destroy (ct->tables[i]); + free (ct->tables); + free (ct); +} - lex_match (lexer, T_COMMA); - } - while (!lex_match (lexer, T_RBRACK)); - cats_end_ofs = lex_ofs (lexer) - 1; +static struct ctables_category +cct_nrange (double low, double high) +{ + return (struct ctables_category) { + .type = CCT_NRANGE, + .nrange = { low, high } + }; +} + +static struct ctables_category +cct_srange (struct substring low, struct substring high) +{ + return (struct ctables_category) { + .type = CCT_SRANGE, + .srange = { low, high } + }; +} + +static 
bool +ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories, + struct ctables_category *cat) +{ + char *total_label; + if (lex_match (lexer, T_EQUALS)) + { + if (!lex_force_string (lexer)) + return false; + + total_label = ss_xstrdup (lex_tokss (lexer)); + lex_get (lexer); } + else + total_label = xstrdup (_("Subtotal")); - struct ctables_category cat = { - .type = CCT_VALUE, - .include_missing = false, - .sort_ascending = true, + *cat = (struct ctables_category) { + .type = CCT_SUBTOTAL, + .hide_subcategories = hide_subcategories, + .total_label = total_label }; - bool show_totals = false; - char *total_label = NULL; - bool totals_before = false; - while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) + return true; +} + +static bool +ctables_table_parse_explicit_category (struct lexer *lexer, + struct dictionary *dict, + struct ctables *ct, + struct ctables_category *cat) +{ + if (lex_match_id (lexer, "OTHERNM")) + *cat = (struct ctables_category) { .type = CCT_OTHERNM }; + else if (lex_match_id (lexer, "MISSING")) + *cat = (struct ctables_category) { .type = CCT_MISSING }; + else if (lex_match_id (lexer, "SUBTOTAL")) + return ctables_table_parse_subtotal (lexer, false, cat); + else if (lex_match_id (lexer, "HSUBTOTAL")) + return ctables_table_parse_subtotal (lexer, true, cat); + else if (lex_match_id (lexer, "LO")) { - if (!c->n_cats && lex_match_id (lexer, "ORDER")) + if (!lex_force_match_id (lexer, "THRU")) + return false; + if (lex_is_string (lexer)) { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "A")) - cat.sort_ascending = true; - else if (lex_match_id (lexer, "D")) - cat.sort_ascending = false; + struct substring sr0 = { .string = NULL }; + struct substring sr1 = parse_substring (lexer, dict); + *cat = cct_srange (sr0, sr1); + } + else if (lex_force_num (lexer)) + { + *cat = cct_nrange (-DBL_MAX, lex_number (lexer)); + lex_get (lexer); + } + else + return false; + } + else if (lex_is_number (lexer)) + { + 
double number = lex_number (lexer); + lex_get (lexer); + if (lex_match_id (lexer, "THRU")) + { + if (lex_match_id (lexer, "HI")) + *cat = cct_nrange (number, DBL_MAX); else { - lex_error_expecting (lexer, "A", "D"); - goto error; + if (!lex_force_num (lexer)) + return false; + *cat = cct_nrange (number, lex_number (lexer)); + lex_get (lexer); } } - else if (!c->n_cats && lex_match_id (lexer, "KEY")) + else + *cat = (struct ctables_category) { + .type = CCT_NUMBER, + .number = number + }; + } + else if (lex_is_string (lexer)) + { + struct substring s = parse_substring (lexer, dict); + if (lex_match_id (lexer, "THRU")) { - int start_ofs = lex_ofs (lexer) - 1; - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "VALUE")) - cat.type = CCT_VALUE; - else if (lex_match_id (lexer, "LABEL")) - cat.type = CCT_LABEL; + if (lex_match_id (lexer, "HI")) + { + struct substring sr1 = { .string = NULL }; + *cat = cct_srange (s, sr1); + } else { - cat.type = CCT_FUNCTION; - if (!parse_ctables_summary_function (lexer, &cat.sort_function, - &cat.weighting, &cat.area)) - goto error; - - if (lex_match (lexer, T_LPAREN)) - { - cat.sort_var = parse_variable (lexer, dict); - if (!cat.sort_var) - goto error; - - if (cat.sort_function == CTSF_PTILE) - { - lex_match (lexer, T_COMMA); - if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100)) - goto error; - cat.percentile = lex_number (lexer); - lex_get (lexer); - } - - if (!lex_force_match (lexer, T_RPAREN)) - goto error; - } - else if (ctables_function_availability (cat.sort_function) - == CTFA_SCALE) + if (!lex_force_string (lexer)) { - bool UNUSED b = lex_force_match (lexer, T_LPAREN); - goto error; + ss_dealloc (&s); + return false; } - - lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, - _("Data-dependent sorting is not implemented.")); - goto error; + struct substring sr1 = parse_substring (lexer, dict); + *cat = cct_srange (s, sr1); } } - else if (!c->n_cats && lex_match_id (lexer, "MISSING")) + else + *cat = (struct 
ctables_category) { .type = CCT_STRING, .string = s }; + } + else if (lex_match (lexer, T_AND)) + { + if (!lex_force_id (lexer)) + return false; + struct ctables_postcompute *pc = ctables_find_postcompute ( + ct, lex_tokcstr (lexer)); + if (!pc) { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "INCLUDE")) - cat.include_missing = true; - else if (lex_match_id (lexer, "EXCLUDE")) - cat.include_missing = false; - else - { - lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); - goto error; - } + struct msg_location *loc = lex_get_location (lexer, -1, 0); + msg_at (SE, loc, _("Unknown postcompute &%s."), + lex_tokcstr (lexer)); + msg_location_destroy (loc); + return false; } - else if (lex_match_id (lexer, "TOTAL")) - { - lex_match (lexer, T_EQUALS); - if (!parse_bool (lexer, &show_totals)) - goto error; - } - else if (lex_match_id (lexer, "LABEL")) - { - lex_match (lexer, T_EQUALS); - if (!lex_force_string (lexer)) - goto error; - free (total_label); - total_label = ss_xstrdup (lex_tokss (lexer)); - lex_get (lexer); - } - else if (lex_match_id (lexer, "POSITION")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "BEFORE")) - totals_before = true; - else if (lex_match_id (lexer, "AFTER")) - totals_before = false; - else - { - lex_error_expecting (lexer, "BEFORE", "AFTER"); - goto error; - } - } - else if (lex_match_id (lexer, "EMPTY")) - { - lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "INCLUDE")) - c->show_empty = true; - else if (lex_match_id (lexer, "EXCLUDE")) - c->show_empty = false; - else - { - lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); - goto error; - } - } - else - { - if (!c->n_cats) - lex_error_expecting (lexer, "ORDER", "KEY", "MISSING", - "TOTAL", "LABEL", "POSITION", "EMPTY"); - else - lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY"); - goto error; - } - } + lex_get (lexer); - if (!c->n_cats) - { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); 
- c->cats[c->n_cats++] = cat; + *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc }; } - - if (show_totals) + else { - if (c->n_cats >= allocated_cats) - c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + lex_error (lexer, NULL); + return false; + } - struct ctables_category *totals; - if (totals_before) - { - insert_element (c->cats, c->n_cats, sizeof *c->cats, 0); - totals = &c->cats[0]; - } - else - totals = &c->cats[c->n_cats]; - c->n_cats++; + return true; +} - *totals = (struct ctables_category) { - .type = CCT_TOTAL, - .total_label = total_label ? total_label : xstrdup (_("Total")), - }; +static bool +parse_category_string (struct msg_location *location, + struct substring s, const struct dictionary *dict, + enum fmt_type format, double *n) +{ + union value v; + char *error = data_in (s, dict_get_encoding (dict), format, + settings_get_fmt_settings (), &v, 0, NULL); + if (error) + { + msg_at (SE, location, + _("Failed to parse category specification as format %s: %s."), + fmt_name (format), error); + free (error); + return false; } - struct ctables_category *subtotal = NULL; - for (size_t i = totals_before ? 0 : c->n_cats; - totals_before ? i < c->n_cats : i-- > 0; - totals_before ? 
i++ : 0) + *n = v.f; + return true; +} + +static struct ctables_category * +ctables_find_category_for_postcompute__ (const struct ctables_categories *cats, + const struct ctables_pcexpr *e) +{ + struct ctables_category *best = NULL; + size_t n_subtotals = 0; + for (size_t i = 0; i < cats->n_cats; i++) { - struct ctables_category *cat = &c->cats[i]; - switch (cat->type) + struct ctables_category *cat = &cats->cats[i]; + switch (e->op) { - case CCT_NUMBER: - case CCT_STRING: - case CCT_NRANGE: - case CCT_SRANGE: - case CCT_MISSING: - case CCT_OTHERNM: - cat->subtotal = subtotal; + case CTPO_CAT_NUMBER: + if (cat->type == CCT_NUMBER && cat->number == e->number) + best = cat; break; - case CCT_POSTCOMPUTE: + case CTPO_CAT_STRING: + if (cat->type == CCT_STRING && ss_equals (cat->string, e->string)) + best = cat; break; - case CCT_SUBTOTAL: - subtotal = cat; + case CTPO_CAT_NRANGE: + if (cat->type == CCT_NRANGE + && cat->nrange[0] == e->nrange[0] + && cat->nrange[1] == e->nrange[1]) + best = cat; break; - case CCT_TOTAL: - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: + case CTPO_CAT_SRANGE: + if (cat->type == CCT_SRANGE + && nullable_substring_equal (&cat->srange[0], &e->srange[0]) + && nullable_substring_equal (&cat->srange[1], &e->srange[1])) + best = cat; + break; + + case CTPO_CAT_MISSING: + if (cat->type == CCT_MISSING) + best = cat; + break; + + case CTPO_CAT_OTHERNM: + if (cat->type == CCT_OTHERNM) + best = cat; + break; + + case CTPO_CAT_SUBTOTAL: + if (cat->type == CCT_SUBTOTAL) + { + n_subtotals++; + if (e->subtotal_index == n_subtotals) + return cat; + else if (e->subtotal_index == 0) + best = cat; + } + break; + + case CTPO_CAT_TOTAL: + if (cat->type == CCT_TOTAL) + return cat; break; + + case CTPO_CONSTANT: + case CTPO_ADD: + case CTPO_SUB: + case CTPO_MUL: + case CTPO_DIV: + case CTPO_POW: + case CTPO_NEG: + NOT_REACHED (); } } + if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1) + return 
NULL; + return best; +} - if (cats_start_ofs != -1) +static struct ctables_category * +ctables_find_category_for_postcompute (const struct dictionary *dict, + const struct ctables_categories *cats, + enum fmt_type parse_format, + const struct ctables_pcexpr *e) +{ + if (parse_format != FMT_F) { - for (size_t i = 0; i < c->n_cats; i++) + if (e->op == CTPO_CAT_STRING) { - struct ctables_category *cat = &c->cats[i]; - switch (cat->type) - { - case CCT_POSTCOMPUTE: - cat->parse_format = parse_strings ? common_format->type : FMT_F; - struct msg_location *cats_location - = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs); - bool ok = ctables_recursive_check_postcompute ( - dict, cat->pc->expr, cat, c, cats_location); - msg_location_destroy (cats_location); - if (!ok) - goto error; - break; - - case CCT_NUMBER: - case CCT_NRANGE: - for (size_t j = 0; j < n_vars; j++) - if (var_is_alpha (vars[j])) - { - msg_at (SE, cat->location, - _("This category specification may be applied " - "only to numeric variables, but this " - "subcommand tries to apply it to string " - "variable %s."), - var_get_name (vars[j])); - goto error; - } - break; + double number; + if (!parse_category_string (e->location, e->string, dict, + parse_format, &number)) + return NULL; - case CCT_STRING: - if (parse_strings) - { - double n; - if (!parse_category_string (cat->location, cat->string, dict, - common_format->type, &n)) - goto error; + struct ctables_pcexpr e2 = { + .op = CTPO_CAT_NUMBER, + .number = number, + .location = e->location, + }; + return ctables_find_category_for_postcompute__ (cats, &e2); + } + else if (e->op == CTPO_CAT_SRANGE) + { + double nrange[2]; + if (!e->srange[0].string) + nrange[0] = -DBL_MAX; + else if (!parse_category_string (e->location, e->srange[0], dict, + parse_format, &nrange[0])) + return NULL; - ss_dealloc (&cat->string); + if (!e->srange[1].string) + nrange[1] = DBL_MAX; + else if (!parse_category_string (e->location, e->srange[1], dict, + parse_format, 
&nrange[1])) + return NULL; - cat->type = CCT_NUMBER; - cat->number = n; - } - else if (!all_strings (vars, n_vars, cat)) - goto error; - break; - - case CCT_SRANGE: - if (parse_strings) - { - double n[2]; - - if (!cat->srange[0].string) - n[0] = -DBL_MAX; - else if (!parse_category_string (cat->location, - cat->srange[0], dict, - common_format->type, &n[0])) - goto error; + struct ctables_pcexpr e2 = { + .op = CTPO_CAT_NRANGE, + .nrange = { nrange[0], nrange[1] }, + .location = e->location, + }; + return ctables_find_category_for_postcompute__ (cats, &e2); + } + } + return ctables_find_category_for_postcompute__ (cats, e); +} - if (!cat->srange[1].string) - n[1] = DBL_MAX; - else if (!parse_category_string (cat->location, - cat->srange[1], dict, - common_format->type, &n[1])) - goto error; +static bool +ctables_recursive_check_postcompute (struct dictionary *dict, + const struct ctables_pcexpr *e, + struct ctables_category *pc_cat, + const struct ctables_categories *cats, + const struct msg_location *cats_location) +{ + switch (e->op) + { + case CTPO_CAT_NUMBER: + case CTPO_CAT_STRING: + case CTPO_CAT_NRANGE: + case CTPO_CAT_SRANGE: + case CTPO_CAT_MISSING: + case CTPO_CAT_OTHERNM: + case CTPO_CAT_SUBTOTAL: + case CTPO_CAT_TOTAL: + { + struct ctables_category *cat = ctables_find_category_for_postcompute ( + dict, cats, pc_cat->parse_format, e); + if (!cat) + { + if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0) + { + size_t n_subtotals = 0; + for (size_t i = 0; i < cats->n_cats; i++) + n_subtotals += cats->cats[i].type == CCT_SUBTOTAL; + if (n_subtotals > 1) + { + msg_at (SE, cats_location, + ngettext ("These categories include %zu instance " + "of SUBTOTAL or HSUBTOTAL, so references " + "from computed categories must refer to " + "subtotals by position, " + "e.g. SUBTOTAL[1].", + "These categories include %zu instances " + "of SUBTOTAL or HSUBTOTAL, so references " + "from computed categories must refer to " + "subtotals by position, " + "e.g. 
SUBTOTAL[1].", + n_subtotals), + n_subtotals); + msg_at (SN, e->location, + _("This is the reference that lacks a position.")); + return NULL; + } + } - ss_dealloc (&cat->srange[0]); - ss_dealloc (&cat->srange[1]); + msg_at (SE, pc_cat->location, + _("Computed category &%s references a category not included " + "in the category list."), + pc_cat->pc->name); + msg_at (SN, e->location, _("This is the missing category.")); + if (e->op == CTPO_CAT_SUBTOTAL) + msg_at (SN, cats_location, + _("To fix the problem, add subtotals to the " + "list of categories here.")); + else if (e->op == CTPO_CAT_TOTAL) + msg (SN, _("To fix the problem, add TOTAL=YES to the variable's " + "CATEGORIES specification.")); + else + msg_at (SN, cats_location, + _("To fix the problem, add the missing category to the " + "list of categories here.")); + return false; + } + if (pc_cat->pc->hide_source_cats) + cat->hide = true; + return true; + } - cat->type = CCT_NRANGE; - cat->nrange[0] = n[0]; - cat->nrange[1] = n[1]; - } - else if (!all_strings (vars, n_vars, cat)) - goto error; - break; + case CTPO_CONSTANT: + return true; - case CCT_MISSING: - case CCT_OTHERNM: - case CCT_SUBTOTAL: - case CCT_TOTAL: - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: - break; - } - } + case CTPO_ADD: + case CTPO_SUB: + case CTPO_MUL: + case CTPO_DIV: + case CTPO_POW: + case CTPO_NEG: + for (size_t i = 0; i < 2; i++) + if (e->subs[i] && !ctables_recursive_check_postcompute ( + dict, e->subs[i], pc_cat, cats, cats_location)) + return false; + return true; } - free (vars); - return true; - -error: - free (vars); - return false; + NOT_REACHED (); } -static void -ctables_nest_uninit (struct ctables_nest *nest) +static bool +all_strings (struct variable **vars, size_t n_vars, + const struct ctables_category *cat) { - free (nest->vars); - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - ctables_summary_spec_set_uninit (&nest->specs[sv]); - for (enum ctables_area_type at 
= 0; at < N_CTATS; at++) - free (nest->areas[at]); + for (size_t j = 0; j < n_vars; j++) + if (var_is_numeric (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied only to string " + "variables, but this subcommand tries to apply it to " + "numeric variable %s."), + var_get_name (vars[j])); + return false; + } + return true; } -static void -ctables_stack_uninit (struct ctables_stack *stack) +static bool +ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, + struct ctables *ct, struct ctables_table *t) { - if (stack) + if (!lex_match_id (lexer, "VARIABLES")) + return false; + lex_match (lexer, T_EQUALS); + + struct variable **vars; + size_t n_vars; + if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH)) + return false; + + const struct fmt_spec *common_format = var_get_print_format (vars[0]); + for (size_t i = 1; i < n_vars; i++) { - for (size_t i = 0; i < stack->n; i++) - ctables_nest_uninit (&stack->nests[i]); - free (stack->nests); + const struct fmt_spec *f = var_get_print_format (vars[i]); + if (f->type != common_format->type) + { + common_format = NULL; + break; + } } -} + bool parse_strings + = (common_format + && (fmt_get_category (common_format->type) + & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))); -static struct ctables_stack -nest_fts (struct ctables_stack s0, struct ctables_stack s1) -{ - if (!s0.n) - return s1; - else if (!s1.n) - return s0; + struct ctables_categories *c = xmalloc (sizeof *c); + *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true }; + for (size_t i = 0; i < n_vars; i++) + { + struct ctables_categories **cp + = &t->categories[var_get_dict_index (vars[i])]; + ctables_categories_unref (*cp); + *cp = c; + } - struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) }; - for (size_t i = 0; i < s0.n; i++) - for (size_t j = 0; j < s1.n; j++) - { - const struct ctables_nest *a = &s0.nests[i]; - const struct 
ctables_nest *b = &s1.nests[j]; + size_t allocated_cats = 0; + int cats_start_ofs = -1; + int cats_end_ofs = -1; + if (lex_match (lexer, T_LBRACK)) + { + cats_start_ofs = lex_ofs (lexer); + do + { + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); - size_t allocate = a->n + b->n; - struct variable **vars = xnmalloc (allocate, sizeof *vars); - size_t n = 0; - for (size_t k = 0; k < a->n; k++) - vars[n++] = a->vars[k]; - for (size_t k = 0; k < b->n; k++) - vars[n++] = b->vars[k]; - assert (n == allocate); + int start_ofs = lex_ofs (lexer); + struct ctables_category *cat = &c->cats[c->n_cats]; + if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat)) + goto error; + cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); + c->n_cats++; - const struct ctables_nest *summary_src; - if (!a->specs[CSV_CELL].var) - summary_src = b; - else if (!b->specs[CSV_CELL].var) - summary_src = a; - else - NOT_REACHED (); + lex_match (lexer, T_COMMA); + } + while (!lex_match (lexer, T_RBRACK)); + cats_end_ofs = lex_ofs (lexer) - 1; + } - struct ctables_nest *new = &stack.nests[stack.n++]; - *new = (struct ctables_nest) { - .vars = vars, - .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx - : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx - : SIZE_MAX), - .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx - : b->summary_idx != SIZE_MAX ? 
a->n + b->summary_idx - : SIZE_MAX), - .n = n, - }; - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]); - } - ctables_stack_uninit (&s0); - ctables_stack_uninit (&s1); - return stack; -} - -static struct ctables_stack -stack_fts (struct ctables_stack s0, struct ctables_stack s1) -{ - struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) }; - for (size_t i = 0; i < s0.n; i++) - stack.nests[stack.n++] = s0.nests[i]; - for (size_t i = 0; i < s1.n; i++) - { - stack.nests[stack.n] = s1.nests[i]; - stack.nests[stack.n].group_head += s0.n; - stack.n++; - } - assert (stack.n == s0.n + s1.n); - free (s0.nests); - free (s1.nests); - return stack; -} - -static struct ctables_stack -var_fts (const struct ctables_axis *a) -{ - struct variable **vars = xmalloc (sizeof *vars); - *vars = a->var; - - bool is_summary = a->specs[CSV_CELL].n || a->scale; - struct ctables_nest *nest = xmalloc (sizeof *nest); - *nest = (struct ctables_nest) { - .vars = vars, - .n = 1, - .scale_idx = a->scale ? 0 : SIZE_MAX, - .summary_idx = is_summary ? 0 : SIZE_MAX, - }; - if (is_summary) - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - { - ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]); - nest->specs[sv].var = a->var; - nest->specs[sv].is_scale = a->scale; - } - return (struct ctables_stack) { .nests = nest, .n = 1 }; -} - -static struct ctables_stack -enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a) -{ - if (!a) - return (struct ctables_stack) { .n = 0 }; - - switch (a->op) - { - case CTAO_VAR: - return var_fts (a); - - case CTAO_STACK: - return stack_fts (enumerate_fts (axis_type, a->subs[0]), - enumerate_fts (axis_type, a->subs[1])); - - case CTAO_NEST: - /* This should consider any of the scale variables found in the result to - be linked to each other listwise for SMISSING=LISTWISE. 
*/ - return nest_fts (enumerate_fts (axis_type, a->subs[0]), - enumerate_fts (axis_type, a->subs[1])); - } - - NOT_REACHED (); -} - -union ctables_summary - { - /* COUNT, VALIDN, TOTALN. */ - double count; - - /* MINIMUM, MAXIMUM, RANGE. */ - struct - { - double min; - double max; - }; - - /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */ - struct moments1 *moments; - - /* MEDIAN, MODE, PTILE. */ - struct - { - struct casewriter *writer; - double ovalid; - double ovalue; - }; - }; - -static void -ctables_summary_init (union ctables_summary *s, - const struct ctables_summary_spec *ss) -{ - switch (ss->function) - { - case CTSF_COUNT: - case CTSF_areaPCT_COUNT: - case CTSF_areaPCT_VALIDN: - case CTSF_areaPCT_TOTALN: - case CTSF_MISSING: - case CTSF_TOTALN: - case CTSF_VALIDN: - s->count = 0; - break; - - case CTSF_areaID: - break; - - case CTSF_MAXIMUM: - case CTSF_MINIMUM: - case CTSF_RANGE: - s->min = s->max = SYSMIS; - break; - - case CTSF_MEAN: - case CTSF_SUM: - case CTSF_areaPCT_SUM: - s->moments = moments1_create (MOMENT_MEAN); - break; - - case CTSF_SEMEAN: - case CTSF_STDDEV: - case CTSF_VARIANCE: - s->moments = moments1_create (MOMENT_VARIANCE); - break; - - case CTSF_MEDIAN: - case CTSF_MODE: - case CTSF_PTILE: - { - struct caseproto *proto = caseproto_create (); - proto = caseproto_add_width (proto, 0); - proto = caseproto_add_width (proto, 0); - - struct subcase ordering; - subcase_init (&ordering, 0, 0, SC_ASCEND); - s->writer = sort_create_writer (&ordering, proto); - subcase_uninit (&ordering); - caseproto_unref (proto); - - s->ovalid = 0; - s->ovalue = SYSMIS; - } - break; - } -} - -static void -ctables_summary_uninit (union ctables_summary *s, - const struct ctables_summary_spec *ss) -{ - switch (ss->function) - { - case CTSF_COUNT: - case CTSF_areaPCT_COUNT: - case CTSF_areaPCT_VALIDN: - case CTSF_areaPCT_TOTALN: - case CTSF_MISSING: - case CTSF_TOTALN: - case CTSF_VALIDN: - break; - - case CTSF_areaID: - break; - - case CTSF_MAXIMUM: - case 
CTSF_MINIMUM: - case CTSF_RANGE: - break; - - case CTSF_MEAN: - case CTSF_SEMEAN: - case CTSF_STDDEV: - case CTSF_SUM: - case CTSF_VARIANCE: - case CTSF_areaPCT_SUM: - moments1_destroy (s->moments); - break; - - case CTSF_MEDIAN: - case CTSF_MODE: - case CTSF_PTILE: - casewriter_destroy (s->writer); - break; - } -} - -static void -ctables_summary_add (union ctables_summary *s, - const struct ctables_summary_spec *ss, - const union value *value, - bool is_missing, bool is_included, - double weight) -{ - /* To determine whether a case is included in a given table for a particular - kind of summary, consider the following charts for the variable being - summarized. Only if "yes" appears is the case counted. - - Categorical variables: VALIDN other TOTALN - Valid values in included categories yes yes yes - Missing values in included categories --- yes yes - Missing values in excluded categories --- --- yes - Valid values in excluded categories --- --- --- - - Scale variables: VALIDN other TOTALN - Valid value yes yes yes - Missing value --- yes yes - - Missing values include both user- and system-missing. (The system-missing - value is always in an excluded category.) - - One way to interpret the above table is that scale variables are like - categorical variables in which all values are in included categories. 
- */ - switch (ss->function) - { - case CTSF_TOTALN: - case CTSF_areaPCT_TOTALN: - s->count += weight; - break; - - case CTSF_COUNT: - case CTSF_areaPCT_COUNT: - if (is_included) - s->count += weight; - break; - - case CTSF_VALIDN: - case CTSF_areaPCT_VALIDN: - if (!is_missing) - s->count += weight; - break; - - case CTSF_areaID: - break; - - case CTSF_MISSING: - if (is_missing) - s->count += weight; - break; - - case CTSF_MAXIMUM: - case CTSF_MINIMUM: - case CTSF_RANGE: - if (!is_missing) - { - if (s->min == SYSMIS || value->f < s->min) - s->min = value->f; - if (s->max == SYSMIS || value->f > s->max) - s->max = value->f; - } - break; - - case CTSF_MEAN: - case CTSF_SEMEAN: - case CTSF_STDDEV: - case CTSF_SUM: - case CTSF_VARIANCE: - if (!is_missing) - moments1_add (s->moments, value->f, weight); - break; - - case CTSF_areaPCT_SUM: - if (!is_missing) - moments1_add (s->moments, value->f, weight); - break; - - case CTSF_MEDIAN: - case CTSF_MODE: - case CTSF_PTILE: - if (!is_missing) - { - s->ovalid += weight; - - struct ccase *c = case_create (casewriter_get_proto (s->writer)); - *case_num_rw_idx (c, 0) = value->f; - *case_num_rw_idx (c, 1) = weight; - casewriter_write (s->writer, c); - } - break; - } -} - -static double -ctables_summary_value (const struct ctables_cell *cell, - union ctables_summary *s, - const struct ctables_summary_spec *ss) -{ - switch (ss->function) - { - case CTSF_COUNT: - return s->count; - - case CTSF_areaID: - return cell->areas[ss->calc_area]->sequence; - - case CTSF_areaPCT_COUNT: - { - const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_count = a->count[ss->weighting]; - return a_count ? s->count / a_count * 100 : SYSMIS; - } - - case CTSF_areaPCT_VALIDN: - { - const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_valid = a->valid[ss->weighting]; - return a_valid ? 
s->count / a_valid * 100 : SYSMIS; - } - - case CTSF_areaPCT_TOTALN: - { - const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_total = a->total[ss->weighting]; - return a_total ? s->count / a_total * 100 : SYSMIS; - } - - case CTSF_MISSING: - case CTSF_TOTALN: - case CTSF_VALIDN: - return s->count; - - case CTSF_MAXIMUM: - return s->max; - - case CTSF_MINIMUM: - return s->min; - - case CTSF_RANGE: - return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS; - - case CTSF_MEAN: - { - double mean; - moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL); - return mean; - } - - case CTSF_SEMEAN: - { - double weight, variance; - moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL); - return calc_semean (variance, weight); - } - - case CTSF_STDDEV: - { - double variance; - moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); - return variance != SYSMIS ? sqrt (variance) : SYSMIS; - } - - case CTSF_SUM: - { - double weight, mean; - moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); - return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS; - } - - case CTSF_VARIANCE: - { - double variance; - moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); - return variance; - } - - case CTSF_areaPCT_SUM: - { - double weight, mean; - moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); - if (weight == SYSMIS || mean == SYSMIS) - return SYSMIS; - - const struct ctables_area *a = cell->areas[ss->calc_area]; - const struct ctables_sum *sum = &a->sums[ss->sum_var_idx]; - double denom = sum->sum[ss->weighting]; - return denom != 0 ? weight * mean / denom * 100 : SYSMIS; - } - - case CTSF_MEDIAN: - case CTSF_PTILE: - if (s->writer) - { - struct casereader *reader = casewriter_make_reader (s->writer); - s->writer = NULL; - - struct percentile *ptile = percentile_create ( - ss->function == CTSF_PTILE ? 
ss->percentile : 0.5, s->ovalid); - struct order_stats *os = &ptile->parent; - order_stats_accumulate_idx (&os, 1, reader, 1, 0); - s->ovalue = percentile_calculate (ptile, PC_HAVERAGE); - statistic_destroy (&ptile->parent.parent); - } - return s->ovalue; - - case CTSF_MODE: - if (s->writer) - { - struct casereader *reader = casewriter_make_reader (s->writer); - s->writer = NULL; - - struct mode *mode = mode_create (); - struct order_stats *os = &mode->parent; - order_stats_accumulate_idx (&os, 1, reader, 1, 0); - s->ovalue = mode->mode; - statistic_destroy (&mode->parent.parent); - } - return s->ovalue; - } - - NOT_REACHED (); -} - -struct ctables_cell_sort_aux - { - const struct ctables_nest *nest; - enum pivot_axis_type a; + struct ctables_category cat = { + .type = CCT_VALUE, + .include_missing = false, + .sort_ascending = true, }; - -static int -ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_) -{ - const struct ctables_cell_sort_aux *aux = aux_; - struct ctables_cell *const *ap = a_; - struct ctables_cell *const *bp = b_; - const struct ctables_cell *a = *ap; - const struct ctables_cell *b = *bp; - - const struct ctables_nest *nest = aux->nest; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct variable *var = nest->vars[i]; - const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i]; - const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i]; - if (a_cv->category != b_cv->category) - return a_cv->category > b_cv->category ? 1 : -1; - - const union value *a_val = &a_cv->value; - const union value *b_val = &b_cv->value; - switch (a_cv->category->type) - { - case CCT_NUMBER: - case CCT_STRING: - case CCT_SUBTOTAL: - case CCT_TOTAL: - case CCT_POSTCOMPUTE: - case CCT_EXCLUDED_MISSING: - /* Must be equal. 
*/ - continue; - - case CCT_NRANGE: - case CCT_SRANGE: - case CCT_MISSING: - case CCT_OTHERNM: + bool show_totals = false; + char *total_label = NULL; + bool totals_before = false; + while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) + { + if (!c->n_cats && lex_match_id (lexer, "ORDER")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "A")) + cat.sort_ascending = true; + else if (lex_match_id (lexer, "D")) + cat.sort_ascending = false; + else { - int cmp = value_compare_3way (a_val, b_val, var_get_width (var)); - if (cmp) - return cmp; + lex_error_expecting (lexer, "A", "D"); + goto error; } - break; - - case CCT_VALUE: + } + else if (!c->n_cats && lex_match_id (lexer, "KEY")) + { + int start_ofs = lex_ofs (lexer) - 1; + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "VALUE")) + cat.type = CCT_VALUE; + else if (lex_match_id (lexer, "LABEL")) + cat.type = CCT_LABEL; + else { - int cmp = value_compare_3way (a_val, b_val, var_get_width (var)); - if (cmp) - return a_cv->category->sort_ascending ? 
cmp : -cmp; - } - break; + cat.type = CCT_FUNCTION; + if (!parse_ctables_summary_function (lexer, &cat.sort_function, + &cat.weighting, &cat.area)) + goto error; - case CCT_LABEL: - { - const char *a_label = var_lookup_value_label (var, a_val); - const char *b_label = var_lookup_value_label (var, b_val); - int cmp; - if (a_label) + if (lex_match (lexer, T_LPAREN)) { - if (!b_label) - return -1; - cmp = strcmp (a_label, b_label); + cat.sort_var = parse_variable (lexer, dict); + if (!cat.sort_var) + goto error; + + if (cat.sort_function == CTSF_PTILE) + { + lex_match (lexer, T_COMMA); + if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100)) + goto error; + cat.percentile = lex_number (lexer); + lex_get (lexer); + } + + if (!lex_force_match (lexer, T_RPAREN)) + goto error; } - else + else if (ctables_function_availability (cat.sort_function) + == CTFA_SCALE) { - if (b_label) - return 1; - cmp = value_compare_3way (a_val, b_val, var_get_width (var)); + bool UNUSED b = lex_force_match (lexer, T_LPAREN); + goto error; } - if (cmp) - return a_cv->category->sort_ascending ? cmp : -cmp; - } - break; - - case CCT_FUNCTION: - NOT_REACHED (); - } - } - return 0; -} - -static int -ctables_cell_compare_leaf_3way (const void *a_, const void *b_, - const void *aux UNUSED) -{ - struct ctables_cell *const *ap = a_; - struct ctables_cell *const *bp = b_; - const struct ctables_cell *a = *ap; - const struct ctables_cell *b = *bp; - - for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++) - { - int al = a->axes[axis].leaf; - int bl = b->axes[axis].leaf; - if (al != bl) - return al > bl ? 
1 : -1; - } - return 0; -} -static struct ctables_area * -ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell, - enum ctables_area_type area) -{ - size_t hash = 0; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n_areas[area]; i++) + lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, + _("Data-dependent sorting is not implemented.")); + goto error; + } + } + else if (!c->n_cats && lex_match_id (lexer, "MISSING")) { - size_t v_idx = nest->areas[area][i]; - struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx]; - hash = hash_pointer (cv->category, hash); - if (cv->category->type != CCT_TOTAL - && cv->category->type != CCT_SUBTOTAL - && cv->category->type != CCT_POSTCOMPUTE) - hash = value_hash (&cv->value, - var_get_width (nest->vars[v_idx]), hash); + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "INCLUDE")) + cat.include_missing = true; + else if (lex_match_id (lexer, "EXCLUDE")) + cat.include_missing = false; + else + { + lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); + goto error; + } } - } - - struct ctables_area *a; - HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area]) - { - const struct ctables_cell *df = a->example; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + else if (lex_match_id (lexer, "TOTAL")) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n_areas[area]; i++) + lex_match (lexer, T_EQUALS); + if (!parse_bool (lexer, &show_totals)) + goto error; + } + else if (lex_match_id (lexer, "LABEL")) + { + lex_match (lexer, T_EQUALS); + if (!lex_force_string (lexer)) + goto error; + free (total_label); + total_label = ss_xstrdup (lex_tokss (lexer)); + lex_get (lexer); + } + else if (lex_match_id (lexer, "POSITION")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "BEFORE")) + totals_before = true; + else if (lex_match_id (lexer, "AFTER")) + 
totals_before = false; + else { - size_t v_idx = nest->areas[area][i]; - struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx]; - struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx]; - if (cv1->category != cv2->category - || (cv1->category->type != CCT_TOTAL - && cv1->category->type != CCT_SUBTOTAL - && cv1->category->type != CCT_POSTCOMPUTE - && !value_equal (&cv1->value, &cv2->value, - var_get_width (nest->vars[v_idx])))) - goto not_equal; + lex_error_expecting (lexer, "BEFORE", "AFTER"); + goto error; } } - return a; - - not_equal: ; + else if (lex_match_id (lexer, "EMPTY")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "INCLUDE")) + c->show_empty = true; + else if (lex_match_id (lexer, "EXCLUDE")) + c->show_empty = false; + else + { + lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); + goto error; + } + } + else + { + if (!c->n_cats) + lex_error_expecting (lexer, "ORDER", "KEY", "MISSING", + "TOTAL", "LABEL", "POSITION", "EMPTY"); + else + lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY"); + goto error; + } } - struct ctables_sum *sums = (s->table->n_sum_vars - ? 
xzalloc (s->table->n_sum_vars * sizeof *sums) - : NULL); - - a = xmalloc (sizeof *a); - *a = (struct ctables_area) { .example = cell, .sums = sums }; - hmap_insert (&s->areas[area], &a->node, hash); - return a; -} + if (!c->n_cats) + { + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); + c->cats[c->n_cats++] = cat; + } -static struct substring -rtrim_value (const union value *v, const struct variable *var) -{ - struct substring s = ss_buffer (CHAR_CAST (char *, v->s), - var_get_width (var)); - ss_rtrim (&s, ss_cstr (" ")); - return s; -} + if (show_totals) + { + if (c->n_cats >= allocated_cats) + c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats); -static bool -in_string_range (const union value *v, const struct variable *var, - const struct substring *srange) -{ - struct substring s = rtrim_value (v, var); - return ((!srange[0].string || ss_compare (s, srange[0]) >= 0) - && (!srange[1].string || ss_compare (s, srange[1]) <= 0)); -} + struct ctables_category *totals; + if (totals_before) + { + insert_element (c->cats, c->n_cats, sizeof *c->cats, 0); + totals = &c->cats[0]; + } + else + totals = &c->cats[c->n_cats]; + c->n_cats++; -static const struct ctables_category * -ctables_categories_match (const struct ctables_categories *c, - const union value *v, const struct variable *var) -{ - if (var_is_numeric (var) && v->f == SYSMIS) - return NULL; + *totals = (struct ctables_category) { + .type = CCT_TOTAL, + .total_label = total_label ? total_label : xstrdup (_("Total")), + }; + } - const struct ctables_category *othernm = NULL; - for (size_t i = c->n_cats; i-- > 0; ) + struct ctables_category *subtotal = NULL; + for (size_t i = totals_before ? 0 : c->n_cats; + totals_before ? i < c->n_cats : i-- > 0; + totals_before ? 
i++ : 0) { - const struct ctables_category *cat = &c->cats[i]; + struct ctables_category *cat = &c->cats[i]; switch (cat->type) { case CCT_NUMBER: - if (cat->number == v->f) - return cat; - break; - case CCT_STRING: - if (ss_equals (cat->string, rtrim_value (v, var))) - return cat; - break; - case CCT_NRANGE: - if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0]) - && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1])) - return cat; - break; - case CCT_SRANGE: - if (in_string_range (v, var, cat->srange)) - return cat; - break; - case CCT_MISSING: - if (var_is_value_missing (var, v)) - return cat; + case CCT_OTHERNM: + cat->subtotal = subtotal; break; case CCT_POSTCOMPUTE: break; - case CCT_OTHERNM: - if (!othernm) - othernm = cat; + case CCT_SUBTOTAL: + subtotal = cat; break; - case CCT_SUBTOTAL: case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: break; + } + } + + if (cats_start_ofs != -1) + { + for (size_t i = 0; i < c->n_cats; i++) + { + struct ctables_category *cat = &c->cats[i]; + switch (cat->type) + { + case CCT_POSTCOMPUTE: + cat->parse_format = parse_strings ? 
common_format->type : FMT_F; + struct msg_location *cats_location + = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs); + bool ok = ctables_recursive_check_postcompute ( + dict, cat->pc->expr, cat, c, cats_location); + msg_location_destroy (cats_location); + if (!ok) + goto error; + break; + + case CCT_NUMBER: + case CCT_NRANGE: + for (size_t j = 0; j < n_vars; j++) + if (var_is_alpha (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied " + "only to numeric variables, but this " + "subcommand tries to apply it to string " + "variable %s."), + var_get_name (vars[j])); + goto error; + } + break; + + case CCT_STRING: + if (parse_strings) + { + double n; + if (!parse_category_string (cat->location, cat->string, dict, + common_format->type, &n)) + goto error; + + ss_dealloc (&cat->string); + + cat->type = CCT_NUMBER; + cat->number = n; + } + else if (!all_strings (vars, n_vars, cat)) + goto error; + break; + + case CCT_SRANGE: + if (parse_strings) + { + double n[2]; + + if (!cat->srange[0].string) + n[0] = -DBL_MAX; + else if (!parse_category_string (cat->location, + cat->srange[0], dict, + common_format->type, &n[0])) + goto error; + + if (!cat->srange[1].string) + n[1] = DBL_MAX; + else if (!parse_category_string (cat->location, + cat->srange[1], dict, + common_format->type, &n[1])) + goto error; + + ss_dealloc (&cat->srange[0]); + ss_dealloc (&cat->srange[1]); - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - return (cat->include_missing || !var_is_value_missing (var, v) ? cat - : NULL); + cat->type = CCT_NRANGE; + cat->nrange[0] = n[0]; + cat->nrange[1] = n[1]; + } + else if (!all_strings (vars, n_vars, cat)) + goto error; + break; - case CCT_EXCLUDED_MISSING: - break; + case CCT_MISSING: + case CCT_OTHERNM: + case CCT_SUBTOTAL: + case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + break; + } } } - return var_is_value_missing (var, v) ? 
NULL : othernm; -} + free (vars); + return true; -static const struct ctables_category * -ctables_categories_total (const struct ctables_categories *c) -{ - const struct ctables_category *first = &c->cats[0]; - const struct ctables_category *last = &c->cats[c->n_cats - 1]; - return (first->type == CCT_TOTAL ? first - : last->type == CCT_TOTAL ? last - : NULL); +error: + free (vars); + return false; } + +union ctables_summary + { + /* COUNT, VALIDN, TOTALN. */ + double count; -static struct ctables_cell * -ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, - const struct ctables_category **cats[PIVOT_N_AXES]) + /* MINIMUM, MAXIMUM, RANGE. */ + struct + { + double min; + double max; + }; + + /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */ + struct moments1 *moments; + + /* MEDIAN, MODE, PTILE. */ + struct + { + struct casewriter *writer; + double ovalid; + double ovalue; + }; + }; + +static void +ctables_summary_init (union ctables_summary *s, + const struct ctables_summary_spec *ss) { - size_t hash = 0; - enum ctables_summary_variant sv = CSV_CELL; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + switch (ss->function) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - hash = hash_pointer (cats[a][i], hash); - if (cats[a][i]->type != CCT_TOTAL - && cats[a][i]->type != CCT_SUBTOTAL - && cats[a][i]->type != CCT_POSTCOMPUTE) - hash = value_hash (case_data (c, nest->vars[i]), - var_get_width (nest->vars[i]), hash); - else - sv = CSV_TOTAL; - } - } + case CTSF_COUNT: + case CTSF_areaPCT_COUNT: + case CTSF_areaPCT_VALIDN: + case CTSF_areaPCT_TOTALN: + case CTSF_MISSING: + case CTSF_TOTALN: + case CTSF_VALIDN: + s->count = 0; + break; - struct ctables_cell *cell; - HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells) - { - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i 
= 0; i < nest->n; i++) - if (i != nest->scale_idx - && (cats[a][i] != cell->axes[a].cvs[i].category - || (cats[a][i]->type != CCT_TOTAL - && cats[a][i]->type != CCT_SUBTOTAL - && cats[a][i]->type != CCT_POSTCOMPUTE - && !value_equal (case_data (c, nest->vars[i]), - &cell->axes[a].cvs[i].value, - var_get_width (nest->vars[i]))))) - goto not_equal; - } + case CTSF_areaID: + break; - return cell; + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + s->min = s->max = SYSMIS; + break; - not_equal: ; - } + case CTSF_MEAN: + case CTSF_SUM: + case CTSF_areaPCT_SUM: + s->moments = moments1_create (MOMENT_MEAN); + break; - cell = xmalloc (sizeof *cell); - cell->hide = false; - cell->sv = sv; - cell->omit_areas = 0; - cell->postcompute = false; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - cell->axes[a].cvs = (nest->n - ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs) - : NULL); - for (size_t i = 0; i < nest->n; i++) - { - const struct ctables_category *cat = cats[a][i]; - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); - if (i != nest->scale_idx) - { - const struct ctables_category *subtotal = cat->subtotal; - if (cat->hide || (subtotal && subtotal->hide_subcategories)) - cell->hide = true; + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_VARIANCE: + s->moments = moments1_create (MOMENT_VARIANCE); + break; - if (cat->type == CCT_TOTAL - || cat->type == CCT_SUBTOTAL - || cat->type == CCT_POSTCOMPUTE) - { - switch (a) - { - case PIVOT_AXIS_COLUMN: - cell->omit_areas |= ((1u << CTAT_TABLE) | - (1u << CTAT_LAYER) | - (1u << CTAT_LAYERCOL) | - (1u << CTAT_SUBTABLE) | - (1u << CTAT_COL)); - break; - case PIVOT_AXIS_ROW: - cell->omit_areas |= ((1u << CTAT_TABLE) | - (1u << CTAT_LAYER) | - (1u << CTAT_LAYERROW) | - (1u << CTAT_SUBTABLE) | - (1u << CTAT_ROW)); - break; - case PIVOT_AXIS_LAYER: - cell->omit_areas |= ((1u << CTAT_TABLE) | - (1u << CTAT_LAYER)); - 
break; - } - } - if (cat->type == CCT_POSTCOMPUTE) - cell->postcompute = true; - } + case CTSF_MEDIAN: + case CTSF_MODE: + case CTSF_PTILE: + { + struct caseproto *proto = caseproto_create (); + proto = caseproto_add_width (proto, 0); + proto = caseproto_add_width (proto, 0); - cell->axes[a].cvs[i].category = cat; - value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var)); - } - } + struct subcase ordering; + subcase_init (&ordering, 0, 0, SC_ASCEND); + s->writer = sort_create_writer (&ordering, proto); + subcase_uninit (&ordering); + caseproto_unref (proto); - const struct ctables_nest *ss = s->nests[s->table->summary_axis]; - const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; - cell->summaries = xmalloc (specs->n * sizeof *cell->summaries); - for (size_t i = 0; i < specs->n; i++) - ctables_summary_init (&cell->summaries[i], &specs->specs[i]); - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - cell->areas[at] = ctables_area_insert (s, cell, at); - hmap_insert (&s->cells, &cell->node, hash); - return cell; + s->ovalid = 0; + s->ovalue = SYSMIS; + } + break; + } } -static bool -is_listwise_missing (const struct ctables_summary_spec_set *specs, - const struct ccase *c) +static void +ctables_summary_uninit (union ctables_summary *s, + const struct ctables_summary_spec *ss) { - for (size_t i = 0; i < specs->n_listwise_vars; i++) + switch (ss->function) { - const struct variable *var = specs->listwise_vars[i]; - if (var_is_num_missing (var, case_num (c, var))) - return true; + case CTSF_COUNT: + case CTSF_areaPCT_COUNT: + case CTSF_areaPCT_VALIDN: + case CTSF_areaPCT_TOTALN: + case CTSF_MISSING: + case CTSF_TOTALN: + case CTSF_VALIDN: + break; + + case CTSF_areaID: + break; + + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + break; + + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + case CTSF_areaPCT_SUM: + moments1_destroy (s->moments); + break; + + case CTSF_MEDIAN: + 
case CTSF_MODE: + case CTSF_PTILE: + casewriter_destroy (s->writer); + break; } - - return false; } static void -add_weight (double dst[N_CTWS], const double src[N_CTWS]) +ctables_summary_add (union ctables_summary *s, + const struct ctables_summary_spec *ss, + const union value *value, + bool is_missing, bool is_included, + double weight) { - for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) - dst[wt] += src[wt]; -} + /* To determine whether a case is included in a given table for a particular + kind of summary, consider the following charts for the variable being + summarized. Only if "yes" appears is the case counted. -static void -ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, - const struct ctables_category **cats[PIVOT_N_AXES], - bool is_included, double weight[N_CTWS]) -{ - struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); - const struct ctables_nest *ss = s->nests[s->table->summary_axis]; + Categorical variables: VALIDN other TOTALN + Valid values in included categories yes yes yes + Missing values in included categories --- yes yes + Missing values in excluded categories --- --- yes + Valid values in excluded categories --- --- --- - const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; - const union value *value = case_data (c, specs->var); - bool is_missing = var_is_value_missing (specs->var, value); - bool is_scale_missing - = is_missing || (specs->is_scale && is_listwise_missing (specs, c)); + Scale variables: VALIDN other TOTALN + Valid value yes yes yes + Missing value --- yes yes - for (size_t i = 0; i < specs->n; i++) - ctables_summary_add (&cell->summaries[i], &specs->specs[i], value, - is_scale_missing, is_included, - weight[specs->specs[i].weighting]); - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - if (!(cell->omit_areas && (1u << at))) - { - struct ctables_area *a = cell->areas[at]; + Missing values include both user- and system-missing. 
(The system-missing + value is always in an excluded category.) - add_weight (a->total, weight); - if (is_included) - add_weight (a->count, weight); - if (!is_missing) - { - add_weight (a->valid, weight); + One way to interpret the above table is that scale variables are like + categorical variables in which all values are in included categories. + */ + switch (ss->function) + { + case CTSF_TOTALN: + case CTSF_areaPCT_TOTALN: + s->count += weight; + break; - if (!is_scale_missing) - for (size_t i = 0; i < s->table->n_sum_vars; i++) - { - const struct variable *var = s->table->sum_vars[i]; - double addend = case_num (c, var); - if (!var_is_num_missing (var, addend)) - for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) - a->sums[i].sum[wt] += addend * weight[wt]; - } - } - } -} + case CTSF_COUNT: + case CTSF_areaPCT_COUNT: + if (is_included) + s->count += weight; + break; -static void -recurse_totals (struct ctables_section *s, const struct ccase *c, - const struct ctables_category **cats[PIVOT_N_AXES], - bool is_included, double weight[N_CTWS], - enum pivot_axis_type start_axis, size_t start_nest) -{ - for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = start_nest; i < nest->n; i++) - { - if (i == nest->scale_idx) - continue; + case CTSF_VALIDN: + case CTSF_areaPCT_VALIDN: + if (!is_missing) + s->count += weight; + break; - const struct variable *var = nest->vars[i]; + case CTSF_areaID: + break; - const struct ctables_category *total = ctables_categories_total ( - s->table->categories[var_get_dict_index (var)]); - if (total) - { - const struct ctables_category *save = cats[a][i]; - cats[a][i] = total; - ctables_cell_add__ (s, c, cats, is_included, weight); - recurse_totals (s, c, cats, is_included, weight, a, i + 1); - cats[a][i] = save; - } + case CTSF_MISSING: + if (is_missing) + s->count += weight; + break; + + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + if 
(!is_missing) + { + if (s->min == SYSMIS || value->f < s->min) + s->min = value->f; + if (s->max == SYSMIS || value->f > s->max) + s->max = value->f; } - start_nest = 0; - } -} + break; -static void -recurse_subtotals (struct ctables_section *s, const struct ccase *c, - const struct ctables_category **cats[PIVOT_N_AXES], - bool is_included, double weight[N_CTWS], - enum pivot_axis_type start_axis, size_t start_nest) -{ - for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = start_nest; i < nest->n; i++) + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + if (!is_missing) + moments1_add (s->moments, value->f, weight); + break; + + case CTSF_areaPCT_SUM: + if (!is_missing) + moments1_add (s->moments, value->f, weight); + break; + + case CTSF_MEDIAN: + case CTSF_MODE: + case CTSF_PTILE: + if (!is_missing) { - if (i == nest->scale_idx) - continue; + s->ovalid += weight; - const struct ctables_category *save = cats[a][i]; - if (save->subtotal) - { - cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, is_included, weight); - recurse_subtotals (s, c, cats, is_included, weight, a, i + 1); - cats[a][i] = save; - } + struct ccase *c = case_create (casewriter_get_proto (s->writer)); + *case_num_rw_idx (c, 0) = value->f; + *case_num_rw_idx (c, 1) = weight; + casewriter_write (s->writer, c); } - start_nest = 0; + break; } } -static void -ctables_add_occurrence (const struct variable *var, - const union value *value, - struct hmap *occurrences) +static double +ctables_summary_value (const struct ctables_cell *cell, + union ctables_summary *s, + const struct ctables_summary_spec *ss) { - int width = var_get_width (var); - unsigned int hash = value_hash (value, width, 0); + switch (ss->function) + { + case CTSF_COUNT: + return s->count; - struct ctables_occurrence *o; - HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash, - 
occurrences) - if (value_equal (value, &o->value, width)) - return; + case CTSF_areaID: + return cell->areas[ss->calc_area]->sequence; - o = xmalloc (sizeof *o); - value_clone (&o->value, value, width); - hmap_insert (occurrences, &o->node, hash); -} + case CTSF_areaPCT_COUNT: + { + const struct ctables_area *a = cell->areas[ss->calc_area]; + double a_count = a->count[ss->weighting]; + return a_count ? s->count / a_count * 100 : SYSMIS; + } + + case CTSF_areaPCT_VALIDN: + { + const struct ctables_area *a = cell->areas[ss->calc_area]; + double a_valid = a->valid[ss->weighting]; + return a_valid ? s->count / a_valid * 100 : SYSMIS; + } + + case CTSF_areaPCT_TOTALN: + { + const struct ctables_area *a = cell->areas[ss->calc_area]; + double a_total = a->total[ss->weighting]; + return a_total ? s->count / a_total * 100 : SYSMIS; + } + + case CTSF_MISSING: + case CTSF_TOTALN: + case CTSF_VALIDN: + return s->count; + + case CTSF_MAXIMUM: + return s->max; + + case CTSF_MINIMUM: + return s->min; + + case CTSF_RANGE: + return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS; + + case CTSF_MEAN: + { + double mean; + moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL); + return mean; + } + + case CTSF_SEMEAN: + { + double weight, variance; + moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL); + return calc_semean (variance, weight); + } + + case CTSF_STDDEV: + { + double variance; + moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); + return variance != SYSMIS ? sqrt (variance) : SYSMIS; + } + + case CTSF_SUM: + { + double weight, mean; + moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); + return weight != SYSMIS && mean != SYSMIS ? 
weight * mean : SYSMIS; + } -static void -ctables_cell_insert (struct ctables_section *s, const struct ccase *c, - double weight[N_CTWS]) -{ - const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; - const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; - const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; - const struct ctables_category **cats[PIVOT_N_AXES] = - { - [PIVOT_AXIS_LAYER] = layer_cats, - [PIVOT_AXIS_ROW] = row_cats, - [PIVOT_AXIS_COLUMN] = column_cats, - }; + case CTSF_VARIANCE: + { + double variance; + moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); + return variance; + } - bool is_included = true; + case CTSF_areaPCT_SUM: + { + double weight, mean; + moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); + if (weight == SYSMIS || mean == SYSMIS) + return SYSMIS; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); + const struct ctables_area *a = cell->areas[ss->calc_area]; + const struct ctables_sum *sum = &a->sums[ss->sum_var_idx]; + double denom = sum->sum[ss->weighting]; + return denom != 0 ? weight * mean / denom * 100 : SYSMIS; + } - cats[a][i] = ctables_categories_match ( - s->table->categories[var_get_dict_index (var)], value, var); - if (!cats[a][i]) - { - if (i != nest->summary_idx) - return; + case CTSF_MEDIAN: + case CTSF_PTILE: + if (s->writer) + { + struct casereader *reader = casewriter_make_reader (s->writer); + s->writer = NULL; - if (!var_is_value_missing (var, value)) - return; + struct percentile *ptile = percentile_create ( + ss->function == CTSF_PTILE ? 
ss->percentile : 0.5, s->ovalid); + struct order_stats *os = &ptile->parent; + order_stats_accumulate_idx (&os, 1, reader, 1, 0); + s->ovalue = percentile_calculate (ptile, PC_HAVERAGE); + statistic_destroy (&ptile->parent.parent); + } + return s->ovalue; - static const struct ctables_category cct_excluded_missing = { - .type = CCT_EXCLUDED_MISSING, - .hide = true, - }; - cats[a][i] = &cct_excluded_missing; - is_included = false; - } + case CTSF_MODE: + if (s->writer) + { + struct casereader *reader = casewriter_make_reader (s->writer); + s->writer = NULL; + + struct mode *mode = mode_create (); + struct order_stats *os = &mode->parent; + order_stats_accumulate_idx (&os, 1, reader, 1, 0); + s->ovalue = mode->mode; + statistic_destroy (&mode->parent.parent); } + return s->ovalue; } - if (is_included) - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); - ctables_add_occurrence (var, value, &s->occurrences[a][i]); - } - } - - ctables_cell_add__ (s, c, cats, is_included, weight); - recurse_totals (s, c, cats, is_included, weight, 0, 0); - recurse_subtotals (s, c, cats, is_included, weight, 0, 0); + NOT_REACHED (); } -struct merge_item +struct ctables_cell_sort_aux { - const struct ctables_summary_spec_set *set; - size_t ofs; + const struct ctables_nest *nest; + enum pivot_axis_type a; }; static int -merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) -{ - const struct ctables_summary_spec *as = &a->set->specs[a->ofs]; - const struct ctables_summary_spec *bs = &b->set->specs[b->ofs]; - if (as->function != bs->function) - return as->function > bs->function ? 1 : -1; - else if (as->weighting != bs->weighting) - return as->weighting > bs->weighting ? 
1 : -1; - else if (as->calc_area != bs->calc_area) - return as->calc_area > bs->calc_area ? 1 : -1; - else if (as->percentile != bs->percentile) - return as->percentile < bs->percentile ? 1 : -1; - - const char *as_label = as->label ? as->label : ""; - const char *bs_label = bs->label ? bs->label : ""; - return strcmp (as_label, bs_label); -} - -static void -ctables_category_format_number (double number, const struct variable *var, - struct string *s) -{ - struct pivot_value *pv = pivot_value_new_var_value ( - var, &(union value) { .f = number }); - pivot_value_format (pv, NULL, s); - pivot_value_destroy (pv); -} - -static void -ctables_category_format_string (struct substring string, - const struct variable *var, struct string *out) -{ - int width = var_get_width (var); - char *s = xmalloc (width); - buf_copy_rpad (s, width, string.string, string.length, ' '); - struct pivot_value *pv = pivot_value_new_var_value ( - var, &(union value) { .s = CHAR_CAST (uint8_t *, s) }); - pivot_value_format (pv, NULL, out); - pivot_value_destroy (pv); - free (s); -} - -static bool -ctables_category_format_label (const struct ctables_category *cat, - const struct variable *var, - struct string *s) +ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_) { - switch (cat->type) - { - case CCT_NUMBER: - ctables_category_format_number (cat->number, var, s); - return true; + const struct ctables_cell_sort_aux *aux = aux_; + struct ctables_cell *const *ap = a_; + struct ctables_cell *const *bp = b_; + const struct ctables_cell *a = *ap; + const struct ctables_cell *b = *bp; - case CCT_STRING: - ctables_category_format_string (cat->string, var, s); - return true; + const struct ctables_nest *nest = aux->nest; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i]; + const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i]; + if 
(a_cv->category != b_cv->category) + return a_cv->category > b_cv->category ? 1 : -1; - case CCT_NRANGE: - ctables_category_format_number (cat->nrange[0], var, s); - ds_put_format (s, " THRU "); - ctables_category_format_number (cat->nrange[1], var, s); - return true; + const union value *a_val = &a_cv->value; + const union value *b_val = &b_cv->value; + switch (a_cv->category->type) + { + case CCT_NUMBER: + case CCT_STRING: + case CCT_SUBTOTAL: + case CCT_TOTAL: + case CCT_POSTCOMPUTE: + case CCT_EXCLUDED_MISSING: + /* Must be equal. */ + continue; - case CCT_SRANGE: - ctables_category_format_string (cat->srange[0], var, s); - ds_put_format (s, " THRU "); - ctables_category_format_string (cat->srange[1], var, s); - return true; + case CCT_NRANGE: + case CCT_SRANGE: + case CCT_MISSING: + case CCT_OTHERNM: + { + int cmp = value_compare_3way (a_val, b_val, var_get_width (var)); + if (cmp) + return cmp; + } + break; - case CCT_MISSING: - ds_put_cstr (s, "MISSING"); - return true; + case CCT_VALUE: + { + int cmp = value_compare_3way (a_val, b_val, var_get_width (var)); + if (cmp) + return a_cv->category->sort_ascending ? cmp : -cmp; + } + break; - case CCT_OTHERNM: - ds_put_cstr (s, "OTHERNM"); - return true; + case CCT_LABEL: + { + const char *a_label = var_lookup_value_label (var, a_val); + const char *b_label = var_lookup_value_label (var, b_val); + int cmp; + if (a_label) + { + if (!b_label) + return -1; + cmp = strcmp (a_label, b_label); + } + else + { + if (b_label) + return 1; + cmp = value_compare_3way (a_val, b_val, var_get_width (var)); + } + if (cmp) + return a_cv->category->sort_ascending ? 
cmp : -cmp; + } + break; - case CCT_POSTCOMPUTE: - ds_put_format (s, "&%s", cat->pc->name); - return true; + case CCT_FUNCTION: + NOT_REACHED (); + } + } + return 0; +} - case CCT_TOTAL: - case CCT_SUBTOTAL: - ds_put_cstr (s, cat->total_label); - return true; +static int +ctables_cell_compare_leaf_3way (const void *a_, const void *b_, + const void *aux UNUSED) +{ + struct ctables_cell *const *ap = a_; + struct ctables_cell *const *bp = b_; + const struct ctables_cell *a = *ap; + const struct ctables_cell *b = *bp; - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - case CCT_EXCLUDED_MISSING: - return false; + for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++) + { + int al = a->axes[axis].leaf; + int bl = b->axes[axis].leaf; + if (al != bl) + return al > bl ? 1 : -1; } - - return false; + return 0; } -static struct pivot_value * -ctables_postcompute_label (const struct ctables_categories *cats, - const struct ctables_category *cat, - const struct variable *var) +static struct ctables_area * +ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell, + enum ctables_area_type area) { - struct substring in = ss_cstr (cat->pc->label); - struct substring target = ss_cstr (")LABEL["); + size_t hash = 0; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n_areas[area]; i++) + { + size_t v_idx = nest->areas[area][i]; + struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx]; + hash = hash_pointer (cv->category, hash); + if (cv->category->type != CCT_TOTAL + && cv->category->type != CCT_SUBTOTAL + && cv->category->type != CCT_POSTCOMPUTE) + hash = value_hash (&cv->value, + var_get_width (nest->vars[v_idx]), hash); + } + } - struct string out = DS_EMPTY_INITIALIZER; - for (;;) + struct ctables_area *a; + HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area]) { - size_t chunk = ss_find_substring (in, target); - if (chunk == 
SIZE_MAX) + const struct ctables_cell *df = a->example; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - if (ds_is_empty (&out)) - return pivot_value_new_user_text (in.string, in.length); - else + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n_areas[area]; i++) { - ds_put_substring (&out, in); - return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out)); + size_t v_idx = nest->areas[area][i]; + struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx]; + struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx]; + if (cv1->category != cv2->category + || (cv1->category->type != CCT_TOTAL + && cv1->category->type != CCT_SUBTOTAL + && cv1->category->type != CCT_POSTCOMPUTE + && !value_equal (&cv1->value, &cv2->value, + var_get_width (nest->vars[v_idx])))) + goto not_equal; } } + return a; - ds_put_substring (&out, ss_head (in, chunk)); - ss_advance (&in, chunk + target.length); - - struct substring idx_s; - if (!ss_get_until (&in, ']', &idx_s)) - goto error; - char *tail; - long int idx = strtol (idx_s.string, &tail, 10); - if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s)) - goto error; - - struct ctables_category *cat2 = &cats->cats[idx - 1]; - if (!ctables_category_format_label (cat2, var, &out)) - goto error; + not_equal: ; } -error: - ds_destroy (&out); - return pivot_value_new_user_text (cat->pc->label, SIZE_MAX); -} + struct ctables_sum *sums = (s->table->n_sum_vars + ? xzalloc (s->table->n_sum_vars * sizeof *sums) + : NULL); -static struct pivot_value * -ctables_category_create_value_label (const struct ctables_categories *cats, - const struct ctables_category *cat, - const struct variable *var, - const union value *value) -{ - return (cat->type == CCT_POSTCOMPUTE && cat->pc->label - ? ctables_postcompute_label (cats, cat, var) - : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL - ? 
pivot_value_new_user_text (cat->total_label, SIZE_MAX) - : pivot_value_new_var_value (var, value)); + a = xmalloc (sizeof *a); + *a = (struct ctables_area) { .example = cell, .sums = sums }; + hmap_insert (&s->areas[area], &a->node, hash); + return a; } -static struct ctables_value * -ctables_value_find__ (struct ctables_table *t, const union value *value, - int width, unsigned int hash) +static struct substring +rtrim_value (const union value *v, const struct variable *var) { - struct ctables_value *clv; - HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node, - hash, &t->clabels_values_map) - if (value_equal (value, &clv->value, width)) - return clv; - return NULL; + struct substring s = ss_buffer (CHAR_CAST (char *, v->s), + var_get_width (var)); + ss_rtrim (&s, ss_cstr (" ")); + return s; } -static void -ctables_value_insert (struct ctables_table *t, const union value *value, - int width) +static bool +in_string_range (const union value *v, const struct variable *var, + const struct substring *srange) { - unsigned int hash = value_hash (value, width, 0); - struct ctables_value *clv = ctables_value_find__ (t, value, width, hash); - if (!clv) - { - clv = xmalloc (sizeof *clv); - value_clone (&clv->value, value, width); - hmap_insert (&t->clabels_values_map, &clv->node, hash); - } + struct substring s = rtrim_value (v, var); + return ((!srange[0].string || ss_compare (s, srange[0]) >= 0) + && (!srange[1].string || ss_compare (s, srange[1]) <= 0)); } -static struct ctables_value * -ctables_value_find (struct ctables_table *t, - const union value *value, int width) +static const struct ctables_category * +ctables_categories_match (const struct ctables_categories *c, + const union value *v, const struct variable *var) { - return ctables_value_find__ (t, value, width, - value_hash (value, width, 0)); -} + if (var_is_numeric (var) && v->f == SYSMIS) + return NULL; -static void -ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, - size_t 
ix[PIVOT_N_AXES]) -{ - if (a < PIVOT_N_AXES) - { - size_t limit = MAX (t->stacks[a].n, 1); - for (ix[a] = 0; ix[a] < limit; ix[a]++) - ctables_table_add_section (t, a + 1, ix); - } - else + const struct ctables_category *othernm = NULL; + for (size_t i = c->n_cats; i-- > 0; ) { - struct ctables_section *s = &t->sections[t->n_sections++]; - *s = (struct ctables_section) { - .table = t, - .cells = HMAP_INITIALIZER (s->cells), - }; - for (a = 0; a < PIVOT_N_AXES; a++) - if (t->stacks[a].n) - { - struct ctables_nest *nest = &t->stacks[a].nests[ix[a]]; - s->nests[a] = nest; - s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]); - for (size_t i = 0; i < nest->n; i++) - hmap_init (&s->occurrences[a][i]); - } - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - hmap_init (&s->areas[at]); - } -} + const struct ctables_category *cat = &c->cats[i]; + switch (cat->type) + { + case CCT_NUMBER: + if (cat->number == v->f) + return cat; + break; -static double -ctpo_add (double a, double b) -{ - return a + b; -} + case CCT_STRING: + if (ss_equals (cat->string, rtrim_value (v, var))) + return cat; + break; -static double -ctpo_sub (double a, double b) -{ - return a - b; -} + case CCT_NRANGE: + if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0]) + && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1])) + return cat; + break; -static double -ctpo_mul (double a, double b) -{ - return a * b; -} + case CCT_SRANGE: + if (in_string_range (v, var, cat->srange)) + return cat; + break; -static double -ctpo_div (double a, double b) -{ - return b ? 
a / b : SYSMIS; -} + case CCT_MISSING: + if (var_is_value_missing (var, v)) + return cat; + break; -static double -ctpo_pow (double a, double b) -{ - int save_errno = errno; - errno = 0; - double result = pow (a, b); - if (errno) - result = SYSMIS; - errno = save_errno; - return result; -} + case CCT_POSTCOMPUTE: + break; -static double -ctpo_neg (double a, double b UNUSED) -{ - return -a; -} + case CCT_OTHERNM: + if (!othernm) + othernm = cat; + break; -struct ctables_pcexpr_evaluate_ctx - { - const struct ctables_cell *cell; - const struct ctables_section *section; - const struct ctables_categories *cats; - enum pivot_axis_type pc_a; - size_t pc_a_idx; - size_t summary_idx; - enum fmt_type parse_format; - }; + case CCT_SUBTOTAL: + case CCT_TOTAL: + break; -static double ctables_pcexpr_evaluate ( - const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *); + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + return (cat->include_missing || !var_is_value_missing (var, v) ? cat + : NULL); -static double -ctables_pcexpr_evaluate_nonterminal ( - const struct ctables_pcexpr_evaluate_ctx *ctx, - const struct ctables_pcexpr *e, size_t n_args, - double evaluate (double, double)) -{ - double args[2] = { 0, 0 }; - for (size_t i = 0; i < n_args; i++) - { - args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]); - if (!isfinite (args[i]) || args[i] == SYSMIS) - return SYSMIS; + case CCT_EXCLUDED_MISSING: + break; + } } - return evaluate (args[0], args[1]); + + return var_is_value_missing (var, v) ? 
NULL : othernm; } -static double -ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx, - const struct ctables_cell_value *pc_cv) +static const struct ctables_category * +ctables_categories_total (const struct ctables_categories *c) { - const struct ctables_section *s = ctx->section; + const struct ctables_category *first = &c->cats[0]; + const struct ctables_category *last = &c->cats[c->n_cats - 1]; + return (first->type == CCT_TOTAL ? first + : last->type == CCT_TOTAL ? last + : NULL); +} +static struct ctables_cell * +ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES]) +{ size_t hash = 0; + enum ctables_summary_variant sv = CSV_CELL; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; for (size_t i = 0; i < nest->n; i++) if (i != nest->scale_idx) { - const struct ctables_cell_value *cv - = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv - : &ctx->cell->axes[a].cvs[i]); - hash = hash_pointer (cv->category, hash); - if (cv->category->type != CCT_TOTAL - && cv->category->type != CCT_SUBTOTAL - && cv->category->type != CCT_POSTCOMPUTE) - hash = value_hash (&cv->value, + hash = hash_pointer (cats[a][i], hash); + if (cats[a][i]->type != CCT_TOTAL + && cats[a][i]->type != CCT_SUBTOTAL + && cats[a][i]->type != CCT_POSTCOMPUTE) + hash = value_hash (case_data (c, nest->vars[i]), var_get_width (nest->vars[i]), hash); + else + sv = CSV_TOTAL; } } - struct ctables_cell *tc; - HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells) + struct ctables_cell *cell; + HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells) { for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct ctables_cell_value *p_cv - = (a == ctx->pc_a && i == ctx->pc_a_idx ? 
pc_cv - : &ctx->cell->axes[a].cvs[i]); - const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i]; - if (p_cv->category != t_cv->category - || (p_cv->category->type != CCT_TOTAL - && p_cv->category->type != CCT_SUBTOTAL - && p_cv->category->type != CCT_POSTCOMPUTE - && !value_equal (&p_cv->value, - &t_cv->value, - var_get_width (nest->vars[i])))) - goto not_equal; - } + if (i != nest->scale_idx + && (cats[a][i] != cell->axes[a].cvs[i].category + || (cats[a][i]->type != CCT_TOTAL + && cats[a][i]->type != CCT_SUBTOTAL + && cats[a][i]->type != CCT_POSTCOMPUTE + && !value_equal (case_data (c, nest->vars[i]), + &cell->axes[a].cvs[i].value, + var_get_width (nest->vars[i]))))) + goto not_equal; } - goto found; + return cell; not_equal: ; } - return 0; -found: ; - const struct ctables_table *t = s->table; - const struct ctables_nest *specs_nest = s->nests[t->summary_axis]; - const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv]; - return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx], - &specs->specs[ctx->summary_idx]); + cell = xmalloc (sizeof *cell); + cell->hide = false; + cell->sv = sv; + cell->omit_areas = 0; + cell->postcompute = false; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + cell->axes[a].cvs = (nest->n + ? 
xnmalloc (nest->n, sizeof *cell->axes[a].cvs) + : NULL); + for (size_t i = 0; i < nest->n; i++) + { + const struct ctables_category *cat = cats[a][i]; + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); + if (i != nest->scale_idx) + { + const struct ctables_category *subtotal = cat->subtotal; + if (cat->hide || (subtotal && subtotal->hide_subcategories)) + cell->hide = true; +/* A total, subtotal, or postcompute category sets bits in omit_areas; which area types are flagged depends on the axis A that the category is on. */ + if (cat->type == CCT_TOTAL + || cat->type == CCT_SUBTOTAL + || cat->type == CCT_POSTCOMPUTE) + { + switch (a) + { + case PIVOT_AXIS_COLUMN: + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER) | + (1u << CTAT_LAYERCOL) | + (1u << CTAT_SUBTABLE) | + (1u << CTAT_COL)); + break; + case PIVOT_AXIS_ROW: + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER) | + (1u << CTAT_LAYERROW) | + (1u << CTAT_SUBTABLE) | + (1u << CTAT_ROW)); + break; + case PIVOT_AXIS_LAYER: + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER)); + break; + } + } + if (cat->type == CCT_POSTCOMPUTE) + cell->postcompute = true; + } +/* The category and a copy of the case's value are stored for every variable in the nest, the scale variable included. */ + cell->axes[a].cvs[i].category = cat; + value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var)); + } + } +/* Allocate and initialize one summary per spec in the set selected by the cell's summary variant, attach the cell to its area of each type, then add the cell to the section's cell map under HASH. */ + const struct ctables_nest *ss = s->nests[s->table->summary_axis]; + const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; + cell->summaries = xmalloc (specs->n * sizeof *cell->summaries); + for (size_t i = 0; i < specs->n; i++) + ctables_summary_init (&cell->summaries[i], &specs->specs[i]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + cell->areas[at] = ctables_area_insert (s, cell, at); + hmap_insert (&s->cells, &cell->node, hash); + return cell; +} +/* Returns true if var_is_num_missing() reports the numeric value in case C as missing for any of the variables in SPECS->listwise_vars, false otherwise (including when SPECS has no listwise variables). */ +static bool +is_listwise_missing (const struct ctables_summary_spec_set *specs, + const struct ccase *c) +{ + for (size_t i = 0; i < specs->n_listwise_vars; i++) + { + const struct variable *var = specs->listwise_vars[i]; + if (var_is_num_missing (var, case_num (c, var))) + return true; + } + + return false; +} +/* Adds each of the N_CTWS elements of SRC to the corresponding element of DST. */ +static
void +add_weight (double dst[N_CTWS], const double src[N_CTWS]) +{ + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + dst[wt] += src[wt]; +} +/* Finds or creates (via ctables_cell_insert__()) the cell in section S that case C falls into given the per-axis categories CATS, then accumulates the case into that cell.  Every summary of the cell is passed the value of the spec set's variable, using the element of WEIGHT selected by that summary's weighting.  Then, for each area the cell updates: 'total' always receives WEIGHT; 'count' only if IS_INCLUDED; 'valid' only if the spec set's variable is not missing in C; and the per-sum-variable sums only if, in addition, the case is not scale-missing and that sum variable's own value is not missing. */ +static void +ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS]) +{ + struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); + const struct ctables_nest *ss = s->nests[s->table->summary_axis]; + + const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; + const union value *value = case_data (c, specs->var); + bool is_missing = var_is_value_missing (specs->var, value); + bool is_scale_missing + = is_missing || (specs->is_scale && is_listwise_missing (specs, c)); + + for (size_t i = 0; i < specs->n; i++) + ctables_summary_add (&cell->summaries[i], &specs->specs[i], value, + is_scale_missing, is_included, + weight[specs->specs[i].weighting]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + if (!(cell->omit_areas && (1u << at))) + { + struct ctables_area *a = cell->areas[at]; +/* NOTE(review): the guard on this block uses logical '&&', and '1u << at' is never zero for at < N_CTATS, so the test is equivalent to '!cell->omit_areas': a cell with any omit bit set updates no area at all, regardless of which bits are set.  Confirm whether a per-area bitwise '&' test was intended before changing it, because that would change which areas total, subtotal, and postcompute cells contribute to. */ + add_weight (a->total, weight); + if (is_included) + add_weight (a->count, weight); + if (!is_missing) + { + add_weight (a->valid, weight); + + if (!is_scale_missing) + for (size_t i = 0; i < s->table->n_sum_vars; i++) + { + const struct variable *var = s->table->sum_vars[i]; + double addend = case_num (c, var); + if (!var_is_num_missing (var, addend)) + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + a->sums[i].sum[wt] += addend * weight[wt]; + } + } + } +} +/* Adds case C to the total cells it contributes to.  Scans the variables of section S's nests starting at index START_NEST of axis START_AXIS (later axes start at index 0), skipping each nest's scale variable.  For each variable whose categories include a total according to ctables_categories_total(), temporarily substitutes that total in CATS, adds the case to the resulting cell with ctables_cell_add__(), recurses from the following variable on the same axis so that combinations of totals are also covered, and then restores the original category in CATS. */ +static void +recurse_totals (struct ctables_section *s, const struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS], + enum pivot_axis_type start_axis, size_t start_nest) +{ + for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = start_nest; i < nest->n; i++) + { + if (i == nest->scale_idx) +
continue; + + const struct variable *var = nest->vars[i]; + + const struct ctables_category *total = ctables_categories_total ( + s->table->categories[var_get_dict_index (var)]); + if (total) + { + const struct ctables_category *save = cats[a][i]; + cats[a][i] = total; + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, a, i + 1); + cats[a][i] = save; + } + } + start_nest = 0; + } } -static double -ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx, - const struct ctables_pcexpr *e) +static void +recurse_subtotals (struct ctables_section *s, const struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS], + enum pivot_axis_type start_axis, size_t start_nest) { - switch (e->op) + for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) { - case CTPO_CONSTANT: - return e->number; - - case CTPO_CAT_NRANGE: - case CTPO_CAT_SRANGE: - case CTPO_CAT_MISSING: - case CTPO_CAT_OTHERNM: - { - struct ctables_cell_value cv = { - .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e) - }; - assert (cv.category != NULL); - - struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx]; - const struct ctables_occurrence *o; + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = start_nest; i < nest->n; i++) + { + if (i == nest->scale_idx) + continue; - double sum = 0.0; - const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]; - HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences) - if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category) + const struct ctables_category *save = cats[a][i]; + if (save->subtotal) { - cv.value = o->value; - sum += ctables_pcexpr_evaluate_category (ctx, &cv); + cats[a][i] = save->subtotal; + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_subtotals (s, 
c, cats, is_included, weight, a, i + 1); + cats[a][i] = save; } - return sum; - } - - case CTPO_CAT_NUMBER: - case CTPO_CAT_SUBTOTAL: - case CTPO_CAT_TOTAL: - { - struct ctables_cell_value cv = { - .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e), - .value = { .f = e->number }, - }; - assert (cv.category != NULL); - return ctables_pcexpr_evaluate_category (ctx, &cv); - } - - case CTPO_CAT_STRING: - { - int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]); - char *s = NULL; - if (width > e->string.length) - { - s = xmalloc (width); - buf_copy_rpad (s, width, e->string.string, e->string.length, ' '); - } + } + start_nest = 0; + } +} - const struct ctables_category *category - = ctables_find_category_for_postcompute ( - ctx->section->table->ctables->dict, - ctx->cats, ctx->parse_format, e); - assert (category != NULL); +static void +ctables_add_occurrence (const struct variable *var, + const union value *value, + struct hmap *occurrences) +{ + int width = var_get_width (var); + unsigned int hash = value_hash (value, width, 0); - struct ctables_cell_value cv = { .category = category }; - if (category->type == CCT_NUMBER) - cv.value.f = category->number; - else if (category->type == CCT_STRING) - cv.value.s = CHAR_CAST (uint8_t *, s ? 
s : e->string.string); - else - NOT_REACHED (); + struct ctables_occurrence *o; + HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash, + occurrences) + if (value_equal (value, &o->value, width)) + return; - double retval = ctables_pcexpr_evaluate_category (ctx, &cv); - free (s); - return retval; - } + o = xmalloc (sizeof *o); + value_clone (&o->value, value, width); + hmap_insert (occurrences, &o->node, hash); +} - case CTPO_ADD: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add); +static void +ctables_cell_insert (struct ctables_section *s, const struct ccase *c, + double weight[N_CTWS]) +{ + const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; + const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; + const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; + const struct ctables_category **cats[PIVOT_N_AXES] = + { + [PIVOT_AXIS_LAYER] = layer_cats, + [PIVOT_AXIS_ROW] = row_cats, + [PIVOT_AXIS_COLUMN] = column_cats, + }; - case CTPO_SUB: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub); + bool is_included = true; - case CTPO_MUL: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul); + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); - case CTPO_DIV: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div); + cats[a][i] = ctables_categories_match ( + s->table->categories[var_get_dict_index (var)], value, var); + if (!cats[a][i]) + { + if (i != nest->summary_idx) + return; - case CTPO_POW: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow); + if (!var_is_value_missing (var, value)) + return; - case CTPO_NEG: - return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg); + static const struct 
ctables_category cct_excluded_missing = { + .type = CCT_EXCLUDED_MISSING, + .hide = true, + }; + cats[a][i] = &cct_excluded_missing; + is_included = false; + } + } } - NOT_REACHED (); -} - -static const struct ctables_category * -ctables_cell_postcompute (const struct ctables_section *s, - const struct ctables_cell *cell, - enum pivot_axis_type *pc_a_p, - size_t *pc_a_idx_p) -{ - assert (cell->postcompute); - const struct ctables_category *pc_cat = NULL; - for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++) - for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++) + if (is_included) + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx]; - if (cv->category->type == CCT_POSTCOMPUTE) - { - if (pc_cat) - { - /* Multiple postcomputes cross each other. The value is - undefined. */ - return NULL; - } - - pc_cat = cv->category; - if (pc_a_p) - *pc_a_p = pc_a; - if (pc_a_idx_p) - *pc_a_idx_p = pc_a_idx; - } + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); + ctables_add_occurrence (var, value, &s->occurrences[a][i]); + } } - assert (pc_cat != NULL); - return pc_cat; + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, 0, 0); + recurse_subtotals (s, c, cats, is_included, weight, 0, 0); } -static double -ctables_cell_calculate_postcompute (const struct ctables_section *s, - const struct ctables_cell *cell, - const struct ctables_summary_spec *ss, - struct fmt_spec *format, - bool *is_ctables_format, - size_t summary_idx) -{ - enum pivot_axis_type pc_a = 0; - size_t pc_a_idx = 0; - const struct ctables_category *pc_cat = ctables_cell_postcompute ( - s, cell, &pc_a, &pc_a_idx); - if (!pc_cat) - return SYSMIS; - - const struct ctables_postcompute *pc = 
pc_cat->pc; - if (pc->specs) - { - for (size_t i = 0; i < pc->specs->n; i++) - { - const struct ctables_summary_spec *ss2 = &pc->specs->specs[i]; - if (ss->function == ss2->function - && ss->weighting == ss2->weighting - && ss->calc_area == ss2->calc_area - && ss->percentile == ss2->percentile) - { - *format = ss2->format; - *is_ctables_format = ss2->is_ctables_format; - break; - } - } - } - - const struct variable *var = s->nests[pc_a]->vars[pc_a_idx]; - const struct ctables_categories *cats = s->table->categories[ - var_get_dict_index (var)]; - struct ctables_pcexpr_evaluate_ctx ctx = { - .cell = cell, - .section = s, - .cats = cats, - .pc_a = pc_a, - .pc_a_idx = pc_a_idx, - .summary_idx = summary_idx, - .parse_format = pc_cat->parse_format, +struct merge_item + { + const struct ctables_summary_spec_set *set; + size_t ofs; }; - return ctables_pcexpr_evaluate (&ctx, pc->expr); + +static int +merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) +{ + const struct ctables_summary_spec *as = &a->set->specs[a->ofs]; + const struct ctables_summary_spec *bs = &b->set->specs[b->ofs]; + if (as->function != bs->function) + return as->function > bs->function ? 1 : -1; + else if (as->weighting != bs->weighting) + return as->weighting > bs->weighting ? 1 : -1; + else if (as->calc_area != bs->calc_area) + return as->calc_area > bs->calc_area ? 1 : -1; + else if (as->percentile != bs->percentile) + return as->percentile < bs->percentile ? 1 : -1; + + const char *as_label = as->label ? as->label : ""; + const char *bs_label = bs->label ? 
bs->label : ""; + return strcmp (as_label, bs_label); } -static char * -ctables_format (double d, const struct fmt_spec *format, - const struct fmt_settings *settings) +static void +ctables_category_format_number (double number, const struct variable *var, + struct string *s) { - const union value v = { .f = d }; - char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL); - - /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't - produce the results we want for negative numbers, putting the negative - sign in the wrong spot, before the prefix instead of after it. We can't, - in fact, produce the desired results using a custom-currency - specification. Instead, we postprocess the output, moving the negative - sign into place: - - NEQUAL: "-N=3" => "N=-3" - PAREN: "-(3)" => "(-3)" - PCTPAREN: "-(3%)" => "(-3%)" + struct pivot_value *pv = pivot_value_new_var_value ( + var, &(union value) { .f = number }); + pivot_value_format (pv, NULL, s); + pivot_value_destroy (pv); +} - This transformation doesn't affect NEGPAREN. */ - char *minus_src = strchr (s, '-'); - if (minus_src && (minus_src == s || minus_src[-1] != 'E')) - { - char *n_equals = strstr (s, "N="); - char *lparen = strchr (s, '('); - char *minus_dst = n_equals ? 
n_equals + 1 : lparen; - if (minus_dst) - move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s); - } - return s; +static void +ctables_category_format_string (struct substring string, + const struct variable *var, struct string *out) +{ + int width = var_get_width (var); + char *s = xmalloc (width); + buf_copy_rpad (s, width, string.string, string.length, ' '); + struct pivot_value *pv = pivot_value_new_var_value ( + var, &(union value) { .s = CHAR_CAST (uint8_t *, s) }); + pivot_value_format (pv, NULL, out); + pivot_value_destroy (pv); + free (s); } static bool -all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a) +ctables_category_format_label (const struct ctables_category *cat, + const struct variable *var, + struct string *s) { - for (size_t i = 0; i < t->stacks[a].n; i++) + switch (cat->type) { - struct ctables_nest *nest = &t->stacks[a].nests[i]; - if (nest->n != 1 || nest->scale_idx != 0) - return false; + case CCT_NUMBER: + ctables_category_format_number (cat->number, var, s); + return true; - enum ctables_vlabel vlabel - = t->ctables->vlabels[var_get_dict_index (nest->vars[0])]; - if (vlabel != CTVL_NONE) - return false; + case CCT_STRING: + ctables_category_format_string (cat->string, var, s); + return true; + + case CCT_NRANGE: + ctables_category_format_number (cat->nrange[0], var, s); + ds_put_format (s, " THRU "); + ctables_category_format_number (cat->nrange[1], var, s); + return true; + + case CCT_SRANGE: + ctables_category_format_string (cat->srange[0], var, s); + ds_put_format (s, " THRU "); + ctables_category_format_string (cat->srange[1], var, s); + return true; + + case CCT_MISSING: + ds_put_cstr (s, "MISSING"); + return true; + + case CCT_OTHERNM: + ds_put_cstr (s, "OTHERNM"); + return true; + + case CCT_POSTCOMPUTE: + ds_put_format (s, "&%s", cat->pc->name); + return true; + + case CCT_TOTAL: + case CCT_SUBTOTAL: + ds_put_cstr (s, cat->total_label); + return true; + + case CCT_VALUE: + case CCT_LABEL: + 
case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + return false; } - return true; + + return false; } -static void -ctables_table_output (struct ctables *ct, struct ctables_table *t) +static struct pivot_value * +ctables_postcompute_label (const struct ctables_categories *cats, + const struct ctables_category *cat, + const struct variable *var) { - struct pivot_table *pt = pivot_table_create__ ( - (t->title - ? pivot_value_new_user_text (t->title, SIZE_MAX) - : pivot_value_new_text (N_("Custom Tables"))), - "Custom Tables"); - if (t->caption) - pivot_table_set_caption ( - pt, pivot_value_new_user_text (t->caption, SIZE_MAX)); - if (t->corner) - pivot_table_set_corner_text ( - pt, pivot_value_new_user_text (t->corner, SIZE_MAX)); - - bool summary_dimension = (t->summary_axis != t->slabels_axis - || (!t->slabels_visible - && t->summary_specs.n > 1)); - if (summary_dimension) - { - struct pivot_dimension *d = pivot_dimension_create ( - pt, t->slabels_axis, N_("Statistics")); - const struct ctables_summary_spec_set *specs = &t->summary_specs; - if (!t->slabels_visible) - d->hide_all_labels = true; - for (size_t i = 0; i < specs->n; i++) - pivot_category_create_leaf ( - d->root, ctables_summary_label (&specs->specs[i], t->cilevel)); - } + struct substring in = ss_cstr (cat->pc->label); + struct substring target = ss_cstr (")LABEL["); - bool categories_dimension = t->clabels_example != NULL; - if (categories_dimension) + struct string out = DS_EMPTY_INITIALIZER; + for (;;) { - struct pivot_dimension *d = pivot_dimension_create ( - pt, t->label_axis[t->clabels_from_axis], - t->clabels_from_axis == PIVOT_AXIS_ROW - ? 
N_("Row Categories") - : N_("Column Categories")); - const struct variable *var = t->clabels_example; - const struct ctables_categories *c = t->categories[var_get_dict_index (var)]; - for (size_t i = 0; i < t->n_clabels_values; i++) + size_t chunk = ss_find_substring (in, target); + if (chunk == SIZE_MAX) { - const struct ctables_value *value = t->clabels_values[i]; - const struct ctables_category *cat = ctables_categories_match (c, &value->value, var); - assert (cat != NULL); - pivot_category_create_leaf ( - d->root, ctables_category_create_value_label (c, cat, - t->clabels_example, - &value->value)); + if (ds_is_empty (&out)) + return pivot_value_new_user_text (in.string, in.length); + else + { + ds_put_substring (&out, in); + return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out)); + } } - } - pivot_table_set_look (pt, ct->look); - struct pivot_dimension *d[PIVOT_N_AXES]; - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - static const char *names[] = { - [PIVOT_AXIS_ROW] = N_("Rows"), - [PIVOT_AXIS_COLUMN] = N_("Columns"), - [PIVOT_AXIS_LAYER] = N_("Layers"), - }; - d[a] = (t->axes[a] || a == t->summary_axis - ? 
pivot_dimension_create (pt, a, names[a]) - : NULL); - if (!d[a]) - continue; + ds_put_substring (&out, ss_head (in, chunk)); + ss_advance (&in, chunk + target.length); - assert (t->axes[a]); + struct substring idx_s; + if (!ss_get_until (&in, ']', &idx_s)) + goto error; + char *tail; + long int idx = strtol (idx_s.string, &tail, 10); + if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s)) + goto error; - for (size_t i = 0; i < t->stacks[a].n; i++) - { - struct ctables_nest *nest = &t->stacks[a].nests[i]; - struct ctables_section **sections = xnmalloc (t->n_sections, - sizeof *sections); - size_t n_sections = 0; + struct ctables_category *cat2 = &cats->cats[idx - 1]; + if (!ctables_category_format_label (cat2, var, &out)) + goto error; + } - size_t n_total_cells = 0; - size_t max_depth = 0; - for (size_t j = 0; j < t->n_sections; j++) - if (t->sections[j].nests[a] == nest) - { - struct ctables_section *s = &t->sections[j]; - sections[n_sections++] = s; - n_total_cells += hmap_count (&s->cells); +error: + ds_destroy (&out); + return pivot_value_new_user_text (cat->pc->label, SIZE_MAX); +} - size_t depth = s->nests[a]->n; - max_depth = MAX (depth, max_depth); - } +static struct pivot_value * +ctables_category_create_value_label (const struct ctables_categories *cats, + const struct ctables_category *cat, + const struct variable *var, + const union value *value) +{ + return (cat->type == CCT_POSTCOMPUTE && cat->pc->label + ? ctables_postcompute_label (cats, cat, var) + : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL + ? 
pivot_value_new_user_text (cat->total_label, SIZE_MAX) + : pivot_value_new_var_value (var, value)); +} - struct ctables_cell **sorted = xnmalloc (n_total_cells, - sizeof *sorted); - size_t n_sorted = 0; +static struct ctables_value * +ctables_value_find__ (struct ctables_table *t, const union value *value, + int width, unsigned int hash) +{ + struct ctables_value *clv; + HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node, + hash, &t->clabels_values_map) + if (value_equal (value, &clv->value, width)) + return clv; + return NULL; +} - for (size_t j = 0; j < n_sections; j++) - { - struct ctables_section *s = sections[j]; +static void +ctables_value_insert (struct ctables_table *t, const union value *value, + int width) +{ + unsigned int hash = value_hash (value, width, 0); + struct ctables_value *clv = ctables_value_find__ (t, value, width, hash); + if (!clv) + { + clv = xmalloc (sizeof *clv); + value_clone (&clv->value, value, width); + hmap_insert (&t->clabels_values_map, &clv->node, hash); + } +} - struct ctables_cell *cell; - HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells) - if (!cell->hide) - sorted[n_sorted++] = cell; - assert (n_sorted <= n_total_cells); - } +static struct ctables_value * +ctables_value_find (struct ctables_table *t, + const union value *value, int width) +{ + return ctables_value_find__ (t, value, width, + value_hash (value, width, 0)); +} - struct ctables_cell_sort_aux aux = { .nest = nest, .a = a }; - sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux); +static void +ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, + size_t ix[PIVOT_N_AXES]) +{ + if (a < PIVOT_N_AXES) + { + size_t limit = MAX (t->stacks[a].n, 1); + for (ix[a] = 0; ix[a] < limit; ix[a]++) + ctables_table_add_section (t, a + 1, ix); + } + else + { + struct ctables_section *s = &t->sections[t->n_sections++]; + *s = (struct ctables_section) { + .table = t, + .cells = HMAP_INITIALIZER (s->cells), + }; + for (a = 0; 
a < PIVOT_N_AXES; a++) + if (t->stacks[a].n) + { + struct ctables_nest *nest = &t->stacks[a].nests[ix[a]]; + s->nests[a] = nest; + s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]); + for (size_t i = 0; i < nest->n; i++) + hmap_init (&s->occurrences[a][i]); + } + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_init (&s->areas[at]); + } +} - struct ctables_level - { - enum ctables_level_type - { - CTL_VAR, /* Variable label for nest->vars[var_idx]. */ - CTL_CATEGORY, /* Category for nest->vars[var_idx]. */ - CTL_SUMMARY, /* Summary functions. */ - } - type; +static double +ctpo_add (double a, double b) +{ + return a + b; +} - enum settings_value_show vlabel; /* CTL_VAR only. */ - size_t var_idx; - }; - struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels); - size_t n_levels = 0; - for (size_t k = 0; k < nest->n; k++) - { - enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])]; - if (vlabel == CTVL_NONE && nest->scale_idx == k) - vlabel = CTVL_NAME; - if (vlabel != CTVL_NONE) - { - levels[n_levels++] = (struct ctables_level) { - .type = CTL_VAR, - .vlabel = (enum settings_value_show) vlabel, - .var_idx = k, - }; - } +static double +ctpo_sub (double a, double b) +{ + return a - b; +} - if (nest->scale_idx != k - && (k != nest->n - 1 || t->label_axis[a] == a)) - { - levels[n_levels++] = (struct ctables_level) { - .type = CTL_CATEGORY, - .var_idx = k, - }; - } - } +static double +ctpo_mul (double a, double b) +{ + return a * b; +} - if (!summary_dimension && a == t->slabels_axis) - { - levels[n_levels++] = (struct ctables_level) { - .type = CTL_SUMMARY, - .var_idx = SIZE_MAX, - }; - } +static double +ctpo_div (double a, double b) +{ + return b ? 
a / b : SYSMIS; +} - /* Pivot categories: +static double +ctpo_pow (double a, double b) +{ + int save_errno = errno; + errno = 0; + double result = pow (a, b); + if (errno) + result = SYSMIS; + errno = save_errno; + return result; +} - - variable label for nest->vars[0], if vlabel != CTVL_NONE - - category for nest->vars[0], if nest->scale_idx != 0 - - variable label for nest->vars[1], if vlabel != CTVL_NONE - - category for nest->vars[1], if nest->scale_idx != 1 - ... - - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE - - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1. - - summary function, if 'a == t->slabels_axis && a == - t->summary_axis'. +static double +ctpo_neg (double a, double b UNUSED) +{ + return -a; +} - Additional dimensions: +struct ctables_pcexpr_evaluate_ctx + { + const struct ctables_cell *cell; + const struct ctables_section *section; + const struct ctables_categories *cats; + enum pivot_axis_type pc_a; + size_t pc_a_idx; + size_t summary_idx; + enum fmt_type parse_format; + }; - - If 'a == t->slabels_axis && a != t->summary_axis', add a summary - dimension. - - If 't->label_axis[b] == a' for some 'b != a', add a category - dimension to 'a'. 
- */ +static double ctables_pcexpr_evaluate ( + const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *); +static double +ctables_pcexpr_evaluate_nonterminal ( + const struct ctables_pcexpr_evaluate_ctx *ctx, + const struct ctables_pcexpr *e, size_t n_args, + double evaluate (double, double)) +{ + double args[2] = { 0, 0 }; + for (size_t i = 0; i < n_args; i++) + { + args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]); + if (!isfinite (args[i]) || args[i] == SYSMIS) + return SYSMIS; + } + return evaluate (args[0], args[1]); +} - struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups); - int prev_leaf = 0; - for (size_t j = 0; j < n_sorted; j++) - { - struct ctables_cell *cell = sorted[j]; - struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL; +static double +ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx, + const struct ctables_cell_value *pc_cv) +{ + const struct ctables_section *s = ctx->section; - size_t n_common = 0; - if (j > 0) - { - for (; n_common < n_levels; n_common++) - { - const struct ctables_level *level = &levels[n_common]; - if (level->type == CTL_CATEGORY) - { - size_t var_idx = level->var_idx; - const struct ctables_category *c = cell->axes[a].cvs[var_idx].category; - if (prev->axes[a].cvs[var_idx].category != c) - break; - else if (c->type != CCT_SUBTOTAL - && c->type != CCT_TOTAL - && c->type != CCT_POSTCOMPUTE - && !value_equal (&prev->axes[a].cvs[var_idx].value, - &cell->axes[a].cvs[var_idx].value, - var_get_type (nest->vars[var_idx]))) - break; - } - } - } + size_t hash = 0; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct ctables_cell_value *cv + = (a == ctx->pc_a && i == ctx->pc_a_idx ? 
pc_cv + : &ctx->cell->axes[a].cvs[i]); + hash = hash_pointer (cv->category, hash); + if (cv->category->type != CCT_TOTAL + && cv->category->type != CCT_SUBTOTAL + && cv->category->type != CCT_POSTCOMPUTE) + hash = value_hash (&cv->value, + var_get_width (nest->vars[i]), hash); + } + } - for (size_t k = n_common; k < n_levels; k++) - { - const struct ctables_level *level = &levels[k]; - struct pivot_category *parent = k ? groups[k - 1] : d[a]->root; - if (level->type == CTL_SUMMARY) - { - assert (k == n_levels - 1); + struct ctables_cell *tc; + HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells) + { + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct ctables_cell_value *p_cv + = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv + : &ctx->cell->axes[a].cvs[i]); + const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i]; + if (p_cv->category != t_cv->category + || (p_cv->category->type != CCT_TOTAL + && p_cv->category->type != CCT_SUBTOTAL + && p_cv->category->type != CCT_POSTCOMPUTE + && !value_equal (&p_cv->value, + &t_cv->value, + var_get_width (nest->vars[i])))) + goto not_equal; + } + } - const struct ctables_summary_spec_set *specs = &t->summary_specs; - for (size_t m = 0; m < specs->n; m++) - { - int leaf = pivot_category_create_leaf ( - parent, ctables_summary_label (&specs->specs[m], - t->cilevel)); - if (!m) - prev_leaf = leaf; - } - } - else - { - const struct variable *var = nest->vars[level->var_idx]; - struct pivot_value *label; - if (level->type == CTL_VAR) - { - label = pivot_value_new_variable (var); - label->variable.show = level->vlabel; - } - else if (level->type == CTL_CATEGORY) - { - const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx]; - label = ctables_category_create_value_label ( - t->categories[var_get_dict_index (var)], - cv->category, var, &cv->value); - } - 
else - NOT_REACHED (); + goto found; - if (k == n_levels - 1) - prev_leaf = pivot_category_create_leaf (parent, label); - else - groups[k] = pivot_category_create_group__ (parent, label); - } - } + not_equal: ; + } + return 0; - cell->axes[a].leaf = prev_leaf; - } - free (sorted); - free (groups); - free (levels); - free (sections); +found: ; + const struct ctables_table *t = s->table; + const struct ctables_nest *specs_nest = s->nests[t->summary_axis]; + const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv]; + return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx], + &specs->specs[ctx->summary_idx]); +} - } +static double +ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx, + const struct ctables_pcexpr *e) +{ + switch (e->op) + { + case CTPO_CONSTANT: + return e->number; - d[a]->hide_all_labels = all_hidden_vlabels (t, a); - } + case CTPO_CAT_NRANGE: + case CTPO_CAT_SRANGE: + case CTPO_CAT_MISSING: + case CTPO_CAT_OTHERNM: + { + struct ctables_cell_value cv = { + .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e) + }; + assert (cv.category != NULL); - { - size_t n_total_cells = 0; - for (size_t j = 0; j < t->n_sections; j++) - n_total_cells += hmap_count (&t->sections[j].cells); + struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx]; + const struct ctables_occurrence *o; - struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted); - size_t n_sorted = 0; - for (size_t j = 0; j < t->n_sections; j++) + double sum = 0.0; + const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]; + HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences) + if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category) + { + cv.value = o->value; + sum += ctables_pcexpr_evaluate_category (ctx, &cv); + } + return sum; + } + + case CTPO_CAT_NUMBER: + case CTPO_CAT_SUBTOTAL: + case 
CTPO_CAT_TOTAL: { - const struct ctables_section *s = &t->sections[j]; - struct ctables_cell *cell; - HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells) - if (!cell->hide) - sorted[n_sorted++] = cell; + struct ctables_cell_value cv = { + .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e), + .value = { .f = e->number }, + }; + assert (cv.category != NULL); + return ctables_pcexpr_evaluate_category (ctx, &cv); } - assert (n_sorted <= n_total_cells); - sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way, - NULL); - size_t ids[N_CTATS]; - memset (ids, 0, sizeof ids); - for (size_t j = 0; j < n_sorted; j++) + + case CTPO_CAT_STRING: { - struct ctables_cell *cell = sorted[j]; - for (enum ctables_area_type at = 0; at < N_CTATS; at++) + int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]); + char *s = NULL; + if (width > e->string.length) { - struct ctables_area *area = cell->areas[at]; - if (!area->sequence) - area->sequence = ++ids[at]; + s = xmalloc (width); + buf_copy_rpad (s, width, e->string.string, e->string.length, ' '); } + + const struct ctables_category *category + = ctables_find_category_for_postcompute ( + ctx->section->table->ctables->dict, + ctx->cats, ctx->parse_format, e); + assert (category != NULL); + + struct ctables_cell_value cv = { .category = category }; + if (category->type == CCT_NUMBER) + cv.value.f = category->number; + else if (category->type == CCT_STRING) + cv.value.s = CHAR_CAST (uint8_t *, s ? 
s : e->string.string); + else + NOT_REACHED (); + + double retval = ctables_pcexpr_evaluate_category (ctx, &cv); + free (s); + return retval; } - free (sorted); - } + case CTPO_ADD: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add); - for (size_t i = 0; i < t->n_sections; i++) - { - struct ctables_section *s = &t->sections[i]; + case CTPO_SUB: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub); - struct ctables_cell *cell; - HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells) - { - if (cell->hide) - continue; + case CTPO_MUL: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul); - const struct ctables_nest *specs_nest = s->nests[t->summary_axis]; - const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv]; - for (size_t j = 0; j < specs->n; j++) - { - size_t dindexes[5]; - size_t n_dindexes = 0; + case CTPO_DIV: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div); - if (summary_dimension) - dindexes[n_dindexes++] = specs->specs[j].axis_idx; + case CTPO_POW: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow); - if (categories_dimension) - { - const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis]; - const struct variable *var = clabels_nest->vars[clabels_nest->n - 1]; - const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value; - const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var)); - if (!ctv) - continue; - dindexes[n_dindexes++] = ctv->leaf; - } + case CTPO_NEG: + return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg); + } - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - if (d[a]) - { - int leaf = cell->axes[a].leaf; - if (a == t->summary_axis && !summary_dimension) - leaf += j; - dindexes[n_dindexes++] = leaf; - } + NOT_REACHED (); +} - const struct ctables_summary_spec *ss = &specs->specs[j]; +static const struct ctables_category * +ctables_cell_postcompute 
(const struct ctables_section *s, + const struct ctables_cell *cell, + enum pivot_axis_type *pc_a_p, + size_t *pc_a_idx_p) +{ + assert (cell->postcompute); + const struct ctables_category *pc_cat = NULL; + for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++) + for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++) + { + const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx]; + if (cv->category->type == CCT_POSTCOMPUTE) + { + if (pc_cat) + { + /* Multiple postcomputes cross each other. The value is + undefined. */ + return NULL; + } - struct fmt_spec format = specs->specs[j].format; - bool is_ctables_format = ss->is_ctables_format; - double d = (cell->postcompute - ? ctables_cell_calculate_postcompute ( - s, cell, ss, &format, &is_ctables_format, j) - : ctables_summary_value (cell, &cell->summaries[j], - ss)); + pc_cat = cv->category; + if (pc_a_p) + *pc_a_p = pc_a; + if (pc_a_idx_p) + *pc_a_idx_p = pc_a_idx; + } + } - struct pivot_value *value; - if (ct->hide_threshold != 0 - && d < ct->hide_threshold - && ss->function == CTSF_COUNT) - { - value = pivot_value_new_user_text_nocopy ( - xasprintf ("<%d", ct->hide_threshold)); - } - else if (d == 0 && ct->zero) - value = pivot_value_new_user_text (ct->zero, SIZE_MAX); - else if (d == SYSMIS && ct->missing) - value = pivot_value_new_user_text (ct->missing, SIZE_MAX); - else if (is_ctables_format) - value = pivot_value_new_user_text_nocopy ( - ctables_format (d, &format, &ct->ctables_formats)); - else - { - value = pivot_value_new_number (d); - value->numeric.format = format; - } - /* XXX should text values be right-justified? 
*/ - pivot_table_put (pt, dindexes, n_dindexes, value); + assert (pc_cat != NULL); + return pc_cat; +} + +static double +ctables_cell_calculate_postcompute (const struct ctables_section *s, + const struct ctables_cell *cell, + const struct ctables_summary_spec *ss, + struct fmt_spec *format, + bool *is_ctables_format, + size_t summary_idx) +{ + enum pivot_axis_type pc_a = 0; + size_t pc_a_idx = 0; + const struct ctables_category *pc_cat = ctables_cell_postcompute ( + s, cell, &pc_a, &pc_a_idx); + if (!pc_cat) + return SYSMIS; + + const struct ctables_postcompute *pc = pc_cat->pc; + if (pc->specs) + { + for (size_t i = 0; i < pc->specs->n; i++) + { + const struct ctables_summary_spec *ss2 = &pc->specs->specs[i]; + if (ss->function == ss2->function + && ss->weighting == ss2->weighting + && ss->calc_area == ss2->calc_area + && ss->percentile == ss2->percentile) + { + *format = ss2->format; + *is_ctables_format = ss2->is_ctables_format; + break; } } } - pivot_table_submit (pt); + const struct variable *var = s->nests[pc_a]->vars[pc_a_idx]; + const struct ctables_categories *cats = s->table->categories[ + var_get_dict_index (var)]; + struct ctables_pcexpr_evaluate_ctx ctx = { + .cell = cell, + .section = s, + .cats = cats, + .pc_a = pc_a, + .pc_a_idx = pc_a_idx, + .summary_idx = summary_idx, + .parse_format = pc_cat->parse_format, + }; + return ctables_pcexpr_evaluate (&ctx, pc->expr); } -static bool -ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a) +static char * +ctables_format (double d, const struct fmt_spec *format, + const struct fmt_settings *settings) { - enum pivot_axis_type label_pos = t->label_axis[a]; - if (label_pos == a) - return true; + const union value v = { .f = d }; + char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL); - const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS"; - const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? 
"LAYER" : "OPPOSITE"; + /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't + produce the results we want for negative numbers, putting the negative + sign in the wrong spot, before the prefix instead of after it. We can't, + in fact, produce the desired results using a custom-currency + specification. Instead, we postprocess the output, moving the negative + sign into place: - const struct ctables_stack *stack = &t->stacks[a]; - if (!stack->n) - return true; + NEQUAL: "-N=3" => "N=-3" + PAREN: "-(3)" => "(-3)" + PCTPAREN: "-(3%)" => "(-3%)" - const struct ctables_nest *n0 = &stack->nests[0]; - if (n0->n == 0) + This transformation doesn't affect NEGPAREN. */ + char *minus_src = strchr (s, '-'); + if (minus_src && (minus_src == s || minus_src[-1] != 'E')) { - assert (stack->n == 1); - return true; + char *n_equals = strstr (s, "N="); + char *lparen = strchr (s, '('); + char *minus_dst = n_equals ? n_equals + 1 : lparen; + if (minus_dst) + move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s); } + return s; +} - const struct variable *v0 = n0->vars[n0->n - 1]; - struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)]; - t->clabels_example = v0; +static bool +all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a) +{ + for (size_t i = 0; i < t->stacks[a].n; i++) + { + struct ctables_nest *nest = &t->stacks[a].nests[i]; + if (nest->n != 1 || nest->scale_idx != 0) + return false; - for (size_t i = 0; i < c0->n_cats; i++) - if (c0->cats[i].type == CCT_FUNCTION) - { - msg (SE, _("%s=%s is not allowed with sorting based " - "on a summary function."), - subcommand_name, pos_name); + enum ctables_vlabel vlabel + = t->ctables->vlabels[var_get_dict_index (nest->vars[0])]; + if (vlabel != CTVL_NONE) return false; - } - if (n0->n - 1 == n0->scale_idx) - { - msg (SE, _("%s=%s requires the variables to be moved to be categorical, " - "but %s is a scale variable."), - subcommand_name, pos_name, var_get_name 
(v0)); - return false; } + return true; +} - for (size_t i = 1; i < stack->n; i++) +static void +ctables_table_output (struct ctables *ct, struct ctables_table *t) +{ + struct pivot_table *pt = pivot_table_create__ ( + (t->title + ? pivot_value_new_user_text (t->title, SIZE_MAX) + : pivot_value_new_text (N_("Custom Tables"))), + "Custom Tables"); + if (t->caption) + pivot_table_set_caption ( + pt, pivot_value_new_user_text (t->caption, SIZE_MAX)); + if (t->corner) + pivot_table_set_corner_text ( + pt, pivot_value_new_user_text (t->corner, SIZE_MAX)); + + bool summary_dimension = (t->summary_axis != t->slabels_axis + || (!t->slabels_visible + && t->summary_specs.n > 1)); + if (summary_dimension) { - const struct ctables_nest *ni = &stack->nests[i]; - assert (ni->n > 0); - const struct variable *vi = ni->vars[ni->n - 1]; - struct ctables_categories *ci = t->categories[var_get_dict_index (vi)]; + struct pivot_dimension *d = pivot_dimension_create ( + pt, t->slabels_axis, N_("Statistics")); + const struct ctables_summary_spec_set *specs = &t->summary_specs; + if (!t->slabels_visible) + d->hide_all_labels = true; + for (size_t i = 0; i < specs->n; i++) + pivot_category_create_leaf ( + d->root, ctables_summary_label (&specs->specs[i], t->cilevel)); + } - if (ni->n - 1 == ni->scale_idx) - { - msg (SE, _("%s=%s requires the variables to be moved to be " - "categorical, but %s is a scale variable."), - subcommand_name, pos_name, var_get_name (vi)); - return false; - } - if (var_get_width (v0) != var_get_width (vi)) - { - msg (SE, _("%s=%s requires the variables to be " - "moved to have the same width, but %s has " - "width %d and %s has width %d."), - subcommand_name, pos_name, - var_get_name (v0), var_get_width (v0), - var_get_name (vi), var_get_width (vi)); - return false; - } - if (!val_labs_equal (var_get_value_labels (v0), - var_get_value_labels (vi))) - { - msg (SE, _("%s=%s requires the variables to be " - "moved to have the same value labels, but %s " - "and %s have 
different value labels."), - subcommand_name, pos_name, - var_get_name (v0), var_get_name (vi)); - return false; - } - if (!ctables_categories_equal (c0, ci)) + bool categories_dimension = t->clabels_example != NULL; + if (categories_dimension) + { + struct pivot_dimension *d = pivot_dimension_create ( + pt, t->label_axis[t->clabels_from_axis], + t->clabels_from_axis == PIVOT_AXIS_ROW + ? N_("Row Categories") + : N_("Column Categories")); + const struct variable *var = t->clabels_example; + const struct ctables_categories *c = t->categories[var_get_dict_index (var)]; + for (size_t i = 0; i < t->n_clabels_values; i++) { - msg (SE, _("%s=%s requires the variables to be " - "moved to have the same category " - "specifications, but %s and %s have different " - "category specifications."), - subcommand_name, pos_name, - var_get_name (v0), var_get_name (vi)); - return false; + const struct ctables_value *value = t->clabels_values[i]; + const struct ctables_category *cat = ctables_categories_match (c, &value->value, var); + assert (cat != NULL); + pivot_category_create_leaf ( + d->root, ctables_category_create_value_label (c, cat, + t->clabels_example, + &value->value)); } } - return true; -} + pivot_table_set_look (pt, ct->look); + struct pivot_dimension *d[PIVOT_N_AXES]; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + static const char *names[] = { + [PIVOT_AXIS_ROW] = N_("Rows"), + [PIVOT_AXIS_COLUMN] = N_("Columns"), + [PIVOT_AXIS_LAYER] = N_("Layers"), + }; + d[a] = (t->axes[a] || a == t->summary_axis + ? 
pivot_dimension_create (pt, a, names[a]) + : NULL); + if (!d[a]) + continue; -static size_t -add_sum_var (struct variable *var, - struct variable ***sum_vars, size_t *n, size_t *allocated) -{ - for (size_t i = 0; i < *n; i++) - if (var == (*sum_vars)[i]) - return i; + assert (t->axes[a]); - if (*n >= *allocated) - *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars); - (*sum_vars)[*n] = var; - return (*n)++; -} + for (size_t i = 0; i < t->stacks[a].n; i++) + { + struct ctables_nest *nest = &t->stacks[a].nests[i]; + struct ctables_section **sections = xnmalloc (t->n_sections, + sizeof *sections); + size_t n_sections = 0; -static enum ctables_area_type -rotate_area (enum ctables_area_type area) -{ - return area; - switch (area) - { - case CTAT_TABLE: - case CTAT_LAYER: - case CTAT_SUBTABLE: - return area; + size_t n_total_cells = 0; + size_t max_depth = 0; + for (size_t j = 0; j < t->n_sections; j++) + if (t->sections[j].nests[a] == nest) + { + struct ctables_section *s = &t->sections[j]; + sections[n_sections++] = s; + n_total_cells += hmap_count (&s->cells); + + size_t depth = s->nests[a]->n; + max_depth = MAX (depth, max_depth); + } + + struct ctables_cell **sorted = xnmalloc (n_total_cells, + sizeof *sorted); + size_t n_sorted = 0; - case CTAT_LAYERROW: - return CTAT_LAYERCOL; + for (size_t j = 0; j < n_sections; j++) + { + struct ctables_section *s = sections[j]; - case CTAT_LAYERCOL: - return CTAT_LAYERROW; + struct ctables_cell *cell; + HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells) + if (!cell->hide) + sorted[n_sorted++] = cell; + assert (n_sorted <= n_total_cells); + } - case CTAT_ROW: - return CTAT_COL; + struct ctables_cell_sort_aux aux = { .nest = nest, .a = a }; + sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux); - case CTAT_COL: - return CTAT_ROW; - } + struct ctables_level + { + enum ctables_level_type + { + CTL_VAR, /* Variable label for nest->vars[var_idx]. 
*/ + CTL_CATEGORY, /* Category for nest->vars[var_idx]. */ + CTL_SUMMARY, /* Summary functions. */ + } + type; - NOT_REACHED (); -} + enum settings_value_show vlabel; /* CTL_VAR only. */ + size_t var_idx; + }; + struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels); + size_t n_levels = 0; + for (size_t k = 0; k < nest->n; k++) + { + enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])]; + if (vlabel == CTVL_NONE && nest->scale_idx == k) + vlabel = CTVL_NAME; + if (vlabel != CTVL_NONE) + { + levels[n_levels++] = (struct ctables_level) { + .type = CTL_VAR, + .vlabel = (enum settings_value_show) vlabel, + .var_idx = k, + }; + } -static void -enumerate_sum_vars (const struct ctables_axis *a, - struct variable ***sum_vars, size_t *n, size_t *allocated) -{ - if (!a) - return; + if (nest->scale_idx != k + && (k != nest->n - 1 || t->label_axis[a] == a)) + { + levels[n_levels++] = (struct ctables_level) { + .type = CTL_CATEGORY, + .var_idx = k, + }; + } + } - switch (a->op) - { - case CTAO_VAR: - for (size_t i = 0; i < N_CSVS; i++) - for (size_t j = 0; j < a->specs[i].n; j++) - { - struct ctables_summary_spec *spec = &a->specs[i].specs[j]; - if (spec->function == CTSF_areaPCT_SUM) - spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated); - } - break; + if (!summary_dimension && a == t->slabels_axis) + { + levels[n_levels++] = (struct ctables_level) { + .type = CTL_SUMMARY, + .var_idx = SIZE_MAX, + }; + } - case CTAO_STACK: - case CTAO_NEST: - for (size_t i = 0; i < 2; i++) - enumerate_sum_vars (a->subs[i], sum_vars, n, allocated); - break; - } -} + /* Pivot categories: -static bool -ctables_prepare_table (struct ctables_table *t) -{ - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - if (t->axes[a]) - { - t->stacks[a] = enumerate_fts (a, t->axes[a]); + - variable label for nest->vars[0], if vlabel != CTVL_NONE + - category for nest->vars[0], if nest->scale_idx != 0 + - variable label for nest->vars[1], if vlabel 
!= CTVL_NONE + - category for nest->vars[1], if nest->scale_idx != 1 + ... + - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE + - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1. + - summary function, if 'a == t->slabels_axis && a == + t->summary_axis'. - for (size_t j = 0; j < t->stacks[a].n; j++) - { - struct ctables_nest *nest = &t->stacks[a].nests[j]; - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - { - nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]); - nest->n_areas[at] = 0; + Additional dimensions: - enum pivot_axis_type ata, atb; - if (at == CTAT_ROW || at == CTAT_LAYERROW) - { - ata = PIVOT_AXIS_ROW; - atb = PIVOT_AXIS_COLUMN; - } - else if (at == CTAT_COL || at == CTAT_LAYERCOL) - { - ata = PIVOT_AXIS_COLUMN; - atb = PIVOT_AXIS_ROW; - } + - If 'a == t->slabels_axis && a != t->summary_axis', add a summary + dimension. + - If 't->label_axis[b] == a' for some 'b != a', add a category + dimension to 'a'. + */ - if (at == CTAT_LAYER - ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER - : at == CTAT_LAYERCOL || at == CTAT_LAYERROW - ? a == atb && t->label_axis[a] != a - : false) - { - for (size_t k = nest->n - 1; k < nest->n; k--) - if (k != nest->scale_idx) + + struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups); + int prev_leaf = 0; + for (size_t j = 0; j < n_sorted; j++) + { + struct ctables_cell *cell = sorted[j]; + struct ctables_cell *prev = j > 0 ? 
sorted[j - 1] : NULL; + + size_t n_common = 0; + if (j > 0) + { + for (; n_common < n_levels; n_common++) + { + const struct ctables_level *level = &levels[n_common]; + if (level->type == CTL_CATEGORY) { - nest->areas[at][nest->n_areas[at]++] = k; - break; + size_t var_idx = level->var_idx; + const struct ctables_category *c = cell->axes[a].cvs[var_idx].category; + if (prev->axes[a].cvs[var_idx].category != c) + break; + else if (c->type != CCT_SUBTOTAL + && c->type != CCT_TOTAL + && c->type != CCT_POSTCOMPUTE + && !value_equal (&prev->axes[a].cvs[var_idx].value, + &cell->axes[a].cvs[var_idx].value, + var_get_type (nest->vars[var_idx]))) + break; } - continue; - } + } + } - if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER - : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb - : at == CTAT_TABLE ? true - : false) - continue; + for (size_t k = n_common; k < n_levels; k++) + { + const struct ctables_level *level = &levels[k]; + struct pivot_category *parent = k ? groups[k - 1] : d[a]->root; + if (level->type == CTL_SUMMARY) + { + assert (k == n_levels - 1); - for (size_t k = 0; k < nest->n; k++) - if (k != nest->scale_idx) - nest->areas[at][nest->n_areas[at]++] = k; + const struct ctables_summary_spec_set *specs = &t->summary_specs; + for (size_t m = 0; m < specs->n; m++) + { + int leaf = pivot_category_create_leaf ( + parent, ctables_summary_label (&specs->specs[m], + t->cilevel)); + if (!m) + prev_leaf = leaf; + } + } + else + { + const struct variable *var = nest->vars[level->var_idx]; + struct pivot_value *label; + if (level->type == CTL_VAR) + { + label = pivot_value_new_variable (var); + label->variable.show = level->vlabel; + } + else if (level->type == CTL_CATEGORY) + { + const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx]; + label = ctables_category_create_value_label ( + t->categories[var_get_dict_index (var)], + cv->category, var, &cv->value); + } + else + NOT_REACHED (); - int n_drop; - switch (at) - { - case CTAT_SUBTABLE: -#define L 
PIVOT_AXIS_LAYER - n_drop = (t->clabels_from_axis == L ? a != L - : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L) - : t->clabels_from_axis == a ? 2 - : 0); -#undef L - break; + if (k == n_levels - 1) + prev_leaf = pivot_category_create_leaf (parent, label); + else + groups[k] = pivot_category_create_group__ (parent, label); + } + } - case CTAT_LAYERROW: - case CTAT_LAYERCOL: - n_drop = a == ata && t->label_axis[ata] == atb; - break; + cell->axes[a].leaf = prev_leaf; + } + free (sorted); + free (groups); + free (levels); + free (sections); - case CTAT_ROW: - case CTAT_COL: - n_drop = (a == ata ? t->label_axis[ata] == atb - : a != atb ? 0 - : t->clabels_from_axis == atb ? -1 - : t->clabels_to_axis != atb ? 1 - : 0); - break; + } - case CTAT_LAYER: - case CTAT_TABLE: - n_drop = 0; - break; - } + d[a]->hide_all_labels = all_hidden_vlabels (t, a); + } - if (n_drop < 0) - { - size_t n = nest->n_areas[at]; - if (n > 1) - { - nest->areas[at][n - 2] = nest->areas[at][n - 1]; - nest->n_areas[at]--; - } - } - else - { - for (int i = 0; i < n_drop; i++) - if (nest->n_areas[at] > 0) - nest->n_areas[at]--; - } - } - } + { + size_t n_total_cells = 0; + for (size_t j = 0; j < t->n_sections; j++) + n_total_cells += hmap_count (&t->sections[j].cells); + + struct ctables_cell **sorted = xnmalloc (n_total_cells, sizeof *sorted); + size_t n_sorted = 0; + for (size_t j = 0; j < t->n_sections; j++) + { + const struct ctables_section *s = &t->sections[j]; + struct ctables_cell *cell; + HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells) + if (!cell->hide) + sorted[n_sorted++] = cell; } - else + assert (n_sorted <= n_total_cells); + sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way, + NULL); + size_t ids[N_CTATS]; + memset (ids, 0, sizeof ids); + for (size_t j = 0; j < n_sorted; j++) { - struct ctables_nest *nest = xmalloc (sizeof *nest); - *nest = (struct ctables_nest) { - .n = 0, - .scale_idx = SIZE_MAX, - .summary_idx = SIZE_MAX - }; - 
t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 }; - - /* There's no point in moving labels away from an axis that has no - labels, so avoid dealing with the special cases around that. */ - t->label_axis[a] = a; + struct ctables_cell *cell = sorted[j]; + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + { + struct ctables_area *area = cell->areas[at]; + if (!area->sequence) + area->sequence = ++ids[at]; + } } - struct ctables_stack *stack = &t->stacks[t->summary_axis]; - for (size_t i = 0; i < stack->n; i++) + free (sorted); + } + + for (size_t i = 0; i < t->n_sections; i++) { - struct ctables_nest *nest = &stack->nests[i]; - if (!nest->specs[CSV_CELL].n) - { - struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL]; - ss->specs = xmalloc (sizeof *ss->specs); - ss->n = 1; + struct ctables_section *s = &t->sections[i]; - enum ctables_summary_function function - = ss->is_scale ? CTSF_MEAN : CTSF_COUNT; + struct ctables_cell *cell; + HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells) + { + if (cell->hide) + continue; - if (!ss->var) + const struct ctables_nest *specs_nest = s->nests[t->summary_axis]; + const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv]; + for (size_t j = 0; j < specs->n; j++) { - nest->summary_idx = nest->n - 1; - ss->var = nest->vars[nest->summary_idx]; - } - *ss->specs = (struct ctables_summary_spec) { - .function = function, - .weighting = ss->is_scale ? 
CTW_EFFECTIVE : CTW_DICTIONARY, - .format = ctables_summary_default_format (function, ss->var), - }; + size_t dindexes[5]; + size_t n_dindexes = 0; - ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL], - &nest->specs[CSV_CELL]); - } - else if (!nest->specs[CSV_TOTAL].n) - ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL], - &nest->specs[CSV_CELL]); + if (summary_dimension) + dindexes[n_dindexes++] = specs->specs[j].axis_idx; - if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN - || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW) - { - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - for (size_t i = 0; i < nest->specs[sv].n; i++) - { - struct ctables_summary_spec *ss = &nest->specs[sv].specs[i]; - const struct ctables_function_info *cfi = - &ctables_function_info[ss->function]; - if (cfi->is_area) - ss->calc_area = rotate_area (ss->calc_area); - } - } + if (categories_dimension) + { + const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis]; + const struct variable *var = clabels_nest->vars[clabels_nest->n - 1]; + const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value; + const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var)); + if (!ctv) + continue; + dindexes[n_dindexes++] = ctv->leaf; + } - if (t->ctables->smissing_listwise) - { - struct variable **listwise_vars = NULL; - size_t n = 0; - size_t allocated = 0; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (d[a]) + { + int leaf = cell->axes[a].leaf; + if (a == t->summary_axis && !summary_dimension) + leaf += j; + dindexes[n_dindexes++] = leaf; + } - for (size_t j = nest->group_head; j < stack->n; j++) - { - const struct ctables_nest *other_nest = &stack->nests[j]; - if (other_nest->group_head != nest->group_head) - break; + const struct ctables_summary_spec *ss = &specs->specs[j]; - if (nest != other_nest && other_nest->scale_idx < other_nest->n) + struct fmt_spec format = 
specs->specs[j].format; + bool is_ctables_format = ss->is_ctables_format; + double d = (cell->postcompute + ? ctables_cell_calculate_postcompute ( + s, cell, ss, &format, &is_ctables_format, j) + : ctables_summary_value (cell, &cell->summaries[j], + ss)); + + struct pivot_value *value; + if (ct->hide_threshold != 0 + && d < ct->hide_threshold + && ss->function == CTSF_COUNT) { - if (n >= allocated) - listwise_vars = x2nrealloc (listwise_vars, &allocated, - sizeof *listwise_vars); - listwise_vars[n++] = other_nest->vars[other_nest->scale_idx]; + value = pivot_value_new_user_text_nocopy ( + xasprintf ("<%d", ct->hide_threshold)); } - } - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - { - if (sv > 0) - listwise_vars = xmemdup (listwise_vars, - n * sizeof *listwise_vars); - nest->specs[sv].listwise_vars = listwise_vars; - nest->specs[sv].n_listwise_vars = n; + else if (d == 0 && ct->zero) + value = pivot_value_new_user_text (ct->zero, SIZE_MAX); + else if (d == SYSMIS && ct->missing) + value = pivot_value_new_user_text (ct->missing, SIZE_MAX); + else if (is_ctables_format) + value = pivot_value_new_user_text_nocopy ( + ctables_format (d, &format, &ct->ctables_formats)); + else + { + value = pivot_value_new_number (d); + value->numeric.format = format; + } + /* XXX should text values be right-justified? */ + pivot_table_put (pt, dindexes, n_dindexes, value); } } } - struct ctables_summary_spec_set *merged = &t->summary_specs; - struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items); - size_t n_left = 0; - for (size_t j = 0; j < stack->n; j++) + pivot_table_submit (pt); +} + +static bool +ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a) +{ + enum pivot_axis_type label_pos = t->label_axis[a]; + if (label_pos == a) + return true; + + const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS"; + const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? 
"LAYER" : "OPPOSITE"; + + const struct ctables_stack *stack = &t->stacks[a]; + if (!stack->n) + return true; + + const struct ctables_nest *n0 = &stack->nests[0]; + if (n0->n == 0) { - const struct ctables_nest *nest = &stack->nests[j]; - if (nest->n) - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] }; + assert (stack->n == 1); + return true; } - while (n_left > 0) + const struct variable *v0 = n0->vars[n0->n - 1]; + struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)]; + t->clabels_example = v0; + + for (size_t i = 0; i < c0->n_cats; i++) + if (c0->cats[i].type == CCT_FUNCTION) + { + msg (SE, _("%s=%s is not allowed with sorting based " + "on a summary function."), + subcommand_name, pos_name); + return false; + } + if (n0->n - 1 == n0->scale_idx) { - struct merge_item min = items[0]; - for (size_t j = 1; j < n_left; j++) - if (merge_item_compare_3way (&items[j], &min) < 0) - min = items[j]; + msg (SE, _("%s=%s requires the variables to be moved to be categorical, " + "but %s is a scale variable."), + subcommand_name, pos_name, var_get_name (v0)); + return false; + } - if (merged->n >= merged->allocated) - merged->specs = x2nrealloc (merged->specs, &merged->allocated, - sizeof *merged->specs); - merged->specs[merged->n++] = min.set->specs[min.ofs]; + for (size_t i = 1; i < stack->n; i++) + { + const struct ctables_nest *ni = &stack->nests[i]; + assert (ni->n > 0); + const struct variable *vi = ni->vars[ni->n - 1]; + struct ctables_categories *ci = t->categories[var_get_dict_index (vi)]; - for (size_t j = 0; j < n_left; ) + if (ni->n - 1 == ni->scale_idx) + { + msg (SE, _("%s=%s requires the variables to be moved to be " + "categorical, but %s is a scale variable."), + subcommand_name, pos_name, var_get_name (vi)); + return false; + } + if (var_get_width (v0) != var_get_width (vi)) + { + msg (SE, _("%s=%s requires the variables to be " + "moved to have the same width, 
but %s has " + "width %d and %s has width %d."), + subcommand_name, pos_name, + var_get_name (v0), var_get_width (v0), + var_get_name (vi), var_get_width (vi)); + return false; + } + if (!val_labs_equal (var_get_value_labels (v0), + var_get_value_labels (vi))) { - if (merge_item_compare_3way (&items[j], &min) == 0) - { - struct merge_item *item = &items[j]; - item->set->specs[item->ofs].axis_idx = merged->n - 1; - if (++item->ofs >= item->set->n) - { - items[j] = items[--n_left]; - continue; - } - } - j++; + msg (SE, _("%s=%s requires the variables to be " + "moved to have the same value labels, but %s " + "and %s have different value labels."), + subcommand_name, pos_name, + var_get_name (v0), var_get_name (vi)); + return false; + } + if (!ctables_categories_equal (c0, ci)) + { + msg (SE, _("%s=%s requires the variables to be " + "moved to have the same category " + "specifications, but %s and %s have different " + "category specifications."), + subcommand_name, pos_name, + var_get_name (v0), var_get_name (vi)); + return false; } } - free (items); - size_t allocated_sum_vars = 0; - enumerate_sum_vars (t->axes[t->summary_axis], - &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars); + return true; +} - return (ctables_check_label_position (t, PIVOT_AXIS_ROW) - && ctables_check_label_position (t, PIVOT_AXIS_COLUMN)); +static size_t +add_sum_var (struct variable *var, + struct variable ***sum_vars, size_t *n, size_t *allocated) +{ + for (size_t i = 0; i < *n; i++) + if (var == (*sum_vars)[i]) + return i; + + if (*n >= *allocated) + *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars); + (*sum_vars)[*n] = var; + return (*n)++; } -static void -ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c, - enum pivot_axis_type a) +static enum ctables_area_type +rotate_area (enum ctables_area_type area) { - struct ctables_stack *stack = &t->stacks[a]; - for (size_t i = 0; i < stack->n; i++) + return area; + switch (area) { - const struct 
ctables_nest *nest = &stack->nests[i]; - const struct variable *var = nest->vars[nest->n - 1]; - const union value *value = case_data (c, var); + case CTAT_TABLE: + case CTAT_LAYER: + case CTAT_SUBTABLE: + return area; - if (var_is_numeric (var) && value->f == SYSMIS) - continue; + case CTAT_LAYERROW: + return CTAT_LAYERCOL; - if (ctables_categories_match (t->categories [var_get_dict_index (var)], - value, var)) - ctables_value_insert (t, value, var_get_width (var)); + case CTAT_LAYERCOL: + return CTAT_LAYERROW; + + case CTAT_ROW: + return CTAT_COL; + + case CTAT_COL: + return CTAT_ROW; } -} -static int -compare_clabels_values_3way (const void *a_, const void *b_, const void *width_) -{ - const struct ctables_value *const *ap = a_; - const struct ctables_value *const *bp = b_; - const struct ctables_value *a = *ap; - const struct ctables_value *b = *bp; - const int *width = width_; - return value_compare_3way (&a->value, &b->value, *width); + NOT_REACHED (); } static void -ctables_sort_clabels_values (struct ctables_table *t) +enumerate_sum_vars (const struct ctables_axis *a, + struct variable ***sum_vars, size_t *n, size_t *allocated) { - const struct variable *v0 = t->clabels_example; - int width = var_get_width (v0); + if (!a) + return; - struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)]; - if (c0->show_empty) + switch (a->op) { - const struct val_labs *val_labs = var_get_value_labels (v0); - for (const struct val_lab *vl = val_labs_first (val_labs); vl; - vl = val_labs_next (val_labs, vl)) - if (ctables_categories_match (c0, &vl->value, v0)) - ctables_value_insert (t, &vl->value, width); + case CTAO_VAR: + for (size_t i = 0; i < N_CSVS; i++) + for (size_t j = 0; j < a->specs[i].n; j++) + { + struct ctables_summary_spec *spec = &a->specs[i].specs[j]; + if (spec->function == CTSF_areaPCT_SUM) + spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated); + } + break; + + case CTAO_STACK: + case CTAO_NEST: + for (size_t i = 0; i < 2; i++) 
+ enumerate_sum_vars (a->subs[i], sum_vars, n, allocated); + break; } +} - size_t n = hmap_count (&t->clabels_values_map); - t->clabels_values = xnmalloc (n, sizeof *t->clabels_values); +static bool +ctables_prepare_table (struct ctables_table *t) +{ + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (t->axes[a]) + { + t->stacks[a] = enumerate_fts (a, t->axes[a]); - struct ctables_value *clv; - size_t i = 0; - HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map) - t->clabels_values[i++] = clv; - t->n_clabels_values = n; - assert (i == n); + for (size_t j = 0; j < t->stacks[a].n; j++) + { + struct ctables_nest *nest = &t->stacks[a].nests[j]; + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + { + nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]); + nest->n_areas[at] = 0; - sort (t->clabels_values, n, sizeof *t->clabels_values, - compare_clabels_values_3way, &width); + enum pivot_axis_type ata, atb; + if (at == CTAT_ROW || at == CTAT_LAYERROW) + { + ata = PIVOT_AXIS_ROW; + atb = PIVOT_AXIS_COLUMN; + } + else if (at == CTAT_COL || at == CTAT_LAYERCOL) + { + ata = PIVOT_AXIS_COLUMN; + atb = PIVOT_AXIS_ROW; + } - for (size_t i = 0; i < n; i++) - t->clabels_values[i]->leaf = i; -} + if (at == CTAT_LAYER + ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER + : at == CTAT_LAYERCOL || at == CTAT_LAYERROW + ? a == atb && t->label_axis[a] != a + : false) + { + for (size_t k = nest->n - 1; k < nest->n; k--) + if (k != nest->scale_idx) + { + nest->areas[at][nest->n_areas[at]++] = k; + break; + } + continue; + } -static void -ctables_add_category_occurrences (const struct variable *var, - struct hmap *occurrences, - const struct ctables_categories *cats) -{ - const struct val_labs *val_labs = var_get_value_labels (var); + if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER + : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb + : at == CTAT_TABLE ? 
true + : false) + continue; - for (size_t i = 0; i < cats->n_cats; i++) - { - const struct ctables_category *c = &cats->cats[i]; - switch (c->type) - { - case CCT_NUMBER: - ctables_add_occurrence (var, &(const union value) { .f = c->number }, - occurrences); - break; + for (size_t k = 0; k < nest->n; k++) + if (k != nest->scale_idx) + nest->areas[at][nest->n_areas[at]++] = k; - case CCT_STRING: - { - int width = var_get_width (var); - union value value; - value_init (&value, width); - value_copy_buf_rpad (&value, width, - CHAR_CAST (uint8_t *, c->string.string), - c->string.length, ' '); - ctables_add_occurrence (var, &value, occurrences); - value_destroy (&value, width); - } - break; + int n_drop; + switch (at) + { + case CTAT_SUBTABLE: +#define L PIVOT_AXIS_LAYER + n_drop = (t->clabels_from_axis == L ? a != L + : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L) + : t->clabels_from_axis == a ? 2 + : 0); +#undef L + break; - case CCT_NRANGE: - assert (var_is_numeric (var)); - for (const struct val_lab *vl = val_labs_first (val_labs); vl; - vl = val_labs_next (val_labs, vl)) - if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1]) - ctables_add_occurrence (var, &vl->value, occurrences); - break; + case CTAT_LAYERROW: + case CTAT_LAYERCOL: + n_drop = a == ata && t->label_axis[ata] == atb; + break; - case CCT_SRANGE: - assert (var_is_alpha (var)); - for (const struct val_lab *vl = val_labs_first (val_labs); vl; - vl = val_labs_next (val_labs, vl)) - if (in_string_range (&vl->value, var, c->srange)) - ctables_add_occurrence (var, &vl->value, occurrences); - break; + case CTAT_ROW: + case CTAT_COL: + n_drop = (a == ata ? t->label_axis[ata] == atb + : a != atb ? 0 + : t->clabels_from_axis == atb ? -1 + : t->clabels_to_axis != atb ? 
1 + : 0); + break; + + case CTAT_LAYER: + case CTAT_TABLE: + n_drop = 0; + break; + } - case CCT_MISSING: - for (const struct val_lab *vl = val_labs_first (val_labs); vl; - vl = val_labs_next (val_labs, vl)) - if (var_is_value_missing (var, &vl->value)) - ctables_add_occurrence (var, &vl->value, occurrences); - break; + if (n_drop < 0) + { + size_t n = nest->n_areas[at]; + if (n > 1) + { + nest->areas[at][n - 2] = nest->areas[at][n - 1]; + nest->n_areas[at]--; + } + } + else + { + for (int i = 0; i < n_drop; i++) + if (nest->n_areas[at] > 0) + nest->n_areas[at]--; + } + } + } + } + else + { + struct ctables_nest *nest = xmalloc (sizeof *nest); + *nest = (struct ctables_nest) { + .n = 0, + .scale_idx = SIZE_MAX, + .summary_idx = SIZE_MAX + }; + t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 }; - case CCT_OTHERNM: - for (const struct val_lab *vl = val_labs_first (val_labs); vl; - vl = val_labs_next (val_labs, vl)) - ctables_add_occurrence (var, &vl->value, occurrences); - break; + /* There's no point in moving labels away from an axis that has no + labels, so avoid dealing with the special cases around that. */ + t->label_axis[a] = a; + } - case CCT_POSTCOMPUTE: - break; + struct ctables_stack *stack = &t->stacks[t->summary_axis]; + for (size_t i = 0; i < stack->n; i++) + { + struct ctables_nest *nest = &stack->nests[i]; + if (!nest->specs[CSV_CELL].n) + { + struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL]; + ss->specs = xmalloc (sizeof *ss->specs); + ss->n = 1; - case CCT_SUBTOTAL: - case CCT_TOTAL: - break; + enum ctables_summary_function function + = ss->is_scale ? 
CTSF_MEAN : CTSF_COUNT; - case CCT_VALUE: - case CCT_LABEL: - case CCT_FUNCTION: - for (const struct val_lab *vl = val_labs_first (val_labs); vl; - vl = val_labs_next (val_labs, vl)) - if (c->include_missing || !var_is_value_missing (var, &vl->value)) - ctables_add_occurrence (var, &vl->value, occurrences); - break; + if (!ss->var) + { + nest->summary_idx = nest->n - 1; + ss->var = nest->vars[nest->summary_idx]; + } + *ss->specs = (struct ctables_summary_spec) { + .function = function, + .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY, + .format = ctables_summary_default_format (function, ss->var), + }; - case CCT_EXCLUDED_MISSING: - break; + ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL], + &nest->specs[CSV_CELL]); } - } -} + else if (!nest->specs[CSV_TOTAL].n) + ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL], + &nest->specs[CSV_CELL]); -static void -ctables_section_recurse_add_empty_categories ( - struct ctables_section *s, - const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c, - enum pivot_axis_type a, size_t a_idx) -{ - if (a >= PIVOT_N_AXES) - ctables_cell_insert__ (s, c, cats); - else if (!s->nests[a] || a_idx >= s->nests[a]->n) - ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0); - else - { - const struct variable *var = s->nests[a]->vars[a_idx]; - const struct ctables_categories *categories = s->table->categories[ - var_get_dict_index (var)]; - int width = var_get_width (var); - const struct hmap *occurrences = &s->occurrences[a][a_idx]; - const struct ctables_occurrence *o; - HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences) + if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN + || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW) { - union value *value = case_data_rw (c, var); - value_destroy (value, width); - value_clone (value, &o->value, width); - cats[a][a_idx] = ctables_categories_match (categories, value, var); - assert (cats[a][a_idx] != NULL); - 
ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1); + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + for (size_t i = 0; i < nest->specs[sv].n; i++) + { + struct ctables_summary_spec *ss = &nest->specs[sv].specs[i]; + const struct ctables_function_info *cfi = + &ctables_function_info[ss->function]; + if (cfi->is_area) + ss->calc_area = rotate_area (ss->calc_area); + } } - for (size_t i = 0; i < categories->n_cats; i++) + if (t->ctables->smissing_listwise) { - const struct ctables_category *cat = &categories->cats[i]; - if (cat->type == CCT_POSTCOMPUTE) + struct variable **listwise_vars = NULL; + size_t n = 0; + size_t allocated = 0; + + for (size_t j = nest->group_head; j < stack->n; j++) { - cats[a][a_idx] = cat; - ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1); + const struct ctables_nest *other_nest = &stack->nests[j]; + if (other_nest->group_head != nest->group_head) + break; + + if (nest != other_nest && other_nest->scale_idx < other_nest->n) + { + if (n >= allocated) + listwise_vars = x2nrealloc (listwise_vars, &allocated, + sizeof *listwise_vars); + listwise_vars[n++] = other_nest->vars[other_nest->scale_idx]; + } + } + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + { + if (sv > 0) + listwise_vars = xmemdup (listwise_vars, + n * sizeof *listwise_vars); + nest->specs[sv].listwise_vars = listwise_vars; + nest->specs[sv].n_listwise_vars = n; } } } -} - -static void -ctables_section_add_empty_categories (struct ctables_section *s) -{ - bool show_empty = false; - for (size_t a = 0; a < PIVOT_N_AXES; a++) - if (s->nests[a]) - for (size_t k = 0; k < s->nests[a]->n; k++) - if (k != s->nests[a]->scale_idx) - { - const struct variable *var = s->nests[a]->vars[k]; - const struct ctables_categories *cats = s->table->categories[ - var_get_dict_index (var)]; - if (cats->show_empty) - { - show_empty = true; - ctables_add_category_occurrences (var, &s->occurrences[a][k], cats); - } - } - if 
(!show_empty) - return; - - const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; - const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; - const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; - const struct ctables_category **cats[PIVOT_N_AXES] = - { - [PIVOT_AXIS_LAYER] = layer_cats, - [PIVOT_AXIS_ROW] = row_cats, - [PIVOT_AXIS_COLUMN] = column_cats, - }; - struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict)); - ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0); - case_unref (c); -} -static void -ctables_section_clear (struct ctables_section *s) -{ - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + struct ctables_summary_spec_set *merged = &t->summary_specs; + struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items); + size_t n_left = 0; + for (size_t j = 0; j < stack->n; j++) { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - { - const struct variable *var = nest->vars[i]; - int width = var_get_width (var); - struct ctables_occurrence *o, *next; - struct hmap *map = &s->occurrences[a][i]; - HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map) - { - value_destroy (&o->value, width); - hmap_delete (map, &o->node); - free (o); - } - hmap_shrink (map); - } + const struct ctables_nest *nest = &stack->nests[j]; + if (nest->n) + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] }; } - struct ctables_cell *cell, *next_cell; - HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells) + while (n_left > 0) { - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - if (i != nest->scale_idx) - value_destroy (&cell->axes[a].cvs[i].value, - var_get_width (nest->vars[i])); - free 
(cell->axes[a].cvs); - } - - const struct ctables_nest *ss = s->nests[s->table->summary_axis]; - const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; - for (size_t i = 0; i < specs->n; i++) - ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]); - free (cell->summaries); + struct merge_item min = items[0]; + for (size_t j = 1; j < n_left; j++) + if (merge_item_compare_3way (&items[j], &min) < 0) + min = items[j]; - hmap_delete (&s->cells, &cell->node); - free (cell); - } - hmap_shrink (&s->cells); + if (merged->n >= merged->allocated) + merged->specs = x2nrealloc (merged->specs, &merged->allocated, + sizeof *merged->specs); + merged->specs[merged->n++] = min.set->specs[min.ofs]; - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - { - struct ctables_area *area, *next_area; - HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node, - &s->areas[at]) - { - free (area->sums); - hmap_delete (&s->areas[at], &area->node); - free (area); + for (size_t j = 0; j < n_left; ) + { + if (merge_item_compare_3way (&items[j], &min) == 0) + { + struct merge_item *item = &items[j]; + item->set->specs[item->ofs].axis_idx = merged->n - 1; + if (++item->ofs >= item->set->n) + { + items[j] = items[--n_left]; + continue; + } + } + j++; } - hmap_shrink (&s->areas[at]); } -} - -static void -ctables_section_uninit (struct ctables_section *s) -{ - ctables_section_clear (s); + free (items); - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - { - struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n; i++) - hmap_destroy (&s->occurrences[a][i]); - free (s->occurrences[a]); - } + size_t allocated_sum_vars = 0; + enumerate_sum_vars (t->axes[t->summary_axis], + &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars); - hmap_destroy (&s->cells); - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - hmap_destroy (&s->areas[at]); + return (ctables_check_label_position (t, PIVOT_AXIS_ROW) + && ctables_check_label_position (t, 
PIVOT_AXIS_COLUMN)); } static void -ctables_table_clear (struct ctables_table *t) +ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c, + enum pivot_axis_type a) { - for (size_t i = 0; i < t->n_sections; i++) - ctables_section_clear (&t->sections[i]); - - if (t->clabels_example) + struct ctables_stack *stack = &t->stacks[a]; + for (size_t i = 0; i < stack->n; i++) { - int width = var_get_width (t->clabels_example); - struct ctables_value *value, *next_value; - HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node, - &t->clabels_values_map) - { - value_destroy (&value->value, width); - hmap_delete (&t->clabels_values_map, &value->node); - free (value); - } - hmap_shrink (&t->clabels_values_map); + const struct ctables_nest *nest = &stack->nests[i]; + const struct variable *var = nest->vars[nest->n - 1]; + const union value *value = case_data (c, var); - free (t->clabels_values); - t->clabels_values = NULL; - t->n_clabels_values = 0; + if (var_is_numeric (var) && value->f == SYSMIS) + continue; + + if (ctables_categories_match (t->categories [var_get_dict_index (var)], + value, var)) + ctables_value_insert (t, value, var_get_width (var)); } } -static bool -ctables_execute (struct dataset *ds, struct casereader *input, - struct ctables *ct) +static int +compare_clabels_values_3way (const void *a_, const void *b_, const void *width_) { - for (size_t i = 0; i < ct->n_tables; i++) - { - struct ctables_table *t = ct->tables[i]; - t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) * - MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) * - MAX (1, t->stacks[PIVOT_AXIS_LAYER].n), - sizeof *t->sections); - size_t ix[PIVOT_N_AXES]; - ctables_table_add_section (t, 0, ix); - } + const struct ctables_value *const *ap = a_; + const struct ctables_value *const *bp = b_; + const struct ctables_value *a = *ap; + const struct ctables_value *b = *bp; + const int *width = width_; + return value_compare_3way (&a->value, &b->value, *width); +} - struct 
dictionary *dict = dataset_dict (ds); +static void +ctables_sort_clabels_values (struct ctables_table *t) +{ + const struct variable *v0 = t->clabels_example; + int width = var_get_width (v0); - bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE; - struct casegrouper *grouper - = (splitting - ? casegrouper_create_splits (input, dict) - : casegrouper_create_vars (input, NULL, 0)); - struct casereader *group; - while (casegrouper_get_next_group (grouper, &group)) + struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)]; + if (c0->show_empty) { - if (splitting) - { - struct ccase *c = casereader_peek (group, 0); - if (c != NULL) - { - output_split_file_values (ds, c); - case_unref (c); - } - } - - bool warn_on_invalid = true; - for (struct ccase *c = casereader_read (group); c; - case_unref (c), c = casereader_read (group)) - { - double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid); - double e_weight = (ct->e_weight - ? var_force_valid_weight (ct->e_weight, - case_num (c, ct->e_weight), - &warn_on_invalid) - : d_weight); - double weight[] = { - [CTW_DICTIONARY] = d_weight, - [CTW_EFFECTIVE] = e_weight, - [CTW_UNWEIGHTED] = 1.0, - }; - - for (size_t i = 0; i < ct->n_tables; i++) - { - struct ctables_table *t = ct->tables[i]; - - for (size_t j = 0; j < t->n_sections; j++) - ctables_cell_insert (&t->sections[j], c, weight); - - for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) - if (t->label_axis[a] != a) - ctables_insert_clabels_values (t, c, a); - } - } - casereader_destroy (group); + const struct val_labs *val_labs = var_get_value_labels (v0); + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + if (ctables_categories_match (c0, &vl->value, v0)) + ctables_value_insert (t, &vl->value, width); + } - for (size_t i = 0; i < ct->n_tables; i++) - { - struct ctables_table *t = ct->tables[i]; + size_t n = hmap_count (&t->clabels_values_map); + t->clabels_values = xnmalloc 
(n, sizeof *t->clabels_values); - if (t->clabels_example) - ctables_sort_clabels_values (t); + struct ctables_value *clv; + size_t i = 0; + HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map) + t->clabels_values[i++] = clv; + t->n_clabels_values = n; + assert (i == n); - for (size_t j = 0; j < t->n_sections; j++) - ctables_section_add_empty_categories (&t->sections[j]); + sort (t->clabels_values, n, sizeof *t->clabels_values, + compare_clabels_values_3way, &width); - ctables_table_output (ct, t); - ctables_table_clear (t); - } - } - return casegrouper_destroy (grouper); + for (size_t i = 0; i < n; i++) + t->clabels_values[i]->leaf = i; } - -/* Postcomputes. */ - -typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *, - struct dictionary *); static void -ctables_pcexpr_destroy (struct ctables_pcexpr *e) +ctables_add_category_occurrences (const struct variable *var, + struct hmap *occurrences, + const struct ctables_categories *cats) { - if (e) + const struct val_labs *val_labs = var_get_value_labels (var); + + for (size_t i = 0; i < cats->n_cats; i++) { - switch (e->op) + const struct ctables_category *c = &cats->cats[i]; + switch (c->type) { - case CTPO_CAT_STRING: - ss_dealloc (&e->string); + case CCT_NUMBER: + ctables_add_occurrence (var, &(const union value) { .f = c->number }, + occurrences); break; - case CTPO_CAT_SRANGE: - for (size_t i = 0; i < 2; i++) - ss_dealloc (&e->srange[i]); + case CCT_STRING: + { + int width = var_get_width (var); + union value value; + value_init (&value, width); + value_copy_buf_rpad (&value, width, + CHAR_CAST (uint8_t *, c->string.string), + c->string.length, ' '); + ctables_add_occurrence (var, &value, occurrences); + value_destroy (&value, width); + } break; - case CTPO_ADD: - case CTPO_SUB: - case CTPO_MUL: - case CTPO_DIV: - case CTPO_POW: - case CTPO_NEG: - for (size_t i = 0; i < 2; i++) - ctables_pcexpr_destroy (e->subs[i]); + case CCT_NRANGE: + assert (var_is_numeric (var)); + for (const 
struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1]) + ctables_add_occurrence (var, &vl->value, occurrences); break; - case CTPO_CONSTANT: - case CTPO_CAT_NUMBER: - case CTPO_CAT_NRANGE: - case CTPO_CAT_MISSING: - case CTPO_CAT_OTHERNM: - case CTPO_CAT_SUBTOTAL: - case CTPO_CAT_TOTAL: + case CCT_SRANGE: + assert (var_is_alpha (var)); + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + if (in_string_range (&vl->value, var, c->srange)) + ctables_add_occurrence (var, &vl->value, occurrences); break; - } - - msg_location_destroy (e->location); - free (e); - } -} -static struct ctables_pcexpr * -ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op, - struct ctables_pcexpr *sub0, - struct ctables_pcexpr *sub1) -{ - struct ctables_pcexpr *e = xmalloc (sizeof *e); - *e = (struct ctables_pcexpr) { - .op = op, - .subs = { sub0, sub1 }, - .location = msg_location_merged (sub0->location, sub1->location), - }; - return e; -} + case CCT_MISSING: + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + if (var_is_value_missing (var, &vl->value)) + ctables_add_occurrence (var, &vl->value, occurrences); + break; -/* How to parse an operator. 
*/ -struct operator - { - enum token_type token; - enum ctables_postcompute_op op; - }; + case CCT_OTHERNM: + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + ctables_add_occurrence (var, &vl->value, occurrences); + break; -static const struct operator * -ctables_pcexpr_match_operator (struct lexer *lexer, - const struct operator ops[], size_t n_ops) -{ - for (const struct operator *op = ops; op < ops + n_ops; op++) - if (lex_token (lexer) == op->token) - { - if (op->token != T_NEG_NUM) - lex_get (lexer); + case CCT_POSTCOMPUTE: + break; - return op; - } + case CCT_SUBTOTAL: + case CCT_TOTAL: + break; - return NULL; + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + for (const struct val_lab *vl = val_labs_first (val_labs); vl; + vl = val_labs_next (val_labs, vl)) + if (c->include_missing || !var_is_value_missing (var, &vl->value)) + ctables_add_occurrence (var, &vl->value, occurrences); + break; + + case CCT_EXCLUDED_MISSING: + break; + } + } } -static struct ctables_pcexpr * -ctables_pcexpr_parse_binary_operators__ ( - struct lexer *lexer, struct dictionary *dict, - const struct operator ops[], size_t n_ops, - parse_recursively_func *parse_next_level, - const char *chain_warning, struct ctables_pcexpr *lhs) +static void +ctables_section_recurse_add_empty_categories ( + struct ctables_section *s, + const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c, + enum pivot_axis_type a, size_t a_idx) { - for (int op_count = 0; ; op_count++) + if (a >= PIVOT_N_AXES) + ctables_cell_insert__ (s, c, cats); + else if (!s->nests[a] || a_idx >= s->nests[a]->n) + ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0); + else { - const struct operator *op - = ctables_pcexpr_match_operator (lexer, ops, n_ops); - if (!op) + const struct variable *var = s->nests[a]->vars[a_idx]; + const struct ctables_categories *categories = s->table->categories[ + var_get_dict_index (var)]; + int width = 
var_get_width (var); + const struct hmap *occurrences = &s->occurrences[a][a_idx]; + const struct ctables_occurrence *o; + HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences) { - if (op_count > 1 && chain_warning) - msg_at (SW, lhs->location, "%s", chain_warning); - - return lhs; + union value *value = case_data_rw (c, var); + value_destroy (value, width); + value_clone (value, &o->value, width); + cats[a][a_idx] = ctables_categories_match (categories, value, var); + assert (cats[a][a_idx] != NULL); + ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1); } - struct ctables_pcexpr *rhs = parse_next_level (lexer, dict); - if (!rhs) + for (size_t i = 0; i < categories->n_cats; i++) { - ctables_pcexpr_destroy (lhs); - return NULL; + const struct ctables_category *cat = &categories->cats[i]; + if (cat->type == CCT_POSTCOMPUTE) + { + cats[a][a_idx] = cat; + ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1); + } } - - lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs); } } -static struct ctables_pcexpr * -ctables_pcexpr_parse_binary_operators ( - struct lexer *lexer, struct dictionary *dict, - const struct operator ops[], size_t n_ops, - parse_recursively_func *parse_next_level, const char *chain_warning) -{ - struct ctables_pcexpr *lhs = parse_next_level (lexer, dict); - if (!lhs) - return NULL; - - return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops, - parse_next_level, - chain_warning, lhs); -} - -static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *, - struct dictionary *); - -static struct ctables_pcexpr -ctpo_cat_nrange (double low, double high) +static void +ctables_section_add_empty_categories (struct ctables_section *s) { - return (struct ctables_pcexpr) { - .op = CTPO_CAT_NRANGE, - .nrange = { low, high }, - }; -} + bool show_empty = false; + for (size_t a = 0; a < PIVOT_N_AXES; a++) + if (s->nests[a]) + for (size_t k = 0; k < s->nests[a]->n; k++) + if (k != 
s->nests[a]->scale_idx) + { + const struct variable *var = s->nests[a]->vars[k]; + const struct ctables_categories *cats = s->table->categories[ + var_get_dict_index (var)]; + if (cats->show_empty) + { + show_empty = true; + ctables_add_category_occurrences (var, &s->occurrences[a][k], cats); + } + } + if (!show_empty) + return; -static struct ctables_pcexpr -ctpo_cat_srange (struct substring low, struct substring high) -{ - return (struct ctables_pcexpr) { - .op = CTPO_CAT_SRANGE, - .srange = { low, high }, - }; + const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; + const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; + const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; + const struct ctables_category **cats[PIVOT_N_AXES] = + { + [PIVOT_AXIS_LAYER] = layer_cats, + [PIVOT_AXIS_ROW] = row_cats, + [PIVOT_AXIS_COLUMN] = column_cats, + }; + struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict)); + ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0); + case_unref (c); } -static struct ctables_pcexpr * -ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict) +static void +ctables_section_clear (struct ctables_section *s) { - int start_ofs = lex_ofs (lexer); - struct ctables_pcexpr e; - if (lex_is_number (lexer)) - { - e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT, - .number = lex_number (lexer) }; - lex_get (lexer); - } - else if (lex_match_id (lexer, "MISSING")) - e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING }; - else if (lex_match_id (lexer, "OTHERNM")) - e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM }; - else if (lex_match_id (lexer, "TOTAL")) - e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL }; - else if (lex_match_id (lexer, "SUBTOTAL")) - { - size_t subtotal_index = 0; - if (lex_match (lexer, T_LBRACK)) - { - if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX)) - return NULL; - subtotal_index = lex_integer (lexer); - lex_get 
(lexer); - if (!lex_force_match (lexer, T_RBRACK)) - return NULL; - } - e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL, - .subtotal_index = subtotal_index }; - } - else if (lex_match (lexer, T_LBRACK)) + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - if (lex_match_id (lexer, "LO")) - { - if (!lex_force_match_id (lexer, "THRU")) - return false; - - if (lex_is_string (lexer)) - { - struct substring low = { .string = NULL }; - struct substring high = parse_substring (lexer, dict); - e = ctpo_cat_srange (low, high); - } - else - { - if (!lex_force_num (lexer)) - return false; - e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer)); - lex_get (lexer); - } - } - else if (lex_is_number (lexer)) - { - double number = lex_number (lexer); - lex_get (lexer); - if (lex_match_id (lexer, "THRU")) - { - if (lex_match_id (lexer, "HI")) - e = ctpo_cat_nrange (number, DBL_MAX); - else - { - if (!lex_force_num (lexer)) - return false; - e = ctpo_cat_nrange (number, lex_number (lexer)); - lex_get (lexer); - } - } - else - e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER, - .number = number }; - } - else if (lex_is_string (lexer)) - { - struct substring s = parse_substring (lexer, dict); - - if (lex_match_id (lexer, "THRU")) - { - struct substring high; - - if (lex_match_id (lexer, "HI")) - high = (struct substring) { .string = NULL }; - else - { - if (!lex_force_string (lexer)) - { - ss_dealloc (&s); - return false; - } - high = parse_substring (lexer, dict); - } + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + int width = var_get_width (var); + struct ctables_occurrence *o, *next; + struct hmap *map = &s->occurrences[a][i]; + HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map) + { + value_destroy (&o->value, width); + hmap_delete (map, &o->node); + free (o); + } + hmap_shrink (map); + } + } - e = ctpo_cat_srange (s, high); - } - 
else - e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s }; - } - else + struct ctables_cell *cell, *next_cell; + HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells) + { + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - lex_error (lexer, NULL); - return NULL; + const struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + if (i != nest->scale_idx) + value_destroy (&cell->axes[a].cvs[i].value, + var_get_width (nest->vars[i])); + free (cell->axes[a].cvs); } - if (!lex_force_match (lexer, T_RBRACK)) - { - if (e.op == CTPO_CAT_STRING) - ss_dealloc (&e.string); - else if (e.op == CTPO_CAT_SRANGE) - { - ss_dealloc (&e.srange[0]); - ss_dealloc (&e.srange[1]); - } - return NULL; - } + const struct ctables_nest *ss = s->nests[s->table->summary_axis]; + const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; + for (size_t i = 0; i < specs->n; i++) + ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]); + free (cell->summaries); + + hmap_delete (&s->cells, &cell->node); + free (cell); } - else if (lex_match (lexer, T_LPAREN)) + hmap_shrink (&s->cells); + + for (enum ctables_area_type at = 0; at < N_CTATS; at++) { - struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict); - if (!ep) - return NULL; - if (!lex_force_match (lexer, T_RPAREN)) + struct ctables_area *area, *next_area; + HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node, + &s->areas[at]) { - ctables_pcexpr_destroy (ep); - return NULL; + free (area->sums); + hmap_delete (&s->areas[at], &area->node); + free (area); } - return ep; - } - else - { - lex_error (lexer, NULL); - return NULL; + hmap_shrink (&s->areas[at]); } - - e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); - return xmemdup (&e, sizeof e); } -static struct ctables_pcexpr * -ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub, - struct lexer *lexer, int start_ofs) +static void +ctables_section_uninit (struct 
ctables_section *s) { - struct ctables_pcexpr *e = xmalloc (sizeof *e); - *e = (struct ctables_pcexpr) { - .op = CTPO_NEG, - .subs = { sub }, - .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1), - }; - return e; + ctables_section_clear (s); + + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + struct ctables_nest *nest = s->nests[a]; + for (size_t i = 0; i < nest->n; i++) + hmap_destroy (&s->occurrences[a][i]); + free (s->occurrences[a]); + } + + hmap_destroy (&s->cells); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_destroy (&s->areas[at]); } -static struct ctables_pcexpr * -ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict) +static void +ctables_table_clear (struct ctables_table *t) { - static const struct operator op = { T_EXP, CTPO_POW }; + for (size_t i = 0; i < t->n_sections; i++) + ctables_section_clear (&t->sections[i]); - const char *chain_warning = - _("The exponentiation operator (`**') is left-associative: " - "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. " - "To disable this warning, insert parentheses."); + if (t->clabels_example) + { + int width = var_get_width (t->clabels_example); + struct ctables_value *value, *next_value; + HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node, + &t->clabels_values_map) + { + value_destroy (&value->value, width); + hmap_delete (&t->clabels_values_map, &value->node); + free (value); + } + hmap_shrink (&t->clabels_values_map); - if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP) - return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1, - ctables_pcexpr_parse_primary, - chain_warning); + free (t->clabels_values); + t->clabels_values = NULL; + t->n_clabels_values = 0; + } +} - /* Special case for situations like "-5**6", which must be parsed as - -(5**6). 
*/ +static bool +ctables_execute (struct dataset *ds, struct casereader *input, + struct ctables *ct) +{ + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = ct->tables[i]; + t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) * + MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) * + MAX (1, t->stacks[PIVOT_AXIS_LAYER].n), + sizeof *t->sections); + size_t ix[PIVOT_N_AXES]; + ctables_table_add_section (t, 0, ix); + } - int start_ofs = lex_ofs (lexer); - struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs); - *lhs = (struct ctables_pcexpr) { - .op = CTPO_CONSTANT, - .number = -lex_tokval (lexer), - .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)), - }; - lex_get (lexer); + struct dictionary *dict = dataset_dict (ds); - struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ ( - lexer, dict, &op, 1, - ctables_pcexpr_parse_primary, chain_warning, lhs); - if (!node) - return NULL; + bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE; + struct casegrouper *grouper + = (splitting + ? casegrouper_create_splits (input, dict) + : casegrouper_create_vars (input, NULL, 0)); + struct casereader *group; + while (casegrouper_get_next_group (grouper, &group)) + { + if (splitting) + { + struct ccase *c = casereader_peek (group, 0); + if (c != NULL) + { + output_split_file_values (ds, c); + case_unref (c); + } + } - return ctables_pcexpr_allocate_neg (node, lexer, start_ofs); -} + bool warn_on_invalid = true; + for (struct ccase *c = casereader_read (group); c; + case_unref (c), c = casereader_read (group)) + { + double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid); + double e_weight = (ct->e_weight + ? var_force_valid_weight (ct->e_weight, + case_num (c, ct->e_weight), + &warn_on_invalid) + : d_weight); + double weight[] = { + [CTW_DICTIONARY] = d_weight, + [CTW_EFFECTIVE] = e_weight, + [CTW_UNWEIGHTED] = 1.0, + }; -/* Parses the unary minus level. 
*/ -static struct ctables_pcexpr * -ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict) -{ - int start_ofs = lex_ofs (lexer); - if (!lex_match (lexer, T_DASH)) - return ctables_pcexpr_parse_exp (lexer, dict); + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = ct->tables[i]; - struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict); - if (!inner) - return NULL; + for (size_t j = 0; j < t->n_sections; j++) + ctables_cell_insert (&t->sections[j], c, weight); - return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs); -} + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (t->label_axis[a] != a) + ctables_insert_clabels_values (t, c, a); + } + } + casereader_destroy (group); -/* Parses the multiplication and division level. */ -static struct ctables_pcexpr * -ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict) -{ - static const struct operator ops[] = - { - { T_ASTERISK, CTPO_MUL }, - { T_SLASH, CTPO_DIV }, - }; + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = ct->tables[i]; - return ctables_pcexpr_parse_binary_operators (lexer, dict, ops, - sizeof ops / sizeof *ops, - ctables_pcexpr_parse_neg, NULL); -} + if (t->clabels_example) + ctables_sort_clabels_values (t); -/* Parses the addition and subtraction level. 
*/ -static struct ctables_pcexpr * -ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict) -{ - static const struct operator ops[] = - { - { T_PLUS, CTPO_ADD }, - { T_DASH, CTPO_SUB }, - { T_NEG_NUM, CTPO_ADD }, - }; + for (size_t j = 0; j < t->n_sections; j++) + ctables_section_add_empty_categories (&t->sections[j]); - return ctables_pcexpr_parse_binary_operators (lexer, dict, - ops, sizeof ops / sizeof *ops, - ctables_pcexpr_parse_mul, NULL); + ctables_table_output (ct, t); + ctables_table_clear (t); + } + } + return casegrouper_destroy (grouper); } - + static struct ctables_postcompute * ctables_find_postcompute (struct ctables *ct, const char *name) { @@ -6201,18 +6216,19 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) lex_match (lexer, T_EQUALS); int expr_start = lex_ofs (lexer); - if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW)) + if (!ctables_axis_parse (lexer, dataset_dict (ds), + &t->axes[PIVOT_AXIS_ROW])) goto error; if (lex_match (lexer, T_BY)) { if (!ctables_axis_parse (lexer, dataset_dict (ds), - ct, t, PIVOT_AXIS_COLUMN)) + &t->axes[PIVOT_AXIS_COLUMN])) goto error; if (lex_match (lexer, T_BY)) { if (!ctables_axis_parse (lexer, dataset_dict (ds), - ct, t, PIVOT_AXIS_LAYER)) + &t->axes[PIVOT_AXIS_LAYER])) goto error; } }