1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-out.h"
25 #include "data/dataset.h"
26 #include "data/dictionary.h"
27 #include "data/mrset.h"
28 #include "data/subcase.h"
29 #include "data/value-labels.h"
30 #include "language/command.h"
31 #include "language/lexer/format-parser.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/hmap.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "libpspp/string-array.h"
41 #include "math/mode.h"
42 #include "math/moments.h"
43 #include "math/percentiles.h"
44 #include "math/sort.h"
45 #include "output/pivot-table.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
51 #define _(msgid) gettext (msgid)
52 #define N_(msgid) (msgid)
56 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
57 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
58 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
59 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
63 - unweighted summaries (U*)
64 - lower confidence limits (*.LCL)
65 - upper confidence limits (*.UCL)
66 - standard error (*.SE)
69 /* All variables. */ \
70 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
71 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
72 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
73 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
74 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
94 /* Scale variables, totals, and subtotals. */ \
95 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
96 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
97 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
106 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
107 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
108 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
109 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
111 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
112 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
113 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
/* Summary functions for multiple response sets, in the same
   S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) form as the entries in
   SUMMARIES.  The whole list is compiled out because multiple response sets
   are not implemented.

   The last entry's NAME used to repeat "LAYERCOLPCT.RESPONSES.COUNT", which
   already belongs to CTSF_LAYERCOLPCT_RESPONSES_COUNT; it now matches its
   enumerator and label. */
#if 0 /* Multiple response sets not yet implemented. */
  S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
#endif
144 enum ctables_summary_function
146 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
152 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
153 N_CTSF_FUNCTIONS = SUMMARIES
157 static bool ctables_summary_function_is_count (enum ctables_summary_function);
159 enum ctables_domain_type
161 /* Within a section, where stacked variables divide one section from another. */
163 CTDT_TABLE, /* All layers of a whole section. */
164 CTDT_LAYER, /* One layer within a section. */
165 CTDT_LAYERROW, /* Row in one layer within a section. */
166 CTDT_LAYERCOL, /* Column in one layer within a section. */
168 /* Within a subtable, where a subtable pairs an innermost row variable with
169 an innermost column variable within a single layer. */
170 CTDT_SUBTABLE, /* Whole subtable. */
171 CTDT_ROW, /* Row within a subtable. */
172 CTDT_COL, /* Column within a subtable. */
176 struct ctables_domain
178 struct hmap_node node;
180 const struct ctables_cell *example;
182 double d_valid; /* Dictionary weight. */
185 double e_valid; /* Effective weight */
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The domains that contain this cell. */
204 bool contributes_to_domains;
205 struct ctables_domain *domains[N_CTDTS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
229 const struct dictionary *dict;
230 struct pivot_table_look *look;
232 /* CTABLES has a number of extra formats that we implement via custom
233 currency specifications on an alternate fmt_settings. */
234 #define CTEF_NEGPAREN FMT_CCA
235 #define CTEF_NEQUAL FMT_CCB
236 #define CTEF_PAREN FMT_CCC
237 #define CTEF_PCTPAREN FMT_CCD
238 struct fmt_settings ctables_formats;
240 /* If this is NULL, zeros are displayed using the normal print format.
241 Otherwise, this string is displayed. */
244 /* If this is NULL, missing values are displayed using the normal print
245 format. Otherwise, this string is displayed. */
248 /* Indexed by variable dictionary index. */
249 enum ctables_vlabel *vlabels;
251 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
253 bool mrsets_count_duplicates; /* MRSETS. */
254 bool smissing_listwise; /* SMISSING. */
255 struct variable *e_weight; /* WEIGHT. */
256 int hide_threshold; /* HIDESMALLCOUNTS. */
258 struct ctables_table **tables;
262 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute: an expression (struct ctables_pcexpr) together with
   the location where it was defined and optional summary specifications.
   Instances are kept in struct ctables's 'postcomputes' hmap. */
265 struct ctables_postcompute
267 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
268 char *name; /* Name, without leading &. */
270 struct msg_location *location; /* Location of definition. */
271 struct ctables_pcexpr *expr; /* Expression tree for this postcompute. */
273 struct ctables_summary_spec_set *specs; /* Summary specifications. */
/* NOTE(review): presumably requests hiding the categories that the
   expression draws on -- confirm against the code that reads this flag. */
274 bool hide_source_cats;
277 struct ctables_pcexpr
287 enum ctables_postcompute_op
290 CTPO_CONSTANT, /* 5 */
291 CTPO_CAT_NUMBER, /* [5] */
292 CTPO_CAT_STRING, /* ["STRING"] */
293 CTPO_CAT_RANGE, /* [LO THRU 5] */
294 CTPO_CAT_MISSING, /* MISSING */
295 CTPO_CAT_OTHERNM, /* OTHERNM */
296 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
297 CTPO_CAT_TOTAL, /* TOTAL */
311 /* CTPO_CAT_NUMBER. */
314 /* CTPO_CAT_STRING. */
317 /* CTPO_CAT_RANGE. */
320 /* CTPO_CAT_SUBTOTAL. */
321 size_t subtotal_index;
323 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
324 One element: CTPO_NEG. */
325 struct ctables_pcexpr *subs[2];
328 /* Source location. */
329 struct msg_location *location;
332 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
333 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
334 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
335 struct ctables_pcexpr *sub1);
337 struct ctables_summary_spec_set
339 struct ctables_summary_spec *specs;
343 /* The variable to which the summary specs are applied. */
344 struct variable *var;
346 /* Whether the variable to which the summary specs are applied is a scale
347 variable for the purpose of summarization.
349 (VALIDN and TOTALN act differently for summarizing scale and categorical variables.) */
354 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
355 const struct ctables_summary_spec_set *);
356 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
358 /* A nested sequence of variables, e.g. a > b > c. */
361 struct variable **vars;
364 size_t *domains[N_CTDTS];
365 size_t n_domains[N_CTDTS];
367 struct ctables_summary_spec_set specs[N_CSVS];
370 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
373 struct ctables_nest *nests;
379 struct hmap_node node;
384 struct ctables_occurrence
386 struct hmap_node node;
/* One section of a table: for each pivot axis, the nest that the section
   uses and the occurrences recorded for it, plus the section's cells and
   its domains of each domain type. */
390 struct ctables_section
392 struct ctables_table *table; /* Table that this section belongs to. */
393 struct ctables_nest *nests[PIVOT_N_AXES]; /* Indexed by pivot axis. */
394 struct hmap *occurrences[PIVOT_N_AXES]; /* Indexed by pivot axis. */
395 struct hmap cells; /* Contains "struct ctables_cell"s. */
396 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
401 struct ctables *ctables;
402 struct ctables_axis *axes[PIVOT_N_AXES];
403 struct ctables_stack stacks[PIVOT_N_AXES];
404 struct ctables_section *sections;
406 enum pivot_axis_type summary_axis;
407 struct ctables_summary_spec_set summary_specs;
409 const struct variable *clabels_example;
410 struct hmap clabels_values_map;
411 struct ctables_value **clabels_values;
412 size_t n_clabels_values;
414 enum pivot_axis_type slabels_axis;
415 bool slabels_visible;
417 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
419 Most commonly, label_axis[a] == a, and in particular we always have
420 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
422 If ROWLABELS or COLLABELS is specified, then one of
423 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
424 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ. */
426 enum pivot_axis_type label_axis[PIVOT_N_AXES];
427 enum pivot_axis_type clabels_from_axis;
429 /* Indexed by variable dictionary index. */
430 struct ctables_categories **categories;
439 struct ctables_chisq *chisq;
440 struct ctables_pairwise *pairwise;
448 struct variable *var;
449 const struct mrset *mrset;
453 static const struct fmt_spec *
454 ctables_var_get_print_format (const struct ctables_var *var)
456 return (var->is_mrset
457 ? var_get_print_format (var->mrset->vars[0])
458 : var_get_print_format (var->var));
462 ctables_var_name (const struct ctables_var *var)
464 return var->is_mrset ? var->mrset->name : var_get_name (var->var);
467 struct ctables_categories
470 struct ctables_category *cats;
475 struct ctables_category
477 enum ctables_category_type
479 /* Explicit category lists. */
487 /* Totals and subtotals. */
491 /* Implicit category lists. */
496 /* For contributing to TOTALN. */
497 CCT_EXCLUDED_MISSING,
501 struct ctables_category *subtotal;
507 double number; /* CCT_NUMBER. */
508 char *string; /* CCT_STRING. */
509 double range[2]; /* CCT_RANGE. */
513 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
514 bool hide_subcategories; /* CCT_SUBTOTAL. */
517 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
519 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
522 bool include_missing;
526 enum ctables_summary_function sort_function;
527 struct variable *sort_var;
532 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
533 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
534 struct msg_location *location;
/* Releases storage owned by CAT, dispatching on the category's type.  The
   only release visible here is free() of 'total_label'.

   NOTE(review): most of this function's lines (the remaining case labels
   and any other frees) are not visible in this listing; confirm the full
   per-type cleanup against the complete source. */
538 ctables_category_uninit (struct ctables_category *cat)
549 case CCT_POSTCOMPUTE:
558 free (cat->total_label);
566 case CCT_EXCLUDED_MISSING:
572 ctables_category_equal (const struct ctables_category *a,
573 const struct ctables_category *b)
575 if (a->type != b->type)
581 return a->number == b->number;
584 return strcmp (a->string, b->string);
587 return a->range[0] == b->range[0] && a->range[1] == b->range[1];
593 case CCT_POSTCOMPUTE:
594 return a->pc == b->pc;
598 return !strcmp (a->total_label, b->total_label);
603 return (a->include_missing == b->include_missing
604 && a->sort_ascending == b->sort_ascending
605 && a->sort_function == b->sort_function
606 && a->sort_var == b->sort_var
607 && a->percentile == b->percentile);
609 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count is asserted to be positive
   on entry.  The teardown visible here uninitializes every category in
   C->cats.

   NOTE(review): the lines that decrement the count and free C are not
   visible in this listing; presumably the teardown runs only when the last
   reference is dropped -- confirm against the complete source. */
617 ctables_categories_unref (struct ctables_categories *c)
622 assert (c->n_refs > 0);
626 for (size_t i = 0; i < c->n_cats; i++)
627 ctables_category_uninit (&c->cats[i]);
633 ctables_categories_equal (const struct ctables_categories *a,
634 const struct ctables_categories *b)
636 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
639 for (size_t i = 0; i < a->n_cats; i++)
640 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
646 /* Chi-square test (SIGTEST). */
654 /* Pairwise comparison test (COMPARETEST). */
655 struct ctables_pairwise
657 enum { PROP, MEAN } type;
660 bool meansvariance_allcats;
662 enum { BONFERRONI = 1, BH } adjust;
686 struct ctables_var var;
688 struct ctables_summary_spec_set specs[N_CSVS];
692 struct ctables_axis *subs[2];
695 struct msg_location *loc;
698 static void ctables_axis_destroy (struct ctables_axis *);
707 enum ctables_function_availability
709 CTFA_ALL, /* Any variables. */
710 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
711 CTFA_MRSETS, /* Only multiple-response sets */
714 struct ctables_summary_spec
716 enum ctables_summary_function function;
717 double percentile; /* CTSF_PTILE only. */
720 struct fmt_spec format;
721 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Makes DST a copy of summary specification SRC.  The label is duplicated
   with xstrdup() so that DST holds its own copy of the string.

   NOTE(review): any line that copies the remaining members is not visible
   in this listing -- confirm against the complete source. */
727 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
728 const struct ctables_summary_spec *src)
731 dst->label = xstrdup (src->label);
/* Releases resources held by summary specification S.

   NOTE(review): the body is not visible in this listing; presumably it
   frees the label that ctables_summary_spec_clone() duplicates --
   confirm. */
735 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  Allocates an array of SRC->n specs,
   clones each of SRC's specs into it with ctables_summary_spec_clone(),
   and then assigns DST from a compound literal that carries over
   'is_scale'.

   NOTE(review): the other members of the compound literal are not visible
   in this listing -- confirm against the complete source. */
742 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
743 const struct ctables_summary_spec_set *src)
745 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
746 for (size_t i = 0; i < src->n; i++)
747 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
749 *dst = (struct ctables_summary_spec_set) {
754 .is_scale = src->is_scale,
/* Uninitializes each of the SET->n summary specifications in SET.

   NOTE(review): whether the 'specs' array itself is freed is not visible in
   this listing -- confirm against the complete source. */
759 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
761 for (size_t i = 0; i < set->n; i++)
762 ctables_summary_spec_uninit (&set->specs[i]);
/* Parses a column width setting from LEXER: an optional '=' followed by
   either the keyword DEFAULT or a number, which is range-checked against
   [0, DBL_MAX] under NAME and stored in *WIDTH.

   NOTE(review): the value stored for DEFAULT and the return statements are
   not visible in this listing -- confirm against the complete source. */
767 parse_col_width (struct lexer *lexer, const char *name, double *width)
769 lex_match (lexer, T_EQUALS);
770 if (lex_match_id (lexer, "DEFAULT"))
772 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
774 *width = lex_number (lexer);
/* Parses the keyword YES or NO from LEXER.

   On success, stores true for YES or false for NO in *B and returns true.
   If the current token is neither keyword, reports an error naming the
   expected keywords, leaves *B untouched, and returns false. */
static bool
parse_bool (struct lexer *lexer, bool *b)
{
  if (lex_match_id (lexer, "NO"))
    *b = false;
  else if (lex_match_id (lexer, "YES"))
    *b = true;
  else
    {
      lex_error_expecting (lexer, "YES", "NO");
      return false;
    }

  return true;
}
798 static enum ctables_function_availability
799 ctables_function_availability (enum ctables_summary_function f)
801 static enum ctables_function_availability availability[] = {
802 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
807 return availability[f];
/* Classifies summary function F by switching over its value.  The case
   labels below are grouped as count percentages (*PCT.COUNT), valid-N and
   total-N percentages (*PCT.VALIDN, *PCT.TOTALN), and sum percentages
   (*PCT.SUM).

   NOTE(review): the return statements and the case labels for the
   non-percentage functions are not visible in this listing; presumably
   only the count-based group yields true -- confirm against the complete
   source. */
811 ctables_summary_function_is_count (enum ctables_summary_function f)
817 case CTSF_ROWPCT_COUNT:
818 case CTSF_COLPCT_COUNT:
819 case CTSF_TABLEPCT_COUNT:
820 case CTSF_SUBTABLEPCT_COUNT:
821 case CTSF_LAYERPCT_COUNT:
822 case CTSF_LAYERROWPCT_COUNT:
823 case CTSF_LAYERCOLPCT_COUNT:
826 case CTSF_ROWPCT_VALIDN:
827 case CTSF_COLPCT_VALIDN:
828 case CTSF_TABLEPCT_VALIDN:
829 case CTSF_SUBTABLEPCT_VALIDN:
830 case CTSF_LAYERPCT_VALIDN:
831 case CTSF_LAYERROWPCT_VALIDN:
832 case CTSF_LAYERCOLPCT_VALIDN:
833 case CTSF_ROWPCT_TOTALN:
834 case CTSF_COLPCT_TOTALN:
835 case CTSF_TABLEPCT_TOTALN:
836 case CTSF_SUBTABLEPCT_TOTALN:
837 case CTSF_LAYERPCT_TOTALN:
838 case CTSF_LAYERROWPCT_TOTALN:
839 case CTSF_LAYERCOLPCT_TOTALN:
856 case CTSF_ROWPCT_SUM:
857 case CTSF_COLPCT_SUM:
858 case CTSF_TABLEPCT_SUM:
859 case CTSF_SUBTABLEPCT_SUM:
860 case CTSF_LAYERPCT_SUM:
861 case CTSF_LAYERROWPCT_SUM:
862 case CTSF_LAYERCOLPCT_SUM:
/* Parses a summary function name from LEXER and stores the matching
   function in *F.

   The current token must be an identifier.  It is compared, using
   ss_equals_case(), with every entry of a name table, which pairs each
   function with its name and adds the abbreviated percentage names listed
   below.  If no entry matches, an error is reported.

   NOTE(review): the token consumption and return statements are not
   visible in this listing -- confirm against the complete source. */
870 parse_ctables_summary_function (struct lexer *lexer,
871 enum ctables_summary_function *f)
/* One entry of the name table: a function and the name that selects it. */
875 enum ctables_summary_function function;
876 struct substring name;
878 static struct pair names[] = {
879 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
880 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
883 /* The .COUNT suffix may be omitted. */
884 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
885 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
886 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
887 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
888 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
889 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
890 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
894 if (!lex_force_id (lexer))
/* Linear search of the name table. */
897 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
898 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
900 *f = names[i].function;
905 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  The visible cleanup uninitializes the summary spec set of
   every summary variant, recursively destroys both subaxes, and destroys
   the axis's source location.

   NOTE(review): the dispatch on the axis operator (which cleanup applies to
   which kind of node) and any null check are not visible in this listing --
   confirm against the complete source. */
910 ctables_axis_destroy (struct ctables_axis *axis)
918 for (size_t i = 0; i < N_CSVS; i++)
919 ctables_summary_spec_set_uninit (&axis->specs[i]);
924 ctables_axis_destroy (axis->subs[0]);
925 ctables_axis_destroy (axis->subs[1]);
928 msg_location_destroy (axis->loc);
/* Allocates a new axis node that combines subaxes SUB0 and SUB1.  The
   node's location spans the lexer tokens from offset START_OFS through the
   token just before the lexer's current offset.

   NOTE(review): the initializer that stores OP and the return statement are
   not visible in this listing -- confirm against the complete source. */
932 static struct ctables_axis *
933 ctables_axis_new_nonterminal (enum ctables_axis_op op,
934 struct ctables_axis *sub0,
935 struct ctables_axis *sub1,
936 struct lexer *lexer, int start_ofs)
938 struct ctables_axis *axis = xmalloc (sizeof *axis);
939 *axis = (struct ctables_axis) {
941 .subs = { sub0, sub1 },
942 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State shared by the recursive axis-expression parsing functions.

   NOTE(review): the members shorter than those below (the parsers also
   read 'lexer' through this structure) are not visible in this listing. */
947 struct ctables_axis_parse_ctx
950 struct dictionary *dict; /* Dictionary used to look up variable names. */
952 struct ctables_table *t; /* Table whose axes are being parsed. */
/* Returns the default output format for summary FUNCTION applied to VAR.

   The format class of each function comes from the FORMAT column of the
   SUMMARIES list.  Depending on that class, the result is F with width 40,
   PCT with width 40 and 1 decimal, or VAR's own print format.

   NOTE(review): the case labels that tie each format class to its return
   statement are not visible in this listing; presumably CTF_COUNT,
   CTF_PERCENT, and CTF_GENERAL, in that order -- confirm. */
955 static struct fmt_spec
956 ctables_summary_default_format (enum ctables_summary_function function,
957 const struct ctables_var *var)
959 static const enum ctables_format default_formats[] = {
960 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
964 switch (default_formats[function])
967 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
970 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
973 return *ctables_var_get_print_format (var);
981 ctables_summary_default_label (enum ctables_summary_function function,
984 static const char *default_labels[] = {
985 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
990 return (function == CTSF_PTILE
991 ? xasprintf (_("Percentile %.2f"), percentile)
992 : xstrdup (gettext (default_labels[function])));
996 ctables_summary_function_name (enum ctables_summary_function function)
998 static const char *names[] = {
999 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1003 return names[function];
/* Adds a summary specification for FUNCTION (with PERCENTILE, LABEL,
   FORMAT, and IS_CTABLES_FORMAT) to summary variant SV of AXIS.

   For a variable axis (CTAO_VAR), the function's availability is checked
   against the variable first: diagnostics are issued at LOC, with a note at
   the axis's own location, when a function restricted to multiple response
   sets or to scale variables is applied to something else.  The new spec is
   then appended to the axis's spec set for SV, growing the array as needed.
   LABEL is duplicated, and a null FORMAT selects the function's default
   format for the variable.

   For any other axis, the specification is added recursively to both
   subaxes.

   NOTE(review): the return statements are not visible in this listing;
   presumably the function returns false once a diagnostic has been issued
   or a recursive call fails -- confirm against the complete source. */
1007 add_summary_spec (struct ctables_axis *axis,
1008 enum ctables_summary_function function, double percentile,
1009 const char *label, const struct fmt_spec *format,
1010 bool is_ctables_format, const struct msg_location *loc,
1011 enum ctables_summary_variant sv)
1013 if (axis->op == CTAO_VAR)
1015 const char *function_name = ctables_summary_function_name (function);
1016 const char *var_name = ctables_var_name (&axis->var);
1017 switch (ctables_function_availability (function))
1020 if (!axis->var.is_mrset)
1022 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1023 "response sets."), function_name);
1024 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1035 _("Summary function %s applies only to scale variables."),
1037 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append to the spec set for this summary variant, enlarging it if full. */
1048 struct ctables_summary_spec_set *set = &axis->specs[sv];
1049 if (set->n >= set->allocated)
1050 set->specs = x2nrealloc (set->specs, &set->allocated,
1051 sizeof *set->specs);
1053 struct ctables_summary_spec *dst = &set->specs[set->n++];
1054 *dst = (struct ctables_summary_spec) {
1055 .function = function,
1056 .percentile = percentile,
1057 .label = xstrdup (label),
1058 .format = (format ? *format
1059 : ctables_summary_default_format (function, &axis->var)),
1060 .is_ctables_format = is_ctables_format,
/* Not a variable: apply the same specification to both subaxes. */
1066 for (size_t i = 0; i < 2; i++)
1067 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1068 format, is_ctables_format, loc, sv))
/* Forward declaration: ctables_axis_parse_primary() calls back into
   ctables_axis_parse_stack() to parse a parenthesized subexpression. */
1074 static struct ctables_axis *ctables_axis_parse_stack (
1075 struct ctables_axis_parse_ctx *);
/* Parses a variable or multiple response set name from LEXER into *VAR.

   A token that begins with '$' is looked up in DICT as a multiple response
   set, and an error is reported if DICT has no set by that name.  Any other
   token is parsed as an ordinary variable with parse_variable(), and the
   result is true if that yielded a variable.

   NOTE(review): the return statements and token consumption on the
   multiple response set path are not visible in this listing -- confirm
   against the complete source. */
1078 ctables_var_parse (struct lexer *lexer, struct dictionary *dict,
1079 struct ctables_var *var)
1081 if (ss_starts_with (lex_tokss (lexer), ss_cstr ("$")))
1083 *var = (struct ctables_var) {
1085 .mrset = dict_lookup_mrset (dict, lex_tokcstr (lexer))
1089 lex_error (lexer, _("'%s' does not name a multiple-response set "
1090 "in the active file dictionary."),
1091 lex_tokcstr (lexer));
1099 *var = (struct ctables_var) {
1101 .var = parse_variable (lexer, dict),
1103 return var->var != NULL;
/* Parses a primary axis expression: either a parenthesized stack
   expression or a single variable (or multiple response set) name,
   optionally followed by [S] or [C].

   For a variable, a new CTAO_VAR axis is allocated.  Its 'scale' flag is
   false for a multiple response set, true after [S], false after [C], and
   otherwise taken from whether the variable's measurement level is
   MEASURE_SCALE.  The axis location covers the tokens consumed.

   NOTE(review): the return statements are not visible in this listing;
   presumably the function returns the new axis, or NULL after an error --
   confirm against the complete source. */
1107 static struct ctables_axis *
1108 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1110 if (lex_match (ctx->lexer, T_LPAREN))
1112 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1113 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1115 ctables_axis_destroy (sub);
1121 if (!lex_force_id (ctx->lexer))
1124 int start_ofs = lex_ofs (ctx->lexer);
1125 struct ctables_var var;
1126 if (!ctables_var_parse (ctx->lexer, ctx->dict, &var))
1129 struct ctables_axis *axis = xmalloc (sizeof *axis);
1130 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1132 /* XXX should figure out default measures by reading data */
1133 axis->scale = (var.is_mrset ? false
1134 : lex_match_phrase (ctx->lexer, "[S]") ? true
1135 : lex_match_phrase (ctx->lexer, "[C]") ? false
1136 : var_get_measure (var.var) == MEASURE_SCALE);
1137 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1138 lex_ofs (ctx->lexer) - 1);
/* Returns true if string S contains at least one decimal digit, false
   otherwise (including when S is empty).

   strcspn() yields the length of the leading run of S that has no digit, so
   the character at that offset is a digit if one exists and the null
   terminator if not. */
static bool
has_digit (const char *s)
{
  return s[strcspn (s, "0123456789")] != '\0';
}
/* Parses an output format specifier from LEXER into *FORMAT, accepting the
   CTABLES-specific format names in addition to the regular ones.

   The type name, width, and decimals are first parsed abstractly.  The
   names NEGPAREN, NEQUAL, PAREN, and PCTPAREN (matched ignoring case) map
   to the corresponding CTEF_* format types.  Any other name sets
   *IS_CTABLES_FORMAT to false and is handed to the regular format parser,
   after which the format must be valid for output and for numeric values.

   A CTABLES-specific format is rejected with an error if its width is too
   small or its decimals exceed width minus one; otherwise
   *IS_CTABLES_FORMAT is set to true.

   NOTE(review): the exact width test and the return statements are not
   visible in this listing -- confirm against the complete source. */
1149 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1150 bool *is_ctables_format)
1152 char type[FMT_TYPE_LEN_MAX + 1];
1153 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1156 if (!strcasecmp (type, "NEGPAREN"))
1157 format->type = CTEF_NEGPAREN;
1158 else if (!strcasecmp (type, "NEQUAL"))
1159 format->type = CTEF_NEQUAL;
1160 else if (!strcasecmp (type, "PAREN"))
1161 format->type = CTEF_PAREN;
1162 else if (!strcasecmp (type, "PCTPAREN"))
1163 format->type = CTEF_PCTPAREN;
1166 *is_ctables_format = false;
1167 return (parse_format_specifier (lexer, format)
1168 && fmt_check_output (format)
1169 && fmt_check_type_compat (format, VAL_NUMERIC));
1174 msg (SE, _("Output format %s requires width 2 or greater."), type);
1177 else if (format->d > format->w - 1)
1179 msg (SE, _("Output format %s requires width greater than decimals."),
1185 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   If no '[' follows the primary expression, nothing more is parsed here.
   Otherwise each specification consists of a summary function name, a
   percentile in [0, 100] when the function is PTILE, an optional quoted
   label (the function's default label is used when none is given), and an
   optional format, recognized as an identifier token that contains a digit.
   Each specification is added to the axis with add_summary_spec() under
   the current summary variant, which starts as CSV_CELL.  Specifications
   may be separated by commas.  The keyword TOTALS followed by '[' is
   accepted while the variant is CSV_CELL, and a CSV_TOTAL list requires an
   additional closing ']'.

   On a parse failure the partially built axis is destroyed.

   NOTE(review): the loop structure, the switch of the variant to
   CSV_TOTAL, the freeing of the label, and the return statements are not
   visible in this listing -- confirm against the complete source. */
1190 static struct ctables_axis *
1191 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1193 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1194 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1197 enum ctables_summary_variant sv = CSV_CELL;
1200 int start_ofs = lex_ofs (ctx->lexer);
1202 /* Parse function. */
1203 enum ctables_summary_function function;
1204 if (!parse_ctables_summary_function (ctx->lexer, &function))
1207 /* Parse percentile. */
1208 double percentile = 0;
1209 if (function == CTSF_PTILE)
1211 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1213 percentile = lex_number (ctx->lexer);
1214 lex_get (ctx->lexer);
1219 if (lex_is_string (ctx->lexer))
1221 label = ss_xstrdup (lex_tokss (ctx->lexer));
1222 lex_get (ctx->lexer);
1225 label = ctables_summary_default_label (function, percentile);
1228 struct fmt_spec format;
1229 const struct fmt_spec *formatp;
1230 bool is_ctables_format = false;
1231 if (lex_token (ctx->lexer) == T_ID
1232 && has_digit (lex_tokcstr (ctx->lexer)))
1234 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1235 &is_ctables_format))
/* The location of the whole specification is used for diagnostics. */
1245 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1246 lex_ofs (ctx->lexer) - 1);
1247 add_summary_spec (sub, function, percentile, label, formatp,
1248 is_ctables_format, loc, sv);
1250 msg_location_destroy (loc);
1252 lex_match (ctx->lexer, T_COMMA);
1253 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1255 if (!lex_force_match (ctx->lexer, T_LBRACK))
1259 else if (lex_match (ctx->lexer, T_RBRACK))
1261 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1268 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   node (CTAO_VAR) is examined directly, with an assertion that the node
   does not refer to a multiple response set; for any other node both
   subaxes are searched recursively.

   NOTE(review): the null check and return statements are not visible in
   this listing; presumably the result is the first scale variable axis
   found, or NULL if there is none -- confirm against the complete
   source. */
1272 static const struct ctables_axis *
1273 find_scale (const struct ctables_axis *axis)
1277 else if (axis->op == CTAO_VAR)
1281 assert (!axis->var.is_mrset);
1289 for (size_t i = 0; i < 2; i++)
1291 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical (non-scale)
   variable that has at least one CSV_CELL summary specification.  A
   variable node yields itself if it qualifies and NULL otherwise; for any
   other node both subaxes are searched recursively.

   NOTE(review): the null check and the return statements of the recursive
   branch are not visible in this listing; presumably the first match is
   returned, or NULL if there is none -- confirm against the complete
   source. */
1299 static const struct ctables_axis *
1300 find_categorical_summary_spec (const struct ctables_axis *axis)
1304 else if (axis->op == CTAO_VAR)
1305 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1308 for (size_t i = 0; i < 2; i++)
1310 const struct ctables_axis *sum
1311 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: one or more postfix expressions separated
   by '>'.

   Each '>' combines the expression parsed so far with the next postfix
   expression into a CTAO_NEST node.  Two restrictions are enforced on each
   new node, with diagnostics that point at the offending subexpressions:
   scale variables may not appear on both sides of a nesting, and an outer
   (left-hand) categorical variable may not carry summary specifications.
   A node that violates either restriction is destroyed.

   NOTE(review): the error returns, the condition that guards the second
   diagnostic, and the update of the left-hand side after each iteration are
   not visible in this listing -- confirm against the complete source. */
1319 static struct ctables_axis *
1320 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1322 int start_ofs = lex_ofs (ctx->lexer);
1323 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1327 while (lex_match (ctx->lexer, T_GT))
1329 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1333 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1334 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1336 const struct ctables_axis *outer_scale = find_scale (lhs);
1337 const struct ctables_axis *inner_scale = find_scale (rhs);
1338 if (outer_scale && inner_scale)
1340 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1341 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1342 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1343 ctables_axis_destroy (nest);
1347 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1350 msg_at (SE, nest->loc,
1351 _("Summaries may only be requested for categorical variables "
1352 "at the innermost nesting level."));
1353 msg_at (SN, outer_sum->loc,
1354 _("This outer categorical variable has a summary."));
1355 ctables_axis_destroy (nest);
/* Parses a stacking expression: one or more nesting expressions separated
   by '+'.  Each '+' combines the expression parsed so far with the next
   nesting expression into a CTAO_STACK node whose location starts at the
   first token of the whole expression.

   NOTE(review): the error handling and the final return are not visible in
   this listing -- confirm against the complete source. */
1365 static struct ctables_axis *
1366 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1368 int start_ofs = lex_ofs (ctx->lexer);
1369 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1373 while (lex_match (ctx->lexer, T_PLUS))
1375 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1379 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1380 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER and stores the
   resulting tree in T->axes[A].  Returns true if an axis was parsed.

   The tokens BY, '/', and end of command are checked first, since they mean
   that no expression is present for this axis.

   NOTE(review): the statement taken when no expression is present and the
   members of the parse context are not visible in this listing; presumably
   an absent axis counts as success -- confirm against the complete
   source. */
1387 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1388 struct ctables *ct, struct ctables_table *t,
1389 enum pivot_axis_type a)
1391 if (lex_token (lexer) == T_BY
1392 || lex_token (lexer) == T_SLASH
1393 || lex_token (lexer) == T_ENDCMD)
1396 struct ctables_axis_parse_ctx ctx = {
1402 t->axes[a] = ctables_axis_parse_stack (&ctx);
1403 return t->axes[a] != NULL;
/* Destroys CHISQ, the settings for a chi-square test (SIGTEST).

   NOTE(review): the body is not visible in this listing -- confirm what it
   releases against the complete source. */
1407 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE, the settings for a pairwise comparison test
   (COMPARETEST).

   NOTE(review): the body is not visible in this listing -- confirm what it
   releases against the complete source. */
1413 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Destroys table T.  The visible cleanup drops the table's reference to
   each entry of its 'categories' array and frees the array, destroys the
   column, row, and layer axis trees, and destroys the chi-square and
   pairwise test settings.

   NOTE(review): other cleanup steps and the freeing of T itself are not
   visible in this listing -- confirm against the complete source. */
1419 ctables_table_destroy (struct ctables_table *t)
1424 for (size_t i = 0; i < t->n_categories; i++)
1425 ctables_categories_unref (t->categories[i]);
1426 free (t->categories);
1428 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1429 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1430 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1434 ctables_chisq_destroy (t->chisq);
1435 ctables_pairwise_destroy (t->pairwise);
/* Tears down CT: drops the reference on ct->look and destroys each of the
   ct->n_tables tables. */
1440 ctables_destroy (struct ctables *ct)
1445 pivot_table_look_unref (ct->look);
1449 for (size_t i = 0; i < ct->n_tables; i++)
1450 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose range member is { LOW, HIGH }.
   Callers pass -DBL_MAX or DBL_MAX for an open-ended bound. */
1455 static struct ctables_category
1456 cct_range (double low, double high)
1458 return (struct ctables_category) {
1460 .range = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category specification
   and stores a CCT_SUBTOTAL category in *CAT.

   If T_EQUALS follows, a string token is required and a copy of it becomes
   the label.  The translated default "Subtotal" is the alternative label
   (presumably used when no '=' is given).  HIDE_SUBCATEGORIES is copied into
   the category unchanged.  The label stored in *CAT is heap-allocated. */
1465 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1466 struct ctables_category *cat)
1469 if (lex_match (lexer, T_EQUALS))
1471 if (!lex_force_string (lexer))
1474 total_label = ss_xstrdup (lex_tokss (lexer));
1478 total_label = xstrdup (_("Subtotal"));
1480 *cat = (struct ctables_category) {
1481 .type = CCT_SUBTOTAL,
1482 .hide_subcategories = hide_subcategories,
1483 .total_label = total_label
/* Parses a single entry of an explicit category list and stores it in *CAT.

   Recognized forms, tried in this order:
     OTHERNM                  -> CCT_OTHERNM
     MISSING                  -> CCT_MISSING
     SUBTOTAL[="label"]       -> via ctables_table_parse_subtotal (visible)
     HSUBTOTAL[="label"]      -> via ctables_table_parse_subtotal (hidden)
     LO THRU number           -> range with lower bound -DBL_MAX
     number                   -> single numeric category
     number THRU HI           -> range with upper bound DBL_MAX
     number THRU number       -> closed range
     "string"                 -> string category (string is copied)
     &name                    -> CCT_POSTCOMPUTE, looked up in CT
   Anything else is reported with lex_error().

   An unknown postcompute name is reported with its source location. */
1489 ctables_table_parse_explicit_category (struct lexer *lexer, struct ctables *ct,
1490 struct ctables_category *cat)
1492 if (lex_match_id (lexer, "OTHERNM"))
1493 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1494 else if (lex_match_id (lexer, "MISSING"))
1495 *cat = (struct ctables_category) { .type = CCT_MISSING };
1496 else if (lex_match_id (lexer, "SUBTOTAL"))
1497 return ctables_table_parse_subtotal (lexer, false, cat);
1498 else if (lex_match_id (lexer, "HSUBTOTAL"))
1499 return ctables_table_parse_subtotal (lexer, true, cat);
1500 else if (lex_match_id (lexer, "LO"))
/* Both checks must succeed.  lex_force_num() has to be negated just like
   lex_force_match_id(); without the negation a valid "LO THRU <number>" was
   treated as the failure case, and a non-number fell through to
   lex_number(). */
1502 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
1504 *cat = cct_range (-DBL_MAX, lex_number (lexer));
1507 else if (lex_is_number (lexer))
1509 double number = lex_number (lexer);
1511 if (lex_match_id (lexer, "THRU"))
1513 if (lex_match_id (lexer, "HI"))
1514 *cat = cct_range (number, DBL_MAX);
1517 if (!lex_force_num (lexer))
1519 *cat = cct_range (number, lex_number (lexer));
1524 *cat = (struct ctables_category) {
1529 else if (lex_is_string (lexer))
1531 *cat = (struct ctables_category) {
1533 .string = ss_xstrdup (lex_tokss (lexer)),
1537 else if (lex_match (lexer, T_AND))
1539 if (!lex_force_id (lexer))
1541 struct ctables_postcompute *pc = ctables_find_postcompute (
1542 ct, lex_tokcstr (lexer));
/* Unknown postcompute: point the diagnostic at the "&name" tokens. */
1545 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1546 msg_at (SE, loc, _("Unknown postcompute &%s."),
1547 lex_tokcstr (lexer));
1548 msg_location_destroy (loc);
1553 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1557 lex_error (lexer, NULL);
/* Searches CATS for the category that postcompute expression node E refers
   to.  Each category is compared according to the kind of reference:

     - number:   type CCT_NUMBER with an equal number
     - string:   type CCT_STRING with an equal string (strcmp)
     - range:    type CCT_RANGE with both endpoints equal
     - missing, othernm, total: the first field checked is the type only
     - subtotal: type CCT_SUBTOTAL; subtotals are counted in N_SUBTOTALS and
       compared with e->subtotal_index, where index 0 is handled separately

   After the scan, a subtotal reference with index 0 is special-cased when
   more than one subtotal exists (presumably treated as ambiguous; confirm
   the value returned in that case). */
1564 static struct ctables_category *
1565 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1566 const struct ctables_pcexpr *e)
1568 struct ctables_category *best = NULL;
1569 size_t n_subtotals = 0;
1570 for (size_t i = 0; i < cats->n_cats; i++)
1572 struct ctables_category *cat = &cats->cats[i];
1575 case CTPO_CAT_NUMBER:
1576 if (cat->type == CCT_NUMBER && cat->number == e->number)
1580 case CTPO_CAT_STRING:
1581 if (cat->type == CCT_STRING && !strcmp (cat->string, e->string))
1585 case CTPO_CAT_RANGE:
1586 if (cat->type == CCT_RANGE
1587 && cat->range[0] == e->range[0]
1588 && cat->range[1] == e->range[1])
1592 case CTPO_CAT_MISSING:
1593 if (cat->type == CCT_MISSING)
1597 case CTPO_CAT_OTHERNM:
1598 if (cat->type == CCT_OTHERNM)
1602 case CTPO_CAT_SUBTOTAL:
1603 if (cat->type == CCT_SUBTOTAL)
1606 if (e->subtotal_index == n_subtotals)
1608 else if (e->subtotal_index == 0)
1613 case CTPO_CAT_TOTAL:
1614 if (cat->type == CCT_TOTAL)
1628 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Validates postcompute expression E, which belongs to computed category
   PC_CAT, against the category list CATS.  CATS_LOCATION is the source
   location of that list and is used only for diagnostics.

   For a node that references a category, the category is looked up with
   ctables_find_category_for_postcompute().  Two diagnostics are visible:

     - A subtotal reference without a position (subtotal_index 0) while CATS
       holds more than one subtotal: the message says references must be by
       position and notes the offending reference.
     - A reference to a category that is not in the list: the message names
       the computed category, notes the missing reference and notes where
       to add it.

   pc_cat->pc->hide_source_cats is also consulted for category nodes.  For
   other nodes, both sub-expressions e->subs[0] and e->subs[1] are checked
   recursively when present. */
1634 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1635 struct ctables_category *pc_cat,
1636 const struct ctables_categories *cats,
1637 const struct msg_location *cats_location)
1641 case CTPO_CAT_NUMBER:
1642 case CTPO_CAT_STRING:
1643 case CTPO_CAT_RANGE:
1644 case CTPO_CAT_MISSING:
1645 case CTPO_CAT_OTHERNM:
1646 case CTPO_CAT_SUBTOTAL:
1647 case CTPO_CAT_TOTAL:
1649 struct ctables_category *cat = ctables_find_category_for_postcompute (
1653 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals so the message can report how many there are. */
1655 size_t n_subtotals = 0;
1656 for (size_t i = 0; i < cats->n_cats; i++)
1657 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1658 if (n_subtotals > 1)
1660 msg_at (SE, cats_location,
1661 ngettext ("These categories include %zu instance "
1662 "of SUBTOTAL or HSUBTOTAL, so references "
1663 "from computed categories must refer to "
1664 "subtotals by position.",
1665 "These categories include %zu instances "
1666 "of SUBTOTAL or HSUBTOTAL, so references "
1667 "from computed categories must refer to "
1668 "subtotals by position.",
1671 msg_at (SN, e->location,
1672 _("This is the reference that lacks a position."));
1677 msg_at (SE, pc_cat->location,
1678 _("Computed category &%s references a category not included "
1679 "in the category list."),
1681 msg_at (SN, e->location, _("This is the missing category."));
1682 msg_at (SN, cats_location,
1683 _("To fix the problem, add the missing category to the "
1684 "list of categories here."));
1687 if (pc_cat->pc->hide_source_cats)
/* Check both operands; a failure in either one is tested for. */
1701 for (size_t i = 0; i < 2; i++)
1702 if (e->subs[i] && !ctables_recursive_check_postcompute (
1703 e->subs[i], pc_cat, cats, cats_location))
/* Parses a CATEGORIES subcommand for table T:

     VARIABLES=varlist [ '[' explicit categories ']' ]
       [ORDER={A|D}] [KEY={VALUE|LABEL|function[(var[, pct])]}]
       [MISSING={INCLUDE|EXCLUDE}] [TOTAL=bool] [LABEL="string"]
       [POSITION={BEFORE|AFTER}] [EMPTY={INCLUDE|EXCLUDE}]

   One new ctables_categories object C is created with a reference count
   equal to the number of variables.  For each listed variable, the previous
   entry in t->categories (indexed by dictionary index) is unreferenced.

   ORDER, KEY and MISSING are accepted only while C has no explicit
   categories (they are guarded by !c->n_cats).  The remaining settings are
   accepted in both cases. */
1713 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1714 struct ctables *ct, struct ctables_table *t)
1716 if (!lex_match_id (lexer, "VARIABLES"))
1718 lex_match (lexer, T_EQUALS);
1720 struct variable **vars;
1722 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1725 struct ctables_categories *c = xmalloc (sizeof *c);
1726 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1727 for (size_t i = 0; i < n_vars; i++)
1729 struct ctables_categories **cp
1730 = &t->categories[var_get_dict_index (vars[i])];
1731 ctables_categories_unref (*cp);
/* Optional explicit category list in square brackets.  Each parsed category
   records the location of the tokens it was parsed from. */
1736 size_t allocated_cats = 0;
1737 if (lex_match (lexer, T_LBRACK))
1739 int cats_start_ofs = lex_ofs (lexer);
1742 if (c->n_cats >= allocated_cats)
1743 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1745 int start_ofs = lex_ofs (lexer);
1746 struct ctables_category *cat = &c->cats[c->n_cats];
1747 if (!ctables_table_parse_explicit_category (lexer, ct, cat))
1749 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1752 lex_match (lexer, T_COMMA);
1754 while (!lex_match (lexer, T_RBRACK));
/* With the whole list known, verify that every computed category only
   references categories that are present in the list. */
1756 struct msg_location *cats_location
1757 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1758 for (size_t i = 0; i < c->n_cats; i++)
1760 struct ctables_category *cat = &c->cats[i];
1761 if (cat->type == CCT_POSTCOMPUTE
1762 && !ctables_recursive_check_postcompute (cat->pc->expr, cat,
/* Implicit category description, with defaults: missing values excluded,
   ascending sort.  ORDER, KEY and MISSING below modify it. */
1768 struct ctables_category cat = {
1770 .include_missing = false,
1771 .sort_ascending = true,
1773 bool show_totals = false;
1774 char *total_label = NULL;
1775 bool totals_before = false;
/* Settings loop: runs until the next subcommand or the end of the command. */
1776 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1778 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1780 lex_match (lexer, T_EQUALS);
1781 if (lex_match_id (lexer, "A"))
1782 cat.sort_ascending = true;
1783 else if (lex_match_id (lexer, "D"))
1784 cat.sort_ascending = false;
1787 lex_error_expecting (lexer, "A", "D");
1791 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1793 lex_match (lexer, T_EQUALS);
1794 if (lex_match_id (lexer, "VALUE"))
1795 cat.type = CCT_VALUE;
1796 else if (lex_match_id (lexer, "LABEL"))
1797 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in [0, 100]. */
1800 cat.type = CCT_FUNCTION;
1801 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1804 if (lex_match (lexer, T_LPAREN))
1806 cat.sort_var = parse_variable (lexer, dict);
1810 if (cat.sort_function == CTSF_PTILE)
1812 lex_match (lexer, T_COMMA);
1813 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1815 cat.percentile = lex_number (lexer);
1819 if (!lex_force_match (lexer, T_RPAREN))
1822 else if (ctables_function_availability (cat.sort_function)
/* The result is deliberately unused; the call is made for the error message
   that lex_force_match() emits when '(' is absent. */
1825 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1830 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1832 lex_match (lexer, T_EQUALS);
1833 if (lex_match_id (lexer, "INCLUDE"))
1834 cat.include_missing = true;
1835 else if (lex_match_id (lexer, "EXCLUDE"))
1836 cat.include_missing = false;
1839 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1843 else if (lex_match_id (lexer, "TOTAL"))
1845 lex_match (lexer, T_EQUALS);
1846 if (!parse_bool (lexer, &show_totals))
1849 else if (lex_match_id (lexer, "LABEL"))
1851 lex_match (lexer, T_EQUALS);
1852 if (!lex_force_string (lexer))
1855 total_label = ss_xstrdup (lex_tokss (lexer));
1858 else if (lex_match_id (lexer, "POSITION"))
1860 lex_match (lexer, T_EQUALS);
1861 if (lex_match_id (lexer, "BEFORE"))
1862 totals_before = true;
1863 else if (lex_match_id (lexer, "AFTER"))
1864 totals_before = false;
1867 lex_error_expecting (lexer, "BEFORE", "AFTER");
1871 else if (lex_match_id (lexer, "EMPTY"))
1873 lex_match (lexer, T_EQUALS);
1874 if (lex_match_id (lexer, "INCLUDE"))
1875 c->show_empty = true;
1876 else if (lex_match_id (lexer, "EXCLUDE"))
1877 c->show_empty = false;
1880 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Unrecognized setting.  Two variants of the expectation list exist: the
   full one and one without ORDER, KEY and MISSING (presumably chosen by
   whether explicit categories were given; confirm). */
1887 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1888 "TOTAL", "LABEL", "POSITION", "EMPTY");
1890 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category CAT built above to the list. */
1897 if (c->n_cats >= allocated_cats)
1898 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1899 c->cats[c->n_cats++] = cat;
/* Totals category: make room, then place it either at index 0 (shifting the
   rest with insert_element()) or after the last category.  The label
   defaults to the translated "Total" when LABEL was not given. */
1904 if (c->n_cats >= allocated_cats)
1905 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1907 struct ctables_category *totals;
1910 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
1911 totals = &c->cats[0];
1914 totals = &c->cats[c->n_cats];
1917 *totals = (struct ctables_category) {
1919 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Link categories to their subtotal.  The scan runs forward when
   TOTALS_BEFORE is set and backward otherwise, carrying the most recently
   seen subtotal in SUBTOTAL. */
1923 struct ctables_category *subtotal = NULL;
1924 for (size_t i = totals_before ? 0 : c->n_cats;
1925 totals_before ? i < c->n_cats : i-- > 0;
1926 totals_before ? i++ : 0)
1928 struct ctables_category *cat = &c->cats[i];
1936 cat->subtotal = subtotal;
1939 case CCT_POSTCOMPUTE:
1950 case CCT_EXCLUDED_MISSING:
/* Uninitializes NEST.  ctables_stack_uninit() calls this on every nest of a
   stack before freeing the nest array itself. */
1959 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes each of the stack->n nests in STACK and then frees the
   stack->nests array.  STACK itself is not freed here. */
1966 ctables_stack_uninit (struct ctables_stack *stack)
1970 for (size_t i = 0; i < stack->n; i++)
1971 ctables_nest_uninit (&stack->nests[i]);
1972 free (stack->nests);
/* Builds the nesting product of stacks S0 and S1: the result has one nest
   for every pair (A from S0, B from S1), in row-major order.

   For each pair, the new nest's variables are A's followed by B's.  Its
   scale_idx is A's when A has one, otherwise B's shifted by a->n.  Summary
   specifications for every variant are cloned from SUMMARY_SRC, which is
   selected by testing whether A's and then B's CSV_CELL spec set has a
   variable (the selected side is assigned in statements between the tests).

   Both S0 and S1 are passed by value and uninitialized before returning, so
   the caller must not use them afterward. */
1976 static struct ctables_stack
1977 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
1984 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
1985 for (size_t i = 0; i < s0.n; i++)
1986 for (size_t j = 0; j < s1.n; j++)
1988 const struct ctables_nest *a = &s0.nests[i];
1989 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists; the assertion checks the total. */
1991 size_t allocate = a->n + b->n;
1992 struct variable **vars = xnmalloc (allocate, sizeof *vars);
1993 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
1995 for (size_t k = 0; k < a->n; k++)
1996 vars[n++] = a->vars[k];
1997 for (size_t k = 0; k < b->n; k++)
1998 vars[n++] = b->vars[k];
1999 assert (n == allocate);
2001 const struct ctables_nest *summary_src;
2002 if (!a->specs[CSV_CELL].var)
2004 else if (!b->specs[CSV_CELL].var)
2009 struct ctables_nest *new = &stack.nests[stack.n++];
2010 *new = (struct ctables_nest) {
2012 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2013 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2017 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2018 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2020 ctables_stack_uninit (&s0);
2021 ctables_stack_uninit (&s1);
/* Builds the concatenation of stacks S0 and S1: the result holds S0's nests
   followed by S1's.  The nest structs are copied by assignment, so their
   contents are moved into the new array rather than cloned. */
2025 static struct ctables_stack
2026 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2028 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2029 for (size_t i = 0; i < s0.n; i++)
2030 stack.nests[stack.n++] = s0.nests[i];
2031 for (size_t i = 0; i < s1.n; i++)
2032 stack.nests[stack.n++] = s1.nests[i];
2033 assert (stack.n == s0.n + s1.n);
/* Expands axis expression A into a stack of nests.

   One path yields an empty stack (n == 0).  A variable leaf yields a stack
   with a single one-variable nest whose scale_idx is 0 for a scale variable
   and SIZE_MAX otherwise.  If the leaf has cell summary specs or is scale,
   every summary variant is cloned into the nest and tagged with the leaf's
   variable and scale flag.  Multiple response sets are not supported here
   (asserted).

   The two remaining returns recurse on both children and combine the
   results with stack_fts() and nest_fts() respectively. */
2039 static struct ctables_stack
2040 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2043 return (struct ctables_stack) { .n = 0 };
2048 assert (!a->var.is_mrset);
2050 struct variable **vars = xmalloc (sizeof *vars);
2053 struct ctables_nest *nest = xmalloc (sizeof *nest);
2054 *nest = (struct ctables_nest) {
2057 .scale_idx = a->scale ? 0 : SIZE_MAX,
2059 if (a->specs[CSV_CELL].n || a->scale)
2060 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2062 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2063 nest->specs[sv].var = a->var.var;
2064 nest->specs[sv].is_scale = a->scale;
2066 return (struct ctables_stack) { .nests = nest, .n = 1 };
2069 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2070 enumerate_fts (axis_type, a->subs[1]));
2073 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2074 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function in one cell.  Which member is
   in use depends on the summary function; ctables_summary_init() sets up
   the matching member and ctables_summary_uninit() releases it. */
2080 union ctables_summary
2082 /* COUNT, VALIDN, TOTALN. */
2085 /* MINIMUM, MAXIMUM, RANGE. */
2092 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2093 struct moments1 *moments;
2095 /* MEDIAN, MODE, PTILE. */
2098 struct casewriter *writer;
2103 /* XXX multiple response */
/* Prepares accumulator S for the summary function named by ss->function.

   Visible setup per group:
     - minimum/maximum style functions: min and max start as SYSMIS
     - moment-based functions (including the *PCT_SUM family): a moments1
       accumulator created with MOMENT_VARIANCE
     - order-statistic functions: a sorting casewriter over two numeric
       columns, sorted ascending on column 0

   The count-based percentage functions are grouped together as well. */
2107 ctables_summary_init (union ctables_summary *s,
2108 const struct ctables_summary_spec *ss)
2110 switch (ss->function)
2114 case CTSF_ROWPCT_COUNT:
2115 case CTSF_COLPCT_COUNT:
2116 case CTSF_TABLEPCT_COUNT:
2117 case CTSF_SUBTABLEPCT_COUNT:
2118 case CTSF_LAYERPCT_COUNT:
2119 case CTSF_LAYERROWPCT_COUNT:
2120 case CTSF_LAYERCOLPCT_COUNT:
2121 case CTSF_ROWPCT_VALIDN:
2122 case CTSF_COLPCT_VALIDN:
2123 case CTSF_TABLEPCT_VALIDN:
2124 case CTSF_SUBTABLEPCT_VALIDN:
2125 case CTSF_LAYERPCT_VALIDN:
2126 case CTSF_LAYERROWPCT_VALIDN:
2127 case CTSF_LAYERCOLPCT_VALIDN:
2128 case CTSF_ROWPCT_TOTALN:
2129 case CTSF_COLPCT_TOTALN:
2130 case CTSF_TABLEPCT_TOTALN:
2131 case CTSF_SUBTABLEPCT_TOTALN:
2132 case CTSF_LAYERPCT_TOTALN:
2133 case CTSF_LAYERROWPCT_TOTALN:
2134 case CTSF_LAYERCOLPCT_TOTALN:
2146 s->min = s->max = SYSMIS;
2154 case CTSF_ROWPCT_SUM:
2155 case CTSF_COLPCT_SUM:
2156 case CTSF_TABLEPCT_SUM:
2157 case CTSF_SUBTABLEPCT_SUM:
2158 case CTSF_LAYERPCT_SUM:
2159 case CTSF_LAYERROWPCT_SUM:
2160 case CTSF_LAYERCOLPCT_SUM:
2161 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric (width 0) columns; ctables_summary_add() writes the value to
   column 0 and the weight to column 1. */
2168 struct caseproto *proto = caseproto_create ();
2169 proto = caseproto_add_width (proto, 0);
2170 proto = caseproto_add_width (proto, 0);
2172 struct subcase ordering;
2173 subcase_init (&ordering, 0, 0, SC_ASCEND);
2174 s->writer = sort_create_writer (&ordering, proto);
2175 subcase_uninit (&ordering);
2176 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the function
   named by ss->function: the moments1 accumulator for moment-based
   functions and the casewriter for order-statistic functions.  No release
   call is visible for the count-based group. */
2186 ctables_summary_uninit (union ctables_summary *s,
2187 const struct ctables_summary_spec *ss)
2189 switch (ss->function)
2193 case CTSF_ROWPCT_COUNT:
2194 case CTSF_COLPCT_COUNT:
2195 case CTSF_TABLEPCT_COUNT:
2196 case CTSF_SUBTABLEPCT_COUNT:
2197 case CTSF_LAYERPCT_COUNT:
2198 case CTSF_LAYERROWPCT_COUNT:
2199 case CTSF_LAYERCOLPCT_COUNT:
2200 case CTSF_ROWPCT_VALIDN:
2201 case CTSF_COLPCT_VALIDN:
2202 case CTSF_TABLEPCT_VALIDN:
2203 case CTSF_SUBTABLEPCT_VALIDN:
2204 case CTSF_LAYERPCT_VALIDN:
2205 case CTSF_LAYERROWPCT_VALIDN:
2206 case CTSF_LAYERCOLPCT_VALIDN:
2207 case CTSF_ROWPCT_TOTALN:
2208 case CTSF_COLPCT_TOTALN:
2209 case CTSF_TABLEPCT_TOTALN:
2210 case CTSF_SUBTABLEPCT_TOTALN:
2211 case CTSF_LAYERPCT_TOTALN:
2212 case CTSF_LAYERROWPCT_TOTALN:
2213 case CTSF_LAYERCOLPCT_TOTALN:
2231 case CTSF_ROWPCT_SUM:
2232 case CTSF_COLPCT_SUM:
2233 case CTSF_TABLEPCT_SUM:
2234 case CTSF_SUBTABLEPCT_SUM:
2235 case CTSF_LAYERPCT_SUM:
2236 case CTSF_LAYERROWPCT_SUM:
2237 case CTSF_LAYERCOLPCT_SUM:
2238 moments1_destroy (s->moments);
2244 casewriter_destroy (s->writer);
/* Adds one case's contribution to accumulator S for the summary function
   named by ss->function.

   VAR and VALUE are the summarized variable and its value in the case.
   IS_SCALE says whether VAR is a scale variable.  IS_MISSING and
   EXCLUDED_MISSING describe the categorical variables of the cell, as laid
   out in the chart below.  D_WEIGHT and E_WEIGHT are the two case weights:
   the percentage and plain-count groups add D_WEIGHT, while the other count
   group, the moments and the order statistics add E_WEIGHT.

   Minimum/maximum, moment-based and order-statistic functions all ignore
   missing values of VAR. */
2250 ctables_summary_add (union ctables_summary *s,
2251 const struct ctables_summary_spec *ss,
2252 const struct variable *var, const union value *value,
2253 bool is_scale, bool is_missing, bool excluded_missing,
2254 double d_weight, double e_weight)
2256 /* To determine whether a case is included in a given table for a particular
2257 kind of summary, consider the following charts for each variable in the
2258 table. Only if "yes" appears for every variable for the summary is the
2261 Categorical variables: VALIDN COUNT TOTALN
2262 Valid values in included categories yes yes yes
2263 Missing values in included categories --- yes yes
2264 Missing values in excluded categories --- --- yes
2265 Valid values in excluded categories --- --- ---
2267 Scale variables: VALIDN COUNT TOTALN
2268 Valid value yes yes yes
2269 Missing value --- yes yes
2271 Missing values include both user- and system-missing. (The system-missing
2272 value is always in an excluded category.)
2274 switch (ss->function)
2277 case CTSF_ROWPCT_TOTALN:
2278 case CTSF_COLPCT_TOTALN:
2279 case CTSF_TABLEPCT_TOTALN:
2280 case CTSF_SUBTABLEPCT_TOTALN:
2281 case CTSF_LAYERPCT_TOTALN:
2282 case CTSF_LAYERROWPCT_TOTALN:
2283 case CTSF_LAYERCOLPCT_TOTALN:
2284 s->count += d_weight;
2288 case CTSF_ROWPCT_COUNT:
2289 case CTSF_COLPCT_COUNT:
2290 case CTSF_TABLEPCT_COUNT:
2291 case CTSF_SUBTABLEPCT_COUNT:
2292 case CTSF_LAYERPCT_COUNT:
2293 case CTSF_LAYERROWPCT_COUNT:
2294 case CTSF_LAYERCOLPCT_COUNT:
2295 if (is_scale || !excluded_missing)
2296 s->count += d_weight;
2300 case CTSF_ROWPCT_VALIDN:
2301 case CTSF_COLPCT_VALIDN:
2302 case CTSF_TABLEPCT_VALIDN:
2303 case CTSF_SUBTABLEPCT_VALIDN:
2304 case CTSF_LAYERPCT_VALIDN:
2305 case CTSF_LAYERROWPCT_VALIDN:
2306 case CTSF_LAYERCOLPCT_VALIDN:
2308 ? !var_is_value_missing (var, value)
2310 s->count += d_weight;
2315 s->count += d_weight;
2319 if (is_scale || !excluded_missing)
2320 s->count += e_weight;
2325 ? !var_is_value_missing (var, value)
2327 s->count += e_weight;
2331 s->count += e_weight;
2337 if (!var_is_value_missing (var, value))
2339 assert (!var_is_alpha (var)); /* XXX? */
2340 if (s->min == SYSMIS || value->f < s->min)
2342 if (s->max == SYSMIS || value->f > s->max)
2352 case CTSF_ROWPCT_SUM:
2353 case CTSF_COLPCT_SUM:
2354 case CTSF_TABLEPCT_SUM:
2355 case CTSF_SUBTABLEPCT_SUM:
2356 case CTSF_LAYERPCT_SUM:
2357 case CTSF_LAYERROWPCT_SUM:
2358 case CTSF_LAYERCOLPCT_SUM:
2359 if (!var_is_value_missing (var, value))
2360 moments1_add (s->moments, value->f, e_weight);
/* Order statistics: only non-missing values may be written to the sorter
   and added to the valid weight, matching the minimum/maximum and moments
   branches.  The test was previously un-negated, which collected missing
   values only. */
2366 if (!var_is_value_missing (var, value))
2368 s->ovalid += e_weight;
/* Record (value, weight) as columns 0 and 1 of a new case. */
2370 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2371 *case_num_rw_idx (c, 0) = value->f;
2372 *case_num_rw_idx (c, 1) = e_weight;
2373 casewriter_write (s->writer, c);
/* Maps a summary FUNCTION to the kind of domain (denominator scope) it is
   computed over.  The percentage functions are grouped by their prefix, for
   example every LAYERCOLPCT_* function maps to CTDT_LAYERCOL, every
   LAYERROWPCT_* function to CTDT_LAYERROW and every SUBTABLEPCT_* function
   to CTDT_SUBTABLE. */
2379 static enum ctables_domain_type
2380 ctables_function_domain (enum ctables_summary_function function)
2404 case CTSF_COLPCT_COUNT:
2405 case CTSF_COLPCT_SUM:
2406 case CTSF_COLPCT_TOTALN:
2407 case CTSF_COLPCT_VALIDN:
2410 case CTSF_LAYERCOLPCT_COUNT:
2411 case CTSF_LAYERCOLPCT_SUM:
2412 case CTSF_LAYERCOLPCT_TOTALN:
2413 case CTSF_LAYERCOLPCT_VALIDN:
2414 return CTDT_LAYERCOL;
2416 case CTSF_LAYERPCT_COUNT:
2417 case CTSF_LAYERPCT_SUM:
2418 case CTSF_LAYERPCT_TOTALN:
2419 case CTSF_LAYERPCT_VALIDN:
2422 case CTSF_LAYERROWPCT_COUNT:
2423 case CTSF_LAYERROWPCT_SUM:
2424 case CTSF_LAYERROWPCT_TOTALN:
2425 case CTSF_LAYERROWPCT_VALIDN:
2426 return CTDT_LAYERROW;
2428 case CTSF_ROWPCT_COUNT:
2429 case CTSF_ROWPCT_SUM:
2430 case CTSF_ROWPCT_TOTALN:
2431 case CTSF_ROWPCT_VALIDN:
2434 case CTSF_SUBTABLEPCT_COUNT:
2435 case CTSF_SUBTABLEPCT_SUM:
2436 case CTSF_SUBTABLEPCT_TOTALN:
2437 case CTSF_SUBTABLEPCT_VALIDN:
2438 return CTDT_SUBTABLE;
2440 case CTSF_TABLEPCT_COUNT:
2441 case CTSF_TABLEPCT_SUM:
2442 case CTSF_TABLEPCT_TOTALN:
2443 case CTSF_TABLEPCT_VALIDN:
/* Computes the final value of accumulator S in CELL for the summary function
   named by ss->function.

   Percentage functions divide s->count by the matching total of the cell's
   domain (e_count, e_valid or e_total) and scale by 100; a zero denominator
   is special-cased.  Range is max - min unless either is SYSMIS.  The
   moment-based results come from moments1_calculate(); the sum is computed
   as weight * mean, and the standard deviation is the square root of the
   variance.  Order statistics convert the casewriter into a reader and run
   it through a percentile (0.5 unless the function is PTILE) or a mode
   statistic; the result is stored in s->ovalue. */
2451 ctables_summary_value (const struct ctables_cell *cell,
2452 union ctables_summary *s,
2453 const struct ctables_summary_spec *ss)
2455 switch (ss->function)
2461 case CTSF_ROWPCT_COUNT:
2462 case CTSF_COLPCT_COUNT:
2463 case CTSF_TABLEPCT_COUNT:
2464 case CTSF_SUBTABLEPCT_COUNT:
2465 case CTSF_LAYERPCT_COUNT:
2466 case CTSF_LAYERROWPCT_COUNT:
2467 case CTSF_LAYERCOLPCT_COUNT:
2469 enum ctables_domain_type d = ctables_function_domain (ss->function);
2470 return (cell->domains[d]->e_count
2471 ? s->count / cell->domains[d]->e_count * 100
2475 case CTSF_ROWPCT_VALIDN:
2476 case CTSF_COLPCT_VALIDN:
2477 case CTSF_TABLEPCT_VALIDN:
2478 case CTSF_SUBTABLEPCT_VALIDN:
2479 case CTSF_LAYERPCT_VALIDN:
2480 case CTSF_LAYERROWPCT_VALIDN:
2481 case CTSF_LAYERCOLPCT_VALIDN:
2483 enum ctables_domain_type d = ctables_function_domain (ss->function);
2484 return (cell->domains[d]->e_valid
2485 ? s->count / cell->domains[d]->e_valid * 100
2489 case CTSF_ROWPCT_TOTALN:
2490 case CTSF_COLPCT_TOTALN:
2491 case CTSF_TABLEPCT_TOTALN:
2492 case CTSF_SUBTABLEPCT_TOTALN:
2493 case CTSF_LAYERPCT_TOTALN:
2494 case CTSF_LAYERROWPCT_TOTALN:
2495 case CTSF_LAYERCOLPCT_TOTALN:
2497 enum ctables_domain_type d = ctables_function_domain (ss->function);
2498 return (cell->domains[d]->e_total
2499 ? s->count / cell->domains[d]->e_total * 100
2523 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2528 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2534 double weight, variance;
2535 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2536 return calc_semean (variance, weight);
2542 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2543 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2548 double weight, mean;
2549 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2550 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2556 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2560 case CTSF_ROWPCT_SUM:
2561 case CTSF_COLPCT_SUM:
2562 case CTSF_TABLEPCT_SUM:
2563 case CTSF_SUBTABLEPCT_SUM:
2564 case CTSF_LAYERPCT_SUM:
2565 case CTSF_LAYERROWPCT_SUM:
2566 case CTSF_LAYERCOLPCT_SUM:
/* Percentile path: column 0 of the sorted cases is the value and column 1
   the weight, as written by ctables_summary_add(). */
2573 struct casereader *reader = casewriter_make_reader (s->writer);
2576 struct percentile *ptile = percentile_create (
2577 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2578 struct order_stats *os = &ptile->parent;
2579 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2580 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2581 statistic_destroy (&ptile->parent.parent);
/* Mode path: same reader layout, different order statistic. */
2588 struct casereader *reader = casewriter_make_reader (s->writer);
2591 struct mode *mode = mode_create ();
2592 struct order_stats *os = &mode->parent;
2593 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2594 s->ovalue = mode->mode;
2595 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   drive the comparison and the axis whose cell values are compared. */
2603 struct ctables_cell_sort_aux
2605 const struct ctables_nest *nest;
2606 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting along one axis.  A_ and B_
   point to `struct ctables_cell *' elements; AUX_ is a
   `struct ctables_cell_sort_aux'.

   Variables of the nest are compared in order, skipping the scale variable.
   Cells in different categories are ordered by the address of their
   category objects.  Within the same category, the ordering depends on the
   category type: some types always compare equal, others compare by value,
   and label ordering compares value labels with strcmp(), placing a value
   that has a label after one that has none and falling back to value order
   when neither has a label.  Where the category's sort_ascending flag is
   consulted, a false flag reverses the result. */
2610 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2612 const struct ctables_cell_sort_aux *aux = aux_;
2613 struct ctables_cell *const *ap = a_;
2614 struct ctables_cell *const *bp = b_;
2615 const struct ctables_cell *a = *ap;
2616 const struct ctables_cell *b = *bp;
2618 const struct ctables_nest *nest = aux->nest;
2619 for (size_t i = 0; i < nest->n; i++)
2620 if (i != nest->scale_idx)
2622 const struct variable *var = nest->vars[i];
2623 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2624 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2625 if (a_cv->category != b_cv->category)
2626 return a_cv->category > b_cv->category ? 1 : -1;
2628 const union value *a_val = &a_cv->value;
2629 const union value *b_val = &b_cv->value;
2630 switch (a_cv->category->type)
2636 case CCT_POSTCOMPUTE:
2637 case CCT_EXCLUDED_MISSING:
2638 /* Must be equal. */
2645 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2653 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2655 return a_cv->category->sort_ascending ? cmp : -cmp;
2661 const char *a_label = var_lookup_value_label (var, a_val);
2662 const char *b_label = var_lookup_value_label (var, b_val);
2664 ? (b_label ? strcmp (a_label, b_label) : 1)
2665 : (b_label ? -1 : value_compare_3way (
2666 a_val, b_val, var_get_width (var))));
2668 return a_cv->category->sort_ascending ? cmp : -cmp;
2682 For each ctables_table:
2683 For each combination of row vars:
2684 For each combination of column vars:
2685 For each combination of layer vars:
2687 Make a table of row values:
2688 Sort entries by row values
2689 Assign a 0-based index to each actual value
2690 Construct a dimension
2691 Make a table of column values
2692 Make a table of layer values
2694 Fill the table entry using the indexes from before.
/* Finds the domain of type DOMAIN in section S that CELL belongs to,
   creating it if necessary.

   A domain is identified by the values of the variables that each axis's
   nest lists in nest->domains[domain].  Those values are hashed, the hash
   map s->domains[domain] is probed, and candidates are compared value by
   value against their example cell.  When none matches, a new domain is
   allocated with CELL as its example and inserted into the map. */
2697 static struct ctables_domain *
2698 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2699 enum ctables_domain_type domain)
2702 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2704 const struct ctables_nest *nest = s->nests[a];
2705 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2707 size_t v_idx = nest->domains[domain][i];
2708 hash = value_hash (&cell->axes[a].cvs[v_idx].value,
2709 var_get_width (nest->vars[v_idx]), hash);
2713 struct ctables_domain *d;
2714 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2716 const struct ctables_cell *df = d->example;
2717 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2719 const struct ctables_nest *nest = s->nests[a];
2720 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2722 size_t v_idx = nest->domains[domain][i];
2723 if (!value_equal (&df->axes[a].cvs[v_idx].value,
2724 &cell->axes[a].cvs[v_idx].value,
2725 var_get_width (nest->vars[v_idx])))
/* No existing domain matched: create one, zero-initialized apart from its
   example cell. */
2734 d = xmalloc (sizeof *d);
2735 *d = (struct ctables_domain) { .example = cell };
2736 hmap_insert (&s->domains[domain], &d->node, hash);
/* Determines which category in C the value V of variable VAR falls into.

   A system-missing numeric value is handled before the scan.  Categories
   are then examined from last to first.  Numbers match by equality; ranges
   match inclusively, with -DBL_MAX and DBL_MAX acting as open bounds.
   Implicit (non-explicit) categories accept V when they include missing
   values or V is not missing.  An OTHERNM category is remembered in
   OTHERNM and used as the fallback when nothing else matched, unless V is
   missing, in which case the result is NULL. */
2740 static const struct ctables_category *
2741 ctables_categories_match (const struct ctables_categories *c,
2742 const union value *v, const struct variable *var)
2744 if (var_is_numeric (var) && v->f == SYSMIS)
2747 const struct ctables_category *othernm = NULL;
2748 for (size_t i = c->n_cats; i-- > 0; )
2750 const struct ctables_category *cat = &c->cats[i];
2754 if (cat->number == v->f)
2762 if ((cat->range[0] == -DBL_MAX || v->f >= cat->range[0])
2763 && (cat->range[1] == DBL_MAX || v->f <= cat->range[1]))
2768 if (var_is_value_missing (var, v))
2772 case CCT_POSTCOMPUTE:
2787 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2790 case CCT_EXCLUDED_MISSING:
2795 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C, looking only at the first and the
   last category, since a total is placed at one end of the list.

   C must contain at least one category: the last element is addressed as
   c->cats[c->n_cats - 1] without a check for an empty list. */
2798 static const struct ctables_category *
2799 ctables_categories_total (const struct ctables_categories *c)
2801 const struct ctables_category *first = &c->cats[0];
2802 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2803 return (first->type == CCT_TOTAL ? first
2804 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S identified by the categories CATS and the
   data in case C, creating it if necessary.

   A cell's identity is, for every non-scale variable on every axis, the
   category pointer plus the variable's value in C.  The value is left out
   for total, subtotal and postcompute categories.  The hash and the
   equality test below must stay in agreement on that rule.

   A new cell gets a copy of each variable's value, its category pointers,
   one initialized summary per summary spec of the table's summary axis, and
   a domain of every type.  Cells in a total, subtotal or postcompute
   category do not contribute to domains; postcompute cells are flagged.

   NOTE(review): the second dimension of CATS is fixed at 10; no bound check
   against nest->n is visible here. */
2808 static struct ctables_cell *
2809 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
2810 const struct ctables_category *cats[PIVOT_N_AXES][10])
2813 enum ctables_summary_variant sv = CSV_CELL;
2814 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2816 const struct ctables_nest *nest = s->nests[a];
2817 for (size_t i = 0; i < nest->n; i++)
2818 if (i != nest->scale_idx)
2820 hash = hash_pointer (cats[a][i], hash);
2821 if (cats[a][i]->type != CCT_TOTAL
2822 && cats[a][i]->type != CCT_SUBTOTAL
2823 && cats[a][i]->type != CCT_POSTCOMPUTE)
2824 hash = value_hash (case_data (c, nest->vars[i]),
2825 var_get_width (nest->vars[i]), hash);
/* Probe for an existing cell with the same identity. */
2831 struct ctables_cell *cell;
2832 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
2834 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2836 const struct ctables_nest *nest = s->nests[a];
2837 for (size_t i = 0; i < nest->n; i++)
2838 if (i != nest->scale_idx
2839 && (cats[a][i] != cell->axes[a].cvs[i].category
2840 || (cats[a][i]->type != CCT_TOTAL
2841 && cats[a][i]->type != CCT_SUBTOTAL
2842 && cats[a][i]->type != CCT_POSTCOMPUTE
2843 && !value_equal (case_data (c, nest->vars[i]),
2844 &cell->axes[a].cvs[i].value,
2845 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
2854 cell = xmalloc (sizeof *cell);
2857 cell->contributes_to_domains = true;
2858 cell->postcompute = false;
2859 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2861 const struct ctables_nest *nest = s->nests[a];
2862 cell->axes[a].cvs = (nest->n
2863 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
2865 for (size_t i = 0; i < nest->n; i++)
2867 const struct ctables_category *cat = cats[a][i];
2868 const struct variable *var = nest->vars[i];
2869 const union value *value = case_data (c, var);
2870 if (i != nest->scale_idx)
2872 const struct ctables_category *subtotal = cat->subtotal;
2873 if (cat->hide || (subtotal && subtotal->hide_subcategories))
2876 if (cat->type == CCT_TOTAL
2877 || cat->type == CCT_SUBTOTAL
2878 || cat->type == CCT_POSTCOMPUTE)
2879 cell->contributes_to_domains = false;
2880 if (cat->type == CCT_POSTCOMPUTE)
2881 cell->postcompute = true;
2884 cell->axes[a].cvs[i].category = cat;
2885 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* Summaries come from the nest on the table's summary axis, using the
   summary variant chosen for this cell. */
2889 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2890 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2891 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
2892 for (size_t i = 0; i < specs->n; i++)
2893 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
2894 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2895 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
2896 hmap_insert (&s->cells, &cell->node, hash);
/* Adds case C to the cell of section S identified by CATS, creating the cell
   on first use.

   Every summary of the cell receives the case through
   ctables_summary_add().  If the cell contributes to domains, each of its
   domains has its totals increased by D_WEIGHT and E_WEIGHT.  The counts
   are increased only when EXCLUDED_MISSING is false.  The valid totals are
   updated under a further condition on a line that falls between the ones
   kept here. */
2901 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
2902 const struct ctables_category *cats[PIVOT_N_AXES][10],
2903 bool is_missing, bool excluded_missing,
2904 double d_weight, double e_weight)
2906 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
2907 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2909 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2910 for (size_t i = 0; i < specs->n; i++)
2911 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
2912 specs->var, case_data (c, specs->var), specs->is_scale,
2913 is_missing, excluded_missing, d_weight, e_weight);
2914 if (cell->contributes_to_domains)
2916 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2918 struct ctables_domain *d = cell->domains[dt];
2919 d->d_total += d_weight;
2920 d->e_total += e_weight;
2921 if (!excluded_missing)
2923 d->d_count += d_weight;
2924 d->e_count += e_weight;
2928 d->d_valid += d_weight;
2929 d->e_valid += e_weight;
/* Adds case C to the total cells it belongs to.  Starting at axis START_AXIS
   and nest position START_NEST, each non-scale variable's total category is
   looked up with ctables_categories_total().  The case is added under the
   modified category array, and the function recurses from the next position
   so that combinations of totals across several variables are covered as
   well.  The original category is kept in SAVE while CATS is modified
   (presumably restored afterward; confirm). */
2936 recurse_totals (struct ctables_section *s, const struct ccase *c,
2937 const struct ctables_category *cats[PIVOT_N_AXES][10],
2938 bool is_missing, bool excluded_missing,
2939 double d_weight, double e_weight,
2940 enum pivot_axis_type start_axis, size_t start_nest)
2942 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
2944 const struct ctables_nest *nest = s->nests[a];
2945 for (size_t i = start_nest; i < nest->n; i++)
2947 if (i == nest->scale_idx)
2950 const struct variable *var = nest->vars[i];
2952 const struct ctables_category *total = ctables_categories_total (
2953 s->table->categories[var_get_dict_index (var)]);
2956 const struct ctables_category *save = cats[a][i];
2958 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
2959 d_weight, e_weight);
2960 recurse_totals (s, c, cats, is_missing, excluded_missing,
2961 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells it belongs to.  Starting at axis
   START_AXIS and nest position START_NEST, each non-scale variable's
   current category is replaced by that category's subtotal.  The case is
   added under the modified category array, and the function recurses from
   the next position so that combinations of subtotals are covered as well.
   The original category is kept in SAVE while CATS is modified (presumably
   restored afterward; confirm). */
2970 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
2971 const struct ctables_category *cats[PIVOT_N_AXES][10],
2972 bool is_missing, bool excluded_missing,
2973 double d_weight, double e_weight,
2974 enum pivot_axis_type start_axis, size_t start_nest)
2976 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
2978 const struct ctables_nest *nest = s->nests[a];
2979 for (size_t i = start_nest; i < nest->n; i++)
2981 if (i == nest->scale_idx)
2984 const struct ctables_category *save = cats[a][i];
2987 cats[a][i] = save->subtotal;
2988 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
2989 d_weight, e_weight);
2990 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
2991 d_weight, e_weight, a, i + 1);
3000 ctables_add_occurrence (const struct variable *var,
3001 const union value *value,
3002 struct hmap *occurrences)
3004 int width = var_get_width (var);
3005 unsigned int hash = value_hash (value, width, 0);
3007 struct ctables_occurrence *o;
3008 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3010 if (value_equal (value, &o->value, width))
3013 o = xmalloc (sizeof *o);
3014 value_clone (&o->value, value, width);
3015 hmap_insert (occurrences, &o->node, hash);
/* Classifies case C against the categorical variables of section S and
   accumulates it with weights D_WEIGHT and E_WEIGHT.

   For every non-scale variable on every axis, the case's value is matched
   with ctables_categories_match() and the result stored in CATS.  A shared
   static CCT_EXCLUDED_MISSING placeholder category can be stored instead, in
   which case EXCLUDED_MISSING is set.  Unless that happened, each value is
   also recorded in S->occurrences via ctables_add_occurrence().  The case is
   then added to its cell with ctables_cell_add__() and to total and subtotal
   cells with recurse_totals() and recurse_subtotals().

   NOTE(review): CATS has a fixed inner dimension of 10 (flagged XXX below);
   nothing visible here bounds nest->n by that limit -- confirm. */
3019 ctables_cell_insert (struct ctables_section *s,
3020 const struct ccase *c,
3021 double d_weight, double e_weight)
3023 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3025 /* Does at least one categorical variable have a missing value in an included
3026 or excluded category? */
3027 bool is_missing = false;
3029 /* Does at least one categorical variable have a missing value in an excluded
3031 bool excluded_missing = false;
3033 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3035 const struct ctables_nest *nest = s->nests[a];
3036 for (size_t i = 0; i < nest->n; i++)
3038 if (i == nest->scale_idx)
3041 const struct variable *var = nest->vars[i];
3042 const union value *value = case_data (c, var);
3044 bool var_missing = var_is_value_missing (var, value) != 0;
3048 cats[a][i] = ctables_categories_match (
3049 s->table->categories[var_get_dict_index (var)], value, var);
3055 static const struct ctables_category cct_excluded_missing = {
3056 .type = CCT_EXCLUDED_MISSING,
3059 cats[a][i] = &cct_excluded_missing;
3060 excluded_missing = true;
3065 if (!excluded_missing)
3066 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3068 const struct ctables_nest *nest = s->nests[a];
3069 for (size_t i = 0; i < nest->n; i++)
3070 if (i != nest->scale_idx)
3072 const struct variable *var = nest->vars[i];
3073 const union value *value = case_data (c, var);
3074 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3078 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3079 d_weight, e_weight);
3081 //if (!excluded_missing)
3083 recurse_totals (s, c, cats, is_missing, excluded_missing,
3084 d_weight, e_weight, 0, 0);
3085 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3086 d_weight, e_weight, 0, 0);
/* Summary specification set that a merge item draws from.  The comparison
   function merge_item_compare_3way() indexes this set's specs[] with the
   item's 'ofs' member. */
3092 const struct ctables_summary_spec_set *set;
3097 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3099 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3100 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3101 if (as->function != bs->function)
3102 return as->function > bs->function ? 1 : -1;
3103 else if (as->percentile != bs->percentile)
3104 return as->percentile < bs->percentile ? 1 : -1;
3105 return strcmp (as->label, bs->label);
3108 static struct pivot_value *
3109 ctables_category_create_label (const struct ctables_category *cat,
3110 const struct variable *var,
3111 const union value *value)
3113 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3114 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3115 : cat->type == CCT_POSTCOMPUTE && cat->pc->label
3116 ? pivot_value_new_user_text (cat->pc->label, SIZE_MAX)
3117 : pivot_value_new_var_value (var, value));
3120 static struct ctables_value *
3121 ctables_value_find__ (struct ctables_table *t, const union value *value,
3122 int width, unsigned int hash)
3124 struct ctables_value *clv;
3125 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3126 hash, &t->clabels_values_map)
3127 if (value_equal (value, &clv->value, width))
3133 ctables_value_insert (struct ctables_table *t, const union value *value,
3136 unsigned int hash = value_hash (value, width, 0);
3137 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3140 clv = xmalloc (sizeof *clv);
3141 value_clone (&clv->value, value, width);
3142 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Convenience wrapper for ctables_value_find__() that computes the hash of
   VALUE (of the given WIDTH) itself. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
3155 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3156 size_t ix[PIVOT_N_AXES])
3158 if (a < PIVOT_N_AXES)
3160 size_t limit = MAX (t->stacks[a].n, 1);
3161 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3162 ctables_table_add_section (t, a + 1, ix);
3166 struct ctables_section *s = &t->sections[t->n_sections++];
3167 *s = (struct ctables_section) {
3169 .cells = HMAP_INITIALIZER (s->cells),
3171 for (a = 0; a < PIVOT_N_AXES; a++)
3174 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3176 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3177 for (size_t i = 0; i < nest->n; i++)
3178 hmap_init (&s->occurrences[a][i]);
3180 for (size_t i = 0; i < N_CTDTS; i++)
3181 hmap_init (&s->domains[i]);
/* Postcompute operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  const double sum = a + b;
  return sum;
}
/* Postcompute operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  const double difference = a - b;
  return difference;
}
/* Postcompute operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  const double product = a * b;
  return product;
}
3204 ctpo_div (double a, double b)
3206 return b ? a / b : SYSMIS;
3210 ctpo_pow (double a, double b)
3212 int save_errno = errno;
3214 double result = pow (a, b);
3222 ctpo_neg (double a, double b UNUSED)
/* State shared by the functions that evaluate a postcompute expression. */
3227 struct ctables_pcexpr_evaluate_ctx
/* Cell whose category values are used for every position other than the
   postcompute position. */
3229 const struct ctables_cell *cell;
/* Section whose cells and occurrence maps are searched. */
3230 const struct ctables_section *section;
/* Category specifications used to resolve category references in the
   expression. */
3231 const struct ctables_categories *cats;
/* Axis of the postcompute position (paired with an index, 'pc_a_idx', that
   the evaluator functions also read from this structure). */
3232 enum pivot_axis_type pc_a;
3236 static double ctables_pcexpr_evaluate (
3237 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3240 ctables_pcexpr_evaluate_nonterminal (
3241 const struct ctables_pcexpr_evaluate_ctx *ctx,
3242 const struct ctables_pcexpr *e, size_t n_args,
3243 double evaluate (double, double))
3245 double args[2] = { 0, 0 };
3246 for (size_t i = 0; i < n_args; i++)
3248 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3249 if (!isfinite (args[i]) || args[i] == SYSMIS)
3252 return evaluate (args[0], args[1]);
/* Finds, in CTX->section, the cell that has the same category and value as
   CTX->cell at every categorical position except the postcompute position
   (CTX->pc_a, CTX->pc_a_idx), where PC_CV is used instead, and returns the
   value of that cell's first summary (index 0; see the XXX below).

   The first loop nest builds the hash of the cell being sought and the
   second compares candidate cells field by field.  Both skip the value for
   total, subtotal, and postcompute categories, so the two must be kept in
   sync. */
3256 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3257 const struct ctables_cell_value *pc_cv)
3259 const struct ctables_section *s = ctx->section;
/* Hash the category pointer, and for ordinary categories the value too, of
   every categorical position. */
3262 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3264 const struct ctables_nest *nest = s->nests[a];
3265 for (size_t i = 0; i < nest->n; i++)
3266 if (i != nest->scale_idx)
3268 const struct ctables_cell_value *cv
3269 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3270 : &ctx->cell->axes[a].cvs[i]);
3271 hash = hash_pointer (cv->category, hash);
3272 if (cv->category->type != CCT_TOTAL
3273 && cv->category->type != CCT_SUBTOTAL
3274 && cv->category->type != CCT_POSTCOMPUTE)
3275 hash = value_hash (&cv->value,
3276 var_get_width (nest->vars[i]), hash);
/* Compare each cell in the hash bucket against the sought categories and
   values. */
3280 struct ctables_cell *tc;
3281 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3283 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3285 const struct ctables_nest *nest = s->nests[a];
3286 for (size_t i = 0; i < nest->n; i++)
3287 if (i != nest->scale_idx)
3289 const struct ctables_cell_value *p_cv
3290 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3291 : &ctx->cell->axes[a].cvs[i]);
3292 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3293 if (p_cv->category != t_cv->category
3294 || (p_cv->category->type != CCT_TOTAL
3295 && p_cv->category->type != CCT_SUBTOTAL
3296 && p_cv->category->type != CCT_POSTCOMPUTE
3297 && !value_equal (&p_cv->value,
3299 var_get_width (nest->vars[i]))))
/* TC is the matching cell: evaluate its summary using the spec set selected
   by the cell's summary variant. */
3311 const struct ctables_table *t = s->table;
3312 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3313 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3314 size_t j = 0 /* XXX */;
3315 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   - A CTPO_CAT_RANGE node resolves its category, then adds up
     ctables_pcexpr_evaluate_category() over every value recorded in the
     section's occurrence map for the postcompute position that matches that
     category.

   - The other category nodes resolve a single category and evaluate it
     once.

   - The arithmetic nodes are delegated to
     ctables_pcexpr_evaluate_nonterminal() with the corresponding ctpo_*()
     helper: two operands for the binary operators, one for negation. */
3319 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3320 const struct ctables_pcexpr *e)
3327 case CTPO_CAT_RANGE:
3329 struct ctables_cell_value cv = {
3330 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3332 assert (cv.category != NULL);
3334 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3335 const struct ctables_occurrence *o;
3338 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3339 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3340 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3342 cv.value = o->value;
3343 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3348 case CTPO_CAT_NUMBER:
3349 case CTPO_CAT_STRING:
3350 case CTPO_CAT_MISSING:
3351 case CTPO_CAT_OTHERNM:
3352 case CTPO_CAT_SUBTOTAL:
3353 case CTPO_CAT_TOTAL:
3355 struct ctables_cell_value cv = {
3356 .category = ctables_find_category_for_postcompute (ctx->cats, e),
/* NOTE(review): the numeric member is used for every node type in this
   group, including CTPO_CAT_STRING -- confirm this is intended for string
   categories. */
3357 .value = { .f = e->number },
3359 assert (cv.category != NULL);
3360 return ctables_pcexpr_evaluate_category (ctx, &cv);
3364 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3367 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3370 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3373 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3376 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3379 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3386 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3387 const struct ctables_cell *cell)
3389 enum pivot_axis_type pc_a;
3391 const struct ctables_postcompute *pc;
3392 for (pc_a = 0; ; pc_a++)
3394 assert (pc_a < PIVOT_N_AXES);
3395 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3397 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3398 if (cv->category->type == CCT_POSTCOMPUTE)
3400 pc = cv->category->pc;
3407 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3408 const struct ctables_categories *cats = s->table->categories[
3409 var_get_dict_index (var)];
3410 struct ctables_pcexpr_evaluate_ctx ctx = {
3415 .pc_a_idx = pc_a_idx,
3417 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3421 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3423 struct pivot_table *pt = pivot_table_create__ (
3425 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3426 : pivot_value_new_text (N_("Custom Tables"))),
3429 pivot_table_set_caption (
3430 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
3432 pivot_table_set_caption (
3433 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
3435 bool summary_dimension = (t->summary_axis != t->slabels_axis
3436 || (!t->slabels_visible
3437 && t->summary_specs.n > 1));
3438 if (summary_dimension)
3440 struct pivot_dimension *d = pivot_dimension_create (
3441 pt, t->slabels_axis, N_("Statistics"));
3442 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3443 if (!t->slabels_visible)
3444 d->hide_all_labels = true;
3445 for (size_t i = 0; i < specs->n; i++)
3446 pivot_category_create_leaf (
3447 d->root, pivot_value_new_text (specs->specs[i].label));
3450 bool categories_dimension = t->clabels_example != NULL;
3451 if (categories_dimension)
3453 struct pivot_dimension *d = pivot_dimension_create (
3454 pt, t->label_axis[t->clabels_from_axis],
3455 t->clabels_from_axis == PIVOT_AXIS_ROW
3456 ? N_("Row Categories")
3457 : N_("Column Categories"));
3458 const struct variable *var = t->clabels_example;
3459 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3460 for (size_t i = 0; i < t->n_clabels_values; i++)
3462 const struct ctables_value *value = t->clabels_values[i];
3463 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3464 assert (cat != NULL);
3465 pivot_category_create_leaf (d->root, ctables_category_create_label (
3466 cat, t->clabels_example, &value->value));
3470 pivot_table_set_look (pt, ct->look);
3471 struct pivot_dimension *d[PIVOT_N_AXES];
3472 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3474 static const char *names[] = {
3475 [PIVOT_AXIS_ROW] = N_("Rows"),
3476 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3477 [PIVOT_AXIS_LAYER] = N_("Layers"),
3479 d[a] = (t->axes[a] || a == t->summary_axis
3480 ? pivot_dimension_create (pt, a, names[a])
3485 assert (t->axes[a]);
3487 for (size_t i = 0; i < t->stacks[a].n; i++)
3489 struct ctables_nest *nest = &t->stacks[a].nests[i];
3490 struct ctables_section **sections = xnmalloc (t->n_sections,
3492 size_t n_sections = 0;
3494 size_t n_total_cells = 0;
3495 size_t max_depth = 0;
3496 for (size_t j = 0; j < t->n_sections; j++)
3497 if (t->sections[j].nests[a] == nest)
3499 struct ctables_section *s = &t->sections[j];
3500 sections[n_sections++] = s;
3501 n_total_cells += s->cells.count;
3503 size_t depth = s->nests[a]->n;
3504 max_depth = MAX (depth, max_depth);
3507 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3509 size_t n_sorted = 0;
3511 for (size_t j = 0; j < n_sections; j++)
3513 struct ctables_section *s = sections[j];
3515 struct ctables_cell *cell;
3516 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3518 sorted[n_sorted++] = cell;
3519 assert (n_sorted <= n_total_cells);
3522 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3523 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
3525 struct ctables_level
3527 enum ctables_level_type
3529 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3530 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3531 CTL_SUMMARY, /* Summary functions. */
3535 enum settings_value_show vlabel; /* CTL_VAR only. */
3538 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3539 size_t n_levels = 0;
3540 for (size_t k = 0; k < nest->n; k++)
3542 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3543 if (vlabel != CTVL_NONE)
3545 levels[n_levels++] = (struct ctables_level) {
3547 .vlabel = (enum settings_value_show) vlabel,
3552 if (nest->scale_idx != k
3553 && (k != nest->n - 1 || t->label_axis[a] == a))
3555 levels[n_levels++] = (struct ctables_level) {
3556 .type = CTL_CATEGORY,
3562 if (!summary_dimension && a == t->slabels_axis)
3564 levels[n_levels++] = (struct ctables_level) {
3565 .type = CTL_SUMMARY,
3566 .var_idx = SIZE_MAX,
3570 /* Pivot categories:
3572 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3573 - category for nest->vars[0], if nest->scale_idx != 0
3574 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3575 - category for nest->vars[1], if nest->scale_idx != 1
3577 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3578 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3579 - summary function, if 'a == t->slabels_axis && a ==
3582 Additional dimensions:
3584 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3586 - If 't->label_axis[b] == a' for some 'b != a', add a category
3591 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3593 for (size_t j = 0; j < n_sorted; j++)
3595 struct ctables_cell *cell = sorted[j];
3596 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
3598 size_t n_common = 0;
3601 for (; n_common < n_levels; n_common++)
3603 const struct ctables_level *level = &levels[n_common];
3604 if (level->type == CTL_CATEGORY)
3606 size_t var_idx = level->var_idx;
3607 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3608 if (prev->axes[a].cvs[var_idx].category != c)
3610 else if (c->type != CCT_SUBTOTAL
3611 && c->type != CCT_TOTAL
3612 && c->type != CCT_POSTCOMPUTE
3613 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3614 &cell->axes[a].cvs[var_idx].value,
3615 var_get_type (nest->vars[var_idx])))
3621 for (size_t k = n_common; k < n_levels; k++)
3623 const struct ctables_level *level = &levels[k];
3624 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3625 if (level->type == CTL_SUMMARY)
3627 assert (k == n_levels - 1);
3629 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3630 for (size_t m = 0; m < specs->n; m++)
3632 int leaf = pivot_category_create_leaf (
3633 parent, pivot_value_new_text (specs->specs[m].label));
3640 const struct variable *var = nest->vars[level->var_idx];
3641 struct pivot_value *label;
3642 if (level->type == CTL_VAR)
3644 label = pivot_value_new_variable (var);
3645 label->variable.show = level->vlabel;
3647 else if (level->type == CTL_CATEGORY)
3649 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3650 label = ctables_category_create_label (cv->category,
3656 if (k == n_levels - 1)
3657 prev_leaf = pivot_category_create_leaf (parent, label);
3659 groups[k] = pivot_category_create_group__ (parent, label);
3663 cell->axes[a].leaf = prev_leaf;
3670 for (size_t i = 0; i < t->n_sections; i++)
3672 struct ctables_section *s = &t->sections[i];
3674 struct ctables_cell *cell;
3675 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3680 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3681 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
3682 for (size_t j = 0; j < specs->n; j++)
3685 size_t n_dindexes = 0;
3687 if (summary_dimension)
3688 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
3690 if (categories_dimension)
3692 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3693 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3694 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3695 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
3698 dindexes[n_dindexes++] = ctv->leaf;
3701 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3704 int leaf = cell->axes[a].leaf;
3705 if (a == t->summary_axis && !summary_dimension)
3707 dindexes[n_dindexes++] = leaf;
3710 const struct ctables_summary_spec *ss = &specs->specs[j];
3712 double d = (cell->postcompute
3713 ? ctables_cell_calculate_postcompute (s, cell)
3714 : ctables_summary_value (cell, &cell->summaries[j], ss));
3715 struct pivot_value *value;
3716 if (ct->hide_threshold != 0
3717 && d < ct->hide_threshold
3718 && (cell->postcompute
3720 : ctables_summary_function_is_count (ss->function)))
3722 value = pivot_value_new_user_text_nocopy (
3723 xasprintf ("<%d", ct->hide_threshold));
3725 else if (d == 0 && ct->zero)
3726 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
3727 else if (d == SYSMIS && ct->missing)
3728 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
3729 else if (specs->specs[j].is_ctables_format)
3731 char *s = data_out_stretchy (&(union value) { .f = d },
3733 &specs->specs[j].format,
3734 &ct->ctables_formats, NULL);
3735 value = pivot_value_new_user_text_nocopy (s);
3739 value = pivot_value_new_number (d);
3740 value->numeric.format = specs->specs[j].format;
3742 pivot_table_put (pt, dindexes, n_dindexes, value);
3747 pivot_table_submit (pt);
/* Checks whether the category labels of axis A of table T may be displayed
   on T->label_axis[A], reporting an error with msg() for each of the
   restrictions below that is violated.

   As a side effect this records A in T->clabels_from_axis and the innermost
   variable of the first nest in T->clabels_example.

   The innermost variable of every nest on the axis must be categorical
   rather than scale, its categories must not be sorted by a summary
   function, and all of these variables must agree with the first one in
   width, value labels, and category specifications.

   NOTE(review): the return statements are not visible here; the caller
   combines the results with '&&', so presumably this returns true when the
   position is acceptable -- confirm. */
3751 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
3753 enum pivot_axis_type label_pos = t->label_axis[a];
3757 t->clabels_from_axis = a;
/* Names used only in the error messages below. */
3759 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
3760 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
3762 const struct ctables_stack *stack = &t->stacks[a];
/* The first nest's innermost variable is the reference that all the others
   are compared against. */
3766 const struct ctables_nest *n0 = &stack->nests[0];
3768 const struct variable *v0 = n0->vars[n0->n - 1];
3769 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3770 t->clabels_example = v0;
3772 for (size_t i = 0; i < c0->n_cats; i++)
3773 if (c0->cats[i].type == CCT_FUNCTION)
3775 msg (SE, _("%s=%s is not allowed with sorting based "
3776 "on a summary function."),
3777 subcommand_name, pos_name);
3780 if (n0->n - 1 == n0->scale_idx)
3782 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
3783 "but %s is a scale variable."),
3784 subcommand_name, pos_name, var_get_name (v0));
/* Compare the innermost variable of each remaining nest with the
   reference. */
3788 for (size_t i = 1; i < stack->n; i++)
3790 const struct ctables_nest *ni = &stack->nests[i];
3792 const struct variable *vi = ni->vars[ni->n - 1];
3793 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
3795 if (ni->n - 1 == ni->scale_idx)
3797 msg (SE, _("%s=%s requires the variables to be moved to be "
3798 "categorical, but %s is a scale variable."),
3799 subcommand_name, pos_name, var_get_name (vi));
3802 if (var_get_width (v0) != var_get_width (vi))
3804 msg (SE, _("%s=%s requires the variables to be "
3805 "moved to have the same width, but %s has "
3806 "width %d and %s has width %d."),
3807 subcommand_name, pos_name,
3808 var_get_name (v0), var_get_width (v0),
3809 var_get_name (vi), var_get_width (vi));
3812 if (!val_labs_equal (var_get_value_labels (v0),
3813 var_get_value_labels (vi)))
3815 msg (SE, _("%s=%s requires the variables to be "
3816 "moved to have the same value labels, but %s "
3817 "and %s have different value labels."),
3818 subcommand_name, pos_name,
3819 var_get_name (v0), var_get_name (vi));
3822 if (!ctables_categories_equal (c0, ci))
3824 msg (SE, _("%s=%s requires the variables to be "
3825 "moved to have the same category "
3826 "specifications, but %s and %s have different "
3827 "category specifications."),
3828 subcommand_name, pos_name,
3829 var_get_name (v0), var_get_name (vi));
/* Prepares table T for execution and returns the result of checking the
   category label positions of its row and column axes.

   1. Expands each axis expression into a stack of nests with
      enumerate_fts() and, for every nest and domain type, computes in
      nest->domains[dt] the indexes of the variables that define that
      domain.  A stack holding one empty nest is also built (presumably for
      axes without an expression -- the branch condition is not visible
      here).

   2. Gives every nest on the summary axis a default summary specification
      if it has none, and a set of "total" specifications cloned from the
      cell specifications if it has none of those.

   3. Merges the summary specifications of all nests into T->summary_specs,
      recording in each original specification the index (axis_idx) of its
      merged counterpart. */
3838 ctables_prepare_table (struct ctables_table *t)
3840 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3843 t->stacks[a] = enumerate_fts (a, t->axes[a]);
3845 for (size_t j = 0; j < t->stacks[a].n; j++)
3847 struct ctables_nest *nest = &t->stacks[a].nests[j];
3848 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Room for every variable in the nest; n_domains[dt] counts how many are
   actually used. */
3850 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
3851 nest->n_domains[dt] = 0;
3853 for (size_t k = 0; k < nest->n; k++)
3855 if (k == nest->scale_idx)
3864 if (a != PIVOT_AXIS_LAYER)
3871 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
3872 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
3873 : a == PIVOT_AXIS_ROW)
/* True for the innermost categorical variable: the last variable, or the
   next-to-last when the last one is the scale variable. */
3875 if (k == nest->n - 1
3876 || (nest->scale_idx == nest->n - 1
3877 && k == nest->n - 2))
3883 if (a == PIVOT_AXIS_COLUMN)
3888 if (a == PIVOT_AXIS_ROW)
3893 nest->domains[dt][nest->n_domains[dt]++] = k;
/* A stack consisting of a single nest with no variables. */
3900 struct ctables_nest *nest = xmalloc (sizeof *nest);
3901 *nest = (struct ctables_nest) { .n = 0 };
3902 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
3905 struct ctables_stack *stack = &t->stacks[t->summary_axis];
3906 for (size_t i = 0; i < stack->n; i++)
3908 struct ctables_nest *nest = &stack->nests[i];
/* No summaries were specified for this nest: default to the mean for a
   scale variable and the count otherwise. */
3909 if (!nest->specs[CSV_CELL].n)
3911 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
3912 specs->specs = xmalloc (sizeof *specs->specs);
3915 enum ctables_summary_function function
3916 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): specs->var is read here to choose the default format, but
   it is assigned from nest->vars[0] only further down -- confirm it cannot
   still be null at this point. */
3917 struct ctables_var var = { .is_mrset = false, .var = specs->var };
3919 *specs->specs = (struct ctables_summary_spec) {
3920 .function = function,
3921 .format = ctables_summary_default_format (function, &var),
3922 .label = ctables_summary_default_label (function, 0),
3925 specs->var = nest->vars[0];
3927 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
3928 &nest->specs[CSV_CELL]);
3930 else if (!nest->specs[CSV_TOTAL].n)
3931 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
3932 &nest->specs[CSV_CELL]);
/* Merge the specification sets of all nests.  Each merge item tracks a
   position within one set; there are up to two sets (cell and total) per
   nest. */
3935 struct ctables_summary_spec_set *merged = &t->summary_specs;
3936 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
3938 for (size_t j = 0; j < stack->n; j++)
3940 const struct ctables_nest *nest = &stack->nests[j];
3942 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
3943 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the smallest current spec among the remaining items and append it to
   the merged set. */
3948 struct merge_item min = items[0];
3949 for (size_t j = 1; j < n_left; j++)
3950 if (merge_item_compare_3way (&items[j], &min) < 0)
3953 if (merged->n >= merged->allocated)
3954 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
3955 sizeof *merged->specs);
3956 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item whose current spec equals the one just merged maps to the new
   merged index and advances; an exhausted item is replaced by the last
   remaining one. */
3958 for (size_t j = 0; j < n_left; )
3960 if (merge_item_compare_3way (&items[j], &min) == 0)
3962 struct merge_item *item = &items[j];
3963 item->set->specs[item->ofs].axis_idx = merged->n - 1;
3964 if (++item->ofs >= item->set->n)
3966 items[j] = items[--n_left];
/* Diagnostic dump of the merged specs and of each nest's spec-to-axis_idx
   mapping.  NOTE(review): this prints to stdout; confirm it is compiled out
   in normal builds. */
3975 for (size_t j = 0; j < merged->n; j++)
3976 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
3978 for (size_t j = 0; j < stack->n; j++)
3980 const struct ctables_nest *nest = &stack->nests[j];
3981 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
3983 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
3984 for (size_t k = 0; k < specs->n; k++)
3985 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
3986 specs->specs[k].axis_idx);
3992 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
3993 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
3997 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
3998 enum pivot_axis_type a)
4000 struct ctables_stack *stack = &t->stacks[a];
4001 for (size_t i = 0; i < stack->n; i++)
4003 const struct ctables_nest *nest = &stack->nests[i];
4004 const struct variable *var = nest->vars[nest->n - 1];
4005 const union value *value = case_data (c, var);
4007 if (var_is_numeric (var) && value->f == SYSMIS)
4010 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4012 ctables_value_insert (t, value, var_get_width (var));
4017 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4019 const struct ctables_value *const *ap = a_;
4020 const struct ctables_value *const *bp = b_;
4021 const struct ctables_value *a = *ap;
4022 const struct ctables_value *b = *bp;
4023 const int *width = width_;
4024 return value_compare_3way (&a->value, &b->value, *width);
/* Turns T's hash map of category-label values into the sorted array
   T->clabels_values (with T->n_clabels_values elements) and assigns each
   value its position in that array as its leaf index.

   Before the array is built, values that have value labels on the example
   variable and match one of its categories are added to the map
   (NOTE(review): the condition guarding that step is not visible here). */
4028 ctables_sort_clabels_values (struct ctables_table *t)
4030 const struct variable *v0 = t->clabels_example;
4031 int width = var_get_width (v0);
4033 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4036 const struct val_labs *val_labs = var_get_value_labels (v0);
4037 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4038 vl = val_labs_next (val_labs, vl))
4039 if (ctables_categories_match (c0, &vl->value, v0))
4040 ctables_value_insert (t, &vl->value, width);
/* Copy the map's entries into a flat array... */
4043 size_t n = hmap_count (&t->clabels_values_map);
4044 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4046 struct ctables_value *clv;
4048 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4049 t->clabels_values[i++] = clv;
4050 t->n_clabels_values = n;
/* ...sort it by value... */
4053 sort (t->clabels_values, n, sizeof *t->clabels_values,
4054 compare_clabels_values_3way, &width);
/* ...and number the entries in sorted order. */
4056 for (size_t i = 0; i < n; i++)
4057 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the category specifications in
   CATS call for, so that those categories appear in the output even when no
   case has them.  Depending on the category type, the value added is the
   category's own number or a subset of VAR's labeled values: those inside a
   numeric range, those that are missing, all of them, or those that are not
   missing unless the category includes missing values.

   NOTE(review): most case labels of the switch are not visible here, so
   which category type selects which rule cannot be confirmed from this
   view. */
4061 ctables_add_category_occurrences (const struct variable *var,
4062 struct hmap *occurrences,
4063 const struct ctables_categories *cats)
4065 const struct val_labs *val_labs = var_get_value_labels (var);
4067 for (size_t i = 0; i < cats->n_cats; i++)
4069 const struct ctables_category *c = &cats->cats[i];
/* A single numeric value. */
4073 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Labeled values within the closed range c->range[0]...c->range[1]. */
4081 assert (var_is_numeric (var));
4082 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4083 vl = val_labs_next (val_labs, vl))
4084 if (vl->value.f >= c->range[0] && vl->value.f <= c->range[1])
4085 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
4089 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4090 vl = val_labs_next (val_labs, vl))
4091 if (var_is_value_missing (var, &vl->value))
4092 ctables_add_occurrence (var, &vl->value, occurrences);
/* All labeled values. */
4096 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4097 vl = val_labs_next (val_labs, vl))
4098 ctables_add_occurrence (var, &vl->value, occurrences);
4101 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless the category includes
   them. */
4111 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4112 vl = val_labs_next (val_labs, vl))
4113 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4114 ctables_add_occurrence (var, &vl->value, occurrences);
4117 case CCT_EXCLUDED_MISSING:
4124 ctables_section_recurse_add_empty_categories (
4125 struct ctables_section *s,
4126 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4127 enum pivot_axis_type a, size_t a_idx)
4129 if (a >= PIVOT_N_AXES)
4130 ctables_cell_insert__ (s, c, cats);
4131 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4132 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4135 const struct variable *var = s->nests[a]->vars[a_idx];
4136 const struct ctables_categories *categories = s->table->categories[
4137 var_get_dict_index (var)];
4138 int width = var_get_width (var);
4139 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4140 const struct ctables_occurrence *o;
4141 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4143 union value *value = case_data_rw (c, var);
4144 value_destroy (value, width);
4145 value_clone (value, &o->value, width);
4146 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4147 assert (cats[a][a_idx] != NULL);
4148 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4151 for (size_t i = 0; i < categories->n_cats; i++)
4153 const struct ctables_category *cat = &categories->cats[i];
4154 if (cat->type == CCT_POSTCOMPUTE)
4156 cats[a][a_idx] = cat;
4157 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that no case produced.

   For each categorical variable whose category specifications have
   'show_empty' set, the values its categories call for are added to the
   section's occurrence map.  A scratch case built from the dictionary's
   prototype is then passed to
   ctables_section_recurse_add_empty_categories(), which enumerates the
   combinations.

   NOTE(review): 'show_empty' is declared here but the lines that set and
   test it are not visible; presumably the enumeration is skipped when no
   variable asks for empty categories -- confirm. */
4164 ctables_section_add_empty_categories (struct ctables_section *s)
4166 bool show_empty = false;
4167 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4169 for (size_t k = 0; k < s->nests[a]->n; k++)
4170 if (k != s->nests[a]->scale_idx)
4172 const struct variable *var = s->nests[a]->vars[k];
4173 const struct ctables_categories *cats = s->table->categories[
4174 var_get_dict_index (var)];
4175 if (cats->show_empty)
4178 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
4184 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4185 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4186 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Runs the CTABLES command described by CT on the active dataset DS and
   returns the result of proc_commit().

   1. Allocates and initializes the sections of every table: one per
      combination of row, column, and layer nests.

   2. Makes a single pass over the data, adding each case with its
      dictionary weight and effective weight to every section of every
      table, and collecting category-label values for axes whose labels are
      displayed on a different axis.

   3. For each table, sorts the collected category-label values, adds cells
      for empty categories, and outputs the table. */
4191 ctables_execute (struct dataset *ds, struct ctables *ct)
4193 for (size_t i = 0; i < ct->n_tables; i++)
4195 struct ctables_table *t = ct->tables[i];
/* An axis with an empty stack still counts as one. */
4196 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4197 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4198 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4199 sizeof *t->sections);
4200 size_t ix[PIVOT_N_AXES];
4201 ctables_table_add_section (t, 0, ix);
4204 struct casereader *input = proc_open (ds);
4205 bool warn_on_invalid = true;
4206 for (struct ccase *c = casereader_read (input); c;
4207 case_unref (c), c = casereader_read (input))
/* D_WEIGHT is the case's weight according to the dictionary.  E_WEIGHT is
   derived from CT's own weight variable when one is set. */
4209 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4211 double e_weight = (ct->e_weight
4212 ? var_force_valid_weight (ct->e_weight,
4213 case_num (c, ct->e_weight),
4217 for (size_t i = 0; i < ct->n_tables; i++)
4219 struct ctables_table *t = ct->tables[i];
4221 for (size_t j = 0; j < t->n_sections; j++)
4222 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Only axes whose category labels are shown elsewhere need their values
   collected. */
4224 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4225 if (t->label_axis[a] != a)
4226 ctables_insert_clabels_values (t, c, a);
4229 casereader_destroy (input);
4231 for (size_t i = 0; i < ct->n_tables; i++)
4233 struct ctables_table *t = ct->tables[i];
4235 if (t->clabels_example)
4236 ctables_sort_clabels_values (t);
4238 for (size_t j = 0; j < t->n_sections; j++)
4239 ctables_section_add_empty_categories (&t->sections[j]);
4241 ctables_table_output (ct, ct->tables[i]);
4243 return proc_commit (ds);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer and returns the resulting expression
   (presumably a null pointer on failure -- confirm against the parsers). */
4248 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *);
4251 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4257 case CTPO_CAT_STRING:
4267 for (size_t i = 0; i < 2; i++)
4268 ctables_pcexpr_destroy (e->subs[i]);
4272 case CTPO_CAT_NUMBER:
4273 case CTPO_CAT_RANGE:
4274 case CTPO_CAT_MISSING:
4275 case CTPO_CAT_OTHERNM:
4276 case CTPO_CAT_SUBTOTAL:
4277 case CTPO_CAT_TOTAL:
4281 msg_location_destroy (e->location);
4286 static struct ctables_pcexpr *
4287 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4288 struct ctables_pcexpr *sub0,
4289 struct ctables_pcexpr *sub1)
4291 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4292 *e = (struct ctables_pcexpr) {
4294 .subs = { sub0, sub1 },
4295 .location = msg_location_merged (sub0->location, sub1->location),
4300 /* How to parse an operator. */
4303 enum token_type token;
4304 enum ctables_postcompute_op op;
4307 static const struct operator *
4308 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4310 for (const struct operator *op = ops; op < ops + n_ops; op++)
4311 if (lex_token (lexer) == op->token)
4313 if (op->token != T_NEG_NUM)
4322 static struct ctables_pcexpr *
4323 parse_binary_operators__ (struct lexer *lexer,
4324 const struct operator ops[], size_t n_ops,
4325 parse_recursively_func *parse_next_level,
4326 const char *chain_warning,
4327 struct ctables_pcexpr *lhs)
4329 for (int op_count = 0; ; op_count++)
4331 const struct operator *op = match_operator (lexer, ops, n_ops);
4334 if (op_count > 1 && chain_warning)
4335 msg_at (SW, lhs->location, "%s", chain_warning);
4340 struct ctables_pcexpr *rhs = parse_next_level (lexer);
4343 ctables_pcexpr_destroy (lhs);
4347 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
4351 static struct ctables_pcexpr *
4352 parse_binary_operators (struct lexer *lexer,
4353 const struct operator ops[], size_t n_ops,
4354 parse_recursively_func *parse_next_level,
4355 const char *chain_warning)
4357 struct ctables_pcexpr *lhs = parse_next_level (lexer);
4361 return parse_binary_operators__ (lexer, ops, n_ops, parse_next_level,
4362 chain_warning, lhs);
4365 static struct ctables_pcexpr *parse_add (struct lexer *);
4367 static struct ctables_pcexpr
4368 ctpo_cat_range (double low, double high)
4370 return (struct ctables_pcexpr) {
4371 .op = CTPO_CAT_RANGE,
4372 .range = { low, high },
4376 static struct ctables_pcexpr *
4377 parse_primary (struct lexer *lexer)
4379 int start_ofs = lex_ofs (lexer);
4380 struct ctables_pcexpr e;
4381 if (lex_is_number (lexer))
4383 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4384 .number = lex_number (lexer) };
4387 else if (lex_match_id (lexer, "MISSING"))
4388 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4389 else if (lex_match_id (lexer, "OTHERNM"))
4390 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4391 else if (lex_match_id (lexer, "TOTAL"))
4392 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4393 else if (lex_match_id (lexer, "SUBTOTAL"))
4395 size_t subtotal_index = 0;
4396 if (lex_match (lexer, T_LBRACK))
4398 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4400 subtotal_index = lex_integer (lexer);
4402 if (!lex_force_match (lexer, T_RBRACK))
4405 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4406 .subtotal_index = subtotal_index };
4408 else if (lex_match (lexer, T_LBRACK))
4410 if (lex_match_id (lexer, "LO"))
4412 if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
4414 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4417 else if (lex_is_number (lexer))
4419 double number = lex_number (lexer);
4421 if (lex_match_id (lexer, "THRU"))
4423 if (lex_match_id (lexer, "HI"))
4424 e = ctpo_cat_range (number, DBL_MAX);
4427 if (!lex_force_num (lexer))
4429 e = ctpo_cat_range (number, lex_number (lexer));
4434 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4437 else if (lex_is_string (lexer))
4439 e = (struct ctables_pcexpr) {
4440 .op = CTPO_CAT_STRING,
4441 .string = ss_xstrdup (lex_tokss (lexer)),
4447 lex_error (lexer, NULL);
4451 if (!lex_force_match (lexer, T_RBRACK))
4453 if (e.op == CTPO_CAT_STRING)
4458 else if (lex_match (lexer, T_LPAREN))
4460 struct ctables_pcexpr *ep = parse_add (lexer);
4463 if (!lex_force_match (lexer, T_RPAREN))
4465 ctables_pcexpr_destroy (ep);
4472 lex_error (lexer, NULL);
4476 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4477 return xmemdup (&e, sizeof e);
4480 static struct ctables_pcexpr *
4481 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4482 struct lexer *lexer, int start_ofs)
4484 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4485 *e = (struct ctables_pcexpr) {
4488 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
4493 static struct ctables_pcexpr *
4494 parse_exp (struct lexer *lexer)
4496 static const struct operator op = { T_EXP, CTPO_POW };
4498 const char *chain_warning =
4499 _("The exponentiation operator (`**') is left-associative: "
4500 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4501 "To disable this warning, insert parentheses.");
4503 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4504 return parse_binary_operators (lexer, &op, 1,
4505 parse_primary, chain_warning);
4507 /* Special case for situations like "-5**6", which must be parsed as
4510 int start_ofs = lex_ofs (lexer);
4511 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4512 *lhs = (struct ctables_pcexpr) {
4513 .op = CTPO_CONSTANT,
4514 .number = -lex_tokval (lexer),
4515 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4519 struct ctables_pcexpr *node = parse_binary_operators__ (
4520 lexer, &op, 1, parse_primary, chain_warning, lhs);
4524 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4527 /* Parses the unary minus level. */
4528 static struct ctables_pcexpr *
4529 parse_neg (struct lexer *lexer)
4531 int start_ofs = lex_ofs (lexer);
4532 if (!lex_match (lexer, T_DASH))
4533 return parse_exp (lexer);
4535 struct ctables_pcexpr *inner = parse_neg (lexer);
4539 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4542 /* Parses the multiplication and division level. */
4543 static struct ctables_pcexpr *
4544 parse_mul (struct lexer *lexer)
4546 static const struct operator ops[] =
4548 { T_ASTERISK, CTPO_MUL },
4549 { T_SLASH, CTPO_DIV },
4552 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
4556 /* Parses the addition and subtraction level. */
4557 static struct ctables_pcexpr *
4558 parse_add (struct lexer *lexer)
4560 static const struct operator ops[] =
4562 { T_PLUS, CTPO_ADD },
4563 { T_DASH, CTPO_SUB },
4564 { T_NEG_NUM, CTPO_ADD },
4567 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
4571 static struct ctables_postcompute *
4572 ctables_find_postcompute (struct ctables *ct, const char *name)
4574 struct ctables_postcompute *pc;
4575 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4576 utf8_hash_case_string (name, 0), &ct->postcomputes)
4577 if (!utf8_strcasecmp (pc->name, name))
4583 ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct)
4585 int pcompute_start = lex_ofs (lexer) - 1;
4587 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
4590 char *name = ss_xstrdup (lex_tokss (lexer));
4593 if (!lex_force_match (lexer, T_EQUALS)
4594 || !lex_force_match_id (lexer, "EXPR")
4595 || !lex_force_match (lexer, T_LPAREN))
4601 int expr_start = lex_ofs (lexer);
4602 struct ctables_pcexpr *expr = parse_add (lexer);
4603 int expr_end = lex_ofs (lexer) - 1;
4604 if (!expr || !lex_force_match (lexer, T_RPAREN))
4609 int pcompute_end = lex_ofs (lexer) - 1;
4611 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
4614 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
4617 msg_at (SW, location, _("New definition of &%s will override the "
4618 "previous definition."),
4620 msg_at (SN, pc->location, _("This is the previous definition."));
4622 ctables_pcexpr_destroy (pc->expr);
4623 msg_location_destroy (pc->location);
4628 pc = xmalloc (sizeof *pc);
4629 *pc = (struct ctables_postcompute) { .name = name };
4630 hmap_insert (&ct->postcomputes, &pc->hmap_node,
4631 utf8_hash_case_string (pc->name, 0));
4634 pc->location = location;
4636 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
4641 ctables_parse_pproperties_format (struct lexer *lexer,
4642 struct ctables_summary_spec_set *sss)
4644 *sss = (struct ctables_summary_spec_set) { .n = 0 };
4646 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
4647 && !(lex_token (lexer) == T_ID
4648 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
4649 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
4650 lex_tokss (lexer)))))
4652 /* Parse function. */
4653 enum ctables_summary_function function;
4654 if (!parse_ctables_summary_function (lexer, &function))
4657 /* Parse percentile. */
4658 double percentile = 0;
4659 if (function == CTSF_PTILE)
4661 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4663 percentile = lex_number (lexer);
4668 struct fmt_spec format;
4669 if (!parse_format_specifier (lexer, &format)
4670 || !fmt_check_output (&format)
4671 || !fmt_check_type_compat (&format, VAL_NUMERIC))
4674 if (sss->n >= sss->allocated)
4675 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
4676 sizeof *sss->specs);
4677 sss->specs[sss->n++] = (struct ctables_summary_spec) {
4678 .function = function,
4679 .percentile = percentile,
4686 ctables_summary_spec_set_uninit (sss);
4691 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
4693 struct ctables_postcompute **pcs = NULL;
4695 size_t allocated_pcs = 0;
4697 while (lex_match (lexer, T_AND))
4699 if (!lex_force_id (lexer))
4701 struct ctables_postcompute *pc
4702 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
4705 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
4710 if (n_pcs >= allocated_pcs)
4711 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
4715 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4717 if (lex_match_id (lexer, "LABEL"))
4719 lex_match (lexer, T_EQUALS);
4720 if (!lex_force_string (lexer))
4723 for (size_t i = 0; i < n_pcs; i++)
4725 free (pcs[i]->label);
4726 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
4731 else if (lex_match_id (lexer, "FORMAT"))
4733 lex_match (lexer, T_EQUALS);
4735 struct ctables_summary_spec_set sss;
4736 if (!ctables_parse_pproperties_format (lexer, &sss))
4739 for (size_t i = 0; i < n_pcs; i++)
4742 ctables_summary_spec_set_uninit (pcs[i]->specs);
4744 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
4745 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
4747 ctables_summary_spec_set_uninit (&sss);
4749 else if (lex_match_id (lexer, "HIDESOURCECATS"))
4751 lex_match (lexer, T_EQUALS);
4752 bool hide_source_cats;
4753 if (!parse_bool (lexer, &hide_source_cats))
4755 for (size_t i = 0; i < n_pcs; i++)
4756 pcs[i]->hide_source_cats = hide_source_cats;
4760 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
4773 cmd_ctables (struct lexer *lexer, struct dataset *ds)
4775 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
4776 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
4777 enum settings_value_show tvars = settings_get_show_variables ();
4778 for (size_t i = 0; i < n_vars; i++)
4779 vlabels[i] = (enum ctables_vlabel) tvars;
4781 struct pivot_table_look *look = pivot_table_look_unshare (
4782 pivot_table_look_ref (pivot_table_look_get_default ()));
4783 look->omit_empty = false;
4785 struct ctables *ct = xmalloc (sizeof *ct);
4786 *ct = (struct ctables) {
4787 .dict = dataset_dict (ds),
4789 .ctables_formats = FMT_SETTINGS_INIT,
4791 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
4797 const char *dot_string;
4798 const char *comma_string;
4800 static const struct ctf ctfs[4] = {
4801 { CTEF_NEGPAREN, "(,,,)", "(...)" },
4802 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
4803 { CTEF_PAREN, "-,(,),", "-.(.)." },
4804 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
4806 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
4807 for (size_t i = 0; i < 4; i++)
4809 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
4810 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
4811 fmt_number_style_from_string (s));
4814 if (!lex_force_match (lexer, T_SLASH))
4817 while (!lex_match_id (lexer, "TABLE"))
4819 if (lex_match_id (lexer, "FORMAT"))
4821 double widths[2] = { SYSMIS, SYSMIS };
4822 double units_per_inch = 72.0;
4824 while (lex_token (lexer) != T_SLASH)
4826 if (lex_match_id (lexer, "MINCOLWIDTH"))
4828 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
4831 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
4833 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
4836 else if (lex_match_id (lexer, "UNITS"))
4838 lex_match (lexer, T_EQUALS);
4839 if (lex_match_id (lexer, "POINTS"))
4840 units_per_inch = 72.0;
4841 else if (lex_match_id (lexer, "INCHES"))
4842 units_per_inch = 1.0;
4843 else if (lex_match_id (lexer, "CM"))
4844 units_per_inch = 2.54;
4847 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
4851 else if (lex_match_id (lexer, "EMPTY"))
4856 lex_match (lexer, T_EQUALS);
4857 if (lex_match_id (lexer, "ZERO"))
4859 /* Nothing to do. */
4861 else if (lex_match_id (lexer, "BLANK"))
4862 ct->zero = xstrdup ("");
4863 else if (lex_force_string (lexer))
4865 ct->zero = ss_xstrdup (lex_tokss (lexer));
4871 else if (lex_match_id (lexer, "MISSING"))
4873 lex_match (lexer, T_EQUALS);
4874 if (!lex_force_string (lexer))
4878 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
4879 ? ss_xstrdup (lex_tokss (lexer))
4885 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
4886 "UNITS", "EMPTY", "MISSING");
4891 if (widths[0] != SYSMIS && widths[1] != SYSMIS
4892 && widths[0] > widths[1])
4894 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
4898 for (size_t i = 0; i < 2; i++)
4899 if (widths[i] != SYSMIS)
4901 int *wr = ct->look->width_ranges[TABLE_HORZ];
4902 wr[i] = widths[i] / units_per_inch * 96.0;
4907 else if (lex_match_id (lexer, "VLABELS"))
4909 if (!lex_force_match_id (lexer, "VARIABLES"))
4911 lex_match (lexer, T_EQUALS);
4913 struct variable **vars;
4915 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
4919 if (!lex_force_match_id (lexer, "DISPLAY"))
4924 lex_match (lexer, T_EQUALS);
4926 enum ctables_vlabel vlabel;
4927 if (lex_match_id (lexer, "DEFAULT"))
4928 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
4929 else if (lex_match_id (lexer, "NAME"))
4931 else if (lex_match_id (lexer, "LABEL"))
4932 vlabel = CTVL_LABEL;
4933 else if (lex_match_id (lexer, "BOTH"))
4935 else if (lex_match_id (lexer, "NONE"))
4939 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
4945 for (size_t i = 0; i < n_vars; i++)
4946 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
4949 else if (lex_match_id (lexer, "MRSETS"))
4951 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
4953 lex_match (lexer, T_EQUALS);
4954 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
4957 else if (lex_match_id (lexer, "SMISSING"))
4959 if (lex_match_id (lexer, "VARIABLE"))
4960 ct->smissing_listwise = false;
4961 else if (lex_match_id (lexer, "LISTWISE"))
4962 ct->smissing_listwise = true;
4965 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
4969 else if (lex_match_id (lexer, "PCOMPUTE"))
4971 if (!ctables_parse_pcompute (lexer, ct))
4974 else if (lex_match_id (lexer, "PPROPERTIES"))
4976 if (!ctables_parse_pproperties (lexer, ct))
4979 else if (lex_match_id (lexer, "WEIGHT"))
4981 if (!lex_force_match_id (lexer, "VARIABLE"))
4983 lex_match (lexer, T_EQUALS);
4984 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
4988 else if (lex_match_id (lexer, " HIDESMALLCOUNTS"))
4990 if (lex_match_id (lexer, "COUNT"))
4992 lex_match (lexer, T_EQUALS);
4993 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
4996 ct->hide_threshold = lex_integer (lexer);
4999 else if (ct->hide_threshold == 0)
5000 ct->hide_threshold = 5;
5004 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5005 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5006 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5010 if (!lex_force_match (lexer, T_SLASH))
5014 size_t allocated_tables = 0;
5017 if (ct->n_tables >= allocated_tables)
5018 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5019 sizeof *ct->tables);
5021 struct ctables_category *cat = xmalloc (sizeof *cat);
5022 *cat = (struct ctables_category) {
5024 .include_missing = false,
5025 .sort_ascending = true,
5028 struct ctables_categories *c = xmalloc (sizeof *c);
5029 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5030 *c = (struct ctables_categories) {
5037 struct ctables_categories **categories = xnmalloc (n_vars,
5038 sizeof *categories);
5039 for (size_t i = 0; i < n_vars; i++)
5042 struct ctables_table *t = xmalloc (sizeof *t);
5043 *t = (struct ctables_table) {
5045 .slabels_axis = PIVOT_AXIS_COLUMN,
5046 .slabels_visible = true,
5047 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5049 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5050 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5051 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5053 .clabels_from_axis = PIVOT_AXIS_LAYER,
5054 .categories = categories,
5055 .n_categories = n_vars,
5058 ct->tables[ct->n_tables++] = t;
5060 lex_match (lexer, T_EQUALS);
5061 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5063 if (lex_match (lexer, T_BY))
5065 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5066 ct, t, PIVOT_AXIS_COLUMN))
5069 if (lex_match (lexer, T_BY))
5071 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5072 ct, t, PIVOT_AXIS_LAYER))
5077 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5078 && !t->axes[PIVOT_AXIS_LAYER])
5080 lex_error (lexer, _("At least one variable must be specified."));
5084 const struct ctables_axis *scales[PIVOT_N_AXES];
5085 size_t n_scales = 0;
5086 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5088 scales[a] = find_scale (t->axes[a]);
5094 msg (SE, _("Scale variables may appear only on one axis."));
5095 if (scales[PIVOT_AXIS_ROW])
5096 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5097 _("This scale variable appears on the rows axis."));
5098 if (scales[PIVOT_AXIS_COLUMN])
5099 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5100 _("This scale variable appears on the columns axis."));
5101 if (scales[PIVOT_AXIS_LAYER])
5102 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5103 _("This scale variable appears on the layer axis."));
5107 const struct ctables_axis *summaries[PIVOT_N_AXES];
5108 size_t n_summaries = 0;
5109 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5111 summaries[a] = (scales[a]
5113 : find_categorical_summary_spec (t->axes[a]));
5117 if (n_summaries > 1)
5119 msg (SE, _("Summaries may appear only on one axis."));
5120 if (summaries[PIVOT_AXIS_ROW])
5121 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5122 _("This variable on the rows axis has a summary."));
5123 if (summaries[PIVOT_AXIS_COLUMN])
5124 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5125 _("This variable on the columns axis has a summary."));
5126 if (summaries[PIVOT_AXIS_LAYER])
5127 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5128 _("This variable on the layers axis has a summary."));
5131 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5132 if (n_summaries ? summaries[a] : t->axes[a])
5134 t->summary_axis = a;
5138 if (lex_token (lexer) == T_ENDCMD)
5140 if (!ctables_prepare_table (t))
5144 if (!lex_force_match (lexer, T_SLASH))
5147 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5149 if (lex_match_id (lexer, "SLABELS"))
5151 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5153 if (lex_match_id (lexer, "POSITION"))
5155 lex_match (lexer, T_EQUALS);
5156 if (lex_match_id (lexer, "COLUMN"))
5157 t->slabels_axis = PIVOT_AXIS_COLUMN;
5158 else if (lex_match_id (lexer, "ROW"))
5159 t->slabels_axis = PIVOT_AXIS_ROW;
5160 else if (lex_match_id (lexer, "LAYER"))
5161 t->slabels_axis = PIVOT_AXIS_LAYER;
5164 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5168 else if (lex_match_id (lexer, "VISIBLE"))
5170 lex_match (lexer, T_EQUALS);
5171 if (!parse_bool (lexer, &t->slabels_visible))
5176 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5181 else if (lex_match_id (lexer, "CLABELS"))
5183 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5185 if (lex_match_id (lexer, "AUTO"))
5187 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5188 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5190 else if (lex_match_id (lexer, "ROWLABELS"))
5192 lex_match (lexer, T_EQUALS);
5193 if (lex_match_id (lexer, "OPPOSITE"))
5194 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5195 else if (lex_match_id (lexer, "LAYER"))
5196 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5199 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5203 else if (lex_match_id (lexer, "COLLABELS"))
5205 lex_match (lexer, T_EQUALS);
5206 if (lex_match_id (lexer, "OPPOSITE"))
5207 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5208 else if (lex_match_id (lexer, "LAYER"))
5209 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5212 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5218 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5224 else if (lex_match_id (lexer, "CRITERIA"))
5226 if (!lex_force_match_id (lexer, "CILEVEL"))
5228 lex_match (lexer, T_EQUALS);
5230 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5232 t->cilevel = lex_number (lexer);
5235 else if (lex_match_id (lexer, "CATEGORIES"))
5237 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5241 else if (lex_match_id (lexer, "TITLES"))
5246 if (lex_match_id (lexer, "CAPTION"))
5247 textp = &t->caption;
5248 else if (lex_match_id (lexer, "CORNER"))
5250 else if (lex_match_id (lexer, "TITLE"))
5254 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5257 lex_match (lexer, T_EQUALS);
5259 struct string s = DS_EMPTY_INITIALIZER;
5260 while (lex_is_string (lexer))
5262 if (!ds_is_empty (&s))
5263 ds_put_byte (&s, ' ');
5264 ds_put_substring (&s, lex_tokss (lexer));
5268 *textp = ds_steal_cstr (&s);
5270 while (lex_token (lexer) != T_SLASH
5271 && lex_token (lexer) != T_ENDCMD);
5273 else if (lex_match_id (lexer, "SIGTEST"))
5277 t->chisq = xmalloc (sizeof *t->chisq);
5278 *t->chisq = (struct ctables_chisq) {
5280 .include_mrsets = true,
5281 .all_visible = true,
5287 if (lex_match_id (lexer, "TYPE"))
5289 lex_match (lexer, T_EQUALS);
5290 if (!lex_force_match_id (lexer, "CHISQUARE"))
5293 else if (lex_match_id (lexer, "ALPHA"))
5295 lex_match (lexer, T_EQUALS);
5296 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5298 t->chisq->alpha = lex_number (lexer);
5301 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5303 lex_match (lexer, T_EQUALS);
5304 if (parse_bool (lexer, &t->chisq->include_mrsets))
5307 else if (lex_match_id (lexer, "CATEGORIES"))
5309 lex_match (lexer, T_EQUALS);
5310 if (lex_match_id (lexer, "ALLVISIBLE"))
5311 t->chisq->all_visible = true;
5312 else if (lex_match_id (lexer, "SUBTOTALS"))
5313 t->chisq->all_visible = false;
5316 lex_error_expecting (lexer,
5317 "ALLVISIBLE", "SUBTOTALS");
5323 lex_error_expecting (lexer, "TYPE", "ALPHA",
5324 "INCLUDEMRSETS", "CATEGORIES");
5328 while (lex_token (lexer) != T_SLASH
5329 && lex_token (lexer) != T_ENDCMD);
5331 else if (lex_match_id (lexer, "COMPARETEST"))
5335 t->pairwise = xmalloc (sizeof *t->pairwise);
5336 *t->pairwise = (struct ctables_pairwise) {
5338 .alpha = { .05, .05 },
5339 .adjust = BONFERRONI,
5340 .include_mrsets = true,
5341 .meansvariance_allcats = true,
5342 .all_visible = true,
5351 if (lex_match_id (lexer, "TYPE"))
5353 lex_match (lexer, T_EQUALS);
5354 if (lex_match_id (lexer, "PROP"))
5355 t->pairwise->type = PROP;
5356 else if (lex_match_id (lexer, "MEAN"))
5357 t->pairwise->type = MEAN;
5360 lex_error_expecting (lexer, "PROP", "MEAN");
5364 else if (lex_match_id (lexer, "ALPHA"))
5366 lex_match (lexer, T_EQUALS);
5368 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5370 double a0 = lex_number (lexer);
5373 lex_match (lexer, T_COMMA);
5374 if (lex_is_number (lexer))
5376 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5378 double a1 = lex_number (lexer);
5381 t->pairwise->alpha[0] = MIN (a0, a1);
5382 t->pairwise->alpha[1] = MAX (a0, a1);
5385 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5387 else if (lex_match_id (lexer, "ADJUST"))
5389 lex_match (lexer, T_EQUALS);
5390 if (lex_match_id (lexer, "BONFERRONI"))
5391 t->pairwise->adjust = BONFERRONI;
5392 else if (lex_match_id (lexer, "BH"))
5393 t->pairwise->adjust = BH;
5394 else if (lex_match_id (lexer, "NONE"))
5395 t->pairwise->adjust = 0;
5398 lex_error_expecting (lexer, "BONFERRONI", "BH",
5403 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5405 lex_match (lexer, T_EQUALS);
5406 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5409 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5411 lex_match (lexer, T_EQUALS);
5412 if (lex_match_id (lexer, "ALLCATS"))
5413 t->pairwise->meansvariance_allcats = true;
5414 else if (lex_match_id (lexer, "TESTEDCATS"))
5415 t->pairwise->meansvariance_allcats = false;
5418 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5422 else if (lex_match_id (lexer, "CATEGORIES"))
5424 lex_match (lexer, T_EQUALS);
5425 if (lex_match_id (lexer, "ALLVISIBLE"))
5426 t->pairwise->all_visible = true;
5427 else if (lex_match_id (lexer, "SUBTOTALS"))
5428 t->pairwise->all_visible = false;
5431 lex_error_expecting (lexer, "ALLVISIBLE",
5436 else if (lex_match_id (lexer, "MERGE"))
5438 lex_match (lexer, T_EQUALS);
5439 if (!parse_bool (lexer, &t->pairwise->merge))
5442 else if (lex_match_id (lexer, "STYLE"))
5444 lex_match (lexer, T_EQUALS);
5445 if (lex_match_id (lexer, "APA"))
5446 t->pairwise->apa_style = true;
5447 else if (lex_match_id (lexer, "SIMPLE"))
5448 t->pairwise->apa_style = false;
5451 lex_error_expecting (lexer, "APA", "SIMPLE");
5455 else if (lex_match_id (lexer, "SHOWSIG"))
5457 lex_match (lexer, T_EQUALS);
5458 if (!parse_bool (lexer, &t->pairwise->show_sig))
5463 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5464 "INCLUDEMRSETS", "MEANSVARIANCE",
5465 "CATEGORIES", "MERGE", "STYLE",
5470 while (lex_token (lexer) != T_SLASH
5471 && lex_token (lexer) != T_ENDCMD);
5475 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5476 "CRITERIA", "CATEGORIES", "TITLES",
5477 "SIGTEST", "COMPARETEST");
5481 if (!lex_match (lexer, T_SLASH))
5485 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5486 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5488 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5492 if (!ctables_prepare_table (t))
5495 while (lex_token (lexer) != T_ENDCMD);
5497 bool ok = ctables_execute (ds, ct);
5498 ctables_destroy (ct);
5499 return ok ? CMD_SUCCESS : CMD_FAILURE;
5502 ctables_destroy (ct);