1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-out.h"
25 #include "data/dataset.h"
26 #include "data/dictionary.h"
27 #include "data/mrset.h"
28 #include "data/subcase.h"
29 #include "data/value-labels.h"
30 #include "language/command.h"
31 #include "language/lexer/format-parser.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/hmap.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "libpspp/string-array.h"
41 #include "math/mode.h"
42 #include "math/moments.h"
43 #include "math/percentiles.h"
44 #include "math/sort.h"
45 #include "output/pivot-table.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
51 #define _(msgid) gettext (msgid)
52 #define N_(msgid) (msgid)
56 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
57 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
58 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
59 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
63 - unweighted summaries (U*)
64 - lower confidence limits (*.LCL)
65 - upper confidence limits (*.UCL)
66 - standard error (*.SE)
69 /* All variables. */ \
70 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
71 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
72 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
73 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
74 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
94 /* Scale variables, totals, and subtotals. */ \
95 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
96 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
97 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
106 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
107 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
108 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
109 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
111 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
112 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
113 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
119 #if 0 /* Multiple response sets not yet implemented. */
120 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
121 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
122 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
123 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
124 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
125 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
126 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
127 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
128 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
129 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
130 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
131 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
132 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
133 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
134 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
135 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
136 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
137 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
138 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
139 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
140 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
141 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
144 enum ctables_summary_function
146 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
152 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
153 N_CTSF_FUNCTIONS = SUMMARIES
157 static bool ctables_summary_function_is_count (enum ctables_summary_function);
159 enum ctables_domain_type
161 /* Within a section, where stacked variables divide one section from
163 CTDT_TABLE, /* All layers of a whole section. */
164 CTDT_LAYER, /* One layer within a section. */
165 CTDT_LAYERROW, /* Row in one layer within a section. */
166 CTDT_LAYERCOL, /* Column in one layer within a section. */
168 /* Within a subtable, where a subtable pairs an innermost row variable with
169 an innermost column variable within a single layer. */
170 CTDT_SUBTABLE, /* Whole subtable. */
171 CTDT_ROW, /* Row within a subtable. */
172 CTDT_COL, /* Column within a subtable. */
176 struct ctables_domain
178 struct hmap_node node;
180 const struct ctables_cell *example;
182 double d_valid; /* Dictionary weight. */
185 double e_valid; /* Effective weight */
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The domains that contain this cell. */
204 bool contributes_to_domains;
205 struct ctables_domain *domains[N_CTDTS];
209 /* Is at least one value missing, whether included or excluded? */
212 /* Is at least one value missing and excluded? */
213 bool excluded_missing;
216 enum ctables_summary_variant sv;
218 struct ctables_cell_axis
220 struct ctables_cell_value
222 const struct ctables_category *category;
230 union ctables_summary *summaries;
235 const struct dictionary *dict;
236 struct pivot_table_look *look;
238 /* CTABLES has a number of extra formats that we implement via custom
239 currency specifications on an alternate fmt_settings. */
240 #define CTEF_NEGPAREN FMT_CCA
241 #define CTEF_NEQUAL FMT_CCB
242 #define CTEF_PAREN FMT_CCC
243 #define CTEF_PCTPAREN FMT_CCD
244 struct fmt_settings ctables_formats;
246 /* If this is NULL, zeros are displayed using the normal print format.
247 Otherwise, this string is displayed. */
250 /* If this is NULL, missing values are displayed using the normal print
251 format. Otherwise, this string is displayed. */
254 /* Indexed by variable dictionary index. */
255 enum ctables_vlabel *vlabels;
257 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
259 bool mrsets_count_duplicates; /* MRSETS. */
260 bool smissing_listwise; /* SMISSING. */
261 struct variable *e_weight; /* WEIGHT. */
262 int hide_threshold; /* HIDESMALLCOUNTS. */
264 struct ctables_table **tables;
268 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
271 struct ctables_postcompute
273 struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
274 char *name; /* Name, without leading &. */
276 struct msg_location *location; /* Location of definition. */
277 struct ctables_pcexpr *expr;
279 struct ctables_summary_spec_set *specs;
280 bool hide_source_cats;
283 struct ctables_pcexpr
293 enum ctables_postcompute_op
296 CTPO_CONSTANT, /* 5 */
297 CTPO_CAT_NUMBER, /* [5] */
298 CTPO_CAT_STRING, /* ["STRING"] */
299 CTPO_CAT_RANGE, /* [LO THRU 5] */
300 CTPO_CAT_MISSING, /* MISSING */
301 CTPO_CAT_OTHERNM, /* OTHERNM */
302 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
303 CTPO_CAT_TOTAL, /* TOTAL */
317 /* CTPO_CAT_NUMBER. */
320 /* CTPO_CAT_STRING. */
323 /* CTPO_CAT_RANGE. */
326 /* CTPO_CAT_SUBTOTAL. */
327 size_t subtotal_index;
329 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
330 One element: CTPO_NEG. */
331 struct ctables_pcexpr *subs[2];
334 /* Source location. */
335 struct msg_location *location;
338 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
339 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
340 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
341 struct ctables_pcexpr *sub1);
343 struct ctables_summary_spec_set
345 struct ctables_summary_spec *specs;
349 /* The variable to which the summary specs are applied. */
350 struct variable *var;
352 /* Whether the variable to which the summary specs are applied is a scale
353 variable for the purpose of summarization.
355 (VALIDN and TOTALN act differently for summarizing scale and categorical
360 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
361 const struct ctables_summary_spec_set *);
362 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
364 /* A nested sequence of variables, e.g. a > b > c. */
367 struct variable **vars;
370 size_t *domains[N_CTDTS];
371 size_t n_domains[N_CTDTS];
373 struct ctables_summary_spec_set specs[N_CSVS];
376 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
379 struct ctables_nest *nests;
385 struct hmap_node node;
390 struct ctables_occurrence
392 struct hmap_node node;
396 struct ctables_section
398 struct ctables_table *table;
399 struct ctables_nest *nests[PIVOT_N_AXES];
400 struct hmap *occurrences[PIVOT_N_AXES];
401 struct hmap cells; /* Contains "struct ctable_cell"s. */
402 struct hmap domains[N_CTDTS]; /* Contains "struct ctable_domain"s. */
407 struct ctables *ctables;
408 struct ctables_axis *axes[PIVOT_N_AXES];
409 struct ctables_stack stacks[PIVOT_N_AXES];
410 struct ctables_section *sections;
412 enum pivot_axis_type summary_axis;
413 struct ctables_summary_spec_set summary_specs;
415 const struct variable *clabels_example;
416 struct hmap clabels_values_map;
417 struct ctables_value **clabels_values;
418 size_t n_clabels_values;
420 enum pivot_axis_type slabels_axis;
421 bool slabels_visible;
423 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
425 Most commonly, label_axis[a] == a, and in particular we always have
426 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
428 If ROWLABELS or COLLABELS is specified, then one of
429 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
430 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
432 enum pivot_axis_type label_axis[PIVOT_N_AXES];
433 enum pivot_axis_type clabels_from_axis;
435 /* Indexed by variable dictionary index. */
436 struct ctables_categories **categories;
445 struct ctables_chisq *chisq;
446 struct ctables_pairwise *pairwise;
454 struct variable *var;
455 const struct mrset *mrset;
459 static const struct fmt_spec *
460 ctables_var_get_print_format (const struct ctables_var *var)
462 return (var->is_mrset
463 ? var_get_print_format (var->mrset->vars[0])
464 : var_get_print_format (var->var));
468 ctables_var_name (const struct ctables_var *var)
470 return var->is_mrset ? var->mrset->name : var_get_name (var->var);
473 struct ctables_categories
476 struct ctables_category *cats;
481 struct ctables_category
483 enum ctables_category_type
485 /* Explicit category lists. */
493 /* Totals and subtotals. */
497 /* Implicit category lists. */
502 /* For contributing to TOTALN. */
503 CCT_EXCLUDED_MISSING,
507 struct ctables_category *subtotal;
513 double number; /* CCT_NUMBER. */
514 char *string; /* CCT_STRING. */
515 double range[2]; /* CCT_RANGE. */
519 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
520 bool hide_subcategories; /* CCT_SUBTOTAL. */
523 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
525 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
528 bool include_missing;
532 enum ctables_summary_function sort_function;
533 struct variable *sort_var;
538 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
539 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
540 struct msg_location *location;
544 ctables_category_uninit (struct ctables_category *cat)
555 case CCT_POSTCOMPUTE:
564 free (cat->total_label);
572 case CCT_EXCLUDED_MISSING:
578 ctables_category_equal (const struct ctables_category *a,
579 const struct ctables_category *b)
581 if (a->type != b->type)
587 return a->number == b->number;
590 return strcmp (a->string, b->string);
593 return a->range[0] == b->range[0] && a->range[1] == b->range[1];
599 case CCT_POSTCOMPUTE:
600 return a->pc == b->pc;
604 return !strcmp (a->total_label, b->total_label);
609 return (a->include_missing == b->include_missing
610 && a->sort_ascending == b->sort_ascending
611 && a->sort_function == b->sort_function
612 && a->sort_var == b->sort_var
613 && a->percentile == b->percentile);
615 case CCT_EXCLUDED_MISSING:
623 ctables_categories_unref (struct ctables_categories *c)
628 assert (c->n_refs > 0);
632 for (size_t i = 0; i < c->n_cats; i++)
633 ctables_category_uninit (&c->cats[i]);
639 ctables_categories_equal (const struct ctables_categories *a,
640 const struct ctables_categories *b)
642 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
645 for (size_t i = 0; i < a->n_cats; i++)
646 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
652 /* Chi-square test (SIGTEST). */
660 /* Pairwise comparison test (COMPARETEST). */
661 struct ctables_pairwise
663 enum { PROP, MEAN } type;
666 bool meansvariance_allcats;
668 enum { BONFERRONI = 1, BH } adjust;
692 struct ctables_var var;
694 struct ctables_summary_spec_set specs[N_CSVS];
698 struct ctables_axis *subs[2];
701 struct msg_location *loc;
704 static void ctables_axis_destroy (struct ctables_axis *);
713 enum ctables_function_availability
715 CTFA_ALL, /* Any variables. */
716 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
717 CTFA_MRSETS, /* Only multiple-response sets */
720 struct ctables_summary_spec
722 enum ctables_summary_function function;
723 double percentile; /* CTSF_PTILE only. */
726 struct fmt_spec format;
727 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
733 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
734 const struct ctables_summary_spec *src)
737 dst->label = xstrdup (src->label);
741 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
748 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
749 const struct ctables_summary_spec_set *src)
751 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
752 for (size_t i = 0; i < src->n; i++)
753 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
755 *dst = (struct ctables_summary_spec_set) {
760 .is_scale = src->is_scale,
765 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
767 for (size_t i = 0; i < set->n; i++)
768 ctables_summary_spec_uninit (&set->specs[i]);
773 parse_col_width (struct lexer *lexer, const char *name, double *width)
775 lex_match (lexer, T_EQUALS);
776 if (lex_match_id (lexer, "DEFAULT"))
778 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
780 *width = lex_number (lexer);
790 parse_bool (struct lexer *lexer, bool *b)
792 if (lex_match_id (lexer, "NO"))
794 else if (lex_match_id (lexer, "YES"))
798 lex_error_expecting (lexer, "YES", "NO");
804 static enum ctables_function_availability
805 ctables_function_availability (enum ctables_summary_function f)
807 static enum ctables_function_availability availability[] = {
808 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
813 return availability[f];
817 ctables_summary_function_is_count (enum ctables_summary_function f)
823 case CTSF_ROWPCT_COUNT:
824 case CTSF_COLPCT_COUNT:
825 case CTSF_TABLEPCT_COUNT:
826 case CTSF_SUBTABLEPCT_COUNT:
827 case CTSF_LAYERPCT_COUNT:
828 case CTSF_LAYERROWPCT_COUNT:
829 case CTSF_LAYERCOLPCT_COUNT:
832 case CTSF_ROWPCT_VALIDN:
833 case CTSF_COLPCT_VALIDN:
834 case CTSF_TABLEPCT_VALIDN:
835 case CTSF_SUBTABLEPCT_VALIDN:
836 case CTSF_LAYERPCT_VALIDN:
837 case CTSF_LAYERROWPCT_VALIDN:
838 case CTSF_LAYERCOLPCT_VALIDN:
839 case CTSF_ROWPCT_TOTALN:
840 case CTSF_COLPCT_TOTALN:
841 case CTSF_TABLEPCT_TOTALN:
842 case CTSF_SUBTABLEPCT_TOTALN:
843 case CTSF_LAYERPCT_TOTALN:
844 case CTSF_LAYERROWPCT_TOTALN:
845 case CTSF_LAYERCOLPCT_TOTALN:
862 case CTSF_ROWPCT_SUM:
863 case CTSF_COLPCT_SUM:
864 case CTSF_TABLEPCT_SUM:
865 case CTSF_SUBTABLEPCT_SUM:
866 case CTSF_LAYERPCT_SUM:
867 case CTSF_LAYERROWPCT_SUM:
868 case CTSF_LAYERCOLPCT_SUM:
876 parse_ctables_summary_function (struct lexer *lexer,
877 enum ctables_summary_function *f)
881 enum ctables_summary_function function;
882 struct substring name;
884 static struct pair names[] = {
885 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
886 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
889 /* The .COUNT suffix may be omitted. */
890 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
891 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
892 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
893 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
894 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
895 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
896 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
900 if (!lex_force_id (lexer))
903 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
904 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
906 *f = names[i].function;
911 lex_error (lexer, _("Expecting summary function name."));
916 ctables_axis_destroy (struct ctables_axis *axis)
924 for (size_t i = 0; i < N_CSVS; i++)
925 ctables_summary_spec_set_uninit (&axis->specs[i]);
930 ctables_axis_destroy (axis->subs[0]);
931 ctables_axis_destroy (axis->subs[1]);
934 msg_location_destroy (axis->loc);
938 static struct ctables_axis *
939 ctables_axis_new_nonterminal (enum ctables_axis_op op,
940 struct ctables_axis *sub0,
941 struct ctables_axis *sub1,
942 struct lexer *lexer, int start_ofs)
944 struct ctables_axis *axis = xmalloc (sizeof *axis);
945 *axis = (struct ctables_axis) {
947 .subs = { sub0, sub1 },
948 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
953 struct ctables_axis_parse_ctx
956 struct dictionary *dict;
958 struct ctables_table *t;
961 static struct fmt_spec
962 ctables_summary_default_format (enum ctables_summary_function function,
963 const struct ctables_var *var)
965 static const enum ctables_format default_formats[] = {
966 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
970 switch (default_formats[function])
973 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
976 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
979 return *ctables_var_get_print_format (var);
987 ctables_summary_default_label (enum ctables_summary_function function,
990 static const char *default_labels[] = {
991 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
996 return (function == CTSF_PTILE
997 ? xasprintf (_("Percentile %.2f"), percentile)
998 : xstrdup (gettext (default_labels[function])));
1002 ctables_summary_function_name (enum ctables_summary_function function)
1004 static const char *names[] = {
1005 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1009 return names[function];
1013 add_summary_spec (struct ctables_axis *axis,
1014 enum ctables_summary_function function, double percentile,
1015 const char *label, const struct fmt_spec *format,
1016 bool is_ctables_format, const struct msg_location *loc,
1017 enum ctables_summary_variant sv)
1019 if (axis->op == CTAO_VAR)
1021 const char *function_name = ctables_summary_function_name (function);
1022 const char *var_name = ctables_var_name (&axis->var);
1023 switch (ctables_function_availability (function))
1026 if (!axis->var.is_mrset)
1028 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1029 "response sets."), function_name);
1030 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1041 _("Summary function %s applies only to scale variables."),
1043 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1054 struct ctables_summary_spec_set *set = &axis->specs[sv];
1055 if (set->n >= set->allocated)
1056 set->specs = x2nrealloc (set->specs, &set->allocated,
1057 sizeof *set->specs);
1059 struct ctables_summary_spec *dst = &set->specs[set->n++];
1060 *dst = (struct ctables_summary_spec) {
1061 .function = function,
1062 .percentile = percentile,
1063 .label = xstrdup (label),
1064 .format = (format ? *format
1065 : ctables_summary_default_format (function, &axis->var)),
1066 .is_ctables_format = is_ctables_format,
1072 for (size_t i = 0; i < 2; i++)
1073 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1074 format, is_ctables_format, loc, sv))
1080 static struct ctables_axis *ctables_axis_parse_stack (
1081 struct ctables_axis_parse_ctx *);
1084 ctables_var_parse (struct lexer *lexer, struct dictionary *dict,
1085 struct ctables_var *var)
1087 if (ss_starts_with (lex_tokss (lexer), ss_cstr ("$")))
1089 *var = (struct ctables_var) {
1091 .mrset = dict_lookup_mrset (dict, lex_tokcstr (lexer))
1095 lex_error (lexer, _("'%s' does not name a multiple-response set "
1096 "in the active file dictionary."),
1097 lex_tokcstr (lexer));
1105 *var = (struct ctables_var) {
1107 .var = parse_variable (lexer, dict),
1109 return var->var != NULL;
1113 static struct ctables_axis *
1114 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1116 if (lex_match (ctx->lexer, T_LPAREN))
1118 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1119 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1121 ctables_axis_destroy (sub);
1127 if (!lex_force_id (ctx->lexer))
1130 int start_ofs = lex_ofs (ctx->lexer);
1131 struct ctables_var var;
1132 if (!ctables_var_parse (ctx->lexer, ctx->dict, &var))
1135 struct ctables_axis *axis = xmalloc (sizeof *axis);
1136 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1138 /* XXX should figure out default measures by reading data */
1139 axis->scale = (var.is_mrset ? false
1140 : lex_match_phrase (ctx->lexer, "[S]") ? true
1141 : lex_match_phrase (ctx->lexer, "[C]") ? false
1142 : var_get_measure (var.var) == MEASURE_SCALE);
1143 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1144 lex_ofs (ctx->lexer) - 1);
/* Returns true if S contains at least one of the characters '0' through '9',
   false otherwise (including when S is empty). */
static bool
has_digit (const char *s)
{
  return strpbrk (s, "0123456789") != NULL;
}
1155 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1156 bool *is_ctables_format)
1158 char type[FMT_TYPE_LEN_MAX + 1];
1159 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1162 if (!strcasecmp (type, "NEGPAREN"))
1163 format->type = CTEF_NEGPAREN;
1164 else if (!strcasecmp (type, "NEQUAL"))
1165 format->type = CTEF_NEQUAL;
1166 else if (!strcasecmp (type, "PAREN"))
1167 format->type = CTEF_PAREN;
1168 else if (!strcasecmp (type, "PCTPAREN"))
1169 format->type = CTEF_PCTPAREN;
1172 *is_ctables_format = false;
1173 return (parse_format_specifier (lexer, format)
1174 && fmt_check_output (format)
1175 && fmt_check_type_compat (format, VAL_NUMERIC));
1180 msg (SE, _("Output format %s requires width 2 or greater."), type);
1183 else if (format->d > format->w - 1)
1185 msg (SE, _("Output format %s requires width greater than decimals."),
1191 *is_ctables_format = true;
1196 static struct ctables_axis *
1197 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1199 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1200 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1203 enum ctables_summary_variant sv = CSV_CELL;
1206 int start_ofs = lex_ofs (ctx->lexer);
1208 /* Parse function. */
1209 enum ctables_summary_function function;
1210 if (!parse_ctables_summary_function (ctx->lexer, &function))
1213 /* Parse percentile. */
1214 double percentile = 0;
1215 if (function == CTSF_PTILE)
1217 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1219 percentile = lex_number (ctx->lexer);
1220 lex_get (ctx->lexer);
1225 if (lex_is_string (ctx->lexer))
1227 label = ss_xstrdup (lex_tokss (ctx->lexer));
1228 lex_get (ctx->lexer);
1231 label = ctables_summary_default_label (function, percentile);
1234 struct fmt_spec format;
1235 const struct fmt_spec *formatp;
1236 bool is_ctables_format = false;
1237 if (lex_token (ctx->lexer) == T_ID
1238 && has_digit (lex_tokcstr (ctx->lexer)))
1240 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1241 &is_ctables_format))
1251 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1252 lex_ofs (ctx->lexer) - 1);
1253 add_summary_spec (sub, function, percentile, label, formatp,
1254 is_ctables_format, loc, sv);
1256 msg_location_destroy (loc);
1258 lex_match (ctx->lexer, T_COMMA);
1259 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1261 if (!lex_force_match (ctx->lexer, T_LBRACK))
1265 else if (lex_match (ctx->lexer, T_RBRACK))
1267 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1274 ctables_axis_destroy (sub);
1278 static const struct ctables_axis *
1279 find_scale (const struct ctables_axis *axis)
1283 else if (axis->op == CTAO_VAR)
1287 assert (!axis->var.is_mrset);
1295 for (size_t i = 0; i < 2; i++)
1297 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1305 static const struct ctables_axis *
1306 find_categorical_summary_spec (const struct ctables_axis *axis)
1310 else if (axis->op == CTAO_VAR)
1311 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1314 for (size_t i = 0; i < 2; i++)
1316 const struct ctables_axis *sum
1317 = find_categorical_summary_spec (axis->subs[i]);
1325 static struct ctables_axis *
1326 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1328 int start_ofs = lex_ofs (ctx->lexer);
1329 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1333 while (lex_match (ctx->lexer, T_GT))
1335 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1339 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1340 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1342 const struct ctables_axis *outer_scale = find_scale (lhs);
1343 const struct ctables_axis *inner_scale = find_scale (rhs);
1344 if (outer_scale && inner_scale)
1346 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1347 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1348 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1349 ctables_axis_destroy (nest);
1353 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1356 msg_at (SE, nest->loc,
1357 _("Summaries may only be requested for categorical variables "
1358 "at the innermost nesting level."));
1359 msg_at (SN, outer_sum->loc,
1360 _("This outer categorical variable has a summary."));
1361 ctables_axis_destroy (nest);
1371 static struct ctables_axis *
1372 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1374 int start_ofs = lex_ofs (ctx->lexer);
1375 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1379 while (lex_match (ctx->lexer, T_PLUS))
1381 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1385 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1386 ctx->lexer, start_ofs);
1393 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1394 struct ctables *ct, struct ctables_table *t,
1395 enum pivot_axis_type a)
1397 if (lex_token (lexer) == T_BY
1398 || lex_token (lexer) == T_SLASH
1399 || lex_token (lexer) == T_ENDCMD)
1402 struct ctables_axis_parse_ctx ctx = {
1408 t->axes[a] = ctables_axis_parse_stack (&ctx);
1409 return t->axes[a] != NULL;
1413 ctables_chisq_destroy (struct ctables_chisq *chisq)
1419 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases resources held by table T: calls ctables_categories_unref on each
   of the T->n_categories entries of T->categories, frees the array itself,
   destroys the column, row, and layer axes, and destroys T->chisq and
   T->pairwise.
   NOTE(review): other statements of this function are elided from this
   excerpt. */
1425 ctables_table_destroy (struct ctables_table *t)
1430 for (size_t i = 0; i < t->n_categories; i++)
1431 ctables_categories_unref (t->categories[i]);
1432 free (t->categories);
1434 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1435 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1436 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1440 ctables_chisq_destroy (t->chisq);
1441 ctables_pairwise_destroy (t->pairwise);
/* Tears down CT: calls pivot_table_look_unref on CT->look and
   ctables_table_destroy on each of the CT->n_tables tables.
   NOTE(review): the remaining cleanup statements are elided from this
   excerpt. */
1446 ctables_destroy (struct ctables *ct)
1451 pivot_table_look_unref (ct->look);
1455 for (size_t i = 0; i < ct->n_tables; i++)
1456 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose range member is { LOW, HIGH }.  The
   parser passes -DBL_MAX or DBL_MAX for an open lower or upper bound.
   NOTE(review): the remaining initializers (presumably the category type)
   are elided from this excerpt. */
1461 static struct ctables_category
1462 cct_range (double low, double high)
1464 return (struct ctables_category) {
1466 .range = { low, high }
/* Parses what follows a SUBTOTAL or HSUBTOTAL keyword and stores a
   CCT_SUBTOTAL category in *CAT.  If an `=' follows, a string token is
   required and a copy of it becomes the label; the translated string
   "Subtotal" is the other label assignment (presumably the `else' branch,
   which is elided from this excerpt).  HIDE_SUBCATEGORIES is copied into the
   category unchanged.  The label is heap-allocated by ss_xstrdup or
   xstrdup and stored in the category's total_label member. */
1471 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1472 struct ctables_category *cat)
1475 if (lex_match (lexer, T_EQUALS))
1477 if (!lex_force_string (lexer))
1480 total_label = ss_xstrdup (lex_tokss (lexer));
1484 total_label = xstrdup (_("Subtotal"));
1486 *cat = (struct ctables_category) {
1487 .type = CCT_SUBTOTAL,
1488 .hide_subcategories = hide_subcategories,
1489 .total_label = total_label
/* Parses a single entry of an explicit category list into *CAT.  The forms
   recognized by the visible code are:

     OTHERNM                 a CCT_OTHERNM category
     MISSING                 a CCT_MISSING category
     SUBTOTAL, HSUBTOTAL     delegated to ctables_table_parse_subtotal
     LO THRU number          cct_range (-DBL_MAX, number)
     number THRU HI          cct_range (number, DBL_MAX)
     number THRU number      cct_range of the two numbers
     number                  a single-number category
     string                  a category holding a copy of the string token
     & identifier            a CCT_POSTCOMPUTE category for the postcompute
                             of that name found in CT

   An unknown postcompute name is reported with msg_at; any other token is
   reported with lex_error.

   NOTE(review): several lines of this function (failure returns, token
   advances, braces, some initializers) are elided from this excerpt; only
   the visible lines are reproduced here. */
1495 ctables_table_parse_explicit_category (struct lexer *lexer, struct ctables *ct,
1496 struct ctables_category *cat)
1498 if (lex_match_id (lexer, "OTHERNM"))
1499 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1500 else if (lex_match_id (lexer, "MISSING"))
1501 *cat = (struct ctables_category) { .type = CCT_MISSING };
1502 else if (lex_match_id (lexer, "SUBTOTAL"))
1503 return ctables_table_parse_subtotal (lexer, false, cat);
1504 else if (lex_match_id (lexer, "HSUBTOTAL"))
1505 return ctables_table_parse_subtotal (lexer, true, cat);
1506 else if (lex_match_id (lexer, "LO"))
/* Both the THRU keyword and the numeric upper bound are mandatory, so the
   failure path is taken when either lex_force_* call fails.  The second
   test must be negated, exactly as in the `number THRU number' branch
   below; otherwise a valid number would be rejected and a non-number would
   reach lex_number. */
1508 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
1510 *cat = cct_range (-DBL_MAX, lex_number (lexer));
1513 else if (lex_is_number (lexer))
1515 double number = lex_number (lexer);
1517 if (lex_match_id (lexer, "THRU"))
1519 if (lex_match_id (lexer, "HI"))
1520 *cat = cct_range (number, DBL_MAX);
1523 if (!lex_force_num (lexer))
1525 *cat = cct_range (number, lex_number (lexer));
1530 *cat = (struct ctables_category) {
1535 else if (lex_is_string (lexer))
1537 *cat = (struct ctables_category) {
1539 .string = ss_xstrdup (lex_tokss (lexer)),
1543 else if (lex_match (lexer, T_AND))
1545 if (!lex_force_id (lexer))
1547 struct ctables_postcompute *pc = ctables_find_postcompute (
1548 ct, lex_tokcstr (lexer));
1551 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1552 msg_at (SE, loc, _("Unknown postcompute &%s."),
1553 lex_tokcstr (lexer));
1554 msg_location_destroy (loc);
1559 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1563 lex_error (lexer, NULL);
/* Scans CATS for the category that postcompute operand E designates.  The
   comparison depends on the kind of operand:

   - CTPO_CAT_NUMBER: a CCT_NUMBER category with the same number.
   - CTPO_CAT_STRING: a CCT_STRING category whose string compares equal
     with strcmp.
   - CTPO_CAT_RANGE: a CCT_RANGE category with identical bounds.
   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: a category of the
     corresponding CCT_* type.
   - CTPO_CAT_SUBTOTAL: a CCT_SUBTOTAL category, selected through
     E->subtotal_index, with index 0 handled separately.

   NOTE(review): the statements executed on a match and the return
   statements are elided from this excerpt.  The final test singles out an
   unindexed subtotal reference (index 0) when more than one subtotal was
   counted -- presumably to reject it as ambiguous; confirm. */
1570 static struct ctables_category *
1571 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1572 const struct ctables_pcexpr *e)
1574 struct ctables_category *best = NULL;
1575 size_t n_subtotals = 0;
1576 for (size_t i = 0; i < cats->n_cats; i++)
1578 struct ctables_category *cat = &cats->cats[i];
1581 case CTPO_CAT_NUMBER:
1582 if (cat->type == CCT_NUMBER && cat->number == e->number)
1586 case CTPO_CAT_STRING:
1587 if (cat->type == CCT_STRING && !strcmp (cat->string, e->string))
1591 case CTPO_CAT_RANGE:
1592 if (cat->type == CCT_RANGE
1593 && cat->range[0] == e->range[0]
1594 && cat->range[1] == e->range[1])
1598 case CTPO_CAT_MISSING:
1599 if (cat->type == CCT_MISSING)
1603 case CTPO_CAT_OTHERNM:
1604 if (cat->type == CCT_OTHERNM)
1608 case CTPO_CAT_SUBTOTAL:
1609 if (cat->type == CCT_SUBTOTAL)
1612 if (e->subtotal_index == n_subtotals)
1614 else if (e->subtotal_index == 0)
1619 case CTPO_CAT_TOTAL:
1620 if (cat->type == CCT_TOTAL)
1634 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Checks the category operands in postcompute expression E against CATS, on
   behalf of computed category PC_CAT.  CATS_LOCATION is passed to msg_at as
   the location for diagnostics about the category list.

   For a category operand the function looks the category up with
   ctables_find_category_for_postcompute and can emit two groups of
   diagnostics via msg_at: one for a SUBTOTAL reference without a position
   when CATS contains more than one CCT_SUBTOTAL category, and one whose text
   says that the referenced category is not in the category list.  The loop
   at the end recurses into both E->subs[] operands.

   NOTE(review): the switch header, the conditions selecting between the two
   diagnostic groups, the returns, and the body guarded by
   PC_CAT->pc->hide_source_cats are elided from this excerpt. */
1640 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1641 struct ctables_category *pc_cat,
1642 const struct ctables_categories *cats,
1643 const struct msg_location *cats_location)
1647 case CTPO_CAT_NUMBER:
1648 case CTPO_CAT_STRING:
1649 case CTPO_CAT_RANGE:
1650 case CTPO_CAT_MISSING:
1651 case CTPO_CAT_OTHERNM:
1652 case CTPO_CAT_SUBTOTAL:
1653 case CTPO_CAT_TOTAL:
1655 struct ctables_category *cat = ctables_find_category_for_postcompute (
1659 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals in the list so the message can say how many there
   are. */
1661 size_t n_subtotals = 0;
1662 for (size_t i = 0; i < cats->n_cats; i++)
1663 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1664 if (n_subtotals > 1)
1666 msg_at (SE, cats_location,
1667 ngettext ("These categories include %zu instance "
1668 "of SUBTOTAL or HSUBTOTAL, so references "
1669 "from computed categories must refer to "
1670 "subtotals by position.",
1671 "These categories include %zu instances "
1672 "of SUBTOTAL or HSUBTOTAL, so references "
1673 "from computed categories must refer to "
1674 "subtotals by position.",
1677 msg_at (SN, e->location,
1678 _("This is the reference that lacks a position."));
1683 msg_at (SE, pc_cat->location,
1684 _("Computed category &%s references a category not included "
1685 "in the category list."),
1687 msg_at (SN, e->location, _("This is the missing category."));
1688 msg_at (SN, cats_location,
1689 _("To fix the problem, add the missing category to the "
1690 "list of categories here."));
1693 if (pc_cat->pc->hide_source_cats)
/* Check both operands of a composite expression; a null operand is
   skipped. */
1707 for (size_t i = 0; i < 2; i++)
1708 if (e->subs[i] && !ctables_recursive_check_postcompute (
1709 e->subs[i], pc_cat, cats, cats_location))
/* Parses the body of a CATEGORIES specification for table T.

   The syntax handled by the visible code is: the VARIABLES keyword, an
   optional `=', a variable list, an optional bracketed list of explicit
   categories, and then settings up to the next `/' or the end of the
   command: ORDER, KEY, and MISSING (matched only when no explicit
   categories were given), plus TOTAL, LABEL, POSITION, and EMPTY.

   A single struct ctables_categories is allocated for all the listed
   variables.  Explicit categories are parsed with
   ctables_table_parse_explicit_category, and each CCT_POSTCOMPUTE category
   among them is validated with ctables_recursive_check_postcompute.

   NOTE(review): many lines (braces, error returns, some conditions and
   initializers) are elided from this excerpt, so the exact control flow
   between the visible statements cannot be confirmed here. */
1719 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1720 struct ctables *ct, struct ctables_table *t)
1722 if (!lex_match_id (lexer, "VARIABLES"))
1724 lex_match (lexer, T_EQUALS);
1726 struct variable **vars;
1728 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* The new object starts with a reference count equal to the number of
   listed variables and with empty categories shown.  For each variable, the
   categories previously stored at its dictionary index are unreferenced
   (the statement that stores the new object there is elided here). */
1731 struct ctables_categories *c = xmalloc (sizeof *c);
1732 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1733 for (size_t i = 0; i < n_vars; i++)
1735 struct ctables_categories **cp
1736 = &t->categories[var_get_dict_index (vars[i])];
1737 ctables_categories_unref (*cp);
1742 size_t allocated_cats = 0;
1743 if (lex_match (lexer, T_LBRACK))
1745 int cats_start_ofs = lex_ofs (lexer);
1748 if (c->n_cats >= allocated_cats)
1749 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1751 int start_ofs = lex_ofs (lexer);
1752 struct ctables_category *cat = &c->cats[c->n_cats];
1753 if (!ctables_table_parse_explicit_category (lexer, ct, cat))
1755 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
/* Commas between explicit categories are optional. */
1758 lex_match (lexer, T_COMMA);
1760 while (!lex_match (lexer, T_RBRACK));
1762 struct msg_location *cats_location
1763 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1764 for (size_t i = 0; i < c->n_cats; i++)
1766 struct ctables_category *cat = &c->cats[i];
1767 if (cat->type == CCT_POSTCOMPUTE
1768 && !ctables_recursive_check_postcompute (cat->pc->expr, cat,
/* Template category that collects the ORDER, KEY, and MISSING settings;
   by default missing values are not included and sorting is ascending. */
1774 struct ctables_category cat = {
1776 .include_missing = false,
1777 .sort_ascending = true,
1779 bool show_totals = false;
1780 char *total_label = NULL;
1781 bool totals_before = false;
1782 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1784 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1786 lex_match (lexer, T_EQUALS);
1787 if (lex_match_id (lexer, "A"))
1788 cat.sort_ascending = true;
1789 else if (lex_match_id (lexer, "D"))
1790 cat.sort_ascending = false;
1793 lex_error_expecting (lexer, "A", "D");
1797 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1799 lex_match (lexer, T_EQUALS);
1800 if (lex_match_id (lexer, "VALUE"))
1801 cat.type = CCT_VALUE;
1802 else if (lex_match_id (lexer, "LABEL"))
1803 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in [0, 100]. */
1806 cat.type = CCT_FUNCTION;
1807 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1810 if (lex_match (lexer, T_LPAREN))
1812 cat.sort_var = parse_variable (lexer, dict);
1816 if (cat.sort_function == CTSF_PTILE)
1818 lex_match (lexer, T_COMMA);
1819 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1821 cat.percentile = lex_number (lexer);
1825 if (!lex_force_match (lexer, T_RPAREN))
1828 else if (ctables_function_availability (cat.sort_function)
1831 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1836 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1838 lex_match (lexer, T_EQUALS);
1839 if (lex_match_id (lexer, "INCLUDE"))
1840 cat.include_missing = true;
1841 else if (lex_match_id (lexer, "EXCLUDE"))
1842 cat.include_missing = false;
1845 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1849 else if (lex_match_id (lexer, "TOTAL"))
1851 lex_match (lexer, T_EQUALS);
1852 if (!parse_bool (lexer, &show_totals))
1855 else if (lex_match_id (lexer, "LABEL"))
1857 lex_match (lexer, T_EQUALS);
1858 if (!lex_force_string (lexer))
1861 total_label = ss_xstrdup (lex_tokss (lexer));
1864 else if (lex_match_id (lexer, "POSITION"))
1866 lex_match (lexer, T_EQUALS);
1867 if (lex_match_id (lexer, "BEFORE"))
1868 totals_before = true;
1869 else if (lex_match_id (lexer, "AFTER"))
1870 totals_before = false;
1873 lex_error_expecting (lexer, "BEFORE", "AFTER");
1877 else if (lex_match_id (lexer, "EMPTY"))
1879 lex_match (lexer, T_EQUALS);
1880 if (lex_match_id (lexer, "INCLUDE"))
1881 c->show_empty = true;
1882 else if (lex_match_id (lexer, "EXCLUDE"))
1883 c->show_empty = false;
1886 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two alternative "expecting" messages: the full keyword list, and the
   shorter list without ORDER, KEY, and MISSING.  The condition choosing
   between them is elided; presumably it is whether explicit categories were
   given. */
1893 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1894 "TOTAL", "LABEL", "POSITION", "EMPTY");
1896 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the template category to the list (the guarding condition is
   elided from this excerpt). */
1903 if (c->n_cats >= allocated_cats)
1904 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1905 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, placed either at index 0 or at the end
   of the list. */
1910 if (c->n_cats >= allocated_cats)
1911 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1913 struct ctables_category *totals;
1916 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
1917 totals = &c->cats[0];
1920 totals = &c->cats[c->n_cats];
1923 *totals = (struct ctables_category) {
1925 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, recording in each visited category the subtotal currently in
   effect (the per-type handling is elided from this excerpt). */
1929 struct ctables_category *subtotal = NULL;
1930 for (size_t i = totals_before ? 0 : c->n_cats;
1931 totals_before ? i < c->n_cats : i-- > 0;
1932 totals_before ? i++ : 0)
1934 struct ctables_category *cat = &c->cats[i];
1942 cat->subtotal = subtotal;
1945 case CCT_POSTCOMPUTE:
1956 case CCT_EXCLUDED_MISSING:
/* Uninitializes NEST; ctables_stack_uninit calls this on every nest of a
   stack.
   NOTE(review): the body is elided from this excerpt, so what exactly is
   released cannot be confirmed here. */
1965 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes STACK: calls ctables_nest_uninit on each of its STACK->n
   nests and then frees the nests array.
   NOTE(review): any statements before the loop are elided from this
   excerpt. */
1972 ctables_stack_uninit (struct ctables_stack *stack)
1976 for (size_t i = 0; i < stack->n; i++)
1977 ctables_nest_uninit (&stack->nests[i]);
1978 free (stack->nests);
/* Builds the stack obtained by combining every nest in S0 with every nest
   in S1 (S0.n * S1.n nests in total).  For each pair (a, b):

   - the new variable list is a's variables followed by b's;
   - scale_idx is a's if a has one (i.e. it is not SIZE_MAX), otherwise b's
     shifted by a->n if b has one;
   - the summary specs for every summary variant are cloned from whichever
     of a or b is selected as the summary source, based on which of them
     lacks a variable in its CSV_CELL specs.

   Both S0 and S1 are passed to ctables_stack_uninit at the end.
   NOTE(review): the early statements, the assignments to summary_src, the
   remaining nest initializers, and the return are elided from this
   excerpt. */
1982 static struct ctables_stack
1983 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
1990 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
1991 for (size_t i = 0; i < s0.n; i++)
1992 for (size_t j = 0; j < s1.n; j++)
1994 const struct ctables_nest *a = &s0.nests[i];
1995 const struct ctables_nest *b = &s1.nests[j];
1997 size_t allocate = a->n + b->n;
1998 struct variable **vars = xnmalloc (allocate, sizeof *vars);
1999 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2001 for (size_t k = 0; k < a->n; k++)
2002 vars[n++] = a->vars[k];
2003 for (size_t k = 0; k < b->n; k++)
2004 vars[n++] = b->vars[k];
2005 assert (n == allocate);
2007 const struct ctables_nest *summary_src;
2008 if (!a->specs[CSV_CELL].var)
2010 else if (!b->specs[CSV_CELL].var)
2015 struct ctables_nest *new = &stack.nests[stack.n++];
2016 *new = (struct ctables_nest) {
2018 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2019 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2023 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2024 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2026 ctables_stack_uninit (&s0);
2027 ctables_stack_uninit (&s1);
/* Concatenates the nests of S0 and S1 into a newly allocated stack: S0's
   nests first, then S1's.  The nest structures are copied by value, so the
   new stack takes over whatever they point to.
   NOTE(review): the disposal of the input arrays and the return statement
   are elided from this excerpt. */
2031 static struct ctables_stack
2032 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2034 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2035 for (size_t i = 0; i < s0.n; i++)
2036 stack.nests[stack.n++] = s0.nests[i];
2037 for (size_t i = 0; i < s1.n; i++)
2038 stack.nests[stack.n++] = s1.nests[i];
2039 assert (stack.n == s0.n + s1.n);
/* Expands axis expression A into the stack of nests it stands for.

   - One path returns an empty stack (its condition is elided; presumably a
     null A).
   - For a variable node, which must not be a multiple response set, the
     result is a one-nest stack.  The nest's scale_idx is 0 when A->scale is
     set and SIZE_MAX otherwise.  If A has CSV_CELL summary specs or is a
     scale variable, every summary variant's specs are cloned from A and
     tagged with A's variable and scale flag.
   - Two paths recurse on both A->subs[] and combine the results with
     stack_fts and nest_fts respectively (the case labels are elided;
     presumably the stack and nest operators).

   NOTE(review): AXIS_TYPE is only forwarded in the visible code; its use in
   the elided initializers cannot be confirmed here. */
2045 static struct ctables_stack
2046 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2049 return (struct ctables_stack) { .n = 0 };
2054 assert (!a->var.is_mrset);
2056 struct variable **vars = xmalloc (sizeof *vars);
2059 struct ctables_nest *nest = xmalloc (sizeof *nest);
2060 *nest = (struct ctables_nest) {
2063 .scale_idx = a->scale ? 0 : SIZE_MAX,
2065 if (a->specs[CSV_CELL].n || a->scale)
2066 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2068 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2069 nest->specs[sv].var = a->var.var;
2070 nest->specs[sv].is_scale = a->scale;
2072 return (struct ctables_stack) { .nests = nest, .n = 1 };
2075 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2076 enumerate_fts (axis_type, a->subs[1]));
2079 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2080 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function in one cell.  The members are
   grouped by the summary functions that use them, as the comments inside
   the union indicate.
   NOTE(review): most member declarations are elided from this excerpt; only
   the moments and writer members are visible. */
2086 union ctables_summary
2088 /* COUNT, VALIDN, TOTALN. */
2091 /* MINIMUM, MAXIMUM, RANGE. */
2098 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2099 struct moments1 *moments;
2101 /* MEDIAN, MODE, PTILE. */
2104 struct casewriter *writer;
2109 /* XXX multiple response */
/* Initializes summary S for the function given by SS->function.

   In the visible code:
   - one group sets both min and max to SYSMIS;
   - the *PCT_SUM functions create a moments1 accumulator with
     MOMENT_VARIANCE;
   - one group creates a sorting case writer over a prototype with two
     width-0 (numeric) columns, with an ascending ordering on column 0.

   NOTE(review): the statement for the count/percentage group and the case
   labels of the other groups are elided from this excerpt. */
2113 ctables_summary_init (union ctables_summary *s,
2114 const struct ctables_summary_spec *ss)
2116 switch (ss->function)
2120 case CTSF_ROWPCT_COUNT:
2121 case CTSF_COLPCT_COUNT:
2122 case CTSF_TABLEPCT_COUNT:
2123 case CTSF_SUBTABLEPCT_COUNT:
2124 case CTSF_LAYERPCT_COUNT:
2125 case CTSF_LAYERROWPCT_COUNT:
2126 case CTSF_LAYERCOLPCT_COUNT:
2127 case CTSF_ROWPCT_VALIDN:
2128 case CTSF_COLPCT_VALIDN:
2129 case CTSF_TABLEPCT_VALIDN:
2130 case CTSF_SUBTABLEPCT_VALIDN:
2131 case CTSF_LAYERPCT_VALIDN:
2132 case CTSF_LAYERROWPCT_VALIDN:
2133 case CTSF_LAYERCOLPCT_VALIDN:
2134 case CTSF_ROWPCT_TOTALN:
2135 case CTSF_COLPCT_TOTALN:
2136 case CTSF_TABLEPCT_TOTALN:
2137 case CTSF_SUBTABLEPCT_TOTALN:
2138 case CTSF_LAYERPCT_TOTALN:
2139 case CTSF_LAYERROWPCT_TOTALN:
2140 case CTSF_LAYERCOLPCT_TOTALN:
2152 s->min = s->max = SYSMIS;
2160 case CTSF_ROWPCT_SUM:
2161 case CTSF_COLPCT_SUM:
2162 case CTSF_TABLEPCT_SUM:
2163 case CTSF_SUBTABLEPCT_SUM:
2164 case CTSF_LAYERPCT_SUM:
2165 case CTSF_LAYERROWPCT_SUM:
2166 case CTSF_LAYERCOLPCT_SUM:
2167 s->moments = moments1_create (MOMENT_VARIANCE);
/* Column 0 receives the value and column 1 the weight when cases are
   written in ctables_summary_add. */
2174 struct caseproto *proto = caseproto_create ();
2175 proto = caseproto_add_width (proto, 0);
2176 proto = caseproto_add_width (proto, 0);
2178 struct subcase ordering;
2179 subcase_init (&ordering, 0, 0, SC_ASCEND);
2180 s->writer = sort_create_writer (&ordering, proto);
2181 subcase_uninit (&ordering);
2182 caseproto_unref (proto);
/* Releases what ctables_summary_init set up in S for SS->function.  In the
   visible code the *PCT_SUM functions destroy the moments1 accumulator, and
   another group destroys the case writer.
   NOTE(review): the statement for the count/percentage group and the case
   labels of the other groups are elided from this excerpt. */
2192 ctables_summary_uninit (union ctables_summary *s,
2193 const struct ctables_summary_spec *ss)
2195 switch (ss->function)
2199 case CTSF_ROWPCT_COUNT:
2200 case CTSF_COLPCT_COUNT:
2201 case CTSF_TABLEPCT_COUNT:
2202 case CTSF_SUBTABLEPCT_COUNT:
2203 case CTSF_LAYERPCT_COUNT:
2204 case CTSF_LAYERROWPCT_COUNT:
2205 case CTSF_LAYERCOLPCT_COUNT:
2206 case CTSF_ROWPCT_VALIDN:
2207 case CTSF_COLPCT_VALIDN:
2208 case CTSF_TABLEPCT_VALIDN:
2209 case CTSF_SUBTABLEPCT_VALIDN:
2210 case CTSF_LAYERPCT_VALIDN:
2211 case CTSF_LAYERROWPCT_VALIDN:
2212 case CTSF_LAYERCOLPCT_VALIDN:
2213 case CTSF_ROWPCT_TOTALN:
2214 case CTSF_COLPCT_TOTALN:
2215 case CTSF_TABLEPCT_TOTALN:
2216 case CTSF_SUBTABLEPCT_TOTALN:
2217 case CTSF_LAYERPCT_TOTALN:
2218 case CTSF_LAYERROWPCT_TOTALN:
2219 case CTSF_LAYERCOLPCT_TOTALN:
2237 case CTSF_ROWPCT_SUM:
2238 case CTSF_COLPCT_SUM:
2239 case CTSF_TABLEPCT_SUM:
2240 case CTSF_SUBTABLEPCT_SUM:
2241 case CTSF_LAYERPCT_SUM:
2242 case CTSF_LAYERROWPCT_SUM:
2243 case CTSF_LAYERCOLPCT_SUM:
2244 moments1_destroy (s->moments);
2250 casewriter_destroy (s->writer);
/* Accumulates one case into summary S of CELL for the function given by
   SS->function.  VAR and VALUE are the summarized variable and its value in
   the case.  D_WEIGHT and E_WEIGHT are two weights for the same case; each
   accumulation in the body uses one or the other (presumably the dictionary
   weight and the effective weight -- confirm against the caller).

   Depending on the function, the visible code adds a weight to S->count
   (in some branches only when IS_SCALE is set, the cell is not an
   excluded-missing cell, or the value is not missing), tracks the minimum
   and maximum of non-missing values, feeds non-missing values into the
   moments accumulator, or writes a (value, E_WEIGHT) case to the sort writer
   while adding E_WEIGHT to S->ovalid.

   NOTE(review): most case labels and several conditions are elided from
   this excerpt, so the mapping from function to branch is only partly
   visible. */
2256 ctables_summary_add (const struct ctables_cell *cell, union ctables_summary *s,
2257 const struct ctables_summary_spec *ss,
2258 const struct variable *var, const union value *value,
2259 bool is_scale, bool is_missing,
2260 double d_weight, double e_weight)
2262 /* To determine whether a case is included in a given table for a particular
2263 kind of summary, consider the following charts for each variable in the
2264 table. Only if "yes" appears for every variable for the summary is the
2267 Categorical variables: VALIDN COUNT TOTALN
2268 Valid values in included categories yes yes yes
2269 Missing values in included categories --- yes yes
2270 Missing values in excluded categories --- --- yes
2271 Valid values in excluded categories --- --- ---
2273 Scale variables: VALIDN COUNT TOTALN
2274 Valid value yes yes yes
2275 Missing value --- yes yes
2277 Missing values include both user- and system-missing. (The system-missing
2278 value is always in an excluded category.)
2280 switch (ss->function)
2283 s->count += d_weight;
2287 if (is_scale || !cell->excluded_missing)
2288 s->count += d_weight;
2293 ? !var_is_value_missing (var, value)
2295 s->count += d_weight;
2300 s->count += d_weight;
2304 case CTSF_ROWPCT_COUNT:
2305 case CTSF_COLPCT_COUNT:
2306 case CTSF_TABLEPCT_COUNT:
2307 case CTSF_SUBTABLEPCT_COUNT:
2308 case CTSF_LAYERPCT_COUNT:
2309 case CTSF_LAYERROWPCT_COUNT:
2310 case CTSF_LAYERCOLPCT_COUNT:
2311 case CTSF_ROWPCT_VALIDN:
2312 case CTSF_COLPCT_VALIDN:
2313 case CTSF_TABLEPCT_VALIDN:
2314 case CTSF_SUBTABLEPCT_VALIDN:
2315 case CTSF_LAYERPCT_VALIDN:
2316 case CTSF_LAYERROWPCT_VALIDN:
2317 case CTSF_LAYERCOLPCT_VALIDN:
2318 case CTSF_ROWPCT_TOTALN:
2319 case CTSF_COLPCT_TOTALN:
2320 case CTSF_TABLEPCT_TOTALN:
2321 case CTSF_SUBTABLEPCT_TOTALN:
2322 case CTSF_LAYERPCT_TOTALN:
2323 case CTSF_LAYERROWPCT_TOTALN:
2324 case CTSF_LAYERCOLPCT_TOTALN:
2325 s->count += d_weight;
2330 ? !var_is_value_missing (var, value)
2332 s->count += e_weight;
2336 s->count += e_weight;
2342 if (!var_is_value_missing (var, value))
2344 assert (!var_is_alpha (var)); /* XXX? */
2345 if (s->min == SYSMIS || value->f < s->min)
2347 if (s->max == SYSMIS || value->f > s->max)
2357 case CTSF_ROWPCT_SUM:
2358 case CTSF_COLPCT_SUM:
2359 case CTSF_TABLEPCT_SUM:
2360 case CTSF_SUBTABLEPCT_SUM:
2361 case CTSF_LAYERPCT_SUM:
2362 case CTSF_LAYERROWPCT_SUM:
2363 case CTSF_LAYERCOLPCT_SUM:
2364 if (!var_is_value_missing (var, value))
2365 moments1_add (s->moments, value->f, e_weight);
2371 if (var_is_value_missing (var, value))
2373 s->ovalid += e_weight;
2375 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2376 *case_num_rw_idx (c, 0) = value->f;
2377 *case_num_rw_idx (c, 1) = e_weight;
2378 casewriter_write (s->writer, c);
/* Maps percentage summary FUNCTION to the domain type over which the
   percentage is taken.  In the visible code the LAYERCOLPCT functions map
   to CTDT_LAYERCOL, the LAYERROWPCT functions to CTDT_LAYERROW, and the
   SUBTABLEPCT functions to CTDT_SUBTABLE.
   NOTE(review): the return statements for the COLPCT, LAYERPCT, ROWPCT, and
   TABLEPCT groups, and the handling of non-percentage functions, are elided
   from this excerpt. */
2384 static enum ctables_domain_type
2385 ctables_function_domain (enum ctables_summary_function function)
2409 case CTSF_COLPCT_COUNT:
2410 case CTSF_COLPCT_SUM:
2411 case CTSF_COLPCT_TOTALN:
2412 case CTSF_COLPCT_VALIDN:
2415 case CTSF_LAYERCOLPCT_COUNT:
2416 case CTSF_LAYERCOLPCT_SUM:
2417 case CTSF_LAYERCOLPCT_TOTALN:
2418 case CTSF_LAYERCOLPCT_VALIDN:
2419 return CTDT_LAYERCOL;
2421 case CTSF_LAYERPCT_COUNT:
2422 case CTSF_LAYERPCT_SUM:
2423 case CTSF_LAYERPCT_TOTALN:
2424 case CTSF_LAYERPCT_VALIDN:
2427 case CTSF_LAYERROWPCT_COUNT:
2428 case CTSF_LAYERROWPCT_SUM:
2429 case CTSF_LAYERROWPCT_TOTALN:
2430 case CTSF_LAYERROWPCT_VALIDN:
2431 return CTDT_LAYERROW;
2433 case CTSF_ROWPCT_COUNT:
2434 case CTSF_ROWPCT_SUM:
2435 case CTSF_ROWPCT_TOTALN:
2436 case CTSF_ROWPCT_VALIDN:
2439 case CTSF_SUBTABLEPCT_COUNT:
2440 case CTSF_SUBTABLEPCT_SUM:
2441 case CTSF_SUBTABLEPCT_TOTALN:
2442 case CTSF_SUBTABLEPCT_VALIDN:
2443 return CTDT_SUBTABLE;
2445 case CTSF_TABLEPCT_COUNT:
2446 case CTSF_TABLEPCT_SUM:
2447 case CTSF_TABLEPCT_TOTALN:
2448 case CTSF_TABLEPCT_VALIDN:
/* Computes the value of summary S of CELL for the function given by
   SS->function.

   In the visible code:
   - the *PCT_COUNT functions return S->count as a percentage of the
     e_valid total of the cell's domain for that function;
   - one branch returns max - min, or SYSMIS if either is SYSMIS;
   - several branches derive their result from moments1_calculate: one via
     calc_semean (variance, weight), one as sqrt (variance), one as
     weight * mean, with SYSMIS propagated by the latter two;
   - one branch computes a percentile with percentile_create, using
     SS->percentile for CTSF_PTILE and 0.5 otherwise, and stores it in
     S->ovalue;
   - one branch computes the mode with mode_create and stores it in
     S->ovalue.

   Both order-statistic branches call casewriter_make_reader on S->writer.

   NOTE(review): most case labels and return statements are elided from this
   excerpt. */
2456 ctables_summary_value (const struct ctables_cell *cell,
2457 union ctables_summary *s,
2458 const struct ctables_summary_spec *ss)
2460 switch (ss->function)
2466 case CTSF_ROWPCT_COUNT:
2467 case CTSF_COLPCT_COUNT:
2468 case CTSF_TABLEPCT_COUNT:
2469 case CTSF_SUBTABLEPCT_COUNT:
2470 case CTSF_LAYERPCT_COUNT:
2471 case CTSF_LAYERROWPCT_COUNT:
2472 case CTSF_LAYERCOLPCT_COUNT:
/* NOTE(review): the COUNT percentages divide by the domain's e_valid
   rather than its e_count -- confirm that this is the intended
   denominator. */
2474 enum ctables_domain_type d = ctables_function_domain (ss->function);
2475 return (cell->domains[d]->e_valid
2476 ? s->count / cell->domains[d]->e_valid * 100
2480 case CTSF_ROWPCT_VALIDN:
2481 case CTSF_COLPCT_VALIDN:
2482 case CTSF_TABLEPCT_VALIDN:
2483 case CTSF_SUBTABLEPCT_VALIDN:
2484 case CTSF_LAYERPCT_VALIDN:
2485 case CTSF_LAYERROWPCT_VALIDN:
2486 case CTSF_LAYERCOLPCT_VALIDN:
2487 case CTSF_ROWPCT_TOTALN:
2488 case CTSF_COLPCT_TOTALN:
2489 case CTSF_TABLEPCT_TOTALN:
2490 case CTSF_SUBTABLEPCT_TOTALN:
2491 case CTSF_LAYERPCT_TOTALN:
2492 case CTSF_LAYERROWPCT_TOTALN:
2493 case CTSF_LAYERCOLPCT_TOTALN:
2516 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2521 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2527 double weight, variance;
2528 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2529 return calc_semean (variance, weight);
2535 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2536 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2541 double weight, mean;
2542 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2543 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2549 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2553 case CTSF_ROWPCT_SUM:
2554 case CTSF_COLPCT_SUM:
2555 case CTSF_TABLEPCT_SUM:
2556 case CTSF_SUBTABLEPCT_SUM:
2557 case CTSF_LAYERPCT_SUM:
2558 case CTSF_LAYERROWPCT_SUM:
2559 case CTSF_LAYERCOLPCT_SUM:
2566 struct casereader *reader = casewriter_make_reader (s->writer);
2569 struct percentile *ptile = percentile_create (
2570 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2571 struct order_stats *os = &ptile->parent;
2572 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2573 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2574 statistic_destroy (&ptile->parent.parent);
2581 struct casereader *reader = casewriter_make_reader (s->writer);
2584 struct mode *mode = mode_create ();
2585 struct order_stats *os = &mode->parent;
2586 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2587 s->ovalue = mode->mode;
2588 statistic_destroy (&mode->parent.parent);
/* Auxiliary data read by ctables_cell_compare_3way: the nest whose
   variables are compared and the axis whose cell values are used. */
2596 struct ctables_cell_sort_aux
2598 const struct ctables_nest *nest;
2599 enum pivot_axis_type a;
/* Three-way comparison of two cells.  A_ and B_ each point to a pointer to
   a struct ctables_cell; AUX_ points to a struct ctables_cell_sort_aux.

   The variables of AUX->nest are compared in order, skipping the scale
   variable.  Cells whose category pointers differ are ordered by comparing
   those pointers.  Within the same category the ordering depends on the
   category type: some types must be equal, some compare the values with
   value_compare_3way, and one compares value labels with strcmp, ordering a
   labeled value after an unlabeled one and falling back to the values when
   neither has a label.  Where the category's sort_ascending flag is
   consulted, the result is negated for descending order.

   NOTE(review): most case labels and the final return are elided from this
   excerpt. */
2603 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2605 const struct ctables_cell_sort_aux *aux = aux_;
2606 struct ctables_cell *const *ap = a_;
2607 struct ctables_cell *const *bp = b_;
2608 const struct ctables_cell *a = *ap;
2609 const struct ctables_cell *b = *bp;
2611 const struct ctables_nest *nest = aux->nest;
2612 for (size_t i = 0; i < nest->n; i++)
2613 if (i != nest->scale_idx)
2615 const struct variable *var = nest->vars[i];
2616 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2617 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2618 if (a_cv->category != b_cv->category)
2619 return a_cv->category > b_cv->category ? 1 : -1;
2621 const union value *a_val = &a_cv->value;
2622 const union value *b_val = &b_cv->value;
2623 switch (a_cv->category->type)
2629 case CCT_POSTCOMPUTE:
2630 case CCT_EXCLUDED_MISSING:
2631 /* Must be equal. */
2638 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2646 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2648 return a_cv->category->sort_ascending ? cmp : -cmp;
2654 const char *a_label = var_lookup_value_label (var, a_val);
2655 const char *b_label = var_lookup_value_label (var, b_val);
2657 ? (b_label ? strcmp (a_label, b_label) : 1)
2658 : (b_label ? -1 : value_compare_3way (
2659 a_val, b_val, var_get_width (var))));
2661 return a_cv->category->sort_ascending ? cmp : -cmp;
2675 For each ctables_table:
2676 For each combination of row vars:
2677 For each combination of column vars:
2678 For each combination of layer vars:
2680 Make a table of row values:
2681 Sort entries by row values
2682 Assign a 0-based index to each actual value
2683 Construct a dimension
2684 Make a table of column values
2685 Make a table of layer values
2687 Fill the table entry using the indexes from before.
/* Finds or creates the domain of type DOMAIN in section S to which CELL
   belongs.

   The hash is computed over CELL's values for the variables that each
   axis's nest lists in its domains[DOMAIN] array.  Existing domains with
   that hash are then checked by comparing the same values of their example
   cell against CELL's.  If the search falls through, a new domain with CELL
   as its example is allocated and inserted into S->domains[DOMAIN].

   NOTE(review): the hash seed, the handling of a match or mismatch inside
   the search loop, and the return statements are elided from this
   excerpt. */
2690 static struct ctables_domain *
2691 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2692 enum ctables_domain_type domain)
2695 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2697 const struct ctables_nest *nest = s->nests[a];
2698 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2700 size_t v_idx = nest->domains[domain][i];
2701 hash = value_hash (&cell->axes[a].cvs[v_idx].value,
2702 var_get_width (nest->vars[v_idx]), hash);
2706 struct ctables_domain *d;
2707 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2709 const struct ctables_cell *df = d->example;
2710 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2712 const struct ctables_nest *nest = s->nests[a];
2713 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2715 size_t v_idx = nest->domains[domain][i];
2716 if (!value_equal (&df->axes[a].cvs[v_idx].value,
2717 &cell->axes[a].cvs[v_idx].value,
2718 var_get_width (nest->vars[v_idx])))
2727 d = xmalloc (sizeof *d);
2728 *d = (struct ctables_domain) { .example = cell };
2729 hmap_insert (&s->domains[domain], &d->node, hash);
/* Finds the category in C that value V of variable VAR falls into.

   A system-missing numeric value is special-cased first (the result is
   elided from this excerpt).  The categories are then examined from last to
   first.  In the visible tests, a number category matches on equality, a
   range category matches when V lies within its bounds (a bound of -DBL_MAX
   or DBL_MAX is treated as open), and some categories test whether V is
   missing in VAR.  One branch returns the category only if it includes
   missing values or V is not missing.  If the loop ends without a return,
   the result is a null pointer for a missing value and the remembered
   OTHERNM category (possibly null) otherwise.

   NOTE(review): the switch header, most case labels, and the per-case
   returns are elided from this excerpt. */
2733 static const struct ctables_category *
2734 ctables_categories_match (const struct ctables_categories *c,
2735 const union value *v, const struct variable *var)
2737 if (var_is_numeric (var) && v->f == SYSMIS)
2740 const struct ctables_category *othernm = NULL;
2741 for (size_t i = c->n_cats; i-- > 0; )
2743 const struct ctables_category *cat = &c->cats[i];
2747 if (cat->number == v->f)
2755 if ((cat->range[0] == -DBL_MAX || v->f >= cat->range[0])
2756 && (cat->range[1] == DBL_MAX || v->f <= cat->range[1]))
2761 if (var_is_value_missing (var, v))
2765 case CCT_POSTCOMPUTE:
2780 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2783 case CCT_EXCLUDED_MISSING:
2788 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C, looking only at the first and last
   entries of C->cats (the first is preferred).  The fallback result when
   neither is a total is elided from this excerpt.
   NOTE(review): the last entry is read as cats[n_cats - 1], so this assumes
   C->n_cats is at least 1; no guard is visible here. */
2791 static const struct ctables_category *
2792 ctables_categories_total (const struct ctables_categories *c)
2794 const struct ctables_category *first = &c->cats[0];
2795 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2796 return (first->type == CCT_TOTAL ? first
2797 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell in section S identified by the categories in
   CATS and, for ordinary categories, by the values in case C.

   The hash combines, for every non-scale variable on every axis, the
   category pointer and -- unless the category is a total, subtotal, or
   postcompute -- the variable's value in C.  Existing cells with that hash
   are compared on the same criteria.

   A new cell starts with is_missing, excluded_missing, and postcompute
   false and contributes_to_domains true.  For each non-scale variable the
   flags are then updated: total, subtotal, and postcompute categories clear
   contributes_to_domains; otherwise a missing value sets is_missing;
   CCT_EXCLUDED_MISSING sets excluded_missing; CCT_POSTCOMPUTE sets
   postcompute.  Each variable's category and a clone of its value are
   stored in the cell.  Finally the cell's summaries are initialized from
   the summary axis's specs, its domains are looked up with
   ctables_domain_insert, and it is inserted into S->cells.

   NOTE(review): CATS has a fixed second dimension of 10 that is indexed up
   to nest->n without a visible bounds check.  The hash seed, the handling
   of a match in the search loop, the statement guarded by the hide test,
   and the returns are elided from this excerpt. */
2801 static struct ctables_cell *
2802 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
2803 const struct ctables_category *cats[PIVOT_N_AXES][10])
2806 enum ctables_summary_variant sv = CSV_CELL;
2807 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2809 const struct ctables_nest *nest = s->nests[a];
2810 for (size_t i = 0; i < nest->n; i++)
2811 if (i != nest->scale_idx)
2813 hash = hash_pointer (cats[a][i], hash);
2814 if (cats[a][i]->type != CCT_TOTAL
2815 && cats[a][i]->type != CCT_SUBTOTAL
2816 && cats[a][i]->type != CCT_POSTCOMPUTE)
2817 hash = value_hash (case_data (c, nest->vars[i]),
2818 var_get_width (nest->vars[i]), hash);
2824 struct ctables_cell *cell;
2825 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
2827 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2829 const struct ctables_nest *nest = s->nests[a];
2830 for (size_t i = 0; i < nest->n; i++)
2831 if (i != nest->scale_idx
2832 && (cats[a][i] != cell->axes[a].cvs[i].category
2833 || (cats[a][i]->type != CCT_TOTAL
2834 && cats[a][i]->type != CCT_SUBTOTAL
2835 && cats[a][i]->type != CCT_POSTCOMPUTE
2836 && !value_equal (case_data (c, nest->vars[i]),
2837 &cell->axes[a].cvs[i].value,
2838 var_get_width (nest->vars[i])))))
/* No existing cell matched: build a new one. */
2847 cell = xmalloc (sizeof *cell);
2849 cell->is_missing = false;
2850 cell->excluded_missing = false;
2852 cell->contributes_to_domains = true;
2853 cell->postcompute = false;
2854 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2856 const struct ctables_nest *nest = s->nests[a];
2857 cell->axes[a].cvs = (nest->n
2858 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
2860 for (size_t i = 0; i < nest->n; i++)
2862 const struct ctables_category *cat = cats[a][i];
2863 const struct variable *var = nest->vars[i];
2864 const union value *value = case_data (c, var);
2865 if (i != nest->scale_idx)
2867 const struct ctables_category *subtotal = cat->subtotal;
2868 if (cat->hide || (subtotal && subtotal->hide_subcategories))
2871 if (cat->type == CCT_TOTAL
2872 || cat->type == CCT_SUBTOTAL
2873 || cat->type == CCT_POSTCOMPUTE)
2874 cell->contributes_to_domains = false;
2875 else if (var_is_value_missing (var, value))
2876 cell->is_missing = true;
2877 if (cat->type == CCT_EXCLUDED_MISSING)
2878 cell->excluded_missing = true;
2879 if (cat->type == CCT_POSTCOMPUTE)
2880 cell->postcompute = true;
2883 cell->axes[a].cvs[i].category = cat;
2884 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
2888 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2889 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2890 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
2891 for (size_t i = 0; i < specs->n; i++)
2892 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
2893 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2894 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
2895 hmap_insert (&s->cells, &cell->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell of section S
   identified by CATS.

   The cell is obtained from ctables_cell_insert__.  Each of its summaries
   is then updated through ctables_summary_add, using the specs of the
   table's summary axis.  If the cell contributes to domains, every one of
   its domains has the weights added to its d_total and e_total, to d_count
   and e_count unless the cell is an excluded-missing cell, and to d_valid
   and e_valid unless the cell is missing.

   NOTE(review): the exact nesting of the count and valid updates depends on
   braces that are elided from this excerpt. */
2900 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
2901 const struct ctables_category *cats[PIVOT_N_AXES][10],
2902 bool is_missing, double d_weight, double e_weight)
2904 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
2905 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2907 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2908 for (size_t i = 0; i < specs->n; i++)
2909 ctables_summary_add (cell, &cell->summaries[i], &specs->specs[i],
2910 specs->var, case_data (c, specs->var), specs->is_scale,
2911 is_missing, d_weight, e_weight);
2912 if (cell->contributes_to_domains)
2914 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2916 struct ctables_domain *d = cell->domains[dt];
2917 d->d_total += d_weight;
2918 d->e_total += e_weight;
2919 if (!cell->excluded_missing)
2921 d->d_count += d_weight;
2922 d->e_count += e_weight;
2924 if (!cell->is_missing)
2926 d->d_valid += d_weight;
2927 d->e_valid += e_weight;
/* Adds case C to the cells for total categories.  Starting at axis
   START_AXIS and nest index START_NEST, each non-scale variable's total
   category is looked up with ctables_categories_total; the visible code
   then saves the variable's current entry in CATS, adds the case with
   ctables_cell_add__, and recurses from the following nest index so that
   combinations of totals are covered as well.

   NOTE(review): the test on the looked-up total, the assignment into CATS,
   the restoration of the saved entry, and the reset of the nest index
   between axes are elided from this excerpt. */
2934 recurse_totals (struct ctables_section *s, const struct ccase *c,
2935 const struct ctables_category *cats[PIVOT_N_AXES][10],
2936 bool is_missing, double d_weight, double e_weight,
2937 enum pivot_axis_type start_axis, size_t start_nest)
2939 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
2941 const struct ctables_nest *nest = s->nests[a];
2942 for (size_t i = start_nest; i < nest->n; i++)
2944 if (i == nest->scale_idx)
2947 const struct variable *var = nest->vars[i];
2949 const struct ctables_category *total = ctables_categories_total (
2950 s->table->categories[var_get_dict_index (var)]);
2953 const struct ctables_category *save = cats[a][i];
2955 ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight);
2956 recurse_totals (s, c, cats, is_missing,
2957 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells for subtotal categories.  Starting at axis
   START_AXIS and nest index START_NEST, the visible code replaces a
   non-scale variable's entry in CATS with that category's subtotal, adds
   the case with ctables_cell_add__, and recurses from the following nest
   index so that combinations of subtotals are covered as well.

   NOTE(review): the test for whether the category has a subtotal, the
   restoration of the saved entry, and the reset of the nest index between
   axes are elided from this excerpt. */
2966 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
2967 const struct ctables_category *cats[PIVOT_N_AXES][10],
2968 bool is_missing, double d_weight, double e_weight,
2969 enum pivot_axis_type start_axis, size_t start_nest)
2971 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
2973 const struct ctables_nest *nest = s->nests[a];
2974 for (size_t i = start_nest; i < nest->n; i++)
2976 if (i == nest->scale_idx)
2979 const struct ctables_category *save = cats[a][i];
2982 cats[a][i] = save->subtotal;
2983 ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight);
2984 recurse_subtotals (s, c, cats, is_missing,
2985 d_weight, e_weight, a, i + 1);
2994 ctables_add_occurrence (const struct variable *var,
2995 const union value *value,
2996 struct hmap *occurrences)
2998 int width = var_get_width (var);
2999 unsigned int hash = value_hash (value, width, 0);
3001 struct ctables_occurrence *o;
3002 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3004 if (value_equal (value, &o->value, width))
3007 o = xmalloc (sizeof *o);
3008 value_clone (&o->value, value, width);
3009 hmap_insert (occurrences, &o->node, hash);
3013 ctables_cell_insert (struct ctables_section *s,
3014 const struct ccase *c,
3015 double d_weight, double e_weight)
3017 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3018 bool is_missing = false;
3019 bool excluded_missing = false;
3020 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3022 const struct ctables_nest *nest = s->nests[a];
3023 for (size_t i = 0; i < nest->n; i++)
3025 if (i == nest->scale_idx)
3028 const struct variable *var = nest->vars[i];
3029 const union value *value = case_data (c, var);
3031 bool var_missing = var_is_value_missing (var, value) != 0;
3035 cats[a][i] = ctables_categories_match (
3036 s->table->categories[var_get_dict_index (var)], value, var);
3042 static const struct ctables_category cct_excluded_missing = {
3043 .type = CCT_EXCLUDED_MISSING,
3046 cats[a][i] = &cct_excluded_missing;
3047 excluded_missing = true;
3052 if (!excluded_missing)
3053 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3055 const struct ctables_nest *nest = s->nests[a];
3056 for (size_t i = 0; i < nest->n; i++)
3057 if (i != nest->scale_idx)
3059 const struct variable *var = nest->vars[i];
3060 const union value *value = case_data (c, var);
3061 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3065 ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight);
3067 if (!excluded_missing)
3069 recurse_totals (s, c, cats, is_missing, d_weight, e_weight, 0, 0);
3070 recurse_subtotals (s, c, cats, is_missing, d_weight, e_weight, 0, 0);
3076 const struct ctables_summary_spec_set *set;
3081 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3083 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3084 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3085 if (as->function != bs->function)
3086 return as->function > bs->function ? 1 : -1;
3087 else if (as->percentile != bs->percentile)
3088 return as->percentile < bs->percentile ? 1 : -1;
3089 return strcmp (as->label, bs->label);
3092 static struct pivot_value *
3093 ctables_category_create_label (const struct ctables_category *cat,
3094 const struct variable *var,
3095 const union value *value)
3097 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3098 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3099 : cat->type == CCT_POSTCOMPUTE && cat->pc->label
3100 ? pivot_value_new_user_text (cat->pc->label, SIZE_MAX)
3101 : pivot_value_new_var_value (var, value));
3104 static struct ctables_value *
3105 ctables_value_find__ (struct ctables_table *t, const union value *value,
3106 int width, unsigned int hash)
3108 struct ctables_value *clv;
3109 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3110 hash, &t->clabels_values_map)
3111 if (value_equal (value, &clv->value, width))
3117 ctables_value_insert (struct ctables_table *t, const union value *value,
3120 unsigned int hash = value_hash (value, width, 0);
3121 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3124 clv = xmalloc (sizeof *clv);
3125 value_clone (&clv->value, value, width);
3126 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, which has the given WIDTH, in T's category-label value map,
   computing the hash itself.  Returns whatever ctables_value_find__()
   returns for that hash. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  const unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
3139 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3140 size_t ix[PIVOT_N_AXES])
3142 if (a < PIVOT_N_AXES)
3144 size_t limit = MAX (t->stacks[a].n, 1);
3145 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3146 ctables_table_add_section (t, a + 1, ix);
3150 struct ctables_section *s = &t->sections[t->n_sections++];
3151 *s = (struct ctables_section) {
3153 .cells = HMAP_INITIALIZER (s->cells),
3155 for (a = 0; a < PIVOT_N_AXES; a++)
3158 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3160 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3161 for (size_t i = 0; i < nest->n; i++)
3162 hmap_init (&s->occurrences[a][i]);
3164 for (size_t i = 0; i < N_CTDTS; i++)
3165 hmap_init (&s->domains[i]);
3170 ctpo_add (double a, double b)
3176 ctpo_sub (double a, double b)
3182 ctpo_mul (double a, double b)
3188 ctpo_div (double a, double b)
3190 return b ? a / b : SYSMIS;
3194 ctpo_pow (double a, double b)
3196 int save_errno = errno;
3198 double result = pow (a, b);
3206 ctpo_neg (double a, double b UNUSED)
3211 struct ctables_pcexpr_evaluate_ctx
3213 const struct ctables_cell *cell;
3214 const struct ctables_section *section;
3215 const struct ctables_categories *cats;
3216 enum pivot_axis_type pc_a;
3220 static double ctables_pcexpr_evaluate (
3221 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3224 ctables_pcexpr_evaluate_nonterminal (
3225 const struct ctables_pcexpr_evaluate_ctx *ctx,
3226 const struct ctables_pcexpr *e, size_t n_args,
3227 double evaluate (double, double))
3229 double args[2] = { 0, 0 };
3230 for (size_t i = 0; i < n_args; i++)
3232 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3233 if (!isfinite (args[i]) || args[i] == SYSMIS)
3236 return evaluate (args[0], args[1]);
/* Evaluates one category operand of a postcompute expression.

   Looks in CTX->section for the cell that has the same category (and, for
   ordinary categories, the same value) as CTX->cell for every non-scale
   variable, except that at position (CTX->pc_a, CTX->pc_a_idx) it must have
   PC_CV instead.  The value returned for a matching cell is that of its
   summary at index 0 only (see the XXX below).

   NOTE(review): the statement executed when no cell matches is not visible in
   this excerpt -- confirm what is returned in that case. */
3240 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3241 const struct ctables_cell_value *pc_cv)
3243 const struct ctables_section *s = ctx->section;
/* Compute the hash of the cell being sought: the category pointer of each
   non-scale variable, plus its value unless the category is a total,
   subtotal, or postcompute. */
3246 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3248 const struct ctables_nest *nest = s->nests[a];
3249 for (size_t i = 0; i < nest->n; i++)
3250 if (i != nest->scale_idx)
3252 const struct ctables_cell_value *cv
3253 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3254 : &ctx->cell->axes[a].cvs[i]);
3255 hash = hash_pointer (cv->category, hash);
3256 if (cv->category->type != CCT_TOTAL
3257 && cv->category->type != CCT_SUBTOTAL
3258 && cv->category->type != CCT_POSTCOMPUTE)
3259 hash = value_hash (&cv->value,
3260 var_get_width (nest->vars[i]), hash);
/* Check each cell in the hash bucket against the same per-variable
   criteria that went into the hash. */
3264 struct ctables_cell *tc;
3265 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3267 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3269 const struct ctables_nest *nest = s->nests[a];
3270 for (size_t i = 0; i < nest->n; i++)
3271 if (i != nest->scale_idx)
3273 const struct ctables_cell_value *p_cv
3274 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3275 : &ctx->cell->axes[a].cvs[i]);
3276 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3277 if (p_cv->category != t_cv->category
3278 || (p_cv->category->type != CCT_TOTAL
3279 && p_cv->category->type != CCT_SUBTOTAL
3280 && p_cv->category->type != CCT_POSTCOMPUTE
3281 && !value_equal (&p_cv->value,
3283 var_get_width (nest->vars[i]))))
/* TC is the matching cell: report its summary.  The summary spec set is
   chosen by TC's summary variant on the table's summary axis. */
3295 const struct ctables_table *t = s->table;
3296 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3297 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
/* Only summary 0 is used, regardless of which summary is being computed. */
3298 size_t j = 0 /* XXX */;
3299 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
3303 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3304 const struct ctables_pcexpr *e)
3311 case CTPO_CAT_RANGE:
3313 struct ctables_cell_value cv = {
3314 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3316 assert (cv.category != NULL);
3318 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3319 const struct ctables_occurrence *o;
3322 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3323 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3324 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3326 cv.value = o->value;
3327 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3332 case CTPO_CAT_NUMBER:
3333 case CTPO_CAT_STRING:
3334 case CTPO_CAT_MISSING:
3335 case CTPO_CAT_OTHERNM:
3336 case CTPO_CAT_SUBTOTAL:
3337 case CTPO_CAT_TOTAL:
3339 struct ctables_cell_value cv = {
3340 .category = ctables_find_category_for_postcompute (ctx->cats, e),
3341 .value = { .f = e->number },
3343 assert (cv.category != NULL);
3344 return ctables_pcexpr_evaluate_category (ctx, &cv);
3348 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3351 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3354 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3357 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3360 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3363 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3370 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3371 const struct ctables_cell *cell)
3373 enum pivot_axis_type pc_a;
3375 const struct ctables_postcompute *pc;
3376 for (pc_a = 0; ; pc_a++)
3378 assert (pc_a < PIVOT_N_AXES);
3379 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3381 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3382 if (cv->category->type == CCT_POSTCOMPUTE)
3384 pc = cv->category->pc;
3391 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3392 const struct ctables_categories *cats = s->table->categories[
3393 var_get_dict_index (var)];
3394 struct ctables_pcexpr_evaluate_ctx ctx = {
3399 .pc_a_idx = pc_a_idx,
3401 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3405 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3407 struct pivot_table *pt = pivot_table_create__ (
3409 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3410 : pivot_value_new_text (N_("Custom Tables"))),
3413 pivot_table_set_caption (
3414 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
3416 pivot_table_set_caption (
3417 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
3419 bool summary_dimension = (t->summary_axis != t->slabels_axis
3420 || (!t->slabels_visible
3421 && t->summary_specs.n > 1));
3422 if (summary_dimension)
3424 struct pivot_dimension *d = pivot_dimension_create (
3425 pt, t->slabels_axis, N_("Statistics"));
3426 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3427 if (!t->slabels_visible)
3428 d->hide_all_labels = true;
3429 for (size_t i = 0; i < specs->n; i++)
3430 pivot_category_create_leaf (
3431 d->root, pivot_value_new_text (specs->specs[i].label));
3434 bool categories_dimension = t->clabels_example != NULL;
3435 if (categories_dimension)
3437 struct pivot_dimension *d = pivot_dimension_create (
3438 pt, t->label_axis[t->clabels_from_axis],
3439 t->clabels_from_axis == PIVOT_AXIS_ROW
3440 ? N_("Row Categories")
3441 : N_("Column Categories"));
3442 const struct variable *var = t->clabels_example;
3443 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3444 for (size_t i = 0; i < t->n_clabels_values; i++)
3446 const struct ctables_value *value = t->clabels_values[i];
3447 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3448 assert (cat != NULL);
3449 pivot_category_create_leaf (d->root, ctables_category_create_label (
3450 cat, t->clabels_example, &value->value));
3454 pivot_table_set_look (pt, ct->look);
3455 struct pivot_dimension *d[PIVOT_N_AXES];
3456 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3458 static const char *names[] = {
3459 [PIVOT_AXIS_ROW] = N_("Rows"),
3460 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3461 [PIVOT_AXIS_LAYER] = N_("Layers"),
3463 d[a] = (t->axes[a] || a == t->summary_axis
3464 ? pivot_dimension_create (pt, a, names[a])
3469 assert (t->axes[a]);
3471 for (size_t i = 0; i < t->stacks[a].n; i++)
3473 struct ctables_nest *nest = &t->stacks[a].nests[i];
3474 struct ctables_section **sections = xnmalloc (t->n_sections,
3476 size_t n_sections = 0;
3478 size_t n_total_cells = 0;
3479 size_t max_depth = 0;
3480 for (size_t j = 0; j < t->n_sections; j++)
3481 if (t->sections[j].nests[a] == nest)
3483 struct ctables_section *s = &t->sections[j];
3484 sections[n_sections++] = s;
3485 n_total_cells += s->cells.count;
3487 size_t depth = s->nests[a]->n;
3488 max_depth = MAX (depth, max_depth);
3491 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3493 size_t n_sorted = 0;
3495 for (size_t j = 0; j < n_sections; j++)
3497 struct ctables_section *s = sections[j];
3499 struct ctables_cell *cell;
3500 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3502 sorted[n_sorted++] = cell;
3503 assert (n_sorted <= n_total_cells);
3506 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3507 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
3509 struct ctables_level
3511 enum ctables_level_type
3513 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3514 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3515 CTL_SUMMARY, /* Summary functions. */
3519 enum settings_value_show vlabel; /* CTL_VAR only. */
3522 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3523 size_t n_levels = 0;
3524 for (size_t k = 0; k < nest->n; k++)
3526 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3527 if (vlabel != CTVL_NONE)
3529 levels[n_levels++] = (struct ctables_level) {
3531 .vlabel = (enum settings_value_show) vlabel,
3536 if (nest->scale_idx != k
3537 && (k != nest->n - 1 || t->label_axis[a] == a))
3539 levels[n_levels++] = (struct ctables_level) {
3540 .type = CTL_CATEGORY,
3546 if (!summary_dimension && a == t->slabels_axis)
3548 levels[n_levels++] = (struct ctables_level) {
3549 .type = CTL_SUMMARY,
3550 .var_idx = SIZE_MAX,
3554 /* Pivot categories:
3556 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3557 - category for nest->vars[0], if nest->scale_idx != 0
3558 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3559 - category for nest->vars[1], if nest->scale_idx != 1
3561 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3562 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3563 - summary function, if 'a == t->slabels_axis && a ==
3566 Additional dimensions:
3568 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3570 - If 't->label_axis[b] == a' for some 'b != a', add a category
3575 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3577 for (size_t j = 0; j < n_sorted; j++)
3579 struct ctables_cell *cell = sorted[j];
3580 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
3582 size_t n_common = 0;
3585 for (; n_common < n_levels; n_common++)
3587 const struct ctables_level *level = &levels[n_common];
3588 if (level->type == CTL_CATEGORY)
3590 size_t var_idx = level->var_idx;
3591 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3592 if (prev->axes[a].cvs[var_idx].category != c)
3594 else if (c->type != CCT_SUBTOTAL
3595 && c->type != CCT_TOTAL
3596 && c->type != CCT_POSTCOMPUTE
3597 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3598 &cell->axes[a].cvs[var_idx].value,
3599 var_get_type (nest->vars[var_idx])))
3605 for (size_t k = n_common; k < n_levels; k++)
3607 const struct ctables_level *level = &levels[k];
3608 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3609 if (level->type == CTL_SUMMARY)
3611 assert (k == n_levels - 1);
3613 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3614 for (size_t m = 0; m < specs->n; m++)
3616 int leaf = pivot_category_create_leaf (
3617 parent, pivot_value_new_text (specs->specs[m].label));
3624 const struct variable *var = nest->vars[level->var_idx];
3625 struct pivot_value *label;
3626 if (level->type == CTL_VAR)
3628 label = pivot_value_new_variable (var);
3629 label->variable.show = level->vlabel;
3631 else if (level->type == CTL_CATEGORY)
3633 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3634 label = ctables_category_create_label (cv->category,
3640 if (k == n_levels - 1)
3641 prev_leaf = pivot_category_create_leaf (parent, label);
3643 groups[k] = pivot_category_create_group__ (parent, label);
3647 cell->axes[a].leaf = prev_leaf;
3654 for (size_t i = 0; i < t->n_sections; i++)
3656 struct ctables_section *s = &t->sections[i];
3658 struct ctables_cell *cell;
3659 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3664 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3665 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
3666 for (size_t j = 0; j < specs->n; j++)
3669 size_t n_dindexes = 0;
3671 if (summary_dimension)
3672 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
3674 if (categories_dimension)
3676 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3677 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3678 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3679 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
3682 dindexes[n_dindexes++] = ctv->leaf;
3685 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3688 int leaf = cell->axes[a].leaf;
3689 if (a == t->summary_axis && !summary_dimension)
3691 dindexes[n_dindexes++] = leaf;
3694 const struct ctables_summary_spec *ss = &specs->specs[j];
3696 double d = (cell->postcompute
3697 ? ctables_cell_calculate_postcompute (s, cell)
3698 : ctables_summary_value (cell, &cell->summaries[j], ss));
3699 struct pivot_value *value;
3700 if (ct->hide_threshold != 0
3701 && d < ct->hide_threshold
3702 && (cell->postcompute
3704 : ctables_summary_function_is_count (ss->function)))
3706 value = pivot_value_new_user_text_nocopy (
3707 xasprintf ("<%d", ct->hide_threshold));
3709 else if (d == 0 && ct->zero)
3710 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
3711 else if (d == SYSMIS && ct->missing)
3712 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
3713 else if (specs->specs[j].is_ctables_format)
3715 char *s = data_out_stretchy (&(union value) { .f = d },
3717 &specs->specs[j].format,
3718 &ct->ctables_formats, NULL);
3719 value = pivot_value_new_user_text_nocopy (s);
3723 value = pivot_value_new_number (d);
3724 value->numeric.format = specs->specs[j].format;
3726 pivot_table_put (pt, dindexes, n_dindexes, value);
3731 pivot_table_submit (pt);
/* Checks whether the category labels for axis A of table T may be placed
   where T->label_axis[A] says.  On the path visible here, records A in
   T->clabels_from_axis and the innermost variable of the first nest on the
   axis in T->clabels_example, then reports an error (SE) for each of the
   following:

   - the example variable's categories include one of type CCT_FUNCTION;

   - the innermost variable of any nest is the nest's scale variable;

   - the innermost variable of a later nest differs from the example variable
     in width, value labels, or category specifications.

   NOTE(review): the early exits and return statements are not visible in this
   excerpt; in particular, confirm how an empty nest (n == 0) is handled
   before `n0->n - 1' and `ni->n - 1' are used as indexes. */
3735 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
3737 enum pivot_axis_type label_pos = t->label_axis[a];
3741 t->clabels_from_axis = a;
/* Names used in the error messages below. */
3743 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
3744 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
3746 const struct ctables_stack *stack = &t->stacks[a];
/* The first nest's innermost variable is the reference that the others are
   compared against. */
3750 const struct ctables_nest *n0 = &stack->nests[0];
3752 const struct variable *v0 = n0->vars[n0->n - 1];
3753 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3754 t->clabels_example = v0;
3756 for (size_t i = 0; i < c0->n_cats; i++)
3757 if (c0->cats[i].type == CCT_FUNCTION)
3759 msg (SE, _("%s=%s is not allowed with sorting based "
3760 "on a summary function."),
3761 subcommand_name, pos_name);
3764 if (n0->n - 1 == n0->scale_idx)
3766 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
3767 "but %s is a scale variable."),
3768 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest's innermost variable must be compatible with V0. */
3772 for (size_t i = 1; i < stack->n; i++)
3774 const struct ctables_nest *ni = &stack->nests[i];
3776 const struct variable *vi = ni->vars[ni->n - 1];
3777 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
3779 if (ni->n - 1 == ni->scale_idx)
3781 msg (SE, _("%s=%s requires the variables to be moved to be "
3782 "categorical, but %s is a scale variable."),
3783 subcommand_name, pos_name, var_get_name (vi));
3786 if (var_get_width (v0) != var_get_width (vi))
3788 msg (SE, _("%s=%s requires the variables to be "
3789 "moved to have the same width, but %s has "
3790 "width %d and %s has width %d."),
3791 subcommand_name, pos_name,
3792 var_get_name (v0), var_get_width (v0),
3793 var_get_name (vi), var_get_width (vi));
3796 if (!val_labs_equal (var_get_value_labels (v0),
3797 var_get_value_labels (vi)))
3799 msg (SE, _("%s=%s requires the variables to be "
3800 "moved to have the same value labels, but %s "
3801 "and %s have different value labels."),
3802 subcommand_name, pos_name,
3803 var_get_name (v0), var_get_name (vi));
3806 if (!ctables_categories_equal (c0, ci))
3808 msg (SE, _("%s=%s requires the variables to be "
3809 "moved to have the same category "
3810 "specifications, but %s and %s have different "
3811 "category specifications."),
3812 subcommand_name, pos_name,
3813 var_get_name (v0), var_get_name (vi));
/* Prepares table T for execution.  The visible code does three things:

   1. For each axis, expands the axis expression into a stack of nests with
      enumerate_fts() and, for each nest and domain type, builds the list
      nest->domains[dt] of variable indexes.  A single empty nest
      (n == 0) is also installed for an axis.

   2. For each nest on the summary axis that has no cell summaries, installs
      one default summary (CTSF_MEAN for a scale nest, otherwise CTSF_COUNT),
      and makes the total summaries a clone of the cell summaries whenever no
      total summaries were given.

   3. Merges all the nests' summary spec sets into T->summary_specs, ordered
      by merge_item_compare_3way() with duplicates combined, and records in
      each original spec's axis_idx its position in the merged set.

   The result is that of ctables_check_label_position() for the row and
   column axes.

   NOTE(review): several lines of this function (case labels, conditions,
   and any cleanup) are not visible in this excerpt; no free() of `items' is
   visible -- confirm that it is released. */
3822 ctables_prepare_table (struct ctables_table *t)
3824 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3827 t->stacks[a] = enumerate_fts (a, t->axes[a]);
3829 for (size_t j = 0; j < t->stacks[a].n; j++)
3831 struct ctables_nest *nest = &t->stacks[a].nests[j];
3832 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Room for every variable in the nest; n_domains[dt] counts those actually
   appended below. */
3834 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
3835 nest->n_domains[dt] = 0;
3837 for (size_t k = 0; k < nest->n; k++)
3839 if (k == nest->scale_idx)
3848 if (a != PIVOT_AXIS_LAYER)
3855 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
3856 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
3857 : a == PIVOT_AXIS_ROW)
/* True for the innermost categorical variable: the last variable, or the
   next-to-last when the last one is the scale variable. */
3859 if (k == nest->n - 1
3860 || (nest->scale_idx == nest->n - 1
3861 && k == nest->n - 2))
3867 if (a == PIVOT_AXIS_COLUMN)
3872 if (a == PIVOT_AXIS_ROW)
3877 nest->domains[dt][nest->n_domains[dt]++] = k;
/* A stack consisting of one nest with no variables. */
3884 struct ctables_nest *nest = xmalloc (sizeof *nest);
3885 *nest = (struct ctables_nest) { .n = 0 };
3886 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
/* Supply default summaries on the summary axis. */
3889 struct ctables_stack *stack = &t->stacks[t->summary_axis];
3890 for (size_t i = 0; i < stack->n; i++)
3892 struct ctables_nest *nest = &stack->nests[i];
3893 if (!nest->specs[CSV_CELL].n)
3895 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
3896 specs->specs = xmalloc (sizeof *specs->specs);
3899 enum ctables_summary_function function
3900 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
3901 struct ctables_var var = { .is_mrset = false, .var = specs->var };
3903 *specs->specs = (struct ctables_summary_spec) {
3904 .function = function,
3905 .format = ctables_summary_default_format (function, &var),
3906 .label = ctables_summary_default_label (function, 0),
3909 specs->var = nest->vars[0];
3911 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
3912 &nest->specs[CSV_CELL]);
3914 else if (!nest->specs[CSV_TOTAL].n)
3915 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
3916 &nest->specs[CSV_CELL]);
/* Merge the spec sets: one merge_item per (nest, summary variant), hence
   room for 2 * stack->n items. */
3919 struct ctables_summary_spec_set *merged = &t->summary_specs;
3920 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
3922 for (size_t j = 0; j < stack->n; j++)
3924 const struct ctables_nest *nest = &stack->nests[j];
3926 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
3927 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the least current spec among the remaining items and append it to
   the merged set. */
3932 struct merge_item min = items[0];
3933 for (size_t j = 1; j < n_left; j++)
3934 if (merge_item_compare_3way (&items[j], &min) < 0)
3937 if (merged->n >= merged->allocated)
3938 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
3939 sizeof *merged->specs);
3940 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Advance every item whose current spec equals the one just appended,
   dropping items that are exhausted. */
3942 for (size_t j = 0; j < n_left; )
3944 if (merge_item_compare_3way (&items[j], &min) == 0)
3946 struct merge_item *item = &items[j];
3947 item->set->specs[item->ofs].axis_idx = merged->n - 1;
3948 if (++item->ofs >= item->set->n)
3950 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below look like debugging output;
   confirm whether they are compiled out by lines not shown here. */
3959 for (size_t j = 0; j < merged->n; j++)
3960 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
3962 for (size_t j = 0; j < stack->n; j++)
3964 const struct ctables_nest *nest = &stack->nests[j];
3965 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
3967 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
3968 for (size_t k = 0; k < specs->n; k++)
3969 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
3970 specs->specs[k].axis_idx);
3976 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
3977 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
3981 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
3982 enum pivot_axis_type a)
3984 struct ctables_stack *stack = &t->stacks[a];
3985 for (size_t i = 0; i < stack->n; i++)
3987 const struct ctables_nest *nest = &stack->nests[i];
3988 const struct variable *var = nest->vars[nest->n - 1];
3989 const union value *value = case_data (c, var);
3991 if (var_is_numeric (var) && value->f == SYSMIS)
3994 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
3996 ctables_value_insert (t, value, var_get_width (var));
4001 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4003 const struct ctables_value *const *ap = a_;
4004 const struct ctables_value *const *bp = b_;
4005 const struct ctables_value *a = *ap;
4006 const struct ctables_value *b = *bp;
4007 const int *width = width_;
4008 return value_compare_3way (&a->value, &b->value, *width);
4012 ctables_sort_clabels_values (struct ctables_table *t)
4014 const struct variable *v0 = t->clabels_example;
4015 int width = var_get_width (v0);
4017 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4020 const struct val_labs *val_labs = var_get_value_labels (v0);
4021 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4022 vl = val_labs_next (val_labs, vl))
4023 if (ctables_categories_match (c0, &vl->value, v0))
4024 ctables_value_insert (t, &vl->value, width);
4027 size_t n = hmap_count (&t->clabels_values_map);
4028 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4030 struct ctables_value *clv;
4032 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4033 t->clabels_values[i++] = clv;
4034 t->n_clabels_values = n;
4037 sort (t->clabels_values, n, sizeof *t->clabels_values,
4038 compare_clabels_values_3way, &width);
4040 for (size_t i = 0; i < n; i++)
4041 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the categories in CATS call for
   even if no case has them: an explicit number from a category, and
   otherwise values drawn from VAR's value labels, filtered according to the
   kind of category.

   NOTE(review): most `case' labels of the switch in this function are not
   visible in this excerpt, so the category type that each loop below belongs
   to (other than the visible CCT_POSTCOMPUTE and CCT_EXCLUDED_MISSING
   labels) should be confirmed against the full source. */
4045 ctables_add_category_occurrences (const struct variable *var,
4046 struct hmap *occurrences,
4047 const struct ctables_categories *cats)
4049 const struct val_labs *val_labs = var_get_value_labels (var);
4051 for (size_t i = 0; i < cats->n_cats; i++)
4053 const struct ctables_category *c = &cats->cats[i];
/* A category that names a single number: add that number itself. */
4057 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Labeled values that lie within the category's closed range. */
4065 assert (var_is_numeric (var));
4066 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4067 vl = val_labs_next (val_labs, vl))
4068 if (vl->value.f >= c->range[0] && vl->value.f <= c->range[1])
4069 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
4073 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4074 vl = val_labs_next (val_labs, vl))
4075 if (var_is_value_missing (var, &vl->value))
4076 ctables_add_occurrence (var, &vl->value, occurrences);
/* All labeled values. */
4080 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4081 vl = val_labs_next (val_labs, vl))
4082 ctables_add_occurrence (var, &vl->value, occurrences);
4085 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless the category includes
   missing values. */
4095 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4096 vl = val_labs_next (val_labs, vl))
4097 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4098 ctables_add_occurrence (var, &vl->value, occurrences);
4101 case CCT_EXCLUDED_MISSING:
4108 ctables_section_recurse_add_empty_categories (
4109 struct ctables_section *s,
4110 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4111 enum pivot_axis_type a, size_t a_idx)
4113 if (a >= PIVOT_N_AXES)
4114 ctables_cell_insert__ (s, c, cats);
4115 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4116 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4119 const struct variable *var = s->nests[a]->vars[a_idx];
4120 const struct ctables_categories *categories = s->table->categories[
4121 var_get_dict_index (var)];
4122 int width = var_get_width (var);
4123 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4124 const struct ctables_occurrence *o;
4125 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4127 union value *value = case_data_rw (c, var);
4128 value_destroy (value, width);
4129 value_clone (value, &o->value, width);
4130 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4131 assert (cats[a][a_idx] != NULL);
4132 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4135 for (size_t i = 0; i < categories->n_cats; i++)
4137 const struct ctables_category *cat = &categories->cats[i];
4138 if (cat->type == CCT_POSTCOMPUTE)
4140 cats[a][a_idx] = cat;
4141 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4148 ctables_section_add_empty_categories (struct ctables_section *s)
4150 bool show_empty = false;
4151 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4153 for (size_t k = 0; k < s->nests[a]->n; k++)
4154 if (k != s->nests[a]->scale_idx)
4156 const struct variable *var = s->nests[a]->vars[k];
4157 const struct ctables_categories *cats = s->table->categories[
4158 var_get_dict_index (var)];
4159 if (cats->show_empty)
4162 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
4168 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4169 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4170 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
4175 ctables_execute (struct dataset *ds, struct ctables *ct)
4177 for (size_t i = 0; i < ct->n_tables; i++)
4179 struct ctables_table *t = ct->tables[i];
4180 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4181 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4182 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4183 sizeof *t->sections);
4184 size_t ix[PIVOT_N_AXES];
4185 ctables_table_add_section (t, 0, ix);
4188 struct casereader *input = proc_open (ds);
4189 bool warn_on_invalid = true;
4190 for (struct ccase *c = casereader_read (input); c;
4191 case_unref (c), c = casereader_read (input))
4193 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4195 double e_weight = (ct->e_weight
4196 ? var_force_valid_weight (ct->e_weight,
4197 case_num (c, ct->e_weight),
4201 for (size_t i = 0; i < ct->n_tables; i++)
4203 struct ctables_table *t = ct->tables[i];
4205 for (size_t j = 0; j < t->n_sections; j++)
4206 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
4208 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4209 if (t->label_axis[a] != a)
4210 ctables_insert_clabels_values (t, c, a);
4213 casereader_destroy (input);
4215 for (size_t i = 0; i < ct->n_tables; i++)
4217 struct ctables_table *t = ct->tables[i];
4219 if (t->clabels_example)
4220 ctables_sort_clabels_values (t);
4222 for (size_t j = 0; j < t->n_sections; j++)
4223 ctables_section_add_empty_categories (&t->sections[j]);
4225 ctables_table_output (ct, ct->tables[i]);
4227 return proc_commit (ds);
/* Type of a parsing function for one precedence level of a postcompute
   expression: takes the lexer and returns a `struct ctables_pcexpr *'.
   parse_binary_operators() takes a pointer to such a function to parse the
   operands of its operators. */
4232 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *);
4235 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4241 case CTPO_CAT_STRING:
4251 for (size_t i = 0; i < 2; i++)
4252 ctables_pcexpr_destroy (e->subs[i]);
4256 case CTPO_CAT_NUMBER:
4257 case CTPO_CAT_RANGE:
4258 case CTPO_CAT_MISSING:
4259 case CTPO_CAT_OTHERNM:
4260 case CTPO_CAT_SUBTOTAL:
4261 case CTPO_CAT_TOTAL:
4265 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operator OP whose operands are
   SUB0 and SUB1.  The new node stores both operand pointers in its `subs'
   array, and its location is the merge of the two operands' locations, so
   both operands must be nonnull. */
4270 static struct ctables_pcexpr *
4271 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4272 struct ctables_pcexpr *sub0,
4273 struct ctables_pcexpr *sub1)
4275 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4276 *e = (struct ctables_pcexpr) {
4278 .subs = { sub0, sub1 },
4279 .location = msg_location_merged (sub0->location, sub1->location),
4284 /* How to parse an operator. */
4287 enum token_type token; /* Lexer token compared against the current token. */
4288 enum ctables_postcompute_op op; /* Operator used for the node built on a match. */
/* Searches the N_OPS entries of OPS for one whose token equals LEXER's
   current token and returns a pointer into OPS (the return statements are
   not visible in this excerpt; parse_binary_operators__() stores the result
   and dereferences it). */
4291 static const struct operator *
4292 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4294 for (const struct operator *op = ops; op < ops + n_ops; op++)
4295 if (lex_token (lexer) == op->token)
/* T_NEG_NUM is treated differently from every other operator token.
   NOTE(review): presumably the token is consumed only when it is not a
   negative number, so that the number itself remains to be parsed as the
   operand -- confirm against the guarded statement. */
4297 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS entries of OPS,
   starting from left operand LHS, which the caller has already parsed.
   Each right-hand operand is parsed with PARSE_NEXT_LEVEL.  Every operator
   found combines the expression accumulated so far with the new operand, so
   chains associate to the left.

   If CHAIN_WARNING is nonnull, it is reported as a warning at the
   accumulated expression's location when op_count exceeds 1, that is, after
   at least two operators have been chained. */
4306 static struct ctables_pcexpr *
4307 parse_binary_operators__ (struct lexer *lexer,
4308 const struct operator ops[], size_t n_ops,
4309 parse_recursively_func *parse_next_level,
4310 const char *chain_warning,
4311 struct ctables_pcexpr *lhs)
4313 for (int op_count = 0; ; op_count++)
4315 const struct operator *op = match_operator (lexer, ops, n_ops);
4318 if (op_count > 1 && chain_warning)
4319 msg_at (SW, lhs->location, "%s", chain_warning);
4324 struct ctables_pcexpr *rhs = parse_next_level (lexer);
/* NOTE(review): presumably the failure path taken when the right-hand
   operand could not be parsed; the guarding condition is not visible
   here. */
4327 ctables_pcexpr_destroy (lhs);
/* Fold the new operand into the left side and keep looking for operators. */
4331 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: first parses the
   leftmost operand with PARSE_NEXT_LEVEL, then hands it to
   parse_binary_operators__() to consume any operators from the N_OPS
   entries of OPS that follow.  CHAIN_WARNING is passed through
   unchanged. */
4335 static struct ctables_pcexpr *
4336 parse_binary_operators (struct lexer *lexer,
4337 const struct operator ops[], size_t n_ops,
4338 parse_recursively_func *parse_next_level,
4339 const char *chain_warning)
4341 struct ctables_pcexpr *lhs = parse_next_level (lexer);
4345 return parse_binary_operators__ (lexer, ops, n_ops, parse_next_level,
4346 chain_warning, lhs);
/* Forward declaration: parse_primary() calls parse_add() to parse a
   parenthesized subexpression, before parse_add() is defined. */
4349 static struct ctables_pcexpr *parse_add (struct lexer *);
/* Returns, by value, a CTPO_CAT_RANGE expression whose range runs from LOW
   to HIGH.  Fields other than the operator and range are left
   zero-initialized by the compound literal. */
4351 static struct ctables_pcexpr
4352 ctpo_cat_range (double low, double high)
4354 return (struct ctables_pcexpr) {
4355 .op = CTPO_CAT_RANGE,
4356 .range = { low, high },
/* Parses a primary postcompute expression from LEXER and returns it as a
   newly allocated copy whose location covers the tokens consumed.  The
   forms recognized are:

     - A numeric constant.
     - MISSING, OTHERNM, or TOTAL.
     - SUBTOTAL, optionally followed by a bracketed integer index of at
       least 1 (the index stays 0 when no bracket follows).
     - A bracketed category: [LO THRU n], [n THRU HI], [n THRU m], [n], or
       a string.
     - A parenthesized expression, parsed with parse_add().

   Any other token is reported with lex_error(). */
4360 static struct ctables_pcexpr *
4361 parse_primary (struct lexer *lexer)
4363 int start_ofs = lex_ofs (lexer);
4364 struct ctables_pcexpr e;
4365 if (lex_is_number (lexer))
4367 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4368 .number = lex_number (lexer) };
4371 else if (lex_match_id (lexer, "MISSING"))
4372 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4373 else if (lex_match_id (lexer, "OTHERNM"))
4374 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4375 else if (lex_match_id (lexer, "TOTAL"))
4376 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4377 else if (lex_match_id (lexer, "SUBTOTAL"))
4379 size_t subtotal_index = 0;
4380 if (lex_match (lexer, T_LBRACK))
4382 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4384 subtotal_index = lex_integer (lexer);
4386 if (!lex_force_match (lexer, T_RBRACK))
4389 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4390 .subtotal_index = subtotal_index };
4392 else if (lex_match (lexer, T_LBRACK))
4394 if (lex_match_id (lexer, "LO"))
/* Both checks must be negated: the guarded failure path has to run when
   THRU is missing *or* when no number follows it.  Without the `!' on
   lex_force_num(), a well-formed [LO THRU n] was rejected and a missing
   number went on to be read with lex_number(). */
4396 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
/* An open lower bound is represented by -DBL_MAX. */
4398 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4401 else if (lex_is_number (lexer))
4403 double number = lex_number (lexer);
4405 if (lex_match_id (lexer, "THRU"))
/* An open upper bound is represented by DBL_MAX. */
4407 if (lex_match_id (lexer, "HI"))
4408 e = ctpo_cat_range (number, DBL_MAX);
4411 if (!lex_force_num (lexer))
4413 e = ctpo_cat_range (number, lex_number (lexer));
/* A number not followed by THRU names a single numeric category. */
4418 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4421 else if (lex_is_string (lexer))
/* The expression owns its own copy of the string token. */
4423 e = (struct ctables_pcexpr) {
4424 .op = CTPO_CAT_STRING,
4425 .string = ss_xstrdup (lex_tokss (lexer)),
4431 lex_error (lexer, NULL);
4435 if (!lex_force_match (lexer, T_RBRACK))
/* Only a string category holds allocated data at this point, so it is the
   only case singled out when the closing bracket is missing. */
4437 if (e.op == CTPO_CAT_STRING)
4442 else if (lex_match (lexer, T_LPAREN))
4444 struct ctables_pcexpr *ep = parse_add (lexer);
4447 if (!lex_force_match (lexer, T_RPAREN))
4449 ctables_pcexpr_destroy (ep);
4456 lex_error (lexer, NULL);
/* The location spans from the first token of the primary through the last
   token consumed. */
4460 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4461 return xmemdup (&e, sizeof e);
/* Allocates a new expression node that negates SUB, as its name and its
   callers parse_exp() and parse_neg() indicate (the initializer lines that
   set the operator and operand are not visible in this excerpt).  The
   node's location runs from token offset START_OFS through the token just
   before LEXER's current one. */
4464 static struct ctables_pcexpr *
4465 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4466 struct lexer *lexer, int start_ofs)
4468 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4469 *e = (struct ctables_pcexpr) {
4472 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level: a chain of primaries joined by `**'.
   Chains are combined left to right, and a warning explaining this is
   passed down to be issued for chains such as `a**b**c'.

   A negative-number token immediately followed by `**' is handled
   separately so that the negation applies to the whole power rather than
   to the base. */
4477 static struct ctables_pcexpr *
4478 parse_exp (struct lexer *lexer)
4480 static const struct operator op = { T_EXP, CTPO_POW };
4482 const char *chain_warning =
4483 _("The exponentiation operator (`**') is left-associative: "
4484 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4485 "To disable this warning, insert parentheses.");
/* Ordinary case: anything other than a negative number followed by `**'. */
4487 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4488 return parse_binary_operators (lexer, &op, 1,
4489 parse_primary, chain_warning);
/* The comment below used to be left unterminated in this text, which
   turned the rest of the function into comment text; it is closed here. */
4491 /* Special case for situations like "-5**6", which must be parsed as -(5**6). */
4494 int start_ofs = lex_ofs (lexer);
/* The left operand is the magnitude of the negative-number token: negating
   the token's value gives the positive base. */
4495 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4496 *lhs = (struct ctables_pcexpr) {
4497 .op = CTPO_CONSTANT,
4498 .number = -lex_tokval (lexer),
4499 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4503 struct ctables_pcexpr *node = parse_binary_operators__ (
4504 lexer, &op, 1, parse_primary, chain_warning, lhs);
/* Negate the whole power expression. */
4508 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4511 /* Parses the unary minus level. */
/* Without a leading `-' this defers to parse_exp().  Otherwise the operand
   is parsed by a recursive call, which allows several minus signs in a
   row, and is wrapped in a negation node whose location starts at the
   minus sign. */
4512 static struct ctables_pcexpr *
4513 parse_neg (struct lexer *lexer)
4515 int start_ofs = lex_ofs (lexer);
4516 if (!lex_match (lexer, T_DASH))
4517 return parse_exp (lexer);
4519 struct ctables_pcexpr *inner = parse_neg (lexer);
4523 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4526 /* Parses the multiplication and division level. */
/* `*' maps to CTPO_MUL and `/' to CTPO_DIV.  The remaining arguments of the
   parse_binary_operators() call (next level and chain warning) are not
   visible in this excerpt. */
4527 static struct ctables_pcexpr *
4528 parse_mul (struct lexer *lexer)
4530 static const struct operator ops[] =
4532 { T_ASTERISK, CTPO_MUL },
4533 { T_SLASH, CTPO_DIV },
4536 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
4540 /* Parses the addition and subtraction level. */
/* This is the entry point for a whole postcompute expression: it is what
   ctables_parse_pcompute() and the parenthesized case of parse_primary()
   call.  The remaining arguments of the parse_binary_operators() call are
   not visible in this excerpt. */
4541 static struct ctables_pcexpr *
4542 parse_add (struct lexer *lexer)
4544 static const struct operator ops[] =
4546 { T_PLUS, CTPO_ADD },
4547 { T_DASH, CTPO_SUB },
/* A negative-number token in operator position counts as an addition.
   NOTE(review): presumably so that input like `a -5' adds the negative
   constant; match_operator() special-cases this token -- confirm. */
4548 { T_NEG_NUM, CTPO_ADD },
4551 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
/* Looks up the postcompute called NAME in CT's `postcomputes' hash map.
   Both the hash (utf8_hash_case_string) and the comparison
   (utf8_strcasecmp) disregard case.  The return statements are not visible
   in this excerpt; callers treat a null result as "not found". */
4555 static struct ctables_postcompute *
4556 ctables_find_postcompute (struct ctables *ct, const char *name)
4558 struct ctables_postcompute *pc;
4559 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4560 utf8_hash_case_string (name, 0), &ct->postcomputes)
4561 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, of the form
   `&name = EXPR(expression)', and records the definition in CT.  The caller
   has already consumed the PCOMPUTE keyword.  The caller, cmd_ctables(),
   treats a false result as failure. */
4567 ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct)
/* One token back from the current position, so that the recorded location
   includes the token the caller consumed just before calling. */
4569 int pcompute_start = lex_ofs (lexer) - 1;
4571 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
/* NAME is an owned copy of the identifier token. */
4574 char *name = ss_xstrdup (lex_tokss (lexer));
4577 if (!lex_force_match (lexer, T_EQUALS)
4578 || !lex_force_match_id (lexer, "EXPR")
4579 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression so that its source text can be
   used as the label below. */
4585 int expr_start = lex_ofs (lexer);
4586 struct ctables_pcexpr *expr = parse_add (lexer);
4587 int expr_end = lex_ofs (lexer) - 1;
4588 if (!expr || !lex_force_match (lexer, T_RPAREN))
4593 int pcompute_end = lex_ofs (lexer) - 1;
4595 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
/* A definition with the same name (compared without regard to case) may
   already exist. */
4598 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, point at the earlier definition, and release the
   expression and location that the earlier definition owned. */
4601 msg_at (SW, location, _("New definition of &%s will override the "
4602 "previous definition."),
4604 msg_at (SN, pc->location, _("This is the previous definition."));
4606 ctables_pcexpr_destroy (pc->expr);
4607 msg_location_destroy (pc->location);
/* First definition: create an entry that takes over NAME and add it to the
   hash map under the case-insensitive hash of the name. */
4612 pc = xmalloc (sizeof *pc);
4613 *pc = (struct ctables_postcompute) { .name = name };
4614 hmap_insert (&ct->postcomputes, &pc->hmap_node,
4615 utf8_hash_case_string (pc->name, 0));
4618 pc->location = location;
/* The label is taken from the source text of the expression. */
4620 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the argument of PPROPERTIES FORMAT: a sequence of entries, each a
   summary function, a percentile value when the function is CTSF_PTILE, and
   an output format.  The entries are appended to *SSS, which is reset to an
   empty set first.  Parsing stops at the end of the command, at a slash, or
   at a LABEL or HIDESOURCECATS keyword.  The caller,
   ctables_parse_pproperties(), treats a false result as failure. */
4625 ctables_parse_pproperties_format (struct lexer *lexer,
4626 struct ctables_summary_spec_set *sss)
4628 *sss = (struct ctables_summary_spec_set) { .n = 0 };
4630 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
4631 && !(lex_token (lexer) == T_ID
4632 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
4633 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
4634 lex_tokss (lexer)))))
4636 /* Parse function. */
4637 enum ctables_summary_function function;
4638 if (!parse_ctables_summary_function (lexer, &function))
4641 /* Parse percentile. */
4642 double percentile = 0;
4643 if (function == CTSF_PTILE)
4645 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4647 percentile = lex_number (lexer);
/* The format must parse, be valid for output, and be compatible with
   numeric values. */
4652 struct fmt_spec format;
4653 if (!parse_format_specifier (lexer, &format)
4654 || !fmt_check_output (&format)
4655 || !fmt_check_type_compat (&format, VAL_NUMERIC))
/* Grow the array when it is full, then append the new entry. */
4658 if (sss->n >= sss->allocated)
4659 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
4660 sizeof *sss->specs);
4661 sss->specs[sss->n++] = (struct ctables_summary_spec) {
4662 .function = function,
4663 .percentile = percentile,
/* NOTE(review): presumably the shared error exit, discarding whatever was
   parsed so far -- the label and return are not visible here. */
4670 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of `&name'
   references to postcomputes previously defined in CT, followed by any
   number of LABEL, FORMAT, and HIDESOURCECATS settings.  Each setting is
   applied to every postcompute named in the list.  The caller,
   cmd_ctables(), treats a false result as failure. */
4675 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
4677 struct ctables_postcompute **pcs = NULL;
4679 size_t allocated_pcs = 0;
4681 while (lex_match (lexer, T_AND))
4683 if (!lex_force_id (lexer))
4685 struct ctables_postcompute *pc
4686 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
4689 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
4694 if (n_pcs >= allocated_pcs)
4695 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
4699 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4701 if (lex_match_id (lexer, "LABEL"))
4703 lex_match (lexer, T_EQUALS);
4704 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label, replacing any label it
   already had. */
4707 for (size_t i = 0; i < n_pcs; i++)
4709 free (pcs[i]->label);
4710 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
4715 else if (lex_match_id (lexer, "FORMAT"))
4717 lex_match (lexer, T_EQUALS);
4719 struct ctables_summary_spec_set sss;
4720 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets its own clone of the parsed specs; the parsed
   original is released once all clones have been made. */
4723 for (size_t i = 0; i < n_pcs; i++)
4726 ctables_summary_spec_set_uninit (pcs[i]->specs);
4728 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
4729 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
4731 ctables_summary_spec_set_uninit (&sss);
4733 else if (lex_match_id (lexer, "HIDESOURCECATS"))
4735 lex_match (lexer, T_EQUALS);
4736 bool hide_source_cats;
4737 if (!parse_bool (lexer, &hide_source_cats))
4739 for (size_t i = 0; i < n_pcs; i++)
4740 pcs[i]->hide_source_cats = hide_source_cats;
4744 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Parses and executes the CTABLES command read from LEXER against dataset
   DS.

   The command starts with optional global subcommands (FORMAT, VLABELS,
   MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS),
   followed by one or more TABLE subcommands, each of which may be followed
   by per-table subcommands (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES,
   SIGTEST, COMPARETEST).

   Returns CMD_SUCCESS if ctables_execute() reports success, otherwise
   CMD_FAILURE.  The ctables structure built here is destroyed before
   returning. */
4757 cmd_ctables (struct lexer *lexer, struct dataset *ds)
/* Every variable starts out with the default variable-label display
   setting. */
4759 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
4760 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
4761 enum settings_value_show tvars = settings_get_show_variables ();
4762 for (size_t i = 0; i < n_vars; i++)
4763 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on a private copy of the default look so that it can be modified. */
4765 struct pivot_table_look *look = pivot_table_look_unshare (
4766 pivot_table_look_ref (pivot_table_look_get_default ()));
4767 look->omit_empty = false;
4769 struct ctables *ct = xmalloc (sizeof *ct);
4770 *ct = (struct ctables) {
4771 .dict = dataset_dict (ds),
4773 .ctables_formats = FMT_SETTINGS_INIT,
4775 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Register the four CTABLES-specific number styles as custom currency
   formats, picking the string that matches the current decimal point
   character. */
4781 const char *dot_string;
4782 const char *comma_string;
4784 static const struct ctf ctfs[4] = {
4785 { CTEF_NEGPAREN, "(,,,)", "(...)" },
4786 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
4787 { CTEF_PAREN, "-,(,),", "-.(.)." },
4788 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
4790 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
4791 for (size_t i = 0; i < 4; i++)
4793 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
4794 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
4795 fmt_number_style_from_string (s));
4798 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands, up to the first TABLE. */
4801 while (!lex_match_id (lexer, "TABLE"))
4803 if (lex_match_id (lexer, "FORMAT"))
4805 double widths[2] = { SYSMIS, SYSMIS };
4806 double units_per_inch = 72.0;
4808 while (lex_token (lexer) != T_SLASH)
4810 if (lex_match_id (lexer, "MINCOLWIDTH"))
4812 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
4815 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
4817 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
4820 else if (lex_match_id (lexer, "UNITS"))
4822 lex_match (lexer, T_EQUALS);
4823 if (lex_match_id (lexer, "POINTS"))
4824 units_per_inch = 72.0;
4825 else if (lex_match_id (lexer, "INCHES"))
4826 units_per_inch = 1.0;
4827 else if (lex_match_id (lexer, "CM"))
4828 units_per_inch = 2.54;
4831 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
4835 else if (lex_match_id (lexer, "EMPTY"))
4840 lex_match (lexer, T_EQUALS);
4841 if (lex_match_id (lexer, "ZERO"))
4843 /* Nothing to do. */
4845 else if (lex_match_id (lexer, "BLANK"))
4846 ct->zero = xstrdup ("");
4847 else if (lex_force_string (lexer))
4849 ct->zero = ss_xstrdup (lex_tokss (lexer));
4855 else if (lex_match_id (lexer, "MISSING"))
4857 lex_match (lexer, T_EQUALS);
4858 if (!lex_force_string (lexer))
/* Any string other than "." is stored as given (the value used for "." is
   not visible in this excerpt). */
4862 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
4863 ? ss_xstrdup (lex_tokss (lexer))
4869 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
4870 "UNITS", "EMPTY", "MISSING");
4875 if (widths[0] != SYSMIS && widths[1] != SYSMIS
4876 && widths[0] > widths[1])
4878 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each width that was specified from the chosen units into the
   look's width range, scaling by 96 per inch. */
4882 for (size_t i = 0; i < 2; i++)
4883 if (widths[i] != SYSMIS)
4885 int *wr = ct->look->width_ranges[TABLE_HORZ];
4886 wr[i] = widths[i] / units_per_inch * 96.0;
4891 else if (lex_match_id (lexer, "VLABELS"))
4893 if (!lex_force_match_id (lexer, "VARIABLES"))
4895 lex_match (lexer, T_EQUALS);
4897 struct variable **vars;
4899 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
4903 if (!lex_force_match_id (lexer, "DISPLAY"))
4908 lex_match (lexer, T_EQUALS);
4910 enum ctables_vlabel vlabel;
4911 if (lex_match_id (lexer, "DEFAULT"))
4912 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
4913 else if (lex_match_id (lexer, "NAME"))
4915 else if (lex_match_id (lexer, "LABEL"))
4916 vlabel = CTVL_LABEL;
4917 else if (lex_match_id (lexer, "BOTH"))
4919 else if (lex_match_id (lexer, "NONE"))
4923 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Apply the chosen display mode to each listed variable, indexed by its
   position in the dictionary. */
4929 for (size_t i = 0; i < n_vars; i++)
4930 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
4933 else if (lex_match_id (lexer, "MRSETS"))
4935 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
4937 lex_match (lexer, T_EQUALS);
4938 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
4941 else if (lex_match_id (lexer, "SMISSING"))
4943 if (lex_match_id (lexer, "VARIABLE"))
4944 ct->smissing_listwise = false;
4945 else if (lex_match_id (lexer, "LISTWISE"))
4946 ct->smissing_listwise = true;
4949 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
4953 else if (lex_match_id (lexer, "PCOMPUTE"))
4955 if (!ctables_parse_pcompute (lexer, ct))
4958 else if (lex_match_id (lexer, "PPROPERTIES"))
4960 if (!ctables_parse_pproperties (lexer, ct))
4963 else if (lex_match_id (lexer, "WEIGHT"))
4965 if (!lex_force_match_id (lexer, "VARIABLE"))
4967 lex_match (lexer, T_EQUALS);
4968 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* The keyword must be spelled without a leading space: an identifier token
   can never match " HIDESMALLCOUNTS", so the subcommand used to be
   unreachable and was reported as a syntax error instead. */
4972 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
4974 if (lex_match_id (lexer, "COUNT"))
4976 lex_match (lexer, T_EQUALS);
4977 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
4980 ct->hide_threshold = lex_integer (lexer);
/* Without an explicit COUNT, the threshold becomes 5 unless one has already
   been set. */
4983 else if (ct->hide_threshold == 0)
4984 ct->hide_threshold = 5;
4988 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
4989 "SMISSING", "PCOMPUTE", "PPROPERTIES",
4990 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
4994 if (!lex_force_match (lexer, T_SLASH))
/* One iteration per TABLE subcommand. */
4998 size_t allocated_tables = 0;
5001 if (ct->n_tables >= allocated_tables)
5002 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5003 sizeof *ct->tables);
/* Default category specification for the new table: ascending order, with
   missing values excluded. */
5005 struct ctables_category *cat = xmalloc (sizeof *cat);
5006 *cat = (struct ctables_category) {
5008 .include_missing = false,
5009 .sort_ascending = true,
5012 struct ctables_categories *c = xmalloc (sizeof *c);
5013 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5014 *c = (struct ctables_categories) {
/* One categories pointer per dictionary variable. */
5021 struct ctables_categories **categories = xnmalloc (n_vars,
5022 sizeof *categories);
5023 for (size_t i = 0; i < n_vars; i++)
/* New table with default settings: summary labels visible on the column
   axis, and each axis's category labels displayed on that same axis. */
5026 struct ctables_table *t = xmalloc (sizeof *t);
5027 *t = (struct ctables_table) {
5029 .slabels_axis = PIVOT_AXIS_COLUMN,
5030 .slabels_visible = true,
5031 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5033 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5034 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5035 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5037 .clabels_from_axis = PIVOT_AXIS_LAYER,
5038 .categories = categories,
5039 .n_categories = n_vars,
5042 ct->tables[ct->n_tables++] = t;
/* Axis expressions: rows [BY columns [BY layers]]. */
5044 lex_match (lexer, T_EQUALS);
5045 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5047 if (lex_match (lexer, T_BY))
5049 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5050 ct, t, PIVOT_AXIS_COLUMN))
5053 if (lex_match (lexer, T_BY))
5055 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5056 ct, t, PIVOT_AXIS_LAYER))
5061 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5062 && !t->axes[PIVOT_AXIS_LAYER])
5064 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables may appear on at most one axis; report each offending
   axis when that is violated. */
5068 const struct ctables_axis *scales[PIVOT_N_AXES];
5069 size_t n_scales = 0;
5070 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5072 scales[a] = find_scale (t->axes[a]);
5078 msg (SE, _("Scale variables may appear only on one axis."));
5079 if (scales[PIVOT_AXIS_ROW])
5080 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5081 _("This scale variable appears on the rows axis."));
5082 if (scales[PIVOT_AXIS_COLUMN])
5083 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5084 _("This scale variable appears on the columns axis."));
5085 if (scales[PIVOT_AXIS_LAYER])
5086 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5087 _("This scale variable appears on the layer axis."));
/* Likewise, summaries may appear on at most one axis. */
5091 const struct ctables_axis *summaries[PIVOT_N_AXES];
5092 size_t n_summaries = 0;
5093 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5095 summaries[a] = (scales[a]
5097 : find_categorical_summary_spec (t->axes[a]));
5101 if (n_summaries > 1)
5103 msg (SE, _("Summaries may appear only on one axis."));
5104 if (summaries[PIVOT_AXIS_ROW])
5105 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5106 _("This variable on the rows axis has a summary."));
5107 if (summaries[PIVOT_AXIS_COLUMN])
5108 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5109 _("This variable on the columns axis has a summary."));
5110 if (summaries[PIVOT_AXIS_LAYER])
5111 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5112 _("This variable on the layers axis has a summary."));
/* The summary axis is chosen from the axes that have a summary if there is
   any summary at all, otherwise from the axes that are present. */
5115 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5116 if (n_summaries ? summaries[a] : t->axes[a])
5118 t->summary_axis = a;
5122 if (lex_token (lexer) == T_ENDCMD)
5124 if (!ctables_prepare_table (t))
5128 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
5131 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5133 if (lex_match_id (lexer, "SLABELS"))
5135 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5137 if (lex_match_id (lexer, "POSITION"))
5139 lex_match (lexer, T_EQUALS);
5140 if (lex_match_id (lexer, "COLUMN"))
5141 t->slabels_axis = PIVOT_AXIS_COLUMN;
5142 else if (lex_match_id (lexer, "ROW"))
5143 t->slabels_axis = PIVOT_AXIS_ROW;
5144 else if (lex_match_id (lexer, "LAYER"))
5145 t->slabels_axis = PIVOT_AXIS_LAYER;
5148 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5152 else if (lex_match_id (lexer, "VISIBLE"))
5154 lex_match (lexer, T_EQUALS);
5155 if (!parse_bool (lexer, &t->slabels_visible))
5160 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5165 else if (lex_match_id (lexer, "CLABELS"))
5167 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5169 if (lex_match_id (lexer, "AUTO"))
5171 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5172 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5174 else if (lex_match_id (lexer, "ROWLABELS"))
5176 lex_match (lexer, T_EQUALS);
5177 if (lex_match_id (lexer, "OPPOSITE"))
5178 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5179 else if (lex_match_id (lexer, "LAYER"))
5180 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5183 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5187 else if (lex_match_id (lexer, "COLLABELS"))
5189 lex_match (lexer, T_EQUALS);
5190 if (lex_match_id (lexer, "OPPOSITE"))
5191 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5192 else if (lex_match_id (lexer, "LAYER"))
5193 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5196 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5202 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5208 else if (lex_match_id (lexer, "CRITERIA"))
5210 if (!lex_force_match_id (lexer, "CILEVEL"))
5212 lex_match (lexer, T_EQUALS);
5214 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5216 t->cilevel = lex_number (lexer);
5219 else if (lex_match_id (lexer, "CATEGORIES"))
5221 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5225 else if (lex_match_id (lexer, "TITLES"))
5230 if (lex_match_id (lexer, "CAPTION"))
5231 textp = &t->caption;
5232 else if (lex_match_id (lexer, "CORNER"))
5234 else if (lex_match_id (lexer, "TITLE"))
5238 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5241 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are joined with single spaces into one text. */
5243 struct string s = DS_EMPTY_INITIALIZER;
5244 while (lex_is_string (lexer))
5246 if (!ds_is_empty (&s))
5247 ds_put_byte (&s, ' ');
5248 ds_put_substring (&s, lex_tokss (lexer));
5252 *textp = ds_steal_cstr (&s);
5254 while (lex_token (lexer) != T_SLASH
5255 && lex_token (lexer) != T_ENDCMD);
5257 else if (lex_match_id (lexer, "SIGTEST"))
5261 t->chisq = xmalloc (sizeof *t->chisq);
5262 *t->chisq = (struct ctables_chisq) {
5264 .include_mrsets = true,
5265 .all_visible = true,
5271 if (lex_match_id (lexer, "TYPE"))
5273 lex_match (lexer, T_EQUALS);
5274 if (!lex_force_match_id (lexer, "CHISQUARE"))
5277 else if (lex_match_id (lexer, "ALPHA"))
5279 lex_match (lexer, T_EQUALS);
5280 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5282 t->chisq->alpha = lex_number (lexer);
5285 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5287 lex_match (lexer, T_EQUALS);
/* The guarded statement is the failure path, so it must run when
   parse_bool() fails, as it does for INCLUDEMRSETS under COMPARETEST.  The
   test used to lack the negation, which rejected valid settings and
   accepted invalid ones. */
5288 if (!parse_bool (lexer, &t->chisq->include_mrsets))
5291 else if (lex_match_id (lexer, "CATEGORIES"))
5293 lex_match (lexer, T_EQUALS);
5294 if (lex_match_id (lexer, "ALLVISIBLE"))
5295 t->chisq->all_visible = true;
5296 else if (lex_match_id (lexer, "SUBTOTALS"))
5297 t->chisq->all_visible = false;
5300 lex_error_expecting (lexer,
5301 "ALLVISIBLE", "SUBTOTALS");
5307 lex_error_expecting (lexer, "TYPE", "ALPHA",
5308 "INCLUDEMRSETS", "CATEGORIES");
5312 while (lex_token (lexer) != T_SLASH
5313 && lex_token (lexer) != T_ENDCMD);
5315 else if (lex_match_id (lexer, "COMPARETEST"))
5319 t->pairwise = xmalloc (sizeof *t->pairwise);
5320 *t->pairwise = (struct ctables_pairwise) {
5322 .alpha = { .05, .05 },
5323 .adjust = BONFERRONI,
5324 .include_mrsets = true,
5325 .meansvariance_allcats = true,
5326 .all_visible = true,
5335 if (lex_match_id (lexer, "TYPE"))
5337 lex_match (lexer, T_EQUALS);
5338 if (lex_match_id (lexer, "PROP"))
5339 t->pairwise->type = PROP;
5340 else if (lex_match_id (lexer, "MEAN"))
5341 t->pairwise->type = MEAN;
5344 lex_error_expecting (lexer, "PROP", "MEAN");
5348 else if (lex_match_id (lexer, "ALPHA"))
5350 lex_match (lexer, T_EQUALS);
5352 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5354 double a0 = lex_number (lexer);
/* A second alpha value is optional.  When both are given, the smaller one
   is stored first; when only one is given, it is used for both. */
5357 lex_match (lexer, T_COMMA);
5358 if (lex_is_number (lexer))
5360 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5362 double a1 = lex_number (lexer);
5365 t->pairwise->alpha[0] = MIN (a0, a1);
5366 t->pairwise->alpha[1] = MAX (a0, a1);
5369 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5371 else if (lex_match_id (lexer, "ADJUST"))
5373 lex_match (lexer, T_EQUALS);
5374 if (lex_match_id (lexer, "BONFERRONI"))
5375 t->pairwise->adjust = BONFERRONI;
5376 else if (lex_match_id (lexer, "BH"))
5377 t->pairwise->adjust = BH;
5378 else if (lex_match_id (lexer, "NONE"))
5379 t->pairwise->adjust = 0;
5382 lex_error_expecting (lexer, "BONFERRONI", "BH",
5387 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5389 lex_match (lexer, T_EQUALS);
5390 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5393 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5395 lex_match (lexer, T_EQUALS);
5396 if (lex_match_id (lexer, "ALLCATS"))
5397 t->pairwise->meansvariance_allcats = true;
5398 else if (lex_match_id (lexer, "TESTEDCATS"))
5399 t->pairwise->meansvariance_allcats = false;
5402 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5406 else if (lex_match_id (lexer, "CATEGORIES"))
5408 lex_match (lexer, T_EQUALS);
5409 if (lex_match_id (lexer, "ALLVISIBLE"))
5410 t->pairwise->all_visible = true;
5411 else if (lex_match_id (lexer, "SUBTOTALS"))
5412 t->pairwise->all_visible = false;
5415 lex_error_expecting (lexer, "ALLVISIBLE",
5420 else if (lex_match_id (lexer, "MERGE"))
5422 lex_match (lexer, T_EQUALS);
5423 if (!parse_bool (lexer, &t->pairwise->merge))
5426 else if (lex_match_id (lexer, "STYLE"))
5428 lex_match (lexer, T_EQUALS);
5429 if (lex_match_id (lexer, "APA"))
5430 t->pairwise->apa_style = true;
5431 else if (lex_match_id (lexer, "SIMPLE"))
5432 t->pairwise->apa_style = false;
5435 lex_error_expecting (lexer, "APA", "SIMPLE");
5439 else if (lex_match_id (lexer, "SHOWSIG"))
5441 lex_match (lexer, T_EQUALS);
5442 if (!parse_bool (lexer, &t->pairwise->show_sig))
5447 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5448 "INCLUDEMRSETS", "MEANSVARIANCE",
5449 "CATEGORIES", "MERGE", "STYLE",
5454 while (lex_token (lexer) != T_SLASH
5455 && lex_token (lexer) != T_ENDCMD);
5459 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5460 "CRITERIA", "CATEGORIES", "TITLES",
5461 "SIGTEST", "COMPARETEST");
5465 if (!lex_match (lexer, T_SLASH))
/* Category labels can be moved away from at most one of the row and column
   axes. */
5469 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5470 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5472 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5476 if (!ctables_prepare_table (t))
5479 while (lex_token (lexer) != T_ENDCMD);
/* Parsing succeeded: run the tables, then release everything. */
5481 bool ok = ctables_execute (ds, ct);
5482 ctables_destroy (ct);
5483 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Failure path: release the partially built command state. */
5486 ctables_destroy (ct);