1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
#if 0         /* Multiple response sets not yet implemented. */
  /* Summary functions for multiple response sets, kept here until CTFA_MRSETS
     is supported.  Every NAME must be distinct, because
     parse_ctables_summary_function() identifies a function by its name. */
  S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
#endif
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
450 struct hmap_node node;
455 struct ctables_occurrence
457 struct hmap_node node;
461 struct ctables_section
464 struct ctables_table *table;
465 struct ctables_nest *nests[PIVOT_N_AXES];
468 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
469 struct hmap cells; /* Contains "struct ctables_cell"s. */
470 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 struct ctables *ctables;
476 struct ctables_axis *axes[PIVOT_N_AXES];
477 struct ctables_stack stacks[PIVOT_N_AXES];
478 struct ctables_section *sections;
480 enum pivot_axis_type summary_axis;
481 struct ctables_summary_spec_set summary_specs;
482 struct variable **sum_vars;
485 enum pivot_axis_type slabels_axis;
486 bool slabels_visible;
488 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
490 Most commonly, label_axis[a] == a, and in particular we always have
491 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
493 If ROWLABELS or COLLABELS is specified, then one of
494 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
495 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
497 If any category labels are moved, then 'clabels_example' is one of the
498 variables being moved (and it is otherwise NULL). All of the variables
499 being moved have the same width, value labels, and categories, so this
500 example variable can be used to find those out.
502 The remaining members in this group are relevant only if category labels
505 'clabels_values_map' holds a "struct ctables_value" for all the values
506 that appear in all of the variables in the moved categories. It is
507 accumulated as the data is read. Once the data is fully read, its
508 sorted values are put into 'clabels_values' and 'n_clabels_values'.
510 enum pivot_axis_type label_axis[PIVOT_N_AXES];
511 enum pivot_axis_type clabels_from_axis;
512 const struct variable *clabels_example;
513 struct hmap clabels_values_map;
514 struct ctables_value **clabels_values;
515 size_t n_clabels_values;
517 /* Indexed by variable dictionary index. */
518 struct ctables_categories **categories;
527 struct ctables_chisq *chisq;
528 struct ctables_pairwise *pairwise;
531 struct ctables_categories
534 struct ctables_category *cats;
539 struct ctables_category
541 enum ctables_category_type
543 /* Explicit category lists. */
546 CCT_NRANGE, /* Numerical range. */
547 CCT_SRANGE, /* String range. */
552 /* Totals and subtotals. */
556 /* Implicit category lists. */
561 /* For contributing to TOTALN. */
562 CCT_EXCLUDED_MISSING,
566 struct ctables_category *subtotal;
572 double number; /* CCT_NUMBER. */
573 struct substring string; /* CCT_STRING, in dictionary encoding. */
574 double nrange[2]; /* CCT_NRANGE. */
575 struct substring srange[2]; /* CCT_SRANGE. */
579 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
580 bool hide_subcategories; /* CCT_SUBTOTAL. */
583 /* CCT_POSTCOMPUTE. */
586 const struct ctables_postcompute *pc;
587 enum fmt_type parse_format;
590 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
593 bool include_missing;
597 enum ctables_summary_function sort_function;
598 struct variable *sort_var;
603 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
604 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
605 struct msg_location *location;
609 ctables_category_uninit (struct ctables_category *cat)
620 case CCT_POSTCOMPUTE:
624 ss_dealloc (&cat->string);
628 ss_dealloc (&cat->srange[0]);
629 ss_dealloc (&cat->srange[1]);
634 free (cat->total_label);
642 case CCT_EXCLUDED_MISSING:
648 nullable_substring_equal (const struct substring *a,
649 const struct substring *b)
651 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
655 ctables_category_equal (const struct ctables_category *a,
656 const struct ctables_category *b)
658 if (a->type != b->type)
664 return a->number == b->number;
667 return ss_equals (a->string, b->string);
670 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
673 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
674 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
680 case CCT_POSTCOMPUTE:
681 return a->pc == b->pc;
685 return !strcmp (a->total_label, b->total_label);
690 return (a->include_missing == b->include_missing
691 && a->sort_ascending == b->sort_ascending
692 && a->sort_function == b->sort_function
693 && a->sort_var == b->sort_var
694 && a->percentile == b->percentile);
696 case CCT_EXCLUDED_MISSING:
704 ctables_categories_unref (struct ctables_categories *c)
709 assert (c->n_refs > 0);
713 for (size_t i = 0; i < c->n_cats; i++)
714 ctables_category_uninit (&c->cats[i]);
720 ctables_categories_equal (const struct ctables_categories *a,
721 const struct ctables_categories *b)
723 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
726 for (size_t i = 0; i < a->n_cats; i++)
727 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
733 /* Chi-square test (SIGTEST). */
741 /* Pairwise comparison test (COMPARETEST). */
742 struct ctables_pairwise
744 enum { PROP, MEAN } type;
747 bool meansvariance_allcats;
749 enum { BONFERRONI = 1, BH } adjust;
773 struct variable *var;
775 struct ctables_summary_spec_set specs[N_CSVS];
779 struct ctables_axis *subs[2];
782 struct msg_location *loc;
785 static void ctables_axis_destroy (struct ctables_axis *);
794 enum ctables_function_availability
796 CTFA_ALL, /* Any variables. */
797 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
798 //CTFA_MRSETS, /* Only multiple-response sets */
801 struct ctables_summary_spec
803 enum ctables_summary_function function;
804 double percentile; /* CTSF_PTILE only. */
807 struct fmt_spec format;
808 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
815 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
816 const struct ctables_summary_spec *src)
819 dst->label = xstrdup_if_nonnull (src->label);
823 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
830 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
831 const struct ctables_summary_spec_set *src)
833 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
834 for (size_t i = 0; i < src->n; i++)
835 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
837 *dst = (struct ctables_summary_spec_set) {
842 .is_scale = src->is_scale,
847 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
849 for (size_t i = 0; i < set->n; i++)
850 ctables_summary_spec_uninit (&set->specs[i]);
855 parse_col_width (struct lexer *lexer, const char *name, double *width)
857 lex_match (lexer, T_EQUALS);
858 if (lex_match_id (lexer, "DEFAULT"))
860 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
862 *width = lex_number (lexer);
/* Matches the identifier "NO" or "YES" in LEXER.

   If "NO" matches, stores false in *B; if "YES" matches, stores true in *B.
   In both cases returns true.  If neither matches, reports an error that
   names both expected keywords, leaves *B unchanged, and returns false. */
static bool
parse_bool (struct lexer *lexer, bool *b)
{
  if (lex_match_id (lexer, "NO"))
    *b = false;
  else if (lex_match_id (lexer, "YES"))
    *b = true;
  else
    {
      lex_error_expecting (lexer, "YES", "NO");
      return false;
    }

  return true;
}
886 static enum ctables_function_availability
887 ctables_function_availability (enum ctables_summary_function f)
889 static enum ctables_function_availability availability[] = {
890 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
895 return availability[f];
899 ctables_summary_function_is_count (enum ctables_summary_function f)
905 case CTSF_ROWPCT_COUNT:
906 case CTSF_COLPCT_COUNT:
907 case CTSF_TABLEPCT_COUNT:
908 case CTSF_SUBTABLEPCT_COUNT:
909 case CTSF_LAYERPCT_COUNT:
910 case CTSF_LAYERROWPCT_COUNT:
911 case CTSF_LAYERCOLPCT_COUNT:
913 case CTSF_UROWPCT_COUNT:
914 case CTSF_UCOLPCT_COUNT:
915 case CTSF_UTABLEPCT_COUNT:
916 case CTSF_USUBTABLEPCT_COUNT:
917 case CTSF_ULAYERPCT_COUNT:
918 case CTSF_ULAYERROWPCT_COUNT:
919 case CTSF_ULAYERCOLPCT_COUNT:
922 case CTSF_ROWPCT_VALIDN:
923 case CTSF_COLPCT_VALIDN:
924 case CTSF_TABLEPCT_VALIDN:
925 case CTSF_SUBTABLEPCT_VALIDN:
926 case CTSF_LAYERPCT_VALIDN:
927 case CTSF_LAYERROWPCT_VALIDN:
928 case CTSF_LAYERCOLPCT_VALIDN:
929 case CTSF_ROWPCT_TOTALN:
930 case CTSF_COLPCT_TOTALN:
931 case CTSF_TABLEPCT_TOTALN:
932 case CTSF_SUBTABLEPCT_TOTALN:
933 case CTSF_LAYERPCT_TOTALN:
934 case CTSF_LAYERROWPCT_TOTALN:
935 case CTSF_LAYERCOLPCT_TOTALN:
952 case CTSF_ROWPCT_SUM:
953 case CTSF_COLPCT_SUM:
954 case CTSF_TABLEPCT_SUM:
955 case CTSF_SUBTABLEPCT_SUM:
956 case CTSF_LAYERPCT_SUM:
957 case CTSF_LAYERROWPCT_SUM:
958 case CTSF_LAYERCOLPCT_SUM:
959 case CTSF_UROWPCT_VALIDN:
960 case CTSF_UCOLPCT_VALIDN:
961 case CTSF_UTABLEPCT_VALIDN:
962 case CTSF_USUBTABLEPCT_VALIDN:
963 case CTSF_ULAYERPCT_VALIDN:
964 case CTSF_ULAYERROWPCT_VALIDN:
965 case CTSF_ULAYERCOLPCT_VALIDN:
966 case CTSF_UROWPCT_TOTALN:
967 case CTSF_UCOLPCT_TOTALN:
968 case CTSF_UTABLEPCT_TOTALN:
969 case CTSF_USUBTABLEPCT_TOTALN:
970 case CTSF_ULAYERPCT_TOTALN:
971 case CTSF_ULAYERROWPCT_TOTALN:
972 case CTSF_ULAYERCOLPCT_TOTALN:
984 case CTSF_UROWPCT_SUM:
985 case CTSF_UCOLPCT_SUM:
986 case CTSF_UTABLEPCT_SUM:
987 case CTSF_USUBTABLEPCT_SUM:
988 case CTSF_ULAYERPCT_SUM:
989 case CTSF_ULAYERROWPCT_SUM:
990 case CTSF_ULAYERCOLPCT_SUM:
998 parse_ctables_summary_function (struct lexer *lexer,
999 enum ctables_summary_function *f)
1003 enum ctables_summary_function function;
1004 struct substring name;
1006 static struct pair names[] = {
1007 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
1008 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
1011 /* The .COUNT suffix may be omitted. */
1012 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
1013 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
1014 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
1015 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
1016 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
1017 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
1018 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
1022 if (!lex_force_id (lexer))
1025 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
1026 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
1028 *f = names[i].function;
1033 lex_error (lexer, _("Expecting summary function name."));
1038 ctables_axis_destroy (struct ctables_axis *axis)
1046 for (size_t i = 0; i < N_CSVS; i++)
1047 ctables_summary_spec_set_uninit (&axis->specs[i]);
1052 ctables_axis_destroy (axis->subs[0]);
1053 ctables_axis_destroy (axis->subs[1]);
1056 msg_location_destroy (axis->loc);
1060 static struct ctables_axis *
1061 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1062 struct ctables_axis *sub0,
1063 struct ctables_axis *sub1,
1064 struct lexer *lexer, int start_ofs)
1066 struct ctables_axis *axis = xmalloc (sizeof *axis);
1067 *axis = (struct ctables_axis) {
1069 .subs = { sub0, sub1 },
1070 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1075 struct ctables_axis_parse_ctx
1077 struct lexer *lexer;
1078 struct dictionary *dict;
1080 struct ctables_table *t;
1083 static struct fmt_spec
1084 ctables_summary_default_format (enum ctables_summary_function function,
1085 const struct variable *var)
1087 static const enum ctables_format default_formats[] = {
1088 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1092 switch (default_formats[function])
1095 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1098 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1101 return *var_get_print_format (var);
1108 static struct pivot_value *
1109 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1113 static const char *default_labels[] = {
1114 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1119 return (spec->function == CTSF_PTILE
1120 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1122 : pivot_value_new_text (default_labels[spec->function]));
1126 struct substring in = ss_cstr (spec->label);
1127 struct substring target = ss_cstr (")CILEVEL");
1129 struct string out = DS_EMPTY_INITIALIZER;
1132 size_t chunk = ss_find_substring (in, target);
1133 ds_put_substring (&out, ss_head (in, chunk));
1134 ss_advance (&in, chunk);
1136 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1138 ss_advance (&in, target.length);
1139 ds_put_format (&out, "%g", cilevel);
1145 ctables_summary_function_name (enum ctables_summary_function function)
1147 static const char *names[] = {
1148 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1152 return names[function];
1156 add_summary_spec (struct ctables_axis *axis,
1157 enum ctables_summary_function function, double percentile,
1158 const char *label, const struct fmt_spec *format,
1159 bool is_ctables_format, const struct msg_location *loc,
1160 enum ctables_summary_variant sv)
1162 if (axis->op == CTAO_VAR)
1164 const char *function_name = ctables_summary_function_name (function);
1165 const char *var_name = var_get_name (axis->var);
1166 switch (ctables_function_availability (function))
1170 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1171 "response sets."), function_name);
1172 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1178 if (!axis->scale && sv != CSV_TOTAL)
1181 _("Summary function %s applies only to scale variables."),
1183 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1193 struct ctables_summary_spec_set *set = &axis->specs[sv];
1194 if (set->n >= set->allocated)
1195 set->specs = x2nrealloc (set->specs, &set->allocated,
1196 sizeof *set->specs);
1198 struct ctables_summary_spec *dst = &set->specs[set->n++];
1199 *dst = (struct ctables_summary_spec) {
1200 .function = function,
1201 .percentile = percentile,
1202 .label = xstrdup_if_nonnull (label),
1203 .format = (format ? *format
1204 : ctables_summary_default_format (function, axis->var)),
1205 .is_ctables_format = is_ctables_format,
1211 for (size_t i = 0; i < 2; i++)
1212 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1213 format, is_ctables_format, loc, sv))
/* Forward declaration: ctables_axis_parse_primary() calls this function for
   parenthesized sub-expressions before its definition appears. */
1219 static struct ctables_axis *ctables_axis_parse_stack (
1220 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression from CTX->lexer: either a parenthesized
   sub-expression, which is handed to ctables_axis_parse_stack(), or a single
   variable name, which becomes a newly allocated CTAO_VAR leaf.

   A "[S]" suffix marks the leaf as scale and "[C]" as non-scale; without
   either, the leaf is scale exactly when the variable's measurement level
   is MEASURE_SCALE.  A string variable may not be used as scale.  The
   return statements are not visible in this listing. */
1223 static struct ctables_axis *
1224 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1226 if (lex_match (ctx->lexer, T_LPAREN))
1228 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* Discard the sub-expression if it failed or ")" is missing. */
1229 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1231 ctables_axis_destroy (sub);
1237 if (!lex_force_id (ctx->lexer))
/* Remember where the variable starts so that axis->loc can cover the
   variable and any [S]/[C] suffix. */
1240 int start_ofs = lex_ofs (ctx->lexer);
1241 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1245 struct ctables_axis *axis = xmalloc (sizeof *axis);
1246 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1248 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1249 : lex_match_phrase (ctx->lexer, "[C]") ? false
1250 : var_get_measure (var) == MEASURE_SCALE);
1251 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1252 lex_ofs (ctx->lexer) - 1);
/* Scale treatment is only meaningful for numeric variables. */
1253 if (axis->scale && var_is_alpha (var))
1255 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1257 var_get_name (var));
1258 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit and false
   otherwise (including when S is the empty string).

   strcspn() yields the length of the leading part of S that contains no
   digit.  The byte at that offset is therefore the first digit if there is
   one, or the terminating null byte if there is none. */
static bool
has_digit (const char *s)
{
  return s[strcspn (s, "0123456789")] != '\0';
}
/* Parses an output format for a summary and stores it in *FORMAT.

   The type names NEGPAREN, NEQUAL, PAREN and PCTPAREN (compared without
   regard to case) select the CTABLES-specific CTEF_* format types; for
   these, width and decimals are validated and *IS_CTABLES_FORMAT is set to
   true.  On the other visible path *IS_CTABLES_FORMAT is set to false and
   the result is that of parse_format_specifier() combined with
   fmt_check_output() and a numeric type-compatibility check.

   Several lines, including the conditions guarding the first error message
   and the remaining returns, are not visible in this listing. */
1272 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1273 bool *is_ctables_format)
1275 char type[FMT_TYPE_LEN_MAX + 1];
/* Split the token into a type name plus width and decimals. */
1276 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1279 if (!strcasecmp (type, "NEGPAREN"))
1280 format->type = CTEF_NEGPAREN;
1281 else if (!strcasecmp (type, "NEQUAL"))
1282 format->type = CTEF_NEQUAL;
1283 else if (!strcasecmp (type, "PAREN"))
1284 format->type = CTEF_PAREN;
1285 else if (!strcasecmp (type, "PCTPAREN"))
1286 format->type = CTEF_PCTPAREN;
/* Not one of the CTABLES-specific names: treat it as an ordinary format. */
1289 *is_ctables_format = false;
1290 return (parse_format_specifier (lexer, format)
1291 && fmt_check_output (format)
1292 && fmt_check_type_compat (format, VAL_NUMERIC));
1298 lex_next_error (lexer, -1, -1,
1299 _("Output format %s requires width 2 or greater."), type);
/* Decimals must leave at least one position of the width unused. */
1302 else if (format->d > format->w - 1)
1304 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1305 "greater than decimals."), type);
1310 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function, a percentile (only for
   CTSF_PTILE, required to lie in 0..100), an optional quoted label and an
   optional format.  It is attached to the parsed axis with
   add_summary_spec().  Specifications may be separated by commas.  The
   keyword TOTALS followed by "[" is accepted while the current variant is
   still CSV_CELL.

   Loop structure, returns and some assignments are not visible in this
   listing.  ctables_axis_destroy (sub) at the end presumably belongs to the
   error path -- confirm. */
1315 static struct ctables_axis *
1316 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1318 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
/* Nothing more to do unless a "[" introduces summary specifications. */
1319 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1322 enum ctables_summary_variant sv = CSV_CELL;
1325 int start_ofs = lex_ofs (ctx->lexer);
1327 /* Parse function. */
1328 enum ctables_summary_function function;
1329 if (!parse_ctables_summary_function (ctx->lexer, &function))
1332 /* Parse percentile. */
1333 double percentile = 0;
1334 if (function == CTSF_PTILE)
1336 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1338 percentile = lex_number (ctx->lexer);
1339 lex_get (ctx->lexer);
/* Optional label: a string token, copied to the heap. */
1344 if (lex_is_string (ctx->lexer))
1346 label = ss_xstrdup (lex_tokss (ctx->lexer));
1347 lex_get (ctx->lexer);
1351 struct fmt_spec format;
1352 const struct fmt_spec *formatp;
1353 bool is_ctables_format = false;
/* A format is only attempted when the current token is an identifier that
   contains a digit. */
1354 if (lex_token (ctx->lexer) == T_ID
1355 && has_digit (lex_tokcstr (ctx->lexer)))
1357 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1358 &is_ctables_format))
/* The location spans the whole specification just parsed. */
1368 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1369 lex_ofs (ctx->lexer) - 1);
/* NOTE(review): the result of add_summary_spec() is not used on the visible
   lines -- confirm whether a failure here should abort parsing. */
1370 add_summary_spec (sub, function, percentile, label, formatp,
1371 is_ctables_format, loc, sv);
1373 msg_location_destroy (loc);
1375 lex_match (ctx->lexer, T_COMMA);
1376 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1378 if (!lex_force_match (ctx->lexer, T_LBRACK))
1382 else if (lex_match (ctx->lexer, T_RBRACK))
/* In the CSV_TOTAL variant a second "]" is required. */
1384 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1391 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A CTAO_VAR
   leaf yields itself when its `scale` flag is set and NULL otherwise; other
   nodes are searched through both sub-axes.  The check that precedes the
   first visible `else if` and the handling of the loop result are not
   visible in this listing. */
1395 static const struct ctables_axis *
1396 find_scale (const struct ctables_axis *axis)
1400 else if (axis->op == CTAO_VAR)
1401 return axis->scale ? axis : NULL;
1404 for (size_t i = 0; i < 2; i++)
1406 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a non-scale variable that has
   at least one CSV_CELL summary specification.  A CTAO_VAR leaf yields
   itself when it qualifies and NULL otherwise; other nodes are searched
   through both sub-axes.  The check that precedes the first visible
   `else if` and the handling of the loop result are not visible in this
   listing. */
1414 static const struct ctables_axis *
1415 find_categorical_summary_spec (const struct ctables_axis *axis)
1419 else if (axis->op == CTAO_VAR)
1420 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1423 for (size_t i = 0; i < 2; i++)
1425 const struct ctables_axis *sum
1426 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: postfix expressions joined by ">" (T_GT).
   Each ">" builds a CTAO_NEST node from the expression parsed so far and
   the newly parsed right-hand side.

   Two conditions are diagnosed, each followed by destruction of the new
   node: a scale variable on both sides of the nesting, and a summary
   requested on a categorical variable in the outer (left-hand) part.
   Returns, and the statement that carries the new node into the next
   iteration, are not visible in this listing. */
1434 static struct ctables_axis *
1435 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1437 int start_ofs = lex_ofs (ctx->lexer);
1438 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1442 while (lex_match (ctx->lexer, T_GT))
1444 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1448 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1449 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nesting may contain a scale variable. */
1451 const struct ctables_axis *outer_scale = find_scale (lhs);
1452 const struct ctables_axis *inner_scale = find_scale (rhs);
1453 if (outer_scale && inner_scale)
1455 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1456 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1457 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1458 ctables_axis_destroy (nest);
/* Only the left-hand (outer) side is examined for categorical summaries. */
1462 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1465 msg_at (SE, nest->loc,
1466 _("Summaries may only be requested for categorical variables "
1467 "at the innermost nesting level."));
1468 msg_at (SN, outer_sum->loc,
1469 _("This outer categorical variable has a summary."));
1470 ctables_axis_destroy (nest);
/* Parses a stacking expression: nesting expressions joined by "+"
   (T_PLUS).  Each "+" wraps the expression parsed so far and the new
   right-hand side in a CTAO_STACK node, which then becomes the new
   left-hand side.  Failure checks and the final return are not visible in
   this listing. */
1480 static struct ctables_axis *
1481 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1483 int start_ofs = lex_ofs (ctx->lexer);
1484 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1488 while (lex_match (ctx->lexer, T_PLUS))
1490 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1494 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1495 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T and stores the result in
   T->axes[A].  The BY, "/" and end-of-command tokens are tested first; what
   is returned in that case is not visible in this listing.  Otherwise the
   result is true exactly when ctables_axis_parse_stack() produced a
   non-null axis. */
1502 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1503 struct ctables *ct, struct ctables_table *t,
1504 enum pivot_axis_type a)
1506 if (lex_token (lexer) == T_BY
1507 || lex_token (lexer) == T_SLASH
1508 || lex_token (lexer) == T_ENDCMD)
/* The context initializers are not visible in this listing. */
1511 struct ctables_axis_parse_ctx ctx = {
1517 t->axes[a] = ctables_axis_parse_stack (&ctx);
1518 return t->axes[a] != NULL;
/* Cleanup routine for a struct ctables_chisq; ctables_table_destroy() calls
   it on a table's `chisq` member.  Its body is not visible in this
   listing. */
1522 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Cleanup routine for a struct ctables_pairwise; ctables_table_destroy()
   calls it on a table's `pairwise` member.  Its body is not visible in this
   listing. */
1528 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: drops one reference on each entry
   of T->categories and frees that array, destroys the column, row and layer
   axes, and destroys the chi-square and pairwise settings.  Other cleanup
   steps may exist on lines not visible in this listing. */
1534 ctables_table_destroy (struct ctables_table *t)
1539 for (size_t i = 0; i < t->n_categories; i++)
1540 ctables_categories_unref (t->categories[i]);
1541 free (t->categories);
1543 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1544 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1545 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1549 ctables_chisq_destroy (t->chisq);
1550 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: drops the reference on its pivot table
   look and destroys each of its tables.  Other cleanup steps may exist on
   lines not visible in this listing. */
1555 ctables_destroy (struct ctables *ct)
1560 pivot_table_look_unref (ct->look);
1564 for (size_t i = 0; i < ct->n_tables; i++)
1565 ctables_table_destroy (ct->tables[i]);
/* Builds, by value, a category whose numeric range is LOW..HIGH.  Any other
   initializers (such as the category type) are on a line not visible in
   this listing. */
1570 static struct ctables_category
1571 cct_nrange (double low, double high)
1573 return (struct ctables_category) {
1575 .nrange = { low, high }
/* Builds, by value, a category whose string range is LOW..HIGH.  The
   substrings are stored as given, not copied.  Any other initializers (such
   as the category type) are on a line not visible in this listing. */
1579 static struct ctables_category
1580 cct_srange (struct substring low, struct substring high)
1582 return (struct ctables_category) {
1584 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category and stores it in
   *CAT as a CCT_SUBTOTAL category.  An optional "=" must be followed by a
   string that becomes the label; otherwise the translated default
   "Subtotal" is used.  In both cases the label is a fresh heap copy.
   HIDE_SUBCATEGORIES is copied into the category unchanged. */
1589 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1590 struct ctables_category *cat)
1593 if (lex_match (lexer, T_EQUALS))
1595 if (!lex_force_string (lexer))
1598 total_label = ss_xstrdup (lex_tokss (lexer));
/* No explicit label was given. */
1602 total_label = xstrdup (_("Subtotal"));
1604 *cat = (struct ctables_category) {
1605 .type = CCT_SUBTOTAL,
1606 .hide_subcategories = hide_subcategories,
1607 .total_label = total_label
/* Produces a recoded copy of the lexer's current string token with trailing
   spaces removed.  recode_substring_pool() is given the dictionary's
   encoding and "UTF-8"; presumably this converts from UTF-8 to the
   dictionary encoding -- confirm the argument order against that function's
   declaration.  The return statement, and whether the token is consumed,
   are not visible in this listing. */
1612 static struct substring
1613 parse_substring (struct lexer *lexer, struct dictionary *dict)
1615 struct substring s = recode_substring_pool (
1616 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1617 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification into *CAT.  The forms
   recognized on the visible lines are:

     OTHERNM, MISSING            -- category types of the same name
     SUBTOTAL, HSUBTOTAL         -- via ctables_table_parse_subtotal()
     LO THRU <string or number>  -- range that is open at the low end
     <number> [THRU HI | THRU <number>]
     <string> [THRU HI | THRU <string>]
     &<name>                     -- a previously defined postcompute

   An open low numeric end is stored as -DBL_MAX and an open high numeric
   end as DBL_MAX; an open string end is stored as a substring whose
   `string` member is NULL.  Anything else is a syntax error.  The parameter
   list line that declares `ct`, the token-advancing calls and the returns
   are not visible in this listing. */
1623 ctables_table_parse_explicit_category (struct lexer *lexer,
1624 struct dictionary *dict,
1626 struct ctables_category *cat)
1628 if (lex_match_id (lexer, "OTHERNM"))
1629 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1630 else if (lex_match_id (lexer, "MISSING"))
1631 *cat = (struct ctables_category) { .type = CCT_MISSING };
1632 else if (lex_match_id (lexer, "SUBTOTAL"))
1633 return ctables_table_parse_subtotal (lexer, false, cat);
1634 else if (lex_match_id (lexer, "HSUBTOTAL"))
1635 return ctables_table_parse_subtotal (lexer, true, cat);
1636 else if (lex_match_id (lexer, "LO"))
1638 if (!lex_force_match_id (lexer, "THRU"))
1640 if (lex_is_string (lexer))
/* String range with no lower bound. */
1642 struct substring sr0 = { .string = NULL };
1643 struct substring sr1 = parse_substring (lexer, dict);
1644 *cat = cct_srange (sr0, sr1);
1646 else if (lex_force_num (lexer))
1648 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1654 else if (lex_is_number (lexer))
1656 double number = lex_number (lexer);
1658 if (lex_match_id (lexer, "THRU"))
1660 if (lex_match_id (lexer, "HI"))
1661 *cat = cct_nrange (number, DBL_MAX);
1664 if (!lex_force_num (lexer))
1666 *cat = cct_nrange (number, lex_number (lexer));
/* A number without THRU; the initializers are not visible in this
   listing. */
1671 *cat = (struct ctables_category) {
1676 else if (lex_is_string (lexer))
1678 struct substring s = parse_substring (lexer, dict);
1679 if (lex_match_id (lexer, "THRU"))
1681 if (lex_match_id (lexer, "HI"))
/* String range with no upper bound. */
1683 struct substring sr1 = { .string = NULL };
1684 *cat = cct_srange (s, sr1);
1688 if (!lex_force_string (lexer))
1690 struct substring sr1 = parse_substring (lexer, dict);
1691 *cat = cct_srange (s, sr1);
1695 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* "&name" refers to a postcompute looked up by name in CT. */
1697 else if (lex_match (lexer, T_AND))
1699 if (!lex_force_id (lexer))
1701 struct ctables_postcompute *pc = ctables_find_postcompute (
1702 ct, lex_tokcstr (lexer));
1705 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1706 msg_at (SE, loc, _("Unknown postcompute &%s."),
1707 lex_tokcstr (lexer));
1708 msg_location_destroy (loc);
1713 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1717 lex_error (lexer, NULL);
/* Parses S as a value in format FORMAT with data_in(), using the
   dictionary's encoding and the current format settings.  When data_in()
   supplies an error string, an error that includes the format name and that
   string is reported at LOCATION.  The declaration of `v`, the store
   through N and the returns are not visible in this listing. */
1725 parse_category_string (struct msg_location *location,
1726 struct substring s, const struct dictionary *dict,
1727 enum fmt_type format, double *n)
1730 char *error = data_in (s, dict_get_encoding (dict), format,
1731 settings_get_fmt_settings (), &v, 0, NULL);
1734 msg_at (SE, location,
1735 _("Failed to parse category specification as format %s: %s."),
1736 fmt_name (format), error);
/* Scans CATS for the category that postcompute expression E refers to.  The
   comparison depends on E's operator: numbers and numeric ranges must have
   equal values, strings are compared with ss_equals(), string ranges with
   nullable_substring_equal() on both ends, and MISSING, OTHERNM, SUBTOTAL
   and TOTAL references match on category type.

   Subtotals are counted in `n_subtotals` so that E->subtotal_index can pick
   one by position.  The final test concerns a SUBTOTAL reference without a
   position (index 0) when more than one subtotal exists.  What each match
   does with `best`, and all returns, are not visible in this listing. */
1745 static struct ctables_category *
1746 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1747 const struct ctables_pcexpr *e)
1749 struct ctables_category *best = NULL;
1750 size_t n_subtotals = 0;
1751 for (size_t i = 0; i < cats->n_cats; i++)
1753 struct ctables_category *cat = &cats->cats[i];
1756 case CTPO_CAT_NUMBER:
1757 if (cat->type == CCT_NUMBER && cat->number == e->number)
1761 case CTPO_CAT_STRING:
1762 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1766 case CTPO_CAT_NRANGE:
1767 if (cat->type == CCT_NRANGE
1768 && cat->nrange[0] == e->nrange[0]
1769 && cat->nrange[1] == e->nrange[1])
1773 case CTPO_CAT_SRANGE:
1774 if (cat->type == CCT_SRANGE
1775 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1776 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1780 case CTPO_CAT_MISSING:
1781 if (cat->type == CCT_MISSING)
1785 case CTPO_CAT_OTHERNM:
1786 if (cat->type == CCT_OTHERNM)
1790 case CTPO_CAT_SUBTOTAL:
1791 if (cat->type == CCT_SUBTOTAL)
/* Either the positional index matches the running count, or the reference
   carries no position (index 0). */
1794 if (e->subtotal_index == n_subtotals)
1796 else if (e->subtotal_index == 0)
1801 case CTPO_CAT_TOTAL:
1802 if (cat->type == CCT_TOTAL)
1816 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression E refers to,
   first converting string-typed references to numeric ones when
   PARSE_FORMAT is not FMT_F:

   - A CTPO_CAT_STRING reference is parsed with parse_category_string() and
     looked up as a CTPO_CAT_NUMBER reference.
   - A CTPO_CAT_SRANGE reference is looked up as a CTPO_CAT_NRANGE
     reference; an endpoint whose string is null becomes -DBL_MAX (low end)
     or DBL_MAX (high end).

   Every other reference is looked up unchanged.  The temporary expression
   reuses E's location.  The returns taken on parse failure are not visible
   in this listing. */
1821 static struct ctables_category *
1822 ctables_find_category_for_postcompute (const struct dictionary *dict,
1823 const struct ctables_categories *cats,
1824 enum fmt_type parse_format,
1825 const struct ctables_pcexpr *e)
1827 if (parse_format != FMT_F)
1829 if (e->op == CTPO_CAT_STRING)
1832 if (!parse_category_string (e->location, e->string, dict,
1833 parse_format, &number))
1836 struct ctables_pcexpr e2 = {
1837 .op = CTPO_CAT_NUMBER,
1839 .location = e->location,
1841 return ctables_find_category_for_postcompute__ (cats, &e2);
1843 else if (e->op == CTPO_CAT_SRANGE)
1846 if (!e->srange[0].string)
1847 nrange[0] = -DBL_MAX;
1848 else if (!parse_category_string (e->location, e->srange[0], dict,
1849 parse_format, &nrange[0]))
1852 if (!e->srange[1].string)
1853 nrange[1] = DBL_MAX;
1854 else if (!parse_category_string (e->location, e->srange[1], dict,
1855 parse_format, &nrange[1]))
1858 struct ctables_pcexpr e2 = {
1859 .op = CTPO_CAT_NRANGE,
1860 .nrange = { nrange[0], nrange[1] },
1861 .location = e->location,
1863 return ctables_find_category_for_postcompute__ (cats, &e2);
/* No conversion applies: look up E as is. */
1866 return ctables_find_category_for_postcompute__ (cats, e);
/* Walks postcompute expression E, which belongs to computed category
   PC_CAT, and checks that every category it references is present in CATS.

   For category-reference operators the category is looked up with
   ctables_find_category_for_postcompute().  Two kinds of failure are
   explained to the user: a SUBTOTAL reference without a position while CATS
   holds more than one subtotal, and a reference to a category that is not
   in the list, with a hint that depends on the kind of reference.
   CATS_LOCATION is the location used for diagnostics about the category
   list.  For other operators the check recurses into both sub-expressions.

   The switch header, the test on the lookup result, what happens under
   pc_cat->pc->hide_source_cats, and the returns are not visible in this
   listing. */
1870 ctables_recursive_check_postcompute (struct dictionary *dict,
1871 const struct ctables_pcexpr *e,
1872 struct ctables_category *pc_cat,
1873 const struct ctables_categories *cats,
1874 const struct msg_location *cats_location)
1878 case CTPO_CAT_NUMBER:
1879 case CTPO_CAT_STRING:
1880 case CTPO_CAT_NRANGE:
1881 case CTPO_CAT_SRANGE:
1882 case CTPO_CAT_MISSING:
1883 case CTPO_CAT_OTHERNM:
1884 case CTPO_CAT_SUBTOTAL:
1885 case CTPO_CAT_TOTAL:
1887 struct ctables_category *cat = ctables_find_category_for_postcompute (
1888 dict, cats, pc_cat->parse_format, e);
/* A SUBTOTAL reference without a position: count the subtotals to see
   whether the reference is ambiguous. */
1891 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1893 size_t n_subtotals = 0;
1894 for (size_t i = 0; i < cats->n_cats; i++)
1895 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1896 if (n_subtotals > 1)
1898 msg_at (SE, cats_location,
1899 ngettext ("These categories include %zu instance "
1900 "of SUBTOTAL or HSUBTOTAL, so references "
1901 "from computed categories must refer to "
1902 "subtotals by position, "
1903 "e.g. SUBTOTAL[1].",
1904 "These categories include %zu instances "
1905 "of SUBTOTAL or HSUBTOTAL, so references "
1906 "from computed categories must refer to "
1907 "subtotals by position, "
1908 "e.g. SUBTOTAL[1].",
1911 msg_at (SN, e->location,
1912 _("This is the reference that lacks a position."));
/* Generic report for a missing category, followed by a hint chosen by the
   kind of reference. */
1917 msg_at (SE, pc_cat->location,
1918 _("Computed category &%s references a category not included "
1919 "in the category list."),
1921 msg_at (SN, e->location, _("This is the missing category."));
1922 if (e->op == CTPO_CAT_SUBTOTAL)
1923 msg_at (SN, cats_location,
1924 _("To fix the problem, add subtotals to the "
1925 "list of categories here."));
1926 else if (e->op == CTPO_CAT_TOTAL)
1927 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1928 "CATEGORIES specification."));
1930 msg_at (SN, cats_location,
1931 _("To fix the problem, add the missing category to the "
1932 "list of categories here."));
1935 if (pc_cat->pc->hide_source_cats)
/* Other operators: check each sub-expression that is present. */
1949 for (size_t i = 0; i < 2; i++)
1950 if (e->subs[i] && !ctables_recursive_check_postcompute (
1951 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric ones.  For a numeric
   variable an error naming it is reported at CAT's location, saying that
   the category specification applies only to string variables.  The returns
   are not visible in this listing; callers test the result with `!`, so
   presumably true means that every variable is a string variable --
   confirm. */
1960 all_strings (struct variable **vars, size_t n_vars,
1961 const struct ctables_category *cat)
1963 for (size_t j = 0; j < n_vars; j++)
1964 if (var_is_numeric (vars[j]))
1966 msg_at (SE, cat->location,
1967 _("This category specification may be applied only to string "
1968 "variables, but this subcommand tries to apply it to "
1969 "numeric variable %s."),
1970 var_get_name (vars[j]));
/* Parses a CATEGORIES subcommand for table T: VARIABLES=<list>, an optional
   bracketed list of explicit categories, and then keyword settings up to
   the next "/" or the end of the command.

   ORDER, KEY and MISSING are only recognized when no explicit categories
   were given; TOTAL, LABEL, POSITION and EMPTY are always recognized.  The
   new ctables_categories object starts with one reference per listed
   variable (n_refs = n_vars) and with show_empty set, and replaces the
   previous entry in T->categories for each of those variables.

   After parsing, a totals category may be inserted before or after the
   others, subtotals are linked to categories, and explicit categories are
   validated against the variables' types.  String specifications are
   parsed into numbers using the variables' common print format; this
   appears to apply when all variables share a date or time print format
   (`parse_strings`) -- confirm, since the start of that expression is not
   visible.

   Many statements, including every return and several conditions, are not
   visible in this listing. */
1977 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1978 struct ctables *ct, struct ctables_table *t)
1980 if (!lex_match_id (lexer, "VARIABLES"))
1982 lex_match (lexer, T_EQUALS);
1984 struct variable **vars;
1986 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all variables, or NULL if they
   differ. */
1989 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1990 for (size_t i = 1; i < n_vars; i++)
1992 const struct fmt_spec *f = var_get_print_format (vars[i]);
1993 if (f->type != common_format->type)
1995 common_format = NULL;
2001 && (fmt_get_category (common_format->type)
2002 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One shared categories object, referenced once per variable. */
2004 struct ctables_categories *c = xmalloc (sizeof *c);
2005 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2006 for (size_t i = 0; i < n_vars; i++)
2008 struct ctables_categories **cp
2009 = &t->categories[var_get_dict_index (vars[i])];
2010 ctables_categories_unref (*cp);
2014 size_t allocated_cats = 0;
/* Token offsets of the explicit list; -1 means there is no explicit
   list. */
2015 int cats_start_ofs = -1;
2016 int cats_end_ofs = -1;
2017 if (lex_match (lexer, T_LBRACK))
2019 cats_start_ofs = lex_ofs (lexer);
2022 if (c->n_cats >= allocated_cats)
2023 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2025 int start_ofs = lex_ofs (lexer);
2026 struct ctables_category *cat = &c->cats[c->n_cats];
2027 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2029 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2032 lex_match (lexer, T_COMMA);
2034 while (!lex_match (lexer, T_RBRACK));
2035 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category used when no explicit list is
   given. */
2038 struct ctables_category cat = {
2040 .include_missing = false,
2041 .sort_ascending = true,
2043 bool show_totals = false;
2044 char *total_label = NULL;
2045 bool totals_before = false;
/* Keyword settings. */
2046 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2048 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2050 lex_match (lexer, T_EQUALS);
2051 if (lex_match_id (lexer, "A"))
2052 cat.sort_ascending = true;
2053 else if (lex_match_id (lexer, "D"))
2054 cat.sort_ascending = false;
2057 lex_error_expecting (lexer, "A", "D");
2061 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2063 lex_match (lexer, T_EQUALS);
2064 if (lex_match_id (lexer, "VALUE"))
2065 cat.type = CCT_VALUE;
2066 else if (lex_match_id (lexer, "LABEL"))
2067 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable (plus a percentile for PTILE). */
2070 cat.type = CCT_FUNCTION;
2071 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2074 if (lex_match (lexer, T_LPAREN))
2076 cat.sort_var = parse_variable (lexer, dict);
2080 if (cat.sort_function == CTSF_PTILE)
2082 lex_match (lexer, T_COMMA);
2083 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2085 cat.percentile = lex_number (lexer);
2089 if (!lex_force_match (lexer, T_RPAREN))
2092 else if (ctables_function_availability (cat.sort_function)
2095 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2100 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2102 lex_match (lexer, T_EQUALS);
2103 if (lex_match_id (lexer, "INCLUDE"))
2104 cat.include_missing = true;
2105 else if (lex_match_id (lexer, "EXCLUDE"))
2106 cat.include_missing = false;
2109 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2113 else if (lex_match_id (lexer, "TOTAL"))
2115 lex_match (lexer, T_EQUALS);
2116 if (!parse_bool (lexer, &show_totals))
2119 else if (lex_match_id (lexer, "LABEL"))
2121 lex_match (lexer, T_EQUALS);
2122 if (!lex_force_string (lexer))
2125 total_label = ss_xstrdup (lex_tokss (lexer));
2128 else if (lex_match_id (lexer, "POSITION"))
2130 lex_match (lexer, T_EQUALS);
2131 if (lex_match_id (lexer, "BEFORE"))
2132 totals_before = true;
2133 else if (lex_match_id (lexer, "AFTER"))
2134 totals_before = false;
2137 lex_error_expecting (lexer, "BEFORE", "AFTER");
2141 else if (lex_match_id (lexer, "EMPTY"))
2143 lex_match (lexer, T_EQUALS);
2144 if (lex_match_id (lexer, "INCLUDE"))
2145 c->show_empty = true;
2146 else if (lex_match_id (lexer, "EXCLUDE"))
2147 c->show_empty = false;
2150 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two "expecting" messages: one lists every keyword, the other omits ORDER,
   KEY and MISSING.  The condition that selects between them is not visible
   in this listing. */
2157 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2158 "TOTAL", "LABEL", "POSITION", "EMPTY");
2160 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category template. */
2167 if (c->n_cats >= allocated_cats)
2168 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2169 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, placed first or last. */
2174 if (c->n_cats >= allocated_cats)
2175 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2177 struct ctables_category *totals;
2180 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2181 totals = &c->cats[0];
2184 totals = &c->cats[c->n_cats];
2187 *totals = (struct ctables_category) {
2189 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, recording in each category the subtotal that currently
   applies. */
2193 struct ctables_category *subtotal = NULL;
2194 for (size_t i = totals_before ? 0 : c->n_cats;
2195 totals_before ? i < c->n_cats : i-- > 0;
2196 totals_before ? i++ : 0)
2198 struct ctables_category *cat = &c->cats[i];
2207 cat->subtotal = subtotal;
2210 case CCT_POSTCOMPUTE:
2221 case CCT_EXCLUDED_MISSING:
/* Validation of explicit categories; runs only when a bracketed list was
   present. */
2226 if (cats_start_ofs != -1)
2228 struct msg_location *cats_location
2229 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2230 for (size_t i = 0; i < c->n_cats; i++)
2232 struct ctables_category *cat = &c->cats[i];
2235 case CCT_POSTCOMPUTE:
2236 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2237 if (!ctables_recursive_check_postcompute (dict, cat->pc->expr,
2238 cat, c, cats_location))
/* Numeric specifications may not be applied to string variables. */
2244 for (size_t j = 0; j < n_vars; j++)
2245 if (var_is_alpha (vars[j]))
2247 msg_at (SE, cat->location,
2248 _("This category specification may be applied "
2249 "only to numeric variables, but this "
2250 "subcommand tries to apply it to string "
2252 var_get_name (vars[j]));
/* String category: either converted to a number using the common format or
   required to apply only to string variables. */
2261 if (!parse_category_string (cat->location, cat->string, dict,
2262 common_format->type, &n))
2265 ss_dealloc (&cat->string);
2267 cat->type = CCT_NUMBER;
2270 else if (!all_strings (vars, n_vars, cat))
/* String range: same treatment, applied to each end that is present. */
2279 if (!cat->srange[0].string)
2281 else if (!parse_category_string (cat->location,
2282 cat->srange[0], dict,
2283 common_format->type, &n[0]))
2286 if (!cat->srange[1].string)
2288 else if (!parse_category_string (cat->location,
2289 cat->srange[1], dict,
2290 common_format->type, &n[1]))
2293 ss_dealloc (&cat->srange[0]);
2294 ss_dealloc (&cat->srange[1]);
2296 cat->type = CCT_NRANGE;
2297 cat->nrange[0] = n[0];
2298 cat->nrange[1] = n[1];
2300 else if (!all_strings (vars, n_vars, cat))
2311 case CCT_EXCLUDED_MISSING:
/* Cleanup routine for one struct ctables_nest; ctables_stack_uninit() calls
   it for every element of a stack.  Its body is not visible in this
   listing. */
2321 ctables_nest_uninit (struct ctables_nest *nest)
/* Releases the contents of STACK: uninitializes each of its `n` nests and
   frees the nest array.  STACK itself is not freed on the visible lines. */
2328 ctables_stack_uninit (struct ctables_stack *stack)
2332 for (size_t i = 0; i < stack->n; i++)
2333 ctables_nest_uninit (&stack->nests[i]);
2334 free (stack->nests);
/* Combines stacks S0 and S1 by nesting: for every pair (a from S0, b from
   S1) a new nest is created whose variable list is a's variables followed
   by b's.  The scale index is taken from a if a has one, otherwise from b
   shifted by a->n.  Summary specs are cloned from whichever input is chosen
   as `summary_src`; that choice tests each side's CSV_CELL spec variable,
   but the assignments themselves are not visible in this listing.  Both
   inputs are uninitialized afterwards, so the function consumes them.

   The early handling before the allocation, some initializers of the new
   nest and the return are not visible in this listing. */
2338 static struct ctables_stack
2339 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* NOTE(review): the product s1.n * sizeof *stack.nests is formed here by the
   caller, so any overflow check inside xnmalloc() would not cover that
   multiplication -- confirm whether this matters for realistic sizes. */
2346 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2347 for (size_t i = 0; i < s0.n; i++)
2348 for (size_t j = 0; j < s1.n; j++)
2350 const struct ctables_nest *a = &s0.nests[i];
2351 const struct ctables_nest *b = &s1.nests[j];
2353 size_t allocate = a->n + b->n;
2354 struct variable **vars = xnmalloc (allocate, sizeof *vars);
/* NOTE(review): no visible line stores, uses or frees `axes`; check the
   full source for a leak. */
2355 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
/* Concatenate the variable lists: a's first, then b's. */
2357 for (size_t k = 0; k < a->n; k++)
2358 vars[n++] = a->vars[k];
2359 for (size_t k = 0; k < b->n; k++)
2360 vars[n++] = b->vars[k];
2361 assert (n == allocate);
2363 const struct ctables_nest *summary_src;
2364 if (!a->specs[CSV_CELL].var)
2366 else if (!b->specs[CSV_CELL].var)
2371 struct ctables_nest *new = &stack.nests[stack.n++];
2372 *new = (struct ctables_nest) {
2374 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2375 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2379 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2380 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2382 ctables_stack_uninit (&s0);
2383 ctables_stack_uninit (&s1);
/* Combines stacks S0 and S1 by concatenation: the result holds S0's nests
   followed by S1's.  Nests are copied by value.  Each nest that comes from
   S1 has its `group_head` increased by S0.n, matching its shifted position
   in the combined array.  The increment of stack.n in the second loop, the
   release of the input arrays and the return are not visible in this
   listing. */
2387 static struct ctables_stack
2388 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2390 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2391 for (size_t i = 0; i < s0.n; i++)
2392 stack.nests[stack.n++] = s0.nests[i];
2393 for (size_t i = 0; i < s1.n; i++)
2395 stack.nests[stack.n] = s1.nests[i];
2396 stack.nests[stack.n].group_head += s0.n;
2399 assert (stack.n == s0.n + s1.n);
/* Builds a stack with a single nest for variable axis A.  The nest's scale
   index is 0 when A is scale and SIZE_MAX otherwise.  Summary specs are
   cloned from A for every variant, and tagged with A's variable and scale
   flag, only when A has CSV_CELL specs or is scale.  The statement that
   fills `vars` and the remaining nest initializers are not visible in this
   listing. */
2405 static struct ctables_stack
2406 var_fts (const struct ctables_axis *a)
2408 struct variable **vars = xmalloc (sizeof *vars);
2411 struct ctables_nest *nest = xmalloc (sizeof *nest);
2412 *nest = (struct ctables_nest) {
2415 .scale_idx = a->scale ? 0 : SIZE_MAX,
2417 if (a->specs[CSV_CELL].n || a->scale)
2418 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2420 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2421 nest->specs[sv].var = a->var;
2422 nest->specs[sv].is_scale = a->scale;
/* The stack owns the single heap-allocated nest. */
2424 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively expands axis tree A into a stack of nests.  One visible path
   yields an empty stack; two others combine the expansions of both
   sub-axes with stack_fts() and with nest_fts().  The conditions and case
   labels that select each path, and any other paths, are not visible in
   this listing. */
2427 static struct ctables_stack
2428 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2431 return (struct ctables_stack) { .n = 0 };
2439 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2440 enumerate_fts (axis_type, a->subs[1]));
2443 /* This should consider any of the scale variables found in the result to
2444 be linked to each other listwise for SMISSING=LISTWISE. */
2445 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2446 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary.  It is a union, so only the members
   that belong to the summary's function group are meaningful at a time.
   Most member declarations are not visible in this listing; the group
   comments below and the members used elsewhere in the file (`count`,
   `min`, `max`, `moments`, `writer`, `ovalid`) indicate the layout. */
2452 union ctables_summary
2454 /* COUNT, VALIDN, TOTALN. */
2457 /* MINIMUM, MAXIMUM, RANGE. */
2464 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2465 struct moments1 *moments;
2467 /* MEDIAN, MODE, PTILE. */
2470 struct casewriter *writer;
2475 /* XXX multiple response */
/* Initializes accumulator S for the summary function named by SS.  The
   visible initializations are: both extremes set to SYSMIS for the min/max
   group, a moments1 accumulator created with MOMENT_VARIANCE for the
   moments group, and a sorting casewriter for the order-statistics group.
   Several case labels and the initialization for the count groups are not
   visible in this listing. */
2479 ctables_summary_init (union ctables_summary *s,
2480 const struct ctables_summary_spec *ss)
2482 switch (ss->function)
2486 case CTSF_ROWPCT_COUNT:
2487 case CTSF_COLPCT_COUNT:
2488 case CTSF_TABLEPCT_COUNT:
2489 case CTSF_SUBTABLEPCT_COUNT:
2490 case CTSF_LAYERPCT_COUNT:
2491 case CTSF_LAYERROWPCT_COUNT:
2492 case CTSF_LAYERCOLPCT_COUNT:
2493 case CTSF_ROWPCT_VALIDN:
2494 case CTSF_COLPCT_VALIDN:
2495 case CTSF_TABLEPCT_VALIDN:
2496 case CTSF_SUBTABLEPCT_VALIDN:
2497 case CTSF_LAYERPCT_VALIDN:
2498 case CTSF_LAYERROWPCT_VALIDN:
2499 case CTSF_LAYERCOLPCT_VALIDN:
2500 case CTSF_ROWPCT_TOTALN:
2501 case CTSF_COLPCT_TOTALN:
2502 case CTSF_TABLEPCT_TOTALN:
2503 case CTSF_SUBTABLEPCT_TOTALN:
2504 case CTSF_LAYERPCT_TOTALN:
2505 case CTSF_LAYERROWPCT_TOTALN:
2506 case CTSF_LAYERCOLPCT_TOTALN:
2513 case CTSF_UROWPCT_COUNT:
2514 case CTSF_UCOLPCT_COUNT:
2515 case CTSF_UTABLEPCT_COUNT:
2516 case CTSF_USUBTABLEPCT_COUNT:
2517 case CTSF_ULAYERPCT_COUNT:
2518 case CTSF_ULAYERROWPCT_COUNT:
2519 case CTSF_ULAYERCOLPCT_COUNT:
2520 case CTSF_UROWPCT_VALIDN:
2521 case CTSF_UCOLPCT_VALIDN:
2522 case CTSF_UTABLEPCT_VALIDN:
2523 case CTSF_USUBTABLEPCT_VALIDN:
2524 case CTSF_ULAYERPCT_VALIDN:
2525 case CTSF_ULAYERROWPCT_VALIDN:
2526 case CTSF_ULAYERCOLPCT_VALIDN:
2527 case CTSF_UROWPCT_TOTALN:
2528 case CTSF_UCOLPCT_TOTALN:
2529 case CTSF_UTABLEPCT_TOTALN:
2530 case CTSF_USUBTABLEPCT_TOTALN:
2531 case CTSF_ULAYERPCT_TOTALN:
2532 case CTSF_ULAYERROWPCT_TOTALN:
2533 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for both extremes; ctables_summary_add()
   tests for it before comparing. */
2543 s->min = s->max = SYSMIS;
2551 case CTSF_ROWPCT_SUM:
2552 case CTSF_COLPCT_SUM:
2553 case CTSF_TABLEPCT_SUM:
2554 case CTSF_SUBTABLEPCT_SUM:
2555 case CTSF_LAYERPCT_SUM:
2556 case CTSF_LAYERROWPCT_SUM:
2557 case CTSF_LAYERCOLPCT_SUM:
2562 case CTSF_UVARIANCE:
2563 case CTSF_UROWPCT_SUM:
2564 case CTSF_UCOLPCT_SUM:
2565 case CTSF_UTABLEPCT_SUM:
2566 case CTSF_USUBTABLEPCT_SUM:
2567 case CTSF_ULAYERPCT_SUM:
2568 case CTSF_ULAYERROWPCT_SUM:
2569 case CTSF_ULAYERCOLPCT_SUM:
2570 s->moments = moments1_create (MOMENT_VARIANCE);
/* Order statistics: cases with two width-0 columns (filled as value and
   weight by ctables_summary_add()) are collected in a writer that sorts
   ascending on column 0.  The local prototype and ordering are released
   once the writer has been created. */
2580 struct caseproto *proto = caseproto_create ();
2581 proto = caseproto_add_width (proto, 0);
2582 proto = caseproto_add_width (proto, 0);
2584 struct subcase ordering;
2585 subcase_init (&ordering, 0, 0, SC_ASCEND);
2586 s->writer = sort_create_writer (&ordering, proto);
2587 subcase_uninit (&ordering);
2588 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the summary
   function named by SS.  On the visible lines, the moments group destroys
   its moments1 accumulator and another group destroys its casewriter.  The
   count groups have no visible cleanup.  Several case labels are not
   visible in this listing. */
2598 ctables_summary_uninit (union ctables_summary *s,
2599 const struct ctables_summary_spec *ss)
2601 switch (ss->function)
2605 case CTSF_ROWPCT_COUNT:
2606 case CTSF_COLPCT_COUNT:
2607 case CTSF_TABLEPCT_COUNT:
2608 case CTSF_SUBTABLEPCT_COUNT:
2609 case CTSF_LAYERPCT_COUNT:
2610 case CTSF_LAYERROWPCT_COUNT:
2611 case CTSF_LAYERCOLPCT_COUNT:
2612 case CTSF_ROWPCT_VALIDN:
2613 case CTSF_COLPCT_VALIDN:
2614 case CTSF_TABLEPCT_VALIDN:
2615 case CTSF_SUBTABLEPCT_VALIDN:
2616 case CTSF_LAYERPCT_VALIDN:
2617 case CTSF_LAYERROWPCT_VALIDN:
2618 case CTSF_LAYERCOLPCT_VALIDN:
2619 case CTSF_ROWPCT_TOTALN:
2620 case CTSF_COLPCT_TOTALN:
2621 case CTSF_TABLEPCT_TOTALN:
2622 case CTSF_SUBTABLEPCT_TOTALN:
2623 case CTSF_LAYERPCT_TOTALN:
2624 case CTSF_LAYERROWPCT_TOTALN:
2625 case CTSF_LAYERCOLPCT_TOTALN:
2632 case CTSF_UROWPCT_COUNT:
2633 case CTSF_UCOLPCT_COUNT:
2634 case CTSF_UTABLEPCT_COUNT:
2635 case CTSF_USUBTABLEPCT_COUNT:
2636 case CTSF_ULAYERPCT_COUNT:
2637 case CTSF_ULAYERROWPCT_COUNT:
2638 case CTSF_ULAYERCOLPCT_COUNT:
2639 case CTSF_UROWPCT_VALIDN:
2640 case CTSF_UCOLPCT_VALIDN:
2641 case CTSF_UTABLEPCT_VALIDN:
2642 case CTSF_USUBTABLEPCT_VALIDN:
2643 case CTSF_ULAYERPCT_VALIDN:
2644 case CTSF_ULAYERROWPCT_VALIDN:
2645 case CTSF_ULAYERCOLPCT_VALIDN:
2646 case CTSF_UROWPCT_TOTALN:
2647 case CTSF_UCOLPCT_TOTALN:
2648 case CTSF_UTABLEPCT_TOTALN:
2649 case CTSF_USUBTABLEPCT_TOTALN:
2650 case CTSF_ULAYERPCT_TOTALN:
2651 case CTSF_ULAYERROWPCT_TOTALN:
2652 case CTSF_ULAYERCOLPCT_TOTALN:
2668 case CTSF_ROWPCT_SUM:
2669 case CTSF_COLPCT_SUM:
2670 case CTSF_TABLEPCT_SUM:
2671 case CTSF_SUBTABLEPCT_SUM:
2672 case CTSF_LAYERPCT_SUM:
2673 case CTSF_LAYERROWPCT_SUM:
2674 case CTSF_LAYERCOLPCT_SUM:
2679 case CTSF_UVARIANCE:
2680 case CTSF_UROWPCT_SUM:
2681 case CTSF_UCOLPCT_SUM:
2682 case CTSF_UTABLEPCT_SUM:
2683 case CTSF_USUBTABLEPCT_SUM:
2684 case CTSF_ULAYERPCT_SUM:
2685 case CTSF_ULAYERROWPCT_SUM:
2686 case CTSF_ULAYERCOLPCT_SUM:
/* Counterpart of moments1_create() in ctables_summary_init(). */
2687 moments1_destroy (s->moments);
/* Counterpart of sort_create_writer() in ctables_summary_init(). */
2696 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for the summary function named by
   SS.  VALUE is the observation of VAR.  D_WEIGHT and E_WEIGHT are two case
   weights: on the visible lines the count groups add one or the other to
   s->count, the weighted moments group passes E_WEIGHT to moments1_add(),
   and the unweighted (U-prefixed) moments group passes 1.0 instead.

   IS_SCALE and EXCLUDED_MISSING gate the COUNT-style groups (counted when
   the variable is scale or the value is not an excluded missing value), and
   IS_SCALE_MISSING gates the min/max, moments and order-statistics groups.
   For order statistics each accepted observation is written to s->writer as
   a case holding (value, E_WEIGHT), and E_WEIGHT is added to s->ovalid.

   Many case labels and conditions are not visible in this listing, and the
   terminator of the explanatory comment that opens inside the body is not
   visible either.  NOTE(review): IS_MISSING is not referenced on any
   visible line -- confirm how it is used. */
2702 ctables_summary_add (union ctables_summary *s,
2703 const struct ctables_summary_spec *ss,
2704 const struct variable *var, const union value *value,
2705 bool is_scale, bool is_scale_missing,
2706 bool is_missing, bool excluded_missing,
2707 double d_weight, double e_weight)
2709 /* To determine whether a case is included in a given table for a particular
2710 kind of summary, consider the following charts for each variable in the
2711 table. Only if "yes" appears for every variable for the summary is the
2714 Categorical variables: VALIDN COUNT TOTALN
2715 Valid values in included categories yes yes yes
2716 Missing values in included categories --- yes yes
2717 Missing values in excluded categories --- --- yes
2718 Valid values in excluded categories --- --- ---
2720 Scale variables: VALIDN COUNT TOTALN
2721 Valid value yes yes yes
2722 Missing value --- yes yes
2724 Missing values include both user- and system-missing. (The system-missing
2725 value is always in an excluded category.)
2727 switch (ss->function)
2730 case CTSF_ROWPCT_TOTALN:
2731 case CTSF_COLPCT_TOTALN:
2732 case CTSF_TABLEPCT_TOTALN:
2733 case CTSF_SUBTABLEPCT_TOTALN:
2734 case CTSF_LAYERPCT_TOTALN:
2735 case CTSF_LAYERROWPCT_TOTALN:
2736 case CTSF_LAYERCOLPCT_TOTALN:
2737 s->count += d_weight;
2741 case CTSF_UROWPCT_TOTALN:
2742 case CTSF_UCOLPCT_TOTALN:
2743 case CTSF_UTABLEPCT_TOTALN:
2744 case CTSF_USUBTABLEPCT_TOTALN:
2745 case CTSF_ULAYERPCT_TOTALN:
2746 case CTSF_ULAYERROWPCT_TOTALN:
2747 case CTSF_ULAYERCOLPCT_TOTALN:
2752 case CTSF_ROWPCT_COUNT:
2753 case CTSF_COLPCT_COUNT:
2754 case CTSF_TABLEPCT_COUNT:
2755 case CTSF_SUBTABLEPCT_COUNT:
2756 case CTSF_LAYERPCT_COUNT:
2757 case CTSF_LAYERROWPCT_COUNT:
2758 case CTSF_LAYERCOLPCT_COUNT:
2759 if (is_scale || !excluded_missing)
2760 s->count += d_weight;
2764 case CTSF_UROWPCT_COUNT:
2765 case CTSF_UCOLPCT_COUNT:
2766 case CTSF_UTABLEPCT_COUNT:
2767 case CTSF_USUBTABLEPCT_COUNT:
2768 case CTSF_ULAYERPCT_COUNT:
2769 case CTSF_ULAYERROWPCT_COUNT:
2770 case CTSF_ULAYERCOLPCT_COUNT:
2771 if (is_scale || !excluded_missing)
2776 case CTSF_ROWPCT_VALIDN:
2777 case CTSF_COLPCT_VALIDN:
2778 case CTSF_TABLEPCT_VALIDN:
2779 case CTSF_SUBTABLEPCT_VALIDN:
2780 case CTSF_LAYERPCT_VALIDN:
2781 case CTSF_LAYERROWPCT_VALIDN:
2782 case CTSF_LAYERCOLPCT_VALIDN:
2786 s->count += d_weight;
2790 case CTSF_UROWPCT_VALIDN:
2791 case CTSF_UCOLPCT_VALIDN:
2792 case CTSF_UTABLEPCT_VALIDN:
2793 case CTSF_USUBTABLEPCT_VALIDN:
2794 case CTSF_ULAYERPCT_VALIDN:
2795 case CTSF_ULAYERROWPCT_VALIDN:
2796 case CTSF_ULAYERCOLPCT_VALIDN:
2805 s->count += d_weight;
2814 if (is_scale || !excluded_missing)
2815 s->count += e_weight;
2822 s->count += e_weight;
2826 s->count += e_weight;
2832 if (!is_scale_missing)
2834 assert (!var_is_alpha (var)); /* XXX? */
2835 if (s->min == SYSMIS || value->f < s->min)
2837 if (s->max == SYSMIS || value->f > s->max)
2847 case CTSF_ROWPCT_SUM:
2848 case CTSF_COLPCT_SUM:
2849 case CTSF_TABLEPCT_SUM:
2850 case CTSF_SUBTABLEPCT_SUM:
2851 case CTSF_LAYERPCT_SUM:
2852 case CTSF_LAYERROWPCT_SUM:
2853 case CTSF_LAYERCOLPCT_SUM:
2854 if (!is_scale_missing)
2855 moments1_add (s->moments, value->f, e_weight);
2862 case CTSF_UVARIANCE:
2863 case CTSF_UROWPCT_SUM:
2864 case CTSF_UCOLPCT_SUM:
2865 case CTSF_UTABLEPCT_SUM:
2866 case CTSF_USUBTABLEPCT_SUM:
2867 case CTSF_ULAYERPCT_SUM:
2868 case CTSF_ULAYERROWPCT_SUM:
2869 case CTSF_ULAYERCOLPCT_SUM:
2870 if (!is_scale_missing)
2871 moments1_add (s->moments, value->f, 1.0);
2877 d_weight = e_weight = 1.0;
2882 if (!is_scale_missing)
2884 s->ovalid += e_weight;
2886 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2887 *case_num_rw_idx (c, 0) = value->f;
2888 *case_num_rw_idx (c, 1) = e_weight;
2889 casewriter_write (s->writer, c);
/* Maps summary FUNCTION to the kind of domain that it is computed relative
   to.  The cases are grouped by percentage family (COLPCT, LAYERCOLPCT,
   LAYERPCT, LAYERROWPCT, ROWPCT, SUBTABLEPCT, TABLEPCT), with the weighted
   and the unweighted (U-prefixed) variants of a family sharing one group.

   ctables_summary_value() uses the result as an index into a cell's
   domains[] array to find the denominator for a percentage. */
2895 static enum ctables_domain_type
2896 ctables_function_domain (enum ctables_summary_function function)
2926 case CTSF_UVARIANCE:
/* COLPCT family. */
2932 case CTSF_COLPCT_COUNT:
2933 case CTSF_COLPCT_SUM:
2934 case CTSF_COLPCT_TOTALN:
2935 case CTSF_COLPCT_VALIDN:
2936 case CTSF_UCOLPCT_COUNT:
2937 case CTSF_UCOLPCT_SUM:
2938 case CTSF_UCOLPCT_TOTALN:
2939 case CTSF_UCOLPCT_VALIDN:
/* LAYERCOLPCT family. */
2942 case CTSF_LAYERCOLPCT_COUNT:
2943 case CTSF_LAYERCOLPCT_SUM:
2944 case CTSF_LAYERCOLPCT_TOTALN:
2945 case CTSF_LAYERCOLPCT_VALIDN:
2946 case CTSF_ULAYERCOLPCT_COUNT:
2947 case CTSF_ULAYERCOLPCT_SUM:
2948 case CTSF_ULAYERCOLPCT_TOTALN:
2949 case CTSF_ULAYERCOLPCT_VALIDN:
2950 return CTDT_LAYERCOL;
/* LAYERPCT family. */
2952 case CTSF_LAYERPCT_COUNT:
2953 case CTSF_LAYERPCT_SUM:
2954 case CTSF_LAYERPCT_TOTALN:
2955 case CTSF_LAYERPCT_VALIDN:
2956 case CTSF_ULAYERPCT_COUNT:
2957 case CTSF_ULAYERPCT_SUM:
2958 case CTSF_ULAYERPCT_TOTALN:
2959 case CTSF_ULAYERPCT_VALIDN:
/* LAYERROWPCT family. */
2962 case CTSF_LAYERROWPCT_COUNT:
2963 case CTSF_LAYERROWPCT_SUM:
2964 case CTSF_LAYERROWPCT_TOTALN:
2965 case CTSF_LAYERROWPCT_VALIDN:
2966 case CTSF_ULAYERROWPCT_COUNT:
2967 case CTSF_ULAYERROWPCT_SUM:
2968 case CTSF_ULAYERROWPCT_TOTALN:
2969 case CTSF_ULAYERROWPCT_VALIDN:
2970 return CTDT_LAYERROW;
/* ROWPCT family. */
2972 case CTSF_ROWPCT_COUNT:
2973 case CTSF_ROWPCT_SUM:
2974 case CTSF_ROWPCT_TOTALN:
2975 case CTSF_ROWPCT_VALIDN:
2976 case CTSF_UROWPCT_COUNT:
2977 case CTSF_UROWPCT_SUM:
2978 case CTSF_UROWPCT_TOTALN:
2979 case CTSF_UROWPCT_VALIDN:
/* SUBTABLEPCT family. */
2982 case CTSF_SUBTABLEPCT_COUNT:
2983 case CTSF_SUBTABLEPCT_SUM:
2984 case CTSF_SUBTABLEPCT_TOTALN:
2985 case CTSF_SUBTABLEPCT_VALIDN:
2986 case CTSF_USUBTABLEPCT_COUNT:
2987 case CTSF_USUBTABLEPCT_SUM:
2988 case CTSF_USUBTABLEPCT_TOTALN:
2989 case CTSF_USUBTABLEPCT_VALIDN:
2990 return CTDT_SUBTABLE;
/* TABLEPCT family. */
2992 case CTSF_TABLEPCT_COUNT:
2993 case CTSF_TABLEPCT_SUM:
2994 case CTSF_TABLEPCT_TOTALN:
2995 case CTSF_TABLEPCT_VALIDN:
2996 case CTSF_UTABLEPCT_COUNT:
2997 case CTSF_UTABLEPCT_SUM:
2998 case CTSF_UTABLEPCT_TOTALN:
2999 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION according to whether it is one of the
   *PCT_SUM (percentage-of-sum) functions.  The switch lists the
   *PCT_COUNT, *PCT_TOTALN and *PCT_VALIDN functions in one group and the
   *PCT_SUM functions in a separate, final group.

   NOTE(review): the declared return type is enum ctables_domain_type, but
   the name reads like a yes/no predicate; confirm whether bool was
   intended. */
3006 static enum ctables_domain_type
3007 ctables_function_is_pctsum (enum ctables_summary_function function)
3037 case CTSF_UVARIANCE:
/* Percentages of counts, total N and valid N. */
3041 case CTSF_COLPCT_COUNT:
3042 case CTSF_COLPCT_TOTALN:
3043 case CTSF_COLPCT_VALIDN:
3044 case CTSF_UCOLPCT_COUNT:
3045 case CTSF_UCOLPCT_TOTALN:
3046 case CTSF_UCOLPCT_VALIDN:
3047 case CTSF_LAYERCOLPCT_COUNT:
3048 case CTSF_LAYERCOLPCT_TOTALN:
3049 case CTSF_LAYERCOLPCT_VALIDN:
3050 case CTSF_ULAYERCOLPCT_COUNT:
3051 case CTSF_ULAYERCOLPCT_TOTALN:
3052 case CTSF_ULAYERCOLPCT_VALIDN:
3053 case CTSF_LAYERPCT_COUNT:
3054 case CTSF_LAYERPCT_TOTALN:
3055 case CTSF_LAYERPCT_VALIDN:
3056 case CTSF_ULAYERPCT_COUNT:
3057 case CTSF_ULAYERPCT_TOTALN:
3058 case CTSF_ULAYERPCT_VALIDN:
3059 case CTSF_LAYERROWPCT_COUNT:
3060 case CTSF_LAYERROWPCT_TOTALN:
3061 case CTSF_LAYERROWPCT_VALIDN:
3062 case CTSF_ULAYERROWPCT_COUNT:
3063 case CTSF_ULAYERROWPCT_TOTALN:
3064 case CTSF_ULAYERROWPCT_VALIDN:
3065 case CTSF_ROWPCT_COUNT:
3066 case CTSF_ROWPCT_TOTALN:
3067 case CTSF_ROWPCT_VALIDN:
3068 case CTSF_UROWPCT_COUNT:
3069 case CTSF_UROWPCT_TOTALN:
3070 case CTSF_UROWPCT_VALIDN:
3071 case CTSF_SUBTABLEPCT_COUNT:
3072 case CTSF_SUBTABLEPCT_TOTALN:
3073 case CTSF_SUBTABLEPCT_VALIDN:
3074 case CTSF_USUBTABLEPCT_COUNT:
3075 case CTSF_USUBTABLEPCT_TOTALN:
3076 case CTSF_USUBTABLEPCT_VALIDN:
3077 case CTSF_TABLEPCT_COUNT:
3078 case CTSF_TABLEPCT_TOTALN:
3079 case CTSF_TABLEPCT_VALIDN:
3080 case CTSF_UTABLEPCT_COUNT:
3081 case CTSF_UTABLEPCT_TOTALN:
3082 case CTSF_UTABLEPCT_VALIDN:
/* Percentages of sums. */
3085 case CTSF_COLPCT_SUM:
3086 case CTSF_UCOLPCT_SUM:
3087 case CTSF_LAYERCOLPCT_SUM:
3088 case CTSF_ULAYERCOLPCT_SUM:
3089 case CTSF_LAYERPCT_SUM:
3090 case CTSF_ULAYERPCT_SUM:
3091 case CTSF_LAYERROWPCT_SUM:
3092 case CTSF_ULAYERROWPCT_SUM:
3093 case CTSF_ROWPCT_SUM:
3094 case CTSF_UROWPCT_SUM:
3095 case CTSF_SUBTABLEPCT_SUM:
3096 case CTSF_USUBTABLEPCT_SUM:
3097 case CTSF_TABLEPCT_SUM:
3098 case CTSF_UTABLEPCT_SUM:
/* Computes the value to display for summary S, whose specification is SS,
   in CELL.

   Percentage functions divide the accumulated value by a denominator taken
   from CELL->domains[], indexed by ctables_function_domain (SS->function).
   Moment-based functions read their inputs from S->moments.  Order
   statistics (percentiles and the mode) read back the cases previously
   written to S->writer and store their result in S->ovalue. */
3106 ctables_summary_value (const struct ctables_cell *cell,
3107 union ctables_summary *s,
3108 const struct ctables_summary_spec *ss)
3110 switch (ss->function)
/* Weighted percentages of COUNT: denominator is the domain's e_count. */
3117 case CTSF_ROWPCT_COUNT:
3118 case CTSF_COLPCT_COUNT:
3119 case CTSF_TABLEPCT_COUNT:
3120 case CTSF_SUBTABLEPCT_COUNT:
3121 case CTSF_LAYERPCT_COUNT:
3122 case CTSF_LAYERROWPCT_COUNT:
3123 case CTSF_LAYERCOLPCT_COUNT:
3125 enum ctables_domain_type d = ctables_function_domain (ss->function);
/* The division is only performed when the denominator is nonzero. */
3126 return (cell->domains[d]->e_count
3127 ? s->count / cell->domains[d]->e_count * 100
/* Unweighted percentages of COUNT: denominator is the domain's u_count. */
3131 case CTSF_UROWPCT_COUNT:
3132 case CTSF_UCOLPCT_COUNT:
3133 case CTSF_UTABLEPCT_COUNT:
3134 case CTSF_USUBTABLEPCT_COUNT:
3135 case CTSF_ULAYERPCT_COUNT:
3136 case CTSF_ULAYERROWPCT_COUNT:
3137 case CTSF_ULAYERCOLPCT_COUNT:
3139 enum ctables_domain_type d = ctables_function_domain (ss->function);
3140 return (cell->domains[d]->u_count
3141 ? s->count / cell->domains[d]->u_count * 100
/* Weighted percentages of VALIDN: denominator is the domain's e_valid. */
3145 case CTSF_ROWPCT_VALIDN:
3146 case CTSF_COLPCT_VALIDN:
3147 case CTSF_TABLEPCT_VALIDN:
3148 case CTSF_SUBTABLEPCT_VALIDN:
3149 case CTSF_LAYERPCT_VALIDN:
3150 case CTSF_LAYERROWPCT_VALIDN:
3151 case CTSF_LAYERCOLPCT_VALIDN:
3153 enum ctables_domain_type d = ctables_function_domain (ss->function);
3154 return (cell->domains[d]->e_valid
3155 ? s->count / cell->domains[d]->e_valid * 100
/* Unweighted percentages of VALIDN: denominator is the domain's u_valid. */
3159 case CTSF_UROWPCT_VALIDN:
3160 case CTSF_UCOLPCT_VALIDN:
3161 case CTSF_UTABLEPCT_VALIDN:
3162 case CTSF_USUBTABLEPCT_VALIDN:
3163 case CTSF_ULAYERPCT_VALIDN:
3164 case CTSF_ULAYERROWPCT_VALIDN:
3165 case CTSF_ULAYERCOLPCT_VALIDN:
3167 enum ctables_domain_type d = ctables_function_domain (ss->function);
3168 return (cell->domains[d]->u_valid
3169 ? s->count / cell->domains[d]->u_valid * 100
/* Weighted percentages of TOTALN: denominator is the domain's e_total. */
3173 case CTSF_ROWPCT_TOTALN:
3174 case CTSF_COLPCT_TOTALN:
3175 case CTSF_TABLEPCT_TOTALN:
3176 case CTSF_SUBTABLEPCT_TOTALN:
3177 case CTSF_LAYERPCT_TOTALN:
3178 case CTSF_LAYERROWPCT_TOTALN:
3179 case CTSF_LAYERCOLPCT_TOTALN:
3181 enum ctables_domain_type d = ctables_function_domain (ss->function);
3182 return (cell->domains[d]->e_total
3183 ? s->count / cell->domains[d]->e_total * 100
/* Unweighted percentages of TOTALN: denominator is the domain's u_total. */
3187 case CTSF_UROWPCT_TOTALN:
3188 case CTSF_UCOLPCT_TOTALN:
3189 case CTSF_UTABLEPCT_TOTALN:
3190 case CTSF_USUBTABLEPCT_TOTALN:
3191 case CTSF_ULAYERPCT_TOTALN:
3192 case CTSF_ULAYERROWPCT_TOTALN:
3193 case CTSF_ULAYERCOLPCT_TOTALN:
3195 enum ctables_domain_type d = ctables_function_domain (ss->function);
3196 return (cell->domains[d]->u_total
3197 ? s->count / cell->domains[d]->u_total * 100
/* Range: max - min, or SYSMIS if either bound was never set. */
3218 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
/* Only the mean is requested from the moments. */
3224 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
/* Standard error of the mean, from the variance and the total weight. */
3231 double weight, variance;
3232 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3233 return calc_semean (variance, weight);
/* Standard deviation: square root of the variance, SYSMIS if undefined. */
3240 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3241 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* Sum, recovered as total weight times mean. */
3247 double weight, mean;
3248 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3249 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3253 case CTSF_UVARIANCE:
3256 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Weighted percentages of SUM: this cell's sum (weight * mean) divided by
   the domain's e_sum for the summarized variable. */
3260 case CTSF_ROWPCT_SUM:
3261 case CTSF_COLPCT_SUM:
3262 case CTSF_TABLEPCT_SUM:
3263 case CTSF_SUBTABLEPCT_SUM:
3264 case CTSF_LAYERPCT_SUM:
3265 case CTSF_LAYERROWPCT_SUM:
3266 case CTSF_LAYERCOLPCT_SUM:
3268 double weight, mean;
3269 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3270 if (weight == SYSMIS || mean == SYSMIS)
3272 enum ctables_domain_type d = ctables_function_domain (ss->function);
3273 double num = weight * mean;
3274 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
/* A zero denominator yields SYSMIS rather than a division by zero. */
3275 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Unweighted percentages of SUM: same as above, with the domain's u_sum as
   the denominator. */
3277 case CTSF_UROWPCT_SUM:
3278 case CTSF_UCOLPCT_SUM:
3279 case CTSF_UTABLEPCT_SUM:
3280 case CTSF_USUBTABLEPCT_SUM:
3281 case CTSF_ULAYERPCT_SUM:
3282 case CTSF_ULAYERROWPCT_SUM:
3283 case CTSF_ULAYERCOLPCT_SUM:
3285 double weight, mean;
3286 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3287 if (weight == SYSMIS || mean == SYSMIS)
3289 enum ctables_domain_type d = ctables_function_domain (ss->function);
3290 double num = weight * mean;
3291 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3292 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile: turn the accumulated cases into a reader and compute the
   requested percentile.  CTSF_PTILE uses SS->percentile; any other function
   that reaches this code uses 0.5.  The result is stored in S->ovalue. */
3301 struct casereader *reader = casewriter_make_reader (s->writer);
3304 struct percentile *ptile = percentile_create (
3305 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3306 struct order_stats *os = &ptile->parent;
3307 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3308 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3309 statistic_destroy (&ptile->parent.parent);
/* Mode: same scheme as the percentile, using a mode order statistic. */
3317 struct casereader *reader = casewriter_make_reader (s->writer);
3320 struct mode *mode = mode_create ();
3321 struct order_stats *os = &mode->parent;
3322 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3323 s->ovalue = mode->mode;
3324 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   define the sort key, and the axis A whose cell values are compared. */
3332 struct ctables_cell_sort_aux
3334 const struct ctables_nest *nest;
3335 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   "struct ctables_cell *" elements and AUX_ points to a struct
   ctables_cell_sort_aux.

   The nest's variables are compared in order, skipping the scale variable.
   For each variable, cells in different categories are ordered by the
   category pointers; cells in the same category are ordered according to
   that category's type. */
3339 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3341 const struct ctables_cell_sort_aux *aux = aux_;
3342 struct ctables_cell *const *ap = a_;
3343 struct ctables_cell *const *bp = b_;
3344 const struct ctables_cell *a = *ap;
3345 const struct ctables_cell *b = *bp;
3347 const struct ctables_nest *nest = aux->nest;
3348 for (size_t i = 0; i < nest->n; i++)
3349 if (i != nest->scale_idx)
3351 const struct variable *var = nest->vars[i];
3352 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3353 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the category pointers themselves. */
3354 if (a_cv->category != b_cv->category)
3355 return a_cv->category > b_cv->category ? 1 : -1;
3357 const union value *a_val = &a_cv->value;
3358 const union value *b_val = &b_cv->value;
3359 switch (a_cv->category->type)
3365 case CCT_POSTCOMPUTE:
3366 case CCT_EXCLUDED_MISSING:
3367 /* Must be equal. */
/* Ordering by data value. */
3375 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* Ordering by data value, honoring the category's sort direction. */
3383 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3385 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering by value label.  When only one of the two values has a label the
   order is fixed by that fact alone; when neither has one, the data values
   are compared instead. */
3391 const char *a_label = var_lookup_value_label (var, a_val);
3392 const char *b_label = var_lookup_value_label (var, b_val);
3394 ? (b_label ? strcmp (a_label, b_label) : 1)
3395 : (b_label ? -1 : value_compare_3way (
3396 a_val, b_val, var_get_width (var))));
3398 return a_cv->category->sort_ascending ? cmp : -cmp;
3412 For each ctables_table:
3413 For each combination of row vars:
3414 For each combination of column vars:
3415 For each combination of layer vars:
3417 Make a table of row values:
3418 Sort entries by row values
3419 Assign a 0-based index to each actual value
3420 Construct a dimension
3421 Make a table of column values
3422 Make a table of layer values
3424 Fill the table entry using the indexes from before.
3427 static struct ctables_domain *
3428 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3429 enum ctables_domain_type domain)
3432 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3434 const struct ctables_nest *nest = s->nests[a];
3435 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3437 size_t v_idx = nest->domains[domain][i];
3438 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3439 hash = hash_pointer (cv->category, hash);
3440 if (cv->category->type != CCT_TOTAL
3441 && cv->category->type != CCT_SUBTOTAL
3442 && cv->category->type != CCT_POSTCOMPUTE)
3443 hash = value_hash (&cv->value,
3444 var_get_width (nest->vars[v_idx]), hash);
3448 struct ctables_domain *d;
3449 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3451 const struct ctables_cell *df = d->example;
3452 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3454 const struct ctables_nest *nest = s->nests[a];
3455 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3457 size_t v_idx = nest->domains[domain][i];
3458 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3459 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3460 if (cv1->category != cv2->category
3461 || (cv1->category->type != CCT_TOTAL
3462 && cv1->category->type != CCT_SUBTOTAL
3463 && cv1->category->type != CCT_POSTCOMPUTE
3464 && !value_equal (&cv1->value, &cv2->value,
3465 var_get_width (nest->vars[v_idx]))))
3474 struct ctables_sum *sums = (s->table->n_sum_vars
3475 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3478 d = xmalloc (sizeof *d);
3479 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3480 hmap_insert (&s->domains[domain], &d->node, hash);
3484 static struct substring
3485 rtrim_value (const union value *v, const struct variable *var)
3487 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3488 var_get_width (var));
3489 ss_rtrim (&s, ss_cstr (" "));
3494 in_string_range (const union value *v, const struct variable *var,
3495 const struct substring *srange)
3497 struct substring s = rtrim_value (v, var);
3498 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3499 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Searches category set C for the category that value V of variable VAR
   falls into.

   The categories are scanned from the last one to the first, and each is
   tested according to its type: number, string, numeric range, string
   range, missing, and so on.  If no category matches during the scan, the
   result is NULL when V is a missing value and OTHERNM otherwise. */
3502 static const struct ctables_category *
3503 ctables_categories_match (const struct ctables_categories *c,
3504 const union value *v, const struct variable *var)
/* The system-missing value of a numeric variable is handled up front. */
3506 if (var_is_numeric (var) && v->f == SYSMIS)
3509 const struct ctables_category *othernm = NULL;
/* Iterate from the last category to the first. */
3510 for (size_t i = c->n_cats; i-- > 0; )
3512 const struct ctables_category *cat = &c->cats[i];
/* Exact numeric match. */
3516 if (cat->number == v->f)
/* Exact string match, ignoring V's trailing spaces. */
3521 if (ss_equals (cat->string, rtrim_value (v, var)))
/* Numeric range; -DBL_MAX and DBL_MAX stand for an open lower or upper
   bound. */
3526 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3527 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
/* String range. */
3532 if (in_string_range (v, var, cat->srange))
/* Category for missing values. */
3537 if (var_is_value_missing (var, v))
3541 case CCT_POSTCOMPUTE:
/* This category takes a missing value only if it includes missing values. */
3556 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3559 case CCT_EXCLUDED_MISSING:
/* Nothing matched during the scan. */
3564 return var_is_value_missing (var, v) ? NULL : othernm;
3567 static const struct ctables_category *
3568 ctables_categories_total (const struct ctables_categories *c)
3570 const struct ctables_category *first = &c->cats[0];
3571 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3572 return (first->type == CCT_TOTAL ? first
3573 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell in section S for case C, given the categories
   CATS[axis][index] that C's values were matched to for each variable of
   each axis's nest.

   A cell is identified by its categories and, for categories other than
   totals, subtotals and postcomputes, by the data values of the non-scale
   variables.  A new cell gets a copy of every variable's value, an
   initialized summary for each summary specification, and a domain of every
   domain type, before it is added to S->cells. */
3577 static struct ctables_cell *
3578 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3579 const struct ctables_category *cats[PIVOT_N_AXES][10])
3582 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3583 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3585 const struct ctables_nest *nest = s->nests[a];
3586 for (size_t i = 0; i < nest->n; i++)
3587 if (i != nest->scale_idx)
3589 hash = hash_pointer (cats[a][i], hash);
3590 if (cats[a][i]->type != CCT_TOTAL
3591 && cats[a][i]->type != CCT_SUBTOTAL
3592 && cats[a][i]->type != CCT_POSTCOMPUTE)
3593 hash = value_hash (case_data (c, nest->vars[i]),
3594 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same categories and values. */
3600 struct ctables_cell *cell;
3601 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3603 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3605 const struct ctables_nest *nest = s->nests[a];
3606 for (size_t i = 0; i < nest->n; i++)
3607 if (i != nest->scale_idx
3608 && (cats[a][i] != cell->axes[a].cvs[i].category
3609 || (cats[a][i]->type != CCT_TOTAL
3610 && cats[a][i]->type != CCT_SUBTOTAL
3611 && cats[a][i]->type != CCT_POSTCOMPUTE
3612 && !value_equal (case_data (c, nest->vars[i]),
3613 &cell->axes[a].cvs[i].value,
3614 var_get_width (nest->vars[i])))))
/* No existing cell matched: create a new one. */
3623 cell = xmalloc (sizeof *cell);
3626 cell->omit_domains = 0;
3627 cell->postcompute = false;
3628 //struct string name = DS_EMPTY_INITIALIZER;
3629 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3631 const struct ctables_nest *nest = s->nests[a];
/* One cell value per variable in the nest. */
3632 cell->axes[a].cvs = (nest->n
3633 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3635 for (size_t i = 0; i < nest->n; i++)
3637 const struct ctables_category *cat = cats[a][i];
3638 const struct variable *var = nest->vars[i];
3639 const union value *value = case_data (c, var);
3640 if (i != nest->scale_idx)
3642 const struct ctables_category *subtotal = cat->subtotal;
/* The category is hidden, or its subtotal hides its subcategories. */
3643 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Totals, subtotals and postcomputes are flagged to be left out of a set
   of domains that depends on the axis they appear on. */
3646 if (cat->type == CCT_TOTAL
3647 || cat->type == CCT_SUBTOTAL
3648 || cat->type == CCT_POSTCOMPUTE)
3650 /* XXX these should be more encompassing I think.*/
3654 case PIVOT_AXIS_COLUMN:
3655 cell->omit_domains |= ((1u << CTDT_TABLE) |
3656 (1u << CTDT_LAYER) |
3657 (1u << CTDT_LAYERCOL) |
3658 (1u << CTDT_SUBTABLE) |
3661 case PIVOT_AXIS_ROW:
3662 cell->omit_domains |= ((1u << CTDT_TABLE) |
3663 (1u << CTDT_LAYER) |
3664 (1u << CTDT_LAYERROW) |
3665 (1u << CTDT_SUBTABLE) |
3668 case PIVOT_AXIS_LAYER:
3669 cell->omit_domains |= ((1u << CTDT_TABLE) |
3670 (1u << CTDT_LAYER));
3674 if (cat->type == CCT_POSTCOMPUTE)
3675 cell->postcompute = true;
/* Record the category and a private copy of the value. */
3678 cell->axes[a].cvs[i].category = cat;
3679 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* Builds a human-readable name for the cell.
   NOTE(review): `name' is only declared in a commented-out line above, so
   this span is presumably compiled out (e.g. under #if 0); confirm. */
3682 if (i != nest->scale_idx)
3684 if (!ds_is_empty (&name))
3685 ds_put_cstr (&name, ", ");
3686 char *value_s = data_out (value, var_get_encoding (var),
3687 var_get_print_format (var),
3688 settings_get_fmt_settings ());
3689 if (cat->type == CCT_TOTAL
3690 || cat->type == CCT_SUBTOTAL
3691 || cat->type == CCT_POSTCOMPUTE)
3692 ds_put_format (&name, "%s=total", var_get_name (var));
3694 ds_put_format (&name, "%s=%s", var_get_name (var),
3695 value_s + strspn (value_s, " "));
3701 //cell->name = ds_steal_cstr (&name);
/* Initialize one summary per summary specification, taken from the nest on
   the table's summary axis and the cell's summary variant. */
3703 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3704 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3705 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3706 for (size_t i = 0; i < specs->n; i++)
3707 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to its domain of every type. */
3708 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3709 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3710 hmap_insert (&s->cells, &cell->node, hash);
3715 is_scale_missing (const struct ctables_summary_spec_set *specs,
3716 const struct ccase *c)
3718 if (!specs->is_scale)
3721 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3724 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3726 const struct variable *var = specs->listwise_vars[i];
3727 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with dictionary weight D_WEIGHT and effective weight
   E_WEIGHT, to the cell of section S selected by CATS.

   The cell is found or created with ctables_cell_insert__(), every summary
   in the cell is updated with ctables_summary_add(), and then the totals
   kept in the cell's domains are updated. */
3735 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3736 const struct ctables_category *cats[PIVOT_N_AXES][10],
3737 bool is_missing, bool excluded_missing,
3738 double d_weight, double e_weight)
3740 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3741 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3743 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
/* Whether the scale variable is missing is computed once per case and
   passed to every summary. */
3745 bool scale_missing = is_scale_missing (specs, c);
3746 for (size_t i = 0; i < specs->n; i++)
3747 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3748 specs->var, case_data (c, specs->var), specs->is_scale,
3749 scale_missing, is_missing, excluded_missing,
3750 d_weight, e_weight);
/* NOTE(review): `&&' is a logical AND and `1u << dt' is never zero, so this
   condition reduces to `!cell->omit_domains': a cell with any omit bit set
   updates no domain at all, whatever DT is.  If a per-domain bit test (`&')
   was intended, confirm that the omit masks built in
   ctables_cell_insert__() are complete before changing it, because a bit
   test would start adding total, subtotal and postcompute cells to every
   domain whose bit is clear. */
3751 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3752 if (!(cell->omit_domains && (1u << dt)))
3754 struct ctables_domain *d = cell->domains[dt];
/* Totals. */
3755 d->d_total += d_weight;
3756 d->e_total += e_weight;
/* Counts leave out cases with a missing value in an excluded category. */
3758 if (!excluded_missing)
3760 d->d_count += d_weight;
3761 d->e_count += e_weight;
/* Valid counts. */
3766 d->d_valid += d_weight;
3767 d->e_valid += e_weight;
/* Per-variable sums used as denominators by the *PCT_SUM functions;
   missing addends are skipped. */
3770 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3772 /* XXX listwise_missing??? */
3773 const struct variable *var = s->table->sum_vars[i];
3774 double addend = case_num (c, var);
3775 if (!var_is_num_missing (var, addend))
3777 struct ctables_sum *sum = &d->sums[i];
3778 sum->e_sum += addend * e_weight;
3779 sum->u_sum += addend;
/* Adds case C to the cells of section S that involve total categories.

   Starting from axis START_AXIS and nest position START_NEST, each
   non-scale variable is looked up for a total category with
   ctables_categories_total().  The entry of CATS for that variable is
   temporarily replaced (the previous entry is kept in `save'), the case is
   added to the resulting cell, and the function recurses from the next
   position (I + 1) of the same axis.

   CATS is modified in place during the recursion. */
3787 recurse_totals (struct ctables_section *s, const struct ccase *c,
3788 const struct ctables_category *cats[PIVOT_N_AXES][10],
3789 bool is_missing, bool excluded_missing,
3790 double d_weight, double e_weight,
3791 enum pivot_axis_type start_axis, size_t start_nest)
3793 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3795 const struct ctables_nest *nest = s->nests[a];
3796 for (size_t i = start_nest; i < nest->n; i++)
/* The scale variable has no categories. */
3798 if (i == nest->scale_idx)
3801 const struct variable *var = nest->vars[i];
/* The variable's category set is found through its dictionary index. */
3803 const struct ctables_category *total = ctables_categories_total (
3804 s->table->categories[var_get_dict_index (var)]);
/* Remember the current category for this variable. */
3807 const struct ctables_category *save = cats[a][i];
3809 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3810 d_weight, e_weight);
/* Continue with the variables after this one. */
3811 recurse_totals (s, c, cats, is_missing, excluded_missing,
3812 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells of section S that involve subtotal categories.

   Starting from axis START_AXIS and nest position START_NEST, the entry of
   CATS for each non-scale variable is replaced by that category's subtotal
   (`save->subtotal'), the case is added to the resulting cell, and the
   function recurses from the next position (I + 1) of the same axis.

   CATS is modified in place during the recursion. */
3821 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3822 const struct ctables_category *cats[PIVOT_N_AXES][10],
3823 bool is_missing, bool excluded_missing,
3824 double d_weight, double e_weight,
3825 enum pivot_axis_type start_axis, size_t start_nest)
3827 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3829 const struct ctables_nest *nest = s->nests[a];
3830 for (size_t i = start_nest; i < nest->n; i++)
/* The scale variable has no categories. */
3832 if (i == nest->scale_idx)
/* Remember the current category for this variable. */
3835 const struct ctables_category *save = cats[a][i];
3838 cats[a][i] = save->subtotal;
3839 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3840 d_weight, e_weight);
/* Continue with the variables after this one. */
3841 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3842 d_weight, e_weight, a, i + 1);
3851 ctables_add_occurrence (const struct variable *var,
3852 const union value *value,
3853 struct hmap *occurrences)
3855 int width = var_get_width (var);
3856 unsigned int hash = value_hash (value, width, 0);
3858 struct ctables_occurrence *o;
3859 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3861 if (value_equal (value, &o->value, width))
3864 o = xmalloc (sizeof *o);
3865 value_clone (&o->value, value, width);
3866 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with dictionary weight D_WEIGHT and effective weight
   E_WEIGHT, to section S.

   Each non-scale variable's value is first matched to a category.  Unless
   some value is a missing value in an excluded category, every value is
   then recorded in the section's per-variable occurrence sets.  Finally the
   case is added to its own cell and, through recurse_totals() and
   recurse_subtotals(), to the cells of the enclosing totals and
   subtotals. */
3870 ctables_cell_insert (struct ctables_section *s,
3871 const struct ccase *c,
3872 double d_weight, double e_weight)
/* NOTE(review): the second dimension is a hard-coded 10 (see the XXX); the
   nests are presumably limited to 10 variables elsewhere -- confirm. */
3874 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3876 /* Does at least one categorical variable have a missing value in an included
3877 or excluded category? */
3878 bool is_missing = false;
3880 /* Does at least one categorical variable have a missing value in an excluded
3882 bool excluded_missing = false;
/* Pass 1: match every categorical value to a category. */
3884 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3886 const struct ctables_nest *nest = s->nests[a];
3887 for (size_t i = 0; i < nest->n; i++)
3889 if (i == nest->scale_idx)
3892 const struct variable *var = nest->vars[i];
3893 const union value *value = case_data (c, var);
3895 bool var_missing = var_is_value_missing (var, value) != 0;
3899 cats[a][i] = ctables_categories_match (
3900 s->table->categories[var_get_dict_index (var)], value, var);
/* A shared placeholder category stands in for a missing value that falls in
   an excluded category. */
3906 static const struct ctables_category cct_excluded_missing = {
3907 .type = CCT_EXCLUDED_MISSING,
3910 cats[a][i] = &cct_excluded_missing;
3911 excluded_missing = true;
/* Pass 2: record the values that occurred, but only for cases without an
   excluded missing value. */
3916 if (!excluded_missing)
3917 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3919 const struct ctables_nest *nest = s->nests[a];
3920 for (size_t i = 0; i < nest->n; i++)
3921 if (i != nest->scale_idx)
3923 const struct variable *var = nest->vars[i];
3924 const union value *value = case_data (c, var);
3925 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add the case to its own cell... */
3929 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3930 d_weight, e_weight);
3932 //if (!excluded_missing)
/* ...and to the total and subtotal cells, starting from the first variable
   of the first axis. */
3934 recurse_totals (s, c, cats, is_missing, excluded_missing,
3935 d_weight, e_weight, 0, 0);
3936 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3937 d_weight, e_weight, 0, 0);
3943 const struct ctables_summary_spec_set *set;
3948 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3950 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3951 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3952 if (as->function != bs->function)
3953 return as->function > bs->function ? 1 : -1;
3954 else if (as->percentile != bs->percentile)
3955 return as->percentile < bs->percentile ? 1 : -1;
3957 const char *as_label = as->label ? as->label : "";
3958 const char *bs_label = bs->label ? bs->label : "";
3959 return strcmp (as_label, bs_label);
3962 static struct pivot_value *
3963 ctables_category_create_label__ (const struct ctables_category *cat,
3964 const struct variable *var,
3965 const union value *value)
3967 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3968 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3969 : pivot_value_new_var_value (var, value));
/* Creates and returns the label for postcompute category CAT, expanding the
   category references in its label template CAT->pc->label.

   Each occurrence of ")LABEL[" in the template is followed by a 1-based
   category index and a closing "]"; the reference is replaced by the label
   of that category in CATS, as produced by
   ctables_category_create_label__() for VAR and VALUE.  A template without
   any reference is used as-is. */
3972 static struct pivot_value *
3973 ctables_postcompute_label (const struct ctables_categories *cats,
3974 const struct ctables_category *cat,
3975 const struct variable *var,
3976 const union value *value)
3978 struct substring in = ss_cstr (cat->pc->label);
3979 struct substring target = ss_cstr (")LABEL[");
3981 struct string out = DS_EMPTY_INITIALIZER;
3984 size_t chunk = ss_find_substring (in, target);
/* No (further) reference: emit whatever is left of the template. */
3985 if (chunk == SIZE_MAX)
/* Nothing was substituted, so the text can be used directly. */
3987 if (ds_is_empty (&out))
3988 return pivot_value_new_user_text (in.string, in.length);
3991 ds_put_substring (&out, in);
3992 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then step over ")LABEL[". */
3996 ds_put_substring (&out, ss_head (in, chunk));
3997 ss_advance (&in, chunk + target.length);
/* Extract the index up to the closing bracket. */
3999 struct substring idx_s;
4000 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a decimal number from 1 to the number of categories,
   and the number must take up all of IDX_S. */
4003 long int idx = strtol (idx_s.string, &tail, 10);
4004 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
/* Substitute the referenced category's label. */
4007 struct ctables_category *cat2 = &cats->cats[idx - 1];
4008 struct pivot_value *label2
4009 = ctables_category_create_label__ (cat2, var, value);
4010 char *label2_s = pivot_value_to_string_defaults (label2);
4011 ds_put_cstr (&out, label2_s);
4013 pivot_value_destroy (label2);
/* The unexpanded template is returned here, presumably as the fallback for
   a malformed reference -- confirm against the full function. */
4018 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4021 static struct pivot_value *
4022 ctables_category_create_label (const struct ctables_categories *cats,
4023 const struct ctables_category *cat,
4024 const struct variable *var,
4025 const union value *value)
4027 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4028 ? ctables_postcompute_label (cats, cat, var, value)
4029 : ctables_category_create_label__ (cat, var, value));
4032 static struct ctables_value *
4033 ctables_value_find__ (struct ctables_table *t, const union value *value,
4034 int width, unsigned int hash)
4036 struct ctables_value *clv;
4037 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4038 hash, &t->clabels_values_map)
4039 if (value_equal (value, &clv->value, width))
4045 ctables_value_insert (struct ctables_table *t, const union value *value,
4048 unsigned int hash = value_hash (value, width, 0);
4049 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4052 clv = xmalloc (sizeof *clv);
4053 value_clone (&clv->value, value, width);
4054 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, which has the given WIDTH, in T's clabels_values_map,
   computing the hash itself.  Returns whatever ctables_value_find__()
   returns for that hash. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
/* Recursively enumerates every combination of one nest per axis in table T
   and appends a section to T->sections for each combination.

   A is the axis to iterate over at this level of the recursion, and IX[]
   holds the nest index chosen so far for each axis.  Once A has run past the
   last axis, IX[] is complete and the section is created. */
4067 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4068 size_t ix[PIVOT_N_AXES])
4070 if (a < PIVOT_N_AXES)
/* An axis without any nests still takes part in the enumeration, as a
   single iteration. */
4072 size_t limit = MAX (t->stacks[a].n, 1);
4073 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4074 ctables_table_add_section (t, a + 1, ix);
/* All axes are fixed: claim the next free slot in T->sections. */
4078 struct ctables_section *s = &t->sections[t->n_sections++];
4079 *s = (struct ctables_section) {
4081 .cells = HMAP_INITIALIZER (s->cells),
4083 for (a = 0; a < PIVOT_N_AXES; a++)
4086 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence set per variable in the nest. */
4088 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4089 for (size_t i = 0; i < nest->n; i++)
4090 hmap_init (&s->occurrences[a][i]);
/* One domain map per domain type. */
4092 for (size_t i = 0; i < N_CTDTS; i++)
4093 hmap_init (&s->domains[i]);
/* Postcompute operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  double sum = a + b;
  return sum;
}

/* Postcompute operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  double difference = a - b;
  return difference;
}

/* Postcompute operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  double product = a * b;
  return product;
}
4116 ctpo_div (double a, double b)
4118 return b ? a / b : SYSMIS;
4122 ctpo_pow (double a, double b)
4124 int save_errno = errno;
4126 double result = pow (a, b);
4134 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression. */
4139 struct ctables_pcexpr_evaluate_ctx
/* The cell whose categories are used for every variable other than the
   postcompute variable. */
4141 const struct ctables_cell *cell;
/* The section whose cells and occurrences are searched. */
4142 const struct ctables_section *section;
/* The category set that category references are resolved in. */
4143 const struct ctables_categories *cats;
/* The axis on which the postcompute variable appears. */
4144 enum pivot_axis_type pc_a;
/* Format passed along when looking up the category for a reference. */
4147 enum fmt_type parse_format;
4150 static double ctables_pcexpr_evaluate (
4151 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4154 ctables_pcexpr_evaluate_nonterminal (
4155 const struct ctables_pcexpr_evaluate_ctx *ctx,
4156 const struct ctables_pcexpr *e, size_t n_args,
4157 double evaluate (double, double))
4159 double args[2] = { 0, 0 };
4160 for (size_t i = 0; i < n_args; i++)
4162 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4163 if (!isfinite (args[i]) || args[i] == SYSMIS)
4166 return evaluate (args[0], args[1]);
/* Evaluates one category reference of a postcompute expression.

   The cell looked up is the one that agrees with CTX->cell on every
   non-scale variable except the postcompute variable (axis CTX->pc_a,
   position CTX->pc_a_idx), for which PC_CV is substituted.  The value
   returned for that cell is its summary number CTX->summary_idx, computed
   with ctables_summary_value(). */
4170 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4171 const struct ctables_cell_value *pc_cv)
4173 const struct ctables_section *s = ctx->section;
/* Hash the target cell's categories and values, in the same way as
   ctables_cell_insert__() does. */
4176 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4178 const struct ctables_nest *nest = s->nests[a];
4179 for (size_t i = 0; i < nest->n; i++)
4180 if (i != nest->scale_idx)
/* Substitute PC_CV at the postcompute variable's position. */
4182 const struct ctables_cell_value *cv
4183 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4184 : &ctx->cell->axes[a].cvs[i]);
4185 hash = hash_pointer (cv->category, hash);
4186 if (cv->category->type != CCT_TOTAL
4187 && cv->category->type != CCT_SUBTOTAL
4188 && cv->category->type != CCT_POSTCOMPUTE)
4189 hash = value_hash (&cv->value,
4190 var_get_width (nest->vars[i]), hash);
/* Search the section's cells for one that matches on every variable. */
4194 struct ctables_cell *tc;
4195 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4197 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4199 const struct ctables_nest *nest = s->nests[a];
4200 for (size_t i = 0; i < nest->n; i++)
4201 if (i != nest->scale_idx)
4203 const struct ctables_cell_value *p_cv
4204 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4205 : &ctx->cell->axes[a].cvs[i]);
4206 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4207 if (p_cv->category != t_cv->category
4208 || (p_cv->category->type != CCT_TOTAL
4209 && p_cv->category->type != CCT_SUBTOTAL
4210 && p_cv->category->type != CCT_POSTCOMPUTE
4211 && !value_equal (&p_cv->value,
4213 var_get_width (nest->vars[i]))))
/* Compute the requested summary of the cell in TC. */
4225 const struct ctables_table *t = s->table;
4226 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4227 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4228 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4229 &specs->specs[ctx->summary_idx]);
4233 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4234 const struct ctables_pcexpr *e)
4241 case CTPO_CAT_NRANGE:
4242 case CTPO_CAT_SRANGE:
4244 struct ctables_cell_value cv = {
4245 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4247 assert (cv.category != NULL);
4249 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4250 const struct ctables_occurrence *o;
4253 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4254 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4255 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4257 cv.value = o->value;
4258 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4263 case CTPO_CAT_NUMBER:
4264 case CTPO_CAT_MISSING:
4265 case CTPO_CAT_OTHERNM:
4266 case CTPO_CAT_SUBTOTAL:
4267 case CTPO_CAT_TOTAL:
4269 struct ctables_cell_value cv = {
4270 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4271 .value = { .f = e->number },
4273 assert (cv.category != NULL);
4274 return ctables_pcexpr_evaluate_category (ctx, &cv);
4277 case CTPO_CAT_STRING:
4279 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4281 if (width > e->string.length)
4283 s = xmalloc (width);
4284 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4286 struct ctables_cell_value cv = {
4287 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4288 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4290 assert (cv.category != NULL);
4291 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4297 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4300 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4303 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4306 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4309 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4312 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
4318 static const struct ctables_category *
4319 ctables_cell_postcompute (const struct ctables_section *s,
4320 const struct ctables_cell *cell,
4321 enum pivot_axis_type *pc_a_p,
4324 assert (cell->postcompute);
4325 const struct ctables_category *pc_cat = NULL;
4326 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4327 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4329 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4330 if (cv->category->type == CCT_POSTCOMPUTE)
4334 /* Multiple postcomputes cross each other. The value is
4339 pc_cat = cv->category;
4343 *pc_a_idx_p = pc_a_idx;
4347 assert (pc_cat != NULL);
4352 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4353 const struct ctables_cell *cell,
4354 const struct ctables_summary_spec *ss,
4355 struct fmt_spec *format,
4356 bool *is_ctables_format,
4359 enum pivot_axis_type pc_a = 0;
4360 size_t pc_a_idx = 0;
4361 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4362 s, cell, &pc_a, &pc_a_idx);
4366 const struct ctables_postcompute *pc = pc_cat->pc;
4369 for (size_t i = 0; i < pc->specs->n; i++)
4371 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4372 if (ss->function == ss2->function
4373 && ss->percentile == ss2->percentile)
4375 *format = ss2->format;
4376 *is_ctables_format = ss2->is_ctables_format;
4382 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4383 const struct ctables_categories *cats = s->table->categories[
4384 var_get_dict_index (var)];
4385 struct ctables_pcexpr_evaluate_ctx ctx = {
4390 .pc_a_idx = pc_a_idx,
4391 .summary_idx = summary_idx,
4392 .parse_format = pc_cat->parse_format,
4394 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4398 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4400 struct pivot_table *pt = pivot_table_create__ (
4402 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4403 : pivot_value_new_text (N_("Custom Tables"))),
4406 pivot_table_set_caption (
4407 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4409 pivot_table_set_corner_text (
4410 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4412 bool summary_dimension = (t->summary_axis != t->slabels_axis
4413 || (!t->slabels_visible
4414 && t->summary_specs.n > 1));
4415 if (summary_dimension)
4417 struct pivot_dimension *d = pivot_dimension_create (
4418 pt, t->slabels_axis, N_("Statistics"));
4419 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4420 if (!t->slabels_visible)
4421 d->hide_all_labels = true;
4422 for (size_t i = 0; i < specs->n; i++)
4423 pivot_category_create_leaf (
4424 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4427 bool categories_dimension = t->clabels_example != NULL;
4428 if (categories_dimension)
4430 struct pivot_dimension *d = pivot_dimension_create (
4431 pt, t->label_axis[t->clabels_from_axis],
4432 t->clabels_from_axis == PIVOT_AXIS_ROW
4433 ? N_("Row Categories")
4434 : N_("Column Categories"));
4435 const struct variable *var = t->clabels_example;
4436 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4437 for (size_t i = 0; i < t->n_clabels_values; i++)
4439 const struct ctables_value *value = t->clabels_values[i];
4440 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4441 assert (cat != NULL);
4442 pivot_category_create_leaf (d->root, ctables_category_create_label (
4443 c, cat, t->clabels_example,
4448 pivot_table_set_look (pt, ct->look);
4449 struct pivot_dimension *d[PIVOT_N_AXES];
4450 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4452 static const char *names[] = {
4453 [PIVOT_AXIS_ROW] = N_("Rows"),
4454 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4455 [PIVOT_AXIS_LAYER] = N_("Layers"),
4457 d[a] = (t->axes[a] || a == t->summary_axis
4458 ? pivot_dimension_create (pt, a, names[a])
4463 assert (t->axes[a]);
4465 for (size_t i = 0; i < t->stacks[a].n; i++)
4467 struct ctables_nest *nest = &t->stacks[a].nests[i];
4468 struct ctables_section **sections = xnmalloc (t->n_sections,
4470 size_t n_sections = 0;
4472 size_t n_total_cells = 0;
4473 size_t max_depth = 0;
4474 for (size_t j = 0; j < t->n_sections; j++)
4475 if (t->sections[j].nests[a] == nest)
4477 struct ctables_section *s = &t->sections[j];
4478 sections[n_sections++] = s;
4479 n_total_cells += s->cells.count;
4481 size_t depth = s->nests[a]->n;
4482 max_depth = MAX (depth, max_depth);
4485 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4487 size_t n_sorted = 0;
4489 for (size_t j = 0; j < n_sections; j++)
4491 struct ctables_section *s = sections[j];
4493 struct ctables_cell *cell;
4494 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4496 sorted[n_sorted++] = cell;
4497 assert (n_sorted <= n_total_cells);
4500 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4501 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4504 for (size_t j = 0; j < n_sorted; j++)
4506 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4511 struct ctables_level
4513 enum ctables_level_type
4515 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4516 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4517 CTL_SUMMARY, /* Summary functions. */
4521 enum settings_value_show vlabel; /* CTL_VAR only. */
4524 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4525 size_t n_levels = 0;
4526 for (size_t k = 0; k < nest->n; k++)
4528 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4529 if (vlabel != CTVL_NONE)
4531 levels[n_levels++] = (struct ctables_level) {
4533 .vlabel = (enum settings_value_show) vlabel,
4538 if (nest->scale_idx != k
4539 && (k != nest->n - 1 || t->label_axis[a] == a))
4541 levels[n_levels++] = (struct ctables_level) {
4542 .type = CTL_CATEGORY,
4548 if (!summary_dimension && a == t->slabels_axis)
4550 levels[n_levels++] = (struct ctables_level) {
4551 .type = CTL_SUMMARY,
4552 .var_idx = SIZE_MAX,
4556 /* Pivot categories:
4558 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4559 - category for nest->vars[0], if nest->scale_idx != 0
4560 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4561 - category for nest->vars[1], if nest->scale_idx != 1
4563 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4564 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4565 - summary function, if 'a == t->slabels_axis && a ==
4568 Additional dimensions:
4570 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4572 - If 't->label_axis[b] == a' for some 'b != a', add a category
4577 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4579 for (size_t j = 0; j < n_sorted; j++)
4581 struct ctables_cell *cell = sorted[j];
4582 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4584 size_t n_common = 0;
4587 for (; n_common < n_levels; n_common++)
4589 const struct ctables_level *level = &levels[n_common];
4590 if (level->type == CTL_CATEGORY)
4592 size_t var_idx = level->var_idx;
4593 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4594 if (prev->axes[a].cvs[var_idx].category != c)
4596 else if (c->type != CCT_SUBTOTAL
4597 && c->type != CCT_TOTAL
4598 && c->type != CCT_POSTCOMPUTE
4599 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4600 &cell->axes[a].cvs[var_idx].value,
4601 var_get_type (nest->vars[var_idx])))
4607 for (size_t k = n_common; k < n_levels; k++)
4609 const struct ctables_level *level = &levels[k];
4610 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4611 if (level->type == CTL_SUMMARY)
4613 assert (k == n_levels - 1);
4615 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4616 for (size_t m = 0; m < specs->n; m++)
4618 int leaf = pivot_category_create_leaf (
4619 parent, ctables_summary_label (&specs->specs[m],
4627 const struct variable *var = nest->vars[level->var_idx];
4628 struct pivot_value *label;
4629 if (level->type == CTL_VAR)
4631 label = pivot_value_new_variable (var);
4632 label->variable.show = level->vlabel;
4634 else if (level->type == CTL_CATEGORY)
4636 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4637 label = ctables_category_create_label (
4638 t->categories[var_get_dict_index (var)],
4639 cv->category, var, &cv->value);
4644 if (k == n_levels - 1)
4645 prev_leaf = pivot_category_create_leaf (parent, label);
4647 groups[k] = pivot_category_create_group__ (parent, label);
4651 cell->axes[a].leaf = prev_leaf;
4658 for (size_t i = 0; i < t->n_sections; i++)
4660 struct ctables_section *s = &t->sections[i];
4662 struct ctables_cell *cell;
4663 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4668 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4669 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4670 for (size_t j = 0; j < specs->n; j++)
4673 size_t n_dindexes = 0;
4675 if (summary_dimension)
4676 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4678 if (categories_dimension)
4680 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4681 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4682 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4683 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4686 dindexes[n_dindexes++] = ctv->leaf;
4689 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4692 int leaf = cell->axes[a].leaf;
4693 if (a == t->summary_axis && !summary_dimension)
4695 dindexes[n_dindexes++] = leaf;
4698 const struct ctables_summary_spec *ss = &specs->specs[j];
4700 struct fmt_spec format = specs->specs[j].format;
4701 bool is_ctables_format = ss->is_ctables_format;
4702 double d = (cell->postcompute
4703 ? ctables_cell_calculate_postcompute (
4704 s, cell, ss, &format, &is_ctables_format, j)
4705 : ctables_summary_value (cell, &cell->summaries[j],
4708 struct pivot_value *value;
4709 if (ct->hide_threshold != 0
4710 && d < ct->hide_threshold
4711 && ctables_summary_function_is_count (ss->function))
4713 value = pivot_value_new_user_text_nocopy (
4714 xasprintf ("<%d", ct->hide_threshold));
4716 else if (d == 0 && ct->zero)
4717 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4718 else if (d == SYSMIS && ct->missing)
4719 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4720 else if (is_ctables_format)
4722 char *s = data_out_stretchy (&(union value) { .f = d },
4724 &ct->ctables_formats, NULL);
4725 value = pivot_value_new_user_text_nocopy (s);
4729 value = pivot_value_new_number (d);
4730 value->numeric.format = format;
4732 pivot_table_put (pt, dindexes, n_dindexes, value);
4737 pivot_table_submit (pt);
4741 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4743 enum pivot_axis_type label_pos = t->label_axis[a];
4747 t->clabels_from_axis = a;
4749 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4750 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4752 const struct ctables_stack *stack = &t->stacks[a];
4756 const struct ctables_nest *n0 = &stack->nests[0];
4759 assert (stack->n == 1);
4763 const struct variable *v0 = n0->vars[n0->n - 1];
4764 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4765 t->clabels_example = v0;
4767 for (size_t i = 0; i < c0->n_cats; i++)
4768 if (c0->cats[i].type == CCT_FUNCTION)
4770 msg (SE, _("%s=%s is not allowed with sorting based "
4771 "on a summary function."),
4772 subcommand_name, pos_name);
4775 if (n0->n - 1 == n0->scale_idx)
4777 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4778 "but %s is a scale variable."),
4779 subcommand_name, pos_name, var_get_name (v0));
4783 for (size_t i = 1; i < stack->n; i++)
4785 const struct ctables_nest *ni = &stack->nests[i];
4787 const struct variable *vi = ni->vars[ni->n - 1];
4788 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4790 if (ni->n - 1 == ni->scale_idx)
4792 msg (SE, _("%s=%s requires the variables to be moved to be "
4793 "categorical, but %s is a scale variable."),
4794 subcommand_name, pos_name, var_get_name (vi));
4797 if (var_get_width (v0) != var_get_width (vi))
4799 msg (SE, _("%s=%s requires the variables to be "
4800 "moved to have the same width, but %s has "
4801 "width %d and %s has width %d."),
4802 subcommand_name, pos_name,
4803 var_get_name (v0), var_get_width (v0),
4804 var_get_name (vi), var_get_width (vi));
4807 if (!val_labs_equal (var_get_value_labels (v0),
4808 var_get_value_labels (vi)))
4810 msg (SE, _("%s=%s requires the variables to be "
4811 "moved to have the same value labels, but %s "
4812 "and %s have different value labels."),
4813 subcommand_name, pos_name,
4814 var_get_name (v0), var_get_name (vi));
4817 if (!ctables_categories_equal (c0, ci))
4819 msg (SE, _("%s=%s requires the variables to be "
4820 "moved to have the same category "
4821 "specifications, but %s and %s have different "
4822 "category specifications."),
4823 subcommand_name, pos_name,
4824 var_get_name (v0), var_get_name (vi));
/* Returns the index of VAR in the *N-element array *SUM_VARS, appending it
   first if it is not already there.  *ALLOCATED is the array's capacity; the
   array is grown with x2nrealloc() as needed. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  for (size_t i = 0; i < *n; i++)
    if (var == (*sum_vars)[i])
      return i;

  if (*n >= *allocated)
    *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
  (*sum_vars)[*n] = var;
  return (*n)++;
}
4847 enumerate_sum_vars (const struct ctables_axis *a,
4848 struct variable ***sum_vars, size_t *n, size_t *allocated)
4856 for (size_t i = 0; i < N_CSVS; i++)
4857 for (size_t j = 0; j < a->specs[i].n; j++)
4859 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4860 if (ctables_function_is_pctsum (spec->function))
4861 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4867 for (size_t i = 0; i < 2; i++)
4868 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
4874 ctables_prepare_table (struct ctables_table *t)
4876 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4879 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4881 for (size_t j = 0; j < t->stacks[a].n; j++)
4883 struct ctables_nest *nest = &t->stacks[a].nests[j];
4884 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4886 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4887 nest->n_domains[dt] = 0;
4889 for (size_t k = 0; k < nest->n; k++)
4891 if (k == nest->scale_idx)
4900 if (a != PIVOT_AXIS_LAYER)
4907 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4908 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4909 : a == PIVOT_AXIS_ROW)
4911 if (k == nest->n - 1
4912 || (nest->scale_idx == nest->n - 1
4913 && k == nest->n - 2))
4919 if (a == PIVOT_AXIS_COLUMN)
4924 if (a == PIVOT_AXIS_ROW)
4929 nest->domains[dt][nest->n_domains[dt]++] = k;
4936 struct ctables_nest *nest = xmalloc (sizeof *nest);
4937 *nest = (struct ctables_nest) { .n = 0 };
4938 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4940 /* There's no point in moving labels away from an axis that has no
4941 labels, so avoid dealing with the special cases around that. */
4942 t->label_axis[a] = a;
4945 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4946 for (size_t i = 0; i < stack->n; i++)
4948 struct ctables_nest *nest = &stack->nests[i];
4949 if (!nest->specs[CSV_CELL].n)
4951 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4952 specs->specs = xmalloc (sizeof *specs->specs);
4955 enum ctables_summary_function function
4956 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4958 *specs->specs = (struct ctables_summary_spec) {
4959 .function = function,
4960 .format = ctables_summary_default_format (function, specs->var),
4963 specs->var = nest->vars[0];
4965 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4966 &nest->specs[CSV_CELL]);
4968 else if (!nest->specs[CSV_TOTAL].n)
4969 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4970 &nest->specs[CSV_CELL]);
4972 if (t->ctables->smissing_listwise)
4974 struct variable **listwise_vars = NULL;
4976 size_t allocated = 0;
4978 for (size_t j = nest->group_head; j < stack->n; j++)
4980 const struct ctables_nest *other_nest = &stack->nests[j];
4981 if (other_nest->group_head != nest->group_head)
4984 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4987 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4988 sizeof *listwise_vars);
4989 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4992 for (size_t j = 0; j < N_CSVS; j++)
4994 nest->specs[j].listwise_vars = listwise_vars;
4995 nest->specs[j].n_listwise_vars = n;
5000 struct ctables_summary_spec_set *merged = &t->summary_specs;
5001 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5003 for (size_t j = 0; j < stack->n; j++)
5005 const struct ctables_nest *nest = &stack->nests[j];
5007 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5008 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5013 struct merge_item min = items[0];
5014 for (size_t j = 1; j < n_left; j++)
5015 if (merge_item_compare_3way (&items[j], &min) < 0)
5018 if (merged->n >= merged->allocated)
5019 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5020 sizeof *merged->specs);
5021 merged->specs[merged->n++] = min.set->specs[min.ofs];
5023 for (size_t j = 0; j < n_left; )
5025 if (merge_item_compare_3way (&items[j], &min) == 0)
5027 struct merge_item *item = &items[j];
5028 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5029 if (++item->ofs >= item->set->n)
5031 items[j] = items[--n_left];
5040 for (size_t j = 0; j < merged->n; j++)
5041 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5043 for (size_t j = 0; j < stack->n; j++)
5045 const struct ctables_nest *nest = &stack->nests[j];
5046 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5048 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5049 for (size_t k = 0; k < specs->n; k++)
5050 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5051 specs->specs[k].axis_idx);
5057 size_t allocated_sum_vars = 0;
5058 enumerate_sum_vars (t->axes[t->summary_axis],
5059 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5061 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5062 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5066 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5067 enum pivot_axis_type a)
5069 struct ctables_stack *stack = &t->stacks[a];
5070 for (size_t i = 0; i < stack->n; i++)
5072 const struct ctables_nest *nest = &stack->nests[i];
5073 const struct variable *var = nest->vars[nest->n - 1];
5074 const union value *value = case_data (c, var);
5076 if (var_is_numeric (var) && value->f == SYSMIS)
5079 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5081 ctables_value_insert (t, value, var_get_width (var));
5086 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5088 const struct ctables_value *const *ap = a_;
5089 const struct ctables_value *const *bp = b_;
5090 const struct ctables_value *a = *ap;
5091 const struct ctables_value *b = *bp;
5092 const int *width = width_;
5093 return value_compare_3way (&a->value, &b->value, *width);
5097 ctables_sort_clabels_values (struct ctables_table *t)
5099 const struct variable *v0 = t->clabels_example;
5100 int width = var_get_width (v0);
5102 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5105 const struct val_labs *val_labs = var_get_value_labels (v0);
5106 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5107 vl = val_labs_next (val_labs, vl))
5108 if (ctables_categories_match (c0, &vl->value, v0))
5109 ctables_value_insert (t, &vl->value, width);
5112 size_t n = hmap_count (&t->clabels_values_map);
5113 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5115 struct ctables_value *clv;
5117 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5118 t->clabels_values[i++] = clv;
5119 t->n_clabels_values = n;
5122 sort (t->clabels_values, n, sizeof *t->clabels_values,
5123 compare_clabels_values_3way, &width);
5125 for (size_t i = 0; i < n; i++)
5126 t->clabels_values[i]->leaf = i;
5130 ctables_add_category_occurrences (const struct variable *var,
5131 struct hmap *occurrences,
5132 const struct ctables_categories *cats)
5134 const struct val_labs *val_labs = var_get_value_labels (var);
5136 for (size_t i = 0; i < cats->n_cats; i++)
5138 const struct ctables_category *c = &cats->cats[i];
5142 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5148 int width = var_get_width (var);
5150 value_init (&value, width);
5151 value_copy_buf_rpad (&value, width,
5152 CHAR_CAST (uint8_t *, c->string.string),
5153 c->string.length, ' ');
5154 ctables_add_occurrence (var, &value, occurrences);
5155 value_destroy (&value, width);
5160 assert (var_is_numeric (var));
5161 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5162 vl = val_labs_next (val_labs, vl))
5163 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5164 ctables_add_occurrence (var, &vl->value, occurrences);
5168 assert (var_is_alpha (var));
5169 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5170 vl = val_labs_next (val_labs, vl))
5171 if (in_string_range (&vl->value, var, c->srange))
5172 ctables_add_occurrence (var, &vl->value, occurrences);
5176 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5177 vl = val_labs_next (val_labs, vl))
5178 if (var_is_value_missing (var, &vl->value))
5179 ctables_add_occurrence (var, &vl->value, occurrences);
5183 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5184 vl = val_labs_next (val_labs, vl))
5185 ctables_add_occurrence (var, &vl->value, occurrences);
5188 case CCT_POSTCOMPUTE:
5198 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5199 vl = val_labs_next (val_labs, vl))
5200 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5201 ctables_add_occurrence (var, &vl->value, occurrences);
5204 case CCT_EXCLUDED_MISSING:
5211 ctables_section_recurse_add_empty_categories (
5212 struct ctables_section *s,
5213 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5214 enum pivot_axis_type a, size_t a_idx)
5216 if (a >= PIVOT_N_AXES)
5217 ctables_cell_insert__ (s, c, cats);
5218 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5219 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5222 const struct variable *var = s->nests[a]->vars[a_idx];
5223 const struct ctables_categories *categories = s->table->categories[
5224 var_get_dict_index (var)];
5225 int width = var_get_width (var);
5226 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5227 const struct ctables_occurrence *o;
5228 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5230 union value *value = case_data_rw (c, var);
5231 value_destroy (value, width);
5232 value_clone (value, &o->value, width);
5233 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5234 assert (cats[a][a_idx] != NULL);
5235 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5238 for (size_t i = 0; i < categories->n_cats; i++)
5240 const struct ctables_category *cat = &categories->cats[i];
5241 if (cat->type == CCT_POSTCOMPUTE)
5243 cats[a][a_idx] = cat;
5244 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5251 ctables_section_add_empty_categories (struct ctables_section *s)
5253 bool show_empty = false;
5254 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5256 for (size_t k = 0; k < s->nests[a]->n; k++)
5257 if (k != s->nests[a]->scale_idx)
5259 const struct variable *var = s->nests[a]->vars[k];
5260 const struct ctables_categories *cats = s->table->categories[
5261 var_get_dict_index (var)];
5262 if (cats->show_empty)
5265 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5271 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5272 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5273 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
5278 ctables_section_clear (struct ctables_section *s)
5280 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5282 const struct ctables_nest *nest = s->nests[a];
5283 for (size_t i = 0; i < nest->n; i++)
5284 if (i != nest->scale_idx)
5286 const struct variable *var = nest->vars[i];
5287 int width = var_get_width (var);
5288 struct ctables_occurrence *o, *next;
5289 struct hmap *map = &s->occurrences[a][i];
5290 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5292 value_destroy (&o->value, width);
5293 hmap_delete (map, &o->node);
5300 struct ctables_cell *cell, *next_cell;
5301 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5303 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5305 const struct ctables_nest *nest = s->nests[a];
5306 for (size_t i = 0; i < nest->n; i++)
5307 if (i != nest->scale_idx)
5308 value_destroy (&cell->axes[a].cvs[i].value,
5309 var_get_width (nest->vars[i]));
5310 free (cell->axes[a].cvs);
5313 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5314 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5315 for (size_t i = 0; i < specs->n; i++)
5316 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5317 free (cell->summaries);
5319 hmap_delete (&s->cells, &cell->node);
5322 hmap_shrink (&s->cells);
5324 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5326 struct ctables_domain *domain, *next_domain;
5327 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5330 free (domain->sums);
5331 hmap_delete (&s->domains[dt], &domain->node);
5334 hmap_shrink (&s->domains[dt]);
5339 ctables_table_clear (struct ctables_table *t)
5341 for (size_t i = 0; i < t->n_sections; i++)
5342 ctables_section_clear (&t->sections[i]);
5344 if (t->clabels_example)
5346 int width = var_get_width (t->clabels_example);
5347 struct ctables_value *value, *next_value;
5348 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5349 &t->clabels_values_map)
5351 value_destroy (&value->value, width);
5352 hmap_delete (&t->clabels_values_map, &value->node);
5355 hmap_shrink (&t->clabels_values_map);
5357 free (t->clabels_values);
5358 t->clabels_values = NULL;
5359 t->n_clabels_values = 0;
5364 ctables_execute (struct dataset *ds, struct casereader *input,
5367 for (size_t i = 0; i < ct->n_tables; i++)
5369 struct ctables_table *t = ct->tables[i];
5370 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5371 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5372 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5373 sizeof *t->sections);
5374 size_t ix[PIVOT_N_AXES];
5375 ctables_table_add_section (t, 0, ix);
5378 struct dictionary *dict = dataset_dict (ds);
5379 struct casegrouper *grouper
5380 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5381 ? casegrouper_create_splits (input, dict)
5382 : casegrouper_create_vars (input, NULL, 0));
5383 struct casereader *group;
5384 while (casegrouper_get_next_group (grouper, &group))
5386 /* Output SPLIT FILE variables. */
5387 struct ccase *c = casereader_peek (group, 0);
5390 output_split_file_values (ds, c);
5394 bool warn_on_invalid = true;
5395 for (c = casereader_read (group); c;
5396 case_unref (c), c = casereader_read (group))
5398 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5399 double e_weight = (ct->e_weight
5400 ? var_force_valid_weight (ct->e_weight,
5401 case_num (c, ct->e_weight),
5405 for (size_t i = 0; i < ct->n_tables; i++)
5407 struct ctables_table *t = ct->tables[i];
5409 for (size_t j = 0; j < t->n_sections; j++)
5410 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5412 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5413 if (t->label_axis[a] != a)
5414 ctables_insert_clabels_values (t, c, a);
5417 casereader_destroy (group);
5419 for (size_t i = 0; i < ct->n_tables; i++)
5421 struct ctables_table *t = ct->tables[i];
5423 if (t->clabels_example)
5424 ctables_sort_clabels_values (t);
5426 for (size_t j = 0; j < t->n_sections; j++)
5427 ctables_section_add_empty_categories (&t->sections[j]);
5429 ctables_table_output (ct, t);
5430 ctables_table_clear (t);
5433 return casegrouper_destroy (grouper);
5438 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5439 struct dictionary *);
/* Releases the resources owned by expression node E, selected by the kind of
   node: the string of a CTPO_CAT_STRING node, both endpoint strings of a
   CTPO_CAT_SRANGE node, or (recursively) both subexpressions of a node that
   has operands.  E's message location is released as well. */
5442 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5448 case CTPO_CAT_STRING:
5449 ss_dealloc (&e->string);
/* A string range owns two substrings, one per endpoint. */
5452 case CTPO_CAT_SRANGE:
5453 for (size_t i = 0; i < 2; i++)
5454 ss_dealloc (&e->srange[i]);
/* Recurse into both operand slots.  NOTE(review): the case labels that
   select this branch are not visible here; presumably they are the operator
   node kinds -- confirm. */
5463 for (size_t i = 0; i < 2; i++)
5464 ctables_pcexpr_destroy (e->subs[i]);
/* These category node kinds share a single branch; no deallocation call
   specific to them is visible here. */
5468 case CTPO_CAT_NUMBER:
5469 case CTPO_CAT_NRANGE:
5470 case CTPO_CAT_MISSING:
5471 case CTPO_CAT_OTHERNM:
5472 case CTPO_CAT_SUBTOTAL:
5473 case CTPO_CAT_TOTAL:
/* Every node carries a location that must be released. */
5477 msg_location_destroy (e->location);
/* Allocates, with xmalloc(), a new expression node whose two operands are
   SUB0 and SUB1 and whose location is the merge of the operands' locations.
   Both operands are dereferenced to obtain their locations, so neither may
   be null.
   NOTE(review): the initializer that stores OP in the node is not visible
   here; presumably OP becomes the node's operation -- confirm. */
5482 static struct ctables_pcexpr *
5483 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5484 struct ctables_pcexpr *sub0,
5485 struct ctables_pcexpr *sub1)
5487 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5488 *e = (struct ctables_pcexpr) {
5490 .subs = { sub0, sub1 },
5491 .location = msg_location_merged (sub0->location, sub1->location),
5496 /* How to parse an operator. */
/* Lexer token type that stands for the operator; compared against
   lex_token() by ctable_pcexpr_match_operator(). */
5499 enum token_type token;
/* Expression operation handed to ctables_pcexpr_allocate_binary() when the
   operator is matched. */
5500 enum ctables_postcompute_op op;
/* Searches the N_OPS elements of OPS, in order, for one whose token type
   equals the lexer's current token.  A match on T_NEG_NUM is handled
   differently from a match on any other token.
   NOTE(review): the statement guarded by the T_NEG_NUM test and the return
   statements are not visible here.  Presumably every matched token except
   T_NEG_NUM is consumed, the matching element is returned, and a null
   pointer is returned when nothing matches -- confirm. */
5503 static const struct operator *
5504 ctable_pcexpr_match_operator (struct lexer *lexer,
5505 const struct operator ops[], size_t n_ops)
5507 for (const struct operator *op = ops; op < ops + n_ops; op++)
5508 if (lex_token (lexer) == op->token)
5510 if (op->token != T_NEG_NUM)
/* Parses a chain of the binary operators listed in OPS (N_OPS elements)
   whose left-hand operand LHS has already been parsed.  Each right-hand
   operand is parsed with PARSE_NEXT_LEVEL and combined with the running LHS
   by ctables_pcexpr_allocate_binary(), so that `a op b op c' groups as
   `(a op b) op c'.

   If CHAIN_WARNING is nonnull, it is issued as a warning at LHS's location
   once more than one operator has been folded into the chain. */
5519 static struct ctables_pcexpr *
5520 ctable_pcexpr_parse_binary_operators__ (
5521 struct lexer *lexer, struct dictionary *dict,
5522 const struct operator ops[], size_t n_ops,
5523 parse_recursively_func *parse_next_level,
5524 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of operators already folded into LHS. */
5526 for (int op_count = 0; ; op_count++)
5528 const struct operator *op
5529 = ctable_pcexpr_match_operator (lexer, ops, n_ops);
/* NOTE(review): the test enclosing this warning is not visible here;
   presumably it is the "no further operator" exit path -- confirm. */
5532 if (op_count > 1 && chain_warning)
5533 msg_at (SW, lhs->location, "%s", chain_warning);
5538 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
/* NOTE(review): presumably reached only when the right-hand operand failed
   to parse, so that the partial tree is not leaked -- confirm. */
5541 ctables_pcexpr_destroy (lhs);
/* Fold the new operand in on the right; the result is the next LHS. */
5545 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one operand with PARSE_NEXT_LEVEL and then whatever chain of binary
   operators from OPS (N_OPS elements) follows it, delegating the chain to
   ctable_pcexpr_parse_binary_operators__().  CHAIN_WARNING is passed through
   unchanged. */
5549 static struct ctables_pcexpr *
5550 ctable_pcexpr_parse_binary_operators (struct lexer *lexer,
5551 struct dictionary *dict,
5552 const struct operator ops[], size_t n_ops,
5553 parse_recursively_func *parse_next_level,
5554 const char *chain_warning)
5556 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5560 return ctable_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5562 chain_warning, lhs);
/* Forward declaration: ctable_pcexpr_parse_primary() calls this to parse a
   parenthesized subexpression before the definition appears. */
5565 static struct ctables_pcexpr *ctable_pcexpr_parse_add (struct lexer *,
5566 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node for the numeric range
   whose endpoints are LOW and HIGH.  Callers in this file pass -DBL_MAX or
   DBL_MAX for an end given as LO or HI. */
5568 static struct ctables_pcexpr
5569 ctpo_cat_nrange (double low, double high)
5571 return (struct ctables_pcexpr) {
5572 .op = CTPO_CAT_NRANGE,
5573 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node for the string range
   whose endpoints are LOW and HIGH.  The substrings are stored as given, not
   copied.  Callers in this file pass a substring with a null `string' member
   for an end given as LO or HI. */
5577 static struct ctables_pcexpr
5578 ctpo_cat_srange (struct substring low, struct substring high)
5580 return (struct ctables_pcexpr) {
5581 .op = CTPO_CAT_SRANGE,
5582 .srange = { low, high },
/* Parses a primary postcompute expression, which is one of:

   - a number, yielding a CTPO_CONSTANT node;

   - the keyword MISSING, OTHERNM, or TOTAL, or the keyword SUBTOTAL
     optionally followed by a bracketed integer index of at least 1;

   - a bracketed category reference: `[LO THRU x]', `[x]', `[x THRU HI]', or
     `[x THRU y]', where the values are all numbers or all strings;

   - a parenthesized subexpression, parsed by ctable_pcexpr_parse_add().

   For the forms built in the local node E, the node receives the location of
   the tokens from the first one consumed through the last one consumed and
   is returned as a heap copy made with xmemdup(). */
5586 static struct ctables_pcexpr *
5587 ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remember where the primary starts, for the node's location. */
5589 int start_ofs = lex_ofs (lexer);
5590 struct ctables_pcexpr e;
5591 if (lex_is_number (lexer))
5593 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5594 .number = lex_number (lexer) };
5597 else if (lex_match_id (lexer, "MISSING"))
5598 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5599 else if (lex_match_id (lexer, "OTHERNM"))
5600 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5601 else if (lex_match_id (lexer, "TOTAL"))
5602 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5603 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index follows SUBTOTAL. */
5605 size_t subtotal_index = 0;
5606 if (lex_match (lexer, T_LBRACK))
5608 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5610 subtotal_index = lex_integer (lexer);
5612 if (!lex_force_match (lexer, T_RBRACK))
5615 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5616 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5618 else if (lex_match (lexer, T_LBRACK))
/* `LO THRU x': a range that is open at the low end. */
5620 if (lex_match_id (lexer, "LO"))
5622 if (!lex_force_match_id (lexer, "THRU"))
5625 if (lex_is_string (lexer))
/* A null string marks the open low end of a string range. */
5627 struct substring low = { .string = NULL };
5628 struct substring high = parse_substring (lexer, dict);
5629 e = ctpo_cat_srange (low, high);
5633 if (lex_force_num (lexer))
/* -DBL_MAX marks the open low end of a numeric range. */
5635 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number, optionally followed by `THRU HI' or `THRU y'. */
5639 else if (lex_is_number (lexer))
5641 double number = lex_number (lexer);
5643 if (lex_match_id (lexer, "THRU"))
5645 if (lex_match_id (lexer, "HI"))
5646 e = ctpo_cat_nrange (number, DBL_MAX);
5649 if (!lex_force_num (lexer))
5651 e = ctpo_cat_nrange (number, lex_number (lexer));
5656 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string, optionally followed by `THRU HI' or `THRU y'. */
5659 else if (lex_is_string (lexer))
5661 struct substring s = parse_substring (lexer, dict);
5663 if (lex_match_id (lexer, "THRU"))
5665 struct substring high;
5667 if (lex_match_id (lexer, "HI"))
/* A null string marks the open high end of a string range. */
5668 high = (struct substring) { .string = NULL };
5671 if (!lex_force_string (lexer))
5676 high = parse_substring (lexer, dict);
5679 e = ctpo_cat_srange (s, high);
5682 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5686 lex_error (lexer, NULL);
/* If the closing bracket is missing, free any strings that E already owns
   before giving up. */
5690 if (!lex_force_match (lexer, T_RBRACK))
5692 if (e.op == CTPO_CAT_STRING)
5693 ss_dealloc (&e.string);
5694 else if (e.op == CTPO_CAT_SRANGE)
5696 ss_dealloc (&e.srange[0]);
5697 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression: restart at the lowest-precedence level. */
5702 else if (lex_match (lexer, T_LPAREN))
5704 struct ctables_pcexpr *ep = ctable_pcexpr_parse_add (lexer, dict);
5707 if (!lex_force_match (lexer, T_RPAREN))
5709 ctables_pcexpr_destroy (ep);
5716 lex_error (lexer, NULL);
/* The location spans the first through the last token consumed. */
5720 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5721 return xmemdup (&e, sizeof e);
/* Allocates, with xmalloc(), a new expression node whose location runs from
   the token at offset START_OFS through the token just before the lexer's
   current one.
   NOTE(review): the initializers for the node's operation and operand are
   not visible here; judging by the name and the callers, presumably this is
   a negation node with SUB as its operand -- confirm. */
5724 static struct ctables_pcexpr *
5725 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5726 struct lexer *lexer, int start_ofs)
5728 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5729 *e = (struct ctables_pcexpr) {
5732 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level: primaries joined by `**' (token T_EXP,
   operation CTPO_POW).  A warning about left-associativity is supplied to
   the chain parser for use with chained exponentiation.

   Unless the current token is a negative number immediately followed by
   `**', the work is done entirely by ctable_pcexpr_parse_binary_operators().
   Otherwise, the negated token value (hence a nonnegative constant) becomes
   the left-hand operand, the rest of the exponentiation chain is parsed onto
   it, and the result is wrapped by ctables_pcexpr_allocate_neg(). */
5737 static struct ctables_pcexpr *
5738 ctable_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5740 static const struct operator op = { T_EXP, CTPO_POW };
5742 const char *chain_warning =
5743 _("The exponentiation operator (`**') is left-associative: "
5744 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5745 "To disable this warning, insert parentheses.");
5747 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5748 return ctable_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5749 ctable_pcexpr_parse_primary,
5752 /* Special case for situations like "-5**6", which must be parsed as
5755 int start_ofs = lex_ofs (lexer);
5756 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5757 *lhs = (struct ctables_pcexpr) {
5758 .op = CTPO_CONSTANT,
5759 .number = -lex_tokval (lexer),
5760 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5764 struct ctables_pcexpr *node = ctable_pcexpr_parse_binary_operators__ (
5765 lexer, dict, &op, 1,
5766 ctable_pcexpr_parse_primary, chain_warning, lhs);
5770 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5773 /* Parses the unary minus level. */
/* That is, any number of leading `-' tokens followed by an
   exponentiation-level expression. */
5774 static struct ctables_pcexpr *
5775 ctable_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
/* Remember where the expression starts, for the negation node's location. */
5777 int start_ofs = lex_ofs (lexer);
/* Without a leading `-', this level is just the exponentiation level. */
5778 if (!lex_match (lexer, T_DASH))
5779 return ctable_pcexpr_parse_exp (lexer, dict);
/* Recurse, so that repeated minus signs nest. */
5781 struct ctables_pcexpr *inner = ctable_pcexpr_parse_neg (lexer, dict);
5785 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5788 /* Parses the multiplication and division level. */
/* Operands are parsed at the unary minus level; no chain warning is
   requested. */
5789 static struct ctables_pcexpr *
5790 ctable_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5792 static const struct operator ops[] =
5794 { T_ASTERISK, CTPO_MUL },
5795 { T_SLASH, CTPO_DIV },
5798 return ctable_pcexpr_parse_binary_operators (lexer, dict, ops,
5799 sizeof ops / sizeof *ops,
5800 ctable_pcexpr_parse_neg, NULL);
5803 /* Parses the addition and subtraction level. */
/* This is the entry point for a whole postcompute expression.  Operands are
   parsed at the multiplication and division level; no chain warning is
   requested. */
5804 static struct ctables_pcexpr *
5805 ctable_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5807 static const struct operator ops[] =
5809 { T_PLUS, CTPO_ADD },
5810 { T_DASH, CTPO_SUB },
/* A negative-number token directly after an operand acts as an addition.
   NOTE(review): this presumably relies on ctable_pcexpr_match_operator()
   leaving the T_NEG_NUM token in place so that it is then parsed as the
   right-hand operand, making `a -5' mean `a + (-5)' -- confirm. */
5811 { T_NEG_NUM, CTPO_ADD },
5814 return ctable_pcexpr_parse_binary_operators (lexer, dict,
5815 ops, sizeof ops / sizeof *ops,
5816 ctable_pcexpr_parse_mul, NULL);
/* Searches CT->postcomputes for the postcompute named NAME.  Names are
   hashed with utf8_hash_case_string() and compared with utf8_strcasecmp(),
   so the lookup does not depend on letter case.
   NOTE(review): the return statements are not visible here; callers treat a
   null result as "not found", so presumably the match is returned, or a null
   pointer if there is none -- confirm. */
5819 static struct ctables_postcompute *
5820 ctables_find_postcompute (struct ctables *ct, const char *name)
5822 struct ctables_postcompute *pc;
5823 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5824 utf8_hash_case_string (name, 0), &ct->postcomputes)
5825 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, `&name = EXPR(expression)', and
   records the resulting postcompute in CT->postcomputes.

   If a postcompute of the same name already exists, a warning says that the
   new definition overrides it, a note points at the previous definition, and
   the previous expression and location are released.  Otherwise a new entry
   is allocated and inserted into the hash map under its case-insensitive
   hash.  The entry's label is set to the source text of the expression. */
5831 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* Start one token before the current one, for the location reported in
   messages.  NOTE(review): presumably that token is the PCOMPUTE keyword
   already consumed by the caller -- confirm. */
5834 int pcompute_start = lex_ofs (lexer) - 1;
5836 if (!lex_match (lexer, T_AND))
5838 lex_error_expecting (lexer, "&");
5841 if (!lex_force_id (lexer))
/* Take a private copy of the name. */
5844 char *name = ss_xstrdup (lex_tokss (lexer));
5847 if (!lex_force_match (lexer, T_EQUALS)
5848 || !lex_force_match_id (lexer, "EXPR")
5849 || !lex_force_match (lexer, T_LPAREN))
/* Record the token range of the expression so that its source text can
   serve as the label below. */
5855 int expr_start = lex_ofs (lexer);
5856 struct ctables_pcexpr *expr = ctable_pcexpr_parse_add (lexer, dict);
5857 int expr_end = lex_ofs (lexer) - 1;
5858 if (!expr || !lex_force_match (lexer, T_RPAREN))
5863 int pcompute_end = lex_ofs (lexer) - 1;
5865 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5868 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, then drop what the old definition owned. */
5871 msg_at (SW, location, _("New definition of &%s will override the "
5872 "previous definition."),
5874 msg_at (SN, pc->location, _("This is the previous definition."));
5876 ctables_pcexpr_destroy (pc->expr);
5877 msg_location_destroy (pc->location);
/* First definition: create the entry, which takes ownership of NAME. */
5882 pc = xmalloc (sizeof *pc);
5883 *pc = (struct ctables_postcompute) { .name = name };
5884 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5885 utf8_hash_case_string (pc->name, 0));
5888 pc->location = location;
5890 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the value of the FORMAT setting of PPROPERTIES into SSS, which is
   first reset to an empty set.  Each item consists of a summary function, a
   percentile in the range 0 to 100 if the function is CTSF_PTILE, and a
   format specifier; one summary spec is appended to SSS per item.  Parsing
   stops at the end of the command, at `/', or at an identifier that matches
   LABEL or HIDESOURCECATS. */
5895 ctables_parse_pproperties_format (struct lexer *lexer,
5896 struct ctables_summary_spec_set *sss)
5898 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5900 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5901 && !(lex_token (lexer) == T_ID
5902 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5903 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5904 lex_tokss (lexer)))))
5906 /* Parse function. */
5907 enum ctables_summary_function function;
5908 if (!parse_ctables_summary_function (lexer, &function))
5911 /* Parse percentile. */
5912 double percentile = 0;
5913 if (function == CTSF_PTILE)
5915 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5917 percentile = lex_number (lexer);
/* Parse format. */
5922 struct fmt_spec format;
5923 bool is_ctables_format;
5924 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array if it is full, then append the new spec. */
5927 if (sss->n >= sss->allocated)
5928 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5929 sizeof *sss->specs);
5930 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5931 .function = function,
5932 .percentile = percentile,
5934 .is_ctables_format = is_ctables_format,
/* NOTE(review): presumably the error path, discarding whatever specs were
   collected before the failure -- confirm. */
5940 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of `&name' references,
   each of which must name a postcompute already present in CT, followed by
   LABEL, FORMAT, and HIDESOURCECATS settings up to the next `/' or the end
   of the command.  Each setting is applied to every postcompute in the
   list. */
5945 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5947 struct ctables_postcompute **pcs = NULL;
5949 size_t allocated_pcs = 0;
5951 while (lex_match (lexer, T_AND))
5953 if (!lex_force_id (lexer))
5955 struct ctables_postcompute *pc
5956 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5959 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5964 if (n_pcs >= allocated_pcs)
5965 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5969 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5971 if (lex_match_id (lexer, "LABEL"))
5973 lex_match (lexer, T_EQUALS);
5974 if (!lex_force_string (lexer))
/* Give every listed postcompute its own copy of the label, replacing any
   label it already had. */
5977 for (size_t i = 0; i < n_pcs; i++)
5979 free (pcs[i]->label);
5980 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5985 else if (lex_match_id (lexer, "FORMAT"))
5987 lex_match (lexer, T_EQUALS);
5989 struct ctables_summary_spec_set sss;
5990 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Give every listed postcompute its own clone of the parsed set; the
   temporary set is released afterward. */
5993 for (size_t i = 0; i < n_pcs; i++)
5996 ctables_summary_spec_set_uninit (pcs[i]->specs);
5998 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5999 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6001 ctables_summary_spec_set_uninit (&sss);
6003 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6005 lex_match (lexer, T_EQUALS);
6006 bool hide_source_cats;
6007 if (!parse_bool (lexer, &hide_source_cats))
6009 for (size_t i = 0; i < n_pcs; i++)
6010 pcs[i]->hide_source_cats = hide_source_cats;
6014 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time and formatted by
   strftime() according to FORMAT.
   NOTE(review): the return value of strftime() is discarded, and no null
   check on the result of localtime() is visible here.  If strftime() returns
   0, the buffer contents are unspecified, yet they are appended to OUT
   anyway -- worth confirming that this cannot occur for the formats used. */
6027 put_strftime (struct string *out, time_t now, const char *format)
6029 const struct tm *tm = localtime (&now);
6031 strftime (value, sizeof value, format, tm);
6032 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past the prefix.
   NOTE(review): the return statements are not visible here; callers use the
   result as a condition, so presumably it is true exactly when the prefix
   was found and skipped -- confirm. */
6036 skip_prefix (struct substring *s, struct substring prefix)
6038 if (ss_starts_with (*s, prefix))
6040 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens at lexer offsets
   EXPR_START up to but not including EXPR_END.  An identifier that names a
   variable in DICT which has a variable label is written as that label; any
   other identifier is written as its own text.  Other tokens are written as
   their source representation, with spaces inserted around them except
   after `(' and before `)'.
   NOTE(review): the statements that maintain `nest' are not visible here;
   presumably bracketed material is tracked so that it can be left out of the
   output -- confirm. */
6048 put_table_expression (struct string *out, struct lexer *lexer,
6049 struct dictionary *dict, int expr_start, int expr_end)
6052 for (int ofs = expr_start; ofs < expr_end; ofs++)
6054 const struct token *t = lex_ofs_token (lexer, ofs);
6055 if (t->type == T_LBRACK)
6057 else if (t->type == T_RBRACK && nest > 0)
6063 else if (t->type == T_ID)
/* Prefer the variable label, if the identifier names a labeled variable. */
6065 const struct variable *var
6066 = dict_lookup_var (dict, t->string.string);
6067 const char *label = var ? var_get_label (var) : NULL;
6068 ds_put_cstr (out, label ? label : t->string.string);
/* Separate from the preceding output with one space, but never at the very
   start, before `)', or when a space is already there. */
6072 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6073 ds_put_byte (out, ' ');
6075 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6076 ds_put_cstr (out, repr);
/* Follow with a space, except after the last token or after `('. */
6079 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6080 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding these markers as it goes:

   - `)DATE' becomes NOW formatted with the strftime() format "%x";
   - `)TIME' becomes NOW formatted with the strftime() format "%X";
   - `)TABLE' becomes the rendering that put_table_expression() produces for
     the tokens from EXPR_START to EXPR_END.

   A `)' that does not begin one of these markers is copied literally. */
6086 put_title_text (struct string *out, struct substring in, time_t now,
6087 struct lexer *lexer, struct dictionary *dict,
6088 int expr_start, int expr_end)
/* Copy everything up to the next `)' verbatim. */
6092 size_t chunk = ss_find_byte (in, ')');
6093 ds_put_substring (out, ss_head (in, chunk));
6094 ss_advance (&in, chunk);
6095 if (ss_is_empty (in))
/* IN now starts with `)': try each marker in turn. */
6098 if (skip_prefix (&in, ss_cstr (")DATE")))
6099 put_strftime (out, now, "%x");
6100 else if (skip_prefix (&in, ss_cstr (")TIME")))
6101 put_strftime (out, now, "%X");
6102 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6103 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a marker: emit the `)' itself and move past it. */
6106 ds_put_byte (out, ')');
6107 ss_advance (&in, 1);
6113 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6115 struct casereader *input = NULL;
6117 struct measure_guesser *mg = measure_guesser_create (ds);
6120 input = proc_open (ds);
6121 measure_guesser_run (mg, input);
6122 measure_guesser_destroy (mg);
6125 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6126 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6127 enum settings_value_show tvars = settings_get_show_variables ();
6128 for (size_t i = 0; i < n_vars; i++)
6129 vlabels[i] = (enum ctables_vlabel) tvars;
6131 struct pivot_table_look *look = pivot_table_look_unshare (
6132 pivot_table_look_ref (pivot_table_look_get_default ()));
6133 look->omit_empty = false;
6135 struct ctables *ct = xmalloc (sizeof *ct);
6136 *ct = (struct ctables) {
6137 .dict = dataset_dict (ds),
6139 .ctables_formats = FMT_SETTINGS_INIT,
6141 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6144 time_t now = time (NULL);
6149 const char *dot_string;
6150 const char *comma_string;
6152 static const struct ctf ctfs[4] = {
6153 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6154 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6155 { CTEF_PAREN, "-,(,),", "-.(.)." },
6156 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6158 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6159 for (size_t i = 0; i < 4; i++)
6161 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6162 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6163 fmt_number_style_from_string (s));
6166 if (!lex_force_match (lexer, T_SLASH))
6169 while (!lex_match_id (lexer, "TABLE"))
6171 if (lex_match_id (lexer, "FORMAT"))
6173 double widths[2] = { SYSMIS, SYSMIS };
6174 double units_per_inch = 72.0;
6176 while (lex_token (lexer) != T_SLASH)
6178 if (lex_match_id (lexer, "MINCOLWIDTH"))
6180 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6183 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6185 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6188 else if (lex_match_id (lexer, "UNITS"))
6190 lex_match (lexer, T_EQUALS);
6191 if (lex_match_id (lexer, "POINTS"))
6192 units_per_inch = 72.0;
6193 else if (lex_match_id (lexer, "INCHES"))
6194 units_per_inch = 1.0;
6195 else if (lex_match_id (lexer, "CM"))
6196 units_per_inch = 2.54;
6199 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6203 else if (lex_match_id (lexer, "EMPTY"))
6208 lex_match (lexer, T_EQUALS);
6209 if (lex_match_id (lexer, "ZERO"))
6211 /* Nothing to do. */
6213 else if (lex_match_id (lexer, "BLANK"))
6214 ct->zero = xstrdup ("");
6215 else if (lex_force_string (lexer))
6217 ct->zero = ss_xstrdup (lex_tokss (lexer));
6223 else if (lex_match_id (lexer, "MISSING"))
6225 lex_match (lexer, T_EQUALS);
6226 if (!lex_force_string (lexer))
6230 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6231 ? ss_xstrdup (lex_tokss (lexer))
6237 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6238 "UNITS", "EMPTY", "MISSING");
6243 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6244 && widths[0] > widths[1])
6246 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6250 for (size_t i = 0; i < 2; i++)
6251 if (widths[i] != SYSMIS)
6253 int *wr = ct->look->width_ranges[TABLE_HORZ];
6254 wr[i] = widths[i] / units_per_inch * 96.0;
6259 else if (lex_match_id (lexer, "VLABELS"))
6261 if (!lex_force_match_id (lexer, "VARIABLES"))
6263 lex_match (lexer, T_EQUALS);
6265 struct variable **vars;
6267 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6271 if (!lex_force_match_id (lexer, "DISPLAY"))
6276 lex_match (lexer, T_EQUALS);
6278 enum ctables_vlabel vlabel;
6279 if (lex_match_id (lexer, "DEFAULT"))
6280 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6281 else if (lex_match_id (lexer, "NAME"))
6283 else if (lex_match_id (lexer, "LABEL"))
6284 vlabel = CTVL_LABEL;
6285 else if (lex_match_id (lexer, "BOTH"))
6287 else if (lex_match_id (lexer, "NONE"))
6291 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6297 for (size_t i = 0; i < n_vars; i++)
6298 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6301 else if (lex_match_id (lexer, "MRSETS"))
6303 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6305 lex_match (lexer, T_EQUALS);
6306 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6309 else if (lex_match_id (lexer, "SMISSING"))
6311 if (lex_match_id (lexer, "VARIABLE"))
6312 ct->smissing_listwise = false;
6313 else if (lex_match_id (lexer, "LISTWISE"))
6314 ct->smissing_listwise = true;
6317 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6321 else if (lex_match_id (lexer, "PCOMPUTE"))
6323 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6326 else if (lex_match_id (lexer, "PPROPERTIES"))
6328 if (!ctables_parse_pproperties (lexer, ct))
6331 else if (lex_match_id (lexer, "WEIGHT"))
6333 if (!lex_force_match_id (lexer, "VARIABLE"))
6335 lex_match (lexer, T_EQUALS);
6336 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6340 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6342 if (lex_match_id (lexer, "COUNT"))
6344 lex_match (lexer, T_EQUALS);
6345 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6348 ct->hide_threshold = lex_integer (lexer);
6351 else if (ct->hide_threshold == 0)
6352 ct->hide_threshold = 5;
6356 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6357 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6358 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6362 if (!lex_force_match (lexer, T_SLASH))
6366 size_t allocated_tables = 0;
6369 if (ct->n_tables >= allocated_tables)
6370 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6371 sizeof *ct->tables);
6373 struct ctables_category *cat = xmalloc (sizeof *cat);
6374 *cat = (struct ctables_category) {
6376 .include_missing = false,
6377 .sort_ascending = true,
6380 struct ctables_categories *c = xmalloc (sizeof *c);
6381 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6382 *c = (struct ctables_categories) {
6389 struct ctables_categories **categories = xnmalloc (n_vars,
6390 sizeof *categories);
6391 for (size_t i = 0; i < n_vars; i++)
6394 struct ctables_table *t = xmalloc (sizeof *t);
6395 *t = (struct ctables_table) {
6397 .slabels_axis = PIVOT_AXIS_COLUMN,
6398 .slabels_visible = true,
6399 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6401 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6402 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6403 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6405 .clabels_from_axis = PIVOT_AXIS_LAYER,
6406 .categories = categories,
6407 .n_categories = n_vars,
6410 ct->tables[ct->n_tables++] = t;
6412 lex_match (lexer, T_EQUALS);
6413 int expr_start = lex_ofs (lexer);
6414 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6416 if (lex_match (lexer, T_BY))
6418 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6419 ct, t, PIVOT_AXIS_COLUMN))
6422 if (lex_match (lexer, T_BY))
6424 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6425 ct, t, PIVOT_AXIS_LAYER))
6429 int expr_end = lex_ofs (lexer);
6431 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6432 && !t->axes[PIVOT_AXIS_LAYER])
6434 lex_error (lexer, _("At least one variable must be specified."));
6438 const struct ctables_axis *scales[PIVOT_N_AXES];
6439 size_t n_scales = 0;
6440 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6442 scales[a] = find_scale (t->axes[a]);
6448 msg (SE, _("Scale variables may appear only on one axis."));
6449 if (scales[PIVOT_AXIS_ROW])
6450 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6451 _("This scale variable appears on the rows axis."));
6452 if (scales[PIVOT_AXIS_COLUMN])
6453 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6454 _("This scale variable appears on the columns axis."));
6455 if (scales[PIVOT_AXIS_LAYER])
6456 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6457 _("This scale variable appears on the layer axis."));
6461 const struct ctables_axis *summaries[PIVOT_N_AXES];
6462 size_t n_summaries = 0;
6463 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6465 summaries[a] = (scales[a]
6467 : find_categorical_summary_spec (t->axes[a]));
6471 if (n_summaries > 1)
6473 msg (SE, _("Summaries may appear only on one axis."));
6474 if (summaries[PIVOT_AXIS_ROW])
6475 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6476 _("This variable on the rows axis has a summary."));
6477 if (summaries[PIVOT_AXIS_COLUMN])
6478 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6479 _("This variable on the columns axis has a summary."));
6480 if (summaries[PIVOT_AXIS_LAYER])
6481 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6482 _("This variable on the layers axis has a summary."));
6485 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6486 if (n_summaries ? summaries[a] : t->axes[a])
6488 t->summary_axis = a;
6492 if (lex_token (lexer) == T_ENDCMD)
6494 if (!ctables_prepare_table (t))
6498 if (!lex_force_match (lexer, T_SLASH))
6501 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6503 if (lex_match_id (lexer, "SLABELS"))
6505 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6507 if (lex_match_id (lexer, "POSITION"))
6509 lex_match (lexer, T_EQUALS);
6510 if (lex_match_id (lexer, "COLUMN"))
6511 t->slabels_axis = PIVOT_AXIS_COLUMN;
6512 else if (lex_match_id (lexer, "ROW"))
6513 t->slabels_axis = PIVOT_AXIS_ROW;
6514 else if (lex_match_id (lexer, "LAYER"))
6515 t->slabels_axis = PIVOT_AXIS_LAYER;
6518 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6522 else if (lex_match_id (lexer, "VISIBLE"))
6524 lex_match (lexer, T_EQUALS);
6525 if (!parse_bool (lexer, &t->slabels_visible))
6530 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6535 else if (lex_match_id (lexer, "CLABELS"))
6537 if (lex_match_id (lexer, "AUTO"))
6539 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6540 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6542 else if (lex_match_id (lexer, "ROWLABELS"))
6544 lex_match (lexer, T_EQUALS);
6545 if (lex_match_id (lexer, "OPPOSITE"))
6546 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6547 else if (lex_match_id (lexer, "LAYER"))
6548 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6551 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6555 else if (lex_match_id (lexer, "COLLABELS"))
6557 lex_match (lexer, T_EQUALS);
6558 if (lex_match_id (lexer, "OPPOSITE"))
6559 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6560 else if (lex_match_id (lexer, "LAYER"))
6561 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6564 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6570 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6575 else if (lex_match_id (lexer, "CRITERIA"))
6577 if (!lex_force_match_id (lexer, "CILEVEL"))
6579 lex_match (lexer, T_EQUALS);
6581 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6583 t->cilevel = lex_number (lexer);
6586 else if (lex_match_id (lexer, "CATEGORIES"))
6588 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6592 else if (lex_match_id (lexer, "TITLES"))
6597 if (lex_match_id (lexer, "CAPTION"))
6598 textp = &t->caption;
6599 else if (lex_match_id (lexer, "CORNER"))
6601 else if (lex_match_id (lexer, "TITLE"))
6605 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6608 lex_match (lexer, T_EQUALS);
6610 struct string s = DS_EMPTY_INITIALIZER;
6611 while (lex_is_string (lexer))
6613 if (!ds_is_empty (&s))
6614 ds_put_byte (&s, ' ');
6615 put_title_text (&s, lex_tokss (lexer), now,
6616 lexer, dataset_dict (ds),
6617 expr_start, expr_end);
6621 *textp = ds_steal_cstr (&s);
6623 while (lex_token (lexer) != T_SLASH
6624 && lex_token (lexer) != T_ENDCMD);
6626 else if (lex_match_id (lexer, "SIGTEST"))
6630 t->chisq = xmalloc (sizeof *t->chisq);
6631 *t->chisq = (struct ctables_chisq) {
6633 .include_mrsets = true,
6634 .all_visible = true,
6640 if (lex_match_id (lexer, "TYPE"))
6642 lex_match (lexer, T_EQUALS);
6643 if (!lex_force_match_id (lexer, "CHISQUARE"))
6646 else if (lex_match_id (lexer, "ALPHA"))
6648 lex_match (lexer, T_EQUALS);
6649 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6651 t->chisq->alpha = lex_number (lexer);
6654 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6656 lex_match (lexer, T_EQUALS);
6657 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6660 else if (lex_match_id (lexer, "CATEGORIES"))
6662 lex_match (lexer, T_EQUALS);
6663 if (lex_match_id (lexer, "ALLVISIBLE"))
6664 t->chisq->all_visible = true;
6665 else if (lex_match_id (lexer, "SUBTOTALS"))
6666 t->chisq->all_visible = false;
6669 lex_error_expecting (lexer,
6670 "ALLVISIBLE", "SUBTOTALS");
6676 lex_error_expecting (lexer, "TYPE", "ALPHA",
6677 "INCLUDEMRSETS", "CATEGORIES");
6681 while (lex_token (lexer) != T_SLASH
6682 && lex_token (lexer) != T_ENDCMD);
6684 else if (lex_match_id (lexer, "COMPARETEST"))
6688 t->pairwise = xmalloc (sizeof *t->pairwise);
6689 *t->pairwise = (struct ctables_pairwise) {
6691 .alpha = { .05, .05 },
6692 .adjust = BONFERRONI,
6693 .include_mrsets = true,
6694 .meansvariance_allcats = true,
6695 .all_visible = true,
6704 if (lex_match_id (lexer, "TYPE"))
6706 lex_match (lexer, T_EQUALS);
6707 if (lex_match_id (lexer, "PROP"))
6708 t->pairwise->type = PROP;
6709 else if (lex_match_id (lexer, "MEAN"))
6710 t->pairwise->type = MEAN;
6713 lex_error_expecting (lexer, "PROP", "MEAN");
6717 else if (lex_match_id (lexer, "ALPHA"))
6719 lex_match (lexer, T_EQUALS);
6721 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6723 double a0 = lex_number (lexer);
6726 lex_match (lexer, T_COMMA);
6727 if (lex_is_number (lexer))
6729 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6731 double a1 = lex_number (lexer);
6734 t->pairwise->alpha[0] = MIN (a0, a1);
6735 t->pairwise->alpha[1] = MAX (a0, a1);
6738 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6740 else if (lex_match_id (lexer, "ADJUST"))
6742 lex_match (lexer, T_EQUALS);
6743 if (lex_match_id (lexer, "BONFERRONI"))
6744 t->pairwise->adjust = BONFERRONI;
6745 else if (lex_match_id (lexer, "BH"))
6746 t->pairwise->adjust = BH;
6747 else if (lex_match_id (lexer, "NONE"))
6748 t->pairwise->adjust = 0;
6751 lex_error_expecting (lexer, "BONFERRONI", "BH",
6756 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6758 lex_match (lexer, T_EQUALS);
6759 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6762 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6764 lex_match (lexer, T_EQUALS);
6765 if (lex_match_id (lexer, "ALLCATS"))
6766 t->pairwise->meansvariance_allcats = true;
6767 else if (lex_match_id (lexer, "TESTEDCATS"))
6768 t->pairwise->meansvariance_allcats = false;
6771 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6775 else if (lex_match_id (lexer, "CATEGORIES"))
6777 lex_match (lexer, T_EQUALS);
6778 if (lex_match_id (lexer, "ALLVISIBLE"))
6779 t->pairwise->all_visible = true;
6780 else if (lex_match_id (lexer, "SUBTOTALS"))
6781 t->pairwise->all_visible = false;
6784 lex_error_expecting (lexer, "ALLVISIBLE",
6789 else if (lex_match_id (lexer, "MERGE"))
6791 lex_match (lexer, T_EQUALS);
6792 if (!parse_bool (lexer, &t->pairwise->merge))
6795 else if (lex_match_id (lexer, "STYLE"))
6797 lex_match (lexer, T_EQUALS);
6798 if (lex_match_id (lexer, "APA"))
6799 t->pairwise->apa_style = true;
6800 else if (lex_match_id (lexer, "SIMPLE"))
6801 t->pairwise->apa_style = false;
6804 lex_error_expecting (lexer, "APA", "SIMPLE");
6808 else if (lex_match_id (lexer, "SHOWSIG"))
6810 lex_match (lexer, T_EQUALS);
6811 if (!parse_bool (lexer, &t->pairwise->show_sig))
6816 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6817 "INCLUDEMRSETS", "MEANSVARIANCE",
6818 "CATEGORIES", "MERGE", "STYLE",
6823 while (lex_token (lexer) != T_SLASH
6824 && lex_token (lexer) != T_ENDCMD);
6828 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6829 "CRITERIA", "CATEGORIES", "TITLES",
6830 "SIGTEST", "COMPARETEST");
6834 if (!lex_match (lexer, T_SLASH))
6838 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6839 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6841 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6845 if (!ctables_prepare_table (t))
6848 while (lex_token (lexer) != T_ENDCMD);
6851 input = proc_open (ds);
6852 bool ok = ctables_execute (ds, input, ct);
6853 ok = proc_commit (ds) && ok;
6855 ctables_destroy (ct);
6856 return ok ? CMD_SUCCESS : CMD_FAILURE;
6861 ctables_destroy (ct);