1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
167 #if 0 /* Multiple response sets not yet implemented. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight. */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
448 static void ctables_stack_uninit (struct ctables_stack *);
452 struct hmap_node node;
457 struct ctables_occurrence
459 struct hmap_node node;
463 struct ctables_section
466 struct ctables_table *table;
467 struct ctables_nest *nests[PIVOT_N_AXES];
470 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
471 struct hmap cells; /* Contains "struct ctables_cell"s. */
472 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 static void ctables_section_uninit (struct ctables_section *);
479 struct ctables *ctables;
480 struct ctables_axis *axes[PIVOT_N_AXES];
481 struct ctables_stack stacks[PIVOT_N_AXES];
482 struct ctables_section *sections;
484 enum pivot_axis_type summary_axis;
485 struct ctables_summary_spec_set summary_specs;
486 struct variable **sum_vars;
489 enum pivot_axis_type slabels_axis;
490 bool slabels_visible;
492 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
494 Most commonly, label_axis[a] == a, and in particular we always have
495 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
497 If ROWLABELS or COLLABELS is specified, then one of
498 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
499 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
501 If any category labels are moved, then 'clabels_example' is one of the
502 variables being moved (and it is otherwise NULL). All of the variables
503 being moved have the same width, value labels, and categories, so this
504 example variable can be used to find those out.
506 The remaining members in this group are relevant only if category labels
509 'clabels_values_map' holds a "struct ctables_value" for all the values
510 that appear in all of the variables in the moved categories. It is
511 accumulated as the data is read. Once the data is fully read, its
512 sorted values are put into 'clabels_values' and 'n_clabels_values'.
514 enum pivot_axis_type label_axis[PIVOT_N_AXES];
515 enum pivot_axis_type clabels_from_axis;
516 const struct variable *clabels_example;
517 struct hmap clabels_values_map;
518 struct ctables_value **clabels_values;
519 size_t n_clabels_values;
521 /* Indexed by variable dictionary index. */
522 struct ctables_categories **categories;
531 struct ctables_chisq *chisq;
532 struct ctables_pairwise *pairwise;
535 struct ctables_categories
538 struct ctables_category *cats;
543 struct ctables_category
545 enum ctables_category_type
547 /* Explicit category lists. */
550 CCT_NRANGE, /* Numerical range. */
551 CCT_SRANGE, /* String range. */
556 /* Totals and subtotals. */
560 /* Implicit category lists. */
565 /* For contributing to TOTALN. */
566 CCT_EXCLUDED_MISSING,
570 struct ctables_category *subtotal;
576 double number; /* CCT_NUMBER. */
577 struct substring string; /* CCT_STRING, in dictionary encoding. */
578 double nrange[2]; /* CCT_NRANGE. */
579 struct substring srange[2]; /* CCT_SRANGE. */
583 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
584 bool hide_subcategories; /* CCT_SUBTOTAL. */
587 /* CCT_POSTCOMPUTE. */
590 const struct ctables_postcompute *pc;
591 enum fmt_type parse_format;
594 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
597 bool include_missing;
601 enum ctables_summary_function sort_function;
602 struct variable *sort_var;
607 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
608 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
609 struct msg_location *location;
613 ctables_category_uninit (struct ctables_category *cat)
624 case CCT_POSTCOMPUTE:
628 ss_dealloc (&cat->string);
632 ss_dealloc (&cat->srange[0]);
633 ss_dealloc (&cat->srange[1]);
638 free (cat->total_label);
646 case CCT_EXCLUDED_MISSING:
652 nullable_substring_equal (const struct substring *a,
653 const struct substring *b)
655 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
659 ctables_category_equal (const struct ctables_category *a,
660 const struct ctables_category *b)
662 if (a->type != b->type)
668 return a->number == b->number;
671 return ss_equals (a->string, b->string);
674 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
677 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
678 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
684 case CCT_POSTCOMPUTE:
685 return a->pc == b->pc;
689 return !strcmp (a->total_label, b->total_label);
694 return (a->include_missing == b->include_missing
695 && a->sort_ascending == b->sort_ascending
696 && a->sort_function == b->sort_function
697 && a->sort_var == b->sort_var
698 && a->percentile == b->percentile);
700 case CCT_EXCLUDED_MISSING:
708 ctables_categories_unref (struct ctables_categories *c)
713 assert (c->n_refs > 0);
717 for (size_t i = 0; i < c->n_cats; i++)
718 ctables_category_uninit (&c->cats[i]);
724 ctables_categories_equal (const struct ctables_categories *a,
725 const struct ctables_categories *b)
727 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
730 for (size_t i = 0; i < a->n_cats; i++)
731 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
737 /* Chi-square test (SIGTEST). */
745 /* Pairwise comparison test (COMPARETEST). */
746 struct ctables_pairwise
748 enum { PROP, MEAN } type;
751 bool meansvariance_allcats;
753 enum { BONFERRONI = 1, BH } adjust;
777 struct variable *var;
779 struct ctables_summary_spec_set specs[N_CSVS];
783 struct ctables_axis *subs[2];
786 struct msg_location *loc;
789 static void ctables_axis_destroy (struct ctables_axis *);
798 enum ctables_function_availability
800 CTFA_ALL, /* Any variables. */
801 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
802 //CTFA_MRSETS, /* Only multiple-response sets */
805 struct ctables_summary_spec
807 enum ctables_summary_function function;
808 double percentile; /* CTSF_PTILE only. */
811 struct fmt_spec format;
812 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
819 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
820 const struct ctables_summary_spec *src)
823 dst->label = xstrdup_if_nonnull (src->label);
827 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
834 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
835 const struct ctables_summary_spec_set *src)
837 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
838 for (size_t i = 0; i < src->n; i++)
839 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
841 *dst = (struct ctables_summary_spec_set) {
846 .is_scale = src->is_scale,
851 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
853 for (size_t i = 0; i < set->n; i++)
854 ctables_summary_spec_uninit (&set->specs[i]);
859 parse_col_width (struct lexer *lexer, const char *name, double *width)
861 lex_match (lexer, T_EQUALS);
862 if (lex_match_id (lexer, "DEFAULT"))
864 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
866 *width = lex_number (lexer);
876 parse_bool (struct lexer *lexer, bool *b)
878 if (lex_match_id (lexer, "NO"))
880 else if (lex_match_id (lexer, "YES"))
884 lex_error_expecting (lexer, "YES", "NO");
890 static enum ctables_function_availability
891 ctables_function_availability (enum ctables_summary_function f)
893 static enum ctables_function_availability availability[] = {
894 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
899 return availability[f];
903 ctables_summary_function_is_count (enum ctables_summary_function f)
909 case CTSF_ROWPCT_COUNT:
910 case CTSF_COLPCT_COUNT:
911 case CTSF_TABLEPCT_COUNT:
912 case CTSF_SUBTABLEPCT_COUNT:
913 case CTSF_LAYERPCT_COUNT:
914 case CTSF_LAYERROWPCT_COUNT:
915 case CTSF_LAYERCOLPCT_COUNT:
917 case CTSF_UROWPCT_COUNT:
918 case CTSF_UCOLPCT_COUNT:
919 case CTSF_UTABLEPCT_COUNT:
920 case CTSF_USUBTABLEPCT_COUNT:
921 case CTSF_ULAYERPCT_COUNT:
922 case CTSF_ULAYERROWPCT_COUNT:
923 case CTSF_ULAYERCOLPCT_COUNT:
926 case CTSF_ROWPCT_VALIDN:
927 case CTSF_COLPCT_VALIDN:
928 case CTSF_TABLEPCT_VALIDN:
929 case CTSF_SUBTABLEPCT_VALIDN:
930 case CTSF_LAYERPCT_VALIDN:
931 case CTSF_LAYERROWPCT_VALIDN:
932 case CTSF_LAYERCOLPCT_VALIDN:
933 case CTSF_ROWPCT_TOTALN:
934 case CTSF_COLPCT_TOTALN:
935 case CTSF_TABLEPCT_TOTALN:
936 case CTSF_SUBTABLEPCT_TOTALN:
937 case CTSF_LAYERPCT_TOTALN:
938 case CTSF_LAYERROWPCT_TOTALN:
939 case CTSF_LAYERCOLPCT_TOTALN:
956 case CTSF_ROWPCT_SUM:
957 case CTSF_COLPCT_SUM:
958 case CTSF_TABLEPCT_SUM:
959 case CTSF_SUBTABLEPCT_SUM:
960 case CTSF_LAYERPCT_SUM:
961 case CTSF_LAYERROWPCT_SUM:
962 case CTSF_LAYERCOLPCT_SUM:
963 case CTSF_UROWPCT_VALIDN:
964 case CTSF_UCOLPCT_VALIDN:
965 case CTSF_UTABLEPCT_VALIDN:
966 case CTSF_USUBTABLEPCT_VALIDN:
967 case CTSF_ULAYERPCT_VALIDN:
968 case CTSF_ULAYERROWPCT_VALIDN:
969 case CTSF_ULAYERCOLPCT_VALIDN:
970 case CTSF_UROWPCT_TOTALN:
971 case CTSF_UCOLPCT_TOTALN:
972 case CTSF_UTABLEPCT_TOTALN:
973 case CTSF_USUBTABLEPCT_TOTALN:
974 case CTSF_ULAYERPCT_TOTALN:
975 case CTSF_ULAYERROWPCT_TOTALN:
976 case CTSF_ULAYERCOLPCT_TOTALN:
988 case CTSF_UROWPCT_SUM:
989 case CTSF_UCOLPCT_SUM:
990 case CTSF_UTABLEPCT_SUM:
991 case CTSF_USUBTABLEPCT_SUM:
992 case CTSF_ULAYERPCT_SUM:
993 case CTSF_ULAYERROWPCT_SUM:
994 case CTSF_ULAYERCOLPCT_SUM:
1002 parse_ctables_summary_function (struct lexer *lexer,
1003 enum ctables_summary_function *f)
1007 enum ctables_summary_function function;
1008 struct substring name;
1010 static struct pair names[] = {
1011 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
1012 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
1015 /* The .COUNT suffix may be omitted. */
1016 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
1017 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
1018 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
1019 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
1020 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
1021 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
1022 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
1026 if (!lex_force_id (lexer))
1029 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
1030 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
1032 *f = names[i].function;
1037 lex_error (lexer, _("Expecting summary function name."));
1042 ctables_axis_destroy (struct ctables_axis *axis)
1050 for (size_t i = 0; i < N_CSVS; i++)
1051 ctables_summary_spec_set_uninit (&axis->specs[i]);
1056 ctables_axis_destroy (axis->subs[0]);
1057 ctables_axis_destroy (axis->subs[1]);
1060 msg_location_destroy (axis->loc);
1064 static struct ctables_axis *
1065 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1066 struct ctables_axis *sub0,
1067 struct ctables_axis *sub1,
1068 struct lexer *lexer, int start_ofs)
1070 struct ctables_axis *axis = xmalloc (sizeof *axis);
1071 *axis = (struct ctables_axis) {
1073 .subs = { sub0, sub1 },
1074 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1079 struct ctables_axis_parse_ctx
1081 struct lexer *lexer;
1082 struct dictionary *dict;
1084 struct ctables_table *t;
1087 static struct fmt_spec
1088 ctables_summary_default_format (enum ctables_summary_function function,
1089 const struct variable *var)
1091 static const enum ctables_format default_formats[] = {
1092 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1096 switch (default_formats[function])
1099 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1102 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1105 return *var_get_print_format (var);
1112 static struct pivot_value *
1113 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1117 static const char *default_labels[] = {
1118 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1123 return (spec->function == CTSF_PTILE
1124 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1126 : pivot_value_new_text (default_labels[spec->function]));
1130 struct substring in = ss_cstr (spec->label);
1131 struct substring target = ss_cstr (")CILEVEL");
1133 struct string out = DS_EMPTY_INITIALIZER;
1136 size_t chunk = ss_find_substring (in, target);
1137 ds_put_substring (&out, ss_head (in, chunk));
1138 ss_advance (&in, chunk);
1140 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1142 ss_advance (&in, target.length);
1143 ds_put_format (&out, "%g", cilevel);
1149 ctables_summary_function_name (enum ctables_summary_function function)
1151 static const char *names[] = {
1152 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1156 return names[function];
/* Adds a summary specification for FUNCTION to the SV variant of AXIS.

   If AXIS is a single variable (CTAO_VAR), the specification is appended to
   axis->specs[SV].  The loop at the end applies the same specification
   recursively to both sub-axes; its recursive call is tested with '!', so the
   function reports failure through its return value.

   PERCENTILE is stored as given.  LABEL is copied with xstrdup_if_nonnull(),
   so the caller keeps ownership of its own string.  If FORMAT is null, the
   default format for FUNCTION and the axis variable is used instead.  LOC and
   axis->loc are used only for diagnostics. */
1160 add_summary_spec (struct ctables_axis *axis,
1161 enum ctables_summary_function function, double percentile,
1162 const char *label, const struct fmt_spec *format,
1163 bool is_ctables_format, const struct msg_location *loc,
1164 enum ctables_summary_variant sv)
1166 if (axis->op == CTAO_VAR)
1168 const char *function_name = ctables_summary_function_name (function);
1169 const char *var_name = var_get_name (axis->var);
/* Diagnose functions whose availability does not match this variable.
   NOTE(review): the case labels and any conditional compilation around
   these checks are not visible here -- confirm which checks are active. */
1170 switch (ctables_function_availability (function))
1174 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1175 "response sets."), function_name);
1176 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1182 if (!axis->scale && sv != CSV_TOTAL)
1185 _("Summary function %s applies only to scale variables."),
1187 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append the new specification, growing the array first when it is full. */
1197 struct ctables_summary_spec_set *set = &axis->specs[sv];
1198 if (set->n >= set->allocated)
1199 set->specs = x2nrealloc (set->specs, &set->allocated,
1200 sizeof *set->specs);
1202 struct ctables_summary_spec *dst = &set->specs[set->n++];
1203 *dst = (struct ctables_summary_spec) {
1204 .function = function,
1205 .percentile = percentile,
1206 .label = xstrdup_if_nonnull (label),
1207 .format = (format ? *format
1208 : ctables_summary_default_format (function, axis->var)),
1209 .is_ctables_format = is_ctables_format,
/* Apply the same specification to both operands of a compound axis. */
1215 for (size_t i = 0; i < 2; i++)
1216 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1217 format, is_ctables_format, loc, sv))
/* Forward declaration: ctables_axis_parse_primary() calls this function for
   a parenthesized sub-expression before its definition appears. */
1223 static struct ctables_axis *ctables_axis_parse_stack (
1224 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression from CTX.

   A primary is either a parenthesized sub-expression, which is parsed with
   ctables_axis_parse_stack() and must be followed by ')', or a single
   variable from ctx->dict.  A variable may be followed by "[S]" to force
   scale treatment or "[C]" to force categorical treatment; with neither
   marker, the variable is scale exactly when its measurement level is
   MEASURE_SCALE.

   A string variable cannot be treated as scale: that case is reported as an
   error at the variable's location and the new axis is destroyed. */
1227 static struct ctables_axis *
1228 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1230 if (lex_match (ctx->lexer, T_LPAREN))
1232 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1233 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1235 ctables_axis_destroy (sub);
1241 if (!lex_force_id (ctx->lexer))
/* Remember where the variable starts so that axis->loc can cover the
   variable name together with any [S] or [C] marker. */
1244 int start_ofs = lex_ofs (ctx->lexer);
1245 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1249 struct ctables_axis *axis = xmalloc (sizeof *axis);
1250 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1252 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1253 : lex_match_phrase (ctx->lexer, "[C]") ? false
1254 : var_get_measure (var) == MEASURE_SCALE);
1255 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1256 lex_ofs (ctx->lexer) - 1);
1257 if (axis->scale && var_is_alpha (var))
1259 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1261 var_get_name (var));
1262 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit, false
   otherwise.  An empty or null S contains no digits.

   The summary-specification parser uses this to tell a format specifier
   (such as F8.2) apart from other identifiers. */
static bool
has_digit (const char *s)
{
  if (s == NULL)
    return false;

  /* strcspn() returns the length of the leading run that has no digits, so
     the byte at that offset is a digit unless it is the terminator. */
  return s[strcspn (s, "0123456789")] != '\0';
}
/* Parses an output format specifier from LEXER into *FORMAT.

   The type name is compared case-insensitively against the CTABLES-specific
   formats NEGPAREN, NEQUAL, PAREN and PCTPAREN, which map to the matching
   CTEF_* type codes.  For any other name, *IS_CTABLES_FORMAT is set to false
   and the result is that of parsing an ordinary format specifier that must be
   a valid output format compatible with numeric values.

   For the CTABLES-specific formats, the width and decimals are validated (see
   the two error messages below) and *IS_CTABLES_FORMAT is set to true. */
1276 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1277 bool *is_ctables_format)
1279 char type[FMT_TYPE_LEN_MAX + 1];
1280 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1283 if (!strcasecmp (type, "NEGPAREN"))
1284 format->type = CTEF_NEGPAREN;
1285 else if (!strcasecmp (type, "NEQUAL"))
1286 format->type = CTEF_NEQUAL;
1287 else if (!strcasecmp (type, "PAREN"))
1288 format->type = CTEF_PAREN;
1289 else if (!strcasecmp (type, "PCTPAREN"))
1290 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific format: use the regular format parser. */
1293 *is_ctables_format = false;
1294 return (parse_format_specifier (lexer, format)
1295 && fmt_check_output (format)
1296 && fmt_check_type_compat (format, VAL_NUMERIC));
1302 lex_next_error (lexer, -1, -1,
1303 _("Output format %s requires width 2 or greater."), type);
/* The number of decimals must be smaller than the width. */
1306 else if (format->d > format->w - 1)
1308 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1309 "greater than decimals."), type);
1314 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by '[' and a list of
   summary specifications.

   Each specification consists of a summary function; for PTILE, a percentile
   in the closed range 0 to 100; an optional quoted label; and an optional
   format, recognized as an identifier that contains a digit.  Each
   specification is added to the parsed axis with add_summary_spec() for the
   current summary variant, which starts as CSV_CELL.  Specifications may be
   separated by commas.

   While the variant is CSV_CELL, the keyword TOTALS must be followed by '['.
   NOTE(review): the statements that change the variant and end the loop are
   not visible here -- confirm how the TOTALS sub-list switches to CSV_TOTAL.

   On the error path the partially built axis is destroyed. */
1319 static struct ctables_axis *
1320 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1322 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1323 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1326 enum ctables_summary_variant sv = CSV_CELL;
/* Start of this specification, used to build its diagnostic location. */
1329 int start_ofs = lex_ofs (ctx->lexer);
1331 /* Parse function. */
1332 enum ctables_summary_function function;
1333 if (!parse_ctables_summary_function (ctx->lexer, &function))
1336 /* Parse percentile. */
1337 double percentile = 0;
1338 if (function == CTSF_PTILE)
1340 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1342 percentile = lex_number (ctx->lexer);
1343 lex_get (ctx->lexer);
/* Optional label: a heap copy of the string token. */
1348 if (lex_is_string (ctx->lexer))
1350 label = ss_xstrdup (lex_tokss (ctx->lexer));
1351 lex_get (ctx->lexer);
/* Optional format: only attempted for an identifier containing a digit. */
1355 struct fmt_spec format;
1356 const struct fmt_spec *formatp;
1357 bool is_ctables_format = false;
1358 if (lex_token (ctx->lexer) == T_ID
1359 && has_digit (lex_tokcstr (ctx->lexer)))
1361 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1362 &is_ctables_format))
/* NOTE(review): the return value of add_summary_spec() is ignored here, so a
   failure it reports does not stop parsing -- confirm this is intended. */
1372 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1373 lex_ofs (ctx->lexer) - 1);
1374 add_summary_spec (sub, function, percentile, label, formatp,
1375 is_ctables_format, loc, sv);
1377 msg_location_destroy (loc);
1379 lex_match (ctx->lexer, T_COMMA);
1380 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1382 if (!lex_force_match (ctx->lexer, T_LBRACK))
1386 else if (lex_match (ctx->lexer, T_RBRACK))
/* A TOTALS sub-list needs a second ']' to close the outer list. */
1388 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1395 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.

   A CTAO_VAR node yields itself if it is marked as scale and NULL otherwise.
   For other node types, both sub-axes are searched recursively in order.
   NOTE(review): the handling of each recursive result is not visible here;
   presumably the first non-null result is returned. */
1399 static const struct ctables_axis *
1400 find_scale (const struct ctables_axis *axis)
1404 else if (axis->op == CTAO_VAR)
1405 return axis->scale ? axis : NULL;
1408 for (size_t i = 0; i < 2; i++)
1410 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical variable that has
   summary specifications.

   A CTAO_VAR node yields itself if it is not scale and has at least one
   CSV_CELL summary specification, and NULL otherwise.  For other node types,
   both sub-axes are searched recursively in order.
   NOTE(review): the handling of each recursive result is not visible here;
   presumably the first non-null result is returned. */
1418 static const struct ctables_axis *
1419 find_categorical_summary_spec (const struct ctables_axis *axis)
1423 else if (axis->op == CTAO_VAR)
1424 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1427 for (size_t i = 0; i < 2; i++)
1429 const struct ctables_axis *sum
1430 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions joined by '>' (T_GT), building
   a CTAO_NEST node for each '>'.

   Two kinds of nesting are rejected, each with an error on the nest's
   location plus notes pointing at the offending variables:

   - Both operands contain a scale variable.

   - The outer (left) operand contains a categorical variable that has
     summary specifications; summaries on categorical variables are allowed
     only at the innermost nesting level.

   In both cases the new nest node is destroyed. */
1438 static struct ctables_axis *
1439 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1441 int start_ofs = lex_ofs (ctx->lexer);
1442 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1446 while (lex_match (ctx->lexer, T_GT))
1448 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1452 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1453 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Scale variables may appear on at most one side of a nest. */
1455 const struct ctables_axis *outer_scale = find_scale (lhs);
1456 const struct ctables_axis *inner_scale = find_scale (rhs);
1457 if (outer_scale && inner_scale)
1459 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1460 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1461 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1462 ctables_axis_destroy (nest);
/* Only the outer operand is checked for categorical summaries. */
1466 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1469 msg_at (SE, nest->loc,
1470 _("Summaries may only be requested for categorical variables "
1471 "at the innermost nesting level."));
1472 msg_at (SN, outer_sum->loc,
1473 _("This outer categorical variable has a summary."));
1474 ctables_axis_destroy (nest);
/* Parses one or more nest expressions joined by '+' (T_PLUS).

   Each '+' combines everything parsed so far (as the left operand) with the
   next nest expression into a new CTAO_STACK node, so stacking associates to
   the left.  Every node's location starts at the first token of the whole
   expression. */
1484 static struct ctables_axis *
1485 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1487 int start_ofs = lex_ofs (ctx->lexer);
1488 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1492 while (lex_match (ctx->lexer, T_PLUS))
1494 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1498 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1499 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores the result in
   t->axes[A].  Returns true if a non-null axis was parsed.

   The tokens BY, '/' and end-of-command are checked first.
   NOTE(review): the statement guarded by that check is not visible here;
   presumably an axis may be left empty when one of these tokens comes
   next -- confirm. */
1506 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1507 struct ctables *ct, struct ctables_table *t,
1508 enum pivot_axis_type a)
1510 if (lex_token (lexer) == T_BY
1511 || lex_token (lexer) == T_SLASH
1512 || lex_token (lexer) == T_ENDCMD)
1515 struct ctables_axis_parse_ctx ctx = {
1521 t->axes[a] = ctables_axis_parse_stack (&ctx);
1522 return t->axes[a] != NULL;
/* Destroys CHISQ.  ctables_table_destroy() calls this with t->chisq.
   NOTE(review): the body is not visible here -- confirm that a null CHISQ
   is accepted. */
1526 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE.  ctables_table_destroy() calls this with t->pairwise.
   NOTE(review): the body is not visible here -- confirm that a null PAIRWISE
   is accepted. */
1532 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: each of its sections, its
   reference to each entry of t->categories and the array itself, the axis
   expression and stack for every pivot axis, and the chi-square and pairwise
   settings. */
1538 ctables_table_destroy (struct ctables_table *t)
1543 for (size_t i = 0; i < t->n_sections; i++)
1544 ctables_section_uninit (&t->sections[i]);
/* Category sets are reference counted, so only drop this table's
   references. */
1547 for (size_t i = 0; i < t->n_categories; i++)
1548 ctables_categories_unref (t->categories[i]);
1549 free (t->categories);
1551 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1553 ctables_axis_destroy (t->axes[a]);
1554 ctables_stack_uninit (&t->stacks[a]);
1560 ctables_chisq_destroy (t->chisq);
1561 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: its CTABLES format settings, its
   reference to the pivot table look, and every table it contains. */
1566 ctables_destroy (struct ctables *ct)
1571 fmt_settings_uninit (&ct->ctables_formats);
1572 pivot_table_look_unref (ct->look);
1576 for (size_t i = 0; i < ct->n_tables; i++)
1577 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose numeric range is { LOW, HIGH }.
   NOTE(review): the initializer for the category type is not visible here;
   presumably it is CCT_NRANGE. */
1582 static struct ctables_category
1583 cct_nrange (double low, double high)
1585 return (struct ctables_category) {
1587 .nrange = { low, high }
/* Returns a category, by value, whose string range is { LOW, HIGH }.  The
   substrings are stored as given, without copying.
   NOTE(review): the initializer for the category type is not visible here;
   presumably it is CCT_SRANGE. */
1591 static struct ctables_category
1592 cct_srange (struct substring low, struct substring high)
1594 return (struct ctables_category) {
1596 .srange = { low, high }
/* Parses the part of a SUBTOTAL or HSUBTOTAL category that follows the
   keyword and stores a CCT_SUBTOTAL category in *CAT.

   An optional '=' must be followed by a string, which becomes the subtotal's
   label; without it, the translated default label "Subtotal" is used.  Either
   way the label is a fresh heap copy.  HIDE_SUBCATEGORIES is recorded in the
   category as given. */
1601 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1602 struct ctables_category *cat)
1605 if (lex_match (lexer, T_EQUALS))
1607 if (!lex_force_string (lexer))
1610 total_label = ss_xstrdup (lex_tokss (lexer));
1614 total_label = xstrdup (_("Subtotal"));
1616 *cat = (struct ctables_category) {
1617 .type = CCT_SUBTOTAL,
1618 .hide_subcategories = hide_subcategories,
1619 .total_label = total_label
/* Converts the current string token between UTF-8 and DICT's encoding and
   strips trailing spaces from the result.
   NOTE(review): presumably the conversion is from UTF-8 to the dictionary
   encoding -- confirm against the argument order of recode_substring_pool().
   The null pool argument suggests that the result is separately allocated
   and owned by the caller -- confirm. */
1624 static struct substring
1625 parse_substring (struct lexer *lexer, struct dictionary *dict)
1627 struct substring s = recode_substring_pool (
1628 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1629 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification into *CAT.  The accepted forms
   are:

   - OTHERNM and MISSING.

   - SUBTOTAL and HSUBTOTAL, handled by ctables_table_parse_subtotal() with
     hide_subcategories false and true respectively.

   - LO THRU followed by a string or a number.

   - A number, optionally followed by THRU and either HI or another number.

   - A string, optionally followed by THRU and either HI or another string.

   - '&' followed by the name of a postcompute, which must already exist.

   Open-ended numeric ranges use -DBL_MAX or DBL_MAX for the missing bound;
   open-ended string ranges use a substring whose string pointer is null.
   Anything else is a syntax error. */
1635 ctables_table_parse_explicit_category (struct lexer *lexer,
1636 struct dictionary *dict,
1638 struct ctables_category *cat)
1640 if (lex_match_id (lexer, "OTHERNM"))
1641 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1642 else if (lex_match_id (lexer, "MISSING"))
1643 *cat = (struct ctables_category) { .type = CCT_MISSING };
1644 else if (lex_match_id (lexer, "SUBTOTAL"))
1645 return ctables_table_parse_subtotal (lexer, false, cat);
1646 else if (lex_match_id (lexer, "HSUBTOTAL"))
1647 return ctables_table_parse_subtotal (lexer, true, cat);
1648 else if (lex_match_id (lexer, "LO"))
1650 if (!lex_force_match_id (lexer, "THRU"))
1652 if (lex_is_string (lexer))
/* A null lower bound marks the string range as open at the low end. */
1654 struct substring sr0 = { .string = NULL };
1655 struct substring sr1 = parse_substring (lexer, dict);
1656 *cat = cct_srange (sr0, sr1);
1658 else if (lex_force_num (lexer))
1660 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1666 else if (lex_is_number (lexer))
1668 double number = lex_number (lexer);
1670 if (lex_match_id (lexer, "THRU"))
1672 if (lex_match_id (lexer, "HI"))
1673 *cat = cct_nrange (number, DBL_MAX);
1676 if (!lex_force_num (lexer))
1678 *cat = cct_nrange (number, lex_number (lexer));
1683 *cat = (struct ctables_category) {
1688 else if (lex_is_string (lexer))
1690 struct substring s = parse_substring (lexer, dict);
1691 if (lex_match_id (lexer, "THRU"))
1693 if (lex_match_id (lexer, "HI"))
/* A null upper bound marks the string range as open at the high end. */
1695 struct substring sr1 = { .string = NULL };
1696 *cat = cct_srange (s, sr1);
1700 if (!lex_force_string (lexer))
1702 struct substring sr1 = parse_substring (lexer, dict);
1703 *cat = cct_srange (s, sr1);
1707 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1709 else if (lex_match (lexer, T_AND))
1711 if (!lex_force_id (lexer))
1713 struct ctables_postcompute *pc = ctables_find_postcompute (
1714 ct, lex_tokcstr (lexer));
/* The error location covers the '&' as well as the postcompute name. */
1717 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1718 msg_at (SE, loc, _("Unknown postcompute &%s."),
1719 lex_tokcstr (lexer));
1720 msg_location_destroy (loc);
1725 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1729 lex_error (lexer, NULL);
/* Parses string S, in DICT's encoding, as a value in format FORMAT using
   data_in() with the current format settings.

   If data_in() returns an error, it is reported at LOCATION together with
   the format name.
   NOTE(review): the statements that store into *N and return are not visible
   here; callers test the result with '!' and then use the number, so
   presumably this returns true and sets *N on success -- confirm. */
1737 parse_category_string (struct msg_location *location,
1738 struct substring s, const struct dictionary *dict,
1739 enum fmt_type format, double *n)
1742 char *error = data_in (s, dict_get_encoding (dict), format,
1743 settings_get_fmt_settings (), &v, 0, NULL);
1746 msg_at (SE, location,
1747 _("Failed to parse category specification as format %s: %s."),
1748 fmt_name (format), error);
/* Searches CATS for the category that postcompute leaf expression E refers
   to.  A category matches when its type corresponds to E's operation and,
   where applicable, its number, string, or range bounds equal those in E.

   Subtotals are counted as they are encountered so that a reference with a
   nonzero subtotal_index can be matched by position.  A SUBTOTAL reference
   with subtotal_index 0 is treated specially when CATS contains more than one
   subtotal (see the final test).
   NOTE(review): the statements executed on a match and on that final test are
   not visible here -- confirm what is returned in each case. */
1757 static struct ctables_category *
1758 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1759 const struct ctables_pcexpr *e)
1761 struct ctables_category *best = NULL;
1762 size_t n_subtotals = 0;
1763 for (size_t i = 0; i < cats->n_cats; i++)
1765 struct ctables_category *cat = &cats->cats[i];
1768 case CTPO_CAT_NUMBER:
1769 if (cat->type == CCT_NUMBER && cat->number == e->number)
1773 case CTPO_CAT_STRING:
1774 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1778 case CTPO_CAT_NRANGE:
1779 if (cat->type == CCT_NRANGE
1780 && cat->nrange[0] == e->nrange[0]
1781 && cat->nrange[1] == e->nrange[1])
/* String range bounds may be null (open-ended), hence the nullable
   comparison. */
1785 case CTPO_CAT_SRANGE:
1786 if (cat->type == CCT_SRANGE
1787 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1788 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1792 case CTPO_CAT_MISSING:
1793 if (cat->type == CCT_MISSING)
1797 case CTPO_CAT_OTHERNM:
1798 if (cat->type == CCT_OTHERNM)
1802 case CTPO_CAT_SUBTOTAL:
1803 if (cat->type == CCT_SUBTOTAL)
1806 if (e->subtotal_index == n_subtotals)
1808 else if (e->subtotal_index == 0)
1813 case CTPO_CAT_TOTAL:
1814 if (cat->type == CCT_TOTAL)
/* An unindexed SUBTOTAL reference is ambiguous if there are several
   subtotals. */
1828 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute leaf expression E refers to.

   When PARSE_FORMAT is FMT_F, or E is neither a string nor a string range,
   E is looked up directly.  Otherwise the string, or each non-null bound of
   the string range, is first parsed as a number in PARSE_FORMAT with
   parse_category_string(), and the lookup uses an equivalent numeric
   (CTPO_CAT_NUMBER or CTPO_CAT_NRANGE) expression at the same location.  A
   null lower or upper bound becomes -DBL_MAX or DBL_MAX respectively. */
1833 static struct ctables_category *
1834 ctables_find_category_for_postcompute (const struct dictionary *dict,
1835 const struct ctables_categories *cats,
1836 enum fmt_type parse_format,
1837 const struct ctables_pcexpr *e)
1839 if (parse_format != FMT_F)
1841 if (e->op == CTPO_CAT_STRING)
1844 if (!parse_category_string (e->location, e->string, dict,
1845 parse_format, &number))
1848 struct ctables_pcexpr e2 = {
1849 .op = CTPO_CAT_NUMBER,
1851 .location = e->location,
1853 return ctables_find_category_for_postcompute__ (cats, &e2);
1855 else if (e->op == CTPO_CAT_SRANGE)
/* Convert each bound separately; a null bound is an open end. */
1858 if (!e->srange[0].string)
1859 nrange[0] = -DBL_MAX;
1860 else if (!parse_category_string (e->location, e->srange[0], dict,
1861 parse_format, &nrange[0]))
1864 if (!e->srange[1].string)
1865 nrange[1] = DBL_MAX;
1866 else if (!parse_category_string (e->location, e->srange[1], dict,
1867 parse_format, &nrange[1]))
1870 struct ctables_pcexpr e2 = {
1871 .op = CTPO_CAT_NRANGE,
1872 .nrange = { nrange[0], nrange[1] },
1873 .location = e->location,
1875 return ctables_find_category_for_postcompute__ (cats, &e2);
1878 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that every category referenced by postcompute expression E, which
   belongs to computed category PC_CAT, is present in CATS.

   For a leaf that references a category, the category is looked up with
   ctables_find_category_for_postcompute() using PC_CAT's parse format.  Two
   kinds of failure are diagnosed:

   - An unindexed SUBTOTAL reference when CATS contains more than one
     subtotal: references must then be positional, e.g. SUBTOTAL[1].

   - A reference to a category that is not in the list, with a note that
     suggests how to fix it, depending on the kind of reference.

   CATS_LOCATION is the syntax location of the category list, used in
   diagnostics.  For other expression nodes, each non-null operand is checked
   recursively.
   NOTE(review): the action taken when pc_cat->pc->hide_source_cats is set is
   not visible here. */
1882 ctables_recursive_check_postcompute (struct dictionary *dict,
1883 const struct ctables_pcexpr *e,
1884 struct ctables_category *pc_cat,
1885 const struct ctables_categories *cats,
1886 const struct msg_location *cats_location)
1890 case CTPO_CAT_NUMBER:
1891 case CTPO_CAT_STRING:
1892 case CTPO_CAT_NRANGE:
1893 case CTPO_CAT_SRANGE:
1894 case CTPO_CAT_MISSING:
1895 case CTPO_CAT_OTHERNM:
1896 case CTPO_CAT_SUBTOTAL:
1897 case CTPO_CAT_TOTAL:
1899 struct ctables_category *cat = ctables_find_category_for_postcompute (
1900 dict, cats, pc_cat->parse_format, e);
1903 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals to decide whether the reference is ambiguous. */
1905 size_t n_subtotals = 0;
1906 for (size_t i = 0; i < cats->n_cats; i++)
1907 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1908 if (n_subtotals > 1)
1910 msg_at (SE, cats_location,
1911 ngettext ("These categories include %zu instance "
1912 "of SUBTOTAL or HSUBTOTAL, so references "
1913 "from computed categories must refer to "
1914 "subtotals by position, "
1915 "e.g. SUBTOTAL[1].",
1916 "These categories include %zu instances "
1917 "of SUBTOTAL or HSUBTOTAL, so references "
1918 "from computed categories must refer to "
1919 "subtotals by position, "
1920 "e.g. SUBTOTAL[1].",
1923 msg_at (SN, e->location,
1924 _("This is the reference that lacks a position."));
1929 msg_at (SE, pc_cat->location,
1930 _("Computed category &%s references a category not included "
1931 "in the category list."),
1933 msg_at (SN, e->location, _("This is the missing category."));
1934 if (e->op == CTPO_CAT_SUBTOTAL)
1935 msg_at (SN, cats_location,
1936 _("To fix the problem, add subtotals to the "
1937 "list of categories here."));
/* NOTE(review): this hint is issued with msg(), without a location, unlike
   the neighboring hints. */
1938 else if (e->op == CTPO_CAT_TOTAL)
1939 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1940 "CATEGORIES specification."));
1942 msg_at (SN, cats_location,
1943 _("To fix the problem, add the missing category to the "
1944 "list of categories here."));
1947 if (pc_cat->pc->hide_source_cats)
/* Check each operand that is present. */
1961 for (size_t i = 0; i < 2; i++)
1962 if (e->subs[i] && !ctables_recursive_check_postcompute (
1963 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric variables.  For a numeric
   variable, reports at CAT's location that the category specification may be
   applied only to string variables.
   NOTE(review): the return statements are not visible here; callers test the
   result with '!', so presumably this returns false at the first numeric
   variable and true otherwise -- confirm. */
1972 all_strings (struct variable **vars, size_t n_vars,
1973 const struct ctables_category *cat)
1975 for (size_t j = 0; j < n_vars; j++)
1976 if (var_is_numeric (vars[j]))
1978 msg_at (SE, cat->location,
1979 _("This category specification may be applied only to string "
1980 "variables, but this subcommand tries to apply it to "
1981 "numeric variable %s."),
1982 var_get_name (vars[j]));
/* Parses a CATEGORIES specification, starting at VARIABLES, for table T.

   The steps visible below are, in order:

   1. Parse the variable list and work out whether all of the variables share
      one print format type.

   2. Create a new category set with one reference per variable and install
      it in t->categories[] for each variable, releasing the previous entry.

   3. Parse an optional bracketed list of explicit categories, remembering
      the token range of the list for diagnostics.

   4. Parse the settings that follow, up to '/' or the end of the command.
      ORDER, KEY and MISSING are accepted only when no explicit categories
      were given; TOTAL, LABEL, POSITION and EMPTY are always accepted.

   5. Append the implicit category and, presumably when totals were
      requested, a totals category placed before or after the others
      according to POSITION.

   6. Walk the categories to link them to their subtotal.

   7. If explicit categories were given, check each one against the variable
      types, converting string categories and string ranges to numbers with
      the common print format where that applies.

   NOTE(review): many short statements (conditions, returns and error exits)
   are not visible here, so the exact error-path cleanup should be confirmed
   against the complete function. */
1989 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1990 struct ctables *ct, struct ctables_table *t)
1992 if (!lex_match_id (lexer, "VARIABLES"))
1994 lex_match (lexer, T_EQUALS);
1996 struct variable **vars;
1998 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* common_format becomes null as soon as two variables differ in print
   format type. */
2001 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2002 for (size_t i = 1; i < n_vars; i++)
2004 const struct fmt_spec *f = var_get_print_format (vars[i]);
2005 if (f->type != common_format->type)
2007 common_format = NULL;
/* Date, time and date-component formats are the ones singled out here.
   NOTE(review): presumably this expression initializes parse_strings, which
   is used further down -- confirm. */
2013 && (fmt_get_category (common_format->type)
2014 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* The new set is shared by all of the variables, hence n_refs = n_vars. */
2016 struct ctables_categories *c = xmalloc (sizeof *c);
2017 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2018 for (size_t i = 0; i < n_vars; i++)
2020 struct ctables_categories **cp
2021 = &t->categories[var_get_dict_index (vars[i])];
2022 ctables_categories_unref (*cp);
/* Explicit category list.  The offsets stay at -1 when there is none. */
2026 size_t allocated_cats = 0;
2027 int cats_start_ofs = -1;
2028 int cats_end_ofs = -1;
2029 if (lex_match (lexer, T_LBRACK))
2031 cats_start_ofs = lex_ofs (lexer);
2034 if (c->n_cats >= allocated_cats)
2035 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2037 int start_ofs = lex_ofs (lexer);
2038 struct ctables_category *cat = &c->cats[c->n_cats];
2039 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2041 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2044 lex_match (lexer, T_COMMA);
2046 while (!lex_match (lexer, T_RBRACK));
2047 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the implicit category: missing values excluded, ascending
   sort. */
2050 struct ctables_category cat = {
2052 .include_missing = false,
2053 .sort_ascending = true,
2055 bool show_totals = false;
2056 char *total_label = NULL;
2057 bool totals_before = false;
2058 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2060 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2062 lex_match (lexer, T_EQUALS);
2063 if (lex_match_id (lexer, "A"))
2064 cat.sort_ascending = true;
2065 else if (lex_match_id (lexer, "D"))
2066 cat.sort_ascending = false;
2069 lex_error_expecting (lexer, "A", "D");
2073 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2075 lex_match (lexer, T_EQUALS);
2076 if (lex_match_id (lexer, "VALUE"))
2077 cat.type = CCT_VALUE;
2078 else if (lex_match_id (lexer, "LABEL"))
2079 cat.type = CCT_LABEL;
/* Any other key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile. */
2082 cat.type = CCT_FUNCTION;
2083 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2086 if (lex_match (lexer, T_LPAREN))
2088 cat.sort_var = parse_variable (lexer, dict);
2092 if (cat.sort_function == CTSF_PTILE)
2094 lex_match (lexer, T_COMMA);
2095 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2097 cat.percentile = lex_number (lexer);
2101 if (!lex_force_match (lexer, T_RPAREN))
2104 else if (ctables_function_availability (cat.sort_function)
/* Called only for the error message it produces, since '(' was already
   found to be absent. */
2107 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2112 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2114 lex_match (lexer, T_EQUALS);
2115 if (lex_match_id (lexer, "INCLUDE"))
2116 cat.include_missing = true;
2117 else if (lex_match_id (lexer, "EXCLUDE"))
2118 cat.include_missing = false;
2121 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2125 else if (lex_match_id (lexer, "TOTAL"))
2127 lex_match (lexer, T_EQUALS);
2128 if (!parse_bool (lexer, &show_totals))
2131 else if (lex_match_id (lexer, "LABEL"))
2133 lex_match (lexer, T_EQUALS);
2134 if (!lex_force_string (lexer))
/* NOTE(review): if LABEL is given more than once, an earlier total_label
   does not appear to be freed before it is overwritten -- confirm. */
2137 total_label = ss_xstrdup (lex_tokss (lexer));
2140 else if (lex_match_id (lexer, "POSITION"))
2142 lex_match (lexer, T_EQUALS);
2143 if (lex_match_id (lexer, "BEFORE"))
2144 totals_before = true;
2145 else if (lex_match_id (lexer, "AFTER"))
2146 totals_before = false;
2149 lex_error_expecting (lexer, "BEFORE", "AFTER");
2153 else if (lex_match_id (lexer, "EMPTY"))
2155 lex_match (lexer, T_EQUALS);
2156 if (lex_match_id (lexer, "INCLUDE"))
2157 c->show_empty = true;
2158 else if (lex_match_id (lexer, "EXCLUDE"))
2159 c->show_empty = false;
2162 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two error messages: the first lists all settings, the second only the
   ones that are always accepted. */
2169 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2170 "TOTAL", "LABEL", "POSITION", "EMPTY");
2172 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the settings above. */
2179 if (c->n_cats >= allocated_cats)
2180 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2181 c->cats[c->n_cats++] = cat;
/* Make room for the totals category, then place it first or last. */
2186 if (c->n_cats >= allocated_cats)
2187 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2189 struct ctables_category *totals;
2192 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2193 totals = &c->cats[0];
2196 totals = &c->cats[c->n_cats];
2199 *totals = (struct ctables_category) {
2201 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first, otherwise backward,
   assigning each category its subtotal. */
2205 struct ctables_category *subtotal = NULL;
2206 for (size_t i = totals_before ? 0 : c->n_cats;
2207 totals_before ? i < c->n_cats : i-- > 0;
2208 totals_before ? i++ : 0)
2210 struct ctables_category *cat = &c->cats[i];
2219 cat->subtotal = subtotal;
2222 case CCT_POSTCOMPUTE:
2233 case CCT_EXCLUDED_MISSING:
/* Validate explicit categories against the variables they apply to. */
2238 if (cats_start_ofs != -1)
2240 struct msg_location *cats_location
2241 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2242 for (size_t i = 0; i < c->n_cats; i++)
2244 struct ctables_category *cat = &c->cats[i];
2247 case CCT_POSTCOMPUTE:
2248 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2249 if (!ctables_recursive_check_postcompute (dict, cat->pc->expr,
2250 cat, c, cats_location))
2256 for (size_t j = 0; j < n_vars; j++)
2257 if (var_is_alpha (vars[j]))
2259 msg_at (SE, cat->location,
2260 _("This category specification may be applied "
2261 "only to numeric variables, but this "
2262 "subcommand tries to apply it to string "
2264 var_get_name (vars[j]));
/* Convert a string category to a number using the common format. */
2273 if (!parse_category_string (cat->location, cat->string, dict,
2274 common_format->type, &n))
2277 ss_dealloc (&cat->string);
2279 cat->type = CCT_NUMBER;
2282 else if (!all_strings (vars, n_vars, cat))
/* Convert each non-null bound of a string range in the same way. */
2291 if (!cat->srange[0].string)
2293 else if (!parse_category_string (cat->location,
2294 cat->srange[0], dict,
2295 common_format->type, &n[0]))
2298 if (!cat->srange[1].string)
2300 else if (!parse_category_string (cat->location,
2301 cat->srange[1], dict,
2302 common_format->type, &n[1]))
2305 ss_dealloc (&cat->srange[0]);
2306 ss_dealloc (&cat->srange[1]);
2308 cat->type = CCT_NRANGE;
2309 cat->nrange[0] = n[0];
2310 cat->nrange[1] = n[1];
2312 else if (!all_strings (vars, n_vars, cat))
2323 case CCT_EXCLUDED_MISSING:
/* Releases the resources held by NEST, without freeing NEST itself;
   ctables_stack_uninit() calls this on each element of a stack's nest array.
   NOTE(review): the body is not visible here -- confirm which members are
   released. */
2333 ctables_nest_uninit (struct ctables_nest *nest)
/* Releases the resources held by STACK: uninitializes each of its nests and
   frees the nest array.  STACK itself is not freed. */
2340 ctables_stack_uninit (struct ctables_stack *stack)
2344 for (size_t i = 0; i < stack->n; i++)
2345 ctables_nest_uninit (&stack->nests[i]);
2346 free (stack->nests);
2350 static struct ctables_stack
2351 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2358 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2359 for (size_t i = 0; i < s0.n; i++)
2360 for (size_t j = 0; j < s1.n; j++)
2362 const struct ctables_nest *a = &s0.nests[i];
2363 const struct ctables_nest *b = &s1.nests[j];
2365 size_t allocate = a->n + b->n;
2366 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2367 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2369 for (size_t k = 0; k < a->n; k++)
2370 vars[n++] = a->vars[k];
2371 for (size_t k = 0; k < b->n; k++)
2372 vars[n++] = b->vars[k];
2373 assert (n == allocate);
2375 const struct ctables_nest *summary_src;
2376 if (!a->specs[CSV_CELL].var)
2378 else if (!b->specs[CSV_CELL].var)
2383 struct ctables_nest *new = &stack.nests[stack.n++];
2384 *new = (struct ctables_nest) {
2386 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2387 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2391 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2392 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2394 ctables_stack_uninit (&s0);
2395 ctables_stack_uninit (&s1);
/* Returns a new stack holding the nests of S0 followed by the nests of S1.
   The nest structures are copied by value, so their contents now belong to
   the new stack.  Each nest taken from S1 has its group_head increased by
   the number of nests in S0, which keeps it pointing at the same nest in the
   combined array.
   NOTE(review): the statements after the assertion are not visible here;
   presumably the old nest arrays of S0 and S1 are freed -- confirm. */
2399 static struct ctables_stack
2400 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2402 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2403 for (size_t i = 0; i < s0.n; i++)
2404 stack.nests[stack.n++] = s0.nests[i];
2405 for (size_t i = 0; i < s1.n; i++)
2407 stack.nests[stack.n] = s1.nests[i];
2408 stack.nests[stack.n].group_head += s0.n;
2411 assert (stack.n == s0.n + s1.n);
/* Returns a stack that contains a single nest for the variable axis A.

   The nest's scale index is 0 if A is a scale variable and SIZE_MAX
   otherwise.  If A has cell summary specifications or is a scale variable,
   every summary variant of A is cloned into the nest and tagged with A's
   variable and scale flag. */
2417 static struct ctables_stack
2418 var_fts (const struct ctables_axis *a)
2420 struct variable **vars = xmalloc (sizeof *vars);
2423 struct ctables_nest *nest = xmalloc (sizeof *nest);
2424 *nest = (struct ctables_nest) {
2427 .scale_idx = a->scale ? 0 : SIZE_MAX,
2429 if (a->specs[CSV_CELL].n || a->scale)
2430 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2432 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2433 nest->specs[sv].var = a->var;
2434 nest->specs[sv].is_scale = a->scale;
/* The single heap-allocated nest serves as a one-element array. */
2436 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Builds the stack of nests for axis expression A by recursing over the
   expression tree.  One case yields an empty stack; a stacking node combines
   the stacks of its two operands with stack_fts(), and a nesting node
   combines them with nest_fts().
   NOTE(review): the conditions that select each case are not visible here;
   presumably the empty stack is for a null A and a variable node is handled
   by var_fts() -- confirm. */
2439 static struct ctables_stack
2440 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2443 return (struct ctables_stack) { .n = 0 };
2451 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2452 enumerate_fts (axis_type, a->subs[1]));
2455 /* This should consider any of the scale variables found in the result to
2456 be linked to each other listwise for SMISSING=LISTWISE. */
2457 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2458 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function.  The comment on each group of
   members names the summary functions that use it; ctables_summary_init(),
   ctables_summary_add() and ctables_summary_uninit() choose the member from
   the summary specification's function. */
2464 union ctables_summary
2466 /* COUNT, VALIDN, TOTALN. */
2469 /* MINIMUM, MAXIMUM, RANGE. */
2476 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2477 struct moments1 *moments;
2479 /* MEDIAN, MODE, PTILE. */
2482 struct casewriter *writer;
2487 /* XXX multiple response */
/* Initializes accumulator S for the summary function in SS.

   The initialization depends on the group that the function belongs to.  The
   groups visible below are: the count-style functions (weighted and
   unweighted variants); the functions that track a minimum and maximum,
   which both start as SYSMIS; the moment-based functions, which create a
   moments1 accumulator up to MOMENT_VARIANCE; and the functions that need
   the sorted data, which create a sorting casewriter.
   NOTE(review): the statements for the count-style groups and several case
   labels are not visible here. */
2491 ctables_summary_init (union ctables_summary *s,
2492 const struct ctables_summary_spec *ss)
2494 switch (ss->function)
2498 case CTSF_ROWPCT_COUNT:
2499 case CTSF_COLPCT_COUNT:
2500 case CTSF_TABLEPCT_COUNT:
2501 case CTSF_SUBTABLEPCT_COUNT:
2502 case CTSF_LAYERPCT_COUNT:
2503 case CTSF_LAYERROWPCT_COUNT:
2504 case CTSF_LAYERCOLPCT_COUNT:
2505 case CTSF_ROWPCT_VALIDN:
2506 case CTSF_COLPCT_VALIDN:
2507 case CTSF_TABLEPCT_VALIDN:
2508 case CTSF_SUBTABLEPCT_VALIDN:
2509 case CTSF_LAYERPCT_VALIDN:
2510 case CTSF_LAYERROWPCT_VALIDN:
2511 case CTSF_LAYERCOLPCT_VALIDN:
2512 case CTSF_ROWPCT_TOTALN:
2513 case CTSF_COLPCT_TOTALN:
2514 case CTSF_TABLEPCT_TOTALN:
2515 case CTSF_SUBTABLEPCT_TOTALN:
2516 case CTSF_LAYERPCT_TOTALN:
2517 case CTSF_LAYERROWPCT_TOTALN:
2518 case CTSF_LAYERCOLPCT_TOTALN:
2525 case CTSF_UROWPCT_COUNT:
2526 case CTSF_UCOLPCT_COUNT:
2527 case CTSF_UTABLEPCT_COUNT:
2528 case CTSF_USUBTABLEPCT_COUNT:
2529 case CTSF_ULAYERPCT_COUNT:
2530 case CTSF_ULAYERROWPCT_COUNT:
2531 case CTSF_ULAYERCOLPCT_COUNT:
2532 case CTSF_UROWPCT_VALIDN:
2533 case CTSF_UCOLPCT_VALIDN:
2534 case CTSF_UTABLEPCT_VALIDN:
2535 case CTSF_USUBTABLEPCT_VALIDN:
2536 case CTSF_ULAYERPCT_VALIDN:
2537 case CTSF_ULAYERROWPCT_VALIDN:
2538 case CTSF_ULAYERCOLPCT_VALIDN:
2539 case CTSF_UROWPCT_TOTALN:
2540 case CTSF_UCOLPCT_TOTALN:
2541 case CTSF_UTABLEPCT_TOTALN:
2542 case CTSF_USUBTABLEPCT_TOTALN:
2543 case CTSF_ULAYERPCT_TOTALN:
2544 case CTSF_ULAYERROWPCT_TOTALN:
2545 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for both extremes. */
2555 s->min = s->max = SYSMIS;
2563 case CTSF_ROWPCT_SUM:
2564 case CTSF_COLPCT_SUM:
2565 case CTSF_TABLEPCT_SUM:
2566 case CTSF_SUBTABLEPCT_SUM:
2567 case CTSF_LAYERPCT_SUM:
2568 case CTSF_LAYERROWPCT_SUM:
2569 case CTSF_LAYERCOLPCT_SUM:
2574 case CTSF_UVARIANCE:
2575 case CTSF_UROWPCT_SUM:
2576 case CTSF_UCOLPCT_SUM:
2577 case CTSF_UTABLEPCT_SUM:
2578 case CTSF_USUBTABLEPCT_SUM:
2579 case CTSF_ULAYERPCT_SUM:
2580 case CTSF_ULAYERROWPCT_SUM:
2581 case CTSF_ULAYERCOLPCT_SUM:
2582 s->moments = moments1_create (MOMENT_VARIANCE);
/* Cases written to the sorter have two numeric (width 0) columns and are
   sorted in ascending order on column 0. */
2592 struct caseproto *proto = caseproto_create ();
2593 proto = caseproto_add_width (proto, 0);
2594 proto = caseproto_add_width (proto, 0);
2596 struct subcase ordering;
2597 subcase_init (&ordering, 0, 0, SC_ASCEND);
2598 s->writer = sort_create_writer (&ordering, proto);
2599 subcase_uninit (&ordering);
2600 caseproto_unref (proto);
/* Releases the resources held by accumulator S, which was initialized for
   the summary function in SS.  Of the statements visible below, the
   moment-based functions destroy their moments1 accumulator and the
   functions that need sorted data destroy their casewriter.
   NOTE(review): the statements for the other groups and several case labels
   are not visible here; presumably the count-style and minimum/maximum
   groups need no cleanup -- confirm. */
2610 ctables_summary_uninit (union ctables_summary *s,
2611 const struct ctables_summary_spec *ss)
2613 switch (ss->function)
2617 case CTSF_ROWPCT_COUNT:
2618 case CTSF_COLPCT_COUNT:
2619 case CTSF_TABLEPCT_COUNT:
2620 case CTSF_SUBTABLEPCT_COUNT:
2621 case CTSF_LAYERPCT_COUNT:
2622 case CTSF_LAYERROWPCT_COUNT:
2623 case CTSF_LAYERCOLPCT_COUNT:
2624 case CTSF_ROWPCT_VALIDN:
2625 case CTSF_COLPCT_VALIDN:
2626 case CTSF_TABLEPCT_VALIDN:
2627 case CTSF_SUBTABLEPCT_VALIDN:
2628 case CTSF_LAYERPCT_VALIDN:
2629 case CTSF_LAYERROWPCT_VALIDN:
2630 case CTSF_LAYERCOLPCT_VALIDN:
2631 case CTSF_ROWPCT_TOTALN:
2632 case CTSF_COLPCT_TOTALN:
2633 case CTSF_TABLEPCT_TOTALN:
2634 case CTSF_SUBTABLEPCT_TOTALN:
2635 case CTSF_LAYERPCT_TOTALN:
2636 case CTSF_LAYERROWPCT_TOTALN:
2637 case CTSF_LAYERCOLPCT_TOTALN:
2644 case CTSF_UROWPCT_COUNT:
2645 case CTSF_UCOLPCT_COUNT:
2646 case CTSF_UTABLEPCT_COUNT:
2647 case CTSF_USUBTABLEPCT_COUNT:
2648 case CTSF_ULAYERPCT_COUNT:
2649 case CTSF_ULAYERROWPCT_COUNT:
2650 case CTSF_ULAYERCOLPCT_COUNT:
2651 case CTSF_UROWPCT_VALIDN:
2652 case CTSF_UCOLPCT_VALIDN:
2653 case CTSF_UTABLEPCT_VALIDN:
2654 case CTSF_USUBTABLEPCT_VALIDN:
2655 case CTSF_ULAYERPCT_VALIDN:
2656 case CTSF_ULAYERROWPCT_VALIDN:
2657 case CTSF_ULAYERCOLPCT_VALIDN:
2658 case CTSF_UROWPCT_TOTALN:
2659 case CTSF_UCOLPCT_TOTALN:
2660 case CTSF_UTABLEPCT_TOTALN:
2661 case CTSF_USUBTABLEPCT_TOTALN:
2662 case CTSF_ULAYERPCT_TOTALN:
2663 case CTSF_ULAYERROWPCT_TOTALN:
2664 case CTSF_ULAYERCOLPCT_TOTALN:
2680 case CTSF_ROWPCT_SUM:
2681 case CTSF_COLPCT_SUM:
2682 case CTSF_TABLEPCT_SUM:
2683 case CTSF_SUBTABLEPCT_SUM:
2684 case CTSF_LAYERPCT_SUM:
2685 case CTSF_LAYERROWPCT_SUM:
2686 case CTSF_LAYERCOLPCT_SUM:
2691 case CTSF_UVARIANCE:
2692 case CTSF_UROWPCT_SUM:
2693 case CTSF_UCOLPCT_SUM:
2694 case CTSF_UTABLEPCT_SUM:
2695 case CTSF_USUBTABLEPCT_SUM:
2696 case CTSF_ULAYERPCT_SUM:
2697 case CTSF_ULAYERROWPCT_SUM:
2698 case CTSF_ULAYERCOLPCT_SUM:
2699 moments1_destroy (s->moments);
2708 casewriter_destroy (s->writer);
/* Adds one case to accumulator S for the summary function in SS.

   VALUE is the case's value of VAR.  IS_SCALE, IS_SCALE_MISSING, IS_MISSING
   and EXCLUDED_MISSING describe the case, and the comment at the top of the
   body explains how they decide whether the case is counted.  D_WEIGHT and
   E_WEIGHT are two case weights: the weighted count-style functions visible
   below add D_WEIGHT or E_WEIGHT depending on the group, the weighted
   moment-based functions use E_WEIGHT, and the unweighted ("U") moment-based
   functions use a weight of 1.0.
   NOTE(review): the distinction between D_WEIGHT and E_WEIGHT is not
   established by the visible code -- confirm against the caller. */
2714 ctables_summary_add (union ctables_summary *s,
2715 const struct ctables_summary_spec *ss,
2716 const struct variable *var, const union value *value,
2717 bool is_scale, bool is_scale_missing,
2718 bool is_missing, bool excluded_missing,
2719 double d_weight, double e_weight)
2721 /* To determine whether a case is included in a given table for a particular
2722 kind of summary, consider the following charts for each variable in the
2723 table. Only if "yes" appears for every variable for the summary is the
2726 Categorical variables: VALIDN COUNT TOTALN
2727 Valid values in included categories yes yes yes
2728 Missing values in included categories --- yes yes
2729 Missing values in excluded categories --- --- yes
2730 Valid values in excluded categories --- --- ---
2732 Scale variables: VALIDN COUNT TOTALN
2733 Valid value yes yes yes
2734 Missing value --- yes yes
2736 Missing values include both user- and system-missing. (The system-missing
2737 value is always in an excluded category.)
2739 switch (ss->function)
2742 case CTSF_ROWPCT_TOTALN:
2743 case CTSF_COLPCT_TOTALN:
2744 case CTSF_TABLEPCT_TOTALN:
2745 case CTSF_SUBTABLEPCT_TOTALN:
2746 case CTSF_LAYERPCT_TOTALN:
2747 case CTSF_LAYERROWPCT_TOTALN:
2748 case CTSF_LAYERCOLPCT_TOTALN:
2749 s->count += d_weight;
2753 case CTSF_UROWPCT_TOTALN:
2754 case CTSF_UCOLPCT_TOTALN:
2755 case CTSF_UTABLEPCT_TOTALN:
2756 case CTSF_USUBTABLEPCT_TOTALN:
2757 case CTSF_ULAYERPCT_TOTALN:
2758 case CTSF_ULAYERROWPCT_TOTALN:
2759 case CTSF_ULAYERCOLPCT_TOTALN:
2764 case CTSF_ROWPCT_COUNT:
2765 case CTSF_COLPCT_COUNT:
2766 case CTSF_TABLEPCT_COUNT:
2767 case CTSF_SUBTABLEPCT_COUNT:
2768 case CTSF_LAYERPCT_COUNT:
2769 case CTSF_LAYERROWPCT_COUNT:
2770 case CTSF_LAYERCOLPCT_COUNT:
2771 if (is_scale || !excluded_missing)
2772 s->count += d_weight;
2776 case CTSF_UROWPCT_COUNT:
2777 case CTSF_UCOLPCT_COUNT:
2778 case CTSF_UTABLEPCT_COUNT:
2779 case CTSF_USUBTABLEPCT_COUNT:
2780 case CTSF_ULAYERPCT_COUNT:
2781 case CTSF_ULAYERROWPCT_COUNT:
2782 case CTSF_ULAYERCOLPCT_COUNT:
2783 if (is_scale || !excluded_missing)
2788 case CTSF_ROWPCT_VALIDN:
2789 case CTSF_COLPCT_VALIDN:
2790 case CTSF_TABLEPCT_VALIDN:
2791 case CTSF_SUBTABLEPCT_VALIDN:
2792 case CTSF_LAYERPCT_VALIDN:
2793 case CTSF_LAYERROWPCT_VALIDN:
2794 case CTSF_LAYERCOLPCT_VALIDN:
2798 s->count += d_weight;
2802 case CTSF_UROWPCT_VALIDN:
2803 case CTSF_UCOLPCT_VALIDN:
2804 case CTSF_UTABLEPCT_VALIDN:
2805 case CTSF_USUBTABLEPCT_VALIDN:
2806 case CTSF_ULAYERPCT_VALIDN:
2807 case CTSF_ULAYERROWPCT_VALIDN:
2808 case CTSF_ULAYERCOLPCT_VALIDN:
2817 s->count += d_weight;
2826 if (is_scale || !excluded_missing)
2827 s->count += e_weight;
2834 s->count += e_weight;
2838 s->count += e_weight;
2844 if (!is_scale_missing)
2846 assert (!var_is_alpha (var)); /* XXX? */
/* SYSMIS in min or max means that no value has been recorded yet. */
2847 if (s->min == SYSMIS || value->f < s->min)
2849 if (s->max == SYSMIS || value->f > s->max)
2859 case CTSF_ROWPCT_SUM:
2860 case CTSF_COLPCT_SUM:
2861 case CTSF_TABLEPCT_SUM:
2862 case CTSF_SUBTABLEPCT_SUM:
2863 case CTSF_LAYERPCT_SUM:
2864 case CTSF_LAYERROWPCT_SUM:
2865 case CTSF_LAYERCOLPCT_SUM:
/* Weighted moments: the case contributes with weight E_WEIGHT. */
2866 if (!is_scale_missing)
2867 moments1_add (s->moments, value->f, e_weight);
2874 case CTSF_UVARIANCE:
2875 case CTSF_UROWPCT_SUM:
2876 case CTSF_UCOLPCT_SUM:
2877 case CTSF_UTABLEPCT_SUM:
2878 case CTSF_USUBTABLEPCT_SUM:
2879 case CTSF_ULAYERPCT_SUM:
2880 case CTSF_ULAYERROWPCT_SUM:
2881 case CTSF_ULAYERCOLPCT_SUM:
/* Unweighted moments: every case contributes with weight 1.0. */
2882 if (!is_scale_missing)
2883 moments1_add (s->moments, value->f, 1.0);
/* Force unit weights.  NOTE(review): presumably this belongs to the
   unweighted variants of the sorted-data functions, which then share the
   code that follows -- confirm. */
2889 d_weight = e_weight = 1.0;
2894 if (!is_scale_missing)
2896 s->ovalid += e_weight;
/* Queue the value (column 0) and its weight (column 1) for sorting. */
2898 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2899 *case_num_rw_idx (c, 0) = value->f;
2900 *case_num_rw_idx (c, 1) = e_weight;
2901 casewriter_write (s->writer, c);
/* Returns the domain type that summary FUNCTION is computed relative to.
   ctables_summary_value() uses the result to index a cell's domains[] array.

   Only three of the return statements are visible in this listing: the
   LAYERCOLPCT, LAYERROWPCT and SUBTABLEPCT groups return CTDT_LAYERCOL,
   CTDT_LAYERROW and CTDT_SUBTABLE.  The other groups presumably follow the
   same naming pattern -- confirm against the complete file. */
2907 static enum ctables_domain_type
2908 ctables_function_domain (enum ctables_summary_function function)
2938 case CTSF_UVARIANCE:
/* Column percentages (return statement not visible here). */
2944 case CTSF_COLPCT_COUNT:
2945 case CTSF_COLPCT_SUM:
2946 case CTSF_COLPCT_TOTALN:
2947 case CTSF_COLPCT_VALIDN:
2948 case CTSF_UCOLPCT_COUNT:
2949 case CTSF_UCOLPCT_SUM:
2950 case CTSF_UCOLPCT_TOTALN:
2951 case CTSF_UCOLPCT_VALIDN:
/* Layer-column percentages. */
2954 case CTSF_LAYERCOLPCT_COUNT:
2955 case CTSF_LAYERCOLPCT_SUM:
2956 case CTSF_LAYERCOLPCT_TOTALN:
2957 case CTSF_LAYERCOLPCT_VALIDN:
2958 case CTSF_ULAYERCOLPCT_COUNT:
2959 case CTSF_ULAYERCOLPCT_SUM:
2960 case CTSF_ULAYERCOLPCT_TOTALN:
2961 case CTSF_ULAYERCOLPCT_VALIDN:
2962 return CTDT_LAYERCOL;
/* Layer percentages (return statement not visible here). */
2964 case CTSF_LAYERPCT_COUNT:
2965 case CTSF_LAYERPCT_SUM:
2966 case CTSF_LAYERPCT_TOTALN:
2967 case CTSF_LAYERPCT_VALIDN:
2968 case CTSF_ULAYERPCT_COUNT:
2969 case CTSF_ULAYERPCT_SUM:
2970 case CTSF_ULAYERPCT_TOTALN:
2971 case CTSF_ULAYERPCT_VALIDN:
/* Layer-row percentages. */
2974 case CTSF_LAYERROWPCT_COUNT:
2975 case CTSF_LAYERROWPCT_SUM:
2976 case CTSF_LAYERROWPCT_TOTALN:
2977 case CTSF_LAYERROWPCT_VALIDN:
2978 case CTSF_ULAYERROWPCT_COUNT:
2979 case CTSF_ULAYERROWPCT_SUM:
2980 case CTSF_ULAYERROWPCT_TOTALN:
2981 case CTSF_ULAYERROWPCT_VALIDN:
2982 return CTDT_LAYERROW;
/* Row percentages (return statement not visible here). */
2984 case CTSF_ROWPCT_COUNT:
2985 case CTSF_ROWPCT_SUM:
2986 case CTSF_ROWPCT_TOTALN:
2987 case CTSF_ROWPCT_VALIDN:
2988 case CTSF_UROWPCT_COUNT:
2989 case CTSF_UROWPCT_SUM:
2990 case CTSF_UROWPCT_TOTALN:
2991 case CTSF_UROWPCT_VALIDN:
/* Subtable percentages. */
2994 case CTSF_SUBTABLEPCT_COUNT:
2995 case CTSF_SUBTABLEPCT_SUM:
2996 case CTSF_SUBTABLEPCT_TOTALN:
2997 case CTSF_SUBTABLEPCT_VALIDN:
2998 case CTSF_USUBTABLEPCT_COUNT:
2999 case CTSF_USUBTABLEPCT_SUM:
3000 case CTSF_USUBTABLEPCT_TOTALN:
3001 case CTSF_USUBTABLEPCT_VALIDN:
3002 return CTDT_SUBTABLE;
/* Table percentages (return statement not visible here). */
3004 case CTSF_TABLEPCT_COUNT:
3005 case CTSF_TABLEPCT_SUM:
3006 case CTSF_TABLEPCT_TOTALN:
3007 case CTSF_TABLEPCT_VALIDN:
3008 case CTSF_UTABLEPCT_COUNT:
3009 case CTSF_UTABLEPCT_SUM:
3010 case CTSF_UTABLEPCT_TOTALN:
3011 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION.  Judging by the name, this presumably reports
   whether FUNCTION is one of the *PCT_SUM functions: the case labels are
   split into a group of COUNT/TOTALN/VALIDN percentages followed by a group
   containing only the *PCT_SUM functions.  The return statements are not
   visible in this listing.

   NOTE(review): the declared return type is enum ctables_domain_type, the
   same as ctables_function_domain(), which looks like a copy-and-paste
   leftover for a predicate.  Confirm what the function returns before
   changing the type to bool. */
3018 static enum ctables_domain_type
3019 ctables_function_is_pctsum (enum ctables_summary_function function)
3049 case CTSF_UVARIANCE:
/* Percentages of counts, total N and valid N. */
3053 case CTSF_COLPCT_COUNT:
3054 case CTSF_COLPCT_TOTALN:
3055 case CTSF_COLPCT_VALIDN:
3056 case CTSF_UCOLPCT_COUNT:
3057 case CTSF_UCOLPCT_TOTALN:
3058 case CTSF_UCOLPCT_VALIDN:
3059 case CTSF_LAYERCOLPCT_COUNT:
3060 case CTSF_LAYERCOLPCT_TOTALN:
3061 case CTSF_LAYERCOLPCT_VALIDN:
3062 case CTSF_ULAYERCOLPCT_COUNT:
3063 case CTSF_ULAYERCOLPCT_TOTALN:
3064 case CTSF_ULAYERCOLPCT_VALIDN:
3065 case CTSF_LAYERPCT_COUNT:
3066 case CTSF_LAYERPCT_TOTALN:
3067 case CTSF_LAYERPCT_VALIDN:
3068 case CTSF_ULAYERPCT_COUNT:
3069 case CTSF_ULAYERPCT_TOTALN:
3070 case CTSF_ULAYERPCT_VALIDN:
3071 case CTSF_LAYERROWPCT_COUNT:
3072 case CTSF_LAYERROWPCT_TOTALN:
3073 case CTSF_LAYERROWPCT_VALIDN:
3074 case CTSF_ULAYERROWPCT_COUNT:
3075 case CTSF_ULAYERROWPCT_TOTALN:
3076 case CTSF_ULAYERROWPCT_VALIDN:
3077 case CTSF_ROWPCT_COUNT:
3078 case CTSF_ROWPCT_TOTALN:
3079 case CTSF_ROWPCT_VALIDN:
3080 case CTSF_UROWPCT_COUNT:
3081 case CTSF_UROWPCT_TOTALN:
3082 case CTSF_UROWPCT_VALIDN:
3083 case CTSF_SUBTABLEPCT_COUNT:
3084 case CTSF_SUBTABLEPCT_TOTALN:
3085 case CTSF_SUBTABLEPCT_VALIDN:
3086 case CTSF_USUBTABLEPCT_COUNT:
3087 case CTSF_USUBTABLEPCT_TOTALN:
3088 case CTSF_USUBTABLEPCT_VALIDN:
3089 case CTSF_TABLEPCT_COUNT:
3090 case CTSF_TABLEPCT_TOTALN:
3091 case CTSF_TABLEPCT_VALIDN:
3092 case CTSF_UTABLEPCT_COUNT:
3093 case CTSF_UTABLEPCT_TOTALN:
3094 case CTSF_UTABLEPCT_VALIDN:
/* Percentages of sums. */
3097 case CTSF_COLPCT_SUM:
3098 case CTSF_UCOLPCT_SUM:
3099 case CTSF_LAYERCOLPCT_SUM:
3100 case CTSF_ULAYERCOLPCT_SUM:
3101 case CTSF_LAYERPCT_SUM:
3102 case CTSF_ULAYERPCT_SUM:
3103 case CTSF_LAYERROWPCT_SUM:
3104 case CTSF_ULAYERROWPCT_SUM:
3105 case CTSF_ROWPCT_SUM:
3106 case CTSF_UROWPCT_SUM:
3107 case CTSF_SUBTABLEPCT_SUM:
3108 case CTSF_USUBTABLEPCT_SUM:
3109 case CTSF_TABLEPCT_SUM:
3110 case CTSF_UTABLEPCT_SUM:
/* Computes the value of summary S, described by spec SS, for CELL.

   Percentage functions look up the cell's domain for the function with
   ctables_function_domain() and divide by one of that domain's accumulators.
   Several case groups, and the false arm of most conditional expressions,
   are not visible in this listing. */
3118 ctables_summary_value (const struct ctables_cell *cell,
3119 union ctables_summary *s,
3120 const struct ctables_summary_spec *ss)
3122 switch (ss->function)
/* Count as a percentage of the domain's e_count, when that is nonzero. */
3129 case CTSF_ROWPCT_COUNT:
3130 case CTSF_COLPCT_COUNT:
3131 case CTSF_TABLEPCT_COUNT:
3132 case CTSF_SUBTABLEPCT_COUNT:
3133 case CTSF_LAYERPCT_COUNT:
3134 case CTSF_LAYERROWPCT_COUNT:
3135 case CTSF_LAYERCOLPCT_COUNT:
3137 enum ctables_domain_type d = ctables_function_domain (ss->function);
3138 return (cell->domains[d]->e_count
3139 ? s->count / cell->domains[d]->e_count * 100
/* Same, relative to the domain's u_count. */
3143 case CTSF_UROWPCT_COUNT:
3144 case CTSF_UCOLPCT_COUNT:
3145 case CTSF_UTABLEPCT_COUNT:
3146 case CTSF_USUBTABLEPCT_COUNT:
3147 case CTSF_ULAYERPCT_COUNT:
3148 case CTSF_ULAYERROWPCT_COUNT:
3149 case CTSF_ULAYERCOLPCT_COUNT:
3151 enum ctables_domain_type d = ctables_function_domain (ss->function);
3152 return (cell->domains[d]->u_count
3153 ? s->count / cell->domains[d]->u_count * 100
/* Count as a percentage of the domain's e_valid. */
3157 case CTSF_ROWPCT_VALIDN:
3158 case CTSF_COLPCT_VALIDN:
3159 case CTSF_TABLEPCT_VALIDN:
3160 case CTSF_SUBTABLEPCT_VALIDN:
3161 case CTSF_LAYERPCT_VALIDN:
3162 case CTSF_LAYERROWPCT_VALIDN:
3163 case CTSF_LAYERCOLPCT_VALIDN:
3165 enum ctables_domain_type d = ctables_function_domain (ss->function);
3166 return (cell->domains[d]->e_valid
3167 ? s->count / cell->domains[d]->e_valid * 100
/* Same, relative to the domain's u_valid. */
3171 case CTSF_UROWPCT_VALIDN:
3172 case CTSF_UCOLPCT_VALIDN:
3173 case CTSF_UTABLEPCT_VALIDN:
3174 case CTSF_USUBTABLEPCT_VALIDN:
3175 case CTSF_ULAYERPCT_VALIDN:
3176 case CTSF_ULAYERROWPCT_VALIDN:
3177 case CTSF_ULAYERCOLPCT_VALIDN:
3179 enum ctables_domain_type d = ctables_function_domain (ss->function);
3180 return (cell->domains[d]->u_valid
3181 ? s->count / cell->domains[d]->u_valid * 100
/* Count as a percentage of the domain's e_total. */
3185 case CTSF_ROWPCT_TOTALN:
3186 case CTSF_COLPCT_TOTALN:
3187 case CTSF_TABLEPCT_TOTALN:
3188 case CTSF_SUBTABLEPCT_TOTALN:
3189 case CTSF_LAYERPCT_TOTALN:
3190 case CTSF_LAYERROWPCT_TOTALN:
3191 case CTSF_LAYERCOLPCT_TOTALN:
3193 enum ctables_domain_type d = ctables_function_domain (ss->function);
3194 return (cell->domains[d]->e_total
3195 ? s->count / cell->domains[d]->e_total * 100
/* Same, relative to the domain's u_total. */
3199 case CTSF_UROWPCT_TOTALN:
3200 case CTSF_UCOLPCT_TOTALN:
3201 case CTSF_UTABLEPCT_TOTALN:
3202 case CTSF_USUBTABLEPCT_TOTALN:
3203 case CTSF_ULAYERPCT_TOTALN:
3204 case CTSF_ULAYERROWPCT_TOTALN:
3205 case CTSF_ULAYERCOLPCT_TOTALN:
3207 enum ctables_domain_type d = ctables_function_domain (ss->function);
3208 return (cell->domains[d]->u_total
3209 ? s->count / cell->domains[d]->u_total * 100
/* Difference between maximum and minimum, or SYSMIS if either is SYSMIS. */
3230 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
/* The following fragments read statistics back from S's moments. */
3236 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3243 double weight, variance;
3244 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3245 return calc_semean (variance, weight);
3252 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Square root of the variance, passing SYSMIS through. */
3253 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3259 double weight, mean;
3260 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
/* The sum is recovered as total weight times mean. */
3261 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3265 case CTSF_UVARIANCE:
3268 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* This cell's sum (weight * mean) as a percentage of the domain's e_sum for
   the spec's sum variable; SYSMIS when that sum is zero. */
3272 case CTSF_ROWPCT_SUM:
3273 case CTSF_COLPCT_SUM:
3274 case CTSF_TABLEPCT_SUM:
3275 case CTSF_SUBTABLEPCT_SUM:
3276 case CTSF_LAYERPCT_SUM:
3277 case CTSF_LAYERROWPCT_SUM:
3278 case CTSF_LAYERCOLPCT_SUM:
3280 double weight, mean;
3281 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3282 if (weight == SYSMIS || mean == SYSMIS)
3284 enum ctables_domain_type d = ctables_function_domain (ss->function);
3285 double num = weight * mean;
3286 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3287 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Same, relative to the domain's u_sum. */
3289 case CTSF_UROWPCT_SUM:
3290 case CTSF_UCOLPCT_SUM:
3291 case CTSF_UTABLEPCT_SUM:
3292 case CTSF_USUBTABLEPCT_SUM:
3293 case CTSF_ULAYERPCT_SUM:
3294 case CTSF_ULAYERROWPCT_SUM:
3295 case CTSF_ULAYERCOLPCT_SUM:
3297 double weight, mean;
3298 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3299 if (weight == SYSMIS || mean == SYSMIS)
3301 enum ctables_domain_type d = ctables_function_domain (ss->function);
3302 double num = weight * mean;
3303 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3304 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile: turn the cases written to S's casewriter into a reader, feed
   it to a percentile order statistic, and store the result in s->ovalue.
   CTSF_PTILE uses the spec's percentile; otherwise 0.5 is used. */
3313 struct casereader *reader = casewriter_make_reader (s->writer);
3316 struct percentile *ptile = percentile_create (
3317 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3318 struct order_stats *os = &ptile->parent;
3319 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3320 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3321 statistic_destroy (&ptile->parent.parent);
/* Mode: the same scheme with a mode order statistic. */
3329 struct casereader *reader = casewriter_make_reader (s->writer);
3332 struct mode *mode = mode_create ();
3333 struct order_stats *os = &mode->parent;
3334 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3335 s->ovalue = mode->mode;
3336 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the pivot axis whose cell values are read. */
3344 struct ctables_cell_sort_aux
3346 const struct ctables_nest *nest;
3347 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ each point to a
   `struct ctables_cell *'; AUX_ points to a struct ctables_cell_sort_aux
   naming the nest and axis to compare on.

   The nest's variables are visited in order, skipping the scale variable.
   Cells whose categories differ are ordered by category pointer.  Cells in
   the same category are ordered according to the category's type.  Some of
   the case labels and return statements are not visible in this listing. */
3351 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3353 const struct ctables_cell_sort_aux *aux = aux_;
3354 struct ctables_cell *const *ap = a_;
3355 struct ctables_cell *const *bp = b_;
3356 const struct ctables_cell *a = *ap;
3357 const struct ctables_cell *b = *bp;
3359 const struct ctables_nest *nest = aux->nest;
3360 for (size_t i = 0; i < nest->n; i++)
3361 if (i != nest->scale_idx)
3363 const struct variable *var = nest->vars[i];
3364 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3365 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3366 if (a_cv->category != b_cv->category)
3367 return a_cv->category > b_cv->category ? 1 : -1;
3369 const union value *a_val = &a_cv->value;
3370 const union value *b_val = &b_cv->value;
3371 switch (a_cv->category->type)
3377 case CCT_POSTCOMPUTE:
3378 case CCT_EXCLUDED_MISSING:
3379 /* Must be equal. */
3387 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* Order by value, reversed when the category is not sorted ascending. */
3395 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3397 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Order by value label.  The condition of the outer conditional is not
   visible; presumably it tests a_label.  When both values have labels they
   are compared with strcmp(); a value with a label sorts after one without;
   when neither has a label the values themselves are compared. */
3403 const char *a_label = var_lookup_value_label (var, a_val);
3404 const char *b_label = var_lookup_value_label (var, b_val);
3406 ? (b_label ? strcmp (a_label, b_label) : 1)
3407 : (b_label ? -1 : value_compare_3way (
3408 a_val, b_val, var_get_width (var))));
3410 return a_cv->category->sort_ascending ? cmp : -cmp;
3424 For each ctables_table:
3425 For each combination of row vars:
3426 For each combination of column vars:
3427 For each combination of layer vars:
3429 Make a table of row values:
3430 Sort entries by row values
3431 Assign a 0-based index to each actual value
3432 Construct a dimension
3433 Make a table of column values
3434 Make a table of layer values
3436 Fill the table entry using the indexes from before.
/* Finds the domain of type DOMAIN that CELL belongs to within section S,
   creating it if there is none yet.

   A domain is identified by the categories (and, for categories other than
   totals, subtotals and postcomputes, the values) of the variables listed in
   each nest's domains[DOMAIN] array.  A new domain records CELL as its
   example cell, which later lookups compare against.  The return statements
   are not visible in this listing. */
3439 static struct ctables_domain *
3440 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3441 enum pivot_axis_type domain)
/* Hash the identifying categories and values. */
3444 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3446 const struct ctables_nest *nest = s->nests[a];
3447 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3449 size_t v_idx = nest->domains[domain][i];
3450 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3451 hash = hash_pointer (cv->category, hash);
3452 if (cv->category->type != CCT_TOTAL
3453 && cv->category->type != CCT_SUBTOTAL
3454 && cv->category->type != CCT_POSTCOMPUTE)
3455 hash = value_hash (&cv->value,
3456 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing domain whose example cell matches CELL on every
   identifying variable. */
3460 struct ctables_domain *d;
3461 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3463 const struct ctables_cell *df = d->example;
3464 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3466 const struct ctables_nest *nest = s->nests[a];
3467 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3469 size_t v_idx = nest->domains[domain][i];
3470 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3471 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3472 if (cv1->category != cv2->category
3473 || (cv1->category->type != CCT_TOTAL
3474 && cv1->category->type != CCT_SUBTOTAL
3475 && cv1->category->type != CCT_POSTCOMPUTE
3476 && !value_equal (&cv1->value, &cv2->value,
3477 var_get_width (nest->vars[v_idx]))))
/* No match: create a domain with one zeroed ctables_sum per sum variable
   (when the table has any) and add it to the section's map. */
3486 struct ctables_sum *sums = (s->table->n_sum_vars
3487 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3490 d = xmalloc (sizeof *d);
3491 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3492 hmap_insert (&s->domains[domain], &d->node, hash);
/* Builds a substring over string value V of VAR, spanning VAR's width, with
   trailing spaces removed.  The substring refers to V's own storage rather
   than a copy.  (The return statement is not visible in this listing.) */
3496 static struct substring
3497 rtrim_value (const union value *v, const struct variable *var)
3499 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3500 var_get_width (var));
3501 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether string value V of VAR, ignoring trailing spaces, lies in the
   inclusive range SRANGE[0]...SRANGE[1].  A bound whose string pointer is
   null is treated as absent, leaving that side of the range open. */
3506 in_string_range (const union value *v, const struct variable *var,
3507 const struct substring *srange)
3509 struct substring s = rtrim_value (v, var);
3510 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3511 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   The categories are scanned from last to first.  If the scan ends without
   returning, a missing value yields NULL and any other value yields OTHERNM,
   which starts out NULL; its assignment inside the loop is not visible in
   this listing, nor are most of the case labels and return statements. */
3514 static const struct ctables_category *
3515 ctables_categories_match (const struct ctables_categories *c,
3516 const union value *v, const struct variable *var)
/* The system-missing value is handled before the scan (the action taken is
   not visible here). */
3518 if (var_is_numeric (var) && v->f == SYSMIS)
3521 const struct ctables_category *othernm = NULL;
3522 for (size_t i = c->n_cats; i-- > 0; )
3524 const struct ctables_category *cat = &c->cats[i];
/* Exact numeric match. */
3528 if (cat->number == v->f)
/* Exact string match, ignoring trailing spaces in V. */
3533 if (ss_equals (cat->string, rtrim_value (v, var)))
/* Numeric range; -DBL_MAX and DBL_MAX mark an open lower or upper end. */
3538 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3539 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
/* String range. */
3544 if (in_string_range (v, var, cat->srange))
3549 if (var_is_value_missing (var, v))
3553 case CCT_POSTCOMPUTE:
/* This category accepts missing values only if it includes them. */
3568 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3571 case CCT_EXCLUDED_MISSING:
3576 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C if it is C's first or last category.
   (The final alternative of the conditional is not visible in this listing;
   presumably it is NULL.)

   NOTE(review): cats[0] and cats[n_cats - 1] are read without checking that
   n_cats is nonzero -- confirm that every ctables_categories has at least
   one category. */
3579 static const struct ctables_category *
3580 ctables_categories_total (const struct ctables_categories *c)
3582 const struct ctables_category *first = &c->cats[0];
3583 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3584 return (first->type == CCT_TOTAL ? first
3585 : last->type == CCT_TOTAL ? last
/* Looks up the cell in section S that is identified by the category
   assignments in CATS and the values in case C, creating it if it does not
   exist yet.

   A cell is identified by, for every non-scale variable in every axis's
   nest, its category plus (except for totals, subtotals and postcomputes)
   its value in C.  The return statements are not visible in this listing;
   ctables_cell_add__() uses the result as the cell to update. */
3589 static struct ctables_cell *
3590 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3591 const struct ctables_category *cats[PIVOT_N_AXES][10])
/* Hash the identifying categories and values. */
3594 enum ctables_summary_variant sv = CSV_CELL;
3595 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3597 const struct ctables_nest *nest = s->nests[a];
3598 for (size_t i = 0; i < nest->n; i++)
3599 if (i != nest->scale_idx)
3601 hash = hash_pointer (cats[a][i], hash);
3602 if (cats[a][i]->type != CCT_TOTAL
3603 && cats[a][i]->type != CCT_SUBTOTAL
3604 && cats[a][i]->type != CCT_POSTCOMPUTE)
3605 hash = value_hash (case_data (c, nest->vars[i]),
3606 var_get_width (nest->vars[i]), hash);
/* Search the section's cells for one that matches on every variable. */
3612 struct ctables_cell *cell;
3613 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3615 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3617 const struct ctables_nest *nest = s->nests[a];
3618 for (size_t i = 0; i < nest->n; i++)
3619 if (i != nest->scale_idx
3620 && (cats[a][i] != cell->axes[a].cvs[i].category
3621 || (cats[a][i]->type != CCT_TOTAL
3622 && cats[a][i]->type != CCT_SUBTOTAL
3623 && cats[a][i]->type != CCT_POSTCOMPUTE
3624 && !value_equal (case_data (c, nest->vars[i]),
3625 &cell->axes[a].cvs[i].value,
3626 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
3635 cell = xmalloc (sizeof *cell);
3638 cell->omit_domains = 0;
3639 cell->postcompute = false;
3640 //struct string name = DS_EMPTY_INITIALIZER;
3641 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3643 const struct ctables_nest *nest = s->nests[a];
3644 cell->axes[a].cvs = (nest->n
3645 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3647 for (size_t i = 0; i < nest->n; i++)
3649 const struct ctables_category *cat = cats[a][i];
3650 const struct variable *var = nest->vars[i];
3651 const union value *value = case_data (c, var);
3652 if (i != nest->scale_idx)
3654 const struct ctables_category *subtotal = cat->subtotal;
3655 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal or postcompute category marks, as bits in omit_domains,
   the domain types that this cell must not be counted in.  The set depends
   on the axis the category is on. */
3658 if (cat->type == CCT_TOTAL
3659 || cat->type == CCT_SUBTOTAL
3660 || cat->type == CCT_POSTCOMPUTE)
3662 /* XXX these should be more encompassing I think.*/
3666 case PIVOT_AXIS_COLUMN:
3667 cell->omit_domains |= ((1u << CTDT_TABLE) |
3668 (1u << CTDT_LAYER) |
3669 (1u << CTDT_LAYERCOL) |
3670 (1u << CTDT_SUBTABLE) |
3673 case PIVOT_AXIS_ROW:
3674 cell->omit_domains |= ((1u << CTDT_TABLE) |
3675 (1u << CTDT_LAYER) |
3676 (1u << CTDT_LAYERROW) |
3677 (1u << CTDT_SUBTABLE) |
3680 case PIVOT_AXIS_LAYER:
3681 cell->omit_domains |= ((1u << CTDT_TABLE) |
3682 (1u << CTDT_LAYER));
3686 if (cat->type == CCT_POSTCOMPUTE)
3687 cell->postcompute = true;
/* Record the category and a copy of the value for every variable in the
   nest. */
3690 cell->axes[a].cvs[i].category = cat;
3691 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below build a debugging name in `name', whose
   declaration is commented out above, so they are presumably disabled by a
   preprocessor guard that is not visible in this listing -- confirm. */
3694 if (i != nest->scale_idx)
3696 if (!ds_is_empty (&name))
3697 ds_put_cstr (&name, ", ");
3698 char *value_s = data_out (value, var_get_encoding (var),
3699 var_get_print_format (var),
3700 settings_get_fmt_settings ());
3701 if (cat->type == CCT_TOTAL
3702 || cat->type == CCT_SUBTOTAL
3703 || cat->type == CCT_POSTCOMPUTE)
3704 ds_put_format (&name, "%s=total", var_get_name (var));
3706 ds_put_format (&name, "%s=%s", var_get_name (var),
3707 value_s + strspn (value_s, " "));
3713 //cell->name = ds_steal_cstr (&name);
/* Initialize one summary per spec of the summary axis's nest, attach the
   cell to its domain of every type, and add it to the section's cell map. */
3715 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3716 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3717 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3718 for (size_t i = 0; i < specs->n; i++)
3719 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3720 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3721 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3722 hmap_insert (&s->cells, &cell->node, hash);
/* Checks whether case C is missing for the purposes of the scale summaries
   in SPECS.  The tests visible here are: whether SPECS is for a scale
   variable at all, whether that variable's value in C is missing, and
   whether any of SPECS's listwise variables is missing in C.  (The return
   statements are not visible in this listing.) */
3727 is_scale_missing (const struct ctables_summary_spec_set *specs,
3728 const struct ccase *c)
3730 if (!specs->is_scale)
3733 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3736 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3738 const struct variable *var = specs->listwise_vars[i];
3739 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell in section S selected by CATS, creating the cell
   if necessary, and to the domains that the cell is counted in.

   IS_MISSING and EXCLUDED_MISSING describe the case's categorical values, as
   computed by ctables_cell_insert().  D_WEIGHT and E_WEIGHT are the two case
   weights accumulated into the "d_" and "e_" totals respectively.

   Every summary of the cell is updated with ctables_summary_add().  Then
   each domain type whose bit is not set in the cell's omit_domains mask gets
   its totals, counts, valid counts and per-variable sums updated. */
3747 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3748 const struct ctables_category *cats[PIVOT_N_AXES][10],
3749 bool is_missing, bool excluded_missing,
3750 double d_weight, double e_weight)
3752 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3753 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3755 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3757 bool scale_missing = is_scale_missing (specs, c);
3758 for (size_t i = 0; i < specs->n; i++)
3759 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3760 specs->var, case_data (c, specs->var), specs->is_scale,
3761 scale_missing, is_missing, excluded_missing,
3762 d_weight, e_weight);
3763 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* omit_domains is a bit mask with one bit per domain type (see where
   ctables_cell_insert__() builds it), so it is tested with bitwise `&'.
   The logical `&&' used previously was true whenever any bit was set,
   which left such a cell out of every domain.
   NOTE(review): the mask construction carries an XXX about its
   completeness; re-check the masks now that they are honored per bit. */
3764 if (!(cell->omit_domains & (1u << dt)))
3766 struct ctables_domain *d = cell->domains[dt];
3767 d->d_total += d_weight;
3768 d->e_total += e_weight;
3770 if (!excluded_missing)
3772 d->d_count += d_weight;
3773 d->e_count += e_weight;
3778 d->d_valid += d_weight;
3779 d->e_valid += e_weight;
/* Accumulate each sum variable's nonmissing value into the domain: weighted
   by E_WEIGHT in e_sum and unweighted in u_sum. */
3782 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3784 /* XXX listwise_missing??? */
3785 const struct variable *var = s->table->sum_vars[i];
3786 double addend = case_num (c, var);
3787 if (!var_is_num_missing (var, addend))
3789 struct ctables_sum *sum = &d->sums[i];
3790 sum->e_sum += addend * e_weight;
3791 sum->u_sum += addend;
/* Adds case C to the total cells it contributes to in section S.

   Starting at variable START_NEST of axis START_AXIS, each non-scale
   variable's categories are searched for a total category with
   ctables_categories_total().  The case is added with that variable's entry
   in CATS replaced by the total, and the function then recurses from the
   next variable so that combinations of totals are covered as well.  SAVE
   holds the replaced entry; the guard on TOTAL, the assignment into CATS and
   the restore are not visible in this listing.  The other arguments are
   passed through to ctables_cell_add__(). */
3799 recurse_totals (struct ctables_section *s, const struct ccase *c,
3800 const struct ctables_category *cats[PIVOT_N_AXES][10],
3801 bool is_missing, bool excluded_missing,
3802 double d_weight, double e_weight,
3803 enum pivot_axis_type start_axis, size_t start_nest)
3805 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3807 const struct ctables_nest *nest = s->nests[a];
3808 for (size_t i = start_nest; i < nest->n; i++)
3810 if (i == nest->scale_idx)
3813 const struct variable *var = nest->vars[i];
3815 const struct ctables_category *total = ctables_categories_total (
3816 s->table->categories[var_get_dict_index (var)]);
3819 const struct ctables_category *save = cats[a][i];
3821 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3822 d_weight, e_weight);
3823 recurse_totals (s, c, cats, is_missing, excluded_missing,
3824 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells it contributes to in section S.

   This mirrors recurse_totals(): starting at variable START_NEST of axis
   START_AXIS, each non-scale variable's entry in CATS is replaced by that
   category's subtotal, the case is added, and the function recurses from the
   next variable.  SAVE holds the replaced entry; the guard on the subtotal
   and the restore are not visible in this listing. */
3833 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3834 const struct ctables_category *cats[PIVOT_N_AXES][10],
3835 bool is_missing, bool excluded_missing,
3836 double d_weight, double e_weight,
3837 enum pivot_axis_type start_axis, size_t start_nest)
3839 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3841 const struct ctables_nest *nest = s->nests[a];
3842 for (size_t i = start_nest; i < nest->n; i++)
3844 if (i == nest->scale_idx)
3847 const struct ctables_category *save = cats[a][i];
3850 cats[a][i] = save->subtotal;
3851 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3852 d_weight, e_weight);
3853 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3854 d_weight, e_weight, a, i + 1);
/* Records that VALUE of variable VAR occurs in the data by adding it to the
   OCCURRENCES hash map.  The map is first searched for an equal value (the
   action taken on a match is not visible in this listing; presumably the
   function returns).  A new entry holds its own copy of VALUE. */
3863 ctables_add_occurrence (const struct variable *var,
3864 const union value *value,
3865 struct hmap *occurrences)
3867 int width = var_get_width (var);
3868 unsigned int hash = value_hash (value, width, 0);
3870 struct ctables_occurrence *o;
3871 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3873 if (value_equal (value, &o->value, width))
3876 o = xmalloc (sizeof *o);
3877 value_clone (&o->value, value, width);
3878 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First pass: every non-scale variable's value in C is matched to a category
   with ctables_categories_match().  A static CCT_EXCLUDED_MISSING category
   can be substituted, which also sets EXCLUDED_MISSING; the conditions for
   that are not visible in this listing.

   Unless EXCLUDED_MISSING was set, each variable's value is then recorded in
   the section's occurrences maps.  Finally the case is added to its cell and
   to the related total and subtotal cells.

   NOTE(review): CATS has room for only 10 variables per axis (marked XXX
   below) and no check against nest->n is visible in this listing. */
3882 ctables_cell_insert (struct ctables_section *s,
3883 const struct ccase *c,
3884 double d_weight, double e_weight)
3886 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3888 /* Does at least one categorical variable have a missing value in an included
3889 or excluded category? */
3890 bool is_missing = false;
3892 /* Does at least one categorical variable have a missing value in an excluded
3894 bool excluded_missing = false;
3896 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3898 const struct ctables_nest *nest = s->nests[a];
3899 for (size_t i = 0; i < nest->n; i++)
3901 if (i == nest->scale_idx)
3904 const struct variable *var = nest->vars[i];
3905 const union value *value = case_data (c, var);
3907 bool var_missing = var_is_value_missing (var, value) != 0;
3911 cats[a][i] = ctables_categories_match (
3912 s->table->categories[var_get_dict_index (var)], value, var);
3918 static const struct ctables_category cct_excluded_missing = {
3919 .type = CCT_EXCLUDED_MISSING,
3922 cats[a][i] = &cct_excluded_missing;
3923 excluded_missing = true;
3928 if (!excluded_missing)
3929 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3931 const struct ctables_nest *nest = s->nests[a];
3932 for (size_t i = 0; i < nest->n; i++)
3933 if (i != nest->scale_idx)
3935 const struct variable *var = nest->vars[i];
3936 const union value *value = case_data (c, var);
3937 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3941 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3942 d_weight, e_weight);
3944 //if (!excluded_missing)
3946 recurse_totals (s, c, cats, is_missing, excluded_missing,
3947 d_weight, e_weight, 0, 0);
3948 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3949 d_weight, e_weight, 0, 0);
/* The summary spec set that a merge item refers to.  merge_item_compare_3way()
   indexes set->specs[] with the item's `ofs' member, whose declaration is not
   visible in this listing. */
3955 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B refer
   to.  Specs are ordered by function, then by percentile, then by label,
   with a null label treated as the empty string.

   Note that the percentile comparison is reversed relative to the function
   comparison: the spec with the smaller percentile compares greater. */
3960 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3962 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3963 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3964 if (as->function != bs->function)
3965 return as->function > bs->function ? 1 : -1;
3966 else if (as->percentile != bs->percentile)
3967 return as->percentile < bs->percentile ? 1 : -1;
3969 const char *as_label = as->label ? as->label : "";
3970 const char *bs_label = bs->label ? bs->label : "";
3971 return strcmp (as_label, bs_label);
/* Creates and returns a pivot value that labels category CAT: the category's
   total_label as user text for a total or subtotal, otherwise VALUE
   presented as a value of variable VAR. */
3974 static struct pivot_value *
3975 ctables_category_create_label__ (const struct ctables_category *cat,
3976 const struct variable *var,
3977 const union value *value)
3979 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3980 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3981 : pivot_value_new_var_value (var, value));
/* Creates and returns the label for postcompute category CAT, expanding each
   ")LABEL[n]" in the postcompute's label into the label of the Nth category
   (counting from 1) in CATS, as produced by
   ctables_category_create_label__() with VAR and VALUE.

   If an index is unterminated, not entirely numeric, or out of range, the
   checks below fail (their consequence is not visible in this listing).  The
   last statement returns the postcompute's label unexpanded and is
   presumably where those failures end up. */
3984 static struct pivot_value *
3985 ctables_postcompute_label (const struct ctables_categories *cats,
3986 const struct ctables_category *cat,
3987 const struct variable *var,
3988 const union value *value)
3990 struct substring in = ss_cstr (cat->pc->label);
3991 struct substring target = ss_cstr (")LABEL[");
3993 struct string out = DS_EMPTY_INITIALIZER;
3996 size_t chunk = ss_find_substring (in, target);
/* No further ")LABEL[": if nothing has been expanded, the remaining input is
   used directly; otherwise it is appended to the accumulated output. */
3997 if (chunk == SIZE_MAX)
3999 if (ds_is_empty (&out))
4000 return pivot_value_new_user_text (in.string, in.length);
4003 ds_put_substring (&out, in);
4004 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the marker, then skip past the marker. */
4008 ds_put_substring (&out, ss_head (in, chunk));
4009 ss_advance (&in, chunk + target.length);
/* Parse the 1-based category index that runs up to the closing ']'. */
4011 struct substring idx_s;
4012 if (!ss_get_until (&in, ']', &idx_s))
4015 long int idx = strtol (idx_s.string, &tail, 10);
4016 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
/* Append the referenced category's label as text. */
4019 struct ctables_category *cat2 = &cats->cats[idx - 1];
4020 struct pivot_value *label2
4021 = ctables_category_create_label__ (cat2, var, value);
4022 char *label2_s = pivot_value_to_string_defaults (label2);
4023 ds_put_cstr (&out, label2_s);
4025 pivot_value_destroy (label2);
4030 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates and returns the label for category CAT of variable VAR with value
   VALUE.  A postcompute category that has its own label gets that label with
   its ")LABEL[n]" references expanded against CATS; every other category
   gets the label from ctables_category_create_label__(). */
4033 static struct pivot_value *
4034 ctables_category_create_label (const struct ctables_categories *cats,
4035 const struct ctables_category *cat,
4036 const struct variable *var,
4037 const union value *value)
4039 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4040 ? ctables_postcompute_label (cats, cat, var, value)
4041 : ctables_category_create_label__ (cat, var, value));
/* Searches T's clabels_values_map for an entry equal to VALUE, which has
   width WIDTH and hash HASH.  (The return statements are not visible in this
   listing; presumably the matching entry or NULL is returned.) */
4044 static struct ctables_value *
4045 ctables_value_find__ (struct ctables_table *t, const union value *value,
4046 int width, unsigned int hash)
4048 struct ctables_value *clv;
4049 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4050 hash, &t->clabels_values_map)
4051 if (value_equal (value, &clv->value, width))
/* Adds a copy of VALUE to T's clabels_values_map.  The map is searched for
   VALUE first; the test on the search result is not visible in this listing,
   but presumably the new entry is created only when none was found. */
4057 ctables_value_insert (struct ctables_table *t, const union value *value,
4060 unsigned int hash = value_hash (value, width, 0);
4061 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4064 clv = xmalloc (sizeof *clv);
4065 value_clone (&clv->value, value, width);
4066 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of width WIDTH, in T's clabels_values_map, computing the
   hash itself, and returns whatever ctables_value_find__() returns. */
4070 static struct ctables_value *
4071 ctables_value_find (struct ctables_table *t,
4072 const union value *value, int width)
4074 return ctables_value_find__ (t, value, width,
4075 value_hash (value, width, 0));
/* Adds to T one section for every combination of nests across the axes.

   While A is a valid axis, IX[A] is stepped through that axis's stack (or
   through the single index 0 if the stack is empty) and the function
   recurses on the next axis.  Once every axis has an index, the next element
   of T->sections is initialized with empty hash maps for its cells, for the
   occurrences of each variable in each axis's nest, and for each domain
   type.

   T->sections is written at index n_sections without a visible bounds
   check; presumably the caller allocates it for all combinations. */
4079 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4080 size_t ix[PIVOT_N_AXES])
4082 if (a < PIVOT_N_AXES)
4084 size_t limit = MAX (t->stacks[a].n, 1);
4085 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4086 ctables_table_add_section (t, a + 1, ix);
4090 struct ctables_section *s = &t->sections[t->n_sections++];
4091 *s = (struct ctables_section) {
4093 .cells = HMAP_INITIALIZER (s->cells),
4095 for (a = 0; a < PIVOT_N_AXES; a++)
4098 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4100 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4101 for (size_t i = 0; i < nest->n; i++)
4102 hmap_init (&s->occurrences[a][i]);
4104 for (size_t i = 0; i < N_CTDTS; i++)
4105 hmap_init (&s->domains[i]);
/* Binary operators on doubles, with the (double, double) signature that
   ctables_pcexpr_evaluate_nonterminal() takes as its `evaluate' callback.
   Their bodies are not visible in this listing; by their names they
   presumably add, subtract and multiply A and B. */
4110 ctpo_add (double a, double b)
4116 ctpo_sub (double a, double b)
4122 ctpo_mul (double a, double b)
/* Returns A divided by B, or SYSMIS if B is zero. */
4128 ctpo_div (double a, double b)
4130 return b ? a / b : SYSMIS;
/* Computes A raised to the power B with pow().  The caller's errno is saved
   first; what is done with errno afterward and the return statement are not
   visible in this listing. */
4134 ctpo_pow (double a, double b)
4136 int save_errno = errno;
4138 double result = pow (a, b);
/* Unary operator in the two-argument operator signature; B is unused.  The
   body is not visible in this listing; by its name it presumably negates
   A. */
4146 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression.  Code later in the file
   also reads members `pc_a_idx' and `summary_idx', whose declarations are
   not visible in this listing. */
4151 struct ctables_pcexpr_evaluate_ctx
/* The cell supplying the category and value of every variable other than the
   postcompute's own. */
4153 const struct ctables_cell *cell;
/* The section whose cells and occurrences are searched. */
4154 const struct ctables_section *section;
/* The categories that the expression's category references are resolved
   against. */
4155 const struct ctables_categories *cats;
/* The axis of the postcompute's variable; used together with pc_a_idx. */
4156 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute(). */
4159 enum fmt_type parse_format;
/* Forward declaration, needed because
   ctables_pcexpr_evaluate_nonterminal() calls this function to evaluate
   subexpressions. */
4162 static double ctables_pcexpr_evaluate (
4163 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E of a postcompute expression: evaluates its first
   N_ARGS subexpressions (at most 2) in context CTX and applies EVALUATE to
   the results.  Arguments beyond N_ARGS are passed as 0.

   Each operand is checked for being non-finite or SYSMIS before EVALUATE is
   called.  The action taken in that case is not visible in this listing;
   presumably SYSMIS is returned. */
4166 ctables_pcexpr_evaluate_nonterminal (
4167 const struct ctables_pcexpr_evaluate_ctx *ctx,
4168 const struct ctables_pcexpr *e, size_t n_args,
4169 double evaluate (double, double))
4171 double args[2] = { 0, 0 };
4172 for (size_t i = 0; i < n_args; i++)
4174 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4175 if (!isfinite (args[i]) || args[i] == SYSMIS)
4178 return evaluate (args[0], args[1]);
/* Evaluates a category reference in a postcompute expression.

   The cell looked up is the one that matches CTX->cell on every non-scale
   variable except the postcompute's own (axis CTX->pc_a, index
   CTX->pc_a_idx), for which PC_CV supplies the category and value.  The
   value of that cell's summary number CTX->summary_idx is returned.  What
   happens when no such cell exists is not visible in this listing. */
4182 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4183 const struct ctables_cell_value *pc_cv)
4185 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity in the same way as
   ctables_cell_insert__(), so that the lookup lands in the same bucket. */
4188 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4190 const struct ctables_nest *nest = s->nests[a];
4191 for (size_t i = 0; i < nest->n; i++)
4192 if (i != nest->scale_idx)
4194 const struct ctables_cell_value *cv
4195 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4196 : &ctx->cell->axes[a].cvs[i]);
4197 hash = hash_pointer (cv->category, hash);
4198 if (cv->category->type != CCT_TOTAL
4199 && cv->category->type != CCT_SUBTOTAL
4200 && cv->category->type != CCT_POSTCOMPUTE)
4201 hash = value_hash (&cv->value,
4202 var_get_width (nest->vars[i]), hash);
/* Find the cell that matches on every variable. */
4206 struct ctables_cell *tc;
4207 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4209 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4211 const struct ctables_nest *nest = s->nests[a];
4212 for (size_t i = 0; i < nest->n; i++)
4213 if (i != nest->scale_idx)
4215 const struct ctables_cell_value *p_cv
4216 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4217 : &ctx->cell->axes[a].cvs[i]);
4218 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4219 if (p_cv->category != t_cv->category
4220 || (p_cv->category->type != CCT_TOTAL
4221 && p_cv->category->type != CCT_SUBTOTAL
4222 && p_cv->category->type != CCT_POSTCOMPUTE
4223 && !value_equal (&p_cv->value,
4225 var_get_width (nest->vars[i]))))
/* Evaluate the requested summary of the cell that was found. */
4237 const struct ctables_table *t = s->table;
4238 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4239 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4240 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4241 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.
   The dispatch statement and the case labels of the arithmetic operators are
   on lines that are not visible in this excerpt. */
4245 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4246 const struct ctables_pcexpr *e)
/* Range categories: accumulate the cell value of every occurrence of the
   postcompute variable that matches the category found for E. */
4253 case CTPO_CAT_NRANGE:
4254 case CTPO_CAT_SRANGE:
4256 struct ctables_cell_value cv = {
4257 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4259 assert (cv.category != NULL);
4261 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4262 const struct ctables_occurrence *o;
4265 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4266 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4267 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4269 cv.value = o->value;
4270 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Categories resolved with a single cell lookup; the numeric value of E is
   stored in the lookup key. */
4275 case CTPO_CAT_NUMBER:
4276 case CTPO_CAT_MISSING:
4277 case CTPO_CAT_OTHERNM:
4278 case CTPO_CAT_SUBTOTAL:
4279 case CTPO_CAT_TOTAL:
4281 struct ctables_cell_value cv = {
4282 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4283 .value = { .f = e->number },
4285 assert (cv.category != NULL);
4286 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String category: when the variable is wider than the string in E, a
   space-padded copy of that width is allocated and used as the lookup
   value.  The release of that copy is not visible in this excerpt. */
4289 case CTPO_CAT_STRING:
4291 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4293 if (width > e->string.length)
4295 s = xmalloc (width);
4296 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4298 struct ctables_cell_value cv = {
4299 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4300 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4302 assert (cv.category != NULL);
4303 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Arithmetic operators: five binary callbacks followed by one unary
   callback, each applied to recursively evaluated operands. */
4309 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4312 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4315 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4318 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4321 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4324 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Locates the postcompute category among the category values of CELL, which
   must have its postcompute flag set, by scanning every axis and every
   variable index of the nests of section S.  The index of the match is
   stored through the output pointer visible near the end of the function,
   and a category must have been found by then (asserted).  The handling of
   more than one postcompute in the same cell is on lines that are not
   visible in this excerpt. */
4330 static const struct ctables_category *
4331 ctables_cell_postcompute (const struct ctables_section *s,
4332 const struct ctables_cell *cell,
4333 enum pivot_axis_type *pc_a_p,
4336 assert (cell->postcompute);
4337 const struct ctables_category *pc_cat = NULL;
4338 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4339 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4341 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4342 if (cv->category->type == CCT_POSTCOMPUTE)
4346 /* Multiple postcomputes cross each other. The value is
4351 pc_cat = cv->category;
4355 *pc_a_idx_p = pc_a_idx;
4359 assert (pc_cat != NULL);
/* Computes the value of postcompute CELL in section S for summary spec SS,
   whose index among the summaries is summary_idx (that parameter is declared
   on a line not visible in this excerpt).  *FORMAT and *IS_CTABLES_FORMAT
   are overwritten when the postcompute has its own spec for the same
   function and percentile as SS. */
4364 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4365 const struct ctables_cell *cell,
4366 const struct ctables_summary_spec *ss,
4367 struct fmt_spec *format,
4368 bool *is_ctables_format,
4371 enum pivot_axis_type pc_a = 0;
4372 size_t pc_a_idx = 0;
4373 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4374 s, cell, &pc_a, &pc_a_idx);
4378 const struct ctables_postcompute *pc = pc_cat->pc;
/* Search the specs attached to the postcompute for one that matches SS.
   The guard on pc->specs, if any, is not visible in this excerpt. */
4381 for (size_t i = 0; i < pc->specs->n; i++)
4383 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4384 if (ss->function == ss2->function
4385 && ss->percentile == ss2->percentile)
4387 *format = ss2->format;
4388 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable at the
   postcompute position. */
4394 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4395 const struct ctables_categories *cats = s->table->categories[
4396 var_get_dict_index (var)];
4397 struct ctables_pcexpr_evaluate_ctx ctx = {
4402 .pc_a_idx = pc_a_idx,
4403 .summary_idx = summary_idx,
4404 .parse_format = pc_cat->parse_format,
4406 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Builds a pivot table from table T of CTABLES command CT and submits it for
   output.

   Visible steps, in order:

   - Create the pivot table and set its caption and corner text from T.

   - Optionally create a dimension for summary statistics and a dimension
     for category labels that were moved from another axis.

   - For each axis, gather the cells of every section that uses each nest of
     that axis, sort them, and turn the sorted run into nested pivot
     categories, recording the leaf index of each cell.

   - For every cell and every summary spec, compute the value (through the
     postcompute evaluator for postcompute cells), choose its presentation
     and put it into the table.

   NOTE(review): in the loop that counts levels shared with the previous
   cell, value_equal is given var_get_type of the variable, whereas
   ctables_pcexpr_evaluate_category gives value_equal var_get_width for the
   same kind of comparison.  If value_equal expects a width, string values
   would be compared incorrectly there.  Confirm and fix.

   NOTE(review): the printf over the sorted cells looks like debugging
   output.  Any preprocessor guard around it is not visible in this excerpt;
   confirm that it is compiled out. */
4410 ctables_table_output (struct ctables *ct, struct ctables_table *t)
/* The title comes from T when available, otherwise a default title is used.
   The condition selecting between the two is not visible in this excerpt. */
4412 struct pivot_table *pt = pivot_table_create__ (
4414 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4415 : pivot_value_new_text (N_("Custom Tables"))),
4418 pivot_table_set_caption (
4419 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4421 pivot_table_set_corner_text (
4422 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate dimension for summaries is created when summary labels live on
   an axis other than the summary axis, or when they are hidden and there is
   more than one summary spec. */
4424 bool summary_dimension = (t->summary_axis != t->slabels_axis
4425 || (!t->slabels_visible
4426 && t->summary_specs.n > 1));
4427 if (summary_dimension)
4429 struct pivot_dimension *d = pivot_dimension_create (
4430 pt, t->slabels_axis, N_("Statistics"));
4431 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4432 if (!t->slabels_visible)
4433 d->hide_all_labels = true;
4434 for (size_t i = 0; i < specs->n; i++)
4435 pivot_category_create_leaf (
4436 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A separate dimension for category labels is created when an example
   variable for moved labels has been recorded in T. */
4439 bool categories_dimension = t->clabels_example != NULL;
4440 if (categories_dimension)
4442 struct pivot_dimension *d = pivot_dimension_create (
4443 pt, t->label_axis[t->clabels_from_axis],
4444 t->clabels_from_axis == PIVOT_AXIS_ROW
4445 ? N_("Row Categories")
4446 : N_("Column Categories"));
4447 const struct variable *var = t->clabels_example;
4448 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4449 for (size_t i = 0; i < t->n_clabels_values; i++)
4451 const struct ctables_value *value = t->clabels_values[i];
4452 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4453 assert (cat != NULL);
4454 pivot_category_create_leaf (d->root, ctables_category_create_label (
4455 c, cat, t->clabels_example,
4460 pivot_table_set_look (pt, ct->look);
/* One pivot dimension per axis that has variables or carries the
   summaries. */
4461 struct pivot_dimension *d[PIVOT_N_AXES];
4462 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4464 static const char *names[] = {
4465 [PIVOT_AXIS_ROW] = N_("Rows"),
4466 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4467 [PIVOT_AXIS_LAYER] = N_("Layers"),
4469 d[a] = (t->axes[a] || a == t->summary_axis
4470 ? pivot_dimension_create (pt, a, names[a])
4475 assert (t->axes[a]);
4477 for (size_t i = 0; i < t->stacks[a].n; i++)
4479 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis A, counting their cells
   and tracking the deepest nest among them. */
4480 struct ctables_section **sections = xnmalloc (t->n_sections,
4482 size_t n_sections = 0;
4484 size_t n_total_cells = 0;
4485 size_t max_depth = 0;
4486 for (size_t j = 0; j < t->n_sections; j++)
4487 if (t->sections[j].nests[a] == nest)
4489 struct ctables_section *s = &t->sections[j];
4490 sections[n_sections++] = s;
4491 n_total_cells += s->cells.count;
4493 size_t depth = s->nests[a]->n;
4494 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into one array and sort it.  The filter
   applied inside the loop, if any, is not visible in this excerpt. */
4497 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4499 size_t n_sorted = 0;
4501 for (size_t j = 0; j < n_sections; j++)
4503 struct ctables_section *s = sections[j];
4505 struct ctables_cell *cell;
4506 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4508 sorted[n_sorted++] = cell;
4509 assert (n_sorted <= n_total_cells);
4512 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4513 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4516 for (size_t j = 0; j < n_sorted; j++)
4518 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
/* Description of one level of nested pivot categories for this nest. */
4523 struct ctables_level
4525 enum ctables_level_type
4527 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4528 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4529 CTL_SUMMARY, /* Summary functions. */
4533 enum settings_value_show vlabel; /* CTL_VAR only. */
/* At most two levels per variable plus one summary level are needed, which
   is why the array holds 1 + 2 * max_depth elements. */
4536 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4537 size_t n_levels = 0;
4538 for (size_t k = 0; k < nest->n; k++)
4540 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4541 if (vlabel != CTVL_NONE)
4543 levels[n_levels++] = (struct ctables_level) {
4545 .vlabel = (enum settings_value_show) vlabel,
/* A category level is added for non-scale variables, except for the
   innermost variable when its labels were moved to another axis. */
4550 if (nest->scale_idx != k
4551 && (k != nest->n - 1 || t->label_axis[a] == a))
4553 levels[n_levels++] = (struct ctables_level) {
4554 .type = CTL_CATEGORY,
/* Summaries become the innermost level on the summary label axis when they
   did not get a dimension of their own. */
4560 if (!summary_dimension && a == t->slabels_axis)
4562 levels[n_levels++] = (struct ctables_level) {
4563 .type = CTL_SUMMARY,
4564 .var_idx = SIZE_MAX,
4568 /* Pivot categories:
4570 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4571 - category for nest->vars[0], if nest->scale_idx != 0
4572 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4573 - category for nest->vars[1], if nest->scale_idx != 1
4575 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4576 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4577 - summary function, if 'a == t->slabels_axis && a ==
4580 Additional dimensions:
4582 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4584 - If 't->label_axis[b] == a' for some 'b != a', add a category
4589 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4591 for (size_t j = 0; j < n_sorted; j++)
4593 struct ctables_cell *cell = sorted[j];
4594 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4596 size_t n_common = 0;
4599 for (; n_common < n_levels; n_common++)
4601 const struct ctables_level *level = &levels[n_common];
4602 if (level->type == CTL_CATEGORY)
4604 size_t var_idx = level->var_idx;
4605 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4606 if (prev->axes[a].cvs[var_idx].category != c)
4608 else if (c->type != CCT_SUBTOTAL
4609 && c->type != CCT_TOTAL
4610 && c->type != CCT_POSTCOMPUTE
4611 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4612 &cell->axes[a].cvs[var_idx].value,
4613 var_get_type (nest->vars[var_idx])))
4619 for (size_t k = n_common; k < n_levels; k++)
4621 const struct ctables_level *level = &levels[k];
4622 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4623 if (level->type == CTL_SUMMARY)
4625 assert (k == n_levels - 1);
4627 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4628 for (size_t m = 0; m < specs->n; m++)
4630 int leaf = pivot_category_create_leaf (
4631 parent, ctables_summary_label (&specs->specs[m],
4639 const struct variable *var = nest->vars[level->var_idx];
4640 struct pivot_value *label;
4641 if (level->type == CTL_VAR)
4643 label = pivot_value_new_variable (var);
4644 label->variable.show = level->vlabel;
4646 else if (level->type == CTL_CATEGORY)
4648 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4649 label = ctables_category_create_label (
4650 t->categories[var_get_dict_index (var)],
4651 cv->category, var, &cv->value);
4656 if (k == n_levels - 1)
4657 prev_leaf = pivot_category_create_leaf (parent, label);
4659 groups[k] = pivot_category_create_group__ (parent, label);
4663 cell->axes[a].leaf = prev_leaf;
4672 for (size_t i = 0; i < t->n_sections; i++)
4674 struct ctables_section *s = &t->sections[i];
4676 struct ctables_cell *cell;
4677 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4682 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4683 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4684 for (size_t j = 0; j < specs->n; j++)
4687 size_t n_dindexes = 0;
4689 if (summary_dimension)
4690 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4692 if (categories_dimension)
4694 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4695 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4696 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4697 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4700 dindexes[n_dindexes++] = ctv->leaf;
4703 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4706 int leaf = cell->axes[a].leaf;
4707 if (a == t->summary_axis && !summary_dimension)
4709 dindexes[n_dindexes++] = leaf;
4712 const struct ctables_summary_spec *ss = &specs->specs[j];
4714 struct fmt_spec format = specs->specs[j].format;
4715 bool is_ctables_format = ss->is_ctables_format;
4716 double d = (cell->postcompute
4717 ? ctables_cell_calculate_postcompute (
4718 s, cell, ss, &format, &is_ctables_format, j)
4719 : ctables_summary_value (cell, &cell->summaries[j],
4722 struct pivot_value *value;
4723 if (ct->hide_threshold != 0
4724 && d < ct->hide_threshold
4725 && ctables_summary_function_is_count (ss->function))
4727 value = pivot_value_new_user_text_nocopy (
4728 xasprintf ("<%d", ct->hide_threshold));
4730 else if (d == 0 && ct->zero)
4731 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4732 else if (d == SYSMIS && ct->missing)
4733 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4734 else if (is_ctables_format)
4736 char *s = data_out_stretchy (&(union value) { .f = d },
4738 &ct->ctables_formats, NULL);
4739 value = pivot_value_new_user_text_nocopy (s);
4743 value = pivot_value_new_number (d);
4744 value->numeric.format = format;
4746 pivot_table_put (pt, dindexes, n_dindexes, value);
4751 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T may be shown at
   the position recorded in t->label_axis[a].  On the visible path it records
   A as the axis the labels come from and the innermost variable of the first
   nest as the example variable, then reports an error with msg for each
   violated requirement.  The return statements are not visible in this
   excerpt; ctables_prepare_table uses the result as a truth value. */
4755 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4757 enum pivot_axis_type label_pos = t->label_axis[a];
4761 t->clabels_from_axis = a;
/* Names used only to build the error messages below. */
4763 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4764 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4766 const struct ctables_stack *stack = &t->stacks[a];
4770 const struct ctables_nest *n0 = &stack->nests[0];
4773 assert (stack->n == 1);
/* The innermost variable of the first nest is the reference against which
   the other nests are compared. */
4777 const struct variable *v0 = n0->vars[n0->n - 1];
4778 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4779 t->clabels_example = v0;
/* Categories sorted by a summary function cannot be moved. */
4781 for (size_t i = 0; i < c0->n_cats; i++)
4782 if (c0->cats[i].type == CCT_FUNCTION)
4784 msg (SE, _("%s=%s is not allowed with sorting based "
4785 "on a summary function."),
4786 subcommand_name, pos_name);
/* The variable whose labels move must not be the scale variable. */
4789 if (n0->n - 1 == n0->scale_idx)
4791 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4792 "but %s is a scale variable."),
4793 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest must end in a categorical variable with the same width,
   value labels and category specifications as the reference variable. */
4797 for (size_t i = 1; i < stack->n; i++)
4799 const struct ctables_nest *ni = &stack->nests[i];
4801 const struct variable *vi = ni->vars[ni->n - 1];
4802 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4804 if (ni->n - 1 == ni->scale_idx)
4806 msg (SE, _("%s=%s requires the variables to be moved to be "
4807 "categorical, but %s is a scale variable."),
4808 subcommand_name, pos_name, var_get_name (vi));
4811 if (var_get_width (v0) != var_get_width (vi))
4813 msg (SE, _("%s=%s requires the variables to be "
4814 "moved to have the same width, but %s has "
4815 "width %d and %s has width %d."),
4816 subcommand_name, pos_name,
4817 var_get_name (v0), var_get_width (v0),
4818 var_get_name (vi), var_get_width (vi));
4821 if (!val_labs_equal (var_get_value_labels (v0),
4822 var_get_value_labels (vi)))
4824 msg (SE, _("%s=%s requires the variables to be "
4825 "moved to have the same value labels, but %s "
4826 "and %s have different value labels."),
4827 subcommand_name, pos_name,
4828 var_get_name (v0), var_get_name (vi));
4831 if (!ctables_categories_equal (c0, ci))
4833 msg (SE, _("%s=%s requires the variables to be "
4834 "moved to have the same category "
4835 "specifications, but %s and %s have different "
4836 "category specifications."),
4837 subcommand_name, pos_name,
4838 var_get_name (v0), var_get_name (vi));
/* Adds VAR to the array *SUM_VARS, which holds *N elements with room for
   *ALLOCATED, unless VAR is already present.  The array is grown with
   x2nrealloc when it is full.  The return statements are not visible in this
   excerpt; enumerate_sum_vars stores the result as an index. */
4847 add_sum_var (struct variable *var,
4848 struct variable ***sum_vars, size_t *n, size_t *allocated)
/* Linear search for an existing entry. */
4850 for (size_t i = 0; i < *n; i++)
4851 if (var == (*sum_vars)[i])
4854 if (*n >= *allocated)
4855 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4856 (*sum_vars)[*n] = var;
/* Walks axis expression A.  For every summary spec whose function satisfies
   ctables_function_is_pctsum, registers the variable of A with add_sum_var
   and stores the result in the sum_var_idx member of the spec.  The two
   subexpressions of A are visited recursively.  The statement that selects
   between the two visible loops is not visible in this excerpt. */
4861 enumerate_sum_vars (const struct ctables_axis *a,
4862 struct variable ***sum_vars, size_t *n, size_t *allocated)
4870 for (size_t i = 0; i < N_CSVS; i++)
4871 for (size_t j = 0; j < a->specs[i].n; j++)
4873 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4874 if (ctables_function_is_pctsum (spec->function))
4875 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4881 for (size_t i = 0; i < 2; i++)
4882 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  Visible steps:

   - For each axis, enumerate its nests and compute, for every domain type,
     the variable indexes that make up the domain.  An axis that takes the
     other visible branch gets a single empty nest and keeps its labels.

   - For each nest on the summary axis, supply a default summary spec when
     none was given, clone cell specs into total specs when needed, and
     collect listwise variables when listwise missing handling is on.

   - Merge the specs of all nests into t->summary_specs, assigning each spec
     its index in the merged set.

   - Enumerate the variables needed by the functions that satisfy
     ctables_function_is_pctsum.

   Returns true only if the label positions for both the row and column axes
   pass ctables_check_label_position.

   NOTE(review): the printf calls near the end look like debugging output.
   Any preprocessor guard around them is not visible in this excerpt; confirm
   that they are compiled out. */
4888 ctables_prepare_table (struct ctables_table *t)
4890 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4893 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4895 for (size_t j = 0; j < t->stacks[a].n; j++)
4897 struct ctables_nest *nest = &t->stacks[a].nests[j];
4898 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Room for every variable index of the nest; the count starts at zero and
   grows as indexes are accepted below. */
4900 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4901 nest->n_domains[dt] = 0;
4903 for (size_t k = 0; k < nest->n; k++)
4905 if (k == nest->scale_idx)
4914 if (a != PIVOT_AXIS_LAYER)
4921 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4922 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4923 : a == PIVOT_AXIS_ROW)
4925 if (k == nest->n - 1
4926 || (nest->scale_idx == nest->n - 1
4927 && k == nest->n - 2))
4933 if (a == PIVOT_AXIS_COLUMN)
4938 if (a == PIVOT_AXIS_ROW)
4943 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Alternative branch: the axis gets one nest with no variables. */
4950 struct ctables_nest *nest = xmalloc (sizeof *nest);
4951 *nest = (struct ctables_nest) { .n = 0 };
4952 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4954 /* There's no point in moving labels away from an axis that has no
4955 labels, so avoid dealing with the special cases around that. */
4956 t->label_axis[a] = a;
4959 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4960 for (size_t i = 0; i < stack->n; i++)
4962 struct ctables_nest *nest = &stack->nests[i];
/* No cell specs were given: default to the mean for a scale spec set and
   to the count otherwise. */
4963 if (!nest->specs[CSV_CELL].n)
4965 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4966 specs->specs = xmalloc (sizeof *specs->specs);
4969 enum ctables_summary_function function
4970 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4972 *specs->specs = (struct ctables_summary_spec) {
4973 .function = function,
4974 .format = ctables_summary_default_format (function, specs->var),
/* NOTE(review): the condition guarding this assignment, if any, is not
   visible in this excerpt. */
4977 specs->var = nest->vars[0];
4979 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4980 &nest->specs[CSV_CELL]);
4982 else if (!nest->specs[CSV_TOTAL].n)
4983 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4984 &nest->specs[CSV_CELL]);
/* Listwise handling: gather the scale variables of the other nests that
   share the group head of this nest, and attach the list to every spec set
   of this nest. */
4986 if (t->ctables->smissing_listwise)
4988 struct variable **listwise_vars = NULL;
4990 size_t allocated = 0;
4992 for (size_t j = nest->group_head; j < stack->n; j++)
4994 const struct ctables_nest *other_nest = &stack->nests[j];
4995 if (other_nest->group_head != nest->group_head)
4998 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5001 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5002 sizeof *listwise_vars);
5003 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5006 for (size_t j = 0; j < N_CSVS; j++)
5008 nest->specs[j].listwise_vars = listwise_vars;
5009 nest->specs[j].n_listwise_vars = n;
/* Merge: repeatedly take the smallest pending spec over all spec sets,
   append it to the merged set, and advance every set whose current spec
   compares equal to it, recording the merged index in that spec. */
5014 struct ctables_summary_spec_set *merged = &t->summary_specs;
5015 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5017 for (size_t j = 0; j < stack->n; j++)
5019 const struct ctables_nest *nest = &stack->nests[j];
5021 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5022 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5027 struct merge_item min = items[0];
5028 for (size_t j = 1; j < n_left; j++)
5029 if (merge_item_compare_3way (&items[j], &min) < 0)
5032 if (merged->n >= merged->allocated)
5033 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5034 sizeof *merged->specs);
5035 merged->specs[merged->n++] = min.set->specs[min.ofs];
5037 for (size_t j = 0; j < n_left; )
5039 if (merge_item_compare_3way (&items[j], &min) == 0)
5041 struct merge_item *item = &items[j];
5042 item->set->specs[item->ofs].axis_idx = merged->n - 1;
/* An exhausted set is removed by overwriting it with the last pending
   item. */
5043 if (++item->ofs >= item->set->n)
5045 items[j] = items[--n_left];
5054 for (size_t j = 0; j < merged->n; j++)
5055 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5057 for (size_t j = 0; j < stack->n; j++)
5059 const struct ctables_nest *nest = &stack->nests[j];
5060 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5062 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5063 for (size_t k = 0; k < specs->n; k++)
5064 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5065 specs->specs[k].axis_idx);
5071 size_t allocated_sum_vars = 0;
5072 enumerate_sum_vars (t->axes[t->summary_axis],
5073 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5075 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5076 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of table T, takes the value in case C of the
   innermost variable of the nest and records it with ctables_value_insert
   when it matches one of the categories of that variable.  Numeric
   system-missing values get separate treatment on a line that is not visible
   in this excerpt (presumably they are skipped; TODO confirm). */
5080 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5081 enum pivot_axis_type a)
5083 struct ctables_stack *stack = &t->stacks[a];
5084 for (size_t i = 0; i < stack->n; i++)
5086 const struct ctables_nest *nest = &stack->nests[i];
5087 const struct variable *var = nest->vars[nest->n - 1];
5088 const union value *value = case_data (c, var);
5090 if (var_is_numeric (var) && value->f == SYSMIS)
5093 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5095 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sort.  A_ and B_ each point to a pointer to a
   struct ctables_value, and WIDTH_ points to the int width of the values.
   Returns the result of value_compare_3way on the two values. */
5100 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5102 const struct ctables_value *const *ap = a_;
5103 const struct ctables_value *const *bp = b_;
5104 const struct ctables_value *a = *ap;
5105 const struct ctables_value *b = *bp;
5106 const int *width = width_;
5107 return value_compare_3way (&a->value, &b->value, *width);
/* Builds t->clabels_values, a sorted array of the values collected in
   t->clabels_values_map, and assigns each value its position in that array
   as its leaf index.  Before that, values that have value labels and match a
   category of the example variable are inserted into the map; the condition
   guarding that step is not visible in this excerpt. */
5111 ctables_sort_clabels_values (struct ctables_table *t)
5113 const struct variable *v0 = t->clabels_example;
5114 int width = var_get_width (v0);
5116 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5119 const struct val_labs *val_labs = var_get_value_labels (v0);
5120 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5121 vl = val_labs_next (val_labs, vl))
5122 if (ctables_categories_match (c0, &vl->value, v0))
5123 ctables_value_insert (t, &vl->value, width);
/* Copy the map entries into an array so that they can be sorted. */
5126 size_t n = hmap_count (&t->clabels_values_map);
5127 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5129 struct ctables_value *clv;
5131 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5132 t->clabels_values[i++] = clv;
5133 t->n_clabels_values = n;
5136 sort (t->clabels_values, n, sizeof *t->clabels_values,
5137 compare_clabels_values_3way, &width);
/* The sorted position doubles as the pivot leaf index. */
5139 for (size_t i = 0; i < n; i++)
5140 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR implied by the category
   specifications CATS, so that they are represented even when no case has
   them.  Explicit number and string categories contribute their own value.
   The loops over value labels contribute each labeled value that passes the
   test shown in the loop.  Most of the case labels that select between
   these branches are not visible in this excerpt. */
5144 ctables_add_category_occurrences (const struct variable *var,
5145 struct hmap *occurrences,
5146 const struct ctables_categories *cats)
5148 const struct val_labs *val_labs = var_get_value_labels (var);
5150 for (size_t i = 0; i < cats->n_cats; i++)
5152 const struct ctables_category *c = &cats->cats[i];
/* Explicit numeric category. */
5156 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string category: pad the string with spaces to the width of the
   variable in a temporary value. */
5162 int width = var_get_width (var);
5164 value_init (&value, width);
5165 value_copy_buf_rpad (&value, width,
5166 CHAR_CAST (uint8_t *, c->string.string),
5167 c->string.length, ' ');
5168 ctables_add_occurrence (var, &value, occurrences);
5169 value_destroy (&value, width);
/* Numeric range: labeled values within the inclusive bounds. */
5174 assert (var_is_numeric (var));
5175 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5176 vl = val_labs_next (val_labs, vl))
5177 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5178 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range. */
5182 assert (var_is_alpha (var));
5183 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5184 vl = val_labs_next (val_labs, vl))
5185 if (in_string_range (&vl->value, var, c->srange))
5186 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5190 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5191 vl = val_labs_next (val_labs, vl))
5192 if (var_is_value_missing (var, &vl->value))
5193 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5197 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5198 vl = val_labs_next (val_labs, vl))
5199 ctables_add_occurrence (var, &vl->value, occurrences);
5202 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless the category includes
   missing values. */
5212 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5213 vl = val_labs_next (val_labs, vl))
5214 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5215 ctables_add_occurrence (var, &vl->value, occurrences);
5218 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates every combination of occurrences and postcompute
   categories over the variables of section S, starting at axis A and
   variable index A_IDX, and inserts a cell for each complete combination.
   CATS accumulates the category chosen for each position and C is a scratch
   case whose data is overwritten with the value chosen for each variable. */
5225 ctables_section_recurse_add_empty_categories (
5226 struct ctables_section *s,
5227 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5228 enum pivot_axis_type a, size_t a_idx)
/* All axes are done: insert the cell for this combination. */
5230 if (a >= PIVOT_N_AXES)
5231 ctables_cell_insert__ (s, c, cats);
/* This axis is exhausted or has no nest: move on to the next axis. */
5232 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5233 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5236 const struct variable *var = s->nests[a]->vars[a_idx];
5237 const struct ctables_categories *categories = s->table->categories[
5238 var_get_dict_index (var)];
5239 int width = var_get_width (var);
5240 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5241 const struct ctables_occurrence *o;
/* Try every recorded occurrence of this variable in turn. */
5242 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5244 union value *value = case_data_rw (c, var);
5245 value_destroy (value, width);
5246 value_clone (value, &o->value, width);
5247 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5248 assert (cats[a][a_idx] != NULL);
5249 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories have no occurrence of their own, so each one is
   tried explicitly. */
5252 for (size_t i = 0; i < categories->n_cats; i++)
5254 const struct ctables_category *cat = &categories->cats[i];
5255 if (cat->type == CCT_POSTCOMPUTE)
5257 cats[a][a_idx] = cat;
5258 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells for empty categories to section S.  Category occurrences are
   added for the non-scale variables whose category specifications ask for
   empty categories to be shown, and then every combination is inserted
   through ctables_section_recurse_add_empty_categories using a scratch case
   created from the dictionary prototype.  How show_empty gates the final
   step is on lines that are not visible in this excerpt. */
5265 ctables_section_add_empty_categories (struct ctables_section *s)
5267 bool show_empty = false;
5268 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5270 for (size_t k = 0; k < s->nests[a]->n; k++)
5271 if (k != s->nests[a]->scale_idx)
5273 const struct variable *var = s->nests[a]->vars[k];
5274 const struct ctables_categories *cats = s->table->categories[
5275 var_get_dict_index (var)];
5276 if (cats->show_empty)
5279 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* NOTE(review): the second dimension is fixed at 10 and the original
   author tagged it as unresolved.  A nest with more than 10 variables would
   not fit; confirm whether that is prevented elsewhere. */
5285 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5286 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5287 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S so that it can be reused: removes all occurrences, all
   cells (destroying their values and summaries) and all domains from their
   hash maps.  The maps themselves stay usable.  Calls that free the removed
   nodes, other than those shown, are not visible in this excerpt. */
5292 ctables_section_clear (struct ctables_section *s)
/* Occurrences of every non-scale variable on every axis. */
5294 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5296 const struct ctables_nest *nest = s->nests[a];
5297 for (size_t i = 0; i < nest->n; i++)
5298 if (i != nest->scale_idx)
5300 const struct variable *var = nest->vars[i];
5301 int width = var_get_width (var);
5302 struct ctables_occurrence *o, *next;
5303 struct hmap *map = &s->occurrences[a][i];
5304 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5306 value_destroy (&o->value, width);
5307 hmap_delete (map, &o->node);
/* Cells: destroy the per-axis values, then the summaries, then unlink. */
5314 struct ctables_cell *cell, *next_cell;
5315 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5317 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5319 const struct ctables_nest *nest = s->nests[a];
5320 for (size_t i = 0; i < nest->n; i++)
5321 if (i != nest->scale_idx)
5322 value_destroy (&cell->axes[a].cvs[i].value,
5323 var_get_width (nest->vars[i]));
5324 free (cell->axes[a].cvs);
5327 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5328 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5329 for (size_t i = 0; i < specs->n; i++)
5330 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5331 free (cell->summaries);
5333 hmap_delete (&s->cells, &cell->node);
5336 hmap_shrink (&s->cells);
/* Domains of every domain type. */
5338 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5340 struct ctables_domain *domain, *next_domain;
5341 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5344 free (domain->sums);
5345 hmap_delete (&s->domains[dt], &domain->node);
5348 hmap_shrink (&s->domains[dt]);
/* Releases everything owned by section S: clears its contents with
   ctables_section_clear, then destroys the occurrence maps of every axis
   together with the arrays that hold them, the cell map and the domain
   maps. */
5353 ctables_section_uninit (struct ctables_section *s)
5355 ctables_section_clear (s);
5357 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5359 for (size_t i = 0; i < s->nests[a]->n; i++)
5360 hmap_destroy (&s->occurrences[a][i]);
5361 free (s->occurrences[a]);
5364 hmap_destroy (&s->cells);
5365 for (size_t i = 0; i < N_CTDTS; i++)
5366 hmap_destroy (&s->domains[i]);
/* Clears the data accumulated in table T: every section is cleared, and
   when category labels were moved (an example variable is recorded) the
   collected label values are destroyed and the sorted array is released and
   reset.  Calls that free the removed map nodes are not visible in this
   excerpt. */
5370 ctables_table_clear (struct ctables_table *t)
5372 for (size_t i = 0; i < t->n_sections; i++)
5373 ctables_section_clear (&t->sections[i]);
5375 if (t->clabels_example)
5377 int width = var_get_width (t->clabels_example);
5378 struct ctables_value *value, *next_value;
5379 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5380 &t->clabels_values_map)
5382 value_destroy (&value->value, width);
5383 hmap_delete (&t->clabels_values_map, &value->node);
5386 hmap_shrink (&t->clabels_values_map);
5388 free (t->clabels_values);
5389 t->clabels_values = NULL;
5390 t->n_clabels_values = 0;
/* Runs the CTABLES command over the cases in INPUT from dataset DS.  The
   command state is referred to as ct in the body; its parameter declaration
   is on a line that is not visible in this excerpt.

   Sections are allocated for every table.  The input is then processed one
   split group at a time: each case is added to every section of every
   table, and afterwards every table is completed with empty categories,
   output, and cleared for the next group.  Returns the result of
   casegrouper_destroy. */
5395 ctables_execute (struct dataset *ds, struct casereader *input,
5398 for (size_t i = 0; i < ct->n_tables; i++)
5400 struct ctables_table *t = ct->tables[i];
/* One section per combination of row, column and layer nests, counting an
   axis without nests as one. */
5401 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5402 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5403 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5404 sizeof *t->sections);
5405 size_t ix[PIVOT_N_AXES];
5406 ctables_table_add_section (t, 0, ix);
/* Group by split variables only when splits are shown as separate output;
   otherwise all cases form a single group. */
5409 struct dictionary *dict = dataset_dict (ds);
5410 struct casegrouper *grouper
5411 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5412 ? casegrouper_create_splits (input, dict)
5413 : casegrouper_create_vars (input, NULL, 0));
5414 struct casereader *group;
5415 while (casegrouper_get_next_group (grouper, &group))
5417 /* Output SPLIT FILE variables. */
5418 struct ccase *c = casereader_peek (group, 0);
5421 output_split_file_values (ds, c);
5425 bool warn_on_invalid = true;
5426 for (c = casereader_read (group); c;
5427 case_unref (c), c = casereader_read (group))
/* Two weights are computed for each case: the dictionary case weight, and a
   second weight taken from ct->e_weight when that variable is set.  The
   fallback for the second weight is not visible in this excerpt. */
5429 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5430 double e_weight = (ct->e_weight
5431 ? var_force_valid_weight (ct->e_weight,
5432 case_num (c, ct->e_weight),
5436 for (size_t i = 0; i < ct->n_tables; i++)
5438 struct ctables_table *t = ct->tables[i];
5440 for (size_t j = 0; j < t->n_sections; j++)
5441 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Collect label values for axes whose category labels were moved. */
5443 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5444 if (t->label_axis[a] != a)
5445 ctables_insert_clabels_values (t, c, a);
5448 casereader_destroy (group);
/* The group has been read in full: finish and emit each table. */
5450 for (size_t i = 0; i < ct->n_tables; i++)
5452 struct ctables_table *t = ct->tables[i];
5454 if (t->clabels_example)
5455 ctables_sort_clabels_values (t);
5457 for (size_t j = 0; j < t->n_sections; j++)
5458 ctables_section_add_empty_categories (&t->sections[j]);
5460 ctables_table_output (ct, t);
5461 ctables_table_clear (t);
5464 return casegrouper_destroy (grouper);
/* Function type shared by the postcompute expression parsing levels: takes
   the lexer and the dictionary and returns an expression node pointer.  The
   binary operator helpers below receive one of these as parse_next_level to
   parse each operand. */
5469 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5470 struct dictionary *);
/* Releases what expression node E owns, by kind of node, and finally its
   location.

   NOTE(review): several case labels and the statements after the last
   visible line are on lines not shown in this listing, so which operators
   take the sub-expression branch, and whether E itself is freed, must be
   confirmed against the full source. */
5473 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
/* A string category owns its single string. */
5479 case CTPO_CAT_STRING:
5480 ss_dealloc (&e->string);
/* A string range owns both of its endpoint strings. */
5483 case CTPO_CAT_SRANGE:
5484 for (size_t i = 0; i < 2; i++)
5485 ss_dealloc (&e->srange[i]);
/* Both sub-expression slots are destroyed recursively. */
5494 for (size_t i = 0; i < 2; i++)
5495 ctables_pcexpr_destroy (e->subs[i]);
/* No deallocation is visible for these kinds. */
5499 case CTPO_CAT_NUMBER:
5500 case CTPO_CAT_NRANGE:
5501 case CTPO_CAT_MISSING:
5502 case CTPO_CAT_OTHERNM:
5503 case CTPO_CAT_SUBTOTAL:
5504 case CTPO_CAT_TOTAL:
5508 msg_location_destroy (e->location);
/* Allocates a new expression node with SUB0 and SUB1 as its two
   sub-expressions.  The location of the node is the merge of the locations
   of the two operands, so both must be non-null.

   NOTE(review): the initializer that stores OP and the return statement are
   on lines not shown in this listing; presumably the new node is returned. */
5513 static struct ctables_pcexpr *
5514 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5515 struct ctables_pcexpr *sub0,
5516 struct ctables_pcexpr *sub1)
5518 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5519 *e = (struct ctables_pcexpr) {
5521 .subs = { sub0, sub1 },
5522 .location = msg_location_merged (sub0->location, sub1->location),
5527 /* How to parse an operator. */
/* Lexer token type that selects this operator; compared against the current
   token by ctable_pcexpr_match_operator. */
5530 enum token_type token;
/* Operation given to the binary expression node built for this operator. */
5531 enum ctables_postcompute_op op;
/* Scans the N_OPS entries of OPS in order for one whose token equals the
   current token of LEXER.

   T_NEG_NUM is treated differently from the other tokens once matched.
   NOTE(review): the statements on both sides of that test, and the return
   statements, are on lines not shown here; presumably the match is returned
   and other tokens are consumed while a negative number is left for the
   operand parser -- TODO confirm. */
5534 static const struct operator *
5535 ctable_pcexpr_match_operator (struct lexer *lexer,
5536 const struct operator ops[], size_t n_ops)
5538 for (const struct operator *op = ops; op < ops + n_ops; op++)
5539 if (lex_token (lexer) == op->token)
5541 if (op->token != T_NEG_NUM)
/* Continues parsing a chain of binary operators drawn from the N_OPS
   entries of OPS, given an already parsed left operand LHS.  Each right
   operand is parsed with PARSE_NEXT_LEVEL and folded into LHS, so the tree
   grows to the left as operators are consumed.

   CHAIN_WARNING, if non-null, is a message reported at the location of LHS
   when the operator count has exceeded 1.

   NOTE(review): the tests guarding the warning, the failure path and the
   return statements are on lines not shown in this listing. */
5550 static struct ctables_pcexpr *
5551 ctable_pcexpr_parse_binary_operators__ (
5552 struct lexer *lexer, struct dictionary *dict,
5553 const struct operator ops[], size_t n_ops,
5554 parse_recursively_func *parse_next_level,
5555 const char *chain_warning, struct ctables_pcexpr *lhs)
/* No loop condition: the loop is left from inside the body. */
5557 for (int op_count = 0; ; op_count++)
5559 const struct operator *op
5560 = ctable_pcexpr_match_operator (lexer, ops, n_ops);
5563 if (op_count > 1 && chain_warning)
5564 msg_at (SW, lhs->location, "%s", chain_warning);
5569 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
/* LHS is destroyed here, presumably when the right operand failed to parse
   (the guarding test is not shown). */
5572 ctables_pcexpr_destroy (lhs);
/* The new binary node becomes the left operand for the next operator. */
5576 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one operand with PARSE_NEXT_LEVEL and then hands it, as the left
   operand, to ctable_pcexpr_parse_binary_operators__ together with OPS,
   N_OPS and CHAIN_WARNING.

   NOTE(review): handling of a failed first operand is on lines not shown
   in this listing. */
5580 static struct ctables_pcexpr *
5581 ctable_pcexpr_parse_binary_operators (struct lexer *lexer,
5582 struct dictionary *dict,
5583 const struct operator ops[], size_t n_ops,
5584 parse_recursively_func *parse_next_level,
5585 const char *chain_warning)
5587 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5591 return ctable_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5593 chain_warning, lhs);
/* Forward declaration: ctable_pcexpr_parse_primary calls this function for
   a parenthesized sub-expression before its definition appears. */
5596 static struct ctables_pcexpr *ctable_pcexpr_parse_add (struct lexer *,
5597 struct dictionary *);
/* Returns, by value, an expression node of kind CTPO_CAT_NRANGE whose
   numeric range endpoints are LOW and HIGH.  No other member is named in
   the visible part of the initializer. */
5599 static struct ctables_pcexpr
5600 ctpo_cat_nrange (double low, double high)
5602 return (struct ctables_pcexpr) {
5603 .op = CTPO_CAT_NRANGE,
5604 .nrange = { low, high },
/* Returns, by value, an expression node of kind CTPO_CAT_SRANGE whose
   string range endpoints are LOW and HIGH.  The substrings are stored as
   given, not copied; callers in this file pass a substring with a null
   string pointer for an open end (LO or HI). */
5608 static struct ctables_pcexpr
5609 ctpo_cat_srange (struct substring low, struct substring high)
5611 return (struct ctables_pcexpr) {
5612 .op = CTPO_CAT_SRANGE,
5613 .srange = { low, high },
/* Parses a primary postcompute expression: a numeric constant, one of the
   keywords MISSING, OTHERNM, TOTAL or SUBTOTAL, a bracketed category
   reference, or a parenthesized sub-expression.

   For the non-parenthesized forms the node is built in the local E, given a
   location spanning the tokens consumed since entry, and returned as a heap
   copy made with xmemdup.

   NOTE(review): the error returns, the token-advancing calls and the return
   for the parenthesized form are on lines not shown in this listing. */
5617 static struct ctables_pcexpr *
5618 ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Token offset at entry, used for the location of the node. */
5620 int start_ofs = lex_ofs (lexer);
5621 struct ctables_pcexpr e;
/* Numeric constant. */
5622 if (lex_is_number (lexer))
5624 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5625 .number = lex_number (lexer) };
/* Keyword categories that carry no further data. */
5628 else if (lex_match_id (lexer, "MISSING"))
5629 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5630 else if (lex_match_id (lexer, "OTHERNM"))
5631 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5632 else if (lex_match_id (lexer, "TOTAL"))
5633 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
/* SUBTOTAL, optionally followed by a bracketed integer of at least 1.
   Without the brackets the index stays 0. */
5634 else if (lex_match_id (lexer, "SUBTOTAL"))
5636 size_t subtotal_index = 0;
5637 if (lex_match (lexer, T_LBRACK))
5639 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5641 subtotal_index = lex_integer (lexer);
5643 if (!lex_force_match (lexer, T_RBRACK))
5646 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5647 .subtotal_index = subtotal_index };
/* Bracketed category reference: a value or a range of values. */
5649 else if (lex_match (lexer, T_LBRACK))
/* LO THRU x: a range open at the low end.  A string upper bound gives a
   string range whose low substring has a null pointer; otherwise a number
   is required and -DBL_MAX serves as the low bound. */
5651 if (lex_match_id (lexer, "LO"))
5653 if (!lex_force_match_id (lexer, "THRU"))
5656 if (lex_is_string (lexer))
5658 struct substring low = { .string = NULL };
5659 struct substring high = parse_substring (lexer, dict);
5660 e = ctpo_cat_srange (low, high);
5664 if (!lex_force_num (lexer))
5666 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number, optionally followed by THRU and either HI (DBL_MAX as the high
   bound) or a second number; a number alone is a CTPO_CAT_NUMBER node. */
5670 else if (lex_is_number (lexer))
5672 double number = lex_number (lexer);
5674 if (lex_match_id (lexer, "THRU"))
5676 if (lex_match_id (lexer, "HI"))
5677 e = ctpo_cat_nrange (number, DBL_MAX);
5680 if (!lex_force_num (lexer))
5682 e = ctpo_cat_nrange (number, lex_number (lexer));
5687 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string, optionally followed by THRU and either HI (high substring with
   a null pointer) or a second string; a string alone is a CTPO_CAT_STRING
   node that takes over the parsed substring. */
5690 else if (lex_is_string (lexer))
5692 struct substring s = parse_substring (lexer, dict);
5694 if (lex_match_id (lexer, "THRU"))
5696 struct substring high;
5698 if (lex_match_id (lexer, "HI"))
5699 high = (struct substring) { .string = NULL };
5702 if (!lex_force_string (lexer))
5707 high = parse_substring (lexer, dict);
5710 e = ctpo_cat_srange (s, high);
5713 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
/* Anything else inside the brackets is reported as a syntax error. */
5717 lex_error (lexer, NULL);
/* The closing bracket is mandatory.  If it is missing, the strings already
   stored in E are released. */
5721 if (!lex_force_match (lexer, T_RBRACK))
5723 if (e.op == CTPO_CAT_STRING)
5724 ss_dealloc (&e.string);
5725 else if (e.op == CTPO_CAT_SRANGE)
5727 ss_dealloc (&e.srange[0]);
5728 ss_dealloc (&e.srange[1]);
/* Parenthesized sub-expression, parsed from the lowest precedence level.
   It is destroyed if the closing parenthesis is missing. */
5733 else if (lex_match (lexer, T_LPAREN))
5735 struct ctables_pcexpr *ep = ctable_pcexpr_parse_add (lexer, dict);
5738 if (!lex_force_match (lexer, T_RPAREN))
5740 ctables_pcexpr_destroy (ep);
/* No primary form matched. */
5747 lex_error (lexer, NULL);
/* The location covers the tokens from START_OFS through the last token
   consumed. */
5751 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5752 return xmemdup (&e, sizeof e);
/* Allocates a new expression node whose location spans the tokens of LEXER
   from START_OFS through the token before the current one.

   NOTE(review): the initializers for the operation and for SUB, and the
   return statement, are on lines not shown in this listing; judging by the
   name and the callers this builds a negation of SUB -- TODO confirm. */
5755 static struct ctables_pcexpr *
5756 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5757 struct lexer *lexer, int start_ofs)
5759 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5760 *e = (struct ctables_pcexpr) {
5763 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level, whose only operator maps T_EXP to
   CTPO_POW and whose operands are primaries.

   Unless the current token is T_NEG_NUM and the token after it is T_EXP,
   this simply runs the generic binary operator parser.

   Otherwise the leading negative number is handled specially: a
   CTPO_CONSTANT node is built from the negated token value, the chain of
   exponentiation operators is parsed with that constant as its left
   operand, and the result is passed to ctables_pcexpr_allocate_neg, with
   START_OFS marking where the whole construct began.

   NOTE(review): the lines that advance the lexer and that handle a failed
   parse are not shown in this listing. */
5768 static struct ctables_pcexpr *
5769 ctable_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5771 static const struct operator op = { T_EXP, CTPO_POW };
5773 const char *chain_warning =
5774 _("The exponentiation operator (`**') is left-associative: "
5775 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5776 "To disable this warning, insert parentheses.");
5778 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5779 return ctable_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5780 ctable_pcexpr_parse_primary,
5783 /* Special case for situations like "-5**6", which must be parsed as
5786 int start_ofs = lex_ofs (lexer);
5787 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5788 *lhs = (struct ctables_pcexpr) {
5789 .op = CTPO_CONSTANT,
5790 .number = -lex_tokval (lexer),
5791 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5795 struct ctables_pcexpr *node = ctable_pcexpr_parse_binary_operators__ (
5796 lexer, dict, &op, 1,
5797 ctable_pcexpr_parse_primary, chain_warning, lhs);
5801 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5804 /* Parses the unary minus level. */
/* If the current token is not T_DASH, the exponentiation level is parsed
   and returned directly.  Otherwise the dash is consumed, this function
   recurses for the operand (so repeated dashes nest), and the operand is
   passed to ctables_pcexpr_allocate_neg with a location that starts at the
   dash.

   NOTE(review): the check for a failed operand parse is on lines not shown
   in this listing. */
5805 static struct ctables_pcexpr *
5806 ctable_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5808 int start_ofs = lex_ofs (lexer);
5809 if (!lex_match (lexer, T_DASH))
5810 return ctable_pcexpr_parse_exp (lexer, dict);
5812 struct ctables_pcexpr *inner = ctable_pcexpr_parse_neg (lexer, dict);
5816 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5819 /* Parses the multiplication and division level. */
/* T_ASTERISK maps to CTPO_MUL and T_SLASH to CTPO_DIV.  Operands come from
   the unary minus level, and no chaining warning is requested (NULL). */
5820 static struct ctables_pcexpr *
5821 ctable_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5823 static const struct operator ops[] =
5825 { T_ASTERISK, CTPO_MUL },
5826 { T_SLASH, CTPO_DIV },
5829 return ctable_pcexpr_parse_binary_operators (lexer, dict, ops,
5830 sizeof ops / sizeof *ops,
5831 ctable_pcexpr_parse_neg, NULL);
5834 /* Parses the addition and subtraction level. */
/* T_PLUS maps to CTPO_ADD and T_DASH to CTPO_SUB.  T_NEG_NUM also maps to
   CTPO_ADD; presumably a negative number token that follows an operand is
   treated as the addition of that negative number (see the T_NEG_NUM test
   in ctable_pcexpr_match_operator -- TODO confirm).  Operands come from the
   multiplication level, and no chaining warning is requested (NULL). */
5835 static struct ctables_pcexpr *
5836 ctable_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5838 static const struct operator ops[] =
5840 { T_PLUS, CTPO_ADD },
5841 { T_DASH, CTPO_SUB },
5842 { T_NEG_NUM, CTPO_ADD },
5845 return ctable_pcexpr_parse_binary_operators (lexer, dict,
5846 ops, sizeof ops / sizeof *ops,
5847 ctable_pcexpr_parse_mul, NULL);
/* Searches the postcomputes hash map of CT for an entry named NAME.  Both
   the hash (utf8_hash_case_string) and the comparison (utf8_strcasecmp)
   are the case-insensitive UTF-8 variants.

   NOTE(review): the return statements are on lines not shown in this
   listing; callers test the result against null, so presumably the match is
   returned, or a null pointer when there is none. */
5850 static struct ctables_postcompute *
5851 ctables_find_postcompute (struct ctables *ct, const char *name)
5853 struct ctables_postcompute *pc;
5854 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5855 utf8_hash_case_string (name, 0), &ct->postcomputes)
5856 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form
   &name = EXPR (expression), and records the postcompute in the hash map of
   the ctables structure.  A name that is already defined gets a warning and
   its previous expression and location are released.

   NOTE(review): the tail of the parameter list, the error returns, some
   cleanup and the final return are on lines not shown in this listing. */
5862 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* One token before the current one: the caller in cmd_ctables has already
   consumed the PCOMPUTE keyword, so the location includes it. */
5865 int pcompute_start = lex_ofs (lexer) - 1;
5867 if (!lex_match (lexer, T_AND))
5869 lex_error_expecting (lexer, "&");
5872 if (!lex_force_id (lexer))
/* Heap copy of the identifier; it becomes the name of a newly created
   postcompute below. */
5875 char *name = ss_xstrdup (lex_tokss (lexer));
5878 if (!lex_force_match (lexer, T_EQUALS)
5879 || !lex_force_match_id (lexer, "EXPR")
5880 || !lex_force_match (lexer, T_LPAREN))
/* expr_start and expr_end delimit the tokens of the expression itself,
   without the surrounding parentheses. */
5886 int expr_start = lex_ofs (lexer);
5887 struct ctables_pcexpr *expr = ctable_pcexpr_parse_add (lexer, dict);
5888 int expr_end = lex_ofs (lexer) - 1;
5889 if (!expr || !lex_force_match (lexer, T_RPAREN))
5894 int pcompute_end = lex_ofs (lexer) - 1;
5896 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5899 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn at the new definition, point at the old one, and
   release what the old definition owned. */
5902 msg_at (SW, location, _("New definition of &%s will override the "
5903 "previous definition."),
5905 msg_at (SN, pc->location, _("This is the previous definition."));
5907 ctables_pcexpr_destroy (pc->expr);
5908 msg_location_destroy (pc->location);
/* First definition: create the entry and insert it under the
   case-insensitive hash of its name. */
5913 pc = xmalloc (sizeof *pc);
5914 *pc = (struct ctables_postcompute) { .name = name };
5915 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5916 utf8_hash_case_string (pc->name, 0));
5919 pc->location = location;
/* The label is set to the source text of the expression tokens. */
5921 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the summary specifications of a PPROPERTIES FORMAT setting into
   SSS, which is reset to an empty set first.  Each specification is a
   summary function, a percentile when the function is CTSF_PTILE, and a
   format specifier.

   NOTE(review): the return statements and part of the appended element are
   on lines not shown in this listing; the caller treats a false result as
   failure. */
5926 ctables_parse_pproperties_format (struct lexer *lexer,
5927 struct ctables_summary_spec_set *sss)
5929 *sss = (struct ctables_summary_spec_set) { .n = 0 };
/* Continue until the end of the command, a slash, or an identifier that
   matches LABEL or HIDESOURCECATS, the other PPROPERTIES settings. */
5931 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5932 && !(lex_token (lexer) == T_ID
5933 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5934 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5935 lex_tokss (lexer)))))
5937 /* Parse function. */
5938 enum ctables_summary_function function;
5939 if (!parse_ctables_summary_function (lexer, &function))
5942 /* Parse percentile. */
/* The percentile stays 0 unless the function is CTSF_PTILE, in which case a
   number accepted by the closed-range check against 0 and 100 is
   required. */
5943 double percentile = 0;
5944 if (function == CTSF_PTILE)
5946 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5948 percentile = lex_number (lexer);
5953 struct fmt_spec format;
5954 bool is_ctables_format;
5955 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when it is full, then append the new specification. */
5958 if (sss->n >= sss->allocated)
5959 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5960 sizeof *sss->specs);
5961 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5962 .function = function,
5963 .percentile = percentile,
5965 .is_ctables_format = is_ctables_format,
/* Presumably the failure path: whatever was collected is released. */
5971 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of &name references
   to existing postcomputes, followed by LABEL, FORMAT and HIDESOURCECATS
   settings that are each applied to every listed postcompute.

   NOTE(review): the declaration of n_pcs, the statements that append to
   pcs, the error paths and the returns are on lines not shown in this
   listing. */
5976 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
5978 struct ctables_postcompute **pcs = NULL;
5980 size_t allocated_pcs = 0;
/* Each reference is an ampersand followed by an identifier that must name a
   known postcompute. */
5982 while (lex_match (lexer, T_AND))
5984 if (!lex_force_id (lexer))
5986 struct ctables_postcompute *pc
5987 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5990 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5995 if (n_pcs >= allocated_pcs)
5996 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings run until the next slash or the end of the command. */
6000 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* LABEL: replace the label of every listed postcompute with a copy of the
   string token. */
6002 if (lex_match_id (lexer, "LABEL"))
6004 lex_match (lexer, T_EQUALS);
6005 if (!lex_force_string (lexer))
6008 for (size_t i = 0; i < n_pcs; i++)
6010 free (pcs[i]->label);
6011 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
/* FORMAT: parse one set of summary specifications, give every listed
   postcompute its own clone, then release the parsed set. */
6016 else if (lex_match_id (lexer, "FORMAT"))
6018 lex_match (lexer, T_EQUALS);
6020 struct ctables_summary_spec_set sss;
6021 if (!ctables_parse_pproperties_format (lexer, &sss))
6024 for (size_t i = 0; i < n_pcs; i++)
6027 ctables_summary_spec_set_uninit (pcs[i]->specs);
6029 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6030 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6032 ctables_summary_spec_set_uninit (&sss);
/* HIDESOURCECATS: store the parsed boolean in every listed postcompute. */
6034 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6036 lex_match (lexer, T_EQUALS);
6037 bool hide_source_cats;
6038 if (!parse_bool (lexer, &hide_source_cats))
6040 for (size_t i = 0; i < n_pcs; i++)
6041 pcs[i]->hide_source_cats = hide_source_cats;
6045 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to broken-down local time, formats it with strftime
   according to FORMAT, and appends the resulting text to OUT.

   NOTE(review): the result of localtime is passed to strftime without a
   visible null check, and the strftime return value is not used.  The
   declaration of the value buffer is on a line not shown in this
   listing. */
6058 put_strftime (struct string *out, time_t now, const char *format)
6060 const struct tm *tm = localtime (&now);
6062 strftime (value, sizeof value, format, tm);
6063 ds_put_cstr (out, value);
/* If *S starts with PREFIX, advances *S past it.

   NOTE(review): the return statements are on lines not shown in this
   listing; put_title_text uses the result as a condition, so presumably it
   tells whether the prefix was present. */
6067 skip_prefix (struct substring *s, struct substring prefix)
6069 if (ss_starts_with (*s, prefix))
6071 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens of LEXER at offsets
   EXPR_START up to but not including EXPR_END.  An identifier that names a
   variable of DICT with a variable label is rendered as that label; other
   identifiers are rendered as written.

   NOTE(review): the declaration of nest, the statements run for brackets
   and the braces that group the branches are on lines not shown in this
   listing; presumably nest tracks bracket depth -- TODO confirm how tokens
   inside brackets are treated. */
6079 put_table_expression (struct string *out, struct lexer *lexer,
6080 struct dictionary *dict, int expr_start, int expr_end)
6083 for (int ofs = expr_start; ofs < expr_end; ofs++)
6085 const struct token *t = lex_ofs_token (lexer, ofs);
6086 if (t->type == T_LBRACK)
6088 else if (t->type == T_RBRACK && nest > 0)
/* Identifier: prefer the variable label when the name resolves to a
   variable that has one. */
6094 else if (t->type == T_ID)
6096 const struct variable *var
6097 = dict_lookup_var (dict, t->string.string);
6098 const char *label = var ? var_get_label (var) : NULL;
6099 ds_put_cstr (out, label ? label : t->string.string);
/* A separating space is added before the token unless it is the first
   token, it is a right parenthesis, or OUT already ends in a space. */
6103 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6104 ds_put_byte (out, ' ');
/* The token is emitted as its source text. */
6106 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6107 ds_put_cstr (out, repr);
/* A space is added after the token unless it is the last token or a left
   parenthesis. */
6110 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6111 ds_put_byte (out, ' ');
/* Appends IN to OUT, replacing the markers )DATE, )TIME and )TABLE.

   )DATE and )TIME are replaced by NOW formatted with the strftime
   conversions %x and %X respectively, and )TABLE by the rendering of the
   table expression tokens EXPR_START to EXPR_END produced by
   put_table_expression.  A right parenthesis that starts none of these
   markers is copied literally.

   NOTE(review): the enclosing loop and the exit taken once IN is exhausted
   are on lines not shown in this listing. */
6117 put_title_text (struct string *out, struct substring in, time_t now,
6118 struct lexer *lexer, struct dictionary *dict,
6119 int expr_start, int expr_end)
/* Copy everything before the next right parenthesis verbatim. */
6123 size_t chunk = ss_find_byte (in, ')');
6124 ds_put_substring (out, ss_head (in, chunk));
6125 ss_advance (&in, chunk);
6126 if (ss_is_empty (in))
/* IN now starts with a right parenthesis: try each marker in turn. */
6129 if (skip_prefix (&in, ss_cstr (")DATE")))
6130 put_strftime (out, now, "%x");
6131 else if (skip_prefix (&in, ss_cstr (")TIME")))
6132 put_strftime (out, now, "%X");
6133 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6134 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a marker: emit the parenthesis itself and step past it. */
6137 ds_put_byte (out, ')');
6138 ss_advance (&in, 1);
6144 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6146 struct casereader *input = NULL;
6148 struct measure_guesser *mg = measure_guesser_create (ds);
6151 input = proc_open (ds);
6152 measure_guesser_run (mg, input);
6153 measure_guesser_destroy (mg);
6156 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6157 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6158 enum settings_value_show tvars = settings_get_show_variables ();
6159 for (size_t i = 0; i < n_vars; i++)
6160 vlabels[i] = (enum ctables_vlabel) tvars;
6162 struct pivot_table_look *look = pivot_table_look_unshare (
6163 pivot_table_look_ref (pivot_table_look_get_default ()));
6164 look->omit_empty = false;
6166 struct ctables *ct = xmalloc (sizeof *ct);
6167 *ct = (struct ctables) {
6168 .dict = dataset_dict (ds),
6170 .ctables_formats = FMT_SETTINGS_INIT,
6172 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6175 time_t now = time (NULL);
6180 const char *dot_string;
6181 const char *comma_string;
6183 static const struct ctf ctfs[4] = {
6184 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6185 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6186 { CTEF_PAREN, "-,(,),", "-.(.)." },
6187 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6189 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6190 for (size_t i = 0; i < 4; i++)
6192 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6193 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6194 fmt_number_style_from_string (s));
6197 if (!lex_force_match (lexer, T_SLASH))
6200 while (!lex_match_id (lexer, "TABLE"))
6202 if (lex_match_id (lexer, "FORMAT"))
6204 double widths[2] = { SYSMIS, SYSMIS };
6205 double units_per_inch = 72.0;
6207 while (lex_token (lexer) != T_SLASH)
6209 if (lex_match_id (lexer, "MINCOLWIDTH"))
6211 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6214 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6216 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6219 else if (lex_match_id (lexer, "UNITS"))
6221 lex_match (lexer, T_EQUALS);
6222 if (lex_match_id (lexer, "POINTS"))
6223 units_per_inch = 72.0;
6224 else if (lex_match_id (lexer, "INCHES"))
6225 units_per_inch = 1.0;
6226 else if (lex_match_id (lexer, "CM"))
6227 units_per_inch = 2.54;
6230 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6234 else if (lex_match_id (lexer, "EMPTY"))
6239 lex_match (lexer, T_EQUALS);
6240 if (lex_match_id (lexer, "ZERO"))
6242 /* Nothing to do. */
6244 else if (lex_match_id (lexer, "BLANK"))
6245 ct->zero = xstrdup ("");
6246 else if (lex_force_string (lexer))
6248 ct->zero = ss_xstrdup (lex_tokss (lexer));
6254 else if (lex_match_id (lexer, "MISSING"))
6256 lex_match (lexer, T_EQUALS);
6257 if (!lex_force_string (lexer))
6261 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6262 ? ss_xstrdup (lex_tokss (lexer))
6268 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6269 "UNITS", "EMPTY", "MISSING");
6274 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6275 && widths[0] > widths[1])
6277 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6281 for (size_t i = 0; i < 2; i++)
6282 if (widths[i] != SYSMIS)
6284 int *wr = ct->look->width_ranges[TABLE_HORZ];
6285 wr[i] = widths[i] / units_per_inch * 96.0;
6290 else if (lex_match_id (lexer, "VLABELS"))
6292 if (!lex_force_match_id (lexer, "VARIABLES"))
6294 lex_match (lexer, T_EQUALS);
6296 struct variable **vars;
6298 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6302 if (!lex_force_match_id (lexer, "DISPLAY"))
6307 lex_match (lexer, T_EQUALS);
6309 enum ctables_vlabel vlabel;
6310 if (lex_match_id (lexer, "DEFAULT"))
6311 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6312 else if (lex_match_id (lexer, "NAME"))
6314 else if (lex_match_id (lexer, "LABEL"))
6315 vlabel = CTVL_LABEL;
6316 else if (lex_match_id (lexer, "BOTH"))
6318 else if (lex_match_id (lexer, "NONE"))
6322 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6328 for (size_t i = 0; i < n_vars; i++)
6329 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6332 else if (lex_match_id (lexer, "MRSETS"))
6334 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6336 lex_match (lexer, T_EQUALS);
6337 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6340 else if (lex_match_id (lexer, "SMISSING"))
6342 if (lex_match_id (lexer, "VARIABLE"))
6343 ct->smissing_listwise = false;
6344 else if (lex_match_id (lexer, "LISTWISE"))
6345 ct->smissing_listwise = true;
6348 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6352 else if (lex_match_id (lexer, "PCOMPUTE"))
6354 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6357 else if (lex_match_id (lexer, "PPROPERTIES"))
6359 if (!ctables_parse_pproperties (lexer, ct))
6362 else if (lex_match_id (lexer, "WEIGHT"))
6364 if (!lex_force_match_id (lexer, "VARIABLE"))
6366 lex_match (lexer, T_EQUALS);
6367 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6371 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6373 if (lex_match_id (lexer, "COUNT"))
6375 lex_match (lexer, T_EQUALS);
6376 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6379 ct->hide_threshold = lex_integer (lexer);
6382 else if (ct->hide_threshold == 0)
6383 ct->hide_threshold = 5;
6387 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6388 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6389 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6393 if (!lex_force_match (lexer, T_SLASH))
6397 size_t allocated_tables = 0;
6400 if (ct->n_tables >= allocated_tables)
6401 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6402 sizeof *ct->tables);
6404 struct ctables_category *cat = xmalloc (sizeof *cat);
6405 *cat = (struct ctables_category) {
6407 .include_missing = false,
6408 .sort_ascending = true,
6411 struct ctables_categories *c = xmalloc (sizeof *c);
6412 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6413 *c = (struct ctables_categories) {
6420 struct ctables_categories **categories = xnmalloc (n_vars,
6421 sizeof *categories);
6422 for (size_t i = 0; i < n_vars; i++)
6425 struct ctables_table *t = xmalloc (sizeof *t);
6426 *t = (struct ctables_table) {
6428 .slabels_axis = PIVOT_AXIS_COLUMN,
6429 .slabels_visible = true,
6430 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6432 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6433 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6434 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6436 .clabels_from_axis = PIVOT_AXIS_LAYER,
6437 .categories = categories,
6438 .n_categories = n_vars,
6441 ct->tables[ct->n_tables++] = t;
6443 lex_match (lexer, T_EQUALS);
6444 int expr_start = lex_ofs (lexer);
6445 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6447 if (lex_match (lexer, T_BY))
6449 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6450 ct, t, PIVOT_AXIS_COLUMN))
6453 if (lex_match (lexer, T_BY))
6455 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6456 ct, t, PIVOT_AXIS_LAYER))
6460 int expr_end = lex_ofs (lexer);
6462 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6463 && !t->axes[PIVOT_AXIS_LAYER])
6465 lex_error (lexer, _("At least one variable must be specified."));
6469 const struct ctables_axis *scales[PIVOT_N_AXES];
6470 size_t n_scales = 0;
6471 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6473 scales[a] = find_scale (t->axes[a]);
6479 msg (SE, _("Scale variables may appear only on one axis."));
6480 if (scales[PIVOT_AXIS_ROW])
6481 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6482 _("This scale variable appears on the rows axis."));
6483 if (scales[PIVOT_AXIS_COLUMN])
6484 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6485 _("This scale variable appears on the columns axis."));
6486 if (scales[PIVOT_AXIS_LAYER])
6487 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6488 _("This scale variable appears on the layer axis."));
6492 const struct ctables_axis *summaries[PIVOT_N_AXES];
6493 size_t n_summaries = 0;
6494 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6496 summaries[a] = (scales[a]
6498 : find_categorical_summary_spec (t->axes[a]));
6502 if (n_summaries > 1)
6504 msg (SE, _("Summaries may appear only on one axis."));
6505 if (summaries[PIVOT_AXIS_ROW])
6506 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6507 _("This variable on the rows axis has a summary."));
6508 if (summaries[PIVOT_AXIS_COLUMN])
6509 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6510 _("This variable on the columns axis has a summary."));
6511 if (summaries[PIVOT_AXIS_LAYER])
6512 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6513 _("This variable on the layers axis has a summary."));
6516 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6517 if (n_summaries ? summaries[a] : t->axes[a])
6519 t->summary_axis = a;
6523 if (lex_token (lexer) == T_ENDCMD)
6525 if (!ctables_prepare_table (t))
6529 if (!lex_force_match (lexer, T_SLASH))
6532 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6534 if (lex_match_id (lexer, "SLABELS"))
6536 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6538 if (lex_match_id (lexer, "POSITION"))
6540 lex_match (lexer, T_EQUALS);
6541 if (lex_match_id (lexer, "COLUMN"))
6542 t->slabels_axis = PIVOT_AXIS_COLUMN;
6543 else if (lex_match_id (lexer, "ROW"))
6544 t->slabels_axis = PIVOT_AXIS_ROW;
6545 else if (lex_match_id (lexer, "LAYER"))
6546 t->slabels_axis = PIVOT_AXIS_LAYER;
6549 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6553 else if (lex_match_id (lexer, "VISIBLE"))
6555 lex_match (lexer, T_EQUALS);
6556 if (!parse_bool (lexer, &t->slabels_visible))
6561 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6566 else if (lex_match_id (lexer, "CLABELS"))
6568 if (lex_match_id (lexer, "AUTO"))
6570 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6571 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6573 else if (lex_match_id (lexer, "ROWLABELS"))
6575 lex_match (lexer, T_EQUALS);
6576 if (lex_match_id (lexer, "OPPOSITE"))
6577 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6578 else if (lex_match_id (lexer, "LAYER"))
6579 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6582 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6586 else if (lex_match_id (lexer, "COLLABELS"))
6588 lex_match (lexer, T_EQUALS);
6589 if (lex_match_id (lexer, "OPPOSITE"))
6590 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6591 else if (lex_match_id (lexer, "LAYER"))
6592 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6595 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6601 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6606 else if (lex_match_id (lexer, "CRITERIA"))
6608 if (!lex_force_match_id (lexer, "CILEVEL"))
6610 lex_match (lexer, T_EQUALS);
6612 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6614 t->cilevel = lex_number (lexer);
6617 else if (lex_match_id (lexer, "CATEGORIES"))
6619 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6623 else if (lex_match_id (lexer, "TITLES"))
6628 if (lex_match_id (lexer, "CAPTION"))
6629 textp = &t->caption;
6630 else if (lex_match_id (lexer, "CORNER"))
6632 else if (lex_match_id (lexer, "TITLE"))
6636 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6639 lex_match (lexer, T_EQUALS);
6641 struct string s = DS_EMPTY_INITIALIZER;
6642 while (lex_is_string (lexer))
6644 if (!ds_is_empty (&s))
6645 ds_put_byte (&s, ' ');
6646 put_title_text (&s, lex_tokss (lexer), now,
6647 lexer, dataset_dict (ds),
6648 expr_start, expr_end);
6652 *textp = ds_steal_cstr (&s);
6654 while (lex_token (lexer) != T_SLASH
6655 && lex_token (lexer) != T_ENDCMD);
6657 else if (lex_match_id (lexer, "SIGTEST"))
6661 t->chisq = xmalloc (sizeof *t->chisq);
6662 *t->chisq = (struct ctables_chisq) {
6664 .include_mrsets = true,
6665 .all_visible = true,
6671 if (lex_match_id (lexer, "TYPE"))
6673 lex_match (lexer, T_EQUALS);
6674 if (!lex_force_match_id (lexer, "CHISQUARE"))
6677 else if (lex_match_id (lexer, "ALPHA"))
6679 lex_match (lexer, T_EQUALS);
6680 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6682 t->chisq->alpha = lex_number (lexer);
6685 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6687 lex_match (lexer, T_EQUALS);
6688 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6691 else if (lex_match_id (lexer, "CATEGORIES"))
6693 lex_match (lexer, T_EQUALS);
6694 if (lex_match_id (lexer, "ALLVISIBLE"))
6695 t->chisq->all_visible = true;
6696 else if (lex_match_id (lexer, "SUBTOTALS"))
6697 t->chisq->all_visible = false;
6700 lex_error_expecting (lexer,
6701 "ALLVISIBLE", "SUBTOTALS");
6707 lex_error_expecting (lexer, "TYPE", "ALPHA",
6708 "INCLUDEMRSETS", "CATEGORIES");
6712 while (lex_token (lexer) != T_SLASH
6713 && lex_token (lexer) != T_ENDCMD);
6715 else if (lex_match_id (lexer, "COMPARETEST"))
6719 t->pairwise = xmalloc (sizeof *t->pairwise);
6720 *t->pairwise = (struct ctables_pairwise) {
6722 .alpha = { .05, .05 },
6723 .adjust = BONFERRONI,
6724 .include_mrsets = true,
6725 .meansvariance_allcats = true,
6726 .all_visible = true,
6735 if (lex_match_id (lexer, "TYPE"))
6737 lex_match (lexer, T_EQUALS);
6738 if (lex_match_id (lexer, "PROP"))
6739 t->pairwise->type = PROP;
6740 else if (lex_match_id (lexer, "MEAN"))
6741 t->pairwise->type = MEAN;
6744 lex_error_expecting (lexer, "PROP", "MEAN");
6748 else if (lex_match_id (lexer, "ALPHA"))
6750 lex_match (lexer, T_EQUALS);
6752 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6754 double a0 = lex_number (lexer);
6757 lex_match (lexer, T_COMMA);
6758 if (lex_is_number (lexer))
6760 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6762 double a1 = lex_number (lexer);
6765 t->pairwise->alpha[0] = MIN (a0, a1);
6766 t->pairwise->alpha[1] = MAX (a0, a1);
6769 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6771 else if (lex_match_id (lexer, "ADJUST"))
6773 lex_match (lexer, T_EQUALS);
6774 if (lex_match_id (lexer, "BONFERRONI"))
6775 t->pairwise->adjust = BONFERRONI;
6776 else if (lex_match_id (lexer, "BH"))
6777 t->pairwise->adjust = BH;
6778 else if (lex_match_id (lexer, "NONE"))
6779 t->pairwise->adjust = 0;
6782 lex_error_expecting (lexer, "BONFERRONI", "BH",
6787 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6789 lex_match (lexer, T_EQUALS);
6790 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6793 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6795 lex_match (lexer, T_EQUALS);
6796 if (lex_match_id (lexer, "ALLCATS"))
6797 t->pairwise->meansvariance_allcats = true;
6798 else if (lex_match_id (lexer, "TESTEDCATS"))
6799 t->pairwise->meansvariance_allcats = false;
6802 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6806 else if (lex_match_id (lexer, "CATEGORIES"))
6808 lex_match (lexer, T_EQUALS);
6809 if (lex_match_id (lexer, "ALLVISIBLE"))
6810 t->pairwise->all_visible = true;
6811 else if (lex_match_id (lexer, "SUBTOTALS"))
6812 t->pairwise->all_visible = false;
6815 lex_error_expecting (lexer, "ALLVISIBLE",
6820 else if (lex_match_id (lexer, "MERGE"))
6822 lex_match (lexer, T_EQUALS);
6823 if (!parse_bool (lexer, &t->pairwise->merge))
6826 else if (lex_match_id (lexer, "STYLE"))
6828 lex_match (lexer, T_EQUALS);
6829 if (lex_match_id (lexer, "APA"))
6830 t->pairwise->apa_style = true;
6831 else if (lex_match_id (lexer, "SIMPLE"))
6832 t->pairwise->apa_style = false;
6835 lex_error_expecting (lexer, "APA", "SIMPLE");
6839 else if (lex_match_id (lexer, "SHOWSIG"))
6841 lex_match (lexer, T_EQUALS);
6842 if (!parse_bool (lexer, &t->pairwise->show_sig))
6847 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6848 "INCLUDEMRSETS", "MEANSVARIANCE",
6849 "CATEGORIES", "MERGE", "STYLE",
6854 while (lex_token (lexer) != T_SLASH
6855 && lex_token (lexer) != T_ENDCMD);
6859 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6860 "CRITERIA", "CATEGORIES", "TITLES",
6861 "SIGTEST", "COMPARETEST");
6865 if (!lex_match (lexer, T_SLASH))
6869 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6870 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6872 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6876 if (!ctables_prepare_table (t))
6879 while (lex_token (lexer) != T_ENDCMD);
6882 input = proc_open (ds);
6883 bool ok = ctables_execute (ds, input, ct);
6884 ok = proc_commit (ds) && ok;
6886 ctables_destroy (ct);
6887 return ok ? CMD_SUCCESS : CMD_FAILURE;
6892 ctables_destroy (ct);