1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
167 #if 0 /* Multiple response sets not yet implemented. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
450 struct hmap_node node;
455 struct ctables_occurrence
457 struct hmap_node node;
461 struct ctables_section
464 struct ctables_table *table;
465 struct ctables_nest *nests[PIVOT_N_AXES];
468 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
469 struct hmap cells; /* Contains "struct ctables_cell"s. */
470 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 struct ctables *ctables;
476 struct ctables_axis *axes[PIVOT_N_AXES];
477 struct ctables_stack stacks[PIVOT_N_AXES];
478 struct ctables_section *sections;
480 enum pivot_axis_type summary_axis;
481 struct ctables_summary_spec_set summary_specs;
482 struct variable **sum_vars;
485 enum pivot_axis_type slabels_axis;
486 bool slabels_visible;
488 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
490 Most commonly, label_axis[a] == a, and in particular we always have
491 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
493 If ROWLABELS or COLLABELS is specified, then one of
494 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
495 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
497 If any category labels are moved, then 'clabels_example' is one of the
498 variables being moved (and it is otherwise NULL). All of the variables
499 being moved have the same width, value labels, and categories, so this
500 example variable can be used to find those out.
502 The remaining members in this group are relevant only if category labels
505 'clabels_values_map' holds a "struct ctables_value" for all the values
506 that appear in all of the variables in the moved categories. It is
507 accumulated as the data is read. Once the data is fully read, its
508 sorted values are put into 'clabels_values' and 'n_clabels_values'.
510 enum pivot_axis_type label_axis[PIVOT_N_AXES];
511 enum pivot_axis_type clabels_from_axis;
512 const struct variable *clabels_example;
513 struct hmap clabels_values_map;
514 struct ctables_value **clabels_values;
515 size_t n_clabels_values;
517 /* Indexed by variable dictionary index. */
518 struct ctables_categories **categories;
527 struct ctables_chisq *chisq;
528 struct ctables_pairwise *pairwise;
531 struct ctables_categories
534 struct ctables_category *cats;
539 struct ctables_category
541 enum ctables_category_type
543 /* Explicit category lists. */
546 CCT_NRANGE, /* Numerical range. */
547 CCT_SRANGE, /* String range. */
552 /* Totals and subtotals. */
556 /* Implicit category lists. */
561 /* For contributing to TOTALN. */
562 CCT_EXCLUDED_MISSING,
566 struct ctables_category *subtotal;
572 double number; /* CCT_NUMBER. */
573 struct substring string; /* CCT_STRING, in dictionary encoding. */
574 double nrange[2]; /* CCT_NRANGE. */
575 struct substring srange[2]; /* CCT_SRANGE. */
579 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
580 bool hide_subcategories; /* CCT_SUBTOTAL. */
583 /* CCT_POSTCOMPUTE. */
586 const struct ctables_postcompute *pc;
587 enum fmt_type parse_format;
590 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
593 bool include_missing;
597 enum ctables_summary_function sort_function;
598 struct variable *sort_var;
603 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
604 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
605 struct msg_location *location;
609 ctables_category_uninit (struct ctables_category *cat)
620 case CCT_POSTCOMPUTE:
624 ss_dealloc (&cat->string);
628 ss_dealloc (&cat->srange[0]);
629 ss_dealloc (&cat->srange[1]);
634 free (cat->total_label);
642 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings compare equal, a null and a non-null
   substring compare unequal, and otherwise the result is that of
   ss_equals(). */
648 nullable_substring_equal (const struct substring *a,
649 const struct substring *b)
651 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
655 ctables_category_equal (const struct ctables_category *a,
656 const struct ctables_category *b)
658 if (a->type != b->type)
664 return a->number == b->number;
667 return ss_equals (a->string, b->string);
670 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
673 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
674 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
680 case CCT_POSTCOMPUTE:
681 return a->pc == b->pc;
685 return !strcmp (a->total_label, b->total_label);
690 return (a->include_missing == b->include_missing
691 && a->sort_ascending == b->sort_ascending
692 && a->sort_function == b->sort_function
693 && a->sort_var == b->sort_var
694 && a->percentile == b->percentile);
696 case CCT_EXCLUDED_MISSING:
704 ctables_categories_unref (struct ctables_categories *c)
709 assert (c->n_refs > 0);
713 for (size_t i = 0; i < c->n_cats; i++)
714 ctables_category_uninit (&c->cats[i]);
720 ctables_categories_equal (const struct ctables_categories *a,
721 const struct ctables_categories *b)
723 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
726 for (size_t i = 0; i < a->n_cats; i++)
727 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
733 /* Chi-square test (SIGTEST). */
741 /* Pairwise comparison test (COMPARETEST). */
742 struct ctables_pairwise
744 enum { PROP, MEAN } type;
747 bool meansvariance_allcats;
749 enum { BONFERRONI = 1, BH } adjust;
773 struct variable *var;
775 struct ctables_summary_spec_set specs[N_CSVS];
779 struct ctables_axis *subs[2];
782 struct msg_location *loc;
785 static void ctables_axis_destroy (struct ctables_axis *);
794 enum ctables_function_availability
796 CTFA_ALL, /* Any variables. */
797 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
798 //CTFA_MRSETS, /* Only multiple-response sets */
801 struct ctables_summary_spec
803 enum ctables_summary_function function;
804 double percentile; /* CTSF_PTILE only. */
807 struct fmt_spec format;
808 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
815 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
816 const struct ctables_summary_spec *src)
819 dst->label = xstrdup_if_nonnull (src->label);
823 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
830 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
831 const struct ctables_summary_spec_set *src)
833 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
834 for (size_t i = 0; i < src->n; i++)
835 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
837 *dst = (struct ctables_summary_spec_set) {
842 .is_scale = src->is_scale,
847 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
849 for (size_t i = 0; i < set->n; i++)
850 ctables_summary_spec_uninit (&set->specs[i]);
855 parse_col_width (struct lexer *lexer, const char *name, double *width)
857 lex_match (lexer, T_EQUALS);
858 if (lex_match_id (lexer, "DEFAULT"))
860 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
862 *width = lex_number (lexer);
872 parse_bool (struct lexer *lexer, bool *b)
874 if (lex_match_id (lexer, "NO"))
876 else if (lex_match_id (lexer, "YES"))
880 lex_error_expecting (lexer, "YES", "NO");
/* Looks up summary function F in a static table, indexed by enum value, that
   is built from the AVAILABILITY column of the S(...) entries, and returns
   the entry found there.  The table is only ever read, so it is const. */
886 static enum ctables_function_availability
887 ctables_function_availability (enum ctables_summary_function f)
889 static const enum ctables_function_availability availability[] = {
890 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
895 return availability[f];
899 ctables_summary_function_is_count (enum ctables_summary_function f)
905 case CTSF_ROWPCT_COUNT:
906 case CTSF_COLPCT_COUNT:
907 case CTSF_TABLEPCT_COUNT:
908 case CTSF_SUBTABLEPCT_COUNT:
909 case CTSF_LAYERPCT_COUNT:
910 case CTSF_LAYERROWPCT_COUNT:
911 case CTSF_LAYERCOLPCT_COUNT:
913 case CTSF_UROWPCT_COUNT:
914 case CTSF_UCOLPCT_COUNT:
915 case CTSF_UTABLEPCT_COUNT:
916 case CTSF_USUBTABLEPCT_COUNT:
917 case CTSF_ULAYERPCT_COUNT:
918 case CTSF_ULAYERROWPCT_COUNT:
919 case CTSF_ULAYERCOLPCT_COUNT:
922 case CTSF_ROWPCT_VALIDN:
923 case CTSF_COLPCT_VALIDN:
924 case CTSF_TABLEPCT_VALIDN:
925 case CTSF_SUBTABLEPCT_VALIDN:
926 case CTSF_LAYERPCT_VALIDN:
927 case CTSF_LAYERROWPCT_VALIDN:
928 case CTSF_LAYERCOLPCT_VALIDN:
929 case CTSF_ROWPCT_TOTALN:
930 case CTSF_COLPCT_TOTALN:
931 case CTSF_TABLEPCT_TOTALN:
932 case CTSF_SUBTABLEPCT_TOTALN:
933 case CTSF_LAYERPCT_TOTALN:
934 case CTSF_LAYERROWPCT_TOTALN:
935 case CTSF_LAYERCOLPCT_TOTALN:
952 case CTSF_ROWPCT_SUM:
953 case CTSF_COLPCT_SUM:
954 case CTSF_TABLEPCT_SUM:
955 case CTSF_SUBTABLEPCT_SUM:
956 case CTSF_LAYERPCT_SUM:
957 case CTSF_LAYERROWPCT_SUM:
958 case CTSF_LAYERCOLPCT_SUM:
959 case CTSF_UROWPCT_VALIDN:
960 case CTSF_UCOLPCT_VALIDN:
961 case CTSF_UTABLEPCT_VALIDN:
962 case CTSF_USUBTABLEPCT_VALIDN:
963 case CTSF_ULAYERPCT_VALIDN:
964 case CTSF_ULAYERROWPCT_VALIDN:
965 case CTSF_ULAYERCOLPCT_VALIDN:
966 case CTSF_UROWPCT_TOTALN:
967 case CTSF_UCOLPCT_TOTALN:
968 case CTSF_UTABLEPCT_TOTALN:
969 case CTSF_USUBTABLEPCT_TOTALN:
970 case CTSF_ULAYERPCT_TOTALN:
971 case CTSF_ULAYERROWPCT_TOTALN:
972 case CTSF_ULAYERCOLPCT_TOTALN:
984 case CTSF_UROWPCT_SUM:
985 case CTSF_UCOLPCT_SUM:
986 case CTSF_UTABLEPCT_SUM:
987 case CTSF_USUBTABLEPCT_SUM:
988 case CTSF_ULAYERPCT_SUM:
989 case CTSF_ULAYERROWPCT_SUM:
990 case CTSF_ULAYERCOLPCT_SUM:
/* Matches the identifier at the current token in LEXER, ignoring case,
   against a static table of summary function names.  The table includes
   short forms of the *PCT.COUNT functions with the ".COUNT" suffix omitted.
   On a match, stores the corresponding function in *F.  If no name matches,
   reports "Expecting summary function name." through the lexer.

   The name table is only ever read, so it is const. */
998 parse_ctables_summary_function (struct lexer *lexer,
999 enum ctables_summary_function *f)
1003 enum ctables_summary_function function;
1004 struct substring name;
1006 static const struct pair names[] = {
1007 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
1008 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
1011 /* The .COUNT suffix may be omitted. */
1012 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
1013 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
1014 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
1015 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
1016 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
1017 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
1018 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
1022 if (!lex_force_id (lexer))
1025 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
1026 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
1028 *f = names[i].function;
1033 lex_error (lexer, _("Expecting summary function name."));
1038 ctables_axis_destroy (struct ctables_axis *axis)
1046 for (size_t i = 0; i < N_CSVS; i++)
1047 ctables_summary_spec_set_uninit (&axis->specs[i]);
1052 ctables_axis_destroy (axis->subs[0]);
1053 ctables_axis_destroy (axis->subs[1]);
1056 msg_location_destroy (axis->loc);
1060 static struct ctables_axis *
1061 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1062 struct ctables_axis *sub0,
1063 struct ctables_axis *sub1,
1064 struct lexer *lexer, int start_ofs)
1066 struct ctables_axis *axis = xmalloc (sizeof *axis);
1067 *axis = (struct ctables_axis) {
1069 .subs = { sub0, sub1 },
1070 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1075 struct ctables_axis_parse_ctx
1077 struct lexer *lexer;
1078 struct dictionary *dict;
1080 struct ctables_table *t;
1083 static struct fmt_spec
1084 ctables_summary_default_format (enum ctables_summary_function function,
1085 const struct variable *var)
1087 static const enum ctables_format default_formats[] = {
1088 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1092 switch (default_formats[function])
1095 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1098 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1101 return *var_get_print_format (var);
1108 static struct pivot_value *
1109 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1113 static const char *default_labels[] = {
1114 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1119 return (spec->function == CTSF_PTILE
1120 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1122 : pivot_value_new_text (default_labels[spec->function]));
1126 struct substring in = ss_cstr (spec->label);
1127 struct substring target = ss_cstr (")CILEVEL");
1129 struct string out = DS_EMPTY_INITIALIZER;
1132 size_t chunk = ss_find_substring (in, target);
1133 ds_put_substring (&out, ss_head (in, chunk));
1134 ss_advance (&in, chunk);
1136 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1138 ss_advance (&in, target.length);
1139 ds_put_format (&out, "%g", cilevel);
1145 ctables_summary_function_name (enum ctables_summary_function function)
1147 static const char *names[] = {
1148 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1152 return names[function];
1156 add_summary_spec (struct ctables_axis *axis,
1157 enum ctables_summary_function function, double percentile,
1158 const char *label, const struct fmt_spec *format,
1159 bool is_ctables_format, const struct msg_location *loc,
1160 enum ctables_summary_variant sv)
1162 if (axis->op == CTAO_VAR)
1164 const char *function_name = ctables_summary_function_name (function);
1165 const char *var_name = var_get_name (axis->var);
1166 switch (ctables_function_availability (function))
1170 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1171 "response sets."), function_name);
1172 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1178 if (!axis->scale && sv != CSV_TOTAL)
1181 _("Summary function %s applies only to scale variables."),
1183 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1193 struct ctables_summary_spec_set *set = &axis->specs[sv];
1194 if (set->n >= set->allocated)
1195 set->specs = x2nrealloc (set->specs, &set->allocated,
1196 sizeof *set->specs);
1198 struct ctables_summary_spec *dst = &set->specs[set->n++];
1199 *dst = (struct ctables_summary_spec) {
1200 .function = function,
1201 .percentile = percentile,
1202 .label = xstrdup_if_nonnull (label),
1203 .format = (format ? *format
1204 : ctables_summary_default_format (function, axis->var)),
1205 .is_ctables_format = is_ctables_format,
1211 for (size_t i = 0; i < 2; i++)
1212 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1213 format, is_ctables_format, loc, sv))
/* Forward declaration: ctables_axis_parse_primary calls this to parse a
   parenthesized subexpression. */
1219 static struct ctables_axis *ctables_axis_parse_stack (
1220 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression using CTX's lexer and dictionary.

   A primary is either a parenthesized stack expression or a single
   variable.  A variable may be followed by [S] or [C] to force it to be
   treated as scale or as categorical, respectively; with neither, it is
   scale exactly when its measurement level is MEASURE_SCALE.  A string
   variable may not be used as a scale variable; in that case an error is
   reported at the variable's location and the new axis is destroyed. */
1223 static struct ctables_axis *
1224 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1226 if (lex_match (ctx->lexer, T_LPAREN))
1228 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1229 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1231 ctables_axis_destroy (sub);
1237 if (!lex_force_id (ctx->lexer))
/* Remember where the variable starts so that axis->loc can cover the
   variable name and any [S] or [C] suffix. */
1240 int start_ofs = lex_ofs (ctx->lexer);
1241 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1245 struct ctables_axis *axis = xmalloc (sizeof *axis);
1246 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1248 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1249 : lex_match_phrase (ctx->lexer, "[C]") ? false
1250 : var_get_measure (var) == MEASURE_SCALE);
1251 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1252 lex_ofs (ctx->lexer) - 1);
1253 if (axis->scale && var_is_alpha (var))
1255 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1257 var_get_name (var));
1258 ctables_axis_destroy (axis);
/* Tests whether S contains at least one ASCII decimal digit: strcspn()
   yields the offset of the first digit, or of the terminating null if
   there is none. */
1266 has_digit (const char *s)
1268 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT.

   The type name, width, and decimals are first extracted with
   parse_abstract_format_specifier__().  The CTABLES-specific types
   NEGPAREN, NEQUAL, PAREN, and PCTPAREN are mapped to the corresponding
   CTEF_* type and *IS_CTABLES_FORMAT is set to true.  Any other type is
   handed to parse_format_specifier() with *IS_CTABLES_FORMAT set to false,
   and succeeds only if the result is a valid output format compatible with
   numeric values. */
1272 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1273 bool *is_ctables_format)
1275 char type[FMT_TYPE_LEN_MAX + 1];
1276 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
/* Recognize the CTABLES-only format types by name, ignoring case. */
1279 if (!strcasecmp (type, "NEGPAREN"))
1280 format->type = CTEF_NEGPAREN;
1281 else if (!strcasecmp (type, "NEQUAL"))
1282 format->type = CTEF_NEQUAL;
1283 else if (!strcasecmp (type, "PAREN"))
1284 format->type = CTEF_PAREN;
1285 else if (!strcasecmp (type, "PCTPAREN"))
1286 format->type = CTEF_PCTPAREN;
/* An ordinary format type: use the regular parser and checks. */
1289 *is_ctables_format = false;
1290 return (parse_format_specifier (lexer, format)
1291 && fmt_check_output (format)
1292 && fmt_check_type_compat (format, VAL_NUMERIC));
/* Width and decimals constraints for the CTABLES-only types, as stated in
   the two error messages below. */
1298 lex_next_error (lexer, -1, -1,
1299 _("Output format %s requires width 2 or greater."), type);
1302 else if (format->d > format->w - 1)
1304 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1305 "greater than decimals."), type);
1310 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function; for PTILE, a
   percentile between 0 and 100; an optional string label; and an optional
   output format, recognized as an identifier that contains a digit.  Each
   parsed specification is added to the axis with add_summary_spec().
   Commas between specifications are optional.

   Parsing starts in the CSV_CELL variant.  TOTALS followed by '[' opens a
   second bracketed list; judging by the CSV_TOTAL test before its closing
   bracket, specifications in that list belong to the totals variant.

   The parsed axis is destroyed on the error path at the end. */
1315 static struct ctables_axis *
1316 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1318 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1319 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1322 enum ctables_summary_variant sv = CSV_CELL;
1325 int start_ofs = lex_ofs (ctx->lexer);
1327 /* Parse function. */
1328 enum ctables_summary_function function;
1329 if (!parse_ctables_summary_function (ctx->lexer, &function))
1332 /* Parse percentile. */
1333 double percentile = 0;
1334 if (function == CTSF_PTILE)
1336 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1338 percentile = lex_number (ctx->lexer);
1339 lex_get (ctx->lexer);
/* Optional label: a string token, copied. */
1344 if (lex_is_string (ctx->lexer))
1346 label = ss_xstrdup (lex_tokss (ctx->lexer));
1347 lex_get (ctx->lexer);
/* Optional format: only attempted for an identifier containing a digit. */
1351 struct fmt_spec format;
1352 const struct fmt_spec *formatp;
1353 bool is_ctables_format = false;
1354 if (lex_token (ctx->lexer) == T_ID
1355 && has_digit (lex_tokcstr (ctx->lexer)))
1357 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1358 &is_ctables_format))
/* LOC spans the tokens of this specification, for diagnostics issued by
   add_summary_spec(); it is destroyed right after the call. */
1368 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1369 lex_ofs (ctx->lexer) - 1);
1370 add_summary_spec (sub, function, percentile, label, formatp,
1371 is_ctables_format, loc, sv);
1373 msg_location_destroy (loc);
1375 lex_match (ctx->lexer, T_COMMA);
1376 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1378 if (!lex_force_match (ctx->lexer, T_LBRACK))
1382 else if (lex_match (ctx->lexer, T_RBRACK))
1384 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1391 ctables_axis_destroy (sub);
/* Searches AXIS for a scale variable.  A variable axis yields itself if it
   is marked as scale and null otherwise; other axes are searched by
   recursing into both sub-axes. */
1395 static const struct ctables_axis *
1396 find_scale (const struct ctables_axis *axis)
1400 else if (axis->op == CTAO_VAR)
1401 return axis->scale ? axis : NULL;
1404 for (size_t i = 0; i < 2; i++)
1406 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS for a categorical (non-scale) variable that has at least
   one summary specification in its CSV_CELL spec set.  A variable axis
   yields itself if it qualifies and null otherwise; other axes are
   searched by recursing into both sub-axes. */
1414 static const struct ctables_axis *
1415 find_categorical_summary_spec (const struct ctables_axis *axis)
1419 else if (axis->op == CTAO_VAR)
1420 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1423 for (size_t i = 0; i < 2; i++)
1425 const struct ctables_axis *sum
1426 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions joined by '>' (nesting).

   For each '>' a CTAO_NEST node is built from the expression parsed so far
   and the new right-hand side.  Two combinations are rejected, with the
   nest node destroyed after the diagnostics: scale variables on both sides
   of the nesting, and a categorical variable on the outer (left) side that
   has summary specifications. */
1434 static struct ctables_axis *
1435 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1437 int start_ofs = lex_ofs (ctx->lexer);
1438 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1442 while (lex_match (ctx->lexer, T_GT))
1444 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1448 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1449 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nest may contain a scale variable. */
1451 const struct ctables_axis *outer_scale = find_scale (lhs);
1452 const struct ctables_axis *inner_scale = find_scale (rhs);
1453 if (outer_scale && inner_scale)
1455 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1456 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1457 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1458 ctables_axis_destroy (nest);
/* Categorical summaries are only allowed at the innermost level. */
1462 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1465 msg_at (SE, nest->loc,
1466 _("Summaries may only be requested for categorical variables "
1467 "at the innermost nesting level."));
1468 msg_at (SN, outer_sum->loc,
1469 _("This outer categorical variable has a summary."));
1470 ctables_axis_destroy (nest);
/* Parses one or more nest expressions joined by '+' (stacking).  Each '+'
   wraps the expression accumulated so far and the newly parsed right-hand
   side in a CTAO_STACK node, so the result associates to the left. */
1480 static struct ctables_axis *
1481 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1483 int start_ofs = lex_ofs (ctx->lexer);
1484 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1488 while (lex_match (ctx->lexer, T_PLUS))
1490 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1494 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1495 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER and stores the
   result in T->axes[A].

   The tokens BY, '/', and end-of-command are tested for up front, before
   any expression parsing is attempted.  Otherwise the expression is parsed
   as a stack expression, and the result is true exactly when the stored
   axis is nonnull. */
1502 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1503 struct ctables *ct, struct ctables_table *t,
1504 enum pivot_axis_type a)
1506 if (lex_token (lexer) == T_BY
1507 || lex_token (lexer) == T_SLASH
1508 || lex_token (lexer) == T_ENDCMD)
1511 struct ctables_axis_parse_ctx ctx = {
1517 t->axes[a] = ctables_axis_parse_stack (&ctx);
1518 return t->axes[a] != NULL;
/* Called by ctables_table_destroy() on a table's chisq member.
   NOTE(review): presumably frees CHISQ -- confirm against the body. */
1522 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Called by ctables_table_destroy() on a table's pairwise member.
   NOTE(review): presumably frees PAIRWISE -- confirm against the body. */
1528 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T: drops one reference on each of
   its N_CATEGORIES categories entries and frees the array that holds them,
   destroys the column, row, and layer axes, and destroys the chisq and
   pairwise settings. */
1534 ctables_table_destroy (struct ctables_table *t)
1539 for (size_t i = 0; i < t->n_categories; i++)
1540 ctables_categories_unref (t->categories[i]);
1541 free (t->categories);
1543 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1544 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1545 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1549 ctables_chisq_destroy (t->chisq);
1550 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT, including its reference to the pivot
   table look and each of its N_TABLES tables. */
1555 ctables_destroy (struct ctables *ct)
1560 pivot_table_look_unref (ct->look);
1564 for (size_t i = 0; i < ct->n_tables; i++)
1565 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose nrange member holds LOW and HIGH, in
   that order. */
1570 static struct ctables_category
1571 cct_nrange (double low, double high)
1573 return (struct ctables_category) {
1575 .nrange = { low, high }
/* Returns, by value, a category whose srange member holds LOW and HIGH, in
   that order.  The substrings are stored as given, not copied. */
1579 static struct ctables_category
1580 cct_srange (struct substring low, struct substring high)
1582 return (struct ctables_category) {
1584 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category and stores a
   CCT_SUBTOTAL category in *CAT.

   If '=' follows, a string is required and a copy of it becomes the label;
   otherwise the label is a copy of the translated default "Subtotal".
   HIDE_SUBCATEGORIES is recorded in the category unchanged. */
1589 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1590 struct ctables_category *cat)
1593 if (lex_match (lexer, T_EQUALS))
1595 if (!lex_force_string (lexer))
1598 total_label = ss_xstrdup (lex_tokss (lexer));
1602 total_label = xstrdup (_("Subtotal"));
1604 *cat = (struct ctables_category) {
1605 .type = CCT_SUBTOTAL,
1606 .hide_subcategories = hide_subcategories,
1607 .total_label = total_label
/* Returns LEXER's current string token recoded between UTF-8 and DICT's
   encoding, with trailing spaces trimmed.
   NOTE(review): presumably the recoding goes from UTF-8 to the dictionary
   encoding -- confirm against recode_substring_pool()'s argument order. */
1612 static struct substring
1613 parse_substring (struct lexer *lexer, struct dictionary *dict)
1615 struct substring s = recode_substring_pool (
1616 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1617 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification from LEXER into *CAT.

   The accepted forms, in the order tested, are:

   - OTHERNM and MISSING keywords.
   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal().
   - LO THRU followed by a string or a number: a range with an open lower
     end (a null string, or -DBL_MAX for numbers).
   - A number, optionally followed by THRU and either HI (upper end
     DBL_MAX) or another number.
   - A string, optionally followed by THRU and either HI (null upper
     string) or another string.
   - '&' followed by the name of a postcompute, which must already exist.

   Anything else is reported as a syntax error. */
1623 ctables_table_parse_explicit_category (struct lexer *lexer,
1624 struct dictionary *dict,
1626 struct ctables_category *cat)
1628 if (lex_match_id (lexer, "OTHERNM"))
1629 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1630 else if (lex_match_id (lexer, "MISSING"))
1631 *cat = (struct ctables_category) { .type = CCT_MISSING };
1632 else if (lex_match_id (lexer, "SUBTOTAL"))
1633 return ctables_table_parse_subtotal (lexer, false, cat);
1634 else if (lex_match_id (lexer, "HSUBTOTAL"))
1635 return ctables_table_parse_subtotal (lexer, true, cat);
1636 else if (lex_match_id (lexer, "LO"))
1638 if (!lex_force_match_id (lexer, "THRU"))
1640 if (lex_is_string (lexer))
1642 struct substring sr0 = { .string = NULL };
1643 struct substring sr1 = parse_substring (lexer, dict);
1644 *cat = cct_srange (sr0, sr1);
1646 else if (lex_force_num (lexer))
1648 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1654 else if (lex_is_number (lexer))
1656 double number = lex_number (lexer);
1658 if (lex_match_id (lexer, "THRU"))
1660 if (lex_match_id (lexer, "HI"))
1661 *cat = cct_nrange (number, DBL_MAX);
1664 if (!lex_force_num (lexer))
1666 *cat = cct_nrange (number, lex_number (lexer));
1671 *cat = (struct ctables_category) {
1676 else if (lex_is_string (lexer))
1678 struct substring s = parse_substring (lexer, dict);
1679 if (lex_match_id (lexer, "THRU"))
1681 if (lex_match_id (lexer, "HI"))
1683 struct substring sr1 = { .string = NULL };
1684 *cat = cct_srange (s, sr1);
1688 if (!lex_force_string (lexer))
1690 struct substring sr1 = parse_substring (lexer, dict);
1691 *cat = cct_srange (s, sr1);
1695 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1697 else if (lex_match (lexer, T_AND))
1699 if (!lex_force_id (lexer))
1701 struct ctables_postcompute *pc = ctables_find_postcompute (
1702 ct, lex_tokcstr (lexer));
1705 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1706 msg_at (SE, loc, _("Unknown postcompute &%s."),
1707 lex_tokcstr (lexer));
1708 msg_location_destroy (loc);
1713 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1717 lex_error (lexer, NULL);
/* Parses S as a value in format FORMAT by calling data_in() with DICT's
   encoding and the current format settings.  If data_in() returns an error
   string, an error that names FORMAT and includes that string is reported
   at LOCATION.
   NOTE(review): N is presumably where the parsed number is stored --
   confirm against the full body. */
1725 parse_category_string (struct msg_location *location,
1726 struct substring s, const struct dictionary *dict,
1727 enum fmt_type format, double *n)
1730 char *error = data_in (s, dict_get_encoding (dict), format,
1731 settings_get_fmt_settings (), &v, 0, NULL);
1734 msg_at (SE, location,
1735 _("Failed to parse category specification as format %s: %s."),
1736 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.

   Every category in CATS is examined and compared against E according to
   E's kind of category reference: numbers and strings by value, numeric
   and string ranges by both endpoints (string endpoints may be null), and
   MISSING, OTHERNM, SUBTOTAL, and TOTAL by category type.

   Subtotals are counted as they are encountered so that
   E->subtotal_index can be compared with the running count; an index of 0
   is handled separately, and the final test singles out the case of index
   0 when CATS contains more than one subtotal. */
1745 static struct ctables_category *
1746 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1747 const struct ctables_pcexpr *e)
1749 struct ctables_category *best = NULL;
1750 size_t n_subtotals = 0;
1751 for (size_t i = 0; i < cats->n_cats; i++)
1753 struct ctables_category *cat = &cats->cats[i];
1756 case CTPO_CAT_NUMBER:
1757 if (cat->type == CCT_NUMBER && cat->number == e->number)
1761 case CTPO_CAT_STRING:
1762 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1766 case CTPO_CAT_NRANGE:
1767 if (cat->type == CCT_NRANGE
1768 && cat->nrange[0] == e->nrange[0]
1769 && cat->nrange[1] == e->nrange[1])
1773 case CTPO_CAT_SRANGE:
1774 if (cat->type == CCT_SRANGE
1775 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1776 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1780 case CTPO_CAT_MISSING:
1781 if (cat->type == CCT_MISSING)
1785 case CTPO_CAT_OTHERNM:
1786 if (cat->type == CCT_OTHERNM)
1790 case CTPO_CAT_SUBTOTAL:
1791 if (cat->type == CCT_SUBTOTAL)
1794 if (e->subtotal_index == n_subtotals)
1796 else if (e->subtotal_index == 0)
1801 case CTPO_CAT_TOTAL:
1802 if (cat->type == CCT_TOTAL)
1816 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Searches CATS for the category that postcompute expression node E refers
   to, taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, a string reference is first parsed as a
   number in PARSE_FORMAT and looked up as a CTPO_CAT_NUMBER reference, and
   a string range is converted to a CTPO_CAT_NRANGE reference, with a null
   lower endpoint becoming -DBL_MAX and a null upper endpoint becoming
   DBL_MAX.  The substitute node keeps E's location.  In every other case E
   is looked up unchanged. */
1821 static struct ctables_category *
1822 ctables_find_category_for_postcompute (const struct dictionary *dict,
1823 const struct ctables_categories *cats,
1824 enum fmt_type parse_format,
1825 const struct ctables_pcexpr *e)
1827 if (parse_format != FMT_F)
1829 if (e->op == CTPO_CAT_STRING)
1832 if (!parse_category_string (e->location, e->string, dict,
1833 parse_format, &number))
1836 struct ctables_pcexpr e2 = {
1837 .op = CTPO_CAT_NUMBER,
1839 .location = e->location,
1841 return ctables_find_category_for_postcompute__ (cats, &e2);
1843 else if (e->op == CTPO_CAT_SRANGE)
1846 if (!e->srange[0].string)
1847 nrange[0] = -DBL_MAX;
1848 else if (!parse_category_string (e->location, e->srange[0], dict,
1849 parse_format, &nrange[0]))
1852 if (!e->srange[1].string)
1853 nrange[1] = DBL_MAX;
1854 else if (!parse_category_string (e->location, e->srange[1], dict,
1855 parse_format, &nrange[1]))
1858 struct ctables_pcexpr e2 = {
1859 .op = CTPO_CAT_NRANGE,
1860 .nrange = { nrange[0], nrange[1] },
1861 .location = e->location,
1863 return ctables_find_category_for_postcompute__ (cats, &e2);
1866 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks postcompute expression E, used by computed category PC_CAT,
   against category list CATS, whose source location is CATS_LOCATION.

   For a category reference, the referenced category is looked up in CATS
   using PC_CAT's parse format.  The diagnostics below cover two failures:
   a SUBTOTAL reference without a position when CATS contains more than one
   subtotal, and a reference to a category that is not in the list, each
   followed by notes that suggest a fix.

   For the remaining expression kinds, each nonnull subexpression is
   checked recursively.

   NOTE(review): CTPO_CAT_SRANGE is not among the category-reference case
   labels below, although ctables_find_category_for_postcompute() handles
   it -- confirm that string ranges are covered elsewhere in this switch. */
1870 ctables_recursive_check_postcompute (struct dictionary *dict,
1871 const struct ctables_pcexpr *e,
1872 struct ctables_category *pc_cat,
1873 const struct ctables_categories *cats,
1874 const struct msg_location *cats_location)
1878 case CTPO_CAT_NUMBER:
1879 case CTPO_CAT_STRING:
1880 case CTPO_CAT_NRANGE:
1881 case CTPO_CAT_MISSING:
1882 case CTPO_CAT_OTHERNM:
1883 case CTPO_CAT_SUBTOTAL:
1884 case CTPO_CAT_TOTAL:
1886 struct ctables_category *cat = ctables_find_category_for_postcompute (
1887 dict, cats, pc_cat->parse_format, e);
/* A SUBTOTAL reference with index 0 carries no position, which is
   ambiguous when there is more than one subtotal. */
1890 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1892 size_t n_subtotals = 0;
1893 for (size_t i = 0; i < cats->n_cats; i++)
1894 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1895 if (n_subtotals > 1)
1897 msg_at (SE, cats_location,
1898 ngettext ("These categories include %zu instance "
1899 "of SUBTOTAL or HSUBTOTAL, so references "
1900 "from computed categories must refer to "
1901 "subtotals by position, "
1902 "e.g. SUBTOTAL[1].",
1903 "These categories include %zu instances "
1904 "of SUBTOTAL or HSUBTOTAL, so references "
1905 "from computed categories must refer to "
1906 "subtotals by position, "
1907 "e.g. SUBTOTAL[1].",
1910 msg_at (SN, e->location,
1911 _("This is the reference that lacks a position."));
1916 msg_at (SE, pc_cat->location,
1917 _("Computed category &%s references a category not included "
1918 "in the category list."),
1920 msg_at (SN, e->location, _("This is the missing category."));
1921 if (e->op == CTPO_CAT_SUBTOTAL)
1922 msg_at (SN, cats_location,
1923 _("To fix the problem, add subtotals to the "
1924 "list of categories here."));
1925 else if (e->op == CTPO_CAT_TOTAL)
1926 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1927 "CATEGORIES specification."));
1929 msg_at (SN, cats_location,
1930 _("To fix the problem, add the missing category to the "
1931 "list of categories here."));
1934 if (pc_cat->pc->hide_source_cats)
/* Check both operands, skipping any that is null. */
1948 for (size_t i = 0; i < 2; i++)
1949 if (e->subs[i] && !ctables_recursive_check_postcompute (
1950 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric variables.  When one is
   found, an error that names it is reported at CAT's location, explaining
   that the category specification applies only to string variables. */
1960 all_strings (struct variable **vars, size_t n_vars,
1961 const struct ctables_category *cat)
1963 for (size_t j = 0; j < n_vars; j++)
1964 if (var_is_numeric (vars[j]))
1966 msg_at (SE, cat->location,
1967 _("This category specification may be applied only to string "
1968 "variables, but this subcommand tries to apply it to "
1969 "numeric variable %s."),
1970 var_get_name (vars[j]));
/* Parses a CATEGORIES specification for table T.

   The syntax is VARIABLES followed by an optional '=' and a variable list,
   then an optional bracketed list of explicit categories, then any of the
   settings ORDER, KEY, MISSING, TOTAL, LABEL, POSITION, and EMPTY up to
   the next '/' or the end of the command.  ORDER, KEY, and MISSING are
   accepted only when no explicit categories were given.

   A single categories object, created with one reference per listed
   variable, replaces the existing entry in T->categories for each of those
   variables.

   After parsing, explicit categories are checked against the variables:
   postcomputes are validated with ctables_recursive_check_postcompute(),
   and string categories and string ranges are either converted to numbers
   using the variables' common print format or required to apply only to
   string variables. */
1977 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1978 struct ctables *ct, struct ctables_table *t)
1980 if (!lex_match_id (lexer, "VARIABLES"))
1982 lex_match (lexer, T_EQUALS);
1984 struct variable **vars;
1986 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* COMMON_FORMAT is the first variable's print format, or null if any
   other variable's print format has a different type. */
1989 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1990 for (size_t i = 1; i < n_vars; i++)
1992 const struct fmt_spec *f = var_get_print_format (vars[i]);
1993 if (f->type != common_format->type)
1995 common_format = NULL;
2001 && (fmt_get_category (common_format->type)
2002 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* Create the shared categories object and drop each variable's previous
   entry. */
2004 struct ctables_categories *c = xmalloc (sizeof *c);
2005 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2006 for (size_t i = 0; i < n_vars; i++)
2008 struct ctables_categories **cp
2009 = &t->categories[var_get_dict_index (vars[i])];
2010 ctables_categories_unref (*cp);
/* Optional explicit category list in brackets.  The token offsets of the
   list are remembered for use in later diagnostics. */
2014 size_t allocated_cats = 0;
2015 int cats_start_ofs = -1;
2016 int cats_end_ofs = -1;
2017 if (lex_match (lexer, T_LBRACK))
2019 cats_start_ofs = lex_ofs (lexer);
2022 if (c->n_cats >= allocated_cats)
2023 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2025 int start_ofs = lex_ofs (lexer);
2026 struct ctables_category *cat = &c->cats[c->n_cats];
2027 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2029 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2032 lex_match (lexer, T_COMMA);
2034 while (!lex_match (lexer, T_RBRACK));
2035 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for an implicit category, adjusted by the settings below. */
2038 struct ctables_category cat = {
2040 .include_missing = false,
2041 .sort_ascending = true,
2043 bool show_totals = false;
2044 char *total_label = NULL;
2045 bool totals_before = false;
/* Parse settings until the next subcommand or the end of the command. */
2046 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2048 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2050 lex_match (lexer, T_EQUALS);
2051 if (lex_match_id (lexer, "A"))
2052 cat.sort_ascending = true;
2053 else if (lex_match_id (lexer, "D"))
2054 cat.sort_ascending = false;
2057 lex_error_expecting (lexer, "A", "D");
2061 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2063 lex_match (lexer, T_EQUALS);
2064 if (lex_match_id (lexer, "VALUE"))
2065 cat.type = CCT_VALUE;
2066 else if (lex_match_id (lexer, "LABEL"))
2067 cat.type = CCT_LABEL;
2070 cat.type = CCT_FUNCTION;
2071 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2074 if (lex_match (lexer, T_LPAREN))
2076 cat.sort_var = parse_variable (lexer, dict);
2080 if (cat.sort_function == CTSF_PTILE)
2082 lex_match (lexer, T_COMMA);
2083 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2085 cat.percentile = lex_number (lexer);
2089 if (!lex_force_match (lexer, T_RPAREN))
2092 else if (ctables_function_availability (cat.sort_function)
2095 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2100 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2102 lex_match (lexer, T_EQUALS);
2103 if (lex_match_id (lexer, "INCLUDE"))
2104 cat.include_missing = true;
2105 else if (lex_match_id (lexer, "EXCLUDE"))
2106 cat.include_missing = false;
2109 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2113 else if (lex_match_id (lexer, "TOTAL"))
2115 lex_match (lexer, T_EQUALS);
2116 if (!parse_bool (lexer, &show_totals))
2119 else if (lex_match_id (lexer, "LABEL"))
2121 lex_match (lexer, T_EQUALS);
2122 if (!lex_force_string (lexer))
2125 total_label = ss_xstrdup (lex_tokss (lexer));
2128 else if (lex_match_id (lexer, "POSITION"))
2130 lex_match (lexer, T_EQUALS);
2131 if (lex_match_id (lexer, "BEFORE"))
2132 totals_before = true;
2133 else if (lex_match_id (lexer, "AFTER"))
2134 totals_before = false;
2137 lex_error_expecting (lexer, "BEFORE", "AFTER");
2141 else if (lex_match_id (lexer, "EMPTY"))
2143 lex_match (lexer, T_EQUALS);
2144 if (lex_match_id (lexer, "INCLUDE"))
2145 c->show_empty = true;
2146 else if (lex_match_id (lexer, "EXCLUDE"))
2147 c->show_empty = false;
2150 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2157 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2158 "TOTAL", "LABEL", "POSITION", "EMPTY");
2160 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category described by CAT. */
2167 if (c->n_cats >= allocated_cats)
2168 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2169 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, either at the front of the array or at
   its end. */
2174 if (c->n_cats >= allocated_cats)
2175 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2177 struct ctables_category *totals;
2180 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2181 totals = &c->cats[0];
2184 totals = &c->cats[c->n_cats];
2187 *totals = (struct ctables_category) {
2189 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first, and backward
   otherwise, recording a subtotal pointer in categories along the way. */
2193 struct ctables_category *subtotal = NULL;
2194 for (size_t i = totals_before ? 0 : c->n_cats;
2195 totals_before ? i < c->n_cats : i-- > 0;
2196 totals_before ? i++ : 0)
2198 struct ctables_category *cat = &c->cats[i];
2207 cat->subtotal = subtotal;
2210 case CCT_POSTCOMPUTE:
2221 case CCT_EXCLUDED_MISSING:
/* If an explicit category list was given, validate its entries against
   the variables' types. */
2226 if (cats_start_ofs != -1)
2228 struct msg_location *cats_location
2229 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2230 for (size_t i = 0; i < c->n_cats; i++)
2232 struct ctables_category *cat = &c->cats[i];
2235 case CCT_POSTCOMPUTE:
2236 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2237 if (!ctables_recursive_check_postcompute (dict, cat->pc->expr,
2238 cat, c, cats_location))
2244 for (size_t j = 0; j < n_vars; j++)
2245 if (var_is_alpha (vars[j]))
2247 msg_at (SE, cat->location,
2248 _("This category specification may be applied "
2249 "only to numeric variables, but this "
2250 "subcommand tries to apply it to string "
2252 var_get_name (vars[j]));
2261 if (!parse_category_string (cat->location, cat->string, dict,
2262 common_format->type, &n))
2265 ss_dealloc (&cat->string);
2267 cat->type = CCT_NUMBER;
2270 else if (!all_strings (vars, n_vars, cat))
2279 if (!cat->srange[0].string)
2281 else if (!parse_category_string (cat->location,
2282 cat->srange[0], dict,
2283 common_format->type, &n[0]))
2286 if (!cat->srange[1].string)
2288 else if (!parse_category_string (cat->location,
2289 cat->srange[1], dict,
2290 common_format->type, &n[1]))
2293 ss_dealloc (&cat->srange[0]);
2294 ss_dealloc (&cat->srange[1]);
2296 cat->type = CCT_NRANGE;
2297 cat->nrange[0] = n[0];
2298 cat->nrange[1] = n[1];
2300 else if (!all_strings (vars, n_vars, cat))
2311 case CCT_EXCLUDED_MISSING:
/* Called by ctables_stack_uninit() on each nest of a stack.
   NOTE(review): presumably releases the storage owned by NEST without
   freeing NEST itself -- confirm against the body. */
2321 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes each of STACK's N nests and then frees the array that
   holds them. */
2328 ctables_stack_uninit (struct ctables_stack *stack)
2332 for (size_t i = 0; i < stack->n; i++)
2333 ctables_nest_uninit (&stack->nests[i]);
2334 free (stack->nests);
/* Returns the stack obtained by nesting every nest of S0 with every nest
   of S1, so that the result has room for S0.n * S1.n nests.

   Each new nest's variable list is the outer nest's variables followed by
   the inner nest's.  Its scale index is the outer nest's if that has one,
   otherwise the inner nest's shifted by the number of outer variables.
   Summary specs for every variant are cloned from one of the two inputs,
   chosen according to which of them has a CSV_CELL spec variable.

   S0 and S1 are passed by value and uninitialized before returning.

   NOTE(review): AXES is allocated below, but no later use of it appears
   nearby -- confirm that it is stored in the new nest or freed.
   NOTE(review): the S1.n * sizeof product is computed before it reaches
   xnmalloc(), so that multiplication is not overflow-checked. */
2338 static struct ctables_stack
2339 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2346 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2347 for (size_t i = 0; i < s0.n; i++)
2348 for (size_t j = 0; j < s1.n; j++)
2350 const struct ctables_nest *a = &s0.nests[i];
2351 const struct ctables_nest *b = &s1.nests[j];
2353 size_t allocate = a->n + b->n;
2354 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2355 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2357 for (size_t k = 0; k < a->n; k++)
2358 vars[n++] = a->vars[k];
2359 for (size_t k = 0; k < b->n; k++)
2360 vars[n++] = b->vars[k];
2361 assert (n == allocate);
2363 const struct ctables_nest *summary_src;
2364 if (!a->specs[CSV_CELL].var)
2366 else if (!b->specs[CSV_CELL].var)
2371 struct ctables_nest *new = &stack.nests[stack.n++];
2372 *new = (struct ctables_nest) {
2374 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2375 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2379 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2380 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2382 ctables_stack_uninit (&s0);
2383 ctables_stack_uninit (&s1);
/* Returns a new stack that holds S0's nests followed by S1's.  The nest
   structures are copied by assignment, not cloned.  Each nest that comes
   from S1 has its group_head increased by S0.n, the number of nests placed
   ahead of it. */
2387 static struct ctables_stack
2388 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2390 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2391 for (size_t i = 0; i < s0.n; i++)
2392 stack.nests[stack.n++] = s0.nests[i];
2393 for (size_t i = 0; i < s1.n; i++)
2395 stack.nests[stack.n] = s1.nests[i];
2396 stack.nests[stack.n].group_head += s0.n;
2399 assert (stack.n == s0.n + s1.n);
/* Returns a stack that contains a single newly allocated nest built from
   variable axis A.

   The nest's scale index is 0 if A is a scale variable and SIZE_MAX
   otherwise.  If A has any CSV_CELL summary specs, or is scale, then A's
   spec set for every summary variant is cloned into the nest, and A's
   variable and scale flag are recorded in each cloned set. */
2405 static struct ctables_stack
2406 var_fts (const struct ctables_axis *a)
2408 struct variable **vars = xmalloc (sizeof *vars);
2411 struct ctables_nest *nest = xmalloc (sizeof *nest);
2412 *nest = (struct ctables_nest) {
2415 .scale_idx = a->scale ? 0 : SIZE_MAX,
2417 if (a->specs[CSV_CELL].n || a->scale)
2418 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2420 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2421 nest->specs[sv].var = a->var;
2422 nest->specs[sv].is_scale = a->scale;
2424 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts axis expression A into a stack of nests.

   One case yields an empty stack (n = 0).  The two binary cases enumerate
   both sub-axes recursively and combine the results with stack_fts() or
   nest_fts().
   NOTE(review): presumably stack_fts() serves CTAO_STACK and nest_fts()
   serves CTAO_NEST -- confirm against the case labels. */
2427 static struct ctables_stack
2428 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2431 return (struct ctables_stack) { .n = 0 };
2439 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2440 enumerate_fts (axis_type, a->subs[1]));
2443 /* This should consider any of the scale variables found in the result to
2444 be linked to each other listwise for SMISSING=LISTWISE. */
2445 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2446 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for a single summary.  The members are grouped by the
   summary functions that use them, as the comments inside note; being a
   union, only the member that matches the summary's function is
   meaningful. */
2452 union ctables_summary
2454 /* COUNT, VALIDN, TOTALN. */
2457 /* MINIMUM, MAXIMUM, RANGE. */
2464 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2465 struct moments1 *moments;
2467 /* MEDIAN, MODE, PTILE. */
2470 struct casewriter *writer;
2475 /* XXX multiple response */
/* Initializes accumulator S for the summary function given by
   SS->function.

   The count and percentage functions, weighted and unweighted (U*), share
   the first two groups of case labels.  The minimum/maximum group starts
   both bounds at SYSMIS.  The sum and variance group creates a moments
   accumulator that computes up to the variance.  The final group creates a
   sorting case writer. */
2479 ctables_summary_init (union ctables_summary *s,
2480 const struct ctables_summary_spec *ss)
2482 switch (ss->function)
2486 case CTSF_ROWPCT_COUNT:
2487 case CTSF_COLPCT_COUNT:
2488 case CTSF_TABLEPCT_COUNT:
2489 case CTSF_SUBTABLEPCT_COUNT:
2490 case CTSF_LAYERPCT_COUNT:
2491 case CTSF_LAYERROWPCT_COUNT:
2492 case CTSF_LAYERCOLPCT_COUNT:
2493 case CTSF_ROWPCT_VALIDN:
2494 case CTSF_COLPCT_VALIDN:
2495 case CTSF_TABLEPCT_VALIDN:
2496 case CTSF_SUBTABLEPCT_VALIDN:
2497 case CTSF_LAYERPCT_VALIDN:
2498 case CTSF_LAYERROWPCT_VALIDN:
2499 case CTSF_LAYERCOLPCT_VALIDN:
2500 case CTSF_ROWPCT_TOTALN:
2501 case CTSF_COLPCT_TOTALN:
2502 case CTSF_TABLEPCT_TOTALN:
2503 case CTSF_SUBTABLEPCT_TOTALN:
2504 case CTSF_LAYERPCT_TOTALN:
2505 case CTSF_LAYERROWPCT_TOTALN:
2506 case CTSF_LAYERCOLPCT_TOTALN:
2513 case CTSF_UROWPCT_COUNT:
2514 case CTSF_UCOLPCT_COUNT:
2515 case CTSF_UTABLEPCT_COUNT:
2516 case CTSF_USUBTABLEPCT_COUNT:
2517 case CTSF_ULAYERPCT_COUNT:
2518 case CTSF_ULAYERROWPCT_COUNT:
2519 case CTSF_ULAYERCOLPCT_COUNT:
2520 case CTSF_UROWPCT_VALIDN:
2521 case CTSF_UCOLPCT_VALIDN:
2522 case CTSF_UTABLEPCT_VALIDN:
2523 case CTSF_USUBTABLEPCT_VALIDN:
2524 case CTSF_ULAYERPCT_VALIDN:
2525 case CTSF_ULAYERROWPCT_VALIDN:
2526 case CTSF_ULAYERCOLPCT_VALIDN:
2527 case CTSF_UROWPCT_TOTALN:
2528 case CTSF_UCOLPCT_TOTALN:
2529 case CTSF_UTABLEPCT_TOTALN:
2530 case CTSF_USUBTABLEPCT_TOTALN:
2531 case CTSF_ULAYERPCT_TOTALN:
2532 case CTSF_ULAYERROWPCT_TOTALN:
2533 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS in min or max means that no value has been recorded yet;
   ctables_summary_add() tests for it before comparing. */
2543 s->min = s->max = SYSMIS;
2551 case CTSF_ROWPCT_SUM:
2552 case CTSF_COLPCT_SUM:
2553 case CTSF_TABLEPCT_SUM:
2554 case CTSF_SUBTABLEPCT_SUM:
2555 case CTSF_LAYERPCT_SUM:
2556 case CTSF_LAYERROWPCT_SUM:
2557 case CTSF_LAYERCOLPCT_SUM:
2562 case CTSF_UVARIANCE:
2563 case CTSF_UROWPCT_SUM:
2564 case CTSF_UCOLPCT_SUM:
2565 case CTSF_UTABLEPCT_SUM:
2566 case CTSF_USUBTABLEPCT_SUM:
2567 case CTSF_ULAYERPCT_SUM:
2568 case CTSF_ULAYERROWPCT_SUM:
2569 case CTSF_ULAYERCOLPCT_SUM:
2570 s->moments = moments1_create (MOMENT_VARIANCE);
/* The writer stores two width-0 (numeric) values per case and sorts them
   in ascending order on the first.  ctables_summary_add() writes the value
   into the first and the weight into the second. */
2580 struct caseproto *proto = caseproto_create ();
2581 proto = caseproto_add_width (proto, 0);
2582 proto = caseproto_add_width (proto, 0);
2584 struct subcase ordering;
2585 subcase_init (&ordering, 0, 0, SC_ASCEND);
2586 s->writer = sort_create_writer (&ordering, proto);
2587 subcase_uninit (&ordering);
2588 caseproto_unref (proto);
/* Releases whatever accumulator S holds for the summary function given by
   SS->function.  The sum and variance group destroys the moments
   accumulator, and the final group destroys the case writer.  No cleanup
   call accompanies the count and percentage case labels. */
2598 ctables_summary_uninit (union ctables_summary *s,
2599 const struct ctables_summary_spec *ss)
2601 switch (ss->function)
2605 case CTSF_ROWPCT_COUNT:
2606 case CTSF_COLPCT_COUNT:
2607 case CTSF_TABLEPCT_COUNT:
2608 case CTSF_SUBTABLEPCT_COUNT:
2609 case CTSF_LAYERPCT_COUNT:
2610 case CTSF_LAYERROWPCT_COUNT:
2611 case CTSF_LAYERCOLPCT_COUNT:
2612 case CTSF_ROWPCT_VALIDN:
2613 case CTSF_COLPCT_VALIDN:
2614 case CTSF_TABLEPCT_VALIDN:
2615 case CTSF_SUBTABLEPCT_VALIDN:
2616 case CTSF_LAYERPCT_VALIDN:
2617 case CTSF_LAYERROWPCT_VALIDN:
2618 case CTSF_LAYERCOLPCT_VALIDN:
2619 case CTSF_ROWPCT_TOTALN:
2620 case CTSF_COLPCT_TOTALN:
2621 case CTSF_TABLEPCT_TOTALN:
2622 case CTSF_SUBTABLEPCT_TOTALN:
2623 case CTSF_LAYERPCT_TOTALN:
2624 case CTSF_LAYERROWPCT_TOTALN:
2625 case CTSF_LAYERCOLPCT_TOTALN:
2632 case CTSF_UROWPCT_COUNT:
2633 case CTSF_UCOLPCT_COUNT:
2634 case CTSF_UTABLEPCT_COUNT:
2635 case CTSF_USUBTABLEPCT_COUNT:
2636 case CTSF_ULAYERPCT_COUNT:
2637 case CTSF_ULAYERROWPCT_COUNT:
2638 case CTSF_ULAYERCOLPCT_COUNT:
2639 case CTSF_UROWPCT_VALIDN:
2640 case CTSF_UCOLPCT_VALIDN:
2641 case CTSF_UTABLEPCT_VALIDN:
2642 case CTSF_USUBTABLEPCT_VALIDN:
2643 case CTSF_ULAYERPCT_VALIDN:
2644 case CTSF_ULAYERROWPCT_VALIDN:
2645 case CTSF_ULAYERCOLPCT_VALIDN:
2646 case CTSF_UROWPCT_TOTALN:
2647 case CTSF_UCOLPCT_TOTALN:
2648 case CTSF_UTABLEPCT_TOTALN:
2649 case CTSF_USUBTABLEPCT_TOTALN:
2650 case CTSF_ULAYERPCT_TOTALN:
2651 case CTSF_ULAYERROWPCT_TOTALN:
2652 case CTSF_ULAYERCOLPCT_TOTALN:
2668 case CTSF_ROWPCT_SUM:
2669 case CTSF_COLPCT_SUM:
2670 case CTSF_TABLEPCT_SUM:
2671 case CTSF_SUBTABLEPCT_SUM:
2672 case CTSF_LAYERPCT_SUM:
2673 case CTSF_LAYERROWPCT_SUM:
2674 case CTSF_LAYERCOLPCT_SUM:
2679 case CTSF_UVARIANCE:
2680 case CTSF_UROWPCT_SUM:
2681 case CTSF_UCOLPCT_SUM:
2682 case CTSF_UTABLEPCT_SUM:
2683 case CTSF_USUBTABLEPCT_SUM:
2684 case CTSF_ULAYERPCT_SUM:
2685 case CTSF_ULAYERROWPCT_SUM:
2686 case CTSF_ULAYERCOLPCT_SUM:
2687 moments1_destroy (s->moments);
2696 casewriter_destroy (s->writer);
/* Updates accumulator S, which belongs to summary spec SS, with one case
   whose value for VAR is VALUE.

   IS_SCALE, IS_SCALE_MISSING, IS_MISSING, and EXCLUDED_MISSING describe
   the case and decide whether it contributes.  D_WEIGHT and E_WEIGHT are
   two weights for the case: count-style functions add one of them to
   S->count.  Minimum/maximum functions update S->min and S->max from
   VALUE->f when the scale value is not missing.  Sum and variance
   functions add VALUE->f to S->moments with weight E_WEIGHT, or with
   weight 1.0 for the unweighted (U*) variants, again only when the scale
   value is not missing.  The remaining functions write the pair
   (VALUE->f, E_WEIGHT) to S->writer and add E_WEIGHT to S->ovalid. */
2702 ctables_summary_add (union ctables_summary *s,
2703 const struct ctables_summary_spec *ss,
2704 const struct variable *var, const union value *value,
2705 bool is_scale, bool is_scale_missing,
2706 bool is_missing, bool excluded_missing,
2707 double d_weight, double e_weight)
2709 /* To determine whether a case is included in a given table for a particular
2710 kind of summary, consider the following charts for each variable in the
2711 table. Only if "yes" appears for every variable for the summary is the
2714 Categorical variables: VALIDN COUNT TOTALN
2715 Valid values in included categories yes yes yes
2716 Missing values in included categories --- yes yes
2717 Missing values in excluded categories --- --- yes
2718 Valid values in excluded categories --- --- ---
2720 Scale variables: VALIDN COUNT TOTALN
2721 Valid value yes yes yes
2722 Missing value --- yes yes
2724 Missing values include both user- and system-missing. (The system-missing
2725 value is always in an excluded category.)
2727 switch (ss->function)
2730 case CTSF_ROWPCT_TOTALN:
2731 case CTSF_COLPCT_TOTALN:
2732 case CTSF_TABLEPCT_TOTALN:
2733 case CTSF_SUBTABLEPCT_TOTALN:
2734 case CTSF_LAYERPCT_TOTALN:
2735 case CTSF_LAYERROWPCT_TOTALN:
2736 case CTSF_LAYERCOLPCT_TOTALN:
2737 s->count += d_weight;
2741 case CTSF_UROWPCT_TOTALN:
2742 case CTSF_UCOLPCT_TOTALN:
2743 case CTSF_UTABLEPCT_TOTALN:
2744 case CTSF_USUBTABLEPCT_TOTALN:
2745 case CTSF_ULAYERPCT_TOTALN:
2746 case CTSF_ULAYERROWPCT_TOTALN:
2747 case CTSF_ULAYERCOLPCT_TOTALN:
2752 case CTSF_ROWPCT_COUNT:
2753 case CTSF_COLPCT_COUNT:
2754 case CTSF_TABLEPCT_COUNT:
2755 case CTSF_SUBTABLEPCT_COUNT:
2756 case CTSF_LAYERPCT_COUNT:
2757 case CTSF_LAYERROWPCT_COUNT:
2758 case CTSF_LAYERCOLPCT_COUNT:
2759 if (is_scale || !excluded_missing)
2760 s->count += d_weight;
2764 case CTSF_UROWPCT_COUNT:
2765 case CTSF_UCOLPCT_COUNT:
2766 case CTSF_UTABLEPCT_COUNT:
2767 case CTSF_USUBTABLEPCT_COUNT:
2768 case CTSF_ULAYERPCT_COUNT:
2769 case CTSF_ULAYERROWPCT_COUNT:
2770 case CTSF_ULAYERCOLPCT_COUNT:
2771 if (is_scale || !excluded_missing)
2776 case CTSF_ROWPCT_VALIDN:
2777 case CTSF_COLPCT_VALIDN:
2778 case CTSF_TABLEPCT_VALIDN:
2779 case CTSF_SUBTABLEPCT_VALIDN:
2780 case CTSF_LAYERPCT_VALIDN:
2781 case CTSF_LAYERROWPCT_VALIDN:
2782 case CTSF_LAYERCOLPCT_VALIDN:
2786 s->count += d_weight;
2790 case CTSF_UROWPCT_VALIDN:
2791 case CTSF_UCOLPCT_VALIDN:
2792 case CTSF_UTABLEPCT_VALIDN:
2793 case CTSF_USUBTABLEPCT_VALIDN:
2794 case CTSF_ULAYERPCT_VALIDN:
2795 case CTSF_ULAYERROWPCT_VALIDN:
2796 case CTSF_ULAYERCOLPCT_VALIDN:
2805 s->count += d_weight;
2814 if (is_scale || !excluded_missing)
2815 s->count += e_weight;
2822 s->count += e_weight;
2826 s->count += e_weight;
2832 if (!is_scale_missing)
2834 assert (!var_is_alpha (var)); /* XXX? */
2835 if (s->min == SYSMIS || value->f < s->min)
2837 if (s->max == SYSMIS || value->f > s->max)
2847 case CTSF_ROWPCT_SUM:
2848 case CTSF_COLPCT_SUM:
2849 case CTSF_TABLEPCT_SUM:
2850 case CTSF_SUBTABLEPCT_SUM:
2851 case CTSF_LAYERPCT_SUM:
2852 case CTSF_LAYERROWPCT_SUM:
2853 case CTSF_LAYERCOLPCT_SUM:
2854 if (!is_scale_missing)
2855 moments1_add (s->moments, value->f, e_weight);
2862 case CTSF_UVARIANCE:
2863 case CTSF_UROWPCT_SUM:
2864 case CTSF_UCOLPCT_SUM:
2865 case CTSF_UTABLEPCT_SUM:
2866 case CTSF_USUBTABLEPCT_SUM:
2867 case CTSF_ULAYERPCT_SUM:
2868 case CTSF_ULAYERROWPCT_SUM:
2869 case CTSF_ULAYERCOLPCT_SUM:
2870 if (!is_scale_missing)
2871 moments1_add (s->moments, value->f, 1.0);
2877 d_weight = e_weight = 1.0;
2882 if (!is_scale_missing)
2884 s->ovalid += e_weight;
2886 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2887 *case_num_rw_idx (c, 0) = value->f;
2888 *case_num_rw_idx (c, 1) = e_weight;
2889 casewriter_write (s->writer, c);
/* Maps summary FUNCTION to the type of domain (CTDT_*) that the function's
   percentage is taken over.  The case labels group the functions by
   percentage family (COLPCT, LAYERCOLPCT, LAYERPCT, LAYERROWPCT, ROWPCT,
   SUBTABLEPCT, TABLEPCT), weighted and unweighted ("U") variants together.
   In the lines shown here the LAYERCOLPCT family yields CTDT_LAYERCOL, the
   LAYERROWPCT family CTDT_LAYERROW, and the SUBTABLEPCT family CTDT_SUBTABLE.

   NOTE(review): the return statements of the remaining groups are not
   visible in this excerpt; presumably each returns the matching CTDT_*
   value -- confirm. */
2895 static enum ctables_domain_type
2896 ctables_function_domain (enum ctables_summary_function function)
2926 case CTSF_UVARIANCE:
2932 case CTSF_COLPCT_COUNT:
2933 case CTSF_COLPCT_SUM:
2934 case CTSF_COLPCT_TOTALN:
2935 case CTSF_COLPCT_VALIDN:
2936 case CTSF_UCOLPCT_COUNT:
2937 case CTSF_UCOLPCT_SUM:
2938 case CTSF_UCOLPCT_TOTALN:
2939 case CTSF_UCOLPCT_VALIDN:
2942 case CTSF_LAYERCOLPCT_COUNT:
2943 case CTSF_LAYERCOLPCT_SUM:
2944 case CTSF_LAYERCOLPCT_TOTALN:
2945 case CTSF_LAYERCOLPCT_VALIDN:
2946 case CTSF_ULAYERCOLPCT_COUNT:
2947 case CTSF_ULAYERCOLPCT_SUM:
2948 case CTSF_ULAYERCOLPCT_TOTALN:
2949 case CTSF_ULAYERCOLPCT_VALIDN:
2950 return CTDT_LAYERCOL;
2952 case CTSF_LAYERPCT_COUNT:
2953 case CTSF_LAYERPCT_SUM:
2954 case CTSF_LAYERPCT_TOTALN:
2955 case CTSF_LAYERPCT_VALIDN:
2956 case CTSF_ULAYERPCT_COUNT:
2957 case CTSF_ULAYERPCT_SUM:
2958 case CTSF_ULAYERPCT_TOTALN:
2959 case CTSF_ULAYERPCT_VALIDN:
2962 case CTSF_LAYERROWPCT_COUNT:
2963 case CTSF_LAYERROWPCT_SUM:
2964 case CTSF_LAYERROWPCT_TOTALN:
2965 case CTSF_LAYERROWPCT_VALIDN:
2966 case CTSF_ULAYERROWPCT_COUNT:
2967 case CTSF_ULAYERROWPCT_SUM:
2968 case CTSF_ULAYERROWPCT_TOTALN:
2969 case CTSF_ULAYERROWPCT_VALIDN:
2970 return CTDT_LAYERROW;
2972 case CTSF_ROWPCT_COUNT:
2973 case CTSF_ROWPCT_SUM:
2974 case CTSF_ROWPCT_TOTALN:
2975 case CTSF_ROWPCT_VALIDN:
2976 case CTSF_UROWPCT_COUNT:
2977 case CTSF_UROWPCT_SUM:
2978 case CTSF_UROWPCT_TOTALN:
2979 case CTSF_UROWPCT_VALIDN:
2982 case CTSF_SUBTABLEPCT_COUNT:
2983 case CTSF_SUBTABLEPCT_SUM:
2984 case CTSF_SUBTABLEPCT_TOTALN:
2985 case CTSF_SUBTABLEPCT_VALIDN:
2986 case CTSF_USUBTABLEPCT_COUNT:
2987 case CTSF_USUBTABLEPCT_SUM:
2988 case CTSF_USUBTABLEPCT_TOTALN:
2989 case CTSF_USUBTABLEPCT_VALIDN:
2990 return CTDT_SUBTABLE;
2992 case CTSF_TABLEPCT_COUNT:
2993 case CTSF_TABLEPCT_SUM:
2994 case CTSF_TABLEPCT_TOTALN:
2995 case CTSF_TABLEPCT_VALIDN:
2996 case CTSF_UTABLEPCT_COUNT:
2997 case CTSF_UTABLEPCT_SUM:
2998 case CTSF_UTABLEPCT_TOTALN:
2999 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION.  The case labels form two groups: the
   percentage functions based on COUNT, TOTALN, or VALIDN, followed by the
   percentage functions based on SUM (the "*PCT_SUM" family).

   NOTE(review): the return statements are not visible in this excerpt;
   judging by the name, the function presumably reports whether FUNCTION
   belongs to the "*PCT_SUM" group -- confirm.

   NOTE(review): the declared return type is enum ctables_domain_type, the
   same as ctables_function_domain() just above.  For an "is_" predicate that
   looks like a copy-and-paste slip for bool -- confirm and correct together
   with the callers. */
3006 static enum ctables_domain_type
3007 ctables_function_is_pctsum (enum ctables_summary_function function)
3037 case CTSF_UVARIANCE:
3041 case CTSF_COLPCT_COUNT:
3042 case CTSF_COLPCT_TOTALN:
3043 case CTSF_COLPCT_VALIDN:
3044 case CTSF_UCOLPCT_COUNT:
3045 case CTSF_UCOLPCT_TOTALN:
3046 case CTSF_UCOLPCT_VALIDN:
3047 case CTSF_LAYERCOLPCT_COUNT:
3048 case CTSF_LAYERCOLPCT_TOTALN:
3049 case CTSF_LAYERCOLPCT_VALIDN:
3050 case CTSF_ULAYERCOLPCT_COUNT:
3051 case CTSF_ULAYERCOLPCT_TOTALN:
3052 case CTSF_ULAYERCOLPCT_VALIDN:
3053 case CTSF_LAYERPCT_COUNT:
3054 case CTSF_LAYERPCT_TOTALN:
3055 case CTSF_LAYERPCT_VALIDN:
3056 case CTSF_ULAYERPCT_COUNT:
3057 case CTSF_ULAYERPCT_TOTALN:
3058 case CTSF_ULAYERPCT_VALIDN:
3059 case CTSF_LAYERROWPCT_COUNT:
3060 case CTSF_LAYERROWPCT_TOTALN:
3061 case CTSF_LAYERROWPCT_VALIDN:
3062 case CTSF_ULAYERROWPCT_COUNT:
3063 case CTSF_ULAYERROWPCT_TOTALN:
3064 case CTSF_ULAYERROWPCT_VALIDN:
3065 case CTSF_ROWPCT_COUNT:
3066 case CTSF_ROWPCT_TOTALN:
3067 case CTSF_ROWPCT_VALIDN:
3068 case CTSF_UROWPCT_COUNT:
3069 case CTSF_UROWPCT_TOTALN:
3070 case CTSF_UROWPCT_VALIDN:
3071 case CTSF_SUBTABLEPCT_COUNT:
3072 case CTSF_SUBTABLEPCT_TOTALN:
3073 case CTSF_SUBTABLEPCT_VALIDN:
3074 case CTSF_USUBTABLEPCT_COUNT:
3075 case CTSF_USUBTABLEPCT_TOTALN:
3076 case CTSF_USUBTABLEPCT_VALIDN:
3077 case CTSF_TABLEPCT_COUNT:
3078 case CTSF_TABLEPCT_TOTALN:
3079 case CTSF_TABLEPCT_VALIDN:
3080 case CTSF_UTABLEPCT_COUNT:
3081 case CTSF_UTABLEPCT_TOTALN:
3082 case CTSF_UTABLEPCT_VALIDN:
3085 case CTSF_COLPCT_SUM:
3086 case CTSF_UCOLPCT_SUM:
3087 case CTSF_LAYERCOLPCT_SUM:
3088 case CTSF_ULAYERCOLPCT_SUM:
3089 case CTSF_LAYERPCT_SUM:
3090 case CTSF_ULAYERPCT_SUM:
3091 case CTSF_LAYERROWPCT_SUM:
3092 case CTSF_ULAYERROWPCT_SUM:
3093 case CTSF_ROWPCT_SUM:
3094 case CTSF_UROWPCT_SUM:
3095 case CTSF_SUBTABLEPCT_SUM:
3096 case CTSF_USUBTABLEPCT_SUM:
3097 case CTSF_TABLEPCT_SUM:
3098 case CTSF_UTABLEPCT_SUM:
/* Computes the value of summary S, accumulated for CELL, as specified by SS.

   For the percentage functions, the numerator is what S accumulated
   (S->count, or weight * mean from S->moments for the "*PCT_SUM" family) and
   the denominator is the matching total (e_count, u_count, e_valid, u_valid,
   e_total, u_total, or a per-variable e_sum/u_sum) of the domain selected by
   ctables_function_domain(); the quotient is scaled by 100.  The "*PCT_SUM"
   cases yield SYSMIS for a zero denominator.

   Moment-based summaries come from moments1_calculate().  Percentile and
   mode summaries read back the values written to S->writer and store their
   result in S->ovalue.

   NOTE(review): several cases, and the zero-denominator result of the
   count-based percentages, are not visible in this excerpt. */
3106 ctables_summary_value (const struct ctables_cell *cell,
3107 union ctables_summary *s,
3108 const struct ctables_summary_spec *ss)
3110 switch (ss->function)
3117 case CTSF_ROWPCT_COUNT:
3118 case CTSF_COLPCT_COUNT:
3119 case CTSF_TABLEPCT_COUNT:
3120 case CTSF_SUBTABLEPCT_COUNT:
3121 case CTSF_LAYERPCT_COUNT:
3122 case CTSF_LAYERROWPCT_COUNT:
3123 case CTSF_LAYERCOLPCT_COUNT:
3125 enum ctables_domain_type d = ctables_function_domain (ss->function);
3126 return (cell->domains[d]->e_count
3127 ? s->count / cell->domains[d]->e_count * 100
3131 case CTSF_UROWPCT_COUNT:
3132 case CTSF_UCOLPCT_COUNT:
3133 case CTSF_UTABLEPCT_COUNT:
3134 case CTSF_USUBTABLEPCT_COUNT:
3135 case CTSF_ULAYERPCT_COUNT:
3136 case CTSF_ULAYERROWPCT_COUNT:
3137 case CTSF_ULAYERCOLPCT_COUNT:
3139 enum ctables_domain_type d = ctables_function_domain (ss->function);
3140 return (cell->domains[d]->u_count
3141 ? s->count / cell->domains[d]->u_count * 100
3145 case CTSF_ROWPCT_VALIDN:
3146 case CTSF_COLPCT_VALIDN:
3147 case CTSF_TABLEPCT_VALIDN:
3148 case CTSF_SUBTABLEPCT_VALIDN:
3149 case CTSF_LAYERPCT_VALIDN:
3150 case CTSF_LAYERROWPCT_VALIDN:
3151 case CTSF_LAYERCOLPCT_VALIDN:
3153 enum ctables_domain_type d = ctables_function_domain (ss->function);
3154 return (cell->domains[d]->e_valid
3155 ? s->count / cell->domains[d]->e_valid * 100
3159 case CTSF_UROWPCT_VALIDN:
3160 case CTSF_UCOLPCT_VALIDN:
3161 case CTSF_UTABLEPCT_VALIDN:
3162 case CTSF_USUBTABLEPCT_VALIDN:
3163 case CTSF_ULAYERPCT_VALIDN:
3164 case CTSF_ULAYERROWPCT_VALIDN:
3165 case CTSF_ULAYERCOLPCT_VALIDN:
3167 enum ctables_domain_type d = ctables_function_domain (ss->function);
3168 return (cell->domains[d]->u_valid
3169 ? s->count / cell->domains[d]->u_valid * 100
3173 case CTSF_ROWPCT_TOTALN:
3174 case CTSF_COLPCT_TOTALN:
3175 case CTSF_TABLEPCT_TOTALN:
3176 case CTSF_SUBTABLEPCT_TOTALN:
3177 case CTSF_LAYERPCT_TOTALN:
3178 case CTSF_LAYERROWPCT_TOTALN:
3179 case CTSF_LAYERCOLPCT_TOTALN:
3181 enum ctables_domain_type d = ctables_function_domain (ss->function);
3182 return (cell->domains[d]->e_total
3183 ? s->count / cell->domains[d]->e_total * 100
3187 case CTSF_UROWPCT_TOTALN:
3188 case CTSF_UCOLPCT_TOTALN:
3189 case CTSF_UTABLEPCT_TOTALN:
3190 case CTSF_USUBTABLEPCT_TOTALN:
3191 case CTSF_ULAYERPCT_TOTALN:
3192 case CTSF_ULAYERROWPCT_TOTALN:
3193 case CTSF_ULAYERCOLPCT_TOTALN:
3195 enum ctables_domain_type d = ctables_function_domain (ss->function);
3196 return (cell->domains[d]->u_total
3197 ? s->count / cell->domains[d]->u_total * 100
3218 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3224 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3231 double weight, variance;
3232 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3233 return calc_semean (variance, weight);
3240 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3241 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3247 double weight, mean;
3248 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3249 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3253 case CTSF_UVARIANCE:
3256 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3260 case CTSF_ROWPCT_SUM:
3261 case CTSF_COLPCT_SUM:
3262 case CTSF_TABLEPCT_SUM:
3263 case CTSF_SUBTABLEPCT_SUM:
3264 case CTSF_LAYERPCT_SUM:
3265 case CTSF_LAYERROWPCT_SUM:
3266 case CTSF_LAYERCOLPCT_SUM:
3268 double weight, mean;
3269 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3270 if (weight == SYSMIS || mean == SYSMIS)
3272 enum ctables_domain_type d = ctables_function_domain (ss->function);
3273 double num = weight * mean;
3274 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3275 return denom != 0 ? num / denom * 100 : SYSMIS;
3277 case CTSF_UROWPCT_SUM:
3278 case CTSF_UCOLPCT_SUM:
3279 case CTSF_UTABLEPCT_SUM:
3280 case CTSF_USUBTABLEPCT_SUM:
3281 case CTSF_ULAYERPCT_SUM:
3282 case CTSF_ULAYERROWPCT_SUM:
3283 case CTSF_ULAYERCOLPCT_SUM:
3285 double weight, mean;
3286 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3287 if (weight == SYSMIS || mean == SYSMIS)
3289 enum ctables_domain_type d = ctables_function_domain (ss->function);
3290 double num = weight * mean;
3291 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3292 return denom != 0 ? num / denom * 100 : SYSMIS;
3301 struct casereader *reader = casewriter_make_reader (s->writer);
3304 struct percentile *ptile = percentile_create (
3305 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3306 struct order_stats *os = &ptile->parent;
3307 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3308 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3309 statistic_destroy (&ptile->parent.parent);
3317 struct casereader *reader = casewriter_make_reader (s->writer);
3320 struct mode *mode = mode_create ();
3321 struct order_stats *os = &mode->parent;
3322 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3323 s->ovalue = mode->mode;
3324 statistic_destroy (&mode->parent.parent);
3332 struct ctables_cell_sort_aux
3334 const struct ctables_nest *nest;
3335 enum pivot_axis_type a;
/* Three-way comparison callback for sorting an array of pointers to
   struct ctables_cell.  A_ and B_ point to the array elements; AUX_ is a
   struct ctables_cell_sort_aux that names the nest and the axis to compare.

   The cells are compared variable by variable, skipping the nest's scale
   variable.  Cells in different categories are ordered by category pointer.
   Within one category the order depends on the category type: some types
   need no further comparison, others compare the data values, and others
   compare the values' labels (a labeled value sorts after an unlabeled one,
   and two unlabeled values fall back to comparing the values).  Where the
   category carries a sort direction, a descending sort negates the result. */
3339 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3341 const struct ctables_cell_sort_aux *aux = aux_;
3342 struct ctables_cell *const *ap = a_;
3343 struct ctables_cell *const *bp = b_;
3344 const struct ctables_cell *a = *ap;
3345 const struct ctables_cell *b = *bp;
3347 const struct ctables_nest *nest = aux->nest;
3348 for (size_t i = 0; i < nest->n; i++)
3349 if (i != nest->scale_idx)
3351 const struct variable *var = nest->vars[i];
3352 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3353 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3354 if (a_cv->category != b_cv->category)
3355 return a_cv->category > b_cv->category ? 1 : -1;
3357 const union value *a_val = &a_cv->value;
3358 const union value *b_val = &b_cv->value;
3359 switch (a_cv->category->type)
3365 case CCT_POSTCOMPUTE:
3366 case CCT_EXCLUDED_MISSING:
3367 /* Must be equal. */
3375 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3383 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3385 return a_cv->category->sort_ascending ? cmp : -cmp;
3391 const char *a_label = var_lookup_value_label (var, a_val);
3392 const char *b_label = var_lookup_value_label (var, b_val);
3394 ? (b_label ? strcmp (a_label, b_label) : 1)
3395 : (b_label ? -1 : value_compare_3way (
3396 a_val, b_val, var_get_width (var))));
3398 return a_cv->category->sort_ascending ? cmp : -cmp;
3412 For each ctables_table:
3413 For each combination of row vars:
3414 For each combination of column vars:
3415 For each combination of layer vars:
3417 Make a table of row values:
3418 Sort entries by row values
3419 Assign a 0-based index to each actual value
3420 Construct a dimension
3421 Make a table of column values
3422 Make a table of layer values
3424 Fill the table entry using the indexes from before.
3427 static struct ctables_domain *
3428 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3429 enum ctables_domain_type domain)
3432 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3434 const struct ctables_nest *nest = s->nests[a];
3435 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3437 size_t v_idx = nest->domains[domain][i];
3438 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3439 hash = hash_pointer (cv->category, hash);
3440 if (cv->category->type != CCT_TOTAL
3441 && cv->category->type != CCT_SUBTOTAL
3442 && cv->category->type != CCT_POSTCOMPUTE)
3443 hash = value_hash (&cv->value,
3444 var_get_width (nest->vars[v_idx]), hash);
3448 struct ctables_domain *d;
3449 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3451 const struct ctables_cell *df = d->example;
3452 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3454 const struct ctables_nest *nest = s->nests[a];
3455 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3457 size_t v_idx = nest->domains[domain][i];
3458 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3459 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3460 if (cv1->category != cv2->category
3461 || (cv1->category->type != CCT_TOTAL
3462 && cv1->category->type != CCT_SUBTOTAL
3463 && cv1->category->type != CCT_POSTCOMPUTE
3464 && !value_equal (&cv1->value, &cv2->value,
3465 var_get_width (nest->vars[v_idx]))))
3474 struct ctables_sum *sums = (s->table->n_sum_vars
3475 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3478 d = xmalloc (sizeof *d);
3479 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3480 hmap_insert (&s->domains[domain], &d->node, hash);
3484 static struct substring
3485 rtrim_value (const union value *v, const struct variable *var)
3487 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3488 var_get_width (var));
3489 ss_rtrim (&s, ss_cstr (" "));
3494 in_string_range (const union value *v, const struct variable *var,
3495 const struct substring *srange)
3497 struct substring s = rtrim_value (v, var);
3498 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3499 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Searches categories C, from the last category to the first, for the one
   that value V of variable VAR belongs to.

   A system-missing numeric value is checked for before anything else.  The
   visible tests compare V against a category's number, string (ignoring
   trailing spaces in V), numeric range (-DBL_MAX and DBL_MAX stand for open
   ends), or string range, or check whether V is missing in VAR.  One group of
   category types matches any value, except that a missing value is accepted
   only if the category includes missing values.

   If the loop ends without returning, a missing V yields NULL and any other
   V yields OTHERNM, which is NULL unless the loop set it.

   NOTE(review): the case labels and several return statements are not
   visible in this excerpt. */
3502 static const struct ctables_category *
3503 ctables_categories_match (const struct ctables_categories *c,
3504 const union value *v, const struct variable *var)
3506 if (var_is_numeric (var) && v->f == SYSMIS)
3509 const struct ctables_category *othernm = NULL;
3510 for (size_t i = c->n_cats; i-- > 0; )
3512 const struct ctables_category *cat = &c->cats[i];
3516 if (cat->number == v->f)
3521 if (ss_equals (cat->string, rtrim_value (v, var)))
3526 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3527 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3532 if (in_string_range (v, var, cat->srange))
3537 if (var_is_value_missing (var, v))
3541 case CCT_POSTCOMPUTE:
3556 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3559 case CCT_EXCLUDED_MISSING:
3564 return var_is_value_missing (var, v) ? NULL : othernm;
3567 static const struct ctables_category *
3568 ctables_categories_total (const struct ctables_categories *c)
3570 const struct ctables_category *first = &c->cats[0];
3571 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3572 return (first->type == CCT_TOTAL ? first
3573 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell of section S that case C falls into, given the
   category CATS[axis][i] chosen for each non-scale variable of each axis.

   A cell is identified by the category of every non-scale variable plus, for
   categories other than totals, subtotals, and postcomputes, the variable's
   value in C.  An existing cell with the same identity is looked up in
   S->cells under the hash of those components.

   A new cell receives, for every variable of every axis, the category and a
   clone of C's value.  Total, subtotal, and postcompute categories add
   axis-dependent bits to the cell's omit_domains mask, and a postcompute
   category also marks the cell as postcompute.  Finally the cell's summaries
   are initialized from the summary axis's specs for the cell's summary
   variant, one domain of each type is attached with ctables_domain_insert(),
   and the cell is added to S->cells.

   NOTE(review): the code that builds a debugging NAME refers to a variable
   whose declaration is commented out, so it is presumably compiled out by
   directives not visible in this excerpt -- confirm. */
3577 static struct ctables_cell *
3578 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3579 const struct ctables_category *cats[PIVOT_N_AXES][10])
3582 enum ctables_summary_variant sv = CSV_CELL;
3583 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3585 const struct ctables_nest *nest = s->nests[a];
3586 for (size_t i = 0; i < nest->n; i++)
3587 if (i != nest->scale_idx)
3589 hash = hash_pointer (cats[a][i], hash);
3590 if (cats[a][i]->type != CCT_TOTAL
3591 && cats[a][i]->type != CCT_SUBTOTAL
3592 && cats[a][i]->type != CCT_POSTCOMPUTE)
3593 hash = value_hash (case_data (c, nest->vars[i]),
3594 var_get_width (nest->vars[i]), hash);
3600 struct ctables_cell *cell;
3601 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3603 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3605 const struct ctables_nest *nest = s->nests[a];
3606 for (size_t i = 0; i < nest->n; i++)
3607 if (i != nest->scale_idx
3608 && (cats[a][i] != cell->axes[a].cvs[i].category
3609 || (cats[a][i]->type != CCT_TOTAL
3610 && cats[a][i]->type != CCT_SUBTOTAL
3611 && cats[a][i]->type != CCT_POSTCOMPUTE
3612 && !value_equal (case_data (c, nest->vars[i]),
3613 &cell->axes[a].cvs[i].value,
3614 var_get_width (nest->vars[i])))))
3623 cell = xmalloc (sizeof *cell);
3626 cell->omit_domains = 0;
3627 cell->postcompute = false;
3628 //struct string name = DS_EMPTY_INITIALIZER;
3629 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3631 const struct ctables_nest *nest = s->nests[a];
3632 cell->axes[a].cvs = (nest->n
3633 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3635 for (size_t i = 0; i < nest->n; i++)
3637 const struct ctables_category *cat = cats[a][i];
3638 const struct variable *var = nest->vars[i];
3639 const union value *value = case_data (c, var);
3640 if (i != nest->scale_idx)
3642 const struct ctables_category *subtotal = cat->subtotal;
3643 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3646 if (cat->type == CCT_TOTAL
3647 || cat->type == CCT_SUBTOTAL
3648 || cat->type == CCT_POSTCOMPUTE)
3650 /* XXX these should be more encompassing I think.*/
3654 case PIVOT_AXIS_COLUMN:
3655 cell->omit_domains |= ((1u << CTDT_TABLE) |
3656 (1u << CTDT_LAYER) |
3657 (1u << CTDT_LAYERCOL) |
3658 (1u << CTDT_SUBTABLE) |
3661 case PIVOT_AXIS_ROW:
3662 cell->omit_domains |= ((1u << CTDT_TABLE) |
3663 (1u << CTDT_LAYER) |
3664 (1u << CTDT_LAYERROW) |
3665 (1u << CTDT_SUBTABLE) |
3668 case PIVOT_AXIS_LAYER:
3669 cell->omit_domains |= ((1u << CTDT_TABLE) |
3670 (1u << CTDT_LAYER));
3674 if (cat->type == CCT_POSTCOMPUTE)
3675 cell->postcompute = true;
3678 cell->axes[a].cvs[i].category = cat;
3679 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3682 if (i != nest->scale_idx)
3684 if (!ds_is_empty (&name))
3685 ds_put_cstr (&name, ", ");
3686 char *value_s = data_out (value, var_get_encoding (var),
3687 var_get_print_format (var),
3688 settings_get_fmt_settings ());
3689 if (cat->type == CCT_TOTAL
3690 || cat->type == CCT_SUBTOTAL
3691 || cat->type == CCT_POSTCOMPUTE)
3692 ds_put_format (&name, "%s=total", var_get_name (var));
3694 ds_put_format (&name, "%s=%s", var_get_name (var),
3695 value_s + strspn (value_s, " "));
3701 //cell->name = ds_steal_cstr (&name);
3703 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3704 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3705 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3706 for (size_t i = 0; i < specs->n; i++)
3707 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3708 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3709 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3710 hmap_insert (&s->cells, &cell->node, hash);
3715 is_scale_missing (const struct ctables_summary_spec_set *specs,
3716 const struct ccase *c)
3718 if (!specs->is_scale)
3721 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3724 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3726 const struct variable *var = specs->listwise_vars[i];
3727 if (var_is_num_missing (var, case_num (c, var)))
3735 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3736 const struct ctables_category *cats[PIVOT_N_AXES][10],
3737 bool is_missing, bool excluded_missing,
3738 double d_weight, double e_weight)
3740 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3741 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3743 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3745 bool scale_missing = is_scale_missing (specs, c);
3746 for (size_t i = 0; i < specs->n; i++)
3747 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3748 specs->var, case_data (c, specs->var), specs->is_scale,
3749 scale_missing, is_missing, excluded_missing,
3750 d_weight, e_weight);
3751 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3752 if (!(cell->omit_domains && (1u << dt)))
3754 struct ctables_domain *d = cell->domains[dt];
3755 d->d_total += d_weight;
3756 d->e_total += e_weight;
3758 if (!excluded_missing)
3760 d->d_count += d_weight;
3761 d->e_count += e_weight;
3766 d->d_valid += d_weight;
3767 d->e_valid += e_weight;
3770 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3772 /* XXX listwise_missing??? */
3773 const struct variable *var = s->table->sum_vars[i];
3774 double addend = case_num (c, var);
3775 if (!var_is_num_missing (var, addend))
3777 struct ctables_sum *sum = &d->sums[i];
3778 sum->e_sum += addend * e_weight;
3779 sum->u_sum += addend;
/* Adds case C to the total cells of section S.

   Walks the non-scale variables starting at variable START_NEST of axis
   START_AXIS.  For each one it looks up the total category of the variable's
   categories, adds C to the cell obtained with that category through
   ctables_cell_add__(), and recurses from the next variable on the same axis
   so that cells combining several totals are reached as well.  The remaining
   arguments are passed through unchanged.

   CATS is modified while the function runs; SAVE holds the entry that is
   being replaced.

   NOTE(review): the statements that store the total into CATS, restore SAVE,
   and handle a variable without a total category are not visible in this
   excerpt. */
3787 recurse_totals (struct ctables_section *s, const struct ccase *c,
3788 const struct ctables_category *cats[PIVOT_N_AXES][10],
3789 bool is_missing, bool excluded_missing,
3790 double d_weight, double e_weight,
3791 enum pivot_axis_type start_axis, size_t start_nest)
3793 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3795 const struct ctables_nest *nest = s->nests[a];
3796 for (size_t i = start_nest; i < nest->n; i++)
3798 if (i == nest->scale_idx)
3801 const struct variable *var = nest->vars[i];
3803 const struct ctables_category *total = ctables_categories_total (
3804 s->table->categories[var_get_dict_index (var)]);
3807 const struct ctables_category *save = cats[a][i];
3809 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3810 d_weight, e_weight);
3811 recurse_totals (s, c, cats, is_missing, excluded_missing,
3812 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells of section S.

   Walks the non-scale variables starting at variable START_NEST of axis
   START_AXIS.  For each one it replaces the variable's current category in
   CATS by that category's subtotal, adds C to the resulting cell through
   ctables_cell_add__(), and recurses from the next variable on the same axis
   so that cells combining several subtotals are reached as well.  The
   remaining arguments are passed through unchanged.

   NOTE(review): the check for a category without a subtotal and the
   statement that restores SAVE into CATS are not visible in this excerpt. */
3821 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3822 const struct ctables_category *cats[PIVOT_N_AXES][10],
3823 bool is_missing, bool excluded_missing,
3824 double d_weight, double e_weight,
3825 enum pivot_axis_type start_axis, size_t start_nest)
3827 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3829 const struct ctables_nest *nest = s->nests[a];
3830 for (size_t i = start_nest; i < nest->n; i++)
3832 if (i == nest->scale_idx)
3835 const struct ctables_category *save = cats[a][i];
3838 cats[a][i] = save->subtotal;
3839 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3840 d_weight, e_weight);
3841 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3842 d_weight, e_weight, a, i + 1);
3851 ctables_add_occurrence (const struct variable *var,
3852 const union value *value,
3853 struct hmap *occurrences)
3855 int width = var_get_width (var);
3856 unsigned int hash = value_hash (value, width, 0);
3858 struct ctables_occurrence *o;
3859 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3861 if (value_equal (value, &o->value, width))
3864 o = xmalloc (sizeof *o);
3865 value_clone (&o->value, value, width);
3866 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First the category of each non-scale variable on each axis is determined
   with ctables_categories_match(); a variable can end up in the static
   CCT_EXCLUDED_MISSING category, which sets EXCLUDED_MISSING.  Unless that
   happened for some variable, each variable's value is recorded in the
   section's occurrence maps.  The case is then added to its cell, and to the
   related total and subtotal cells through recurse_totals() and
   recurse_subtotals().

   NOTE(review): CATS provides room for only 10 variables per axis (see the
   XXX below) and nothing visible here checks nest->n against that bound.

   NOTE(review): the conditions under which IS_MISSING is set and under which
   the case is dropped are not visible in this excerpt. */
3870 ctables_cell_insert (struct ctables_section *s,
3871 const struct ccase *c,
3872 double d_weight, double e_weight)
3874 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3876 /* Does at least one categorical variable have a missing value in an included
3877 or excluded category? */
3878 bool is_missing = false;
3880 /* Does at least one categorical variable have a missing value in an excluded
3882 bool excluded_missing = false;
3884 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3886 const struct ctables_nest *nest = s->nests[a];
3887 for (size_t i = 0; i < nest->n; i++)
3889 if (i == nest->scale_idx)
3892 const struct variable *var = nest->vars[i];
3893 const union value *value = case_data (c, var);
3895 bool var_missing = var_is_value_missing (var, value) != 0;
3899 cats[a][i] = ctables_categories_match (
3900 s->table->categories[var_get_dict_index (var)], value, var);
3906 static const struct ctables_category cct_excluded_missing = {
3907 .type = CCT_EXCLUDED_MISSING,
3910 cats[a][i] = &cct_excluded_missing;
3911 excluded_missing = true;
3916 if (!excluded_missing)
3917 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3919 const struct ctables_nest *nest = s->nests[a];
3920 for (size_t i = 0; i < nest->n; i++)
3921 if (i != nest->scale_idx)
3923 const struct variable *var = nest->vars[i];
3924 const union value *value = case_data (c, var);
3925 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3929 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3930 d_weight, e_weight);
3932 //if (!excluded_missing)
3934 recurse_totals (s, c, cats, is_missing, excluded_missing,
3935 d_weight, e_weight, 0, 0);
3936 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3937 d_weight, e_weight, 0, 0);
/* Summary-spec set that a merge item refers to; merge_item_compare_3way()
   selects one spec from its specs[] array using the item's ofs member. */
3943 const struct ctables_summary_spec_set *set;
3948 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3950 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3951 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3952 if (as->function != bs->function)
3953 return as->function > bs->function ? 1 : -1;
3954 else if (as->percentile != bs->percentile)
3955 return as->percentile < bs->percentile ? 1 : -1;
3957 const char *as_label = as->label ? as->label : "";
3958 const char *bs_label = bs->label ? bs->label : "";
3959 return strcmp (as_label, bs_label);
3962 static struct pivot_value *
3963 ctables_category_create_label__ (const struct ctables_category *cat,
3964 const struct variable *var,
3965 const union value *value)
3967 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3968 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3969 : pivot_value_new_var_value (var, value));
3972 static struct pivot_value *
3973 ctables_postcompute_label (const struct ctables_categories *cats,
3974 const struct ctables_category *cat,
3975 const struct variable *var,
3976 const union value *value)
3978 struct substring in = ss_cstr (cat->pc->label);
3979 struct substring target = ss_cstr (")LABEL[");
3981 struct string out = DS_EMPTY_INITIALIZER;
3984 size_t chunk = ss_find_substring (in, target);
3985 if (chunk == SIZE_MAX)
3987 if (ds_is_empty (&out))
3988 return pivot_value_new_user_text (in.string, in.length);
3991 ds_put_substring (&out, in);
3992 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
3996 ds_put_substring (&out, ss_head (in, chunk));
3997 ss_advance (&in, chunk + target.length);
3999 struct substring idx_s;
4000 if (!ss_get_until (&in, ']', &idx_s))
4003 long int idx = strtol (idx_s.string, &tail, 10);
4004 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4007 struct ctables_category *cat2 = &cats->cats[idx - 1];
4008 struct pivot_value *label2
4009 = ctables_category_create_label__ (cat2, var, value);
4010 char *label2_s = pivot_value_to_string_defaults (label2);
4011 ds_put_cstr (&out, label2_s);
4013 pivot_value_destroy (label2);
4018 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4021 static struct pivot_value *
4022 ctables_category_create_label (const struct ctables_categories *cats,
4023 const struct ctables_category *cat,
4024 const struct variable *var,
4025 const union value *value)
4027 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4028 ? ctables_postcompute_label (cats, cat, var, value)
4029 : ctables_category_create_label__ (cat, var, value));
4032 static struct ctables_value *
4033 ctables_value_find__ (struct ctables_table *t, const union value *value,
4034 int width, unsigned int hash)
4036 struct ctables_value *clv;
4037 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4038 hash, &t->clabels_values_map)
4039 if (value_equal (value, &clv->value, width))
4045 ctables_value_insert (struct ctables_table *t, const union value *value,
4048 unsigned int hash = value_hash (value, width, 0);
4049 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4052 clv = xmalloc (sizeof *clv);
4053 value_clone (&clv->value, value, width);
4054 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry of T->clabels_values_map that equals VALUE, which has
   the given WIDTH, or NULL if there is none. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
/* Recursively appends sections to T->sections, one for every combination of
   a nest index per axis.

   While A names an axis, IX[A] is stepped through that axis's nests (through
   a single index 0 if the axis has no nests) and the function recurses for
   axis A + 1.  Once IX holds an index for every axis, the next free element
   of T->sections is initialized: its cell map, one occurrence map per
   variable of the chosen nest on each axis, and one domain map per domain
   type.

   NOTE(review): the caller presumably sizes T->sections for all
   combinations beforehand; nothing here grows the array -- confirm.  Some
   initialization statements are not visible in this excerpt. */
4067 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4068 size_t ix[PIVOT_N_AXES])
4070 if (a < PIVOT_N_AXES)
4072 size_t limit = MAX (t->stacks[a].n, 1);
4073 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4074 ctables_table_add_section (t, a + 1, ix);
4078 struct ctables_section *s = &t->sections[t->n_sections++];
4079 *s = (struct ctables_section) {
4081 .cells = HMAP_INITIALIZER (s->cells),
4083 for (a = 0; a < PIVOT_N_AXES; a++)
4086 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4088 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4089 for (size_t i = 0; i < nest->n; i++)
4090 hmap_init (&s->occurrences[a][i]);
4092 for (size_t i = 0; i < N_CTDTS; i++)
4093 hmap_init (&s->domains[i]);
/* Operator callbacks for postcompute expressions.  Each takes two doubles;
   ctables_pcexpr_evaluate() hands ctpo_add, ctpo_sub, and ctpo_mul to
   ctables_pcexpr_evaluate_nonterminal() as its EVALUATE argument.

   NOTE(review): most of the function bodies are not visible in this excerpt;
   going by the names they presumably implement +, -, *, /, exponentiation,
   and negation -- confirm. */
4098 ctpo_add (double a, double b)
4104 ctpo_sub (double a, double b)
4110 ctpo_mul (double a, double b)
/* Division: a zero divisor yields SYSMIS rather than dividing. */
4116 ctpo_div (double a, double b)
4118 return b ? a / b : SYSMIS;
/* Calls pow() after saving the caller's errno.  NOTE(review): presumably
   errno is then used to detect a failed pow() and restored afterward --
   confirm. */
4122 ctpo_pow (double a, double b)
4124 int save_errno = errno;
4126 double result = pow (a, b);
/* Takes a second argument, marked unused, so that it has the same signature
   as the binary operators. */
4134 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression.  Besides the members
   shown, the evaluation code reads pc_a_idx and summary_idx from it; their
   declarations are not visible in this excerpt. */
4139 struct ctables_pcexpr_evaluate_ctx
/* Cell that supplies the category and value of every variable other than
   the one at (pc_a, pc_a_idx). */
4141 const struct ctables_cell *cell;
/* Section whose cells, nests, and occurrence maps are consulted. */
4142 const struct ctables_section *section;
/* Categories in which the categories named by the expression are looked
   up. */
4143 const struct ctables_categories *cats;
/* Axis of the variable being substituted; used together with pc_a_idx. */
4144 enum pivot_axis_type pc_a;
/* Passed on to ctables_find_category_for_postcompute(). */
4147 enum fmt_type parse_format;
/* Forward declaration: expression evaluation is mutually recursive with
   ctables_pcexpr_evaluate_nonterminal(). */
4150 static double ctables_pcexpr_evaluate (
4151 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4154 ctables_pcexpr_evaluate_nonterminal (
4155 const struct ctables_pcexpr_evaluate_ctx *ctx,
4156 const struct ctables_pcexpr *e, size_t n_args,
4157 double evaluate (double, double))
4159 double args[2] = { 0, 0 };
4160 for (size_t i = 0; i < n_args; i++)
4162 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4163 if (!isfinite (args[i]) || args[i] == SYSMIS)
4166 return evaluate (args[0], args[1]);
/* Evaluates one category reference of a postcompute expression.

   Looks in CTX->section for the cell whose category and value for every
   non-scale variable equal those of CTX->cell, except that the variable at
   (CTX->pc_a, CTX->pc_a_idx) takes its category and value from PC_CV
   instead.  Values are ignored for total, subtotal, and postcompute
   categories, in the hash as well as in the comparison.  For the cell found,
   returns the value of summary number CTX->summary_idx, computed by
   ctables_summary_value() with the specs of the section's summary axis.

   NOTE(review): what is returned when no such cell exists is not visible in
   this excerpt. */
4170 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4171 const struct ctables_cell_value *pc_cv)
4173 const struct ctables_section *s = ctx->section;
4176 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4178 const struct ctables_nest *nest = s->nests[a];
4179 for (size_t i = 0; i < nest->n; i++)
4180 if (i != nest->scale_idx)
4182 const struct ctables_cell_value *cv
4183 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4184 : &ctx->cell->axes[a].cvs[i]);
4185 hash = hash_pointer (cv->category, hash);
4186 if (cv->category->type != CCT_TOTAL
4187 && cv->category->type != CCT_SUBTOTAL
4188 && cv->category->type != CCT_POSTCOMPUTE)
4189 hash = value_hash (&cv->value,
4190 var_get_width (nest->vars[i]), hash);
4194 struct ctables_cell *tc;
4195 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4197 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4199 const struct ctables_nest *nest = s->nests[a];
4200 for (size_t i = 0; i < nest->n; i++)
4201 if (i != nest->scale_idx)
4203 const struct ctables_cell_value *p_cv
4204 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4205 : &ctx->cell->axes[a].cvs[i]);
4206 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4207 if (p_cv->category != t_cv->category
4208 || (p_cv->category->type != CCT_TOTAL
4209 && p_cv->category->type != CCT_SUBTOTAL
4210 && p_cv->category->type != CCT_POSTCOMPUTE
4211 && !value_equal (&p_cv->value,
4213 var_get_width (nest->vars[i]))))
4225 const struct ctables_table *t = s->table;
4226 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4227 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4228 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4229 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX.  Range operands add up
   the values of all matching occurrences, single-category operands look up
   one cell, and operators recurse through
   ctables_pcexpr_evaluate_nonterminal().  Some case labels are not visible
   in this excerpt. */
4233 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4234 const struct ctables_pcexpr *e)
/* Numeric and string ranges: sum over every occurrence of the variable
   that the category specification maps to the range's category. */
4241 case CTPO_CAT_NRANGE:
4242 case CTPO_CAT_SRANGE:
4244 struct ctables_cell_value cv = {
4245 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4247 assert (cv.category != NULL);
4249 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4250 const struct ctables_occurrence *o;
4253 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4254 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4255 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4257 cv.value = o->value;
4258 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands that name a single category: one cell lookup. */
4263 case CTPO_CAT_NUMBER:
4264 case CTPO_CAT_STRING:
4265 case CTPO_CAT_MISSING:
4266 case CTPO_CAT_OTHERNM:
4267 case CTPO_CAT_SUBTOTAL:
4268 case CTPO_CAT_TOTAL:
4270 struct ctables_cell_value cv = {
4271 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4272 .value = { .f = e->number },
4274 assert (cv.category != NULL);
4275 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: each return passes the operand count (2, or 1 for negation)
   and the arithmetic callback to the nonterminal helper. */
4279 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4282 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4285 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4288 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4291 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4294 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Scans every axis and nesting position of section S for a category value
   of CELL whose category is a postcompute, remembering that category and
   its position.  CELL must be a postcompute cell, and a postcompute
   category must be found (both are asserted).  The output pointers receive
   the position; only the assignment through PC_A_IDX_P and none of the
   return statements are visible in this excerpt. */
4300 static const struct ctables_category *
4301 ctables_cell_postcompute (const struct ctables_section *s,
4302 const struct ctables_cell *cell,
4303 enum pivot_axis_type *pc_a_p,
4306 assert (cell->postcompute);
4307 const struct ctables_category *pc_cat = NULL;
4308 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4309 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4311 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4312 if (cv->category->type == CCT_POSTCOMPUTE)
4316 /* Multiple postcomputes cross each other. The value is
4321 pc_cat = cv->category;
4325 *pc_a_idx_p = pc_a_idx;
4329 assert (pc_cat != NULL);
// Computes the value of postcompute cell CELL in section S for summary
// specification SS, whose index is summary_idx.  When one of the
// postcompute's own specs has the same function and percentile as SS,
// *FORMAT and *IS_CTABLES_FORMAT are overwritten from that spec.
4334 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4335 const struct ctables_cell *cell,
4336 const struct ctables_summary_spec *ss,
4337 struct fmt_spec *format,
4338 bool *is_ctables_format,
4341 enum pivot_axis_type pc_a = 0;
4342 size_t pc_a_idx = 0;
// Locate the postcompute category and the position where it occurs.
4343 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4344 s, cell, &pc_a, &pc_a_idx);
4348 const struct ctables_postcompute *pc = pc_cat->pc;
// Let a matching spec on the postcompute override the output format.
4351 for (size_t i = 0; i < pc->specs->n; i++)
4353 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4354 if (ss->function == ss2->function
4355 && ss->percentile == ss2->percentile)
4357 *format = ss2->format;
4358 *is_ctables_format = ss2->is_ctables_format;
// Evaluate the expression against the categories of the variable that
// carries the postcompute.
4364 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4365 const struct ctables_categories *cats = s->table->categories[
4366 var_get_dict_index (var)];
4367 struct ctables_pcexpr_evaluate_ctx ctx = {
4372 .pc_a_idx = pc_a_idx,
4373 .summary_idx = summary_idx,
4374 .parse_format = pc_cat->parse_format,
4376 return ctables_pcexpr_evaluate (&ctx, pc->expr);
// Builds a pivot table for T from the cells accumulated in its sections and
// submits it for output.  It creates the optional "Statistics" and
// category-label dimensions and one dimension per axis, then stores one
// value per cell and summary specification.
4380 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4382 struct pivot_table *pt = pivot_table_create__ (
4384 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4385 : pivot_value_new_text (N_("Custom Tables"))),
4388 pivot_table_set_caption (
4389 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4391 pivot_table_set_corner_text (
4392 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
// Summary statistics get a dimension of their own when they are labeled on
// a different axis than the summary axis, or when their labels are hidden
// and there is more than one of them.
4394 bool summary_dimension = (t->summary_axis != t->slabels_axis
4395 || (!t->slabels_visible
4396 && t->summary_specs.n > 1));
4397 if (summary_dimension)
4399 struct pivot_dimension *d = pivot_dimension_create (
4400 pt, t->slabels_axis, N_("Statistics"));
4401 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4402 if (!t->slabels_visible)
4403 d->hide_all_labels = true;
4404 for (size_t i = 0; i < specs->n; i++)
4405 pivot_category_create_leaf (
4406 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
// A category-label dimension exists only when T has an example variable
// for moved category labels.
4409 bool categories_dimension = t->clabels_example != NULL;
4410 if (categories_dimension)
4412 struct pivot_dimension *d = pivot_dimension_create (
4413 pt, t->label_axis[t->clabels_from_axis],
4414 t->clabels_from_axis == PIVOT_AXIS_ROW
4415 ? N_("Row Categories")
4416 : N_("Column Categories"));
4417 const struct variable *var = t->clabels_example;
4418 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4419 for (size_t i = 0; i < t->n_clabels_values; i++)
4421 const struct ctables_value *value = t->clabels_values[i];
4422 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4423 assert (cat != NULL);
4424 pivot_category_create_leaf (d->root, ctables_category_create_label (
4425 c, cat, t->clabels_example,
4430 pivot_table_set_look (pt, ct->look);
// One dimension per axis that has variables or that carries the summaries.
4431 struct pivot_dimension *d[PIVOT_N_AXES];
4432 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4434 static const char *names[] = {
4435 [PIVOT_AXIS_ROW] = N_("Rows"),
4436 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4437 [PIVOT_AXIS_LAYER] = N_("Layers"),
4439 d[a] = (t->axes[a] || a == t->summary_axis
4440 ? pivot_dimension_create (pt, a, names[a])
4445 assert (t->axes[a]);
4447 for (size_t i = 0; i < t->stacks[a].n; i++)
4449 struct ctables_nest *nest = &t->stacks[a].nests[i];
// Gather the sections that use this nest, then all of their cells.
4450 struct ctables_section **sections = xnmalloc (t->n_sections,
4452 size_t n_sections = 0;
4454 size_t n_total_cells = 0;
4455 size_t max_depth = 0;
4456 for (size_t j = 0; j < t->n_sections; j++)
4457 if (t->sections[j].nests[a] == nest)
4459 struct ctables_section *s = &t->sections[j];
4460 sections[n_sections++] = s;
4461 n_total_cells += s->cells.count;
4463 size_t depth = s->nests[a]->n;
4464 max_depth = MAX (depth, max_depth);
4467 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4469 size_t n_sorted = 0;
4471 for (size_t j = 0; j < n_sections; j++)
4473 struct ctables_section *s = sections[j];
4475 struct ctables_cell *cell;
4476 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4478 sorted[n_sorted++] = cell;
4479 assert (n_sorted <= n_total_cells);
4482 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4483 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
// Debug dump of the sorted cells; any guard around it is not visible in
// this excerpt.
4486 for (size_t j = 0; j < n_sorted; j++)
4488 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4493 struct ctables_level
4495 enum ctables_level_type
4497 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4498 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4499 CTL_SUMMARY, /* Summary functions. */
4503 enum settings_value_show vlabel; /* CTL_VAR only. */
// Work out which label levels (variable, category, summary) this nest
// contributes, outermost first.
4506 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4507 size_t n_levels = 0;
4508 for (size_t k = 0; k < nest->n; k++)
4510 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4511 if (vlabel != CTVL_NONE)
4513 levels[n_levels++] = (struct ctables_level) {
4515 .vlabel = (enum settings_value_show) vlabel,
4520 if (nest->scale_idx != k
4521 && (k != nest->n - 1 || t->label_axis[a] == a))
4523 levels[n_levels++] = (struct ctables_level) {
4524 .type = CTL_CATEGORY,
4530 if (!summary_dimension && a == t->slabels_axis)
4532 levels[n_levels++] = (struct ctables_level) {
4533 .type = CTL_SUMMARY,
4534 .var_idx = SIZE_MAX,
4538 /* Pivot categories:
4540 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4541 - category for nest->vars[0], if nest->scale_idx != 0
4542 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4543 - category for nest->vars[1], if nest->scale_idx != 1
4545 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4546 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4547 - summary function, if 'a == t->slabels_axis && a ==
4550 Additional dimensions:
4552 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4554 - If 't->label_axis[b] == a' for some 'b != a', add a category
4559 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4561 for (size_t j = 0; j < n_sorted; j++)
4563 struct ctables_cell *cell = sorted[j];
4564 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
// Count how many leading levels this cell shares with the previous cell,
// so that only the levels that differ get new groups or leaves.
4566 size_t n_common = 0;
4569 for (; n_common < n_levels; n_common++)
4571 const struct ctables_level *level = &levels[n_common];
4572 if (level->type == CTL_CATEGORY)
4574 size_t var_idx = level->var_idx;
4575 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4576 if (prev->axes[a].cvs[var_idx].category != c)
4578 else if (c->type != CCT_SUBTOTAL
4579 && c->type != CCT_TOTAL
4580 && c->type != CCT_POSTCOMPUTE
4581 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4582 &cell->axes[a].cvs[var_idx].value,
// value_equal() takes the value's width, not its type.  Passing the type
// would compare only the first byte of string values and could merge
// distinct string categories into one group.
4583 var_get_width (nest->vars[var_idx])))
4589 for (size_t k = n_common; k < n_levels; k++)
4591 const struct ctables_level *level = &levels[k];
4592 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4593 if (level->type == CTL_SUMMARY)
4595 assert (k == n_levels - 1);
4597 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4598 for (size_t m = 0; m < specs->n; m++)
4600 int leaf = pivot_category_create_leaf (
4601 parent, ctables_summary_label (&specs->specs[m],
4609 const struct variable *var = nest->vars[level->var_idx];
4610 struct pivot_value *label;
4611 if (level->type == CTL_VAR)
4613 label = pivot_value_new_variable (var);
4614 label->variable.show = level->vlabel;
4616 else if (level->type == CTL_CATEGORY)
4618 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4619 label = ctables_category_create_label (
4620 t->categories[var_get_dict_index (var)],
4621 cv->category, var, &cv->value);
// The innermost level becomes a leaf; outer levels become groups.
4626 if (k == n_levels - 1)
4627 prev_leaf = pivot_category_create_leaf (parent, label);
4629 groups[k] = pivot_category_create_group__ (parent, label);
4633 cell->axes[a].leaf = prev_leaf;
// Fill in the table body: one value per cell and summary specification.
4640 for (size_t i = 0; i < t->n_sections; i++)
4642 struct ctables_section *s = &t->sections[i];
4644 struct ctables_cell *cell;
4645 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4650 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4651 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4652 for (size_t j = 0; j < specs->n; j++)
// Assemble the index of this value along every dimension of the table.
4655 size_t n_dindexes = 0;
4657 if (summary_dimension)
4658 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4660 if (categories_dimension)
4662 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4663 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4664 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4665 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4668 dindexes[n_dindexes++] = ctv->leaf;
4671 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4674 int leaf = cell->axes[a].leaf;
4675 if (a == t->summary_axis && !summary_dimension)
4677 dindexes[n_dindexes++] = leaf;
4680 const struct ctables_summary_spec *ss = &specs->specs[j];
// Postcompute cells are evaluated from their expression; other cells
// report their accumulated summary.
4682 struct fmt_spec format = specs->specs[j].format;
4683 bool is_ctables_format = ss->is_ctables_format;
4684 double d = (cell->postcompute
4685 ? ctables_cell_calculate_postcompute (
4686 s, cell, ss, &format, &is_ctables_format, j)
4687 : ctables_summary_value (cell, &cell->summaries[j],
// Choose how to render the number.  In order of precedence: hidden small
// counts, the zero text, the missing text, a CTABLES-specific format, and
// otherwise a plain number.
4690 struct pivot_value *value;
4691 if (ct->hide_threshold != 0
4692 && d < ct->hide_threshold
4693 && ctables_summary_function_is_count (ss->function))
4695 value = pivot_value_new_user_text_nocopy (
4696 xasprintf ("<%d", ct->hide_threshold));
4698 else if (d == 0 && ct->zero)
4699 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4700 else if (d == SYSMIS && ct->missing)
4701 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4702 else if (is_ctables_format)
4704 char *s = data_out_stretchy (&(union value) { .f = d },
4706 &ct->ctables_formats, NULL);
4707 value = pivot_value_new_user_text_nocopy (s);
4711 value = pivot_value_new_number (d);
4712 value->numeric.format = format;
4714 pivot_table_put (pt, dindexes, n_dindexes, value);
4719 pivot_table_submit (pt);
// Validates the label position requested for axis A of T, which is
// T->label_axis[A].  Each msg() call below reports one unmet requirement:
// no category sorted by a summary function, the last variable of every
// nest must not be the scale variable, and across nests those variables
// must agree in width, value labels, and category specifications.  The
// result is used as a truth value by ctables_prepare_table(); the return
// statements themselves are not visible in this excerpt.
4723 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4725 enum pivot_axis_type label_pos = t->label_axis[a];
4729 t->clabels_from_axis = a;
// Names used in the error messages below.
4731 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4732 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4734 const struct ctables_stack *stack = &t->stacks[a];
4738 const struct ctables_nest *n0 = &stack->nests[0];
4741 assert (stack->n == 1);
// The last variable of the first nest serves as the reference that every
// other nest is compared against.
4745 const struct variable *v0 = n0->vars[n0->n - 1];
4746 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4747 t->clabels_example = v0;
4749 for (size_t i = 0; i < c0->n_cats; i++)
4750 if (c0->cats[i].type == CCT_FUNCTION)
4752 msg (SE, _("%s=%s is not allowed with sorting based "
4753 "on a summary function."),
4754 subcommand_name, pos_name);
4757 if (n0->n - 1 == n0->scale_idx)
4759 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4760 "but %s is a scale variable."),
4761 subcommand_name, pos_name, var_get_name (v0));
// Compare the last variable of each remaining nest with the reference.
4765 for (size_t i = 1; i < stack->n; i++)
4767 const struct ctables_nest *ni = &stack->nests[i];
4769 const struct variable *vi = ni->vars[ni->n - 1];
4770 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4772 if (ni->n - 1 == ni->scale_idx)
4774 msg (SE, _("%s=%s requires the variables to be moved to be "
4775 "categorical, but %s is a scale variable."),
4776 subcommand_name, pos_name, var_get_name (vi));
4779 if (var_get_width (v0) != var_get_width (vi))
4781 msg (SE, _("%s=%s requires the variables to be "
4782 "moved to have the same width, but %s has "
4783 "width %d and %s has width %d."),
4784 subcommand_name, pos_name,
4785 var_get_name (v0), var_get_width (v0),
4786 var_get_name (vi), var_get_width (vi));
4789 if (!val_labs_equal (var_get_value_labels (v0),
4790 var_get_value_labels (vi)))
4792 msg (SE, _("%s=%s requires the variables to be "
4793 "moved to have the same value labels, but %s "
4794 "and %s have different value labels."),
4795 subcommand_name, pos_name,
4796 var_get_name (v0), var_get_name (vi));
4799 if (!ctables_categories_equal (c0, ci))
4801 msg (SE, _("%s=%s requires the variables to be "
4802 "moved to have the same category "
4803 "specifications, but %s and %s have different "
4804 "category specifications."),
4805 subcommand_name, pos_name,
4806 var_get_name (v0), var_get_name (vi));
// Searches the *N elements of *SUM_VARS for VAR.  When the search falls
// through, stores VAR at position *N, first growing the array with
// x2nrealloc() if it is full.  enumerate_sum_vars() stores the returned
// value as a spec's sum_var_idx; the return statements are not visible in
// this excerpt.
4815 add_sum_var (struct variable *var,
4816 struct variable ***sum_vars, size_t *n, size_t *allocated)
4818 for (size_t i = 0; i < *n; i++)
4819 if (var == (*sum_vars)[i])
4822 if (*n >= *allocated)
4823 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4824 (*sum_vars)[*n] = var;
// Walks axis expression A.  For every summary spec on A whose function is
// a percentage-of-sum function, it registers A's variable with
// add_sum_var() and stores the result in the spec's sum_var_idx.  It also
// recurses into both subexpressions.  The tests that choose between the
// two parts are not visible in this excerpt.
4829 enumerate_sum_vars (const struct ctables_axis *a,
4830 struct variable ***sum_vars, size_t *n, size_t *allocated)
4838 for (size_t i = 0; i < N_CSVS; i++)
4839 for (size_t j = 0; j < a->specs[i].n; j++)
4841 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4842 if (ctables_function_is_pctsum (spec->function))
4843 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4849 for (size_t i = 0; i < 2; i++)
4850 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
// Prepares T for execution.  It enumerates the nests on each axis together
// with their per-domain variable lists, and supplies default summary specs
// for nests on the summary axis that have none.  When
// t->ctables->smissing_listwise is set it collects listwise variables.  It
// then merges all nests' summary specs into T->summary_specs, enumerates
// sum variables, and validates the row and column label positions.  The
// returned value is true only if both label-position checks succeed.
4856 ctables_prepare_table (struct ctables_table *t)
4858 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4861 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4863 for (size_t j = 0; j < t->stacks[a].n; j++)
4865 struct ctables_nest *nest = &t->stacks[a].nests[j];
// Decide, per domain type, which of the nest's variables belong to the
// domain.  Several branch bodies are not visible in this excerpt.
4866 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4868 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4869 nest->n_domains[dt] = 0;
4871 for (size_t k = 0; k < nest->n; k++)
4873 if (k == nest->scale_idx)
4882 if (a != PIVOT_AXIS_LAYER)
4889 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4890 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4891 : a == PIVOT_AXIS_ROW)
4893 if (k == nest->n - 1
4894 || (nest->scale_idx == nest->n - 1
4895 && k == nest->n - 2))
4901 if (a == PIVOT_AXIS_COLUMN)
4906 if (a == PIVOT_AXIS_ROW)
4911 nest->domains[dt][nest->n_domains[dt]++] = k;
// Alternative path: give the axis a single nest with no variables.  The
// condition that selects this path is not visible in this excerpt.
4918 struct ctables_nest *nest = xmalloc (sizeof *nest);
4919 *nest = (struct ctables_nest) { .n = 0 };
4920 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4922 /* There's no point in moving labels away from an axis that has no
4923 labels, so avoid dealing with the special cases around that. */
4924 t->label_axis[a] = a;
// Give every nest on the summary axis cell and total summary specs.
4927 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4928 for (size_t i = 0; i < stack->n; i++)
4930 struct ctables_nest *nest = &stack->nests[i];
4931 if (!nest->specs[CSV_CELL].n)
// No cell specs were given: default to the mean for a scale spec set and
// to the count otherwise, then reuse the same specs for totals.
4933 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4934 specs->specs = xmalloc (sizeof *specs->specs);
4937 enum ctables_summary_function function
4938 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4940 *specs->specs = (struct ctables_summary_spec) {
4941 .function = function,
4942 .format = ctables_summary_default_format (function, specs->var),
4945 specs->var = nest->vars[0];
4947 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4948 &nest->specs[CSV_CELL]);
4950 else if (!nest->specs[CSV_TOTAL].n)
4951 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4952 &nest->specs[CSV_CELL]);
// Listwise handling: collect the scale variables of the other nests that
// share this nest's group head and attach the list to every spec set.
4954 if (t->ctables->smissing_listwise)
4956 struct variable **listwise_vars = NULL;
4958 size_t allocated = 0;
4960 for (size_t j = nest->group_head; j < stack->n; j++)
4962 const struct ctables_nest *other_nest = &stack->nests[j];
4963 if (other_nest->group_head != nest->group_head)
4966 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4969 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4970 sizeof *listwise_vars);
4971 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4974 for (size_t j = 0; j < N_CSVS; j++)
4976 nest->specs[j].listwise_vars = listwise_vars;
4977 nest->specs[j].n_listwise_vars = n;
// Merge the spec sets of all nests into T->summary_specs.  Each step
// appends the smallest pending item to the merged list.  It then advances
// every item that compares equal to it, recording the merged position in
// that spec's axis_idx, and drops spec sets that are exhausted.
4982 struct ctables_summary_spec_set *merged = &t->summary_specs;
4983 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4985 for (size_t j = 0; j < stack->n; j++)
4987 const struct ctables_nest *nest = &stack->nests[j];
4989 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4990 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4995 struct merge_item min = items[0];
4996 for (size_t j = 1; j < n_left; j++)
4997 if (merge_item_compare_3way (&items[j], &min) < 0)
5000 if (merged->n >= merged->allocated)
5001 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5002 sizeof *merged->specs);
5003 merged->specs[merged->n++] = min.set->specs[min.ofs];
5005 for (size_t j = 0; j < n_left; )
5007 if (merge_item_compare_3way (&items[j], &min) == 0)
5009 struct merge_item *item = &items[j];
5010 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5011 if (++item->ofs >= item->set->n)
5013 items[j] = items[--n_left];
// Debug dump of the merge result; any guard around it is not visible in
// this excerpt.
5022 for (size_t j = 0; j < merged->n; j++)
5023 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5025 for (size_t j = 0; j < stack->n; j++)
5027 const struct ctables_nest *nest = &stack->nests[j];
5028 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5030 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5031 for (size_t k = 0; k < specs->n; k++)
5032 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5033 specs->specs[k].axis_idx);
5039 size_t allocated_sum_vars = 0;
5040 enumerate_sum_vars (t->axes[t->summary_axis],
5041 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5043 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5044 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of T, takes the value that case C has for the
   nest's last variable and, subject to the tests below, records it with
   ctables_value_insert(). */
5048 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5049 enum pivot_axis_type a)
5051 struct ctables_stack *stack = &t->stacks[a];
5052 for (size_t i = 0; i < stack->n; i++)
5054 const struct ctables_nest *nest = &stack->nests[i];
5055 const struct variable *var = nest->vars[nest->n - 1];
5056 const union value *value = case_data (c, var);
/* Numeric system-missing values are singled out; the statement that this
   test guards is not visible in this excerpt. */
5058 if (var_is_numeric (var) && value->f == SYSMIS)
/* The insertion depends on a category match for the variable; the rest of
   the condition is not visible in this excerpt. */
5061 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5063 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback used with sort().  A_ and B_ each point to a
   `struct ctables_value *' array element, and WIDTH_ points to the int
   width with which their values are compared. */
5068 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5070 const struct ctables_value *const *ap = a_;
5071 const struct ctables_value *const *bp = b_;
5072 const struct ctables_value *a = *ap;
5073 const struct ctables_value *b = *bp;
5074 const int *width = width_;
5075 return value_compare_3way (&a->value, &b->value, *width);
/* Copies the entries of T->clabels_values_map into the newly allocated
   array T->clabels_values, sorts the array by value, and sets each entry's
   `leaf' to its position in sorted order. */
5079 ctables_sort_clabels_values (struct ctables_table *t)
5081 const struct variable *v0 = t->clabels_example;
5082 int width = var_get_width (v0);
/* Add the labeled values that the category specification matches.  Any
   condition guarding this loop is not visible in this excerpt. */
5084 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5087 const struct val_labs *val_labs = var_get_value_labels (v0);
5088 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5089 vl = val_labs_next (val_labs, vl))
5090 if (ctables_categories_match (c0, &vl->value, v0))
5091 ctables_value_insert (t, &vl->value, width);
5094 size_t n = hmap_count (&t->clabels_values_map);
5095 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5097 struct ctables_value *clv;
5099 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5100 t->clabels_values[i++] = clv;
5101 t->n_clabels_values = n;
5104 sort (t->clabels_values, n, sizeof *t->clabels_values,
5105 compare_clabels_values_3way, &width);
5107 for (size_t i = 0; i < n; i++)
5108 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR implied by each category in CATS.
   Explicit numbers and strings are added directly; the other branches add
   those labeled values of VAR that satisfy the branch's test.  Most of the
   case labels that select a branch are not visible in this excerpt. */
5112 ctables_add_category_occurrences (const struct variable *var,
5113 struct hmap *occurrences,
5114 const struct ctables_categories *cats)
5116 const struct val_labs *val_labs = var_get_value_labels (var);
5118 for (size_t i = 0; i < cats->n_cats; i++)
5120 const struct ctables_category *c = &cats->cats[i];
/* An explicit number. */
5124 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* An explicit string, padded with spaces to the variable's width. */
5130 int width = var_get_width (var);
5132 value_init (&value, width);
5133 value_copy_buf_rpad (&value, width,
5134 CHAR_CAST (uint8_t *, c->string.string),
5135 c->string.length, ' ');
5136 ctables_add_occurrence (var, &value, occurrences);
5137 value_destroy (&value, width);
/* Labeled values within the numeric range, bounds included. */
5142 assert (var_is_numeric (var));
5143 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5144 vl = val_labs_next (val_labs, vl))
5145 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5146 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values within the string range. */
5150 assert (var_is_alpha (var));
5151 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5152 vl = val_labs_next (val_labs, vl))
5153 if (in_string_range (&vl->value, var, c->srange))
5154 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5158 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5159 vl = val_labs_next (val_labs, vl))
5160 if (var_is_value_missing (var, &vl->value))
5161 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5165 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5166 vl = val_labs_next (val_labs, vl))
5167 ctables_add_occurrence (var, &vl->value, occurrences);
5170 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless the category includes
   missing values. */
5180 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5181 vl = val_labs_next (val_labs, vl))
5182 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5183 ctables_add_occurrence (var, &vl->value, occurrences);
5186 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates the combinations of occurrences and postcompute
   categories over the axes and nesting positions of S, starting at position
   A_IDX of axis A.  Each choice is written into case C and into CATS, and a
   cell is inserted for every complete combination. */
5193 ctables_section_recurse_add_empty_categories (
5194 struct ctables_section *s,
5195 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5196 enum pivot_axis_type a, size_t a_idx)
/* Past the last axis: the combination is complete, so insert its cell. */
5198 if (a >= PIVOT_N_AXES)
5199 ctables_cell_insert__ (s, c, cats);
/* This axis has no nest or no more variables: move on to the next axis. */
5200 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5201 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5204 const struct variable *var = s->nests[a]->vars[a_idx];
5205 const struct ctables_categories *categories = s->table->categories[
5206 var_get_dict_index (var)];
5207 int width = var_get_width (var);
/* Try each recorded occurrence of this variable: store it in the case,
   look up its category, and recurse into the next position. */
5208 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5209 const struct ctables_occurrence *o;
5210 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5212 union value *value = case_data_rw (c, var);
5213 value_destroy (value, width);
5214 value_clone (value, &o->value, width);
5215 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5216 assert (cats[a][a_idx] != NULL);
5217 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories have no occurrence of their own, so each one is
   tried separately. */
5220 for (size_t i = 0; i < categories->n_cats; i++)
5222 const struct ctables_category *cat = &categories->cats[i];
5223 if (cat->type == CCT_POSTCOMPUTE)
5225 cats[a][a_idx] = cat;
5226 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* For every non-scale variable in S whose category specification has
   show_empty set, adds that specification's category occurrences.  It then
   enumerates cells with ctables_section_recurse_add_empty_categories() over
   a freshly created case.  How the local show_empty flag gates the second
   step is not visible in this excerpt. */
5233 ctables_section_add_empty_categories (struct ctables_section *s)
5235 bool show_empty = false;
5236 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5238 for (size_t k = 0; k < s->nests[a]->n; k++)
5239 if (k != s->nests[a]->scale_idx)
5241 const struct variable *var = s->nests[a]->vars[k];
5242 const struct ctables_categories *cats = s->table->categories[
5243 var_get_dict_index (var)];
5244 if (cats->show_empty)
5247 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension caps the nesting depth at 10 entries per axis. */
5253 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5254 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5255 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S: removes every occurrence, cell, and domain from its
   hash maps, releasing the values, arrays, and summaries they own. */
5260 ctables_section_clear (struct ctables_section *s)
/* Occurrences of each non-scale variable. */
5262 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5264 const struct ctables_nest *nest = s->nests[a];
5265 for (size_t i = 0; i < nest->n; i++)
5266 if (i != nest->scale_idx)
5268 const struct variable *var = nest->vars[i];
5269 int width = var_get_width (var);
5270 struct ctables_occurrence *o, *next;
5271 struct hmap *map = &s->occurrences[a][i];
5272 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5274 value_destroy (&o->value, width);
5275 hmap_delete (map, &o->node);
/* Cells: per-axis category values, then the summaries for the cell's
   summary variant. */
5282 struct ctables_cell *cell, *next_cell;
5283 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5285 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5287 const struct ctables_nest *nest = s->nests[a];
5288 for (size_t i = 0; i < nest->n; i++)
5289 if (i != nest->scale_idx)
5290 value_destroy (&cell->axes[a].cvs[i].value,
5291 var_get_width (nest->vars[i]));
5292 free (cell->axes[a].cvs);
5295 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5296 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5297 for (size_t i = 0; i < specs->n; i++)
5298 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5299 free (cell->summaries);
5301 hmap_delete (&s->cells, &cell->node);
5304 hmap_shrink (&s->cells);
/* Domains of every type. */
5306 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5308 struct ctables_domain *domain, *next_domain;
5309 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5312 free (domain->sums);
5313 hmap_delete (&s->domains[dt], &domain->node);
5316 hmap_shrink (&s->domains[dt]);
/* Clears every section of T.  If T has a category-label example variable,
   it also empties T->clabels_values_map and releases the T->clabels_values
   array. */
5321 ctables_table_clear (struct ctables_table *t)
5323 for (size_t i = 0; i < t->n_sections; i++)
5324 ctables_section_clear (&t->sections[i]);
5326 if (t->clabels_example)
5328 int width = var_get_width (t->clabels_example);
5329 struct ctables_value *value, *next_value;
5330 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5331 &t->clabels_values_map)
5333 value_destroy (&value->value, width);
5334 hmap_delete (&t->clabels_values_map, &value->node);
5337 hmap_shrink (&t->clabels_values_map);
/* Reset the array so that the table can be filled again. */
5339 free (t->clabels_values);
5340 t->clabels_values = NULL;
5341 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases in INPUT.  It first allocates and
   adds each table's sections.  For every case group it then feeds each case
   to every section of every table.  After that, each table gets its
   category-label values sorted and its empty categories added, and is
   output and cleared.  Returns the result of casegrouper_destroy(). */
5346 ctables_execute (struct dataset *ds, struct casereader *input,
5349 for (size_t i = 0; i < ct->n_tables; i++)
5351 struct ctables_table *t = ct->tables[i];
/* Room for one section per combination of row, column, and layer nests,
   counting an axis without nests as one. */
5352 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5353 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5354 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5355 sizeof *t->sections);
5356 size_t ix[PIVOT_N_AXES];
5357 ctables_table_add_section (t, 0, ix);
/* Group the cases by split variables when the dictionary's split type is
   SPLIT_SEPARATE; otherwise group on no variables. */
5360 struct dictionary *dict = dataset_dict (ds);
5361 struct casegrouper *grouper
5362 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5363 ? casegrouper_create_splits (input, dict)
5364 : casegrouper_create_vars (input, NULL, 0));
5365 struct casereader *group;
5366 while (casegrouper_get_next_group (grouper, &group))
5368 /* Output SPLIT FILE variables. */
5369 struct ccase *c = casereader_peek (group, 0);
5372 output_split_file_values (ds, c);
5376 bool warn_on_invalid = true;
5377 for (c = casereader_read (group); c;
5378 case_unref (c), c = casereader_read (group))
/* D_WEIGHT is the dictionary's case weight.  E_WEIGHT comes from
   ct->e_weight when that is set; the alternative is not visible in this
   excerpt. */
5380 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5381 double e_weight = (ct->e_weight
5382 ? var_force_valid_weight (ct->e_weight,
5383 case_num (c, ct->e_weight),
5387 for (size_t i = 0; i < ct->n_tables; i++)
5389 struct ctables_table *t = ct->tables[i];
5391 for (size_t j = 0; j < t->n_sections; j++)
5392 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Axes whose labels are displayed on another axis also collect their
   category-label values. */
5394 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5395 if (t->label_axis[a] != a)
5396 ctables_insert_clabels_values (t, c, a);
5399 casereader_destroy (group);
5401 for (size_t i = 0; i < ct->n_tables; i++)
5403 struct ctables_table *t = ct->tables[i];
5405 if (t->clabels_example)
5406 ctables_sort_clabels_values (t);
5408 for (size_t j = 0; j < t->n_sections; j++)
5409 ctables_section_add_empty_categories (&t->sections[j]);
5411 ctables_table_output (ct, t);
5412 ctables_table_clear (t);
5415 return casegrouper_destroy (grouper);
/* Type of a function that parses a postcompute subexpression from a lexer,
   given a dictionary, and returns the parsed expression.  The binary
   operator parsers take one of these to parse their operands. */
5420 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5421 struct dictionary *);
/* Releases what expression E owns: its string or string-range operands, its
   two subexpressions (recursively), and its message location.  Some case
   labels are not visible in this excerpt. */
5424 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5430 case CTPO_CAT_STRING:
5431 ss_dealloc (&e->string);
5434 case CTPO_CAT_SRANGE:
5435 for (size_t i = 0; i < 2; i++)
5436 ss_dealloc (&e->srange[i]);
/* Both subexpressions are destroyed. */
5445 for (size_t i = 0; i < 2; i++)
5446 ctables_pcexpr_destroy (e->subs[i]);
/* No release call is visible for these operand kinds. */
5450 case CTPO_CAT_NUMBER:
5451 case CTPO_CAT_NRANGE:
5452 case CTPO_CAT_MISSING:
5453 case CTPO_CAT_OTHERNM:
5454 case CTPO_CAT_SUBTOTAL:
5455 case CTPO_CAT_TOTAL:
5459 msg_location_destroy (e->location);
/* Allocates an expression node whose operands are SUB0 and SUB1 and whose
   location is the merger of the two operands' locations.  The use of OP and
   the return statement are not visible in this excerpt. */
5464 static struct ctables_pcexpr *
5465 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5466 struct ctables_pcexpr *sub0,
5467 struct ctables_pcexpr *sub1)
5469 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5470 *e = (struct ctables_pcexpr) {
5472 .subs = { sub0, sub1 },
5473 .location = msg_location_merged (sub0->location, sub1->location),
5478 /* How to parse an operator. */
/* Token that the lexer reports for the operator. */
5481 enum token_type token;
/* Postcompute operation that the operator denotes. */
5482 enum ctables_postcompute_op op;
/* Looks through the N_OPS entries of OPS for one whose token type equals the
   lexer's current token.

   A match on any token other than T_NEG_NUM triggers an extra step that is
   not visible in this listing (presumably consuming the token).  T_NEG_NUM
   is exempt, which fits the addition-level table mapping T_NEG_NUM to
   CTPO_ADD: the negative number must remain in the lexer so that it can
   still be read as the right-hand operand.

   NOTE(review): the return statements are not shown -- presumably the
   matching entry, or a null pointer when nothing matches; confirm. */
5485 static const struct operator *
5486 ctable_pcexpr_match_operator (struct lexer *lexer,
5487 const struct operator ops[], size_t n_ops)
5489 for (const struct operator *op = ops; op < ops + n_ops; op++)
5490 if (lex_token (lexer) == op->token)
5492 if (op->token != T_NEG_NUM)
/* Continues parsing a chain of binary operators at one precedence level,
   given an already parsed left operand LHS.

   OPS (N_OPS entries) lists the operators accepted at this level and
   PARSE_NEXT_LEVEL parses each operand.  Every operator found combines the
   expression built so far with the next operand, with the old expression as
   the left subexpression, so the chain associates to the left.

   If CHAIN_WARNING is nonnull, it is issued as a warning at the expression's
   location when the operator counter has passed 1 at the point where the
   warning is checked, i.e. when operators were chained.

   NOTE(review): the conditions guarding the warning and the cleanup of LHS,
   and the return statements, are not visible in this listing.  Presumably
   the warning is checked when no further operator matches, and LHS is
   destroyed when parsing the right operand fails; confirm. */
5501 static struct ctables_pcexpr *
5502 ctable_pcexpr_parse_binary_operators__ (
5503 struct lexer *lexer, struct dictionary *dict,
5504 const struct operator ops[], size_t n_ops,
5505 parse_recursively_func *parse_next_level,
5506 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of operators folded into LHS so far. */
5508 for (int op_count = 0; ; op_count++)
5510 const struct operator *op
5511 = ctable_pcexpr_match_operator (lexer, ops, n_ops);
5514 if (op_count > 1 && chain_warning)
5515 msg_at (SW, lhs->location, "%s", chain_warning);
5520 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5523 ctables_pcexpr_destroy (lhs);
/* The accumulated expression becomes the left operand of the new node. */
5527 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators from scratch: reads the
   first operand with PARSE_NEXT_LEVEL, then hands it to
   ctable_pcexpr_parse_binary_operators__ to absorb any operators from OPS
   that follow.  CHAIN_WARNING is passed through unchanged and may be null.

   NOTE(review): the handling of a failed first operand is on lines not
   visible in this listing -- presumably an early null return; confirm. */
5531 static struct ctables_pcexpr *
5532 ctable_pcexpr_parse_binary_operators (struct lexer *lexer,
5533 struct dictionary *dict,
5534 const struct operator ops[], size_t n_ops,
5535 parse_recursively_func *parse_next_level,
5536 const char *chain_warning)
5538 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5542 return ctable_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5544 chain_warning, lhs);
/* Forward declaration: the primary-expression parser calls the addition
   level to parse a parenthesized subexpression, before that level is
   defined. */
5547 static struct ctables_pcexpr *ctable_pcexpr_parse_add (struct lexer *,
5548 struct dictionary *);
/* Builds, by value, a CTPO_CAT_NRANGE expression node for the numeric range
   whose endpoints are LOW and HIGH.  Members not named in the compound
   literal are zero-initialized. */
5550 static struct ctables_pcexpr
5551 ctpo_cat_nrange (double low, double high)
5553 return (struct ctables_pcexpr) {
5554 .op = CTPO_CAT_NRANGE,
5555 .nrange = { low, high },
/* Parses a primary (innermost) postcompute expression.  The forms visible
   in this listing are:

   - a number, yielding a CTPO_CONSTANT node;

   - the keywords MISSING, OTHERNM, and TOTAL;

   - SUBTOTAL, optionally followed by a bracketed integer index in the range
     1...LONG_MAX (the index stays 0 when no bracket follows);

   - a bracketed category reference: [LO THRU n], [n], [n THRU HI],
     [n THRU m], or a bracketed string;

   - a parenthesized subexpression, parsed at the addition level.

   Anything else is reported with lex_error.

   For the non-parenthesized forms the node is assembled in the local E,
   given a location covering the tokens from the starting offset through the
   last one consumed, and returned as a heap copy made with xmemdup.

   NOTE(review): token consumption, failure returns, and the return for the
   parenthesized form are on lines not visible in this listing; confirm the
   details against the complete source. */
5559 static struct ctables_pcexpr *
5560 ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5562 int start_ofs = lex_ofs (lexer);
5563 struct ctables_pcexpr e;
5564 if (lex_is_number (lexer))
5566 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5567 .number = lex_number (lexer) };
5570 else if (lex_match_id (lexer, "MISSING"))
5571 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5572 else if (lex_match_id (lexer, "OTHERNM"))
5573 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5574 else if (lex_match_id (lexer, "TOTAL"))
5575 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5576 else if (lex_match_id (lexer, "SUBTOTAL"))
/* An index of 0 records that no explicit [n] was given. */
5578 size_t subtotal_index = 0;
5579 if (lex_match (lexer, T_LBRACK))
5581 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5583 subtotal_index = lex_integer (lexer);
5585 if (!lex_force_match (lexer, T_RBRACK))
5588 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5589 .subtotal_index = subtotal_index };
5591 else if (lex_match (lexer, T_LBRACK))
5593 if (lex_match_id (lexer, "LO"))
/* [LO THRU n]: a range with no lower bound is stored as starting at
   -DBL_MAX. */
5595 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
5597 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5600 else if (lex_is_number (lexer))
5602 double number = lex_number (lexer);
5604 if (lex_match_id (lexer, "THRU"))
/* [n THRU HI]: a range with no upper bound is stored as ending at
   DBL_MAX. */
5606 if (lex_match_id (lexer, "HI"))
5607 e = ctpo_cat_nrange (number, DBL_MAX);
5610 if (!lex_force_num (lexer))
5612 e = ctpo_cat_nrange (number, lex_number (lexer));
/* [n]: a single numeric category. */
5617 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5620 else if (lex_is_string (lexer))
/* The string token is recoded, passing the dictionary's encoding and
   "UTF-8" to recode_substring_pool (presumably converting from UTF-8 into
   the dictionary's encoding -- confirm the argument order), and then
   stripped of trailing spaces. */
5622 struct substring s = recode_substring_pool (
5623 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
5624 ss_rtrim (&s, ss_cstr (" "));
5626 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5631 lex_error (lexer, NULL);
/* Without the closing bracket, a string made above must not be leaked. */
5635 if (!lex_force_match (lexer, T_RBRACK))
5637 if (e.op == CTPO_CAT_STRING)
5638 ss_dealloc (&e.string);
5642 else if (lex_match (lexer, T_LPAREN))
5644 struct ctables_pcexpr *ep = ctable_pcexpr_parse_add (lexer, dict);
/* Without the closing parenthesis, the subexpression is discarded. */
5647 if (!lex_force_match (lexer, T_RPAREN))
5649 ctables_pcexpr_destroy (ep);
5656 lex_error (lexer, NULL);
/* The location ends at the token just before the lexer's current one. */
5660 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5661 return xmemdup (&e, sizeof e);
/* Allocates the expression node that the unary-minus and exponentiation
   parsers use to negate SUB.  The node's location runs from token offset
   START_OFS through the token just before LEXER's current position.

   NOTE(review): the initializer lines that set the operation and attach SUB,
   and the return statement, are not visible in this listing; confirm. */
5664 static struct ctables_pcexpr *
5665 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5666 struct lexer *lexer, int start_ofs)
5668 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5669 *e = (struct ctables_pcexpr) {
5672 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level (`**', which maps to CTPO_POW).

   Unless the current token is a negative number immediately followed by
   `**', this defers to the generic binary-operator parser, with
   ctable_pcexpr_parse_primary reading the operands.

   In the negative-number case the left operand is built by hand as a
   constant holding the negated token value, the remainder of the `**' chain
   is parsed with that operand on the left, and the result is wrapped in a
   negation node, so that the minus sign applies to the whole power.

   A warning about the left-associativity of `**' is supplied to the chain
   parser in the negative-number case.

   NOTE(review): the line that advances past the negative number, the failure
   check after the chain is parsed, and the final argument of the first call
   (presumably the same warning) are not visible in this listing; confirm. */
5677 static struct ctables_pcexpr *
5678 ctable_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5680 static const struct operator op = { T_EXP, CTPO_POW };
5682 const char *chain_warning =
5683 _("The exponentiation operator (`**') is left-associative: "
5684 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5685 "To disable this warning, insert parentheses.");
5687 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5688 return ctable_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5689 ctable_pcexpr_parse_primary,
5692 /* Special case for situations like "-5**6", which must be parsed as
5695 int start_ofs = lex_ofs (lexer);
5696 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5697 *lhs = (struct ctables_pcexpr) {
5698 .op = CTPO_CONSTANT,
5699 .number = -lex_tokval (lexer),
5700 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5704 struct ctables_pcexpr *node = ctable_pcexpr_parse_binary_operators__ (
5705 lexer, dict, &op, 1,
5706 ctable_pcexpr_parse_primary, chain_warning, lhs);
5710 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5713 /* Parses the unary minus level.  Without a leading `-' (T_DASH) this
   simply defers to the exponentiation level.  Otherwise it calls itself for
   the operand, so that repeated minus signs nest, and wraps the operand in a
   negation node whose location starts at the `-'.

   NOTE(review): the check for a failed operand parse is on lines not visible
   in this listing; confirm. */
5714 static struct ctables_pcexpr *
5715 ctable_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5717 int start_ofs = lex_ofs (lexer);
5718 if (!lex_match (lexer, T_DASH))
5719 return ctable_pcexpr_parse_exp (lexer, dict);
5721 struct ctables_pcexpr *inner = ctable_pcexpr_parse_neg (lexer, dict);
5725 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5728 /* Parses the multiplication and division level: a chain of
   unary-minus-level operands joined by `*' (CTPO_MUL) or `/' (CTPO_DIV).
   No chain warning is requested for this level. */
5729 static struct ctables_pcexpr *
5730 ctable_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5732 static const struct operator ops[] =
5734 { T_ASTERISK, CTPO_MUL },
5735 { T_SLASH, CTPO_DIV },
5738 return ctable_pcexpr_parse_binary_operators (lexer, dict, ops,
5739 sizeof ops / sizeof *ops,
5740 ctable_pcexpr_parse_neg, NULL);
5743 /* Parses the addition and subtraction level: a chain of
   multiplication-level operands joined by `+' (CTPO_ADD) or `-' (CTPO_SUB).
   No chain warning is requested for this level.

   A negative-number token (T_NEG_NUM) appearing where an operator is
   expected is treated as an addition of that negative number, which is why
   it is listed with CTPO_ADD. */
5744 static struct ctables_pcexpr *
5745 ctable_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5747 static const struct operator ops[] =
5749 { T_PLUS, CTPO_ADD },
5750 { T_DASH, CTPO_SUB },
5751 { T_NEG_NUM, CTPO_ADD },
5754 return ctable_pcexpr_parse_binary_operators (lexer, dict,
5755 ops, sizeof ops / sizeof *ops,
5756 ctable_pcexpr_parse_mul, NULL);
/* Searches CT's table of postcomputes for one named NAME.  Names are hashed
   with utf8_hash_case_string and compared with utf8_strcasecmp, so the
   lookup does not distinguish letter case.

   NOTE(review): the return statements are not visible in this listing --
   presumably the matching postcompute, or a null pointer if there is none;
   confirm. */
5759 static struct ctables_postcompute *
5760 ctables_find_postcompute (struct ctables *ct, const char *name)
5762 struct ctables_postcompute *pc;
5763 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5764 utf8_hash_case_string (name, 0), &ct->postcomputes)
5765 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form

       &name = EXPR(expression)

   and records the result as a postcompute in CT (a parameter declared on a
   line not visible in this listing).

   If a postcompute with the same name already exists, a warning is issued at
   the new definition with a note pointing at the previous one, and the
   previous definition's expression and location are destroyed.  Otherwise a
   new postcompute is allocated and inserted into CT's table under a
   case-insensitive hash of its name.

   Either way the postcompute receives the new definition's location, and its
   label is set to the source text of the expression.

   NOTE(review): the failure returns, the cleanup of NAME on failure, and the
   assignment of the parsed expression are on lines not visible in this
   listing; confirm. */
5771 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The definition's location starts one token before the current one, which
   the caller has just consumed as the PCOMPUTE keyword. */
5774 int pcompute_start = lex_ofs (lexer) - 1;
5776 if (!lex_match (lexer, T_AND))
5778 lex_error_expecting (lexer, "&");
5781 if (!lex_force_id (lexer))
5784 char *name = ss_xstrdup (lex_tokss (lexer));
5787 if (!lex_force_match (lexer, T_EQUALS)
5788 || !lex_force_match_id (lexer, "EXPR")
5789 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token span of the expression alone, for use as the label. */
5795 int expr_start = lex_ofs (lexer);
5796 struct ctables_pcexpr *expr = ctable_pcexpr_parse_add (lexer, dict);
5797 int expr_end = lex_ofs (lexer) - 1;
5798 if (!expr || !lex_force_match (lexer, T_RPAREN))
5803 int pcompute_end = lex_ofs (lexer) - 1;
5805 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5808 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5811 msg_at (SW, location, _("New definition of &%s will override the "
5812 "previous definition."),
5814 msg_at (SN, pc->location, _("This is the previous definition."));
5816 ctables_pcexpr_destroy (pc->expr);
5817 msg_location_destroy (pc->location);
5822 pc = xmalloc (sizeof *pc);
5823 *pc = (struct ctables_postcompute) { .name = name };
5824 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5825 utf8_hash_case_string (pc->name, 0));
5828 pc->location = location;
5830 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary specifications given for FORMAT on PPROPERTIES
   into SSS, which is first reset to an empty set.

   Parsing continues until the end of the command, a `/', or an identifier
   matching LABEL or HIDESOURCECATS.  Each specification consists of a
   summary function, then (for CTSF_PTILE only) a percentile from 0 to 100
   inclusive, then a format specifier.  Each is appended to SSS->specs, which
   grows as needed.

   NOTE(review): the failure paths and return statements are on lines not
   visible in this listing.  Presumably a failure jumps to the final call,
   which releases SSS, and then reports failure; confirm. */
5835 ctables_parse_pproperties_format (struct lexer *lexer,
5836 struct ctables_summary_spec_set *sss)
5838 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5840 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5841 && !(lex_token (lexer) == T_ID
5842 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5843 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5844 lex_tokss (lexer)))))
5846 /* Parse function. */
5847 enum ctables_summary_function function;
5848 if (!parse_ctables_summary_function (lexer, &function))
5851 /* Parse percentile. */
5852 double percentile = 0;
5853 if (function == CTSF_PTILE)
5855 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5857 percentile = lex_number (lexer);
/* Parse the format specifier, noting whether it is a CTABLES-specific
   format. */
5862 struct fmt_spec format;
5863 bool is_ctables_format;
5864 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new specification, enlarging the array when it is full. */
5867 if (sss->n >= sss->allocated)
5868 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5869 sizeof *sss->specs);
5870 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5871 .function = function,
5872 .percentile = percentile,
5874 .is_ctables_format = is_ctables_format,
5880 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of `&name' references
   to previously defined postcomputes, followed by settings that are applied
   to every postcompute in the list.

   A name that does not match any postcompute in CT is reported as an error.

   The settings, which repeat until a `/' or the end of the command, are:

   - LABEL [=] string: replaces each postcompute's label with a copy of the
     string.

   - FORMAT [=] specifications: parses a summary specification set and gives
     each postcompute its own clone of it.

   - HIDESOURCECATS [=] boolean: sets each postcompute's hide_source_cats.

   NOTE(review): the counter of collected postcomputes (`n_pcs'), the failure
   paths, the release of the PCS array, and the return statements are on
   lines not visible in this listing; confirm. */
5885 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5887 struct ctables_postcompute **pcs = NULL;
5889 size_t allocated_pcs = 0;
/* Collect the postcomputes named by the leading `&name' references. */
5891 while (lex_match (lexer, T_AND))
5893 if (!lex_force_id (lexer))
5895 struct ctables_postcompute *pc
5896 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5899 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5904 if (n_pcs >= allocated_pcs)
5905 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply each setting to all of the collected postcomputes. */
5909 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5911 if (lex_match_id (lexer, "LABEL"))
5913 lex_match (lexer, T_EQUALS);
5914 if (!lex_force_string (lexer))
5917 for (size_t i = 0; i < n_pcs; i++)
5919 free (pcs[i]->label);
5920 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5925 else if (lex_match_id (lexer, "FORMAT"))
5927 lex_match (lexer, T_EQUALS);
5929 struct ctables_summary_spec_set sss;
5930 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets an independent clone of the parsed set; the parsed
   original is released once all clones have been made. */
5933 for (size_t i = 0; i < n_pcs; i++)
5936 ctables_summary_spec_set_uninit (pcs[i]->specs);
5938 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5939 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5941 ctables_summary_spec_set_uninit (&sss);
5943 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5945 lex_match (lexer, T_EQUALS);
5946 bool hide_source_cats;
5947 if (!parse_bool (lexer, &hide_source_cats))
5949 for (size_t i = 0; i < n_pcs; i++)
5950 pcs[i]->hide_source_cats = hide_source_cats;
5954 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, broken down as local time and formatted
   according to the strftime format string FORMAT.

   NOTE(review): the declaration of the `value' buffer is on a line not
   visible in this listing.  The visible lines do not check the results of
   localtime or strftime; confirm whether the full source guards against a
   null `tm' or a zero return from strftime, which leaves the buffer contents
   unspecified. */
5967 put_strftime (struct string *out, time_t now, const char *format)
5969 const struct tm *tm = localtime (&now);
5971 strftime (value, sizeof value, format, tm);
5972 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   NOTE(review): the return statements are not visible in this listing.  The
   callers use the result as a condition, so presumably it reports whether
   the prefix was found and skipped; confirm. */
5976 skip_prefix (struct substring *s, struct substring prefix)
5978 if (ss_starts_with (*s, prefix))
5980 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the table expression made up of the
   lexer tokens at offsets EXPR_START (inclusive) through EXPR_END
   (exclusive).

   An identifier that names a variable in DICT is replaced by that variable's
   label when it has one; any other identifier is copied as written.  Other
   tokens are copied in their source representation, with spaces inserted
   around them except after `(' and before `)', at the start and end of the
   expression, and where OUT already ends in a space.

   NOTE(review): the handling of `[' and `]' relies on a nesting counter
   (`nest') whose declaration and updates are on lines not visible in this
   listing.  Presumably bracketed material is tracked so that it can be left
   out of the output; confirm. */
5988 put_table_expression (struct string *out, struct lexer *lexer,
5989 struct dictionary *dict, int expr_start, int expr_end)
5992 for (int ofs = expr_start; ofs < expr_end; ofs++)
5994 const struct token *t = lex_ofs_token (lexer, ofs);
5995 if (t->type == T_LBRACK)
5997 else if (t->type == T_RBRACK && nest > 0)
6003 else if (t->type == T_ID)
/* Prefer the variable's label; fall back on the identifier itself. */
6005 const struct variable *var
6006 = dict_lookup_var (dict, t->string.string);
6007 const char *label = var ? var_get_label (var) : NULL;
6008 ds_put_cstr (out, label ? label : t->string.string);
/* Any other token: a space goes before it unless it is the first token or a
   `)', or OUT already ends in a space. */
6012 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6013 ds_put_byte (out, ' ');
6015 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6016 ds_put_cstr (out, repr);
/* A space goes after it unless it is the last token or a `('. */
6019 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6020 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the substitution codes that it
   recognizes:

   - `)DATE' becomes NOW formatted with strftime's "%x";

   - `)TIME' becomes NOW formatted with strftime's "%X";

   - `)TABLE' becomes a rendering of the table expression spanning lexer
     token offsets EXPR_START to EXPR_END.

   Text up to each `)' is copied unchanged, and a `)' that does not begin one
   of these codes is copied literally.

   NOTE(review): the enclosing loop and its exit are on lines not visible in
   this listing.  Presumably processing repeats until IN is exhausted;
   confirm. */
6026 put_title_text (struct string *out, struct substring in, time_t now,
6027 struct lexer *lexer, struct dictionary *dict,
6028 int expr_start, int expr_end)
/* Copy everything before the next `)' verbatim. */
6032 size_t chunk = ss_find_byte (in, ')');
6033 ds_put_substring (out, ss_head (in, chunk));
6034 ss_advance (&in, chunk);
6035 if (ss_is_empty (in))
6038 if (skip_prefix (&in, ss_cstr (")DATE")))
6039 put_strftime (out, now, "%x");
6040 else if (skip_prefix (&in, ss_cstr (")TIME")))
6041 put_strftime (out, now, "%X");
6042 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6043 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a recognized code: emit the `)' itself and move past it. */
6046 ds_put_byte (out, ')');
6047 ss_advance (&in, 1);
6053 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6055 struct casereader *input = NULL;
6057 struct measure_guesser *mg = measure_guesser_create (ds);
6060 input = proc_open (ds);
6061 measure_guesser_run (mg, input);
6062 measure_guesser_destroy (mg);
6065 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6066 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6067 enum settings_value_show tvars = settings_get_show_variables ();
6068 for (size_t i = 0; i < n_vars; i++)
6069 vlabels[i] = (enum ctables_vlabel) tvars;
6071 struct pivot_table_look *look = pivot_table_look_unshare (
6072 pivot_table_look_ref (pivot_table_look_get_default ()));
6073 look->omit_empty = false;
6075 struct ctables *ct = xmalloc (sizeof *ct);
6076 *ct = (struct ctables) {
6077 .dict = dataset_dict (ds),
6079 .ctables_formats = FMT_SETTINGS_INIT,
6081 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6084 time_t now = time (NULL);
6089 const char *dot_string;
6090 const char *comma_string;
6092 static const struct ctf ctfs[4] = {
6093 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6094 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6095 { CTEF_PAREN, "-,(,),", "-.(.)." },
6096 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6098 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6099 for (size_t i = 0; i < 4; i++)
6101 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6102 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6103 fmt_number_style_from_string (s));
6106 if (!lex_force_match (lexer, T_SLASH))
6109 while (!lex_match_id (lexer, "TABLE"))
6111 if (lex_match_id (lexer, "FORMAT"))
6113 double widths[2] = { SYSMIS, SYSMIS };
6114 double units_per_inch = 72.0;
6116 while (lex_token (lexer) != T_SLASH)
6118 if (lex_match_id (lexer, "MINCOLWIDTH"))
6120 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6123 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6125 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6128 else if (lex_match_id (lexer, "UNITS"))
6130 lex_match (lexer, T_EQUALS);
6131 if (lex_match_id (lexer, "POINTS"))
6132 units_per_inch = 72.0;
6133 else if (lex_match_id (lexer, "INCHES"))
6134 units_per_inch = 1.0;
6135 else if (lex_match_id (lexer, "CM"))
6136 units_per_inch = 2.54;
6139 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6143 else if (lex_match_id (lexer, "EMPTY"))
6148 lex_match (lexer, T_EQUALS);
6149 if (lex_match_id (lexer, "ZERO"))
6151 /* Nothing to do. */
6153 else if (lex_match_id (lexer, "BLANK"))
6154 ct->zero = xstrdup ("");
6155 else if (lex_force_string (lexer))
6157 ct->zero = ss_xstrdup (lex_tokss (lexer));
6163 else if (lex_match_id (lexer, "MISSING"))
6165 lex_match (lexer, T_EQUALS);
6166 if (!lex_force_string (lexer))
6170 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6171 ? ss_xstrdup (lex_tokss (lexer))
6177 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6178 "UNITS", "EMPTY", "MISSING");
6183 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6184 && widths[0] > widths[1])
6186 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6190 for (size_t i = 0; i < 2; i++)
6191 if (widths[i] != SYSMIS)
6193 int *wr = ct->look->width_ranges[TABLE_HORZ];
6194 wr[i] = widths[i] / units_per_inch * 96.0;
6199 else if (lex_match_id (lexer, "VLABELS"))
6201 if (!lex_force_match_id (lexer, "VARIABLES"))
6203 lex_match (lexer, T_EQUALS);
6205 struct variable **vars;
6207 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6211 if (!lex_force_match_id (lexer, "DISPLAY"))
6216 lex_match (lexer, T_EQUALS);
6218 enum ctables_vlabel vlabel;
6219 if (lex_match_id (lexer, "DEFAULT"))
6220 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6221 else if (lex_match_id (lexer, "NAME"))
6223 else if (lex_match_id (lexer, "LABEL"))
6224 vlabel = CTVL_LABEL;
6225 else if (lex_match_id (lexer, "BOTH"))
6227 else if (lex_match_id (lexer, "NONE"))
6231 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6237 for (size_t i = 0; i < n_vars; i++)
6238 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6241 else if (lex_match_id (lexer, "MRSETS"))
6243 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6245 lex_match (lexer, T_EQUALS);
6246 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6249 else if (lex_match_id (lexer, "SMISSING"))
6251 if (lex_match_id (lexer, "VARIABLE"))
6252 ct->smissing_listwise = false;
6253 else if (lex_match_id (lexer, "LISTWISE"))
6254 ct->smissing_listwise = true;
6257 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6261 else if (lex_match_id (lexer, "PCOMPUTE"))
6263 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6266 else if (lex_match_id (lexer, "PPROPERTIES"))
6268 if (!ctables_parse_pproperties (lexer, ct))
6271 else if (lex_match_id (lexer, "WEIGHT"))
6273 if (!lex_force_match_id (lexer, "VARIABLE"))
6275 lex_match (lexer, T_EQUALS);
6276 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6280 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6282 if (lex_match_id (lexer, "COUNT"))
6284 lex_match (lexer, T_EQUALS);
6285 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6288 ct->hide_threshold = lex_integer (lexer);
6291 else if (ct->hide_threshold == 0)
6292 ct->hide_threshold = 5;
6296 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6297 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6298 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6302 if (!lex_force_match (lexer, T_SLASH))
6306 size_t allocated_tables = 0;
6309 if (ct->n_tables >= allocated_tables)
6310 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6311 sizeof *ct->tables);
6313 struct ctables_category *cat = xmalloc (sizeof *cat);
6314 *cat = (struct ctables_category) {
6316 .include_missing = false,
6317 .sort_ascending = true,
6320 struct ctables_categories *c = xmalloc (sizeof *c);
6321 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6322 *c = (struct ctables_categories) {
6329 struct ctables_categories **categories = xnmalloc (n_vars,
6330 sizeof *categories);
6331 for (size_t i = 0; i < n_vars; i++)
6334 struct ctables_table *t = xmalloc (sizeof *t);
6335 *t = (struct ctables_table) {
6337 .slabels_axis = PIVOT_AXIS_COLUMN,
6338 .slabels_visible = true,
6339 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6341 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6342 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6343 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6345 .clabels_from_axis = PIVOT_AXIS_LAYER,
6346 .categories = categories,
6347 .n_categories = n_vars,
6350 ct->tables[ct->n_tables++] = t;
6352 lex_match (lexer, T_EQUALS);
6353 int expr_start = lex_ofs (lexer);
6354 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6356 if (lex_match (lexer, T_BY))
6358 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6359 ct, t, PIVOT_AXIS_COLUMN))
6362 if (lex_match (lexer, T_BY))
6364 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6365 ct, t, PIVOT_AXIS_LAYER))
6369 int expr_end = lex_ofs (lexer);
6371 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6372 && !t->axes[PIVOT_AXIS_LAYER])
6374 lex_error (lexer, _("At least one variable must be specified."));
6378 const struct ctables_axis *scales[PIVOT_N_AXES];
6379 size_t n_scales = 0;
6380 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6382 scales[a] = find_scale (t->axes[a]);
6388 msg (SE, _("Scale variables may appear only on one axis."));
6389 if (scales[PIVOT_AXIS_ROW])
6390 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6391 _("This scale variable appears on the rows axis."));
6392 if (scales[PIVOT_AXIS_COLUMN])
6393 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6394 _("This scale variable appears on the columns axis."));
6395 if (scales[PIVOT_AXIS_LAYER])
6396 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6397 _("This scale variable appears on the layer axis."));
6401 const struct ctables_axis *summaries[PIVOT_N_AXES];
6402 size_t n_summaries = 0;
6403 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6405 summaries[a] = (scales[a]
6407 : find_categorical_summary_spec (t->axes[a]));
6411 if (n_summaries > 1)
6413 msg (SE, _("Summaries may appear only on one axis."));
6414 if (summaries[PIVOT_AXIS_ROW])
6415 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6416 _("This variable on the rows axis has a summary."));
6417 if (summaries[PIVOT_AXIS_COLUMN])
6418 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6419 _("This variable on the columns axis has a summary."));
6420 if (summaries[PIVOT_AXIS_LAYER])
6421 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6422 _("This variable on the layers axis has a summary."));
6425 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6426 if (n_summaries ? summaries[a] : t->axes[a])
6428 t->summary_axis = a;
6432 if (lex_token (lexer) == T_ENDCMD)
6434 if (!ctables_prepare_table (t))
6438 if (!lex_force_match (lexer, T_SLASH))
6441 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6443 if (lex_match_id (lexer, "SLABELS"))
6445 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6447 if (lex_match_id (lexer, "POSITION"))
6449 lex_match (lexer, T_EQUALS);
6450 if (lex_match_id (lexer, "COLUMN"))
6451 t->slabels_axis = PIVOT_AXIS_COLUMN;
6452 else if (lex_match_id (lexer, "ROW"))
6453 t->slabels_axis = PIVOT_AXIS_ROW;
6454 else if (lex_match_id (lexer, "LAYER"))
6455 t->slabels_axis = PIVOT_AXIS_LAYER;
6458 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6462 else if (lex_match_id (lexer, "VISIBLE"))
6464 lex_match (lexer, T_EQUALS);
6465 if (!parse_bool (lexer, &t->slabels_visible))
6470 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6475 else if (lex_match_id (lexer, "CLABELS"))
6477 if (lex_match_id (lexer, "AUTO"))
6479 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6480 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6482 else if (lex_match_id (lexer, "ROWLABELS"))
6484 lex_match (lexer, T_EQUALS);
6485 if (lex_match_id (lexer, "OPPOSITE"))
6486 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6487 else if (lex_match_id (lexer, "LAYER"))
6488 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6491 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6495 else if (lex_match_id (lexer, "COLLABELS"))
6497 lex_match (lexer, T_EQUALS);
6498 if (lex_match_id (lexer, "OPPOSITE"))
6499 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6500 else if (lex_match_id (lexer, "LAYER"))
6501 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6504 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6510 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6515 else if (lex_match_id (lexer, "CRITERIA"))
6517 if (!lex_force_match_id (lexer, "CILEVEL"))
6519 lex_match (lexer, T_EQUALS);
6521 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6523 t->cilevel = lex_number (lexer);
6526 else if (lex_match_id (lexer, "CATEGORIES"))
6528 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6532 else if (lex_match_id (lexer, "TITLES"))
6537 if (lex_match_id (lexer, "CAPTION"))
6538 textp = &t->caption;
6539 else if (lex_match_id (lexer, "CORNER"))
6541 else if (lex_match_id (lexer, "TITLE"))
6545 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6548 lex_match (lexer, T_EQUALS);
6550 struct string s = DS_EMPTY_INITIALIZER;
6551 while (lex_is_string (lexer))
6553 if (!ds_is_empty (&s))
6554 ds_put_byte (&s, ' ');
6555 put_title_text (&s, lex_tokss (lexer), now,
6556 lexer, dataset_dict (ds),
6557 expr_start, expr_end);
6561 *textp = ds_steal_cstr (&s);
6563 while (lex_token (lexer) != T_SLASH
6564 && lex_token (lexer) != T_ENDCMD);
6566 else if (lex_match_id (lexer, "SIGTEST"))
6570 t->chisq = xmalloc (sizeof *t->chisq);
6571 *t->chisq = (struct ctables_chisq) {
6573 .include_mrsets = true,
6574 .all_visible = true,
6580 if (lex_match_id (lexer, "TYPE"))
6582 lex_match (lexer, T_EQUALS);
6583 if (!lex_force_match_id (lexer, "CHISQUARE"))
6586 else if (lex_match_id (lexer, "ALPHA"))
6588 lex_match (lexer, T_EQUALS);
6589 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6591 t->chisq->alpha = lex_number (lexer);
6594 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6596 lex_match (lexer, T_EQUALS);
6597 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6600 else if (lex_match_id (lexer, "CATEGORIES"))
6602 lex_match (lexer, T_EQUALS);
6603 if (lex_match_id (lexer, "ALLVISIBLE"))
6604 t->chisq->all_visible = true;
6605 else if (lex_match_id (lexer, "SUBTOTALS"))
6606 t->chisq->all_visible = false;
6609 lex_error_expecting (lexer,
6610 "ALLVISIBLE", "SUBTOTALS");
6616 lex_error_expecting (lexer, "TYPE", "ALPHA",
6617 "INCLUDEMRSETS", "CATEGORIES");
6621 while (lex_token (lexer) != T_SLASH
6622 && lex_token (lexer) != T_ENDCMD);
6624 else if (lex_match_id (lexer, "COMPARETEST"))
6628 t->pairwise = xmalloc (sizeof *t->pairwise);
6629 *t->pairwise = (struct ctables_pairwise) {
6631 .alpha = { .05, .05 },
6632 .adjust = BONFERRONI,
6633 .include_mrsets = true,
6634 .meansvariance_allcats = true,
6635 .all_visible = true,
6644 if (lex_match_id (lexer, "TYPE"))
6646 lex_match (lexer, T_EQUALS);
6647 if (lex_match_id (lexer, "PROP"))
6648 t->pairwise->type = PROP;
6649 else if (lex_match_id (lexer, "MEAN"))
6650 t->pairwise->type = MEAN;
6653 lex_error_expecting (lexer, "PROP", "MEAN");
6657 else if (lex_match_id (lexer, "ALPHA"))
6659 lex_match (lexer, T_EQUALS);
6661 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6663 double a0 = lex_number (lexer);
6666 lex_match (lexer, T_COMMA);
6667 if (lex_is_number (lexer))
6669 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6671 double a1 = lex_number (lexer);
6674 t->pairwise->alpha[0] = MIN (a0, a1);
6675 t->pairwise->alpha[1] = MAX (a0, a1);
6678 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6680 else if (lex_match_id (lexer, "ADJUST"))
6682 lex_match (lexer, T_EQUALS);
6683 if (lex_match_id (lexer, "BONFERRONI"))
6684 t->pairwise->adjust = BONFERRONI;
6685 else if (lex_match_id (lexer, "BH"))
6686 t->pairwise->adjust = BH;
6687 else if (lex_match_id (lexer, "NONE"))
6688 t->pairwise->adjust = 0;
6691 lex_error_expecting (lexer, "BONFERRONI", "BH",
6696 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6698 lex_match (lexer, T_EQUALS);
6699 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6702 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6704 lex_match (lexer, T_EQUALS);
6705 if (lex_match_id (lexer, "ALLCATS"))
6706 t->pairwise->meansvariance_allcats = true;
6707 else if (lex_match_id (lexer, "TESTEDCATS"))
6708 t->pairwise->meansvariance_allcats = false;
6711 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6715 else if (lex_match_id (lexer, "CATEGORIES"))
6717 lex_match (lexer, T_EQUALS);
6718 if (lex_match_id (lexer, "ALLVISIBLE"))
6719 t->pairwise->all_visible = true;
6720 else if (lex_match_id (lexer, "SUBTOTALS"))
6721 t->pairwise->all_visible = false;
6724 lex_error_expecting (lexer, "ALLVISIBLE",
6729 else if (lex_match_id (lexer, "MERGE"))
6731 lex_match (lexer, T_EQUALS);
6732 if (!parse_bool (lexer, &t->pairwise->merge))
6735 else if (lex_match_id (lexer, "STYLE"))
6737 lex_match (lexer, T_EQUALS);
6738 if (lex_match_id (lexer, "APA"))
6739 t->pairwise->apa_style = true;
6740 else if (lex_match_id (lexer, "SIMPLE"))
6741 t->pairwise->apa_style = false;
6744 lex_error_expecting (lexer, "APA", "SIMPLE");
6748 else if (lex_match_id (lexer, "SHOWSIG"))
6750 lex_match (lexer, T_EQUALS);
6751 if (!parse_bool (lexer, &t->pairwise->show_sig))
6756 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6757 "INCLUDEMRSETS", "MEANSVARIANCE",
6758 "CATEGORIES", "MERGE", "STYLE",
6763 while (lex_token (lexer) != T_SLASH
6764 && lex_token (lexer) != T_ENDCMD);
6768 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6769 "CRITERIA", "CATEGORIES", "TITLES",
6770 "SIGTEST", "COMPARETEST");
6774 if (!lex_match (lexer, T_SLASH))
6778 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6779 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6781 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6785 if (!ctables_prepare_table (t))
6788 while (lex_token (lexer) != T_ENDCMD);
6791 input = proc_open (ds);
6792 bool ok = ctables_execute (ds, input, ct);
6793 ok = proc_commit (ds) && ok;
6795 ctables_destroy (ct);
6796 return ok ? CMD_SUCCESS : CMD_FAILURE;
6801 ctables_destroy (ct);