1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
/* Message helpers: _() passes MSGID through gettext(); N_() expands to MSGID
   unchanged. */
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
/* Each CTVL_* enumerator is defined as one of the SETTINGS_VALUE_SHOW_*
   constants, so the two sets of values are interchangeable. */
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
167 #if 0 /* Multiple response sets not yet implemented. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition.  Instances are kept in struct ctables's
   'postcomputes' hmap. */
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
448 static void ctables_stack_uninit (struct ctables_stack *);
452 struct hmap_node node;
457 struct ctables_occurrence
459 struct hmap_node node;
463 struct ctables_section
466 struct ctables_table *table;
467 struct ctables_nest *nests[PIVOT_N_AXES];
470 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
471 struct hmap cells; /* Contains "struct ctables_cell"s. */
472 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 static void ctables_section_uninit (struct ctables_section *);
479 struct ctables *ctables;
480 struct ctables_axis *axes[PIVOT_N_AXES];
481 struct ctables_stack stacks[PIVOT_N_AXES];
482 struct ctables_section *sections;
484 enum pivot_axis_type summary_axis;
485 struct ctables_summary_spec_set summary_specs;
486 struct variable **sum_vars;
489 enum pivot_axis_type slabels_axis;
490 bool slabels_visible;
492 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
494 Most commonly, label_axis[a] == a, and in particular we always have
495 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
497 If ROWLABELS or COLLABELS is specified, then one of
498 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
499 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
501 If any category labels are moved, then 'clabels_example' is one of the
502 variables being moved (and it is otherwise NULL). All of the variables
503 being moved have the same width, value labels, and categories, so this
504 example variable can be used to find those out.
506 The remaining members in this group are relevant only if category labels
509 'clabels_values_map' holds a "struct ctables_value" for all the values
510 that appear in all of the variables in the moved categories. It is
511 accumulated as the data is read. Once the data is fully read, its
512 sorted values are put into 'clabels_values' and 'n_clabels_values'.
514 enum pivot_axis_type label_axis[PIVOT_N_AXES];
515 enum pivot_axis_type clabels_from_axis;
516 const struct variable *clabels_example;
517 struct hmap clabels_values_map;
518 struct ctables_value **clabels_values;
519 size_t n_clabels_values;
521 /* Indexed by variable dictionary index. */
522 struct ctables_categories **categories;
531 struct ctables_chisq *chisq;
532 struct ctables_pairwise *pairwise;
535 struct ctables_categories
538 struct ctables_category *cats;
543 struct ctables_category
545 enum ctables_category_type
547 /* Explicit category lists. */
550 CCT_NRANGE, /* Numerical range. */
551 CCT_SRANGE, /* String range. */
556 /* Totals and subtotals. */
560 /* Implicit category lists. */
565 /* For contributing to TOTALN. */
566 CCT_EXCLUDED_MISSING,
570 struct ctables_category *subtotal;
576 double number; /* CCT_NUMBER. */
577 struct substring string; /* CCT_STRING, in dictionary encoding. */
578 double nrange[2]; /* CCT_NRANGE. */
579 struct substring srange[2]; /* CCT_SRANGE. */
583 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
584 bool hide_subcategories; /* CCT_SUBTOTAL. */
587 /* CCT_POSTCOMPUTE. */
590 const struct ctables_postcompute *pc;
591 enum fmt_type parse_format;
594 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
597 bool include_missing;
601 enum ctables_summary_function sort_function;
602 struct variable *sort_var;
607 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
608 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
609 struct msg_location *location;
/* Releases resources held by CAT.  The visible lines destroy CAT's source
   location and, depending on a per-type switch, deallocate its string, its two
   string-range endpoints, or its total label.
   NOTE(review): most case labels are missing from this excerpt, so which
   category type triggers which release cannot be confirmed here. */
613 ctables_category_uninit (struct ctables_category *cat)
618 msg_location_destroy (cat->location);
625 case CCT_POSTCOMPUTE:
629 ss_dealloc (&cat->string);
633 ss_dealloc (&cat->srange[0]);
634 ss_dealloc (&cat->srange[1]);
639 free (cat->total_label);
647 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   pointer.  Two null substrings compare equal; a null and a non-null substring
   compare unequal; two non-null substrings are compared with ss_equals(). */
653 nullable_substring_equal (const struct substring *a,
654 const struct substring *b)
656 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are compared first; then, per type,
   the visible lines compare the type-specific payload: the number, the string,
   both numeric range bounds, both (nullable) string range bounds, the
   postcompute pointer, the total label text, or the sort/missing settings.
   NOTE(review): the result for mismatched types and most case labels are not
   visible in this excerpt; presumably a type mismatch yields false. */
660 ctables_category_equal (const struct ctables_category *a,
661 const struct ctables_category *b)
663 if (a->type != b->type)
669 return a->number == b->number;
672 return ss_equals (a->string, b->string);
675 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* Range endpoints may be null substrings, hence the nullable comparison. */
678 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
679 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
685 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by identity (pointer), not by content. */
686 return a->pc == b->pc;
690 return !strcmp (a->total_label, b->total_label);
695 return (a->include_missing == b->include_missing
696 && a->sort_ascending == b->sort_ascending
697 && a->sort_function == b->sort_function
698 && a->sort_var == b->sort_var
699 && a->percentile == b->percentile);
701 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  Asserts that C's reference count is positive and,
   in the visible lines, uninitializes every category in C.
   NOTE(review): the reference-count decrement and the condition guarding the
   cleanup are not visible in this excerpt. */
709 ctables_categories_unref (struct ctables_categories *c)
714 assert (c->n_refs > 0);
718 for (size_t i = 0; i < c->n_cats; i++)
719 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B: first their category counts and
   'show_empty' settings, then each pair of categories at the same index using
   ctables_category_equal().
   NOTE(review): the return statements are not visible in this excerpt. */
725 ctables_categories_equal (const struct ctables_categories *a,
726 const struct ctables_categories *b)
728 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
731 for (size_t i = 0; i < a->n_cats; i++)
732 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
738 /* Chi-square test (SIGTEST). */
746 /* Pairwise comparison test (COMPARETEST). */
747 struct ctables_pairwise
749 enum { PROP, MEAN } type;
752 bool meansvariance_allcats;
754 enum { BONFERRONI = 1, BH } adjust;
778 struct variable *var;
780 struct ctables_summary_spec_set specs[N_CSVS];
784 struct ctables_axis *subs[2];
787 struct msg_location *loc;
790 static void ctables_axis_destroy (struct ctables_axis *);
799 enum ctables_function_availability
801 CTFA_ALL, /* Any variables. */
802 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
803 //CTFA_MRSETS, /* Only multiple-response sets */
806 struct ctables_summary_spec
808 enum ctables_summary_function function;
809 double percentile; /* CTSF_PTILE only. */
812 struct fmt_spec format;
813 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Copies SRC into DST.  The visible line gives DST its own duplicate of SRC's
   label (or null if SRC has none), so the two specs do not share the string.
   NOTE(review): the rest of the copy is not visible in this excerpt. */
820 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
821 const struct ctables_summary_spec *src)
824 dst->label = xstrdup_if_nonnull (src->label);
/* Uninitializes summary spec S.
   NOTE(review): the body is not visible in this excerpt. */
828 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is allocated
   (or left null when SRC is empty) and each spec is cloned into it, then *DST
   is assigned from a compound literal that, among other members, carries over
   SRC's 'is_scale' flag.
   NOTE(review): the other members of the initializer are not visible here. */
835 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
836 const struct ctables_summary_spec_set *src)
838 struct ctables_summary_spec *specs
839 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
840 for (size_t i = 0; i < src->n; i++)
841 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
843 *dst = (struct ctables_summary_spec_set) {
848 .is_scale = src->is_scale,
/* Uninitializes SET by uninitializing each of its SET->n summary specs.
   NOTE(review): any further cleanup is not visible in this excerpt. */
853 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
855 for (size_t i = 0; i < set->n; i++)
856 ctables_summary_spec_uninit (&set->specs[i]);
/* Parses a column width setting from LEXER.  An '=' token is consumed if
   present.  The keyword DEFAULT is accepted; otherwise a number in the closed
   range [0, DBL_MAX] is required (NAME is passed to the range check), and it is
   stored in *WIDTH.
   NOTE(review): the value stored for DEFAULT and the return statements are not
   visible in this excerpt. */
861 parse_col_width (struct lexer *lexer, const char *name, double *width)
863 lex_match (lexer, T_EQUALS);
864 if (lex_match_id (lexer, "DEFAULT"))
866 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
868 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER; if neither is present, reports an
   error saying that YES or NO was expected.
   NOTE(review): the assignments to *B and the return statements are not
   visible in this excerpt. */
878 parse_bool (struct lexer *lexer, bool *b)
880 if (lex_match_id (lexer, "NO"))
882 else if (lex_match_id (lexer, "YES"))
886 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class recorded for summary function F.  The lookup
   table is indexed by function and filled from the AVAILABILITY column of the
   S() entries. */
892 static enum ctables_function_availability
893 ctables_function_availability (enum ctables_summary_function f)
895 static enum ctables_function_availability availability[] = {
896 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
901 return availability[f];
/* Classifies summary function F by switching over its value.
   NOTE(review): only case labels are visible in this excerpt; the value
   returned for each group of labels is not.  Judging by the name, the function
   presumably reports whether F is a count-based summary -- confirm against the
   full definition. */
905 ctables_summary_function_is_count (enum ctables_summary_function f)
/* Weighted *PCT.COUNT functions. */
911 case CTSF_ROWPCT_COUNT:
912 case CTSF_COLPCT_COUNT:
913 case CTSF_TABLEPCT_COUNT:
914 case CTSF_SUBTABLEPCT_COUNT:
915 case CTSF_LAYERPCT_COUNT:
916 case CTSF_LAYERROWPCT_COUNT:
917 case CTSF_LAYERCOLPCT_COUNT:
/* Unweighted *PCT.COUNT functions. */
919 case CTSF_UROWPCT_COUNT:
920 case CTSF_UCOLPCT_COUNT:
921 case CTSF_UTABLEPCT_COUNT:
922 case CTSF_USUBTABLEPCT_COUNT:
923 case CTSF_ULAYERPCT_COUNT:
924 case CTSF_ULAYERROWPCT_COUNT:
925 case CTSF_ULAYERCOLPCT_COUNT:
/* Weighted *PCT.VALIDN and *PCT.TOTALN functions. */
928 case CTSF_ROWPCT_VALIDN:
929 case CTSF_COLPCT_VALIDN:
930 case CTSF_TABLEPCT_VALIDN:
931 case CTSF_SUBTABLEPCT_VALIDN:
932 case CTSF_LAYERPCT_VALIDN:
933 case CTSF_LAYERROWPCT_VALIDN:
934 case CTSF_LAYERCOLPCT_VALIDN:
935 case CTSF_ROWPCT_TOTALN:
936 case CTSF_COLPCT_TOTALN:
937 case CTSF_TABLEPCT_TOTALN:
938 case CTSF_SUBTABLEPCT_TOTALN:
939 case CTSF_LAYERPCT_TOTALN:
940 case CTSF_LAYERROWPCT_TOTALN:
941 case CTSF_LAYERCOLPCT_TOTALN:
/* Weighted *PCT.SUM functions. */
958 case CTSF_ROWPCT_SUM:
959 case CTSF_COLPCT_SUM:
960 case CTSF_TABLEPCT_SUM:
961 case CTSF_SUBTABLEPCT_SUM:
962 case CTSF_LAYERPCT_SUM:
963 case CTSF_LAYERROWPCT_SUM:
964 case CTSF_LAYERCOLPCT_SUM:
/* Unweighted *PCT.VALIDN and *PCT.TOTALN functions. */
965 case CTSF_UROWPCT_VALIDN:
966 case CTSF_UCOLPCT_VALIDN:
967 case CTSF_UTABLEPCT_VALIDN:
968 case CTSF_USUBTABLEPCT_VALIDN:
969 case CTSF_ULAYERPCT_VALIDN:
970 case CTSF_ULAYERROWPCT_VALIDN:
971 case CTSF_ULAYERCOLPCT_VALIDN:
972 case CTSF_UROWPCT_TOTALN:
973 case CTSF_UCOLPCT_TOTALN:
974 case CTSF_UTABLEPCT_TOTALN:
975 case CTSF_USUBTABLEPCT_TOTALN:
976 case CTSF_ULAYERPCT_TOTALN:
977 case CTSF_ULAYERROWPCT_TOTALN:
978 case CTSF_ULAYERCOLPCT_TOTALN:
/* Unweighted *PCT.SUM functions. */
990 case CTSF_UROWPCT_SUM:
991 case CTSF_UCOLPCT_SUM:
992 case CTSF_UTABLEPCT_SUM:
993 case CTSF_USUBTABLEPCT_SUM:
994 case CTSF_ULAYERPCT_SUM:
995 case CTSF_ULAYERROWPCT_SUM:
996 case CTSF_ULAYERCOLPCT_SUM:
/* Parses a summary function name from LEXER.  The current token must be an
   identifier; it is compared, ignoring case, against each entry of a static
   name table, and on a match the corresponding function is stored in *F.  The
   table also holds *PCT aliases that map to the matching *PCT.COUNT function.
   If nothing matches, an error is reported.
   NOTE(review): the return statements and token consumption are not visible in
   this excerpt. */
1004 parse_ctables_summary_function (struct lexer *lexer,
1005 enum ctables_summary_function *f)
1009 enum ctables_summary_function function;
1010 struct substring name;
1012 static struct pair names[] = {
1013 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
1014 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
1017 /* The .COUNT suffix may be omitted. */
1018 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
1019 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
1020 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
1021 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
1022 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
1023 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
1024 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
1028 if (!lex_force_id (lexer))
/* Linear search of the table; comparison is case-insensitive. */
1031 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
1032 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
1034 *f = names[i].function;
1039 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  The visible lines uninitialize each of the N_CSVS summary
   spec sets, recursively destroy both sub-axes, and destroy the axis's source
   location.
   NOTE(review): the switch on the axis operator that selects between the
   spec-set cleanup and the recursion, any null check, and the final free are
   not visible in this excerpt. */
1044 ctables_axis_destroy (struct ctables_axis *axis)
1052 for (size_t i = 0; i < N_CSVS; i++)
1053 ctables_summary_spec_set_uninit (&axis->specs[i]);
1058 ctables_axis_destroy (axis->subs[0]);
1059 ctables_axis_destroy (axis->subs[1]);
1062 msg_location_destroy (axis->loc);
/* Allocates and returns a new axis node with sub-axes SUB0 and SUB1.  Its
   source location spans from token offset START_OFS to the token just before
   LEXER's current offset.
   NOTE(review): the initializer line that stores OP and the return statement
   are not visible in this excerpt. */
1066 static struct ctables_axis *
1067 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1068 struct ctables_axis *sub0,
1069 struct ctables_axis *sub1,
1070 struct lexer *lexer, int start_ofs)
1072 struct ctables_axis *axis = xmalloc (sizeof *axis);
1073 *axis = (struct ctables_axis) {
1075 .subs = { sub0, sub1 },
1076 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State passed among the axis-parsing functions: the lexer tokens are read
   from, the dictionary variables are looked up in, and the table being
   parsed.
   NOTE(review): other members may be missing from this excerpt. */
1081 struct ctables_axis_parse_ctx
1083 struct lexer *lexer;
1084 struct dictionary *dict;
1086 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.  A
   static table, filled from the FORMAT column of the S() entries, gives the
   format class for each function; the switch then yields F40 (no decimals
   specified), PCT40.1, or VAR's own print format.
   NOTE(review): the case labels are not visible in this excerpt, so the
   mapping from format class to result is inferred from ordering -- confirm. */
1089 static struct fmt_spec
1090 ctables_summary_default_format (enum ctables_summary_function function,
1091 const struct variable *var)
1093 static const enum ctables_format default_formats[] = {
1094 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1098 switch (default_formats[function])
1101 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1104 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1107 return *var_get_print_format (var);
/* Returns a new pivot value to label summary SPEC.

   One path produces a default label: "Percentile %.2f" text for CTSF_PTILE,
   otherwise the entry for SPEC's function in a static table filled from the
   LABEL column of the S() entries.

   The other path builds the label from SPEC->label, copying text up to each
   occurrence of ")CILEVEL" and substituting CILEVEL formatted with "%g", and
   returns the result as user text.

   NOTE(review): the condition choosing between the two paths, the percentile
   argument, and the loop structure are not visible in this excerpt;
   presumably the default path is taken when SPEC->label is null. */
1114 static struct pivot_value *
1115 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1119 static const char *default_labels[] = {
1120 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1125 return (spec->function == CTSF_PTILE
1126 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1128 : pivot_value_new_text (default_labels[spec->function]));
1132 struct substring in = ss_cstr (spec->label);
1133 struct substring target = ss_cstr (")CILEVEL");
1135 struct string out = DS_EMPTY_INITIALIZER;
/* Copy everything before the next ")CILEVEL" marker to the output. */
1138 size_t chunk = ss_find_substring (in, target);
1139 ds_put_substring (&out, ss_head (in, chunk));
1140 ss_advance (&in, chunk);
1142 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the marker itself and emit the confidence level in its place. */
1144 ss_advance (&in, target.length);
1145 ds_put_format (&out, "%g", cilevel);
/* Returns the name string recorded for summary FUNCTION, looked up in a static
   table filled from the NAME column of the S() entries. */
1151 ctables_summary_function_name (enum ctables_summary_function function)
1153 static const char *names[] = {
1154 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1158 return names[function];
/* Adds a summary specification to AXIS for summary variant SV.

   For a variable axis (CTAO_VAR), FUNCTION's availability is checked first:
   the visible messages reject functions restricted to multiple response sets,
   and reject scale-only functions on a non-scale axis unless SV is CSV_TOTAL.
   The spec set for SV is then grown if full and a new spec appended, with a
   duplicated LABEL and either *FORMAT or, when FORMAT is null, the default
   format for FUNCTION and the axis variable.

   For other axes, the same spec is added recursively to both sub-axes.

   NOTE(review): the case labels, return statements, and some initializer
   members are not visible in this excerpt. */
1162 add_summary_spec (struct ctables_axis *axis,
1163 enum ctables_summary_function function, double percentile,
1164 const char *label, const struct fmt_spec *format,
1165 bool is_ctables_format, const struct msg_location *loc,
1166 enum ctables_summary_variant sv)
1168 if (axis->op == CTAO_VAR)
1170 const char *function_name = ctables_summary_function_name (function);
1171 const char *var_name = var_get_name (axis->var);
1172 switch (ctables_function_availability (function))
1176 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1177 "response sets."), function_name);
1178 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1184 if (!axis->scale && sv != CSV_TOTAL)
1187 _("Summary function %s applies only to scale variables."),
1189 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1199 struct ctables_summary_spec_set *set = &axis->specs[sv];
/* Grow the spec array when it is full. */
1200 if (set->n >= set->allocated)
1201 set->specs = x2nrealloc (set->specs, &set->allocated,
1202 sizeof *set->specs);
1204 struct ctables_summary_spec *dst = &set->specs[set->n++];
1205 *dst = (struct ctables_summary_spec) {
1206 .function = function,
1207 .percentile = percentile,
1208 .label = xstrdup_if_nonnull (label),
1209 .format = (format ? *format
1210 : ctables_summary_default_format (function, axis->var)),
1211 .is_ctables_format = is_ctables_format,
/* Non-variable axis: apply the same spec to both sub-axes. */
1217 for (size_t i = 0; i < 2; i++)
1218 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1219 format, is_ctables_format, loc, sv))
/* Forward declaration: ctables_axis_parse_primary calls this to parse a
   parenthesized sub-expression, before the definition appears. */
1225 static struct ctables_axis *ctables_axis_parse_stack (
1226 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression from CTX->lexer: either a parenthesized
   sub-expression, parsed with ctables_axis_parse_stack and required to end
   with a right parenthesis, or a single variable from CTX->dict, which
   becomes a newly allocated CTAO_VAR axis.

   A variable may be followed by "[S]" or "[C]"; a string variable may not
   be used as a scale variable, and that case is reported as an error. */
1229 static struct ctables_axis *
1230 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1232 if (lex_match (ctx->lexer, T_LPAREN))
1234 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1235 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1237 ctables_axis_destroy (sub);
1243 if (!lex_force_id (ctx->lexer))
1246 int start_ofs = lex_ofs (ctx->lexer);
1247 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1251 struct ctables_axis *axis = xmalloc (sizeof *axis);
1252 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* "[S]" forces scale treatment and "[C]" forces non-scale treatment;
   without either, the variable counts as scale exactly when its
   measurement level is MEASURE_SCALE. */
1254 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1255 : lex_match_phrase (ctx->lexer, "[C]") ? false
1256 : var_get_measure (var) == MEASURE_SCALE);
/* The recorded location runs from the variable name through the last token
   consumed so far, so it includes any "[S]" or "[C]" suffix. */
1257 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1258 lex_ofs (ctx->lexer) - 1);
1259 if (axis->scale && var_is_alpha (var))
1261 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1263 var_get_name (var));
1264 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the characters '0' through
   '9'.  strcspn() yields the length of the leading run of S without any
   digit; if the character at that offset is not the terminating null, a
   digit was found. */
1272 has_digit (const char *s)
1274 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT.

   The type name is compared case-insensitively against NEGPAREN, NEQUAL,
   PAREN, and PCTPAREN, which select the corresponding CTEF_* format types.
   Any other name stores false in *IS_CTABLES_FORMAT and is handed to
   parse_format_specifier; the result must also be a valid output format
   that is compatible with numeric values.

   The later checks report an error when the width is too small or when the
   decimals exceed the width minus 1; *IS_CTABLES_FORMAT is set to true
   after them (presumably only on the CTEF_* path -- the branch structure
   is on lines not shown in this listing). */
1278 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1279 bool *is_ctables_format)
1281 char type[FMT_TYPE_LEN_MAX + 1];
1282 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1285 if (!strcasecmp (type, "NEGPAREN"))
1286 format->type = CTEF_NEGPAREN;
1287 else if (!strcasecmp (type, "NEQUAL"))
1288 format->type = CTEF_NEQUAL;
1289 else if (!strcasecmp (type, "PAREN"))
1290 format->type = CTEF_PAREN;
1291 else if (!strcasecmp (type, "PCTPAREN"))
1292 format->type = CTEF_PCTPAREN;
1295 *is_ctables_format = false;
1296 return (parse_format_specifier (lexer, format)
1297 && fmt_check_output (format)
1298 && fmt_check_type_compat (format, VAL_NUMERIC));
1304 lex_next_error (lexer, -1, -1,
1305 _("Output format %s requires width 2 or greater."), type);
1308 else if (format->d > format->w - 1)
1310 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1311 "greater than decimals."), type);
1316 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each summary specification consists of a summary function, a percentile
   in the closed range 0 to 100 when the function is CTSF_PTILE, an optional
   string label, and an optional output format.  Each one is passed to
   add_summary_spec for the parsed sub-axis.  Specifications start out in
   the CSV_CELL variant; the TOTALS keyword followed by a left bracket is
   accepted only while in that variant.  On the visible error path the
   sub-axis is destroyed. */
1321 static struct ctables_axis *
1322 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1324 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1325 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1328 enum ctables_summary_variant sv = CSV_CELL;
1331 int start_ofs = lex_ofs (ctx->lexer);
1333 /* Parse function. */
1334 enum ctables_summary_function function;
1335 if (!parse_ctables_summary_function (ctx->lexer, &function))
1338 /* Parse percentile. */
1339 double percentile = 0;
1340 if (function == CTSF_PTILE)
1342 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1344 percentile = lex_number (ctx->lexer);
1345 lex_get (ctx->lexer);
/* An optional string token supplies the label; it is copied to the heap. */
1350 if (lex_is_string (ctx->lexer))
1352 label = ss_xstrdup (lex_tokss (ctx->lexer));
1353 lex_get (ctx->lexer);
/* An identifier is treated as a format specifier only if it contains a
   digit. */
1357 struct fmt_spec format;
1358 const struct fmt_spec *formatp;
1359 bool is_ctables_format = false;
1360 if (lex_token (ctx->lexer) == T_ID
1361 && has_digit (lex_tokcstr (ctx->lexer)))
1363 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1364 &is_ctables_format))
/* LOC covers every token of this summary specification.
   NOTE(review): the result of add_summary_spec is not examined here, even
   though add_summary_spec itself tests the result of its recursive calls;
   confirm that continuing after a reported error is intended. */
1374 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1375 lex_ofs (ctx->lexer) - 1);
1376 add_summary_spec (sub, function, percentile, label, formatp,
1377 is_ctables_format, loc, sv);
1379 msg_location_destroy (loc);
/* A comma between summary specifications is optional. */
1381 lex_match (ctx->lexer, T_COMMA);
1382 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1384 if (!lex_force_match (ctx->lexer, T_LBRACK))
1388 else if (lex_match (ctx->lexer, T_RBRACK))
/* In the CSV_TOTAL variant a second right bracket is required. */
1390 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1397 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A CTAO_VAR
   node yields itself when its scale flag is set and NULL otherwise; for
   other nodes the two sub-axes are searched recursively in order.  (The
   first branch of the if chain and the handling of the recursive result are
   on lines not shown in this listing.) */
1401 static const struct ctables_axis *
1402 find_scale (const struct ctables_axis *axis)
1406 else if (axis->op == CTAO_VAR)
1407 return axis->scale ? axis : NULL;
1410 for (size_t i = 0; i < 2; i++)
1412 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a non-scale variable that has
   at least one CSV_CELL summary specification.  A CTAO_VAR node yields
   itself when it qualifies and NULL otherwise; for other nodes the two
   sub-axes are searched recursively in order.  (The first branch of the if
   chain and the handling of the recursive result are on lines not shown in
   this listing.) */
1420 static const struct ctables_axis *
1421 find_categorical_summary_spec (const struct ctables_axis *axis)
1425 else if (axis->op == CTAO_VAR)
1426 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1429 for (size_t i = 0; i < 2; i++)
1431 const struct ctables_axis *sum
1432 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions separated by T_GT tokens,
   combining each pair into a CTAO_NEST node whose location starts at the
   first token of the whole expression.

   Two combinations are rejected, and the new nest node is destroyed after
   the diagnostics are issued: a scale variable on both sides of the nest,
   and a categorical variable with a summary on the left-hand (outer)
   side. */
1440 static struct ctables_axis *
1441 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1443 int start_ofs = lex_ofs (ctx->lexer);
1444 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1448 while (lex_match (ctx->lexer, T_GT))
1450 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1454 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1455 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nest may contain a scale variable. */
1457 const struct ctables_axis *outer_scale = find_scale (lhs);
1458 const struct ctables_axis *inner_scale = find_scale (rhs);
1459 if (outer_scale && inner_scale)
1461 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1462 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1463 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1464 ctables_axis_destroy (nest);
/* Only the left-hand (outer) side is searched for categorical summaries. */
1468 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1471 msg_at (SE, nest->loc,
1472 _("Summaries may only be requested for categorical variables "
1473 "at the innermost nesting level."));
1474 msg_at (SN, outer_sum->loc,
1475 _("This outer categorical variable has a summary."));
1476 ctables_axis_destroy (nest);
/* Parses one or more nest expressions separated by T_PLUS tokens.  Each
   additional operand is folded into the accumulated left-hand side as a
   CTAO_STACK node, so repeated `+' builds a left-leaning tree whose nodes
   all share the starting offset of the first operand. */
1486 static struct ctables_axis *
1487 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1489 int start_ofs = lex_ofs (ctx->lexer);
1490 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1494 while (lex_match (ctx->lexer, T_PLUS))
1496 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1500 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1501 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER and stores the
   resulting tree in T->axes[A].  When a parse is attempted, the result is
   true exactly if the stored tree is non-null.

   The first test recognizes BY, a slash, or the end of the command as the
   next token; presumably that means the axis is empty and nothing is
   parsed, but the action taken is on a line not shown in this listing. */
1508 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1509 struct ctables *ct, struct ctables_table *t,
1510 enum pivot_axis_type a)
1512 if (lex_token (lexer) == T_BY
1513 || lex_token (lexer) == T_SLASH
1514 || lex_token (lexer) == T_ENDCMD)
1517 struct ctables_axis_parse_ctx ctx = {
1523 t->axes[a] = ctables_axis_parse_stack (&ctx);
1524 return t->axes[a] != NULL;
/* Presumably releases CHISQ; the body is not shown in this listing.
   ctables_table_destroy calls this on a table's chisq member. */
1528 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Presumably releases PAIRWISE; the body is not shown in this listing.
   ctables_table_destroy calls this on a table's pairwise member. */
1534 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T that are visible here: its
   sections, its per-variable category references, the axis tree and stack
   for every pivot axis, the summary spec array, the category-label value
   map and array, and the chisq and pairwise members. */
1540 ctables_table_destroy (struct ctables_table *t)
1545 for (size_t i = 0; i < t->n_sections; i++)
1546 ctables_section_uninit (&t->sections[i]);
/* Categories are reference-counted, so each slot drops one reference
   before the array itself is freed. */
1549 for (size_t i = 0; i < t->n_categories; i++)
1550 ctables_categories_unref (t->categories[i]);
1551 free (t->categories);
1553 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1555 ctables_axis_destroy (t->axes[a]);
1556 ctables_stack_uninit (&t->stacks[a]);
1558 free (t->summary_specs.specs);
/* The SAFE iteration form is used because each node is removed from the
   map inside the loop.  Values are destroyed using the width of
   T->clabels_example. */
1560 struct ctables_value *ctv, *next_ctv;
1561 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1562 &t->clabels_values_map)
1564 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1565 hmap_delete (&t->clabels_values_map, &ctv->node);
1568 hmap_destroy (&t->clabels_values_map);
1569 free (t->clabels_values);
1575 ctables_chisq_destroy (t->chisq);
1576 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT that are visible here: every
   postcompute (its location, expression, and summary specs), the CTABLES
   format settings, the reference to the pivot table look, and every
   table. */
/* The SAFE iteration form is used because each postcompute is removed from
   the map inside the loop. */
1581 ctables_destroy (struct ctables *ct)
1586 struct ctables_postcompute *pc, *next_pc;
1587 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1591 msg_location_destroy (pc->location);
1592 ctables_pcexpr_destroy (pc->expr);
1596 ctables_summary_spec_set_uninit (pc->specs);
1599 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1603 fmt_settings_uninit (&ct->ctables_formats);
1604 pivot_table_look_unref (ct->look);
1608 for (size_t i = 0; i < ct->n_tables; i++)
1609 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose numeric range is { LOW, HIGH }.
   Callers in this file pass -DBL_MAX or DBL_MAX for an open-ended bound.
   (The category type is presumably set on a line not shown in this
   listing.) */
1614 static struct ctables_category
1615 cct_nrange (double low, double high)
1617 return (struct ctables_category) {
1619 .nrange = { low, high }
/* Returns a category, by value, whose string range is { LOW, HIGH }.  The
   substrings are stored as given, without copying.  Callers in this file
   pass a substring with a null `string' member for an open-ended bound.
   (The category type is presumably set on a line not shown in this
   listing.) */
1623 static struct ctables_category
1624 cct_srange (struct substring low, struct substring high)
1626 return (struct ctables_category) {
1628 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category, after the
   keyword itself, and stores a CCT_SUBTOTAL category in *CAT.

   An equals sign must be followed by a string, which becomes the subtotal
   label; the other visible assignment uses the translated default
   "Subtotal".  In both cases the label is a heap-allocated copy that *CAT
   then holds.  HIDE_SUBCATEGORIES is stored in the category unchanged. */
1633 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1634 struct ctables_category *cat)
1637 if (lex_match (lexer, T_EQUALS))
1639 if (!lex_force_string (lexer))
1642 total_label = ss_xstrdup (lex_tokss (lexer));
1646 total_label = xstrdup (_("Subtotal"));
1648 *cat = (struct ctables_category) {
1649 .type = CCT_SUBTOTAL,
1650 .hide_subcategories = hide_subcategories,
1651 .total_label = total_label
/* Recodes the current token's string with recode_substring_pool, passing
   DICT's encoding and "UTF-8" (presumably converting from UTF-8 into the
   dictionary encoding -- confirm the argument order against
   recode_substring_pool), then strips trailing spaces.  A null pool is
   passed, so the result is presumably heap-allocated and owned by the
   caller; other code in this file releases such strings with
   ss_dealloc. */
1656 static struct substring
1657 parse_substring (struct lexer *lexer, struct dictionary *dict)
1659 struct substring s = recode_substring_pool (
1660 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1661 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category from LEXER into *CAT.  The forms visible
   here are:

     OTHERNM, MISSING           keyword categories
     SUBTOTAL, HSUBTOTAL        subtotals, shown or hidden subcategories
     LO THRU string-or-number   range with an open lower bound
     number [THRU HI|number]    single number or numeric range
     string [THRU HI|string]    single string or string range
     &name                      reference to a known postcompute

   An open-ended numeric bound is stored as -DBL_MAX or DBL_MAX, and an
   open-ended string bound as a substring whose `string' member is null.
   An unknown postcompute name is reported as an error, as is any other
   token. */
1667 ctables_table_parse_explicit_category (struct lexer *lexer,
1668 struct dictionary *dict,
1670 struct ctables_category *cat)
1672 if (lex_match_id (lexer, "OTHERNM"))
1673 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1674 else if (lex_match_id (lexer, "MISSING"))
1675 *cat = (struct ctables_category) { .type = CCT_MISSING };
1676 else if (lex_match_id (lexer, "SUBTOTAL"))
1677 return ctables_table_parse_subtotal (lexer, false, cat);
1678 else if (lex_match_id (lexer, "HSUBTOTAL"))
1679 return ctables_table_parse_subtotal (lexer, true, cat);
1680 else if (lex_match_id (lexer, "LO"))
1682 if (!lex_force_match_id (lexer, "THRU"))
1684 if (lex_is_string (lexer))
1686 struct substring sr0 = { .string = NULL };
1687 struct substring sr1 = parse_substring (lexer, dict);
1688 *cat = cct_srange (sr0, sr1);
1690 else if (lex_force_num (lexer))
1692 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1698 else if (lex_is_number (lexer))
1700 double number = lex_number (lexer);
1702 if (lex_match_id (lexer, "THRU"))
1704 if (lex_match_id (lexer, "HI"))
1705 *cat = cct_nrange (number, DBL_MAX);
1708 if (!lex_force_num (lexer))
1710 *cat = cct_nrange (number, lex_number (lexer));
1715 *cat = (struct ctables_category) {
1720 else if (lex_is_string (lexer))
1722 struct substring s = parse_substring (lexer, dict);
1723 if (lex_match_id (lexer, "THRU"))
1725 if (lex_match_id (lexer, "HI"))
1727 struct substring sr1 = { .string = NULL };
1728 *cat = cct_srange (s, sr1);
1732 if (!lex_force_string (lexer))
1734 struct substring sr1 = parse_substring (lexer, dict);
1735 *cat = cct_srange (s, sr1);
1739 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1741 else if (lex_match (lexer, T_AND))
1743 if (!lex_force_id (lexer))
1745 struct ctables_postcompute *pc = ctables_find_postcompute (
1746 ct, lex_tokcstr (lexer));
1749 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1750 msg_at (SE, loc, _("Unknown postcompute &%s."),
1751 lex_tokcstr (lexer));
1752 msg_location_destroy (loc);
1757 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1761 lex_error (lexer, NULL);
/* Parses string S as a value in FORMAT, using DICT's encoding and the
   current format settings, via data_in.  When data_in yields an error
   message, the failure is reported at LOCATION together with the format
   name.  *N is the output for the parsed number (the store into *N is on a
   line not shown in this listing); callers test the result with `!', so a
   false result signals failure. */
1769 parse_category_string (struct msg_location *location,
1770 struct substring s, const struct dictionary *dict,
1771 enum fmt_type format, double *n)
1774 char *error = data_in (s, dict_get_encoding (dict), format,
1775 settings_get_fmt_settings (), &v, 0, NULL);
1778 msg_at (SE, location,
1779 _("Failed to parse category specification as format %s: %s."),
1780 fmt_name (format), error);
/* Scans the categories in CATS for one that matches the category reference
   in postcompute expression node E.  Matching depends on the kind of
   reference:

     - number: same type and equal value;
     - string: same type and equal substring;
     - numeric range: same type and both bounds equal;
     - string range: same type and both bounds equal, where either bound
       may be null;
     - MISSING, OTHERNM, TOTAL: same type only;
     - SUBTOTAL: subtotals are counted as they are encountered and compared
       against E->subtotal_index; index 0 is handled separately.

   After the scan there is a special case for a subtotal reference with
   index 0 when CATS contains more than one subtotal (presumably the lookup
   then fails as ambiguous; the action is on a line not shown in this
   listing). */
1789 static struct ctables_category *
1790 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1791 const struct ctables_pcexpr *e)
1793 struct ctables_category *best = NULL;
1794 size_t n_subtotals = 0;
1795 for (size_t i = 0; i < cats->n_cats; i++)
1797 struct ctables_category *cat = &cats->cats[i];
1800 case CTPO_CAT_NUMBER:
1801 if (cat->type == CCT_NUMBER && cat->number == e->number)
1805 case CTPO_CAT_STRING:
1806 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1810 case CTPO_CAT_NRANGE:
1811 if (cat->type == CCT_NRANGE
1812 && cat->nrange[0] == e->nrange[0]
1813 && cat->nrange[1] == e->nrange[1])
1817 case CTPO_CAT_SRANGE:
1818 if (cat->type == CCT_SRANGE
1819 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1820 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1824 case CTPO_CAT_MISSING:
1825 if (cat->type == CCT_MISSING)
1829 case CTPO_CAT_OTHERNM:
1830 if (cat->type == CCT_OTHERNM)
1834 case CTPO_CAT_SUBTOTAL:
1835 if (cat->type == CCT_SUBTOTAL)
1838 if (e->subtotal_index == n_subtotals)
1840 else if (e->subtotal_index == 0)
1845 case CTPO_CAT_TOTAL:
1846 if (cat->type == CCT_TOTAL)
1860 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to.

   When PARSE_FORMAT is FMT_F, or E is not a string or string-range
   reference, the lookup is done on E unchanged.  Otherwise the strings are
   first parsed as PARSE_FORMAT, and the lookup is done on a temporary
   numeric or numeric-range node that keeps E's location.  A null bound in
   a string range maps to -DBL_MAX (lower) or DBL_MAX (upper). */
1865 static struct ctables_category *
1866 ctables_find_category_for_postcompute (const struct dictionary *dict,
1867 const struct ctables_categories *cats,
1868 enum fmt_type parse_format,
1869 const struct ctables_pcexpr *e)
1871 if (parse_format != FMT_F)
1873 if (e->op == CTPO_CAT_STRING)
1876 if (!parse_category_string (e->location, e->string, dict,
1877 parse_format, &number))
1880 struct ctables_pcexpr e2 = {
1881 .op = CTPO_CAT_NUMBER,
1883 .location = e->location,
1885 return ctables_find_category_for_postcompute__ (cats, &e2);
1887 else if (e->op == CTPO_CAT_SRANGE)
1890 if (!e->srange[0].string)
1891 nrange[0] = -DBL_MAX;
1892 else if (!parse_category_string (e->location, e->srange[0], dict,
1893 parse_format, &nrange[0]))
1896 if (!e->srange[1].string)
1897 nrange[1] = DBL_MAX;
1898 else if (!parse_category_string (e->location, e->srange[1], dict,
1899 parse_format, &nrange[1]))
1902 struct ctables_pcexpr e2 = {
1903 .op = CTPO_CAT_NRANGE,
1904 .nrange = { nrange[0], nrange[1] },
1905 .location = e->location,
1907 return ctables_find_category_for_postcompute__ (cats, &e2);
1910 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that every category reference in postcompute expression E, used
   by computed category PC_CAT, can be resolved against CATS, whose source
   text is at CATS_LOCATION.

   For each category reference the matching category is looked up using
   PC_CAT's parse format.  The diagnostics visible here cover two failures:
   a subtotal reference without a position when CATS contains more than one
   subtotal, and a reference to a category that is not in the list at all,
   with a hint whose wording depends on the kind of reference.  The loop at
   the end checks both sub-expressions recursively, skipping null ones; the
   recursive result is tested with `!', so a false result signals
   failure. */
1914 ctables_recursive_check_postcompute (struct dictionary *dict,
1915 const struct ctables_pcexpr *e,
1916 struct ctables_category *pc_cat,
1917 const struct ctables_categories *cats,
1918 const struct msg_location *cats_location)
1922 case CTPO_CAT_NUMBER:
1923 case CTPO_CAT_STRING:
1924 case CTPO_CAT_NRANGE:
1925 case CTPO_CAT_SRANGE:
1926 case CTPO_CAT_MISSING:
1927 case CTPO_CAT_OTHERNM:
1928 case CTPO_CAT_SUBTOTAL:
1929 case CTPO_CAT_TOTAL:
1931 struct ctables_category *cat = ctables_find_category_for_postcompute (
1932 dict, cats, pc_cat->parse_format, e);
/* A subtotal reference without a position is ambiguous when the category
   list contains more than one subtotal. */
1935 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1937 size_t n_subtotals = 0;
1938 for (size_t i = 0; i < cats->n_cats; i++)
1939 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1940 if (n_subtotals > 1)
1942 msg_at (SE, cats_location,
1943 ngettext ("These categories include %zu instance "
1944 "of SUBTOTAL or HSUBTOTAL, so references "
1945 "from computed categories must refer to "
1946 "subtotals by position, "
1947 "e.g. SUBTOTAL[1].",
1948 "These categories include %zu instances "
1949 "of SUBTOTAL or HSUBTOTAL, so references "
1950 "from computed categories must refer to "
1951 "subtotals by position, "
1952 "e.g. SUBTOTAL[1].",
1955 msg_at (SN, e->location,
1956 _("This is the reference that lacks a position."));
1961 msg_at (SE, pc_cat->location,
1962 _("Computed category &%s references a category not included "
1963 "in the category list."),
1965 msg_at (SN, e->location, _("This is the missing category."));
1966 if (e->op == CTPO_CAT_SUBTOTAL)
1967 msg_at (SN, cats_location,
1968 _("To fix the problem, add subtotals to the "
1969 "list of categories here."));
1970 else if (e->op == CTPO_CAT_TOTAL)
1971 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1972 "CATEGORIES specification."));
1974 msg_at (SN, cats_location,
1975 _("To fix the problem, add the missing category to the "
1976 "list of categories here."));
1979 if (pc_cat->pc->hide_source_cats)
1993 for (size_t i = 0; i < 2; i++)
1994 if (e->subs[i] && !ctables_recursive_check_postcompute (
1995 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for a numeric variable.  For a
   numeric variable, an error is reported at CAT's location naming that
   variable, because CAT is a specification that applies only to string
   variables.  Callers test the result with `!', so a false result signals
   that a numeric variable was found. */
2004 all_strings (struct variable **vars, size_t n_vars,
2005 const struct ctables_category *cat)
2007 for (size_t j = 0; j < n_vars; j++)
2008 if (var_is_numeric (vars[j]))
2010 msg_at (SE, cat->location,
2011 _("This category specification may be applied only to string "
2012 "variables, but this subcommand tries to apply it to "
2013 "numeric variable %s."),
2014 var_get_name (vars[j]));
/* Parses a categories specification for table T: the VARIABLES keyword, a
   variable list, an optional bracketed list of explicit categories, and
   then settings.  The settings visible here are ORDER, KEY, and MISSING
   (accepted only when no explicit categories were given) and TOTAL, LABEL,
   POSITION, and EMPTY.

   A new ctables_categories object is created with one reference per listed
   variable; the reference each variable previously held in T->categories
   is dropped.  After parsing, a totals category is added when requested,
   each category is linked to a subtotal, and explicit categories are
   validated against the variable types.  With date or time print formats
   common to all the variables, string categories are converted to numbers
   by parsing them in that format. */
2021 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2022 struct ctables *ct, struct ctables_table *t)
2024 if (!lex_match_id (lexer, "VARIABLES"))
2026 lex_match (lexer, T_EQUALS);
2028 struct variable **vars;
2030 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* COMMON_FORMAT stays non-null only if every variable has the same print
   format type as the first one. */
2033 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2034 for (size_t i = 1; i < n_vars; i++)
2036 const struct fmt_spec *f = var_get_print_format (vars[i]);
2037 if (f->type != common_format->type)
2039 common_format = NULL;
2045 && (fmt_get_category (common_format->type)
2046 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
2048 struct ctables_categories *c = xmalloc (sizeof *c);
2049 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2050 for (size_t i = 0; i < n_vars; i++)
2052 struct ctables_categories **cp
2053 = &t->categories[var_get_dict_index (vars[i])];
2054 ctables_categories_unref (*cp);
/* The token offsets of the explicit category list stay at -1 when no
   bracketed list is present. */
2058 size_t allocated_cats = 0;
2059 int cats_start_ofs = -1;
2060 int cats_end_ofs = -1;
2061 if (lex_match (lexer, T_LBRACK))
2063 cats_start_ofs = lex_ofs (lexer);
2066 if (c->n_cats >= allocated_cats)
2067 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2069 int start_ofs = lex_ofs (lexer);
2070 struct ctables_category *cat = &c->cats[c->n_cats];
2071 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2073 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2076 lex_match (lexer, T_COMMA);
2078 while (!lex_match (lexer, T_RBRACK));
2079 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for an implicit category: ascending sort order, with missing
   values not included. */
2082 struct ctables_category cat = {
2084 .include_missing = false,
2085 .sort_ascending = true,
2087 bool show_totals = false;
2088 char *total_label = NULL;
2089 bool totals_before = false;
2090 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2092 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2094 lex_match (lexer, T_EQUALS);
2095 if (lex_match_id (lexer, "A"))
2096 cat.sort_ascending = true;
2097 else if (lex_match_id (lexer, "D"))
2098 cat.sort_ascending = false;
2101 lex_error_expecting (lexer, "A", "D");
2105 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2107 lex_match (lexer, T_EQUALS);
2108 if (lex_match_id (lexer, "VALUE"))
2109 cat.type = CCT_VALUE;
2110 else if (lex_match_id (lexer, "LABEL"))
2111 cat.type = CCT_LABEL;
2114 cat.type = CCT_FUNCTION;
2115 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2118 if (lex_match (lexer, T_LPAREN))
2120 cat.sort_var = parse_variable (lexer, dict);
2124 if (cat.sort_function == CTSF_PTILE)
2126 lex_match (lexer, T_COMMA);
2127 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2129 cat.percentile = lex_number (lexer);
2133 if (!lex_force_match (lexer, T_RPAREN))
2136 else if (ctables_function_availability (cat.sort_function)
2139 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2144 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2146 lex_match (lexer, T_EQUALS);
2147 if (lex_match_id (lexer, "INCLUDE"))
2148 cat.include_missing = true;
2149 else if (lex_match_id (lexer, "EXCLUDE"))
2150 cat.include_missing = false;
2153 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2157 else if (lex_match_id (lexer, "TOTAL"))
2159 lex_match (lexer, T_EQUALS);
2160 if (!parse_bool (lexer, &show_totals))
2163 else if (lex_match_id (lexer, "LABEL"))
2165 lex_match (lexer, T_EQUALS);
2166 if (!lex_force_string (lexer))
2169 total_label = ss_xstrdup (lex_tokss (lexer));
2172 else if (lex_match_id (lexer, "POSITION"))
2174 lex_match (lexer, T_EQUALS);
2175 if (lex_match_id (lexer, "BEFORE"))
2176 totals_before = true;
2177 else if (lex_match_id (lexer, "AFTER"))
2178 totals_before = false;
2181 lex_error_expecting (lexer, "BEFORE", "AFTER");
2185 else if (lex_match_id (lexer, "EMPTY"))
2187 lex_match (lexer, T_EQUALS);
2188 if (lex_match_id (lexer, "INCLUDE"))
2189 c->show_empty = true;
2190 else if (lex_match_id (lexer, "EXCLUDE"))
2191 c->show_empty = false;
2194 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2201 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2202 "TOTAL", "LABEL", "POSITION", "EMPTY");
2204 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2211 if (c->n_cats >= allocated_cats)
2212 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2213 c->cats[c->n_cats++] = cat;
2218 if (c->n_cats >= allocated_cats)
2219 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
/* The totals category is either inserted at index 0 or placed at the end
   of the array. */
2221 struct ctables_category *totals;
2224 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2225 totals = &c->cats[0];
2228 totals = &c->cats[c->n_cats];
2231 *totals = (struct ctables_category) {
2233 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* The scan runs forward when totals come before the categories and
   backward otherwise.  The backward form tests and decrements I in the
   loop condition, so the third clause has nothing left to do. */
2237 struct ctables_category *subtotal = NULL;
2238 for (size_t i = totals_before ? 0 : c->n_cats;
2239 totals_before ? i < c->n_cats : i-- > 0;
2240 totals_before ? i++ : 0)
2242 struct ctables_category *cat = &c->cats[i];
2251 cat->subtotal = subtotal;
2254 case CCT_POSTCOMPUTE:
2265 case CCT_EXCLUDED_MISSING:
/* Explicit categories are validated only if a bracketed list was parsed. */
2270 if (cats_start_ofs != -1)
2272 for (size_t i = 0; i < c->n_cats; i++)
2274 struct ctables_category *cat = &c->cats[i];
2277 case CCT_POSTCOMPUTE:
2278 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2279 struct msg_location *cats_location
2280 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2281 bool ok = ctables_recursive_check_postcompute (
2282 dict, cat->pc->expr, cat, c, cats_location);
2283 msg_location_destroy (cats_location);
2290 for (size_t j = 0; j < n_vars; j++)
2291 if (var_is_alpha (vars[j]))
2293 msg_at (SE, cat->location,
2294 _("This category specification may be applied "
2295 "only to numeric variables, but this "
2296 "subcommand tries to apply it to string "
2298 var_get_name (vars[j]));
2307 if (!parse_category_string (cat->location, cat->string, dict,
2308 common_format->type, &n))
2311 ss_dealloc (&cat->string);
2313 cat->type = CCT_NUMBER;
2316 else if (!all_strings (vars, n_vars, cat))
2325 if (!cat->srange[0].string)
2327 else if (!parse_category_string (cat->location,
2328 cat->srange[0], dict,
2329 common_format->type, &n[0]))
2332 if (!cat->srange[1].string)
2334 else if (!parse_category_string (cat->location,
2335 cat->srange[1], dict,
2336 common_format->type, &n[1]))
2339 ss_dealloc (&cat->srange[0]);
2340 ss_dealloc (&cat->srange[1]);
2342 cat->type = CCT_NRANGE;
2343 cat->nrange[0] = n[0];
2344 cat->nrange[1] = n[1];
2346 else if (!all_strings (vars, n_vars, cat))
2357 case CCT_EXCLUDED_MISSING:
/* Releases the summary spec set for every summary variant in NEST.  (Any
   other cleanup is on lines not shown in this listing.) */
2372 ctables_nest_uninit (struct ctables_nest *nest)
2375 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2376 ctables_summary_spec_set_uninit (&nest->specs[sv]);
/* Uninitializes every nest in STACK and frees the nests array.  STACK
   itself is not freed. */
2380 ctables_stack_uninit (struct ctables_stack *stack)
2384 for (size_t i = 0; i < stack->n; i++)
2385 ctables_nest_uninit (&stack->nests[i]);
2386 free (stack->nests);
/* Builds a stack with one nest for every pair of a nest from S0 and a nest
   from S1, in row-major order with S0 as the outer loop.  Each new nest
   holds the variables of the S0 nest followed by those of the S1 nest, and
   a clone of the summary specs from whichever of the two is chosen as the
   summary source.  Both S0 and S1 are uninitialized before returning, so
   the caller must not use them afterward. */
2390 static struct ctables_stack
2391 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2398 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2399 for (size_t i = 0; i < s0.n; i++)
2400 for (size_t j = 0; j < s1.n; j++)
2402 const struct ctables_nest *a = &s0.nests[i];
2403 const struct ctables_nest *b = &s1.nests[j];
2405 size_t allocate = a->n + b->n;
2406 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2407 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2409 for (size_t k = 0; k < a->n; k++)
2410 vars[n++] = a->vars[k];
2411 for (size_t k = 0; k < b->n; k++)
2412 vars[n++] = b->vars[k];
2413 assert (n == allocate);
/* The summary source is chosen by which of the two nests has a variable
   attached to its CSV_CELL specs. */
2415 const struct ctables_nest *summary_src;
2416 if (!a->specs[CSV_CELL].var)
2418 else if (!b->specs[CSV_CELL].var)
2423 struct ctables_nest *new = &stack.nests[stack.n++];
2424 *new = (struct ctables_nest) {
/* A scale index taken from B is offset by A's variable count, because B's
   variables follow A's in the combined array.  SIZE_MAX marks "no scale
   variable". */
2426 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2427 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2431 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2432 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2434 ctables_stack_uninit (&s0);
2435 ctables_stack_uninit (&s1);
/* Returns a stack holding the nests of S0 followed by the nests of S1.
   The nests are copied by plain structure assignment, not cloned.  Each
   nest taken from S1 has its group_head advanced by S0's nest count, to
   account for its new position in the combined array. */
2439 static struct ctables_stack
2440 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2442 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2443 for (size_t i = 0; i < s0.n; i++)
2444 stack.nests[stack.n++] = s0.nests[i];
2445 for (size_t i = 0; i < s1.n; i++)
2447 stack.nests[stack.n] = s1.nests[i];
2448 stack.nests[stack.n].group_head += s0.n;
2451 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing a single newly allocated nest for the
   variable axis A.  The nest's scale index is 0 when A is a scale variable
   and SIZE_MAX otherwise.  When A has CSV_CELL summary specs or is scale,
   every summary variant's spec set is cloned from A and tagged with A's
   variable and scale flag. */
2457 static struct ctables_stack
2458 var_fts (const struct ctables_axis *a)
2460 struct variable **vars = xmalloc (sizeof *vars);
2463 struct ctables_nest *nest = xmalloc (sizeof *nest);
2464 *nest = (struct ctables_nest) {
2467 .scale_idx = a->scale ? 0 : SIZE_MAX,
2469 if (a->specs[CSV_CELL].n || a->scale)
2470 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2472 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2473 nest->specs[sv].var = a->var;
2474 nest->specs[sv].is_scale = a->scale;
2476 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts axis tree A into a stack of nests.  One visible case returns an
   empty stack (presumably for a null axis).  The other two visible cases
   recurse on both sub-axes and combine the results with stack_fts or
   nest_fts.  The dispatch on the axis operation is on lines not shown in
   this listing. */
2479 static struct ctables_stack
2480 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2483 return (struct ctables_stack) { .n = 0 };
2491 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2492 enumerate_fts (axis_type, a->subs[1]));
2495 /* This should consider any of the scale variables found in the result to
2496 be linked to each other listwise for SMISSING=LISTWISE. */
2497 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2498 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function.  Which member is in use
   depends on the summary function, as grouped by the comments on the
   members below.  ctables_summary_init and ctables_summary_uninit switch
   on the spec's function to set up and release the matching member. */
2504 union ctables_summary
2506 /* COUNT, VALIDN, TOTALN. */
2509 /* MINIMUM, MAXIMUM, RANGE. */
2516 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2517 struct moments1 *moments;
2519 /* MEDIAN, MODE, PTILE. */
2522 struct casewriter *writer;
2527 /* XXX multiple response */
/* Initializes accumulator S for the summary function in SS.  The
   initializations visible here are:

     - min and max set to SYSMIS;
     - a moments accumulator created with MOMENT_VARIANCE, reached by the
       *_SUM percentage cases listed just before it;
     - a sorting case writer over a prototype of two width-0 values,
       ordered ascending on the first one.

   The statements for the count and percentage cases listed first are on
   lines not shown in this listing. */
2531 ctables_summary_init (union ctables_summary *s,
2532 const struct ctables_summary_spec *ss)
2534 switch (ss->function)
2538 case CTSF_ROWPCT_COUNT:
2539 case CTSF_COLPCT_COUNT:
2540 case CTSF_TABLEPCT_COUNT:
2541 case CTSF_SUBTABLEPCT_COUNT:
2542 case CTSF_LAYERPCT_COUNT:
2543 case CTSF_LAYERROWPCT_COUNT:
2544 case CTSF_LAYERCOLPCT_COUNT:
2545 case CTSF_ROWPCT_VALIDN:
2546 case CTSF_COLPCT_VALIDN:
2547 case CTSF_TABLEPCT_VALIDN:
2548 case CTSF_SUBTABLEPCT_VALIDN:
2549 case CTSF_LAYERPCT_VALIDN:
2550 case CTSF_LAYERROWPCT_VALIDN:
2551 case CTSF_LAYERCOLPCT_VALIDN:
2552 case CTSF_ROWPCT_TOTALN:
2553 case CTSF_COLPCT_TOTALN:
2554 case CTSF_TABLEPCT_TOTALN:
2555 case CTSF_SUBTABLEPCT_TOTALN:
2556 case CTSF_LAYERPCT_TOTALN:
2557 case CTSF_LAYERROWPCT_TOTALN:
2558 case CTSF_LAYERCOLPCT_TOTALN:
2565 case CTSF_UROWPCT_COUNT:
2566 case CTSF_UCOLPCT_COUNT:
2567 case CTSF_UTABLEPCT_COUNT:
2568 case CTSF_USUBTABLEPCT_COUNT:
2569 case CTSF_ULAYERPCT_COUNT:
2570 case CTSF_ULAYERROWPCT_COUNT:
2571 case CTSF_ULAYERCOLPCT_COUNT:
2572 case CTSF_UROWPCT_VALIDN:
2573 case CTSF_UCOLPCT_VALIDN:
2574 case CTSF_UTABLEPCT_VALIDN:
2575 case CTSF_USUBTABLEPCT_VALIDN:
2576 case CTSF_ULAYERPCT_VALIDN:
2577 case CTSF_ULAYERROWPCT_VALIDN:
2578 case CTSF_ULAYERCOLPCT_VALIDN:
2579 case CTSF_UROWPCT_TOTALN:
2580 case CTSF_UCOLPCT_TOTALN:
2581 case CTSF_UTABLEPCT_TOTALN:
2582 case CTSF_USUBTABLEPCT_TOTALN:
2583 case CTSF_ULAYERPCT_TOTALN:
2584 case CTSF_ULAYERROWPCT_TOTALN:
2585 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet"; ctables_summary_add tests for it
   before comparing against a new value. */
2595 s->min = s->max = SYSMIS;
2603 case CTSF_ROWPCT_SUM:
2604 case CTSF_COLPCT_SUM:
2605 case CTSF_TABLEPCT_SUM:
2606 case CTSF_SUBTABLEPCT_SUM:
2607 case CTSF_LAYERPCT_SUM:
2608 case CTSF_LAYERROWPCT_SUM:
2609 case CTSF_LAYERCOLPCT_SUM:
2614 case CTSF_UVARIANCE:
2615 case CTSF_UROWPCT_SUM:
2616 case CTSF_UCOLPCT_SUM:
2617 case CTSF_UTABLEPCT_SUM:
2618 case CTSF_USUBTABLEPCT_SUM:
2619 case CTSF_ULAYERPCT_SUM:
2620 case CTSF_ULAYERROWPCT_SUM:
2621 case CTSF_ULAYERCOLPCT_SUM:
2622 s->moments = moments1_create (MOMENT_VARIANCE);
/* Cases written to the writer hold two values of width 0 (presumably a
   numeric value and its weight -- confirm against the code that writes
   them) and are sorted ascending on the first.  The writer is created
   before the local ordering and prototype are released. */
2632 struct caseproto *proto = caseproto_create ();
2633 proto = caseproto_add_width (proto, 0);
2634 proto = caseproto_add_width (proto, 0);
2636 struct subcase ordering;
2637 subcase_init (&ordering, 0, 0, SC_ASCEND);
2638 s->writer = sort_create_writer (&ordering, proto);
2639 subcase_uninit (&ordering);
2640 caseproto_unref (proto);
/* Releases the resources that accumulator S holds for the summary function
   in SS.  Two releases are visible here: the moments accumulator, reached
   by the *_SUM percentage cases listed just before it, and the case
   writer.  The count and percentage cases listed first are followed by
   lines not shown in this listing (presumably nothing needs releasing for
   them). */
2650 ctables_summary_uninit (union ctables_summary *s,
2651 const struct ctables_summary_spec *ss)
2653 switch (ss->function)
2657 case CTSF_ROWPCT_COUNT:
2658 case CTSF_COLPCT_COUNT:
2659 case CTSF_TABLEPCT_COUNT:
2660 case CTSF_SUBTABLEPCT_COUNT:
2661 case CTSF_LAYERPCT_COUNT:
2662 case CTSF_LAYERROWPCT_COUNT:
2663 case CTSF_LAYERCOLPCT_COUNT:
2664 case CTSF_ROWPCT_VALIDN:
2665 case CTSF_COLPCT_VALIDN:
2666 case CTSF_TABLEPCT_VALIDN:
2667 case CTSF_SUBTABLEPCT_VALIDN:
2668 case CTSF_LAYERPCT_VALIDN:
2669 case CTSF_LAYERROWPCT_VALIDN:
2670 case CTSF_LAYERCOLPCT_VALIDN:
2671 case CTSF_ROWPCT_TOTALN:
2672 case CTSF_COLPCT_TOTALN:
2673 case CTSF_TABLEPCT_TOTALN:
2674 case CTSF_SUBTABLEPCT_TOTALN:
2675 case CTSF_LAYERPCT_TOTALN:
2676 case CTSF_LAYERROWPCT_TOTALN:
2677 case CTSF_LAYERCOLPCT_TOTALN:
2684 case CTSF_UROWPCT_COUNT:
2685 case CTSF_UCOLPCT_COUNT:
2686 case CTSF_UTABLEPCT_COUNT:
2687 case CTSF_USUBTABLEPCT_COUNT:
2688 case CTSF_ULAYERPCT_COUNT:
2689 case CTSF_ULAYERROWPCT_COUNT:
2690 case CTSF_ULAYERCOLPCT_COUNT:
2691 case CTSF_UROWPCT_VALIDN:
2692 case CTSF_UCOLPCT_VALIDN:
2693 case CTSF_UTABLEPCT_VALIDN:
2694 case CTSF_USUBTABLEPCT_VALIDN:
2695 case CTSF_ULAYERPCT_VALIDN:
2696 case CTSF_ULAYERROWPCT_VALIDN:
2697 case CTSF_ULAYERCOLPCT_VALIDN:
2698 case CTSF_UROWPCT_TOTALN:
2699 case CTSF_UCOLPCT_TOTALN:
2700 case CTSF_UTABLEPCT_TOTALN:
2701 case CTSF_USUBTABLEPCT_TOTALN:
2702 case CTSF_ULAYERPCT_TOTALN:
2703 case CTSF_ULAYERROWPCT_TOTALN:
2704 case CTSF_ULAYERCOLPCT_TOTALN:
2720 case CTSF_ROWPCT_SUM:
2721 case CTSF_COLPCT_SUM:
2722 case CTSF_TABLEPCT_SUM:
2723 case CTSF_SUBTABLEPCT_SUM:
2724 case CTSF_LAYERPCT_SUM:
2725 case CTSF_LAYERROWPCT_SUM:
2726 case CTSF_LAYERCOLPCT_SUM:
2731 case CTSF_UVARIANCE:
2732 case CTSF_UROWPCT_SUM:
2733 case CTSF_UCOLPCT_SUM:
2734 case CTSF_UTABLEPCT_SUM:
2735 case CTSF_USUBTABLEPCT_SUM:
2736 case CTSF_ULAYERPCT_SUM:
2737 case CTSF_ULAYERROWPCT_SUM:
2738 case CTSF_ULAYERCOLPCT_SUM:
/* Counterpart of the moments1_create call in ctables_summary_init. */
2739 moments1_destroy (s->moments);
/* Counterpart of the sort_create_writer call in ctables_summary_init. */
2748 casewriter_destroy (s->writer);
/* Accumulates one case into summary S according to the summary function in
   SS.  VALUE is the case's value for VAR.  The visible branches add D_WEIGHT
   or E_WEIGHT to S->count for the counting functions (the COUNT family
   only when IS_SCALE is set or EXCLUDED_MISSING is clear), track S->min and
   S->max, feed VALUE->f into S->moments when IS_SCALE_MISSING is false
   (weighted by E_WEIGHT, or by 1.0 for the unweighted U* variants), and
   append a (value, weight) case to S->writer while adding E_WEIGHT to
   S->ovalid. */
2754 ctables_summary_add (union ctables_summary *s,
2755 const struct ctables_summary_spec *ss,
2756 const struct variable *var, const union value *value,
2757 bool is_scale, bool is_scale_missing,
2758 bool is_missing, bool excluded_missing,
2759 double d_weight, double e_weight)
2761 /* To determine whether a case is included in a given table for a particular
2762 kind of summary, consider the following charts for each variable in the
2763 table. Only if "yes" appears for every variable for the summary is the
2766 Categorical variables: VALIDN COUNT TOTALN
2767 Valid values in included categories yes yes yes
2768 Missing values in included categories --- yes yes
2769 Missing values in excluded categories --- --- yes
2770 Valid values in excluded categories --- --- ---
2772 Scale variables: VALIDN COUNT TOTALN
2773 Valid value yes yes yes
2774 Missing value --- yes yes
2776 Missing values include both user- and system-missing. (The system-missing
2777 value is always in an excluded category.)
2779 switch (ss->function)
2782 case CTSF_ROWPCT_TOTALN:
2783 case CTSF_COLPCT_TOTALN:
2784 case CTSF_TABLEPCT_TOTALN:
2785 case CTSF_SUBTABLEPCT_TOTALN:
2786 case CTSF_LAYERPCT_TOTALN:
2787 case CTSF_LAYERROWPCT_TOTALN:
2788 case CTSF_LAYERCOLPCT_TOTALN:
2789 s->count += d_weight;
2793 case CTSF_UROWPCT_TOTALN:
2794 case CTSF_UCOLPCT_TOTALN:
2795 case CTSF_UTABLEPCT_TOTALN:
2796 case CTSF_USUBTABLEPCT_TOTALN:
2797 case CTSF_ULAYERPCT_TOTALN:
2798 case CTSF_ULAYERROWPCT_TOTALN:
2799 case CTSF_ULAYERCOLPCT_TOTALN:
2804 case CTSF_ROWPCT_COUNT:
2805 case CTSF_COLPCT_COUNT:
2806 case CTSF_TABLEPCT_COUNT:
2807 case CTSF_SUBTABLEPCT_COUNT:
2808 case CTSF_LAYERPCT_COUNT:
2809 case CTSF_LAYERROWPCT_COUNT:
2810 case CTSF_LAYERCOLPCT_COUNT:
2811 if (is_scale || !excluded_missing)
2812 s->count += d_weight;
2816 case CTSF_UROWPCT_COUNT:
2817 case CTSF_UCOLPCT_COUNT:
2818 case CTSF_UTABLEPCT_COUNT:
2819 case CTSF_USUBTABLEPCT_COUNT:
2820 case CTSF_ULAYERPCT_COUNT:
2821 case CTSF_ULAYERROWPCT_COUNT:
2822 case CTSF_ULAYERCOLPCT_COUNT:
2823 if (is_scale || !excluded_missing)
2828 case CTSF_ROWPCT_VALIDN:
2829 case CTSF_COLPCT_VALIDN:
2830 case CTSF_TABLEPCT_VALIDN:
2831 case CTSF_SUBTABLEPCT_VALIDN:
2832 case CTSF_LAYERPCT_VALIDN:
2833 case CTSF_LAYERROWPCT_VALIDN:
2834 case CTSF_LAYERCOLPCT_VALIDN:
2838 s->count += d_weight;
2842 case CTSF_UROWPCT_VALIDN:
2843 case CTSF_UCOLPCT_VALIDN:
2844 case CTSF_UTABLEPCT_VALIDN:
2845 case CTSF_USUBTABLEPCT_VALIDN:
2846 case CTSF_ULAYERPCT_VALIDN:
2847 case CTSF_ULAYERROWPCT_VALIDN:
2848 case CTSF_ULAYERCOLPCT_VALIDN:
2857 s->count += d_weight;
2866 if (is_scale || !excluded_missing)
2867 s->count += e_weight;
2874 s->count += e_weight;
2878 s->count += e_weight;
2884 if (!is_scale_missing)
2886 assert (!var_is_alpha (var)); /* XXX? */
2887 if (s->min == SYSMIS || value->f < s->min)
2889 if (s->max == SYSMIS || value->f > s->max)
2899 case CTSF_ROWPCT_SUM:
2900 case CTSF_COLPCT_SUM:
2901 case CTSF_TABLEPCT_SUM:
2902 case CTSF_SUBTABLEPCT_SUM:
2903 case CTSF_LAYERPCT_SUM:
2904 case CTSF_LAYERROWPCT_SUM:
2905 case CTSF_LAYERCOLPCT_SUM:
2906 if (!is_scale_missing)
2907 moments1_add (s->moments, value->f, e_weight);
2914 case CTSF_UVARIANCE:
2915 case CTSF_UROWPCT_SUM:
2916 case CTSF_UCOLPCT_SUM:
2917 case CTSF_UTABLEPCT_SUM:
2918 case CTSF_USUBTABLEPCT_SUM:
2919 case CTSF_ULAYERPCT_SUM:
2920 case CTSF_ULAYERROWPCT_SUM:
2921 case CTSF_ULAYERCOLPCT_SUM:
2922 if (!is_scale_missing)
2923 moments1_add (s->moments, value->f, 1.0);
2929 d_weight = e_weight = 1.0;
2934 if (!is_scale_missing)
2936 s->ovalid += e_weight;
2938 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2939 *case_num_rw_idx (c, 0) = value->f;
2940 *case_num_rw_idx (c, 1) = e_weight;
2941 casewriter_write (s->writer, c);
/* Maps summary function FUNCTION to the percentage domain it is computed
   against.  The visible groups return CTDT_LAYERCOL, CTDT_LAYERROW, and
   CTDT_SUBTABLE for the LAYERCOLPCT, LAYERROWPCT, and SUBTABLEPCT families
   respectively; each family lists its COUNT, SUM, TOTALN, and VALIDN
   members in both the plain and the unweighted (U-prefixed) form. */
2947 static enum ctables_domain_type
2948 ctables_function_domain (enum ctables_summary_function function)
2978 case CTSF_UVARIANCE:
2984 case CTSF_COLPCT_COUNT:
2985 case CTSF_COLPCT_SUM:
2986 case CTSF_COLPCT_TOTALN:
2987 case CTSF_COLPCT_VALIDN:
2988 case CTSF_UCOLPCT_COUNT:
2989 case CTSF_UCOLPCT_SUM:
2990 case CTSF_UCOLPCT_TOTALN:
2991 case CTSF_UCOLPCT_VALIDN:
2994 case CTSF_LAYERCOLPCT_COUNT:
2995 case CTSF_LAYERCOLPCT_SUM:
2996 case CTSF_LAYERCOLPCT_TOTALN:
2997 case CTSF_LAYERCOLPCT_VALIDN:
2998 case CTSF_ULAYERCOLPCT_COUNT:
2999 case CTSF_ULAYERCOLPCT_SUM:
3000 case CTSF_ULAYERCOLPCT_TOTALN:
3001 case CTSF_ULAYERCOLPCT_VALIDN:
3002 return CTDT_LAYERCOL;
3004 case CTSF_LAYERPCT_COUNT:
3005 case CTSF_LAYERPCT_SUM:
3006 case CTSF_LAYERPCT_TOTALN:
3007 case CTSF_LAYERPCT_VALIDN:
3008 case CTSF_ULAYERPCT_COUNT:
3009 case CTSF_ULAYERPCT_SUM:
3010 case CTSF_ULAYERPCT_TOTALN:
3011 case CTSF_ULAYERPCT_VALIDN:
3014 case CTSF_LAYERROWPCT_COUNT:
3015 case CTSF_LAYERROWPCT_SUM:
3016 case CTSF_LAYERROWPCT_TOTALN:
3017 case CTSF_LAYERROWPCT_VALIDN:
3018 case CTSF_ULAYERROWPCT_COUNT:
3019 case CTSF_ULAYERROWPCT_SUM:
3020 case CTSF_ULAYERROWPCT_TOTALN:
3021 case CTSF_ULAYERROWPCT_VALIDN:
3022 return CTDT_LAYERROW;
3024 case CTSF_ROWPCT_COUNT:
3025 case CTSF_ROWPCT_SUM:
3026 case CTSF_ROWPCT_TOTALN:
3027 case CTSF_ROWPCT_VALIDN:
3028 case CTSF_UROWPCT_COUNT:
3029 case CTSF_UROWPCT_SUM:
3030 case CTSF_UROWPCT_TOTALN:
3031 case CTSF_UROWPCT_VALIDN:
3034 case CTSF_SUBTABLEPCT_COUNT:
3035 case CTSF_SUBTABLEPCT_SUM:
3036 case CTSF_SUBTABLEPCT_TOTALN:
3037 case CTSF_SUBTABLEPCT_VALIDN:
3038 case CTSF_USUBTABLEPCT_COUNT:
3039 case CTSF_USUBTABLEPCT_SUM:
3040 case CTSF_USUBTABLEPCT_TOTALN:
3041 case CTSF_USUBTABLEPCT_VALIDN:
3042 return CTDT_SUBTABLE;
3044 case CTSF_TABLEPCT_COUNT:
3045 case CTSF_TABLEPCT_SUM:
3046 case CTSF_TABLEPCT_TOTALN:
3047 case CTSF_TABLEPCT_VALIDN:
3048 case CTSF_UTABLEPCT_COUNT:
3049 case CTSF_UTABLEPCT_SUM:
3050 case CTSF_UTABLEPCT_TOTALN:
3051 case CTSF_UTABLEPCT_VALIDN:
/* Classifies FUNCTION by whether it belongs to the *PCT_SUM family.  The
   *PCT_COUNT, *PCT_TOTALN, and *PCT_VALIDN functions are listed in one run
   of case labels and the *PCT_SUM functions in a later one.

   NOTE(review): the name reads like a predicate, but the declared return
   type is enum ctables_domain_type -- confirm whether bool was intended. */
3058 static enum ctables_domain_type
3059 ctables_function_is_pctsum (enum ctables_summary_function function)
3089 case CTSF_UVARIANCE:
3093 case CTSF_COLPCT_COUNT:
3094 case CTSF_COLPCT_TOTALN:
3095 case CTSF_COLPCT_VALIDN:
3096 case CTSF_UCOLPCT_COUNT:
3097 case CTSF_UCOLPCT_TOTALN:
3098 case CTSF_UCOLPCT_VALIDN:
3099 case CTSF_LAYERCOLPCT_COUNT:
3100 case CTSF_LAYERCOLPCT_TOTALN:
3101 case CTSF_LAYERCOLPCT_VALIDN:
3102 case CTSF_ULAYERCOLPCT_COUNT:
3103 case CTSF_ULAYERCOLPCT_TOTALN:
3104 case CTSF_ULAYERCOLPCT_VALIDN:
3105 case CTSF_LAYERPCT_COUNT:
3106 case CTSF_LAYERPCT_TOTALN:
3107 case CTSF_LAYERPCT_VALIDN:
3108 case CTSF_ULAYERPCT_COUNT:
3109 case CTSF_ULAYERPCT_TOTALN:
3110 case CTSF_ULAYERPCT_VALIDN:
3111 case CTSF_LAYERROWPCT_COUNT:
3112 case CTSF_LAYERROWPCT_TOTALN:
3113 case CTSF_LAYERROWPCT_VALIDN:
3114 case CTSF_ULAYERROWPCT_COUNT:
3115 case CTSF_ULAYERROWPCT_TOTALN:
3116 case CTSF_ULAYERROWPCT_VALIDN:
3117 case CTSF_ROWPCT_COUNT:
3118 case CTSF_ROWPCT_TOTALN:
3119 case CTSF_ROWPCT_VALIDN:
3120 case CTSF_UROWPCT_COUNT:
3121 case CTSF_UROWPCT_TOTALN:
3122 case CTSF_UROWPCT_VALIDN:
3123 case CTSF_SUBTABLEPCT_COUNT:
3124 case CTSF_SUBTABLEPCT_TOTALN:
3125 case CTSF_SUBTABLEPCT_VALIDN:
3126 case CTSF_USUBTABLEPCT_COUNT:
3127 case CTSF_USUBTABLEPCT_TOTALN:
3128 case CTSF_USUBTABLEPCT_VALIDN:
3129 case CTSF_TABLEPCT_COUNT:
3130 case CTSF_TABLEPCT_TOTALN:
3131 case CTSF_TABLEPCT_VALIDN:
3132 case CTSF_UTABLEPCT_COUNT:
3133 case CTSF_UTABLEPCT_TOTALN:
3134 case CTSF_UTABLEPCT_VALIDN:
3137 case CTSF_COLPCT_SUM:
3138 case CTSF_UCOLPCT_SUM:
3139 case CTSF_LAYERCOLPCT_SUM:
3140 case CTSF_ULAYERCOLPCT_SUM:
3141 case CTSF_LAYERPCT_SUM:
3142 case CTSF_ULAYERPCT_SUM:
3143 case CTSF_LAYERROWPCT_SUM:
3144 case CTSF_ULAYERROWPCT_SUM:
3145 case CTSF_ROWPCT_SUM:
3146 case CTSF_UROWPCT_SUM:
3147 case CTSF_SUBTABLEPCT_SUM:
3148 case CTSF_USUBTABLEPCT_SUM:
3149 case CTSF_TABLEPCT_SUM:
3150 case CTSF_UTABLEPCT_SUM:
/* Computes the value of summary S, which belongs to CELL, for the summary
   function in SS.  The percentage-of-count functions divide S->count by the
   matching total (e_count, u_count, e_valid, u_valid, e_total, or u_total)
   of the domain selected by ctables_function_domain() and multiply by 100,
   guarding against a zero total.  Moment-based results are derived from
   moments1_calculate() on S->moments.  The *PCT_SUM functions divide
   weight * mean by the domain's e_sum or u_sum for SS->sum_var_idx and
   yield SYSMIS when that denominator is zero.  The percentile and mode
   branches read back the cases written to S->writer and store their result
   in S->ovalue. */
3158 ctables_summary_value (const struct ctables_cell *cell,
3159 union ctables_summary *s,
3160 const struct ctables_summary_spec *ss)
3162 switch (ss->function)
3169 case CTSF_ROWPCT_COUNT:
3170 case CTSF_COLPCT_COUNT:
3171 case CTSF_TABLEPCT_COUNT:
3172 case CTSF_SUBTABLEPCT_COUNT:
3173 case CTSF_LAYERPCT_COUNT:
3174 case CTSF_LAYERROWPCT_COUNT:
3175 case CTSF_LAYERCOLPCT_COUNT:
3177 enum ctables_domain_type d = ctables_function_domain (ss->function);
3178 return (cell->domains[d]->e_count
3179 ? s->count / cell->domains[d]->e_count * 100
3183 case CTSF_UROWPCT_COUNT:
3184 case CTSF_UCOLPCT_COUNT:
3185 case CTSF_UTABLEPCT_COUNT:
3186 case CTSF_USUBTABLEPCT_COUNT:
3187 case CTSF_ULAYERPCT_COUNT:
3188 case CTSF_ULAYERROWPCT_COUNT:
3189 case CTSF_ULAYERCOLPCT_COUNT:
3191 enum ctables_domain_type d = ctables_function_domain (ss->function);
3192 return (cell->domains[d]->u_count
3193 ? s->count / cell->domains[d]->u_count * 100
3197 case CTSF_ROWPCT_VALIDN:
3198 case CTSF_COLPCT_VALIDN:
3199 case CTSF_TABLEPCT_VALIDN:
3200 case CTSF_SUBTABLEPCT_VALIDN:
3201 case CTSF_LAYERPCT_VALIDN:
3202 case CTSF_LAYERROWPCT_VALIDN:
3203 case CTSF_LAYERCOLPCT_VALIDN:
3205 enum ctables_domain_type d = ctables_function_domain (ss->function);
3206 return (cell->domains[d]->e_valid
3207 ? s->count / cell->domains[d]->e_valid * 100
3211 case CTSF_UROWPCT_VALIDN:
3212 case CTSF_UCOLPCT_VALIDN:
3213 case CTSF_UTABLEPCT_VALIDN:
3214 case CTSF_USUBTABLEPCT_VALIDN:
3215 case CTSF_ULAYERPCT_VALIDN:
3216 case CTSF_ULAYERROWPCT_VALIDN:
3217 case CTSF_ULAYERCOLPCT_VALIDN:
3219 enum ctables_domain_type d = ctables_function_domain (ss->function);
3220 return (cell->domains[d]->u_valid
3221 ? s->count / cell->domains[d]->u_valid * 100
3225 case CTSF_ROWPCT_TOTALN:
3226 case CTSF_COLPCT_TOTALN:
3227 case CTSF_TABLEPCT_TOTALN:
3228 case CTSF_SUBTABLEPCT_TOTALN:
3229 case CTSF_LAYERPCT_TOTALN:
3230 case CTSF_LAYERROWPCT_TOTALN:
3231 case CTSF_LAYERCOLPCT_TOTALN:
3233 enum ctables_domain_type d = ctables_function_domain (ss->function);
3234 return (cell->domains[d]->e_total
3235 ? s->count / cell->domains[d]->e_total * 100
3239 case CTSF_UROWPCT_TOTALN:
3240 case CTSF_UCOLPCT_TOTALN:
3241 case CTSF_UTABLEPCT_TOTALN:
3242 case CTSF_USUBTABLEPCT_TOTALN:
3243 case CTSF_ULAYERPCT_TOTALN:
3244 case CTSF_ULAYERROWPCT_TOTALN:
3245 case CTSF_ULAYERCOLPCT_TOTALN:
3247 enum ctables_domain_type d = ctables_function_domain (ss->function);
3248 return (cell->domains[d]->u_total
3249 ? s->count / cell->domains[d]->u_total * 100
3270 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3276 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3283 double weight, variance;
3284 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3285 return calc_semean (variance, weight);
3292 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3293 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3299 double weight, mean;
3300 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3301 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3305 case CTSF_UVARIANCE:
3308 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3312 case CTSF_ROWPCT_SUM:
3313 case CTSF_COLPCT_SUM:
3314 case CTSF_TABLEPCT_SUM:
3315 case CTSF_SUBTABLEPCT_SUM:
3316 case CTSF_LAYERPCT_SUM:
3317 case CTSF_LAYERROWPCT_SUM:
3318 case CTSF_LAYERCOLPCT_SUM:
3320 double weight, mean;
3321 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3322 if (weight == SYSMIS || mean == SYSMIS)
3324 enum ctables_domain_type d = ctables_function_domain (ss->function);
3325 double num = weight * mean;
3326 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3327 return denom != 0 ? num / denom * 100 : SYSMIS;
3329 case CTSF_UROWPCT_SUM:
3330 case CTSF_UCOLPCT_SUM:
3331 case CTSF_UTABLEPCT_SUM:
3332 case CTSF_USUBTABLEPCT_SUM:
3333 case CTSF_ULAYERPCT_SUM:
3334 case CTSF_ULAYERROWPCT_SUM:
3335 case CTSF_ULAYERCOLPCT_SUM:
3337 double weight, mean;
3338 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3339 if (weight == SYSMIS || mean == SYSMIS)
3341 enum ctables_domain_type d = ctables_function_domain (ss->function);
3342 double num = weight * mean;
3343 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3344 return denom != 0 ? num / denom * 100 : SYSMIS;
3353 struct casereader *reader = casewriter_make_reader (s->writer);
3356 struct percentile *ptile = percentile_create (
3357 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3358 struct order_stats *os = &ptile->parent;
3359 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3360 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3361 statistic_destroy (&ptile->parent.parent);
3369 struct casereader *reader = casewriter_make_reader (s->writer);
3372 struct mode *mode = mode_create ();
3373 struct order_stats *os = &mode->parent;
3374 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3375 s->ovalue = mode->mode;
3376 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis A whose cell values are examined. */
3384 struct ctables_cell_sort_aux
3386 const struct ctables_nest *nest;
3387 enum pivot_axis_type a;
/* Three-way comparison of the cells referenced by A_ and B_ (each points to
   a struct ctables_cell pointer), using the nest and axis in AUX_ (a struct
   ctables_cell_sort_aux).  For each variable in the nest other than the
   scale variable, cells are ordered first by category pointer and then,
   depending on the category type, by value or by value label; where
   the category's sort_ascending flag is consulted, a false flag negates
   the comparison result. */
3391 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3393 const struct ctables_cell_sort_aux *aux = aux_;
3394 struct ctables_cell *const *ap = a_;
3395 struct ctables_cell *const *bp = b_;
3396 const struct ctables_cell *a = *ap;
3397 const struct ctables_cell *b = *bp;
3399 const struct ctables_nest *nest = aux->nest;
3400 for (size_t i = 0; i < nest->n; i++)
3401 if (i != nest->scale_idx)
3403 const struct variable *var = nest->vars[i];
3404 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3405 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3406 if (a_cv->category != b_cv->category)
3407 return a_cv->category > b_cv->category ? 1 : -1;
3409 const union value *a_val = &a_cv->value;
3410 const union value *b_val = &b_cv->value;
3411 switch (a_cv->category->type)
3417 case CCT_POSTCOMPUTE:
3418 case CCT_EXCLUDED_MISSING:
3419 /* Must be equal. */
3427 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3435 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3437 return a_cv->category->sort_ascending ? cmp : -cmp;
3443 const char *a_label = var_lookup_value_label (var, a_val);
3444 const char *b_label = var_lookup_value_label (var, b_val);
3446 ? (b_label ? strcmp (a_label, b_label) : 1)
3447 : (b_label ? -1 : value_compare_3way (
3448 a_val, b_val, var_get_width (var))));
3450 return a_cv->category->sort_ascending ? cmp : -cmp;
3464 For each ctables_table:
3465 For each combination of row vars:
3466 For each combination of column vars:
3467 For each combination of layer vars:
3469 Make a table of row values:
3470 Sort entries by row values
3471 Assign a 0-based index to each actual value
3472 Construct a dimension
3473 Make a table of column values
3474 Make a table of layer values
3476 Fill the table entry using the indexes from before.
/* Looks up, and if necessary creates, the domain of type DOMAIN in section S
   that CELL belongs to.  The hash covers, for every variable index that each
   axis's nest lists in its domains[DOMAIN] array, the category pointer and
   (except for total, subtotal, and postcompute categories) the value.
   Existing domains with that hash are compared field by field against their
   example cell.  A new domain is allocated with CELL as its example and,
   when the table has sum variables, a zeroed array of
   S->table->n_sum_vars sums, then inserted into S->domains[DOMAIN]. */
3479 static struct ctables_domain *
3480 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3481 enum ctables_domain_type domain)
3484 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3486 const struct ctables_nest *nest = s->nests[a];
3487 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3489 size_t v_idx = nest->domains[domain][i];
3490 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3491 hash = hash_pointer (cv->category, hash);
3492 if (cv->category->type != CCT_TOTAL
3493 && cv->category->type != CCT_SUBTOTAL
3494 && cv->category->type != CCT_POSTCOMPUTE)
3495 hash = value_hash (&cv->value,
3496 var_get_width (nest->vars[v_idx]), hash);
3500 struct ctables_domain *d;
3501 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3503 const struct ctables_cell *df = d->example;
3504 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3506 const struct ctables_nest *nest = s->nests[a];
3507 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3509 size_t v_idx = nest->domains[domain][i];
3510 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3511 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3512 if (cv1->category != cv2->category
3513 || (cv1->category->type != CCT_TOTAL
3514 && cv1->category->type != CCT_SUBTOTAL
3515 && cv1->category->type != CCT_POSTCOMPUTE
3516 && !value_equal (&cv1->value, &cv2->value,
3517 var_get_width (nest->vars[v_idx]))))
3526 struct ctables_sum *sums = (s->table->n_sum_vars
3527 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3530 d = xmalloc (sizeof *d);
3531 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3532 hmap_insert (&s->domains[domain], &d->node, hash);
/* Builds a substring over the string data of V, var_get_width (VAR) bytes
   long, and trims trailing spaces from it.  The substring refers to V's own
   buffer rather than a copy. */
3536 static struct substring
3537 rtrim_value (const union value *v, const struct variable *var)
3539 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3540 var_get_width (var));
3541 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether V, the value of VAR with trailing spaces trimmed, lies
   within the inclusive range SRANGE[0]...SRANGE[1].  A bound whose string
   pointer is null is treated as unbounded on that side. */
3546 in_string_range (const union value *v, const struct variable *var,
3547 const struct substring *srange)
3549 struct substring s = rtrim_value (v, var);
3550 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3551 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Searches the categories in C, from last to first, for the one that value
   V of variable VAR falls into.  System-missing numeric values are checked
   for before the search.  The visible tests cover an exact number, an exact
   string (ignoring trailing spaces), numeric ranges whose -DBL_MAX or
   DBL_MAX bound means unbounded, string ranges, and missing values.  When
   the loop finishes without returning, the result is NULL for a missing
   value and OTHERNM otherwise. */
3554 static const struct ctables_category *
3555 ctables_categories_match (const struct ctables_categories *c,
3556 const union value *v, const struct variable *var)
3558 if (var_is_numeric (var) && v->f == SYSMIS)
3561 const struct ctables_category *othernm = NULL;
3562 for (size_t i = c->n_cats; i-- > 0; )
3564 const struct ctables_category *cat = &c->cats[i];
3568 if (cat->number == v->f)
3573 if (ss_equals (cat->string, rtrim_value (v, var)))
3578 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3579 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3584 if (in_string_range (v, var, cat->srange))
3589 if (var_is_value_missing (var, v))
3593 case CCT_POSTCOMPUTE:
3608 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3611 case CCT_EXCLUDED_MISSING:
3616 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's CCT_TOTAL category if it is C's first or last category,
   checking the first one before the last.  Assumes C has at least one
   category, since c->cats[0] and c->cats[c->n_cats - 1] are read
   unconditionally. */
3619 static const struct ctables_category *
3620 ctables_categories_total (const struct ctables_categories *c)
3622 const struct ctables_category *first = &c->cats[0];
3623 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3624 return (first->type == CCT_TOTAL ? first
3625 : last->type == CCT_TOTAL ? last
/* Looks up the cell in section S for case C and the per-axis categories
   CATS, creating it if no match is found.  Cells are keyed on the category
   pointer of every non-scale variable in each axis's nest plus, except for
   total, subtotal, and postcompute categories, the case's value for that
   variable.  A new cell records its categories and cloned values, the
   domains to omit when a total-like category is involved (which depend on
   the axis), whether it involves a postcompute category, freshly
   initialized summaries for its summary specs, and its domain of each type;
   it is then inserted into S->cells. */
3629 static struct ctables_cell *
3630 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3631 const struct ctables_category *cats[PIVOT_N_AXES][10])
3634 enum ctables_summary_variant sv = CSV_CELL;
3635 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3637 const struct ctables_nest *nest = s->nests[a];
3638 for (size_t i = 0; i < nest->n; i++)
3639 if (i != nest->scale_idx)
3641 hash = hash_pointer (cats[a][i], hash);
3642 if (cats[a][i]->type != CCT_TOTAL
3643 && cats[a][i]->type != CCT_SUBTOTAL
3644 && cats[a][i]->type != CCT_POSTCOMPUTE)
3645 hash = value_hash (case_data (c, nest->vars[i]),
3646 var_get_width (nest->vars[i]), hash);
3652 struct ctables_cell *cell;
3653 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3655 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3657 const struct ctables_nest *nest = s->nests[a];
3658 for (size_t i = 0; i < nest->n; i++)
3659 if (i != nest->scale_idx
3660 && (cats[a][i] != cell->axes[a].cvs[i].category
3661 || (cats[a][i]->type != CCT_TOTAL
3662 && cats[a][i]->type != CCT_SUBTOTAL
3663 && cats[a][i]->type != CCT_POSTCOMPUTE
3664 && !value_equal (case_data (c, nest->vars[i]),
3665 &cell->axes[a].cvs[i].value,
3666 var_get_width (nest->vars[i])))))
3675 cell = xmalloc (sizeof *cell);
3678 cell->omit_domains = 0;
3679 cell->postcompute = false;
3680 //struct string name = DS_EMPTY_INITIALIZER;
3681 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3683 const struct ctables_nest *nest = s->nests[a];
3684 cell->axes[a].cvs = (nest->n
3685 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3687 for (size_t i = 0; i < nest->n; i++)
3689 const struct ctables_category *cat = cats[a][i];
3690 const struct variable *var = nest->vars[i];
3691 const union value *value = case_data (c, var);
3692 if (i != nest->scale_idx)
3694 const struct ctables_category *subtotal = cat->subtotal;
3695 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3698 if (cat->type == CCT_TOTAL
3699 || cat->type == CCT_SUBTOTAL
3700 || cat->type == CCT_POSTCOMPUTE)
3702 /* XXX these should be more encompassing I think.*/
3706 case PIVOT_AXIS_COLUMN:
3707 cell->omit_domains |= ((1u << CTDT_TABLE) |
3708 (1u << CTDT_LAYER) |
3709 (1u << CTDT_LAYERCOL) |
3710 (1u << CTDT_SUBTABLE) |
3713 case PIVOT_AXIS_ROW:
3714 cell->omit_domains |= ((1u << CTDT_TABLE) |
3715 (1u << CTDT_LAYER) |
3716 (1u << CTDT_LAYERROW) |
3717 (1u << CTDT_SUBTABLE) |
3720 case PIVOT_AXIS_LAYER:
3721 cell->omit_domains |= ((1u << CTDT_TABLE) |
3722 (1u << CTDT_LAYER));
3726 if (cat->type == CCT_POSTCOMPUTE)
3727 cell->postcompute = true;
3730 cell->axes[a].cvs[i].category = cat;
3731 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3734 if (i != nest->scale_idx)
3736 if (!ds_is_empty (&name))
3737 ds_put_cstr (&name, ", ");
3738 char *value_s = data_out (value, var_get_encoding (var),
3739 var_get_print_format (var),
3740 settings_get_fmt_settings ());
3741 if (cat->type == CCT_TOTAL
3742 || cat->type == CCT_SUBTOTAL
3743 || cat->type == CCT_POSTCOMPUTE)
3744 ds_put_format (&name, "%s=total", var_get_name (var));
3746 ds_put_format (&name, "%s=%s", var_get_name (var),
3747 value_s + strspn (value_s, " "));
3753 //cell->name = ds_steal_cstr (&name);
3755 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3756 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3757 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3758 for (size_t i = 0; i < specs->n; i++)
3759 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3760 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3761 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3762 hmap_insert (&s->cells, &cell->node, hash);
/* Checks case C for missing scale data under SPECS: consults SPECS->is_scale
   first, then tests the case's value of SPECS->var, and then of each of the
   SPECS->n_listwise_vars variables in SPECS->listwise_vars, with
   var_is_num_missing(). */
3767 is_scale_missing (const struct ctables_summary_spec_set *specs,
3768 const struct ccase *c)
3770 if (!specs->is_scale)
3773 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3776 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3778 const struct variable *var = specs->listwise_vars[i];
3779 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell of section S selected by CATS, obtaining the cell
   from ctables_cell_insert__().  The case is first folded into each of the
   cell's summaries with ctables_summary_add().  Then, for every domain type
   that the cell does not omit, the cell's domain of that type has D_WEIGHT
   and E_WEIGHT added to its totals, to its counts when EXCLUDED_MISSING is
   false, and to its valid counts, and each of the table's sum variables
   whose value in C is not missing is added to the domain's e_sum (times
   E_WEIGHT) and u_sum (unweighted). */
3787 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3788 const struct ctables_category *cats[PIVOT_N_AXES][10],
3789 bool is_missing, bool excluded_missing,
3790 double d_weight, double e_weight)
3792 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3793 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3795 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3797 bool scale_missing = is_scale_missing (specs, c);
3798 for (size_t i = 0; i < specs->n; i++)
3799 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3800 specs->var, case_data (c, specs->var), specs->is_scale,
3801 scale_missing, is_missing, excluded_missing,
3802 d_weight, e_weight);
3803 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* omit_domains is a bit mask with one bit per domain type (it is built by
   OR-ing 1u << CTDT_* values), so this domain's bit must be tested with
   bitwise AND.  A logical AND would skip every domain as soon as any bit
   at all was set. */
3804 if (!(cell->omit_domains & (1u << dt)))
3806 struct ctables_domain *d = cell->domains[dt];
3807 d->d_total += d_weight;
3808 d->e_total += e_weight;
3810 if (!excluded_missing)
3812 d->d_count += d_weight;
3813 d->e_count += e_weight;
3818 d->d_valid += d_weight;
3819 d->e_valid += e_weight;
3822 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3824 /* XXX listwise_missing??? */
3825 const struct variable *var = s->table->sum_vars[i];
3826 double addend = case_num (c, var);
3827 if (!var_is_num_missing (var, addend))
3829 struct ctables_sum *sum = &d->sums[i];
3830 sum->e_sum += addend * e_weight;
3831 sum->u_sum += addend;
/* Adds case C to the cells that involve total categories.  Walks the axes
   from START_AXIS and, within each nest, the variables from index
   START_NEST, skipping the scale variable.  For each variable it looks up
   the total category of the variable's categories with
   ctables_categories_total(), remembers the current CATS entry, adds the
   case with ctables_cell_add__(), and recurses starting at the following
   variable index on the same axis.  The weights and missing-value flags
   are passed through unchanged. */
3839 recurse_totals (struct ctables_section *s, const struct ccase *c,
3840 const struct ctables_category *cats[PIVOT_N_AXES][10],
3841 bool is_missing, bool excluded_missing,
3842 double d_weight, double e_weight,
3843 enum pivot_axis_type start_axis, size_t start_nest)
3845 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3847 const struct ctables_nest *nest = s->nests[a];
3848 for (size_t i = start_nest; i < nest->n; i++)
3850 if (i == nest->scale_idx)
3853 const struct variable *var = nest->vars[i];
3855 const struct ctables_category *total = ctables_categories_total (
3856 s->table->categories[var_get_dict_index (var)]);
3859 const struct ctables_category *save = cats[a][i];
3861 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3862 d_weight, e_weight);
3863 recurse_totals (s, c, cats, is_missing, excluded_missing,
3864 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells that involve subtotal categories.  Walks the axes
   from START_AXIS and, within each nest, the variables from index
   START_NEST, skipping the scale variable.  For each variable it remembers
   the current CATS entry, replaces it with that category's subtotal, adds
   the case with ctables_cell_add__(), and recurses starting at the
   following variable index on the same axis.  The weights and
   missing-value flags are passed through unchanged. */
3873 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3874 const struct ctables_category *cats[PIVOT_N_AXES][10],
3875 bool is_missing, bool excluded_missing,
3876 double d_weight, double e_weight,
3877 enum pivot_axis_type start_axis, size_t start_nest)
3879 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3881 const struct ctables_nest *nest = s->nests[a];
3882 for (size_t i = start_nest; i < nest->n; i++)
3884 if (i == nest->scale_idx)
3887 const struct ctables_category *save = cats[a][i];
3890 cats[a][i] = save->subtotal;
3891 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3892 d_weight, e_weight);
3893 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3894 d_weight, e_weight, a, i + 1);
/* Records VALUE of VAR in the hash map OCCURRENCES.  Entries with the
   value's hash are compared against VALUE using VAR's width; a new entry is
   allocated, given a clone of VALUE, and inserted under that hash. */
3903 ctables_add_occurrence (const struct variable *var,
3904 const union value *value,
3905 struct hmap *occurrences)
3907 int width = var_get_width (var);
3908 unsigned int hash = value_hash (value, width, 0);
3910 struct ctables_occurrence *o;
3911 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3913 if (value_equal (value, &o->value, width))
3916 o = xmalloc (sizeof *o);
3917 value_clone (&o->value, value, width);
3918 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.  Each
   non-scale variable's value in C is matched to a category with
   ctables_categories_match(); a shared CCT_EXCLUDED_MISSING category can be
   substituted instead, which also sets the excluded-missing flag.  When no
   variable ended up excluded-missing, every non-scale variable's value is
   recorded in S->occurrences.  The case is then added to its cell with
   ctables_cell_add__() and to the total and subtotal cells through
   recurse_totals() and recurse_subtotals().

   NOTE(review): the local CATS array has room for only 10 variables per
   axis, and no bound check on the nest size is visible here. */
3922 ctables_cell_insert (struct ctables_section *s,
3923 const struct ccase *c,
3924 double d_weight, double e_weight)
3926 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3928 /* Does at least one categorical variable have a missing value in an included
3929 or excluded category? */
3930 bool is_missing = false;
3932 /* Does at least one categorical variable have a missing value in an excluded
3934 bool excluded_missing = false;
3936 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3938 const struct ctables_nest *nest = s->nests[a];
3939 for (size_t i = 0; i < nest->n; i++)
3941 if (i == nest->scale_idx)
3944 const struct variable *var = nest->vars[i];
3945 const union value *value = case_data (c, var);
3947 bool var_missing = var_is_value_missing (var, value) != 0;
3951 cats[a][i] = ctables_categories_match (
3952 s->table->categories[var_get_dict_index (var)], value, var);
3958 static const struct ctables_category cct_excluded_missing = {
3959 .type = CCT_EXCLUDED_MISSING,
3962 cats[a][i] = &cct_excluded_missing;
3963 excluded_missing = true;
3968 if (!excluded_missing)
3969 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3971 const struct ctables_nest *nest = s->nests[a];
3972 for (size_t i = 0; i < nest->n; i++)
3973 if (i != nest->scale_idx)
3975 const struct variable *var = nest->vars[i];
3976 const union value *value = case_data (c, var);
3977 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3981 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3982 d_weight, e_weight);
3984 //if (!excluded_missing)
3986 recurse_totals (s, c, cats, is_missing, excluded_missing,
3987 d_weight, e_weight, 0, 0);
3988 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3989 d_weight, e_weight, 0, 0);
3995 const struct ctables_summary_spec_set *set;
/* Three-way comparison of merge items A and B by the summary specs they
   refer to (element OFS of each item's SET): first by function, then by
   percentile, and finally by label using strcmp(), with a null label
   treated as the empty string.

   NOTE(review): the percentile comparison returns 1 when A's percentile is
   the smaller one, the reverse of the direction used for the function
   comparison -- confirm that this ordering is intended. */
4000 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4002 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4003 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4004 if (as->function != bs->function)
4005 return as->function > bs->function ? 1 : -1;
4006 else if (as->percentile != bs->percentile)
4007 return as->percentile < bs->percentile ? 1 : -1;
4009 const char *as_label = as->label ? as->label : "";
4010 const char *bs_label = bs->label ? bs->label : "";
4011 return strcmp (as_label, bs_label);
/* Creates the pivot value that labels category CAT: for total and subtotal
   categories, user text made from CAT->total_label; otherwise, a pivot value
   for VALUE of variable VAR. */
4014 static struct pivot_value *
4015 ctables_category_create_label__ (const struct ctables_category *cat,
4016 const struct variable *var,
4017 const union value *value)
4019 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4020 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4021 : pivot_value_new_var_value (var, value));
/* Creates the label for postcompute category CAT by scanning CAT->pc->label
   for ")LABEL[" references.  Text before each reference is copied to the
   output; the reference's index, parsed as a decimal number up to the
   closing ']', must lie between 1 and CATS->n_cats and selects the category
   (1-based) whose label, built by ctables_category_create_label__() for VAR
   and VALUE, is appended in its place.  If the label holds no reference at
   all it is used directly as user text.  The final return yields the
   unexpanded CAT->pc->label; presumably that is the fallback for a
   malformed reference -- TODO confirm. */
4024 static struct pivot_value *
4025 ctables_postcompute_label (const struct ctables_categories *cats,
4026 const struct ctables_category *cat,
4027 const struct variable *var,
4028 const union value *value)
4030 struct substring in = ss_cstr (cat->pc->label);
4031 struct substring target = ss_cstr (")LABEL[");
4033 struct string out = DS_EMPTY_INITIALIZER;
4036 size_t chunk = ss_find_substring (in, target);
4037 if (chunk == SIZE_MAX)
4039 if (ds_is_empty (&out))
4040 return pivot_value_new_user_text (in.string, in.length);
4043 ds_put_substring (&out, in);
4044 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
4048 ds_put_substring (&out, ss_head (in, chunk));
4049 ss_advance (&in, chunk + target.length);
4051 struct substring idx_s;
4052 if (!ss_get_until (&in, ']', &idx_s))
4055 long int idx = strtol (idx_s.string, &tail, 10);
4056 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4059 struct ctables_category *cat2 = &cats->cats[idx - 1];
4060 struct pivot_value *label2
4061 = ctables_category_create_label__ (cat2, var, value);
4062 char *label2_s = pivot_value_to_string_defaults (label2);
4063 ds_put_cstr (&out, label2_s);
4065 pivot_value_destroy (label2);
4070 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot value that labels category CAT.  A postcompute category
   that has a label is handled by ctables_postcompute_label(); every other
   category is handled by ctables_category_create_label__(). */
4073 static struct pivot_value *
4074 ctables_category_create_label (const struct ctables_categories *cats,
4075 const struct ctables_category *cat,
4076 const struct variable *var,
4077 const union value *value)
4079 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4080 ? ctables_postcompute_label (cats, cat, var, value)
4081 : ctables_category_create_label__ (cat, var, value));
/* Searches T->clabels_values_map, among the entries stored under HASH, for
   one whose value equals VALUE when compared at the given WIDTH.  HASH must
   be the hash under which such an entry would have been inserted. */
4084 static struct ctables_value *
4085 ctables_value_find__ (struct ctables_table *t, const union value *value,
4086 int width, unsigned int hash)
4088 struct ctables_value *clv;
4089 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4090 hash, &t->clabels_values_map)
4091 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The map is first searched with
   ctables_value_find__(); a new entry is allocated, given a clone of VALUE,
   and inserted under value_hash (VALUE, WIDTH, 0). */
4097 ctables_value_insert (struct ctables_table *t, const union value *value,
4100 unsigned int hash = value_hash (value, width, 0);
4101 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4104 clv = xmalloc (sizeof *clv);
4105 value_clone (&clv->value, value, width);
4106 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Convenience wrapper around ctables_value_find__() that computes the hash
   of VALUE itself, as value_hash (VALUE, WIDTH, 0). */
4110 static struct ctables_value *
4111 ctables_value_find (struct ctables_table *t,
4112 const union value *value, int width)
4114 return ctables_value_find__ (t, value, width,
4115 value_hash (value, width, 0));
/* Recursively enumerates one nest index per axis into IX, starting at axis
   A.  While A is a valid axis, IX[A] runs from 0 up to (but not including)
   MAX (T->stacks[A].n, 1), recursing to the next axis for each value.
   Otherwise a new section is appended to T->sections (incrementing
   T->n_sections) and its cells map, its per-variable occurrence maps for
   each axis's selected nest, and its N_CTDTS domain maps are
   initialized. */
4119 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4120 size_t ix[PIVOT_N_AXES])
4122 if (a < PIVOT_N_AXES)
4124 size_t limit = MAX (t->stacks[a].n, 1);
4125 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4126 ctables_table_add_section (t, a + 1, ix);
4130 struct ctables_section *s = &t->sections[t->n_sections++];
4131 *s = (struct ctables_section) {
4133 .cells = HMAP_INITIALIZER (s->cells),
4135 for (a = 0; a < PIVOT_N_AXES; a++)
4138 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4140 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4141 for (size_t i = 0; i < nest->n; i++)
4142 hmap_init (&s->occurrences[a][i]);
4144 for (size_t i = 0; i < N_CTDTS; i++)
4145 hmap_init (&s->domains[i]);
4150 ctpo_add (double a, double b)
4156 ctpo_sub (double a, double b)
4162 ctpo_mul (double a, double b)
/* Division for postcompute expressions: returns A / B, or SYSMIS when B is
   zero. */
4168 ctpo_div (double a, double b)
4170 return b ? a / b : SYSMIS;
/* Exponentiation for postcompute expressions: computes pow (A, B).  The
   value of errno on entry is captured in SAVE_ERRNO first; presumably it is
   restored afterward so that the caller's errno is left undisturbed --
   TODO confirm. */
4174 ctpo_pow (double a, double b)
4176 int save_errno = errno;
4178 double result = pow (a, b);
4186 ctpo_neg (double a, double b UNUSED)
/* Context passed through the ctables_pcexpr_evaluate*() functions: the cell
   being evaluated, the section that contains it, the categories that the
   expression refers to, the axis on which the postcompute category lies,
   and the format passed along when looking up the categories that an
   expression names. */
4191 struct ctables_pcexpr_evaluate_ctx
4193 const struct ctables_cell *cell;
4194 const struct ctables_section *section;
4195 const struct ctables_categories *cats;
4196 enum pivot_axis_type pc_a;
4199 enum fmt_type parse_format;
4202 static double ctables_pcexpr_evaluate (
4203 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates the first N_ARGS subexpressions of E in context CTX and passes
   the results to EVALUATE.  The argument array holds two doubles that start
   out as 0, so N_ARGS must not exceed 2, and an unused second argument is
   passed as 0.  Each evaluated argument is tested for being non-finite or
   SYSMIS before EVALUATE is called. */
4206 ctables_pcexpr_evaluate_nonterminal (
4207 const struct ctables_pcexpr_evaluate_ctx *ctx,
4208 const struct ctables_pcexpr *e, size_t n_args,
4209 double evaluate (double, double))
4211 double args[2] = { 0, 0 };
4212 for (size_t i = 0; i < n_args; i++)
4214 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4215 if (!isfinite (args[i]) || args[i] == SYSMIS)
4218 return evaluate (args[0], args[1]);
/* Evaluates a category reference within a postcompute expression.  Searches
   CTX->section for the cell whose non-scale categories and values are the
   same as those of CTX->cell, except that the cell value at axis CTX->pc_a,
   index CTX->pc_a_idx is taken from PC_CV instead.  Values are left out of
   the hash and the comparison for total, subtotal, and postcompute
   categories.  The result is the matching cell's value for summary
   CTX->summary_idx, computed with ctables_summary_value(). */
4222 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4223 const struct ctables_cell_value *pc_cv)
4225 const struct ctables_section *s = ctx->section;
4228 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4230 const struct ctables_nest *nest = s->nests[a];
4231 for (size_t i = 0; i < nest->n; i++)
4232 if (i != nest->scale_idx)
4234 const struct ctables_cell_value *cv
4235 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4236 : &ctx->cell->axes[a].cvs[i]);
4237 hash = hash_pointer (cv->category, hash);
4238 if (cv->category->type != CCT_TOTAL
4239 && cv->category->type != CCT_SUBTOTAL
4240 && cv->category->type != CCT_POSTCOMPUTE)
4241 hash = value_hash (&cv->value,
4242 var_get_width (nest->vars[i]), hash);
4246 struct ctables_cell *tc;
4247 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4249 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4251 const struct ctables_nest *nest = s->nests[a];
4252 for (size_t i = 0; i < nest->n; i++)
4253 if (i != nest->scale_idx)
4255 const struct ctables_cell_value *p_cv
4256 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4257 : &ctx->cell->axes[a].cvs[i]);
4258 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4259 if (p_cv->category != t_cv->category
4260 || (p_cv->category->type != CCT_TOTAL
4261 && p_cv->category->type != CCT_SUBTOTAL
4262 && p_cv->category->type != CCT_POSTCOMPUTE
4263 && !value_equal (&p_cv->value,
4265 var_get_width (nest->vars[i]))))
4277 const struct ctables_table *t = s->table;
4278 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4279 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4280 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4281 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.
   Category operands are resolved with
   ctables_find_category_for_postcompute() and then looked up with
   ctables_pcexpr_evaluate_category(); arithmetic operators recurse through
   ctables_pcexpr_evaluate_nonterminal(). */
4285 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4286 const struct ctables_pcexpr *e)
/* Range operands: a range category can cover many data values, so add up
   the result for every occurrence on the postcompute variable that matches
   the range category. */
4293 case CTPO_CAT_NRANGE:
4294 case CTPO_CAT_SRANGE:
4296 struct ctables_cell_value cv = {
4297 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4299 assert (cv.category != NULL);
4301 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4302 const struct ctables_occurrence *o;
4305 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4306 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4307 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4309 cv.value = o->value;
4310 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands evaluated with a single lookup; the value is taken from
   E->number. */
4315 case CTPO_CAT_NUMBER:
4316 case CTPO_CAT_MISSING:
4317 case CTPO_CAT_OTHERNM:
4318 case CTPO_CAT_SUBTOTAL:
4319 case CTPO_CAT_TOTAL:
4321 struct ctables_cell_value cv = {
4322 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4323 .value = { .f = e->number },
4325 assert (cv.category != NULL);
4326 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand: when the literal is shorter than the variable width, a
   space-padded copy of that width is used for the lookup.  NOTE(review):
   presumably the copy is freed after the lookup -- confirm. */
4329 case CTPO_CAT_STRING:
4331 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4333 if (width > e->string.length)
4335 s = xmalloc (width);
4336 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4338 struct ctables_cell_value cv = {
4339 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4340 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4342 assert (cv.category != NULL);
4343 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Arithmetic operators: five binary, then unary negation. */
4349 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4352 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4355 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4358 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4361 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4364 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Searches the category values of CELL, across every axis and every variable
   of the nests in section S, for one whose category is a postcompute, and
   returns that category.  CELL must be a postcompute cell, and a postcompute
   category must be found (both are asserted).  The index of the variable
   that carries it is stored in *PC_A_IDX_P.  NOTE(review): the store to
   *PC_A_P and the handling of several postcomputes that cross each other
   are not visible here -- confirm. */
4370 static const struct ctables_category *
4371 ctables_cell_postcompute (const struct ctables_section *s,
4372 const struct ctables_cell *cell,
4373 enum pivot_axis_type *pc_a_p,
4376 assert (cell->postcompute);
4377 const struct ctables_category *pc_cat = NULL;
4378 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4379 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4381 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4382 if (cv->category->type == CCT_POSTCOMPUTE)
4386 /* Multiple postcomputes cross each other. The value is
4391 pc_cat = cv->category;
4395 *pc_a_idx_p = pc_a_idx;
4399 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary spec
   SS and returns the result of evaluating the postcompute expression.

   If the postcompute has a summary spec with the same function and
   percentile as SS, *FORMAT and *IS_CTABLES_FORMAT are overwritten with
   that spec's settings; otherwise they are left as the caller set them.
   NOTE(review): the loop over PC->specs is presumably guarded against a
   postcompute without specs -- confirm. */
4404 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4405 const struct ctables_cell *cell,
4406 const struct ctables_summary_spec *ss,
4407 struct fmt_spec *format,
4408 bool *is_ctables_format,
4411 enum pivot_axis_type pc_a = 0;
4412 size_t pc_a_idx = 0;
4413 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4414 s, cell, &pc_a, &pc_a_idx);
4418 const struct ctables_postcompute *pc = pc_cat->pc;
4421 for (size_t i = 0; i < pc->specs->n; i++)
4423 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4424 if (ss->function == ss2->function
4425 && ss->percentile == ss2->percentile)
4427 *format = ss2->format;
4428 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that
   carries the postcompute. */
4434 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4435 const struct ctables_categories *cats = s->table->categories[
4436 var_get_dict_index (var)];
4437 struct ctables_pcexpr_evaluate_ctx ctx = {
4442 .pc_a_idx = pc_a_idx,
4443 .summary_idx = summary_idx,
4444 .parse_format = pc_cat->parse_format,
4446 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Builds a pivot table from the cells accumulated in T and submits it for
   output.  CT supplies command-wide settings: the table look, variable label
   display, the hide threshold, and the replacement text for zero and missing
   values.

   The work is done in three stages:

   - Create the optional extra dimensions: a Statistics dimension when
     summary labels do not live on the summary axis (or are hidden while
     there is more than one summary), and a categories dimension when
     category labels have been moved off their own axis.

   - For each axis, sort the cells of every nest and create the nested
     variable, category and summary labels, recording the resulting leaf
     index in each cell.

   - Put each cell summary value into the table, formatted according to the
     summary spec or the postcompute that produced it. */
4450 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4452 struct pivot_table *pt = pivot_table_create__ (
4454 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4455 : pivot_value_new_text (N_("Custom Tables"))),
4458 pivot_table_set_caption (
4459 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4461 pivot_table_set_corner_text (
4462 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate Statistics dimension is needed when summary labels are placed
   on a different axis than the summaries, or when labels are hidden but
   several summaries must still be told apart. */
4464 bool summary_dimension = (t->summary_axis != t->slabels_axis
4465 || (!t->slabels_visible
4466 && t->summary_specs.n > 1));
4467 if (summary_dimension)
4469 struct pivot_dimension *d = pivot_dimension_create (
4470 pt, t->slabels_axis, N_("Statistics"));
4471 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4472 if (!t->slabels_visible)
4473 d->hide_all_labels = true;
4474 for (size_t i = 0; i < specs->n; i++)
4475 pivot_category_create_leaf (
4476 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* Category labels moved to another axis get a dimension of their own, with
   one leaf per collected value, in the order of T->clabels_values. */
4479 bool categories_dimension = t->clabels_example != NULL;
4480 if (categories_dimension)
4482 struct pivot_dimension *d = pivot_dimension_create (
4483 pt, t->label_axis[t->clabels_from_axis],
4484 t->clabels_from_axis == PIVOT_AXIS_ROW
4485 ? N_("Row Categories")
4486 : N_("Column Categories"));
4487 const struct variable *var = t->clabels_example;
4488 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4489 for (size_t i = 0; i < t->n_clabels_values; i++)
4491 const struct ctables_value *value = t->clabels_values[i];
4492 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4493 assert (cat != NULL);
4494 pivot_category_create_leaf (d->root, ctables_category_create_label (
4495 c, cat, t->clabels_example,
4500 pivot_table_set_look (pt, ct->look);
4501 struct pivot_dimension *d[PIVOT_N_AXES];
4502 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4504 static const char *names[] = {
4505 [PIVOT_AXIS_ROW] = N_("Rows"),
4506 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4507 [PIVOT_AXIS_LAYER] = N_("Layers"),
4509 d[a] = (t->axes[a] || a == t->summary_axis
4510 ? pivot_dimension_create (pt, a, names[a])
4515 assert (t->axes[a]);
4517 for (size_t i = 0; i < t->stacks[a].n; i++)
4519 struct ctables_nest *nest = &t->stacks[a].nests[i];
4520 struct ctables_section **sections = xnmalloc (t->n_sections,
4522 size_t n_sections = 0;
4524 size_t n_total_cells = 0;
4525 size_t max_depth = 0;
4526 for (size_t j = 0; j < t->n_sections; j++)
4527 if (t->sections[j].nests[a] == nest)
4529 struct ctables_section *s = &t->sections[j];
4530 sections[n_sections++] = s;
4531 n_total_cells += s->cells.count;
4533 size_t depth = s->nests[a]->n;
4534 max_depth = MAX (depth, max_depth);
4537 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4539 size_t n_sorted = 0;
4541 for (size_t j = 0; j < n_sections; j++)
4543 struct ctables_section *s = sections[j];
4545 struct ctables_cell *cell;
4546 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4548 sorted[n_sorted++] = cell;
4549 assert (n_sorted <= n_total_cells);
4552 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4553 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4556 for (size_t j = 0; j < n_sorted; j++)
4558 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4563 struct ctables_level
4565 enum ctables_level_type
4567 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4568 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4569 CTL_SUMMARY, /* Summary functions. */
4573 enum settings_value_show vlabel; /* CTL_VAR only. */
4576 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4577 size_t n_levels = 0;
4578 for (size_t k = 0; k < nest->n; k++)
4580 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4581 if (vlabel != CTVL_NONE)
4583 levels[n_levels++] = (struct ctables_level) {
4585 .vlabel = (enum settings_value_show) vlabel,
4590 if (nest->scale_idx != k
4591 && (k != nest->n - 1 || t->label_axis[a] == a))
4593 levels[n_levels++] = (struct ctables_level) {
4594 .type = CTL_CATEGORY,
4600 if (!summary_dimension && a == t->slabels_axis)
4602 levels[n_levels++] = (struct ctables_level) {
4603 .type = CTL_SUMMARY,
4604 .var_idx = SIZE_MAX,
4608 /* Pivot categories:
4610 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4611 - category for nest->vars[0], if nest->scale_idx != 0
4612 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4613 - category for nest->vars[1], if nest->scale_idx != 1
4615 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4616 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4617 - summary function, if 'a == t->slabels_axis && a ==
4620 Additional dimensions:
4622 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4624 - If 't->label_axis[b] == a' for some 'b != a', add a category
4629 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4631 for (size_t j = 0; j < n_sorted; j++)
4633 struct ctables_cell *cell = sorted[j];
4634 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4636 size_t n_common = 0;
4639 for (; n_common < n_levels; n_common++)
4641 const struct ctables_level *level = &levels[n_common];
4642 if (level->type == CTL_CATEGORY)
4644 size_t var_idx = level->var_idx;
4645 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4646 if (prev->axes[a].cvs[var_idx].category != c)
/* Compare the data values over the full width of the variable.
   value_equal() takes a width, as in ctables_pcexpr_evaluate_category();
   a type code is not a width, so string values would be compared on the
   wrong number of bytes and distinct categories could share a label
   group. */
4648 else if (c->type != CCT_SUBTOTAL
4649 && c->type != CCT_TOTAL
4650 && c->type != CCT_POSTCOMPUTE
4651 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4652 &cell->axes[a].cvs[var_idx].value,
4653 var_get_width (nest->vars[var_idx])))
4659 for (size_t k = n_common; k < n_levels; k++)
4661 const struct ctables_level *level = &levels[k];
4662 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4663 if (level->type == CTL_SUMMARY)
4665 assert (k == n_levels - 1);
4667 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4668 for (size_t m = 0; m < specs->n; m++)
4670 int leaf = pivot_category_create_leaf (
4671 parent, ctables_summary_label (&specs->specs[m],
4679 const struct variable *var = nest->vars[level->var_idx];
4680 struct pivot_value *label;
4681 if (level->type == CTL_VAR)
4683 label = pivot_value_new_variable (var);
4684 label->variable.show = level->vlabel;
4686 else if (level->type == CTL_CATEGORY)
4688 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4689 label = ctables_category_create_label (
4690 t->categories[var_get_dict_index (var)],
4691 cv->category, var, &cv->value);
4696 if (k == n_levels - 1)
4697 prev_leaf = pivot_category_create_leaf (parent, label);
4699 groups[k] = pivot_category_create_group__ (parent, label);
4703 cell->axes[a].leaf = prev_leaf;
4712 for (size_t i = 0; i < t->n_sections; i++)
4714 struct ctables_section *s = &t->sections[i];
4716 struct ctables_cell *cell;
4717 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4722 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4723 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4724 for (size_t j = 0; j < specs->n; j++)
4727 size_t n_dindexes = 0;
4729 if (summary_dimension)
4730 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4732 if (categories_dimension)
4734 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4735 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4736 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4737 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4740 dindexes[n_dindexes++] = ctv->leaf;
4743 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4746 int leaf = cell->axes[a].leaf;
4747 if (a == t->summary_axis && !summary_dimension)
4749 dindexes[n_dindexes++] = leaf;
4752 const struct ctables_summary_spec *ss = &specs->specs[j];
4754 struct fmt_spec format = specs->specs[j].format;
4755 bool is_ctables_format = ss->is_ctables_format;
4756 double d = (cell->postcompute
4757 ? ctables_cell_calculate_postcompute (
4758 s, cell, ss, &format, &is_ctables_format, j)
4759 : ctables_summary_value (cell, &cell->summaries[j],
4762 struct pivot_value *value;
4763 if (ct->hide_threshold != 0
4764 && d < ct->hide_threshold
4765 && ctables_summary_function_is_count (ss->function))
4767 value = pivot_value_new_user_text_nocopy (
4768 xasprintf ("<%d", ct->hide_threshold));
4770 else if (d == 0 && ct->zero)
4771 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4772 else if (d == SYSMIS && ct->missing)
4773 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4774 else if (is_ctables_format)
4776 char *s = data_out_stretchy (&(union value) { .f = d },
4778 &ct->ctables_formats, NULL);
4779 value = pivot_value_new_user_text_nocopy (s);
4783 value = pivot_value_new_number (d);
4784 value->numeric.format = format;
4786 pivot_table_put (pt, dindexes, n_dindexes, value);
4791 pivot_table_submit (pt);
/* Validates a request to move the category labels of axis A in T to another
   axis (subcommand ROWLABELS or COLLABELS).  Records A in
   T->clabels_from_axis and the innermost variable of the first nest in
   T->clabels_example.

   An error is reported when:

   - a category of that variable is sorted by a summary function;

   - the innermost variable of any nest on the axis is its scale variable;

   - the innermost variables of the other nests differ from the first in
     width, value labels or category specifications.

   The result is used as a boolean by ctables_prepare_table().
   NOTE(review): the early exits and return statements are not visible
   here -- confirm. */
4795 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4797 enum pivot_axis_type label_pos = t->label_axis[a];
4801 t->clabels_from_axis = a;
/* Names used in the error messages below. */
4803 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4804 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4806 const struct ctables_stack *stack = &t->stacks[a];
4810 const struct ctables_nest *n0 = &stack->nests[0];
4813 assert (stack->n == 1);
/* The innermost variable of the first nest is the reference that all the
   other nests are compared against. */
4817 const struct variable *v0 = n0->vars[n0->n - 1];
4818 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4819 t->clabels_example = v0;
4821 for (size_t i = 0; i < c0->n_cats; i++)
4822 if (c0->cats[i].type == CCT_FUNCTION)
4824 msg (SE, _("%s=%s is not allowed with sorting based "
4825 "on a summary function."),
4826 subcommand_name, pos_name);
4829 if (n0->n - 1 == n0->scale_idx)
4831 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4832 "but %s is a scale variable."),
4833 subcommand_name, pos_name, var_get_name (v0));
4837 for (size_t i = 1; i < stack->n; i++)
4839 const struct ctables_nest *ni = &stack->nests[i];
4841 const struct variable *vi = ni->vars[ni->n - 1];
4842 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4844 if (ni->n - 1 == ni->scale_idx)
4846 msg (SE, _("%s=%s requires the variables to be moved to be "
4847 "categorical, but %s is a scale variable."),
4848 subcommand_name, pos_name, var_get_name (vi));
4851 if (var_get_width (v0) != var_get_width (vi))
4853 msg (SE, _("%s=%s requires the variables to be "
4854 "moved to have the same width, but %s has "
4855 "width %d and %s has width %d."),
4856 subcommand_name, pos_name,
4857 var_get_name (v0), var_get_width (v0),
4858 var_get_name (vi), var_get_width (vi));
4861 if (!val_labs_equal (var_get_value_labels (v0),
4862 var_get_value_labels (vi)))
4864 msg (SE, _("%s=%s requires the variables to be "
4865 "moved to have the same value labels, but %s "
4866 "and %s have different value labels."),
4867 subcommand_name, pos_name,
4868 var_get_name (v0), var_get_name (vi));
4871 if (!ctables_categories_equal (c0, ci))
4873 msg (SE, _("%s=%s requires the variables to be "
4874 "moved to have the same category "
4875 "specifications, but %s and %s have different "
4876 "category specifications."),
4877 subcommand_name, pos_name,
4878 var_get_name (v0), var_get_name (vi));
/* Looks for VAR among the *N variables in *SUM_VARS and, when the search
   does not end early, stores VAR at index *N, first growing the array
   (whose capacity is tracked in *ALLOCATED) if it is full.  The caller
   stores the result as a sum_var_idx.  NOTE(review): presumably this
   returns the index of VAR in the array and increments *N on insertion;
   the return statements are not visible here -- confirm. */
4887 add_sum_var (struct variable *var,
4888 struct variable ***sum_vars, size_t *n, size_t *allocated)
4890 for (size_t i = 0; i < *n; i++)
4891 if (var == (*sum_vars)[i])
4894 if (*n >= *allocated)
4895 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4896 (*sum_vars)[*n] = var;
/* Walks the axis expression tree rooted at A.  For every summary spec of
   every summary variant whose function is a percentage-of-sum function, the
   axis variable is registered with add_sum_var() and the result is stored
   in the spec as sum_var_idx.  Both subexpressions are visited recursively.
   NOTE(review): the test that separates leaf nodes from operator nodes is
   not visible here -- confirm. */
4901 enumerate_sum_vars (const struct ctables_axis *a,
4902 struct variable ***sum_vars, size_t *n, size_t *allocated)
4910 for (size_t i = 0; i < N_CSVS; i++)
4911 for (size_t j = 0; j < a->specs[i].n; j++)
4913 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4914 if (ctables_function_is_pctsum (spec->function))
4915 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4921 for (size_t i = 0; i < 2; i++)
4922 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Derives from the parsed axes of T everything needed before data is read:

   - the stack of nests for each axis, with the variable indexes that make
     up each domain type for each nest;

   - default summary specs for nests on the summary axis that have none, and
     total specs cloned from the cell specs where missing;

   - listwise-missing variable lists, when the command asks for them;

   - the merged list of summary specs in T->summary_specs, with each
     original spec pointing at its merged position through axis_idx;

   - the variables needed by percentage-of-sum functions.

   Returns the combined result of checking the row and column label
   positions. */
4928 ctables_prepare_table (struct ctables_table *t)
4930 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4933 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4935 for (size_t j = 0; j < t->stacks[a].n; j++)
4937 struct ctables_nest *nest = &t->stacks[a].nests[j];
4938 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Each domain lists at most all NEST->n variable indexes. */
4940 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4941 nest->n_domains[dt] = 0;
4943 for (size_t k = 0; k < nest->n; k++)
4945 if (k == nest->scale_idx)
4954 if (a != PIVOT_AXIS_LAYER)
4961 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4962 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4963 : a == PIVOT_AXIS_ROW)
/* True for the innermost categorical variable: the last variable, or the
   one before it when the last is the scale variable. */
4965 if (k == nest->n - 1
4966 || (nest->scale_idx == nest->n - 1
4967 && k == nest->n - 2))
4973 if (a == PIVOT_AXIS_COLUMN)
4978 if (a == PIVOT_AXIS_ROW)
4983 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Fallback stack with one empty nest. */
4990 struct ctables_nest *nest = xmalloc (sizeof *nest);
4991 *nest = (struct ctables_nest) { .n = 0 };
4992 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4994 /* There's no point in moving labels away from an axis that has no
4995 labels, so avoid dealing with the special cases around that. */
4996 t->label_axis[a] = a;
4999 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5000 for (size_t i = 0; i < stack->n; i++)
5002 struct ctables_nest *nest = &stack->nests[i];
/* No summary functions were given for this nest: default to the mean for a
   scale variable and to the count otherwise. */
5003 if (!nest->specs[CSV_CELL].n)
5005 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
5006 specs->specs = xmalloc (sizeof *specs->specs);
5009 enum ctables_summary_function function
5010 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
5012 *specs->specs = (struct ctables_summary_spec) {
5013 .function = function,
5014 .format = ctables_summary_default_format (function, specs->var),
5017 specs->var = nest->vars[0];
5019 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5020 &nest->specs[CSV_CELL]);
5022 else if (!nest->specs[CSV_TOTAL].n)
5023 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5024 &nest->specs[CSV_CELL]);
/* For listwise missing-value handling, collect the scale variables of the
   other nests that share this nest's group head. */
5026 if (t->ctables->smissing_listwise)
5028 struct variable **listwise_vars = NULL;
5030 size_t allocated = 0;
5032 for (size_t j = nest->group_head; j < stack->n; j++)
5034 const struct ctables_nest *other_nest = &stack->nests[j];
5035 if (other_nest->group_head != nest->group_head)
5038 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5041 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5042 sizeof *listwise_vars);
5043 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5046 for (size_t j = 0; j < N_CSVS; j++)
5048 nest->specs[j].listwise_vars = listwise_vars;
5049 nest->specs[j].n_listwise_vars = n;
/* Merge the summary spec sets of every nest and variant into
   T->summary_specs: repeatedly take the smallest remaining head item,
   append it, and advance every set whose head compares equal to it. */
5054 struct ctables_summary_spec_set *merged = &t->summary_specs;
5055 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5057 for (size_t j = 0; j < stack->n; j++)
5059 const struct ctables_nest *nest = &stack->nests[j];
5061 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5062 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5067 struct merge_item min = items[0];
5068 for (size_t j = 1; j < n_left; j++)
5069 if (merge_item_compare_3way (&items[j], &min) < 0)
5072 if (merged->n >= merged->allocated)
5073 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5074 sizeof *merged->specs);
5075 merged->specs[merged->n++] = min.set->specs[min.ofs];
5077 for (size_t j = 0; j < n_left; )
5079 if (merge_item_compare_3way (&items[j], &min) == 0)
5081 struct merge_item *item = &items[j];
5082 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5083 if (++item->ofs >= item->set->n)
5085 items[j] = items[--n_left];
/* NOTE(review): the printf calls below look like debugging output that is
   presumably compiled out -- confirm. */
5095 for (size_t j = 0; j < merged->n; j++)
5096 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5098 for (size_t j = 0; j < stack->n; j++)
5100 const struct ctables_nest *nest = &stack->nests[j];
5101 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5103 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5104 for (size_t k = 0; k < specs->n; k++)
5105 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5106 specs->specs[k].axis_idx);
5112 size_t allocated_sum_vars = 0;
5113 enumerate_sum_vars (t->axes[t->summary_axis],
5114 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5116 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5117 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of T, takes the value in case C of the nest's
   innermost variable and, if it matches one of the variable's categories,
   inserts it into the set of category label values of T.  Numeric
   system-missing values get special treatment.  NOTE(review): presumably
   they are skipped; the statement is not visible here -- confirm. */
5121 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5122 enum pivot_axis_type a)
5124 struct ctables_stack *stack = &t->stacks[a];
5125 for (size_t i = 0; i < stack->n; i++)
5127 const struct ctables_nest *nest = &stack->nests[i];
5128 const struct variable *var = nest->vars[nest->n - 1];
5129 const union value *value = case_data (c, var);
5131 if (var_is_numeric (var) && value->f == SYSMIS)
5134 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5136 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sort().  A_ and B_ each point to a pointer to a
   struct ctables_value, and WIDTH_ points to the int width shared by the
   values.  Returns the three-way comparison of the two values. */
5141 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5143 const struct ctables_value *const *ap = a_;
5144 const struct ctables_value *const *bp = b_;
5145 const struct ctables_value *a = *ap;
5146 const struct ctables_value *b = *bp;
5147 const int *width = width_;
5148 return value_compare_3way (&a->value, &b->value, *width);
/* Turns the set of category label values collected in T into the sorted
   array T->clabels_values and assigns each value its position as the leaf
   index.  Labeled values of the example variable that match a category can
   also be added first.  NOTE(review): presumably that happens only when
   empty categories are to be shown; the condition is not visible here --
   confirm. */
5152 ctables_sort_clabels_values (struct ctables_table *t)
5154 const struct variable *v0 = t->clabels_example;
5155 int width = var_get_width (v0);
5157 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5160 const struct val_labs *val_labs = var_get_value_labels (v0);
5161 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5162 vl = val_labs_next (val_labs, vl))
5163 if (ctables_categories_match (c0, &vl->value, v0))
5164 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array so that it can be sorted. */
5167 size_t n = hmap_count (&t->clabels_values_map);
5168 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5170 struct ctables_value *clv;
5172 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5173 t->clabels_values[i++] = clv;
5174 t->n_clabels_values = n;
5177 sort (t->clabels_values, n, sizeof *t->clabels_values,
5178 compare_clabels_values_3way, &width);
/* The sorted position is the leaf index later read back through
   ctables_value_find(). */
5180 for (size_t i = 0; i < n; i++)
5181 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the categories in CATS call
   for, whether or not they appeared in the data.  Explicit number and
   string categories contribute their own value; the other kinds contribute
   the labeled values of VAR that fall within them.  NOTE(review): the case
   labels that select each branch are mostly not visible here; the
   per-branch comments below are inferred from the branch bodies --
   confirm. */
5185 ctables_add_category_occurrences (const struct variable *var,
5186 struct hmap *occurrences,
5187 const struct ctables_categories *cats)
5189 const struct val_labs *val_labs = var_get_value_labels (var);
5191 for (size_t i = 0; i < cats->n_cats; i++)
5193 const struct ctables_category *c = &cats->cats[i];
/* Explicit numeric category. */
5197 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string category: padded with spaces to the variable width. */
5203 int width = var_get_width (var);
5205 value_init (&value, width);
5206 value_copy_buf_rpad (&value, width,
5207 CHAR_CAST (uint8_t *, c->string.string),
5208 c->string.length, ' ');
5209 ctables_add_occurrence (var, &value, occurrences);
5210 value_destroy (&value, width);
/* Numeric range: labeled values within the inclusive bounds. */
5215 assert (var_is_numeric (var));
5216 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5217 vl = val_labs_next (val_labs, vl))
5218 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5219 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range(). */
5223 assert (var_is_alpha (var));
5224 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5225 vl = val_labs_next (val_labs, vl))
5226 if (in_string_range (&vl->value, var, c->srange))
5227 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5231 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5232 vl = val_labs_next (val_labs, vl))
5233 if (var_is_value_missing (var, &vl->value))
5234 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5238 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5239 vl = val_labs_next (val_labs, vl))
5240 ctables_add_occurrence (var, &vl->value, occurrences);
5243 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing values unless the category includes
   them. */
5253 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5254 vl = val_labs_next (val_labs, vl))
5255 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5256 ctables_add_occurrence (var, &vl->value, occurrences);
5259 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates every combination of known occurrences, plus
   postcompute categories, over the variables of section S, and inserts a
   cell for each combination.  The recursion starts at variable A_IDX of
   axis A and moves on to the next axis when an axis is exhausted.  C is a
   scratch case whose data for each variable is overwritten with the
   occurrence being tried, and CATS[][] records the category chosen for
   each variable.  Once all axes are done the cell is inserted with
   ctables_cell_insert__(). */
5266 ctables_section_recurse_add_empty_categories (
5267 struct ctables_section *s,
5268 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5269 enum pivot_axis_type a, size_t a_idx)
5271 if (a >= PIVOT_N_AXES)
5272 ctables_cell_insert__ (s, c, cats);
5273 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5274 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5277 const struct variable *var = s->nests[a]->vars[a_idx];
5278 const struct ctables_categories *categories = s->table->categories[
5279 var_get_dict_index (var)];
5280 int width = var_get_width (var);
5281 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5282 const struct ctables_occurrence *o;
5283 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of VAR in the scratch case with this occurrence. */
5285 union value *value = case_data_rw (c, var);
5286 value_destroy (value, width);
5287 value_clone (value, &o->value, width);
5288 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5289 assert (cats[a][a_idx] != NULL);
5290 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are not tied to a data value, so they are tried
   separately. */
5293 for (size_t i = 0; i < categories->n_cats; i++)
5295 const struct ctables_category *cat = &categories->cats[i];
5296 if (cat->type == CCT_POSTCOMPUTE)
5298 cats[a][a_idx] = cat;
5299 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that did not occur in
   the data.  For every non-scale variable whose categories ask for empty
   categories to be shown, the values implied by the category specifications
   are added to the variable's occurrences.  The combinations are then
   enumerated with a scratch case.  NOTE(review): presumably the enumeration
   only runs when at least one variable shows empty categories, and the
   scratch case is released afterwards -- confirm. */
5306 ctables_section_add_empty_categories (struct ctables_section *s)
5308 bool show_empty = false;
5309 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5311 for (size_t k = 0; k < s->nests[a]->n; k++)
5312 if (k != s->nests[a]->scale_idx)
5314 const struct variable *var = s->nests[a]->vars[k];
5315 const struct ctables_categories *cats = s->table->categories[
5316 var_get_dict_index (var)];
5317 if (cats->show_empty)
5320 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* NOTE(review): the second dimension is a fixed 10, so a nest with more
   than 10 variables would index past the end of this array. */
5326 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5327 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5328 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Discards the data accumulated in section S: the occurrences of every
   non-scale variable, all of the cells with their values and summaries, and
   the domains.  The hash maps themselves stay usable afterwards.
   NOTE(review): presumably each occurrence, cell and domain structure is
   also freed; those statements are not visible here -- confirm. */
5333 ctables_section_clear (struct ctables_section *s)
5335 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5337 const struct ctables_nest *nest = s->nests[a];
5338 for (size_t i = 0; i < nest->n; i++)
5339 if (i != nest->scale_idx)
5341 const struct variable *var = nest->vars[i];
5342 int width = var_get_width (var);
5343 struct ctables_occurrence *o, *next;
5344 struct hmap *map = &s->occurrences[a][i];
5345 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5347 value_destroy (&o->value, width);
5348 hmap_delete (map, &o->node);
5355 struct ctables_cell *cell, *next_cell;
5356 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5358 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5360 const struct ctables_nest *nest = s->nests[a];
5361 for (size_t i = 0; i < nest->n; i++)
5362 if (i != nest->scale_idx)
5363 value_destroy (&cell->axes[a].cvs[i].value,
5364 var_get_width (nest->vars[i]));
5365 free (cell->axes[a].cvs);
/* The specs that match the summaries of this cell are those of its summary
   variant on the summary axis. */
5368 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5369 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5370 for (size_t i = 0; i < specs->n; i++)
5371 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5372 free (cell->summaries);
5374 hmap_delete (&s->cells, &cell->node);
5377 hmap_shrink (&s->cells);
5379 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5381 struct ctables_domain *domain, *next_domain;
5382 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5385 free (domain->sums);
5386 hmap_delete (&s->domains[dt], &domain->node);
5389 hmap_shrink (&s->domains[dt]);
/* Releases everything owned by section S: its accumulated data, its
   occurrence maps, its cell map and its domain maps.  It also frees the
   domain index arrays of the nests that S refers to. */
5394 ctables_section_uninit (struct ctables_section *s)
5396 ctables_section_clear (s);
5398 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5400 struct ctables_nest *nest = s->nests[a];
5401 for (size_t i = 0; i < nest->n; i++)
5402 hmap_destroy (&s->occurrences[a][i]);
5403 free (s->occurrences[a]);
/* The pointer is reset after freeing, which makes a later free of the same
   nest member harmless. */
5404 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5406 free (nest->domains[dt]);
5407 nest->domains[dt] = NULL;
5411 hmap_destroy (&s->cells);
5412 for (size_t i = 0; i < N_CTDTS; i++)
5413 hmap_destroy (&s->domains[i]);
/* Discards the data accumulated in T so that it can be filled again: clears
   every section and, when category labels have been moved, empties the set
   of category label values and frees the sorted array.  NOTE(review):
   presumably each value structure is freed as well; that statement is not
   visible here -- confirm. */
5417 ctables_table_clear (struct ctables_table *t)
5419 for (size_t i = 0; i < t->n_sections; i++)
5420 ctables_section_clear (&t->sections[i]);
5422 if (t->clabels_example)
5424 int width = var_get_width (t->clabels_example);
5425 struct ctables_value *value, *next_value;
5426 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5427 &t->clabels_values_map)
5429 value_destroy (&value->value, width);
5430 hmap_delete (&t->clabels_values_map, &value->node);
5433 hmap_shrink (&t->clabels_values_map);
5435 free (t->clabels_values);
5436 t->clabels_values = NULL;
5437 t->n_clabels_values = 0;
/* Reads the cases from INPUT and produces every table of the command.

   Sections are allocated for each table first, one per combination of nests
   across the three axes.  The input is then processed group by group (one
   group per SPLIT FILE combination when splits are output separately,
   otherwise a single group).  Within a group, every case is added to every
   section of every table, and values for moved category labels are
   collected.  After the group has been read, each table is completed with
   empty categories, output, and cleared for the next group.

   Returns the result of destroying the case grouper. */
5442 ctables_execute (struct dataset *ds, struct casereader *input,
5445 for (size_t i = 0; i < ct->n_tables; i++)
5447 struct ctables_table *t = ct->tables[i];
/* Room for the product of the stack sizes, counting an empty stack as 1,
   which is the number of sections ctables_table_add_section() creates. */
5448 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5449 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5450 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5451 sizeof *t->sections);
5452 size_t ix[PIVOT_N_AXES];
5453 ctables_table_add_section (t, 0, ix);
5456 struct dictionary *dict = dataset_dict (ds);
5457 struct casegrouper *grouper
5458 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5459 ? casegrouper_create_splits (input, dict)
5460 : casegrouper_create_vars (input, NULL, 0));
5461 struct casereader *group;
5462 while (casegrouper_get_next_group (grouper, &group))
5464 /* Output SPLIT FILE variables. */
5465 struct ccase *c = casereader_peek (group, 0);
5468 output_split_file_values (ds, c);
5472 bool warn_on_invalid = true;
5473 for (c = casereader_read (group); c;
5474 case_unref (c), c = casereader_read (group))
/* D_WEIGHT is the dictionary case weight.  E_WEIGHT comes from the
   effective-weight variable of the command when there is one.
   NOTE(review): presumably it falls back to D_WEIGHT otherwise -- confirm. */
5476 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5477 double e_weight = (ct->e_weight
5478 ? var_force_valid_weight (ct->e_weight,
5479 case_num (c, ct->e_weight),
5483 for (size_t i = 0; i < ct->n_tables; i++)
5485 struct ctables_table *t = ct->tables[i];
5487 for (size_t j = 0; j < t->n_sections; j++)
5488 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5490 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5491 if (t->label_axis[a] != a)
5492 ctables_insert_clabels_values (t, c, a);
5495 casereader_destroy (group);
5497 for (size_t i = 0; i < ct->n_tables; i++)
5499 struct ctables_table *t = ct->tables[i];
5501 if (t->clabels_example)
5502 ctables_sort_clabels_values (t);
5504 for (size_t j = 0; j < t->n_sections; j++)
5505 ctables_section_add_empty_categories (&t->sections[j]);
5507 ctables_table_output (ct, t);
5508 ctables_table_clear (t);
5511 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression.  Functions of this type are passed as the PARSE_NEXT_LEVEL
   argument of the binary operator parsing helpers. */
5516 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5517 struct dictionary *);
/* Releases the resources held by postcompute expression node E, according
   to its node type. */
5520 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
/* A string category owns its string. */
5526 case CTPO_CAT_STRING:
5527 ss_dealloc (&e->string);
/* A string range owns both of its endpoint strings. */
5530 case CTPO_CAT_SRANGE:
5531 for (size_t i = 0; i < 2; i++)
5532 ss_dealloc (&e->srange[i]);
/* Nodes with subexpressions destroy both of them recursively. */
5541 for (size_t i = 0; i < 2; i++)
5542 ctables_pcexpr_destroy (e->subs[i]);
/* Node types that hold neither strings nor subexpressions. */
5546 case CTPO_CAT_NUMBER:
5547 case CTPO_CAT_NRANGE:
5548 case CTPO_CAT_MISSING:
5549 case CTPO_CAT_OTHERNM:
5550 case CTPO_CAT_SUBTOTAL:
5551 case CTPO_CAT_TOTAL:
/* The node's source location is released as well. */
5555 msg_location_destroy (e->location);
/* Creates a heap-allocated expression node that has SUB0 and SUB1 as its two
   subexpressions.  The node's location is the merge of the locations of the
   two subexpressions. */
5560 static struct ctables_pcexpr *
5561 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5562 struct ctables_pcexpr *sub0,
5563 struct ctables_pcexpr *sub1)
5565 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5566 *e = (struct ctables_pcexpr) {
5568 .subs = { sub0, sub1 },
5569 .location = msg_location_merged (sub0->location, sub1->location),
5574 /* How to parse an operator. */
5577 enum token_type token; /* Lexer token that identifies the operator. */
5578 enum ctables_postcompute_op op; /* Operation for the resulting node. */
/* Searches the N_OPS operators in OPS for one whose token type equals the
   type of LEXER's current token.

   NOTE(review): T_NEG_NUM is treated differently from the other operator
   tokens, presumably because a negative number token must stay in the lexer
   to be parsed as the right-hand operand -- confirm. */
5581 static const struct operator *
5582 ctable_pcexpr_match_operator (struct lexer *lexer,
5583 const struct operator ops[], size_t n_ops)
5585 for (const struct operator *op = ops; op < ops + n_ops; op++)
5586 if (lex_token (lexer) == op->token)
5588 if (op->token != T_NEG_NUM)
/* Parses a sequence of binary operators drawn from the N_OPS operators in
   OPS, given that the left operand LHS has already been parsed.  Each right
   operand is parsed with PARSE_NEXT_LEVEL and combined with the expression
   so far, so that the operators associate to the left.

   CHAIN_WARNING, if nonnull, is a message to issue as a warning when
   operators have been chained. */
5597 static struct ctables_pcexpr *
5598 ctable_pcexpr_parse_binary_operators__ (
5599 struct lexer *lexer, struct dictionary *dict,
5600 const struct operator ops[], size_t n_ops,
5601 parse_recursively_func *parse_next_level,
5602 const char *chain_warning, struct ctables_pcexpr *lhs)
5604 for (int op_count = 0; ; op_count++)
5606 const struct operator *op
5607 = ctable_pcexpr_match_operator (lexer, ops, n_ops);
/* The warning is issued only once OP_COUNT exceeds 1, and it is reported at
   the location of the expression parsed so far. */
5610 if (op_count > 1 && chain_warning)
5611 msg_at (SW, lhs->location, "%s", chain_warning);
5616 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
/* Presumably the failure path for a missing right operand: the expression
   built so far is discarded (confirm). */
5619 ctables_pcexpr_destroy (lhs);
/* The combined node becomes the left operand for the next operator. */
5623 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: parses the first operand
   with PARSE_NEXT_LEVEL, then hands it to
   ctable_pcexpr_parse_binary_operators__() to parse any operators and
   further operands that follow.  The remaining arguments are passed along
   unchanged. */
5627 static struct ctables_pcexpr *
5628 ctable_pcexpr_parse_binary_operators (struct lexer *lexer,
5629 struct dictionary *dict,
5630 const struct operator ops[], size_t n_ops,
5631 parse_recursively_func *parse_next_level,
5632 const char *chain_warning)
5634 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5638 return ctable_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5640 chain_warning, lhs);
/* Forward declaration: ctable_pcexpr_parse_primary() calls this to parse a
   parenthesized subexpression. */
5643 static struct ctables_pcexpr *ctable_pcexpr_parse_add (struct lexer *,
5644 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE node for the numeric range from LOW
   to HIGH. */
5646 static struct ctables_pcexpr
5647 ctpo_cat_nrange (double low, double high)
5649 return (struct ctables_pcexpr) {
5650 .op = CTPO_CAT_NRANGE,
5651 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE node for the string range from LOW to
   HIGH.  The substrings themselves are stored in the node, not copied. */
5655 static struct ctables_pcexpr
5656 ctpo_cat_srange (struct substring low, struct substring high)
5658 return (struct ctables_pcexpr) {
5659 .op = CTPO_CAT_SRANGE,
5660 .srange = { low, high },
/* Parses a primary postcompute expression: a numeric constant, one of the
   keywords MISSING, OTHERNM, TOTAL, or SUBTOTAL, a category specification in
   square brackets, or a parenthesized subexpression.

   The forms that reach the end of the function build their node in local E,
   which is then given a source location and returned as a heap-allocated
   copy. */
5664 static struct ctables_pcexpr *
5665 ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5667 int start_ofs = lex_ofs (lexer);
5668 struct ctables_pcexpr e;
5669 if (lex_is_number (lexer))
5671 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5672 .number = lex_number (lexer) };
5675 else if (lex_match_id (lexer, "MISSING"))
5676 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5677 else if (lex_match_id (lexer, "OTHERNM"))
5678 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5679 else if (lex_match_id (lexer, "TOTAL"))
5680 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5681 else if (lex_match_id (lexer, "SUBTOTAL"))
/* SUBTOTAL may be followed by a positive integer index in square brackets.
   Without one, the index stays 0. */
5683 size_t subtotal_index = 0;
5684 if (lex_match (lexer, T_LBRACK))
5686 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5688 subtotal_index = lex_integer (lexer);
5690 if (!lex_force_match (lexer, T_RBRACK))
5693 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5694 .subtotal_index = subtotal_index };
5696 else if (lex_match (lexer, T_LBRACK))
/* [LO THRU x]: a range with no lower bound.  For strings the missing bound
   is a null substring, for numbers it is -DBL_MAX. */
5698 if (lex_match_id (lexer, "LO"))
5700 if (!lex_force_match_id (lexer, "THRU"))
5703 if (lex_is_string (lexer))
5705 struct substring low = { .string = NULL };
5706 struct substring high = parse_substring (lexer, dict);
5707 e = ctpo_cat_srange (low, high);
5711 if (!lex_force_num (lexer))
5713 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* [number], [number THRU HI], or [number THRU number].  HI stands for
   DBL_MAX. */
5717 else if (lex_is_number (lexer))
5719 double number = lex_number (lexer);
5721 if (lex_match_id (lexer, "THRU"))
5723 if (lex_match_id (lexer, "HI"))
5724 e = ctpo_cat_nrange (number, DBL_MAX);
5727 if (!lex_force_num (lexer))
5729 e = ctpo_cat_nrange (number, lex_number (lexer));
5734 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* [string], [string THRU HI], or [string THRU string].  HI is represented by
   a null substring. */
5737 else if (lex_is_string (lexer))
5739 struct substring s = parse_substring (lexer, dict);
5741 if (lex_match_id (lexer, "THRU"))
5743 struct substring high;
5745 if (lex_match_id (lexer, "HI"))
5746 high = (struct substring) { .string = NULL };
5749 if (!lex_force_string (lexer))
5754 high = parse_substring (lexer, dict);
5757 e = ctpo_cat_srange (s, high);
5760 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5764 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any strings that were parsed
   into E so that they are not leaked. */
5768 if (!lex_force_match (lexer, T_RBRACK))
5770 if (e.op == CTPO_CAT_STRING)
5771 ss_dealloc (&e.string);
5772 else if (e.op == CTPO_CAT_SRANGE)
5774 ss_dealloc (&e.srange[0]);
5775 ss_dealloc (&e.srange[1]);
/* A parenthesized subexpression is parsed from the lowest-precedence level
   and discarded if the closing parenthesis is missing. */
5780 else if (lex_match (lexer, T_LPAREN))
5782 struct ctables_pcexpr *ep = ctable_pcexpr_parse_add (lexer, dict);
5785 if (!lex_force_match (lexer, T_RPAREN))
5787 ctables_pcexpr_destroy (ep);
5794 lex_error (lexer, NULL);
/* The location runs from the first token of the primary through the last
   token consumed. */
5798 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5799 return xmemdup (&e, sizeof e);
/* Creates a heap-allocated expression node for the negation of SUB.  The
   node's location runs from token offset START_OFS in LEXER through the
   token just before LEXER's current one. */
5802 static struct ctables_pcexpr *
5803 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5804 struct lexer *lexer, int start_ofs)
5806 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5807 *e = (struct ctables_pcexpr) {
5810 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level, whose operands are primary expressions.
   A warning about left-associativity is supplied for chained `**'
   operators.

   Ordinarily this just delegates to ctable_pcexpr_parse_binary_operators().
   When the current token is a negative number that is directly followed by
   `**', the number's negation becomes a constant left operand, the
   exponentiation is parsed with that operand, and the result is wrapped in
   a negation node. */
5815 static struct ctables_pcexpr *
5816 ctable_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5818 static const struct operator op = { T_EXP, CTPO_POW };
5820 const char *chain_warning =
5821 _("The exponentiation operator (`**') is left-associative: "
5822 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5823 "To disable this warning, insert parentheses.");
5825 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5826 return ctable_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5827 ctable_pcexpr_parse_primary,
5830 /* Special case for situations like "-5**6", which must be parsed as
5833 int start_ofs = lex_ofs (lexer);
5834 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5835 *lhs = (struct ctables_pcexpr) {
5836 .op = CTPO_CONSTANT,
5837 .number = -lex_tokval (lexer),
5838 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5842 struct ctables_pcexpr *node = ctable_pcexpr_parse_binary_operators__ (
5843 lexer, dict, &op, 1,
5844 ctable_pcexpr_parse_primary, chain_warning, lhs);
5848 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5851 /* Parses the unary minus level.  Without a leading `-', parsing continues
   at the exponentiation level.  Otherwise the operand is parsed by
   recursing into this same level, which allows repeated negation, and the
   result is wrapped in a negation node that is located starting at the
   `-'. */
5852 static struct ctables_pcexpr *
5853 ctable_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5855 int start_ofs = lex_ofs (lexer);
5856 if (!lex_match (lexer, T_DASH))
5857 return ctable_pcexpr_parse_exp (lexer, dict);
5859 struct ctables_pcexpr *inner = ctable_pcexpr_parse_neg (lexer, dict);
5863 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5866 /* Parses the multiplication and division level.  The operands are parsed at
   the unary minus level, and no chain warning is supplied. */
5867 static struct ctables_pcexpr *
5868 ctable_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5870 static const struct operator ops[] =
5872 { T_ASTERISK, CTPO_MUL },
5873 { T_SLASH, CTPO_DIV },
5876 return ctable_pcexpr_parse_binary_operators (lexer, dict, ops,
5877 sizeof ops / sizeof *ops,
5878 ctable_pcexpr_parse_neg, NULL);
5881 /* Parses the addition and subtraction level.  The operands are parsed at
   the multiplication and division level, and no chain warning is supplied.

   A negative number token in operator position is mapped to addition,
   presumably so that input such as `a -5' adds the negative number to `a'
   (confirm against ctable_pcexpr_match_operator()). */
5882 static struct ctables_pcexpr *
5883 ctable_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5885 static const struct operator ops[] =
5887 { T_PLUS, CTPO_ADD },
5888 { T_DASH, CTPO_SUB },
5889 { T_NEG_NUM, CTPO_ADD },
5892 return ctable_pcexpr_parse_binary_operators (lexer, dict,
5893 ops, sizeof ops / sizeof *ops,
5894 ctable_pcexpr_parse_mul, NULL);
/* Searches CT's map of postcomputes for one named NAME.  Both the hash and
   the name comparison are case-insensitive. */
5897 static struct ctables_postcompute *
5898 ctables_find_postcompute (struct ctables *ct, const char *name)
5900 struct ctables_postcompute *pc;
5901 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5902 utf8_hash_case_string (name, 0), &ct->postcomputes)
5903 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form
   `&NAME = EXPR (expression)', and records the postcompute in CT.  When a
   postcompute with the same name already exists, a warning is issued and
   the new definition replaces the old one. */
5909 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The caller has already consumed the PCOMPUTE keyword, so the subcommand
   starts one token before the current one. */
5912 int pcompute_start = lex_ofs (lexer) - 1;
5914 if (!lex_match (lexer, T_AND))
5916 lex_error_expecting (lexer, "&");
5919 if (!lex_force_id (lexer))
5922 char *name = ss_xstrdup (lex_tokss (lexer));
5925 if (!lex_force_match (lexer, T_EQUALS)
5926 || !lex_force_match_id (lexer, "EXPR")
5927 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression itself, which later provides
   the label text. */
5933 int expr_start = lex_ofs (lexer);
5934 struct ctables_pcexpr *expr = ctable_pcexpr_parse_add (lexer, dict);
5935 int expr_end = lex_ofs (lexer) - 1;
/* NOTE(review): if the expression parsed but the closing parenthesis is
   missing, EXPR is nonnull on this failure path -- confirm that it is
   destroyed along with NAME. */
5936 if (!expr || !lex_force_match (lexer, T_RPAREN))
5941 int pcompute_end = lex_ofs (lexer) - 1;
5943 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5946 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, point at the earlier definition, and release its
   expression and location so that they can be replaced. */
5949 msg_at (SW, location, _("New definition of &%s will override the "
5950 "previous definition."),
5952 msg_at (SN, pc->location, _("This is the previous definition."));
5954 ctables_pcexpr_destroy (pc->expr);
5955 msg_location_destroy (pc->location);
/* New definition: create the postcompute, which takes ownership of NAME, and
   add it to CT's map under the case-insensitive hash of its name. */
5960 pc = xmalloc (sizeof *pc);
5961 *pc = (struct ctables_postcompute) { .name = name };
5962 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5963 utf8_hash_case_string (pc->name, 0));
5966 pc->location = location;
/* The label assigned here is the source text of the expression. */
5968 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the FORMAT setting of PPROPERTIES from LEXER into SSS, as a
   sequence of summary functions that are each followed by a format.  SSS is
   initialized here, so the caller need not initialize it.

   Parsing stops at the end of the command, at a slash, or at a LABEL or
   HIDESOURCECATS keyword, since those start the next setting. */
5973 ctables_parse_pproperties_format (struct lexer *lexer,
5974 struct ctables_summary_spec_set *sss)
5976 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5978 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5979 && !(lex_token (lexer) == T_ID
5980 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5981 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5982 lex_tokss (lexer)))))
5984 /* Parse function. */
5985 enum ctables_summary_function function;
5986 if (!parse_ctables_summary_function (lexer, &function))
5989 /* Parse percentile.  Only PTILE takes one, a number from 0 to 100;
   for other functions the percentile stays 0. */
5990 double percentile = 0;
5991 if (function == CTSF_PTILE)
5993 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5995 percentile = lex_number (lexer);
6000 struct fmt_spec format;
6001 bool is_ctables_format;
6002 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new specification, growing the array first if it is full. */
6005 if (sss->n >= sss->allocated)
6006 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
6007 sizeof *sss->specs);
6008 sss->specs[sss->n++] = (struct ctables_summary_spec) {
6009 .function = function,
6010 .percentile = percentile,
6012 .is_ctables_format = is_ctables_format,
/* Releases whatever was accumulated in SSS (presumably on the error path
   only -- confirm). */
6018 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of `&NAME' references
   to postcomputes already defined in CT, followed by LABEL, FORMAT, and
   HIDESOURCECATS settings.  Each setting is applied to every postcompute in
   the list. */
6023 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* PCS collects pointers to the referenced postcomputes.

   NOTE(review): confirm that PCS is freed on every exit path, including the
   failure paths. */
6025 struct ctables_postcompute **pcs = NULL;
6027 size_t allocated_pcs = 0;
6029 while (lex_match (lexer, T_AND))
6031 if (!lex_force_id (lexer))
6033 struct ctables_postcompute *pc
6034 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6037 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6042 if (n_pcs >= allocated_pcs)
6043 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings continue until a slash or the end of the command. */
6047 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6049 if (lex_match_id (lexer, "LABEL"))
6051 lex_match (lexer, T_EQUALS);
6052 if (!lex_force_string (lexer))
/* Every postcompute gets its own copy of the label string, replacing any
   label it had before. */
6055 for (size_t i = 0; i < n_pcs; i++)
6057 free (pcs[i]->label);
6058 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6063 else if (lex_match_id (lexer, "FORMAT"))
6065 lex_match (lexer, T_EQUALS);
6067 struct ctables_summary_spec_set sss;
6068 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Every postcompute gets its own clone of the parsed specification set, so
   the parsed original can be released afterward. */
6071 for (size_t i = 0; i < n_pcs; i++)
6074 ctables_summary_spec_set_uninit (pcs[i]->specs);
6076 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6077 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6079 ctables_summary_spec_set_uninit (&sss);
6081 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6083 lex_match (lexer, T_EQUALS);
6084 bool hide_source_cats;
6085 if (!parse_bool (lexer, &hide_source_cats))
6087 for (size_t i = 0; i < n_pcs; i++)
6088 pcs[i]->hide_source_cats = hide_source_cats;
6092 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, broken down as local time and formatted
   according to the strftime() format string FORMAT.

   Nothing is appended if NOW cannot be converted to local time, or if
   strftime() returns 0.  In the latter case the C standard leaves the
   contents of the buffer indeterminate, so they must not be read. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  /* localtime() returns a null pointer when the conversion fails. */
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
/* If S begins with PREFIX, advances S past the prefix.  Callers use the
   result as an indication of whether the prefix was present. */
6114 skip_prefix (struct substring *s, struct substring prefix)
6116 if (ss_starts_with (*s, prefix))
6118 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens in LEXER with offsets
   from EXPR_START up to, but not including, EXPR_END.  Identifiers that
   name a variable in DICT are replaced by the variable's label, if it has
   one; other tokens are copied as they appear in the source. */
6126 put_table_expression (struct string *out, struct lexer *lexer,
6127 struct dictionary *dict, int expr_start, int expr_end)
6130 for (int ofs = expr_start; ofs < expr_end; ofs++)
6132 const struct token *t = lex_ofs_token (lexer, ofs);
/* Square brackets are tracked through NEST, presumably so that the tokens
   between them are left out of the output (confirm). */
6133 if (t->type == T_LBRACK)
6135 else if (t->type == T_RBRACK && nest > 0)
6141 else if (t->type == T_ID)
/* Prefer the variable label, falling back to the identifier as written. */
6143 const struct variable *var
6144 = dict_lookup_var (dict, t->string.string);
6145 const char *label = var ? var_get_label (var) : NULL;
6146 ds_put_cstr (out, label ? label : t->string.string);
/* Any other token: add a space before it unless it is the first token or a
   right parenthesis, or the output already ends in a space. */
6150 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6151 ds_put_byte (out, ' ');
6153 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6154 ds_put_cstr (out, repr);
/* Add a space after it unless it is the last token or a left parenthesis. */
6157 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6158 ds_put_byte (out, ' ');
/* Appends title text IN to OUT, expanding the special sequences `)DATE',
   `)TIME', and `)TABLE'.  NOW supplies the date and time.  LEXER, DICT,
   EXPR_START, and EXPR_END identify the table expression that `)TABLE'
   expands to. */
6164 put_title_text (struct string *out, struct substring in, time_t now,
6165 struct lexer *lexer, struct dictionary *dict,
6166 int expr_start, int expr_end)
/* Copy everything up to the next right parenthesis verbatim. */
6170 size_t chunk = ss_find_byte (in, ')');
6171 ds_put_substring (out, ss_head (in, chunk));
6172 ss_advance (&in, chunk);
6173 if (ss_is_empty (in))
/* IN now starts with `)'.  `%x' and `%X' are strftime()'s locale-specific
   date and time representations. */
6176 if (skip_prefix (&in, ss_cstr (")DATE")))
6177 put_strftime (out, now, "%x");
6178 else if (skip_prefix (&in, ss_cstr (")TIME")))
6179 put_strftime (out, now, "%X");
6180 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6181 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a special sequence: copy the right parenthesis literally. */
6184 ds_put_byte (out, ')');
6185 ss_advance (&in, 1);
6191 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6193 struct casereader *input = NULL;
6195 struct measure_guesser *mg = measure_guesser_create (ds);
6198 input = proc_open (ds);
6199 measure_guesser_run (mg, input);
6200 measure_guesser_destroy (mg);
6203 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6204 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6205 enum settings_value_show tvars = settings_get_show_variables ();
6206 for (size_t i = 0; i < n_vars; i++)
6207 vlabels[i] = (enum ctables_vlabel) tvars;
6209 struct pivot_table_look *look = pivot_table_look_unshare (
6210 pivot_table_look_ref (pivot_table_look_get_default ()));
6211 look->omit_empty = false;
6213 struct ctables *ct = xmalloc (sizeof *ct);
6214 *ct = (struct ctables) {
6215 .dict = dataset_dict (ds),
6217 .ctables_formats = FMT_SETTINGS_INIT,
6219 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6222 time_t now = time (NULL);
6227 const char *dot_string;
6228 const char *comma_string;
6230 static const struct ctf ctfs[4] = {
6231 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6232 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6233 { CTEF_PAREN, "-,(,),", "-.(.)." },
6234 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6236 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6237 for (size_t i = 0; i < 4; i++)
6239 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6240 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6241 fmt_number_style_from_string (s));
6244 if (!lex_force_match (lexer, T_SLASH))
6247 while (!lex_match_id (lexer, "TABLE"))
6249 if (lex_match_id (lexer, "FORMAT"))
6251 double widths[2] = { SYSMIS, SYSMIS };
6252 double units_per_inch = 72.0;
6254 while (lex_token (lexer) != T_SLASH)
6256 if (lex_match_id (lexer, "MINCOLWIDTH"))
6258 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6261 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6263 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6266 else if (lex_match_id (lexer, "UNITS"))
6268 lex_match (lexer, T_EQUALS);
6269 if (lex_match_id (lexer, "POINTS"))
6270 units_per_inch = 72.0;
6271 else if (lex_match_id (lexer, "INCHES"))
6272 units_per_inch = 1.0;
6273 else if (lex_match_id (lexer, "CM"))
6274 units_per_inch = 2.54;
6277 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6281 else if (lex_match_id (lexer, "EMPTY"))
6286 lex_match (lexer, T_EQUALS);
6287 if (lex_match_id (lexer, "ZERO"))
6289 /* Nothing to do. */
6291 else if (lex_match_id (lexer, "BLANK"))
6292 ct->zero = xstrdup ("");
6293 else if (lex_force_string (lexer))
6295 ct->zero = ss_xstrdup (lex_tokss (lexer));
6301 else if (lex_match_id (lexer, "MISSING"))
6303 lex_match (lexer, T_EQUALS);
6304 if (!lex_force_string (lexer))
6308 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6309 ? ss_xstrdup (lex_tokss (lexer))
6315 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6316 "UNITS", "EMPTY", "MISSING");
6321 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6322 && widths[0] > widths[1])
6324 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6328 for (size_t i = 0; i < 2; i++)
6329 if (widths[i] != SYSMIS)
6331 int *wr = ct->look->width_ranges[TABLE_HORZ];
6332 wr[i] = widths[i] / units_per_inch * 96.0;
6337 else if (lex_match_id (lexer, "VLABELS"))
6339 if (!lex_force_match_id (lexer, "VARIABLES"))
6341 lex_match (lexer, T_EQUALS);
6343 struct variable **vars;
6345 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6349 if (!lex_force_match_id (lexer, "DISPLAY"))
6354 lex_match (lexer, T_EQUALS);
6356 enum ctables_vlabel vlabel;
6357 if (lex_match_id (lexer, "DEFAULT"))
6358 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6359 else if (lex_match_id (lexer, "NAME"))
6361 else if (lex_match_id (lexer, "LABEL"))
6362 vlabel = CTVL_LABEL;
6363 else if (lex_match_id (lexer, "BOTH"))
6365 else if (lex_match_id (lexer, "NONE"))
6369 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6375 for (size_t i = 0; i < n_vars; i++)
6376 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6379 else if (lex_match_id (lexer, "MRSETS"))
6381 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6383 lex_match (lexer, T_EQUALS);
6384 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6387 else if (lex_match_id (lexer, "SMISSING"))
6389 if (lex_match_id (lexer, "VARIABLE"))
6390 ct->smissing_listwise = false;
6391 else if (lex_match_id (lexer, "LISTWISE"))
6392 ct->smissing_listwise = true;
6395 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6399 else if (lex_match_id (lexer, "PCOMPUTE"))
6401 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6404 else if (lex_match_id (lexer, "PPROPERTIES"))
6406 if (!ctables_parse_pproperties (lexer, ct))
6409 else if (lex_match_id (lexer, "WEIGHT"))
6411 if (!lex_force_match_id (lexer, "VARIABLE"))
6413 lex_match (lexer, T_EQUALS);
6414 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6418 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6420 if (lex_match_id (lexer, "COUNT"))
6422 lex_match (lexer, T_EQUALS);
6423 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6426 ct->hide_threshold = lex_integer (lexer);
6429 else if (ct->hide_threshold == 0)
6430 ct->hide_threshold = 5;
6434 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6435 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6436 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6440 if (!lex_force_match (lexer, T_SLASH))
6444 size_t allocated_tables = 0;
6447 if (ct->n_tables >= allocated_tables)
6448 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6449 sizeof *ct->tables);
6451 struct ctables_category *cat = xmalloc (sizeof *cat);
6452 *cat = (struct ctables_category) {
6454 .include_missing = false,
6455 .sort_ascending = true,
6458 struct ctables_categories *c = xmalloc (sizeof *c);
6459 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6460 *c = (struct ctables_categories) {
6467 struct ctables_categories **categories = xnmalloc (n_vars,
6468 sizeof *categories);
6469 for (size_t i = 0; i < n_vars; i++)
6472 struct ctables_table *t = xmalloc (sizeof *t);
6473 *t = (struct ctables_table) {
6475 .slabels_axis = PIVOT_AXIS_COLUMN,
6476 .slabels_visible = true,
6477 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6479 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6480 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6481 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6483 .clabels_from_axis = PIVOT_AXIS_LAYER,
6484 .categories = categories,
6485 .n_categories = n_vars,
6488 ct->tables[ct->n_tables++] = t;
6490 lex_match (lexer, T_EQUALS);
6491 int expr_start = lex_ofs (lexer);
6492 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6494 if (lex_match (lexer, T_BY))
6496 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6497 ct, t, PIVOT_AXIS_COLUMN))
6500 if (lex_match (lexer, T_BY))
6502 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6503 ct, t, PIVOT_AXIS_LAYER))
6507 int expr_end = lex_ofs (lexer);
6509 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6510 && !t->axes[PIVOT_AXIS_LAYER])
6512 lex_error (lexer, _("At least one variable must be specified."));
6516 const struct ctables_axis *scales[PIVOT_N_AXES];
6517 size_t n_scales = 0;
6518 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6520 scales[a] = find_scale (t->axes[a]);
6526 msg (SE, _("Scale variables may appear only on one axis."));
6527 if (scales[PIVOT_AXIS_ROW])
6528 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6529 _("This scale variable appears on the rows axis."));
6530 if (scales[PIVOT_AXIS_COLUMN])
6531 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6532 _("This scale variable appears on the columns axis."));
6533 if (scales[PIVOT_AXIS_LAYER])
6534 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6535 _("This scale variable appears on the layer axis."));
6539 const struct ctables_axis *summaries[PIVOT_N_AXES];
6540 size_t n_summaries = 0;
6541 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6543 summaries[a] = (scales[a]
6545 : find_categorical_summary_spec (t->axes[a]));
6549 if (n_summaries > 1)
6551 msg (SE, _("Summaries may appear only on one axis."));
6552 if (summaries[PIVOT_AXIS_ROW])
6553 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6554 _("This variable on the rows axis has a summary."));
6555 if (summaries[PIVOT_AXIS_COLUMN])
6556 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6557 _("This variable on the columns axis has a summary."));
6558 if (summaries[PIVOT_AXIS_LAYER])
6559 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6560 _("This variable on the layers axis has a summary."));
6563 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6564 if (n_summaries ? summaries[a] : t->axes[a])
6566 t->summary_axis = a;
6570 if (lex_token (lexer) == T_ENDCMD)
6572 if (!ctables_prepare_table (t))
6576 if (!lex_force_match (lexer, T_SLASH))
6579 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6581 if (lex_match_id (lexer, "SLABELS"))
6583 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6585 if (lex_match_id (lexer, "POSITION"))
6587 lex_match (lexer, T_EQUALS);
6588 if (lex_match_id (lexer, "COLUMN"))
6589 t->slabels_axis = PIVOT_AXIS_COLUMN;
6590 else if (lex_match_id (lexer, "ROW"))
6591 t->slabels_axis = PIVOT_AXIS_ROW;
6592 else if (lex_match_id (lexer, "LAYER"))
6593 t->slabels_axis = PIVOT_AXIS_LAYER;
6596 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6600 else if (lex_match_id (lexer, "VISIBLE"))
6602 lex_match (lexer, T_EQUALS);
6603 if (!parse_bool (lexer, &t->slabels_visible))
6608 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6613 else if (lex_match_id (lexer, "CLABELS"))
6615 if (lex_match_id (lexer, "AUTO"))
6617 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6618 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6620 else if (lex_match_id (lexer, "ROWLABELS"))
6622 lex_match (lexer, T_EQUALS);
6623 if (lex_match_id (lexer, "OPPOSITE"))
6624 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6625 else if (lex_match_id (lexer, "LAYER"))
6626 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6629 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6633 else if (lex_match_id (lexer, "COLLABELS"))
6635 lex_match (lexer, T_EQUALS);
6636 if (lex_match_id (lexer, "OPPOSITE"))
6637 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6638 else if (lex_match_id (lexer, "LAYER"))
6639 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6642 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6648 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6653 else if (lex_match_id (lexer, "CRITERIA"))
6655 if (!lex_force_match_id (lexer, "CILEVEL"))
6657 lex_match (lexer, T_EQUALS);
6659 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6661 t->cilevel = lex_number (lexer);
6664 else if (lex_match_id (lexer, "CATEGORIES"))
6666 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6670 else if (lex_match_id (lexer, "TITLES"))
6675 if (lex_match_id (lexer, "CAPTION"))
6676 textp = &t->caption;
6677 else if (lex_match_id (lexer, "CORNER"))
6679 else if (lex_match_id (lexer, "TITLE"))
6683 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6686 lex_match (lexer, T_EQUALS);
6688 struct string s = DS_EMPTY_INITIALIZER;
6689 while (lex_is_string (lexer))
6691 if (!ds_is_empty (&s))
6692 ds_put_byte (&s, ' ');
6693 put_title_text (&s, lex_tokss (lexer), now,
6694 lexer, dataset_dict (ds),
6695 expr_start, expr_end);
6699 *textp = ds_steal_cstr (&s);
6701 while (lex_token (lexer) != T_SLASH
6702 && lex_token (lexer) != T_ENDCMD);
6704 else if (lex_match_id (lexer, "SIGTEST"))
6708 t->chisq = xmalloc (sizeof *t->chisq);
6709 *t->chisq = (struct ctables_chisq) {
6711 .include_mrsets = true,
6712 .all_visible = true,
6718 if (lex_match_id (lexer, "TYPE"))
6720 lex_match (lexer, T_EQUALS);
6721 if (!lex_force_match_id (lexer, "CHISQUARE"))
6724 else if (lex_match_id (lexer, "ALPHA"))
6726 lex_match (lexer, T_EQUALS);
6727 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6729 t->chisq->alpha = lex_number (lexer);
6732 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6734 lex_match (lexer, T_EQUALS);
6735 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6738 else if (lex_match_id (lexer, "CATEGORIES"))
6740 lex_match (lexer, T_EQUALS);
6741 if (lex_match_id (lexer, "ALLVISIBLE"))
6742 t->chisq->all_visible = true;
6743 else if (lex_match_id (lexer, "SUBTOTALS"))
6744 t->chisq->all_visible = false;
6747 lex_error_expecting (lexer,
6748 "ALLVISIBLE", "SUBTOTALS");
6754 lex_error_expecting (lexer, "TYPE", "ALPHA",
6755 "INCLUDEMRSETS", "CATEGORIES");
6759 while (lex_token (lexer) != T_SLASH
6760 && lex_token (lexer) != T_ENDCMD);
6762 else if (lex_match_id (lexer, "COMPARETEST"))
6766 t->pairwise = xmalloc (sizeof *t->pairwise);
6767 *t->pairwise = (struct ctables_pairwise) {
6769 .alpha = { .05, .05 },
6770 .adjust = BONFERRONI,
6771 .include_mrsets = true,
6772 .meansvariance_allcats = true,
6773 .all_visible = true,
6782 if (lex_match_id (lexer, "TYPE"))
6784 lex_match (lexer, T_EQUALS);
6785 if (lex_match_id (lexer, "PROP"))
6786 t->pairwise->type = PROP;
6787 else if (lex_match_id (lexer, "MEAN"))
6788 t->pairwise->type = MEAN;
6791 lex_error_expecting (lexer, "PROP", "MEAN");
6795 else if (lex_match_id (lexer, "ALPHA"))
6797 lex_match (lexer, T_EQUALS);
6799 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6801 double a0 = lex_number (lexer);
6804 lex_match (lexer, T_COMMA);
6805 if (lex_is_number (lexer))
6807 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6809 double a1 = lex_number (lexer);
6812 t->pairwise->alpha[0] = MIN (a0, a1);
6813 t->pairwise->alpha[1] = MAX (a0, a1);
6816 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6818 else if (lex_match_id (lexer, "ADJUST"))
6820 lex_match (lexer, T_EQUALS);
6821 if (lex_match_id (lexer, "BONFERRONI"))
6822 t->pairwise->adjust = BONFERRONI;
6823 else if (lex_match_id (lexer, "BH"))
6824 t->pairwise->adjust = BH;
6825 else if (lex_match_id (lexer, "NONE"))
6826 t->pairwise->adjust = 0;
6829 lex_error_expecting (lexer, "BONFERRONI", "BH",
6834 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6836 lex_match (lexer, T_EQUALS);
6837 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6840 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6842 lex_match (lexer, T_EQUALS);
6843 if (lex_match_id (lexer, "ALLCATS"))
6844 t->pairwise->meansvariance_allcats = true;
6845 else if (lex_match_id (lexer, "TESTEDCATS"))
6846 t->pairwise->meansvariance_allcats = false;
6849 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6853 else if (lex_match_id (lexer, "CATEGORIES"))
6855 lex_match (lexer, T_EQUALS);
6856 if (lex_match_id (lexer, "ALLVISIBLE"))
6857 t->pairwise->all_visible = true;
6858 else if (lex_match_id (lexer, "SUBTOTALS"))
6859 t->pairwise->all_visible = false;
6862 lex_error_expecting (lexer, "ALLVISIBLE",
6867 else if (lex_match_id (lexer, "MERGE"))
6869 lex_match (lexer, T_EQUALS);
6870 if (!parse_bool (lexer, &t->pairwise->merge))
6873 else if (lex_match_id (lexer, "STYLE"))
6875 lex_match (lexer, T_EQUALS);
6876 if (lex_match_id (lexer, "APA"))
6877 t->pairwise->apa_style = true;
6878 else if (lex_match_id (lexer, "SIMPLE"))
6879 t->pairwise->apa_style = false;
6882 lex_error_expecting (lexer, "APA", "SIMPLE");
6886 else if (lex_match_id (lexer, "SHOWSIG"))
6888 lex_match (lexer, T_EQUALS);
6889 if (!parse_bool (lexer, &t->pairwise->show_sig))
6894 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6895 "INCLUDEMRSETS", "MEANSVARIANCE",
6896 "CATEGORIES", "MERGE", "STYLE",
6901 while (lex_token (lexer) != T_SLASH
6902 && lex_token (lexer) != T_ENDCMD);
6906 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6907 "CRITERIA", "CATEGORIES", "TITLES",
6908 "SIGTEST", "COMPARETEST");
6912 if (!lex_match (lexer, T_SLASH))
6916 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6917 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6919 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6923 if (!ctables_prepare_table (t))
6926 while (lex_token (lexer) != T_ENDCMD);
6929 input = proc_open (ds);
6930 bool ok = ctables_execute (ds, input, ct);
6931 ok = proc_commit (ds) && ok;
6933 ctables_destroy (ct);
6934 return ok ? CMD_SUCCESS : CMD_FAILURE;
6939 ctables_destroy (ct);