1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
/* Message macros: _() passes MSGID through gettext(), whereas N_() expands to
   MSGID unchanged. */
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
167 #if 0 /* Multiple response sets not yet implemented, so these entries are compiled out. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition.  These are kept in struct ctables's
   'postcomputes' hmap. */
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr; /* Root of the expression (a struct ctables_pcexpr). */
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
448 static void ctables_stack_uninit (struct ctables_stack *);
452 struct hmap_node node;
457 struct ctables_occurrence
459 struct hmap_node node;
463 struct ctables_section
466 struct ctables_table *table;
467 struct ctables_nest *nests[PIVOT_N_AXES];
470 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
471 struct hmap cells; /* Contains "struct ctables_cell"s. */
472 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 static void ctables_section_uninit (struct ctables_section *);
479 struct ctables *ctables;
480 struct ctables_axis *axes[PIVOT_N_AXES];
481 struct ctables_stack stacks[PIVOT_N_AXES];
482 struct ctables_section *sections;
484 enum pivot_axis_type summary_axis;
485 struct ctables_summary_spec_set summary_specs;
486 struct variable **sum_vars;
489 enum pivot_axis_type slabels_axis;
490 bool slabels_visible;
492 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
494 Most commonly, label_axis[a] == a, and in particular we always have
495 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
497 If ROWLABELS or COLLABELS is specified, then one of
498 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
499 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
501 If any category labels are moved, then 'clabels_example' is one of the
502 variables being moved (and it is otherwise NULL). All of the variables
503 being moved have the same width, value labels, and categories, so this
504 example variable can be used to find those out.
506 The remaining members in this group are relevant only if category labels
509 'clabels_values_map' holds a "struct ctables_value" for all the values
510 that appear in all of the variables in the moved categories. It is
511 accumulated as the data is read. Once the data is fully read, its
512 sorted values are put into 'clabels_values' and 'n_clabels_values'.
514 enum pivot_axis_type label_axis[PIVOT_N_AXES];
515 enum pivot_axis_type clabels_from_axis;
516 const struct variable *clabels_example;
517 struct hmap clabels_values_map;
518 struct ctables_value **clabels_values;
519 size_t n_clabels_values;
521 /* Indexed by variable dictionary index. */
522 struct ctables_categories **categories;
531 struct ctables_chisq *chisq;
532 struct ctables_pairwise *pairwise;
535 struct ctables_categories
538 struct ctables_category *cats;
543 struct ctables_category
545 enum ctables_category_type
547 /* Explicit category lists. */
550 CCT_NRANGE, /* Numerical range. */
551 CCT_SRANGE, /* String range. */
556 /* Totals and subtotals. */
560 /* Implicit category lists. */
565 /* For contributing to TOTALN. */
566 CCT_EXCLUDED_MISSING,
570 struct ctables_category *subtotal;
576 double number; /* CCT_NUMBER. */
577 struct substring string; /* CCT_STRING, in dictionary encoding. */
578 double nrange[2]; /* CCT_NRANGE. */
579 struct substring srange[2]; /* CCT_SRANGE. */
583 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
584 bool hide_subcategories; /* CCT_SUBTOTAL. */
587 /* CCT_POSTCOMPUTE. */
590 const struct ctables_postcompute *pc;
591 enum fmt_type parse_format;
594 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
597 bool include_missing;
601 enum ctables_summary_function sort_function;
602 struct variable *sort_var;
607 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
608 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
609 struct msg_location *location;
/* Releases the resources held inside CAT: its source location and, depending
   on the category type, its string, its two string-range endpoints, or its
   total label.  Nothing visible here frees CAT itself.

   NOTE(review): several case labels fall outside this excerpt, so the exact
   mapping from category type to released member should be confirmed against
   the full switch. */
613 ctables_category_uninit (struct ctables_category *cat)
618 msg_location_destroy (cat->location);
625 case CCT_POSTCOMPUTE:
629 ss_dealloc (&cat->string);
633 ss_dealloc (&cat->srange[0]);
634 ss_dealloc (&cat->srange[1]);
639 free (cat->total_label);
647 case CCT_EXCLUDED_MISSING:
/* Returns true if A and B are equal, treating a substring whose 'string'
   member is null as equal only to another substring whose 'string' member is
   also null.  Two nonnull substrings are compared with ss_equals(). */
653 nullable_substring_equal (const struct substring *a,
654 const struct substring *b)
656 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  Their types are compared first.  After that,
   what is compared depends on the type: the number, the string, both numeric
   range endpoints, both (possibly null) string range endpoints, the
   postcompute pointer, the total label, or the combination of the
   missing-value, sort, and percentile settings.

   NOTE(review): most of the case labels, and the result for mismatched types,
   fall outside this excerpt; confirm against the full switch. */
660 ctables_category_equal (const struct ctables_category *a,
661 const struct ctables_category *b)
663 if (a->type != b->type)
669 return a->number == b->number;
672 return ss_equals (a->string, b->string);
675 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
678 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
679 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
685 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by pointer identity. */
686 return a->pc == b->pc;
690 return !strcmp (a->total_label, b->total_label);
695 return (a->include_missing == b->include_missing
696 && a->sort_ascending == b->sort_ascending
697 && a->sort_function == b->sort_function
698 && a->sort_var == b->sort_var
699 && a->percentile == b->percentile);
701 case CCT_EXCLUDED_MISSING:
/* Part of the reference counting for struct ctables_categories.  The visible
   code asserts that C->n_refs is positive and uninitializes each of the
   C->n_cats categories in C->cats.

   NOTE(review): the reference count decrement and any freeing of C are
   outside this excerpt; confirm when the categories are actually torn
   down. */
709 ctables_categories_unref (struct ctables_categories *c)
714 assert (c->n_refs > 0);
718 for (size_t i = 0; i < c->n_cats; i++)
719 ctables_category_uninit (&c->cats[i]);
/* Compares category sets A and B.  Their category counts and 'show_empty'
   settings are checked first, then corresponding categories are compared
   pairwise, in order, with ctables_category_equal().

   NOTE(review): the return statements are outside this excerpt. */
725 ctables_categories_equal (const struct ctables_categories *a,
726 const struct ctables_categories *b)
728 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
731 for (size_t i = 0; i < a->n_cats; i++)
732 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
738 /* Chi-square test (SIGTEST). */
746 /* Pairwise comparison test (COMPARETEST). */
747 struct ctables_pairwise
749 enum { PROP, MEAN } type;
752 bool meansvariance_allcats;
754 enum { BONFERRONI = 1, BH } adjust;
778 struct variable *var;
780 struct ctables_summary_spec_set specs[N_CSVS];
784 struct ctables_axis *subs[2];
787 struct msg_location *loc;
790 static void ctables_axis_destroy (struct ctables_axis *);
799 enum ctables_function_availability
801 CTFA_ALL, /* Any variables. */
802 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
803 //CTFA_MRSETS, /* Only multiple-response sets */
806 struct ctables_summary_spec
808 enum ctables_summary_function function;
809 double percentile; /* CTSF_PTILE only. */
812 struct fmt_spec format;
813 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Copies summary spec SRC into DST.  The visible part sets DST's label from
   SRC's label via xstrdup_if_nonnull().

   NOTE(review): how the remaining members are copied is outside this
   excerpt. */
820 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
821 const struct ctables_summary_spec *src)
824 dst->label = xstrdup_if_nonnull (src->label);
828 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Makes DST a copy of SRC.  A new array of SRC->n specs is allocated and
   each element is cloned with ctables_summary_spec_clone(), and 'is_scale'
   is copied directly.

   NOTE(review): the other members of the initializer are outside this
   excerpt. */
835 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
836 const struct ctables_summary_spec_set *src)
838 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
839 for (size_t i = 0; i < src->n; i++)
840 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
842 *dst = (struct ctables_summary_spec_set) {
847 .is_scale = src->is_scale,
/* Uninitializes each of the SET->n summary specs in SET with
   ctables_summary_spec_uninit().

   NOTE(review): any freeing of the 'specs' array or other members is outside
   this excerpt. */
852 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
854 for (size_t i = 0; i < set->n; i++)
855 ctables_summary_spec_uninit (&set->specs[i]);
/* Parses a column width from LEXER: an optional equals sign, followed by
   either the keyword DEFAULT or a number that lex_force_num_range_closed()
   accepts for the range 0 to DBL_MAX.  NAME is passed through to that range
   check.  A parsed number is stored in *WIDTH.

   NOTE(review): the value stored for DEFAULT and the return value are
   outside this excerpt. */
860 parse_col_width (struct lexer *lexer, const char *name, double *width)
/* The equals sign is optional, so the result of the match is ignored. */
862 lex_match (lexer, T_EQUALS);
863 if (lex_match_id (lexer, "DEFAULT"))
865 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
867 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER.  If neither is present, reports
   an error saying that one of them was expected.

   NOTE(review): the assignments to *B and the return value are outside this
   excerpt. */
877 parse_bool (struct lexer *lexer, bool *b)
879 if (lex_match_id (lexer, "NO"))
881 else if (lex_match_id (lexer, "YES"))
885 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class for summary function F.  The lookup table
   is indexed by function; the local S() definition maps each entry's ENUM to
   its AVAILABILITY column. */
891 static enum ctables_function_availability
892 ctables_function_availability (enum ctables_summary_function f)
894 static enum ctables_function_availability availability[] = {
895 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
900 return availability[f];
904 ctables_summary_function_is_count (enum ctables_summary_function f)
910 case CTSF_ROWPCT_COUNT:
911 case CTSF_COLPCT_COUNT:
912 case CTSF_TABLEPCT_COUNT:
913 case CTSF_SUBTABLEPCT_COUNT:
914 case CTSF_LAYERPCT_COUNT:
915 case CTSF_LAYERROWPCT_COUNT:
916 case CTSF_LAYERCOLPCT_COUNT:
918 case CTSF_UROWPCT_COUNT:
919 case CTSF_UCOLPCT_COUNT:
920 case CTSF_UTABLEPCT_COUNT:
921 case CTSF_USUBTABLEPCT_COUNT:
922 case CTSF_ULAYERPCT_COUNT:
923 case CTSF_ULAYERROWPCT_COUNT:
924 case CTSF_ULAYERCOLPCT_COUNT:
927 case CTSF_ROWPCT_VALIDN:
928 case CTSF_COLPCT_VALIDN:
929 case CTSF_TABLEPCT_VALIDN:
930 case CTSF_SUBTABLEPCT_VALIDN:
931 case CTSF_LAYERPCT_VALIDN:
932 case CTSF_LAYERROWPCT_VALIDN:
933 case CTSF_LAYERCOLPCT_VALIDN:
934 case CTSF_ROWPCT_TOTALN:
935 case CTSF_COLPCT_TOTALN:
936 case CTSF_TABLEPCT_TOTALN:
937 case CTSF_SUBTABLEPCT_TOTALN:
938 case CTSF_LAYERPCT_TOTALN:
939 case CTSF_LAYERROWPCT_TOTALN:
940 case CTSF_LAYERCOLPCT_TOTALN:
957 case CTSF_ROWPCT_SUM:
958 case CTSF_COLPCT_SUM:
959 case CTSF_TABLEPCT_SUM:
960 case CTSF_SUBTABLEPCT_SUM:
961 case CTSF_LAYERPCT_SUM:
962 case CTSF_LAYERROWPCT_SUM:
963 case CTSF_LAYERCOLPCT_SUM:
964 case CTSF_UROWPCT_VALIDN:
965 case CTSF_UCOLPCT_VALIDN:
966 case CTSF_UTABLEPCT_VALIDN:
967 case CTSF_USUBTABLEPCT_VALIDN:
968 case CTSF_ULAYERPCT_VALIDN:
969 case CTSF_ULAYERROWPCT_VALIDN:
970 case CTSF_ULAYERCOLPCT_VALIDN:
971 case CTSF_UROWPCT_TOTALN:
972 case CTSF_UCOLPCT_TOTALN:
973 case CTSF_UTABLEPCT_TOTALN:
974 case CTSF_USUBTABLEPCT_TOTALN:
975 case CTSF_ULAYERPCT_TOTALN:
976 case CTSF_ULAYERROWPCT_TOTALN:
977 case CTSF_ULAYERCOLPCT_TOTALN:
989 case CTSF_UROWPCT_SUM:
990 case CTSF_UCOLPCT_SUM:
991 case CTSF_UTABLEPCT_SUM:
992 case CTSF_USUBTABLEPCT_SUM:
993 case CTSF_ULAYERPCT_SUM:
994 case CTSF_ULAYERROWPCT_SUM:
995 case CTSF_ULAYERCOLPCT_SUM:
/* Parses a summary function name from LEXER and stores the matching function
   in *F.

   The name table pairs each function with its name via the local S()
   definition, and adds aliases for the *PCT.COUNT functions without the
   .COUNT suffix.  The current token must be an identifier; it is compared
   against each table entry with ss_equals_case().  If nothing matches, the
   error "Expecting summary function name." is reported.

   NOTE(review): the return statements and any token consumption are outside
   this excerpt. */
1003 parse_ctables_summary_function (struct lexer *lexer,
1004 enum ctables_summary_function *f)
1008 enum ctables_summary_function function;
1009 struct substring name;
1011 static struct pair names[] = {
1012 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
1013 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
1016 /* The .COUNT suffix may be omitted. */
1017 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
1018 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
1019 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
1020 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
1021 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
1022 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
1023 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
1027 if (!lex_force_id (lexer))
/* Linear search of the whole name table, aliases included. */
1030 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
1031 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
1033 *f = names[i].function;
1038 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  The visible code uninitializes each of its N_CSVS summary
   spec sets, recursively destroys both sub-axes, and destroys its source
   location.

   NOTE(review): the conditions that select between the spec-set cleanup and
   the recursion (presumably AXIS's operator), the null check, and the final
   free are outside this excerpt. */
1043 ctables_axis_destroy (struct ctables_axis *axis)
1051 for (size_t i = 0; i < N_CSVS; i++)
1052 ctables_summary_spec_set_uninit (&axis->specs[i]);
1057 ctables_axis_destroy (axis->subs[0]);
1058 ctables_axis_destroy (axis->subs[1]);
1061 msg_location_destroy (axis->loc);
/* Allocates a new axis node with SUB0 and SUB1 as its two sub-axes.  Its
   source location comes from lex_ofs_location(), covering START_OFS through
   the offset just before LEXER's current one.

   NOTE(review): the initializer line that stores OP and the return statement
   are outside this excerpt. */
1065 static struct ctables_axis *
1066 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1067 struct ctables_axis *sub0,
1068 struct ctables_axis *sub1,
1069 struct lexer *lexer, int start_ofs)
1071 struct ctables_axis *axis = xmalloc (sizeof *axis);
1072 *axis = (struct ctables_axis) {
1074 .subs = { sub0, sub1 },
1075 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1080 struct ctables_axis_parse_ctx
1082 struct lexer *lexer;
1083 struct dictionary *dict;
1085 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.

   The local S() definition maps each function to its FORMAT column.
   Depending on that value, the result is F with width 40, PCT with width 40
   and 1 decimal place, or VAR's own print format.

   NOTE(review): the case labels are outside this excerpt; presumably they
   are CTF_COUNT, CTF_PERCENT, and CTF_GENERAL in that order, to be
   confirmed. */
1088 static struct fmt_spec
1089 ctables_summary_default_format (enum ctables_summary_function function,
1090 const struct variable *var)
1092 static const enum ctables_format default_formats[] = {
1093 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1097 switch (default_formats[function])
1100 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1103 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1106 return *var_get_print_format (var);
/* Returns a new pivot_value to use as the label for SPEC.

   One path produces a default label: the text "Percentile %.2f" for
   CTSF_PTILE, otherwise the function's LABEL column from the S() entries.

   The other path builds the result from SPEC->label.  It copies the label
   to an output string piece by piece and, after each occurrence of
   ")CILEVEL", appends CILEVEL formatted with "%g" in its place.

   NOTE(review): the test that chooses between the two paths (presumably
   whether SPEC->label is null), the loop header, and the loop exit condition
   are outside this excerpt. */
1113 static struct pivot_value *
1114 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1118 static const char *default_labels[] = {
1119 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1124 return (spec->function == CTSF_PTILE
1125 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1127 : pivot_value_new_text (default_labels[spec->function]));
1131 struct substring in = ss_cstr (spec->label);
1132 struct substring target = ss_cstr (")CILEVEL");
1134 struct string out = DS_EMPTY_INITIALIZER;
/* Copy everything up to the next ")CILEVEL" (or the remainder of the label)
   to the output. */
1137 size_t chunk = ss_find_substring (in, target);
1138 ds_put_substring (&out, ss_head (in, chunk));
1139 ss_advance (&in, chunk);
1141 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the ")CILEVEL" marker itself and emit CILEVEL in its place. */
1143 ss_advance (&in, target.length);
1144 ds_put_format (&out, "%g", cilevel);
/* Returns the name string for summary FUNCTION.  The lookup table is indexed
   by function; the local S() definition maps each entry's ENUM to its NAME
   column. */
1150 ctables_summary_function_name (enum ctables_summary_function function)
1152 static const char *names[] = {
1153 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1157 return names[function];
/* Adds a summary spec for FUNCTION to AXIS, in the spec set for variant SV.

   When AXIS is a variable (CTAO_VAR), FUNCTION's availability is checked
   first.  One branch reports that the function applies only to multiple
   response sets.  Another reports that it applies only to scale variables;
   that one is guarded by AXIS not being scale and SV not being CSV_TOTAL.
   The new spec is then appended to AXIS->specs[SV], growing the array when
   it is full.  The spec takes its own copy of LABEL (which may be null), and
   when FORMAT is null it falls back to ctables_summary_default_format().

   For any other kind of axis, the same spec is added recursively to both
   sub-axes.

   LOC is used as the location of the "applies only to" error messages.

   NOTE(review): the case labels, the return statements, and the return type
   are outside this excerpt.  The recursive call is tested with '!', so the
   function presumably returns true on success; confirm. */
1161 add_summary_spec (struct ctables_axis *axis,
1162 enum ctables_summary_function function, double percentile,
1163 const char *label, const struct fmt_spec *format,
1164 bool is_ctables_format, const struct msg_location *loc,
1165 enum ctables_summary_variant sv)
1167 if (axis->op == CTAO_VAR)
1169 const char *function_name = ctables_summary_function_name (function);
1170 const char *var_name = var_get_name (axis->var);
1171 switch (ctables_function_availability (function))
1175 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1176 "response sets."), function_name);
1177 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1183 if (!axis->scale && sv != CSV_TOTAL)
1186 _("Summary function %s applies only to scale variables."),
1188 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1198 struct ctables_summary_spec_set *set = &axis->specs[sv];
1199 if (set->n >= set->allocated)
1200 set->specs = x2nrealloc (set->specs, &set->allocated,
1201 sizeof *set->specs);
1203 struct ctables_summary_spec *dst = &set->specs[set->n++];
1204 *dst = (struct ctables_summary_spec) {
1205 .function = function,
1206 .percentile = percentile,
1207 .label = xstrdup_if_nonnull (label),
1208 .format = (format ? *format
1209 : ctables_summary_default_format (function, axis->var)),
1210 .is_ctables_format = is_ctables_format,
1216 for (size_t i = 0; i < 2; i++)
1217 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1218 format, is_ctables_format, loc, sv))
1224 static struct ctables_axis *ctables_axis_parse_stack (
1225 struct ctables_axis_parse_ctx *);
/* Parses one primary term of an axis expression from CTX's lexer.

   A term is either a parenthesized sub-expression, which is handed to
   ctables_axis_parse_stack() and must be followed by `)', or a single
   variable from CTX's dictionary, optionally followed by `[S]' or `[C]'.
   For a variable, a new CTAO_VAR axis is allocated with xmalloc() and its
   `loc' is set to cover the tokens consumed for it. */
1228 static struct ctables_axis *
1229 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1231 if (lex_match (ctx->lexer, T_LPAREN))
1233 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1234 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
/* Sub-expression failed or `)' is missing: discard what was parsed. */
1236 ctables_axis_destroy (sub);
1242 if (!lex_force_id (ctx->lexer))
/* Remember where the variable starts so that axis->loc can span it. */
1245 int start_ofs = lex_ofs (ctx->lexer);
1246 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1250 struct ctables_axis *axis = xmalloc (sizeof *axis);
1251 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
/* `[S]' forces scale treatment and `[C]' forces non-scale treatment; with
   neither, the variable's measurement level decides. */
1253 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1254 : lex_match_phrase (ctx->lexer, "[C]") ? false
1255 : var_get_measure (var) == MEASURE_SCALE);
1256 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1257 lex_ofs (ctx->lexer) - 1);
/* A string variable cannot be treated as scale. */
1258 if (axis->scale && var_is_alpha (var))
1260 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1262 var_get_name (var));
1263 ctables_axis_destroy (axis);
/* Tests whether S contains at least one of the characters `0' through `9'.
   strcspn() gives the length of the leading run without digits; if the
   character at that offset is not the terminating null, a digit was found. */
1271 has_digit (const char *s)
1273 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format from LEXER into FORMAT.

   The type name, width and decimals are first read with
   parse_abstract_format_specifier__().  The names NEGPAREN, NEQUAL, PAREN and
   PCTPAREN (compared case-insensitively) select the matching CTEF_* type.
   Any other name is treated as an ordinary format: *IS_CTABLES_FORMAT is set
   to false and the result is that of parse_format_specifier() combined with
   fmt_check_output() and a numeric type-compatibility check.

   For the CTEF_* formats the width and decimals are validated (see the two
   error messages below) and *IS_CTABLES_FORMAT is set to true at the end. */
1277 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1278 bool *is_ctables_format)
1280 char type[FMT_TYPE_LEN_MAX + 1];
1281 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1284 if (!strcasecmp (type, "NEGPAREN"))
1285 format->type = CTEF_NEGPAREN;
1286 else if (!strcasecmp (type, "NEQUAL"))
1287 format->type = CTEF_NEQUAL;
1288 else if (!strcasecmp (type, "PAREN"))
1289 format->type = CTEF_PAREN;
1290 else if (!strcasecmp (type, "PCTPAREN"))
1291 format->type = CTEF_PCTPAREN;
/* Not one of the CTABLES-specific names: defer to the regular format
   parser and its checks. */
1294 *is_ctables_format = false;
1295 return (parse_format_specifier (lexer, format)
1296 && fmt_check_output (format)
1297 && fmt_check_type_compat (format, VAL_NUMERIC));
1303 lex_next_error (lexer, -1, -1,
1304 _("Output format %s requires width 2 or greater."), type);
/* The decimals must leave room within the width. */
1307 else if (format->d > format->w - 1)
1309 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1310 "greater than decimals."), type);
1315 *is_ctables_format = true;
/* Parses a primary term optionally followed by a bracketed list of summary
   specifications.

   Each specification consists of a summary function, a percentile in the
   closed range 0..100 when the function is CTSF_PTILE, an optional string
   label, and an optional format.  A format is attempted only when the next
   token is an identifier containing a digit.  Every specification is added
   to the parsed axis with add_summary_spec().  Specifications start out in
   the CSV_CELL variant; the TOTALS keyword followed by `[' is handled while
   SV is still CSV_CELL, and the later CSV_TOTAL test shows the variant
   changes there.  On the failure path visible at the end, the parsed axis is
   destroyed. */
1320 static struct ctables_axis *
1321 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1323 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1324 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1327 enum ctables_summary_variant sv = CSV_CELL;
/* Start offset of this specification, used for its message location. */
1330 int start_ofs = lex_ofs (ctx->lexer);
1332 /* Parse function. */
1333 enum ctables_summary_function function;
1334 if (!parse_ctables_summary_function (ctx->lexer, &function))
1337 /* Parse percentile. */
1338 double percentile = 0;
1339 if (function == CTSF_PTILE)
1341 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1343 percentile = lex_number (ctx->lexer);
1344 lex_get (ctx->lexer);
/* Optional label: a string token, copied into a new allocation. */
1349 if (lex_is_string (ctx->lexer))
1351 label = ss_xstrdup (lex_tokss (ctx->lexer));
1352 lex_get (ctx->lexer);
/* Optional format: only attempted for an identifier that contains a digit. */
1356 struct fmt_spec format;
1357 const struct fmt_spec *formatp;
1358 bool is_ctables_format = false;
1359 if (lex_token (ctx->lexer) == T_ID
1360 && has_digit (lex_tokcstr (ctx->lexer)))
1362 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1363 &is_ctables_format))
1373 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1374 lex_ofs (ctx->lexer) - 1);
/* NOTE(review): the result of add_summary_spec() is not examined at this
   call, although its recursive caller tests it -- confirm this is intended. */
1375 add_summary_spec (sub, function, percentile, label, formatp,
1376 is_ctables_format, loc, sv);
1378 msg_location_destroy (loc);
/* A comma between specifications is optional. */
1380 lex_match (ctx->lexer, T_COMMA);
1381 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1383 if (!lex_force_match (ctx->lexer, T_LBRACK))
1387 else if (lex_match (ctx->lexer, T_RBRACK))
/* Inside TOTALS[...] a second `]' is required to close the outer list. */
1389 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1396 ctables_axis_destroy (sub);
/* Looks for a scale variable within AXIS.  A CTAO_VAR axis yields itself when
   its `scale' flag is set and NULL otherwise.  The loop below examines both
   sub-axes recursively, subs[0] first. */
1400 static const struct ctables_axis *
1401 find_scale (const struct ctables_axis *axis)
1405 else if (axis->op == CTAO_VAR)
1406 return axis->scale ? axis : NULL;
1409 for (size_t i = 0; i < 2; i++)
1411 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Looks within AXIS for a non-scale variable that carries summary
   specifications.  A CTAO_VAR axis yields itself when it is not scale and its
   CSV_CELL spec set is non-empty, and NULL otherwise.  The loop below examines
   both sub-axes recursively, subs[0] first. */
1419 static const struct ctables_axis *
1420 find_categorical_summary_spec (const struct ctables_axis *axis)
1424 else if (axis->op == CTAO_VAR)
1425 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1428 for (size_t i = 0; i < 2; i++)
1430 const struct ctables_axis *sum
1431 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix expressions joined by `>' (nesting).

   For each `>' the right-hand operand is parsed and a CTAO_NEST node is built
   from the left and right operands.  Two checks are then applied, each
   reporting an error and destroying the new node when it fails:
   - both sides must not contain a scale variable;
   - the left (outer) side must not contain a categorical variable with
     summary specifications. */
1439 static struct ctables_axis *
1440 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1442 int start_ofs = lex_ofs (ctx->lexer);
1443 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1447 while (lex_match (ctx->lexer, T_GT))
1449 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1453 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1454 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Scale variables may appear on at most one side of the nesting. */
1456 const struct ctables_axis *outer_scale = find_scale (lhs);
1457 const struct ctables_axis *inner_scale = find_scale (rhs);
1458 if (outer_scale && inner_scale)
1460 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1461 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1462 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1463 ctables_axis_destroy (nest);
/* Summaries on a categorical variable are only accepted innermost. */
1467 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1470 msg_at (SE, nest->loc,
1471 _("Summaries may only be requested for categorical variables "
1472 "at the innermost nesting level."));
1473 msg_at (SN, outer_sum->loc,
1474 _("This outer categorical variable has a summary."));
1475 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by `+' (stacking).  Each
   further operand is combined with the expression parsed so far into a
   CTAO_STACK node, so the operator groups to the left.  START_OFS marks the
   first token and is passed on for the node's location. */
1485 static struct ctables_axis *
1486 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1488 int start_ofs = lex_ofs (ctx->lexer);
1489 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1493 while (lex_match (ctx->lexer, T_PLUS))
1495 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1499 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1500 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores it in T->axes[A].

   The tokens BY, `/' and end-of-command are tested first, before any
   expression parsing is attempted (presumably they denote an empty axis --
   confirm against the statement guarded by this test).  Otherwise the
   expression is parsed with ctables_axis_parse_stack() and the result is
   whether a non-null axis was produced. */
1507 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1508 struct ctables *ct, struct ctables_table *t,
1509 enum pivot_axis_type a)
1511 if (lex_token (lexer) == T_BY
1512 || lex_token (lexer) == T_SLASH
1513 || lex_token (lexer) == T_ENDCMD)
1516 struct ctables_axis_parse_ctx ctx = {
1522 t->axes[a] = ctables_axis_parse_stack (&ctx);
1523 return t->axes[a] != NULL;
1527 ctables_chisq_destroy (struct ctables_chisq *chisq)
1533 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: its sections, its per-variable
   category references and the array holding them, the axis expression and
   stack for every pivot axis, the values kept in clabels_values_map together
   with the map and the clabels_values array, and finally the chi-square and
   pairwise settings. */
1539 ctables_table_destroy (struct ctables_table *t)
1544 for (size_t i = 0; i < t->n_sections; i++)
1545 ctables_section_uninit (&t->sections[i]);
1548 for (size_t i = 0; i < t->n_categories; i++)
1549 ctables_categories_unref (t->categories[i]);
1550 free (t->categories);
1552 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1554 ctables_axis_destroy (t->axes[a]);
1555 ctables_stack_uninit (&t->stacks[a]);
/* The _SAFE iteration form is used because entries are deleted from the map
   while it is being traversed.  Each value is destroyed using the width of
   the clabels_example variable. */
1558 struct ctables_value *ctv, *next_ctv;
1559 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1560 &t->clabels_values_map)
1562 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1563 hmap_delete (&t->clabels_values_map, &ctv->node);
1566 hmap_destroy (&t->clabels_values_map);
1567 free (t->clabels_values);
1573 ctables_chisq_destroy (t->chisq);
1574 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: its format settings, its reference to
   the pivot table look, and each of its CT->n_tables tables. */
1579 ctables_destroy (struct ctables *ct)
1584 fmt_settings_uninit (&ct->ctables_formats);
1585 pivot_table_look_unref (ct->look);
1589 for (size_t i = 0; i < ct->n_tables; i++)
1590 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose numeric range `nrange' runs from LOW
   to HIGH.  Members not named in the initializer are zero-initialized. */
1595 static struct ctables_category
1596 cct_nrange (double low, double high)
1598 return (struct ctables_category) {
1600 .nrange = { low, high }
/* Returns, by value, a category whose string range `srange' runs from LOW to
   HIGH.  The substrings are stored as given, without copying their data.
   Members not named in the initializer are zero-initialized. */
1604 static struct ctables_category
1605 cct_srange (struct substring low, struct substring high)
1607 return (struct ctables_category) {
1609 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category and stores a
   CCT_SUBTOTAL category in *CAT.

   When `=' follows, a string token is required and a copy of it becomes the
   label; the other assignment below uses the translated default "Subtotal".
   HIDE_SUBCATEGORIES is stored unchanged in the category. */
1614 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1615 struct ctables_category *cat)
1618 if (lex_match (lexer, T_EQUALS))
1620 if (!lex_force_string (lexer))
1623 total_label = ss_xstrdup (lex_tokss (lexer));
1627 total_label = xstrdup (_("Subtotal"));
1629 *cat = (struct ctables_category) {
1630 .type = CCT_SUBTOTAL,
1631 .hide_subcategories = hide_subcategories,
1632 .total_label = total_label
/* Converts the current string token of LEXER with recode_substring_pool(),
   passing DICT's encoding and "UTF-8" as the two encodings (presumably
   recoding from UTF-8 to the dictionary encoding -- confirm against that
   function's parameter order), then strips trailing spaces from the result.
   No pool is supplied for the recoded string. */
1637 static struct substring
1638 parse_substring (struct lexer *lexer, struct dictionary *dict)
1640 struct substring s = recode_substring_pool (
1641 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1642 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicitly listed category and stores it in *CAT.

   Accepted forms, in the order tested:
   - OTHERNM and MISSING;
   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal()
     with hide_subcategories false and true respectively;
   - LO THRU followed by a string or a number;
   - a number, optionally followed by THRU and either HI or another number;
   - a string, optionally followed by THRU and either HI or another string;
   - `&' followed by the name of a postcompute known to CT.
   Anything else is reported with lex_error().

   Open-ended numeric ranges use -DBL_MAX or DBL_MAX as the missing bound;
   open-ended string ranges use a substring whose `string' member is null. */
1648 ctables_table_parse_explicit_category (struct lexer *lexer,
1649 struct dictionary *dict,
1651 struct ctables_category *cat)
1653 if (lex_match_id (lexer, "OTHERNM"))
1654 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1655 else if (lex_match_id (lexer, "MISSING"))
1656 *cat = (struct ctables_category) { .type = CCT_MISSING };
1657 else if (lex_match_id (lexer, "SUBTOTAL"))
1658 return ctables_table_parse_subtotal (lexer, false, cat);
1659 else if (lex_match_id (lexer, "HSUBTOTAL"))
1660 return ctables_table_parse_subtotal (lexer, true, cat);
1661 else if (lex_match_id (lexer, "LO"))
1663 if (!lex_force_match_id (lexer, "THRU"))
1665 if (lex_is_string (lexer))
/* String range with no lower bound. */
1667 struct substring sr0 = { .string = NULL };
1668 struct substring sr1 = parse_substring (lexer, dict);
1669 *cat = cct_srange (sr0, sr1);
1671 else if (lex_force_num (lexer))
1673 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1679 else if (lex_is_number (lexer))
1681 double number = lex_number (lexer);
1683 if (lex_match_id (lexer, "THRU"))
1685 if (lex_match_id (lexer, "HI"))
1686 *cat = cct_nrange (number, DBL_MAX);
1689 if (!lex_force_num (lexer))
1691 *cat = cct_nrange (number, lex_number (lexer));
1696 *cat = (struct ctables_category) {
1701 else if (lex_is_string (lexer))
1703 struct substring s = parse_substring (lexer, dict);
1704 if (lex_match_id (lexer, "THRU"))
1706 if (lex_match_id (lexer, "HI"))
/* String range with no upper bound. */
1708 struct substring sr1 = { .string = NULL };
1709 *cat = cct_srange (s, sr1);
1713 if (!lex_force_string (lexer))
1715 struct substring sr1 = parse_substring (lexer, dict);
1716 *cat = cct_srange (s, sr1);
1720 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1722 else if (lex_match (lexer, T_AND))
1724 if (!lex_force_id (lexer))
/* Look the postcompute up by the identifier that follows `&'. */
1726 struct ctables_postcompute *pc = ctables_find_postcompute (
1727 ct, lex_tokcstr (lexer));
1730 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1731 msg_at (SE, loc, _("Unknown postcompute &%s."),
1732 lex_tokcstr (lexer));
1733 msg_location_destroy (loc);
1738 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1742 lex_error (lexer, NULL);
/* Parses string S as a value in FORMAT using data_in(), with DICT's encoding
   and the current format settings.  When data_in() reports an error, a
   message naming the format and quoting the error text is issued at LOCATION.
   N is the output parameter for the parsed number (its assignment follows the
   statements shown here -- confirm). */
1750 parse_category_string (struct msg_location *location,
1751 struct substring s, const struct dictionary *dict,
1752 enum fmt_type format, double *n)
1755 char *error = data_in (s, dict_get_encoding (dict), format,
1756 settings_get_fmt_settings (), &v, 0, NULL);
1759 msg_at (SE, location,
1760 _("Failed to parse category specification as format %s: %s."),
1761 fmt_name (format), error);
/* Searches the categories in CATS for the one that postcompute expression
   node E refers to.

   The comparison depends on E->op:
   - CTPO_CAT_NUMBER: a CCT_NUMBER category with the same number;
   - CTPO_CAT_STRING: a CCT_STRING category with an equal string;
   - CTPO_CAT_NRANGE: a CCT_NRANGE category with both bounds equal;
   - CTPO_CAT_SRANGE: a CCT_SRANGE category with both bounds equal, where
     bounds are compared with nullable_substring_equal();
   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: a category of the
     corresponding type;
   - CTPO_CAT_SUBTOTAL: a CCT_SUBTOTAL category, selected using
     E->subtotal_index and a running count of subtotals.

   The final test singles out a subtotal reference with index 0 when more
   than one subtotal was seen. */
1770 static struct ctables_category *
1771 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1772 const struct ctables_pcexpr *e)
1774 struct ctables_category *best = NULL;
1775 size_t n_subtotals = 0;
1776 for (size_t i = 0; i < cats->n_cats; i++)
1778 struct ctables_category *cat = &cats->cats[i];
1781 case CTPO_CAT_NUMBER:
1782 if (cat->type == CCT_NUMBER && cat->number == e->number)
1786 case CTPO_CAT_STRING:
1787 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1791 case CTPO_CAT_NRANGE:
1792 if (cat->type == CCT_NRANGE
1793 && cat->nrange[0] == e->nrange[0]
1794 && cat->nrange[1] == e->nrange[1])
1798 case CTPO_CAT_SRANGE:
1799 if (cat->type == CCT_SRANGE
1800 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1801 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1805 case CTPO_CAT_MISSING:
1806 if (cat->type == CCT_MISSING)
1810 case CTPO_CAT_OTHERNM:
1811 if (cat->type == CCT_OTHERNM)
1815 case CTPO_CAT_SUBTOTAL:
1816 if (cat->type == CCT_SUBTOTAL)
1819 if (e->subtotal_index == n_subtotals)
1821 else if (e->subtotal_index == 0)
1826 case CTPO_CAT_TOTAL:
1827 if (cat->type == CCT_TOTAL)
/* A position-less subtotal reference is ambiguous with several subtotals. */
1841 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to,
   taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string references are first converted to
   numbers with parse_category_string():
   - a CTPO_CAT_STRING node is looked up as a CTPO_CAT_NUMBER node;
   - a CTPO_CAT_SRANGE node is looked up as a CTPO_CAT_NRANGE node, with a
     missing lower bound mapped to -DBL_MAX and a missing upper bound mapped
     to DBL_MAX.
   The temporary node keeps E's location.  All other cases are looked up
   unchanged. */
1846 static struct ctables_category *
1847 ctables_find_category_for_postcompute (const struct dictionary *dict,
1848 const struct ctables_categories *cats,
1849 enum fmt_type parse_format,
1850 const struct ctables_pcexpr *e)
1852 if (parse_format != FMT_F)
1854 if (e->op == CTPO_CAT_STRING)
1857 if (!parse_category_string (e->location, e->string, dict,
1858 parse_format, &number))
1861 struct ctables_pcexpr e2 = {
1862 .op = CTPO_CAT_NUMBER,
1864 .location = e->location,
1866 return ctables_find_category_for_postcompute__ (cats, &e2);
1868 else if (e->op == CTPO_CAT_SRANGE)
/* A null `string' member marks an open end of the range. */
1871 if (!e->srange[0].string)
1872 nrange[0] = -DBL_MAX;
1873 else if (!parse_category_string (e->location, e->srange[0], dict,
1874 parse_format, &nrange[0]))
1877 if (!e->srange[1].string)
1878 nrange[1] = DBL_MAX;
1879 else if (!parse_category_string (e->location, e->srange[1], dict,
1880 parse_format, &nrange[1]))
1883 struct ctables_pcexpr e2 = {
1884 .op = CTPO_CAT_NRANGE,
1885 .nrange = { nrange[0], nrange[1] },
1886 .location = e->location,
1888 return ctables_find_category_for_postcompute__ (cats, &e2);
1891 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that every category referenced by postcompute expression E, used by
   computed category PC_CAT, can be found in CATS.

   For category-reference nodes the category is looked up with
   ctables_find_category_for_postcompute() using PC_CAT->parse_format.  The
   diagnostics below cover two situations: a subtotal reference without a
   position when CATS contains more than one subtotal, and a reference to a
   category that is not in the list.  CATS_LOCATION is where the category
   list appears and is used for the messages about that list.

   The loop at the end checks both sub-expressions of E recursively, skipping
   null ones. */
1895 ctables_recursive_check_postcompute (struct dictionary *dict,
1896 const struct ctables_pcexpr *e,
1897 struct ctables_category *pc_cat,
1898 const struct ctables_categories *cats,
1899 const struct msg_location *cats_location)
1903 case CTPO_CAT_NUMBER:
1904 case CTPO_CAT_STRING:
1905 case CTPO_CAT_NRANGE:
1906 case CTPO_CAT_SRANGE:
1907 case CTPO_CAT_MISSING:
1908 case CTPO_CAT_OTHERNM:
1909 case CTPO_CAT_SUBTOTAL:
1910 case CTPO_CAT_TOTAL:
1912 struct ctables_category *cat = ctables_find_category_for_postcompute (
1913 dict, cats, pc_cat->parse_format, e);
/* A subtotal reference without a position: count the subtotals to decide
   whether the reference is ambiguous. */
1916 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1918 size_t n_subtotals = 0;
1919 for (size_t i = 0; i < cats->n_cats; i++)
1920 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1921 if (n_subtotals > 1)
1923 msg_at (SE, cats_location,
1924 ngettext ("These categories include %zu instance "
1925 "of SUBTOTAL or HSUBTOTAL, so references "
1926 "from computed categories must refer to "
1927 "subtotals by position, "
1928 "e.g. SUBTOTAL[1].",
1929 "These categories include %zu instances "
1930 "of SUBTOTAL or HSUBTOTAL, so references "
1931 "from computed categories must refer to "
1932 "subtotals by position, "
1933 "e.g. SUBTOTAL[1].",
1936 msg_at (SN, e->location,
1937 _("This is the reference that lacks a position."));
1942 msg_at (SE, pc_cat->location,
1943 _("Computed category &%s references a category not included "
1944 "in the category list."),
1946 msg_at (SN, e->location, _("This is the missing category."));
/* Tailor the hint to the kind of category that is missing. */
1947 if (e->op == CTPO_CAT_SUBTOTAL)
1948 msg_at (SN, cats_location,
1949 _("To fix the problem, add subtotals to the "
1950 "list of categories here."));
1951 else if (e->op == CTPO_CAT_TOTAL)
1952 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1953 "CATEGORIES specification."));
1955 msg_at (SN, cats_location,
1956 _("To fix the problem, add the missing category to the "
1957 "list of categories here."));
1960 if (pc_cat->pc->hide_source_cats)
1974 for (size_t i = 0; i < 2; i++)
1975 if (e->subs[i] && !ctables_recursive_check_postcompute (
1976 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric ones.  For a numeric
   variable, an error naming that variable is issued at CAT's location,
   explaining that the category specification applies only to string
   variables. */
1985 all_strings (struct variable **vars, size_t n_vars,
1986 const struct ctables_category *cat)
1988 for (size_t j = 0; j < n_vars; j++)
1989 if (var_is_numeric (vars[j]))
1991 msg_at (SE, cat->location,
1992 _("This category specification may be applied only to string "
1993 "variables, but this subcommand tries to apply it to "
1994 "numeric variable %s."),
1995 var_get_name (vars[j]));
/* Parses a CATEGORIES subcommand for table T.

   Outline of the visible steps:
   1. VARIABLES, an optional `=', and a variable list are parsed.
   2. The print formats of the variables are compared; COMMON_FORMAT is reset
      to null when their types differ.  A test on the date, time and date
      component format categories follows (it feeds PARSE_STRINGS, used in
      step 7 -- confirm).
   3. A new categories object is created with one reference per variable and
      show_empty set, and the previous object of each variable is unreferenced.
   4. An optional bracketed list of explicit categories is parsed, with an
      optional comma between entries.
   5. Keyword settings are parsed until `/' or the end of the command.  ORDER,
      KEY and MISSING are accepted only when no explicit categories were
      given; TOTAL, LABEL, POSITION and EMPTY are always accepted.
   6. A totals category is placed at the start or the end of the list
      according to POSITION, and categories are linked to a subtotal.
   7. With an explicit list, each category is checked against the variable
      types; postcomputes are validated, and string categories and string
      ranges are converted to numbers using the common format's type. */
2002 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2003 struct ctables *ct, struct ctables_table *t)
2005 if (!lex_match_id (lexer, "VARIABLES"))
2007 lex_match (lexer, T_EQUALS);
2009 struct variable **vars;
2011 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all the variables share one print format type. */
2014 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2015 for (size_t i = 1; i < n_vars; i++)
2017 const struct fmt_spec *f = var_get_print_format (vars[i]);
2018 if (f->type != common_format->type)
2020 common_format = NULL;
2026 && (fmt_get_category (common_format->type)
2027 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object is shared by all the listed variables, hence the
   initial reference count of N_VARS. */
2029 struct ctables_categories *c = xmalloc (sizeof *c);
2030 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2031 for (size_t i = 0; i < n_vars; i++)
2033 struct ctables_categories **cp
2034 = &t->categories[var_get_dict_index (vars[i])];
2035 ctables_categories_unref (*cp);
/* Token offsets of the explicit list; -1 means that no list was given. */
2039 size_t allocated_cats = 0;
2040 int cats_start_ofs = -1;
2041 int cats_end_ofs = -1;
2042 if (lex_match (lexer, T_LBRACK))
2044 cats_start_ofs = lex_ofs (lexer);
2047 if (c->n_cats >= allocated_cats)
2048 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2050 int start_ofs = lex_ofs (lexer);
2051 struct ctables_category *cat = &c->cats[c->n_cats];
2052 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2054 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2057 lex_match (lexer, T_COMMA);
2059 while (!lex_match (lexer, T_RBRACK));
2060 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for an implicit category list: missing values excluded and
   ascending sort order. */
2063 struct ctables_category cat = {
2065 .include_missing = false,
2066 .sort_ascending = true,
2068 bool show_totals = false;
2069 char *total_label = NULL;
2070 bool totals_before = false;
2071 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2073 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2075 lex_match (lexer, T_EQUALS);
2076 if (lex_match_id (lexer, "A"))
2077 cat.sort_ascending = true;
2078 else if (lex_match_id (lexer, "D"))
2079 cat.sort_ascending = false;
2082 lex_error_expecting (lexer, "A", "D");
2086 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2088 lex_match (lexer, T_EQUALS);
2089 if (lex_match_id (lexer, "VALUE"))
2090 cat.type = CCT_VALUE;
2091 else if (lex_match_id (lexer, "LABEL"))
2092 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0..100. */
2095 cat.type = CCT_FUNCTION;
2096 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2099 if (lex_match (lexer, T_LPAREN))
2101 cat.sort_var = parse_variable (lexer, dict);
2105 if (cat.sort_function == CTSF_PTILE)
2107 lex_match (lexer, T_COMMA);
2108 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2110 cat.percentile = lex_number (lexer);
2114 if (!lex_force_match (lexer, T_RPAREN))
2117 else if (ctables_function_availability (cat.sort_function)
/* The `(' is known to be absent here; lex_force_match() is called for the
   error message it emits, so its result is deliberately unused. */
2120 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2125 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2127 lex_match (lexer, T_EQUALS);
2128 if (lex_match_id (lexer, "INCLUDE"))
2129 cat.include_missing = true;
2130 else if (lex_match_id (lexer, "EXCLUDE"))
2131 cat.include_missing = false;
2134 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2138 else if (lex_match_id (lexer, "TOTAL"))
2140 lex_match (lexer, T_EQUALS);
2141 if (!parse_bool (lexer, &show_totals))
2144 else if (lex_match_id (lexer, "LABEL"))
2146 lex_match (lexer, T_EQUALS);
2147 if (!lex_force_string (lexer))
2150 total_label = ss_xstrdup (lex_tokss (lexer));
2153 else if (lex_match_id (lexer, "POSITION"))
2155 lex_match (lexer, T_EQUALS);
2156 if (lex_match_id (lexer, "BEFORE"))
2157 totals_before = true;
2158 else if (lex_match_id (lexer, "AFTER"))
2159 totals_before = false;
2162 lex_error_expecting (lexer, "BEFORE", "AFTER");
2166 else if (lex_match_id (lexer, "EMPTY"))
2168 lex_match (lexer, T_EQUALS);
2169 if (lex_match_id (lexer, "INCLUDE"))
2170 c->show_empty = true;
2171 else if (lex_match_id (lexer, "EXCLUDE"))
2172 c->show_empty = false;
2175 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the "expecting" error: the longer list also names the
   keywords that are only valid without an explicit category list. */
2182 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2183 "TOTAL", "LABEL", "POSITION", "EMPTY");
2185 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2192 if (c->n_cats >= allocated_cats)
2193 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2194 c->cats[c->n_cats++] = cat;
2199 if (c->n_cats >= allocated_cats)
2200 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
/* The totals category goes either in a slot opened at index 0 or in the
   slot just past the current last category. */
2202 struct ctables_category *totals;
2205 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2206 totals = &c->cats[0];
2209 totals = &c->cats[c->n_cats];
2212 *totals = (struct ctables_category) {
2214 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward otherwise
   (the backward form decrements I inside the loop condition). */
2218 struct ctables_category *subtotal = NULL;
2219 for (size_t i = totals_before ? 0 : c->n_cats;
2220 totals_before ? i < c->n_cats : i-- > 0;
2221 totals_before ? i++ : 0)
2223 struct ctables_category *cat = &c->cats[i];
2232 cat->subtotal = subtotal;
2235 case CCT_POSTCOMPUTE:
2246 case CCT_EXCLUDED_MISSING:
/* Validation that applies only when an explicit category list was given. */
2251 if (cats_start_ofs != -1)
2253 for (size_t i = 0; i < c->n_cats; i++)
2255 struct ctables_category *cat = &c->cats[i];
2258 case CCT_POSTCOMPUTE:
2259 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2260 struct msg_location *cats_location
2261 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2262 bool ok = ctables_recursive_check_postcompute (
2263 dict, cat->pc->expr, cat, c, cats_location);
2264 msg_location_destroy (cats_location);
2271 for (size_t j = 0; j < n_vars; j++)
2272 if (var_is_alpha (vars[j]))
2274 msg_at (SE, cat->location,
2275 _("This category specification may be applied "
2276 "only to numeric variables, but this "
2277 "subcommand tries to apply it to string "
2279 var_get_name (vars[j]));
/* A string category is re-read as a number in the common format; the string
   is released and the category becomes CCT_NUMBER. */
2288 if (!parse_category_string (cat->location, cat->string, dict,
2289 common_format->type, &n))
2292 ss_dealloc (&cat->string);
2294 cat->type = CCT_NUMBER;
2297 else if (!all_strings (vars, n_vars, cat))
/* Likewise for a string range: each present bound is parsed as a number,
   both strings are released, and the category becomes CCT_NRANGE. */
2306 if (!cat->srange[0].string)
2308 else if (!parse_category_string (cat->location,
2309 cat->srange[0], dict,
2310 common_format->type, &n[0]))
2313 if (!cat->srange[1].string)
2315 else if (!parse_category_string (cat->location,
2316 cat->srange[1], dict,
2317 common_format->type, &n[1]))
2320 ss_dealloc (&cat->srange[0]);
2321 ss_dealloc (&cat->srange[1]);
2323 cat->type = CCT_NRANGE;
2324 cat->nrange[0] = n[0];
2325 cat->nrange[1] = n[1];
2327 else if (!all_strings (vars, n_vars, cat))
2338 case CCT_EXCLUDED_MISSING:
2353 ctables_nest_uninit (struct ctables_nest *nest)
/* Releases the contents of STACK: every one of its STACK->n nests is
   uninitialized with ctables_nest_uninit() and the array of nests is freed.
   STACK itself is not freed here. */
2360 ctables_stack_uninit (struct ctables_stack *stack)
2364 for (size_t i = 0; i < stack->n; i++)
2365 ctables_nest_uninit (&stack->nests[i]);
2366 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0.

   One new nest is created for every pair (A, B) with A taken from S0 and B
   from S1.  Its variable list is A's variables followed by B's.  Its scale
   index is A's when A has one, or B's shifted by A's variable count when
   only B has one.  The summary specs of every variant are cloned from
   SUMMARY_SRC, which is chosen by looking at which of A and B has a variable
   in its CSV_CELL specs.

   Both S0 and S1 are uninitialized at the end, so the arguments must not be
   used by the caller afterwards. */
2370 static struct ctables_stack
2371 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* Room for s0.n * s1.n nests. */
2378 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2379 for (size_t i = 0; i < s0.n; i++)
2380 for (size_t j = 0; j < s1.n; j++)
2382 const struct ctables_nest *a = &s0.nests[i];
2383 const struct ctables_nest *b = &s1.nests[j];
2385 size_t allocate = a->n + b->n;
2386 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2387 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
/* Outer variables first, then inner ones. */
2389 for (size_t k = 0; k < a->n; k++)
2390 vars[n++] = a->vars[k];
2391 for (size_t k = 0; k < b->n; k++)
2392 vars[n++] = b->vars[k];
2393 assert (n == allocate);
2395 const struct ctables_nest *summary_src;
2396 if (!a->specs[CSV_CELL].var)
2398 else if (!b->specs[CSV_CELL].var)
2403 struct ctables_nest *new = &stack.nests[stack.n++];
2404 *new = (struct ctables_nest) {
/* SIZE_MAX in scale_idx marks a nest without a scale variable. */
2406 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2407 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2411 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2412 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2414 ctables_stack_uninit (&s0);
2415 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S1 after S0: a new array of
   s0.n + s1.n nests receives S0's nests followed by S1's.  The nest
   structures are copied by plain assignment.  The group_head of every nest
   taken from S1 is increased by s0.n, matching its shifted position. */
2419 static struct ctables_stack
2420 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2422 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2423 for (size_t i = 0; i < s0.n; i++)
2424 stack.nests[stack.n++] = s0.nests[i];
2425 for (size_t i = 0; i < s1.n; i++)
2427 stack.nests[stack.n] = s1.nests[i];
2428 stack.nests[stack.n].group_head += s0.n;
2431 assert (stack.n == s0.n + s1.n);
/* Builds the single-nest stack for the variable axis A.

   A one-element variable array and one nest are allocated.  The nest's scale
   index is 0 when A is a scale variable and SIZE_MAX otherwise.  When A has
   CSV_CELL summary specs or is scale, the specs of every variant are cloned
   from A and tagged with A's variable and scale flag.  The returned stack
   refers to the newly allocated nest. */
2437 static struct ctables_stack
2438 var_fts (const struct ctables_axis *a)
2440 struct variable **vars = xmalloc (sizeof *vars);
2443 struct ctables_nest *nest = xmalloc (sizeof *nest);
2444 *nest = (struct ctables_nest) {
2447 .scale_idx = a->scale ? 0 : SIZE_MAX,
2449 if (a->specs[CSV_CELL].n || a->scale)
2450 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2452 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2453 nest->specs[sv].var = a->var;
2454 nest->specs[sv].is_scale = a->scale;
2456 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis expression A into a stack of nests.  One path returns an
   empty stack; the two visible recursive paths combine the results for
   A->subs[0] and A->subs[1] with stack_fts() and with nest_fts().
   AXIS_TYPE is passed unchanged to the recursive calls. */
2459 static struct ctables_stack
2460 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2463 return (struct ctables_stack) { .n = 0 };
2471 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2472 enumerate_fts (axis_type, a->subs[1]));
2475 /* This should consider any of the scale variables found in the result to
2476 be linked to each other listwise for SMISSING=LISTWISE. */
2477 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2478 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary.  Being a union, only one group of members is
   meaningful at a time; the comment above each group lists the summary
   functions that use it. */
2484 union ctables_summary
2486 /* COUNT, VALIDN, TOTALN. */
2489 /* MINIMUM, MAXIMUM, RANGE. */
2496 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2497 struct moments1 *moments;
2499 /* MEDIAN, MODE, PTILE. */
2502 struct casewriter *writer;
2507 /* XXX multiple response */
/* Prepares accumulator S for the summary function given by SS->function.

   The visible initializations are:
   - minimum and maximum both start as SYSMIS;
   - the moment-based functions (the *_SUM families and their unweighted U*
     counterparts, among others) get a moments1 accumulator created for
     MOMENT_VARIANCE;
   - the last group gets a sorting casewriter over two width-0 columns,
     ordered ascending on column 0.
   The statement run for the count and percentage case labels follows them
   and is not repeated here. */
2511 ctables_summary_init (union ctables_summary *s,
2512 const struct ctables_summary_spec *ss)
2514 switch (ss->function)
2518 case CTSF_ROWPCT_COUNT:
2519 case CTSF_COLPCT_COUNT:
2520 case CTSF_TABLEPCT_COUNT:
2521 case CTSF_SUBTABLEPCT_COUNT:
2522 case CTSF_LAYERPCT_COUNT:
2523 case CTSF_LAYERROWPCT_COUNT:
2524 case CTSF_LAYERCOLPCT_COUNT:
2525 case CTSF_ROWPCT_VALIDN:
2526 case CTSF_COLPCT_VALIDN:
2527 case CTSF_TABLEPCT_VALIDN:
2528 case CTSF_SUBTABLEPCT_VALIDN:
2529 case CTSF_LAYERPCT_VALIDN:
2530 case CTSF_LAYERROWPCT_VALIDN:
2531 case CTSF_LAYERCOLPCT_VALIDN:
2532 case CTSF_ROWPCT_TOTALN:
2533 case CTSF_COLPCT_TOTALN:
2534 case CTSF_TABLEPCT_TOTALN:
2535 case CTSF_SUBTABLEPCT_TOTALN:
2536 case CTSF_LAYERPCT_TOTALN:
2537 case CTSF_LAYERROWPCT_TOTALN:
2538 case CTSF_LAYERCOLPCT_TOTALN:
2545 case CTSF_UROWPCT_COUNT:
2546 case CTSF_UCOLPCT_COUNT:
2547 case CTSF_UTABLEPCT_COUNT:
2548 case CTSF_USUBTABLEPCT_COUNT:
2549 case CTSF_ULAYERPCT_COUNT:
2550 case CTSF_ULAYERROWPCT_COUNT:
2551 case CTSF_ULAYERCOLPCT_COUNT:
2552 case CTSF_UROWPCT_VALIDN:
2553 case CTSF_UCOLPCT_VALIDN:
2554 case CTSF_UTABLEPCT_VALIDN:
2555 case CTSF_USUBTABLEPCT_VALIDN:
2556 case CTSF_ULAYERPCT_VALIDN:
2557 case CTSF_ULAYERROWPCT_VALIDN:
2558 case CTSF_ULAYERCOLPCT_VALIDN:
2559 case CTSF_UROWPCT_TOTALN:
2560 case CTSF_UCOLPCT_TOTALN:
2561 case CTSF_UTABLEPCT_TOTALN:
2562 case CTSF_USUBTABLEPCT_TOTALN:
2563 case CTSF_ULAYERPCT_TOTALN:
2564 case CTSF_ULAYERROWPCT_TOTALN:
2565 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for the running minimum and maximum. */
2575 s->min = s->max = SYSMIS;
2583 case CTSF_ROWPCT_SUM:
2584 case CTSF_COLPCT_SUM:
2585 case CTSF_TABLEPCT_SUM:
2586 case CTSF_SUBTABLEPCT_SUM:
2587 case CTSF_LAYERPCT_SUM:
2588 case CTSF_LAYERROWPCT_SUM:
2589 case CTSF_LAYERCOLPCT_SUM:
2594 case CTSF_UVARIANCE:
2595 case CTSF_UROWPCT_SUM:
2596 case CTSF_UCOLPCT_SUM:
2597 case CTSF_UTABLEPCT_SUM:
2598 case CTSF_USUBTABLEPCT_SUM:
2599 case CTSF_ULAYERPCT_SUM:
2600 case CTSF_ULAYERROWPCT_SUM:
2601 case CTSF_ULAYERCOLPCT_SUM:
2602 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two width-0 columns (presumably a value and its weight -- confirm); the
   writer sorts ascending on column 0.  The local prototype and ordering are
   released once the writer has been created. */
2612 struct caseproto *proto = caseproto_create ();
2613 proto = caseproto_add_width (proto, 0);
2614 proto = caseproto_add_width (proto, 0);
2616 struct subcase ordering;
2617 subcase_init (&ordering, 0, 0, SC_ASCEND);
2618 s->writer = sort_create_writer (&ordering, proto);
2619 subcase_uninit (&ordering);
2620 caseproto_unref (proto);
/* Releases whatever accumulator S holds for the summary function given by
   SS->function.  The case labels mirror those of ctables_summary_init():
   the moment-based functions destroy the moments1 accumulator, and the last
   group destroys the casewriter.  No cleanup call is shown for the count and
   percentage case labels. */
2630 ctables_summary_uninit (union ctables_summary *s,
2631 const struct ctables_summary_spec *ss)
2633 switch (ss->function)
2637 case CTSF_ROWPCT_COUNT:
2638 case CTSF_COLPCT_COUNT:
2639 case CTSF_TABLEPCT_COUNT:
2640 case CTSF_SUBTABLEPCT_COUNT:
2641 case CTSF_LAYERPCT_COUNT:
2642 case CTSF_LAYERROWPCT_COUNT:
2643 case CTSF_LAYERCOLPCT_COUNT:
2644 case CTSF_ROWPCT_VALIDN:
2645 case CTSF_COLPCT_VALIDN:
2646 case CTSF_TABLEPCT_VALIDN:
2647 case CTSF_SUBTABLEPCT_VALIDN:
2648 case CTSF_LAYERPCT_VALIDN:
2649 case CTSF_LAYERROWPCT_VALIDN:
2650 case CTSF_LAYERCOLPCT_VALIDN:
2651 case CTSF_ROWPCT_TOTALN:
2652 case CTSF_COLPCT_TOTALN:
2653 case CTSF_TABLEPCT_TOTALN:
2654 case CTSF_SUBTABLEPCT_TOTALN:
2655 case CTSF_LAYERPCT_TOTALN:
2656 case CTSF_LAYERROWPCT_TOTALN:
2657 case CTSF_LAYERCOLPCT_TOTALN:
2664 case CTSF_UROWPCT_COUNT:
2665 case CTSF_UCOLPCT_COUNT:
2666 case CTSF_UTABLEPCT_COUNT:
2667 case CTSF_USUBTABLEPCT_COUNT:
2668 case CTSF_ULAYERPCT_COUNT:
2669 case CTSF_ULAYERROWPCT_COUNT:
2670 case CTSF_ULAYERCOLPCT_COUNT:
2671 case CTSF_UROWPCT_VALIDN:
2672 case CTSF_UCOLPCT_VALIDN:
2673 case CTSF_UTABLEPCT_VALIDN:
2674 case CTSF_USUBTABLEPCT_VALIDN:
2675 case CTSF_ULAYERPCT_VALIDN:
2676 case CTSF_ULAYERROWPCT_VALIDN:
2677 case CTSF_ULAYERCOLPCT_VALIDN:
2678 case CTSF_UROWPCT_TOTALN:
2679 case CTSF_UCOLPCT_TOTALN:
2680 case CTSF_UTABLEPCT_TOTALN:
2681 case CTSF_USUBTABLEPCT_TOTALN:
2682 case CTSF_ULAYERPCT_TOTALN:
2683 case CTSF_ULAYERROWPCT_TOTALN:
2684 case CTSF_ULAYERCOLPCT_TOTALN:
2700 case CTSF_ROWPCT_SUM:
2701 case CTSF_COLPCT_SUM:
2702 case CTSF_TABLEPCT_SUM:
2703 case CTSF_SUBTABLEPCT_SUM:
2704 case CTSF_LAYERPCT_SUM:
2705 case CTSF_LAYERROWPCT_SUM:
2706 case CTSF_LAYERCOLPCT_SUM:
2711 case CTSF_UVARIANCE:
2712 case CTSF_UROWPCT_SUM:
2713 case CTSF_UCOLPCT_SUM:
2714 case CTSF_UTABLEPCT_SUM:
2715 case CTSF_USUBTABLEPCT_SUM:
2716 case CTSF_ULAYERPCT_SUM:
2717 case CTSF_ULAYERROWPCT_SUM:
2718 case CTSF_ULAYERCOLPCT_SUM:
/* Counterpart of moments1_create() in ctables_summary_init(). */
2719 moments1_destroy (s->moments);
/* Counterpart of sort_create_writer() in ctables_summary_init(). */
2728 casewriter_destroy (s->writer);
/* Folds one case into summary S according to summary specification SS.

   VALUE is the case's value of VAR; D_WEIGHT and E_WEIGHT are two case
   weights supplied by the caller.  The branches of the switch on
   SS->function that are visible in this excerpt behave as follows:

   - The *PCT_TOTALN group adds D_WEIGHT to S->count.
   - The *PCT_COUNT group adds D_WEIGHT to S->count only if IS_SCALE is set
     or EXCLUDED_MISSING is clear.
   - The *PCT_SUM group feeds VALUE->f into S->moments with weight E_WEIGHT
     unless IS_SCALE_MISSING is set; the group that begins with
     CTSF_UVARIANCE does the same with a fixed weight of 1.0.
   - One branch compares VALUE->f against S->min and S->max (treating SYSMIS
     as "not yet set") after asserting that VAR is not a string variable.
   - One branch resets both weights to 1.0, and a branch then appends the
     pair (VALUE->f, E_WEIGHT) to S->writer and adds E_WEIGHT to S->ovalid
     unless IS_SCALE_MISSING is set.

   NOTE(review): several short lines of this function (case labels, braces,
   break statements, some assignments) are absent from this excerpt, so the
   summary functions handled by the remaining branches, and the use of
   IS_MISSING, cannot be described from here. */
2734 ctables_summary_add (union ctables_summary *s,
2735 const struct ctables_summary_spec *ss,
2736 const struct variable *var, const union value *value,
2737 bool is_scale, bool is_scale_missing,
2738 bool is_missing, bool excluded_missing,
2739 double d_weight, double e_weight)
2741 /* To determine whether a case is included in a given table for a particular
2742 kind of summary, consider the following charts for each variable in the
2743 table. Only if "yes" appears for every variable for the summary is the
2746 Categorical variables: VALIDN COUNT TOTALN
2747 Valid values in included categories yes yes yes
2748 Missing values in included categories --- yes yes
2749 Missing values in excluded categories --- --- yes
2750 Valid values in excluded categories --- --- ---
2752 Scale variables: VALIDN COUNT TOTALN
2753 Valid value yes yes yes
2754 Missing value --- yes yes
2756 Missing values include both user- and system-missing. (The system-missing
2757 value is always in an excluded category.)
2759 switch (ss->function)
2762 case CTSF_ROWPCT_TOTALN:
2763 case CTSF_COLPCT_TOTALN:
2764 case CTSF_TABLEPCT_TOTALN:
2765 case CTSF_SUBTABLEPCT_TOTALN:
2766 case CTSF_LAYERPCT_TOTALN:
2767 case CTSF_LAYERROWPCT_TOTALN:
2768 case CTSF_LAYERCOLPCT_TOTALN:
2769 s->count += d_weight;
2773 case CTSF_UROWPCT_TOTALN:
2774 case CTSF_UCOLPCT_TOTALN:
2775 case CTSF_UTABLEPCT_TOTALN:
2776 case CTSF_USUBTABLEPCT_TOTALN:
2777 case CTSF_ULAYERPCT_TOTALN:
2778 case CTSF_ULAYERROWPCT_TOTALN:
2779 case CTSF_ULAYERCOLPCT_TOTALN:
2784 case CTSF_ROWPCT_COUNT:
2785 case CTSF_COLPCT_COUNT:
2786 case CTSF_TABLEPCT_COUNT:
2787 case CTSF_SUBTABLEPCT_COUNT:
2788 case CTSF_LAYERPCT_COUNT:
2789 case CTSF_LAYERROWPCT_COUNT:
2790 case CTSF_LAYERCOLPCT_COUNT:
2791 if (is_scale || !excluded_missing)
2792 s->count += d_weight;
2796 case CTSF_UROWPCT_COUNT:
2797 case CTSF_UCOLPCT_COUNT:
2798 case CTSF_UTABLEPCT_COUNT:
2799 case CTSF_USUBTABLEPCT_COUNT:
2800 case CTSF_ULAYERPCT_COUNT:
2801 case CTSF_ULAYERROWPCT_COUNT:
2802 case CTSF_ULAYERCOLPCT_COUNT:
2803 if (is_scale || !excluded_missing)
2808 case CTSF_ROWPCT_VALIDN:
2809 case CTSF_COLPCT_VALIDN:
2810 case CTSF_TABLEPCT_VALIDN:
2811 case CTSF_SUBTABLEPCT_VALIDN:
2812 case CTSF_LAYERPCT_VALIDN:
2813 case CTSF_LAYERROWPCT_VALIDN:
2814 case CTSF_LAYERCOLPCT_VALIDN:
2818 s->count += d_weight;
2822 case CTSF_UROWPCT_VALIDN:
2823 case CTSF_UCOLPCT_VALIDN:
2824 case CTSF_UTABLEPCT_VALIDN:
2825 case CTSF_USUBTABLEPCT_VALIDN:
2826 case CTSF_ULAYERPCT_VALIDN:
2827 case CTSF_ULAYERROWPCT_VALIDN:
2828 case CTSF_ULAYERCOLPCT_VALIDN:
2837 s->count += d_weight;
2846 if (is_scale || !excluded_missing)
2847 s->count += e_weight;
2854 s->count += e_weight;
2858 s->count += e_weight;
2864 if (!is_scale_missing)
2866 assert (!var_is_alpha (var)); /* XXX? */
2867 if (s->min == SYSMIS || value->f < s->min)
2869 if (s->max == SYSMIS || value->f > s->max)
2879 case CTSF_ROWPCT_SUM:
2880 case CTSF_COLPCT_SUM:
2881 case CTSF_TABLEPCT_SUM:
2882 case CTSF_SUBTABLEPCT_SUM:
2883 case CTSF_LAYERPCT_SUM:
2884 case CTSF_LAYERROWPCT_SUM:
2885 case CTSF_LAYERCOLPCT_SUM:
2886 if (!is_scale_missing)
2887 moments1_add (s->moments, value->f, e_weight);
2894 case CTSF_UVARIANCE:
2895 case CTSF_UROWPCT_SUM:
2896 case CTSF_UCOLPCT_SUM:
2897 case CTSF_UTABLEPCT_SUM:
2898 case CTSF_USUBTABLEPCT_SUM:
2899 case CTSF_ULAYERPCT_SUM:
2900 case CTSF_ULAYERROWPCT_SUM:
2901 case CTSF_ULAYERCOLPCT_SUM:
2902 if (!is_scale_missing)
2903 moments1_add (s->moments, value->f, 1.0);
2909 d_weight = e_weight = 1.0;
2914 if (!is_scale_missing)
2916 s->ovalid += e_weight;
2918 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2919 *case_num_rw_idx (c, 0) = value->f;
2920 *case_num_rw_idx (c, 1) = e_weight;
2921 casewriter_write (s->writer, c);
/* Returns the domain type associated with summary FUNCTION.

   In the visible part of the switch, the LAYERCOLPCT_* functions (both the
   plain and the U* forms) map to CTDT_LAYERCOL, the LAYERROWPCT_* functions
   map to CTDT_LAYERROW, and the SUBTABLEPCT_* functions map to
   CTDT_SUBTABLE.

   NOTE(review): the return statements for the COLPCT_*, LAYERPCT_*,
   ROWPCT_* and TABLEPCT_* groups, and the handling of the group of labels
   that ends with CTSF_UVARIANCE, are not visible in this excerpt. */
2927 static enum ctables_domain_type
2928 ctables_function_domain (enum ctables_summary_function function)
2958 case CTSF_UVARIANCE:
2964 case CTSF_COLPCT_COUNT:
2965 case CTSF_COLPCT_SUM:
2966 case CTSF_COLPCT_TOTALN:
2967 case CTSF_COLPCT_VALIDN:
2968 case CTSF_UCOLPCT_COUNT:
2969 case CTSF_UCOLPCT_SUM:
2970 case CTSF_UCOLPCT_TOTALN:
2971 case CTSF_UCOLPCT_VALIDN:
2974 case CTSF_LAYERCOLPCT_COUNT:
2975 case CTSF_LAYERCOLPCT_SUM:
2976 case CTSF_LAYERCOLPCT_TOTALN:
2977 case CTSF_LAYERCOLPCT_VALIDN:
2978 case CTSF_ULAYERCOLPCT_COUNT:
2979 case CTSF_ULAYERCOLPCT_SUM:
2980 case CTSF_ULAYERCOLPCT_TOTALN:
2981 case CTSF_ULAYERCOLPCT_VALIDN:
2982 return CTDT_LAYERCOL;
2984 case CTSF_LAYERPCT_COUNT:
2985 case CTSF_LAYERPCT_SUM:
2986 case CTSF_LAYERPCT_TOTALN:
2987 case CTSF_LAYERPCT_VALIDN:
2988 case CTSF_ULAYERPCT_COUNT:
2989 case CTSF_ULAYERPCT_SUM:
2990 case CTSF_ULAYERPCT_TOTALN:
2991 case CTSF_ULAYERPCT_VALIDN:
2994 case CTSF_LAYERROWPCT_COUNT:
2995 case CTSF_LAYERROWPCT_SUM:
2996 case CTSF_LAYERROWPCT_TOTALN:
2997 case CTSF_LAYERROWPCT_VALIDN:
2998 case CTSF_ULAYERROWPCT_COUNT:
2999 case CTSF_ULAYERROWPCT_SUM:
3000 case CTSF_ULAYERROWPCT_TOTALN:
3001 case CTSF_ULAYERROWPCT_VALIDN:
3002 return CTDT_LAYERROW;
3004 case CTSF_ROWPCT_COUNT:
3005 case CTSF_ROWPCT_SUM:
3006 case CTSF_ROWPCT_TOTALN:
3007 case CTSF_ROWPCT_VALIDN:
3008 case CTSF_UROWPCT_COUNT:
3009 case CTSF_UROWPCT_SUM:
3010 case CTSF_UROWPCT_TOTALN:
3011 case CTSF_UROWPCT_VALIDN:
3014 case CTSF_SUBTABLEPCT_COUNT:
3015 case CTSF_SUBTABLEPCT_SUM:
3016 case CTSF_SUBTABLEPCT_TOTALN:
3017 case CTSF_SUBTABLEPCT_VALIDN:
3018 case CTSF_USUBTABLEPCT_COUNT:
3019 case CTSF_USUBTABLEPCT_SUM:
3020 case CTSF_USUBTABLEPCT_TOTALN:
3021 case CTSF_USUBTABLEPCT_VALIDN:
3022 return CTDT_SUBTABLE;
3024 case CTSF_TABLEPCT_COUNT:
3025 case CTSF_TABLEPCT_SUM:
3026 case CTSF_TABLEPCT_TOTALN:
3027 case CTSF_TABLEPCT_VALIDN:
3028 case CTSF_UTABLEPCT_COUNT:
3029 case CTSF_UTABLEPCT_SUM:
3030 case CTSF_UTABLEPCT_TOTALN:
3031 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION by whether it is a percentage-of-sum function.

   The switch keeps the *PCT_COUNT, *PCT_TOTALN and *PCT_VALIDN functions in
   one group of case labels (together with the group that ends with
   CTSF_UVARIANCE) and the *PCT_SUM functions in a second group.

   NOTE(review): the return statements are not visible in this excerpt.  The
   declared return type is enum ctables_domain_type even though the name
   reads like a boolean predicate; confirm whether bool was intended. */
3038 static enum ctables_domain_type
3039 ctables_function_is_pctsum (enum ctables_summary_function function)
3069 case CTSF_UVARIANCE:
3073 case CTSF_COLPCT_COUNT:
3074 case CTSF_COLPCT_TOTALN:
3075 case CTSF_COLPCT_VALIDN:
3076 case CTSF_UCOLPCT_COUNT:
3077 case CTSF_UCOLPCT_TOTALN:
3078 case CTSF_UCOLPCT_VALIDN:
3079 case CTSF_LAYERCOLPCT_COUNT:
3080 case CTSF_LAYERCOLPCT_TOTALN:
3081 case CTSF_LAYERCOLPCT_VALIDN:
3082 case CTSF_ULAYERCOLPCT_COUNT:
3083 case CTSF_ULAYERCOLPCT_TOTALN:
3084 case CTSF_ULAYERCOLPCT_VALIDN:
3085 case CTSF_LAYERPCT_COUNT:
3086 case CTSF_LAYERPCT_TOTALN:
3087 case CTSF_LAYERPCT_VALIDN:
3088 case CTSF_ULAYERPCT_COUNT:
3089 case CTSF_ULAYERPCT_TOTALN:
3090 case CTSF_ULAYERPCT_VALIDN:
3091 case CTSF_LAYERROWPCT_COUNT:
3092 case CTSF_LAYERROWPCT_TOTALN:
3093 case CTSF_LAYERROWPCT_VALIDN:
3094 case CTSF_ULAYERROWPCT_COUNT:
3095 case CTSF_ULAYERROWPCT_TOTALN:
3096 case CTSF_ULAYERROWPCT_VALIDN:
3097 case CTSF_ROWPCT_COUNT:
3098 case CTSF_ROWPCT_TOTALN:
3099 case CTSF_ROWPCT_VALIDN:
3100 case CTSF_UROWPCT_COUNT:
3101 case CTSF_UROWPCT_TOTALN:
3102 case CTSF_UROWPCT_VALIDN:
3103 case CTSF_SUBTABLEPCT_COUNT:
3104 case CTSF_SUBTABLEPCT_TOTALN:
3105 case CTSF_SUBTABLEPCT_VALIDN:
3106 case CTSF_USUBTABLEPCT_COUNT:
3107 case CTSF_USUBTABLEPCT_TOTALN:
3108 case CTSF_USUBTABLEPCT_VALIDN:
3109 case CTSF_TABLEPCT_COUNT:
3110 case CTSF_TABLEPCT_TOTALN:
3111 case CTSF_TABLEPCT_VALIDN:
3112 case CTSF_UTABLEPCT_COUNT:
3113 case CTSF_UTABLEPCT_TOTALN:
3114 case CTSF_UTABLEPCT_VALIDN:
3117 case CTSF_COLPCT_SUM:
3118 case CTSF_UCOLPCT_SUM:
3119 case CTSF_LAYERCOLPCT_SUM:
3120 case CTSF_ULAYERCOLPCT_SUM:
3121 case CTSF_LAYERPCT_SUM:
3122 case CTSF_ULAYERPCT_SUM:
3123 case CTSF_LAYERROWPCT_SUM:
3124 case CTSF_ULAYERROWPCT_SUM:
3125 case CTSF_ROWPCT_SUM:
3126 case CTSF_UROWPCT_SUM:
3127 case CTSF_SUBTABLEPCT_SUM:
3128 case CTSF_USUBTABLEPCT_SUM:
3129 case CTSF_TABLEPCT_SUM:
3130 case CTSF_UTABLEPCT_SUM:
/* Computes the result of summary S in CELL for summary specification SS.

   Behavior visible in this excerpt, by branch of the switch on SS->function:

   - *PCT_COUNT, *PCT_VALIDN and *PCT_TOTALN functions look up the domain
     chosen by ctables_function_domain() and, when that domain's matching
     total (e_count/u_count, e_valid/u_valid, e_total/u_total) is nonzero,
     yield S->count divided by it, times 100.
   - *PCT_SUM functions take weight * mean from moments1_calculate() as the
     numerator and the domain's e_sum (or u_sum for U* forms) for
     SS->sum_var_idx as the denominator, yielding SYSMIS when the
     denominator is 0.
   - Other branches yield S->max - S->min (SYSMIS if either is SYSMIS),
     calc_semean (variance, weight), sqrt (variance), and weight * mean,
     each based on moments1_calculate().
   - Two branches turn S->writer into a reader and run order statistics over
     it: one with percentile_create() (SS->percentile for CTSF_PTILE,
     otherwise 0.5) and one with mode_create(); the result is stored in
     S->ovalue.

   NOTE(review): many short lines (case labels, braces, simple returns and
   the "else" arm of the percentage expressions) are absent from this
   excerpt, so the remaining branches are not described. */
3138 ctables_summary_value (const struct ctables_cell *cell,
3139 union ctables_summary *s,
3140 const struct ctables_summary_spec *ss)
3142 switch (ss->function)
3149 case CTSF_ROWPCT_COUNT:
3150 case CTSF_COLPCT_COUNT:
3151 case CTSF_TABLEPCT_COUNT:
3152 case CTSF_SUBTABLEPCT_COUNT:
3153 case CTSF_LAYERPCT_COUNT:
3154 case CTSF_LAYERROWPCT_COUNT:
3155 case CTSF_LAYERCOLPCT_COUNT:
3157 enum ctables_domain_type d = ctables_function_domain (ss->function);
3158 return (cell->domains[d]->e_count
3159 ? s->count / cell->domains[d]->e_count * 100
3163 case CTSF_UROWPCT_COUNT:
3164 case CTSF_UCOLPCT_COUNT:
3165 case CTSF_UTABLEPCT_COUNT:
3166 case CTSF_USUBTABLEPCT_COUNT:
3167 case CTSF_ULAYERPCT_COUNT:
3168 case CTSF_ULAYERROWPCT_COUNT:
3169 case CTSF_ULAYERCOLPCT_COUNT:
3171 enum ctables_domain_type d = ctables_function_domain (ss->function);
3172 return (cell->domains[d]->u_count
3173 ? s->count / cell->domains[d]->u_count * 100
3177 case CTSF_ROWPCT_VALIDN:
3178 case CTSF_COLPCT_VALIDN:
3179 case CTSF_TABLEPCT_VALIDN:
3180 case CTSF_SUBTABLEPCT_VALIDN:
3181 case CTSF_LAYERPCT_VALIDN:
3182 case CTSF_LAYERROWPCT_VALIDN:
3183 case CTSF_LAYERCOLPCT_VALIDN:
3185 enum ctables_domain_type d = ctables_function_domain (ss->function);
3186 return (cell->domains[d]->e_valid
3187 ? s->count / cell->domains[d]->e_valid * 100
3191 case CTSF_UROWPCT_VALIDN:
3192 case CTSF_UCOLPCT_VALIDN:
3193 case CTSF_UTABLEPCT_VALIDN:
3194 case CTSF_USUBTABLEPCT_VALIDN:
3195 case CTSF_ULAYERPCT_VALIDN:
3196 case CTSF_ULAYERROWPCT_VALIDN:
3197 case CTSF_ULAYERCOLPCT_VALIDN:
3199 enum ctables_domain_type d = ctables_function_domain (ss->function);
3200 return (cell->domains[d]->u_valid
3201 ? s->count / cell->domains[d]->u_valid * 100
3205 case CTSF_ROWPCT_TOTALN:
3206 case CTSF_COLPCT_TOTALN:
3207 case CTSF_TABLEPCT_TOTALN:
3208 case CTSF_SUBTABLEPCT_TOTALN:
3209 case CTSF_LAYERPCT_TOTALN:
3210 case CTSF_LAYERROWPCT_TOTALN:
3211 case CTSF_LAYERCOLPCT_TOTALN:
3213 enum ctables_domain_type d = ctables_function_domain (ss->function);
3214 return (cell->domains[d]->e_total
3215 ? s->count / cell->domains[d]->e_total * 100
3219 case CTSF_UROWPCT_TOTALN:
3220 case CTSF_UCOLPCT_TOTALN:
3221 case CTSF_UTABLEPCT_TOTALN:
3222 case CTSF_USUBTABLEPCT_TOTALN:
3223 case CTSF_ULAYERPCT_TOTALN:
3224 case CTSF_ULAYERROWPCT_TOTALN:
3225 case CTSF_ULAYERCOLPCT_TOTALN:
3227 enum ctables_domain_type d = ctables_function_domain (ss->function);
3228 return (cell->domains[d]->u_total
3229 ? s->count / cell->domains[d]->u_total * 100
3250 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3256 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3263 double weight, variance;
3264 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3265 return calc_semean (variance, weight);
3272 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3273 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3279 double weight, mean;
3280 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3281 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3285 case CTSF_UVARIANCE:
3288 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3292 case CTSF_ROWPCT_SUM:
3293 case CTSF_COLPCT_SUM:
3294 case CTSF_TABLEPCT_SUM:
3295 case CTSF_SUBTABLEPCT_SUM:
3296 case CTSF_LAYERPCT_SUM:
3297 case CTSF_LAYERROWPCT_SUM:
3298 case CTSF_LAYERCOLPCT_SUM:
3300 double weight, mean;
3301 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3302 if (weight == SYSMIS || mean == SYSMIS)
3304 enum ctables_domain_type d = ctables_function_domain (ss->function);
3305 double num = weight * mean;
3306 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3307 return denom != 0 ? num / denom * 100 : SYSMIS;
3309 case CTSF_UROWPCT_SUM:
3310 case CTSF_UCOLPCT_SUM:
3311 case CTSF_UTABLEPCT_SUM:
3312 case CTSF_USUBTABLEPCT_SUM:
3313 case CTSF_ULAYERPCT_SUM:
3314 case CTSF_ULAYERROWPCT_SUM:
3315 case CTSF_ULAYERCOLPCT_SUM:
3317 double weight, mean;
3318 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3319 if (weight == SYSMIS || mean == SYSMIS)
3321 enum ctables_domain_type d = ctables_function_domain (ss->function);
3322 double num = weight * mean;
3323 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3324 return denom != 0 ? num / denom * 100 : SYSMIS;
3333 struct casereader *reader = casewriter_make_reader (s->writer);
3336 struct percentile *ptile = percentile_create (
3337 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3338 struct order_stats *os = &ptile->parent;
3339 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3340 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3341 statistic_destroy (&ptile->parent.parent);
3349 struct casereader *reader = casewriter_make_reader (s->writer);
3352 struct mode *mode = mode_create ();
3353 struct order_stats *os = &mode->parent;
3354 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3355 s->ovalue = mode->mode;
3356 statistic_destroy (&mode->parent.parent);
3364 struct ctables_cell_sort_aux
3366 const struct ctables_nest *nest;
3367 enum pivot_axis_type a;
/* Three-way comparison callback for ordering an array of cell pointers.

   A_ and B_ each point to a `struct ctables_cell *' element, and AUX_ points
   to a struct ctables_cell_sort_aux naming the nest and axis to compare on.
   For every variable in the nest except the one at NEST->scale_idx, the two
   cells are ordered first by the address of their category; when they share
   a category, the branch chosen by the category's type compares their values
   with value_compare_3way() or their value labels with strcmp() (a value
   with a label sorts after one without), and two of the visible branches
   negate the result when the category's sort_ascending flag is clear.

   NOTE(review): most case labels and the "if (cmp)" / final return lines
   are absent from this excerpt, so which category types use which branch
   cannot be stated from here. */
3371 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3373 const struct ctables_cell_sort_aux *aux = aux_;
3374 struct ctables_cell *const *ap = a_;
3375 struct ctables_cell *const *bp = b_;
3376 const struct ctables_cell *a = *ap;
3377 const struct ctables_cell *b = *bp;
3379 const struct ctables_nest *nest = aux->nest;
3380 for (size_t i = 0; i < nest->n; i++)
3381 if (i != nest->scale_idx)
3383 const struct variable *var = nest->vars[i];
3384 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3385 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3386 if (a_cv->category != b_cv->category)
3387 return a_cv->category > b_cv->category ? 1 : -1;
3389 const union value *a_val = &a_cv->value;
3390 const union value *b_val = &b_cv->value;
3391 switch (a_cv->category->type)
3397 case CCT_POSTCOMPUTE:
3398 case CCT_EXCLUDED_MISSING:
3399 /* Must be equal. */
3407 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3415 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3417 return a_cv->category->sort_ascending ? cmp : -cmp;
3423 const char *a_label = var_lookup_value_label (var, a_val);
3424 const char *b_label = var_lookup_value_label (var, b_val);
3426 ? (b_label ? strcmp (a_label, b_label) : 1)
3427 : (b_label ? -1 : value_compare_3way (
3428 a_val, b_val, var_get_width (var))));
3430 return a_cv->category->sort_ascending ? cmp : -cmp;
3444 For each ctables_table:
3445 For each combination of row vars:
3446 For each combination of column vars:
3447 For each combination of layer vars:
3449 Make a table of row values:
3450 Sort entries by row values
3451 Assign a 0-based index to each actual value
3452 Construct a dimension
3453 Make a table of column values
3454 Make a table of layer values
3456 Fill the table entry using the indexes from before.
3459 static struct ctables_domain *
3460 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3461 enum ctables_domain_type domain)
3464 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3466 const struct ctables_nest *nest = s->nests[a];
3467 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3469 size_t v_idx = nest->domains[domain][i];
3470 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3471 hash = hash_pointer (cv->category, hash);
3472 if (cv->category->type != CCT_TOTAL
3473 && cv->category->type != CCT_SUBTOTAL
3474 && cv->category->type != CCT_POSTCOMPUTE)
3475 hash = value_hash (&cv->value,
3476 var_get_width (nest->vars[v_idx]), hash);
3480 struct ctables_domain *d;
3481 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3483 const struct ctables_cell *df = d->example;
3484 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3486 const struct ctables_nest *nest = s->nests[a];
3487 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3489 size_t v_idx = nest->domains[domain][i];
3490 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3491 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3492 if (cv1->category != cv2->category
3493 || (cv1->category->type != CCT_TOTAL
3494 && cv1->category->type != CCT_SUBTOTAL
3495 && cv1->category->type != CCT_POSTCOMPUTE
3496 && !value_equal (&cv1->value, &cv2->value,
3497 var_get_width (nest->vars[v_idx]))))
3506 struct ctables_sum *sums = (s->table->n_sum_vars
3507 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3510 d = xmalloc (sizeof *d);
3511 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3512 hmap_insert (&s->domains[domain], &d->node, hash);
3516 static struct substring
3517 rtrim_value (const union value *v, const struct variable *var)
3519 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3520 var_get_width (var));
3521 ss_rtrim (&s, ss_cstr (" "));
3526 in_string_range (const union value *v, const struct variable *var,
3527 const struct substring *srange)
3529 struct substring s = rtrim_value (v, var);
3530 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3531 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Searches the categories in C, from the last to the first, for one that
   matches value V of variable VAR.

   The visible tests compare V against a category's number, its string
   (after right-trimming V), its numeric range (where -DBL_MAX and DBL_MAX
   act as open bounds), its string range, and VAR's missing-value
   definition.  One branch yields the category only if it has
   include_missing set or V is not missing.  If the loop finishes without a
   result, the function returns a null pointer for a missing value and
   OTHERNM otherwise.

   NOTE(review): the case labels, the statements that follow each visible
   test, the assignment to OTHERNM, and the statement guarded by the initial
   system-missing check are absent from this excerpt. */
3534 static const struct ctables_category *
3535 ctables_categories_match (const struct ctables_categories *c,
3536 const union value *v, const struct variable *var)
3538 if (var_is_numeric (var) && v->f == SYSMIS)
3541 const struct ctables_category *othernm = NULL;
3542 for (size_t i = c->n_cats; i-- > 0; )
3544 const struct ctables_category *cat = &c->cats[i];
3548 if (cat->number == v->f)
3553 if (ss_equals (cat->string, rtrim_value (v, var)))
3558 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3559 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3564 if (in_string_range (v, var, cat->srange))
3569 if (var_is_value_missing (var, v))
3573 case CCT_POSTCOMPUTE:
3588 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3591 case CCT_EXCLUDED_MISSING:
3596 return var_is_value_missing (var, v) ? NULL : othernm;
3599 static const struct ctables_category *
3600 ctables_categories_total (const struct ctables_categories *c)
3602 const struct ctables_category *first = &c->cats[0];
3603 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3604 return (first->type == CCT_TOTAL ? first
3605 : last->type == CCT_TOTAL ? last
/* Looks up, in section S, the cell for case C classified into categories
   CATS, and builds a new cell when the hash lookup finds no match.

   The lookup key covers every variable except each nest's scale variable:
   the category pointer always contributes to the hash, and the case's value
   contributes too unless the category is a total, subtotal or postcompute.

   For a new cell the visible code clears omit_domains and postcompute,
   allocates one ctables_cell_value per nest variable on each axis, and
   stores the category and a clone of the case's value in each.  For
   total-like categories it ORs an axis-specific set of CTDT_* bits into
   omit_domains, and it sets the postcompute flag for CCT_POSTCOMPUTE
   categories.  It then allocates and initializes one summary per summary
   specification of the summary axis's nest (indexed by cell->sv), attaches a
   domain of every type via ctables_domain_insert(), and inserts the cell
   into S->cells.

   NOTE(review): short lines (braces, returns, the code guarded by the
   cat->hide test, and whatever delimits the block that formats a cell name
   into `name') are absent from this excerpt; the declaration of `name'
   is commented out, so that block is presumably compiled out -- confirm. */
3609 static struct ctables_cell *
3610 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3611 const struct ctables_category *cats[PIVOT_N_AXES][10])
3614 enum ctables_summary_variant sv = CSV_CELL;
3615 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3617 const struct ctables_nest *nest = s->nests[a];
3618 for (size_t i = 0; i < nest->n; i++)
3619 if (i != nest->scale_idx)
3621 hash = hash_pointer (cats[a][i], hash);
3622 if (cats[a][i]->type != CCT_TOTAL
3623 && cats[a][i]->type != CCT_SUBTOTAL
3624 && cats[a][i]->type != CCT_POSTCOMPUTE)
3625 hash = value_hash (case_data (c, nest->vars[i]),
3626 var_get_width (nest->vars[i]), hash);
3632 struct ctables_cell *cell;
3633 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3635 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3637 const struct ctables_nest *nest = s->nests[a];
3638 for (size_t i = 0; i < nest->n; i++)
3639 if (i != nest->scale_idx
3640 && (cats[a][i] != cell->axes[a].cvs[i].category
3641 || (cats[a][i]->type != CCT_TOTAL
3642 && cats[a][i]->type != CCT_SUBTOTAL
3643 && cats[a][i]->type != CCT_POSTCOMPUTE
3644 && !value_equal (case_data (c, nest->vars[i]),
3645 &cell->axes[a].cvs[i].value,
3646 var_get_width (nest->vars[i])))))
3655 cell = xmalloc (sizeof *cell);
3658 cell->omit_domains = 0;
3659 cell->postcompute = false;
3660 //struct string name = DS_EMPTY_INITIALIZER;
3661 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3663 const struct ctables_nest *nest = s->nests[a];
3664 cell->axes[a].cvs = (nest->n
3665 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3667 for (size_t i = 0; i < nest->n; i++)
3669 const struct ctables_category *cat = cats[a][i];
3670 const struct variable *var = nest->vars[i];
3671 const union value *value = case_data (c, var);
3672 if (i != nest->scale_idx)
3674 const struct ctables_category *subtotal = cat->subtotal;
3675 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3678 if (cat->type == CCT_TOTAL
3679 || cat->type == CCT_SUBTOTAL
3680 || cat->type == CCT_POSTCOMPUTE)
3682 /* XXX these should be more encompassing I think.*/
3686 case PIVOT_AXIS_COLUMN:
3687 cell->omit_domains |= ((1u << CTDT_TABLE) |
3688 (1u << CTDT_LAYER) |
3689 (1u << CTDT_LAYERCOL) |
3690 (1u << CTDT_SUBTABLE) |
3693 case PIVOT_AXIS_ROW:
3694 cell->omit_domains |= ((1u << CTDT_TABLE) |
3695 (1u << CTDT_LAYER) |
3696 (1u << CTDT_LAYERROW) |
3697 (1u << CTDT_SUBTABLE) |
3700 case PIVOT_AXIS_LAYER:
3701 cell->omit_domains |= ((1u << CTDT_TABLE) |
3702 (1u << CTDT_LAYER));
3706 if (cat->type == CCT_POSTCOMPUTE)
3707 cell->postcompute = true;
3710 cell->axes[a].cvs[i].category = cat;
3711 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3714 if (i != nest->scale_idx)
3716 if (!ds_is_empty (&name))
3717 ds_put_cstr (&name, ", ");
3718 char *value_s = data_out (value, var_get_encoding (var),
3719 var_get_print_format (var),
3720 settings_get_fmt_settings ());
3721 if (cat->type == CCT_TOTAL
3722 || cat->type == CCT_SUBTOTAL
3723 || cat->type == CCT_POSTCOMPUTE)
3724 ds_put_format (&name, "%s=total", var_get_name (var));
3726 ds_put_format (&name, "%s=%s", var_get_name (var),
3727 value_s + strspn (value_s, " "));
3733 //cell->name = ds_steal_cstr (&name);
3735 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3736 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3737 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3738 for (size_t i = 0; i < specs->n; i++)
3739 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3740 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3741 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3742 hmap_insert (&s->cells, &cell->node, hash);
3747 is_scale_missing (const struct ctables_summary_spec_set *specs,
3748 const struct ccase *c)
3750 if (!specs->is_scale)
3753 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3756 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3758 const struct variable *var = specs->listwise_vars[i];
3759 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, already classified into categories CATS, to its cell in
   section S.

   The cell is found or created with ctables_cell_insert__().  Each of the
   cell's summaries is then updated through ctables_summary_add(), and the
   cell's domains accumulate D_WEIGHT and E_WEIGHT into their total, count
   and valid tallies as well as the weighted (e_sum) and unweighted (u_sum)
   sums of the table's sum variables, skipping sum variables whose value in
   C is missing.

   NOTE(review): short lines (braces, and presumably the unweighted u_*
   tallies and the is_missing test) are absent from this excerpt. */
3767 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3768 const struct ctables_category *cats[PIVOT_N_AXES][10],
3769 bool is_missing, bool excluded_missing,
3770 double d_weight, double e_weight)
3772 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3773 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3775 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3777 bool scale_missing = is_scale_missing (specs, c);
3778 for (size_t i = 0; i < specs->n; i++)
3779 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3780 specs->var, case_data (c, specs->var), specs->is_scale,
3781 scale_missing, is_missing, excluded_missing,
3782 d_weight, e_weight);
3783 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* NOTE(review): `&&' is a logical AND, so this condition only tests whether
   omit_domains is nonzero; any omit bit suppresses accumulation into every
   domain type, not just type DT.  A per-domain bit test would need `&', but
   that changes which domains are updated and the omit masks are marked XXX
   as incomplete -- confirm the intent before changing it. */
3784 if (!(cell->omit_domains && (1u << dt)))
3786 struct ctables_domain *d = cell->domains[dt];
3787 d->d_total += d_weight;
3788 d->e_total += e_weight;
3790 if (!excluded_missing)
3792 d->d_count += d_weight;
3793 d->e_count += e_weight;
3798 d->d_valid += d_weight;
3799 d->e_valid += e_weight;
3802 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3804 /* XXX listwise_missing??? */
3805 const struct variable *var = s->table->sum_vars[i];
3806 double addend = case_num (c, var);
3807 if (!var_is_num_missing (var, addend))
3809 struct ctables_sum *sum = &d->sums[i];
3810 sum->e_sum += addend * e_weight;
3811 sum->u_sum += addend;
3819 recurse_totals (struct ctables_section *s, const struct ccase *c,
3820 const struct ctables_category *cats[PIVOT_N_AXES][10],
3821 bool is_missing, bool excluded_missing,
3822 double d_weight, double e_weight,
3823 enum pivot_axis_type start_axis, size_t start_nest)
3825 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3827 const struct ctables_nest *nest = s->nests[a];
3828 for (size_t i = start_nest; i < nest->n; i++)
3830 if (i == nest->scale_idx)
3833 const struct variable *var = nest->vars[i];
3835 const struct ctables_category *total = ctables_categories_total (
3836 s->table->categories[var_get_dict_index (var)]);
3839 const struct ctables_category *save = cats[a][i];
3841 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3842 d_weight, e_weight);
3843 recurse_totals (s, c, cats, is_missing, excluded_missing,
3844 d_weight, e_weight, a, i + 1);
3853 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3854 const struct ctables_category *cats[PIVOT_N_AXES][10],
3855 bool is_missing, bool excluded_missing,
3856 double d_weight, double e_weight,
3857 enum pivot_axis_type start_axis, size_t start_nest)
3859 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3861 const struct ctables_nest *nest = s->nests[a];
3862 for (size_t i = start_nest; i < nest->n; i++)
3864 if (i == nest->scale_idx)
3867 const struct ctables_category *save = cats[a][i];
3870 cats[a][i] = save->subtotal;
3871 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3872 d_weight, e_weight);
3873 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3874 d_weight, e_weight, a, i + 1);
3883 ctables_add_occurrence (const struct variable *var,
3884 const union value *value,
3885 struct hmap *occurrences)
3887 int width = var_get_width (var);
3888 unsigned int hash = value_hash (value, width, 0);
3890 struct ctables_occurrence *o;
3891 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3893 if (value_equal (value, &o->value, width))
3896 o = xmalloc (sizeof *o);
3897 value_clone (&o->value, value, width);
3898 hmap_insert (occurrences, &o->node, hash);
/* Classifies case C against the categories of every non-scale variable in
   section S and adds it, with weights D_WEIGHT and E_WEIGHT, to the matching
   cell via ctables_cell_add__(), and then to the applicable total and
   subtotal cells via recurse_totals() and recurse_subtotals().

   A variable whose value matches none of its categories is assigned a
   static placeholder category of type CCT_EXCLUDED_MISSING, which also sets
   EXCLUDED_MISSING.  Unless that happened, each non-scale variable's value
   is additionally recorded in S->occurrences.

   NOTE(review): CATS has a fixed second dimension of 10 (marked XXX), and
   nothing visible here checks that a nest has at most 10 variables.  Short
   lines (braces, the statements that set IS_MISSING, and the tests around
   the placeholder category) are absent from this excerpt. */
3902 ctables_cell_insert (struct ctables_section *s,
3903 const struct ccase *c,
3904 double d_weight, double e_weight)
3906 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3908 /* Does at least one categorical variable have a missing value in an included
3909 or excluded category? */
3910 bool is_missing = false;
3912 /* Does at least one categorical variable have a missing value in an excluded
3914 bool excluded_missing = false;
3916 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3918 const struct ctables_nest *nest = s->nests[a];
3919 for (size_t i = 0; i < nest->n; i++)
3921 if (i == nest->scale_idx)
3924 const struct variable *var = nest->vars[i];
3925 const union value *value = case_data (c, var);
3927 bool var_missing = var_is_value_missing (var, value) != 0;
3931 cats[a][i] = ctables_categories_match (
3932 s->table->categories[var_get_dict_index (var)], value, var);
3938 static const struct ctables_category cct_excluded_missing = {
3939 .type = CCT_EXCLUDED_MISSING,
3942 cats[a][i] = &cct_excluded_missing;
3943 excluded_missing = true;
3948 if (!excluded_missing)
3949 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3951 const struct ctables_nest *nest = s->nests[a];
3952 for (size_t i = 0; i < nest->n; i++)
3953 if (i != nest->scale_idx)
3955 const struct variable *var = nest->vars[i];
3956 const union value *value = case_data (c, var);
3957 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3961 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3962 d_weight, e_weight);
3964 //if (!excluded_missing)
3966 recurse_totals (s, c, cats, is_missing, excluded_missing,
3967 d_weight, e_weight, 0, 0);
3968 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3969 d_weight, e_weight, 0, 0);
3975 const struct ctables_summary_spec_set *set;
3980 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3982 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3983 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3984 if (as->function != bs->function)
3985 return as->function > bs->function ? 1 : -1;
3986 else if (as->percentile != bs->percentile)
3987 return as->percentile < bs->percentile ? 1 : -1;
3989 const char *as_label = as->label ? as->label : "";
3990 const char *bs_label = bs->label ? bs->label : "";
3991 return strcmp (as_label, bs_label);
3994 static struct pivot_value *
3995 ctables_category_create_label__ (const struct ctables_category *cat,
3996 const struct variable *var,
3997 const union value *value)
3999 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4000 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4001 : pivot_value_new_var_value (var, value));
4004 static struct pivot_value *
4005 ctables_postcompute_label (const struct ctables_categories *cats,
4006 const struct ctables_category *cat,
4007 const struct variable *var,
4008 const union value *value)
4010 struct substring in = ss_cstr (cat->pc->label);
4011 struct substring target = ss_cstr (")LABEL[");
4013 struct string out = DS_EMPTY_INITIALIZER;
4016 size_t chunk = ss_find_substring (in, target);
4017 if (chunk == SIZE_MAX)
4019 if (ds_is_empty (&out))
4020 return pivot_value_new_user_text (in.string, in.length);
4023 ds_put_substring (&out, in);
4024 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
4028 ds_put_substring (&out, ss_head (in, chunk));
4029 ss_advance (&in, chunk + target.length);
4031 struct substring idx_s;
4032 if (!ss_get_until (&in, ']', &idx_s))
4035 long int idx = strtol (idx_s.string, &tail, 10);
4036 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4039 struct ctables_category *cat2 = &cats->cats[idx - 1];
4040 struct pivot_value *label2
4041 = ctables_category_create_label__ (cat2, var, value);
4042 char *label2_s = pivot_value_to_string_defaults (label2);
4043 ds_put_cstr (&out, label2_s);
4045 pivot_value_destroy (label2);
4050 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4053 static struct pivot_value *
4054 ctables_category_create_label (const struct ctables_categories *cats,
4055 const struct ctables_category *cat,
4056 const struct variable *var,
4057 const union value *value)
4059 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4060 ? ctables_postcompute_label (cats, cat, var, value)
4061 : ctables_category_create_label__ (cat, var, value));
4064 static struct ctables_value *
4065 ctables_value_find__ (struct ctables_table *t, const union value *value,
4066 int width, unsigned int hash)
4068 struct ctables_value *clv;
4069 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4070 hash, &t->clabels_values_map)
4071 if (value_equal (value, &clv->value, width))
4077 ctables_value_insert (struct ctables_table *t, const union value *value,
4080 unsigned int hash = value_hash (value, width, 0);
4081 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4084 clv = xmalloc (sizeof *clv);
4085 value_clone (&clv->value, value, width);
4086 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry in T->clabels_values_map that equals VALUE, which has
   the given WIDTH, or a null pointer if there is none. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
/* Recursively enumerates the combinations of nest indexes IX[] over the
   axes of table T, starting from axis A, and appends one section to
   T->sections for each complete combination.

   While A is a valid axis, IX[A] runs over that axis's nests (at least one
   iteration, even if the axis's stack is empty) and the function recurses
   on the next axis.  Once every axis has an index, the next slot in
   T->sections is initialized with an empty cell map, and the visible code
   allocates and initializes one occurrences map per variable of the
   selected nest on each axis as well as one domain map per domain type.

   NOTE(review): short lines (braces, the "else", the remaining section
   initializers, and any guard for axes with an empty stack) are absent from
   this excerpt. */
4099 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4100 size_t ix[PIVOT_N_AXES])
4102 if (a < PIVOT_N_AXES)
4104 size_t limit = MAX (t->stacks[a].n, 1);
4105 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4106 ctables_table_add_section (t, a + 1, ix);
4110 struct ctables_section *s = &t->sections[t->n_sections++];
4111 *s = (struct ctables_section) {
4113 .cells = HMAP_INITIALIZER (s->cells),
4115 for (a = 0; a < PIVOT_N_AXES; a++)
4118 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4120 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4121 for (size_t i = 0; i < nest->n; i++)
4122 hmap_init (&s->occurrences[a][i]);
4124 for (size_t i = 0; i < N_CTDTS; i++)
4125 hmap_init (&s->domains[i]);
4130 ctpo_add (double a, double b)
4136 ctpo_sub (double a, double b)
4142 ctpo_mul (double a, double b)
4148 ctpo_div (double a, double b)
4150 return b ? a / b : SYSMIS;
/* Postcompute exponentiation operator: computes A raised to the power B
   with pow(), after saving the caller's errno in save_errno.

   NOTE(review): the lines that follow the pow() call (presumably checking
   errno, restoring it, and returning the result) are absent from this
   excerpt. */
4154 ctpo_pow (double a, double b)
4156 int save_errno = errno;
4158 double result = pow (a, b);
4166 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression.

   The visible members are the cell being computed, the section that
   contains it, the categories of the postcompute's variable, the axis on
   which the postcompute appears (pc_a), and the format used to parse
   category values in the expression.

   NOTE(review): other members used by the evaluator (pc_a_idx and
   summary_idx) are declared on lines absent from this excerpt. */
4171 struct ctables_pcexpr_evaluate_ctx
4173 const struct ctables_cell *cell;
4174 const struct ctables_section *section;
4175 const struct ctables_categories *cats;
4176 enum pivot_axis_type pc_a;
4179 enum fmt_type parse_format;
4182 static double ctables_pcexpr_evaluate (
4183 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4186 ctables_pcexpr_evaluate_nonterminal (
4187 const struct ctables_pcexpr_evaluate_ctx *ctx,
4188 const struct ctables_pcexpr *e, size_t n_args,
4189 double evaluate (double, double))
4191 double args[2] = { 0, 0 };
4192 for (size_t i = 0; i < n_args; i++)
4194 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4195 if (!isfinite (args[i]) || args[i] == SYSMIS)
4198 return evaluate (args[0], args[1]);
/* Evaluates one category operand of a postcompute expression.

   The target cell has the same categories and values as CTX->cell on every
   non-scale variable, except that the variable at index CTX->pc_a_idx on
   axis CTX->pc_a uses PC_CV instead.  The function hashes that key the same
   way cells are hashed on insertion, searches CTX->section->cells for a
   cell with an equal key, and returns that cell's summary number
   CTX->summary_idx as computed by ctables_summary_value().

   NOTE(review): short lines (braces, the jumps between the search loop and
   the final computation, and the value returned when no cell matches) are
   absent from this excerpt. */
4202 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4203 const struct ctables_cell_value *pc_cv)
4205 const struct ctables_section *s = ctx->section;
4208 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4210 const struct ctables_nest *nest = s->nests[a];
4211 for (size_t i = 0; i < nest->n; i++)
4212 if (i != nest->scale_idx)
4214 const struct ctables_cell_value *cv
4215 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4216 : &ctx->cell->axes[a].cvs[i]);
4217 hash = hash_pointer (cv->category, hash);
4218 if (cv->category->type != CCT_TOTAL
4219 && cv->category->type != CCT_SUBTOTAL
4220 && cv->category->type != CCT_POSTCOMPUTE)
4221 hash = value_hash (&cv->value,
4222 var_get_width (nest->vars[i]), hash);
4226 struct ctables_cell *tc;
4227 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4229 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4231 const struct ctables_nest *nest = s->nests[a];
4232 for (size_t i = 0; i < nest->n; i++)
4233 if (i != nest->scale_idx)
4235 const struct ctables_cell_value *p_cv
4236 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4237 : &ctx->cell->axes[a].cvs[i]);
4238 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4239 if (p_cv->category != t_cv->category
4240 || (p_cv->category->type != CCT_TOTAL
4241 && p_cv->category->type != CCT_SUBTOTAL
4242 && p_cv->category->type != CCT_POSTCOMPUTE
4243 && !value_equal (&p_cv->value,
4245 var_get_width (nest->vars[i]))))
4257 const struct ctables_table *t = s->table;
4258 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4259 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4260 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4261 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX, dispatching on the
   kind of expression node. */
4265 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4266 const struct ctables_pcexpr *e)
/* Range categories: add up the category value over every recorded
   occurrence of the postcompute variable that ctables_categories_match()
   maps to the range's category. */
4273 case CTPO_CAT_NRANGE:
4274 case CTPO_CAT_SRANGE:
4276 struct ctables_cell_value cv = {
4277 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4279 assert (cv.category != NULL);
4281 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4282 const struct ctables_occurrence *o;
4285 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4286 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4287 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4289 cv.value = o->value;
4290 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Single-category nodes: one lookup, with E->number supplied as the cell
   value. */
4295 case CTPO_CAT_NUMBER:
4296 case CTPO_CAT_MISSING:
4297 case CTPO_CAT_OTHERNM:
4298 case CTPO_CAT_SUBTOTAL:
4299 case CTPO_CAT_TOTAL:
4301 struct ctables_cell_value cv = {
4302 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4303 .value = { .f = e->number },
4305 assert (cv.category != NULL);
4306 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String category: when the variable is wider than the literal, the
   literal is copied into a temporary buffer padded with spaces to the
   variable's width; otherwise the literal's own bytes are used. */
4309 case CTPO_CAT_STRING:
4311 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4313 if (width > e->string.length)
4315 s = xmalloc (width);
4316 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4318 struct ctables_cell_value cv = {
4319 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4320 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4322 assert (cv.category != NULL);
4323 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: evaluate the operands and apply the matching ctpo_*()
   callback.  Negation is the only one given a single operand. */
4329 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4332 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4335 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4338 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4341 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4344 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Scans CELL's category values on every axis of section S for those whose
   category has type CCT_POSTCOMPUTE.  CELL must be flagged as a
   postcompute cell.  The category found is kept in pc_cat and its position
   within its axis is stored through PC_A_IDX_P; the function asserts that
   at least one postcompute category was found. */
4350 static const struct ctables_category *
4351 ctables_cell_postcompute (const struct ctables_section *s,
4352 const struct ctables_cell *cell,
4353 enum pivot_axis_type *pc_a_p,
4356 assert (cell->postcompute);
4357 const struct ctables_category *pc_cat = NULL;
4358 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4359 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4361 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4362 if (cv->category->type == CCT_POSTCOMPUTE)
4366 /* Multiple postcomputes cross each other. The value is
4371 pc_cat = cv->category;
4375 *pc_a_idx_p = pc_a_idx;
4379 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS.

   The postcompute category and its axis and position come from
   ctables_cell_postcompute().  If one of the postcompute's own summary
   specs has the same function and percentile as SS, *FORMAT and
   *IS_CTABLES_FORMAT are overwritten from that spec.  The result is the
   value of the postcompute's expression, evaluated against the categories
   of the postcompute variable. */
4384 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4385 const struct ctables_cell *cell,
4386 const struct ctables_summary_spec *ss,
4387 struct fmt_spec *format,
4388 bool *is_ctables_format,
4391 enum pivot_axis_type pc_a = 0;
4392 size_t pc_a_idx = 0;
4393 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4394 s, cell, &pc_a, &pc_a_idx);
4398 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a format override among the postcompute's summary specs. */
4401 for (size_t i = 0; i < pc->specs->n; i++)
4403 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4404 if (ss->function == ss2->function
4405 && ss->percentile == ss2->percentile)
4407 *format = ss2->format;
4408 *is_ctables_format = ss2->is_ctables_format;
/* The categories consulted during evaluation are those of the variable
   at the postcompute's position. */
4414 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4415 const struct ctables_categories *cats = s->table->categories[
4416 var_get_dict_index (var)];
4417 struct ctables_pcexpr_evaluate_ctx ctx = {
4422 .pc_a_idx = pc_a_idx,
4423 .summary_idx = summary_idx,
4424 .parse_format = pc_cat->parse_format,
4426 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Renders table T of CTABLES command CT as a pivot table and submits it.

   The pivot table gets an optional "Statistics" dimension for the summary
   functions, an optional dimension for category labels that were moved off
   their axis, and one dimension per axis whose categories are derived from
   the sorted cells of the sections on that axis.  Afterward every summary
   of every cell is written into the pivot table. */
4430 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4432 struct pivot_table *pt = pivot_table_create__ (
4434 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4435 : pivot_value_new_text (N_("Custom Tables"))),
4438 pivot_table_set_caption (
4439 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4441 pivot_table_set_corner_text (
4442 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate summary dimension is used when summary labels live on a
   different axis than the summaries, or when they are hidden and there is
   more than one summary spec. */
4444 bool summary_dimension = (t->summary_axis != t->slabels_axis
4445 || (!t->slabels_visible
4446 && t->summary_specs.n > 1));
4447 if (summary_dimension)
4449 struct pivot_dimension *d = pivot_dimension_create (
4450 pt, t->slabels_axis, N_("Statistics"));
4451 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4452 if (!t->slabels_visible)
4453 d->hide_all_labels = true;
4454 for (size_t i = 0; i < specs->n; i++)
4455 pivot_category_create_leaf (
4456 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A separate category dimension is used when category labels were moved
   off their axis, which is recorded by T->clabels_example being set. */
4459 bool categories_dimension = t->clabels_example != NULL;
4460 if (categories_dimension)
4462 struct pivot_dimension *d = pivot_dimension_create (
4463 pt, t->label_axis[t->clabels_from_axis],
4464 t->clabels_from_axis == PIVOT_AXIS_ROW
4465 ? N_("Row Categories")
4466 : N_("Column Categories"));
4467 const struct variable *var = t->clabels_example;
4468 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4469 for (size_t i = 0; i < t->n_clabels_values; i++)
4471 const struct ctables_value *value = t->clabels_values[i];
4472 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4473 assert (cat != NULL);
4474 pivot_category_create_leaf (d->root, ctables_category_create_label (
4475 c, cat, t->clabels_example,
4480 pivot_table_set_look (pt, ct->look);
4481 struct pivot_dimension *d[PIVOT_N_AXES];
4482 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4484 static const char *names[] = {
4485 [PIVOT_AXIS_ROW] = N_("Rows"),
4486 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4487 [PIVOT_AXIS_LAYER] = N_("Layers"),
4489 d[a] = (t->axes[a] || a == t->summary_axis
4490 ? pivot_dimension_create (pt, a, names[a])
4495 assert (t->axes[a]);
4497 for (size_t i = 0; i < t->stacks[a].n; i++)
4499 struct ctables_nest *nest = &t->stacks[a].nests[i];
4500 struct ctables_section **sections = xnmalloc (t->n_sections,
4502 size_t n_sections = 0;
4504 size_t n_total_cells = 0;
4505 size_t max_depth = 0;
4506 for (size_t j = 0; j < t->n_sections; j++)
4507 if (t->sections[j].nests[a] == nest)
4509 struct ctables_section *s = &t->sections[j];
4510 sections[n_sections++] = s;
4511 n_total_cells += s->cells.count;
4513 size_t depth = s->nests[a]->n;
4514 max_depth = MAX (depth, max_depth);
4517 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4519 size_t n_sorted = 0;
4521 for (size_t j = 0; j < n_sections; j++)
4523 struct ctables_section *s = sections[j];
4525 struct ctables_cell *cell;
4526 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4528 sorted[n_sorted++] = cell;
4529 assert (n_sorted <= n_total_cells);
4532 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4533 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4536 for (size_t j = 0; j < n_sorted; j++)
4538 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4543 struct ctables_level
4545 enum ctables_level_type
4547 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4548 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4549 CTL_SUMMARY, /* Summary functions. */
4553 enum settings_value_show vlabel; /* CTL_VAR only. */
4556 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4557 size_t n_levels = 0;
4558 for (size_t k = 0; k < nest->n; k++)
4560 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4561 if (vlabel != CTVL_NONE)
4563 levels[n_levels++] = (struct ctables_level) {
4565 .vlabel = (enum settings_value_show) vlabel,
4570 if (nest->scale_idx != k
4571 && (k != nest->n - 1 || t->label_axis[a] == a))
4573 levels[n_levels++] = (struct ctables_level) {
4574 .type = CTL_CATEGORY,
4580 if (!summary_dimension && a == t->slabels_axis)
4582 levels[n_levels++] = (struct ctables_level) {
4583 .type = CTL_SUMMARY,
4584 .var_idx = SIZE_MAX,
4588 /* Pivot categories:
4590 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4591 - category for nest->vars[0], if nest->scale_idx != 0
4592 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4593 - category for nest->vars[1], if nest->scale_idx != 1
4595 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4596 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4597 - summary function, if 'a == t->slabels_axis && a ==
4600 Additional dimensions:
4602 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4604 - If 't->label_axis[b] == a' for some 'b != a', add a category
4609 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4611 for (size_t j = 0; j < n_sorted; j++)
4613 struct ctables_cell *cell = sorted[j];
4614 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4616 size_t n_common = 0;
/* Count the leading levels that this cell shares with the previous one.  A
   category level is shared only when both cells have the same category
   and, for categories other than subtotal, total, and postcompute, equal
   values.  value_equal() takes the variable's width, as at its other call
   site in this file; passing the variable's type instead would compare
   string values by their first byte only. */
4619 for (; n_common < n_levels; n_common++)
4621 const struct ctables_level *level = &levels[n_common];
4622 if (level->type == CTL_CATEGORY)
4624 size_t var_idx = level->var_idx;
4625 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4626 if (prev->axes[a].cvs[var_idx].category != c)
4628 else if (c->type != CCT_SUBTOTAL
4629 && c->type != CCT_TOTAL
4630 && c->type != CCT_POSTCOMPUTE
4631 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4632 &cell->axes[a].cvs[var_idx].value,
4633 var_get_width (nest->vars[var_idx])))
4639 for (size_t k = n_common; k < n_levels; k++)
4641 const struct ctables_level *level = &levels[k];
4642 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4643 if (level->type == CTL_SUMMARY)
4645 assert (k == n_levels - 1);
4647 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4648 for (size_t m = 0; m < specs->n; m++)
4650 int leaf = pivot_category_create_leaf (
4651 parent, ctables_summary_label (&specs->specs[m],
4659 const struct variable *var = nest->vars[level->var_idx];
4660 struct pivot_value *label;
4661 if (level->type == CTL_VAR)
4663 label = pivot_value_new_variable (var);
4664 label->variable.show = level->vlabel;
4666 else if (level->type == CTL_CATEGORY)
4668 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4669 label = ctables_category_create_label (
4670 t->categories[var_get_dict_index (var)],
4671 cv->category, var, &cv->value);
4676 if (k == n_levels - 1)
4677 prev_leaf = pivot_category_create_leaf (parent, label);
4679 groups[k] = pivot_category_create_group__ (parent, label);
4683 cell->axes[a].leaf = prev_leaf;
4692 for (size_t i = 0; i < t->n_sections; i++)
4694 struct ctables_section *s = &t->sections[i];
4696 struct ctables_cell *cell;
4697 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4702 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4703 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4704 for (size_t j = 0; j < specs->n; j++)
4707 size_t n_dindexes = 0;
4709 if (summary_dimension)
4710 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4712 if (categories_dimension)
4714 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4715 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4716 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4717 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4720 dindexes[n_dindexes++] = ctv->leaf;
4723 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4726 int leaf = cell->axes[a].leaf;
4727 if (a == t->summary_axis && !summary_dimension)
4729 dindexes[n_dindexes++] = leaf;
4732 const struct ctables_summary_spec *ss = &specs->specs[j];
4734 struct fmt_spec format = specs->specs[j].format;
4735 bool is_ctables_format = ss->is_ctables_format;
4736 double d = (cell->postcompute
4737 ? ctables_cell_calculate_postcompute (
4738 s, cell, ss, &format, &is_ctables_format, j)
4739 : ctables_summary_value (cell, &cell->summaries[j],
4742 struct pivot_value *value;
4743 if (ct->hide_threshold != 0
4744 && d < ct->hide_threshold
4745 && ctables_summary_function_is_count (ss->function))
4747 value = pivot_value_new_user_text_nocopy (
4748 xasprintf ("<%d", ct->hide_threshold));
4750 else if (d == 0 && ct->zero)
4751 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4752 else if (d == SYSMIS && ct->missing)
4753 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4754 else if (is_ctables_format)
4756 char *s = data_out_stretchy (&(union value) { .f = d },
4758 &ct->ctables_formats, NULL);
4759 value = pivot_value_new_user_text_nocopy (s);
4763 value = pivot_value_new_number (d);
4764 value->numeric.format = format;
4766 pivot_table_put (pt, dindexes, n_dindexes, value);
4771 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T may be placed on
   T->label_axis[A].  Records A as T->clabels_from_axis and the last
   variable of the axis's first nest as T->clabels_example.  Each violated
   requirement is reported with msg (SE, ...), naming the subcommand
   (ROWLABELS or COLLABELS) and the position (LAYER or OPPOSITE). */
4775 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4777 enum pivot_axis_type label_pos = t->label_axis[a];
4781 t->clabels_from_axis = a;
4783 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4784 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4786 const struct ctables_stack *stack = &t->stacks[a];
4790 const struct ctables_nest *n0 = &stack->nests[0];
4793 assert (stack->n == 1);
4797 const struct variable *v0 = n0->vars[n0->n - 1];
4798 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4799 t->clabels_example = v0;
/* Moved labels cannot be combined with categories sorted by a summary
   function (CCT_FUNCTION). */
4801 for (size_t i = 0; i < c0->n_cats; i++)
4802 if (c0->cats[i].type == CCT_FUNCTION)
4804 msg (SE, _("%s=%s is not allowed with sorting based "
4805 "on a summary function."),
4806 subcommand_name, pos_name);
/* The variable whose labels move must not be the nest's scale variable. */
4809 if (n0->n - 1 == n0->scale_idx)
4811 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4812 "but %s is a scale variable."),
4813 subcommand_name, pos_name, var_get_name (v0));
/* The last variable of every other nest must be categorical too and must
   agree with the first nest's in width, value labels, and category
   specifications. */
4817 for (size_t i = 1; i < stack->n; i++)
4819 const struct ctables_nest *ni = &stack->nests[i];
4821 const struct variable *vi = ni->vars[ni->n - 1];
4822 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4824 if (ni->n - 1 == ni->scale_idx)
4826 msg (SE, _("%s=%s requires the variables to be moved to be "
4827 "categorical, but %s is a scale variable."),
4828 subcommand_name, pos_name, var_get_name (vi));
4831 if (var_get_width (v0) != var_get_width (vi))
4833 msg (SE, _("%s=%s requires the variables to be "
4834 "moved to have the same width, but %s has "
4835 "width %d and %s has width %d."),
4836 subcommand_name, pos_name,
4837 var_get_name (v0), var_get_width (v0),
4838 var_get_name (vi), var_get_width (vi));
4841 if (!val_labs_equal (var_get_value_labels (v0),
4842 var_get_value_labels (vi)))
4844 msg (SE, _("%s=%s requires the variables to be "
4845 "moved to have the same value labels, but %s "
4846 "and %s have different value labels."),
4847 subcommand_name, pos_name,
4848 var_get_name (v0), var_get_name (vi));
4851 if (!ctables_categories_equal (c0, ci))
4853 msg (SE, _("%s=%s requires the variables to be "
4854 "moved to have the same category "
4855 "specifications, but %s and %s have different "
4856 "category specifications."),
4857 subcommand_name, pos_name,
4858 var_get_name (v0), var_get_name (vi));
/* Registers VAR in the array *SUM_VARS, which holds *N variables and has
   room for *ALLOCATED.  The array is first scanned for an entry equal to
   VAR; the array is grown with x2nrealloc() when it is full, and VAR is
   stored at index *N.  enumerate_sum_vars() stores this function's result
   as a summary spec's sum_var_idx. */
4867 add_sum_var (struct variable *var,
4868 struct variable ***sum_vars, size_t *n, size_t *allocated)
4870 for (size_t i = 0; i < *n; i++)
4871 if (var == (*sum_vars)[i])
4874 if (*n >= *allocated)
4875 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4876 (*sum_vars)[*n] = var;
/* Walks axis expression A, collecting variables into *SUM_VARS (with *N
   entries and *ALLOCATED capacity).  For each summary specification of A,
   in every summary variant, whose function satisfies
   ctables_function_is_pctsum(), A's variable is registered through
   add_sum_var() and the result is stored in the spec's sum_var_idx.  The
   two subexpressions A->subs[0] and A->subs[1] are visited recursively. */
4881 enumerate_sum_vars (const struct ctables_axis *a,
4882 struct variable ***sum_vars, size_t *n, size_t *allocated)
4890 for (size_t i = 0; i < N_CSVS; i++)
4891 for (size_t j = 0; j < a->specs[i].n; j++)
4893 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4894 if (ctables_function_is_pctsum (spec->function))
4895 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4901 for (size_t i = 0; i < 2; i++)
4902 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution after parsing.

   For each axis this enumerates the stack of nests and fills in each
   nest's per-domain-type list of variable indexes; an axis without an
   expression gets a single empty nest.  Nests on the summary axis that
   lack summary specifications receive a default (mean for scale, count
   otherwise), listwise-missing variables are collected when requested, and
   the summary specifications of all nests are merged into
   T->summary_specs.  Finally the variables needed for sums are enumerated.

   The result is whether the label positions requested for the row and
   column axes are both acceptable. */
4908 ctables_prepare_table (struct ctables_table *t)
4910 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4913 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4915 for (size_t j = 0; j < t->stacks[a].n; j++)
4917 struct ctables_nest *nest = &t->stacks[a].nests[j];
4918 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* xnmalloc() checks the count-times-size multiplication for overflow,
   like the other array allocations in this file. */
4920 nest->domains[dt] = xnmalloc (nest->n, sizeof *nest->domains[dt]);
4921 nest->n_domains[dt] = 0;
/* Which variable indexes K end up in nest->domains[dt] depends on the
   domain type, the axis, and K's position relative to the scale variable
   and the end of the nest. */
4923 for (size_t k = 0; k < nest->n; k++)
4925 if (k == nest->scale_idx)
4934 if (a != PIVOT_AXIS_LAYER)
4941 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4942 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4943 : a == PIVOT_AXIS_ROW)
4945 if (k == nest->n - 1
4946 || (nest->scale_idx == nest->n - 1
4947 && k == nest->n - 2))
4953 if (a == PIVOT_AXIS_COLUMN)
4958 if (a == PIVOT_AXIS_ROW)
4963 nest->domains[dt][nest->n_domains[dt]++] = k;
4970 struct ctables_nest *nest = xmalloc (sizeof *nest);
4971 *nest = (struct ctables_nest) { .n = 0 };
4972 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4974 /* There's no point in moving labels away from an axis that has no
4975 labels, so avoid dealing with the special cases around that. */
4976 t->label_axis[a] = a;
4979 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4980 for (size_t i = 0; i < stack->n; i++)
4982 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were specified: install a single default spec and
   clone it for totals. */
4983 if (!nest->specs[CSV_CELL].n)
4985 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4986 specs->specs = xmalloc (sizeof *specs->specs);
4989 enum ctables_summary_function function
4990 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4992 *specs->specs = (struct ctables_summary_spec) {
4993 .function = function,
4994 .format = ctables_summary_default_format (function, specs->var),
4997 specs->var = nest->vars[0];
4999 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5000 &nest->specs[CSV_CELL]);
5002 else if (!nest->specs[CSV_TOTAL].n)
5003 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5004 &nest->specs[CSV_CELL]);
/* With listwise handling of scale missing values, gather the scale
   variables of the other nests in the same group. */
5006 if (t->ctables->smissing_listwise)
5008 struct variable **listwise_vars = NULL;
5010 size_t allocated = 0;
5012 for (size_t j = nest->group_head; j < stack->n; j++)
5014 const struct ctables_nest *other_nest = &stack->nests[j];
5015 if (other_nest->group_head != nest->group_head)
5018 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5021 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5022 sizeof *listwise_vars);
5023 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5026 for (size_t j = 0; j < N_CSVS; j++)
5028 nest->specs[j].listwise_vars = listwise_vars;
5029 nest->specs[j].n_listwise_vars = n;
/* Merge the summary specs of all nests and variants into T->summary_specs:
   repeatedly take the smallest remaining head item, append it, and point
   every equal head item at the appended entry through axis_idx. */
5034 struct ctables_summary_spec_set *merged = &t->summary_specs;
5035 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5037 for (size_t j = 0; j < stack->n; j++)
5039 const struct ctables_nest *nest = &stack->nests[j];
5041 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5042 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5047 struct merge_item min = items[0];
5048 for (size_t j = 1; j < n_left; j++)
5049 if (merge_item_compare_3way (&items[j], &min) < 0)
5052 if (merged->n >= merged->allocated)
5053 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5054 sizeof *merged->specs);
5055 merged->specs[merged->n++] = min.set->specs[min.ofs];
5057 for (size_t j = 0; j < n_left; )
5059 if (merge_item_compare_3way (&items[j], &min) == 0)
5061 struct merge_item *item = &items[j];
5062 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5063 if (++item->ofs >= item->set->n)
5065 items[j] = items[--n_left];
/* Diagnostic dump of the merged functions and of each spec's axis index. */
5074 for (size_t j = 0; j < merged->n; j++)
5075 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5077 for (size_t j = 0; j < stack->n; j++)
5079 const struct ctables_nest *nest = &stack->nests[j];
5080 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5082 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5083 for (size_t k = 0; k < specs->n; k++)
5084 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5085 specs->specs[k].axis_idx);
5091 size_t allocated_sum_vars = 0;
5092 enumerate_sum_vars (t->axes[t->summary_axis],
5093 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5095 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5096 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of table T, takes the value that case C holds
   for the nest's last variable and, subject to the checks below, records
   it with ctables_value_insert(). */
5100 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5101 enum pivot_axis_type a)
5103 struct ctables_stack *stack = &t->stacks[a];
5104 for (size_t i = 0; i < stack->n; i++)
5106 const struct ctables_nest *nest = &stack->nests[i];
5107 const struct variable *var = nest->vars[nest->n - 1];
5108 const union value *value = case_data (c, var);
/* System-missing numeric values are singled out here (presumably skipped
   -- confirm). */
5110 if (var_is_numeric (var) && value->f == SYSMIS)
/* Only values that match one of the variable's categories are recorded. */
5113 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5115 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sort().  A_ and B_ each point to an array
   element of type `struct ctables_value *'; WIDTH_ points to the int width
   with which value_compare_3way() compares the two values. */
5120 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5122 const struct ctables_value *const *ap = a_;
5123 const struct ctables_value *const *bp = b_;
5124 const struct ctables_value *a = *ap;
5125 const struct ctables_value *b = *bp;
5126 const int *width = width_;
5127 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of every entry in
   T->clabels_values_map sorted by value, and numbers each entry's leaf
   with its index in the sorted array. */
5131 ctables_sort_clabels_values (struct ctables_table *t)
5133 const struct variable *v0 = t->clabels_example;
5134 int width = var_get_width (v0);
5136 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add the example variable's labeled values that match one of its
   categories, so that they appear even if no case contained them. */
5139 const struct val_labs *val_labs = var_get_value_labels (v0);
5140 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5141 vl = val_labs_next (val_labs, vl))
5142 if (ctables_categories_match (c0, &vl->value, v0))
5143 ctables_value_insert (t, &vl->value, width);
/* Flatten the map into an array, then sort it by value. */
5146 size_t n = hmap_count (&t->clabels_values_map);
5147 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5149 struct ctables_value *clv;
5151 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5152 t->clabels_values[i++] = clv;
5153 t->n_clabels_values = n;
5156 sort (t->clabels_values, n, sizeof *t->clabels_values,
5157 compare_clabels_values_3way, &width);
5159 for (size_t i = 0; i < n; i++)
5160 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the categories in CATS call
   for.  Depending on the category, the value comes from the category
   itself (a number or a string) or from those of VAR's value labels that
   meet the category's criterion. */
5164 ctables_add_category_occurrences (const struct variable *var,
5165 struct hmap *occurrences,
5166 const struct ctables_categories *cats)
5168 const struct val_labs *val_labs = var_get_value_labels (var);
5170 for (size_t i = 0; i < cats->n_cats; i++)
5172 const struct ctables_category *c = &cats->cats[i];
/* A category with an explicit number. */
5176 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A category with an explicit string, padded with spaces to VAR's
   width. */
5182 int width = var_get_width (var);
5184 value_init (&value, width);
5185 value_copy_buf_rpad (&value, width,
5186 CHAR_CAST (uint8_t *, c->string.string),
5187 c->string.length, ' ');
5188 ctables_add_occurrence (var, &value, occurrences);
5189 value_destroy (&value, width);
/* Labeled values inside the numeric range (bounds inclusive). */
5194 assert (var_is_numeric (var));
5195 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5196 vl = val_labs_next (val_labs, vl))
5197 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5198 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values inside the string range. */
5202 assert (var_is_alpha (var));
5203 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5204 vl = val_labs_next (val_labs, vl))
5205 if (in_string_range (&vl->value, var, c->srange))
5206 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5210 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5211 vl = val_labs_next (val_labs, vl))
5212 if (var_is_value_missing (var, &vl->value))
5213 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5217 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5218 vl = val_labs_next (val_labs, vl))
5219 ctables_add_occurrence (var, &vl->value, occurrences);
5222 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless the category includes
   missing values. */
5232 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5233 vl = val_labs_next (val_labs, vl))
5234 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5235 ctables_add_occurrence (var, &vl->value, occurrences);
5238 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().  Enumerates
   combinations of occurrences and postcompute categories for the variables
   of section S, continuing from axis A, position A_IDX.  Each chosen value
   is written into case C and its category into CATS; once A has passed the
   last axis the combination is inserted with ctables_cell_insert__().

   NOTE(review): CATS has room for 10 variables per axis; nothing visible
   here checks a nest's size against that bound. */
5245 ctables_section_recurse_add_empty_categories (
5246 struct ctables_section *s,
5247 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5248 enum pivot_axis_type a, size_t a_idx)
5250 if (a >= PIVOT_N_AXES)
5251 ctables_cell_insert__ (s, c, cats);
/* This axis is absent or exhausted: move on to the next axis. */
5252 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5253 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5256 const struct variable *var = s->nests[a]->vars[a_idx];
5257 const struct ctables_categories *categories = s->table->categories[
5258 var_get_dict_index (var)];
5259 int width = var_get_width (var);
5260 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5261 const struct ctables_occurrence *o;
/* Try every recorded occurrence of this variable: put its value into the
   case, find its category, and recurse to the next position. */
5262 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5264 union value *value = case_data_rw (c, var);
5265 value_destroy (value, width);
5266 value_clone (value, &o->value, width);
5267 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5268 assert (cats[a][a_idx] != NULL);
5269 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are not tied to a data value, so each one is
   tried directly. */
5272 for (size_t i = 0; i < categories->n_cats; i++)
5274 const struct ctables_category *cat = &categories->cats[i];
5275 if (cat->type == CCT_POSTCOMPUTE)
5277 cats[a][a_idx] = cat;
5278 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that the data may not
   have produced.  For each non-scale variable whose categories have
   show_empty set, the occurrences implied by its categories are added;
   then a scratch case built from the dictionary's prototype is used to
   enumerate combinations through
   ctables_section_recurse_add_empty_categories(). */
5285 ctables_section_add_empty_categories (struct ctables_section *s)
5287 bool show_empty = false;
5288 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5290 for (size_t k = 0; k < s->nests[a]->n; k++)
5291 if (k != s->nests[a]->scale_idx)
5293 const struct variable *var = s->nests[a]->vars[k];
5294 const struct ctables_categories *cats = s->table->categories[
5295 var_get_dict_index (var)];
5296 if (cats->show_empty)
5299 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5305 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5306 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5307 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S: removes every occurrence, cell, and domain from its
   hash maps, destroying the values and summaries they hold.  The cell and
   domain maps are shrunk rather than destroyed. */
5312 ctables_section_clear (struct ctables_section *s)
/* Occurrences, per axis and per non-scale variable. */
5314 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5316 const struct ctables_nest *nest = s->nests[a];
5317 for (size_t i = 0; i < nest->n; i++)
5318 if (i != nest->scale_idx)
5320 const struct variable *var = nest->vars[i];
5321 int width = var_get_width (var);
5322 struct ctables_occurrence *o, *next;
5323 struct hmap *map = &s->occurrences[a][i];
5324 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5326 value_destroy (&o->value, width);
5327 hmap_delete (map, &o->node);
/* Cells: destroy the per-axis category values, then the summaries. */
5334 struct ctables_cell *cell, *next_cell;
5335 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5337 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5339 const struct ctables_nest *nest = s->nests[a];
5340 for (size_t i = 0; i < nest->n; i++)
5341 if (i != nest->scale_idx)
5342 value_destroy (&cell->axes[a].cvs[i].value,
5343 var_get_width (nest->vars[i]));
5344 free (cell->axes[a].cvs);
5347 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5348 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5349 for (size_t i = 0; i < specs->n; i++)
5350 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5351 free (cell->summaries);
5353 hmap_delete (&s->cells, &cell->node);
5356 hmap_shrink (&s->cells);
/* Domains, per domain type. */
5358 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5360 struct ctables_domain *domain, *next_domain;
5361 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5364 free (domain->sums);
5365 hmap_delete (&s->domains[dt], &domain->node);
5368 hmap_shrink (&s->domains[dt]);
/* Releases section S: clears its contents with ctables_section_clear(),
   then destroys the occurrence maps and frees their per-axis arrays, and
   destroys the cell map and the domain maps. */
5373 ctables_section_uninit (struct ctables_section *s)
5375 ctables_section_clear (s);
5377 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5379 for (size_t i = 0; i < s->nests[a]->n; i++)
5380 hmap_destroy (&s->occurrences[a][i]);
5381 free (s->occurrences[a]);
5384 hmap_destroy (&s->cells);
5385 for (size_t i = 0; i < N_CTDTS; i++)
5386 hmap_destroy (&s->domains[i]);
/* Discards the data accumulated in table T: clears every section and, when
   category labels were moved (T->clabels_example is set), empties
   T->clabels_values_map and releases the T->clabels_values array. */
5390 ctables_table_clear (struct ctables_table *t)
5392 for (size_t i = 0; i < t->n_sections; i++)
5393 ctables_section_clear (&t->sections[i]);
5395 if (t->clabels_example)
5397 int width = var_get_width (t->clabels_example);
5398 struct ctables_value *value, *next_value;
5399 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5400 &t->clabels_values_map)
5402 value_destroy (&value->value, width);
5403 hmap_delete (&t->clabels_values_map, &value->node);
5406 hmap_shrink (&t->clabels_values_map);
5408 free (t->clabels_values);
5409 t->clabels_values = NULL;
5410 t->n_clabels_values = 0;
/* Runs the tables of a CTABLES command over the cases in INPUT, which
   belong to dataset DS.  Sections are allocated for every table, the cases
   are read group by group and accumulated into every section of every
   table, and each table is then completed, output, and cleared.  The
   result is that of casegrouper_destroy(). */
5415 ctables_execute (struct dataset *ds, struct casereader *input,
/* One section per combination of row, column, and layer nests; an axis
   with no nests still counts as one. */
5418 for (size_t i = 0; i < ct->n_tables; i++)
5420 struct ctables_table *t = ct->tables[i];
5421 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5422 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5423 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5424 sizeof *t->sections);
5425 size_t ix[PIVOT_N_AXES];
5426 ctables_table_add_section (t, 0, ix);
/* Group by the SPLIT FILE variables only when splits are output
   separately; otherwise all the cases form one group. */
5429 struct dictionary *dict = dataset_dict (ds);
5430 struct casegrouper *grouper
5431 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5432 ? casegrouper_create_splits (input, dict)
5433 : casegrouper_create_vars (input, NULL, 0));
5434 struct casereader *group;
5435 while (casegrouper_get_next_group (grouper, &group))
5437 /* Output SPLIT FILE variables. */
5438 struct ccase *c = casereader_peek (group, 0);
5441 output_split_file_values (ds, c);
5445 bool warn_on_invalid = true;
5446 for (c = casereader_read (group); c;
5447 case_unref (c), c = casereader_read (group))
/* D_WEIGHT is the dictionary's case weight; E_WEIGHT is taken from
   CT->e_weight when that variable is set. */
5449 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5450 double e_weight = (ct->e_weight
5451 ? var_force_valid_weight (ct->e_weight,
5452 case_num (c, ct->e_weight),
5456 for (size_t i = 0; i < ct->n_tables; i++)
5458 struct ctables_table *t = ct->tables[i];
5460 for (size_t j = 0; j < t->n_sections; j++)
5461 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Collect the values for category labels that are moved off axis A. */
5463 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5464 if (t->label_axis[a] != a)
5465 ctables_insert_clabels_values (t, c, a);
5468 casereader_destroy (group);
5470 for (size_t i = 0; i < ct->n_tables; i++)
5472 struct ctables_table *t = ct->tables[i];
5474 if (t->clabels_example)
5475 ctables_sort_clabels_values (t);
5477 for (size_t j = 0; j < t->n_sections; j++)
5478 ctables_section_add_empty_categories (&t->sections[j]);
5480 ctables_table_output (ct, t);
5481 ctables_table_clear (t);
5484 return casegrouper_destroy (grouper);
/* Type of a function that parses one level of a postcompute expression from
   a lexer, given the dictionary, and returns the parsed expression.  The
   binary-operator parsers below receive such a function as their
   "parse_next_level" argument. */
5489 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5490 struct dictionary *);
/* Releases what postcompute expression E owns, depending on its operator:
   the string of a string category, both endpoints of a string range, or
   (recursively) both subexpressions of a node that has operands.  No
   operator-specific release is visible for the category operators listed
   last.  E's location is destroyed as well.

   NOTE(review): the switch header, the labels of the operators that carry
   subexpressions, and any null check or final free of E itself are on lines
   not visible in this listing -- confirm against the full source. */
5493 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5499 case CTPO_CAT_STRING:
5500 ss_dealloc (&e->string);
5503 case CTPO_CAT_SRANGE:
5504 for (size_t i = 0; i < 2; i++)
5505 ss_dealloc (&e->srange[i]);
/* Operators with operands: destroy both subexpression slots. */
5514 for (size_t i = 0; i < 2; i++)
5515 ctables_pcexpr_destroy (e->subs[i]);
/* Category operators with no operator-specific cleanup visible here. */
5519 case CTPO_CAT_NUMBER:
5520 case CTPO_CAT_NRANGE:
5521 case CTPO_CAT_MISSING:
5522 case CTPO_CAT_OTHERNM:
5523 case CTPO_CAT_SUBTOTAL:
5524 case CTPO_CAT_TOTAL:
5528 msg_location_destroy (e->location);
/* Allocates a new postcompute expression node for binary operator OP with
   operands SUB0 and SUB1, which are stored in the node's subs array.  The
   node's location is produced by msg_location_merged() from the two operands'
   locations.

   NOTE(review): the operator initializer and the return statement are not
   visible in this listing; callers use the result as the new node. */
5533 static struct ctables_pcexpr *
5534 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5535 struct ctables_pcexpr *sub0,
5536 struct ctables_pcexpr *sub1)
5538 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5539 *e = (struct ctables_pcexpr) {
5541 .subs = { sub0, sub1 },
5542 .location = msg_location_merged (sub0->location, sub1->location),
5547 /* How to parse an operator. */
/* Lexer token type that spells the operator in the syntax. */
5550 enum token_type token;
/* Postcompute operation that the token denotes. */
5551 enum ctables_postcompute_op op;
/* Scans the N_OPS operators in OPS, in order, for one whose token type equals
   LEXER's current token.

   A match on T_NEG_NUM is treated differently from a match on any other
   token, as the test below shows.  NOTE(review): the statement guarded by
   that test and the return statements are not visible in this listing;
   presumably ordinary operator tokens are consumed while a negative number
   is left in place to serve as the right-hand operand, and the matching
   entry (or NULL if none) is returned -- confirm against the full source. */
5554 static const struct operator *
5555 ctable_pcexpr_match_operator (struct lexer *lexer,
5556 const struct operator ops[], size_t n_ops)
5558 for (const struct operator *op = ops; op < ops + n_ops; op++)
5559 if (lex_token (lexer) == op->token)
5561 if (op->token != T_NEG_NUM)
/* Parses a chain of "<operator> <operand>" pairs that follows the
   already-parsed left operand LHS.  The candidate operators are the N_OPS
   elements of OPS and each operand is parsed by PARSE_NEXT_LEVEL.  Every pair
   is folded into LHS as a new binary node whose left operand is the
   expression built so far, so the chain associates to the left.

   If CHAIN_WARNING is nonnull, it is issued as a warning at LHS's location
   when the operator count tested below is greater than 1.

   The expression built so far is destroyed on the path that follows parsing
   of an operand.

   NOTE(review): the conditions that guard the warning and the destroy, and
   the return statements, are on lines not visible in this listing;
   presumably the loop ends when no operator matches and the destroy happens
   when an operand fails to parse. */
5570 static struct ctables_pcexpr *
5571 ctable_pcexpr_parse_binary_operators__ (
5572 struct lexer *lexer, struct dictionary *dict,
5573 const struct operator ops[], size_t n_ops,
5574 parse_recursively_func *parse_next_level,
5575 const char *chain_warning, struct ctables_pcexpr *lhs)
5577 for (int op_count = 0; ; op_count++)
5579 const struct operator *op
5580 = ctable_pcexpr_match_operator (lexer, ops, n_ops);
5583 if (op_count > 1 && chain_warning)
5584 msg_at (SW, lhs->location, "%s", chain_warning);
5589 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5592 ctables_pcexpr_destroy (lhs);
/* The new node takes both operands; it becomes the left operand of the next
   operator in the chain. */
5596 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: first an operand via
   PARSE_NEXT_LEVEL, then the rest of the chain via
   ctable_pcexpr_parse_binary_operators__() with that operand as the initial
   left-hand side.  OPS, N_OPS, and CHAIN_WARNING are passed through
   unchanged.

   NOTE(review): the handling of a failed first operand is on lines not
   visible in this listing. */
5600 static struct ctables_pcexpr *
5601 ctable_pcexpr_parse_binary_operators (struct lexer *lexer,
5602 struct dictionary *dict,
5603 const struct operator ops[], size_t n_ops,
5604 parse_recursively_func *parse_next_level,
5605 const char *chain_warning)
5607 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5611 return ctable_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5613 chain_warning, lhs);
/* Forward declaration: ctable_pcexpr_parse_primary() calls this function for
   parenthesized subexpressions before its definition appears. */
5616 static struct ctables_pcexpr *ctable_pcexpr_parse_add (struct lexer *,
5617 struct dictionary *);
/* Returns, by value, a postcompute expression with operator CTPO_CAT_NRANGE
   whose numeric range runs from LOW to HIGH.  The callers in
   ctable_pcexpr_parse_primary() pass -DBL_MAX for a "LO" lower end and
   DBL_MAX for a "HI" upper end. */
5619 static struct ctables_pcexpr
5620 ctpo_cat_nrange (double low, double high)
5622 return (struct ctables_pcexpr) {
5623 .op = CTPO_CAT_NRANGE,
5624 .nrange = { low, high },
/* Returns, by value, a postcompute expression with operator CTPO_CAT_SRANGE
   whose string range runs from LOW to HIGH.  The substrings are stored as
   given, not copied.  The callers in ctable_pcexpr_parse_primary() pass a
   substring with a null string pointer for a "LO" lower end or a "HI" upper
   end. */
5628 static struct ctables_pcexpr
5629 ctpo_cat_srange (struct substring low, struct substring high)
5631 return (struct ctables_pcexpr) {
5632 .op = CTPO_CAT_SRANGE,
5633 .srange = { low, high },
/* Parses a primary postcompute expression from LEXER.  The forms recognized
   are:

   - a number, giving a CTPO_CONSTANT;
   - MISSING, OTHERNM, or TOTAL, giving the corresponding category operator;
   - SUBTOTAL, optionally followed by a bracketed integer of at least 1,
     giving CTPO_CAT_SUBTOTAL with that index (0 when no index is given);
   - a bracketed category: a number, a string, or a range written with THRU
     whose lower end may be LO and whose upper end may be HI;
   - a parenthesized subexpression, parsed by ctable_pcexpr_parse_add().

   Anything else is reported with lex_error().  The final statements set the
   location of the local expression E to the tokens consumed since entry and
   return a heap copy of E.

   NOTE(review): the failure returns, the token-advancing calls, and the
   return from the parenthesized branch are on lines not visible in this
   listing; presumably failures return NULL and the parenthesized branch
   returns its subexpression directly. */
5637 static struct ctables_pcexpr *
5638 ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5640 int start_ofs = lex_ofs (lexer);
5641 struct ctables_pcexpr e;
5642 if (lex_is_number (lexer))
5644 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5645 .number = lex_number (lexer) };
5648 else if (lex_match_id (lexer, "MISSING"))
5649 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5650 else if (lex_match_id (lexer, "OTHERNM"))
5651 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5652 else if (lex_match_id (lexer, "TOTAL"))
5653 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5654 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 unless an explicit "[n]" follows SUBTOTAL. */
5656 size_t subtotal_index = 0;
5657 if (lex_match (lexer, T_LBRACK))
5659 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5661 subtotal_index = lex_integer (lexer);
5663 if (!lex_force_match (lexer, T_RBRACK))
5666 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5667 .subtotal_index = subtotal_index };
/* Bracketed category specification. */
5669 else if (lex_match (lexer, T_LBRACK))
/* "LO THRU <string>" or "LO THRU <number>". */
5671 if (lex_match_id (lexer, "LO"))
5673 if (!lex_force_match_id (lexer, "THRU"))
5676 if (lex_is_string (lexer))
/* A null string pointer stands for the open lower end of a string range. */
5678 struct substring low = { .string = NULL };
5679 struct substring high = parse_substring (lexer, dict);
5680 e = ctpo_cat_srange (low, high);
5684 if (!lex_force_num (lexer))
/* -DBL_MAX stands for the open lower end of a numeric range. */
5686 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* "<number>", "<number> THRU HI", or "<number> THRU <number>". */
5690 else if (lex_is_number (lexer))
5692 double number = lex_number (lexer);
5694 if (lex_match_id (lexer, "THRU"))
5696 if (lex_match_id (lexer, "HI"))
5697 e = ctpo_cat_nrange (number, DBL_MAX);
5700 if (!lex_force_num (lexer))
5702 e = ctpo_cat_nrange (number, lex_number (lexer));
5707 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* "<string>", "<string> THRU HI", or "<string> THRU <string>". */
5710 else if (lex_is_string (lexer))
5712 struct substring s = parse_substring (lexer, dict);
5714 if (lex_match_id (lexer, "THRU"))
5716 struct substring high;
5718 if (lex_match_id (lexer, "HI"))
5719 high = (struct substring) { .string = NULL };
5722 if (!lex_force_string (lexer))
5727 high = parse_substring (lexer, dict);
5730 e = ctpo_cat_srange (s, high);
5733 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5737 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any strings that E already
   holds. */
5741 if (!lex_force_match (lexer, T_RBRACK))
5743 if (e.op == CTPO_CAT_STRING)
5744 ss_dealloc (&e.string);
5745 else if (e.op == CTPO_CAT_SRANGE)
5747 ss_dealloc (&e.srange[0]);
5748 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression: restart at the lowest-precedence level. */
5753 else if (lex_match (lexer, T_LPAREN))
5755 struct ctables_pcexpr *ep = ctable_pcexpr_parse_add (lexer, dict);
5758 if (!lex_force_match (lexer, T_RPAREN))
5760 ctables_pcexpr_destroy (ep);
5767 lex_error (lexer, NULL);
/* The location runs from the first token of the primary through the token
   just before the lexer's current position. */
5771 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5772 return xmemdup (&e, sizeof e);
/* Allocates a new postcompute expression node for SUB whose location runs
   from token offset START_OFS through the token just before LEXER's current
   position.

   NOTE(review): the operator and operand initializers and the return
   statement are not visible in this listing; the function name and its
   callers (the unary-minus and exponentiation parsers) indicate that the
   node is a negation wrapping SUB. */
5775 static struct ctables_pcexpr *
5776 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5777 struct lexer *lexer, int start_ofs)
5779 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5780 *e = (struct ctables_pcexpr) {
5783 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this is a chain of primaries joined by T_EXP, parsed
   by ctable_pcexpr_parse_binary_operators() together with a warning text
   about the operator's left associativity.

   When the current token is a negative number immediately followed by T_EXP,
   the negated token value (that is, the number without its sign) becomes a
   CTPO_CONSTANT left operand, the rest of the chain is parsed onto it with
   ctable_pcexpr_parse_binary_operators__(), and the result is wrapped by
   ctables_pcexpr_allocate_neg(), so that the minus sign applies to the whole
   power.

   NOTE(review): a few short lines of this function are not visible in this
   listing, including the closing line of the "Special case" comment near
   the middle of the body -- confirm against the full source. */
5788 static struct ctables_pcexpr *
5789 ctable_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5791 static const struct operator op = { T_EXP, CTPO_POW };
5793 const char *chain_warning =
5794 _("The exponentiation operator (`**') is left-associative: "
5795 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5796 "To disable this warning, insert parentheses.");
/* Ordinary case: anything other than a negative number directly followed by
   the exponentiation operator. */
5798 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5799 return ctable_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5800 ctable_pcexpr_parse_primary,
5803 /* Special case for situations like "-5**6", which must be parsed as
5806 int start_ofs = lex_ofs (lexer);
5807 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5808 *lhs = (struct ctables_pcexpr) {
5809 .op = CTPO_CONSTANT,
5810 .number = -lex_tokval (lexer),
5811 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5815 struct ctables_pcexpr *node = ctable_pcexpr_parse_binary_operators__ (
5816 lexer, dict, &op, 1,
5817 ctable_pcexpr_parse_primary, chain_warning, lhs);
5821 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5824 /* Parses the unary minus level. */
/* If the current token is not a dash, this level simply defers to
   ctable_pcexpr_parse_exp().  Otherwise the dash is consumed, the operand is
   parsed by a recursive call (so several dashes may be stacked), and the
   operand is wrapped by ctables_pcexpr_allocate_neg() with a location that
   starts at the dash.

   NOTE(review): the handling of a failed operand is on a line not visible in
   this listing. */
5825 static struct ctables_pcexpr *
5826 ctable_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5828 int start_ofs = lex_ofs (lexer);
5829 if (!lex_match (lexer, T_DASH))
5830 return ctable_pcexpr_parse_exp (lexer, dict);
5832 struct ctables_pcexpr *inner = ctable_pcexpr_parse_neg (lexer, dict);
5836 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5839 /* Parses the multiplication and division level. */
/* T_ASTERISK maps to CTPO_MUL and T_SLASH to CTPO_DIV.  Operands are parsed
   at the unary minus level, and no chain warning is given for this level. */
5840 static struct ctables_pcexpr *
5841 ctable_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5843 static const struct operator ops[] =
5845 { T_ASTERISK, CTPO_MUL },
5846 { T_SLASH, CTPO_DIV },
5849 return ctable_pcexpr_parse_binary_operators (lexer, dict, ops,
5850 sizeof ops / sizeof *ops,
5851 ctable_pcexpr_parse_neg, NULL);
5854 /* Parses the addition and subtraction level. */
/* T_PLUS maps to CTPO_ADD and T_DASH to CTPO_SUB.  A negative-number token
   (T_NEG_NUM) in operator position also maps to CTPO_ADD: presumably input
   such as "a -5" is read as a + (-5), with the negative number itself parsed
   as the right operand -- see ctable_pcexpr_match_operator(), which treats
   T_NEG_NUM specially.  Operands are parsed at the multiplication level, and
   no chain warning is given for this level. */
5855 static struct ctables_pcexpr *
5856 ctable_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5858 static const struct operator ops[] =
5860 { T_PLUS, CTPO_ADD },
5861 { T_DASH, CTPO_SUB },
5862 { T_NEG_NUM, CTPO_ADD },
5865 return ctable_pcexpr_parse_binary_operators (lexer, dict,
5866 ops, sizeof ops / sizeof *ops,
5867 ctable_pcexpr_parse_mul, NULL);
/* Searches CT's postcomputes map for the postcompute called NAME.  NAME is
   hashed with utf8_hash_case_string() and each candidate in the hash bucket
   is compared with utf8_strcasecmp(), so presumably the match ignores case.

   NOTE(review): the return statements are not visible in this listing; the
   callers treat the result as the matching postcompute, or as null when
   there is none. */
5870 static struct ctables_postcompute *
5871 ctables_find_postcompute (struct ctables *ct, const char *name)
5873 struct ctables_postcompute *pc;
5874 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5875 utf8_hash_case_string (name, 0), &ct->postcomputes)
5876 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form
   "&NAME = EXPR (expression)", and records the result in CT's postcomputes
   map.

   When a postcompute called NAME already exists, a warning says that the new
   definition overrides it, a note points at the previous definition, and the
   previous expression and location are destroyed.  Otherwise a new
   postcompute is allocated and inserted into the map under NAME.

   The stored location covers the whole subcommand.  The label assignment at
   the end uses the source text of the expression.

   NOTE(review): the end of the parameter list, the error returns, the
   cleanup of NAME, the storing of the parsed expression, and any condition
   on the label assignment are on lines not visible in this listing. */
5882 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The caller (cmd_ctables) has already matched the PCOMPUTE keyword, so the
   subcommand starts one token before the current one. */
5885 int pcompute_start = lex_ofs (lexer) - 1;
5887 if (!lex_match (lexer, T_AND))
5889 lex_error_expecting (lexer, "&");
5892 if (!lex_force_id (lexer))
5895 char *name = ss_xstrdup (lex_tokss (lexer));
5898 if (!lex_force_match (lexer, T_EQUALS)
5899 || !lex_force_match_id (lexer, "EXPR")
5900 || !lex_force_match (lexer, T_LPAREN))
/* EXPR_START and EXPR_END bracket the tokens of the expression itself,
   excluding the enclosing parentheses. */
5906 int expr_start = lex_ofs (lexer);
5907 struct ctables_pcexpr *expr = ctable_pcexpr_parse_add (lexer, dict);
5908 int expr_end = lex_ofs (lexer) - 1;
5909 if (!expr || !lex_force_match (lexer, T_RPAREN))
5914 int pcompute_end = lex_ofs (lexer) - 1;
5916 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5919 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, then discard the old expression and location. */
5922 msg_at (SW, location, _("New definition of &%s will override the "
5923 "previous definition."),
5925 msg_at (SN, pc->location, _("This is the previous definition."));
5927 ctables_pcexpr_destroy (pc->expr);
5928 msg_location_destroy (pc->location);
/* First definition: the new postcompute takes NAME as its name. */
5933 pc = xmalloc (sizeof *pc);
5934 *pc = (struct ctables_postcompute) { .name = name };
5935 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5936 utf8_hash_case_string (pc->name, 0));
5939 pc->location = location;
5941 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the summary specifications that follow FORMAT in a PPROPERTIES
   subcommand and stores them in SSS, which is reset to an empty set first.

   Specifications are read until the end of the command, a slash, or an
   identifier that matches LABEL or HIDESOURCECATS.  Each one consists of a
   summary function, then (only for CTSF_PTILE) a percentile between 0 and
   100 inclusive, then a format specifier.  The specs array grows on demand
   through x2nrealloc().

   NOTE(review): the return statements and the jumps to the cleanup at the
   end are on lines not visible in this listing; the caller tests the result
   as a boolean, and the final ctables_summary_spec_set_uninit() call is
   presumably the failure path. */
5946 ctables_parse_pproperties_format (struct lexer *lexer,
5947 struct ctables_summary_spec_set *sss)
5949 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5951 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5952 && !(lex_token (lexer) == T_ID
5953 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5954 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5955 lex_tokss (lexer)))))
5957 /* Parse function. */
5958 enum ctables_summary_function function;
5959 if (!parse_ctables_summary_function (lexer, &function))
5962 /* Parse percentile. */
/* The percentile stays 0 for every function other than CTSF_PTILE. */
5963 double percentile = 0;
5964 if (function == CTSF_PTILE)
5966 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5968 percentile = lex_number (lexer);
5973 struct fmt_spec format;
5974 bool is_ctables_format;
5975 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new spec, enlarging the array first when it is full. */
5978 if (sss->n >= sss->allocated)
5979 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5980 sizeof *sss->specs);
5981 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5982 .function = function,
5983 .percentile = percentile,
5985 .is_ctables_format = is_ctables_format,
5991 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand.

   The subcommand starts with one or more "&name" references, each of which
   must name a postcompute already defined in CT; otherwise an "Unknown
   computed category" error is issued.  The referenced postcomputes are
   collected in an array that grows through x2nrealloc().

   The properties that follow, up to a slash or the end of the command, are
   applied to every collected postcompute:

   - LABEL = string replaces the label with a copy of the string.
   - FORMAT = specs... gives each postcompute its own clone of the parsed
     summary specification set.
   - HIDESOURCECATS = boolean sets the hide_source_cats flag.

   NOTE(review): the error returns, the cleanup of the array, and the
   condition that chooses between reusing and allocating a postcompute's spec
   set are on lines not visible in this listing. */
5996 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5998 struct ctables_postcompute **pcs = NULL;
6000 size_t allocated_pcs = 0;
6002 while (lex_match (lexer, T_AND))
6004 if (!lex_force_id (lexer))
6006 struct ctables_postcompute *pc
6007 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6010 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6015 if (n_pcs >= allocated_pcs)
6016 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
6020 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6022 if (lex_match_id (lexer, "LABEL"))
6024 lex_match (lexer, T_EQUALS);
6025 if (!lex_force_string (lexer))
/* Every postcompute gets its own copy of the label string; the label it held
   before is freed. */
6028 for (size_t i = 0; i < n_pcs; i++)
6030 free (pcs[i]->label);
6031 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6036 else if (lex_match_id (lexer, "FORMAT"))
6038 lex_match (lexer, T_EQUALS);
6040 struct ctables_summary_spec_set sss;
6041 if (!ctables_parse_pproperties_format (lexer, &sss))
/* The parsed set is cloned into each postcompute and then released, so the
   postcomputes do not share it. */
6044 for (size_t i = 0; i < n_pcs; i++)
6047 ctables_summary_spec_set_uninit (pcs[i]->specs);
6049 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6050 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6052 ctables_summary_spec_set_uninit (&sss);
6054 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6056 lex_match (lexer, T_EQUALS);
6057 bool hide_source_cats;
6058 if (!parse_bool (lexer, &hide_source_cats))
6060 for (size_t i = 0; i < n_pcs; i++)
6061 pcs[i]->hide_source_cats = hide_source_cats;
6065 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the local time that corresponds to NOW, rendered by
   strftime() according to FORMAT.

   Nothing is appended if NOW cannot be converted to local time, or if
   strftime() reports that it produced no output (the rendered text is empty
   or does not fit in the internal buffer).  In the latter case the buffer
   contents are indeterminate, so they must not be used. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  /* localtime() returns a null pointer when the time cannot be represented
     as broken-down local time; strftime() must not be given that. */
  const struct tm *tm = localtime (&now);
  if (tm == NULL)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
/* If *S starts with PREFIX, advances *S past it by PREFIX's length.

   NOTE(review): the return type and return statements are not visible in
   this listing; put_title_text() uses the result as a condition, so the
   function presumably reports whether the prefix was present. */
6087 skip_prefix (struct substring *s, struct substring prefix)
6089 if (ss_starts_with (*s, prefix))
6091 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of the tokens of LEXER from offset EXPR_START
   up to, but not including, offset EXPR_END.

   An identifier token is replaced by the label of the variable in DICT that
   it names; if there is no such variable, or the variable has no label, the
   identifier itself is appended.  Any other token that is output is appended
   in its source representation, separated from its neighbors by single
   spaces, except that no space is added before a right parenthesis or after
   a left parenthesis.

   Square brackets are tracked with a nesting counter.  NOTE(review): the
   counter's declaration and updates, and the branch taken while it is
   nonzero, are on lines not visible in this listing; presumably bracketed
   text is left out of the output. */
6099 put_table_expression (struct string *out, struct lexer *lexer,
6100 struct dictionary *dict, int expr_start, int expr_end)
6103 for (int ofs = expr_start; ofs < expr_end; ofs++)
6105 const struct token *t = lex_ofs_token (lexer, ofs);
6106 if (t->type == T_LBRACK)
6108 else if (t->type == T_RBRACK && nest > 0)
6114 else if (t->type == T_ID)
6116 const struct variable *var
6117 = dict_lookup_var (dict, t->string.string);
6118 const char *label = var ? var_get_label (var) : NULL;
6119 ds_put_cstr (out, label ? label : t->string.string);
/* Space before the token, unless it is the first token, a right parenthesis,
   or the output already ends in a space. */
6123 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6124 ds_put_byte (out, ' ');
6126 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6127 ds_put_cstr (out, repr);
/* Space after the token, unless it is the last token or a left
   parenthesis. */
6130 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6131 ds_put_byte (out, ' ');
/* Appends title text IN to OUT, expanding the placeholders that start with a
   right parenthesis:

   - ")DATE" becomes NOW formatted with the strftime() conversion "%x";
   - ")TIME" becomes NOW formatted with the strftime() conversion "%X";
   - ")TABLE" becomes the table expression spanning tokens EXPR_START to
     EXPR_END of LEXER, as rendered by put_table_expression() using DICT.

   Any other right parenthesis is copied literally, as is all text between
   placeholders.

   NOTE(review): the loop header and the exit taken once IN is exhausted are
   on lines not visible in this listing. */
6137 put_title_text (struct string *out, struct substring in, time_t now,
6138 struct lexer *lexer, struct dictionary *dict,
6139 int expr_start, int expr_end)
/* Copy the text before the next ')' verbatim, then look at what follows. */
6143 size_t chunk = ss_find_byte (in, ')');
6144 ds_put_substring (out, ss_head (in, chunk));
6145 ss_advance (&in, chunk);
6146 if (ss_is_empty (in))
6149 if (skip_prefix (&in, ss_cstr (")DATE")))
6150 put_strftime (out, now, "%x");
6151 else if (skip_prefix (&in, ss_cstr (")TIME")))
6152 put_strftime (out, now, "%X");
6153 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6154 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a placeholder: emit the ')' itself and continue after it. */
6157 ds_put_byte (out, ')');
6158 ss_advance (&in, 1);
/* Parses and executes the CTABLES command read from LEXER against dataset
   DS.

   The command consists of optional global subcommands (FORMAT, VLABELS,
   MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS) followed
   by one or more TABLE subcommands, each of which may be followed by its own
   subcommands (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST,
   COMPARETEST).  Once everything has been parsed, the tables are run over
   the active dataset by ctables_execute().

   Returns CMD_SUCCESS if both execution and proc_commit() succeed, otherwise
   CMD_FAILURE.

   NOTE(review): many short lines of this function (braces, "goto" and
   "return" statements, token-advancing calls, and a few assignments) are not
   visible in this listing; the comments below describe only what the visible
   lines establish. */
6164 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6166 struct casereader *input = NULL;
/* The measure guesser is run over a reader opened on the dataset.
   NOTE(review): the condition under which this happens is not visible
   here. */
6168 struct measure_guesser *mg = measure_guesser_create (ds);
6171 input = proc_open (ds);
6172 measure_guesser_run (mg, input);
6173 measure_guesser_destroy (mg);
/* Every variable starts out with the variable-label display mode taken from
   the global "show variables" setting. */
6176 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6177 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6178 enum settings_value_show tvars = settings_get_show_variables ();
6179 for (size_t i = 0; i < n_vars; i++)
6180 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on an unshared copy of the default look so that omit_empty can be
   turned off for this command. */
6182 struct pivot_table_look *look = pivot_table_look_unshare (
6183 pivot_table_look_ref (pivot_table_look_get_default ()));
6184 look->omit_empty = false;
6186 struct ctables *ct = xmalloc (sizeof *ct);
6187 *ct = (struct ctables) {
6188 .dict = dataset_dict (ds),
6190 .ctables_formats = FMT_SETTINGS_INIT,
6192 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* NOW is captured once; it is what the )DATE and )TIME title placeholders
   expand to (see the TITLES subcommand below). */
6195 time_t now = time (NULL);
/* Set up the four CTABLES-specific custom formats.  Each has one style
   string for when the decimal point is a dot and one for when it is a
   comma. */
6200 const char *dot_string;
6201 const char *comma_string;
6203 static const struct ctf ctfs[4] = {
6204 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6205 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6206 { CTEF_PAREN, "-,(,),", "-.(.)." },
6207 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6209 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6210 for (size_t i = 0; i < 4; i++)
6212 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6213 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6214 fmt_number_style_from_string (s));
6217 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands, up to the first TABLE subcommand. */
6220 while (!lex_match_id (lexer, "TABLE"))
6222 if (lex_match_id (lexer, "FORMAT"))
/* WIDTHS holds the minimum and maximum column widths; SYSMIS marks a width
   that has not been specified.  The default unit is points (72 per inch). */
6224 double widths[2] = { SYSMIS, SYSMIS };
6225 double units_per_inch = 72.0;
6227 while (lex_token (lexer) != T_SLASH)
6229 if (lex_match_id (lexer, "MINCOLWIDTH"))
6231 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6234 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6236 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6239 else if (lex_match_id (lexer, "UNITS"))
6241 lex_match (lexer, T_EQUALS);
6242 if (lex_match_id (lexer, "POINTS"))
6243 units_per_inch = 72.0;
6244 else if (lex_match_id (lexer, "INCHES"))
6245 units_per_inch = 1.0;
6246 else if (lex_match_id (lexer, "CM"))
6247 units_per_inch = 2.54;
6250 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
/* EMPTY sets the text shown for empty cells: ZERO leaves ct->zero untouched
   here, BLANK stores an empty string, and anything else must be a string,
   which is copied. */
6254 else if (lex_match_id (lexer, "EMPTY"))
6259 lex_match (lexer, T_EQUALS);
6260 if (lex_match_id (lexer, "ZERO"))
6262 /* Nothing to do. */
6264 else if (lex_match_id (lexer, "BLANK"))
6265 ct->zero = xstrdup ("");
6266 else if (lex_force_string (lexer))
6268 ct->zero = ss_xstrdup (lex_tokss (lexer));
/* MISSING sets the text shown for missing values.  The string "." is
   treated specially: the copy is made only when the string differs from it
   (NOTE(review): the alternative value is not visible here). */
6274 else if (lex_match_id (lexer, "MISSING"))
6276 lex_match (lexer, T_EQUALS);
6277 if (!lex_force_string (lexer))
6281 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6282 ? ss_xstrdup (lex_tokss (lexer))
6288 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6289 "UNITS", "EMPTY", "MISSING");
6294 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6295 && widths[0] > widths[1])
6297 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each specified width from the chosen unit to 96ths of an inch and
   store it in the look's horizontal width range. */
6301 for (size_t i = 0; i < 2; i++)
6302 if (widths[i] != SYSMIS)
6304 int *wr = ct->look->width_ranges[TABLE_HORZ];
6305 wr[i] = widths[i] / units_per_inch * 96.0;
/* VLABELS VARIABLES=vars DISPLAY=mode: set the variable-label display mode
   for the listed variables. */
6310 else if (lex_match_id (lexer, "VLABELS"))
6312 if (!lex_force_match_id (lexer, "VARIABLES"))
6314 lex_match (lexer, T_EQUALS);
6316 struct variable **vars;
6318 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6322 if (!lex_force_match_id (lexer, "DISPLAY"))
6327 lex_match (lexer, T_EQUALS);
6329 enum ctables_vlabel vlabel;
6330 if (lex_match_id (lexer, "DEFAULT"))
6331 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6332 else if (lex_match_id (lexer, "NAME"))
6334 else if (lex_match_id (lexer, "LABEL"))
6335 vlabel = CTVL_LABEL;
6336 else if (lex_match_id (lexer, "BOTH"))
6338 else if (lex_match_id (lexer, "NONE"))
6342 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* The modes are indexed by each variable's position in the dictionary. */
6348 for (size_t i = 0; i < n_vars; i++)
6349 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6352 else if (lex_match_id (lexer, "MRSETS"))
6354 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6356 lex_match (lexer, T_EQUALS);
6357 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6360 else if (lex_match_id (lexer, "SMISSING"))
6362 if (lex_match_id (lexer, "VARIABLE"))
6363 ct->smissing_listwise = false;
6364 else if (lex_match_id (lexer, "LISTWISE"))
6365 ct->smissing_listwise = true;
6368 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6372 else if (lex_match_id (lexer, "PCOMPUTE"))
6374 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6377 else if (lex_match_id (lexer, "PPROPERTIES"))
6379 if (!ctables_parse_pproperties (lexer, ct))
/* WEIGHT VARIABLE=var: the variable is stored as ct->e_weight, which
   ctables_execute() reads when computing each case's weights. */
6382 else if (lex_match_id (lexer, "WEIGHT"))
6384 if (!lex_force_match_id (lexer, "VARIABLE"))
6386 lex_match (lexer, T_EQUALS);
6387 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS [COUNT=n]: without COUNT, the threshold becomes 5 unless
   one has already been set. */
6391 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6393 if (lex_match_id (lexer, "COUNT"))
6395 lex_match (lexer, T_EQUALS);
6396 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6399 ct->hide_threshold = lex_integer (lexer);
6402 else if (ct->hide_threshold == 0)
6403 ct->hide_threshold = 5;
6407 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6408 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6409 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6413 if (!lex_force_match (lexer, T_SLASH))
/* One pass of the code below per TABLE subcommand: a new table is created
   with default settings and appended to ct->tables, which grows through
   x2nrealloc(). */
6417 size_t allocated_tables = 0;
6420 if (ct->n_tables >= allocated_tables)
6421 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6422 sizeof *ct->tables);
/* Default category settings: a category and a categories object are built
   here, and the table gets an array with one categories slot per dictionary
   variable. */
6424 struct ctables_category *cat = xmalloc (sizeof *cat);
6425 *cat = (struct ctables_category) {
6427 .include_missing = false,
6428 .sort_ascending = true,
6431 struct ctables_categories *c = xmalloc (sizeof *c);
6432 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6433 *c = (struct ctables_categories) {
6440 struct ctables_categories **categories = xnmalloc (n_vars,
6441 sizeof *categories);
6442 for (size_t i = 0; i < n_vars; i++)
6445 struct ctables_table *t = xmalloc (sizeof *t);
6446 *t = (struct ctables_table) {
6448 .slabels_axis = PIVOT_AXIS_COLUMN,
6449 .slabels_visible = true,
6450 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6452 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6453 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6454 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6456 .clabels_from_axis = PIVOT_AXIS_LAYER,
6457 .categories = categories,
6458 .n_categories = n_vars,
6461 ct->tables[ct->n_tables++] = t;
/* Parse the table expression: rows [BY columns [BY layers]].  EXPR_START and
   EXPR_END delimit its tokens for the )TABLE title placeholder. */
6463 lex_match (lexer, T_EQUALS);
6464 int expr_start = lex_ofs (lexer);
6465 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6467 if (lex_match (lexer, T_BY))
6469 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6470 ct, t, PIVOT_AXIS_COLUMN))
6473 if (lex_match (lexer, T_BY))
6475 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6476 ct, t, PIVOT_AXIS_LAYER))
6480 int expr_end = lex_ofs (lexer);
6482 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6483 && !t->axes[PIVOT_AXIS_LAYER])
6485 lex_error (lexer, _("At least one variable must be specified."));
/* Find the scale variable, if any, on each axis.  The error below reports
   scale variables that appear on more than one axis (NOTE(review): the
   counting and the test that trigger it are not visible here). */
6489 const struct ctables_axis *scales[PIVOT_N_AXES];
6490 size_t n_scales = 0;
6491 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6493 scales[a] = find_scale (t->axes[a]);
6499 msg (SE, _("Scale variables may appear only on one axis."));
6500 if (scales[PIVOT_AXIS_ROW])
6501 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6502 _("This scale variable appears on the rows axis."));
6503 if (scales[PIVOT_AXIS_COLUMN])
6504 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6505 _("This scale variable appears on the columns axis."));
6506 if (scales[PIVOT_AXIS_LAYER])
6507 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6508 _("This scale variable appears on the layer axis."));
/* Likewise, summaries may appear on only one axis. */
6512 const struct ctables_axis *summaries[PIVOT_N_AXES];
6513 size_t n_summaries = 0;
6514 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6516 summaries[a] = (scales[a]
6518 : find_categorical_summary_spec (t->axes[a]));
6522 if (n_summaries > 1)
6524 msg (SE, _("Summaries may appear only on one axis."));
6525 if (summaries[PIVOT_AXIS_ROW])
6526 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6527 _("This variable on the rows axis has a summary."));
6528 if (summaries[PIVOT_AXIS_COLUMN])
6529 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6530 _("This variable on the columns axis has a summary."));
6531 if (summaries[PIVOT_AXIS_LAYER])
6532 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6533 _("This variable on the layers axis has a summary."));
/* The summary axis is chosen among axes that have a summary, when any axis
   does, and otherwise among axes that have any variables at all. */
6536 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6537 if (n_summaries ? summaries[a] : t->axes[a])
6539 t->summary_axis = a;
/* If the command ends right after the table expression, the table is
   prepared immediately. */
6543 if (lex_token (lexer) == T_ENDCMD)
6545 if (!ctables_prepare_table (t))
6549 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that apply to this table, up to the next TABLE subcommand or
   the end of the command. */
6552 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6554 if (lex_match_id (lexer, "SLABELS"))
6556 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6558 if (lex_match_id (lexer, "POSITION"))
6560 lex_match (lexer, T_EQUALS);
6561 if (lex_match_id (lexer, "COLUMN"))
6562 t->slabels_axis = PIVOT_AXIS_COLUMN;
6563 else if (lex_match_id (lexer, "ROW"))
6564 t->slabels_axis = PIVOT_AXIS_ROW;
6565 else if (lex_match_id (lexer, "LAYER"))
6566 t->slabels_axis = PIVOT_AXIS_LAYER;
6569 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6573 else if (lex_match_id (lexer, "VISIBLE"))
6575 lex_match (lexer, T_EQUALS);
6576 if (!parse_bool (lexer, &t->slabels_visible))
6581 lex_error_expecting (lexer, "POSITION", "VISIBLE");
/* CLABELS chooses the axis on which category labels are shown: AUTO puts row
   and column labels back on their own axes, while ROWLABELS and COLLABELS
   move them to the opposite axis or to the layer axis. */
6586 else if (lex_match_id (lexer, "CLABELS"))
6588 if (lex_match_id (lexer, "AUTO"))
6590 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6591 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6593 else if (lex_match_id (lexer, "ROWLABELS"))
6595 lex_match (lexer, T_EQUALS);
6596 if (lex_match_id (lexer, "OPPOSITE"))
6597 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6598 else if (lex_match_id (lexer, "LAYER"))
6599 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6602 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6606 else if (lex_match_id (lexer, "COLLABELS"))
6608 lex_match (lexer, T_EQUALS);
6609 if (lex_match_id (lexer, "OPPOSITE"))
6610 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6611 else if (lex_match_id (lexer, "LAYER"))
6612 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6615 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6621 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6626 else if (lex_match_id (lexer, "CRITERIA"))
6628 if (!lex_force_match_id (lexer, "CILEVEL"))
6630 lex_match (lexer, T_EQUALS);
6632 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6634 t->cilevel = lex_number (lexer);
6637 else if (lex_match_id (lexer, "CATEGORIES"))
6639 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
/* TITLES: each of CAPTION, CORNER, and TITLE takes one or more strings,
   which are joined with single spaces after their placeholders are expanded
   by put_title_text(). */
6643 else if (lex_match_id (lexer, "TITLES"))
6648 if (lex_match_id (lexer, "CAPTION"))
6649 textp = &t->caption;
6650 else if (lex_match_id (lexer, "CORNER"))
6652 else if (lex_match_id (lexer, "TITLE"))
6656 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6659 lex_match (lexer, T_EQUALS);
6661 struct string s = DS_EMPTY_INITIALIZER;
6662 while (lex_is_string (lexer))
6664 if (!ds_is_empty (&s))
6665 ds_put_byte (&s, ' ');
6666 put_title_text (&s, lex_tokss (lexer), now,
6667 lexer, dataset_dict (ds),
6668 expr_start, expr_end);
6672 *textp = ds_steal_cstr (&s);
6674 while (lex_token (lexer) != T_SLASH
6675 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST: settings for a chi-square test, stored in t->chisq. */
6677 else if (lex_match_id (lexer, "SIGTEST"))
6681 t->chisq = xmalloc (sizeof *t->chisq);
6682 *t->chisq = (struct ctables_chisq) {
6684 .include_mrsets = true,
6685 .all_visible = true,
6691 if (lex_match_id (lexer, "TYPE"))
6693 lex_match (lexer, T_EQUALS);
6694 if (!lex_force_match_id (lexer, "CHISQUARE"))
6697 else if (lex_match_id (lexer, "ALPHA"))
6699 lex_match (lexer, T_EQUALS);
6700 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6702 t->chisq->alpha = lex_number (lexer);
6705 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6707 lex_match (lexer, T_EQUALS);
6708 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6711 else if (lex_match_id (lexer, "CATEGORIES"))
6713 lex_match (lexer, T_EQUALS);
6714 if (lex_match_id (lexer, "ALLVISIBLE"))
6715 t->chisq->all_visible = true;
6716 else if (lex_match_id (lexer, "SUBTOTALS"))
6717 t->chisq->all_visible = false;
6720 lex_error_expecting (lexer,
6721 "ALLVISIBLE", "SUBTOTALS");
6727 lex_error_expecting (lexer, "TYPE", "ALPHA",
6728 "INCLUDEMRSETS", "CATEGORIES");
6732 while (lex_token (lexer) != T_SLASH
6733 && lex_token (lexer) != T_ENDCMD);
/* COMPARETEST: settings for pairwise comparison tests, stored in
   t->pairwise. */
6735 else if (lex_match_id (lexer, "COMPARETEST"))
6739 t->pairwise = xmalloc (sizeof *t->pairwise);
6740 *t->pairwise = (struct ctables_pairwise) {
6742 .alpha = { .05, .05 },
6743 .adjust = BONFERRONI,
6744 .include_mrsets = true,
6745 .meansvariance_allcats = true,
6746 .all_visible = true,
6755 if (lex_match_id (lexer, "TYPE"))
6757 lex_match (lexer, T_EQUALS);
6758 if (lex_match_id (lexer, "PROP"))
6759 t->pairwise->type = PROP;
6760 else if (lex_match_id (lexer, "MEAN"))
6761 t->pairwise->type = MEAN;
6764 lex_error_expecting (lexer, "PROP", "MEAN");
/* ALPHA takes one or two values strictly between 0 and 1.  Two values are
   stored in ascending order; a single value is used for both. */
6768 else if (lex_match_id (lexer, "ALPHA"))
6770 lex_match (lexer, T_EQUALS);
6772 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6774 double a0 = lex_number (lexer);
6777 lex_match (lexer, T_COMMA);
6778 if (lex_is_number (lexer))
6780 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6782 double a1 = lex_number (lexer);
6785 t->pairwise->alpha[0] = MIN (a0, a1);
6786 t->pairwise->alpha[1] = MAX (a0, a1);
6789 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6791 else if (lex_match_id (lexer, "ADJUST"))
6793 lex_match (lexer, T_EQUALS);
6794 if (lex_match_id (lexer, "BONFERRONI"))
6795 t->pairwise->adjust = BONFERRONI;
6796 else if (lex_match_id (lexer, "BH"))
6797 t->pairwise->adjust = BH;
6798 else if (lex_match_id (lexer, "NONE"))
6799 t->pairwise->adjust = 0;
6802 lex_error_expecting (lexer, "BONFERRONI", "BH",
6807 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6809 lex_match (lexer, T_EQUALS);
6810 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6813 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6815 lex_match (lexer, T_EQUALS);
6816 if (lex_match_id (lexer, "ALLCATS"))
6817 t->pairwise->meansvariance_allcats = true;
6818 else if (lex_match_id (lexer, "TESTEDCATS"))
6819 t->pairwise->meansvariance_allcats = false;
6822 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6826 else if (lex_match_id (lexer, "CATEGORIES"))
6828 lex_match (lexer, T_EQUALS);
6829 if (lex_match_id (lexer, "ALLVISIBLE"))
6830 t->pairwise->all_visible = true;
6831 else if (lex_match_id (lexer, "SUBTOTALS"))
6832 t->pairwise->all_visible = false;
6835 lex_error_expecting (lexer, "ALLVISIBLE",
6840 else if (lex_match_id (lexer, "MERGE"))
6842 lex_match (lexer, T_EQUALS);
6843 if (!parse_bool (lexer, &t->pairwise->merge))
6846 else if (lex_match_id (lexer, "STYLE"))
6848 lex_match (lexer, T_EQUALS);
6849 if (lex_match_id (lexer, "APA"))
6850 t->pairwise->apa_style = true;
6851 else if (lex_match_id (lexer, "SIMPLE"))
6852 t->pairwise->apa_style = false;
6855 lex_error_expecting (lexer, "APA", "SIMPLE");
6859 else if (lex_match_id (lexer, "SHOWSIG"))
6861 lex_match (lexer, T_EQUALS);
6862 if (!parse_bool (lexer, &t->pairwise->show_sig))
6867 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6868 "INCLUDEMRSETS", "MEANSVARIANCE",
6869 "CATEGORIES", "MERGE", "STYLE",
6874 while (lex_token (lexer) != T_SLASH
6875 && lex_token (lexer) != T_ENDCMD);
6879 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6880 "CRITERIA", "CATEGORIES", "TITLES",
6881 "SIGTEST", "COMPARETEST");
6885 if (!lex_match (lexer, T_SLASH))
/* Category labels can be moved away from their own axis for rows or for
   columns, but not for both at once. */
6889 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6890 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6892 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6896 if (!ctables_prepare_table (t))
6899 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: run the tables over the data.  The command succeeds
   only if both the execution and the commit succeed. */
6902 input = proc_open (ds);
6903 bool ok = ctables_execute (ds, input, ct);
6904 ok = proc_commit (ds) && ok;
6906 ctables_destroy (ct);
6907 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Cleanup for the failure path (NOTE(review): the label and the return that
   belong to it are not visible in this listing). */
6912 ctables_destroy (ct);