1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
167 #if 0 /* Multiple response sets not yet implemented. */
/* Each NAME must be distinct, since the parser identifies a summary function by matching this string. */ \
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
450 struct hmap_node node;
455 struct ctables_occurrence
457 struct hmap_node node;
461 struct ctables_section
464 struct ctables_table *table;
465 struct ctables_nest *nests[PIVOT_N_AXES];
468 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
469 struct hmap cells; /* Contains "struct ctables_cell"s. */
470 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 struct ctables *ctables;
476 struct ctables_axis *axes[PIVOT_N_AXES];
477 struct ctables_stack stacks[PIVOT_N_AXES];
478 struct ctables_section *sections;
480 enum pivot_axis_type summary_axis;
481 struct ctables_summary_spec_set summary_specs;
482 struct variable **sum_vars;
485 enum pivot_axis_type slabels_axis;
486 bool slabels_visible;
488 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
490 Most commonly, label_axis[a] == a, and in particular we always have
491 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
493 If ROWLABELS or COLLABELS is specified, then one of
494 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
495 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
497 If any category labels are moved, then 'clabels_example' is one of the
498 variables being moved (and it is otherwise NULL). All of the variables
499 being moved have the same width, value labels, and categories, so this
500 example variable can be used to find those out.
502 The remaining members in this group are relevant only if category labels
505 'clabels_values_map' holds a "struct ctables_value" for all the values
506 that appear in all of the variables in the moved categories. It is
507 accumulated as the data is read. Once the data is fully read, its
508 sorted values are put into 'clabels_values' and 'n_clabels_values'.
510 enum pivot_axis_type label_axis[PIVOT_N_AXES];
511 enum pivot_axis_type clabels_from_axis;
512 const struct variable *clabels_example;
513 struct hmap clabels_values_map;
514 struct ctables_value **clabels_values;
515 size_t n_clabels_values;
517 /* Indexed by variable dictionary index. */
518 struct ctables_categories **categories;
527 struct ctables_chisq *chisq;
528 struct ctables_pairwise *pairwise;
531 struct ctables_categories
534 struct ctables_category *cats;
539 struct ctables_category
541 enum ctables_category_type
543 /* Explicit category lists. */
546 CCT_NRANGE, /* Numerical range. */
547 CCT_SRANGE, /* String range. */
552 /* Totals and subtotals. */
556 /* Implicit category lists. */
561 /* For contributing to TOTALN. */
562 CCT_EXCLUDED_MISSING,
566 struct ctables_category *subtotal;
572 double number; /* CCT_NUMBER. */
573 struct substring string; /* CCT_STRING, in dictionary encoding. */
574 double nrange[2]; /* CCT_NRANGE. */
575 struct substring srange[2]; /* CCT_SRANGE. */
579 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
580 bool hide_subcategories; /* CCT_SUBTOTAL. */
583 /* CCT_POSTCOMPUTE. */
586 const struct ctables_postcompute *pc;
587 enum fmt_type parse_format;
590 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
593 bool include_missing;
597 enum ctables_summary_function sort_function;
598 struct variable *sort_var;
603 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
604 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
605 struct msg_location *location;
609 ctables_category_uninit (struct ctables_category *cat)
620 case CCT_POSTCOMPUTE:
624 ss_dealloc (&cat->string);
628 ss_dealloc (&cat->srange[0]);
629 ss_dealloc (&cat->srange[1]);
634 free (cat->total_label);
642 case CCT_EXCLUDED_MISSING:
/* Compares two substrings whose 'string' pointer may be null.  Yields true
   if A and B both have a null 'string', or if both are non-null and
   ss_equals() reports them equal; yields false when exactly one of them is
   null. */
648 nullable_substring_equal (const struct substring *a,
649 const struct substring *b)
651 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
655 ctables_category_equal (const struct ctables_category *a,
656 const struct ctables_category *b)
658 if (a->type != b->type)
664 return a->number == b->number;
667 return ss_equals (a->string, b->string);
670 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
673 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
674 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
680 case CCT_POSTCOMPUTE:
681 return a->pc == b->pc;
685 return !strcmp (a->total_label, b->total_label);
690 return (a->include_missing == b->include_missing
691 && a->sort_ascending == b->sort_ascending
692 && a->sort_function == b->sort_function
693 && a->sort_var == b->sort_var
694 && a->percentile == b->percentile);
696 case CCT_EXCLUDED_MISSING:
704 ctables_categories_unref (struct ctables_categories *c)
709 assert (c->n_refs > 0);
713 for (size_t i = 0; i < c->n_cats; i++)
714 ctables_category_uninit (&c->cats[i]);
720 ctables_categories_equal (const struct ctables_categories *a,
721 const struct ctables_categories *b)
723 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
726 for (size_t i = 0; i < a->n_cats; i++)
727 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
733 /* Chi-square test (SIGTEST). */
741 /* Pairwise comparison test (COMPARETEST). */
742 struct ctables_pairwise
744 enum { PROP, MEAN } type;
747 bool meansvariance_allcats;
749 enum { BONFERRONI = 1, BH } adjust;
773 struct variable *var;
775 struct ctables_summary_spec_set specs[N_CSVS];
779 struct ctables_axis *subs[2];
782 struct msg_location *loc;
785 static void ctables_axis_destroy (struct ctables_axis *);
794 enum ctables_function_availability
796 CTFA_ALL, /* Any variables. */
797 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
798 CTFA_MRSETS, /* Only multiple-response sets */
801 struct ctables_summary_spec
803 enum ctables_summary_function function;
804 double percentile; /* CTSF_PTILE only. */
807 struct fmt_spec format;
808 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
815 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
816 const struct ctables_summary_spec *src)
819 dst->label = xstrdup_if_nonnull (src->label);
823 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
830 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
831 const struct ctables_summary_spec_set *src)
833 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
834 for (size_t i = 0; i < src->n; i++)
835 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
837 *dst = (struct ctables_summary_spec_set) {
842 .is_scale = src->is_scale,
847 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
849 for (size_t i = 0; i < set->n; i++)
850 ctables_summary_spec_uninit (&set->specs[i]);
855 parse_col_width (struct lexer *lexer, const char *name, double *width)
857 lex_match (lexer, T_EQUALS);
858 if (lex_match_id (lexer, "DEFAULT"))
860 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
862 *width = lex_number (lexer);
872 parse_bool (struct lexer *lexer, bool *b)
874 if (lex_match_id (lexer, "NO"))
876 else if (lex_match_id (lexer, "YES"))
880 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class recorded for summary function F.  The value
   comes from a static table whose entries have the form
   [ENUM] = AVAILABILITY; F is used directly as the index, with no range
   check. */
886 static enum ctables_function_availability
887 ctables_function_availability (enum ctables_summary_function f)
889 static enum ctables_function_availability availability[] = {
890 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
895 return availability[f];
899 ctables_summary_function_is_count (enum ctables_summary_function f)
905 case CTSF_ROWPCT_COUNT:
906 case CTSF_COLPCT_COUNT:
907 case CTSF_TABLEPCT_COUNT:
908 case CTSF_SUBTABLEPCT_COUNT:
909 case CTSF_LAYERPCT_COUNT:
910 case CTSF_LAYERROWPCT_COUNT:
911 case CTSF_LAYERCOLPCT_COUNT:
913 case CTSF_UROWPCT_COUNT:
914 case CTSF_UCOLPCT_COUNT:
915 case CTSF_UTABLEPCT_COUNT:
916 case CTSF_USUBTABLEPCT_COUNT:
917 case CTSF_ULAYERPCT_COUNT:
918 case CTSF_ULAYERROWPCT_COUNT:
919 case CTSF_ULAYERCOLPCT_COUNT:
922 case CTSF_ROWPCT_VALIDN:
923 case CTSF_COLPCT_VALIDN:
924 case CTSF_TABLEPCT_VALIDN:
925 case CTSF_SUBTABLEPCT_VALIDN:
926 case CTSF_LAYERPCT_VALIDN:
927 case CTSF_LAYERROWPCT_VALIDN:
928 case CTSF_LAYERCOLPCT_VALIDN:
929 case CTSF_ROWPCT_TOTALN:
930 case CTSF_COLPCT_TOTALN:
931 case CTSF_TABLEPCT_TOTALN:
932 case CTSF_SUBTABLEPCT_TOTALN:
933 case CTSF_LAYERPCT_TOTALN:
934 case CTSF_LAYERROWPCT_TOTALN:
935 case CTSF_LAYERCOLPCT_TOTALN:
952 case CTSF_ROWPCT_SUM:
953 case CTSF_COLPCT_SUM:
954 case CTSF_TABLEPCT_SUM:
955 case CTSF_SUBTABLEPCT_SUM:
956 case CTSF_LAYERPCT_SUM:
957 case CTSF_LAYERROWPCT_SUM:
958 case CTSF_LAYERCOLPCT_SUM:
959 case CTSF_UROWPCT_VALIDN:
960 case CTSF_UCOLPCT_VALIDN:
961 case CTSF_UTABLEPCT_VALIDN:
962 case CTSF_USUBTABLEPCT_VALIDN:
963 case CTSF_ULAYERPCT_VALIDN:
964 case CTSF_ULAYERROWPCT_VALIDN:
965 case CTSF_ULAYERCOLPCT_VALIDN:
966 case CTSF_UROWPCT_TOTALN:
967 case CTSF_UCOLPCT_TOTALN:
968 case CTSF_UTABLEPCT_TOTALN:
969 case CTSF_USUBTABLEPCT_TOTALN:
970 case CTSF_ULAYERPCT_TOTALN:
971 case CTSF_ULAYERROWPCT_TOTALN:
972 case CTSF_ULAYERCOLPCT_TOTALN:
984 case CTSF_UROWPCT_SUM:
985 case CTSF_UCOLPCT_SUM:
986 case CTSF_UTABLEPCT_SUM:
987 case CTSF_USUBTABLEPCT_SUM:
988 case CTSF_ULAYERPCT_SUM:
989 case CTSF_ULAYERROWPCT_SUM:
990 case CTSF_ULAYERCOLPCT_SUM:
998 parse_ctables_summary_function (struct lexer *lexer,
999 enum ctables_summary_function *f)
1003 enum ctables_summary_function function;
1004 struct substring name;
1006 static struct pair names[] = {
1007 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
1008 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
1011 /* The .COUNT suffix may be omitted. */
1012 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
1013 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
1014 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
1015 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
1016 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
1017 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
1018 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
1022 if (!lex_force_id (lexer))
1025 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
1026 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
1028 *f = names[i].function;
1033 lex_error (lexer, _("Expecting summary function name."));
1038 ctables_axis_destroy (struct ctables_axis *axis)
1046 for (size_t i = 0; i < N_CSVS; i++)
1047 ctables_summary_spec_set_uninit (&axis->specs[i]);
1052 ctables_axis_destroy (axis->subs[0]);
1053 ctables_axis_destroy (axis->subs[1]);
1056 msg_location_destroy (axis->loc);
1060 static struct ctables_axis *
1061 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1062 struct ctables_axis *sub0,
1063 struct ctables_axis *sub1,
1064 struct lexer *lexer, int start_ofs)
1066 struct ctables_axis *axis = xmalloc (sizeof *axis);
1067 *axis = (struct ctables_axis) {
1069 .subs = { sub0, sub1 },
1070 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1075 struct ctables_axis_parse_ctx
1077 struct lexer *lexer;
1078 struct dictionary *dict;
1080 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION.  FUNCTION's entry
   in a static table (entries of the form [ENUM] = FORMAT) selects one of
   three results: F format with width 40, PCT format with width 40 and 1
   decimal place, or VAR's own print format. */
1083 static struct fmt_spec
1084 ctables_summary_default_format (enum ctables_summary_function function,
1085 const struct variable *var)
1087 static const enum ctables_format default_formats[] = {
1088 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1092 switch (default_formats[function])
1095 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1098 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1101 return *var_get_print_format (var);
1108 static struct pivot_value *
1109 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1113 static const char *default_labels[] = {
1114 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1119 return (spec->function == CTSF_PTILE
1120 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1122 : pivot_value_new_text (default_labels[spec->function]));
1126 struct substring in = ss_cstr (spec->label);
1127 struct substring target = ss_cstr (")CILEVEL");
1129 struct string out = DS_EMPTY_INITIALIZER;
1132 size_t chunk = ss_find_substring (in, target);
1133 ds_put_substring (&out, ss_head (in, chunk));
1134 ss_advance (&in, chunk);
1136 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1138 ss_advance (&in, target.length);
1139 ds_put_format (&out, "%g", cilevel);
1145 ctables_summary_function_name (enum ctables_summary_function function)
1147 static const char *names[] = {
1148 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1152 return names[function];
1156 add_summary_spec (struct ctables_axis *axis,
1157 enum ctables_summary_function function, double percentile,
1158 const char *label, const struct fmt_spec *format,
1159 bool is_ctables_format, const struct msg_location *loc,
1160 enum ctables_summary_variant sv)
1162 if (axis->op == CTAO_VAR)
1164 const char *function_name = ctables_summary_function_name (function);
1165 const char *var_name = var_get_name (axis->var);
1166 switch (ctables_function_availability (function))
1169 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1170 "response sets."), function_name);
1171 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1180 _("Summary function %s applies only to scale variables."),
1182 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1193 struct ctables_summary_spec_set *set = &axis->specs[sv];
1194 if (set->n >= set->allocated)
1195 set->specs = x2nrealloc (set->specs, &set->allocated,
1196 sizeof *set->specs);
1198 struct ctables_summary_spec *dst = &set->specs[set->n++];
1199 *dst = (struct ctables_summary_spec) {
1200 .function = function,
1201 .percentile = percentile,
1202 .label = xstrdup_if_nonnull (label),
1203 .format = (format ? *format
1204 : ctables_summary_default_format (function, axis->var)),
1205 .is_ctables_format = is_ctables_format,
1211 for (size_t i = 0; i < 2; i++)
1212 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1213 format, is_ctables_format, loc, sv))
1219 static struct ctables_axis *ctables_axis_parse_stack (
1220 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name.

   A variable may be followed by "[S]" or "[C]" to force it to be treated as
   scale or categorical, respectively; otherwise it is scale exactly when its
   measurement level is MEASURE_SCALE.  A string variable may not be used as
   scale: that case reports an error and destroys the new axis.

   The values returned on the success and failure paths are not visible in
   this excerpt. */
1223 static struct ctables_axis *
1224 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1226 if (lex_match (ctx->lexer, T_LPAREN))
1228 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1229 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1231 ctables_axis_destroy (sub);
1237 if (!lex_force_id (ctx->lexer))
1240 int start_ofs = lex_ofs (ctx->lexer);
1241 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1245 struct ctables_axis *axis = xmalloc (sizeof *axis);
1246 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1248 /* XXX should figure out default measures by reading data */
1249 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1250 : lex_match_phrase (ctx->lexer, "[C]") ? false
1251 : var_get_measure (var) == MEASURE_SCALE);
/* The location covers the variable name and any [S]/[C] suffix consumed
   above. */
1252 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1253 lex_ofs (ctx->lexer) - 1);
1254 if (axis->scale && var_is_alpha (var))
1256 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1258 var_get_name (var));
1259 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit.  strcspn()
   yields the length of the leading run of non-digits, so the byte at that
   index is the null terminator exactly when S has no digit. */
1267 has_digit (const char *s)
1269 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into FORMAT.

   The type name, width, and decimals are first obtained with
   parse_abstract_format_specifier__().  The CTABLES-specific type names
   NEGPAREN, NEQUAL, PAREN, and PCTPAREN (matched case-insensitively) map to
   the corresponding CTEF_* type; for these, *IS_CTABLES_FORMAT is set to
   true, and the visible diagnostics reject a width below 2 or decimals
   greater than width minus 1.

   Any other type name sets *IS_CTABLES_FORMAT to false and is handled as an
   ordinary format: it must parse, be valid for output, and be compatible
   with numeric values.

   Return statements other than the one for ordinary formats are not visible
   in this excerpt. */
1273 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1274 bool *is_ctables_format)
1276 char type[FMT_TYPE_LEN_MAX + 1];
1277 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1280 if (!strcasecmp (type, "NEGPAREN"))
1281 format->type = CTEF_NEGPAREN;
1282 else if (!strcasecmp (type, "NEQUAL"))
1283 format->type = CTEF_NEQUAL;
1284 else if (!strcasecmp (type, "PAREN"))
1285 format->type = CTEF_PAREN;
1286 else if (!strcasecmp (type, "PCTPAREN"))
1287 format->type = CTEF_PCTPAREN;
1290 *is_ctables_format = false;
1291 return (parse_format_specifier (lexer, format)
1292 && fmt_check_output (format)
1293 && fmt_check_type_compat (format, VAL_NUMERIC));
1298 msg (SE, _("Output format %s requires width 2 or greater."), type);
1301 else if (format->d > format->w - 1)
1303 msg (SE, _("Output format %s requires width greater than decimals."),
1309 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function, a percentile in the
   closed range 0...100 when the function is PTILE, an optional string label,
   and an optional output format (recognized as an identifier token that
   contains a digit).  Each parsed specification is added to the primary
   axis with add_summary_spec() under the current summary variant, which
   starts as CSV_CELL.  Specifications may be separated by commas.  While in
   the CSV_CELL variant, "TOTALS" followed by "[" is accepted; the visible
   checks against CSV_TOTAL suggest that it switches to the totals variant
   and requires an extra closing "]" (the assignment itself is not visible
   in this excerpt).

   The error path destroys the primary axis.  Return statements are not
   visible here. */
1314 static struct ctables_axis *
1315 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1317 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1318 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1321 enum ctables_summary_variant sv = CSV_CELL;
1324 int start_ofs = lex_ofs (ctx->lexer);
1326 /* Parse function. */
1327 enum ctables_summary_function function;
1328 if (!parse_ctables_summary_function (ctx->lexer, &function))
1331 /* Parse percentile. */
1332 double percentile = 0;
1333 if (function == CTSF_PTILE)
1335 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1337 percentile = lex_number (ctx->lexer);
1338 lex_get (ctx->lexer);
1343 if (lex_is_string (ctx->lexer))
1345 label = ss_xstrdup (lex_tokss (ctx->lexer));
1346 lex_get (ctx->lexer);
1350 struct fmt_spec format;
1351 const struct fmt_spec *formatp;
1352 bool is_ctables_format = false;
1353 if (lex_token (ctx->lexer) == T_ID
1354 && has_digit (lex_tokcstr (ctx->lexer)))
1356 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1357 &is_ctables_format))
/* The location spans every token of this summary specification. */
1367 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1368 lex_ofs (ctx->lexer) - 1);
1369 add_summary_spec (sub, function, percentile, label, formatp,
1370 is_ctables_format, loc, sv);
1372 msg_location_destroy (loc);
1374 lex_match (ctx->lexer, T_COMMA);
1375 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1377 if (!lex_force_match (ctx->lexer, T_LBRACK))
1381 else if (lex_match (ctx->lexer, T_RBRACK))
1383 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1390 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   axis is returned if it is scale, otherwise a null pointer is returned for
   it; for other axis kinds both sub-axes are searched in order.  (The
   handling at the top of the function and the final return are not visible
   in this excerpt.) */
1394 static const struct ctables_axis *
1395 find_scale (const struct ctables_axis *axis)
1399 else if (axis->op == CTAO_VAR)
1400 return axis->scale ? axis : NULL;
1403 for (size_t i = 0; i < 2; i++)
1405 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical (non-scale)
   variable that has at least one CSV_CELL summary specification.  A
   variable axis is returned if it qualifies, otherwise a null pointer is
   returned for it; for other axis kinds both sub-axes are searched in
   order.  (The handling at the top of the function and the final return are
   not visible in this excerpt.) */
1413 static const struct ctables_axis *
1414 find_categorical_summary_spec (const struct ctables_axis *axis)
1418 else if (axis->op == CTAO_VAR)
1419 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1422 for (size_t i = 0; i < 2; i++)
1424 const struct ctables_axis *sum
1425 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: one or more postfix expressions separated by
   ">".  Each ">" wraps the expression parsed so far (outer) and the next
   postfix expression (inner) in a CTAO_NEST node.

   Two restrictions are diagnosed, and the new nest node is destroyed in
   either case:

   - Scale variables may not appear on both sides of a nest.

   - A categorical variable on the outer side may not carry summary
     specifications.

   The condition guarding the second diagnostic, the reassignment of the
   left-hand side, and all return statements are not visible in this
   excerpt. */
1433 static struct ctables_axis *
1434 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1436 int start_ofs = lex_ofs (ctx->lexer);
1437 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1441 while (lex_match (ctx->lexer, T_GT))
1443 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1447 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1448 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1450 const struct ctables_axis *outer_scale = find_scale (lhs);
1451 const struct ctables_axis *inner_scale = find_scale (rhs);
1452 if (outer_scale && inner_scale)
1454 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1455 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1456 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1457 ctables_axis_destroy (nest);
1461 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1464 msg_at (SE, nest->loc,
1465 _("Summaries may only be requested for categorical variables "
1466 "at the innermost nesting level."));
1467 msg_at (SN, outer_sum->loc,
1468 _("This outer categorical variable has a summary."));
1469 ctables_axis_destroy (nest);
/* Parses a stacking expression: one or more nest expressions separated by
   "+".  Each "+" combines the expression parsed so far with the next nest
   expression into a CTAO_STACK node, so the resulting tree leans left.
   Error handling and return statements are not visible in this excerpt. */
1479 static struct ctables_axis *
1480 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1482 int start_ofs = lex_ofs (ctx->lexer);
1483 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1487 while (lex_match (ctx->lexer, T_PLUS))
1489 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1493 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1494 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T and stores the result in
   T->axes[A].  When the next token is BY, "/", or the end of the command,
   there is no expression to parse; the value returned in that case is not
   visible in this excerpt.  Otherwise returns true if an axis was parsed
   successfully, false if parsing yielded a null axis. */
1501 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1502 struct ctables *ct, struct ctables_table *t,
1503 enum pivot_axis_type a)
1505 if (lex_token (lexer) == T_BY
1506 || lex_token (lexer) == T_SLASH
1507 || lex_token (lexer) == T_ENDCMD)
1510 struct ctables_axis_parse_ctx ctx = {
1516 t->axes[a] = ctables_axis_parse_stack (&ctx);
1517 return t->axes[a] != NULL;
/* Destroys CHISQ.  Called from ctables_table_destroy().  The body is not
   visible in this excerpt. */
1521 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE.  Called from ctables_table_destroy().  The body is not
   visible in this excerpt. */
1527 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T that are visible here: each
   entry of the categories array is unreferenced and the array itself is
   freed, the column, row, and layer axes are destroyed, and the chi-square
   and pairwise settings are destroyed.  Other cleanup steps may exist on
   lines not visible in this excerpt. */
1533 ctables_table_destroy (struct ctables_table *t)
1538 for (size_t i = 0; i < t->n_categories; i++)
1539 ctables_categories_unref (t->categories[i]);
1540 free (t->categories);
1542 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1543 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1544 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1548 ctables_chisq_destroy (t->chisq);
1549 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT that are visible here: drops a
   reference to the pivot table look and destroys every table in
   CT->tables.  Other cleanup steps may exist on lines not visible in this
   excerpt. */
1554 ctables_destroy (struct ctables *ct)
1559 pivot_table_look_unref (ct->look);
1563 for (size_t i = 0; i < ct->n_tables; i++)
1564 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose numeric range is LOW through HIGH.
   (The initializer for the category type is not visible in this
   excerpt.) */
1569 static struct ctables_category
1570 cct_nrange (double low, double high)
1572 return (struct ctables_category) {
1574 .nrange = { low, high }
/* Returns a category, by value, whose string range is LOW through HIGH.
   The substrings are stored as given, without copying.  (The initializer
   for the category type is not visible in this excerpt.) */
1578 static struct ctables_category
1579 cct_srange (struct substring low, struct substring high)
1581 return (struct ctables_category) {
1583 .srange = { low, high }
/* Parses what may follow SUBTOTAL or HSUBTOTAL and stores a CCT_SUBTOTAL
   category in *CAT.  If "=" is present, a string token must follow and a
   copy of it becomes the label; the other visible assignment uses the
   translated default "Subtotal".  HIDE_SUBCATEGORIES is copied into the
   category unchanged.  The label is heap-allocated and handed to *CAT.
   Return statements are not visible in this excerpt. */
1588 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1589 struct ctables_category *cat)
1592 if (lex_match (lexer, T_EQUALS))
1594 if (!lex_force_string (lexer))
1597 total_label = ss_xstrdup (lex_tokss (lexer));
1601 total_label = xstrdup (_("Subtotal"));
1603 *cat = (struct ctables_category) {
1604 .type = CCT_SUBTOTAL,
1605 .hide_subcategories = hide_subcategories,
1606 .total_label = total_label
/* Recodes the current token's string with recode_substring_pool(), passing
   DICT's encoding and "UTF-8" as the two encodings and no pool, then trims
   trailing spaces from the result.
   NOTE(review): presumably this converts from UTF-8 into the dictionary
   encoding -- confirm against recode_substring_pool()'s parameter order.
   Token consumption and the return statement are not visible in this
   excerpt. */
1611 static struct substring
1612 parse_substring (struct lexer *lexer, struct dictionary *dict)
1614 struct substring s = recode_substring_pool (
1615 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1616 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification and stores it in *CAT.

   The forms recognized by the visible code are:

   - OTHERNM and MISSING keywords.

   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal()
     with subcategories shown and hidden, respectively.

   - "LO THRU x", where x is a string or a number.

   - A number, optionally followed by "THRU HI" or "THRU number".

   - A string, optionally followed by "THRU HI" or "THRU string".

   - "&name", a reference to a postcompute that must already be known to the
     ctables object (`ct'); an unknown name is reported as an error.

   Anything else is a syntax error.  Open-ended numeric ranges use -DBL_MAX
   and DBL_MAX as bounds; open-ended string ranges use a substring whose
   string pointer is null.  Return statements are mostly not visible in this
   excerpt. */
1622 ctables_table_parse_explicit_category (struct lexer *lexer,
1623 struct dictionary *dict,
1625 struct ctables_category *cat)
1627 if (lex_match_id (lexer, "OTHERNM"))
1628 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1629 else if (lex_match_id (lexer, "MISSING"))
1630 *cat = (struct ctables_category) { .type = CCT_MISSING };
1631 else if (lex_match_id (lexer, "SUBTOTAL"))
1632 return ctables_table_parse_subtotal (lexer, false, cat);
1633 else if (lex_match_id (lexer, "HSUBTOTAL"))
1634 return ctables_table_parse_subtotal (lexer, true, cat);
1635 else if (lex_match_id (lexer, "LO"))
1637 if (!lex_force_match_id (lexer, "THRU"))
1639 if (lex_is_string (lexer))
1641 struct substring sr0 = { .string = NULL };
1642 struct substring sr1 = parse_substring (lexer, dict);
1643 *cat = cct_srange (sr0, sr1);
1645 else if (lex_force_num (lexer))
1647 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1653 else if (lex_is_number (lexer))
1655 double number = lex_number (lexer);
1657 if (lex_match_id (lexer, "THRU"))
1659 if (lex_match_id (lexer, "HI"))
1660 *cat = cct_nrange (number, DBL_MAX);
1663 if (!lex_force_num (lexer))
1665 *cat = cct_nrange (number, lex_number (lexer));
1670 *cat = (struct ctables_category) {
1675 else if (lex_is_string (lexer))
1677 struct substring s = parse_substring (lexer, dict);
1678 if (lex_match_id (lexer, "THRU"))
1680 if (lex_match_id (lexer, "HI"))
1682 struct substring sr1 = { .string = NULL };
1683 *cat = cct_srange (s, sr1);
1687 if (!lex_force_string (lexer))
1689 struct substring sr1 = parse_substring (lexer, dict);
1690 *cat = cct_srange (s, sr1);
1694 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1696 else if (lex_match (lexer, T_AND))
1698 if (!lex_force_id (lexer))
1700 struct ctables_postcompute *pc = ctables_find_postcompute (
1701 ct, lex_tokcstr (lexer));
1704 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1705 msg_at (SE, loc, _("Unknown postcompute &%s."),
1706 lex_tokcstr (lexer));
1707 msg_location_destroy (loc);
1712 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1716 lex_error (lexer, NULL);
/* Parses S as a value in the given FORMAT using data_in() with DICT's
   encoding and the current format settings.  When data_in() reports an
   error, a message is issued at LOCATION that names FORMAT and includes
   the error text.  N is the output parameter; the statements that store
   into it and that produce the return value are not visible in this
   excerpt. */
1724 parse_category_string (struct msg_location *location,
1725 struct substring s, const struct dictionary *dict,
1726 enum fmt_type format, double *n)
1729 char *error = data_in (s, dict_get_encoding (dict), format,
1730 settings_get_fmt_settings (), &v, 0, NULL);
1733 msg_at (SE, location,
1734 _("Failed to parse category specification as format %s: %s."),
1735 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.  The comparison depends on E->op:

   - CTPO_CAT_NUMBER, CTPO_CAT_STRING: a category of the matching type with
     an equal number or string.

   - CTPO_CAT_NRANGE, CTPO_CAT_SRANGE: a range category with equal bounds at
     both ends (string bounds are compared with nullable_substring_equal()).

   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: a category of the
     corresponding type.

   - CTPO_CAT_SUBTOTAL: subtotal categories are matched against
     E->subtotal_index, with index 0 treated specially.

   The final check concerns a subtotal reference with index 0 when CATS
   contains more than one subtotal; what it returns in that case, and how
   `best' and `n_subtotals' are updated, are not visible in this excerpt. */
1744 static struct ctables_category *
1745 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1746 const struct ctables_pcexpr *e)
1748 struct ctables_category *best = NULL;
1749 size_t n_subtotals = 0;
1750 for (size_t i = 0; i < cats->n_cats; i++)
1752 struct ctables_category *cat = &cats->cats[i];
1755 case CTPO_CAT_NUMBER:
1756 if (cat->type == CCT_NUMBER && cat->number == e->number)
1760 case CTPO_CAT_STRING:
1761 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1765 case CTPO_CAT_NRANGE:
1766 if (cat->type == CCT_NRANGE
1767 && cat->nrange[0] == e->nrange[0]
1768 && cat->nrange[1] == e->nrange[1])
1772 case CTPO_CAT_SRANGE:
1773 if (cat->type == CCT_SRANGE
1774 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1775 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1779 case CTPO_CAT_MISSING:
1780 if (cat->type == CCT_MISSING)
1784 case CTPO_CAT_OTHERNM:
1785 if (cat->type == CCT_OTHERNM)
1789 case CTPO_CAT_SUBTOTAL:
1790 if (cat->type == CCT_SUBTOTAL)
1793 if (e->subtotal_index == n_subtotals)
1795 else if (e->subtotal_index == 0)
1800 case CTPO_CAT_TOTAL:
1801 if (cat->type == CCT_TOTAL)
1815 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to,
   taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string references in E are first
   converted to numbers with parse_category_string():

   - A CTPO_CAT_STRING node is looked up as a CTPO_CAT_NUMBER node.

   - A CTPO_CAT_SRANGE node is looked up as a CTPO_CAT_NRANGE node, where a
     null low or high string maps to -DBL_MAX or DBL_MAX, respectively.

   In every other case E is looked up as is.  The results of failed
   conversions are not visible in this excerpt. */
1820 static struct ctables_category *
1821 ctables_find_category_for_postcompute (const struct dictionary *dict,
1822 const struct ctables_categories *cats,
1823 enum fmt_type parse_format,
1824 const struct ctables_pcexpr *e)
1826 if (parse_format != FMT_F)
1828 if (e->op == CTPO_CAT_STRING)
1831 if (!parse_category_string (e->location, e->string, dict,
1832 parse_format, &number))
1835 struct ctables_pcexpr e2 = {
1836 .op = CTPO_CAT_NUMBER,
1838 .location = e->location,
1840 return ctables_find_category_for_postcompute__ (cats, &e2);
1842 else if (e->op == CTPO_CAT_SRANGE)
1845 if (!e->srange[0].string)
1846 nrange[0] = -DBL_MAX;
1847 else if (!parse_category_string (e->location, e->srange[0], dict,
1848 parse_format, &nrange[0]))
1851 if (!e->srange[1].string)
1852 nrange[1] = DBL_MAX;
1853 else if (!parse_category_string (e->location, e->srange[1], dict,
1854 parse_format, &nrange[1]))
1857 struct ctables_pcexpr e2 = {
1858 .op = CTPO_CAT_NRANGE,
1859 .nrange = { nrange[0], nrange[1] },
1860 .location = e->location,
1862 return ctables_find_category_for_postcompute__ (cats, &e2);
1865 return ctables_find_category_for_postcompute__ (cats, e);
/* Recursively checks that every category referenced by postcompute
   expression E, used by computed category PC_CAT, can be found in CATS.

   For a category-reference node, the category is looked up with
   ctables_find_category_for_postcompute() using PC_CAT's parse format.  Two
   diagnostics are visible:

   - A subtotal reference without a position (index 0) when CATS contains
     more than one SUBTOTAL or HSUBTOTAL, reported at CATS_LOCATION with a
     note at the reference.

   - A reference to a category that is not in the list, reported at PC_CAT's
     location with notes at the reference and at CATS_LOCATION.

   For other nodes, both operands are checked recursively; a null operand is
   skipped.  The recursive result is tested with `!', so the function
   evidently returns a success flag; return statements are not visible in
   this excerpt.

   NOTE(review): CTPO_CAT_SRANGE is not among the case labels visible below
   -- confirm that it is handled elsewhere in this switch. */
1869 ctables_recursive_check_postcompute (struct dictionary *dict,
1870 const struct ctables_pcexpr *e,
1871 struct ctables_category *pc_cat,
1872 const struct ctables_categories *cats,
1873 const struct msg_location *cats_location)
1877 case CTPO_CAT_NUMBER:
1878 case CTPO_CAT_STRING:
1879 case CTPO_CAT_NRANGE:
1880 case CTPO_CAT_MISSING:
1881 case CTPO_CAT_OTHERNM:
1882 case CTPO_CAT_SUBTOTAL:
1883 case CTPO_CAT_TOTAL:
1885 struct ctables_category *cat = ctables_find_category_for_postcompute (
1886 dict, cats, pc_cat->parse_format, e);
1889 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1891 size_t n_subtotals = 0;
1892 for (size_t i = 0; i < cats->n_cats; i++)
1893 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1894 if (n_subtotals > 1)
1896 msg_at (SE, cats_location,
1897 ngettext ("These categories include %zu instance "
1898 "of SUBTOTAL or HSUBTOTAL, so references "
1899 "from computed categories must refer to "
1900 "subtotals by position.",
1901 "These categories include %zu instances "
1902 "of SUBTOTAL or HSUBTOTAL, so references "
1903 "from computed categories must refer to "
1904 "subtotals by position.",
1907 msg_at (SN, e->location,
1908 _("This is the reference that lacks a position."));
1913 msg_at (SE, pc_cat->location,
1914 _("Computed category &%s references a category not included "
1915 "in the category list."),
1917 msg_at (SN, e->location, _("This is the missing category."));
1918 msg_at (SN, cats_location,
1919 _("To fix the problem, add the missing category to the "
1920 "list of categories here."));
1923 if (pc_cat->pc->hide_source_cats)
1937 for (size_t i = 0; i < 2; i++)
1938 if (e->subs[i] && !ctables_recursive_check_postcompute (
1939 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric variables.  For a numeric
   variable, an error is reported at CAT's location explaining that the
   category specification may be applied only to string variables.  Callers
   test the result with `!'; the return statements themselves are not
   visible in this excerpt. */
1949 all_strings (struct variable **vars, size_t n_vars,
1950 const struct ctables_category *cat)
1952 for (size_t j = 0; j < n_vars; j++)
1953 if (var_is_numeric (vars[j]))
1955 msg_at (SE, cat->location,
1956 _("This category specification may be applied only to string "
1957 "variables, but this subcommand tries to apply it to "
1958 "numeric variable %s."),
1959 var_get_name (vars[j]));
/* Parses the body of a CATEGORIES subcommand for table T.

   The visible steps are:

   1. Require VARIABLES, an optional "=", and a list of variables.

   2. Determine whether all of the variables share one print format type.
      The visible part of the follow-up test checks whether that type is a
      date, time, or date-component format.

   3. Allocate one categories object, with one reference per variable and
      empty categories shown by default, and install it in T->categories at
      each variable's dictionary index after unreferencing the previous
      entry.

   4. If "[" follows, parse an explicit category list up to "]", recording
      each category's location.  The categories are then checked:
      postcomputes are validated against the list, and string categories
      and ranges are either converted to numeric ones through the common
      format or required to apply to string variables only.  Categories
      that apply only to numeric variables are rejected for string
      variables.

   5. Parse the remaining settings up to "/" or the end of the command.
      ORDER, KEY, and MISSING are accepted only when no explicit categories
      were given; TOTAL, LABEL, POSITION, and EMPTY are always accepted.

   6. Add the implicit category and the totals category where applicable
      (the conditions are not visible in this excerpt).  The totals
      category is inserted at index 0 or placed at index n_cats, and its
      label defaults to the translated "Total".

   7. Make a final pass that assigns category->subtotal links.

   Error handling and return statements are not visible in this excerpt. */
1966 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1967 struct ctables *ct, struct ctables_table *t)
1969 if (!lex_match_id (lexer, "VARIABLES"))
1971 lex_match (lexer, T_EQUALS);
1973 struct variable **vars;
1975 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all of the variables, if any. */
1978 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1979 for (size_t i = 1; i < n_vars; i++)
1981 const struct fmt_spec *f = var_get_print_format (vars[i]);
1982 if (f->type != common_format->type)
1984 common_format = NULL;
1990 && (fmt_get_category (common_format->type)
1991 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* A single categories object is shared by all of the variables, hence one
   reference per variable. */
1993 struct ctables_categories *c = xmalloc (sizeof *c);
1994 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1995 for (size_t i = 0; i < n_vars; i++)
1997 struct ctables_categories **cp
1998 = &t->categories[var_get_dict_index (vars[i])];
1999 ctables_categories_unref (*cp);
2003 size_t allocated_cats = 0;
2004 if (lex_match (lexer, T_LBRACK))
2006 int cats_start_ofs = lex_ofs (lexer);
2009 if (c->n_cats >= allocated_cats)
2010 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2012 int start_ofs = lex_ofs (lexer);
2013 struct ctables_category *cat = &c->cats[c->n_cats];
2014 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2016 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2019 lex_match (lexer, T_COMMA);
2021 while (!lex_match (lexer, T_RBRACK));
2023 struct msg_location *cats_location
2024 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
2025 for (size_t i = 0; i < c->n_cats; i++)
2027 struct ctables_category *cat = &c->cats[i];
2030 case CCT_POSTCOMPUTE:
2031 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2032 if (!ctables_recursive_check_postcompute (dict, cat->pc->expr,
2033 cat, c, cats_location))
2039 for (size_t j = 0; j < n_vars; j++)
2040 if (var_is_alpha (vars[j]))
2042 msg_at (SE, cat->location,
2043 _("This category specification may be applied "
2044 "only to numeric variables, but this "
2045 "subcommand tries to apply it to string "
2047 var_get_name (vars[j]));
2056 if (!parse_category_string (cat->location, cat->string, dict,
2057 common_format->type, &n))
2060 ss_dealloc (&cat->string);
2062 cat->type = CCT_NUMBER;
2065 else if (!all_strings (vars, n_vars, cat))
2074 if (!cat->srange[0].string)
2076 else if (!parse_category_string (cat->location,
2077 cat->srange[0], dict,
2078 common_format->type, &n[0]))
2081 if (!cat->srange[1].string)
2083 else if (!parse_category_string (cat->location,
2084 cat->srange[1], dict,
2085 common_format->type, &n[1]))
2088 ss_dealloc (&cat->srange[0]);
2089 ss_dealloc (&cat->srange[1]);
2091 cat->type = CCT_NRANGE;
2092 cat->nrange[0] = n[0];
2093 cat->nrange[1] = n[1];
2095 else if (!all_strings (vars, n_vars, cat))
2106 case CCT_EXCLUDED_MISSING:
2112 struct ctables_category cat = {
2114 .include_missing = false,
2115 .sort_ascending = true,
2117 bool show_totals = false;
2118 char *total_label = NULL;
2119 bool totals_before = false;
2120 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2122 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2124 lex_match (lexer, T_EQUALS);
2125 if (lex_match_id (lexer, "A"))
2126 cat.sort_ascending = true;
2127 else if (lex_match_id (lexer, "D"))
2128 cat.sort_ascending = false;
2131 lex_error_expecting (lexer, "A", "D");
2135 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2137 lex_match (lexer, T_EQUALS);
2138 if (lex_match_id (lexer, "VALUE"))
2139 cat.type = CCT_VALUE;
2140 else if (lex_match_id (lexer, "LABEL"))
2141 cat.type = CCT_LABEL;
2144 cat.type = CCT_FUNCTION;
2145 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2148 if (lex_match (lexer, T_LPAREN))
2150 cat.sort_var = parse_variable (lexer, dict);
2154 if (cat.sort_function == CTSF_PTILE)
2156 lex_match (lexer, T_COMMA);
2157 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2159 cat.percentile = lex_number (lexer);
2163 if (!lex_force_match (lexer, T_RPAREN))
2166 else if (ctables_function_availability (cat.sort_function)
2169 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2174 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2176 lex_match (lexer, T_EQUALS);
2177 if (lex_match_id (lexer, "INCLUDE"))
2178 cat.include_missing = true;
2179 else if (lex_match_id (lexer, "EXCLUDE"))
2180 cat.include_missing = false;
2183 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2187 else if (lex_match_id (lexer, "TOTAL"))
2189 lex_match (lexer, T_EQUALS);
2190 if (!parse_bool (lexer, &show_totals))
2193 else if (lex_match_id (lexer, "LABEL"))
2195 lex_match (lexer, T_EQUALS);
2196 if (!lex_force_string (lexer))
2199 total_label = ss_xstrdup (lex_tokss (lexer));
2202 else if (lex_match_id (lexer, "POSITION"))
2204 lex_match (lexer, T_EQUALS);
2205 if (lex_match_id (lexer, "BEFORE"))
2206 totals_before = true;
2207 else if (lex_match_id (lexer, "AFTER"))
2208 totals_before = false;
2211 lex_error_expecting (lexer, "BEFORE", "AFTER");
2215 else if (lex_match_id (lexer, "EMPTY"))
2217 lex_match (lexer, T_EQUALS);
2218 if (lex_match_id (lexer, "INCLUDE"))
2219 c->show_empty = true;
2220 else if (lex_match_id (lexer, "EXCLUDE"))
2221 c->show_empty = false;
2224 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2231 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2232 "TOTAL", "LABEL", "POSITION", "EMPTY");
2234 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2241 if (c->n_cats >= allocated_cats)
2242 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2243 c->cats[c->n_cats++] = cat;
2248 if (c->n_cats >= allocated_cats)
2249 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2251 struct ctables_category *totals;
2254 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2255 totals = &c->cats[0];
2258 totals = &c->cats[c->n_cats];
2261 *totals = (struct ctables_category) {
2263 .total_label = total_label ? total_label : xstrdup (_("Total")),
2267 struct ctables_category *subtotal = NULL;
/* Walk the categories forward when totals come first and backward
   otherwise.  In the backward case the index is decremented inside the loop
   condition, so the increment expression is a no-op. */
2268 for (size_t i = totals_before ? 0 : c->n_cats;
2269 totals_before ? i < c->n_cats : i-- > 0;
2270 totals_before ? i++ : 0)
2272 struct ctables_category *cat = &c->cats[i];
2281 cat->subtotal = subtotal;
2284 case CCT_POSTCOMPUTE:
2295 case CCT_EXCLUDED_MISSING:
/* Releases the resources owned by NEST.  Called for each nest by
   ctables_stack_uninit().  The body is not visible in this excerpt. */
2304 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes every nest in STACK and frees the nests array.  STACK
   itself is not freed by the visible code. */
2311 ctables_stack_uninit (struct ctables_stack *stack)
2315 for (size_t i = 0; i < stack->n; i++)
2316 ctables_nest_uninit (&stack->nests[i]);
2317 free (stack->nests);
/* Returns the stack formed by nesting every nest in S1 inside every nest in
   S0, that is, one new nest per (S0 nest, S1 nest) pair.

   Each new nest's variable list is the S0 nest's variables followed by the
   S1 nest's variables.  Its scale index is taken from the S0 nest if that
   has one, otherwise from the S1 nest offset by the S0 nest's variable
   count.  Its summary specifications are cloned from one of the two source
   nests, chosen by testing which has specs[CSV_CELL].var set (the
   assignments that make the choice are not visible in this excerpt).

   S0 and S1 are passed by value and uninitialized before returning, so the
   caller must not use them afterward. */
2321 static struct ctables_stack
2322 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2329 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2330 for (size_t i = 0; i < s0.n; i++)
2331 for (size_t j = 0; j < s1.n; j++)
2333 const struct ctables_nest *a = &s0.nests[i];
2334 const struct ctables_nest *b = &s1.nests[j];
2336 size_t allocate = a->n + b->n;
2337 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2338 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2340 for (size_t k = 0; k < a->n; k++)
2341 vars[n++] = a->vars[k];
2342 for (size_t k = 0; k < b->n; k++)
2343 vars[n++] = b->vars[k];
2344 assert (n == allocate);
2346 const struct ctables_nest *summary_src;
2347 if (!a->specs[CSV_CELL].var)
2349 else if (!b->specs[CSV_CELL].var)
2354 struct ctables_nest *new = &stack.nests[stack.n++];
2355 *new = (struct ctables_nest) {
2357 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2358 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2362 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2363 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2365 ctables_stack_uninit (&s0);
2366 ctables_stack_uninit (&s1);
/* Returns the stack formed by concatenating the nests of S0 and S1.  The
   nests are copied by structure assignment, not cloned.  Each nest taken
   from S1 has its group_head increased by S0.n, since it now sits S0.n
   positions later in the combined array.  Cleanup of S0 and S1 and the
   return statement are not visible in this excerpt. */
2370 static struct ctables_stack
2371 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2373 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2374 for (size_t i = 0; i < s0.n; i++)
2375 stack.nests[stack.n++] = s0.nests[i];
2376 for (size_t i = 0; i < s1.n; i++)
2378 stack.nests[stack.n] = s1.nests[i];
2379 stack.nests[stack.n].group_head += s0.n;
2382 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing a single nest for variable axis A.  The nest's
   scale index is 0 when A is a scale variable and SIZE_MAX otherwise.  When
   A has CSV_CELL summary specifications or is scale, every summary variant
   of A is cloned into the nest and tagged with A's variable and scale
   flag. */
2388 static struct ctables_stack
2389 var_fts (const struct ctables_axis *a)
2391 struct variable **vars = xmalloc (sizeof *vars);
2394 struct ctables_nest *nest = xmalloc (sizeof *nest);
2395 *nest = (struct ctables_nest) {
2398 .scale_idx = a->scale ? 0 : SIZE_MAX,
2400 if (a->specs[CSV_CELL].n || a->scale)
2401 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2403 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2404 nest->specs[sv].var = a->var;
2405 nest->specs[sv].is_scale = a->scale;
2407 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively expands axis tree A into a stack of nests.  The visible
   branches return an empty stack, combine the expansions of both sub-axes
   with stack_fts(), or combine them with nest_fts().  The conditions that
   select each branch are not visible in this excerpt. */
2410 static struct ctables_stack
2411 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2414 return (struct ctables_stack) { .n = 0 };
2422 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2423 enumerate_fts (axis_type, a->subs[1]));
2426 /* This should consider any of the scale variables found in the result to
2427 be linked to each other listwise for SMISSING=LISTWISE. */
2428 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2429 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function.  Which member is in use
   depends on the summary function, as grouped by the comments below; see
   ctables_summary_init() for the initialization of each group.  Several
   member declarations are not visible in this excerpt. */
2435 union ctables_summary
2437 /* COUNT, VALIDN, TOTALN. */
2440 /* MINIMUM, MAXIMUM, RANGE. */
2447 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2448 struct moments1 *moments;
2450 /* MEDIAN, MODE, PTILE. */
2453 struct casewriter *writer;
2458 /* XXX multiple response */
/* Initializes summary accumulator S for the function in SS.

   The initializations visible in this excerpt are:

   - Minimum/maximum group: both set to SYSMIS.

   - Moments group (including the *PCT_SUM functions): a moments1
     calculator created with MOMENT_VARIANCE.

   - Order-statistics group: a sorting casewriter over a two-column numeric
     case prototype, sorted ascending on column 0.  The temporary ordering
     and prototype are released after the writer is created.

   The initialization for the count-like functions listed first is not
   visible in this excerpt. */
2462 ctables_summary_init (union ctables_summary *s,
2463 const struct ctables_summary_spec *ss)
2465 switch (ss->function)
2469 case CTSF_ROWPCT_COUNT:
2470 case CTSF_COLPCT_COUNT:
2471 case CTSF_TABLEPCT_COUNT:
2472 case CTSF_SUBTABLEPCT_COUNT:
2473 case CTSF_LAYERPCT_COUNT:
2474 case CTSF_LAYERROWPCT_COUNT:
2475 case CTSF_LAYERCOLPCT_COUNT:
2476 case CTSF_ROWPCT_VALIDN:
2477 case CTSF_COLPCT_VALIDN:
2478 case CTSF_TABLEPCT_VALIDN:
2479 case CTSF_SUBTABLEPCT_VALIDN:
2480 case CTSF_LAYERPCT_VALIDN:
2481 case CTSF_LAYERROWPCT_VALIDN:
2482 case CTSF_LAYERCOLPCT_VALIDN:
2483 case CTSF_ROWPCT_TOTALN:
2484 case CTSF_COLPCT_TOTALN:
2485 case CTSF_TABLEPCT_TOTALN:
2486 case CTSF_SUBTABLEPCT_TOTALN:
2487 case CTSF_LAYERPCT_TOTALN:
2488 case CTSF_LAYERROWPCT_TOTALN:
2489 case CTSF_LAYERCOLPCT_TOTALN:
2496 case CTSF_UROWPCT_COUNT:
2497 case CTSF_UCOLPCT_COUNT:
2498 case CTSF_UTABLEPCT_COUNT:
2499 case CTSF_USUBTABLEPCT_COUNT:
2500 case CTSF_ULAYERPCT_COUNT:
2501 case CTSF_ULAYERROWPCT_COUNT:
2502 case CTSF_ULAYERCOLPCT_COUNT:
2503 case CTSF_UROWPCT_VALIDN:
2504 case CTSF_UCOLPCT_VALIDN:
2505 case CTSF_UTABLEPCT_VALIDN:
2506 case CTSF_USUBTABLEPCT_VALIDN:
2507 case CTSF_ULAYERPCT_VALIDN:
2508 case CTSF_ULAYERROWPCT_VALIDN:
2509 case CTSF_ULAYERCOLPCT_VALIDN:
2510 case CTSF_UROWPCT_TOTALN:
2511 case CTSF_UCOLPCT_TOTALN:
2512 case CTSF_UTABLEPCT_TOTALN:
2513 case CTSF_USUBTABLEPCT_TOTALN:
2514 case CTSF_ULAYERPCT_TOTALN:
2515 case CTSF_ULAYERROWPCT_TOTALN:
2516 case CTSF_ULAYERCOLPCT_TOTALN:
2526 s->min = s->max = SYSMIS;
2534 case CTSF_ROWPCT_SUM:
2535 case CTSF_COLPCT_SUM:
2536 case CTSF_TABLEPCT_SUM:
2537 case CTSF_SUBTABLEPCT_SUM:
2538 case CTSF_LAYERPCT_SUM:
2539 case CTSF_LAYERROWPCT_SUM:
2540 case CTSF_LAYERCOLPCT_SUM:
2545 case CTSF_UVARIANCE:
2546 case CTSF_UROWPCT_SUM:
2547 case CTSF_UCOLPCT_SUM:
2548 case CTSF_UTABLEPCT_SUM:
2549 case CTSF_USUBTABLEPCT_SUM:
2550 case CTSF_ULAYERPCT_SUM:
2551 case CTSF_ULAYERROWPCT_SUM:
2552 case CTSF_ULAYERCOLPCT_SUM:
2553 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric columns; ctables_summary_add() writes the value to column 0
   and its weight to column 1. */
2563 struct caseproto *proto = caseproto_create ();
2564 proto = caseproto_add_width (proto, 0);
2565 proto = caseproto_add_width (proto, 0);
2567 struct subcase ordering;
2568 subcase_init (&ordering, 0, 0, SC_ASCEND);
2569 s->writer = sort_create_writer (&ordering, proto);
2570 subcase_uninit (&ordering);
2571 caseproto_unref (proto);
/* Releases whatever summary accumulator S holds for the function in SS.
   The moments group (including the *PCT_SUM functions) destroys its
   moments1 calculator, and a later branch destroys the sorting casewriter.
   No cleanup is visible in this excerpt for the count-like functions listed
   first. */
2581 ctables_summary_uninit (union ctables_summary *s,
2582 const struct ctables_summary_spec *ss)
2584 switch (ss->function)
2588 case CTSF_ROWPCT_COUNT:
2589 case CTSF_COLPCT_COUNT:
2590 case CTSF_TABLEPCT_COUNT:
2591 case CTSF_SUBTABLEPCT_COUNT:
2592 case CTSF_LAYERPCT_COUNT:
2593 case CTSF_LAYERROWPCT_COUNT:
2594 case CTSF_LAYERCOLPCT_COUNT:
2595 case CTSF_ROWPCT_VALIDN:
2596 case CTSF_COLPCT_VALIDN:
2597 case CTSF_TABLEPCT_VALIDN:
2598 case CTSF_SUBTABLEPCT_VALIDN:
2599 case CTSF_LAYERPCT_VALIDN:
2600 case CTSF_LAYERROWPCT_VALIDN:
2601 case CTSF_LAYERCOLPCT_VALIDN:
2602 case CTSF_ROWPCT_TOTALN:
2603 case CTSF_COLPCT_TOTALN:
2604 case CTSF_TABLEPCT_TOTALN:
2605 case CTSF_SUBTABLEPCT_TOTALN:
2606 case CTSF_LAYERPCT_TOTALN:
2607 case CTSF_LAYERROWPCT_TOTALN:
2608 case CTSF_LAYERCOLPCT_TOTALN:
2615 case CTSF_UROWPCT_COUNT:
2616 case CTSF_UCOLPCT_COUNT:
2617 case CTSF_UTABLEPCT_COUNT:
2618 case CTSF_USUBTABLEPCT_COUNT:
2619 case CTSF_ULAYERPCT_COUNT:
2620 case CTSF_ULAYERROWPCT_COUNT:
2621 case CTSF_ULAYERCOLPCT_COUNT:
2622 case CTSF_UROWPCT_VALIDN:
2623 case CTSF_UCOLPCT_VALIDN:
2624 case CTSF_UTABLEPCT_VALIDN:
2625 case CTSF_USUBTABLEPCT_VALIDN:
2626 case CTSF_ULAYERPCT_VALIDN:
2627 case CTSF_ULAYERROWPCT_VALIDN:
2628 case CTSF_ULAYERCOLPCT_VALIDN:
2629 case CTSF_UROWPCT_TOTALN:
2630 case CTSF_UCOLPCT_TOTALN:
2631 case CTSF_UTABLEPCT_TOTALN:
2632 case CTSF_USUBTABLEPCT_TOTALN:
2633 case CTSF_ULAYERPCT_TOTALN:
2634 case CTSF_ULAYERROWPCT_TOTALN:
2635 case CTSF_ULAYERCOLPCT_TOTALN:
2651 case CTSF_ROWPCT_SUM:
2652 case CTSF_COLPCT_SUM:
2653 case CTSF_TABLEPCT_SUM:
2654 case CTSF_SUBTABLEPCT_SUM:
2655 case CTSF_LAYERPCT_SUM:
2656 case CTSF_LAYERROWPCT_SUM:
2657 case CTSF_LAYERCOLPCT_SUM:
2662 case CTSF_UVARIANCE:
2663 case CTSF_UROWPCT_SUM:
2664 case CTSF_UCOLPCT_SUM:
2665 case CTSF_UTABLEPCT_SUM:
2666 case CTSF_USUBTABLEPCT_SUM:
2667 case CTSF_ULAYERPCT_SUM:
2668 case CTSF_ULAYERROWPCT_SUM:
2669 case CTSF_ULAYERCOLPCT_SUM:
2670 moments1_destroy (s->moments);
2679 casewriter_destroy (s->writer);
/* Accumulates one case into summary S for the function in SS.

   VALUE is the case's value of VAR.  IS_SCALE, IS_SCALE_MISSING,
   IS_MISSING, and EXCLUDED_MISSING gate whether the case contributes, as
   laid out in the chart inside the function.  D_WEIGHT and E_WEIGHT are the
   two case weights supplied by the caller: in the visible branches the
   *PCT_* count-like functions add D_WEIGHT, other count branches add
   E_WEIGHT, and the moments and order-statistics groups use E_WEIGHT.

   Visible behavior by group:

   - TOTALN percentages add the weight unconditionally.

   - COUNT percentages add the weight when the variable is scale or the
     value is not an excluded missing value.

   - Minimum/maximum tracking considers only cases where IS_SCALE_MISSING is
     false, and asserts that VAR is numeric.

   - The moments group adds VALUE->f when IS_SCALE_MISSING is false, using
     E_WEIGHT for the weighted functions and 1.0 for the unweighted (U*)
     ones.

   - The order-statistics group, when IS_SCALE_MISSING is false, adds the
     weight to the running valid total and writes a (value, weight) case to
     the sorting writer.

   Many conditions and statements of this function are not visible in this
   excerpt. */
2685 ctables_summary_add (union ctables_summary *s,
2686 const struct ctables_summary_spec *ss,
2687 const struct variable *var, const union value *value,
2688 bool is_scale, bool is_scale_missing,
2689 bool is_missing, bool excluded_missing,
2690 double d_weight, double e_weight)
2692 /* To determine whether a case is included in a given table for a particular
2693 kind of summary, consider the following charts for each variable in the
2694 table. Only if "yes" appears for every variable for the summary is the
2697 Categorical variables: VALIDN COUNT TOTALN
2698 Valid values in included categories yes yes yes
2699 Missing values in included categories --- yes yes
2700 Missing values in excluded categories --- --- yes
2701 Valid values in excluded categories --- --- ---
2703 Scale variables: VALIDN COUNT TOTALN
2704 Valid value yes yes yes
2705 Missing value --- yes yes
2707 Missing values include both user- and system-missing. (The system-missing
2708 value is always in an excluded category.)
2710 switch (ss->function)
2713 case CTSF_ROWPCT_TOTALN:
2714 case CTSF_COLPCT_TOTALN:
2715 case CTSF_TABLEPCT_TOTALN:
2716 case CTSF_SUBTABLEPCT_TOTALN:
2717 case CTSF_LAYERPCT_TOTALN:
2718 case CTSF_LAYERROWPCT_TOTALN:
2719 case CTSF_LAYERCOLPCT_TOTALN:
2720 s->count += d_weight;
2724 case CTSF_UROWPCT_TOTALN:
2725 case CTSF_UCOLPCT_TOTALN:
2726 case CTSF_UTABLEPCT_TOTALN:
2727 case CTSF_USUBTABLEPCT_TOTALN:
2728 case CTSF_ULAYERPCT_TOTALN:
2729 case CTSF_ULAYERROWPCT_TOTALN:
2730 case CTSF_ULAYERCOLPCT_TOTALN:
2735 case CTSF_ROWPCT_COUNT:
2736 case CTSF_COLPCT_COUNT:
2737 case CTSF_TABLEPCT_COUNT:
2738 case CTSF_SUBTABLEPCT_COUNT:
2739 case CTSF_LAYERPCT_COUNT:
2740 case CTSF_LAYERROWPCT_COUNT:
2741 case CTSF_LAYERCOLPCT_COUNT:
2742 if (is_scale || !excluded_missing)
2743 s->count += d_weight;
2747 case CTSF_UROWPCT_COUNT:
2748 case CTSF_UCOLPCT_COUNT:
2749 case CTSF_UTABLEPCT_COUNT:
2750 case CTSF_USUBTABLEPCT_COUNT:
2751 case CTSF_ULAYERPCT_COUNT:
2752 case CTSF_ULAYERROWPCT_COUNT:
2753 case CTSF_ULAYERCOLPCT_COUNT:
2754 if (is_scale || !excluded_missing)
2759 case CTSF_ROWPCT_VALIDN:
2760 case CTSF_COLPCT_VALIDN:
2761 case CTSF_TABLEPCT_VALIDN:
2762 case CTSF_SUBTABLEPCT_VALIDN:
2763 case CTSF_LAYERPCT_VALIDN:
2764 case CTSF_LAYERROWPCT_VALIDN:
2765 case CTSF_LAYERCOLPCT_VALIDN:
2769 s->count += d_weight;
2773 case CTSF_UROWPCT_VALIDN:
2774 case CTSF_UCOLPCT_VALIDN:
2775 case CTSF_UTABLEPCT_VALIDN:
2776 case CTSF_USUBTABLEPCT_VALIDN:
2777 case CTSF_ULAYERPCT_VALIDN:
2778 case CTSF_ULAYERROWPCT_VALIDN:
2779 case CTSF_ULAYERCOLPCT_VALIDN:
2788 s->count += d_weight;
2797 if (is_scale || !excluded_missing)
2798 s->count += e_weight;
2805 s->count += e_weight;
2809 s->count += e_weight;
2815 if (!is_scale_missing)
2817 assert (!var_is_alpha (var)); /* XXX? */
2818 if (s->min == SYSMIS || value->f < s->min)
2820 if (s->max == SYSMIS || value->f > s->max)
2830 case CTSF_ROWPCT_SUM:
2831 case CTSF_COLPCT_SUM:
2832 case CTSF_TABLEPCT_SUM:
2833 case CTSF_SUBTABLEPCT_SUM:
2834 case CTSF_LAYERPCT_SUM:
2835 case CTSF_LAYERROWPCT_SUM:
2836 case CTSF_LAYERCOLPCT_SUM:
2837 if (!is_scale_missing)
2838 moments1_add (s->moments, value->f, e_weight);
2845 case CTSF_UVARIANCE:
2846 case CTSF_UROWPCT_SUM:
2847 case CTSF_UCOLPCT_SUM:
2848 case CTSF_UTABLEPCT_SUM:
2849 case CTSF_USUBTABLEPCT_SUM:
2850 case CTSF_ULAYERPCT_SUM:
2851 case CTSF_ULAYERROWPCT_SUM:
2852 case CTSF_ULAYERCOLPCT_SUM:
2853 if (!is_scale_missing)
2854 moments1_add (s->moments, value->f, 1.0);
2860 d_weight = e_weight = 1.0;
2865 if (!is_scale_missing)
2867 s->ovalid += e_weight;
2869 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2870 *case_num_rw_idx (c, 0) = value->f;
2871 *case_num_rw_idx (c, 1) = e_weight;
2872 casewriter_write (s->writer, c);
/* Returns the domain type that summary function FUNCTION is computed relative
   to.  In the returns visible here, the LAYERCOLPCT functions map to
   CTDT_LAYERCOL, the LAYERROWPCT functions to CTDT_LAYERROW, and the
   SUBTABLEPCT functions to CTDT_SUBTABLE.  Weighted and unweighted
   (U-prefixed) variants of a function share one case group.

   ctables_summary_value() uses the result to index cell->domains[]. */
2878 static enum ctables_domain_type
2879 ctables_function_domain (enum ctables_summary_function function)
2909 case CTSF_UVARIANCE:
/* Column percentages. */
2915 case CTSF_COLPCT_COUNT:
2916 case CTSF_COLPCT_SUM:
2917 case CTSF_COLPCT_TOTALN:
2918 case CTSF_COLPCT_VALIDN:
2919 case CTSF_UCOLPCT_COUNT:
2920 case CTSF_UCOLPCT_SUM:
2921 case CTSF_UCOLPCT_TOTALN:
2922 case CTSF_UCOLPCT_VALIDN:
/* Layer-column percentages. */
2925 case CTSF_LAYERCOLPCT_COUNT:
2926 case CTSF_LAYERCOLPCT_SUM:
2927 case CTSF_LAYERCOLPCT_TOTALN:
2928 case CTSF_LAYERCOLPCT_VALIDN:
2929 case CTSF_ULAYERCOLPCT_COUNT:
2930 case CTSF_ULAYERCOLPCT_SUM:
2931 case CTSF_ULAYERCOLPCT_TOTALN:
2932 case CTSF_ULAYERCOLPCT_VALIDN:
2933 return CTDT_LAYERCOL;
/* Layer percentages. */
2935 case CTSF_LAYERPCT_COUNT:
2936 case CTSF_LAYERPCT_SUM:
2937 case CTSF_LAYERPCT_TOTALN:
2938 case CTSF_LAYERPCT_VALIDN:
2939 case CTSF_ULAYERPCT_COUNT:
2940 case CTSF_ULAYERPCT_SUM:
2941 case CTSF_ULAYERPCT_TOTALN:
2942 case CTSF_ULAYERPCT_VALIDN:
/* Layer-row percentages. */
2945 case CTSF_LAYERROWPCT_COUNT:
2946 case CTSF_LAYERROWPCT_SUM:
2947 case CTSF_LAYERROWPCT_TOTALN:
2948 case CTSF_LAYERROWPCT_VALIDN:
2949 case CTSF_ULAYERROWPCT_COUNT:
2950 case CTSF_ULAYERROWPCT_SUM:
2951 case CTSF_ULAYERROWPCT_TOTALN:
2952 case CTSF_ULAYERROWPCT_VALIDN:
2953 return CTDT_LAYERROW;
/* Row percentages. */
2955 case CTSF_ROWPCT_COUNT:
2956 case CTSF_ROWPCT_SUM:
2957 case CTSF_ROWPCT_TOTALN:
2958 case CTSF_ROWPCT_VALIDN:
2959 case CTSF_UROWPCT_COUNT:
2960 case CTSF_UROWPCT_SUM:
2961 case CTSF_UROWPCT_TOTALN:
2962 case CTSF_UROWPCT_VALIDN:
/* Subtable percentages. */
2965 case CTSF_SUBTABLEPCT_COUNT:
2966 case CTSF_SUBTABLEPCT_SUM:
2967 case CTSF_SUBTABLEPCT_TOTALN:
2968 case CTSF_SUBTABLEPCT_VALIDN:
2969 case CTSF_USUBTABLEPCT_COUNT:
2970 case CTSF_USUBTABLEPCT_SUM:
2971 case CTSF_USUBTABLEPCT_TOTALN:
2972 case CTSF_USUBTABLEPCT_VALIDN:
2973 return CTDT_SUBTABLE;
/* Table percentages. */
2975 case CTSF_TABLEPCT_COUNT:
2976 case CTSF_TABLEPCT_SUM:
2977 case CTSF_TABLEPCT_TOTALN:
2978 case CTSF_TABLEPCT_VALIDN:
2979 case CTSF_UTABLEPCT_COUNT:
2980 case CTSF_UTABLEPCT_SUM:
2981 case CTSF_UTABLEPCT_TOTALN:
2982 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary function FUNCTION.  Judging from the name and from the
   case grouping below, the *PCT_COUNT, *PCT_TOTALN, and *PCT_VALIDN functions
   form one group and the *PCT_SUM functions form another.

   NOTE(review): the declared return type is enum ctables_domain_type, which
   looks copied from ctables_function_domain().  A predicate named "is_..."
   would normally return bool -- confirm against the return statements. */
2989 static enum ctables_domain_type
2990 ctables_function_is_pctsum (enum ctables_summary_function function)
3020 case CTSF_UVARIANCE:
/* Percentages of counts, total N, and valid N. */
3024 case CTSF_COLPCT_COUNT:
3025 case CTSF_COLPCT_TOTALN:
3026 case CTSF_COLPCT_VALIDN:
3027 case CTSF_UCOLPCT_COUNT:
3028 case CTSF_UCOLPCT_TOTALN:
3029 case CTSF_UCOLPCT_VALIDN:
3030 case CTSF_LAYERCOLPCT_COUNT:
3031 case CTSF_LAYERCOLPCT_TOTALN:
3032 case CTSF_LAYERCOLPCT_VALIDN:
3033 case CTSF_ULAYERCOLPCT_COUNT:
3034 case CTSF_ULAYERCOLPCT_TOTALN:
3035 case CTSF_ULAYERCOLPCT_VALIDN:
3036 case CTSF_LAYERPCT_COUNT:
3037 case CTSF_LAYERPCT_TOTALN:
3038 case CTSF_LAYERPCT_VALIDN:
3039 case CTSF_ULAYERPCT_COUNT:
3040 case CTSF_ULAYERPCT_TOTALN:
3041 case CTSF_ULAYERPCT_VALIDN:
3042 case CTSF_LAYERROWPCT_COUNT:
3043 case CTSF_LAYERROWPCT_TOTALN:
3044 case CTSF_LAYERROWPCT_VALIDN:
3045 case CTSF_ULAYERROWPCT_COUNT:
3046 case CTSF_ULAYERROWPCT_TOTALN:
3047 case CTSF_ULAYERROWPCT_VALIDN:
3048 case CTSF_ROWPCT_COUNT:
3049 case CTSF_ROWPCT_TOTALN:
3050 case CTSF_ROWPCT_VALIDN:
3051 case CTSF_UROWPCT_COUNT:
3052 case CTSF_UROWPCT_TOTALN:
3053 case CTSF_UROWPCT_VALIDN:
3054 case CTSF_SUBTABLEPCT_COUNT:
3055 case CTSF_SUBTABLEPCT_TOTALN:
3056 case CTSF_SUBTABLEPCT_VALIDN:
3057 case CTSF_USUBTABLEPCT_COUNT:
3058 case CTSF_USUBTABLEPCT_TOTALN:
3059 case CTSF_USUBTABLEPCT_VALIDN:
3060 case CTSF_TABLEPCT_COUNT:
3061 case CTSF_TABLEPCT_TOTALN:
3062 case CTSF_TABLEPCT_VALIDN:
3063 case CTSF_UTABLEPCT_COUNT:
3064 case CTSF_UTABLEPCT_TOTALN:
3065 case CTSF_UTABLEPCT_VALIDN:
/* Percentages of sums. */
3068 case CTSF_COLPCT_SUM:
3069 case CTSF_UCOLPCT_SUM:
3070 case CTSF_LAYERCOLPCT_SUM:
3071 case CTSF_ULAYERCOLPCT_SUM:
3072 case CTSF_LAYERPCT_SUM:
3073 case CTSF_ULAYERPCT_SUM:
3074 case CTSF_LAYERROWPCT_SUM:
3075 case CTSF_ULAYERROWPCT_SUM:
3076 case CTSF_ROWPCT_SUM:
3077 case CTSF_UROWPCT_SUM:
3078 case CTSF_SUBTABLEPCT_SUM:
3079 case CTSF_USUBTABLEPCT_SUM:
3080 case CTSF_TABLEPCT_SUM:
3081 case CTSF_UTABLEPCT_SUM:
/* Computes and returns the value of summary S, which belongs to CELL, for
   summary specification SS.

   Percentage functions divide the accumulated count or sum by the matching
   total in the domain selected by ctables_function_domain(), scaled by 100.
   Moment-based functions call moments1_calculate() on S->moments.  The
   percentile and mode code reads back the values accumulated in S->writer and
   stores its result in S->ovalue. */
3089 ctables_summary_value (const struct ctables_cell *cell,
3090 union ctables_summary *s,
3091 const struct ctables_summary_spec *ss)
3093 switch (ss->function)
/* Weighted percentage of count: the denominator is the domain's e_count. */
3100 case CTSF_ROWPCT_COUNT:
3101 case CTSF_COLPCT_COUNT:
3102 case CTSF_TABLEPCT_COUNT:
3103 case CTSF_SUBTABLEPCT_COUNT:
3104 case CTSF_LAYERPCT_COUNT:
3105 case CTSF_LAYERROWPCT_COUNT:
3106 case CTSF_LAYERCOLPCT_COUNT:
3108 enum ctables_domain_type d = ctables_function_domain (ss->function);
3109 return (cell->domains[d]->e_count
3110 ? s->count / cell->domains[d]->e_count * 100
/* Unweighted percentage of count: the denominator is the domain's u_count. */
3114 case CTSF_UROWPCT_COUNT:
3115 case CTSF_UCOLPCT_COUNT:
3116 case CTSF_UTABLEPCT_COUNT:
3117 case CTSF_USUBTABLEPCT_COUNT:
3118 case CTSF_ULAYERPCT_COUNT:
3119 case CTSF_ULAYERROWPCT_COUNT:
3120 case CTSF_ULAYERCOLPCT_COUNT:
3122 enum ctables_domain_type d = ctables_function_domain (ss->function);
3123 return (cell->domains[d]->u_count
3124 ? s->count / cell->domains[d]->u_count * 100
/* Weighted percentage of valid N: the denominator is the domain's e_valid. */
3128 case CTSF_ROWPCT_VALIDN:
3129 case CTSF_COLPCT_VALIDN:
3130 case CTSF_TABLEPCT_VALIDN:
3131 case CTSF_SUBTABLEPCT_VALIDN:
3132 case CTSF_LAYERPCT_VALIDN:
3133 case CTSF_LAYERROWPCT_VALIDN:
3134 case CTSF_LAYERCOLPCT_VALIDN:
3136 enum ctables_domain_type d = ctables_function_domain (ss->function);
3137 return (cell->domains[d]->e_valid
3138 ? s->count / cell->domains[d]->e_valid * 100
/* Unweighted percentage of valid N: the denominator is the domain's
   u_valid. */
3142 case CTSF_UROWPCT_VALIDN:
3143 case CTSF_UCOLPCT_VALIDN:
3144 case CTSF_UTABLEPCT_VALIDN:
3145 case CTSF_USUBTABLEPCT_VALIDN:
3146 case CTSF_ULAYERPCT_VALIDN:
3147 case CTSF_ULAYERROWPCT_VALIDN:
3148 case CTSF_ULAYERCOLPCT_VALIDN:
3150 enum ctables_domain_type d = ctables_function_domain (ss->function);
3151 return (cell->domains[d]->u_valid
3152 ? s->count / cell->domains[d]->u_valid * 100
/* Weighted percentage of total N: the denominator is the domain's e_total. */
3156 case CTSF_ROWPCT_TOTALN:
3157 case CTSF_COLPCT_TOTALN:
3158 case CTSF_TABLEPCT_TOTALN:
3159 case CTSF_SUBTABLEPCT_TOTALN:
3160 case CTSF_LAYERPCT_TOTALN:
3161 case CTSF_LAYERROWPCT_TOTALN:
3162 case CTSF_LAYERCOLPCT_TOTALN:
3164 enum ctables_domain_type d = ctables_function_domain (ss->function);
3165 return (cell->domains[d]->e_total
3166 ? s->count / cell->domains[d]->e_total * 100
/* Unweighted percentage of total N: the denominator is the domain's
   u_total. */
3170 case CTSF_UROWPCT_TOTALN:
3171 case CTSF_UCOLPCT_TOTALN:
3172 case CTSF_UTABLEPCT_TOTALN:
3173 case CTSF_USUBTABLEPCT_TOTALN:
3174 case CTSF_ULAYERPCT_TOTALN:
3175 case CTSF_ULAYERROWPCT_TOTALN:
3176 case CTSF_ULAYERCOLPCT_TOTALN:
3178 enum ctables_domain_type d = ctables_function_domain (ss->function);
3179 return (cell->domains[d]->u_total
3180 ? s->count / cell->domains[d]->u_total * 100
/* Range: SYSMIS unless both a minimum and a maximum were recorded. */
3201 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3207 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3214 double weight, variance;
3215 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3216 return calc_semean (variance, weight);
3223 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3224 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
3230 double weight, mean;
3231 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3232 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3236 case CTSF_UVARIANCE:
3239 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Weighted percentage of sum: this cell's sum divided by the domain's e_sum
   for the summary's sum variable.  A zero denominator yields SYSMIS. */
3243 case CTSF_ROWPCT_SUM:
3244 case CTSF_COLPCT_SUM:
3245 case CTSF_TABLEPCT_SUM:
3246 case CTSF_SUBTABLEPCT_SUM:
3247 case CTSF_LAYERPCT_SUM:
3248 case CTSF_LAYERROWPCT_SUM:
3249 case CTSF_LAYERCOLPCT_SUM:
3251 double weight, mean;
3252 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3253 if (weight == SYSMIS || mean == SYSMIS)
3255 enum ctables_domain_type d = ctables_function_domain (ss->function);
3256 double num = weight * mean;
3257 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3258 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Unweighted percentage of sum: as above, but the denominator is u_sum. */
3260 case CTSF_UROWPCT_SUM:
3261 case CTSF_UCOLPCT_SUM:
3262 case CTSF_UTABLEPCT_SUM:
3263 case CTSF_USUBTABLEPCT_SUM:
3264 case CTSF_ULAYERPCT_SUM:
3265 case CTSF_ULAYERROWPCT_SUM:
3266 case CTSF_ULAYERCOLPCT_SUM:
3268 double weight, mean;
3269 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3270 if (weight == SYSMIS || mean == SYSMIS)
3272 enum ctables_domain_type d = ctables_function_domain (ss->function);
3273 double num = weight * mean;
3274 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3275 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile: uses SS->percentile for CTSF_PTILE and 0.5 otherwise, computed
   with PC_HAVERAGE over the values written to S->writer. */
3284 struct casereader *reader = casewriter_make_reader (s->writer);
3287 struct percentile *ptile = percentile_create (
3288 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3289 struct order_stats *os = &ptile->parent;
3290 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3291 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3292 statistic_destroy (&ptile->parent.parent);
/* Mode, computed over the values written to S->writer. */
3300 struct casereader *reader = casewriter_make_reader (s->writer);
3303 struct mode *mode = mode_create ();
3304 struct order_stats *os = &mode->parent;
3305 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3306 s->ovalue = mode->mode;
3307 statistic_destroy (&mode->parent.parent);
/* Auxiliary data passed to ctables_cell_compare_3way(). */
3315 struct ctables_cell_sort_aux
/* Nest whose non-scale variables supply the sort keys. */
3317 const struct ctables_nest *nest;
/* Axis whose cell values are compared. */
3318 enum pivot_axis_type a;
/* Three-way comparison callback for sorting an array of pointers to
   struct ctables_cell.  A_ and B_ point to the array elements; AUX_ points to
   a struct ctables_cell_sort_aux that names the nest and axis to compare on.

   For each variable in the nest other than the scale variable, cells are
   ordered first by the address of their category.  Cells in the same category
   are then ordered according to the category type: by value, or by value
   label, with the category's sort_ascending flag choosing the direction. */
3322 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3324 const struct ctables_cell_sort_aux *aux = aux_;
3325 struct ctables_cell *const *ap = a_;
3326 struct ctables_cell *const *bp = b_;
3327 const struct ctables_cell *a = *ap;
3328 const struct ctables_cell *b = *bp;
3330 const struct ctables_nest *nest = aux->nest;
3331 for (size_t i = 0; i < nest->n; i++)
3332 if (i != nest->scale_idx)
3334 const struct variable *var = nest->vars[i];
3335 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3336 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by category address. */
3337 if (a_cv->category != b_cv->category)
3338 return a_cv->category > b_cv->category ? 1 : -1;
3340 const union value *a_val = &a_cv->value;
3341 const union value *b_val = &b_cv->value;
3342 switch (a_cv->category->type)
3348 case CCT_POSTCOMPUTE:
3349 case CCT_EXCLUDED_MISSING:
3350 /* Must be equal. */
3358 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3366 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3368 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Label ordering: a value that has a label sorts after one that does not, and
   two unlabeled values fall back to comparing the values themselves. */
3374 const char *a_label = var_lookup_value_label (var, a_val);
3375 const char *b_label = var_lookup_value_label (var, b_val);
3377 ? (b_label ? strcmp (a_label, b_label) : 1)
3378 : (b_label ? -1 : value_compare_3way (
3379 a_val, b_val, var_get_width (var))));
3381 return a_cv->category->sort_ascending ? cmp : -cmp;
3395 For each ctables_table:
3396 For each combination of row vars:
3397 For each combination of column vars:
3398 For each combination of layer vars:
3400 Make a table of row values:
3401 Sort entries by row values
3402 Assign a 0-based index to each actual value
3403 Construct a dimension
3404 Make a table of column values
3405 Make a table of layer values
3407 Fill the table entry using the indexes from before.
/* Finds the domain of type DOMAIN in section S that CELL belongs to, creating
   it if there is none yet.

   A domain is identified by the categories and values of the variables listed
   in each nest's domains[DOMAIN] array.  Values are ignored for total,
   subtotal, and postcompute categories, both when hashing and when
   comparing.  A new domain records CELL as its example cell and gets a zeroed
   array with one struct ctables_sum per sum variable in the table. */
3410 static struct ctables_domain *
3411 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3412 enum ctables_domain_type domain)
/* Hash the identifying categories and values. */
3415 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3417 const struct ctables_nest *nest = s->nests[a];
3418 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3420 size_t v_idx = nest->domains[domain][i];
3421 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3422 hash = hash_pointer (cv->category, hash);
3423 if (cv->category->type != CCT_TOTAL
3424 && cv->category->type != CCT_SUBTOTAL
3425 && cv->category->type != CCT_POSTCOMPUTE)
3426 hash = value_hash (&cv->value,
3427 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing domain whose example cell matches CELL. */
3431 struct ctables_domain *d;
3432 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3434 const struct ctables_cell *df = d->example;
3435 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3437 const struct ctables_nest *nest = s->nests[a];
3438 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3440 size_t v_idx = nest->domains[domain][i];
3441 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3442 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3443 if (cv1->category != cv2->category
3444 || (cv1->category->type != CCT_TOTAL
3445 && cv1->category->type != CCT_SUBTOTAL
3446 && cv1->category->type != CCT_POSTCOMPUTE
3447 && !value_equal (&cv1->value, &cv2->value,
3448 var_get_width (nest->vars[v_idx]))))
/* No match: create a new domain. */
3457 struct ctables_sum *sums = (s->table->n_sum_vars
3458 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3461 d = xmalloc (sizeof *d);
3462 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3463 hmap_insert (&s->domains[domain], &d->node, hash);
3467 static struct substring
3468 rtrim_value (const union value *v, const struct variable *var)
3470 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3471 var_get_width (var));
3472 ss_rtrim (&s, ss_cstr (" "));
3477 in_string_range (const union value *v, const struct variable *var,
3478 const struct substring *srange)
3480 struct substring s = rtrim_value (v, var);
3481 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3482 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Searches categories C for one that matches value V of variable VAR.

   The categories are scanned from last to first.  Depending on the category
   type, a category is tested by number, by string with trailing spaces
   removed, by numeric or string range, or by whether V is missing.  If no
   specific category matches, the result is OTHERNM unless V is missing, in
   which case it is NULL. */
3485 static const struct ctables_category *
3486 ctables_categories_match (const struct ctables_categories *c,
3487 const union value *v, const struct variable *var)
/* The system-missing value gets special treatment up front. */
3489 if (var_is_numeric (var) && v->f == SYSMIS)
3492 const struct ctables_category *othernm = NULL;
3493 for (size_t i = c->n_cats; i-- > 0; )
3495 const struct ctables_category *cat = &c->cats[i];
3499 if (cat->number == v->f)
3504 if (ss_equals (cat->string, rtrim_value (v, var)))
/* A range bound of -DBL_MAX or DBL_MAX means that side is unbounded. */
3509 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3510 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3515 if (in_string_range (v, var, cat->srange))
3520 if (var_is_value_missing (var, v))
3524 case CCT_POSTCOMPUTE:
3539 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3542 case CCT_EXCLUDED_MISSING:
3547 return var_is_value_missing (var, v) ? NULL : othernm;
3550 static const struct ctables_category *
3551 ctables_categories_total (const struct ctables_categories *c)
3553 const struct ctables_category *first = &c->cats[0];
3554 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3555 return (first->type == CCT_TOTAL ? first
3556 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S identified by categories CATS and by the values
   in case C of the non-scale variables in each axis's nest, creating the cell
   if there is none yet.

   Values are ignored for total, subtotal, and postcompute categories, both
   when hashing and when comparing.  A new cell gets a copy of each variable's
   category and value, one initialized summary per summary specification, and
   a domain for every domain type, and is then added to S->cells.

   NOTE(review): CATS has a fixed inner dimension of 10, so nests with more
   variables than that would not fit -- confirm the limit is enforced
   elsewhere. */
3560 static struct ctables_cell *
3561 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3562 const struct ctables_category *cats[PIVOT_N_AXES][10])
3565 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3566 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3568 const struct ctables_nest *nest = s->nests[a];
3569 for (size_t i = 0; i < nest->n; i++)
3570 if (i != nest->scale_idx)
3572 hash = hash_pointer (cats[a][i], hash);
3573 if (cats[a][i]->type != CCT_TOTAL
3574 && cats[a][i]->type != CCT_SUBTOTAL
3575 && cats[a][i]->type != CCT_POSTCOMPUTE)
3576 hash = value_hash (case_data (c, nest->vars[i]),
3577 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same categories and values. */
3583 struct ctables_cell *cell;
3584 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3586 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3588 const struct ctables_nest *nest = s->nests[a];
3589 for (size_t i = 0; i < nest->n; i++)
3590 if (i != nest->scale_idx
3591 && (cats[a][i] != cell->axes[a].cvs[i].category
3592 || (cats[a][i]->type != CCT_TOTAL
3593 && cats[a][i]->type != CCT_SUBTOTAL
3594 && cats[a][i]->type != CCT_POSTCOMPUTE
3595 && !value_equal (case_data (c, nest->vars[i]),
3596 &cell->axes[a].cvs[i].value,
3597 var_get_width (nest->vars[i])))))
/* No match: create a new cell. */
3606 cell = xmalloc (sizeof *cell);
3609 cell->omit_domains = 0;
3610 cell->postcompute = false;
3611 //struct string name = DS_EMPTY_INITIALIZER;
3612 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3614 const struct ctables_nest *nest = s->nests[a];
3615 cell->axes[a].cvs = (nest->n
3616 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3618 for (size_t i = 0; i < nest->n; i++)
3620 const struct ctables_category *cat = cats[a][i];
3621 const struct variable *var = nest->vars[i];
3622 const union value *value = case_data (c, var);
3623 if (i != nest->scale_idx)
3625 const struct ctables_category *subtotal = cat->subtotal;
3626 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* Total-like categories are flagged in omit_domains, a bitmask indexed by
   domain type; which bits are set depends on the axis. */
3629 if (cat->type == CCT_TOTAL
3630 || cat->type == CCT_SUBTOTAL
3631 || cat->type == CCT_POSTCOMPUTE)
3633 /* XXX these should be more encompassing I think.*/
3637 case PIVOT_AXIS_COLUMN:
3638 cell->omit_domains |= ((1u << CTDT_TABLE) |
3639 (1u << CTDT_LAYER) |
3640 (1u << CTDT_LAYERCOL) |
3641 (1u << CTDT_SUBTABLE) |
3644 case PIVOT_AXIS_ROW:
3645 cell->omit_domains |= ((1u << CTDT_TABLE) |
3646 (1u << CTDT_LAYER) |
3647 (1u << CTDT_LAYERROW) |
3648 (1u << CTDT_SUBTABLE) |
3651 case PIVOT_AXIS_LAYER:
3652 cell->omit_domains |= ((1u << CTDT_TABLE) |
3653 (1u << CTDT_LAYER));
3657 if (cat->type == CCT_POSTCOMPUTE)
3658 cell->postcompute = true;
3661 cell->axes[a].cvs[i].category = cat;
3662 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below build a debugging name in `name', whose
   declaration above and final use below are both commented out.  Presumably
   they are compiled out by a conditional that is not visible here --
   confirm. */
3665 if (i != nest->scale_idx)
3667 if (!ds_is_empty (&name))
3668 ds_put_cstr (&name, ", ");
3669 char *value_s = data_out (value, var_get_encoding (var),
3670 var_get_print_format (var),
3671 settings_get_fmt_settings ());
3672 if (cat->type == CCT_TOTAL
3673 || cat->type == CCT_SUBTOTAL
3674 || cat->type == CCT_POSTCOMPUTE)
3675 ds_put_format (&name, "%s=total", var_get_name (var));
3677 ds_put_format (&name, "%s=%s", var_get_name (var),
3678 value_s + strspn (value_s, " "));
3684 //cell->name = ds_steal_cstr (&name);
/* Set up one summary per specification and attach the cell's domains. */
3686 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3687 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3688 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3689 for (size_t i = 0; i < specs->n; i++)
3690 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3691 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3692 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3693 hmap_insert (&s->cells, &cell->node, hash);
3698 is_scale_missing (const struct ctables_summary_spec_set *specs,
3699 const struct ccase *c)
3701 if (!specs->is_scale)
3704 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3707 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3709 const struct variable *var = specs->listwise_vars[i];
3710 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell in section S identified by CATS, creating the cell
   if necessary.

   Every summary in the cell is updated via ctables_summary_add().  Then, for
   each domain type that the cell does not omit, the domain's totals, counts,
   valid counts, and per-sum-variable sums are updated.  D_WEIGHT and E_WEIGHT
   feed the d_* and e_* accumulators respectively.  EXCLUDED_MISSING
   suppresses the count updates. */
3718 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3719 const struct ctables_category *cats[PIVOT_N_AXES][10],
3720 bool is_missing, bool excluded_missing,
3721 double d_weight, double e_weight)
3723 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3724 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3726 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3728 bool scale_missing = is_scale_missing (specs, c);
3729 for (size_t i = 0; i < specs->n; i++)
3730 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3731 specs->var, case_data (c, specs->var), specs->is_scale,
3732 scale_missing, is_missing, excluded_missing,
3733 d_weight, e_weight);
3734 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* omit_domains is a bitmask with one bit per domain type (it is built with
   `|= 1u << CTDT_...'), so the bit for DT must be tested with bitwise AND.
   With logical AND, any nonzero mask caused every domain to be skipped. */
3735 if (!(cell->omit_domains & (1u << dt)))
3737 struct ctables_domain *d = cell->domains[dt];
3738 d->d_total += d_weight;
3739 d->e_total += e_weight;
3741 if (!excluded_missing)
3743 d->d_count += d_weight;
3744 d->e_count += e_weight;
3749 d->d_valid += d_weight;
3750 d->e_valid += e_weight;
3753 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3755 /* XXX listwise_missing??? */
3756 const struct variable *var = s->table->sum_vars[i];
3757 double addend = case_num (c, var);
/* Missing values of a sum variable do not contribute to its sums. */
3758 if (!var_is_num_missing (var, addend))
3760 struct ctables_sum *sum = &d->sums[i];
3761 sum->e_sum += addend * e_weight;
3762 sum->u_sum += addend;
/* Adds case C to the total cells of section S.

   Walks the non-scale variables starting at nest index START_NEST of axis
   START_AXIS.  For each, it looks up the total category of the variable's
   categories, adds the case to the corresponding cell with
   ctables_cell_add__(), and recurses from the following variable.  The
   original category is kept in `save'; presumably CATS[a][i] is set to the
   total around the calls and then restored -- those lines are not visible
   here. */
3770 recurse_totals (struct ctables_section *s, const struct ccase *c,
3771 const struct ctables_category *cats[PIVOT_N_AXES][10],
3772 bool is_missing, bool excluded_missing,
3773 double d_weight, double e_weight,
3774 enum pivot_axis_type start_axis, size_t start_nest)
3776 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3778 const struct ctables_nest *nest = s->nests[a];
3779 for (size_t i = start_nest; i < nest->n; i++)
3781 if (i == nest->scale_idx)
3784 const struct variable *var = nest->vars[i];
3786 const struct ctables_category *total = ctables_categories_total (
3787 s->table->categories[var_get_dict_index (var)]);
3790 const struct ctables_category *save = cats[a][i];
3792 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3793 d_weight, e_weight);
/* Recurse from the next variable on the same axis. */
3794 recurse_totals (s, c, cats, is_missing, excluded_missing,
3795 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells of section S.

   Walks the non-scale variables starting at nest index START_NEST of axis
   START_AXIS.  For each, it replaces CATS[a][i] by that category's subtotal,
   adds the case to the corresponding cell with ctables_cell_add__(), and
   recurses from the following variable.  The original category is kept in
   `save'; presumably it is restored afterward -- that line is not visible
   here. */
3804 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3805 const struct ctables_category *cats[PIVOT_N_AXES][10],
3806 bool is_missing, bool excluded_missing,
3807 double d_weight, double e_weight,
3808 enum pivot_axis_type start_axis, size_t start_nest)
3810 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3812 const struct ctables_nest *nest = s->nests[a];
3813 for (size_t i = start_nest; i < nest->n; i++)
3815 if (i == nest->scale_idx)
3818 const struct ctables_category *save = cats[a][i];
3821 cats[a][i] = save->subtotal;
3822 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3823 d_weight, e_weight);
/* Recurse from the next variable on the same axis. */
3824 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3825 d_weight, e_weight, a, i + 1);
3834 ctables_add_occurrence (const struct variable *var,
3835 const union value *value,
3836 struct hmap *occurrences)
3838 int width = var_get_width (var);
3839 unsigned int hash = value_hash (value, width, 0);
3841 struct ctables_occurrence *o;
3842 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3844 if (value_equal (value, &o->value, width))
3847 o = xmalloc (sizeof *o);
3848 value_clone (&o->value, value, width);
3849 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   Each non-scale variable's value is classified into a category with
   ctables_categories_match().  A static CCT_EXCLUDED_MISSING category is
   substituted, and excluded_missing set, in a branch whose condition is not
   visible here (presumably when no category matches -- confirm).  Unless the
   case is excluded, each variable's value is recorded in S->occurrences.
   Finally the case is added to its own cell and, through recurse_totals() and
   recurse_subtotals(), to the related total and subtotal cells.

   NOTE(review): `cats' has a fixed inner dimension of 10 (marked XXX below),
   so nests with more variables than that would overflow it -- confirm the
   limit is enforced elsewhere. */
3853 ctables_cell_insert (struct ctables_section *s,
3854 const struct ccase *c,
3855 double d_weight, double e_weight)
3857 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3859 /* Does at least one categorical variable have a missing value in an included
3860 or excluded category? */
3861 bool is_missing = false;
3863 /* Does at least one categorical variable have a missing value in an excluded
3865 bool excluded_missing = false;
3867 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3869 const struct ctables_nest *nest = s->nests[a];
3870 for (size_t i = 0; i < nest->n; i++)
3872 if (i == nest->scale_idx)
3875 const struct variable *var = nest->vars[i];
3876 const union value *value = case_data (c, var);
3878 bool var_missing = var_is_value_missing (var, value) != 0;
3882 cats[a][i] = ctables_categories_match (
3883 s->table->categories[var_get_dict_index (var)], value, var);
3889 static const struct ctables_category cct_excluded_missing = {
3890 .type = CCT_EXCLUDED_MISSING,
3893 cats[a][i] = &cct_excluded_missing;
3894 excluded_missing = true;
3899 if (!excluded_missing)
3900 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3902 const struct ctables_nest *nest = s->nests[a];
3903 for (size_t i = 0; i < nest->n; i++)
3904 if (i != nest->scale_idx)
3906 const struct variable *var = nest->vars[i];
3907 const union value *value = case_data (c, var);
3908 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3912 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3913 d_weight, e_weight);
3915 //if (!excluded_missing)
3917 recurse_totals (s, c, cats, is_missing, excluded_missing,
3918 d_weight, e_weight, 0, 0);
3919 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3920 d_weight, e_weight, 0, 0);
/* Summary specification set that this item refers into.
   merge_item_compare_3way() reads the item's specification as
   set->specs[ofs]. */
3926 const struct ctables_summary_spec_set *set;
3931 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3933 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3934 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3935 if (as->function != bs->function)
3936 return as->function > bs->function ? 1 : -1;
3937 else if (as->percentile != bs->percentile)
3938 return as->percentile < bs->percentile ? 1 : -1;
3940 const char *as_label = as->label ? as->label : "";
3941 const char *bs_label = bs->label ? bs->label : "";
3942 return strcmp (as_label, bs_label);
3945 static struct pivot_value *
3946 ctables_category_create_label__ (const struct ctables_category *cat,
3947 const struct variable *var,
3948 const union value *value)
3950 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3951 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3952 : pivot_value_new_var_value (var, value));
/* Creates and returns the label for postcompute category CAT.

   The label text is CAT->pc->label.  Each occurrence of ")LABEL[n]" in it is
   replaced by the label that ctables_category_create_label__() produces for
   the Nth category in CATS (N is 1-based), using VAR and VALUE.  If the text
   contains no such reference, it is used as is.

   The final return statement uses the label text verbatim; presumably it is
   the fallback for a malformed reference (a missing `]', an index out of
   range, or trailing junk in the index) -- confirm. */
3955 static struct pivot_value *
3956 ctables_postcompute_label (const struct ctables_categories *cats,
3957 const struct ctables_category *cat,
3958 const struct variable *var,
3959 const union value *value)
3961 struct substring in = ss_cstr (cat->pc->label);
3962 struct substring target = ss_cstr (")LABEL[");
3964 struct string out = DS_EMPTY_INITIALIZER;
3967 size_t chunk = ss_find_substring (in, target);
/* No further references: emit whatever text remains. */
3968 if (chunk == SIZE_MAX)
3970 if (ds_is_empty (&out))
3971 return pivot_value_new_user_text (in.string, in.length);
3974 ds_put_substring (&out, in);
3975 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then step past ")LABEL[". */
3979 ds_put_substring (&out, ss_head (in, chunk));
3980 ss_advance (&in, chunk + target.length);
3982 struct substring idx_s;
3983 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a whole number within 1...n_cats. */
3986 long int idx = strtol (idx_s.string, &tail, 10);
3987 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3990 struct ctables_category *cat2 = &cats->cats[idx - 1];
3991 struct pivot_value *label2
3992 = ctables_category_create_label__ (cat2, var, value);
3993 char *label2_s = pivot_value_to_string_defaults (label2);
3994 ds_put_cstr (&out, label2_s);
3996 pivot_value_destroy (label2);
4001 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4004 static struct pivot_value *
4005 ctables_category_create_label (const struct ctables_categories *cats,
4006 const struct ctables_category *cat,
4007 const struct variable *var,
4008 const union value *value)
4010 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4011 ? ctables_postcompute_label (cats, cat, var, value)
4012 : ctables_category_create_label__ (cat, var, value));
4015 static struct ctables_value *
4016 ctables_value_find__ (struct ctables_table *t, const union value *value,
4017 int width, unsigned int hash)
4019 struct ctables_value *clv;
4020 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4021 hash, &t->clabels_values_map)
4022 if (value_equal (value, &clv->value, width))
4028 ctables_value_insert (struct ctables_table *t, const union value *value,
4031 unsigned int hash = value_hash (value, width, 0);
4032 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4035 clv = xmalloc (sizeof *clv);
4036 value_clone (&clv->value, value, width);
4037 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry in T->clabels_values_map equal to VALUE, which has the
   given WIDTH, or NULL if there is none. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
/* Recursively adds sections to table T, one for every combination of nests
   across the axes.

   While A is a valid axis, the function iterates IX[A] over the nests in
   T->stacks[A] (an empty stack still gets one iteration) and recurses with
   A + 1.  Once all axes have an index, it appends a new section to
   T->sections, increments T->n_sections, and initializes the section's cell
   map, per-variable occurrence maps, and per-domain-type domain maps. */
4050 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4051 size_t ix[PIVOT_N_AXES])
4053 if (a < PIVOT_N_AXES)
4055 size_t limit = MAX (t->stacks[a].n, 1);
4056 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4057 ctables_table_add_section (t, a + 1, ix);
4061 struct ctables_section *s = &t->sections[t->n_sections++];
4062 *s = (struct ctables_section) {
4064 .cells = HMAP_INITIALIZER (s->cells),
4066 for (a = 0; a < PIVOT_N_AXES; a++)
4069 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence map per variable in the nest. */
4071 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4072 for (size_t i = 0; i < nest->n; i++)
4073 hmap_init (&s->occurrences[a][i]);
4075 for (size_t i = 0; i < N_CTDTS; i++)
4076 hmap_init (&s->domains[i]);
/* Postcompute binary operator: returns A + B. */
static double
ctpo_add (double a, double b)
{
  const double sum = a + b;
  return sum;
}
/* Postcompute binary operator: returns A - B. */
static double
ctpo_sub (double a, double b)
{
  const double difference = a - b;
  return difference;
}
/* Postcompute binary operator: returns A * B. */
static double
ctpo_mul (double a, double b)
{
  const double product = a * b;
  return product;
}
4099 ctpo_div (double a, double b)
4101 return b ? a / b : SYSMIS;
4105 ctpo_pow (double a, double b)
4107 int save_errno = errno;
4109 double result = pow (a, b);
4117 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression. */
4122 struct ctables_pcexpr_evaluate_ctx
/* Cell that supplies the category and value of every variable other than the
   one being postcomputed. */
4124 const struct ctables_cell *cell;
/* Section whose cells and occurrence maps are searched. */
4125 const struct ctables_section *section;
/* Categories used to resolve category references in the expression. */
4126 const struct ctables_categories *cats;
/* Axis of the variable being postcomputed. */
4127 enum pivot_axis_type pc_a;
/* Passed through to ctables_find_category_for_postcompute(). */
4130 enum fmt_type parse_format;
4133 static double ctables_pcexpr_evaluate (
4134 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4137 ctables_pcexpr_evaluate_nonterminal (
4138 const struct ctables_pcexpr_evaluate_ctx *ctx,
4139 const struct ctables_pcexpr *e, size_t n_args,
4140 double evaluate (double, double))
4142 double args[2] = { 0, 0 };
4143 for (size_t i = 0; i < n_args; i++)
4145 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4146 if (!isfinite (args[i]) || args[i] == SYSMIS)
4149 return evaluate (args[0], args[1]);
/* Evaluates one category reference inside a postcompute expression.

   Looks in CTX->section for the cell that has the same categories and values
   as CTX->cell, except that PC_CV takes the place of the cell value at index
   CTX->pc_a_idx on axis CTX->pc_a.  The final return statement yields that
   cell's summary number CTX->summary_idx, computed with
   ctables_summary_value().  What happens when no such cell exists is not
   visible here. */
4153 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4154 const struct ctables_cell_value *pc_cv)
4156 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity in the same way ctables_cell_insert__()
   does: category pointers, plus values except for total, subtotal, and
   postcompute categories. */
4159 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4161 const struct ctables_nest *nest = s->nests[a];
4162 for (size_t i = 0; i < nest->n; i++)
4163 if (i != nest->scale_idx)
4165 const struct ctables_cell_value *cv
4166 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4167 : &ctx->cell->axes[a].cvs[i]);
4168 hash = hash_pointer (cv->category, hash);
4169 if (cv->category->type != CCT_TOTAL
4170 && cv->category->type != CCT_SUBTOTAL
4171 && cv->category->type != CCT_POSTCOMPUTE)
4172 hash = value_hash (&cv->value,
4173 var_get_width (nest->vars[i]), hash);
/* Find the cell with that identity. */
4177 struct ctables_cell *tc;
4178 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4180 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4182 const struct ctables_nest *nest = s->nests[a];
4183 for (size_t i = 0; i < nest->n; i++)
4184 if (i != nest->scale_idx)
4186 const struct ctables_cell_value *p_cv
4187 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4188 : &ctx->cell->axes[a].cvs[i]);
4189 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4190 if (p_cv->category != t_cv->category
4191 || (p_cv->category->type != CCT_TOTAL
4192 && p_cv->category->type != CCT_SUBTOTAL
4193 && p_cv->category->type != CCT_POSTCOMPUTE
4194 && !value_equal (&p_cv->value,
4196 var_get_width (nest->vars[i]))))
/* Evaluate the requested summary in the cell that was found. */
4208 const struct ctables_table *t = s->table;
4209 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4210 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4211 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4212 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in the context described by CTX and
   returns the result.

   The cases visible in this listing fall into three groups:

   - Category ranges (CTPO_CAT_NRANGE, CTPO_CAT_SRANGE): accumulate
     ctables_pcexpr_evaluate_category() over every occurrence, at the
     postcompute's axis position, that ctables_categories_match() maps to the
     range's category.

   - Single categories (CTPO_CAT_NUMBER through CTPO_CAT_TOTAL): evaluate the
     one category directly.

   - Operators: delegate to ctables_pcexpr_evaluate_nonterminal() with the
     operand count (2, or 1 for ctpo_neg) and the matching ctpo_* callback.

   NOTE(review): CTPO_CAT_STRING, CTPO_CAT_MISSING, and CTPO_CAT_OTHERNM share
   the single-category path, which initializes the cell value from e->number.
   Confirm that this is intended for string categories and for categories
   that can cover more than one value. */
4216 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4217 const struct ctables_pcexpr *e)
4224 case CTPO_CAT_NRANGE:
4225 case CTPO_CAT_SRANGE:
4227 struct ctables_cell_value cv = {
4228 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
/* The expression must refer to a category that exists in ctx->cats. */
4230 assert (cv.category != NULL);
4232 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4233 const struct ctables_occurrence *o;
4236 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
/* Add up the cell values for each occurring value that falls in the range's
   category. */
4237 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4238 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4240 cv.value = o->value;
4241 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4246 case CTPO_CAT_NUMBER:
4247 case CTPO_CAT_STRING:
4248 case CTPO_CAT_MISSING:
4249 case CTPO_CAT_OTHERNM:
4250 case CTPO_CAT_SUBTOTAL:
4251 case CTPO_CAT_TOTAL:
4253 struct ctables_cell_value cv = {
4254 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4255 .value = { .f = e->number },
4257 assert (cv.category != NULL);
4258 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* Arithmetic operators: the third argument is the number of operands. */
4262 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4265 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4268 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4271 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4274 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4277 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the category of type CCT_POSTCOMPUTE among CELL's category values in
   section S, scanning every axis and every position in that axis's nest.
   CELL must be a postcompute cell (asserted), and a postcompute category is
   asserted to have been found.  The position found is reported through the
   output parameters (the store to *pc_a_idx_p is visible here).

   NOTE(review): the return statement is not visible in this listing; the
   caller uses the result as the postcompute category. */
4283 static const struct ctables_category *
4284 ctables_cell_postcompute (const struct ctables_section *s,
4285 const struct ctables_cell *cell,
4286 enum pivot_axis_type *pc_a_p,
4289 assert (cell->postcompute);
4290 const struct ctables_category *pc_cat = NULL;
4291 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4292 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4294 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4295 if (cv->category->type == CCT_POSTCOMPUTE)
4299 /* Multiple postcomputes cross each other. The value is
4304 pc_cat = cv->category;
4308 *pc_a_idx_p = pc_a_idx;
4312 assert (pc_cat != NULL);
/* Computes and returns the value of postcompute CELL in section S for summary
   specification SS.

   If the postcompute supplies its own summary spec with the same function and
   percentile as SS, that spec's format and is_ctables_format flag are stored
   in *FORMAT and *IS_CTABLES_FORMAT.  The postcompute's expression is then
   evaluated with ctables_pcexpr_evaluate().

   NOTE(review): pc->specs is dereferenced by the loop below; any null check
   guarding it is not visible in this listing. */
4317 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4318 const struct ctables_cell *cell,
4319 const struct ctables_summary_spec *ss,
4320 struct fmt_spec *format,
4321 bool *is_ctables_format,
/* Locate the postcompute category and where it sits in the cell. */
4324 enum pivot_axis_type pc_a = 0;
4325 size_t pc_a_idx = 0;
4326 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4327 s, cell, &pc_a, &pc_a_idx);
4331 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a postcompute-specific spec that matches SS. */
4334 for (size_t i = 0; i < pc->specs->n; i++)
4336 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4337 if (ss->function == ss2->function
4338 && ss->percentile == ss2->percentile)
4340 *format = ss2->format;
4341 *is_ctables_format = ss2->is_ctables_format;
/* The categories consulted during evaluation are those of the variable at
   the postcompute's axis position, indexed by its dictionary index. */
4347 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4348 const struct ctables_categories *cats = s->table->categories[
4349 var_get_dict_index (var)];
4350 struct ctables_pcexpr_evaluate_ctx ctx = {
4355 .pc_a_idx = pc_a_idx,
4356 .summary_idx = summary_idx,
4357 .parse_format = pc_cat->parse_format,
4359 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Builds a pivot table for table T of CTABLES command CT from the cells
   accumulated in T's sections, and submits it for output.

   The visible steps are:

   1. Create the pivot table with T's title (or "Custom Tables"), caption, and
      corner text.

   2. Optionally create a "Statistics" dimension for summary functions and a
      "Row Categories"/"Column Categories" dimension for moved category
      labels.

   3. For each axis, collect and sort the cells of each nest, then create
      pivot categories level by level (variable labels, categories, summary
      functions), reusing the groups that a cell has in common with the
      previous cell in sorted order.  Each cell records the leaf it was
      assigned in cell->axes[a].leaf.

   4. For every cell and every summary spec, compute the value (through
      ctables_cell_calculate_postcompute() for postcompute cells, otherwise
      ctables_summary_value()), turn it into a pivot value, and put it in the
      table.  Counts below ct->hide_threshold are shown as "<N", and ct->zero
      and ct->missing substitute text for zero and system-missing values.

   NOTE(review): when computing n_common, value_equal() is passed
   var_get_type() as its last argument, whereas the other value_equal() and
   value_hash() calls visible in this file pass var_get_width().  Presumably
   these differ for string variables; confirm which one is intended.

   NOTE(review): the printf() over sorted[] below looks like debugging output;
   whether it is compiled out by a preprocessor guard is not visible in this
   listing. */
4363 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4365 struct pivot_table *pt = pivot_table_create__ (
4367 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4368 : pivot_value_new_text (N_("Custom Tables"))),
4371 pivot_table_set_caption (
4372 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4374 pivot_table_set_corner_text (
4375 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summary functions get a dimension of their own when the summary labels are
   on a different axis from the summaries, or when the labels are hidden but
   there is more than one summary spec. */
4377 bool summary_dimension = (t->summary_axis != t->slabels_axis
4378 || (!t->slabels_visible
4379 && t->summary_specs.n > 1));
4380 if (summary_dimension)
4382 struct pivot_dimension *d = pivot_dimension_create (
4383 pt, t->slabels_axis, N_("Statistics"));
4384 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4385 if (!t->slabels_visible)
4386 d->hide_all_labels = true;
4387 for (size_t i = 0; i < specs->n; i++)
4388 pivot_category_create_leaf (
4389 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* Moved category labels (t->clabels_example is set) get a dimension with one
   leaf per entry in t->clabels_values. */
4392 bool categories_dimension = t->clabels_example != NULL;
4393 if (categories_dimension)
4395 struct pivot_dimension *d = pivot_dimension_create (
4396 pt, t->label_axis[t->clabels_from_axis],
4397 t->clabels_from_axis == PIVOT_AXIS_ROW
4398 ? N_("Row Categories")
4399 : N_("Column Categories"));
4400 const struct variable *var = t->clabels_example;
4401 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4402 for (size_t i = 0; i < t->n_clabels_values; i++)
4404 const struct ctables_value *value = t->clabels_values[i];
4405 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4406 assert (cat != NULL);
4407 pivot_category_create_leaf (d->root, ctables_category_create_label (
4408 c, cat, t->clabels_example,
4413 pivot_table_set_look (pt, ct->look);
/* One pivot dimension per axis that has variables or carries the summaries. */
4414 struct pivot_dimension *d[PIVOT_N_AXES];
4415 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4417 static const char *names[] = {
4418 [PIVOT_AXIS_ROW] = N_("Rows"),
4419 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4420 [PIVOT_AXIS_LAYER] = N_("Layers"),
4422 d[a] = (t->axes[a] || a == t->summary_axis
4423 ? pivot_dimension_create (pt, a, names[a])
4428 assert (t->axes[a]);
4430 for (size_t i = 0; i < t->stacks[a].n; i++)
4432 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis A, along with their total
   cell count and the deepest nest among them. */
4433 struct ctables_section **sections = xnmalloc (t->n_sections,
4435 size_t n_sections = 0;
4437 size_t n_total_cells = 0;
4438 size_t max_depth = 0;
4439 for (size_t j = 0; j < t->n_sections; j++)
4440 if (t->sections[j].nests[a] == nest)
4442 struct ctables_section *s = &t->sections[j];
4443 sections[n_sections++] = s;
4444 n_total_cells += s->cells.count;
4446 size_t depth = s->nests[a]->n;
4447 max_depth = MAX (depth, max_depth);
/* Gather candidate cells from those sections and sort them for this nest and
   axis. */
4450 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4452 size_t n_sorted = 0;
4454 for (size_t j = 0; j < n_sections; j++)
4456 struct ctables_section *s = sections[j];
4458 struct ctables_cell *cell;
4459 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4461 sorted[n_sorted++] = cell;
4462 assert (n_sorted <= n_total_cells);
4465 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4466 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4469 for (size_t j = 0; j < n_sorted; j++)
4471 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
/* A "level" is one layer of pivot categories along this axis. */
4476 struct ctables_level
4478 enum ctables_level_type
4480 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4481 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4482 CTL_SUMMARY, /* Summary functions. */
4486 enum settings_value_show vlabel; /* CTL_VAR only. */
/* At most one variable-label level and one category level per variable, plus
   one summary level. */
4489 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4490 size_t n_levels = 0;
4491 for (size_t k = 0; k < nest->n; k++)
4493 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4494 if (vlabel != CTVL_NONE)
4496 levels[n_levels++] = (struct ctables_level) {
4498 .vlabel = (enum settings_value_show) vlabel,
4503 if (nest->scale_idx != k
4504 && (k != nest->n - 1 || t->label_axis[a] == a))
4506 levels[n_levels++] = (struct ctables_level) {
4507 .type = CTL_CATEGORY,
4513 if (!summary_dimension && a == t->slabels_axis)
4515 levels[n_levels++] = (struct ctables_level) {
4516 .type = CTL_SUMMARY,
4517 .var_idx = SIZE_MAX,
4521 /* Pivot categories:
4523 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4524 - category for nest->vars[0], if nest->scale_idx != 0
4525 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4526 - category for nest->vars[1], if nest->scale_idx != 1
4528 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4529 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4530 - summary function, if 'a == t->slabels_axis && a ==
4533 Additional dimensions:
4535 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4537 - If 't->label_axis[b] == a' for some 'b != a', add a category
4542 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4544 for (size_t j = 0; j < n_sorted; j++)
4546 struct ctables_cell *cell = sorted[j];
4547 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4549 size_t n_common = 0;
4552 for (; n_common < n_levels; n_common++)
4554 const struct ctables_level *level = &levels[n_common];
4555 if (level->type == CTL_CATEGORY)
4557 size_t var_idx = level->var_idx;
4558 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4559 if (prev->axes[a].cvs[var_idx].category != c)
4561 else if (c->type != CCT_SUBTOTAL
4562 && c->type != CCT_TOTAL
4563 && c->type != CCT_POSTCOMPUTE
4564 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4565 &cell->axes[a].cvs[var_idx].value,
4566 var_get_type (nest->vars[var_idx])))
4572 for (size_t k = n_common; k < n_levels; k++)
4574 const struct ctables_level *level = &levels[k];
4575 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4576 if (level->type == CTL_SUMMARY)
4578 assert (k == n_levels - 1);
4580 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4581 for (size_t m = 0; m < specs->n; m++)
4583 int leaf = pivot_category_create_leaf (
4584 parent, ctables_summary_label (&specs->specs[m],
4592 const struct variable *var = nest->vars[level->var_idx];
4593 struct pivot_value *label;
4594 if (level->type == CTL_VAR)
4596 label = pivot_value_new_variable (var);
4597 label->variable.show = level->vlabel;
4599 else if (level->type == CTL_CATEGORY)
4601 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4602 label = ctables_category_create_label (
4603 t->categories[var_get_dict_index (var)],
4604 cv->category, var, &cv->value);
4609 if (k == n_levels - 1)
4610 prev_leaf = pivot_category_create_leaf (parent, label);
4612 groups[k] = pivot_category_create_group__ (parent, label);
4616 cell->axes[a].leaf = prev_leaf;
4623 for (size_t i = 0; i < t->n_sections; i++)
4625 struct ctables_section *s = &t->sections[i];
4627 struct ctables_cell *cell;
4628 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4633 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4634 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4635 for (size_t j = 0; j < specs->n; j++)
4638 size_t n_dindexes = 0;
4640 if (summary_dimension)
4641 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4643 if (categories_dimension)
4645 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4646 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4647 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4648 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4651 dindexes[n_dindexes++] = ctv->leaf;
4654 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4657 int leaf = cell->axes[a].leaf;
4658 if (a == t->summary_axis && !summary_dimension)
4660 dindexes[n_dindexes++] = leaf;
4663 const struct ctables_summary_spec *ss = &specs->specs[j];
4665 struct fmt_spec format = specs->specs[j].format;
4666 bool is_ctables_format = ss->is_ctables_format;
4667 double d = (cell->postcompute
4668 ? ctables_cell_calculate_postcompute (
4669 s, cell, ss, &format, &is_ctables_format, j)
4670 : ctables_summary_value (cell, &cell->summaries[j],
4673 struct pivot_value *value;
4674 if (ct->hide_threshold != 0
4675 && d < ct->hide_threshold
4676 && ctables_summary_function_is_count (ss->function))
4678 value = pivot_value_new_user_text_nocopy (
4679 xasprintf ("<%d", ct->hide_threshold));
4681 else if (d == 0 && ct->zero)
4682 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4683 else if (d == SYSMIS && ct->missing)
4684 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4685 else if (is_ctables_format)
4687 char *s = data_out_stretchy (&(union value) { .f = d },
4689 &ct->ctables_formats, NULL);
4690 value = pivot_value_new_user_text_nocopy (s);
4694 value = pivot_value_new_number (d);
4695 value->numeric.format = format;
4697 pivot_table_put (pt, dindexes, n_dindexes, value);
4702 pivot_table_submit (pt);
/* Validates moving the category labels of axis A in table T to the position
   recorded in t->label_axis[A], and records A in t->clabels_from_axis and the
   innermost variable of the first nest in t->clabels_example.

   An error is reported with msg() when:

   - a category of the example variable has type CCT_FUNCTION (sorting based
     on a summary function);

   - the innermost variable of any nest on the axis is that nest's scale
     variable;

   - the innermost variables of the nests differ in width, value labels, or
     category specifications.

   NOTE(review): the return statements are not visible in this listing; the
   caller combines the results with &&, so presumably this returns true on
   success and false after reporting an error. */
4706 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4708 enum pivot_axis_type label_pos = t->label_axis[a];
4712 t->clabels_from_axis = a;
/* Names used only to build the error messages below. */
4714 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4715 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4717 const struct ctables_stack *stack = &t->stacks[a];
4721 const struct ctables_nest *n0 = &stack->nests[0];
4724 assert (stack->n == 1);
/* The innermost variable of the first nest is the reference against which
   the other nests are compared. */
4728 const struct variable *v0 = n0->vars[n0->n - 1];
4729 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4730 t->clabels_example = v0;
4732 for (size_t i = 0; i < c0->n_cats; i++)
4733 if (c0->cats[i].type == CCT_FUNCTION)
4735 msg (SE, _("%s=%s is not allowed with sorting based "
4736 "on a summary function."),
4737 subcommand_name, pos_name);
4740 if (n0->n - 1 == n0->scale_idx)
4742 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4743 "but %s is a scale variable."),
4744 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest's innermost variable must be compatible with V0. */
4748 for (size_t i = 1; i < stack->n; i++)
4750 const struct ctables_nest *ni = &stack->nests[i];
4752 const struct variable *vi = ni->vars[ni->n - 1];
4753 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4755 if (ni->n - 1 == ni->scale_idx)
4757 msg (SE, _("%s=%s requires the variables to be moved to be "
4758 "categorical, but %s is a scale variable."),
4759 subcommand_name, pos_name, var_get_name (vi));
4762 if (var_get_width (v0) != var_get_width (vi))
4764 msg (SE, _("%s=%s requires the variables to be "
4765 "moved to have the same width, but %s has "
4766 "width %d and %s has width %d."),
4767 subcommand_name, pos_name,
4768 var_get_name (v0), var_get_width (v0),
4769 var_get_name (vi), var_get_width (vi));
4772 if (!val_labs_equal (var_get_value_labels (v0),
4773 var_get_value_labels (vi)))
4775 msg (SE, _("%s=%s requires the variables to be "
4776 "moved to have the same value labels, but %s "
4777 "and %s have different value labels."),
4778 subcommand_name, pos_name,
4779 var_get_name (v0), var_get_name (vi));
4782 if (!ctables_categories_equal (c0, ci))
4784 msg (SE, _("%s=%s requires the variables to be "
4785 "moved to have the same category "
4786 "specifications, but %s and %s have different "
4787 "category specifications."),
4788 subcommand_name, pos_name,
4789 var_get_name (v0), var_get_name (vi));
/* Searches the *N elements of *SUM_VARS for VAR with a linear scan.  If VAR
   is absent, stores it at index *N, first growing the array with
   x2nrealloc() when *N has reached *ALLOCATED.

   NOTE(review): the return statements and any increment of *N are not
   visible in this listing; the caller stores the result in
   spec->sum_var_idx, so presumably VAR's index in *SUM_VARS is returned. */
4798 add_sum_var (struct variable *var,
4799 struct variable ***sum_vars, size_t *n, size_t *allocated)
4801 for (size_t i = 0; i < *n; i++)
4802 if (var == (*sum_vars)[i])
4805 if (*n >= *allocated)
4806 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4807 (*sum_vars)[*n] = var;
/* Walks axis expression A.  For each summary spec on A, in every summary
   variant, whose function satisfies ctables_function_is_pctsum(), registers
   A's variable with add_sum_var() and stores the result in
   spec->sum_var_idx.  Also recurses into both of A's subexpressions.

   NOTE(review): the conditions that select between the two branches, and any
   null check on A, are not visible in this listing. */
4812 enumerate_sum_vars (const struct ctables_axis *a,
4813 struct variable ***sum_vars, size_t *n, size_t *allocated)
4821 for (size_t i = 0; i < N_CSVS; i++)
4822 for (size_t j = 0; j < a->specs[i].n; j++)
4824 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4825 if (ctables_function_is_pctsum (spec->function))
4826 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4832 for (size_t i = 0; i < 2; i++)
4833 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution, after parsing.  The visible steps are:

   1. For each axis, enumerate its stack of nests with enumerate_fts() and
      fill in, per domain type, the list of variable indexes that define the
      domain (nest->domains[dt]).  An axis handled by the other branch gets a
      single empty nest, and its labels stay on the same axis.

   2. For each nest on the summary axis, supply a default summary spec
      (CTSF_MEAN for scale, CTSF_COUNT otherwise) if none was given, clone
      the cell specs into the totals specs if those are empty, and, for
      listwise missing-value handling, collect the scale variables of the
      other nests in the same group.

   3. Merge the summary specs of all nests and variants into
      t->summary_specs, recording each spec's position in the merged list in
      its axis_idx.

   4. Enumerate the variables needed by percentage-of-sum functions.

   5. Check the label positions for the row and column axes; the combined
      result is returned.

   NOTE(review): the printf() calls over merged->specs and nest->specs look
   like debugging output; whether they are compiled out by a preprocessor
   guard is not visible in this listing. */
4839 ctables_prepare_table (struct ctables_table *t)
4841 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4844 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4846 for (size_t j = 0; j < t->stacks[a].n; j++)
4848 struct ctables_nest *nest = &t->stacks[a].nests[j];
4849 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Room for every variable in the nest; n_domains[dt] counts those kept. */
4851 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4852 nest->n_domains[dt] = 0;
4854 for (size_t k = 0; k < nest->n; k++)
4856 if (k == nest->scale_idx)
4865 if (a != PIVOT_AXIS_LAYER)
4872 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4873 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4874 : a == PIVOT_AXIS_ROW)
/* True for the innermost variable, or for the one before it when the
   innermost variable is the scale variable. */
4876 if (k == nest->n - 1
4877 || (nest->scale_idx == nest->n - 1
4878 && k == nest->n - 2))
4884 if (a == PIVOT_AXIS_COLUMN)
4889 if (a == PIVOT_AXIS_ROW)
4894 nest->domains[dt][nest->n_domains[dt]++] = k;
4901 struct ctables_nest *nest = xmalloc (sizeof *nest);
4902 *nest = (struct ctables_nest) { .n = 0 };
4903 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4905 /* There's no point in moving labels away from an axis that has no
4906 labels, so avoid dealing with the special cases around that. */
4907 t->label_axis[a] = a;
4910 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4911 for (size_t i = 0; i < stack->n; i++)
4913 struct ctables_nest *nest = &stack->nests[i];
/* No summary specs were given for this nest: supply a default. */
4914 if (!nest->specs[CSV_CELL].n)
4916 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4917 specs->specs = xmalloc (sizeof *specs->specs);
4920 enum ctables_summary_function function
4921 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): the default format is computed from specs->var before the
   assignment to specs->var below.  Confirm that
   ctables_summary_default_format() cannot be handed an unset variable in a
   case where it needs one. */
4923 *specs->specs = (struct ctables_summary_spec) {
4924 .function = function,
4925 .format = ctables_summary_default_format (function, specs->var),
4928 specs->var = nest->vars[0];
4930 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4931 &nest->specs[CSV_CELL]);
4933 else if (!nest->specs[CSV_TOTAL].n)
4934 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4935 &nest->specs[CSV_CELL]);
/* Listwise deletion: gather the scale variables of the other nests that
   share this nest's group_head. */
4937 if (t->ctables->smissing_listwise)
4939 struct variable **listwise_vars = NULL;
4941 size_t allocated = 0;
4943 for (size_t j = nest->group_head; j < stack->n; j++)
4945 const struct ctables_nest *other_nest = &stack->nests[j];
4946 if (other_nest->group_head != nest->group_head)
4949 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4952 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4953 sizeof *listwise_vars);
4954 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* The same array pointer is stored in every summary variant, so it is shared
   rather than copied. */
4957 for (size_t j = 0; j < N_CSVS; j++)
4959 nest->specs[j].listwise_vars = listwise_vars;
4960 nest->specs[j].n_listwise_vars = n;
/* Merge all of the spec sets: repeatedly take the minimum remaining item,
   append it to MERGED, and advance every item that compares equal to it. */
4965 struct ctables_summary_spec_set *merged = &t->summary_specs;
4966 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4968 for (size_t j = 0; j < stack->n; j++)
4970 const struct ctables_nest *nest = &stack->nests[j];
4972 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4973 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4978 struct merge_item min = items[0];
4979 for (size_t j = 1; j < n_left; j++)
4980 if (merge_item_compare_3way (&items[j], &min) < 0)
4983 if (merged->n >= merged->allocated)
4984 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4985 sizeof *merged->specs);
4986 merged->specs[merged->n++] = min.set->specs[min.ofs];
4988 for (size_t j = 0; j < n_left; )
4990 if (merge_item_compare_3way (&items[j], &min) == 0)
4992 struct merge_item *item = &items[j];
/* Record where this spec ended up in the merged list. */
4993 item->set->specs[item->ofs].axis_idx = merged->n - 1;
/* An exhausted item is replaced by the last remaining one. */
4994 if (++item->ofs >= item->set->n)
4996 items[j] = items[--n_left];
5005 for (size_t j = 0; j < merged->n; j++)
5006 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5008 for (size_t j = 0; j < stack->n; j++)
5010 const struct ctables_nest *nest = &stack->nests[j];
5011 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5013 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5014 for (size_t k = 0; k < specs->n; k++)
5015 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5016 specs->specs[k].axis_idx);
5022 size_t allocated_sum_vars = 0;
5023 enumerate_sum_vars (t->axes[t->summary_axis],
5024 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5026 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5027 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For case C, looks at the innermost variable of each nest on axis A of table
   T and records its value with ctables_value_insert(), subject to the
   category match tested below.  Numeric system-missing values are tested for
   separately.

   NOTE(review): the action taken for system-missing values and the remaining
   arguments of the ctables_categories_match() call are not visible in this
   listing. */
5031 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5032 enum pivot_axis_type a)
5034 struct ctables_stack *stack = &t->stacks[a];
5035 for (size_t i = 0; i < stack->n; i++)
5037 const struct ctables_nest *nest = &stack->nests[i];
5038 const struct variable *var = nest->vars[nest->n - 1];
5039 const union value *value = case_data (c, var);
5041 if (var_is_numeric (var) && value->f == SYSMIS)
5044 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5046 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sorting an array of pointers to struct
   ctables_value.  A_ and B_ each point to an array element, and WIDTH_ points
   to the int width of the values.  Returns the result of
   value_compare_3way() on the two values. */
5051 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5053 const struct ctables_value *const *ap = a_;
5054 const struct ctables_value *const *bp = b_;
5055 const struct ctables_value *a = *ap;
5056 const struct ctables_value *b = *bp;
5057 const int *width = width_;
5058 return value_compare_3way (&a->value, &b->value, *width);
/* Builds t->clabels_values, an array of the values collected in
   t->clabels_values_map sorted by value, and assigns each value's leaf to
   its position in the array.  Before that, value-labeled values of
   t->clabels_example that match one of its categories are added to the map.

   NOTE(review): any condition guarding the value-label loop is not visible
   in this listing. */
5062 ctables_sort_clabels_values (struct ctables_table *t)
5064 const struct variable *v0 = t->clabels_example;
5065 int width = var_get_width (v0);
5067 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5070 const struct val_labs *val_labs = var_get_value_labels (v0);
5071 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5072 vl = val_labs_next (val_labs, vl))
5073 if (ctables_categories_match (c0, &vl->value, v0))
5074 ctables_value_insert (t, &vl->value, width);
/* Flatten the map into an array. */
5077 size_t n = hmap_count (&t->clabels_values_map);
5078 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5080 struct ctables_value *clv;
5082 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5083 t->clabels_values[i++] = clv;
5084 t->n_clabels_values = n;
5087 sort (t->clabels_values, n, sizeof *t->clabels_values,
5088 compare_clabels_values_3way, &width);
/* A value's leaf is its index in sorted order. */
5090 for (size_t i = 0; i < n; i++)
5091 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR implied by each category in CATS,
   whether or not they occur in the data.  The visible cases add:

   - the category's own number or (space-padded) string;

   - for numeric and string ranges, the value-labeled values that fall in the
     range;

   - value-labeled values that are missing for VAR;

   - every value-labeled value;

   - value-labeled values filtered by c->include_missing.

   NOTE(review): most case labels are missing from this listing, so which
   category type selects which branch is not confirmed here. */
5095 ctables_add_category_occurrences (const struct variable *var,
5096 struct hmap *occurrences,
5097 const struct ctables_categories *cats)
5099 const struct val_labs *val_labs = var_get_value_labels (var);
5101 for (size_t i = 0; i < cats->n_cats; i++)
5103 const struct ctables_category *c = &cats->cats[i];
5107 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Build a temporary value of VAR's width from the category's string, padded
   with spaces on the right. */
5113 int width = var_get_width (var);
5115 value_init (&value, width);
5116 value_copy_buf_rpad (&value, width,
5117 CHAR_CAST (uint8_t *, c->string.string),
5118 c->string.length, ' ');
5119 ctables_add_occurrence (var, &value, occurrences);
5120 value_destroy (&value, width);
/* Numeric range: both bounds are inclusive. */
5125 assert (var_is_numeric (var));
5126 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5127 vl = val_labs_next (val_labs, vl))
5128 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5129 ctables_add_occurrence (var, &vl->value, occurrences);
5133 assert (var_is_alpha (var));
5134 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5135 vl = val_labs_next (val_labs, vl))
5136 if (in_string_range (&vl->value, var, c->srange))
5137 ctables_add_occurrence (var, &vl->value, occurrences);
5141 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5142 vl = val_labs_next (val_labs, vl))
5143 if (var_is_value_missing (var, &vl->value))
5144 ctables_add_occurrence (var, &vl->value, occurrences);
5148 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5149 vl = val_labs_next (val_labs, vl))
5150 ctables_add_occurrence (var, &vl->value, occurrences);
5153 case CCT_POSTCOMPUTE:
5163 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5164 vl = val_labs_next (val_labs, vl))
5165 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5166 ctables_add_occurrence (var, &vl->value, occurrences);
5169 case CCT_EXCLUDED_MISSING:
/* Recursive helper that inserts a cell into section S for every combination
   of occurring values (and postcompute categories) across all axes.

   A and A_IDX identify the variable position to expand next.  CATS
   accumulates the category chosen at each position and C is a scratch case
   whose data is overwritten with the chosen values.  Once A is past the last
   axis, the combination is inserted with ctables_cell_insert__().

   NOTE(review): CATS has a fixed second dimension of 10 and A_IDX is not
   checked against it here; confirm that nests cannot be deeper than that. */
5176 ctables_section_recurse_add_empty_categories (
5177 struct ctables_section *s,
5178 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5179 enum pivot_axis_type a, size_t a_idx)
5181 if (a >= PIVOT_N_AXES)
5182 ctables_cell_insert__ (s, c, cats);
/* Nothing (more) to expand on this axis: move on to the next one. */
5183 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5184 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5187 const struct variable *var = s->nests[a]->vars[a_idx];
5188 const struct ctables_categories *categories = s->table->categories[
5189 var_get_dict_index (var)];
5190 int width = var_get_width (var);
5191 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5192 const struct ctables_occurrence *o;
5193 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case with this occurrence, then recurse
   to the next position on the same axis. */
5195 union value *value = case_data_rw (c, var);
5196 value_destroy (value, width);
5197 value_clone (value, &o->value, width);
5198 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5199 assert (cats[a][a_idx] != NULL);
5200 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are expanded here as well, separately from the
   occurrence loop above. */
5203 for (size_t i = 0; i < categories->n_cats; i++)
5205 const struct ctables_category *cat = &categories->cats[i];
5206 if (cat->type == CCT_POSTCOMPUTE)
5208 cats[a][a_idx] = cat;
5209 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for empty categories.  For each non-scale variable
   in S's nests, reads the variable's category setup and calls
   ctables_add_category_occurrences() for it.  Then creates a scratch case
   from the dictionary's prototype and expands combinations of occurrences
   with ctables_section_recurse_add_empty_categories().

   NOTE(review): how cats->show_empty and the local show_empty flag gate the
   two steps, and how the scratch case is released, are not visible in this
   listing.  The CATS array has a fixed second dimension of 10 (marked XXX in
   the source). */
5216 ctables_section_add_empty_categories (struct ctables_section *s)
5218 bool show_empty = false;
5219 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5221 for (size_t k = 0; k < s->nests[a]->n; k++)
5222 if (k != s->nests[a]->scale_idx)
5224 const struct variable *var = s->nests[a]->vars[k];
5225 const struct ctables_categories *cats = s->table->categories[
5226 var_get_dict_index (var)];
5227 if (cats->show_empty)
5230 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5236 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5237 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5238 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Discards the data accumulated in section S: the occurrences recorded for
   each non-scale variable, every cell (its per-axis values and its
   summaries), and every domain.  The cell and domain hash maps are shrunk
   afterward.

   NOTE(review): the calls that free the occurrence, cell, and domain
   structures themselves are not visible in this listing. */
5243 ctables_section_clear (struct ctables_section *s)
5245 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5247 const struct ctables_nest *nest = s->nests[a];
5248 for (size_t i = 0; i < nest->n; i++)
5249 if (i != nest->scale_idx)
5251 const struct variable *var = nest->vars[i];
5252 int width = var_get_width (var);
5253 struct ctables_occurrence *o, *next;
5254 struct hmap *map = &s->occurrences[a][i];
/* The _SAFE variant is used because nodes are deleted during iteration. */
5255 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5257 value_destroy (&o->value, width);
5258 hmap_delete (map, &o->node);
5265 struct ctables_cell *cell, *next_cell;
5266 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5268 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5270 const struct ctables_nest *nest = s->nests[a];
5271 for (size_t i = 0; i < nest->n; i++)
5272 if (i != nest->scale_idx)
5273 value_destroy (&cell->axes[a].cvs[i].value,
5274 var_get_width (nest->vars[i]));
5275 free (cell->axes[a].cvs);
/* Summaries are torn down using the spec set that matches the cell's summary
   variant. */
5278 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5279 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5280 for (size_t i = 0; i < specs->n; i++)
5281 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5282 free (cell->summaries);
5284 hmap_delete (&s->cells, &cell->node);
5287 hmap_shrink (&s->cells);
5289 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5291 struct ctables_domain *domain, *next_domain;
5292 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5295 free (domain->sums);
5296 hmap_delete (&s->domains[dt], &domain->node);
5299 hmap_shrink (&s->domains[dt]);
/* Discards the data accumulated in table T: clears every section and, if
   category labels were moved (t->clabels_example is set), empties
   t->clabels_values_map and frees and resets the t->clabels_values array.

   NOTE(review): the call that frees each struct ctables_value is not visible
   in this listing. */
5304 ctables_table_clear (struct ctables_table *t)
5306 for (size_t i = 0; i < t->n_sections; i++)
5307 ctables_section_clear (&t->sections[i]);
5309 if (t->clabels_example)
5311 int width = var_get_width (t->clabels_example);
5312 struct ctables_value *value, *next_value;
5313 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5314 &t->clabels_values_map)
5316 value_destroy (&value->value, width);
5317 hmap_delete (&t->clabels_values_map, &value->node);
5320 hmap_shrink (&t->clabels_values_map);
5322 free (t->clabels_values);
5323 t->clabels_values = NULL;
5324 t->n_clabels_values = 0;
/* Executes the CTABLES command over the cases in INPUT from dataset DS.

   First allocates each table's sections (one per combination of nests across
   the three axes) and populates them with ctables_table_add_section().  Then
   processes INPUT one group at a time: a group per SPLIT FILE combination
   when the split type is SPLIT_SEPARATE, otherwise a single group.  For each
   group it emits the split-file values, feeds every case into every section
   of every table (and collects moved category-label values), and then, per
   table, sorts the category-label values, adds empty categories, outputs the
   table, and clears it for the next group.

   Returns the result of casegrouper_destroy().

   NOTE(review): the handling of the case returned by casereader_peek() (null
   check and release) is not visible in this listing. */
5329 ctables_execute (struct dataset *ds, struct casereader *input,
5332 for (size_t i = 0; i < ct->n_tables; i++)
5334 struct ctables_table *t = ct->tables[i];
/* An axis with an empty stack still counts as one, so the product is never
   zero. */
5335 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5336 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5337 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5338 sizeof *t->sections);
5339 size_t ix[PIVOT_N_AXES];
5340 ctables_table_add_section (t, 0, ix);
5343 struct dictionary *dict = dataset_dict (ds);
5344 struct casegrouper *grouper
5345 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5346 ? casegrouper_create_splits (input, dict)
5347 : casegrouper_create_vars (input, NULL, 0));
5348 struct casereader *group;
5349 while (casegrouper_get_next_group (grouper, &group))
5351 /* Output SPLIT FILE variables. */
5352 struct ccase *c = casereader_peek (group, 0);
5355 output_split_file_values (ds, c);
5359 bool warn_on_invalid = true;
5360 for (c = casereader_read (group); c;
5361 case_unref (c), c = casereader_read (group))
/* d_weight is the dictionary's case weight; e_weight comes from ct->e_weight
   when that variable is set. */
5363 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5364 double e_weight = (ct->e_weight
5365 ? var_force_valid_weight (ct->e_weight,
5366 case_num (c, ct->e_weight),
5370 for (size_t i = 0; i < ct->n_tables; i++)
5372 struct ctables_table *t = ct->tables[i];
5374 for (size_t j = 0; j < t->n_sections; j++)
5375 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Collect values only for axes whose category labels are moved elsewhere. */
5377 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5378 if (t->label_axis[a] != a)
5379 ctables_insert_clabels_values (t, c, a);
5382 casereader_destroy (group);
5384 for (size_t i = 0; i < ct->n_tables; i++)
5386 struct ctables_table *t = ct->tables[i];
5388 if (t->clabels_example)
5389 ctables_sort_clabels_values (t);
5391 for (size_t j = 0; j < t->n_sections; j++)
5392 ctables_section_add_empty_categories (&t->sections[j]);
5394 ctables_table_output (ct, t);
5395 ctables_table_clear (t);
5398 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given a dictionary, and returns the parsed
   expression.  The binary-operator parsers below take one of these as
   parse_next_level to parse their operands. */
5403 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5404 struct dictionary *);
/* Releases the resources held by postcompute expression E: the string of a
   CTPO_CAT_STRING node, both strings of a CTPO_CAT_SRANGE node, both
   subexpressions of an operator node (recursively), and E's message
   location.  The remaining category node types own nothing extra.

   NOTE(review): any null check on E, the operator case labels, and the call
   that frees E itself are not visible in this listing. */
5407 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5413 case CTPO_CAT_STRING:
5414 ss_dealloc (&e->string);
5417 case CTPO_CAT_SRANGE:
5418 for (size_t i = 0; i < 2; i++)
5419 ss_dealloc (&e->srange[i]);
5428 for (size_t i = 0; i < 2; i++)
5429 ctables_pcexpr_destroy (e->subs[i]);
/* These node types have no owned data beyond the location. */
5433 case CTPO_CAT_NUMBER:
5434 case CTPO_CAT_NRANGE:
5435 case CTPO_CAT_MISSING:
5436 case CTPO_CAT_OTHERNM:
5437 case CTPO_CAT_SUBTOTAL:
5438 case CTPO_CAT_TOTAL:
5442 msg_location_destroy (e->location);
/* Allocates a new expression node with subexpressions SUB0 and SUB1, whose
   location is the merge of the two subexpressions' locations.  OP is the
   node's operator.

   NOTE(review): the initializer line that stores OP and the return statement
   are not visible in this listing. */
5447 static struct ctables_pcexpr *
5448 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5449 struct ctables_pcexpr *sub0,
5450 struct ctables_pcexpr *sub1)
5452 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5453 *e = (struct ctables_pcexpr) {
5455 .subs = { sub0, sub1 },
5456 .location = msg_location_merged (sub0->location, sub1->location),
5461 /* How to parse an operator. */
/* Token that is compared against the lexer's current token. */
5464 enum token_type token;
/* Operation used for the expression node built from this operator. */
5465 enum ctables_postcompute_op op;
/* Looks through the N_OPS operators in OPS for one whose token equals
   LEXER's current token.  T_NEG_NUM is treated differently from the other
   tokens.

   NOTE(review): what is done for tokens other than T_NEG_NUM, and the return
   statements, are not visible in this listing; callers treat the result as
   the matched operator. */
5468 static const struct operator *
5469 ctable_pcexpr_match_operator (struct lexer *lexer,
5470 const struct operator ops[], size_t n_ops)
5472 for (const struct operator *op = ops; op < ops + n_ops; op++)
5473 if (lex_token (lexer) == op->token)
5475 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS operators in OPS,
   given an already parsed left-hand operand LHS.  Each right-hand operand is
   parsed with PARSE_NEXT_LEVEL, and the result is folded into LHS with
   ctables_pcexpr_allocate_binary(), so the chain associates to the left.

   If CHAIN_WARNING is nonnull and op_count exceeds 1 when the warning check
   runs, CHAIN_WARNING is issued as a warning at LHS's location.

   LHS is destroyed on the visible error path.

   NOTE(review): the loop exit conditions and return statements are not
   visible in this listing. */
5484 static struct ctables_pcexpr *
5485 ctable_pcexpr_parse_binary_operators__ (
5486 struct lexer *lexer, struct dictionary *dict,
5487 const struct operator ops[], size_t n_ops,
5488 parse_recursively_func *parse_next_level,
5489 const char *chain_warning, struct ctables_pcexpr *lhs)
5491 for (int op_count = 0; ; op_count++)
5493 const struct operator *op
5494 = ctable_pcexpr_match_operator (lexer, ops, n_ops);
5497 if (op_count > 1 && chain_warning)
5498 msg_at (SW, lhs->location, "%s", chain_warning);
5503 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5506 ctables_pcexpr_destroy (lhs);
5510 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: obtains the first operand
   from PARSE_NEXT_LEVEL, then passes it with the operator table OPS[0..N_OPS)
   and CHAIN_WARNING to ctable_pcexpr_parse_binary_operators__(), which
   consumes any operators that follow. */
5514 static struct ctables_pcexpr *
5515 ctable_pcexpr_parse_binary_operators (struct lexer *lexer,
5516 struct dictionary *dict,
5517 const struct operator ops[], size_t n_ops,
5518 parse_recursively_func *parse_next_level,
5519 const char *chain_warning)
5521 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5525 return ctable_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5527 chain_warning, lhs);
/* Forward declaration: the primary-expression parser calls the top
   (addition) level recursively for parenthesized sub-expressions. */
5530 static struct ctables_pcexpr *ctable_pcexpr_parse_add (struct lexer *,
5531 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node whose numeric range
   runs from LOW to HIGH. */
5533 static struct ctables_pcexpr
5534 ctpo_cat_nrange (double low, double high)
5536 return (struct ctables_pcexpr) {
5537 .op = CTPO_CAT_NRANGE,
5538 .nrange = { low, high },
/* Parses a primary postcompute expression and returns it as a newly allocated
   node (a copy of the local E made with xmemdup()).

   Forms recognized by the visible code:
     - a numeric constant;
     - the keywords MISSING, OTHERNM and TOTAL;
     - SUBTOTAL, optionally followed by a bracketed integer of at least 1;
     - a bracketed category: LO THRU n, n, n THRU HI, n THRU m, or a string
       (recoded from UTF-8 to the dictionary encoding, trailing spaces
       trimmed);
     - a parenthesized sub-expression, parsed by ctable_pcexpr_parse_add().

   The node's location spans from the first token of the primary through the
   token just before the lexer's position on completion.  Failure returns are
   on lines elided from this listing. */
5542 static struct ctables_pcexpr *
5543 ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5545 int start_ofs = lex_ofs (lexer);
5546 struct ctables_pcexpr e;
5547 if (lex_is_number (lexer))
5549 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5550 .number = lex_number (lexer) };
5553 else if (lex_match_id (lexer, "MISSING"))
5554 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5555 else if (lex_match_id (lexer, "OTHERNM"))
5556 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5557 else if (lex_match_id (lexer, "TOTAL"))
5558 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5559 else if (lex_match_id (lexer, "SUBTOTAL"))
/* An index of 0 is used when no bracketed index is given. */
5561 size_t subtotal_index = 0;
5562 if (lex_match (lexer, T_LBRACK))
5564 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5566 subtotal_index = lex_integer (lexer);
5568 if (!lex_force_match (lexer, T_RBRACK))
5571 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5572 .subtotal_index = subtotal_index };
5574 else if (lex_match (lexer, T_LBRACK))
5576 if (lex_match_id (lexer, "LO"))
/* Fail unless THRU is followed by a number.  The lex_force_num() result was
   previously not negated, so this test was true exactly when a number WAS
   present -- the opposite of the sibling check in the "n THRU m" branch
   below -- and a well-formed [LO THRU n] took the failure path. */
5578 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
5580 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5583 else if (lex_is_number (lexer))
5585 double number = lex_number (lexer);
5587 if (lex_match_id (lexer, "THRU"))
5589 if (lex_match_id (lexer, "HI"))
5590 e = ctpo_cat_nrange (number, DBL_MAX);
5593 if (!lex_force_num (lexer))
5595 e = ctpo_cat_nrange (number, lex_number (lexer));
5600 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5603 else if (lex_is_string (lexer))
5605 struct substring s = recode_substring_pool (
5606 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
5607 ss_rtrim (&s, ss_cstr (" "));
5609 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5614 lex_error (lexer, NULL);
/* A missing closing bracket must not leak the recoded string. */
5618 if (!lex_force_match (lexer, T_RBRACK))
5620 if (e.op == CTPO_CAT_STRING)
5621 ss_dealloc (&e.string);
5625 else if (lex_match (lexer, T_LPAREN))
5627 struct ctables_pcexpr *ep = ctable_pcexpr_parse_add (lexer, dict);
5630 if (!lex_force_match (lexer, T_RPAREN))
5632 ctables_pcexpr_destroy (ep);
5639 lex_error (lexer, NULL);
5643 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5644 return xmemdup (&e, sizeof e);
/* Allocates and initializes a new expression node for operand SUB.  The
   initializer lines that set the operator and attach SUB are elided from
   this listing.  The node's location runs from token offset START_OFS
   through the token just before LEXER's current position. */
5647 static struct ctables_pcexpr *
5648 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5649 struct lexer *lexer, int start_ofs)
5651 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5652 *e = (struct ctables_pcexpr) {
5655 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.

   In the ordinary case this delegates to
   ctable_pcexpr_parse_binary_operators() with the single operator
   T_EXP => CTPO_POW and ctable_pcexpr_parse_primary() as operand parser.

   When the current token is a negative number immediately followed by the
   exponentiation operator, the code instead builds a CTPO_CONSTANT left
   operand holding the negated token value (i.e. the magnitude), parses the
   operator chain starting from it, and wraps the whole result with
   ctables_pcexpr_allocate_neg().

   CHAIN_WARNING is the translated message handed to the chain parser for
   use when several exponentiation operators appear in a row. */
5660 static struct ctables_pcexpr *
5661 ctable_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5663 static const struct operator op = { T_EXP, CTPO_POW };
5665 const char *chain_warning =
5666 _("The exponentiation operator (`**') is left-associative: "
5667 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5668 "To disable this warning, insert parentheses.");
5670 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5671 return ctable_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5672 ctable_pcexpr_parse_primary,
5675 /* Special case for situations like "-5**6", which must be parsed as
5678 int start_ofs = lex_ofs (lexer);
5679 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5680 *lhs = (struct ctables_pcexpr) {
5681 .op = CTPO_CONSTANT,
5682 .number = -lex_tokval (lexer),
5683 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5687 struct ctables_pcexpr *node = ctable_pcexpr_parse_binary_operators__ (
5688 lexer, dict, &op, 1,
5689 ctable_pcexpr_parse_primary, chain_warning, lhs);
5693 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5696 /* Parses the unary minus level. */
/* Without a leading T_DASH this simply parses the exponentiation level.
   With one, the operand is parsed by a recursive call to this function (so
   several minus signs may be stacked) and the result is wrapped with
   ctables_pcexpr_allocate_neg(), located from the minus sign onward. */
5697 static struct ctables_pcexpr *
5698 ctable_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5700 int start_ofs = lex_ofs (lexer);
5701 if (!lex_match (lexer, T_DASH))
5702 return ctable_pcexpr_parse_exp (lexer, dict);
5704 struct ctables_pcexpr *inner = ctable_pcexpr_parse_neg (lexer, dict);
5708 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5711 /* Parses the multiplication and division level. */
/* Operands come from the unary-minus level; T_ASTERISK maps to CTPO_MUL and
   T_SLASH to CTPO_DIV.  No chain warning is requested at this level. */
5712 static struct ctables_pcexpr *
5713 ctable_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5715 static const struct operator ops[] =
5717 { T_ASTERISK, CTPO_MUL },
5718 { T_SLASH, CTPO_DIV },
5721 return ctable_pcexpr_parse_binary_operators (lexer, dict, ops,
5722 sizeof ops / sizeof *ops,
5723 ctable_pcexpr_parse_neg, NULL);
5726 /* Parses the addition and subtraction level. */
/* Operands come from the multiplication level; T_PLUS maps to CTPO_ADD and
   T_DASH to CTPO_SUB.  T_NEG_NUM is also listed, mapped to CTPO_ADD:
   presumably so that input such as "a -5", which the lexer delivers as a
   single negative-number token, is treated as adding the negative number
   (see the T_NEG_NUM test in ctable_pcexpr_match_operator) -- confirm.
   No chain warning is requested at this level. */
5727 static struct ctables_pcexpr *
5728 ctable_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5730 static const struct operator ops[] =
5732 { T_PLUS, CTPO_ADD },
5733 { T_DASH, CTPO_SUB },
5734 { T_NEG_NUM, CTPO_ADD },
5737 return ctable_pcexpr_parse_binary_operators (lexer, dict,
5738 ops, sizeof ops / sizeof *ops,
5739 ctable_pcexpr_parse_mul, NULL);
/* Searches CT's postcompute table for an entry named NAME.  Candidates are
   taken from the hash bucket for utf8_hash_case_string (NAME, 0) and compared
   with utf8_strcasecmp(), so the match ignores case.  The return statements
   are elided from this listing; callers test the result for null. */
5742 static struct ctables_postcompute *
5743 ctables_find_postcompute (struct ctables *ct, const char *name)
5745 struct ctables_postcompute *pc;
5746 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5747 utf8_hash_case_string (name, 0), &ct->postcomputes)
5748 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, of the form

       &name = EXPR(expression)

   and records it in CT's postcompute table.

   If a postcompute called NAME already exists, a warning is issued at the
   new definition and a note at the previous one, and the previous expression
   and location are destroyed so the entry can be reused.  Otherwise a new
   entry is allocated and inserted under the case-insensitive hash of its
   name.  The entry's location covers the whole subcommand (PCOMPUTE_START is
   one token before the lexer position on entry), and its label is the source
   text of the expression between the parentheses. */
5754 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5757 int pcompute_start = lex_ofs (lexer) - 1;
5759 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
5762 char *name = ss_xstrdup (lex_tokss (lexer));
5765 if (!lex_force_match (lexer, T_EQUALS)
5766 || !lex_force_match_id (lexer, "EXPR")
5767 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets bracketing the expression, used for the label below. */
5773 int expr_start = lex_ofs (lexer);
5774 struct ctables_pcexpr *expr = ctable_pcexpr_parse_add (lexer, dict);
5775 int expr_end = lex_ofs (lexer) - 1;
5776 if (!expr || !lex_force_match (lexer, T_RPAREN))
5781 int pcompute_end = lex_ofs (lexer) - 1;
5783 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5786 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5789 msg_at (SW, location, _("New definition of &%s will override the "
5790 "previous definition."),
5792 msg_at (SN, pc->location, _("This is the previous definition."));
5794 ctables_pcexpr_destroy (pc->expr);
5795 msg_location_destroy (pc->location);
5800 pc = xmalloc (sizeof *pc);
5801 *pc = (struct ctables_postcompute) { .name = name };
5802 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5803 utf8_hash_case_string (pc->name, 0));
5806 pc->location = location;
5808 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the argument of PPROPERTIES FORMAT into *SSS, which is first reset
   to an empty set.

   The argument is a sequence of entries, each a summary function, then (for
   PTILE only) a percentile in the closed range 0 to 100, then a format
   specifier.  Parsing stops at end of command, at a slash, or at an
   identifier matching LABEL or HIDESOURCECATS.  Each entry is appended to
   SSS->specs, which grows with x2nrealloc().

   The last visible line uninitializes SSS; presumably that is the error
   path (its label and the return statements are elided). */
5813 ctables_parse_pproperties_format (struct lexer *lexer,
5814 struct ctables_summary_spec_set *sss)
5816 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5818 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5819 && !(lex_token (lexer) == T_ID
5820 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5821 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5822 lex_tokss (lexer)))))
5824 /* Parse function. */
5825 enum ctables_summary_function function;
5826 if (!parse_ctables_summary_function (lexer, &function))
5829 /* Parse percentile. */
5830 double percentile = 0;
5831 if (function == CTSF_PTILE)
5833 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5835 percentile = lex_number (lexer);
5840 struct fmt_spec format;
5841 bool is_ctables_format;
5842 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5845 if (sss->n >= sss->allocated)
5846 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5847 sizeof *sss->specs);
5848 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5849 .function = function,
5850 .percentile = percentile,
5852 .is_ctables_format = is_ctables_format,
5858 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand.

   First comes a list of "&name" references; each must name an existing
   postcompute in CT (otherwise "Unknown computed category" is reported), and
   the matches are collected in the growable array PCS.  Then, until a slash
   or end of command, any of these settings may follow and is applied to
   every collected postcompute:

     LABEL=string          replaces the label (the old one is freed);
     FORMAT=specs          parsed once with ctables_parse_pproperties_format()
                           and then cloned into each postcompute's specs;
     HIDESOURCECATS=bool   sets hide_source_cats. */
5863 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5865 struct ctables_postcompute **pcs = NULL;
5867 size_t allocated_pcs = 0;
5869 while (lex_match (lexer, T_AND))
5871 if (!lex_force_id (lexer))
5873 struct ctables_postcompute *pc
5874 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5877 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5882 if (n_pcs >= allocated_pcs)
5883 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5887 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5889 if (lex_match_id (lexer, "LABEL"))
5891 lex_match (lexer, T_EQUALS);
5892 if (!lex_force_string (lexer))
5895 for (size_t i = 0; i < n_pcs; i++)
5897 free (pcs[i]->label);
5898 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5903 else if (lex_match_id (lexer, "FORMAT"))
5905 lex_match (lexer, T_EQUALS);
5907 struct ctables_summary_spec_set sss;
5908 if (!ctables_parse_pproperties_format (lexer, &sss))
5911 for (size_t i = 0; i < n_pcs; i++)
5914 ctables_summary_spec_set_uninit (pcs[i]->specs);
5916 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5917 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
/* Each postcompute received its own clone, so the parsed set is released. */
5919 ctables_summary_spec_set_uninit (&sss);
5921 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5923 lex_match (lexer, T_EQUALS);
5924 bool hide_source_cats;
5925 if (!parse_bool (lexer, &hide_source_cats))
5927 for (size_t i = 0; i < n_pcs; i++)
5928 pcs[i]->hide_source_cats = hide_source_cats;
5932 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, broken down with localtime() and formatted
   with strftime() according to FORMAT.

   NOTE(review): among the visible lines the localtime() result is passed on
   without a null check and the strftime() return value is ignored -- confirm
   whether that is acceptable here. */
5945 put_strftime (struct string *out, time_t now, const char *format)
5947 const struct tm *tm = localtime (&now);
5949 strftime (value, sizeof value, format, tm);
5950 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.  The return statements are
   elided from this listing; put_title_text() uses the result as a condition,
   so presumably it reports whether the prefix was found. */
5954 skip_prefix (struct substring *s, struct substring prefix)
5956 if (ss_starts_with (*s, prefix))
5958 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens at offsets EXPR_START up
   to, but not including, EXPR_END.

   Square brackets are tracked through a nesting counter (the statements that
   adjust it, and whatever is done with tokens inside brackets, are elided
   from this listing).  An identifier is replaced by the variable label of
   the dictionary variable it names, if there is such a variable and it has a
   label; otherwise the identifier text itself is used.  Any other token is
   written using its source representation, with a space before it unless it
   is the first token, a right parenthesis, or OUT already ends in a space,
   and a space after it unless it is the last token or a left parenthesis. */
5966 put_table_expression (struct string *out, struct lexer *lexer,
5967 struct dictionary *dict, int expr_start, int expr_end)
5970 for (int ofs = expr_start; ofs < expr_end; ofs++)
5972 const struct token *t = lex_ofs_token (lexer, ofs);
5973 if (t->type == T_LBRACK)
5975 else if (t->type == T_RBRACK && nest > 0)
5981 else if (t->type == T_ID)
5983 const struct variable *var
5984 = dict_lookup_var (dict, t->string.string);
5985 const char *label = var ? var_get_label (var) : NULL;
5986 ds_put_cstr (out, label ? label : t->string.string);
5990 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5991 ds_put_byte (out, ' ');
5993 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5994 ds_put_cstr (out, repr);
5997 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5998 ds_put_byte (out, ' ');
/* Appends title text IN to OUT, expanding the substitution codes that start
   with a right parenthesis:

     )DATE    the time NOW formatted with "%x";
     )TIME    the time NOW formatted with "%X";
     )TABLE   the table expression spanning token offsets EXPR_START to
              EXPR_END, rendered by put_table_expression().

   Text up to each ')' is copied verbatim.  A ')' that does not begin one of
   the codes above is copied as a literal character. */
6004 put_title_text (struct string *out, struct substring in, time_t now,
6005 struct lexer *lexer, struct dictionary *dict,
6006 int expr_start, int expr_end)
6010 size_t chunk = ss_find_byte (in, ')');
6011 ds_put_substring (out, ss_head (in, chunk));
6012 ss_advance (&in, chunk);
6013 if (ss_is_empty (in))
6016 if (skip_prefix (&in, ss_cstr (")DATE")))
6017 put_strftime (out, now, "%x");
6018 else if (skip_prefix (&in, ss_cstr (")TIME")))
6019 put_strftime (out, now, "%X");
6020 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6021 put_table_expression (out, lexer, dict, expr_start, expr_end);
6024 ds_put_byte (out, ')');
6025 ss_advance (&in, 1);
/* Implements the CTABLES command.

   The visible code proceeds in these stages:

     1. Runs the measurement-level guesser over the data, then sets up the
        per-variable label display array, a private copy of the default
        pivot table look (with omit_empty turned off), and the CTABLES state
        CT, including four custom formats chosen according to the decimal
        point setting.
     2. Parses the global subcommands (FORMAT, VLABELS, MRSETS, SMISSING,
        PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS) up to the first TABLE.
     3. For each TABLE: creates the table with default categories, parses up
        to three axes separated by BY, checks the scale-variable and summary
        placement rules, then parses the per-table subcommands (SLABELS,
        CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST, COMPARETEST) and
        prepares the table.
     4. Opens the data, runs ctables_execute(), commits the procedure and
        destroys CT, returning CMD_SUCCESS or CMD_FAILURE.

   Error exits (`goto error' and the like) are on lines elided from this
   listing. */
6031 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6033 struct casereader *input = NULL;
6035 struct measure_guesser *mg = measure_guesser_create (ds);
6038 input = proc_open (ds);
6039 measure_guesser_run (mg, input);
6040 measure_guesser_destroy (mg);
/* Every variable starts with the global variable-label display setting. */
6043 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6044 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6045 enum settings_value_show tvars = settings_get_show_variables ();
6046 for (size_t i = 0; i < n_vars; i++)
6047 vlabels[i] = (enum ctables_vlabel) tvars;
6049 struct pivot_table_look *look = pivot_table_look_unshare (
6050 pivot_table_look_ref (pivot_table_look_get_default ()));
6051 look->omit_empty = false;
6053 struct ctables *ct = xmalloc (sizeof *ct);
6054 *ct = (struct ctables) {
6055 .dict = dataset_dict (ds),
6057 .ctables_formats = FMT_SETTINGS_INIT,
6059 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once so that every title substitution sees the same time. */
6062 time_t now = time (NULL);
6067 const char *dot_string;
6068 const char *comma_string;
/* Number styles for the four CTABLES-specific formats, one string for each
   decimal point setting. */
6070 static const struct ctf ctfs[4] = {
6071 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6072 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6073 { CTEF_PAREN, "-,(,),", "-.(.)." },
6074 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6076 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6077 for (size_t i = 0; i < 4; i++)
6079 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6080 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6081 fmt_number_style_from_string (s));
6084 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands, up to the first TABLE. */
6087 while (!lex_match_id (lexer, "TABLE"))
6089 if (lex_match_id (lexer, "FORMAT"))
6091 double widths[2] = { SYSMIS, SYSMIS };
6092 double units_per_inch = 72.0;
6094 while (lex_token (lexer) != T_SLASH)
6096 if (lex_match_id (lexer, "MINCOLWIDTH"))
6098 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6101 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6103 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6106 else if (lex_match_id (lexer, "UNITS"))
6108 lex_match (lexer, T_EQUALS);
6109 if (lex_match_id (lexer, "POINTS"))
6110 units_per_inch = 72.0;
6111 else if (lex_match_id (lexer, "INCHES"))
6112 units_per_inch = 1.0;
6113 else if (lex_match_id (lexer, "CM"))
6114 units_per_inch = 2.54;
6117 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6121 else if (lex_match_id (lexer, "EMPTY"))
6126 lex_match (lexer, T_EQUALS);
6127 if (lex_match_id (lexer, "ZERO"))
6129 /* Nothing to do. */
6131 else if (lex_match_id (lexer, "BLANK"))
6132 ct->zero = xstrdup ("");
6133 else if (lex_force_string (lexer))
6135 ct->zero = ss_xstrdup (lex_tokss (lexer));
6141 else if (lex_match_id (lexer, "MISSING"))
6143 lex_match (lexer, T_EQUALS);
6144 if (!lex_force_string (lexer))
/* A MISSING string of exactly "." is stored as null rather than as a copy. */
6148 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6149 ? ss_xstrdup (lex_tokss (lexer))
6155 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6156 "UNITS", "EMPTY", "MISSING");
6161 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6162 && widths[0] > widths[1])
6164 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Widths are converted from the chosen unit to inches and then scaled by 96
   before being stored in the look's horizontal width range. */
6168 for (size_t i = 0; i < 2; i++)
6169 if (widths[i] != SYSMIS)
6171 int *wr = ct->look->width_ranges[TABLE_HORZ];
6172 wr[i] = widths[i] / units_per_inch * 96.0;
6177 else if (lex_match_id (lexer, "VLABELS"))
6179 if (!lex_force_match_id (lexer, "VARIABLES"))
6181 lex_match (lexer, T_EQUALS);
6183 struct variable **vars;
6185 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6189 if (!lex_force_match_id (lexer, "DISPLAY"))
6194 lex_match (lexer, T_EQUALS);
6196 enum ctables_vlabel vlabel;
6197 if (lex_match_id (lexer, "DEFAULT"))
6198 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6199 else if (lex_match_id (lexer, "NAME"))
6201 else if (lex_match_id (lexer, "LABEL"))
6202 vlabel = CTVL_LABEL;
6203 else if (lex_match_id (lexer, "BOTH"))
6205 else if (lex_match_id (lexer, "NONE"))
6209 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6215 for (size_t i = 0; i < n_vars; i++)
6216 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6219 else if (lex_match_id (lexer, "MRSETS"))
6221 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6223 lex_match (lexer, T_EQUALS);
6224 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6227 else if (lex_match_id (lexer, "SMISSING"))
6229 if (lex_match_id (lexer, "VARIABLE"))
6230 ct->smissing_listwise = false;
6231 else if (lex_match_id (lexer, "LISTWISE"))
6232 ct->smissing_listwise = true;
6235 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6239 else if (lex_match_id (lexer, "PCOMPUTE"))
6241 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6244 else if (lex_match_id (lexer, "PPROPERTIES"))
6246 if (!ctables_parse_pproperties (lexer, ct))
6249 else if (lex_match_id (lexer, "WEIGHT"))
6251 if (!lex_force_match_id (lexer, "VARIABLE"))
6253 lex_match (lexer, T_EQUALS);
6254 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6258 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6260 if (lex_match_id (lexer, "COUNT"))
6262 lex_match (lexer, T_EQUALS);
6263 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6266 ct->hide_threshold = lex_integer (lexer);
/* Without an explicit COUNT, a threshold that is still 0 becomes 5. */
6269 else if (ct->hide_threshold == 0)
6270 ct->hide_threshold = 5;
6274 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6275 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6276 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6280 if (!lex_force_match (lexer, T_SLASH))
/* One iteration per TABLE subcommand. */
6284 size_t allocated_tables = 0;
6287 if (ct->n_tables >= allocated_tables)
6288 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6289 sizeof *ct->tables);
/* Default category specification for the new table. */
6291 struct ctables_category *cat = xmalloc (sizeof *cat);
6292 *cat = (struct ctables_category) {
6294 .include_missing = false,
6295 .sort_ascending = true,
6298 struct ctables_categories *c = xmalloc (sizeof *c);
6299 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6300 *c = (struct ctables_categories) {
6307 struct ctables_categories **categories = xnmalloc (n_vars,
6308 sizeof *categories);
6309 for (size_t i = 0; i < n_vars; i++)
6312 struct ctables_table *t = xmalloc (sizeof *t);
6313 *t = (struct ctables_table) {
6315 .slabels_axis = PIVOT_AXIS_COLUMN,
6316 .slabels_visible = true,
6317 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6319 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6320 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6321 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6323 .clabels_from_axis = PIVOT_AXIS_LAYER,
6324 .categories = categories,
6325 .n_categories = n_vars,
6328 ct->tables[ct->n_tables++] = t;
/* Table expression: row axis, then optional BY column axis, then optional BY
   layer axis.  EXPR_START and EXPR_END delimit it for )TABLE in titles. */
6330 lex_match (lexer, T_EQUALS);
6331 int expr_start = lex_ofs (lexer);
6332 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6334 if (lex_match (lexer, T_BY))
6336 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6337 ct, t, PIVOT_AXIS_COLUMN))
6340 if (lex_match (lexer, T_BY))
6342 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6343 ct, t, PIVOT_AXIS_LAYER))
6347 int expr_end = lex_ofs (lexer);
6349 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6350 && !t->axes[PIVOT_AXIS_LAYER])
6352 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables may appear on only one axis. */
6356 const struct ctables_axis *scales[PIVOT_N_AXES];
6357 size_t n_scales = 0;
6358 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6360 scales[a] = find_scale (t->axes[a]);
6366 msg (SE, _("Scale variables may appear only on one axis."));
6367 if (scales[PIVOT_AXIS_ROW])
6368 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6369 _("This scale variable appears on the rows axis."));
6370 if (scales[PIVOT_AXIS_COLUMN])
6371 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6372 _("This scale variable appears on the columns axis."));
6373 if (scales[PIVOT_AXIS_LAYER])
6374 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6375 _("This scale variable appears on the layer axis."));
/* Summaries may appear on only one axis. */
6379 const struct ctables_axis *summaries[PIVOT_N_AXES];
6380 size_t n_summaries = 0;
6381 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6383 summaries[a] = (scales[a]
6385 : find_categorical_summary_spec (t->axes[a]));
6389 if (n_summaries > 1)
6391 msg (SE, _("Summaries may appear only on one axis."));
6392 if (summaries[PIVOT_AXIS_ROW])
6393 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6394 _("This variable on the rows axis has a summary."));
6395 if (summaries[PIVOT_AXIS_COLUMN])
6396 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6397 _("This variable on the columns axis has a summary."));
6398 if (summaries[PIVOT_AXIS_LAYER])
6399 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6400 _("This variable on the layers axis has a summary."));
/* The summary axis is the axis that has a summary if any does, otherwise an
   axis that is present at all. */
6403 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6404 if (n_summaries ? summaries[a] : t->axes[a])
6406 t->summary_axis = a;
6410 if (lex_token (lexer) == T_ENDCMD)
6412 if (!ctables_prepare_table (t))
6416 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or end of command. */
6419 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6421 if (lex_match_id (lexer, "SLABELS"))
6423 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6425 if (lex_match_id (lexer, "POSITION"))
6427 lex_match (lexer, T_EQUALS);
6428 if (lex_match_id (lexer, "COLUMN"))
6429 t->slabels_axis = PIVOT_AXIS_COLUMN;
6430 else if (lex_match_id (lexer, "ROW"))
6431 t->slabels_axis = PIVOT_AXIS_ROW;
6432 else if (lex_match_id (lexer, "LAYER"))
6433 t->slabels_axis = PIVOT_AXIS_LAYER;
6436 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6440 else if (lex_match_id (lexer, "VISIBLE"))
6442 lex_match (lexer, T_EQUALS);
6443 if (!parse_bool (lexer, &t->slabels_visible))
6448 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6453 else if (lex_match_id (lexer, "CLABELS"))
6455 if (lex_match_id (lexer, "AUTO"))
6457 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6458 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6460 else if (lex_match_id (lexer, "ROWLABELS"))
6462 lex_match (lexer, T_EQUALS);
6463 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6464 if (lex_match_id (lexer, "OPPOSITE"))
6465 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6466 else if (lex_match_id (lexer, "LAYER"))
6467 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6470 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6474 else if (lex_match_id (lexer, "COLLABELS"))
6476 lex_match (lexer, T_EQUALS);
6477 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6478 if (lex_match_id (lexer, "OPPOSITE"))
6479 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6480 else if (lex_match_id (lexer, "LAYER"))
6481 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6484 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6490 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6495 else if (lex_match_id (lexer, "CRITERIA"))
6497 if (!lex_force_match_id (lexer, "CILEVEL"))
6499 lex_match (lexer, T_EQUALS);
6501 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6503 t->cilevel = lex_number (lexer);
6506 else if (lex_match_id (lexer, "CATEGORIES"))
6508 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6512 else if (lex_match_id (lexer, "TITLES"))
6517 if (lex_match_id (lexer, "CAPTION"))
6518 textp = &t->caption;
6519 else if (lex_match_id (lexer, "CORNER"))
6521 else if (lex_match_id (lexer, "TITLE"))
6525 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6528 lex_match (lexer, T_EQUALS);
/* Consecutive strings are joined with single spaces after substitution. */
6530 struct string s = DS_EMPTY_INITIALIZER;
6531 while (lex_is_string (lexer))
6533 if (!ds_is_empty (&s))
6534 ds_put_byte (&s, ' ');
6535 put_title_text (&s, lex_tokss (lexer), now,
6536 lexer, dataset_dict (ds),
6537 expr_start, expr_end);
6541 *textp = ds_steal_cstr (&s);
6543 while (lex_token (lexer) != T_SLASH
6544 && lex_token (lexer) != T_ENDCMD);
6546 else if (lex_match_id (lexer, "SIGTEST"))
6550 t->chisq = xmalloc (sizeof *t->chisq);
6551 *t->chisq = (struct ctables_chisq) {
6553 .include_mrsets = true,
6554 .all_visible = true,
6560 if (lex_match_id (lexer, "TYPE"))
6562 lex_match (lexer, T_EQUALS);
6563 if (!lex_force_match_id (lexer, "CHISQUARE"))
6566 else if (lex_match_id (lexer, "ALPHA"))
6568 lex_match (lexer, T_EQUALS);
6569 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6571 t->chisq->alpha = lex_number (lexer);
6574 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6576 lex_match (lexer, T_EQUALS);
/* Take the failure path only when parse_bool() fails.  The test previously
   lacked the negation, unlike every other parse_bool() call in this
   function, so a valid INCLUDEMRSETS setting was treated as an error. */
6577 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6580 else if (lex_match_id (lexer, "CATEGORIES"))
6582 lex_match (lexer, T_EQUALS);
6583 if (lex_match_id (lexer, "ALLVISIBLE"))
6584 t->chisq->all_visible = true;
6585 else if (lex_match_id (lexer, "SUBTOTALS"))
6586 t->chisq->all_visible = false;
6589 lex_error_expecting (lexer,
6590 "ALLVISIBLE", "SUBTOTALS");
6596 lex_error_expecting (lexer, "TYPE", "ALPHA",
6597 "INCLUDEMRSETS", "CATEGORIES");
6601 while (lex_token (lexer) != T_SLASH
6602 && lex_token (lexer) != T_ENDCMD);
6604 else if (lex_match_id (lexer, "COMPARETEST"))
6608 t->pairwise = xmalloc (sizeof *t->pairwise);
6609 *t->pairwise = (struct ctables_pairwise) {
6611 .alpha = { .05, .05 },
6612 .adjust = BONFERRONI,
6613 .include_mrsets = true,
6614 .meansvariance_allcats = true,
6615 .all_visible = true,
6624 if (lex_match_id (lexer, "TYPE"))
6626 lex_match (lexer, T_EQUALS);
6627 if (lex_match_id (lexer, "PROP"))
6628 t->pairwise->type = PROP;
6629 else if (lex_match_id (lexer, "MEAN"))
6630 t->pairwise->type = MEAN;
6633 lex_error_expecting (lexer, "PROP", "MEAN");
6637 else if (lex_match_id (lexer, "ALPHA"))
6639 lex_match (lexer, T_EQUALS);
6641 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6643 double a0 = lex_number (lexer);
6646 lex_match (lexer, T_COMMA);
6647 if (lex_is_number (lexer))
6649 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6651 double a1 = lex_number (lexer);
/* Two alpha values are stored in ascending order; one value fills both. */
6654 t->pairwise->alpha[0] = MIN (a0, a1);
6655 t->pairwise->alpha[1] = MAX (a0, a1);
6658 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6660 else if (lex_match_id (lexer, "ADJUST"))
6662 lex_match (lexer, T_EQUALS);
6663 if (lex_match_id (lexer, "BONFERRONI"))
6664 t->pairwise->adjust = BONFERRONI;
6665 else if (lex_match_id (lexer, "BH"))
6666 t->pairwise->adjust = BH;
6667 else if (lex_match_id (lexer, "NONE"))
6668 t->pairwise->adjust = 0;
6671 lex_error_expecting (lexer, "BONFERRONI", "BH",
6676 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6678 lex_match (lexer, T_EQUALS);
6679 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6682 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6684 lex_match (lexer, T_EQUALS);
6685 if (lex_match_id (lexer, "ALLCATS"))
6686 t->pairwise->meansvariance_allcats = true;
6687 else if (lex_match_id (lexer, "TESTEDCATS"))
6688 t->pairwise->meansvariance_allcats = false;
6691 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6695 else if (lex_match_id (lexer, "CATEGORIES"))
6697 lex_match (lexer, T_EQUALS);
6698 if (lex_match_id (lexer, "ALLVISIBLE"))
6699 t->pairwise->all_visible = true;
6700 else if (lex_match_id (lexer, "SUBTOTALS"))
6701 t->pairwise->all_visible = false;
6704 lex_error_expecting (lexer, "ALLVISIBLE",
6709 else if (lex_match_id (lexer, "MERGE"))
6711 lex_match (lexer, T_EQUALS);
6712 if (!parse_bool (lexer, &t->pairwise->merge))
6715 else if (lex_match_id (lexer, "STYLE"))
6717 lex_match (lexer, T_EQUALS);
6718 if (lex_match_id (lexer, "APA"))
6719 t->pairwise->apa_style = true;
6720 else if (lex_match_id (lexer, "SIMPLE"))
6721 t->pairwise->apa_style = false;
6724 lex_error_expecting (lexer, "APA", "SIMPLE");
6728 else if (lex_match_id (lexer, "SHOWSIG"))
6730 lex_match (lexer, T_EQUALS);
6731 if (!parse_bool (lexer, &t->pairwise->show_sig))
6736 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6737 "INCLUDEMRSETS", "MEANSVARIANCE",
6738 "CATEGORIES", "MERGE", "STYLE",
6743 while (lex_token (lexer) != T_SLASH
6744 && lex_token (lexer) != T_ENDCMD);
6748 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6749 "CRITERIA", "CATEGORIES", "TITLES",
6750 "SIGTEST", "COMPARETEST");
6754 if (!lex_match (lexer, T_SLASH))
/* Category labels can be moved off at most one of the row and column axes. */
6758 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6759 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6761 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6765 if (!ctables_prepare_table (t))
6768 while (lex_token (lexer) != T_ENDCMD);
/* Read the data, build the tables and commit the procedure; the command
   succeeds only if both steps do. */
6771 input = proc_open (ds);
6772 bool ok = ctables_execute (ds, input, ct);
6773 ok = proc_commit (ds) && ok;
6775 ctables_destroy (ct);
6776 return ok ? CMD_SUCCESS : CMD_FAILURE;
6781 ctables_destroy (ct);