1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
167 #if 0 /* Multiple response sets not yet implemented. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
448 static void ctables_stack_uninit (struct ctables_stack *);
452 struct hmap_node node;
457 struct ctables_occurrence
459 struct hmap_node node;
463 struct ctables_section
466 struct ctables_table *table;
467 struct ctables_nest *nests[PIVOT_N_AXES];
470 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
471 struct hmap cells; /* Contains "struct ctables_cell"s. */
472 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 static void ctables_section_uninit (struct ctables_section *);
479 struct ctables *ctables;
480 struct ctables_axis *axes[PIVOT_N_AXES];
481 struct ctables_stack stacks[PIVOT_N_AXES];
482 struct ctables_section *sections;
484 enum pivot_axis_type summary_axis;
485 struct ctables_summary_spec_set summary_specs;
486 struct variable **sum_vars;
489 enum pivot_axis_type slabels_axis;
490 bool slabels_visible;
492 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
494 Most commonly, label_axis[a] == a, and in particular we always have
495 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
497 If ROWLABELS or COLLABELS is specified, then one of
498 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
499 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
501 If any category labels are moved, then 'clabels_example' is one of the
502 variables being moved (and it is otherwise NULL). All of the variables
503 being moved have the same width, value labels, and categories, so this
504 example variable can be used to find those out.
506 The remaining members in this group are relevant only if category labels
509 'clabels_values_map' holds a "struct ctables_value" for all the values
510 that appear in all of the variables in the moved categories. It is
511 accumulated as the data is read. Once the data is fully read, its
512 sorted values are put into 'clabels_values' and 'n_clabels_values'.
514 enum pivot_axis_type label_axis[PIVOT_N_AXES];
515 enum pivot_axis_type clabels_from_axis;
516 const struct variable *clabels_example;
517 struct hmap clabels_values_map;
518 struct ctables_value **clabels_values;
519 size_t n_clabels_values;
521 /* Indexed by variable dictionary index. */
522 struct ctables_categories **categories;
531 struct ctables_chisq *chisq;
532 struct ctables_pairwise *pairwise;
535 struct ctables_categories
538 struct ctables_category *cats;
543 struct ctables_category
545 enum ctables_category_type
547 /* Explicit category lists. */
550 CCT_NRANGE, /* Numerical range. */
551 CCT_SRANGE, /* String range. */
556 /* Totals and subtotals. */
560 /* Implicit category lists. */
565 /* For contributing to TOTALN. */
566 CCT_EXCLUDED_MISSING,
570 struct ctables_category *subtotal;
576 double number; /* CCT_NUMBER. */
577 struct substring string; /* CCT_STRING, in dictionary encoding. */
578 double nrange[2]; /* CCT_NRANGE. */
579 struct substring srange[2]; /* CCT_SRANGE. */
583 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
584 bool hide_subcategories; /* CCT_SUBTOTAL. */
587 /* CCT_POSTCOMPUTE. */
590 const struct ctables_postcompute *pc;
591 enum fmt_type parse_format;
594 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
597 bool include_missing;
601 enum ctables_summary_function sort_function;
602 struct variable *sort_var;
607 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
608 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
609 struct msg_location *location;
613 ctables_category_uninit (struct ctables_category *cat)
/* Releases what CAT holds: its source location, plus the type-specific
   members visible below (the string, both string-range bounds, and the
   total label).  The case labels indicate that the type-specific part is
   selected per category type; the selecting statement is outside this
   view. */
618 msg_location_destroy (cat->location);
625 case CCT_POSTCOMPUTE:
629 ss_dealloc (&cat->string);
633 ss_dealloc (&cat->srange[0]);
634 ss_dealloc (&cat->srange[1]);
639 free (cat->total_label);
647 case CCT_EXCLUDED_MISSING:
653 nullable_substring_equal (const struct substring *a,
654 const struct substring *b)
/* A substring whose 'string' pointer is null equals only another null
   substring; two non-null substrings are compared with ss_equals(). */
656 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
660 ctables_category_equal (const struct ctables_category *a,
661 const struct ctables_category *b)
/* The type tags are compared first.  Each comparison below then reads the
   same type-specific member from both A and B. */
663 if (a->type != b->type)
669 return a->number == b->number;
672 return ss_equals (a->string, b->string);
675 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* Either bound of a string range may be a null substring, hence the
   null-aware comparison. */
678 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
679 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
/* Postcomputes are compared by identity (same pointer), not by content. */
685 case CCT_POSTCOMPUTE:
686 return a->pc == b->pc;
/* NOTE(review): strcmp() requires both labels to be non-null; confirm that
   total_label is always set for the category types that reach this line. */
690 return !strcmp (a->total_label, b->total_label);
695 return (a->include_missing == b->include_missing
696 && a->sort_ascending == b->sort_ascending
697 && a->sort_function == b->sort_function
698 && a->sort_var == b->sort_var
699 && a->percentile == b->percentile);
701 case CCT_EXCLUDED_MISSING:
709 ctables_categories_unref (struct ctables_categories *c)
/* The reference count must still be positive on entry. */
714 assert (c->n_refs > 0);
/* Uninitialize every category in the list.  (The condition under which this
   teardown runs is outside this view; presumably when the last reference
   is dropped -- confirm.) */
718 for (size_t i = 0; i < c->n_cats; i++)
719 ctables_category_uninit (&c->cats[i]);
725 ctables_categories_equal (const struct ctables_categories *a,
726 const struct ctables_categories *b)
/* Cheap checks first: the category counts and the show_empty settings are
   compared before any individual category. */
728 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
/* Then the categories are compared pairwise, position by position, so the
   order of the categories matters. */
731 for (size_t i = 0; i < a->n_cats; i++)
732 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
738 /* Chi-square test (SIGTEST). */
746 /* Pairwise comparison test (COMPARETEST). */
747 struct ctables_pairwise
749 enum { PROP, MEAN } type;
752 bool meansvariance_allcats;
754 enum { BONFERRONI = 1, BH } adjust;
778 struct variable *var;
780 struct ctables_summary_spec_set specs[N_CSVS];
784 struct ctables_axis *subs[2];
787 struct msg_location *loc;
790 static void ctables_axis_destroy (struct ctables_axis *);
799 enum ctables_function_availability
801 CTFA_ALL, /* Any variables. */
802 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
803 //CTFA_MRSETS, /* Only multiple-response sets */
806 struct ctables_summary_spec
808 enum ctables_summary_function function;
809 double percentile; /* CTSF_PTILE only. */
812 struct fmt_spec format;
813 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
820 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
821 const struct ctables_summary_spec *src)
/* DST gets its own duplicate of SRC's label rather than sharing the
   pointer; judging by the helper's name, a null label stays null. */
824 dst->label = xstrdup_if_nonnull (src->label);
828 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
835 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
836 const struct ctables_summary_spec_set *src)
/* Copy the spec array element by element with
   ctables_summary_spec_clone().  An empty source set yields a null array
   pointer instead of a zero-length allocation. */
838 struct ctables_summary_spec *specs
839 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
840 for (size_t i = 0; i < src->n; i++)
841 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
/* *DST is overwritten wholesale by the compound literal; members the
   literal does not name are zero-initialized. */
843 *dst = (struct ctables_summary_spec_set) {
848 .is_scale = src->is_scale,
853 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
/* Release each spec's contents, then the array of listwise variable
   pointers.  Only the pointer array is freed here, not the variables it
   points to. */
855 for (size_t i = 0; i < set->n; i++)
856 ctables_summary_spec_uninit (&set->specs[i]);
857 free (set->listwise_vars);
862 parse_col_width (struct lexer *lexer, const char *name, double *width)
/* The result of lex_match() is ignored, so the equals sign is optional. */
864 lex_match (lexer, T_EQUALS);
/* The keyword DEFAULT is accepted (its handling is outside this view).
   Otherwise the token must be a number that
   lex_force_num_range_closed() accepts for the bounds 0 and DBL_MAX, and
   that number is stored in *WIDTH. */
865 if (lex_match_id (lexer, "DEFAULT"))
867 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
869 *width = lex_number (lexer);
879 parse_bool (struct lexer *lexer, bool *b)
/* Recognizes the keywords NO and YES; what each one stores in *B is outside
   this view.  The final branch reports a syntax error that names both
   keywords. */
881 if (lex_match_id (lexer, "NO"))
883 else if (lex_match_id (lexer, "YES"))
887 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class recorded for summary function F. */
893 static enum ctables_function_availability
894 ctables_function_availability (enum ctables_summary_function f)
/* Lookup table indexed by summary function.  This definition of S() keeps
   only the AVAILABILITY column of each entry. */
896 static enum ctables_function_availability availability[] = {
897 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
902 return availability[f];
906 ctables_summary_function_is_count (enum ctables_summary_function f)
912 case CTSF_ROWPCT_COUNT:
913 case CTSF_COLPCT_COUNT:
914 case CTSF_TABLEPCT_COUNT:
915 case CTSF_SUBTABLEPCT_COUNT:
916 case CTSF_LAYERPCT_COUNT:
917 case CTSF_LAYERROWPCT_COUNT:
918 case CTSF_LAYERCOLPCT_COUNT:
920 case CTSF_UROWPCT_COUNT:
921 case CTSF_UCOLPCT_COUNT:
922 case CTSF_UTABLEPCT_COUNT:
923 case CTSF_USUBTABLEPCT_COUNT:
924 case CTSF_ULAYERPCT_COUNT:
925 case CTSF_ULAYERROWPCT_COUNT:
926 case CTSF_ULAYERCOLPCT_COUNT:
929 case CTSF_ROWPCT_VALIDN:
930 case CTSF_COLPCT_VALIDN:
931 case CTSF_TABLEPCT_VALIDN:
932 case CTSF_SUBTABLEPCT_VALIDN:
933 case CTSF_LAYERPCT_VALIDN:
934 case CTSF_LAYERROWPCT_VALIDN:
935 case CTSF_LAYERCOLPCT_VALIDN:
936 case CTSF_ROWPCT_TOTALN:
937 case CTSF_COLPCT_TOTALN:
938 case CTSF_TABLEPCT_TOTALN:
939 case CTSF_SUBTABLEPCT_TOTALN:
940 case CTSF_LAYERPCT_TOTALN:
941 case CTSF_LAYERROWPCT_TOTALN:
942 case CTSF_LAYERCOLPCT_TOTALN:
959 case CTSF_ROWPCT_SUM:
960 case CTSF_COLPCT_SUM:
961 case CTSF_TABLEPCT_SUM:
962 case CTSF_SUBTABLEPCT_SUM:
963 case CTSF_LAYERPCT_SUM:
964 case CTSF_LAYERROWPCT_SUM:
965 case CTSF_LAYERCOLPCT_SUM:
966 case CTSF_UROWPCT_VALIDN:
967 case CTSF_UCOLPCT_VALIDN:
968 case CTSF_UTABLEPCT_VALIDN:
969 case CTSF_USUBTABLEPCT_VALIDN:
970 case CTSF_ULAYERPCT_VALIDN:
971 case CTSF_ULAYERROWPCT_VALIDN:
972 case CTSF_ULAYERCOLPCT_VALIDN:
973 case CTSF_UROWPCT_TOTALN:
974 case CTSF_UCOLPCT_TOTALN:
975 case CTSF_UTABLEPCT_TOTALN:
976 case CTSF_USUBTABLEPCT_TOTALN:
977 case CTSF_ULAYERPCT_TOTALN:
978 case CTSF_ULAYERROWPCT_TOTALN:
979 case CTSF_ULAYERCOLPCT_TOTALN:
991 case CTSF_UROWPCT_SUM:
992 case CTSF_UCOLPCT_SUM:
993 case CTSF_UTABLEPCT_SUM:
994 case CTSF_USUBTABLEPCT_SUM:
995 case CTSF_ULAYERPCT_SUM:
996 case CTSF_ULAYERROWPCT_SUM:
997 case CTSF_ULAYERCOLPCT_SUM:
1005 parse_ctables_summary_function (struct lexer *lexer,
1006 enum ctables_summary_function *f)
1010 enum ctables_summary_function function;
1011 struct substring name;
/* Table pairing each summary function with its syntax name.  This
   definition of S() uses only the ENUM and NAME arguments. */
1013 static struct pair names[] = {
1014 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
1015 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
1018 /* The .COUNT suffix may be omitted, so the bare names are added as aliases for the corresponding *.COUNT functions.  The `_' arguments are placeholders for the columns that this S() ignores. */
1019 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
1020 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
1021 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
1022 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
1023 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
1024 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
1025 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
/* The current token must be an identifier. */
1029 if (!lex_force_id (lexer))
/* Linear search of the table against the current token; a match stores the
   entry's function in *F.  (ss_equals_case() presumably ignores case --
   confirm.) */
1032 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
1033 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
1035 *f = names[i].function;
/* Error path for a token that names no summary function (assuming a match
   above leaves the function; that exit is outside this view). */
1040 lex_error (lexer, _("Expecting summary function name."));
1045 ctables_axis_destroy (struct ctables_axis *axis)
/* Release the summary spec set kept for each of the N_CSVS summary
   variants. */
1053 for (size_t i = 0; i < N_CSVS; i++)
1054 ctables_summary_spec_set_uninit (&axis->specs[i]);
/* Recurse into both operands. */
1059 ctables_axis_destroy (axis->subs[0]);
1060 ctables_axis_destroy (axis->subs[1]);
/* The axis's source location is destroyed along with it. */
1063 msg_location_destroy (axis->loc);
/* Allocates a new axis node whose operands are SUB0 and SUB1.  Its source
   location runs from token offset START_OFS to one before LEXER's current
   offset. */
1067 static struct ctables_axis *
1068 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1069 struct ctables_axis *sub0,
1070 struct ctables_axis *sub1,
1071 struct lexer *lexer, int start_ofs)
1073 struct ctables_axis *axis = xmalloc (sizeof *axis);
/* Members that the compound literal does not name are zero-initialized. */
1074 *axis = (struct ctables_axis) {
1076 .subs = { sub0, sub1 },
1077 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1082 struct ctables_axis_parse_ctx
1084 struct lexer *lexer;
1085 struct dictionary *dict;
1087 struct ctables_table *t;
/* Returns the default output format for FUNCTION.  A per-function table
   built from the FORMAT column of the S() entries selects one of three
   results: F with width 40, PCT with width 40 and 1 decimal, or VAR's own
   print format.  Which table value leads to which result is decided by case
   labels outside this view. */
1090 static struct fmt_spec
1091 ctables_summary_default_format (enum ctables_summary_function function,
1092 const struct variable *var)
1094 static const enum ctables_format default_formats[] = {
1095 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1099 switch (default_formats[function])
/* '.d' is not named here, so the compound literal leaves it at zero. */
1102 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1105 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1108 return *var_get_print_format (var);
/* Builds the pivot value used as the label for SPEC, either from a built-in
   default or from SPEC's own label text.  The test that chooses between the
   two paths is outside this view. */
1115 static struct pivot_value *
1116 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
/* Default label text per summary function, taken from the LABEL column of
   the S() entries. */
1120 static const char *default_labels[] = {
1121 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
/* Built-in label: percentiles get a formatted "Percentile" label, and every
   other function uses its default_labels entry. */
1126 return (spec->function == CTSF_PTILE
1127 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1129 : pivot_value_new_text (default_labels[spec->function]));
/* Custom label: SPEC's label is copied into OUT, with the marker ")CILEVEL"
   replaced by CILEVEL formatted with %g. */
1133 struct substring in = ss_cstr (spec->label);
1134 struct substring target = ss_cstr (")CILEVEL");
1136 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next marker and step past it.  (The
   enclosing loop and the test that guards the return below are outside
   this view.) */
1139 size_t chunk = ss_find_substring (in, target);
1140 ds_put_substring (&out, ss_head (in, chunk));
1141 ss_advance (&in, chunk);
1143 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the marker itself and emit the confidence level in its place. */
1145 ss_advance (&in, target.length);
1146 ds_put_format (&out, "%g", cilevel);
1152 ctables_summary_function_name (enum ctables_summary_function function)
/* Lookup table from summary function to its syntax name.  This definition of
   S() keeps only the NAME column of each entry. */
1154 static const char *names[] = {
1155 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1159 return names[function];
1163 add_summary_spec (struct ctables_axis *axis,
1164 enum ctables_summary_function function, double percentile,
1165 const char *label, const struct fmt_spec *format,
1166 bool is_ctables_format, const struct msg_location *loc,
1167 enum ctables_summary_variant sv)
/* Variable (leaf) axis: check that FUNCTION may be applied to this variable,
   then append a spec to the variable's set for variant SV. */
1169 if (axis->op == CTAO_VAR)
1171 const char *function_name = ctables_summary_function_name (function);
1172 const char *var_name = var_get_name (axis->var);
1173 switch (ctables_function_availability (function))
/* Each diagnostic is a pair: an error at LOC (the summary function) and a
   note at the axis's own location (the variable). */
1177 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1178 "response sets."), function_name);
1179 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* Scale-only functions are rejected for a non-scale variable, except in the
   CSV_TOTAL variant. */
1185 if (!axis->scale && sv != CSV_TOTAL)
1188 _("Summary function %s applies only to scale variables."),
1190 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Grow the spec array when it is full; x2nrealloc() updates
   set->allocated. */
1200 struct ctables_summary_spec_set *set = &axis->specs[sv];
1201 if (set->n >= set->allocated)
1202 set->specs = x2nrealloc (set->specs, &set->allocated,
1203 sizeof *set->specs);
/* The new spec takes its own duplicate of LABEL.  A null FORMAT selects the
   default format for FUNCTION and this variable. */
1205 struct ctables_summary_spec *dst = &set->specs[set->n++];
1206 *dst = (struct ctables_summary_spec) {
1207 .function = function,
1208 .percentile = percentile,
1209 .label = xstrdup_if_nonnull (label),
1210 .format = (format ? *format
1211 : ctables_summary_default_format (function, axis->var)),
1212 .is_ctables_format = is_ctables_format,
/* Any other axis: apply the same spec to both operands.  A failing operand
   is detected here; how the failure is handled is outside this view. */
1218 for (size_t i = 0; i < 2; i++)
1219 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1220 format, is_ctables_format, loc, sv))
/* Forward declaration: ctables_axis_parse_primary() calls this to parse a
   parenthesized subexpression, before the definition appears below. */
1226 static struct ctables_axis *ctables_axis_parse_stack (
1227 struct ctables_axis_parse_ctx *);
/* Parses and returns a primary axis expression: either a parenthesized stack
   expression or a single variable from CTX->dict.

   A variable may be followed by "[S]" or "[C]" to force it to be treated as
   scale or as not scale, respectively; without either marker it is scale
   exactly when its measurement level is MEASURE_SCALE.  A string variable
   that would be treated as scale is reported as an error and the newly
   created axis is destroyed. */
1230 static struct ctables_axis *
1231 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1233 if (lex_match (ctx->lexer, T_LPAREN))
1235 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* On a failed parse or a missing ")", discard whatever was parsed. */
1236 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1238 ctables_axis_destroy (sub);
1244 if (!lex_force_id (ctx->lexer))
/* Remember where the variable reference starts, for AXIS->loc below. */
1247 int start_ofs = lex_ofs (ctx->lexer);
1248 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1252 struct ctables_axis *axis = xmalloc (sizeof *axis);
1253 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1255 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1256 : lex_match_phrase (ctx->lexer, "[C]") ? false
1257 : var_get_measure (var) == MEASURE_SCALE);
/* The location runs from the variable name through the last token consumed,
   so it includes any "[S]" or "[C]" marker. */
1258 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1259 lex_ofs (ctx->lexer) - 1);
1260 if (axis->scale && var_is_alpha (var))
1262 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1264 var_get_name (var));
1265 ctables_axis_destroy (axis);
/* Returns true if the null-terminated string S contains at least one ASCII
   decimal digit.  strcspn() yields the length of the leading run of
   non-digit characters, so the character at that index is the terminator
   only when S has no digit at all. */
1273 has_digit (const char *s)
1275 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT.

   The type name and the width and decimals are first read in abstract form.
   The CTABLES-specific names NEGPAREN, NEQUAL, PAREN, and PCTPAREN (compared
   without regard to case) select the corresponding CTEF_* type.  For these,
   the width and decimals are validated here, and *IS_CTABLES_FORMAT is set
   to true.

   Otherwise *IS_CTABLES_FORMAT is set to false and the specifier is parsed
   as an ordinary format, which must pass the output-format check and be
   compatible with numeric values.

   NOTE(review): the failure returns and the condition guarding the
   "width 2 or greater" message are not visible in this excerpt. */
1279 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1280 bool *is_ctables_format)
1282 char type[FMT_TYPE_LEN_MAX + 1];
1283 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1286 if (!strcasecmp (type, "NEGPAREN"))
1287 format->type = CTEF_NEGPAREN;
1288 else if (!strcasecmp (type, "NEQUAL"))
1289 format->type = CTEF_NEQUAL;
1290 else if (!strcasecmp (type, "PAREN"))
1291 format->type = CTEF_PAREN;
1292 else if (!strcasecmp (type, "PCTPAREN"))
1293 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific name: fall back to the ordinary format parser. */
1296 *is_ctables_format = false;
1297 return (parse_format_specifier (lexer, format)
1298 && fmt_check_output (format)
1299 && fmt_check_type_compat (format, VAL_NUMERIC));
1305 lex_next_error (lexer, -1, -1,
1306 _("Output format %s requires width 2 or greater."), type);
/* The decimals must be strictly less than the width. */
1309 else if (format->d > format->w - 1)
1311 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1312 "greater than decimals."), type);
1317 *is_ctables_format = true;
/* Parses a primary axis expression that may be followed by a bracketed list
   of summary specifications, and returns the resulting axis.

   If no "[" follows the primary expression, parsing of summaries is skipped.
   Otherwise each specification consists of:

     - a summary function;
     - for PTILE only, a percentile in the closed range 0 to 100;
     - an optional string label;
     - an optional format, recognized as an identifier containing a digit.

   Specifications may be separated by commas.  They start out in the CSV_CELL
   variant.  "TOTALS" followed by "[" is accepted only while in CSV_CELL;
   presumably it switches SV to CSV_TOTAL (the assignment is not visible in
   this excerpt), after which an extra "]" is required to close the nested
   list.  On the error path the axis parsed so far is destroyed. */
1322 static struct ctables_axis *
1323 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1325 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1326 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1329 enum ctables_summary_variant sv = CSV_CELL;
/* Start of this specification, used for its message location. */
1332 int start_ofs = lex_ofs (ctx->lexer);
1334 /* Parse function. */
1335 enum ctables_summary_function function;
1336 if (!parse_ctables_summary_function (ctx->lexer, &function))
1339 /* Parse percentile. */
1340 double percentile = 0;
1341 if (function == CTSF_PTILE)
1343 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1345 percentile = lex_number (ctx->lexer);
1346 lex_get (ctx->lexer);
/* Optional label: a string token, copied into LABEL. */
1351 if (lex_is_string (ctx->lexer))
1353 label = ss_xstrdup (lex_tokss (ctx->lexer));
1354 lex_get (ctx->lexer);
/* Optional format: only an identifier that contains a digit is treated as a
   format specifier. */
1358 struct fmt_spec format;
1359 const struct fmt_spec *formatp;
1360 bool is_ctables_format = false;
1361 if (lex_token (ctx->lexer) == T_ID
1362 && has_digit (lex_tokcstr (ctx->lexer)))
1364 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1365 &is_ctables_format))
/* LOC covers all the tokens of this specification; it is passed to
   add_summary_spec() for diagnostics and destroyed right afterward.
   NOTE(review): the result of add_summary_spec() is not checked here, so
   parsing continues even when it reports an error -- confirm that this is
   intended. */
1375 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1376 lex_ofs (ctx->lexer) - 1);
1377 add_summary_spec (sub, function, percentile, label, formatp,
1378 is_ctables_format, loc, sv);
1380 msg_location_destroy (loc);
/* A comma between specifications is optional. */
1382 lex_match (ctx->lexer, T_COMMA);
1383 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1385 if (!lex_force_match (ctx->lexer, T_LBRACK))
1389 else if (lex_match (ctx->lexer, T_RBRACK))
/* A "]" that ends a TOTALS list must be followed by the outer "]". */
1391 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1398 ctables_axis_destroy (sub);
/* Searches AXIS recursively for a variable that is treated as scale.  A
   variable axis (CTAO_VAR) yields itself when its `scale' flag is set and
   NULL otherwise.  For other axes, both subaxes are searched in order;
   presumably the first nonnull result is returned (the return statements
   and the first branch of the conditional are not visible in this
   excerpt). */
1402 static const struct ctables_axis *
1403 find_scale (const struct ctables_axis *axis)
1407 else if (axis->op == CTAO_VAR)
1408 return axis->scale ? axis : NULL;
1411 for (size_t i = 0; i < 2; i++)
1413 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS recursively for a variable that is not treated as scale and
   that has at least one summary specification in the CSV_CELL variant.  A
   variable axis (CTAO_VAR) yields itself when it qualifies and NULL
   otherwise.  For other axes, both subaxes are searched in order; presumably
   the first nonnull result is returned (the return statements and the first
   branch of the conditional are not visible in this excerpt). */
1421 static const struct ctables_axis *
1422 find_categorical_summary_spec (const struct ctables_axis *axis)
1426 else if (axis->op == CTAO_VAR)
1427 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1430 for (size_t i = 0; i < 2; i++)
1432 const struct ctables_axis *sum
1433 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix axis expressions joined by ">" (T_GT) and
   combines each pair into a CTAO_NEST node, with the left operand as the
   outer level and the right operand as the inner level.

   Two constraints are enforced on each new nest, and the nest is destroyed
   when either is violated:

     - the outer and inner operands may not both contain a scale variable;
     - a categorical variable in the outer operand may not carry summaries.

   NOTE(review): the condition that tests OUTER_SUM and the updates to LHS
   are not visible in this excerpt. */
1441 static struct ctables_axis *
1442 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1444 int start_ofs = lex_ofs (ctx->lexer);
1445 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1449 while (lex_match (ctx->lexer, T_GT))
1451 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1455 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1456 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* At most one side of a nest may contain a scale variable. */
1458 const struct ctables_axis *outer_scale = find_scale (lhs);
1459 const struct ctables_axis *inner_scale = find_scale (rhs);
1460 if (outer_scale && inner_scale)
1462 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1463 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1464 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1465 ctables_axis_destroy (nest);
/* Summaries on a categorical variable are allowed only at the innermost
   level, so the outer operand must not have one. */
1469 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1472 msg_at (SE, nest->loc,
1473 _("Summaries may only be requested for categorical variables "
1474 "at the innermost nesting level."));
1475 msg_at (SN, outer_sum->loc,
1476 _("This outer categorical variable has a summary."));
1477 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by "+" (T_PLUS).  Each
   additional operand is folded into the running result as a CTAO_STACK node
   whose left child is everything parsed so far, so the resulting tree is
   left-associative.  START_OFS records where the whole expression began and
   is passed along when each new node is created. */
1487 static struct ctables_axis *
1488 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1490 int start_ofs = lex_ofs (ctx->lexer);
1491 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1495 while (lex_match (ctx->lexer, T_PLUS))
1497 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1501 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1502 ctx->lexer, start_ofs);
/* Parses the axis expression for pivot axis A of table T from LEXER and
   stores the result in T->axes[A].

   If the next token is BY, a slash, or the end of the command, the branch
   taken skips parsing; presumably the axis is left empty and success is
   returned (that return is not visible in this excerpt).  Otherwise the
   return value is true exactly when a nonnull axis was parsed. */
1509 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1510 struct ctables *ct, struct ctables_table *t,
1511 enum pivot_axis_type a)
1513 if (lex_token (lexer) == T_BY
1514 || lex_token (lexer) == T_SLASH
1515 || lex_token (lexer) == T_ENDCMD)
1518 struct ctables_axis_parse_ctx ctx = {
1524 t->axes[a] = ctables_axis_parse_stack (&ctx);
1525 return t->axes[a] != NULL;
/* Destructor for CHISQ; ctables_table_destroy() calls it on T->chisq.
   NOTE(review): the body is not visible in this excerpt. */
1529 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE; ctables_table_destroy() calls it on T->pairwise.
   NOTE(review): the body is not visible in this excerpt. */
1535 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T.  The steps visible here are:

     - uninitialize every section;
     - drop a reference to each entry of T->categories and free the array;
     - for each pivot axis, destroy the axis tree and uninitialize its stack;
     - free the summary specs array;
     - destroy every value in T->clabels_values_map, then the map itself and
       T->clabels_values;
     - destroy the chisq and pairwise settings.

   NOTE(review): other steps may exist on lines not visible in this
   excerpt. */
1541 ctables_table_destroy (struct ctables_table *t)
1546 for (size_t i = 0; i < t->n_sections; i++)
1547 ctables_section_uninit (&t->sections[i]);
1550 for (size_t i = 0; i < t->n_categories; i++)
1551 ctables_categories_unref (t->categories[i]);
1552 free (t->categories);
1554 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1556 ctables_axis_destroy (t->axes[a]);
1557 ctables_stack_uninit (&t->stacks[a]);
1559 free (t->summary_specs.specs);
/* The "safe" iteration macro is used because each node is removed from the
   map while iterating.  Values are destroyed using the width of
   T->clabels_example. */
1561 struct ctables_value *ctv, *next_ctv;
1562 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1563 &t->clabels_values_map)
1565 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1566 hmap_delete (&t->clabels_values_map, &ctv->node);
1569 hmap_destroy (&t->clabels_values_map);
1570 free (t->clabels_values);
1576 ctables_chisq_destroy (t->chisq);
1577 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT.  The steps visible here are:

     - for each postcompute, destroy its location and expression,
       uninitialize its summary specs, and remove it from CT->postcomputes;
     - uninitialize CT->ctables_formats and drop the reference to CT->look;
     - destroy every table in CT->tables.

   NOTE(review): other steps (for example freeing each postcompute itself)
   may exist on lines not visible in this excerpt. */
1582 ctables_destroy (struct ctables *ct)
/* The "safe" iteration macro is used because nodes are deleted from the map
   during the loop. */
1587 struct ctables_postcompute *pc, *next_pc;
1588 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1592 msg_location_destroy (pc->location);
1593 ctables_pcexpr_destroy (pc->expr);
1597 ctables_summary_spec_set_uninit (pc->specs);
1600 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1604 fmt_settings_uninit (&ct->ctables_formats);
1605 pivot_table_look_unref (ct->look);
1609 for (size_t i = 0; i < ct->n_tables; i++)
1610 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose numeric range runs from LOW to HIGH.
   Callers in this file pass -DBL_MAX or DBL_MAX for an open-ended bound.
   NOTE(review): the initializer line between the two visible ones is not
   shown; presumably it sets the category type. */
1615 static struct ctables_category
1616 cct_nrange (double low, double high)
1618 return (struct ctables_category) {
1620 .nrange = { low, high }
/* Returns, by value, a category whose string range runs from LOW to HIGH.
   Callers in this file pass a substring with a null `string' member for an
   open-ended bound.  The substrings are stored as given, not copied.
   NOTE(review): the initializer line between the two visible ones is not
   shown; presumably it sets the category type. */
1624 static struct ctables_category
1625 cct_srange (struct substring low, struct substring high)
1627 return (struct ctables_category) {
1629 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category (the keyword
   itself has already been consumed by the caller) and stores the result in
   *CAT as a CCT_SUBTOTAL category.

   An optional "=" followed by a string supplies the label; without it, the
   translated default "Subtotal" is used.  Either way the label is a freshly
   allocated string.  HIDE_SUBCATEGORIES is copied into the category; the
   caller passes true for HSUBTOTAL and false for SUBTOTAL. */
1634 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1635 struct ctables_category *cat)
1638 if (lex_match (lexer, T_EQUALS))
1640 if (!lex_force_string (lexer))
1643 total_label = ss_xstrdup (lex_tokss (lexer));
1647 total_label = xstrdup (_("Subtotal"));
1649 *cat = (struct ctables_category) {
1650 .type = CCT_SUBTOTAL,
1651 .hide_subcategories = hide_subcategories,
1652 .total_label = total_label
/* Converts the string in LEXER's current token between UTF-8 and DICT's
   encoding and strips trailing spaces from the result.
   NOTE(review): presumably the conversion is from UTF-8 into the dictionary
   encoding -- confirm against the argument order of recode_substring_pool().
   The final argument is NULL, so no pool is supplied for the result; the
   callers in this file release such strings with ss_dealloc().  Advancing
   the lexer and returning the result happen on lines not visible here. */
1657 static struct substring
1658 parse_substring (struct lexer *lexer, struct dictionary *dict)
1660 struct substring s = recode_substring_pool (
1661 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1662 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category from LEXER into *CAT.  The accepted forms
   are:

     OTHERNM                     CCT_OTHERNM
     MISSING                     CCT_MISSING
     SUBTOTAL / HSUBTOTAL        see ctables_table_parse_subtotal()
     LO THRU string              string range with no lower bound
     LO THRU number              numeric range starting at -DBL_MAX
     number                      a single number
     number THRU HI              numeric range ending at DBL_MAX
     number THRU number          numeric range
     string                      CCT_STRING
     string THRU HI              string range with no upper bound
     string THRU string          string range
     & identifier                reference to a postcompute known to CT

   A missing bound of a string range is represented by a substring whose
   `string' member is null.  An unknown postcompute name is reported as an
   error, and any other token is reported with lex_error().

   NOTE(review): the parameter through which CT is received is declared on a
   line not visible in this excerpt. */
1668 ctables_table_parse_explicit_category (struct lexer *lexer,
1669 struct dictionary *dict,
1671 struct ctables_category *cat)
1673 if (lex_match_id (lexer, "OTHERNM"))
1674 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1675 else if (lex_match_id (lexer, "MISSING"))
1676 *cat = (struct ctables_category) { .type = CCT_MISSING };
1677 else if (lex_match_id (lexer, "SUBTOTAL"))
1678 return ctables_table_parse_subtotal (lexer, false, cat);
1679 else if (lex_match_id (lexer, "HSUBTOTAL"))
1680 return ctables_table_parse_subtotal (lexer, true, cat);
1681 else if (lex_match_id (lexer, "LO"))
/* LO must be followed by THRU and then an upper bound, which is either a
   string or a number. */
1683 if (!lex_force_match_id (lexer, "THRU"))
1685 if (lex_is_string (lexer))
1687 struct substring sr0 = { .string = NULL };
1688 struct substring sr1 = parse_substring (lexer, dict);
1689 *cat = cct_srange (sr0, sr1);
1691 else if (lex_force_num (lexer))
1693 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1699 else if (lex_is_number (lexer))
1701 double number = lex_number (lexer);
1703 if (lex_match_id (lexer, "THRU"))
1705 if (lex_match_id (lexer, "HI"))
1706 *cat = cct_nrange (number, DBL_MAX);
1709 if (!lex_force_num (lexer))
1711 *cat = cct_nrange (number, lex_number (lexer));
/* A number that is not followed by THRU is a single-value category. */
1716 *cat = (struct ctables_category) {
1721 else if (lex_is_string (lexer))
1723 struct substring s = parse_substring (lexer, dict);
1724 if (lex_match_id (lexer, "THRU"))
1726 if (lex_match_id (lexer, "HI"))
1728 struct substring sr1 = { .string = NULL };
1729 *cat = cct_srange (s, sr1);
1733 if (!lex_force_string (lexer))
1738 struct substring sr1 = parse_substring (lexer, dict);
1739 *cat = cct_srange (s, sr1);
1743 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* "&name" refers to a postcompute, looked up by name in CT. */
1745 else if (lex_match (lexer, T_AND))
1747 if (!lex_force_id (lexer))
1749 struct ctables_postcompute *pc = ctables_find_postcompute (
1750 ct, lex_tokcstr (lexer));
/* The error location starts one token back, presumably so that it covers
   the "&" as well as the name. */
1753 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1754 msg_at (SE, loc, _("Unknown postcompute &%s."),
1755 lex_tokcstr (lexer));
1756 msg_location_destroy (loc);
1761 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1765 lex_error (lexer, NULL);
/* Parses string S as data in format FORMAT, using DICT's encoding and the
   current format settings.  If data_in() returns an error message, the
   failure is reported at LOCATION together with the format's name.
   NOTE(review): presumably the parsed number is stored in *N and the return
   value indicates success -- callers test the result with `!' -- but those
   lines are not visible in this excerpt. */
1773 parse_category_string (struct msg_location *location,
1774 struct substring s, const struct dictionary *dict,
1775 enum fmt_type format, double *n)
1778 char *error = data_in (s, dict_get_encoding (dict), format,
1779 settings_get_fmt_settings (), &v, 0, NULL);
1782 msg_at (SE, location,
1783 _("Failed to parse category specification as format %s: %s."),
1784 fmt_name (format), error);
/* Searches CATS for the category that postcompute expression node E refers
   to.  The matching rule depends on E->op:

     CTPO_CAT_NUMBER    a CCT_NUMBER category with the same number
     CTPO_CAT_STRING    a CCT_STRING category with an equal string
     CTPO_CAT_NRANGE    a CCT_NRANGE category with identical bounds
     CTPO_CAT_SRANGE    a CCT_SRANGE category whose bounds are equal, where
                        a bound may be null
     CTPO_CAT_MISSING   a CCT_MISSING category
     CTPO_CAT_OTHERNM   a CCT_OTHERNM category
     CTPO_CAT_SUBTOTAL  a CCT_SUBTOTAL category, selected by position via
                        E->subtotal_index, or with index 0 when no position
                        was given
     CTPO_CAT_TOTAL     a CCT_TOTAL category

   After the scan, a positionless subtotal reference (index 0) is treated
   specially when CATS holds more than one subtotal; presumably the lookup
   then fails because it is ambiguous (the statement is not visible in this
   excerpt, nor are the updates to BEST and N_SUBTOTALS). */
1793 static struct ctables_category *
1794 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1795 const struct ctables_pcexpr *e)
1797 struct ctables_category *best = NULL;
1798 size_t n_subtotals = 0;
1799 for (size_t i = 0; i < cats->n_cats; i++)
1801 struct ctables_category *cat = &cats->cats[i];
1804 case CTPO_CAT_NUMBER:
1805 if (cat->type == CCT_NUMBER && cat->number == e->number)
1809 case CTPO_CAT_STRING:
1810 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1814 case CTPO_CAT_NRANGE:
1815 if (cat->type == CCT_NRANGE
1816 && cat->nrange[0] == e->nrange[0]
1817 && cat->nrange[1] == e->nrange[1])
1821 case CTPO_CAT_SRANGE:
1822 if (cat->type == CCT_SRANGE
1823 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1824 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1828 case CTPO_CAT_MISSING:
1829 if (cat->type == CCT_MISSING)
1833 case CTPO_CAT_OTHERNM:
1834 if (cat->type == CCT_OTHERNM)
1838 case CTPO_CAT_SUBTOTAL:
1839 if (cat->type == CCT_SUBTOTAL)
1842 if (e->subtotal_index == n_subtotals)
1844 else if (e->subtotal_index == 0)
1849 case CTPO_CAT_TOTAL:
1850 if (cat->type == CCT_TOTAL)
/* A reference without a position cannot pick among several subtotals. */
1864 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute expression node E refers to,
   taking PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string references in E are first parsed as
   numbers in PARSE_FORMAT:

     - a CTPO_CAT_STRING node is looked up as a CTPO_CAT_NUMBER node;
     - a CTPO_CAT_SRANGE node is looked up as a CTPO_CAT_NRANGE node, where
       a null lower bound becomes -DBL_MAX and a null upper bound becomes
       DBL_MAX.

   The temporary node used for the lookup keeps E's location.  In every
   other case E is looked up unchanged.

   NOTE(review): what happens when parsing a string fails is on lines not
   visible in this excerpt. */
1869 static struct ctables_category *
1870 ctables_find_category_for_postcompute (const struct dictionary *dict,
1871 const struct ctables_categories *cats,
1872 enum fmt_type parse_format,
1873 const struct ctables_pcexpr *e)
1875 if (parse_format != FMT_F)
1877 if (e->op == CTPO_CAT_STRING)
1880 if (!parse_category_string (e->location, e->string, dict,
1881 parse_format, &number))
1884 struct ctables_pcexpr e2 = {
1885 .op = CTPO_CAT_NUMBER,
1887 .location = e->location,
1889 return ctables_find_category_for_postcompute__ (cats, &e2);
1891 else if (e->op == CTPO_CAT_SRANGE)
/* Each bound is either open (null string) or parsed in PARSE_FORMAT. */
1894 if (!e->srange[0].string)
1895 nrange[0] = -DBL_MAX;
1896 else if (!parse_category_string (e->location, e->srange[0], dict,
1897 parse_format, &nrange[0]))
1900 if (!e->srange[1].string)
1901 nrange[1] = DBL_MAX;
1902 else if (!parse_category_string (e->location, e->srange[1], dict,
1903 parse_format, &nrange[1]))
1906 struct ctables_pcexpr e2 = {
1907 .op = CTPO_CAT_NRANGE,
1908 .nrange = { nrange[0], nrange[1] },
1909 .location = e->location,
1911 return ctables_find_category_for_postcompute__ (cats, &e2);
1914 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks, recursively over postcompute expression E, that every category
   referenced by the expression can be found in CATS.  PC_CAT is the
   computed category being checked; it supplies the parse format for string
   references and the location for the main error message.  CATS_LOCATION is
   the location of the explicit category list, used in follow-up notes.

   For a node that references a category, the category is looked up with
   ctables_find_category_for_postcompute().  Two kinds of failure are
   diagnosed on the lines visible here:

     - a subtotal reference without a position when CATS contains more than
       one subtotal;
     - a reference to a category that is not in the list, with a note that
       suggests how to fix it depending on the kind of reference.

   The two subexpressions of E (when present) are checked the same way; the
   recursive call is tested with `!', so the function reports success or
   failure to its caller.

   NOTE(review): the enclosing switch, the test of the lookup result, and the
   return statements are not visible in this excerpt. */
1918 ctables_recursive_check_postcompute (struct dictionary *dict,
1919 const struct ctables_pcexpr *e,
1920 struct ctables_category *pc_cat,
1921 const struct ctables_categories *cats,
1922 const struct msg_location *cats_location)
1926 case CTPO_CAT_NUMBER:
1927 case CTPO_CAT_STRING:
1928 case CTPO_CAT_NRANGE:
1929 case CTPO_CAT_SRANGE:
1930 case CTPO_CAT_MISSING:
1931 case CTPO_CAT_OTHERNM:
1932 case CTPO_CAT_SUBTOTAL:
1933 case CTPO_CAT_TOTAL:
1935 struct ctables_category *cat = ctables_find_category_for_postcompute (
1936 dict, cats, pc_cat->parse_format, e);
/* Distinguish an ambiguous positionless subtotal reference from a category
   that is simply absent, by counting the subtotals in CATS. */
1939 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1941 size_t n_subtotals = 0;
1942 for (size_t i = 0; i < cats->n_cats; i++)
1943 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1944 if (n_subtotals > 1)
1946 msg_at (SE, cats_location,
1947 ngettext ("These categories include %zu instance "
1948 "of SUBTOTAL or HSUBTOTAL, so references "
1949 "from computed categories must refer to "
1950 "subtotals by position, "
1951 "e.g. SUBTOTAL[1].",
1952 "These categories include %zu instances "
1953 "of SUBTOTAL or HSUBTOTAL, so references "
1954 "from computed categories must refer to "
1955 "subtotals by position, "
1956 "e.g. SUBTOTAL[1].",
1959 msg_at (SN, e->location,
1960 _("This is the reference that lacks a position."));
1965 msg_at (SE, pc_cat->location,
1966 _("Computed category &%s references a category not included "
1967 "in the category list."),
1969 msg_at (SN, e->location, _("This is the missing category."));
1970 if (e->op == CTPO_CAT_SUBTOTAL)
1971 msg_at (SN, cats_location,
1972 _("To fix the problem, add subtotals to the "
1973 "list of categories here."));
/* NOTE(review): this note uses msg() without a location, unlike the
   neighboring notes that use msg_at(); confirm that this is intentional. */
1974 else if (e->op == CTPO_CAT_TOTAL)
1975 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1976 "CATEGORIES specification."));
1978 msg_at (SN, cats_location,
1979 _("To fix the problem, add the missing category to the "
1980 "list of categories here."));
1983 if (pc_cat->pc->hide_source_cats)
/* Check both operands, skipping any that is null. */
1997 for (size_t i = 0; i < 2; i++)
1998 if (e->subs[i] && !ctables_recursive_check_postcompute (
1999 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks that the N_VARS variables in VARS are all string variables.  When a
   numeric variable is found, an error naming it is reported at CAT's
   location.
   NOTE(review): presumably the function returns false at the first numeric
   variable and true otherwise -- callers test the result with `!' -- but
   the return statements are not visible in this excerpt. */
2008 all_strings (struct variable **vars, size_t n_vars,
2009 const struct ctables_category *cat)
2011 for (size_t j = 0; j < n_vars; j++)
2012 if (var_is_numeric (vars[j]))
2014 msg_at (SE, cat->location,
2015 _("This category specification may be applied only to string "
2016 "variables, but this subcommand tries to apply it to "
2017 "numeric variable %s."),
2018 var_get_name (vars[j]));
/* Parses a category specification for table T.  The keyword VARIABLES must
   come first, followed by an optional "=" and a list of variables (scratch
   variables are not allowed).  The parsed settings are stored in one new
   `struct ctables_categories' that is shared, by reference count, among all
   of the listed variables in T->categories.

   The specification continues with:

     - an optional bracketed list of explicit categories, each parsed with
       ctables_table_parse_explicit_category() and optionally separated by
       commas;
     - settings, repeated until a slash or the end of the command: ORDER,
       KEY, and MISSING (accepted only when no explicit categories were
       given), and TOTAL, LABEL, POSITION, and EMPTY.

   Afterward a total category is added if requested, each category is linked
   to a subtotal, and, when explicit categories were given, each of them is
   validated against the types and print formats of the listed variables.

   NOTE(review): many lines of this function (braces, error exits, some case
   labels and assignments) are not visible in this excerpt; the comments
   below describe only what the visible lines show. */
2025 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2026 struct ctables *ct, struct ctables_table *t)
2028 if (!lex_match_id (lexer, "VARIABLES"))
2030 lex_match (lexer, T_EQUALS);
2032 struct variable **vars;
2034 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all the variables have print formats of the same type;
   COMMON_FORMAT becomes NULL as soon as one differs from the first. */
2037 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2038 for (size_t i = 1; i < n_vars; i++)
2040 const struct fmt_spec *f = var_get_print_format (vars[i]);
2041 if (f->type != common_format->type)
2043 common_format = NULL;
/* The expression continued here tests whether the common format is in a
   date, time, or date-component category.  Presumably it initializes
   `parse_strings', which is used further down but declared on a line not
   visible in this excerpt. */
2049 && (fmt_get_category (common_format->type)
2050 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object, holding one reference per listed variable, takes
   the place of whatever each variable referred to before. */
2052 struct ctables_categories *c = xmalloc (sizeof *c);
2053 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2054 for (size_t i = 0; i < n_vars; i++)
2056 struct ctables_categories **cp
2057 = &t->categories[var_get_dict_index (vars[i])];
2058 ctables_categories_unref (*cp);
/* Explicit category list.  The token offsets of the list are recorded so
   that its location can be reported later; -1 means that no list was
   given. */
2062 size_t allocated_cats = 0;
2063 int cats_start_ofs = -1;
2064 int cats_end_ofs = -1;
2065 if (lex_match (lexer, T_LBRACK))
2067 cats_start_ofs = lex_ofs (lexer);
2070 if (c->n_cats >= allocated_cats)
2071 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2073 int start_ofs = lex_ofs (lexer);
2074 struct ctables_category *cat = &c->cats[c->n_cats];
2075 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2077 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2080 lex_match (lexer, T_COMMA);
2082 while (!lex_match (lexer, T_RBRACK));
2083 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the implicit category and for totals; the settings parsed
   below override them. */
2086 struct ctables_category cat = {
2088 .include_missing = false,
2089 .sort_ascending = true,
2091 bool show_totals = false;
2092 char *total_label = NULL;
2093 bool totals_before = false;
2094 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* ORDER, KEY, and MISSING are recognized only when there are no explicit
   categories. */
2096 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2098 lex_match (lexer, T_EQUALS);
2099 if (lex_match_id (lexer, "A"))
2100 cat.sort_ascending = true;
2101 else if (lex_match_id (lexer, "D"))
2102 cat.sort_ascending = false;
2105 lex_error_expecting (lexer, "A", "D");
2109 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2111 lex_match (lexer, T_EQUALS);
2112 if (lex_match_id (lexer, "VALUE"))
2113 cat.type = CCT_VALUE;
2114 else if (lex_match_id (lexer, "LABEL"))
2115 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile from 0 to 100. */
2118 cat.type = CCT_FUNCTION;
2119 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2122 if (lex_match (lexer, T_LPAREN))
2124 cat.sort_var = parse_variable (lexer, dict);
2128 if (cat.sort_function == CTSF_PTILE)
2130 lex_match (lexer, T_COMMA);
2131 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2133 cat.percentile = lex_number (lexer);
2137 if (!lex_force_match (lexer, T_RPAREN))
2140 else if (ctables_function_availability (cat.sort_function)
/* This call is reached only when lex_match() above found no "(", so it is
   presumably made just to report the missing "(", with the result
   deliberately unused. */
2143 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2148 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2150 lex_match (lexer, T_EQUALS);
2151 if (lex_match_id (lexer, "INCLUDE"))
2152 cat.include_missing = true;
2153 else if (lex_match_id (lexer, "EXCLUDE"))
2154 cat.include_missing = false;
2157 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2161 else if (lex_match_id (lexer, "TOTAL"))
2163 lex_match (lexer, T_EQUALS);
2164 if (!parse_bool (lexer, &show_totals))
2167 else if (lex_match_id (lexer, "LABEL"))
2169 lex_match (lexer, T_EQUALS);
2170 if (!lex_force_string (lexer))
/* NOTE(review): if LABEL can appear more than once, confirm that the
   previous TOTAL_LABEL is freed before this assignment (a line is not
   visible just above). */
2173 total_label = ss_xstrdup (lex_tokss (lexer));
2176 else if (lex_match_id (lexer, "POSITION"))
2178 lex_match (lexer, T_EQUALS);
2179 if (lex_match_id (lexer, "BEFORE"))
2180 totals_before = true;
2181 else if (lex_match_id (lexer, "AFTER"))
2182 totals_before = false;
2185 lex_error_expecting (lexer, "BEFORE", "AFTER");
2189 else if (lex_match_id (lexer, "EMPTY"))
2191 lex_match (lexer, T_EQUALS);
2192 if (lex_match_id (lexer, "INCLUDE"))
2193 c->show_empty = true;
2194 else if (lex_match_id (lexer, "EXCLUDE"))
2195 c->show_empty = false;
2198 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two alternative messages for an unrecognized keyword: the first lists all
   seven settings and the second omits ORDER, KEY, and MISSING.  The
   condition that chooses between them is not visible in this excerpt. */
2205 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2206 "TOTAL", "LABEL", "POSITION", "EMPTY");
2208 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category CAT (the condition is not visible here). */
2215 if (c->n_cats >= allocated_cats)
2216 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2217 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, placed either at the front or at the
   end of the array. */
2222 if (c->n_cats >= allocated_cats)
2223 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2225 struct ctables_category *totals;
2228 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2229 totals = &c->cats[0];
2232 totals = &c->cats[c->n_cats];
/* The label is the one given with LABEL, or else the translated default. */
2235 *totals = (struct ctables_category) {
2237 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come before and backward
   otherwise, assigning each category's `subtotal' pointer along the way. */
2241 struct ctables_category *subtotal = NULL;
2242 for (size_t i = totals_before ? 0 : c->n_cats;
2243 totals_before ? i < c->n_cats : i-- > 0;
2244 totals_before ? i++ : 0)
2246 struct ctables_category *cat = &c->cats[i];
2255 cat->subtotal = subtotal;
2258 case CCT_POSTCOMPUTE:
2269 case CCT_EXCLUDED_MISSING:
/* Validation of explicit categories, done only when a bracketed list was
   given. */
2274 if (cats_start_ofs != -1)
2276 for (size_t i = 0; i < c->n_cats; i++)
2278 struct ctables_category *cat = &c->cats[i];
/* A postcompute's category references are checked against this list.
   String references are parsed in the common print format when
   parse_strings is set, and in FMT_F otherwise. */
2281 case CCT_POSTCOMPUTE:
2282 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2283 struct msg_location *cats_location
2284 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2285 bool ok = ctables_recursive_check_postcompute (
2286 dict, cat->pc->expr, cat, c, cats_location);
2287 msg_location_destroy (cats_location);
/* Categories that apply only to numeric variables are rejected when any
   listed variable is a string variable. */
2294 for (size_t j = 0; j < n_vars; j++)
2295 if (var_is_alpha (vars[j]))
2297 msg_at (SE, cat->location,
2298 _("This category specification may be applied "
2299 "only to numeric variables, but this "
2300 "subcommand tries to apply it to string "
2302 var_get_name (vars[j]));
/* A string category is either converted to a number by parsing it in the
   common print format, which releases the string, or else required to
   apply only to string variables. */
2311 if (!parse_category_string (cat->location, cat->string, dict,
2312 common_format->type, &n))
2315 ss_dealloc (&cat->string);
2317 cat->type = CCT_NUMBER;
2320 else if (!all_strings (vars, n_vars, cat))
/* Likewise for a string range: each bound is either open (null string) or
   parsed in the common print format, and then the range becomes
   CCT_NRANGE. */
2329 if (!cat->srange[0].string)
2331 else if (!parse_category_string (cat->location,
2332 cat->srange[0], dict,
2333 common_format->type, &n[0]))
2336 if (!cat->srange[1].string)
2338 else if (!parse_category_string (cat->location,
2339 cat->srange[1], dict,
2340 common_format->type, &n[1]))
2343 ss_dealloc (&cat->srange[0]);
2344 ss_dealloc (&cat->srange[1]);
2346 cat->type = CCT_NRANGE;
2347 cat->nrange[0] = n[0];
2348 cat->nrange[1] = n[1];
2350 else if (!all_strings (vars, n_vars, cat))
2361 case CCT_EXCLUDED_MISSING:
/* Releases the resources held by NEST that are visible here: the summary
   spec set for every summary variant and the `domains' array for every
   domain type.  NEST itself is not freed on the visible lines.
   NOTE(review): other members may be released on lines not visible in this
   excerpt. */
2376 ctables_nest_uninit (struct ctables_nest *nest)
2379 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2380 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2381 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2382 free (nest->domains[dt]);
/* Uninitializes each of the STACK->n nests in STACK and then frees the array
   of nests.  STACK itself is not freed on the visible lines.
   NOTE(review): a guard for a null STACK may exist on a line not visible in
   this excerpt. */
2386 ctables_stack_uninit (struct ctables_stack *stack)
2390 for (size_t i = 0; i < stack->n; i++)
2391 ctables_nest_uninit (&stack->nests[i]);
2392 free (stack->nests);
/* Returns a new stack that contains one nest for every pairing of a nest
   from S0 with a nest from S1, in S0-major order.  Each new nest:

     - lists the variables of the S0 nest followed by those of the S1 nest;
     - takes its scale index from the S0 nest if it has one, otherwise from
       the S1 nest shifted by the number of S0 variables;
     - gets clones of the summary spec sets of one of the two source nests.

   S0 and S1 are passed by value and are uninitialized before returning, so
   the caller must not use them afterward.

   NOTE(review): lines between the signature and the allocation are not
   visible in this excerpt, so special cases (for example an empty stack)
   may be handled there. */
2396 static struct ctables_stack
2397 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* NOTE(review): xnmalloc() checks for overflow only in the product of its
   two arguments; the inner product `s1.n * sizeof *stack.nests' is computed
   beforehand without a check.  Confirm that this is acceptable. */
2404 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2405 for (size_t i = 0; i < s0.n; i++)
2406 for (size_t j = 0; j < s1.n; j++)
2408 const struct ctables_nest *a = &s0.nests[i];
2409 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate A's variables and B's variables into a new array. */
2411 size_t allocate = a->n + b->n;
2412 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2414 for (size_t k = 0; k < a->n; k++)
2415 vars[n++] = a->vars[k];
2416 for (size_t k = 0; k < b->n; k++)
2417 vars[n++] = b->vars[k];
2418 assert (n == allocate);
/* SUMMARY_SRC is chosen according to which of A and B has a variable
   attached to its CSV_CELL specs (the assignments are not visible in this
   excerpt). */
2420 const struct ctables_nest *summary_src;
2421 if (!a->specs[CSV_CELL].var)
2423 else if (!b->specs[CSV_CELL].var)
2428 struct ctables_nest *new = &stack.nests[stack.n++];
2429 *new = (struct ctables_nest) {
/* B's scale index must be shifted past A's variables, because B's variables
   follow A's in the combined array. */
2431 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2432 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2436 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2437 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2439 ctables_stack_uninit (&s0);
2440 ctables_stack_uninit (&s1);
/* Returns a new stack that holds the nests of S0 followed by the nests of
   S1.  The nests are copied by structure assignment, so the new stack takes
   over whatever the old nests pointed to.  Each nest copied from S1 has its
   `group_head' increased by S0.n, which keeps it pointing at the same nest
   after the shift.
   NOTE(review): releasing the old `nests' arrays and returning the result
   happen on lines not visible in this excerpt. */
2444 static struct ctables_stack
2445 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2447 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2448 for (size_t i = 0; i < s0.n; i++)
2449 stack.nests[stack.n++] = s0.nests[i];
2450 for (size_t i = 0; i < s1.n; i++)
2452 stack.nests[stack.n] = s1.nests[i];
2453 stack.nests[stack.n].group_head += s0.n;
2456 assert (stack.n == s0.n + s1.n);
/* Returns a stack that consists of a single nest built from variable axis A.
   The nest's scale index is 0 when A is treated as scale and SIZE_MAX
   otherwise.

   If A has at least one CSV_CELL summary spec, or A is scale, the summary
   spec sets of every variant are cloned from A into the nest and tagged
   with A's variable and scale flag.

   NOTE(review): the statement that stores A's variable into VARS and the
   remaining nest initializers are not visible in this excerpt. */
2462 static struct ctables_stack
2463 var_fts (const struct ctables_axis *a)
2465 struct variable **vars = xmalloc (sizeof *vars);
2468 struct ctables_nest *nest = xmalloc (sizeof *nest);
2469 *nest = (struct ctables_nest) {
2472 .scale_idx = a->scale ? 0 : SIZE_MAX,
2474 if (a->specs[CSV_CELL].n || a->scale)
2475 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2477 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2478 nest->specs[sv].var = a->var;
2479 nest->specs[sv].is_scale = a->scale;
2481 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts axis expression A into a stack of nests, recursively.  On the
   lines visible here:

     - one case yields an empty stack (presumably a null A; the condition is
       not visible in this excerpt);
     - one case combines the results for the two subaxes with stack_fts();
     - one case combines them with nest_fts().

   NOTE(review): the switch on the axis operator, including the case for a
   single variable, is not visible in this excerpt. */
2484 static struct ctables_stack
2485 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2488 return (struct ctables_stack) { .n = 0 };
2496 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2497 enumerate_fts (axis_type, a->subs[1]));
2500 /* This should consider any of the scale variables found in the result to
2501 be linked to each other listwise for SMISSING=LISTWISE. */
2502 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2503 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function in one cell.  Which member is
   in use depends on the summary function, as the existing comments on each
   group of members indicate; ctables_summary_init() and
   ctables_summary_uninit() switch on the function to set up and tear down
   the matching member.
   NOTE(review): most member declarations are not visible in this excerpt. */
2509 union ctables_summary
2511 /* COUNT, VALIDN, TOTALN. */
2514 /* MINIMUM, MAXIMUM, RANGE. */
2521 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2522 struct moments1 *moments;
2524 /* MEDIAN, MODE, PTILE. */
2527 struct casewriter *writer;
2532 /* XXX multiple response */
/* Initializes summary accumulator S for the summary function selected by
   SS->function.  The functions fall into groups that share one kind of
   state:

     - the count-based functions, including the percentage variants of
       COUNT, VALIDN, and TOTALN and their unweighted (U*) forms
       (presumably these start from a zero count; the statement is not
       visible in this excerpt);
     - the functions that track a minimum and maximum, which both start out
       as SYSMIS;
     - the moment-based functions, including the *.SUM percentages, which
       create a moments accumulator up to the variance;
     - the functions that need the sorted data, which create a sorting case
       writer.

   NOTE(review): several case labels and statements are not visible in this
   excerpt. */
2536 ctables_summary_init (union ctables_summary *s,
2537 const struct ctables_summary_spec *ss)
2539 switch (ss->function)
2543 case CTSF_ROWPCT_COUNT:
2544 case CTSF_COLPCT_COUNT:
2545 case CTSF_TABLEPCT_COUNT:
2546 case CTSF_SUBTABLEPCT_COUNT:
2547 case CTSF_LAYERPCT_COUNT:
2548 case CTSF_LAYERROWPCT_COUNT:
2549 case CTSF_LAYERCOLPCT_COUNT:
2550 case CTSF_ROWPCT_VALIDN:
2551 case CTSF_COLPCT_VALIDN:
2552 case CTSF_TABLEPCT_VALIDN:
2553 case CTSF_SUBTABLEPCT_VALIDN:
2554 case CTSF_LAYERPCT_VALIDN:
2555 case CTSF_LAYERROWPCT_VALIDN:
2556 case CTSF_LAYERCOLPCT_VALIDN:
2557 case CTSF_ROWPCT_TOTALN:
2558 case CTSF_COLPCT_TOTALN:
2559 case CTSF_TABLEPCT_TOTALN:
2560 case CTSF_SUBTABLEPCT_TOTALN:
2561 case CTSF_LAYERPCT_TOTALN:
2562 case CTSF_LAYERROWPCT_TOTALN:
2563 case CTSF_LAYERCOLPCT_TOTALN:
2570 case CTSF_UROWPCT_COUNT:
2571 case CTSF_UCOLPCT_COUNT:
2572 case CTSF_UTABLEPCT_COUNT:
2573 case CTSF_USUBTABLEPCT_COUNT:
2574 case CTSF_ULAYERPCT_COUNT:
2575 case CTSF_ULAYERROWPCT_COUNT:
2576 case CTSF_ULAYERCOLPCT_COUNT:
2577 case CTSF_UROWPCT_VALIDN:
2578 case CTSF_UCOLPCT_VALIDN:
2579 case CTSF_UTABLEPCT_VALIDN:
2580 case CTSF_USUBTABLEPCT_VALIDN:
2581 case CTSF_ULAYERPCT_VALIDN:
2582 case CTSF_ULAYERROWPCT_VALIDN:
2583 case CTSF_ULAYERCOLPCT_VALIDN:
2584 case CTSF_UROWPCT_TOTALN:
2585 case CTSF_UCOLPCT_TOTALN:
2586 case CTSF_UTABLEPCT_TOTALN:
2587 case CTSF_USUBTABLEPCT_TOTALN:
2588 case CTSF_ULAYERPCT_TOTALN:
2589 case CTSF_ULAYERROWPCT_TOTALN:
2590 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for both extremes; ctables_summary_add()
   tests for it before comparing. */
2600 s->min = s->max = SYSMIS;
2608 case CTSF_ROWPCT_SUM:
2609 case CTSF_COLPCT_SUM:
2610 case CTSF_TABLEPCT_SUM:
2611 case CTSF_SUBTABLEPCT_SUM:
2612 case CTSF_LAYERPCT_SUM:
2613 case CTSF_LAYERROWPCT_SUM:
2614 case CTSF_LAYERCOLPCT_SUM:
2619 case CTSF_UVARIANCE:
2620 case CTSF_UROWPCT_SUM:
2621 case CTSF_UCOLPCT_SUM:
2622 case CTSF_UTABLEPCT_SUM:
2623 case CTSF_USUBTABLEPCT_SUM:
2624 case CTSF_ULAYERPCT_SUM:
2625 case CTSF_ULAYERROWPCT_SUM:
2626 case CTSF_ULAYERCOLPCT_SUM:
2627 s->moments = moments1_create (MOMENT_VARIANCE);
/* The sorting writer takes cases with two values, both of width 0
   (presumably numeric: a data value and its weight -- confirm), and sorts
   them in ascending order on the value at index 0.  The prototype and the
   ordering are released here once the writer has been created. */
2637 struct caseproto *proto = caseproto_create ();
2638 proto = caseproto_add_width (proto, 0);
2639 proto = caseproto_add_width (proto, 0);
2641 struct subcase ordering;
2642 subcase_init (&ordering, 0, 0, SC_ASCEND);
2643 s->writer = sort_create_writer (&ordering, proto);
2644 subcase_uninit (&ordering);
2645 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() set up in S for the summary
   function selected by SS->function.  On the lines visible here, only two
   groups of functions own resources: the moment-based functions destroy
   their moments accumulator, and the functions that use sorted data destroy
   their case writer.  The count-based case labels have no visible cleanup
   statement.
   NOTE(review): several case labels and statements are not visible in this
   excerpt. */
2655 ctables_summary_uninit (union ctables_summary *s,
2656 const struct ctables_summary_spec *ss)
2658 switch (ss->function)
2662 case CTSF_ROWPCT_COUNT:
2663 case CTSF_COLPCT_COUNT:
2664 case CTSF_TABLEPCT_COUNT:
2665 case CTSF_SUBTABLEPCT_COUNT:
2666 case CTSF_LAYERPCT_COUNT:
2667 case CTSF_LAYERROWPCT_COUNT:
2668 case CTSF_LAYERCOLPCT_COUNT:
2669 case CTSF_ROWPCT_VALIDN:
2670 case CTSF_COLPCT_VALIDN:
2671 case CTSF_TABLEPCT_VALIDN:
2672 case CTSF_SUBTABLEPCT_VALIDN:
2673 case CTSF_LAYERPCT_VALIDN:
2674 case CTSF_LAYERROWPCT_VALIDN:
2675 case CTSF_LAYERCOLPCT_VALIDN:
2676 case CTSF_ROWPCT_TOTALN:
2677 case CTSF_COLPCT_TOTALN:
2678 case CTSF_TABLEPCT_TOTALN:
2679 case CTSF_SUBTABLEPCT_TOTALN:
2680 case CTSF_LAYERPCT_TOTALN:
2681 case CTSF_LAYERROWPCT_TOTALN:
2682 case CTSF_LAYERCOLPCT_TOTALN:
2689 case CTSF_UROWPCT_COUNT:
2690 case CTSF_UCOLPCT_COUNT:
2691 case CTSF_UTABLEPCT_COUNT:
2692 case CTSF_USUBTABLEPCT_COUNT:
2693 case CTSF_ULAYERPCT_COUNT:
2694 case CTSF_ULAYERROWPCT_COUNT:
2695 case CTSF_ULAYERCOLPCT_COUNT:
2696 case CTSF_UROWPCT_VALIDN:
2697 case CTSF_UCOLPCT_VALIDN:
2698 case CTSF_UTABLEPCT_VALIDN:
2699 case CTSF_USUBTABLEPCT_VALIDN:
2700 case CTSF_ULAYERPCT_VALIDN:
2701 case CTSF_ULAYERROWPCT_VALIDN:
2702 case CTSF_ULAYERCOLPCT_VALIDN:
2703 case CTSF_UROWPCT_TOTALN:
2704 case CTSF_UCOLPCT_TOTALN:
2705 case CTSF_UTABLEPCT_TOTALN:
2706 case CTSF_USUBTABLEPCT_TOTALN:
2707 case CTSF_ULAYERPCT_TOTALN:
2708 case CTSF_ULAYERROWPCT_TOTALN:
2709 case CTSF_ULAYERCOLPCT_TOTALN:
2725 case CTSF_ROWPCT_SUM:
2726 case CTSF_COLPCT_SUM:
2727 case CTSF_TABLEPCT_SUM:
2728 case CTSF_SUBTABLEPCT_SUM:
2729 case CTSF_LAYERPCT_SUM:
2730 case CTSF_LAYERROWPCT_SUM:
2731 case CTSF_LAYERCOLPCT_SUM:
2736 case CTSF_UVARIANCE:
2737 case CTSF_UROWPCT_SUM:
2738 case CTSF_UCOLPCT_SUM:
2739 case CTSF_UTABLEPCT_SUM:
2740 case CTSF_USUBTABLEPCT_SUM:
2741 case CTSF_ULAYERPCT_SUM:
2742 case CTSF_ULAYERROWPCT_SUM:
2743 case CTSF_ULAYERCOLPCT_SUM:
/* Counterpart of moments1_create() in ctables_summary_init(). */
2744 moments1_destroy (s->moments);
/* Counterpart of sort_create_writer() in ctables_summary_init(). */
2753 casewriter_destroy (s->writer);
2759 ctables_summary_add (union ctables_summary *s,
2760 const struct ctables_summary_spec *ss,
2761 const struct variable *var, const union value *value,
2762 bool is_scale, bool is_scale_missing,
2763 bool is_missing, bool excluded_missing,
2764 double d_weight, double e_weight)
2766 /* To determine whether a case is included in a given table for a particular
2767 kind of summary, consider the following charts for each variable in the
2768 table. Only if "yes" appears for every variable for the summary is the
2771 Categorical variables: VALIDN COUNT TOTALN
2772 Valid values in included categories yes yes yes
2773 Missing values in included categories --- yes yes
2774 Missing values in excluded categories --- --- yes
2775 Valid values in excluded categories --- --- ---
2777 Scale variables: VALIDN COUNT TOTALN
2778 Valid value yes yes yes
2779 Missing value --- yes yes
2781 Missing values include both user- and system-missing. (The system-missing
2782 value is always in an excluded category.)
2784 switch (ss->function)
2787 case CTSF_ROWPCT_TOTALN:
2788 case CTSF_COLPCT_TOTALN:
2789 case CTSF_TABLEPCT_TOTALN:
2790 case CTSF_SUBTABLEPCT_TOTALN:
2791 case CTSF_LAYERPCT_TOTALN:
2792 case CTSF_LAYERROWPCT_TOTALN:
2793 case CTSF_LAYERCOLPCT_TOTALN:
2794 s->count += d_weight;
2798 case CTSF_UROWPCT_TOTALN:
2799 case CTSF_UCOLPCT_TOTALN:
2800 case CTSF_UTABLEPCT_TOTALN:
2801 case CTSF_USUBTABLEPCT_TOTALN:
2802 case CTSF_ULAYERPCT_TOTALN:
2803 case CTSF_ULAYERROWPCT_TOTALN:
2804 case CTSF_ULAYERCOLPCT_TOTALN:
2809 case CTSF_ROWPCT_COUNT:
2810 case CTSF_COLPCT_COUNT:
2811 case CTSF_TABLEPCT_COUNT:
2812 case CTSF_SUBTABLEPCT_COUNT:
2813 case CTSF_LAYERPCT_COUNT:
2814 case CTSF_LAYERROWPCT_COUNT:
2815 case CTSF_LAYERCOLPCT_COUNT:
2816 if (is_scale || !excluded_missing)
2817 s->count += d_weight;
2821 case CTSF_UROWPCT_COUNT:
2822 case CTSF_UCOLPCT_COUNT:
2823 case CTSF_UTABLEPCT_COUNT:
2824 case CTSF_USUBTABLEPCT_COUNT:
2825 case CTSF_ULAYERPCT_COUNT:
2826 case CTSF_ULAYERROWPCT_COUNT:
2827 case CTSF_ULAYERCOLPCT_COUNT:
2828 if (is_scale || !excluded_missing)
2833 case CTSF_ROWPCT_VALIDN:
2834 case CTSF_COLPCT_VALIDN:
2835 case CTSF_TABLEPCT_VALIDN:
2836 case CTSF_SUBTABLEPCT_VALIDN:
2837 case CTSF_LAYERPCT_VALIDN:
2838 case CTSF_LAYERROWPCT_VALIDN:
2839 case CTSF_LAYERCOLPCT_VALIDN:
2843 s->count += d_weight;
2847 case CTSF_UROWPCT_VALIDN:
2848 case CTSF_UCOLPCT_VALIDN:
2849 case CTSF_UTABLEPCT_VALIDN:
2850 case CTSF_USUBTABLEPCT_VALIDN:
2851 case CTSF_ULAYERPCT_VALIDN:
2852 case CTSF_ULAYERROWPCT_VALIDN:
2853 case CTSF_ULAYERCOLPCT_VALIDN:
2862 s->count += d_weight;
2871 if (is_scale || !excluded_missing)
2872 s->count += e_weight;
2879 s->count += e_weight;
2883 s->count += e_weight;
2889 if (!is_scale_missing)
2891 assert (!var_is_alpha (var)); /* XXX? */
2892 if (s->min == SYSMIS || value->f < s->min)
2894 if (s->max == SYSMIS || value->f > s->max)
2904 case CTSF_ROWPCT_SUM:
2905 case CTSF_COLPCT_SUM:
2906 case CTSF_TABLEPCT_SUM:
2907 case CTSF_SUBTABLEPCT_SUM:
2908 case CTSF_LAYERPCT_SUM:
2909 case CTSF_LAYERROWPCT_SUM:
2910 case CTSF_LAYERCOLPCT_SUM:
2911 if (!is_scale_missing)
2912 moments1_add (s->moments, value->f, e_weight);
2919 case CTSF_UVARIANCE:
2920 case CTSF_UROWPCT_SUM:
2921 case CTSF_UCOLPCT_SUM:
2922 case CTSF_UTABLEPCT_SUM:
2923 case CTSF_USUBTABLEPCT_SUM:
2924 case CTSF_ULAYERPCT_SUM:
2925 case CTSF_ULAYERROWPCT_SUM:
2926 case CTSF_ULAYERCOLPCT_SUM:
2927 if (!is_scale_missing)
2928 moments1_add (s->moments, value->f, 1.0);
2934 d_weight = e_weight = 1.0;
2939 if (!is_scale_missing)
2941 s->ovalid += e_weight;
2943 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2944 *case_num_rw_idx (c, 0) = value->f;
2945 *case_num_rw_idx (c, 1) = e_weight;
2946 casewriter_write (s->writer, c);
2952 static enum ctables_domain_type
2953 ctables_function_domain (enum ctables_summary_function function)
2983 case CTSF_UVARIANCE:
2989 case CTSF_COLPCT_COUNT:
2990 case CTSF_COLPCT_SUM:
2991 case CTSF_COLPCT_TOTALN:
2992 case CTSF_COLPCT_VALIDN:
2993 case CTSF_UCOLPCT_COUNT:
2994 case CTSF_UCOLPCT_SUM:
2995 case CTSF_UCOLPCT_TOTALN:
2996 case CTSF_UCOLPCT_VALIDN:
2999 case CTSF_LAYERCOLPCT_COUNT:
3000 case CTSF_LAYERCOLPCT_SUM:
3001 case CTSF_LAYERCOLPCT_TOTALN:
3002 case CTSF_LAYERCOLPCT_VALIDN:
3003 case CTSF_ULAYERCOLPCT_COUNT:
3004 case CTSF_ULAYERCOLPCT_SUM:
3005 case CTSF_ULAYERCOLPCT_TOTALN:
3006 case CTSF_ULAYERCOLPCT_VALIDN:
3007 return CTDT_LAYERCOL;
3009 case CTSF_LAYERPCT_COUNT:
3010 case CTSF_LAYERPCT_SUM:
3011 case CTSF_LAYERPCT_TOTALN:
3012 case CTSF_LAYERPCT_VALIDN:
3013 case CTSF_ULAYERPCT_COUNT:
3014 case CTSF_ULAYERPCT_SUM:
3015 case CTSF_ULAYERPCT_TOTALN:
3016 case CTSF_ULAYERPCT_VALIDN:
3019 case CTSF_LAYERROWPCT_COUNT:
3020 case CTSF_LAYERROWPCT_SUM:
3021 case CTSF_LAYERROWPCT_TOTALN:
3022 case CTSF_LAYERROWPCT_VALIDN:
3023 case CTSF_ULAYERROWPCT_COUNT:
3024 case CTSF_ULAYERROWPCT_SUM:
3025 case CTSF_ULAYERROWPCT_TOTALN:
3026 case CTSF_ULAYERROWPCT_VALIDN:
3027 return CTDT_LAYERROW;
3029 case CTSF_ROWPCT_COUNT:
3030 case CTSF_ROWPCT_SUM:
3031 case CTSF_ROWPCT_TOTALN:
3032 case CTSF_ROWPCT_VALIDN:
3033 case CTSF_UROWPCT_COUNT:
3034 case CTSF_UROWPCT_SUM:
3035 case CTSF_UROWPCT_TOTALN:
3036 case CTSF_UROWPCT_VALIDN:
3039 case CTSF_SUBTABLEPCT_COUNT:
3040 case CTSF_SUBTABLEPCT_SUM:
3041 case CTSF_SUBTABLEPCT_TOTALN:
3042 case CTSF_SUBTABLEPCT_VALIDN:
3043 case CTSF_USUBTABLEPCT_COUNT:
3044 case CTSF_USUBTABLEPCT_SUM:
3045 case CTSF_USUBTABLEPCT_TOTALN:
3046 case CTSF_USUBTABLEPCT_VALIDN:
3047 return CTDT_SUBTABLE;
3049 case CTSF_TABLEPCT_COUNT:
3050 case CTSF_TABLEPCT_SUM:
3051 case CTSF_TABLEPCT_TOTALN:
3052 case CTSF_TABLEPCT_VALIDN:
3053 case CTSF_UTABLEPCT_COUNT:
3054 case CTSF_UTABLEPCT_SUM:
3055 case CTSF_UTABLEPCT_TOTALN:
3056 case CTSF_UTABLEPCT_VALIDN:
3063 static enum ctables_domain_type
3064 ctables_function_is_pctsum (enum ctables_summary_function function)
3094 case CTSF_UVARIANCE:
3098 case CTSF_COLPCT_COUNT:
3099 case CTSF_COLPCT_TOTALN:
3100 case CTSF_COLPCT_VALIDN:
3101 case CTSF_UCOLPCT_COUNT:
3102 case CTSF_UCOLPCT_TOTALN:
3103 case CTSF_UCOLPCT_VALIDN:
3104 case CTSF_LAYERCOLPCT_COUNT:
3105 case CTSF_LAYERCOLPCT_TOTALN:
3106 case CTSF_LAYERCOLPCT_VALIDN:
3107 case CTSF_ULAYERCOLPCT_COUNT:
3108 case CTSF_ULAYERCOLPCT_TOTALN:
3109 case CTSF_ULAYERCOLPCT_VALIDN:
3110 case CTSF_LAYERPCT_COUNT:
3111 case CTSF_LAYERPCT_TOTALN:
3112 case CTSF_LAYERPCT_VALIDN:
3113 case CTSF_ULAYERPCT_COUNT:
3114 case CTSF_ULAYERPCT_TOTALN:
3115 case CTSF_ULAYERPCT_VALIDN:
3116 case CTSF_LAYERROWPCT_COUNT:
3117 case CTSF_LAYERROWPCT_TOTALN:
3118 case CTSF_LAYERROWPCT_VALIDN:
3119 case CTSF_ULAYERROWPCT_COUNT:
3120 case CTSF_ULAYERROWPCT_TOTALN:
3121 case CTSF_ULAYERROWPCT_VALIDN:
3122 case CTSF_ROWPCT_COUNT:
3123 case CTSF_ROWPCT_TOTALN:
3124 case CTSF_ROWPCT_VALIDN:
3125 case CTSF_UROWPCT_COUNT:
3126 case CTSF_UROWPCT_TOTALN:
3127 case CTSF_UROWPCT_VALIDN:
3128 case CTSF_SUBTABLEPCT_COUNT:
3129 case CTSF_SUBTABLEPCT_TOTALN:
3130 case CTSF_SUBTABLEPCT_VALIDN:
3131 case CTSF_USUBTABLEPCT_COUNT:
3132 case CTSF_USUBTABLEPCT_TOTALN:
3133 case CTSF_USUBTABLEPCT_VALIDN:
3134 case CTSF_TABLEPCT_COUNT:
3135 case CTSF_TABLEPCT_TOTALN:
3136 case CTSF_TABLEPCT_VALIDN:
3137 case CTSF_UTABLEPCT_COUNT:
3138 case CTSF_UTABLEPCT_TOTALN:
3139 case CTSF_UTABLEPCT_VALIDN:
3142 case CTSF_COLPCT_SUM:
3143 case CTSF_UCOLPCT_SUM:
3144 case CTSF_LAYERCOLPCT_SUM:
3145 case CTSF_ULAYERCOLPCT_SUM:
3146 case CTSF_LAYERPCT_SUM:
3147 case CTSF_ULAYERPCT_SUM:
3148 case CTSF_LAYERROWPCT_SUM:
3149 case CTSF_ULAYERROWPCT_SUM:
3150 case CTSF_ROWPCT_SUM:
3151 case CTSF_UROWPCT_SUM:
3152 case CTSF_SUBTABLEPCT_SUM:
3153 case CTSF_USUBTABLEPCT_SUM:
3154 case CTSF_TABLEPCT_SUM:
3155 case CTSF_UTABLEPCT_SUM:
3163 ctables_summary_value (const struct ctables_cell *cell,
3164 union ctables_summary *s,
3165 const struct ctables_summary_spec *ss)
3167 switch (ss->function)
3174 case CTSF_ROWPCT_COUNT:
3175 case CTSF_COLPCT_COUNT:
3176 case CTSF_TABLEPCT_COUNT:
3177 case CTSF_SUBTABLEPCT_COUNT:
3178 case CTSF_LAYERPCT_COUNT:
3179 case CTSF_LAYERROWPCT_COUNT:
3180 case CTSF_LAYERCOLPCT_COUNT:
3182 enum ctables_domain_type d = ctables_function_domain (ss->function);
3183 return (cell->domains[d]->e_count
3184 ? s->count / cell->domains[d]->e_count * 100
3188 case CTSF_UROWPCT_COUNT:
3189 case CTSF_UCOLPCT_COUNT:
3190 case CTSF_UTABLEPCT_COUNT:
3191 case CTSF_USUBTABLEPCT_COUNT:
3192 case CTSF_ULAYERPCT_COUNT:
3193 case CTSF_ULAYERROWPCT_COUNT:
3194 case CTSF_ULAYERCOLPCT_COUNT:
3196 enum ctables_domain_type d = ctables_function_domain (ss->function);
3197 return (cell->domains[d]->u_count
3198 ? s->count / cell->domains[d]->u_count * 100
3202 case CTSF_ROWPCT_VALIDN:
3203 case CTSF_COLPCT_VALIDN:
3204 case CTSF_TABLEPCT_VALIDN:
3205 case CTSF_SUBTABLEPCT_VALIDN:
3206 case CTSF_LAYERPCT_VALIDN:
3207 case CTSF_LAYERROWPCT_VALIDN:
3208 case CTSF_LAYERCOLPCT_VALIDN:
3210 enum ctables_domain_type d = ctables_function_domain (ss->function);
3211 return (cell->domains[d]->e_valid
3212 ? s->count / cell->domains[d]->e_valid * 100
3216 case CTSF_UROWPCT_VALIDN:
3217 case CTSF_UCOLPCT_VALIDN:
3218 case CTSF_UTABLEPCT_VALIDN:
3219 case CTSF_USUBTABLEPCT_VALIDN:
3220 case CTSF_ULAYERPCT_VALIDN:
3221 case CTSF_ULAYERROWPCT_VALIDN:
3222 case CTSF_ULAYERCOLPCT_VALIDN:
3224 enum ctables_domain_type d = ctables_function_domain (ss->function);
3225 return (cell->domains[d]->u_valid
3226 ? s->count / cell->domains[d]->u_valid * 100
3230 case CTSF_ROWPCT_TOTALN:
3231 case CTSF_COLPCT_TOTALN:
3232 case CTSF_TABLEPCT_TOTALN:
3233 case CTSF_SUBTABLEPCT_TOTALN:
3234 case CTSF_LAYERPCT_TOTALN:
3235 case CTSF_LAYERROWPCT_TOTALN:
3236 case CTSF_LAYERCOLPCT_TOTALN:
3238 enum ctables_domain_type d = ctables_function_domain (ss->function);
3239 return (cell->domains[d]->e_total
3240 ? s->count / cell->domains[d]->e_total * 100
3244 case CTSF_UROWPCT_TOTALN:
3245 case CTSF_UCOLPCT_TOTALN:
3246 case CTSF_UTABLEPCT_TOTALN:
3247 case CTSF_USUBTABLEPCT_TOTALN:
3248 case CTSF_ULAYERPCT_TOTALN:
3249 case CTSF_ULAYERROWPCT_TOTALN:
3250 case CTSF_ULAYERCOLPCT_TOTALN:
3252 enum ctables_domain_type d = ctables_function_domain (ss->function);
3253 return (cell->domains[d]->u_total
3254 ? s->count / cell->domains[d]->u_total * 100
3275 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3281 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3288 double weight, variance;
3289 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3290 return calc_semean (variance, weight);
3297 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3298 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3304 double weight, mean;
3305 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3306 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3310 case CTSF_UVARIANCE:
3313 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3317 case CTSF_ROWPCT_SUM:
3318 case CTSF_COLPCT_SUM:
3319 case CTSF_TABLEPCT_SUM:
3320 case CTSF_SUBTABLEPCT_SUM:
3321 case CTSF_LAYERPCT_SUM:
3322 case CTSF_LAYERROWPCT_SUM:
3323 case CTSF_LAYERCOLPCT_SUM:
3325 double weight, mean;
3326 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3327 if (weight == SYSMIS || mean == SYSMIS)
3329 enum ctables_domain_type d = ctables_function_domain (ss->function);
3330 double num = weight * mean;
3331 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3332 return denom != 0 ? num / denom * 100 : SYSMIS;
3334 case CTSF_UROWPCT_SUM:
3335 case CTSF_UCOLPCT_SUM:
3336 case CTSF_UTABLEPCT_SUM:
3337 case CTSF_USUBTABLEPCT_SUM:
3338 case CTSF_ULAYERPCT_SUM:
3339 case CTSF_ULAYERROWPCT_SUM:
3340 case CTSF_ULAYERCOLPCT_SUM:
3342 double weight, mean;
3343 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3344 if (weight == SYSMIS || mean == SYSMIS)
3346 enum ctables_domain_type d = ctables_function_domain (ss->function);
3347 double num = weight * mean;
3348 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3349 return denom != 0 ? num / denom * 100 : SYSMIS;
3358 struct casereader *reader = casewriter_make_reader (s->writer);
3361 struct percentile *ptile = percentile_create (
3362 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3363 struct order_stats *os = &ptile->parent;
3364 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3365 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3366 statistic_destroy (&ptile->parent.parent);
3374 struct casereader *reader = casewriter_make_reader (s->writer);
3377 struct mode *mode = mode_create ();
3378 struct order_stats *os = &mode->parent;
3379 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3380 s->ovalue = mode->mode;
3381 statistic_destroy (&mode->parent.parent);
3389 struct ctables_cell_sort_aux
3391 const struct ctables_nest *nest;
3392 enum pivot_axis_type a;
3396 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3398 const struct ctables_cell_sort_aux *aux = aux_;
3399 struct ctables_cell *const *ap = a_;
3400 struct ctables_cell *const *bp = b_;
3401 const struct ctables_cell *a = *ap;
3402 const struct ctables_cell *b = *bp;
3404 const struct ctables_nest *nest = aux->nest;
3405 for (size_t i = 0; i < nest->n; i++)
3406 if (i != nest->scale_idx)
3408 const struct variable *var = nest->vars[i];
3409 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3410 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3411 if (a_cv->category != b_cv->category)
3412 return a_cv->category > b_cv->category ? 1 : -1;
3414 const union value *a_val = &a_cv->value;
3415 const union value *b_val = &b_cv->value;
3416 switch (a_cv->category->type)
3422 case CCT_POSTCOMPUTE:
3423 case CCT_EXCLUDED_MISSING:
3424 /* Must be equal. */
3432 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3440 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3442 return a_cv->category->sort_ascending ? cmp : -cmp;
3448 const char *a_label = var_lookup_value_label (var, a_val);
3449 const char *b_label = var_lookup_value_label (var, b_val);
3451 ? (b_label ? strcmp (a_label, b_label) : 1)
3452 : (b_label ? -1 : value_compare_3way (
3453 a_val, b_val, var_get_width (var))));
3455 return a_cv->category->sort_ascending ? cmp : -cmp;
3469 For each ctables_table:
3470 For each combination of row vars:
3471 For each combination of column vars:
3472 For each combination of layer vars:
3474 Make a table of row values:
3475 Sort entries by row values
3476 Assign a 0-based index to each actual value
3477 Construct a dimension
3478 Make a table of column values
3479 Make a table of layer values
3481 Fill the table entry using the indexes from before.
3484 static struct ctables_domain *
3485 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3486 enum ctables_domain_type domain)
3489 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3491 const struct ctables_nest *nest = s->nests[a];
3492 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3494 size_t v_idx = nest->domains[domain][i];
3495 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3496 hash = hash_pointer (cv->category, hash);
3497 if (cv->category->type != CCT_TOTAL
3498 && cv->category->type != CCT_SUBTOTAL
3499 && cv->category->type != CCT_POSTCOMPUTE)
3500 hash = value_hash (&cv->value,
3501 var_get_width (nest->vars[v_idx]), hash);
3505 struct ctables_domain *d;
3506 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3508 const struct ctables_cell *df = d->example;
3509 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3511 const struct ctables_nest *nest = s->nests[a];
3512 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3514 size_t v_idx = nest->domains[domain][i];
3515 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3516 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3517 if (cv1->category != cv2->category
3518 || (cv1->category->type != CCT_TOTAL
3519 && cv1->category->type != CCT_SUBTOTAL
3520 && cv1->category->type != CCT_POSTCOMPUTE
3521 && !value_equal (&cv1->value, &cv2->value,
3522 var_get_width (nest->vars[v_idx]))))
3531 struct ctables_sum *sums = (s->table->n_sum_vars
3532 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3535 d = xmalloc (sizeof *d);
3536 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3537 hmap_insert (&s->domains[domain], &d->node, hash);
3541 static struct substring
3542 rtrim_value (const union value *v, const struct variable *var)
3544 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3545 var_get_width (var));
3546 ss_rtrim (&s, ss_cstr (" "));
3551 in_string_range (const union value *v, const struct variable *var,
3552 const struct substring *srange)
3554 struct substring s = rtrim_value (v, var);
3555 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3556 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
3559 static const struct ctables_category *
3560 ctables_categories_match (const struct ctables_categories *c,
3561 const union value *v, const struct variable *var)
3563 if (var_is_numeric (var) && v->f == SYSMIS)
3566 const struct ctables_category *othernm = NULL;
3567 for (size_t i = c->n_cats; i-- > 0; )
3569 const struct ctables_category *cat = &c->cats[i];
3573 if (cat->number == v->f)
3578 if (ss_equals (cat->string, rtrim_value (v, var)))
3583 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3584 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3589 if (in_string_range (v, var, cat->srange))
3594 if (var_is_value_missing (var, v))
3598 case CCT_POSTCOMPUTE:
3613 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3616 case CCT_EXCLUDED_MISSING:
3621 return var_is_value_missing (var, v) ? NULL : othernm;
3624 static const struct ctables_category *
3625 ctables_categories_total (const struct ctables_categories *c)
3627 const struct ctables_category *first = &c->cats[0];
3628 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3629 return (first->type == CCT_TOTAL ? first
3630 : last->type == CCT_TOTAL ? last
3634 static struct ctables_cell *
3635 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3636 const struct ctables_category *cats[PIVOT_N_AXES][10])
3639 enum ctables_summary_variant sv = CSV_CELL;
3640 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3642 const struct ctables_nest *nest = s->nests[a];
3643 for (size_t i = 0; i < nest->n; i++)
3644 if (i != nest->scale_idx)
3646 hash = hash_pointer (cats[a][i], hash);
3647 if (cats[a][i]->type != CCT_TOTAL
3648 && cats[a][i]->type != CCT_SUBTOTAL
3649 && cats[a][i]->type != CCT_POSTCOMPUTE)
3650 hash = value_hash (case_data (c, nest->vars[i]),
3651 var_get_width (nest->vars[i]), hash);
3657 struct ctables_cell *cell;
3658 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3660 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3662 const struct ctables_nest *nest = s->nests[a];
3663 for (size_t i = 0; i < nest->n; i++)
3664 if (i != nest->scale_idx
3665 && (cats[a][i] != cell->axes[a].cvs[i].category
3666 || (cats[a][i]->type != CCT_TOTAL
3667 && cats[a][i]->type != CCT_SUBTOTAL
3668 && cats[a][i]->type != CCT_POSTCOMPUTE
3669 && !value_equal (case_data (c, nest->vars[i]),
3670 &cell->axes[a].cvs[i].value,
3671 var_get_width (nest->vars[i])))))
3680 cell = xmalloc (sizeof *cell);
3683 cell->omit_domains = 0;
3684 cell->postcompute = false;
3685 //struct string name = DS_EMPTY_INITIALIZER;
3686 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3688 const struct ctables_nest *nest = s->nests[a];
3689 cell->axes[a].cvs = (nest->n
3690 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3692 for (size_t i = 0; i < nest->n; i++)
3694 const struct ctables_category *cat = cats[a][i];
3695 const struct variable *var = nest->vars[i];
3696 const union value *value = case_data (c, var);
3697 if (i != nest->scale_idx)
3699 const struct ctables_category *subtotal = cat->subtotal;
3700 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3703 if (cat->type == CCT_TOTAL
3704 || cat->type == CCT_SUBTOTAL
3705 || cat->type == CCT_POSTCOMPUTE)
3707 /* XXX these should be more encompassing I think.*/
3711 case PIVOT_AXIS_COLUMN:
3712 cell->omit_domains |= ((1u << CTDT_TABLE) |
3713 (1u << CTDT_LAYER) |
3714 (1u << CTDT_LAYERCOL) |
3715 (1u << CTDT_SUBTABLE) |
3718 case PIVOT_AXIS_ROW:
3719 cell->omit_domains |= ((1u << CTDT_TABLE) |
3720 (1u << CTDT_LAYER) |
3721 (1u << CTDT_LAYERROW) |
3722 (1u << CTDT_SUBTABLE) |
3725 case PIVOT_AXIS_LAYER:
3726 cell->omit_domains |= ((1u << CTDT_TABLE) |
3727 (1u << CTDT_LAYER));
3731 if (cat->type == CCT_POSTCOMPUTE)
3732 cell->postcompute = true;
3735 cell->axes[a].cvs[i].category = cat;
3736 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3739 if (i != nest->scale_idx)
3741 if (!ds_is_empty (&name))
3742 ds_put_cstr (&name, ", ");
3743 char *value_s = data_out (value, var_get_encoding (var),
3744 var_get_print_format (var),
3745 settings_get_fmt_settings ());
3746 if (cat->type == CCT_TOTAL
3747 || cat->type == CCT_SUBTOTAL
3748 || cat->type == CCT_POSTCOMPUTE)
3749 ds_put_format (&name, "%s=total", var_get_name (var));
3751 ds_put_format (&name, "%s=%s", var_get_name (var),
3752 value_s + strspn (value_s, " "));
3758 //cell->name = ds_steal_cstr (&name);
3760 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3761 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3762 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3763 for (size_t i = 0; i < specs->n; i++)
3764 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3765 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3766 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3767 hmap_insert (&s->cells, &cell->node, hash);
3772 is_scale_missing (const struct ctables_summary_spec_set *specs,
3773 const struct ccase *c)
3775 if (!specs->is_scale)
3778 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3781 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3783 const struct variable *var = specs->listwise_vars[i];
3784 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell selected by CATS in section S.

   Looks up (or creates) the cell with ctables_cell_insert__, passes the
   case to every summary in the cell through ctables_summary_add, and then
   accumulates D_WEIGHT and E_WEIGHT into the cell's domains.

   IS_MISSING and EXCLUDED_MISSING are passed through to
   ctables_summary_add; EXCLUDED_MISSING also gates the domain count
   accumulation below. */
3792 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3793 const struct ctables_category *cats[PIVOT_N_AXES][10],
3794 bool is_missing, bool excluded_missing,
3795 double d_weight, double e_weight)
3797 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
/* The summary specs live on the nest for the table's summary axis; the
   cell's 'sv' member selects which spec set applies to this cell. */
3798 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3800 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3802 bool scale_missing = is_scale_missing (specs, c);
3803 for (size_t i = 0; i < specs->n; i++)
3804 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3805 specs->var, case_data (c, specs->var), specs->is_scale,
3806 scale_missing, is_missing, excluded_missing,
3807 d_weight, e_weight);
/* NOTE(review): the test below uses logical '&&'.  Because (1u << dt) is
   never zero, the condition reduces to '!cell->omit_domains', so a cell
   with any omitted domain skips the update of every domain.
   ctables_cell_insert__ builds omit_domains as a bit mask with
   '|= (1u << CTDT_...)', which suggests bitwise '&' was intended --
   confirm before changing, since results would change. */
3808 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3809 if (!(cell->omit_domains && (1u << dt)))
3811 struct ctables_domain *d = cell->domains[dt];
/* Totals are accumulated for every case that reaches this point. */
3812 d->d_total += d_weight;
3813 d->e_total += e_weight;
/* Counts leave out cases with a missing value in an excluded category. */
3815 if (!excluded_missing)
3817 d->d_count += d_weight;
3818 d->e_count += e_weight;
3823 d->d_valid += d_weight;
3824 d->e_valid += e_weight;
/* Accumulate per-domain sums for each of the table's sum variables,
   skipping missing values: e_sum is weighted by E_WEIGHT, u_sum is
   unweighted. */
3827 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3829 /* XXX listwise_missing??? */
3830 const struct variable *var = s->table->sum_vars[i];
3831 double addend = case_num (c, var);
3832 if (!var_is_num_missing (var, addend))
3834 struct ctables_sum *sum = &d->sums[i];
3835 sum->e_sum += addend * e_weight;
3836 sum->u_sum += addend;
3844 recurse_totals (struct ctables_section *s, const struct ccase *c,
3845 const struct ctables_category *cats[PIVOT_N_AXES][10],
3846 bool is_missing, bool excluded_missing,
3847 double d_weight, double e_weight,
3848 enum pivot_axis_type start_axis, size_t start_nest)
3850 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3852 const struct ctables_nest *nest = s->nests[a];
3853 for (size_t i = start_nest; i < nest->n; i++)
3855 if (i == nest->scale_idx)
3858 const struct variable *var = nest->vars[i];
3860 const struct ctables_category *total = ctables_categories_total (
3861 s->table->categories[var_get_dict_index (var)]);
3864 const struct ctables_category *save = cats[a][i];
3866 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3867 d_weight, e_weight);
3868 recurse_totals (s, c, cats, is_missing, excluded_missing,
3869 d_weight, e_weight, a, i + 1);
3878 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3879 const struct ctables_category *cats[PIVOT_N_AXES][10],
3880 bool is_missing, bool excluded_missing,
3881 double d_weight, double e_weight,
3882 enum pivot_axis_type start_axis, size_t start_nest)
3884 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3886 const struct ctables_nest *nest = s->nests[a];
3887 for (size_t i = start_nest; i < nest->n; i++)
3889 if (i == nest->scale_idx)
3892 const struct ctables_category *save = cats[a][i];
3895 cats[a][i] = save->subtotal;
3896 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3897 d_weight, e_weight);
3898 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3899 d_weight, e_weight, a, i + 1);
3908 ctables_add_occurrence (const struct variable *var,
3909 const union value *value,
3910 struct hmap *occurrences)
3912 int width = var_get_width (var);
3913 unsigned int hash = value_hash (value, width, 0);
3915 struct ctables_occurrence *o;
3916 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3918 if (value_equal (value, &o->value, width))
3921 o = xmalloc (sizeof *o);
3922 value_clone (&o->value, value, width);
3923 hmap_insert (occurrences, &o->node, hash);
3927 ctables_cell_insert (struct ctables_section *s,
3928 const struct ccase *c,
3929 double d_weight, double e_weight)
3931 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3933 /* Does at least one categorical variable have a missing value in an included
3934 or excluded category? */
3935 bool is_missing = false;
3937 /* Does at least one categorical variable have a missing value in an excluded
3939 bool excluded_missing = false;
3941 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3943 const struct ctables_nest *nest = s->nests[a];
3944 for (size_t i = 0; i < nest->n; i++)
3946 if (i == nest->scale_idx)
3949 const struct variable *var = nest->vars[i];
3950 const union value *value = case_data (c, var);
3952 bool var_missing = var_is_value_missing (var, value) != 0;
3956 cats[a][i] = ctables_categories_match (
3957 s->table->categories[var_get_dict_index (var)], value, var);
3963 static const struct ctables_category cct_excluded_missing = {
3964 .type = CCT_EXCLUDED_MISSING,
3967 cats[a][i] = &cct_excluded_missing;
3968 excluded_missing = true;
3973 if (!excluded_missing)
3974 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3976 const struct ctables_nest *nest = s->nests[a];
3977 for (size_t i = 0; i < nest->n; i++)
3978 if (i != nest->scale_idx)
3980 const struct variable *var = nest->vars[i];
3981 const union value *value = case_data (c, var);
3982 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3986 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3987 d_weight, e_weight);
3989 //if (!excluded_missing)
3991 recurse_totals (s, c, cats, is_missing, excluded_missing,
3992 d_weight, e_weight, 0, 0);
3993 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3994 d_weight, e_weight, 0, 0);
4000 const struct ctables_summary_spec_set *set;
4005 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4007 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4008 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4009 if (as->function != bs->function)
4010 return as->function > bs->function ? 1 : -1;
4011 else if (as->percentile != bs->percentile)
4012 return as->percentile < bs->percentile ? 1 : -1;
4014 const char *as_label = as->label ? as->label : "";
4015 const char *bs_label = bs->label ? bs->label : "";
4016 return strcmp (as_label, bs_label);
4019 static struct pivot_value *
4020 ctables_category_create_label__ (const struct ctables_category *cat,
4021 const struct variable *var,
4022 const union value *value)
4024 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4025 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4026 : pivot_value_new_var_value (var, value));
4029 static struct pivot_value *
4030 ctables_postcompute_label (const struct ctables_categories *cats,
4031 const struct ctables_category *cat,
4032 const struct variable *var,
4033 const union value *value)
4035 struct substring in = ss_cstr (cat->pc->label);
4036 struct substring target = ss_cstr (")LABEL[");
4038 struct string out = DS_EMPTY_INITIALIZER;
4041 size_t chunk = ss_find_substring (in, target);
4042 if (chunk == SIZE_MAX)
4044 if (ds_is_empty (&out))
4045 return pivot_value_new_user_text (in.string, in.length);
4048 ds_put_substring (&out, in);
4049 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
4053 ds_put_substring (&out, ss_head (in, chunk));
4054 ss_advance (&in, chunk + target.length);
4056 struct substring idx_s;
4057 if (!ss_get_until (&in, ']', &idx_s))
4060 long int idx = strtol (idx_s.string, &tail, 10);
4061 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4064 struct ctables_category *cat2 = &cats->cats[idx - 1];
4065 struct pivot_value *label2
4066 = ctables_category_create_label__ (cat2, var, value);
4067 char *label2_s = pivot_value_to_string_defaults (label2);
4068 ds_put_cstr (&out, label2_s);
4070 pivot_value_destroy (label2);
4075 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4078 static struct pivot_value *
4079 ctables_category_create_label (const struct ctables_categories *cats,
4080 const struct ctables_category *cat,
4081 const struct variable *var,
4082 const union value *value)
4084 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4085 ? ctables_postcompute_label (cats, cat, var, value)
4086 : ctables_category_create_label__ (cat, var, value));
4089 static struct ctables_value *
4090 ctables_value_find__ (struct ctables_table *t, const union value *value,
4091 int width, unsigned int hash)
4093 struct ctables_value *clv;
4094 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4095 hash, &t->clabels_values_map)
4096 if (value_equal (value, &clv->value, width))
4102 ctables_value_insert (struct ctables_table *t, const union value *value,
4105 unsigned int hash = value_hash (value, width, 0);
4106 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4109 clv = xmalloc (sizeof *clv);
4110 value_clone (&clv->value, value, width);
4111 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry in T's clabels_values_map that equals VALUE, which has
   the given WIDTH, or a null pointer if there is none.  Convenience wrapper
   that computes the hash for ctables_value_find__. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  const unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
4124 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4125 size_t ix[PIVOT_N_AXES])
4127 if (a < PIVOT_N_AXES)
4129 size_t limit = MAX (t->stacks[a].n, 1);
4130 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4131 ctables_table_add_section (t, a + 1, ix);
4135 struct ctables_section *s = &t->sections[t->n_sections++];
4136 *s = (struct ctables_section) {
4138 .cells = HMAP_INITIALIZER (s->cells),
4140 for (a = 0; a < PIVOT_N_AXES; a++)
4143 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4145 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4146 for (size_t i = 0; i < nest->n; i++)
4147 hmap_init (&s->occurrences[a][i]);
4149 for (size_t i = 0; i < N_CTDTS; i++)
4150 hmap_init (&s->domains[i]);
4155 ctpo_add (double a, double b)
4161 ctpo_sub (double a, double b)
4167 ctpo_mul (double a, double b)
4173 ctpo_div (double a, double b)
4175 return b ? a / b : SYSMIS;
4179 ctpo_pow (double a, double b)
4181 int save_errno = errno;
4183 double result = pow (a, b);
4191 ctpo_neg (double a, double b UNUSED)
4196 struct ctables_pcexpr_evaluate_ctx
4198 const struct ctables_cell *cell;
4199 const struct ctables_section *section;
4200 const struct ctables_categories *cats;
4201 enum pivot_axis_type pc_a;
4204 enum fmt_type parse_format;
4207 static double ctables_pcexpr_evaluate (
4208 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4211 ctables_pcexpr_evaluate_nonterminal (
4212 const struct ctables_pcexpr_evaluate_ctx *ctx,
4213 const struct ctables_pcexpr *e, size_t n_args,
4214 double evaluate (double, double))
4216 double args[2] = { 0, 0 };
4217 for (size_t i = 0; i < n_args; i++)
4219 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4220 if (!isfinite (args[i]) || args[i] == SYSMIS)
4223 return evaluate (args[0], args[1]);
4227 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4228 const struct ctables_cell_value *pc_cv)
4230 const struct ctables_section *s = ctx->section;
4233 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4235 const struct ctables_nest *nest = s->nests[a];
4236 for (size_t i = 0; i < nest->n; i++)
4237 if (i != nest->scale_idx)
4239 const struct ctables_cell_value *cv
4240 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4241 : &ctx->cell->axes[a].cvs[i]);
4242 hash = hash_pointer (cv->category, hash);
4243 if (cv->category->type != CCT_TOTAL
4244 && cv->category->type != CCT_SUBTOTAL
4245 && cv->category->type != CCT_POSTCOMPUTE)
4246 hash = value_hash (&cv->value,
4247 var_get_width (nest->vars[i]), hash);
4251 struct ctables_cell *tc;
4252 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4254 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4256 const struct ctables_nest *nest = s->nests[a];
4257 for (size_t i = 0; i < nest->n; i++)
4258 if (i != nest->scale_idx)
4260 const struct ctables_cell_value *p_cv
4261 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4262 : &ctx->cell->axes[a].cvs[i]);
4263 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4264 if (p_cv->category != t_cv->category
4265 || (p_cv->category->type != CCT_TOTAL
4266 && p_cv->category->type != CCT_SUBTOTAL
4267 && p_cv->category->type != CCT_POSTCOMPUTE
4268 && !value_equal (&p_cv->value,
4270 var_get_width (nest->vars[i]))))
4282 const struct ctables_table *t = s->table;
4283 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4284 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4285 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4286 &specs->specs[ctx->summary_idx]);
4290 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4291 const struct ctables_pcexpr *e)
4298 case CTPO_CAT_NRANGE:
4299 case CTPO_CAT_SRANGE:
4301 struct ctables_cell_value cv = {
4302 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4304 assert (cv.category != NULL);
4306 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4307 const struct ctables_occurrence *o;
4310 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4311 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4312 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4314 cv.value = o->value;
4315 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4320 case CTPO_CAT_NUMBER:
4321 case CTPO_CAT_MISSING:
4322 case CTPO_CAT_OTHERNM:
4323 case CTPO_CAT_SUBTOTAL:
4324 case CTPO_CAT_TOTAL:
4326 struct ctables_cell_value cv = {
4327 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4328 .value = { .f = e->number },
4330 assert (cv.category != NULL);
4331 return ctables_pcexpr_evaluate_category (ctx, &cv);
4334 case CTPO_CAT_STRING:
4336 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4338 if (width > e->string.length)
4340 s = xmalloc (width);
4341 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4343 struct ctables_cell_value cv = {
4344 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4345 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4347 assert (cv.category != NULL);
4348 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4354 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4357 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4360 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4363 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4366 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4369 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
4375 static const struct ctables_category *
4376 ctables_cell_postcompute (const struct ctables_section *s,
4377 const struct ctables_cell *cell,
4378 enum pivot_axis_type *pc_a_p,
4381 assert (cell->postcompute);
4382 const struct ctables_category *pc_cat = NULL;
4383 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4384 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4386 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4387 if (cv->category->type == CCT_POSTCOMPUTE)
4391 /* Multiple postcomputes cross each other. The value is
4396 pc_cat = cv->category;
4400 *pc_a_idx_p = pc_a_idx;
4404 assert (pc_cat != NULL);
4409 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4410 const struct ctables_cell *cell,
4411 const struct ctables_summary_spec *ss,
4412 struct fmt_spec *format,
4413 bool *is_ctables_format,
4416 enum pivot_axis_type pc_a = 0;
4417 size_t pc_a_idx = 0;
4418 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4419 s, cell, &pc_a, &pc_a_idx);
4423 const struct ctables_postcompute *pc = pc_cat->pc;
4426 for (size_t i = 0; i < pc->specs->n; i++)
4428 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4429 if (ss->function == ss2->function
4430 && ss->percentile == ss2->percentile)
4432 *format = ss2->format;
4433 *is_ctables_format = ss2->is_ctables_format;
4439 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4440 const struct ctables_categories *cats = s->table->categories[
4441 var_get_dict_index (var)];
4442 struct ctables_pcexpr_evaluate_ctx ctx = {
4447 .pc_a_idx = pc_a_idx,
4448 .summary_idx = summary_idx,
4449 .parse_format = pc_cat->parse_format,
4451 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4455 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4457 struct pivot_table *pt = pivot_table_create__ (
4459 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4460 : pivot_value_new_text (N_("Custom Tables"))),
4463 pivot_table_set_caption (
4464 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4466 pivot_table_set_corner_text (
4467 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4469 bool summary_dimension = (t->summary_axis != t->slabels_axis
4470 || (!t->slabels_visible
4471 && t->summary_specs.n > 1));
4472 if (summary_dimension)
4474 struct pivot_dimension *d = pivot_dimension_create (
4475 pt, t->slabels_axis, N_("Statistics"));
4476 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4477 if (!t->slabels_visible)
4478 d->hide_all_labels = true;
4479 for (size_t i = 0; i < specs->n; i++)
4480 pivot_category_create_leaf (
4481 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4484 bool categories_dimension = t->clabels_example != NULL;
4485 if (categories_dimension)
4487 struct pivot_dimension *d = pivot_dimension_create (
4488 pt, t->label_axis[t->clabels_from_axis],
4489 t->clabels_from_axis == PIVOT_AXIS_ROW
4490 ? N_("Row Categories")
4491 : N_("Column Categories"));
4492 const struct variable *var = t->clabels_example;
4493 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4494 for (size_t i = 0; i < t->n_clabels_values; i++)
4496 const struct ctables_value *value = t->clabels_values[i];
4497 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4498 assert (cat != NULL);
4499 pivot_category_create_leaf (d->root, ctables_category_create_label (
4500 c, cat, t->clabels_example,
4505 pivot_table_set_look (pt, ct->look);
4506 struct pivot_dimension *d[PIVOT_N_AXES];
4507 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4509 static const char *names[] = {
4510 [PIVOT_AXIS_ROW] = N_("Rows"),
4511 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4512 [PIVOT_AXIS_LAYER] = N_("Layers"),
4514 d[a] = (t->axes[a] || a == t->summary_axis
4515 ? pivot_dimension_create (pt, a, names[a])
4520 assert (t->axes[a]);
4522 for (size_t i = 0; i < t->stacks[a].n; i++)
4524 struct ctables_nest *nest = &t->stacks[a].nests[i];
4525 struct ctables_section **sections = xnmalloc (t->n_sections,
4527 size_t n_sections = 0;
4529 size_t n_total_cells = 0;
4530 size_t max_depth = 0;
4531 for (size_t j = 0; j < t->n_sections; j++)
4532 if (t->sections[j].nests[a] == nest)
4534 struct ctables_section *s = &t->sections[j];
4535 sections[n_sections++] = s;
4536 n_total_cells += s->cells.count;
4538 size_t depth = s->nests[a]->n;
4539 max_depth = MAX (depth, max_depth);
4542 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4544 size_t n_sorted = 0;
4546 for (size_t j = 0; j < n_sections; j++)
4548 struct ctables_section *s = sections[j];
4550 struct ctables_cell *cell;
4551 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4553 sorted[n_sorted++] = cell;
4554 assert (n_sorted <= n_total_cells);
4557 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4558 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4561 for (size_t j = 0; j < n_sorted; j++)
4563 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4568 struct ctables_level
4570 enum ctables_level_type
4572 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4573 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4574 CTL_SUMMARY, /* Summary functions. */
4578 enum settings_value_show vlabel; /* CTL_VAR only. */
4581 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4582 size_t n_levels = 0;
4583 for (size_t k = 0; k < nest->n; k++)
4585 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4586 if (vlabel != CTVL_NONE)
4588 levels[n_levels++] = (struct ctables_level) {
4590 .vlabel = (enum settings_value_show) vlabel,
4595 if (nest->scale_idx != k
4596 && (k != nest->n - 1 || t->label_axis[a] == a))
4598 levels[n_levels++] = (struct ctables_level) {
4599 .type = CTL_CATEGORY,
4605 if (!summary_dimension && a == t->slabels_axis)
4607 levels[n_levels++] = (struct ctables_level) {
4608 .type = CTL_SUMMARY,
4609 .var_idx = SIZE_MAX,
4613 /* Pivot categories:
4615 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4616 - category for nest->vars[0], if nest->scale_idx != 0
4617 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4618 - category for nest->vars[1], if nest->scale_idx != 1
4620 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4621 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4622 - summary function, if 'a == t->slabels_axis && a ==
4625 Additional dimensions:
4627 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4629 - If 't->label_axis[b] == a' for some 'b != a', add a category
4634 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4636 for (size_t j = 0; j < n_sorted; j++)
4638 struct ctables_cell *cell = sorted[j];
4639 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4641 size_t n_common = 0;
4644 for (; n_common < n_levels; n_common++)
4646 const struct ctables_level *level = &levels[n_common];
4647 if (level->type == CTL_CATEGORY)
4649 size_t var_idx = level->var_idx;
4650 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4651 if (prev->axes[a].cvs[var_idx].category != c)
4653 else if (c->type != CCT_SUBTOTAL
4654 && c->type != CCT_TOTAL
4655 && c->type != CCT_POSTCOMPUTE
4656 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4657 &cell->axes[a].cvs[var_idx].value,
4658 var_get_type (nest->vars[var_idx])))
4664 for (size_t k = n_common; k < n_levels; k++)
4666 const struct ctables_level *level = &levels[k];
4667 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4668 if (level->type == CTL_SUMMARY)
4670 assert (k == n_levels - 1);
4672 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4673 for (size_t m = 0; m < specs->n; m++)
4675 int leaf = pivot_category_create_leaf (
4676 parent, ctables_summary_label (&specs->specs[m],
4684 const struct variable *var = nest->vars[level->var_idx];
4685 struct pivot_value *label;
4686 if (level->type == CTL_VAR)
4688 label = pivot_value_new_variable (var);
4689 label->variable.show = level->vlabel;
4691 else if (level->type == CTL_CATEGORY)
4693 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4694 label = ctables_category_create_label (
4695 t->categories[var_get_dict_index (var)],
4696 cv->category, var, &cv->value);
4701 if (k == n_levels - 1)
4702 prev_leaf = pivot_category_create_leaf (parent, label);
4704 groups[k] = pivot_category_create_group__ (parent, label);
4708 cell->axes[a].leaf = prev_leaf;
4717 for (size_t i = 0; i < t->n_sections; i++)
4719 struct ctables_section *s = &t->sections[i];
4721 struct ctables_cell *cell;
4722 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4727 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4728 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4729 for (size_t j = 0; j < specs->n; j++)
4732 size_t n_dindexes = 0;
4734 if (summary_dimension)
4735 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4737 if (categories_dimension)
4739 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4740 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4741 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4742 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4745 dindexes[n_dindexes++] = ctv->leaf;
4748 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4751 int leaf = cell->axes[a].leaf;
4752 if (a == t->summary_axis && !summary_dimension)
4754 dindexes[n_dindexes++] = leaf;
4757 const struct ctables_summary_spec *ss = &specs->specs[j];
4759 struct fmt_spec format = specs->specs[j].format;
4760 bool is_ctables_format = ss->is_ctables_format;
4761 double d = (cell->postcompute
4762 ? ctables_cell_calculate_postcompute (
4763 s, cell, ss, &format, &is_ctables_format, j)
4764 : ctables_summary_value (cell, &cell->summaries[j],
4767 struct pivot_value *value;
4768 if (ct->hide_threshold != 0
4769 && d < ct->hide_threshold
4770 && ctables_summary_function_is_count (ss->function))
4772 value = pivot_value_new_user_text_nocopy (
4773 xasprintf ("<%d", ct->hide_threshold));
4775 else if (d == 0 && ct->zero)
4776 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4777 else if (d == SYSMIS && ct->missing)
4778 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4779 else if (is_ctables_format)
4781 char *s = data_out_stretchy (&(union value) { .f = d },
4783 &ct->ctables_formats, NULL);
4784 value = pivot_value_new_user_text_nocopy (s);
4788 value = pivot_value_new_number (d);
4789 value->numeric.format = format;
4791 pivot_table_put (pt, dindexes, n_dindexes, value);
4796 pivot_table_submit (pt);
4800 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4802 enum pivot_axis_type label_pos = t->label_axis[a];
4806 t->clabels_from_axis = a;
4808 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4809 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4811 const struct ctables_stack *stack = &t->stacks[a];
4815 const struct ctables_nest *n0 = &stack->nests[0];
4818 assert (stack->n == 1);
4822 const struct variable *v0 = n0->vars[n0->n - 1];
4823 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4824 t->clabels_example = v0;
4826 for (size_t i = 0; i < c0->n_cats; i++)
4827 if (c0->cats[i].type == CCT_FUNCTION)
4829 msg (SE, _("%s=%s is not allowed with sorting based "
4830 "on a summary function."),
4831 subcommand_name, pos_name);
4834 if (n0->n - 1 == n0->scale_idx)
4836 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4837 "but %s is a scale variable."),
4838 subcommand_name, pos_name, var_get_name (v0));
4842 for (size_t i = 1; i < stack->n; i++)
4844 const struct ctables_nest *ni = &stack->nests[i];
4846 const struct variable *vi = ni->vars[ni->n - 1];
4847 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4849 if (ni->n - 1 == ni->scale_idx)
4851 msg (SE, _("%s=%s requires the variables to be moved to be "
4852 "categorical, but %s is a scale variable."),
4853 subcommand_name, pos_name, var_get_name (vi));
4856 if (var_get_width (v0) != var_get_width (vi))
4858 msg (SE, _("%s=%s requires the variables to be "
4859 "moved to have the same width, but %s has "
4860 "width %d and %s has width %d."),
4861 subcommand_name, pos_name,
4862 var_get_name (v0), var_get_width (v0),
4863 var_get_name (vi), var_get_width (vi));
4866 if (!val_labs_equal (var_get_value_labels (v0),
4867 var_get_value_labels (vi)))
4869 msg (SE, _("%s=%s requires the variables to be "
4870 "moved to have the same value labels, but %s "
4871 "and %s have different value labels."),
4872 subcommand_name, pos_name,
4873 var_get_name (v0), var_get_name (vi));
4876 if (!ctables_categories_equal (c0, ci))
4878 msg (SE, _("%s=%s requires the variables to be "
4879 "moved to have the same category "
4880 "specifications, but %s and %s have different "
4881 "category specifications."),
4882 subcommand_name, pos_name,
4883 var_get_name (v0), var_get_name (vi));
/* Returns the index of VAR in the *N-element array *SUM_VARS, appending it
   first if it is not already there.  *ALLOCATED is the array's capacity; the
   array is grown as needed. */
static size_t
add_sum_var (struct variable *var,
             struct variable ***sum_vars, size_t *n, size_t *allocated)
{
  for (size_t i = 0; i < *n; i++)
    if (var == (*sum_vars)[i])
      return i;

  if (*n >= *allocated)
    *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
  (*sum_vars)[*n] = var;
  return (*n)++;
}
4906 enumerate_sum_vars (const struct ctables_axis *a,
4907 struct variable ***sum_vars, size_t *n, size_t *allocated)
4915 for (size_t i = 0; i < N_CSVS; i++)
4916 for (size_t j = 0; j < a->specs[i].n; j++)
4918 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4919 if (ctables_function_is_pctsum (spec->function))
4920 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4926 for (size_t i = 0; i < 2; i++)
4927 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
4933 ctables_prepare_table (struct ctables_table *t)
4935 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4938 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4940 for (size_t j = 0; j < t->stacks[a].n; j++)
4942 struct ctables_nest *nest = &t->stacks[a].nests[j];
4943 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4945 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4946 nest->n_domains[dt] = 0;
4948 for (size_t k = 0; k < nest->n; k++)
4950 if (k == nest->scale_idx)
4959 if (a != PIVOT_AXIS_LAYER)
4966 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4967 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4968 : a == PIVOT_AXIS_ROW)
4970 if (k == nest->n - 1
4971 || (nest->scale_idx == nest->n - 1
4972 && k == nest->n - 2))
4978 if (a == PIVOT_AXIS_COLUMN)
4983 if (a == PIVOT_AXIS_ROW)
4988 nest->domains[dt][nest->n_domains[dt]++] = k;
4995 struct ctables_nest *nest = xmalloc (sizeof *nest);
4996 *nest = (struct ctables_nest) { .n = 0 };
4997 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4999 /* There's no point in moving labels away from an axis that has no
5000 labels, so avoid dealing with the special cases around that. */
5001 t->label_axis[a] = a;
5004 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5005 for (size_t i = 0; i < stack->n; i++)
5007 struct ctables_nest *nest = &stack->nests[i];
5008 if (!nest->specs[CSV_CELL].n)
5010 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
5011 specs->specs = xmalloc (sizeof *specs->specs);
5014 enum ctables_summary_function function
5015 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
5017 *specs->specs = (struct ctables_summary_spec) {
5018 .function = function,
5019 .format = ctables_summary_default_format (function, specs->var),
5022 specs->var = nest->vars[0];
5024 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5025 &nest->specs[CSV_CELL]);
5027 else if (!nest->specs[CSV_TOTAL].n)
5028 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5029 &nest->specs[CSV_CELL]);
5031 if (t->ctables->smissing_listwise)
5033 struct variable **listwise_vars = NULL;
5035 size_t allocated = 0;
5037 for (size_t j = nest->group_head; j < stack->n; j++)
5039 const struct ctables_nest *other_nest = &stack->nests[j];
5040 if (other_nest->group_head != nest->group_head)
5043 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5046 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5047 sizeof *listwise_vars);
5048 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5051 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5054 listwise_vars = xmemdup (listwise_vars,
5055 n * sizeof *listwise_vars);
5056 nest->specs[sv].listwise_vars = listwise_vars;
5057 nest->specs[sv].n_listwise_vars = n;
5062 struct ctables_summary_spec_set *merged = &t->summary_specs;
5063 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5065 for (size_t j = 0; j < stack->n; j++)
5067 const struct ctables_nest *nest = &stack->nests[j];
5069 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5070 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5075 struct merge_item min = items[0];
5076 for (size_t j = 1; j < n_left; j++)
5077 if (merge_item_compare_3way (&items[j], &min) < 0)
5080 if (merged->n >= merged->allocated)
5081 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5082 sizeof *merged->specs);
5083 merged->specs[merged->n++] = min.set->specs[min.ofs];
5085 for (size_t j = 0; j < n_left; )
5087 if (merge_item_compare_3way (&items[j], &min) == 0)
5089 struct merge_item *item = &items[j];
5090 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5091 if (++item->ofs >= item->set->n)
5093 items[j] = items[--n_left];
5103 for (size_t j = 0; j < merged->n; j++)
5104 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5106 for (size_t j = 0; j < stack->n; j++)
5108 const struct ctables_nest *nest = &stack->nests[j];
5109 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5111 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5112 for (size_t k = 0; k < specs->n; k++)
5113 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5114 specs->specs[k].axis_idx);
5120 size_t allocated_sum_vars = 0;
5121 enumerate_sum_vars (t->axes[t->summary_axis],
5122 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5124 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5125 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
5129 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5130 enum pivot_axis_type a)
5132 struct ctables_stack *stack = &t->stacks[a];
5133 for (size_t i = 0; i < stack->n; i++)
5135 const struct ctables_nest *nest = &stack->nests[i];
5136 const struct variable *var = nest->vars[nest->n - 1];
5137 const union value *value = case_data (c, var);
5139 if (var_is_numeric (var) && value->f == SYSMIS)
5142 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5144 ctables_value_insert (t, value, var_get_width (var));
5149 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5151 const struct ctables_value *const *ap = a_;
5152 const struct ctables_value *const *bp = b_;
5153 const struct ctables_value *a = *ap;
5154 const struct ctables_value *b = *bp;
5155 const int *width = width_;
5156 return value_compare_3way (&a->value, &b->value, *width);
5160 ctables_sort_clabels_values (struct ctables_table *t)
5162 const struct variable *v0 = t->clabels_example;
5163 int width = var_get_width (v0);
5165 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5168 const struct val_labs *val_labs = var_get_value_labels (v0);
5169 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5170 vl = val_labs_next (val_labs, vl))
5171 if (ctables_categories_match (c0, &vl->value, v0))
5172 ctables_value_insert (t, &vl->value, width);
5175 size_t n = hmap_count (&t->clabels_values_map);
5176 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5178 struct ctables_value *clv;
5180 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5181 t->clabels_values[i++] = clv;
5182 t->n_clabels_values = n;
5185 sort (t->clabels_values, n, sizeof *t->clabels_values,
5186 compare_clabels_values_3way, &width);
5188 for (size_t i = 0; i < n; i++)
5189 t->clabels_values[i]->leaf = i;
5193 ctables_add_category_occurrences (const struct variable *var,
5194 struct hmap *occurrences,
5195 const struct ctables_categories *cats)
5197 const struct val_labs *val_labs = var_get_value_labels (var);
5199 for (size_t i = 0; i < cats->n_cats; i++)
5201 const struct ctables_category *c = &cats->cats[i];
5205 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5211 int width = var_get_width (var);
5213 value_init (&value, width);
5214 value_copy_buf_rpad (&value, width,
5215 CHAR_CAST (uint8_t *, c->string.string),
5216 c->string.length, ' ');
5217 ctables_add_occurrence (var, &value, occurrences);
5218 value_destroy (&value, width);
5223 assert (var_is_numeric (var));
5224 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5225 vl = val_labs_next (val_labs, vl))
5226 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5227 ctables_add_occurrence (var, &vl->value, occurrences);
5231 assert (var_is_alpha (var));
5232 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5233 vl = val_labs_next (val_labs, vl))
5234 if (in_string_range (&vl->value, var, c->srange))
5235 ctables_add_occurrence (var, &vl->value, occurrences);
5239 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5240 vl = val_labs_next (val_labs, vl))
5241 if (var_is_value_missing (var, &vl->value))
5242 ctables_add_occurrence (var, &vl->value, occurrences);
5246 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5247 vl = val_labs_next (val_labs, vl))
5248 ctables_add_occurrence (var, &vl->value, occurrences);
5251 case CCT_POSTCOMPUTE:
5261 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5262 vl = val_labs_next (val_labs, vl))
5263 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5264 ctables_add_occurrence (var, &vl->value, occurrences);
5267 case CCT_EXCLUDED_MISSING:
5274 ctables_section_recurse_add_empty_categories (
5275 struct ctables_section *s,
5276 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5277 enum pivot_axis_type a, size_t a_idx)
5279 if (a >= PIVOT_N_AXES)
5280 ctables_cell_insert__ (s, c, cats);
5281 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5282 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5285 const struct variable *var = s->nests[a]->vars[a_idx];
5286 const struct ctables_categories *categories = s->table->categories[
5287 var_get_dict_index (var)];
5288 int width = var_get_width (var);
5289 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5290 const struct ctables_occurrence *o;
5291 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5293 union value *value = case_data_rw (c, var);
5294 value_destroy (value, width);
5295 value_clone (value, &o->value, width);
5296 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5297 assert (cats[a][a_idx] != NULL);
5298 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5301 for (size_t i = 0; i < categories->n_cats; i++)
5303 const struct ctables_category *cat = &categories->cats[i];
5304 if (cat->type == CCT_POSTCOMPUTE)
5306 cats[a][a_idx] = cat;
5307 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5314 ctables_section_add_empty_categories (struct ctables_section *s)
5316 bool show_empty = false;
5317 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5319 for (size_t k = 0; k < s->nests[a]->n; k++)
5320 if (k != s->nests[a]->scale_idx)
5322 const struct variable *var = s->nests[a]->vars[k];
5323 const struct ctables_categories *cats = s->table->categories[
5324 var_get_dict_index (var)];
5325 if (cats->show_empty)
5328 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5334 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5335 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5336 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
5341 ctables_section_clear (struct ctables_section *s)
5343 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5345 const struct ctables_nest *nest = s->nests[a];
5346 for (size_t i = 0; i < nest->n; i++)
5347 if (i != nest->scale_idx)
5349 const struct variable *var = nest->vars[i];
5350 int width = var_get_width (var);
5351 struct ctables_occurrence *o, *next;
5352 struct hmap *map = &s->occurrences[a][i];
5353 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5355 value_destroy (&o->value, width);
5356 hmap_delete (map, &o->node);
5363 struct ctables_cell *cell, *next_cell;
5364 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5366 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5368 const struct ctables_nest *nest = s->nests[a];
5369 for (size_t i = 0; i < nest->n; i++)
5370 if (i != nest->scale_idx)
5371 value_destroy (&cell->axes[a].cvs[i].value,
5372 var_get_width (nest->vars[i]));
5373 free (cell->axes[a].cvs);
5376 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5377 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5378 for (size_t i = 0; i < specs->n; i++)
5379 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5380 free (cell->summaries);
5382 hmap_delete (&s->cells, &cell->node);
5385 hmap_shrink (&s->cells);
5387 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5389 struct ctables_domain *domain, *next_domain;
5390 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5393 free (domain->sums);
5394 hmap_delete (&s->domains[dt], &domain->node);
5397 hmap_shrink (&s->domains[dt]);
5402 ctables_section_uninit (struct ctables_section *s)
5404 ctables_section_clear (s);
5406 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5408 struct ctables_nest *nest = s->nests[a];
5409 for (size_t i = 0; i < nest->n; i++)
5410 hmap_destroy (&s->occurrences[a][i]);
5411 free (s->occurrences[a]);
5414 hmap_destroy (&s->cells);
5415 for (size_t i = 0; i < N_CTDTS; i++)
5416 hmap_destroy (&s->domains[i]);
5420 ctables_table_clear (struct ctables_table *t)
5422 for (size_t i = 0; i < t->n_sections; i++)
5423 ctables_section_clear (&t->sections[i]);
5425 if (t->clabels_example)
5427 int width = var_get_width (t->clabels_example);
5428 struct ctables_value *value, *next_value;
5429 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5430 &t->clabels_values_map)
5432 value_destroy (&value->value, width);
5433 hmap_delete (&t->clabels_values_map, &value->node);
5436 hmap_shrink (&t->clabels_values_map);
5438 free (t->clabels_values);
5439 t->clabels_values = NULL;
5440 t->n_clabels_values = 0;
/* Reads the cases in INPUT, tabulates them into every table of CT, and
   outputs the tables.

   Cases are read in groups supplied by a casegrouper.  Each case is inserted
   into every section of every table, the tables are then output and cleared.
   The function's result is whatever casegrouper_destroy() returns. */
5445 ctables_execute (struct dataset *ds, struct casereader *input,
/* Give each table room for one section per combination of row, column and
   layer stacks; an axis with no stacks still counts as one. */
5448 for (size_t i = 0; i < ct->n_tables; i++)
5450 struct ctables_table *t = ct->tables[i];
5451 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5452 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5453 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5454 sizeof *t->sections);
5455 size_t ix[PIVOT_N_AXES];
5456 ctables_table_add_section (t, 0, ix);
/* Group the input by the split-file variables when the dictionary's split
   type is SPLIT_SEPARATE; otherwise group on no variables at all. */
5459 struct dictionary *dict = dataset_dict (ds);
5460 struct casegrouper *grouper
5461 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5462 ? casegrouper_create_splits (input, dict)
5463 : casegrouper_create_vars (input, NULL, 0));
5464 struct casereader *group;
5465 while (casegrouper_get_next_group (grouper, &group))
5467 /* Output SPLIT FILE variables. */
/* Peek at the first case of the group without consuming it. */
5468 struct ccase *c = casereader_peek (group, 0);
5471 output_split_file_values (ds, c);
/* Read the group's cases one at a time; each case is unreferenced before
   the next one is fetched. */
5475 bool warn_on_invalid = true;
5476 for (c = casereader_read (group); c;
5477 case_unref (c), c = casereader_read (group))
/* d_weight is the dictionary's case weight.  e_weight comes from the
   variable ct->e_weight when one is set.
   NOTE(review): confirm what e_weight falls back to when ct->e_weight is
   unset (presumably d_weight). */
5479 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5480 double e_weight = (ct->e_weight
5481 ? var_force_valid_weight (ct->e_weight,
5482 case_num (c, ct->e_weight),
5486 for (size_t i = 0; i < ct->n_tables; i++)
5488 struct ctables_table *t = ct->tables[i];
5490 for (size_t j = 0; j < t->n_sections; j++)
5491 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* For an axis whose category labels are displayed on a different axis
   (label_axis[a] != a), also record this case's category-label values. */
5493 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5494 if (t->label_axis[a] != a)
5495 ctables_insert_clabels_values (t, c, a);
5498 casereader_destroy (group);
/* All cases of the group have been consumed: finish each table, output it,
   and clear it.
   NOTE(review): this loop appears to sit inside the group loop, so that
   each split group produces its own output -- confirm. */
5500 for (size_t i = 0; i < ct->n_tables; i++)
5502 struct ctables_table *t = ct->tables[i];
5504 if (t->clabels_example)
5505 ctables_sort_clabels_values (t);
5507 for (size_t j = 0; j < t->n_sections; j++)
5508 ctables_section_add_empty_categories (&t->sections[j]);
5510 ctables_table_output (ct, t);
5511 ctables_table_clear (t);
5514 return casegrouper_destroy (grouper);
/* Type of a function that parses one precedence level of a postcompute
   expression from a lexer, given the dictionary, and returns the resulting
   expression node.  The binary-operator parsers take a pointer to such a
   function as their "parse_next_level" argument. */
5519 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5520 struct dictionary *);
/* Frees postcompute expression E together with what it owns.

   What is released depends on the node's operation: a string category frees
   its string, a string range frees both endpoints, and a node with
   sub-expressions destroys both of them recursively.  The node's location is
   destroyed as well.
   NOTE(review): ctables_parse_pcompute() passes a possibly null expression
   here, so this presumably tolerates E == NULL -- confirm. */
5523 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5529 case CTPO_CAT_STRING:
5530 ss_dealloc (&e->string);
5533 case CTPO_CAT_SRANGE:
/* Both endpoints of the range are separately allocated strings. */
5534 for (size_t i = 0; i < 2; i++)
5535 ss_dealloc (&e->srange[i]);
/* Operator nodes: destroy both operand slots recursively. */
5544 for (size_t i = 0; i < 2; i++)
5545 ctables_pcexpr_destroy (e->subs[i]);
/* These category kinds have no release code listed under them. */
5549 case CTPO_CAT_NUMBER:
5550 case CTPO_CAT_NRANGE:
5551 case CTPO_CAT_MISSING:
5552 case CTPO_CAT_OTHERNM:
5553 case CTPO_CAT_SUBTOTAL:
5554 case CTPO_CAT_TOTAL:
5558 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operation OP whose operands
   are SUB0 and SUB1.  The operand pointers are stored in the node itself
   (they are not copied), and the node's location is the merger of the two
   operands' locations, so both operands must be non-null. */
5563 static struct ctables_pcexpr *
5564 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5565 struct ctables_pcexpr *sub0,
5566 struct ctables_pcexpr *sub1)
5568 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5569 *e = (struct ctables_pcexpr) {
5571 .subs = { sub0, sub1 },
5572 .location = msg_location_merged (sub0->location, sub1->location),
5577 /* How to parse an operator. */
/* "token" is the lexer token type that denotes the operator in syntax. */
5580 enum token_type token;
/* "op" is the postcompute operation built when that token is matched. */
5581 enum ctables_postcompute_op op;
/* Searches the N_OPS operators in OPS for one whose token type equals the
   lexer's current token and returns it.

   T_NEG_NUM is singled out: a negative-number token is both an operator and
   an operand, so it is handled differently from ordinary operator tokens.
   NOTE(review): presumably ordinary operator tokens are consumed here while
   a T_NEG_NUM is left for the operand parser, and a null pointer is
   returned when nothing matches -- confirm. */
5584 static const struct operator *
5585 ctables_pcexpr_match_operator (struct lexer *lexer,
5586 const struct operator ops[], size_t n_ops)
5588 for (const struct operator *op = ops; op < ops + n_ops; op++)
5589 if (lex_token (lexer) == op->token)
5591 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators at a single precedence level, given an
   already parsed left operand LHS.

   OPS (N_OPS entries) lists the operators accepted at this level and
   PARSE_NEXT_LEVEL parses each right operand.  Every operator found is
   combined with the expression built so far, so the chain associates to the
   left.  If CHAIN_WARNING is non-null it is issued as a warning at the
   location of the expression built so far when more than one operator has
   been chained.  If a right operand fails to parse, LHS is destroyed. */
5600 static struct ctables_pcexpr *
5601 ctables_pcexpr_parse_binary_operators__ (
5602 struct lexer *lexer, struct dictionary *dict,
5603 const struct operator ops[], size_t n_ops,
5604 parse_recursively_func *parse_next_level,
5605 const char *chain_warning, struct ctables_pcexpr *lhs)
/* op_count is the number of operators combined so far. */
5607 for (int op_count = 0; ; op_count++)
5609 const struct operator *op
5610 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5613 if (op_count > 1 && chain_warning)
5614 msg_at (SW, lhs->location, "%s", chain_warning);
5619 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5622 ctables_pcexpr_destroy (lhs);
/* The new binary node takes over both operands and becomes the left operand
   for the next operator in the chain. */
5626 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators from scratch: the first
   operand is parsed with PARSE_NEXT_LEVEL and the rest of the chain is then
   handled by ctables_pcexpr_parse_binary_operators__(), which receives the
   same OPS, N_OPS and CHAIN_WARNING. */
5630 static struct ctables_pcexpr *
5631 ctables_pcexpr_parse_binary_operators (
5632 struct lexer *lexer, struct dictionary *dict,
5633 const struct operator ops[], size_t n_ops,
5634 parse_recursively_func *parse_next_level, const char *chain_warning)
5636 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5640 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5642 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls the
   addition-level parser for a parenthesized sub-expression, and that parser
   is defined further down. */
5645 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5646 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node for the numeric
   category range from LOW to HIGH.  Callers pass -DBL_MAX or DBL_MAX for an
   open-ended bound. */
5648 static struct ctables_pcexpr
5649 ctpo_cat_nrange (double low, double high)
5651 return (struct ctables_pcexpr) {
5652 .op = CTPO_CAT_NRANGE,
5653 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE expression node for the string
   category range from LOW to HIGH.  The substrings are stored as given, not
   copied.  Callers pass a substring whose "string" member is null for an
   open-ended bound. */
5657 static struct ctables_pcexpr
5658 ctpo_cat_srange (struct substring low, struct substring high)
5660 return (struct ctables_pcexpr) {
5661 .op = CTPO_CAT_SRANGE,
5662 .srange = { low, high },
/* Parses a primary (operand) of a postcompute expression.

   Accepted forms, in the order tested:
     - a number, giving a CTPO_CONSTANT node;
     - MISSING, OTHERNM or TOTAL;
     - SUBTOTAL, optionally followed by a 1-based index in brackets (the
       index stays 0 when none is given);
     - a bracketed category: [LO THRU x], [x THRU HI], [x THRU y] or [x],
       where x and y are both numbers or both strings;
     - a parenthesized sub-expression.

   For the non-parenthesized forms the node is built in the local E, its
   location is set to the tokens consumed since entry, and a heap copy made
   with xmemdup() is returned. */
5666 static struct ctables_pcexpr *
5667 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remember where the operand starts so its location can be recorded. */
5669 int start_ofs = lex_ofs (lexer);
5670 struct ctables_pcexpr e;
5671 if (lex_is_number (lexer))
5673 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5674 .number = lex_number (lexer) };
5677 else if (lex_match_id (lexer, "MISSING"))
5678 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5679 else if (lex_match_id (lexer, "OTHERNM"))
5680 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5681 else if (lex_match_id (lexer, "TOTAL"))
5682 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5683 else if (lex_match_id (lexer, "SUBTOTAL"))
/* Optional bracketed index, which must be an integer of at least 1. */
5685 size_t subtotal_index = 0;
5686 if (lex_match (lexer, T_LBRACK))
5688 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5690 subtotal_index = lex_integer (lexer);
5692 if (!lex_force_match (lexer, T_RBRACK))
5695 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5696 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5698 else if (lex_match (lexer, T_LBRACK))
/* [LO THRU x]: a range with an open lower bound.  For strings the lower
   bound is a substring with a null "string"; for numbers it is -DBL_MAX. */
5700 if (lex_match_id (lexer, "LO"))
5702 if (!lex_force_match_id (lexer, "THRU"))
5705 if (lex_is_string (lexer))
5707 struct substring low = { .string = NULL };
5708 struct substring high = parse_substring (lexer, dict);
5709 e = ctpo_cat_srange (low, high);
5713 if (!lex_force_num (lexer))
5715 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number: [x THRU HI], [x THRU y], or the single value [x]. */
5719 else if (lex_is_number (lexer))
5721 double number = lex_number (lexer);
5723 if (lex_match_id (lexer, "THRU"))
5725 if (lex_match_id (lexer, "HI"))
5726 e = ctpo_cat_nrange (number, DBL_MAX);
5729 if (!lex_force_num (lexer))
5731 e = ctpo_cat_nrange (number, lex_number (lexer));
5736 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string: the same three shapes as for numbers, with a null "string"
   marking an open upper bound. */
5739 else if (lex_is_string (lexer))
5741 struct substring s = parse_substring (lexer, dict);
5743 if (lex_match_id (lexer, "THRU"))
5745 struct substring high;
5747 if (lex_match_id (lexer, "HI"))
5748 high = (struct substring) { .string = NULL };
5751 if (!lex_force_string (lexer))
5756 high = parse_substring (lexer, dict);
5759 e = ctpo_cat_srange (s, high);
5762 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5766 lex_error (lexer, NULL);
/* The closing bracket is mandatory.  If it is missing, release any strings
   the category node already owns. */
5770 if (!lex_force_match (lexer, T_RBRACK))
5772 if (e.op == CTPO_CAT_STRING)
5773 ss_dealloc (&e.string);
5774 else if (e.op == CTPO_CAT_SRANGE)
5776 ss_dealloc (&e.srange[0]);
5777 ss_dealloc (&e.srange[1]);
/* Parenthesized sub-expression: restart at the lowest-precedence level.
   The sub-expression is destroyed if the closing parenthesis is missing. */
5782 else if (lex_match (lexer, T_LPAREN))
5784 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5787 if (!lex_force_match (lexer, T_RPAREN))
5789 ctables_pcexpr_destroy (ep);
5796 lex_error (lexer, NULL);
/* The location covers start_ofs through the last token consumed. */
5800 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5801 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for the negation of SUB.  The node's
   location runs from token offset START_OFS through the token just before
   LEXER's current position, so the caller must already have consumed the
   whole operand. */
5804 static struct ctables_pcexpr *
5805 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5806 struct lexer *lexer, int start_ofs)
5808 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5809 *e = (struct ctables_pcexpr) {
5812 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.

   In the ordinary case this is a left-associative chain of T_EXP operators
   over primaries, with a warning issued when the operator is chained.

   When the current token is a negative number that is immediately followed
   by the exponentiation operator, the number is split up instead: a
   CTPO_CONSTANT holding the negated (hence positive) token value becomes the
   left operand, the exponentiation chain is parsed on top of it, and the
   result is wrapped in a negation node covering the whole operand. */
5817 static struct ctables_pcexpr *
5818 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5820 static const struct operator op = { T_EXP, CTPO_POW };
5822 const char *chain_warning =
5823 _("The exponentiation operator (`**') is left-associative: "
5824 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5825 "To disable this warning, insert parentheses.");
5827 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5828 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5829 ctables_pcexpr_parse_primary,
5832 /* Special case for situations like "-5**6", which must be parsed as
5835 int start_ofs = lex_ofs (lexer);
5836 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5837 *lhs = (struct ctables_pcexpr) {
5838 .op = CTPO_CONSTANT,
5839 .number = -lex_tokval (lexer),
5840 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5844 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5845 lexer, dict, &op, 1,
5846 ctables_pcexpr_parse_primary, chain_warning, lhs);
5850 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5853 /* Parses the unary minus level. */
/* Without a leading T_DASH this simply parses the exponentiation level.
   With one, the function calls itself for the operand, so any number of
   minus signs may be stacked, and wraps the operand in a negation node whose
   location starts at the minus sign. */
5854 static struct ctables_pcexpr *
5855 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5857 int start_ofs = lex_ofs (lexer);
5858 if (!lex_match (lexer, T_DASH))
5859 return ctables_pcexpr_parse_exp (lexer, dict);
5861 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5865 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5868 /* Parses the multiplication and division level. */
/* Operands come from the unary minus level.  No chain warning is given for
   these operators. */
5869 static struct ctables_pcexpr *
5870 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5872 static const struct operator ops[] =
5874 { T_ASTERISK, CTPO_MUL },
5875 { T_SLASH, CTPO_DIV },
5878 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5879 sizeof ops / sizeof *ops,
5880 ctables_pcexpr_parse_neg, NULL);
5883 /* Parses the addition and subtraction level. */
/* This is the lowest-precedence level and the entry point for a whole
   postcompute expression; operands come from the multiplication level.

   T_NEG_NUM is listed as an addition: when a negative-number token follows
   an operand, it is treated as adding that negative number. */
5884 static struct ctables_pcexpr *
5885 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5887 static const struct operator ops[] =
5889 { T_PLUS, CTPO_ADD },
5890 { T_DASH, CTPO_SUB },
5891 { T_NEG_NUM, CTPO_ADD },
5894 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5895 ops, sizeof ops / sizeof *ops,
5896 ctables_pcexpr_parse_mul, NULL);
/* Looks up the postcompute called NAME in CT's "postcomputes" hash map.
   Both the hash (utf8_hash_case_string) and the comparison
   (utf8_strcasecmp) ignore case, so the lookup is case-insensitive. */
5899 static struct ctables_postcompute *
5900 ctables_find_postcompute (struct ctables *ct, const char *name)
5902 struct ctables_postcompute *pc;
5903 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5904 utf8_hash_case_string (name, 0), &ct->postcomputes)
5905 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, of the form
   "&name = EXPR(expression)", and records the postcompute in CT.

   If a postcompute with the same name already exists, a warning is issued
   at the new definition, a note points at the previous one, and the old
   expression and location are destroyed before being replaced.  Otherwise a
   new postcompute is allocated and inserted into ct->postcomputes under the
   case-insensitive hash of its name. */
5911 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The PCOMPUTE keyword itself has already been consumed by the caller, so
   the subcommand starts one token back. */
5914 int pcompute_start = lex_ofs (lexer) - 1;
5916 if (!lex_match (lexer, T_AND))
5918 lex_error_expecting (lexer, "&");
5921 if (!lex_force_id (lexer))
/* Private copy of the identifier that names the postcompute. */
5924 char *name = ss_xstrdup (lex_tokss (lexer));
5927 if (!lex_force_match (lexer, T_EQUALS)
5928 || !lex_force_match_id (lexer, "EXPR")
5929 || !lex_force_match (lexer, T_LPAREN))
/* expr_start and expr_end delimit the tokens of the expression proper; they
   are used further down to produce the label text. */
5935 int expr_start = lex_ofs (lexer);
5936 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5937 int expr_end = lex_ofs (lexer) - 1;
5938 if (!expr || !lex_force_match (lexer, T_RPAREN))
5940 ctables_pcexpr_destroy (expr);
5944 int pcompute_end = lex_ofs (lexer) - 1;
5946 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5949 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition of an existing postcompute. */
5952 msg_at (SW, location, _("New definition of &%s will override the "
5953 "previous definition."),
5955 msg_at (SN, pc->location, _("This is the previous definition."));
5957 ctables_pcexpr_destroy (pc->expr);
5958 msg_location_destroy (pc->location);
/* First definition under this name: the new postcompute takes ownership of
   NAME. */
5963 pc = xmalloc (sizeof *pc);
5964 *pc = (struct ctables_postcompute) { .name = name };
5965 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5966 utf8_hash_case_string (pc->name, 0));
5969 pc->location = location;
/* The label is the source text of the expression.
   NOTE(review): confirm that an existing label is kept or freed, rather
   than overwritten, when a postcompute is redefined. */
5971 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the summary specifications that follow FORMAT in a PPROPERTIES
   subcommand and stores them in SSS, which is reset to an empty set first.

   Each specification is a summary function name, a percentile value between
   0 and 100 when the function is PTILE, and a format specifier.  Parsing
   stops at the end of the command, at a slash, or at a LABEL or
   HIDESOURCECATS keyword, which start the next PPROPERTIES setting. */
5976 ctables_parse_pproperties_format (struct lexer *lexer,
5977 struct ctables_summary_spec_set *sss)
5979 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5981 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5982 && !(lex_token (lexer) == T_ID
5983 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5984 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5985 lex_tokss (lexer)))))
5987 /* Parse function. */
5988 enum ctables_summary_function function;
5989 if (!parse_ctables_summary_function (lexer, &function))
5992 /* Parse percentile. */
5993 double percentile = 0;
5994 if (function == CTSF_PTILE)
5996 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5998 percentile = lex_number (lexer);
/* Parse the format specifier. */
6003 struct fmt_spec format;
6004 bool is_ctables_format;
6005 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array when it is full, then append the new specification. */
6008 if (sss->n >= sss->allocated)
6009 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
6010 sizeof *sss->specs);
6011 sss->specs[sss->n++] = (struct ctables_summary_spec) {
6012 .function = function,
6013 .percentile = percentile,
6015 .is_ctables_format = is_ctables_format,
/* Discards whatever has been collected in SSS (presumably reached on the
   parse-failure paths above). */
6021 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of "&name" references
   to existing postcomputes, followed by settings that are applied to every
   postcompute in that list.

   The settings are LABEL (a string), FORMAT (a list of summary
   specifications) and HIDESOURCECATS (a boolean); the equals sign after
   each keyword is optional.  Referring to a name that has not been defined
   is reported as an error. */
6026 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* Growable array of the postcomputes named on this subcommand. */
6028 struct ctables_postcompute **pcs = NULL;
6030 size_t allocated_pcs = 0;
6032 while (lex_match (lexer, T_AND))
6034 if (!lex_force_id (lexer))
6036 struct ctables_postcompute *pc
6037 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6040 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6045 if (n_pcs >= allocated_pcs)
6046 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Settings continue until the next subcommand or the end of the command. */
6050 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6052 if (lex_match_id (lexer, "LABEL"))
6054 lex_match (lexer, T_EQUALS);
6055 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label, replacing its old one. */
6058 for (size_t i = 0; i < n_pcs; i++)
6060 free (pcs[i]->label);
6061 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6066 else if (lex_match_id (lexer, "FORMAT"))
6068 lex_match (lexer, T_EQUALS);
/* Parse the specifications once into a temporary set, give every
   postcompute its own clone, then discard the temporary. */
6070 struct ctables_summary_spec_set sss;
6071 if (!ctables_parse_pproperties_format (lexer, &sss))
6074 for (size_t i = 0; i < n_pcs; i++)
6077 ctables_summary_spec_set_uninit (pcs[i]->specs);
6079 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6080 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6082 ctables_summary_spec_set_uninit (&sss);
6084 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6086 lex_match (lexer, T_EQUALS);
6087 bool hide_source_cats;
6088 if (!parse_bool (lexer, &hide_source_cats))
6090 for (size_t i = 0; i < n_pcs; i++)
6091 pcs[i]->hide_source_cats = hide_source_cats;
6095 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the time NOW, converted to local time and formatted
   according to the strftime() conversion string FORMAT.

   NOTE(review): the results of localtime() and strftime() are used
   unchecked.  localtime() can return a null pointer, and strftime() returns
   0 and leaves the buffer contents unspecified when the formatted text does
   not fit, so consider checking both before appending. */
6108 put_strftime (struct string *out, time_t now, const char *format)
6110 const struct tm *tm = localtime (&now);
6112 strftime (value, sizeof value, format, tm);
6113 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.  put_title_text() uses the
   result as a condition, so this presumably reports whether the prefix was
   found and leaves *S unchanged when it was not. */
6117 skip_prefix (struct substring *s, struct substring prefix)
6119 if (ss_starts_with (*s, prefix))
6121 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the table expression made up of
   LEXER's tokens at offsets EXPR_START up to, but not including, EXPR_END.

   An identifier that names a variable in DICT which has a variable label is
   rendered as that label; any other identifier is rendered as written.
   Other tokens are rendered as their source text, separated by spaces
   except directly after a left parenthesis or before a right parenthesis. */
6129 put_table_expression (struct string *out, struct lexer *lexer,
6130 struct dictionary *dict, int expr_start, int expr_end)
6133 for (int ofs = expr_start; ofs < expr_end; ofs++)
6135 const struct token *t = lex_ofs_token (lexer, ofs);
/* NOTE(review): square brackets are tracked with a nesting counter,
   presumably so that bracketed text is left out of the rendering --
   confirm. */
6136 if (t->type == T_LBRACK)
6138 else if (t->type == T_RBRACK && nest > 0)
6144 else if (t->type == T_ID)
6146 const struct variable *var
6147 = dict_lookup_var (dict, t->string.string);
6148 const char *label = var ? var_get_label (var) : NULL;
6149 ds_put_cstr (out, label ? label : t->string.string);
/* Any other token: put a space in front unless this is the first token, a
   right parenthesis, or OUT already ends in a space. */
6153 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6154 ds_put_byte (out, ' ');
6156 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6157 ds_put_cstr (out, repr);
/* ...and a space after it unless it is the last token or a left
   parenthesis. */
6160 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6161 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the substitution keywords
   that begin with a right parenthesis:

     )DATE   the time NOW formatted with strftime() conversion "%x"
     )TIME   the time NOW formatted with strftime() conversion "%X"
     )TABLE  the table expression (LEXER tokens EXPR_START to EXPR_END,
             resolved against DICT)

   A right parenthesis that does not start one of these keywords is copied
   through unchanged. */
6167 put_title_text (struct string *out, struct substring in, time_t now,
6168 struct lexer *lexer, struct dictionary *dict,
6169 int expr_start, int expr_end)
/* Copy everything up to the next right parenthesis verbatim. */
6173 size_t chunk = ss_find_byte (in, ')');
6174 ds_put_substring (out, ss_head (in, chunk));
6175 ss_advance (&in, chunk);
/* Nothing left means no more parentheses. */
6176 if (ss_is_empty (in))
6179 if (skip_prefix (&in, ss_cstr (")DATE")))
6180 put_strftime (out, now, "%x");
6181 else if (skip_prefix (&in, ss_cstr (")TIME")))
6182 put_strftime (out, now, "%X");
6183 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6184 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the parenthesis itself and move past it. */
6187 ds_put_byte (out, ')');
6188 ss_advance (&in, 1);
6194 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6196 struct casereader *input = NULL;
6198 struct measure_guesser *mg = measure_guesser_create (ds);
6201 input = proc_open (ds);
6202 measure_guesser_run (mg, input);
6203 measure_guesser_destroy (mg);
6206 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6207 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6208 enum settings_value_show tvars = settings_get_show_variables ();
6209 for (size_t i = 0; i < n_vars; i++)
6210 vlabels[i] = (enum ctables_vlabel) tvars;
6212 struct pivot_table_look *look = pivot_table_look_unshare (
6213 pivot_table_look_ref (pivot_table_look_get_default ()));
6214 look->omit_empty = false;
6216 struct ctables *ct = xmalloc (sizeof *ct);
6217 *ct = (struct ctables) {
6218 .dict = dataset_dict (ds),
6220 .ctables_formats = FMT_SETTINGS_INIT,
6222 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6225 time_t now = time (NULL);
6230 const char *dot_string;
6231 const char *comma_string;
6233 static const struct ctf ctfs[4] = {
6234 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6235 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6236 { CTEF_PAREN, "-,(,),", "-.(.)." },
6237 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6239 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6240 for (size_t i = 0; i < 4; i++)
6242 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6243 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6244 fmt_number_style_from_string (s));
6247 if (!lex_force_match (lexer, T_SLASH))
6250 while (!lex_match_id (lexer, "TABLE"))
6252 if (lex_match_id (lexer, "FORMAT"))
6254 double widths[2] = { SYSMIS, SYSMIS };
6255 double units_per_inch = 72.0;
6257 while (lex_token (lexer) != T_SLASH)
6259 if (lex_match_id (lexer, "MINCOLWIDTH"))
6261 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6264 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6266 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6269 else if (lex_match_id (lexer, "UNITS"))
6271 lex_match (lexer, T_EQUALS);
6272 if (lex_match_id (lexer, "POINTS"))
6273 units_per_inch = 72.0;
6274 else if (lex_match_id (lexer, "INCHES"))
6275 units_per_inch = 1.0;
6276 else if (lex_match_id (lexer, "CM"))
6277 units_per_inch = 2.54;
6280 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6284 else if (lex_match_id (lexer, "EMPTY"))
6289 lex_match (lexer, T_EQUALS);
6290 if (lex_match_id (lexer, "ZERO"))
6292 /* Nothing to do. */
6294 else if (lex_match_id (lexer, "BLANK"))
6295 ct->zero = xstrdup ("");
6296 else if (lex_force_string (lexer))
6298 ct->zero = ss_xstrdup (lex_tokss (lexer));
6304 else if (lex_match_id (lexer, "MISSING"))
6306 lex_match (lexer, T_EQUALS);
6307 if (!lex_force_string (lexer))
6311 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6312 ? ss_xstrdup (lex_tokss (lexer))
6318 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6319 "UNITS", "EMPTY", "MISSING");
6324 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6325 && widths[0] > widths[1])
6327 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6331 for (size_t i = 0; i < 2; i++)
6332 if (widths[i] != SYSMIS)
6334 int *wr = ct->look->width_ranges[TABLE_HORZ];
6335 wr[i] = widths[i] / units_per_inch * 96.0;
6340 else if (lex_match_id (lexer, "VLABELS"))
6342 if (!lex_force_match_id (lexer, "VARIABLES"))
6344 lex_match (lexer, T_EQUALS);
6346 struct variable **vars;
6348 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6352 if (!lex_force_match_id (lexer, "DISPLAY"))
6357 lex_match (lexer, T_EQUALS);
6359 enum ctables_vlabel vlabel;
6360 if (lex_match_id (lexer, "DEFAULT"))
6361 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6362 else if (lex_match_id (lexer, "NAME"))
6364 else if (lex_match_id (lexer, "LABEL"))
6365 vlabel = CTVL_LABEL;
6366 else if (lex_match_id (lexer, "BOTH"))
6368 else if (lex_match_id (lexer, "NONE"))
6372 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6378 for (size_t i = 0; i < n_vars; i++)
6379 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6382 else if (lex_match_id (lexer, "MRSETS"))
6384 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6386 lex_match (lexer, T_EQUALS);
6387 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6390 else if (lex_match_id (lexer, "SMISSING"))
6392 if (lex_match_id (lexer, "VARIABLE"))
6393 ct->smissing_listwise = false;
6394 else if (lex_match_id (lexer, "LISTWISE"))
6395 ct->smissing_listwise = true;
6398 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6402 else if (lex_match_id (lexer, "PCOMPUTE"))
6404 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6407 else if (lex_match_id (lexer, "PPROPERTIES"))
6409 if (!ctables_parse_pproperties (lexer, ct))
6412 else if (lex_match_id (lexer, "WEIGHT"))
6414 if (!lex_force_match_id (lexer, "VARIABLE"))
6416 lex_match (lexer, T_EQUALS);
6417 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6421 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6423 if (lex_match_id (lexer, "COUNT"))
6425 lex_match (lexer, T_EQUALS);
6426 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6429 ct->hide_threshold = lex_integer (lexer);
6432 else if (ct->hide_threshold == 0)
6433 ct->hide_threshold = 5;
6437 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6438 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6439 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6443 if (!lex_force_match (lexer, T_SLASH))
6447 size_t allocated_tables = 0;
6450 if (ct->n_tables >= allocated_tables)
6451 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6452 sizeof *ct->tables);
6454 struct ctables_category *cat = xmalloc (sizeof *cat);
6455 *cat = (struct ctables_category) {
6457 .include_missing = false,
6458 .sort_ascending = true,
6461 struct ctables_categories *c = xmalloc (sizeof *c);
6462 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6463 *c = (struct ctables_categories) {
6470 struct ctables_categories **categories = xnmalloc (n_vars,
6471 sizeof *categories);
6472 for (size_t i = 0; i < n_vars; i++)
6475 struct ctables_table *t = xmalloc (sizeof *t);
6476 *t = (struct ctables_table) {
6478 .slabels_axis = PIVOT_AXIS_COLUMN,
6479 .slabels_visible = true,
6480 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6482 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6483 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6484 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6486 .clabels_from_axis = PIVOT_AXIS_LAYER,
6487 .categories = categories,
6488 .n_categories = n_vars,
6491 ct->tables[ct->n_tables++] = t;
6493 lex_match (lexer, T_EQUALS);
6494 int expr_start = lex_ofs (lexer);
6495 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6497 if (lex_match (lexer, T_BY))
6499 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6500 ct, t, PIVOT_AXIS_COLUMN))
6503 if (lex_match (lexer, T_BY))
6505 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6506 ct, t, PIVOT_AXIS_LAYER))
6510 int expr_end = lex_ofs (lexer);
6512 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6513 && !t->axes[PIVOT_AXIS_LAYER])
6515 lex_error (lexer, _("At least one variable must be specified."));
6519 const struct ctables_axis *scales[PIVOT_N_AXES];
6520 size_t n_scales = 0;
6521 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6523 scales[a] = find_scale (t->axes[a]);
6529 msg (SE, _("Scale variables may appear only on one axis."));
6530 if (scales[PIVOT_AXIS_ROW])
6531 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6532 _("This scale variable appears on the rows axis."));
6533 if (scales[PIVOT_AXIS_COLUMN])
6534 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6535 _("This scale variable appears on the columns axis."));
6536 if (scales[PIVOT_AXIS_LAYER])
6537 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6538 _("This scale variable appears on the layer axis."));
6542 const struct ctables_axis *summaries[PIVOT_N_AXES];
6543 size_t n_summaries = 0;
6544 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6546 summaries[a] = (scales[a]
6548 : find_categorical_summary_spec (t->axes[a]));
6552 if (n_summaries > 1)
6554 msg (SE, _("Summaries may appear only on one axis."));
6555 if (summaries[PIVOT_AXIS_ROW])
6556 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6557 _("This variable on the rows axis has a summary."));
6558 if (summaries[PIVOT_AXIS_COLUMN])
6559 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6560 _("This variable on the columns axis has a summary."));
6561 if (summaries[PIVOT_AXIS_LAYER])
6562 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6563 _("This variable on the layers axis has a summary."));
6566 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6567 if (n_summaries ? summaries[a] : t->axes[a])
6569 t->summary_axis = a;
6573 if (lex_token (lexer) == T_ENDCMD)
6575 if (!ctables_prepare_table (t))
6579 if (!lex_force_match (lexer, T_SLASH))
6582 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6584 if (lex_match_id (lexer, "SLABELS"))
6586 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6588 if (lex_match_id (lexer, "POSITION"))
6590 lex_match (lexer, T_EQUALS);
6591 if (lex_match_id (lexer, "COLUMN"))
6592 t->slabels_axis = PIVOT_AXIS_COLUMN;
6593 else if (lex_match_id (lexer, "ROW"))
6594 t->slabels_axis = PIVOT_AXIS_ROW;
6595 else if (lex_match_id (lexer, "LAYER"))
6596 t->slabels_axis = PIVOT_AXIS_LAYER;
6599 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6603 else if (lex_match_id (lexer, "VISIBLE"))
6605 lex_match (lexer, T_EQUALS);
6606 if (!parse_bool (lexer, &t->slabels_visible))
6611 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6616 else if (lex_match_id (lexer, "CLABELS"))
6618 if (lex_match_id (lexer, "AUTO"))
6620 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6621 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6623 else if (lex_match_id (lexer, "ROWLABELS"))
6625 lex_match (lexer, T_EQUALS);
6626 if (lex_match_id (lexer, "OPPOSITE"))
6627 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6628 else if (lex_match_id (lexer, "LAYER"))
6629 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6632 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6636 else if (lex_match_id (lexer, "COLLABELS"))
6638 lex_match (lexer, T_EQUALS);
6639 if (lex_match_id (lexer, "OPPOSITE"))
6640 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6641 else if (lex_match_id (lexer, "LAYER"))
6642 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6645 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6651 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6656 else if (lex_match_id (lexer, "CRITERIA"))
6658 if (!lex_force_match_id (lexer, "CILEVEL"))
6660 lex_match (lexer, T_EQUALS);
6662 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6664 t->cilevel = lex_number (lexer);
6667 else if (lex_match_id (lexer, "CATEGORIES"))
6669 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6673 else if (lex_match_id (lexer, "TITLES"))
6678 if (lex_match_id (lexer, "CAPTION"))
6679 textp = &t->caption;
6680 else if (lex_match_id (lexer, "CORNER"))
6682 else if (lex_match_id (lexer, "TITLE"))
6686 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6689 lex_match (lexer, T_EQUALS);
6691 struct string s = DS_EMPTY_INITIALIZER;
6692 while (lex_is_string (lexer))
6694 if (!ds_is_empty (&s))
6695 ds_put_byte (&s, ' ');
6696 put_title_text (&s, lex_tokss (lexer), now,
6697 lexer, dataset_dict (ds),
6698 expr_start, expr_end);
6702 *textp = ds_steal_cstr (&s);
6704 while (lex_token (lexer) != T_SLASH
6705 && lex_token (lexer) != T_ENDCMD);
6707 else if (lex_match_id (lexer, "SIGTEST"))
6711 t->chisq = xmalloc (sizeof *t->chisq);
6712 *t->chisq = (struct ctables_chisq) {
6714 .include_mrsets = true,
6715 .all_visible = true,
6721 if (lex_match_id (lexer, "TYPE"))
6723 lex_match (lexer, T_EQUALS);
6724 if (!lex_force_match_id (lexer, "CHISQUARE"))
6727 else if (lex_match_id (lexer, "ALPHA"))
6729 lex_match (lexer, T_EQUALS);
6730 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6732 t->chisq->alpha = lex_number (lexer);
6735 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6737 lex_match (lexer, T_EQUALS);
6738 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6741 else if (lex_match_id (lexer, "CATEGORIES"))
6743 lex_match (lexer, T_EQUALS);
6744 if (lex_match_id (lexer, "ALLVISIBLE"))
6745 t->chisq->all_visible = true;
6746 else if (lex_match_id (lexer, "SUBTOTALS"))
6747 t->chisq->all_visible = false;
6750 lex_error_expecting (lexer,
6751 "ALLVISIBLE", "SUBTOTALS");
6757 lex_error_expecting (lexer, "TYPE", "ALPHA",
6758 "INCLUDEMRSETS", "CATEGORIES");
6762 while (lex_token (lexer) != T_SLASH
6763 && lex_token (lexer) != T_ENDCMD);
6765 else if (lex_match_id (lexer, "COMPARETEST"))
6769 t->pairwise = xmalloc (sizeof *t->pairwise);
6770 *t->pairwise = (struct ctables_pairwise) {
6772 .alpha = { .05, .05 },
6773 .adjust = BONFERRONI,
6774 .include_mrsets = true,
6775 .meansvariance_allcats = true,
6776 .all_visible = true,
6785 if (lex_match_id (lexer, "TYPE"))
6787 lex_match (lexer, T_EQUALS);
6788 if (lex_match_id (lexer, "PROP"))
6789 t->pairwise->type = PROP;
6790 else if (lex_match_id (lexer, "MEAN"))
6791 t->pairwise->type = MEAN;
6794 lex_error_expecting (lexer, "PROP", "MEAN");
6798 else if (lex_match_id (lexer, "ALPHA"))
6800 lex_match (lexer, T_EQUALS);
6802 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6804 double a0 = lex_number (lexer);
6807 lex_match (lexer, T_COMMA);
6808 if (lex_is_number (lexer))
6810 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6812 double a1 = lex_number (lexer);
6815 t->pairwise->alpha[0] = MIN (a0, a1);
6816 t->pairwise->alpha[1] = MAX (a0, a1);
6819 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6821 else if (lex_match_id (lexer, "ADJUST"))
6823 lex_match (lexer, T_EQUALS);
6824 if (lex_match_id (lexer, "BONFERRONI"))
6825 t->pairwise->adjust = BONFERRONI;
6826 else if (lex_match_id (lexer, "BH"))
6827 t->pairwise->adjust = BH;
6828 else if (lex_match_id (lexer, "NONE"))
6829 t->pairwise->adjust = 0;
6832 lex_error_expecting (lexer, "BONFERRONI", "BH",
6837 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6839 lex_match (lexer, T_EQUALS);
6840 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6843 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6845 lex_match (lexer, T_EQUALS);
6846 if (lex_match_id (lexer, "ALLCATS"))
6847 t->pairwise->meansvariance_allcats = true;
6848 else if (lex_match_id (lexer, "TESTEDCATS"))
6849 t->pairwise->meansvariance_allcats = false;
6852 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6856 else if (lex_match_id (lexer, "CATEGORIES"))
6858 lex_match (lexer, T_EQUALS);
6859 if (lex_match_id (lexer, "ALLVISIBLE"))
6860 t->pairwise->all_visible = true;
6861 else if (lex_match_id (lexer, "SUBTOTALS"))
6862 t->pairwise->all_visible = false;
6865 lex_error_expecting (lexer, "ALLVISIBLE",
6870 else if (lex_match_id (lexer, "MERGE"))
6872 lex_match (lexer, T_EQUALS);
6873 if (!parse_bool (lexer, &t->pairwise->merge))
6876 else if (lex_match_id (lexer, "STYLE"))
6878 lex_match (lexer, T_EQUALS);
6879 if (lex_match_id (lexer, "APA"))
6880 t->pairwise->apa_style = true;
6881 else if (lex_match_id (lexer, "SIMPLE"))
6882 t->pairwise->apa_style = false;
6885 lex_error_expecting (lexer, "APA", "SIMPLE");
6889 else if (lex_match_id (lexer, "SHOWSIG"))
6891 lex_match (lexer, T_EQUALS);
6892 if (!parse_bool (lexer, &t->pairwise->show_sig))
6897 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6898 "INCLUDEMRSETS", "MEANSVARIANCE",
6899 "CATEGORIES", "MERGE", "STYLE",
6904 while (lex_token (lexer) != T_SLASH
6905 && lex_token (lexer) != T_ENDCMD);
6909 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6910 "CRITERIA", "CATEGORIES", "TITLES",
6911 "SIGTEST", "COMPARETEST");
6915 if (!lex_match (lexer, T_SLASH))
6919 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6920 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6922 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6926 if (!ctables_prepare_table (t))
6929 while (lex_token (lexer) != T_ENDCMD);
6932 input = proc_open (ds);
6933 bool ok = ctables_execute (ds, input, ct);
6934 ok = proc_commit (ds) && ok;
6936 ctables_destroy (ct);
6937 return ok ? CMD_SUCCESS : CMD_FAILURE;
6942 ctables_destroy (ct);