1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
/* Summary functions for multiple response sets, kept disabled until
   multiple response sets are implemented.  Each row follows the S() layout
   used by the rest of the table: enumerator, syntax name, default label,
   default format class, availability.  Within the table each enumerator's
   syntax name must be unique and must spell out the same components as the
   enumerator itself. */
167 #if 0 /* Multiple response sets not yet implemented. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
448 static void ctables_stack_uninit (struct ctables_stack *);
452 struct hmap_node node;
457 struct ctables_occurrence
459 struct hmap_node node;
463 struct ctables_section
466 struct ctables_table *table;
467 struct ctables_nest *nests[PIVOT_N_AXES];
470 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
471 struct hmap cells; /* Contains "struct ctables_cell"s. */
472 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 static void ctables_section_uninit (struct ctables_section *);
479 struct ctables *ctables;
480 struct ctables_axis *axes[PIVOT_N_AXES];
481 struct ctables_stack stacks[PIVOT_N_AXES];
482 struct ctables_section *sections;
484 enum pivot_axis_type summary_axis;
485 struct ctables_summary_spec_set summary_specs;
486 struct variable **sum_vars;
489 enum pivot_axis_type slabels_axis;
490 bool slabels_visible;
492 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
494 Most commonly, label_axis[a] == a, and in particular we always have
495 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
497 If ROWLABELS or COLLABELS is specified, then one of
498 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
499 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
501 If any category labels are moved, then 'clabels_example' is one of the
502 variables being moved (and it is otherwise NULL). All of the variables
503 being moved have the same width, value labels, and categories, so this
504 example variable can be used to find those out.
506 The remaining members in this group are relevant only if category labels
509 'clabels_values_map' holds a "struct ctables_value" for all the values
510 that appear in all of the variables in the moved categories. It is
511 accumulated as the data is read. Once the data is fully read, its
512 sorted values are put into 'clabels_values' and 'n_clabels_values'.
514 enum pivot_axis_type label_axis[PIVOT_N_AXES];
515 enum pivot_axis_type clabels_from_axis;
516 const struct variable *clabels_example;
517 struct hmap clabels_values_map;
518 struct ctables_value **clabels_values;
519 size_t n_clabels_values;
521 /* Indexed by variable dictionary index. */
522 struct ctables_categories **categories;
531 struct ctables_chisq *chisq;
532 struct ctables_pairwise *pairwise;
535 struct ctables_categories
538 struct ctables_category *cats;
543 struct ctables_category
545 enum ctables_category_type
547 /* Explicit category lists. */
550 CCT_NRANGE, /* Numerical range. */
551 CCT_SRANGE, /* String range. */
556 /* Totals and subtotals. */
560 /* Implicit category lists. */
565 /* For contributing to TOTALN. */
566 CCT_EXCLUDED_MISSING,
570 struct ctables_category *subtotal;
576 double number; /* CCT_NUMBER. */
577 struct substring string; /* CCT_STRING, in dictionary encoding. */
578 double nrange[2]; /* CCT_NRANGE. */
579 struct substring srange[2]; /* CCT_SRANGE. */
583 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
584 bool hide_subcategories; /* CCT_SUBTOTAL. */
587 /* CCT_POSTCOMPUTE. */
590 const struct ctables_postcompute *pc;
591 enum fmt_type parse_format;
594 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
597 bool include_missing;
601 enum ctables_summary_function sort_function;
602 struct variable *sort_var;
607 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
608 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
609 struct msg_location *location;
/* Releases the resources that CAT owns: its source location plus the
   type-specific data selected by a switch on the category type (a string,
   the two string-range bounds, or a total label).  Most of the case labels
   of that switch are not visible in this view. */
613 ctables_category_uninit (struct ctables_category *cat)
618 msg_location_destroy (cat->location);
625 case CCT_POSTCOMPUTE:
629 ss_dealloc (&cat->string);
/* A string range owns both of its bounds. */
633 ss_dealloc (&cat->srange[0]);
634 ss_dealloc (&cat->srange[1]);
639 free (cat->total_label);
647 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be null (that is, have a
   null 'string' pointer).  Returns true if both are null, or if both are
   non-null and ss_equals() reports them equal; a null substring never equals
   a non-null one. */
653 nullable_substring_equal (const struct substring *a,
654 const struct substring *b)
656 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are checked first and then the
   members that matter for that type are compared.  NOTE(review): the
   statement that follows the type check and most of the case labels are not
   visible in this view; presumably categories of different types compare
   unequal. */
660 ctables_category_equal (const struct ctables_category *a,
661 const struct ctables_category *b)
663 if (a->type != b->type)
669 return a->number == b->number;
672 return ss_equals (a->string, b->string);
675 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* Uses the null-aware comparison for each bound of a string range. */
678 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
679 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
685 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by pointer identity, not by contents. */
686 return a->pc == b->pc;
690 return !strcmp (a->total_label, b->total_label);
695 return (a->include_missing == b->include_missing
696 && a->sort_ascending == b->sort_ascending
697 && a->sort_function == b->sort_function
698 && a->sort_var == b->sort_var
699 && a->percentile == b->percentile);
701 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count must be positive on entry
   (this is asserted).  The visible cleanup uninitializes every category in
   C->cats.  NOTE(review): the reference-count decrement, the condition that
   guards the cleanup, and the final frees are not visible in this view. */
709 ctables_categories_unref (struct ctables_categories *c)
714 assert (c->n_refs > 0);
718 for (size_t i = 0; i < c->n_cats; i++)
719 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must have the same number of
   categories and the same 'show_empty' setting, and then each pair of
   categories at the same index is compared with ctables_category_equal().
   NOTE(review): the return statements are not visible in this view. */
725 ctables_categories_equal (const struct ctables_categories *a,
726 const struct ctables_categories *b)
728 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
/* Same length, so a single index walks both arrays. */
731 for (size_t i = 0; i < a->n_cats; i++)
732 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
738 /* Chi-square test (SIGTEST). */
746 /* Pairwise comparison test (COMPARETEST). */
747 struct ctables_pairwise
749 enum { PROP, MEAN } type;
752 bool meansvariance_allcats;
754 enum { BONFERRONI = 1, BH } adjust;
778 struct variable *var;
780 struct ctables_summary_spec_set specs[N_CSVS];
784 struct ctables_axis *subs[2];
787 struct msg_location *loc;
790 static void ctables_axis_destroy (struct ctables_axis *);
799 enum ctables_function_availability
801 CTFA_ALL, /* Any variables. */
802 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
803 //CTFA_MRSETS, /* Only multiple-response sets */
806 struct ctables_summary_spec
808 enum ctables_summary_function function;
809 double percentile; /* CTSF_PTILE only. */
812 struct fmt_spec format;
813 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Clones summary specification SRC into DST.  The visible statement gives
   DST its own duplicate of SRC's label via xstrdup_if_nonnull(), so that the
   two specifications do not share the string.  NOTE(review): how the
   remaining members are copied is not visible in this view. */
820 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
821 const struct ctables_summary_spec *src)
824 dst->label = xstrdup_if_nonnull (src->label);
828 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specifications is
   allocated (or left null when SRC is empty) and each element is cloned with
   ctables_summary_spec_clone().  DST is then overwritten as a whole with a
   compound literal; of its members only 'is_scale', copied from SRC, is
   visible in this view. */
835 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
836 const struct ctables_summary_spec_set *src)
838 struct ctables_summary_spec *specs
839 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
840 for (size_t i = 0; i < src->n; i++)
841 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
843 *dst = (struct ctables_summary_spec_set) {
848 .is_scale = src->is_scale,
/* Releases the contents of SET: uninitializes each of its SET->n summary
   specifications and frees the 'listwise_vars' array.  NOTE(review): any
   further frees are not visible in this view. */
853 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
855 for (size_t i = 0; i < set->n; i++)
856 ctables_summary_spec_uninit (&set->specs[i]);
857 free (set->listwise_vars);
/* Parses a column width setting from LEXER.  An equals sign is skipped if
   present.  The syntax is then either the keyword DEFAULT or a number in the
   closed range [0, DBL_MAX], which is stored in *WIDTH.  NAME is passed to
   the range check.  NOTE(review): what DEFAULT stores in *WIDTH, and the
   return values, are not visible in this view. */
862 parse_col_width (struct lexer *lexer, const char *name, double *width)
864 lex_match (lexer, T_EQUALS);
865 if (lex_match_id (lexer, "DEFAULT"))
867 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
869 *width = lex_number (lexer);
/* Parses a NO or YES keyword from LEXER.  If neither keyword is present, an
   error that names the two expected keywords is reported.  NOTE(review): the
   assignments to *B and the return values are not visible in this view;
   presumably NO stores false and YES stores true. */
879 parse_bool (struct lexer *lexer, bool *b)
881 if (lex_match_id (lexer, "NO"))
883 else if (lex_match_id (lexer, "YES"))
887 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class recorded for summary function F.  The
   lookup table is indexed by the function's enumerator and filled in from
   the AVAILABILITY column of the S() rows. */
893 static enum ctables_function_availability
894 ctables_function_availability (enum ctables_summary_function f)
896 static enum ctables_function_availability availability[] = {
897 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
902 return availability[f];
906 ctables_summary_function_is_count (enum ctables_summary_function f)
912 case CTSF_ROWPCT_COUNT:
913 case CTSF_COLPCT_COUNT:
914 case CTSF_TABLEPCT_COUNT:
915 case CTSF_SUBTABLEPCT_COUNT:
916 case CTSF_LAYERPCT_COUNT:
917 case CTSF_LAYERROWPCT_COUNT:
918 case CTSF_LAYERCOLPCT_COUNT:
920 case CTSF_UROWPCT_COUNT:
921 case CTSF_UCOLPCT_COUNT:
922 case CTSF_UTABLEPCT_COUNT:
923 case CTSF_USUBTABLEPCT_COUNT:
924 case CTSF_ULAYERPCT_COUNT:
925 case CTSF_ULAYERROWPCT_COUNT:
926 case CTSF_ULAYERCOLPCT_COUNT:
929 case CTSF_ROWPCT_VALIDN:
930 case CTSF_COLPCT_VALIDN:
931 case CTSF_TABLEPCT_VALIDN:
932 case CTSF_SUBTABLEPCT_VALIDN:
933 case CTSF_LAYERPCT_VALIDN:
934 case CTSF_LAYERROWPCT_VALIDN:
935 case CTSF_LAYERCOLPCT_VALIDN:
936 case CTSF_ROWPCT_TOTALN:
937 case CTSF_COLPCT_TOTALN:
938 case CTSF_TABLEPCT_TOTALN:
939 case CTSF_SUBTABLEPCT_TOTALN:
940 case CTSF_LAYERPCT_TOTALN:
941 case CTSF_LAYERROWPCT_TOTALN:
942 case CTSF_LAYERCOLPCT_TOTALN:
959 case CTSF_ROWPCT_SUM:
960 case CTSF_COLPCT_SUM:
961 case CTSF_TABLEPCT_SUM:
962 case CTSF_SUBTABLEPCT_SUM:
963 case CTSF_LAYERPCT_SUM:
964 case CTSF_LAYERROWPCT_SUM:
965 case CTSF_LAYERCOLPCT_SUM:
966 case CTSF_UROWPCT_VALIDN:
967 case CTSF_UCOLPCT_VALIDN:
968 case CTSF_UTABLEPCT_VALIDN:
969 case CTSF_USUBTABLEPCT_VALIDN:
970 case CTSF_ULAYERPCT_VALIDN:
971 case CTSF_ULAYERROWPCT_VALIDN:
972 case CTSF_ULAYERCOLPCT_VALIDN:
973 case CTSF_UROWPCT_TOTALN:
974 case CTSF_UCOLPCT_TOTALN:
975 case CTSF_UTABLEPCT_TOTALN:
976 case CTSF_USUBTABLEPCT_TOTALN:
977 case CTSF_ULAYERPCT_TOTALN:
978 case CTSF_ULAYERROWPCT_TOTALN:
979 case CTSF_ULAYERCOLPCT_TOTALN:
991 case CTSF_UROWPCT_SUM:
992 case CTSF_UCOLPCT_SUM:
993 case CTSF_UTABLEPCT_SUM:
994 case CTSF_USUBTABLEPCT_SUM:
995 case CTSF_ULAYERPCT_SUM:
996 case CTSF_ULAYERROWPCT_SUM:
997 case CTSF_ULAYERCOLPCT_SUM:
/* Parses a summary function name from LEXER.  The current token must be an
   identifier.  It is compared with ss_equals_case() against every name in a
   table built from the S() rows plus the short aliases listed below.  On a
   match the function is stored in *F; if nothing matches, an error is
   reported.  NOTE(review): the return values and any token advance are not
   visible in this view. */
1005 parse_ctables_summary_function (struct lexer *lexer,
1006 enum ctables_summary_function *f)
1010 enum ctables_summary_function function;
1011 struct substring name;
1013 static struct pair names[] = {
1014 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
1015 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
1018 /* The .COUNT suffix may be omitted. */
1019 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
1020 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
1021 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
1022 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
1023 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
1024 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
1025 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
1029 if (!lex_force_id (lexer))
/* Linear scan over the whole table, aliases included. */
1032 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
1033 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
1035 *f = names[i].function;
1040 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  The visible statements uninitialize each of its N_CSVS
   summary specification sets, recursively destroy both sub-axes, and destroy
   its source location.  NOTE(review): the conditions that select between the
   spec-set cleanup and the recursion, and the final free of AXIS, are not
   visible in this view. */
1045 ctables_axis_destroy (struct ctables_axis *axis)
1053 for (size_t i = 0; i < N_CSVS; i++)
1054 ctables_summary_spec_set_uninit (&axis->specs[i]);
1059 ctables_axis_destroy (axis->subs[0]);
1060 ctables_axis_destroy (axis->subs[1]);
1063 msg_location_destroy (axis->loc);
/* Allocates a new axis node whose children are SUB0 and SUB1.  Its source
   location spans the tokens from START_OFS through the token just before
   LEXER's current offset.  NOTE(review): the member that receives OP and the
   return statement are not visible in this view. */
1067 static struct ctables_axis *
1068 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1069 struct ctables_axis *sub0,
1070 struct ctables_axis *sub1,
1071 struct lexer *lexer, int start_ofs)
1073 struct ctables_axis *axis = xmalloc (sizeof *axis);
1074 *axis = (struct ctables_axis) {
1076 .subs = { sub0, sub1 },
1077 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1082 struct ctables_axis_parse_ctx
1084 struct lexer *lexer;
1085 struct dictionary *dict;
1087 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The function's format class is looked up in a table filled in from the
   FORMAT column of the S() rows, and a switch on that class yields one of
   three results: an F format of width 40, a PCT format of width 40 with 1
   decimal, or VAR's own print format.  NOTE(review): the case labels are not
   visible in this view; presumably these correspond to the count, percent,
   and general classes in that order. */
1090 static struct fmt_spec
1091 ctables_summary_default_format (enum ctables_summary_function function,
1092 const struct variable *var)
1094 static const enum ctables_format default_formats[] = {
1095 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1099 switch (default_formats[function])
1102 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1105 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1108 return *var_get_print_format (var);
/* Returns a new pivot value to use as the label for SPEC.

   One path produces a default label taken from the LABEL column of the S()
   rows, except that CTSF_PTILE gets a "Percentile %.2f" label.  The other
   path starts from SPEC->label and replaces each occurrence of ")CILEVEL"
   with CILEVEL formatted with "%g".  NOTE(review): the condition choosing
   between the two paths is not visible in this view; presumably the default
   is used when SPEC has no label of its own. */
1115 static struct pivot_value *
1116 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1120 static const char *default_labels[] = {
1121 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
/* Default label. */
1126 return (spec->function == CTSF_PTILE
1127 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1129 : pivot_value_new_text (default_labels[spec->function]));
/* Label from SPEC: IN is the part still to be scanned, OUT the result so
   far. */
1133 struct substring in = ss_cstr (spec->label);
1134 struct substring target = ss_cstr (")CILEVEL");
1136 struct string out = DS_EMPTY_INITIALIZER;
/* Copy everything up to the next placeholder (or to the end of IN). */
1139 size_t chunk = ss_find_substring (in, target);
1140 ds_put_substring (&out, ss_head (in, chunk));
1141 ss_advance (&in, chunk);
/* The returned pivot value takes over the string accumulated in OUT. */
1143 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the placeholder and emit the confidence level in its place. */
1145 ss_advance (&in, target.length);
1146 ds_put_format (&out, "%g", cilevel);
/* Returns the syntax name of summary FUNCTION, looked up in a table that is
   indexed by enumerator and filled in from the NAME column of the S()
   rows. */
1152 ctables_summary_function_name (enum ctables_summary_function function)
1154 static const char *names[] = {
1155 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1159 return names[function];
/* Adds a summary specification for FUNCTION to AXIS, in the specification
   set selected by SV.

   If AXIS is a variable (CTAO_VAR), the function's availability is checked
   first, with error messages reported at LOC and at the axis's own location
   when the variable does not qualify.  A new specification is then appended
   to AXIS->specs[SV], growing the array as needed.  LABEL is duplicated if
   it is nonnull.  If FORMAT is null, the function's default format for the
   variable is used instead.

   Otherwise the specification is added recursively to both sub-axes.

   NOTE(review): the case labels of the availability switch and all of the
   return statements are not visible in this view. */
1163 add_summary_spec (struct ctables_axis *axis,
1164 enum ctables_summary_function function, double percentile,
1165 const char *label, const struct fmt_spec *format,
1166 bool is_ctables_format, const struct msg_location *loc,
1167 enum ctables_summary_variant sv)
1169 if (axis->op == CTAO_VAR)
1171 const char *function_name = ctables_summary_function_name (function);
1172 const char *var_name = var_get_name (axis->var);
1173 switch (ctables_function_availability (function))
1177 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1178 "response sets."), function_name);
1179 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* A non-scale variable is rejected unless the spec is for the CSV_TOTAL
   variant. */
1185 if (!axis->scale && sv != CSV_TOTAL)
1188 _("Summary function %s applies only to scale variables."),
1190 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append to the chosen set, growing the array when it is full. */
1200 struct ctables_summary_spec_set *set = &axis->specs[sv];
1201 if (set->n >= set->allocated)
1202 set->specs = x2nrealloc (set->specs, &set->allocated,
1203 sizeof *set->specs);
1205 struct ctables_summary_spec *dst = &set->specs[set->n++];
1206 *dst = (struct ctables_summary_spec) {
1207 .function = function,
1208 .percentile = percentile,
1209 .label = xstrdup_if_nonnull (label),
1210 .format = (format ? *format
1211 : ctables_summary_default_format (function, axis->var)),
1212 .is_ctables_format = is_ctables_format,
/* Not a variable: apply the same specification to both sub-axes. */
1218 for (size_t i = 0; i < 2; i++)
1219 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1220 format, is_ctables_format, loc, sv))
/* Forward declaration: ctables_axis_parse_primary calls this function for a
   parenthesized sub-expression before its definition appears. */
1226 static struct ctables_axis *ctables_axis_parse_stack (
1227 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name.

   A variable may be followed by "[S]" or "[C]", which force it to be treated
   as scale or as not scale respectively; without either marker the variable
   is scale exactly when its measurement level is MEASURE_SCALE.  A string
   variable that ends up marked as scale is reported as an error and the
   newly created axis is destroyed. */
1230 static struct ctables_axis *
1231 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
/* Parenthesized form: parse the inner expression as a stack expression and
   require the closing parenthesis, discarding the result on failure. */
1233 if (lex_match (ctx->lexer, T_LPAREN))
1235 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1236 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1238 ctables_axis_destroy (sub);
/* Otherwise the expression must begin with an identifier, which is parsed as
   a variable from the context's dictionary. */
1244 if (!lex_force_id (ctx->lexer))
/* Remember the starting token offset so that the axis location can cover the
   variable name and any [S]/[C] marker. */
1247 int start_ofs = lex_ofs (ctx->lexer);
1248 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1252 struct ctables_axis *axis = xmalloc (sizeof *axis);
1253 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1255 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1256 : lex_match_phrase (ctx->lexer, "[C]") ? false
1257 : var_get_measure (var) == MEASURE_SCALE);
/* The location ends at the offset just before the current token
   (lex_ofs - 1). */
1258 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1259 lex_ofs (ctx->lexer) - 1);
1260 if (axis->scale && var_is_alpha (var))
1262 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1264 var_get_name (var));
1265 ctables_axis_destroy (axis);
/* Tests whether S contains at least one ASCII decimal digit.  strcspn yields
   the length of the leading digit-free span of S, so the character at that
   index is either the first digit or, if there is none, the terminating
   null. */
1273 has_digit (const char *s)
1275 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into FORMAT.

   The type name is first read in abstract form together with its width and
   decimals.  The names NEGPAREN, NEQUAL, PAREN and PCTPAREN (compared without
   regard to case) select the corresponding CTEF_* type; for these the
   diagnostics below require a width of 2 or greater and a width greater than
   the number of decimals, and *IS_CTABLES_FORMAT is set to true.  Any other
   name sets *IS_CTABLES_FORMAT to false and is handed to
   parse_format_specifier, with additional checks that the format is usable
   for output and compatible with numeric values. */
1279 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1280 bool *is_ctables_format)
1282 char type[FMT_TYPE_LEN_MAX + 1];
1283 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
/* CTABLES-specific format names. */
1286 if (!strcasecmp (type, "NEGPAREN"))
1287 format->type = CTEF_NEGPAREN;
1288 else if (!strcasecmp (type, "NEQUAL"))
1289 format->type = CTEF_NEQUAL;
1290 else if (!strcasecmp (type, "PAREN"))
1291 format->type = CTEF_PAREN;
1292 else if (!strcasecmp (type, "PCTPAREN"))
1293 format->type = CTEF_PCTPAREN;
/* Ordinary format: the result is the conjunction of the regular parse and
   the two format checks. */
1296 *is_ctables_format = false;
1297 return (parse_format_specifier (lexer, format)
1298 && fmt_check_output (format)
1299 && fmt_check_type_compat (format, VAL_NUMERIC));
1305 lex_next_error (lexer, -1, -1,
1306 _("Output format %s requires width 2 or greater."), type);
/* Decimals may be at most width - 2, i.e. the width must exceed the number of
   decimals by more than one. */
1309 else if (format->d > format->w - 1)
1311 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1312 "greater than decimals."), type);
1317 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function, a percentile in the
   closed range 0..100 when the function is CTSF_PTILE, an optional string
   label, and an optional output format (recognized when the next token is an
   identifier that contains a digit).  A comma may follow each specification.
   While cell summaries (CSV_CELL) are being parsed, the keyword TOTALS must be
   followed by `['; when SV is CSV_TOTAL, a closing `]' must be followed by a
   second `]'. */
1322 static struct ctables_axis *
1323 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1325 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
/* Nothing more to parse if the primary failed or no `[' follows it. */
1326 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
/* Specifications apply to cell summaries until stated otherwise. */
1329 enum ctables_summary_variant sv = CSV_CELL;
/* Start offset of this specification, used to build its source location. */
1332 int start_ofs = lex_ofs (ctx->lexer);
1334 /* Parse function. */
1335 enum ctables_summary_function function;
1336 if (!parse_ctables_summary_function (ctx->lexer, &function))
1339 /* Parse percentile. */
1340 double percentile = 0;
1341 if (function == CTSF_PTILE)
1343 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1345 percentile = lex_number (ctx->lexer);
1346 lex_get (ctx->lexer);
/* Optional label: a string token is copied into LABEL. */
1351 if (lex_is_string (ctx->lexer))
1353 label = ss_xstrdup (lex_tokss (ctx->lexer));
1354 lex_get (ctx->lexer);
/* Optional format.  FORMATP is what gets passed on to add_summary_spec. */
1358 struct fmt_spec format;
1359 const struct fmt_spec *formatp;
1360 bool is_ctables_format = false;
1361 if (lex_token (ctx->lexer) == T_ID
1362 && has_digit (lex_tokcstr (ctx->lexer)))
1364 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1365 &is_ctables_format))
/* The location covers the tokens of this specification, from START_OFS up to
   the one before the current token. */
1375 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1376 lex_ofs (ctx->lexer) - 1);
1377 add_summary_spec (sub, function, percentile, label, formatp,
1378 is_ctables_format, loc, sv);
1380 msg_location_destroy (loc);
/* A separating comma is optional. */
1382 lex_match (ctx->lexer, T_COMMA);
1383 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1385 if (!lex_force_match (ctx->lexer, T_LBRACK))
1389 else if (lex_match (ctx->lexer, T_RBRACK))
1391 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
/* NOTE(review): presumably the error path -- the parsed axis is released. */
1398 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   axis (CTAO_VAR) yields itself when it is marked scale and NULL otherwise;
   for other axes both sub-axes are searched recursively. */
1402 static const struct ctables_axis *
1403 find_scale (const struct ctables_axis *axis)
1407 else if (axis->op == CTAO_VAR)
1408 return axis->scale ? axis : NULL;
1411 for (size_t i = 0; i < 2; i++)
1413 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a non-scale variable that has at
   least one cell summary specification (specs[CSV_CELL].n nonzero).  A
   variable axis yields itself when it qualifies and NULL otherwise; for other
   axes both sub-axes are searched recursively. */
1421 static const struct ctables_axis *
1422 find_categorical_summary_spec (const struct ctables_axis *axis)
1426 else if (axis->op == CTAO_VAR)
1427 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1430 for (size_t i = 0; i < 2; i++)
1432 const struct ctables_axis *sum
1433 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions joined by the T_GT token,
   combining each left-hand and right-hand side into a CTAO_NEST node.

   Two restrictions are diagnosed on each nest: a scale variable may not
   appear on both the outer and the inner side, and a categorical variable on
   the outer side may not carry summaries.  The offending nest node is
   destroyed in both cases. */
1441 static struct ctables_axis *
1442 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
/* START_OFS marks the beginning of the whole expression and is passed to
   every nest node created below. */
1444 int start_ofs = lex_ofs (ctx->lexer);
1445 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1449 while (lex_match (ctx->lexer, T_GT))
1451 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1455 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1456 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Reject a scale variable on both sides of the nesting operator. */
1458 const struct ctables_axis *outer_scale = find_scale (lhs);
1459 const struct ctables_axis *inner_scale = find_scale (rhs);
1460 if (outer_scale && inner_scale)
1462 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1463 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1464 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1465 ctables_axis_destroy (nest);
/* Look for a categorical variable with summaries on the outer side. */
1469 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1472 msg_at (SE, nest->loc,
1473 _("Summaries may only be requested for categorical variables "
1474 "at the innermost nesting level."));
1475 msg_at (SN, outer_sum->loc,
1476 _("This outer categorical variable has a summary."));
1477 ctables_axis_destroy (nest);
/* Parses one or more nest expressions joined by the T_PLUS token.  Each
   additional operand is combined with everything parsed so far into a
   CTAO_STACK node, so the resulting tree grows to the left. */
1487 static struct ctables_axis *
1488 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1490 int start_ofs = lex_ofs (ctx->lexer);
1491 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1495 while (lex_match (ctx->lexer, T_PLUS))
1497 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1501 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1502 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores the result in
   T->axes[A].  The tokens BY, `/' and end-of-command are checked for first
   and handled separately (NOTE(review): presumably they denote an empty
   axis).  Otherwise the result is true exactly when a non-null axis was
   parsed. */
1509 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1510 struct ctables *ct, struct ctables_table *t,
1511 enum pivot_axis_type a)
1513 if (lex_token (lexer) == T_BY
1514 || lex_token (lexer) == T_SLASH
1515 || lex_token (lexer) == T_ENDCMD)
1518 struct ctables_axis_parse_ctx ctx = {
1524 t->axes[a] = ctables_axis_parse_stack (&ctx);
1525 return t->axes[a] != NULL;
/* Releases CHISQ.  ctables_table_destroy calls this with the table's `chisq'
   member.  NOTE(review): only the signature is visible here; confirm what the
   body frees. */
1529 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Releases PAIRWISE.  ctables_table_destroy calls this with the table's
   `pairwise' member.  NOTE(review): only the signature is visible here;
   confirm what the body frees. */
1535 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: its sections, its per-variable
   category references, the axis tree and stack of every pivot axis, the
   summary spec array, the column-label value map, and the chi-square and
   pairwise settings. */
1541 ctables_table_destroy (struct ctables_table *t)
1546 for (size_t i = 0; i < t->n_sections; i++)
1547 ctables_section_uninit (&t->sections[i]);
/* Each element is released through ctables_categories_unref; then the array
   itself is freed. */
1550 for (size_t i = 0; i < t->n_categories; i++)
1551 ctables_categories_unref (t->categories[i]);
1552 free (t->categories);
1554 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1556 ctables_axis_destroy (t->axes[a]);
1557 ctables_stack_uninit (&t->stacks[a]);
1559 free (t->summary_specs.specs);
/* Empty the map of column-label values.  The _SAFE iteration form is used
   because nodes are deleted during the walk.  Each value is destroyed with
   the width of T->clabels_example. */
1561 struct ctables_value *ctv, *next_ctv;
1562 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1563 &t->clabels_values_map)
1565 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1566 hmap_delete (&t->clabels_values_map, &ctv->node);
1569 hmap_destroy (&t->clabels_values_map);
1570 free (t->clabels_values);
1576 ctables_chisq_destroy (t->chisq);
1577 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: every postcompute (its location,
   expression and summary specs), the CTABLES format settings, the reference
   to the table look, and every table. */
1582 ctables_destroy (struct ctables *ct)
/* The _SAFE iteration form is used because each postcompute is removed from
   the map while the map is being walked. */
1587 struct ctables_postcompute *pc, *next_pc;
1588 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1592 msg_location_destroy (pc->location);
1593 ctables_pcexpr_destroy (pc->expr);
1597 ctables_summary_spec_set_uninit (pc->specs);
1600 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1604 fmt_settings_uninit (&ct->ctables_formats);
1605 pivot_table_look_unref (ct->look);
1609 for (size_t i = 0; i < ct->n_tables; i++)
1610 ctables_table_destroy (ct->tables[i]);
/* Builds, by value, a category whose `nrange' member holds the numeric bounds
   LOW and HIGH, in that order. */
1615 static struct ctables_category
1616 cct_nrange (double low, double high)
1618 return (struct ctables_category) {
1620 .nrange = { low, high }
/* Builds, by value, a category whose `srange' member holds the string bounds
   LOW and HIGH, in that order.  The substrings are stored as given, without
   copying their contents.  Callers in this file pass a substring with a null
   `string' pointer for an open-ended bound. */
1624 static struct ctables_category
1625 cct_srange (struct substring low, struct substring high)
1627 return (struct ctables_category) {
1629 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category, after the keyword
   itself has been consumed, and stores a CCT_SUBTOTAL category in *CAT.

   If `=' follows, a string token is required and becomes the label; otherwise
   the translated default "Subtotal" is used.  HIDE_SUBCATEGORIES is copied
   into the category unchanged (callers pass true for HSUBTOTAL). */
1634 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1635 struct ctables_category *cat)
1638 if (lex_match (lexer, T_EQUALS))
1640 if (!lex_force_string (lexer))
/* The label is a freshly allocated copy of the token's string. */
1643 total_label = ss_xstrdup (lex_tokss (lexer));
1647 total_label = xstrdup (_("Subtotal"));
1649 *cat = (struct ctables_category) {
1650 .type = CCT_SUBTOTAL,
1651 .hide_subcategories = hide_subcategories,
1652 .total_label = total_label
/* Produces a substring from the current string token of LEXER: the token text
   is recoded between "UTF-8" and the encoding of DICT, and trailing spaces
   are then trimmed.
   NOTE(review): presumably the conversion is from UTF-8 into the dictionary
   encoding -- confirm against the parameter order of recode_substring_pool.
   A null pool is passed, so the result is presumably heap-allocated and owned
   by the caller. */
1657 static struct substring
1658 parse_substring (struct lexer *lexer, struct dictionary *dict)
1660 struct substring s = recode_substring_pool (
1661 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1662 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicitly listed category into *CAT.  The accepted forms, in
   the order they are tried:

     OTHERNM                     -> CCT_OTHERNM
     MISSING                     -> CCT_MISSING
     SUBTOTAL / HSUBTOTAL        -> via ctables_table_parse_subtotal
     LO THRU string-or-number    -> range with an open lower bound
     number [THRU HI | number]   -> numeric range or single number
     string [THRU HI | string]   -> string range or CCT_STRING
     & identifier                -> CCT_POSTCOMPUTE, looked up in CT

   Anything else is reported through lex_error. */
1668 ctables_table_parse_explicit_category (struct lexer *lexer,
1669 struct dictionary *dict,
1671 struct ctables_category *cat)
1673 if (lex_match_id (lexer, "OTHERNM"))
1674 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1675 else if (lex_match_id (lexer, "MISSING"))
1676 *cat = (struct ctables_category) { .type = CCT_MISSING };
1677 else if (lex_match_id (lexer, "SUBTOTAL"))
1678 return ctables_table_parse_subtotal (lexer, false, cat);
1679 else if (lex_match_id (lexer, "HSUBTOTAL"))
1680 return ctables_table_parse_subtotal (lexer, true, cat);
1681 else if (lex_match_id (lexer, "LO"))
1683 if (!lex_force_match_id (lexer, "THRU"))
/* Open lower bound: a null string for string ranges, -DBL_MAX for numeric
   ranges. */
1685 if (lex_is_string (lexer))
1687 struct substring sr0 = { .string = NULL };
1688 struct substring sr1 = parse_substring (lexer, dict);
1689 *cat = cct_srange (sr0, sr1);
1691 else if (lex_force_num (lexer))
1693 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1699 else if (lex_is_number (lexer))
1701 double number = lex_number (lexer);
/* "THRU HI" leaves the upper bound open, represented as DBL_MAX. */
1703 if (lex_match_id (lexer, "THRU"))
1705 if (lex_match_id (lexer, "HI"))
1706 *cat = cct_nrange (number, DBL_MAX);
1709 if (!lex_force_num (lexer))
1711 *cat = cct_nrange (number, lex_number (lexer));
1716 *cat = (struct ctables_category) {
1721 else if (lex_is_string (lexer))
1723 struct substring s = parse_substring (lexer, dict);
1724 if (lex_match_id (lexer, "THRU"))
/* "THRU HI" leaves the upper bound open, represented by a null string. */
1726 if (lex_match_id (lexer, "HI"))
1728 struct substring sr1 = { .string = NULL };
1729 *cat = cct_srange (s, sr1);
1733 if (!lex_force_string (lexer))
1738 struct substring sr1 = parse_substring (lexer, dict);
1739 *cat = cct_srange (s, sr1);
1743 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* `&name' refers to a postcompute; the name is looked up in CT and an unknown
   name is diagnosed. */
1745 else if (lex_match (lexer, T_AND))
1747 if (!lex_force_id (lexer))
1749 struct ctables_postcompute *pc = ctables_find_postcompute (
1750 ct, lex_tokcstr (lexer));
/* The error location spans the `&' token (offset -1) and the identifier
   (offset 0). */
1753 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1754 msg_at (SE, loc, _("Unknown postcompute &%s."),
1755 lex_tokcstr (lexer));
1756 msg_location_destroy (loc);
1761 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1765 lex_error (lexer, NULL);
/* Parses the category text S as input format FORMAT by calling data_in with
   the encoding of DICT and the current format settings.  If data_in returns
   an error string, the error is reported at LOCATION.  N is the output
   parameter for the parsed number (NOTE(review): the assignment to *N is not
   visible in this listing). */
1773 parse_category_string (struct msg_location *location,
1774 struct substring s, const struct dictionary *dict,
1775 enum fmt_type format, double *n)
1778 char *error = data_in (s, dict_get_encoding (dict), format,
1779 settings_get_fmt_settings (), &v, 0, NULL);
1782 msg_at (SE, location,
1783 _("Failed to parse category specification as format %s: %s."),
1784 fmt_name (format), error);
/* Searches the categories in CATS for the one that the postcompute operand E
   refers to.  The operand type E->op selects the matching rule:

     CTPO_CAT_NUMBER    a CCT_NUMBER category with an equal number
     CTPO_CAT_STRING    a CCT_STRING category with an equal string
     CTPO_CAT_NRANGE    a CCT_NRANGE category with both bounds equal
     CTPO_CAT_SRANGE    a CCT_SRANGE category with both bounds equal, where a
                        bound may be null
     CTPO_CAT_MISSING   a CCT_MISSING category
     CTPO_CAT_OTHERNM   a CCT_OTHERNM category
     CTPO_CAT_SUBTOTAL  a CCT_SUBTOTAL category, selected through
                        E->subtotal_index
     CTPO_CAT_TOTAL     a CCT_TOTAL category

   A subtotal reference with index 0 is treated specially after the loop when
   more than one subtotal was counted. */
1793 static struct ctables_category *
1794 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1795 const struct ctables_pcexpr *e)
1797 struct ctables_category *best = NULL;
/* Number of subtotal categories counted so far; compared against
   E->subtotal_index below. */
1798 size_t n_subtotals = 0;
1799 for (size_t i = 0; i < cats->n_cats; i++)
1801 struct ctables_category *cat = &cats->cats[i];
1804 case CTPO_CAT_NUMBER:
1805 if (cat->type == CCT_NUMBER && cat->number == e->number)
1809 case CTPO_CAT_STRING:
1810 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1814 case CTPO_CAT_NRANGE:
1815 if (cat->type == CCT_NRANGE
1816 && cat->nrange[0] == e->nrange[0]
1817 && cat->nrange[1] == e->nrange[1])
1821 case CTPO_CAT_SRANGE:
1822 if (cat->type == CCT_SRANGE
1823 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1824 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1828 case CTPO_CAT_MISSING:
1829 if (cat->type == CCT_MISSING)
1833 case CTPO_CAT_OTHERNM:
1834 if (cat->type == CCT_OTHERNM)
1838 case CTPO_CAT_SUBTOTAL:
1839 if (cat->type == CCT_SUBTOTAL)
1842 if (e->subtotal_index == n_subtotals)
1844 else if (e->subtotal_index == 0)
1849 case CTPO_CAT_TOTAL:
1850 if (cat->type == CCT_TOTAL)
/* A subtotal reference without a position (index 0) is ambiguous when there
   are several subtotals. */
1864 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute operand E refers to, converting
   string operands to numbers first when PARSE_FORMAT is not FMT_F.

   In that case a CTPO_CAT_STRING operand is parsed with PARSE_FORMAT and
   looked up as a CTPO_CAT_NUMBER, and a CTPO_CAT_SRANGE operand has each
   bound parsed and is looked up as a CTPO_CAT_NRANGE.  A null lower bound
   becomes -DBL_MAX and a null upper bound becomes DBL_MAX.  Every other
   operand is looked up unchanged. */
1869 static struct ctables_category *
1870 ctables_find_category_for_postcompute (const struct dictionary *dict,
1871 const struct ctables_categories *cats,
1872 enum fmt_type parse_format,
1873 const struct ctables_pcexpr *e)
1875 if (parse_format != FMT_F)
1877 if (e->op == CTPO_CAT_STRING)
1880 if (!parse_category_string (e->location, e->string, dict,
1881 parse_format, &number))
/* Temporary numeric operand that borrows the location of E. */
1884 struct ctables_pcexpr e2 = {
1885 .op = CTPO_CAT_NUMBER,
1887 .location = e->location,
1889 return ctables_find_category_for_postcompute__ (cats, &e2);
1891 else if (e->op == CTPO_CAT_SRANGE)
/* Open bounds map to the extreme finite doubles. */
1894 if (!e->srange[0].string)
1895 nrange[0] = -DBL_MAX;
1896 else if (!parse_category_string (e->location, e->srange[0], dict,
1897 parse_format, &nrange[0]))
1900 if (!e->srange[1].string)
1901 nrange[1] = DBL_MAX;
1902 else if (!parse_category_string (e->location, e->srange[1], dict,
1903 parse_format, &nrange[1]))
1906 struct ctables_pcexpr e2 = {
1907 .op = CTPO_CAT_NRANGE,
1908 .nrange = { nrange[0], nrange[1] },
1909 .location = e->location,
1911 return ctables_find_category_for_postcompute__ (cats, &e2);
1914 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that every category referenced by postcompute expression E can be
   found in CATS, recursing into both sub-expressions of E.

   PC_CAT is the computed category under examination; its parse_format
   controls how string operands are interpreted, and its location is used for
   the main error message.  CATS_LOCATION is the location of the category list
   and is used in the hints.

   The diagnostics below cover two situations: a SUBTOTAL reference without a
   position while the list contains more than one subtotal, and a reference to
   a category that is not in the list.

   The recursive call tests the result with `!', so a false result presumably
   signals failure. */
1918 ctables_recursive_check_postcompute (struct dictionary *dict,
1919 const struct ctables_pcexpr *e,
1920 struct ctables_category *pc_cat,
1921 const struct ctables_categories *cats,
1922 const struct msg_location *cats_location)
/* Operand types that refer to a category. */
1926 case CTPO_CAT_NUMBER:
1927 case CTPO_CAT_STRING:
1928 case CTPO_CAT_NRANGE:
1929 case CTPO_CAT_SRANGE:
1930 case CTPO_CAT_MISSING:
1931 case CTPO_CAT_OTHERNM:
1932 case CTPO_CAT_SUBTOTAL:
1933 case CTPO_CAT_TOTAL:
1935 struct ctables_category *cat = ctables_find_category_for_postcompute (
1936 dict, cats, pc_cat->parse_format, e);
/* For a subtotal reference without a position, count the subtotals in the
   list so that an ambiguous reference can be explained. */
1939 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1941 size_t n_subtotals = 0;
1942 for (size_t i = 0; i < cats->n_cats; i++)
1943 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1944 if (n_subtotals > 1)
1946 msg_at (SE, cats_location,
1947 ngettext ("These categories include %zu instance "
1948 "of SUBTOTAL or HSUBTOTAL, so references "
1949 "from computed categories must refer to "
1950 "subtotals by position, "
1951 "e.g. SUBTOTAL[1].",
1952 "These categories include %zu instances "
1953 "of SUBTOTAL or HSUBTOTAL, so references "
1954 "from computed categories must refer to "
1955 "subtotals by position, "
1956 "e.g. SUBTOTAL[1].",
1959 msg_at (SN, e->location,
1960 _("This is the reference that lacks a position."));
/* General case: the referenced category is missing.  The hint that follows
   depends on the kind of reference. */
1965 msg_at (SE, pc_cat->location,
1966 _("Computed category &%s references a category not included "
1967 "in the category list."),
1969 msg_at (SN, e->location, _("This is the missing category."));
1970 if (e->op == CTPO_CAT_SUBTOTAL)
1971 msg_at (SN, cats_location,
1972 _("To fix the problem, add subtotals to the "
1973 "list of categories here."));
1974 else if (e->op == CTPO_CAT_TOTAL)
1975 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1976 "CATEGORIES specification."));
1978 msg_at (SN, cats_location,
1979 _("To fix the problem, add the missing category to the "
1980 "list of categories here."));
1983 if (pc_cat->pc->hide_source_cats)
/* Check both operands; null sub-expressions are skipped. */
1997 for (size_t i = 0; i < 2; i++)
1998 if (e->subs[i] && !ctables_recursive_check_postcompute (
1999 dict, e->subs[i], pc_cat, cats, cats_location))
/* Examines the N_VARS variables in VARS and reports an error at the location
   of CAT for a numeric variable, since the category specification in CAT may
   be applied only to string variables.  Callers test the result with `!', so
   a false result presumably signals that a numeric variable was found. */
2008 all_strings (struct variable **vars, size_t n_vars,
2009 const struct ctables_category *cat)
2011 for (size_t j = 0; j < n_vars; j++)
2012 if (var_is_numeric (vars[j]))
2014 msg_at (SE, cat->location,
2015 _("This category specification may be applied only to string "
2016 "variables, but this subcommand tries to apply it to "
2017 "numeric variable %s."),
2018 var_get_name (vars[j]));
/* Parses the body of a CATEGORIES specification for table T:

     VARIABLES=var-list
       [ '[' explicit-category [,] ... ']' ]
       [ORDER=A|D] [KEY=VALUE|LABEL|summary-function...]
       [MISSING=INCLUDE|EXCLUDE]
       [TOTAL=bool] [LABEL=string] [POSITION=BEFORE|AFTER]
       [EMPTY=INCLUDE|EXCLUDE]

   ORDER, KEY and MISSING are accepted only when no explicit categories were
   listed (they are guarded by !c->n_cats).  One ctables_categories object is
   shared by all the listed variables: it starts with N_VARS references and
   replaces each variable's previous entry in T->categories.

   After parsing, the explicit categories are validated against the variable
   types.  String categories and string ranges are converted to numbers using
   the variables' common print format type when that type is in the date,
   time, or date-component category. */
2025 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
2026 struct ctables *ct, struct ctables_table *t)
2028 if (!lex_match_id (lexer, "VARIABLES"))
2030 lex_match (lexer, T_EQUALS);
2032 struct variable **vars;
2034 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all variables share one print format type; COMMON_FORMAT
   is reset to NULL as soon as two variables disagree. */
2037 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
2038 for (size_t i = 1; i < n_vars; i++)
2040 const struct fmt_spec *f = var_get_print_format (vars[i]);
2041 if (f->type != common_format->type)
2043 common_format = NULL;
2049 && (fmt_get_category (common_format->type)
2050 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* Create the shared categories object, with one reference per variable, and
   drop each variable's previous entry. */
2052 struct ctables_categories *c = xmalloc (sizeof *c);
2053 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
2054 for (size_t i = 0; i < n_vars; i++)
2056 struct ctables_categories **cp
2057 = &t->categories[var_get_dict_index (vars[i])];
2058 ctables_categories_unref (*cp);
/* Token offsets of the explicit category list; -1 means no list was given. */
2062 size_t allocated_cats = 0;
2063 int cats_start_ofs = -1;
2064 int cats_end_ofs = -1;
2065 if (lex_match (lexer, T_LBRACK))
2067 cats_start_ofs = lex_ofs (lexer);
/* Parse explicit categories until the closing bracket, growing the array as
   needed.  Commas between categories are optional. */
2070 if (c->n_cats >= allocated_cats)
2071 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2073 int start_ofs = lex_ofs (lexer);
2074 struct ctables_category *cat = &c->cats[c->n_cats];
2075 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
2077 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
2080 lex_match (lexer, T_COMMA);
2082 while (!lex_match (lexer, T_RBRACK));
2083 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category, filled in by ORDER, KEY and MISSING
   below. */
2086 struct ctables_category cat = {
2088 .include_missing = false,
2089 .sort_ascending = true,
2091 bool show_totals = false;
2092 char *total_label = NULL;
2093 bool totals_before = false;
/* Keyword settings run up to the next `/' or the end of the command. */
2094 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2096 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2098 lex_match (lexer, T_EQUALS);
2099 if (lex_match_id (lexer, "A"))
2100 cat.sort_ascending = true;
2101 else if (lex_match_id (lexer, "D"))
2102 cat.sort_ascending = false;
2105 lex_error_expecting (lexer, "A", "D");
2109 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2111 lex_match (lexer, T_EQUALS);
2112 if (lex_match_id (lexer, "VALUE"))
2113 cat.type = CCT_VALUE;
2114 else if (lex_match_id (lexer, "LABEL"))
2115 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0..100. */
2118 cat.type = CCT_FUNCTION;
2119 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2122 if (lex_match (lexer, T_LPAREN))
2124 cat.sort_var = parse_variable (lexer, dict);
2128 if (cat.sort_function == CTSF_PTILE)
2130 lex_match (lexer, T_COMMA);
2131 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2133 cat.percentile = lex_number (lexer);
2137 if (!lex_force_match (lexer, T_RPAREN))
2140 else if (ctables_function_availability (cat.sort_function)
/* The forced match is made only for the error it reports; its result is
   deliberately unused. */
2143 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2148 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2150 lex_match (lexer, T_EQUALS);
2151 if (lex_match_id (lexer, "INCLUDE"))
2152 cat.include_missing = true;
2153 else if (lex_match_id (lexer, "EXCLUDE"))
2154 cat.include_missing = false;
2157 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2161 else if (lex_match_id (lexer, "TOTAL"))
2163 lex_match (lexer, T_EQUALS);
2164 if (!parse_bool (lexer, &show_totals))
2167 else if (lex_match_id (lexer, "LABEL"))
2169 lex_match (lexer, T_EQUALS);
2170 if (!lex_force_string (lexer))
2173 total_label = ss_xstrdup (lex_tokss (lexer));
2176 else if (lex_match_id (lexer, "POSITION"))
2178 lex_match (lexer, T_EQUALS);
2179 if (lex_match_id (lexer, "BEFORE"))
2180 totals_before = true;
2181 else if (lex_match_id (lexer, "AFTER"))
2182 totals_before = false;
2185 lex_error_expecting (lexer, "BEFORE", "AFTER");
2189 else if (lex_match_id (lexer, "EMPTY"))
2191 lex_match (lexer, T_EQUALS);
2192 if (lex_match_id (lexer, "INCLUDE"))
2193 c->show_empty = true;
2194 else if (lex_match_id (lexer, "EXCLUDE"))
2195 c->show_empty = false;
2198 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Unknown keyword: one of two lists of expected keywords is reported, the
   shorter one omitting ORDER, KEY and MISSING. */
2205 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2206 "TOTAL", "LABEL", "POSITION", "EMPTY");
2208 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the template above. */
2215 if (c->n_cats >= allocated_cats)
2216 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2217 c->cats[c->n_cats++] = cat;
/* Make room for the totals category, which goes either at index 0 (after the
   existing elements are shifted up) or at the end. */
2222 if (c->n_cats >= allocated_cats)
2223 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2225 struct ctables_category *totals;
2228 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2229 totals = &c->cats[0];
2232 totals = &c->cats[c->n_cats];
2235 *totals = (struct ctables_category) {
2237 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward otherwise,
   assigning each category's `subtotal' pointer along the way.  In the
   backward case the decrement happens inside the loop condition (i-- > 0). */
2241 struct ctables_category *subtotal = NULL;
2242 for (size_t i = totals_before ? 0 : c->n_cats;
2243 totals_before ? i < c->n_cats : i-- > 0;
2244 totals_before ? i++ : 0)
2246 struct ctables_category *cat = &c->cats[i];
2255 cat->subtotal = subtotal;
2258 case CCT_POSTCOMPUTE:
2269 case CCT_EXCLUDED_MISSING:
/* Validate the explicit categories, if a list was given. */
2274 if (cats_start_ofs != -1)
2276 for (size_t i = 0; i < c->n_cats; i++)
2278 struct ctables_category *cat = &c->cats[i];
/* Postcomputes: record how string operands are to be parsed, then check that
   every category they reference is present in the list. */
2281 case CCT_POSTCOMPUTE:
2282 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2283 struct msg_location *cats_location
2284 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2285 bool ok = ctables_recursive_check_postcompute (
2286 dict, cat->pc->expr, cat, c, cats_location);
2287 msg_location_destroy (cats_location);
/* This specification is for numeric variables only; a string variable in the
   list is an error. */
2294 for (size_t j = 0; j < n_vars; j++)
2295 if (var_is_alpha (vars[j]))
2297 msg_at (SE, cat->location,
2298 _("This category specification may be applied "
2299 "only to numeric variables, but this "
2300 "subcommand tries to apply it to string "
2302 var_get_name (vars[j]));
/* A string category is either parsed into a number, which turns it into
   CCT_NUMBER and releases the string, or requires that all variables be
   strings. */
2311 if (!parse_category_string (cat->location, cat->string, dict,
2312 common_format->type, &n))
2315 ss_dealloc (&cat->string);
2317 cat->type = CCT_NUMBER;
2320 else if (!all_strings (vars, n_vars, cat))
/* A string range is handled likewise, bound by bound; a bound with a null
   string is open-ended and is not parsed. */
2329 if (!cat->srange[0].string)
2331 else if (!parse_category_string (cat->location,
2332 cat->srange[0], dict,
2333 common_format->type, &n[0]))
2336 if (!cat->srange[1].string)
2338 else if (!parse_category_string (cat->location,
2339 cat->srange[1], dict,
2340 common_format->type, &n[1]))
2343 ss_dealloc (&cat->srange[0]);
2344 ss_dealloc (&cat->srange[1]);
2346 cat->type = CCT_NRANGE;
2347 cat->nrange[0] = n[0];
2348 cat->nrange[1] = n[1];
2350 else if (!all_strings (vars, n_vars, cat))
2361 case CCT_EXCLUDED_MISSING:
/* Releases the members of NEST that are visible here: the summary spec set of
   every summary variant and the domain array of every domain type. */
2376 ctables_nest_uninit (struct ctables_nest *nest)
2379 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2380 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2381 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2382 free (nest->domains[dt]);
/* Uninitializes each of the STACK->n nests and then frees the nest array. */
2386 ctables_stack_uninit (struct ctables_stack *stack)
2390 for (size_t i = 0; i < stack->n; i++)
2391 ctables_nest_uninit (&stack->nests[i]);
2392 free (stack->nests);
/* Builds the stack obtained by nesting S1 inside S0: one new nest for every
   pair made of a nest from S0 and a nest from S1, so room is allocated for
   S0.n * S1.n nests.

   Each new nest lists the variables of the S0 nest followed by those of the
   S1 nest.  Its scale index comes from the S0 nest if that nest has one
   (i.e. it is not SIZE_MAX), and otherwise from the S1 nest shifted by the
   number of S0 variables.  Its summary specs are cloned from SUMMARY_SRC, one
   of the two source nests.

   Both S0 and S1 are uninitialized before returning, so the function consumes
   its arguments. */
2396 static struct ctables_stack
2397 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2404 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2405 for (size_t i = 0; i < s0.n; i++)
2406 for (size_t j = 0; j < s1.n; j++)
2408 const struct ctables_nest *a = &s0.nests[i];
2409 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists: outer variables first. */
2411 size_t allocate = a->n + b->n;
2412 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2414 for (size_t k = 0; k < a->n; k++)
2415 vars[n++] = a->vars[k];
2416 for (size_t k = 0; k < b->n; k++)
2417 vars[n++] = b->vars[k];
2418 assert (n == allocate);
/* Choose which nest supplies the summary specs, based on which of the two has
   a variable attached to its CSV_CELL specs. */
2420 const struct ctables_nest *summary_src;
2421 if (!a->specs[CSV_CELL].var)
2423 else if (!b->specs[CSV_CELL].var)
2428 struct ctables_nest *new = &stack.nests[stack.n++];
2429 *new = (struct ctables_nest) {
2431 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2432 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2436 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2437 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2439 ctables_stack_uninit (&s0);
2440 ctables_stack_uninit (&s1);
/* Builds the stack that holds the nests of S0 followed by the nests of S1.
   The nest structures are copied by value, so the new stack takes over
   whatever the source nests point to.  Each nest taken from S1 has its
   group_head increased by S0.n to account for its new position. */
2444 static struct ctables_stack
2445 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2447 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2448 for (size_t i = 0; i < s0.n; i++)
2449 stack.nests[stack.n++] = s0.nests[i];
2450 for (size_t i = 0; i < s1.n; i++)
2452 stack.nests[stack.n] = s1.nests[i];
2453 stack.nests[stack.n].group_head += s0.n;
2456 assert (stack.n == s0.n + s1.n);
/* Builds a stack with a single nest for variable axis A.  The nest's scale
   index is 0 when A is scale and SIZE_MAX otherwise.  Summary specs are
   cloned from A for every summary variant, and tagged with A's variable and
   scale flag, only when A has cell summary specs or is scale. */
2462 static struct ctables_stack
2463 var_fts (const struct ctables_axis *a)
/* Room for exactly one variable pointer. */
2465 struct variable **vars = xmalloc (sizeof *vars);
2468 struct ctables_nest *nest = xmalloc (sizeof *nest);
2469 *nest = (struct ctables_nest) {
2472 .scale_idx = a->scale ? 0 : SIZE_MAX,
2474 if (a->specs[CSV_CELL].n || a->scale)
2475 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2477 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2478 nest->specs[sv].var = a->var;
2479 nest->specs[sv].is_scale = a->scale;
2481 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests.  One path yields an empty
   stack; the other visible paths recurse into both sub-axes and combine the
   results with stack_fts or with nest_fts.  NOTE(review): presumably these
   correspond to a null axis, CTAO_STACK and CTAO_NEST respectively -- the
   selecting conditions are not visible in this listing.  AXIS_TYPE is passed
   through unchanged to the recursive calls. */
2484 static struct ctables_stack
2485 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2488 return (struct ctables_stack) { .n = 0 };
2496 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2497 enumerate_fts (axis_type, a->subs[1]));
2500 /* This should consider any of the scale variables found in the result to
2501 be linked to each other listwise for SMISSING=LISTWISE. */
2502 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2503 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function.  Which member is in use depends on
   the function, as the group comments below indicate; ctables_summary_init
   and ctables_summary_uninit select the member with a switch on the spec's
   function. */
2509 union ctables_summary
2511 /* COUNT, VALIDN, TOTALN. */
2514 /* MINIMUM, MAXIMUM, RANGE. */
2521 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2522 struct moments1 *moments;
2524 /* MEDIAN, MODE, PTILE. */
2527 struct casewriter *writer;
2532 /* XXX multiple response */
/* Prepares the accumulator S for the summary function of SS.  The visible
   initializations are:

     - minimum and maximum both start as SYSMIS;
     - the sum-based functions create a moments1 object that tracks moments up
       to the variance;
     - one group of functions creates a sorting casewriter over cases with two
       numeric (width 0) values, ordered ascending by the first value.
       NOTE(review): presumably this is the MEDIAN/MODE/PTILE group named in
       union ctables_summary -- its case labels are not visible here. */
2536 ctables_summary_init (union ctables_summary *s,
2537 const struct ctables_summary_spec *ss)
2539 switch (ss->function)
/* Weighted percentage functions based on COUNT, VALIDN and TOTALN. */
2543 case CTSF_ROWPCT_COUNT:
2544 case CTSF_COLPCT_COUNT:
2545 case CTSF_TABLEPCT_COUNT:
2546 case CTSF_SUBTABLEPCT_COUNT:
2547 case CTSF_LAYERPCT_COUNT:
2548 case CTSF_LAYERROWPCT_COUNT:
2549 case CTSF_LAYERCOLPCT_COUNT:
2550 case CTSF_ROWPCT_VALIDN:
2551 case CTSF_COLPCT_VALIDN:
2552 case CTSF_TABLEPCT_VALIDN:
2553 case CTSF_SUBTABLEPCT_VALIDN:
2554 case CTSF_LAYERPCT_VALIDN:
2555 case CTSF_LAYERROWPCT_VALIDN:
2556 case CTSF_LAYERCOLPCT_VALIDN:
2557 case CTSF_ROWPCT_TOTALN:
2558 case CTSF_COLPCT_TOTALN:
2559 case CTSF_TABLEPCT_TOTALN:
2560 case CTSF_SUBTABLEPCT_TOTALN:
2561 case CTSF_LAYERPCT_TOTALN:
2562 case CTSF_LAYERROWPCT_TOTALN:
2563 case CTSF_LAYERCOLPCT_TOTALN:
/* The U-prefixed (unweighted) variants of the same functions. */
2570 case CTSF_UROWPCT_COUNT:
2571 case CTSF_UCOLPCT_COUNT:
2572 case CTSF_UTABLEPCT_COUNT:
2573 case CTSF_USUBTABLEPCT_COUNT:
2574 case CTSF_ULAYERPCT_COUNT:
2575 case CTSF_ULAYERROWPCT_COUNT:
2576 case CTSF_ULAYERCOLPCT_COUNT:
2577 case CTSF_UROWPCT_VALIDN:
2578 case CTSF_UCOLPCT_VALIDN:
2579 case CTSF_UTABLEPCT_VALIDN:
2580 case CTSF_USUBTABLEPCT_VALIDN:
2581 case CTSF_ULAYERPCT_VALIDN:
2582 case CTSF_ULAYERROWPCT_VALIDN:
2583 case CTSF_ULAYERCOLPCT_VALIDN:
2584 case CTSF_UROWPCT_TOTALN:
2585 case CTSF_UCOLPCT_TOTALN:
2586 case CTSF_UTABLEPCT_TOTALN:
2587 case CTSF_USUBTABLEPCT_TOTALN:
2588 case CTSF_ULAYERPCT_TOTALN:
2589 case CTSF_ULAYERROWPCT_TOTALN:
2590 case CTSF_ULAYERCOLPCT_TOTALN:
/* No value seen yet: both extremes start as system-missing. */
2600 s->min = s->max = SYSMIS;
/* Sum-based functions accumulate through a moments1 object. */
2608 case CTSF_ROWPCT_SUM:
2609 case CTSF_COLPCT_SUM:
2610 case CTSF_TABLEPCT_SUM:
2611 case CTSF_SUBTABLEPCT_SUM:
2612 case CTSF_LAYERPCT_SUM:
2613 case CTSF_LAYERROWPCT_SUM:
2614 case CTSF_LAYERCOLPCT_SUM:
2619 case CTSF_UVARIANCE:
2620 case CTSF_UROWPCT_SUM:
2621 case CTSF_UCOLPCT_SUM:
2622 case CTSF_UTABLEPCT_SUM:
2623 case CTSF_USUBTABLEPCT_SUM:
2624 case CTSF_ULAYERPCT_SUM:
2625 case CTSF_ULAYERROWPCT_SUM:
2626 case CTSF_ULAYERCOLPCT_SUM:
2627 s->moments = moments1_create (MOMENT_VARIANCE);
/* Cases written to the sorting writer hold two numeric values and are ordered
   ascending by the first one (index 0).  The temporary ordering and prototype
   are released once the writer has been created. */
2637 struct caseproto *proto = caseproto_create ();
2638 proto = caseproto_add_width (proto, 0);
2639 proto = caseproto_add_width (proto, 0);
2641 struct subcase ordering;
2642 subcase_init (&ordering, 0, 0, SC_ASCEND);
2643 s->writer = sort_create_writer (&ordering, proto);
2644 subcase_uninit (&ordering);
2645 caseproto_unref (proto);
/* Releases whatever ctables_summary_init allocated in S for the summary
   function of SS.  The sum-based functions destroy their moments1 object, and
   one group of functions destroys its casewriter.  No cleanup is visible for
   the count-based percentage functions. */
2655 ctables_summary_uninit (union ctables_summary *s,
2656 const struct ctables_summary_spec *ss)
2658 switch (ss->function)
/* Weighted percentage functions based on COUNT, VALIDN and TOTALN. */
2662 case CTSF_ROWPCT_COUNT:
2663 case CTSF_COLPCT_COUNT:
2664 case CTSF_TABLEPCT_COUNT:
2665 case CTSF_SUBTABLEPCT_COUNT:
2666 case CTSF_LAYERPCT_COUNT:
2667 case CTSF_LAYERROWPCT_COUNT:
2668 case CTSF_LAYERCOLPCT_COUNT:
2669 case CTSF_ROWPCT_VALIDN:
2670 case CTSF_COLPCT_VALIDN:
2671 case CTSF_TABLEPCT_VALIDN:
2672 case CTSF_SUBTABLEPCT_VALIDN:
2673 case CTSF_LAYERPCT_VALIDN:
2674 case CTSF_LAYERROWPCT_VALIDN:
2675 case CTSF_LAYERCOLPCT_VALIDN:
2676 case CTSF_ROWPCT_TOTALN:
2677 case CTSF_COLPCT_TOTALN:
2678 case CTSF_TABLEPCT_TOTALN:
2679 case CTSF_SUBTABLEPCT_TOTALN:
2680 case CTSF_LAYERPCT_TOTALN:
2681 case CTSF_LAYERROWPCT_TOTALN:
2682 case CTSF_LAYERCOLPCT_TOTALN:
/* The U-prefixed (unweighted) variants of the same functions. */
2689 case CTSF_UROWPCT_COUNT:
2690 case CTSF_UCOLPCT_COUNT:
2691 case CTSF_UTABLEPCT_COUNT:
2692 case CTSF_USUBTABLEPCT_COUNT:
2693 case CTSF_ULAYERPCT_COUNT:
2694 case CTSF_ULAYERROWPCT_COUNT:
2695 case CTSF_ULAYERCOLPCT_COUNT:
2696 case CTSF_UROWPCT_VALIDN:
2697 case CTSF_UCOLPCT_VALIDN:
2698 case CTSF_UTABLEPCT_VALIDN:
2699 case CTSF_USUBTABLEPCT_VALIDN:
2700 case CTSF_ULAYERPCT_VALIDN:
2701 case CTSF_ULAYERROWPCT_VALIDN:
2702 case CTSF_ULAYERCOLPCT_VALIDN:
2703 case CTSF_UROWPCT_TOTALN:
2704 case CTSF_UCOLPCT_TOTALN:
2705 case CTSF_UTABLEPCT_TOTALN:
2706 case CTSF_USUBTABLEPCT_TOTALN:
2707 case CTSF_ULAYERPCT_TOTALN:
2708 case CTSF_ULAYERROWPCT_TOTALN:
2709 case CTSF_ULAYERCOLPCT_TOTALN:
/* Sum-based functions own a moments1 object. */
2725 case CTSF_ROWPCT_SUM:
2726 case CTSF_COLPCT_SUM:
2727 case CTSF_TABLEPCT_SUM:
2728 case CTSF_SUBTABLEPCT_SUM:
2729 case CTSF_LAYERPCT_SUM:
2730 case CTSF_LAYERROWPCT_SUM:
2731 case CTSF_LAYERCOLPCT_SUM:
2736 case CTSF_UVARIANCE:
2737 case CTSF_UROWPCT_SUM:
2738 case CTSF_UCOLPCT_SUM:
2739 case CTSF_UTABLEPCT_SUM:
2740 case CTSF_USUBTABLEPCT_SUM:
2741 case CTSF_ULAYERPCT_SUM:
2742 case CTSF_ULAYERROWPCT_SUM:
2743 case CTSF_ULAYERCOLPCT_SUM:
2744 moments1_destroy (s->moments);
/* Functions that collect their values in a sorting casewriter. */
2753 casewriter_destroy (s->writer);
2759 ctables_summary_add (union ctables_summary *s,
2760 const struct ctables_summary_spec *ss,
2761 const struct variable *var, const union value *value,
2762 bool is_scale, bool is_scale_missing,
2763 bool is_missing, bool excluded_missing,
2764 double d_weight, double e_weight)
2766 /* To determine whether a case is included in a given table for a particular
2767 kind of summary, consider the following charts for each variable in the
2768 table. Only if "yes" appears for every variable for the summary is the
2771 Categorical variables: VALIDN COUNT TOTALN
2772 Valid values in included categories yes yes yes
2773 Missing values in included categories --- yes yes
2774 Missing values in excluded categories --- --- yes
2775 Valid values in excluded categories --- --- ---
2777 Scale variables: VALIDN COUNT TOTALN
2778 Valid value yes yes yes
2779 Missing value --- yes yes
2781 Missing values include both user- and system-missing. (The system-missing
2782 value is always in an excluded category.)
2784 switch (ss->function)
2787 case CTSF_ROWPCT_TOTALN:
2788 case CTSF_COLPCT_TOTALN:
2789 case CTSF_TABLEPCT_TOTALN:
2790 case CTSF_SUBTABLEPCT_TOTALN:
2791 case CTSF_LAYERPCT_TOTALN:
2792 case CTSF_LAYERROWPCT_TOTALN:
2793 case CTSF_LAYERCOLPCT_TOTALN:
2794 s->count += d_weight;
2798 case CTSF_UROWPCT_TOTALN:
2799 case CTSF_UCOLPCT_TOTALN:
2800 case CTSF_UTABLEPCT_TOTALN:
2801 case CTSF_USUBTABLEPCT_TOTALN:
2802 case CTSF_ULAYERPCT_TOTALN:
2803 case CTSF_ULAYERROWPCT_TOTALN:
2804 case CTSF_ULAYERCOLPCT_TOTALN:
2809 case CTSF_ROWPCT_COUNT:
2810 case CTSF_COLPCT_COUNT:
2811 case CTSF_TABLEPCT_COUNT:
2812 case CTSF_SUBTABLEPCT_COUNT:
2813 case CTSF_LAYERPCT_COUNT:
2814 case CTSF_LAYERROWPCT_COUNT:
2815 case CTSF_LAYERCOLPCT_COUNT:
2816 if (is_scale || !excluded_missing)
2817 s->count += d_weight;
2821 case CTSF_UROWPCT_COUNT:
2822 case CTSF_UCOLPCT_COUNT:
2823 case CTSF_UTABLEPCT_COUNT:
2824 case CTSF_USUBTABLEPCT_COUNT:
2825 case CTSF_ULAYERPCT_COUNT:
2826 case CTSF_ULAYERROWPCT_COUNT:
2827 case CTSF_ULAYERCOLPCT_COUNT:
2828 if (is_scale || !excluded_missing)
2833 case CTSF_ROWPCT_VALIDN:
2834 case CTSF_COLPCT_VALIDN:
2835 case CTSF_TABLEPCT_VALIDN:
2836 case CTSF_SUBTABLEPCT_VALIDN:
2837 case CTSF_LAYERPCT_VALIDN:
2838 case CTSF_LAYERROWPCT_VALIDN:
2839 case CTSF_LAYERCOLPCT_VALIDN:
2843 s->count += d_weight;
2847 case CTSF_UROWPCT_VALIDN:
2848 case CTSF_UCOLPCT_VALIDN:
2849 case CTSF_UTABLEPCT_VALIDN:
2850 case CTSF_USUBTABLEPCT_VALIDN:
2851 case CTSF_ULAYERPCT_VALIDN:
2852 case CTSF_ULAYERROWPCT_VALIDN:
2853 case CTSF_ULAYERCOLPCT_VALIDN:
2862 s->count += d_weight;
2871 if (is_scale || !excluded_missing)
2872 s->count += e_weight;
2879 s->count += e_weight;
2883 s->count += e_weight;
2889 if (!is_scale_missing)
2891 assert (!var_is_alpha (var)); /* XXX? */
2892 if (s->min == SYSMIS || value->f < s->min)
2894 if (s->max == SYSMIS || value->f > s->max)
2904 case CTSF_ROWPCT_SUM:
2905 case CTSF_COLPCT_SUM:
2906 case CTSF_TABLEPCT_SUM:
2907 case CTSF_SUBTABLEPCT_SUM:
2908 case CTSF_LAYERPCT_SUM:
2909 case CTSF_LAYERROWPCT_SUM:
2910 case CTSF_LAYERCOLPCT_SUM:
2911 if (!is_scale_missing)
2912 moments1_add (s->moments, value->f, e_weight);
2919 case CTSF_UVARIANCE:
2920 case CTSF_UROWPCT_SUM:
2921 case CTSF_UCOLPCT_SUM:
2922 case CTSF_UTABLEPCT_SUM:
2923 case CTSF_USUBTABLEPCT_SUM:
2924 case CTSF_ULAYERPCT_SUM:
2925 case CTSF_ULAYERROWPCT_SUM:
2926 case CTSF_ULAYERCOLPCT_SUM:
2927 if (!is_scale_missing)
2928 moments1_add (s->moments, value->f, 1.0);
2934 d_weight = e_weight = 1.0;
2939 if (!is_scale_missing)
2941 s->ovalid += e_weight;
2943 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2944 *case_num_rw_idx (c, 0) = value->f;
2945 *case_num_rw_idx (c, 1) = e_weight;
2946 casewriter_write (s->writer, c);
2952 static enum ctables_domain_type
2953 ctables_function_domain (enum ctables_summary_function function)
2983 case CTSF_UVARIANCE:
2989 case CTSF_COLPCT_COUNT:
2990 case CTSF_COLPCT_SUM:
2991 case CTSF_COLPCT_TOTALN:
2992 case CTSF_COLPCT_VALIDN:
2993 case CTSF_UCOLPCT_COUNT:
2994 case CTSF_UCOLPCT_SUM:
2995 case CTSF_UCOLPCT_TOTALN:
2996 case CTSF_UCOLPCT_VALIDN:
2999 case CTSF_LAYERCOLPCT_COUNT:
3000 case CTSF_LAYERCOLPCT_SUM:
3001 case CTSF_LAYERCOLPCT_TOTALN:
3002 case CTSF_LAYERCOLPCT_VALIDN:
3003 case CTSF_ULAYERCOLPCT_COUNT:
3004 case CTSF_ULAYERCOLPCT_SUM:
3005 case CTSF_ULAYERCOLPCT_TOTALN:
3006 case CTSF_ULAYERCOLPCT_VALIDN:
3007 return CTDT_LAYERCOL;
3009 case CTSF_LAYERPCT_COUNT:
3010 case CTSF_LAYERPCT_SUM:
3011 case CTSF_LAYERPCT_TOTALN:
3012 case CTSF_LAYERPCT_VALIDN:
3013 case CTSF_ULAYERPCT_COUNT:
3014 case CTSF_ULAYERPCT_SUM:
3015 case CTSF_ULAYERPCT_TOTALN:
3016 case CTSF_ULAYERPCT_VALIDN:
3019 case CTSF_LAYERROWPCT_COUNT:
3020 case CTSF_LAYERROWPCT_SUM:
3021 case CTSF_LAYERROWPCT_TOTALN:
3022 case CTSF_LAYERROWPCT_VALIDN:
3023 case CTSF_ULAYERROWPCT_COUNT:
3024 case CTSF_ULAYERROWPCT_SUM:
3025 case CTSF_ULAYERROWPCT_TOTALN:
3026 case CTSF_ULAYERROWPCT_VALIDN:
3027 return CTDT_LAYERROW;
3029 case CTSF_ROWPCT_COUNT:
3030 case CTSF_ROWPCT_SUM:
3031 case CTSF_ROWPCT_TOTALN:
3032 case CTSF_ROWPCT_VALIDN:
3033 case CTSF_UROWPCT_COUNT:
3034 case CTSF_UROWPCT_SUM:
3035 case CTSF_UROWPCT_TOTALN:
3036 case CTSF_UROWPCT_VALIDN:
3039 case CTSF_SUBTABLEPCT_COUNT:
3040 case CTSF_SUBTABLEPCT_SUM:
3041 case CTSF_SUBTABLEPCT_TOTALN:
3042 case CTSF_SUBTABLEPCT_VALIDN:
3043 case CTSF_USUBTABLEPCT_COUNT:
3044 case CTSF_USUBTABLEPCT_SUM:
3045 case CTSF_USUBTABLEPCT_TOTALN:
3046 case CTSF_USUBTABLEPCT_VALIDN:
3047 return CTDT_SUBTABLE;
3049 case CTSF_TABLEPCT_COUNT:
3050 case CTSF_TABLEPCT_SUM:
3051 case CTSF_TABLEPCT_TOTALN:
3052 case CTSF_TABLEPCT_VALIDN:
3053 case CTSF_UTABLEPCT_COUNT:
3054 case CTSF_UTABLEPCT_SUM:
3055 case CTSF_UTABLEPCT_TOTALN:
3056 case CTSF_UTABLEPCT_VALIDN:
3063 static enum ctables_domain_type
3064 ctables_function_is_pctsum (enum ctables_summary_function function)
3094 case CTSF_UVARIANCE:
3098 case CTSF_COLPCT_COUNT:
3099 case CTSF_COLPCT_TOTALN:
3100 case CTSF_COLPCT_VALIDN:
3101 case CTSF_UCOLPCT_COUNT:
3102 case CTSF_UCOLPCT_TOTALN:
3103 case CTSF_UCOLPCT_VALIDN:
3104 case CTSF_LAYERCOLPCT_COUNT:
3105 case CTSF_LAYERCOLPCT_TOTALN:
3106 case CTSF_LAYERCOLPCT_VALIDN:
3107 case CTSF_ULAYERCOLPCT_COUNT:
3108 case CTSF_ULAYERCOLPCT_TOTALN:
3109 case CTSF_ULAYERCOLPCT_VALIDN:
3110 case CTSF_LAYERPCT_COUNT:
3111 case CTSF_LAYERPCT_TOTALN:
3112 case CTSF_LAYERPCT_VALIDN:
3113 case CTSF_ULAYERPCT_COUNT:
3114 case CTSF_ULAYERPCT_TOTALN:
3115 case CTSF_ULAYERPCT_VALIDN:
3116 case CTSF_LAYERROWPCT_COUNT:
3117 case CTSF_LAYERROWPCT_TOTALN:
3118 case CTSF_LAYERROWPCT_VALIDN:
3119 case CTSF_ULAYERROWPCT_COUNT:
3120 case CTSF_ULAYERROWPCT_TOTALN:
3121 case CTSF_ULAYERROWPCT_VALIDN:
3122 case CTSF_ROWPCT_COUNT:
3123 case CTSF_ROWPCT_TOTALN:
3124 case CTSF_ROWPCT_VALIDN:
3125 case CTSF_UROWPCT_COUNT:
3126 case CTSF_UROWPCT_TOTALN:
3127 case CTSF_UROWPCT_VALIDN:
3128 case CTSF_SUBTABLEPCT_COUNT:
3129 case CTSF_SUBTABLEPCT_TOTALN:
3130 case CTSF_SUBTABLEPCT_VALIDN:
3131 case CTSF_USUBTABLEPCT_COUNT:
3132 case CTSF_USUBTABLEPCT_TOTALN:
3133 case CTSF_USUBTABLEPCT_VALIDN:
3134 case CTSF_TABLEPCT_COUNT:
3135 case CTSF_TABLEPCT_TOTALN:
3136 case CTSF_TABLEPCT_VALIDN:
3137 case CTSF_UTABLEPCT_COUNT:
3138 case CTSF_UTABLEPCT_TOTALN:
3139 case CTSF_UTABLEPCT_VALIDN:
3142 case CTSF_COLPCT_SUM:
3143 case CTSF_UCOLPCT_SUM:
3144 case CTSF_LAYERCOLPCT_SUM:
3145 case CTSF_ULAYERCOLPCT_SUM:
3146 case CTSF_LAYERPCT_SUM:
3147 case CTSF_ULAYERPCT_SUM:
3148 case CTSF_LAYERROWPCT_SUM:
3149 case CTSF_ULAYERROWPCT_SUM:
3150 case CTSF_ROWPCT_SUM:
3151 case CTSF_UROWPCT_SUM:
3152 case CTSF_SUBTABLEPCT_SUM:
3153 case CTSF_USUBTABLEPCT_SUM:
3154 case CTSF_TABLEPCT_SUM:
3155 case CTSF_UTABLEPCT_SUM:
3163 ctables_summary_value (const struct ctables_cell *cell,
3164 union ctables_summary *s,
3165 const struct ctables_summary_spec *ss)
3167 switch (ss->function)
3174 case CTSF_ROWPCT_COUNT:
3175 case CTSF_COLPCT_COUNT:
3176 case CTSF_TABLEPCT_COUNT:
3177 case CTSF_SUBTABLEPCT_COUNT:
3178 case CTSF_LAYERPCT_COUNT:
3179 case CTSF_LAYERROWPCT_COUNT:
3180 case CTSF_LAYERCOLPCT_COUNT:
3182 enum ctables_domain_type d = ctables_function_domain (ss->function);
3183 return (cell->domains[d]->e_count
3184 ? s->count / cell->domains[d]->e_count * 100
3188 case CTSF_UROWPCT_COUNT:
3189 case CTSF_UCOLPCT_COUNT:
3190 case CTSF_UTABLEPCT_COUNT:
3191 case CTSF_USUBTABLEPCT_COUNT:
3192 case CTSF_ULAYERPCT_COUNT:
3193 case CTSF_ULAYERROWPCT_COUNT:
3194 case CTSF_ULAYERCOLPCT_COUNT:
3196 enum ctables_domain_type d = ctables_function_domain (ss->function);
3197 return (cell->domains[d]->u_count
3198 ? s->count / cell->domains[d]->u_count * 100
3202 case CTSF_ROWPCT_VALIDN:
3203 case CTSF_COLPCT_VALIDN:
3204 case CTSF_TABLEPCT_VALIDN:
3205 case CTSF_SUBTABLEPCT_VALIDN:
3206 case CTSF_LAYERPCT_VALIDN:
3207 case CTSF_LAYERROWPCT_VALIDN:
3208 case CTSF_LAYERCOLPCT_VALIDN:
3210 enum ctables_domain_type d = ctables_function_domain (ss->function);
3211 return (cell->domains[d]->e_valid
3212 ? s->count / cell->domains[d]->e_valid * 100
3216 case CTSF_UROWPCT_VALIDN:
3217 case CTSF_UCOLPCT_VALIDN:
3218 case CTSF_UTABLEPCT_VALIDN:
3219 case CTSF_USUBTABLEPCT_VALIDN:
3220 case CTSF_ULAYERPCT_VALIDN:
3221 case CTSF_ULAYERROWPCT_VALIDN:
3222 case CTSF_ULAYERCOLPCT_VALIDN:
3224 enum ctables_domain_type d = ctables_function_domain (ss->function);
3225 return (cell->domains[d]->u_valid
3226 ? s->count / cell->domains[d]->u_valid * 100
3230 case CTSF_ROWPCT_TOTALN:
3231 case CTSF_COLPCT_TOTALN:
3232 case CTSF_TABLEPCT_TOTALN:
3233 case CTSF_SUBTABLEPCT_TOTALN:
3234 case CTSF_LAYERPCT_TOTALN:
3235 case CTSF_LAYERROWPCT_TOTALN:
3236 case CTSF_LAYERCOLPCT_TOTALN:
3238 enum ctables_domain_type d = ctables_function_domain (ss->function);
3239 return (cell->domains[d]->e_total
3240 ? s->count / cell->domains[d]->e_total * 100
3244 case CTSF_UROWPCT_TOTALN:
3245 case CTSF_UCOLPCT_TOTALN:
3246 case CTSF_UTABLEPCT_TOTALN:
3247 case CTSF_USUBTABLEPCT_TOTALN:
3248 case CTSF_ULAYERPCT_TOTALN:
3249 case CTSF_ULAYERROWPCT_TOTALN:
3250 case CTSF_ULAYERCOLPCT_TOTALN:
3252 enum ctables_domain_type d = ctables_function_domain (ss->function);
3253 return (cell->domains[d]->u_total
3254 ? s->count / cell->domains[d]->u_total * 100
3275 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3281 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3288 double weight, variance;
3289 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3290 return calc_semean (variance, weight);
3297 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3298 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3304 double weight, mean;
3305 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3306 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3310 case CTSF_UVARIANCE:
3313 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3317 case CTSF_ROWPCT_SUM:
3318 case CTSF_COLPCT_SUM:
3319 case CTSF_TABLEPCT_SUM:
3320 case CTSF_SUBTABLEPCT_SUM:
3321 case CTSF_LAYERPCT_SUM:
3322 case CTSF_LAYERROWPCT_SUM:
3323 case CTSF_LAYERCOLPCT_SUM:
3325 double weight, mean;
3326 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3327 if (weight == SYSMIS || mean == SYSMIS)
3329 enum ctables_domain_type d = ctables_function_domain (ss->function);
3330 double num = weight * mean;
3331 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3332 return denom != 0 ? num / denom * 100 : SYSMIS;
3334 case CTSF_UROWPCT_SUM:
3335 case CTSF_UCOLPCT_SUM:
3336 case CTSF_UTABLEPCT_SUM:
3337 case CTSF_USUBTABLEPCT_SUM:
3338 case CTSF_ULAYERPCT_SUM:
3339 case CTSF_ULAYERROWPCT_SUM:
3340 case CTSF_ULAYERCOLPCT_SUM:
3342 double weight, mean;
3343 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3344 if (weight == SYSMIS || mean == SYSMIS)
3346 enum ctables_domain_type d = ctables_function_domain (ss->function);
3347 double num = weight * mean;
3348 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3349 return denom != 0 ? num / denom * 100 : SYSMIS;
3358 struct casereader *reader = casewriter_make_reader (s->writer);
3361 struct percentile *ptile = percentile_create (
3362 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3363 struct order_stats *os = &ptile->parent;
3364 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3365 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3366 statistic_destroy (&ptile->parent.parent);
3374 struct casereader *reader = casewriter_make_reader (s->writer);
3377 struct mode *mode = mode_create ();
3378 struct order_stats *os = &mode->parent;
3379 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3380 s->ovalue = mode->mode;
3381 statistic_destroy (&mode->parent.parent);
3389 struct ctables_cell_sort_aux
3391 const struct ctables_nest *nest;
3392 enum pivot_axis_type a;
3396 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3398 const struct ctables_cell_sort_aux *aux = aux_;
3399 struct ctables_cell *const *ap = a_;
3400 struct ctables_cell *const *bp = b_;
3401 const struct ctables_cell *a = *ap;
3402 const struct ctables_cell *b = *bp;
3404 const struct ctables_nest *nest = aux->nest;
3405 for (size_t i = 0; i < nest->n; i++)
3406 if (i != nest->scale_idx)
3408 const struct variable *var = nest->vars[i];
3409 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3410 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3411 if (a_cv->category != b_cv->category)
3412 return a_cv->category > b_cv->category ? 1 : -1;
3414 const union value *a_val = &a_cv->value;
3415 const union value *b_val = &b_cv->value;
3416 switch (a_cv->category->type)
3422 case CCT_POSTCOMPUTE:
3423 case CCT_EXCLUDED_MISSING:
3424 /* Must be equal. */
3432 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3440 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3442 return a_cv->category->sort_ascending ? cmp : -cmp;
3448 const char *a_label = var_lookup_value_label (var, a_val);
3449 const char *b_label = var_lookup_value_label (var, b_val);
3451 ? (b_label ? strcmp (a_label, b_label) : 1)
3452 : (b_label ? -1 : value_compare_3way (
3453 a_val, b_val, var_get_width (var))));
3455 return a_cv->category->sort_ascending ? cmp : -cmp;
3469 For each ctables_table:
3470 For each combination of row vars:
3471 For each combination of column vars:
3472 For each combination of layer vars:
3474 Make a table of row values:
3475 Sort entries by row values
3476 Assign a 0-based index to each actual value
3477 Construct a dimension
3478 Make a table of column values
3479 Make a table of layer values
3481 Fill the table entry using the indexes from before.
3484 static struct ctables_domain *
3485 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3486 enum ctables_domain_type domain)
3489 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3491 const struct ctables_nest *nest = s->nests[a];
3492 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3494 size_t v_idx = nest->domains[domain][i];
3495 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3496 hash = hash_pointer (cv->category, hash);
3497 if (cv->category->type != CCT_TOTAL
3498 && cv->category->type != CCT_SUBTOTAL
3499 && cv->category->type != CCT_POSTCOMPUTE)
3500 hash = value_hash (&cv->value,
3501 var_get_width (nest->vars[v_idx]), hash);
3505 struct ctables_domain *d;
3506 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3508 const struct ctables_cell *df = d->example;
3509 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3511 const struct ctables_nest *nest = s->nests[a];
3512 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3514 size_t v_idx = nest->domains[domain][i];
3515 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3516 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3517 if (cv1->category != cv2->category
3518 || (cv1->category->type != CCT_TOTAL
3519 && cv1->category->type != CCT_SUBTOTAL
3520 && cv1->category->type != CCT_POSTCOMPUTE
3521 && !value_equal (&cv1->value, &cv2->value,
3522 var_get_width (nest->vars[v_idx]))))
3531 struct ctables_sum *sums = (s->table->n_sum_vars
3532 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3535 d = xmalloc (sizeof *d);
3536 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3537 hmap_insert (&s->domains[domain], &d->node, hash);
3541 static struct substring
3542 rtrim_value (const union value *v, const struct variable *var)
3544 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3545 var_get_width (var));
3546 ss_rtrim (&s, ss_cstr (" "));
3551 in_string_range (const union value *v, const struct variable *var,
3552 const struct substring *srange)
3554 struct substring s = rtrim_value (v, var);
3555 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3556 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
3559 static const struct ctables_category *
3560 ctables_categories_match (const struct ctables_categories *c,
3561 const union value *v, const struct variable *var)
3563 if (var_is_numeric (var) && v->f == SYSMIS)
3566 const struct ctables_category *othernm = NULL;
3567 for (size_t i = c->n_cats; i-- > 0; )
3569 const struct ctables_category *cat = &c->cats[i];
3573 if (cat->number == v->f)
3578 if (ss_equals (cat->string, rtrim_value (v, var)))
3583 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3584 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3589 if (in_string_range (v, var, cat->srange))
3594 if (var_is_value_missing (var, v))
3598 case CCT_POSTCOMPUTE:
3613 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3616 case CCT_EXCLUDED_MISSING:
3621 return var_is_value_missing (var, v) ? NULL : othernm;
3624 static const struct ctables_category *
3625 ctables_categories_total (const struct ctables_categories *c)
3627 const struct ctables_category *first = &c->cats[0];
3628 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3629 return (first->type == CCT_TOTAL ? first
3630 : last->type == CCT_TOTAL ? last
3634 static struct ctables_cell *
3635 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3636 const struct ctables_category *cats[PIVOT_N_AXES][10])
3639 enum ctables_summary_variant sv = CSV_CELL;
3640 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3642 const struct ctables_nest *nest = s->nests[a];
3643 for (size_t i = 0; i < nest->n; i++)
3644 if (i != nest->scale_idx)
3646 hash = hash_pointer (cats[a][i], hash);
3647 if (cats[a][i]->type != CCT_TOTAL
3648 && cats[a][i]->type != CCT_SUBTOTAL
3649 && cats[a][i]->type != CCT_POSTCOMPUTE)
3650 hash = value_hash (case_data (c, nest->vars[i]),
3651 var_get_width (nest->vars[i]), hash);
3657 struct ctables_cell *cell;
3658 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3660 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3662 const struct ctables_nest *nest = s->nests[a];
3663 for (size_t i = 0; i < nest->n; i++)
3664 if (i != nest->scale_idx
3665 && (cats[a][i] != cell->axes[a].cvs[i].category
3666 || (cats[a][i]->type != CCT_TOTAL
3667 && cats[a][i]->type != CCT_SUBTOTAL
3668 && cats[a][i]->type != CCT_POSTCOMPUTE
3669 && !value_equal (case_data (c, nest->vars[i]),
3670 &cell->axes[a].cvs[i].value,
3671 var_get_width (nest->vars[i])))))
3680 cell = xmalloc (sizeof *cell);
3683 cell->omit_domains = 0;
3684 cell->postcompute = false;
3685 //struct string name = DS_EMPTY_INITIALIZER;
3686 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3688 const struct ctables_nest *nest = s->nests[a];
3689 cell->axes[a].cvs = (nest->n
3690 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3692 for (size_t i = 0; i < nest->n; i++)
3694 const struct ctables_category *cat = cats[a][i];
3695 const struct variable *var = nest->vars[i];
3696 const union value *value = case_data (c, var);
3697 if (i != nest->scale_idx)
3699 const struct ctables_category *subtotal = cat->subtotal;
3700 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3703 if (cat->type == CCT_TOTAL
3704 || cat->type == CCT_SUBTOTAL
3705 || cat->type == CCT_POSTCOMPUTE)
3707 /* XXX these should be more encompassing I think.*/
3711 case PIVOT_AXIS_COLUMN:
3712 cell->omit_domains |= ((1u << CTDT_TABLE) |
3713 (1u << CTDT_LAYER) |
3714 (1u << CTDT_LAYERCOL) |
3715 (1u << CTDT_SUBTABLE) |
3718 case PIVOT_AXIS_ROW:
3719 cell->omit_domains |= ((1u << CTDT_TABLE) |
3720 (1u << CTDT_LAYER) |
3721 (1u << CTDT_LAYERROW) |
3722 (1u << CTDT_SUBTABLE) |
3725 case PIVOT_AXIS_LAYER:
3726 cell->omit_domains |= ((1u << CTDT_TABLE) |
3727 (1u << CTDT_LAYER));
3731 if (cat->type == CCT_POSTCOMPUTE)
3732 cell->postcompute = true;
3735 cell->axes[a].cvs[i].category = cat;
3736 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
3739 if (i != nest->scale_idx)
3741 if (!ds_is_empty (&name))
3742 ds_put_cstr (&name, ", ");
3743 char *value_s = data_out (value, var_get_encoding (var),
3744 var_get_print_format (var),
3745 settings_get_fmt_settings ());
3746 if (cat->type == CCT_TOTAL
3747 || cat->type == CCT_SUBTOTAL
3748 || cat->type == CCT_POSTCOMPUTE)
3749 ds_put_format (&name, "%s=total", var_get_name (var));
3751 ds_put_format (&name, "%s=%s", var_get_name (var),
3752 value_s + strspn (value_s, " "));
3758 //cell->name = ds_steal_cstr (&name);
3760 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3761 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3762 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3763 for (size_t i = 0; i < specs->n; i++)
3764 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3765 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3766 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3767 hmap_insert (&s->cells, &cell->node, hash);
3772 is_scale_missing (const struct ctables_summary_spec_set *specs,
3773 const struct ccase *c)
3775 if (!specs->is_scale)
3778 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3781 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3783 const struct variable *var = specs->listwise_vars[i];
3784 if (var_is_num_missing (var, case_num (c, var)))
3792 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3793 const struct ctables_category *cats[PIVOT_N_AXES][10],
3794 bool is_missing, bool excluded_missing,
3795 double d_weight, double e_weight)
3797 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3798 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3800 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3802 bool scale_missing = is_scale_missing (specs, c);
3803 for (size_t i = 0; i < specs->n; i++)
3804 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3805 specs->var, case_data (c, specs->var), specs->is_scale,
3806 scale_missing, is_missing, excluded_missing,
3807 d_weight, e_weight);
3808 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3809 if (!(cell->omit_domains && (1u << dt)))
3811 struct ctables_domain *d = cell->domains[dt];
3812 d->d_total += d_weight;
3813 d->e_total += e_weight;
3815 if (!excluded_missing)
3817 d->d_count += d_weight;
3818 d->e_count += e_weight;
3823 d->d_valid += d_weight;
3824 d->e_valid += e_weight;
3827 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3829 /* XXX listwise_missing??? */
3830 const struct variable *var = s->table->sum_vars[i];
3831 double addend = case_num (c, var);
3832 if (!var_is_num_missing (var, addend))
3834 struct ctables_sum *sum = &d->sums[i];
3835 sum->e_sum += addend * e_weight;
3836 sum->u_sum += addend;
3844 recurse_totals (struct ctables_section *s, const struct ccase *c,
3845 const struct ctables_category *cats[PIVOT_N_AXES][10],
3846 bool is_missing, bool excluded_missing,
3847 double d_weight, double e_weight,
3848 enum pivot_axis_type start_axis, size_t start_nest)
3850 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3852 const struct ctables_nest *nest = s->nests[a];
3853 for (size_t i = start_nest; i < nest->n; i++)
3855 if (i == nest->scale_idx)
3858 const struct variable *var = nest->vars[i];
3860 const struct ctables_category *total = ctables_categories_total (
3861 s->table->categories[var_get_dict_index (var)]);
3864 const struct ctables_category *save = cats[a][i];
3866 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3867 d_weight, e_weight);
3868 recurse_totals (s, c, cats, is_missing, excluded_missing,
3869 d_weight, e_weight, a, i + 1);
3878 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3879 const struct ctables_category *cats[PIVOT_N_AXES][10],
3880 bool is_missing, bool excluded_missing,
3881 double d_weight, double e_weight,
3882 enum pivot_axis_type start_axis, size_t start_nest)
3884 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3886 const struct ctables_nest *nest = s->nests[a];
3887 for (size_t i = start_nest; i < nest->n; i++)
3889 if (i == nest->scale_idx)
3892 const struct ctables_category *save = cats[a][i];
3895 cats[a][i] = save->subtotal;
3896 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3897 d_weight, e_weight);
3898 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3899 d_weight, e_weight, a, i + 1);
3908 ctables_add_occurrence (const struct variable *var,
3909 const union value *value,
3910 struct hmap *occurrences)
3912 int width = var_get_width (var);
3913 unsigned int hash = value_hash (value, width, 0);
3915 struct ctables_occurrence *o;
3916 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3918 if (value_equal (value, &o->value, width))
3921 o = xmalloc (sizeof *o);
3922 value_clone (&o->value, value, width);
3923 hmap_insert (occurrences, &o->node, hash);
3927 ctables_cell_insert (struct ctables_section *s,
3928 const struct ccase *c,
3929 double d_weight, double e_weight)
3931 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3933 /* Does at least one categorical variable have a missing value in an included
3934 or excluded category? */
3935 bool is_missing = false;
3937 /* Does at least one categorical variable have a missing value in an excluded
3939 bool excluded_missing = false;
3941 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3943 const struct ctables_nest *nest = s->nests[a];
3944 for (size_t i = 0; i < nest->n; i++)
3946 if (i == nest->scale_idx)
3949 const struct variable *var = nest->vars[i];
3950 const union value *value = case_data (c, var);
3952 bool var_missing = var_is_value_missing (var, value) != 0;
3956 cats[a][i] = ctables_categories_match (
3957 s->table->categories[var_get_dict_index (var)], value, var);
3963 static const struct ctables_category cct_excluded_missing = {
3964 .type = CCT_EXCLUDED_MISSING,
3967 cats[a][i] = &cct_excluded_missing;
3968 excluded_missing = true;
3973 if (!excluded_missing)
3974 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3976 const struct ctables_nest *nest = s->nests[a];
3977 for (size_t i = 0; i < nest->n; i++)
3978 if (i != nest->scale_idx)
3980 const struct variable *var = nest->vars[i];
3981 const union value *value = case_data (c, var);
3982 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3986 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3987 d_weight, e_weight);
3989 //if (!excluded_missing)
3991 recurse_totals (s, c, cats, is_missing, excluded_missing,
3992 d_weight, e_weight, 0, 0);
3993 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3994 d_weight, e_weight, 0, 0);
4000 const struct ctables_summary_spec_set *set;
4005 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
4007 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
4008 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
4009 if (as->function != bs->function)
4010 return as->function > bs->function ? 1 : -1;
4011 else if (as->percentile != bs->percentile)
4012 return as->percentile < bs->percentile ? 1 : -1;
4014 const char *as_label = as->label ? as->label : "";
4015 const char *bs_label = bs->label ? bs->label : "";
4016 return strcmp (as_label, bs_label);
4019 static struct pivot_value *
4020 ctables_category_create_label__ (const struct ctables_category *cat,
4021 const struct variable *var,
4022 const union value *value)
4024 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
4025 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
4026 : pivot_value_new_var_value (var, value));
4029 static struct pivot_value *
4030 ctables_postcompute_label (const struct ctables_categories *cats,
4031 const struct ctables_category *cat,
4032 const struct variable *var,
4033 const union value *value)
4035 struct substring in = ss_cstr (cat->pc->label);
4036 struct substring target = ss_cstr (")LABEL[");
4038 struct string out = DS_EMPTY_INITIALIZER;
4041 size_t chunk = ss_find_substring (in, target);
4042 if (chunk == SIZE_MAX)
4044 if (ds_is_empty (&out))
4045 return pivot_value_new_user_text (in.string, in.length);
4048 ds_put_substring (&out, in);
4049 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
4053 ds_put_substring (&out, ss_head (in, chunk));
4054 ss_advance (&in, chunk + target.length);
4056 struct substring idx_s;
4057 if (!ss_get_until (&in, ']', &idx_s))
4060 long int idx = strtol (idx_s.string, &tail, 10);
4061 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
4064 struct ctables_category *cat2 = &cats->cats[idx - 1];
4065 struct pivot_value *label2
4066 = ctables_category_create_label__ (cat2, var, value);
4067 char *label2_s = pivot_value_to_string_defaults (label2);
4068 ds_put_cstr (&out, label2_s);
4070 pivot_value_destroy (label2);
4075 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
4078 static struct pivot_value *
4079 ctables_category_create_label (const struct ctables_categories *cats,
4080 const struct ctables_category *cat,
4081 const struct variable *var,
4082 const union value *value)
4084 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4085 ? ctables_postcompute_label (cats, cat, var, value)
4086 : ctables_category_create_label__ (cat, var, value));
4089 static struct ctables_value *
4090 ctables_value_find__ (struct ctables_table *t, const union value *value,
4091 int width, unsigned int hash)
4093 struct ctables_value *clv;
4094 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4095 hash, &t->clabels_values_map)
4096 if (value_equal (value, &clv->value, width))
4102 ctables_value_insert (struct ctables_table *t, const union value *value,
4105 unsigned int hash = value_hash (value, width, 0);
4106 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4109 clv = xmalloc (sizeof *clv);
4110 value_clone (&clv->value, value, width);
4111 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry in T->clabels_values_map equal to VALUE, which has the
   given WIDTH, or a null pointer if there is none. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
4124 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4125 size_t ix[PIVOT_N_AXES])
4127 if (a < PIVOT_N_AXES)
4129 size_t limit = MAX (t->stacks[a].n, 1);
4130 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4131 ctables_table_add_section (t, a + 1, ix);
4135 struct ctables_section *s = &t->sections[t->n_sections++];
4136 *s = (struct ctables_section) {
4138 .cells = HMAP_INITIALIZER (s->cells),
4140 for (a = 0; a < PIVOT_N_AXES; a++)
4143 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4145 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4146 for (size_t i = 0; i < nest->n; i++)
4147 hmap_init (&s->occurrences[a][i]);
4149 for (size_t i = 0; i < N_CTDTS; i++)
4150 hmap_init (&s->domains[i]);
4155 ctpo_add (double a, double b)
4161 ctpo_sub (double a, double b)
4167 ctpo_mul (double a, double b)
4173 ctpo_div (double a, double b)
4175 return b ? a / b : SYSMIS;
4179 ctpo_pow (double a, double b)
4181 int save_errno = errno;
4183 double result = pow (a, b);
4191 ctpo_neg (double a, double b UNUSED)
4196 struct ctables_pcexpr_evaluate_ctx
4198 const struct ctables_cell *cell;
4199 const struct ctables_section *section;
4200 const struct ctables_categories *cats;
4201 enum pivot_axis_type pc_a;
4204 enum fmt_type parse_format;
4207 static double ctables_pcexpr_evaluate (
4208 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
4211 ctables_pcexpr_evaluate_nonterminal (
4212 const struct ctables_pcexpr_evaluate_ctx *ctx,
4213 const struct ctables_pcexpr *e, size_t n_args,
4214 double evaluate (double, double))
4216 double args[2] = { 0, 0 };
4217 for (size_t i = 0; i < n_args; i++)
4219 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4220 if (!isfinite (args[i]) || args[i] == SYSMIS)
4223 return evaluate (args[0], args[1]);
/* Looks in CTX->section for the cell that has the same category and value
   as CTX->cell for every non-scale variable on every axis, except that
   PC_CV is substituted at position (CTX->pc_a, CTX->pc_a_idx).  For the cell
   found, returns the value of summary number CTX->summary_idx. */
4227 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4228 const struct ctables_cell_value *pc_cv)
4230 const struct ctables_section *s = ctx->section;
/* Compute the hash of the cell to look up. */
4233 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4235 const struct ctables_nest *nest = s->nests[a];
4236 for (size_t i = 0; i < nest->n; i++)
4237 if (i != nest->scale_idx)
4239 const struct ctables_cell_value *cv
4240 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4241 : &ctx->cell->axes[a].cvs[i]);
4242 hash = hash_pointer (cv->category, hash);
/* Totals, subtotals, and postcomputes are identified by category alone;
   any other category also contributes its data value. */
4243 if (cv->category->type != CCT_TOTAL
4244 && cv->category->type != CCT_SUBTOTAL
4245 && cv->category->type != CCT_POSTCOMPUTE)
4246 hash = value_hash (&cv->value,
4247 var_get_width (nest->vars[i]), hash);
/* Compare each cell that has that hash against the wanted coordinates. */
4251 struct ctables_cell *tc;
4252 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4254 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4256 const struct ctables_nest *nest = s->nests[a];
4257 for (size_t i = 0; i < nest->n; i++)
4258 if (i != nest->scale_idx)
4260 const struct ctables_cell_value *p_cv
4261 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4262 : &ctx->cell->axes[a].cvs[i]);
4263 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4264 if (p_cv->category != t_cv->category
4265 || (p_cv->category->type != CCT_TOTAL
4266 && p_cv->category->type != CCT_SUBTOTAL
4267 && p_cv->category->type != CCT_POSTCOMPUTE
4268 && !value_equal (&p_cv->value,
4270 var_get_width (nest->vars[i]))))
/* Use the summary specs that apply to the matched cell's variant. */
4282 const struct ctables_table *t = s->table;
4283 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4284 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4285 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4286 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns the result.
   Category operands are resolved to cells through
   ctables_pcexpr_evaluate_category(); operators are applied through
   ctables_pcexpr_evaluate_nonterminal(). */
4290 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4291 const struct ctables_pcexpr *e)
/* Ranges: accumulate the value for each occurring value of the variable
   that matches the range's category. */
4298 case CTPO_CAT_NRANGE:
4299 case CTPO_CAT_SRANGE:
4301 struct ctables_cell_value cv = {
4302 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4304 assert (cv.category != NULL);
4306 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4307 const struct ctables_occurrence *o;
4310 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4311 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4312 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4314 cv.value = o->value;
4315 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands looked up with E->number as the cell value. */
4320 case CTPO_CAT_NUMBER:
4321 case CTPO_CAT_MISSING:
4322 case CTPO_CAT_OTHERNM:
4323 case CTPO_CAT_SUBTOTAL:
4324 case CTPO_CAT_TOTAL:
4326 struct ctables_cell_value cv = {
4327 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4328 .value = { .f = e->number },
4330 assert (cv.category != NULL);
4331 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* A string shorter than the variable's width is copied into a temporary
   buffer padded with spaces to that width. */
4334 case CTPO_CAT_STRING:
4336 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4338 if (width > e->string.length)
4340 s = xmalloc (width);
4341 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4343 struct ctables_cell_value cv = {
4344 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4345 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4347 assert (cv.category != NULL);
4348 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five take two operands, the last one (negation) takes one. */
4354 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4357 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4360 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4363 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4366 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4369 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Searches CELL, which must be a postcompute cell (this is asserted), for a
   cell value whose category has type CCT_POSTCOMPUTE, scanning every
   variable position of every axis of section S.  The index within the nest
   where it is found is stored through *PC_A_IDX_P.  The category found is
   asserted to be nonnull. */
4375 static const struct ctables_category *
4376 ctables_cell_postcompute (const struct ctables_section *s,
4377 const struct ctables_cell *cell,
4378 enum pivot_axis_type *pc_a_p,
4381 assert (cell->postcompute);
4382 const struct ctables_category *pc_cat = NULL;
4383 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4384 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4386 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4387 if (cv->category->type == CCT_POSTCOMPUTE)
4391 /* Multiple postcomputes cross each other. The value is
4396 pc_cat = cv->category;
4400 *pc_a_idx_p = pc_a_idx;
4404 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS by evaluating the postcompute's expression.

   The postcompute's own summary specs are searched for one with the same
   function and percentile as SS; when one matches, *FORMAT and
   *IS_CTABLES_FORMAT are overwritten with that spec's settings. */
4409 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4410 const struct ctables_cell *cell,
4411 const struct ctables_summary_spec *ss,
4412 struct fmt_spec *format,
4413 bool *is_ctables_format,
/* Locate the postcompute category and its position within CELL. */
4416 enum pivot_axis_type pc_a = 0;
4417 size_t pc_a_idx = 0;
4418 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4419 s, cell, &pc_a, &pc_a_idx);
4423 const struct ctables_postcompute *pc = pc_cat->pc;
4426 for (size_t i = 0; i < pc->specs->n; i++)
4428 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4429 if (ss->function == ss2->function
4430 && ss->percentile == ss2->percentile)
4432 *format = ss2->format;
4433 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that
   carries the postcompute. */
4439 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4440 const struct ctables_categories *cats = s->table->categories[
4441 var_get_dict_index (var)];
4442 struct ctables_pcexpr_evaluate_ctx ctx = {
4447 .pc_a_idx = pc_a_idx,
4448 .summary_idx = summary_idx,
4449 .parse_format = pc_cat->parse_format,
4451 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Builds a pivot table from the cells accumulated in T's sections and
   submits it for output.

   The table gets, in order: an optional "Statistics" dimension, an optional
   dimension for category labels that were moved off their own axis, and a
   dimension for each axis whose categories are derived from that axis's
   sorted cells.  Then one value is put into the table for each cell and
   summary specification, with CT's hide-threshold, zero, and missing
   substitutions applied.

   NOTE(review): where adjacent sorted cells are compared to find their
   common levels, value_equal() is passed var_get_type() as its last
   argument, whereas the other value_equal() and value_hash() calls in this
   file pass var_get_width().  Presumably the width is what is wanted here
   too, since for string variables the two differ -- confirm. */
4455 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4457 struct pivot_table *pt = pivot_table_create__ (
4459 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4460 : pivot_value_new_text (N_("Custom Tables"))),
4463 pivot_table_set_caption (
4464 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4466 pivot_table_set_corner_text (
4467 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summary statistics need a dimension of their own when their labels go on
   a different axis from the summaries, or when the labels are hidden and
   there is more than one statistic. */
4469 bool summary_dimension = (t->summary_axis != t->slabels_axis
4470 || (!t->slabels_visible
4471 && t->summary_specs.n > 1));
4472 if (summary_dimension)
4474 struct pivot_dimension *d = pivot_dimension_create (
4475 pt, t->slabels_axis, N_("Statistics"));
4476 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4477 if (!t->slabels_visible)
4478 d->hide_all_labels = true;
4479 for (size_t i = 0; i < specs->n; i++)
4480 pivot_category_create_leaf (
4481 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* Category labels need a dimension of their own when T->clabels_example is
   set; its leaves follow the order of T->clabels_values. */
4484 bool categories_dimension = t->clabels_example != NULL;
4485 if (categories_dimension)
4487 struct pivot_dimension *d = pivot_dimension_create (
4488 pt, t->label_axis[t->clabels_from_axis],
4489 t->clabels_from_axis == PIVOT_AXIS_ROW
4490 ? N_("Row Categories")
4491 : N_("Column Categories"));
4492 const struct variable *var = t->clabels_example;
4493 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4494 for (size_t i = 0; i < t->n_clabels_values; i++)
4496 const struct ctables_value *value = t->clabels_values[i];
4497 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4498 assert (cat != NULL);
4499 pivot_category_create_leaf (d->root, ctables_category_create_label (
4500 c, cat, t->clabels_example,
4505 pivot_table_set_look (pt, ct->look);
/* One dimension per axis that has variables or carries the summaries. */
4506 struct pivot_dimension *d[PIVOT_N_AXES];
4507 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4509 static const char *names[] = {
4510 [PIVOT_AXIS_ROW] = N_("Rows"),
4511 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4512 [PIVOT_AXIS_LAYER] = N_("Layers"),
4514 d[a] = (t->axes[a] || a == t->summary_axis
4515 ? pivot_dimension_create (pt, a, names[a])
4520 assert (t->axes[a]);
4522 for (size_t i = 0; i < t->stacks[a].n; i++)
4524 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis A, counting their cells
   and tracking the deepest nest. */
4525 struct ctables_section **sections = xnmalloc (t->n_sections,
4527 size_t n_sections = 0;
4529 size_t n_total_cells = 0;
4530 size_t max_depth = 0;
4531 for (size_t j = 0; j < t->n_sections; j++)
4532 if (t->sections[j].nests[a] == nest)
4534 struct ctables_section *s = &t->sections[j];
4535 sections[n_sections++] = s;
4536 n_total_cells += s->cells.count;
4538 size_t depth = s->nests[a]->n;
4539 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into one array and sort it. */
4542 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4544 size_t n_sorted = 0;
4546 for (size_t j = 0; j < n_sections; j++)
4548 struct ctables_section *s = sections[j];
4550 struct ctables_cell *cell;
4551 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4553 sorted[n_sorted++] = cell;
4554 assert (n_sorted <= n_total_cells);
4557 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4558 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* NOTE(review): the loop below is debugging output; confirm that it is not
   compiled into production builds. */
4561 for (size_t j = 0; j < n_sorted; j++)
4563 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
/* Work out the levels of pivot categories that this nest needs: up to two
   per variable plus one for summaries, hence 1 + 2 * max_depth. */
4568 struct ctables_level
4570 enum ctables_level_type
4572 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4573 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4574 CTL_SUMMARY, /* Summary functions. */
4578 enum settings_value_show vlabel; /* CTL_VAR only. */
4581 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4582 size_t n_levels = 0;
4583 for (size_t k = 0; k < nest->n; k++)
4585 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4586 if (vlabel != CTVL_NONE)
4588 levels[n_levels++] = (struct ctables_level) {
4590 .vlabel = (enum settings_value_show) vlabel,
4595 if (nest->scale_idx != k
4596 && (k != nest->n - 1 || t->label_axis[a] == a))
4598 levels[n_levels++] = (struct ctables_level) {
4599 .type = CTL_CATEGORY,
4605 if (!summary_dimension && a == t->slabels_axis)
4607 levels[n_levels++] = (struct ctables_level) {
4608 .type = CTL_SUMMARY,
4609 .var_idx = SIZE_MAX,
4613 /* Pivot categories:
4615 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4616 - category for nest->vars[0], if nest->scale_idx != 0
4617 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4618 - category for nest->vars[1], if nest->scale_idx != 1
4620 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4621 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4622 - summary function, if 'a == t->slabels_axis && a ==
4625 Additional dimensions:
4627 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4629 - If 't->label_axis[b] == a' for some 'b != a', add a category
4634 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4636 for (size_t j = 0; j < n_sorted; j++)
4638 struct ctables_cell *cell = sorted[j];
4639 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4641 size_t n_common = 0;
4644 for (; n_common < n_levels; n_common++)
4646 const struct ctables_level *level = &levels[n_common];
4647 if (level->type == CTL_CATEGORY)
4649 size_t var_idx = level->var_idx;
4650 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4651 if (prev->axes[a].cvs[var_idx].category != c)
4653 else if (c->type != CCT_SUBTOTAL
4654 && c->type != CCT_TOTAL
4655 && c->type != CCT_POSTCOMPUTE
4656 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4657 &cell->axes[a].cvs[var_idx].value,
4658 var_get_type (nest->vars[var_idx])))
4664 for (size_t k = n_common; k < n_levels; k++)
4666 const struct ctables_level *level = &levels[k];
4667 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4668 if (level->type == CTL_SUMMARY)
4670 assert (k == n_levels - 1);
4672 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4673 for (size_t m = 0; m < specs->n; m++)
4675 int leaf = pivot_category_create_leaf (
4676 parent, ctables_summary_label (&specs->specs[m],
4684 const struct variable *var = nest->vars[level->var_idx];
4685 struct pivot_value *label;
4686 if (level->type == CTL_VAR)
4688 label = pivot_value_new_variable (var);
4689 label->variable.show = level->vlabel;
4691 else if (level->type == CTL_CATEGORY)
4693 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4694 label = ctables_category_create_label (
4695 t->categories[var_get_dict_index (var)],
4696 cv->category, var, &cv->value);
4701 if (k == n_levels - 1)
4702 prev_leaf = pivot_category_create_leaf (parent, label);
4704 groups[k] = pivot_category_create_group__ (parent, label);
4708 cell->axes[a].leaf = prev_leaf;
4717 for (size_t i = 0; i < t->n_sections; i++)
4719 struct ctables_section *s = &t->sections[i];
4721 struct ctables_cell *cell;
4722 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4727 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4728 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4729 for (size_t j = 0; j < specs->n; j++)
4732 size_t n_dindexes = 0;
4734 if (summary_dimension)
4735 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4737 if (categories_dimension)
4739 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4740 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4741 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4742 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4745 dindexes[n_dindexes++] = ctv->leaf;
4748 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4751 int leaf = cell->axes[a].leaf;
4752 if (a == t->summary_axis && !summary_dimension)
4754 dindexes[n_dindexes++] = leaf;
4757 const struct ctables_summary_spec *ss = &specs->specs[j];
4759 struct fmt_spec format = specs->specs[j].format;
4760 bool is_ctables_format = ss->is_ctables_format;
4761 double d = (cell->postcompute
4762 ? ctables_cell_calculate_postcompute (
4763 s, cell, ss, &format, &is_ctables_format, j)
4764 : ctables_summary_value (cell, &cell->summaries[j],
4767 struct pivot_value *value;
4768 if (ct->hide_threshold != 0
4769 && d < ct->hide_threshold
4770 && ctables_summary_function_is_count (ss->function))
4772 value = pivot_value_new_user_text_nocopy (
4773 xasprintf ("<%d", ct->hide_threshold));
4775 else if (d == 0 && ct->zero)
4776 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4777 else if (d == SYSMIS && ct->missing)
4778 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4779 else if (is_ctables_format)
4781 char *s = data_out_stretchy (&(union value) { .f = d },
4783 &ct->ctables_formats, NULL);
4784 value = pivot_value_new_user_text_nocopy (s);
4788 value = pivot_value_new_number (d);
4789 value->numeric.format = format;
4791 /* XXX should text values be right-justified? */
4792 pivot_table_put (pt, dindexes, n_dindexes, value);
4797 pivot_table_submit (pt);
/* Validates the request in T->label_axis[A] to show axis A's category
   labels somewhere other than on A, recording A in T->clabels_from_axis and
   the first nest's innermost variable in T->clabels_example.

   An error is reported with msg() when:

   - The first nest's innermost variable has a category of type
     CCT_FUNCTION (sorting based on a summary function).

   - The innermost variable of any nest on the axis is its scale variable.

   - The innermost variable of a later nest differs from the first nest's in
     width, value labels, or category specifications.

   Callers combine the results for rows and columns as boolean success
   flags. */
4801 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4803 enum pivot_axis_type label_pos = t->label_axis[a];
4807 t->clabels_from_axis = a;
/* Names used to refer to the offending syntax in error messages. */
4809 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4810 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4812 const struct ctables_stack *stack = &t->stacks[a];
4816 const struct ctables_nest *n0 = &stack->nests[0];
4819 assert (stack->n == 1);
/* The innermost variable of the first nest is the one that every other
   nest's innermost variable is compared against. */
4823 const struct variable *v0 = n0->vars[n0->n - 1];
4824 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4825 t->clabels_example = v0;
4827 for (size_t i = 0; i < c0->n_cats; i++)
4828 if (c0->cats[i].type == CCT_FUNCTION)
4830 msg (SE, _("%s=%s is not allowed with sorting based "
4831 "on a summary function."),
4832 subcommand_name, pos_name);
4835 if (n0->n - 1 == n0->scale_idx)
4837 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4838 "but %s is a scale variable."),
4839 subcommand_name, pos_name, var_get_name (v0));
4843 for (size_t i = 1; i < stack->n; i++)
4845 const struct ctables_nest *ni = &stack->nests[i];
4847 const struct variable *vi = ni->vars[ni->n - 1];
4848 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4850 if (ni->n - 1 == ni->scale_idx)
4852 msg (SE, _("%s=%s requires the variables to be moved to be "
4853 "categorical, but %s is a scale variable."),
4854 subcommand_name, pos_name, var_get_name (vi));
4857 if (var_get_width (v0) != var_get_width (vi))
4859 msg (SE, _("%s=%s requires the variables to be "
4860 "moved to have the same width, but %s has "
4861 "width %d and %s has width %d."),
4862 subcommand_name, pos_name,
4863 var_get_name (v0), var_get_width (v0),
4864 var_get_name (vi), var_get_width (vi));
4867 if (!val_labs_equal (var_get_value_labels (v0),
4868 var_get_value_labels (vi)))
4870 msg (SE, _("%s=%s requires the variables to be "
4871 "moved to have the same value labels, but %s "
4872 "and %s have different value labels."),
4873 subcommand_name, pos_name,
4874 var_get_name (v0), var_get_name (vi));
4877 if (!ctables_categories_equal (c0, ci))
4879 msg (SE, _("%s=%s requires the variables to be "
4880 "moved to have the same category "
4881 "specifications, but %s and %s have different "
4882 "category specifications."),
4883 subcommand_name, pos_name,
4884 var_get_name (v0), var_get_name (vi));
/* Looks for VAR among the *N variables in *SUM_VARS.  If it is not already
   there, stores it at index *N, first growing the array (whose capacity is
   tracked in *ALLOCATED) if it is full.  The caller uses the result as the
   variable's index in the array. */
4893 add_sum_var (struct variable *var,
4894 struct variable ***sum_vars, size_t *n, size_t *allocated)
4896 for (size_t i = 0; i < *n; i++)
4897 if (var == (*sum_vars)[i])
4900 if (*n >= *allocated)
4901 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4902 (*sum_vars)[*n] = var;
/* Walks axis expression A.  For a node with summary specs, each spec (in
   every summary variant) whose function satisfies
   ctables_function_is_pctsum() has its sum_var_idx set to the index that
   add_sum_var() assigns to A->var.  For a node with operands, recurses into
   both of A->subs[]. */
4907 enumerate_sum_vars (const struct ctables_axis *a,
4908 struct variable ***sum_vars, size_t *n, size_t *allocated)
4916 for (size_t i = 0; i < N_CSVS; i++)
4917 for (size_t j = 0; j < a->specs[i].n; j++)
4919 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4920 if (ctables_function_is_pctsum (spec->function))
4921 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4927 for (size_t i = 0; i < 2; i++)
4928 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares T for execution:

   - Enumerates the stack of nests for each axis and fills in each nest's
     per-domain lists of variable indexes.

   - Supplies default summary specifications for nests on the summary axis
     that have none, and clones cell specs into total specs where the latter
     are missing.

   - With listwise missing-value handling, records for each nest the scale
     variables of the other nests in its group.

   - Merges all the nests' summary specs into T->summary_specs, assigning
     each spec its axis_idx.

   - Enumerates the variables used by sum-based percentage functions.

   Returns the combined result of ctables_check_label_position() for the row
   and column axes. */
4934 ctables_prepare_table (struct ctables_table *t)
4936 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4939 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4941 for (size_t j = 0; j < t->stacks[a].n; j++)
4943 struct ctables_nest *nest = &t->stacks[a].nests[j];
4944 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* At most one entry per variable in the nest. */
4946 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4947 nest->n_domains[dt] = 0;
4949 for (size_t k = 0; k < nest->n; k++)
4951 if (k == nest->scale_idx)
4960 if (a != PIVOT_AXIS_LAYER)
4967 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4968 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4969 : a == PIVOT_AXIS_ROW)
4971 if (k == nest->n - 1
4972 || (nest->scale_idx == nest->n - 1
4973 && k == nest->n - 2))
4979 if (a == PIVOT_AXIS_COLUMN)
4984 if (a == PIVOT_AXIS_ROW)
4989 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Give the axis a stack consisting of a single empty nest. */
4996 struct ctables_nest *nest = xmalloc (sizeof *nest);
4997 *nest = (struct ctables_nest) { .n = 0 };
4998 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
5000 /* There's no point in moving labels away from an axis that has no
5001 labels, so avoid dealing with the special cases around that. */
5002 t->label_axis[a] = a;
5005 struct ctables_stack *stack = &t->stacks[t->summary_axis];
5006 for (size_t i = 0; i < stack->n; i++)
5008 struct ctables_nest *nest = &stack->nests[i];
/* No summary functions were given for this nest: default to the mean for a
   scale variable and to a count otherwise. */
5009 if (!nest->specs[CSV_CELL].n)
5011 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
5012 specs->specs = xmalloc (sizeof *specs->specs);
5015 enum ctables_summary_function function
5016 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): specs->var is read in the initializer below and assigned
   only afterward; confirm that ctables_summary_default_format() copes with
   whatever value it holds at this point. */
5018 *specs->specs = (struct ctables_summary_spec) {
5019 .function = function,
5020 .format = ctables_summary_default_format (function, specs->var),
5023 specs->var = nest->vars[0];
5025 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5026 &nest->specs[CSV_CELL]);
5028 else if (!nest->specs[CSV_TOTAL].n)
5029 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
5030 &nest->specs[CSV_CELL]);
5032 if (t->ctables->smissing_listwise)
/* Collect the scale variables of the other nests that share this nest's
   group head. */
5034 struct variable **listwise_vars = NULL;
5036 size_t allocated = 0;
5038 for (size_t j = nest->group_head; j < stack->n; j++)
5040 const struct ctables_nest *other_nest = &stack->nests[j];
5041 if (other_nest->group_head != nest->group_head)
5044 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
5047 listwise_vars = x2nrealloc (listwise_vars, &allocated,
5048 sizeof *listwise_vars);
5049 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
5052 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5055 listwise_vars = xmemdup (listwise_vars,
5056 n * sizeof *listwise_vars);
5057 nest->specs[sv].listwise_vars = listwise_vars;
5058 nest->specs[sv].n_listwise_vars = n;
/* Merge the summary specs of every nest and variant into T->summary_specs,
   repeatedly taking the smallest remaining item. */
5063 struct ctables_summary_spec_set *merged = &t->summary_specs;
5064 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5066 for (size_t j = 0; j < stack->n; j++)
5068 const struct ctables_nest *nest = &stack->nests[j];
5070 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5071 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5076 struct merge_item min = items[0];
5077 for (size_t j = 1; j < n_left; j++)
5078 if (merge_item_compare_3way (&items[j], &min) < 0)
5081 if (merged->n >= merged->allocated)
5082 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5083 sizeof *merged->specs);
5084 merged->specs[merged->n++] = min.set->specs[min.ofs];
/* Every item equal to the one just merged is given its index and advanced;
   an exhausted item is replaced by the last remaining one. */
5086 for (size_t j = 0; j < n_left; )
5088 if (merge_item_compare_3way (&items[j], &min) == 0)
5090 struct merge_item *item = &items[j];
5091 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5092 if (++item->ofs >= item->set->n)
5094 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below are debugging output; confirm that
   they are not compiled into production builds. */
5104 for (size_t j = 0; j < merged->n; j++)
5105 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5107 for (size_t j = 0; j < stack->n; j++)
5109 const struct ctables_nest *nest = &stack->nests[j];
5110 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5112 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5113 for (size_t k = 0; k < specs->n; k++)
5114 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5115 specs->specs[k].axis_idx);
5121 size_t allocated_sum_vars = 0;
5122 enumerate_sum_vars (t->axes[t->summary_axis],
5123 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5125 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5126 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest stacked on axis A of T, takes the value that case C holds
   for the nest's innermost variable and, when that value matches one of the
   variable's categories, inserts it into T's set of category-label
   values. */
5130 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5131 enum pivot_axis_type a)
5133 struct ctables_stack *stack = &t->stacks[a];
5134 for (size_t i = 0; i < stack->n; i++)
5136 const struct ctables_nest *nest = &stack->nests[i];
5137 const struct variable *var = nest->vars[nest->n - 1];
5138 const union value *value = case_data (c, var);
/* Detect a system-missing numeric value. */
5140 if (var_is_numeric (var) && value->f == SYSMIS)
5143 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5145 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sort().  A_ and B_ each point to an array element
   of type 'struct ctables_value *'; WIDTH_ points to the int width of the
   values.  Orders the elements by their values using
   value_compare_3way(). */
5150 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5152 const struct ctables_value *const *ap = a_;
5153 const struct ctables_value *const *bp = b_;
5154 const struct ctables_value *a = *ap;
5155 const struct ctables_value *b = *bp;
5156 const int *width = width_;
5157 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values as an array of all the values in
   T->clabels_values_map, sorted by value, and sets each value's leaf to its
   position in that array. */
5161 ctables_sort_clabels_values (struct ctables_table *t)
5163 const struct variable *v0 = t->clabels_example;
5164 int width = var_get_width (v0);
5166 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Insert values that have value labels and match one of the categories. */
5169 const struct val_labs *val_labs = var_get_value_labels (v0);
5170 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5171 vl = val_labs_next (val_labs, vl))
5172 if (ctables_categories_match (c0, &vl->value, v0))
5173 ctables_value_insert (t, &vl->value, width);
/* Copy the map's contents into an array so that they can be sorted. */
5176 size_t n = hmap_count (&t->clabels_values_map);
5177 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5179 struct ctables_value *clv;
5181 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5182 t->clabels_values[i++] = clv;
5183 t->n_clabels_values = n;
5186 sort (t->clabels_values, n, sizeof *t->clabels_values,
5187 compare_clabels_values_3way, &width);
5189 for (size_t i = 0; i < n; i++)
5190 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the categories in CATS call
   for.  Categories that name a number or a string contribute that value
   directly.  The other category types contribute whichever of VAR's labeled
   values satisfy them. */
5194 ctables_add_category_occurrences (const struct variable *var,
5195 struct hmap *occurrences,
5196 const struct ctables_categories *cats)
5198 const struct val_labs *val_labs = var_get_value_labels (var);
5200 for (size_t i = 0; i < cats->n_cats; i++)
5202 const struct ctables_category *c = &cats->cats[i];
/* A specific number. */
5206 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A specific string, padded with spaces to VAR's width. */
5212 int width = var_get_width (var);
5214 value_init (&value, width);
5215 value_copy_buf_rpad (&value, width,
5216 CHAR_CAST (uint8_t *, c->string.string),
5217 c->string.length, ' ');
5218 ctables_add_occurrence (var, &value, occurrences);
5219 value_destroy (&value, width);
/* A numeric range: labeled values between the two endpoints, inclusive. */
5224 assert (var_is_numeric (var));
5225 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5226 vl = val_labs_next (val_labs, vl))
5227 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5228 ctables_add_occurrence (var, &vl->value, occurrences);
/* A string range: labeled values accepted by in_string_range(). */
5232 assert (var_is_alpha (var));
5233 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5234 vl = val_labs_next (val_labs, vl))
5235 if (in_string_range (&vl->value, var, c->srange))
5236 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values for VAR. */
5240 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5241 vl = val_labs_next (val_labs, vl))
5242 if (var_is_value_missing (var, &vl->value))
5243 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5247 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5248 vl = val_labs_next (val_labs, vl))
5249 ctables_add_occurrence (var, &vl->value, occurrences);
5252 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing ones unless the category includes
   them. */
5262 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5263 vl = val_labs_next (val_labs, vl))
5264 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5265 ctables_add_occurrence (var, &vl->value, occurrences);
5268 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Visits each variable position (A, A_IDX) in S's nests in turn.  At each
   position it tries every value recorded in that position's occurrences,
   storing it into case C and its category into CATS, and then every
   postcompute category of the variable.  Once A passes the last axis, a
   cell is inserted for the combination held in C and CATS. */
5275 ctables_section_recurse_add_empty_categories (
5276 struct ctables_section *s,
5277 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5278 enum pivot_axis_type a, size_t a_idx)
5280 if (a >= PIVOT_N_AXES)
5281 ctables_cell_insert__ (s, c, cats);
/* Nothing more to visit on this axis: move on to the next one. */
5282 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5283 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5286 const struct variable *var = s->nests[a]->vars[a_idx];
5287 const struct ctables_categories *categories = s->table->categories[
5288 var_get_dict_index (var)];
5289 int width = var_get_width (var);
5290 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5291 const struct ctables_occurrence *o;
5292 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case by this occurrence. */
5294 union value *value = case_data_rw (c, var);
5295 value_destroy (value, width);
5296 value_clone (value, &o->value, width);
5297 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5298 assert (cats[a][a_idx] != NULL);
5299 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are not found among the occurrences, so they are
   tried separately. */
5302 for (size_t i = 0; i < categories->n_cats; i++)
5304 const struct ctables_category *cat = &categories->cats[i];
5305 if (cat->type == CCT_POSTCOMPUTE)
5307 cats[a][a_idx] = cat;
5308 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to S for categories that should be shown even though no data
   fell into them.  For the non-scale variables in S's nests whose
   categories have show_empty set, the categories' values are added to the
   variable's occurrences.  Cells are then enumerated by
   ctables_section_recurse_add_empty_categories() using a scratch case.

   NOTE(review): the scratch CATS array has room for only 10 variables per
   axis (marked XXX below); confirm that nests cannot be deeper than that. */
5315 ctables_section_add_empty_categories (struct ctables_section *s)
5317 bool show_empty = false;
5318 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5320 for (size_t k = 0; k < s->nests[a]->n; k++)
5321 if (k != s->nests[a]->scale_idx)
5323 const struct variable *var = s->nests[a]->vars[k];
5324 const struct ctables_categories *cats = s->table->categories[
5325 var_get_dict_index (var)];
5326 if (cats->show_empty)
5329 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5335 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5336 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5337 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Discards the data accumulated in S: the occurrences recorded for each
   non-scale variable, every cell together with its values and summaries,
   and every domain.  The hash maps for cells and domains are shrunk rather
   than destroyed. */
5342 ctables_section_clear (struct ctables_section *s)
5344 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5346 const struct ctables_nest *nest = s->nests[a];
5347 for (size_t i = 0; i < nest->n; i++)
5348 if (i != nest->scale_idx)
5350 const struct variable *var = nest->vars[i];
5351 int width = var_get_width (var);
5352 struct ctables_occurrence *o, *next;
5353 struct hmap *map = &s->occurrences[a][i];
5354 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5356 value_destroy (&o->value, width);
5357 hmap_delete (map, &o->node);
5364 struct ctables_cell *cell, *next_cell;
5365 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5367 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5369 const struct ctables_nest *nest = s->nests[a];
5370 for (size_t i = 0; i < nest->n; i++)
5371 if (i != nest->scale_idx)
5372 value_destroy (&cell->axes[a].cvs[i].value,
5373 var_get_width (nest->vars[i]));
5374 free (cell->axes[a].cvs);
/* The cell's summaries follow the spec set for its summary variant. */
5377 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5378 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5379 for (size_t i = 0; i < specs->n; i++)
5380 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5381 free (cell->summaries);
5383 hmap_delete (&s->cells, &cell->node);
5386 hmap_shrink (&s->cells);
5388 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5390 struct ctables_domain *domain, *next_domain;
5391 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5394 free (domain->sums);
5395 hmap_delete (&s->domains[dt], &domain->node);
5398 hmap_shrink (&s->domains[dt]);
/* Releases everything owned by S: clears its accumulated data with
   ctables_section_clear(), then destroys its hash maps and frees the
   per-axis arrays of occurrence maps. */
5403 ctables_section_uninit (struct ctables_section *s)
5405 ctables_section_clear (s);
5407 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5409 struct ctables_nest *nest = s->nests[a];
5410 for (size_t i = 0; i < nest->n; i++)
5411 hmap_destroy (&s->occurrences[a][i]);
5412 free (s->occurrences[a]);
5415 hmap_destroy (&s->cells);
5416 for (size_t i = 0; i < N_CTDTS; i++)
5417 hmap_destroy (&s->domains[i]);
/* Discards the data accumulated in T: clears each of its sections, empties
   the map of category-label values (when T->clabels_example is set), and
   frees the sorted array of those values. */
5421 ctables_table_clear (struct ctables_table *t)
5423 for (size_t i = 0; i < t->n_sections; i++)
5424 ctables_section_clear (&t->sections[i]);
5426 if (t->clabels_example)
/* All the values in the map have the example variable's width. */
5428 int width = var_get_width (t->clabels_example);
5429 struct ctables_value *value, *next_value;
5430 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5431 &t->clabels_values_map)
5433 value_destroy (&value->value, width);
5434 hmap_delete (&t->clabels_values_map, &value->node);
5437 hmap_shrink (&t->clabels_values_map);
5439 free (t->clabels_values);
5440 t->clabels_values = NULL;
5441 t->n_clabels_values = 0;
/* Runs the tables in CT over the cases in INPUT, using the dictionary of
   DS.

   First allocates and creates the sections for every table.  Then, for each
   group of cases from the casegrouper (split by the SPLIT FILE variables
   when the dictionary's split type is SPLIT_SEPARATE, otherwise grouped on
   no variables), every case is accumulated into every section of every
   table, after which each table is output and cleared.

   Returns the result of casegrouper_destroy(). */
5446 ctables_execute (struct dataset *ds, struct casereader *input,
5449 for (size_t i = 0; i < ct->n_tables; i++)
5451 struct ctables_table *t = ct->tables[i];
/* One section for each combination of nests across the three axes, treating
   an empty stack as a single nest. */
5452 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5453 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5454 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5455 sizeof *t->sections);
5456 size_t ix[PIVOT_N_AXES];
5457 ctables_table_add_section (t, 0, ix);
5460 struct dictionary *dict = dataset_dict (ds);
5461 struct casegrouper *grouper
5462 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5463 ? casegrouper_create_splits (input, dict)
5464 : casegrouper_create_vars (input, NULL, 0));
5465 struct casereader *group;
5466 while (casegrouper_get_next_group (grouper, &group))
5468 /* Output SPLIT FILE variables. */
5469 struct ccase *c = casereader_peek (group, 0);
5472 output_split_file_values (ds, c);
/* Accumulate every case in the group into every table. */
5476 bool warn_on_invalid = true;
5477 for (c = casereader_read (group); c;
5478 case_unref (c), c = casereader_read (group))
5480 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5481 double e_weight = (ct->e_weight
5482 ? var_force_valid_weight (ct->e_weight,
5483 case_num (c, ct->e_weight),
5487 for (size_t i = 0; i < ct->n_tables; i++)
5489 struct ctables_table *t = ct->tables[i];
5491 for (size_t j = 0; j < t->n_sections; j++)
5492 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Axes whose labels were moved elsewhere also record the values seen. */
5494 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5495 if (t->label_axis[a] != a)
5496 ctables_insert_clabels_values (t, c, a);
5499 casereader_destroy (group);
/* Finish, output, and reset each table for this group. */
5501 for (size_t i = 0; i < ct->n_tables; i++)
5503 struct ctables_table *t = ct->tables[i];
5505 if (t->clabels_example)
5506 ctables_sort_clabels_values (t);
5508 for (size_t j = 0; j < t->n_sections; j++)
5509 ctables_section_add_empty_categories (&t->sections[j]);
5511 ctables_table_output (ct, t);
5512 ctables_table_clear (t);
5515 return casegrouper_destroy (grouper);
/* Signature shared by the levels of the postcompute expression parser.  The
   generic binary-operator parser receives one of these so that it can call
   the next level to parse each operand. */
5520 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5521 struct dictionary *);
/* Destroys postcompute expression E, releasing whatever its operation type
   owns, and destroys its location.  Several lines of this function,
   including some case labels, are not visible in this listing. */
5524 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
/* A string category owns its substring. */
5530 case CTPO_CAT_STRING:
5531 ss_dealloc (&e->string);
/* A string range owns both of its endpoint substrings. */
5534 case CTPO_CAT_SRANGE:
5535 for (size_t i = 0; i < 2; i++)
5536 ss_dealloc (&e->srange[i]);
/* Recursively destroy both subexpressions.  The case labels that lead here
   are not visible; presumably they are the operator node types. */
5545 for (size_t i = 0; i < 2; i++)
5546 ctables_pcexpr_destroy (e->subs[i]);
/* These node types have no type-specific cleanup visible here. */
5550 case CTPO_CAT_NUMBER:
5551 case CTPO_CAT_NRANGE:
5552 case CTPO_CAT_MISSING:
5553 case CTPO_CAT_OTHERNM:
5554 case CTPO_CAT_SUBTOTAL:
5555 case CTPO_CAT_TOTAL:
5559 msg_location_destroy (e->location);
/* Allocates a new expression node whose two subexpressions are SUB0 and SUB1
   and whose location is produced by msg_location_merged from the operands'
   locations.  The line that uses OP is not visible in this listing;
   presumably OP becomes the node's operation and the node is returned. */
5564 static struct ctables_pcexpr *
5565 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5566 struct ctables_pcexpr *sub0,
5567 struct ctables_pcexpr *sub1)
5569 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5570 *e = (struct ctables_pcexpr) {
5572 .subs = { sub0, sub1 },
5573 .location = msg_location_merged (sub0->location, sub1->location),
5578 /* How to parse an operator. */
/* Lexer token type that the operator parsers compare against the current
   token. */
5581 enum token_type token;
/* Postcompute operation used for the node built when TOKEN matches. */
5582 enum ctables_postcompute_op op;
/* Scans the N_OPS elements of OPS for one whose token equals the current
   token in LEXER.  On a match, a step that applies to every token type except
   T_NEG_NUM is performed; that step and the return statements are on lines
   not visible here.  Presumably the step consumes the token and the matching
   operator is returned, with a null result when nothing matches -- confirm
   against the full source. */
5585 static const struct operator *
5586 ctables_pcexpr_match_operator (struct lexer *lexer,
5587 const struct operator ops[], size_t n_ops)
5589 for (const struct operator *op = ops; op < ops + n_ops; op++)
5590 if (lex_token (lexer) == op->token)
5592 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators taken from the N_OPS elements of OPS,
   starting from the already-parsed left operand LHS.  On each iteration the
   current token is matched against OPS, a right operand is parsed with
   PARSE_NEXT_LEVEL, and the running left operand is replaced by a binary node
   built by ctables_pcexpr_allocate_binary, so chains nest to the left.

   If CHAIN_WARNING is nonnull, it is reported as a warning at LHS's location
   when the iteration count exceeds 1.  The conditions guarding that warning
   and the error path are on lines not visible in this listing. */
5601 static struct ctables_pcexpr *
5602 ctables_pcexpr_parse_binary_operators__ (
5603 struct lexer *lexer, struct dictionary *dict,
5604 const struct operator ops[], size_t n_ops,
5605 parse_recursively_func *parse_next_level,
5606 const char *chain_warning, struct ctables_pcexpr *lhs)
5608 for (int op_count = 0; ; op_count++)
5610 const struct operator *op
5611 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
/* op_count equals the number of operators consumed so far, so this fires
   only for chains of two or more operators. */
5614 if (op_count > 1 && chain_warning)
5615 msg_at (SW, lhs->location, "%s", chain_warning);
5620 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
/* Presumably reached when the right operand failed to parse, so that the
   left operand built so far is not leaked -- the guard is not visible. */
5623 ctables_pcexpr_destroy (lhs);
5627 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of a postcompute expression: a first operand
   obtained from PARSE_NEXT_LEVEL, followed by any chain of the N_OPS
   operators in OPS, which is handled by
   ctables_pcexpr_parse_binary_operators__.  CHAIN_WARNING is passed through
   unchanged.  Handling of a failed first operand is on lines not visible in
   this listing. */
5631 static struct ctables_pcexpr *
5632 ctables_pcexpr_parse_binary_operators (
5633 struct lexer *lexer, struct dictionary *dict,
5634 const struct operator ops[], size_t n_ops,
5635 parse_recursively_func *parse_next_level, const char *chain_warning)
5637 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5641 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5643 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary calls this function for
   parenthesized subexpressions before its definition appears. */
5646 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5647 struct dictionary *);
5649 static struct ctables_pcexpr
5650 ctpo_cat_nrange (double low, double high)
5652 return (struct ctables_pcexpr) {
5653 .op = CTPO_CAT_NRANGE,
5654 .nrange = { low, high },
5658 static struct ctables_pcexpr
5659 ctpo_cat_srange (struct substring low, struct substring high)
5661 return (struct ctables_pcexpr) {
5662 .op = CTPO_CAT_SRANGE,
5663 .srange = { low, high },
/* Parses one primary of a postcompute expression from LEXER:

   - a numeric constant;
   - one of the keywords MISSING, OTHERNM, TOTAL, or SUBTOTAL, where SUBTOTAL
     may be followed by a bracketed integer index of at least 1;
   - a bracketed category: a number, a string, or a range written with THRU,
     in which LO and HI stand for an open lower or upper end;
   - a parenthesized subexpression, parsed by ctables_pcexpr_parse_add.

   For the non-parenthesized forms the node is assembled in a local struct,
   given a location spanning the tokens consumed, and returned as a newly
   allocated copy.  Many short lines of this function (token advances, error
   returns, braces) are not visible in this listing. */
5667 static struct ctables_pcexpr *
5668 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
/* Remember where the primary starts so that its location can be computed at
   the end. */
5670 int start_ofs = lex_ofs (lexer);
5671 struct ctables_pcexpr e;
5672 if (lex_is_number (lexer))
5674 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5675 .number = lex_number (lexer) };
5678 else if (lex_match_id (lexer, "MISSING"))
5679 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5680 else if (lex_match_id (lexer, "OTHERNM"))
5681 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5682 else if (lex_match_id (lexer, "TOTAL"))
5683 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5684 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index is given. */
5686 size_t subtotal_index = 0;
5687 if (lex_match (lexer, T_LBRACK))
5689 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5691 subtotal_index = lex_integer (lexer);
5693 if (!lex_force_match (lexer, T_RBRACK))
5696 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5697 .subtotal_index = subtotal_index };
5699 else if (lex_match (lexer, T_LBRACK))
/* Bracketed category.  A range that begins with LO has an open lower end:
   a null substring for string ranges, -DBL_MAX for numeric ranges. */
5701 if (lex_match_id (lexer, "LO"))
5703 if (!lex_force_match_id (lexer, "THRU"))
5706 if (lex_is_string (lexer))
5708 struct substring low = { .string = NULL };
5709 struct substring high = parse_substring (lexer, dict);
5710 e = ctpo_cat_srange (low, high);
5714 if (!lex_force_num (lexer))
5716 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number, optionally followed by THRU and either HI (open upper end,
   represented by DBL_MAX) or a second number. */
5720 else if (lex_is_number (lexer))
5722 double number = lex_number (lexer);
5724 if (lex_match_id (lexer, "THRU"))
5726 if (lex_match_id (lexer, "HI"))
5727 e = ctpo_cat_nrange (number, DBL_MAX);
5730 if (!lex_force_num (lexer))
5732 e = ctpo_cat_nrange (number, lex_number (lexer));
5737 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string, optionally followed by THRU and either HI (open upper end,
   represented by a null substring) or a second string. */
5740 else if (lex_is_string (lexer))
5742 struct substring s = parse_substring (lexer, dict);
5744 if (lex_match_id (lexer, "THRU"))
5746 struct substring high;
5748 if (lex_match_id (lexer, "HI"))
5749 high = (struct substring) { .string = NULL };
5752 if (!lex_force_string (lexer))
5757 high = parse_substring (lexer, dict);
5760 e = ctpo_cat_srange (s, high);
5763 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5767 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any substrings that the node
   already owns before failing. */
5771 if (!lex_force_match (lexer, T_RBRACK))
5773 if (e.op == CTPO_CAT_STRING)
5774 ss_dealloc (&e.string);
5775 else if (e.op == CTPO_CAT_SRANGE)
5777 ss_dealloc (&e.srange[0]);
5778 ss_dealloc (&e.srange[1]);
5783 else if (lex_match (lexer, T_LPAREN))
/* Parenthesized subexpression: parsed from the lowest-precedence level and
   destroyed again if the closing parenthesis is missing.  The return of the
   subexpression is on a line not visible here. */
5785 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5788 if (!lex_force_match (lexer, T_RPAREN))
5790 ctables_pcexpr_destroy (ep);
5797 lex_error (lexer, NULL);
/* The location covers the tokens from the start of the primary through the
   last token consumed; the node is then copied to the heap for the caller. */
5801 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5802 return xmemdup (&e, sizeof e);
/* Allocates a new expression node whose location spans the tokens from
   START_OFS through the token just before the lexer's current offset.  The
   initializer lines that use SUB are not visible in this listing; judging by
   the name and the callers, the node presumably negates SUB and is
   returned. */
5805 static struct ctables_pcexpr *
5806 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5807 struct lexer *lexer, int start_ofs)
5809 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5810 *e = (struct ctables_pcexpr) {
5813 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level.  Operands come from
   ctables_pcexpr_parse_primary and are joined by the T_EXP operator, with a
   warning about left-associativity passed along for chained use.

   Ordinarily this simply delegates to ctables_pcexpr_parse_binary_operators.
   When the current token is a negative number that is immediately followed
   by T_EXP, the number is instead turned into a positive constant (the
   negated token value), the operator chain is parsed with that constant as
   the left operand, and the whole result is wrapped by
   ctables_pcexpr_allocate_neg. */
5818 static struct ctables_pcexpr *
5819 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5821 static const struct operator op = { T_EXP, CTPO_POW };
5823 const char *chain_warning =
5824 _("The exponentiation operator (`**') is left-associative: "
5825 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5826 "To disable this warning, insert parentheses.");
/* Common case: anything other than a negative number directly followed by
   the exponentiation operator. */
5828 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5829 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5830 ctables_pcexpr_parse_primary,
5833 /* Special case for situations like "-5**6", which must be parsed as
5836 int start_ofs = lex_ofs (lexer);
5837 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5838 *lhs = (struct ctables_pcexpr) {
5839 .op = CTPO_CONSTANT,
5840 .number = -lex_tokval (lexer),
5841 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5845 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5846 lexer, dict, &op, 1,
5847 ctables_pcexpr_parse_primary, chain_warning, lhs);
5851 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5854 /* Parses the unary minus level. */
/* Without a leading T_DASH this delegates to the exponentiation level.  With
   one, the function recurses on itself, so any number of consecutive minus
   signs is accepted, and wraps the operand with ctables_pcexpr_allocate_neg
   using a location that starts at the minus sign.  Handling of a failed
   operand parse is on lines not visible in this listing. */
5855 static struct ctables_pcexpr *
5856 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5858 int start_ofs = lex_ofs (lexer);
5859 if (!lex_match (lexer, T_DASH))
5860 return ctables_pcexpr_parse_exp (lexer, dict);
5862 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5866 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5869 /* Parses the multiplication and division level. */
5870 static struct ctables_pcexpr *
5871 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5873 static const struct operator ops[] =
5875 { T_ASTERISK, CTPO_MUL },
5876 { T_SLASH, CTPO_DIV },
5879 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5880 sizeof ops / sizeof *ops,
5881 ctables_pcexpr_parse_neg, NULL);
5884 /* Parses the addition and subtraction level. */
5885 static struct ctables_pcexpr *
5886 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5888 static const struct operator ops[] =
5890 { T_PLUS, CTPO_ADD },
5891 { T_DASH, CTPO_SUB },
5892 { T_NEG_NUM, CTPO_ADD },
5895 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5896 ops, sizeof ops / sizeof *ops,
5897 ctables_pcexpr_parse_mul, NULL);
5900 static struct ctables_postcompute *
5901 ctables_find_postcompute (struct ctables *ct, const char *name)
5903 struct ctables_postcompute *pc;
5904 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5905 utf8_hash_case_string (name, 0), &ct->postcomputes)
5906 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form
   &name = EXPR (expression), and records the definition in the ctables
   object's postcomputes map.  If a postcompute with the same name already
   exists, a warning is issued and the old expression and location are
   destroyed before the new ones are stored.  Several short lines (error
   returns, the branch structure, and the assignment of the expression) are
   not visible in this listing. */
5912 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The location starts one token back from the current offset; presumably
   that token is the PCOMPUTE keyword already consumed by the caller. */
5915 int pcompute_start = lex_ofs (lexer) - 1;
5917 if (!lex_match (lexer, T_AND))
5919 lex_error_expecting (lexer, "&");
5922 if (!lex_force_id (lexer))
/* NAME is a heap copy of the identifier token. */
5925 char *name = ss_xstrdup (lex_tokss (lexer));
5928 if (!lex_force_match (lexer, T_EQUALS)
5929 || !lex_force_match_id (lexer, "EXPR")
5930 || !lex_force_match (lexer, T_LPAREN))
/* expr_start and expr_end bracket the expression tokens so that their source
   text can be used below as a label. */
5936 int expr_start = lex_ofs (lexer);
5937 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5938 int expr_end = lex_ofs (lexer) - 1;
5939 if (!expr || !lex_force_match (lexer, T_RPAREN))
5941 ctables_pcexpr_destroy (expr);
5945 int pcompute_end = lex_ofs (lexer) - 1;
5947 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5950 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, point at the earlier definition, and discard the
   parts of it that are about to be replaced. */
5953 msg_at (SW, location, _("New definition of &%s will override the "
5954 "previous definition."),
5956 msg_at (SN, pc->location, _("This is the previous definition."));
5958 ctables_pcexpr_destroy (pc->expr);
5959 msg_location_destroy (pc->location);
/* First definition: create the postcompute, which takes over NAME, and add
   it to the map under a case-insensitive hash of its name. */
5964 pc = xmalloc (sizeof *pc);
5965 *pc = (struct ctables_postcompute) { .name = name };
5966 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5967 utf8_hash_case_string (pc->name, 0));
5970 pc->location = location;
/* The label is taken from the source text of the expression.  The line just
   before this one is not visible; presumably it restricts this to the case
   where no label has been set yet -- confirm. */
5972 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary functions and formats that follows FORMAT in a
   PPROPERTIES subcommand, storing them in *SSS, which is reset to an empty
   set first.  Parsing continues until the end of the command, a slash, or
   one of the identifiers LABEL or HIDESOURCECATS.  The set is uninitialized
   again with ctables_summary_spec_set_uninit on what is presumably the error
   path; the return statements are not visible in this listing. */
5977 ctables_parse_pproperties_format (struct lexer *lexer,
5978 struct ctables_summary_spec_set *sss)
5980 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5982 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5983 && !(lex_token (lexer) == T_ID
5984 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5985 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5986 lex_tokss (lexer)))))
5988 /* Parse function. */
5989 enum ctables_summary_function function;
5990 if (!parse_ctables_summary_function (lexer, &function))
5993 /* Parse percentile. */
/* Only CTSF_PTILE takes a percentile, which must lie in the closed range
   from 0 to 100; other functions keep 0. */
5994 double percentile = 0;
5995 if (function == CTSF_PTILE)
5997 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5999 percentile = lex_number (lexer);
6004 struct fmt_spec format;
6005 bool is_ctables_format;
6006 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the specs array when it is full, then append the new spec. */
6009 if (sss->n >= sss->allocated)
6010 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
6011 sizeof *sss->specs);
6012 sss->specs[sss->n++] = (struct ctables_summary_spec) {
6013 .function = function,
6014 .percentile = percentile,
6016 .is_ctables_format = is_ctables_format,
6022 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of &name references to
   existing postcomputes, followed by any number of LABEL, FORMAT, and
   HIDESOURCECATS settings.  Each setting is applied to every postcompute
   named in the list.  Error returns and cleanup are on lines not visible in
   this listing. */
6027 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
6029 struct ctables_postcompute **pcs = NULL;
6031 size_t allocated_pcs = 0;
/* Collect the postcomputes that the settings will apply to. */
6033 while (lex_match (lexer, T_AND))
6035 if (!lex_force_id (lexer))
6037 struct ctables_postcompute *pc
6038 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
6041 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
6046 if (n_pcs >= allocated_pcs)
6047 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Apply settings until the end of the subcommand. */
6051 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6053 if (lex_match_id (lexer, "LABEL"))
6055 lex_match (lexer, T_EQUALS);
6056 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label, replacing any earlier
   one. */
6059 for (size_t i = 0; i < n_pcs; i++)
6061 free (pcs[i]->label);
6062 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6067 else if (lex_match_id (lexer, "FORMAT"))
6069 lex_match (lexer, T_EQUALS);
/* The specs are parsed once into a temporary set, cloned into each
   postcompute, and then the temporary set is released. */
6071 struct ctables_summary_spec_set sss;
6072 if (!ctables_parse_pproperties_format (lexer, &sss))
6075 for (size_t i = 0; i < n_pcs; i++)
6078 ctables_summary_spec_set_uninit (pcs[i]->specs);
6080 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6081 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6083 ctables_summary_spec_set_uninit (&sss);
6085 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6087 lex_match (lexer, T_EQUALS);
6088 bool hide_source_cats;
6089 if (!parse_bool (lexer, &hide_source_cats))
6091 for (size_t i = 0; i < n_pcs; i++)
6092 pcs[i]->hide_source_cats = hide_source_cats;
6096 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time, formats it according to the strftime format
   string FORMAT, and appends the result to OUT.

   Nothing is appended if NOW cannot be converted to local time, or if
   strftime produces no output.  strftime returns 0 both for an empty result
   and when the formatted text does not fit in the buffer; in the latter case
   the buffer contents are indeterminate, so the buffer must not be used
   unless the return value is positive. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  const struct tm *tm = localtime (&now);
  if (tm == NULL)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
6118 skip_prefix (struct substring *s, struct substring prefix)
6120 if (ss_starts_with (*s, prefix))
6122 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of the lexer tokens at offsets EXPR_START up to
   but not including EXPR_END.  Bracket tokens are tracked with a nesting
   counter whose handling is on lines not visible in this listing.  An
   identifier is looked up in DICT and, when it names a variable that has a
   label, the label is appended in place of the identifier text.  Other
   tokens are appended as their source representation with spaces placed
   around them. */
6130 put_table_expression (struct string *out, struct lexer *lexer,
6131 struct dictionary *dict, int expr_start, int expr_end)
6134 for (int ofs = expr_start; ofs < expr_end; ofs++)
6136 const struct token *t = lex_ofs_token (lexer, ofs);
6137 if (t->type == T_LBRACK)
6139 else if (t->type == T_RBRACK && nest > 0)
6145 else if (t->type == T_ID)
/* Prefer the variable label; fall back to the identifier as written. */
6147 const struct variable *var
6148 = dict_lookup_var (dict, t->string.string);
6149 const char *label = var ? var_get_label (var) : NULL;
6150 ds_put_cstr (out, label ? label : t->string.string);
/* A separating space goes before the token unless this is the first token,
   the token is a right parenthesis, or OUT already ends in a space. */
6154 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6155 ds_put_byte (out, ' ');
6157 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6158 ds_put_cstr (out, repr);
/* A space also follows the token unless it is the last token or a left
   parenthesis. */
6161 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6162 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding three keywords that begin with
   a right parenthesis: )DATE becomes NOW formatted with the strftime %x
   conversion, )TIME becomes NOW formatted with %X, and )TABLE becomes the
   table expression between lexer offsets EXPR_START and EXPR_END as rendered
   by put_table_expression.  Any other right parenthesis is copied through
   unchanged.  The enclosing loop and its exit are on lines not visible in
   this listing. */
6168 put_title_text (struct string *out, struct substring in, time_t now,
6169 struct lexer *lexer, struct dictionary *dict,
6170 int expr_start, int expr_end)
/* Copy everything up to the next right parenthesis verbatim. */
6174 size_t chunk = ss_find_byte (in, ')');
6175 ds_put_substring (out, ss_head (in, chunk));
6176 ss_advance (&in, chunk);
6177 if (ss_is_empty (in))
/* IN now starts with a right parenthesis: try each keyword in turn. */
6180 if (skip_prefix (&in, ss_cstr (")DATE")))
6181 put_strftime (out, now, "%x");
6182 else if (skip_prefix (&in, ss_cstr (")TIME")))
6183 put_strftime (out, now, "%X");
6184 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6185 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the parenthesis itself and move past it. */
6188 ds_put_byte (out, ')');
6189 ss_advance (&in, 1);
/* Parses and executes the CTABLES command.

   The command-wide subcommands that may precede the first TABLE (FORMAT,
   VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS)
   are parsed first.  Then each TABLE subcommand is parsed together with its
   own subcommands (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST,
   COMPARETEST).  Finally the data is read and the tables are produced by
   ctables_execute.

   Returns CMD_SUCCESS if both ctables_execute and proc_commit succeed and
   CMD_FAILURE otherwise.  Many short lines of this function (braces, error
   jumps, token advances) are not visible in this listing. */
6195 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6197 struct casereader *input = NULL;
/* Run the measure guesser over the data.  The lines guarding this are not
   visible; presumably it runs only when a guesser was actually created. */
6199 struct measure_guesser *mg = measure_guesser_create (ds);
6202 input = proc_open (ds);
6203 measure_guesser_run (mg, input);
6204 measure_guesser_destroy (mg);
/* Every variable starts out with the variable display mode taken from the
   global settings. */
6207 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6208 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6209 enum settings_value_show tvars = settings_get_show_variables ();
6210 for (size_t i = 0; i < n_vars; i++)
6211 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on an unshared copy of the default look so that omit_empty can be
   switched off without modifying the default. */
6213 struct pivot_table_look *look = pivot_table_look_unshare (
6214 pivot_table_look_ref (pivot_table_look_get_default ()));
6215 look->omit_empty = false;
6217 struct ctables *ct = xmalloc (sizeof *ct);
6218 *ct = (struct ctables) {
6219 .dict = dataset_dict (ds),
6221 .ctables_formats = FMT_SETTINGS_INIT,
6223 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once; later passed to put_title_text for TITLES text. */
6226 time_t now = time (NULL);
/* Each CTABLES-specific format is registered as a custom currency style.
   Two style strings are provided per format and the one used depends on
   whether the decimal point setting is a period. */
6231 const char *dot_string;
6232 const char *comma_string;
6234 static const struct ctf ctfs[4] = {
6235 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6236 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6237 { CTEF_PAREN, "-,(,),", "-.(.)." },
6238 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6240 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6241 for (size_t i = 0; i < 4; i++)
6243 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6244 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6245 fmt_number_style_from_string (s));
6248 if (!lex_force_match (lexer, T_SLASH))
/* Command-wide subcommands, up to the first TABLE. */
6251 while (!lex_match_id (lexer, "TABLE"))
6253 if (lex_match_id (lexer, "FORMAT"))
/* SYSMIS marks a width that has not been specified.  The default unit is
   points, at 72 per inch. */
6255 double widths[2] = { SYSMIS, SYSMIS };
6256 double units_per_inch = 72.0;
6258 while (lex_token (lexer) != T_SLASH)
6260 if (lex_match_id (lexer, "MINCOLWIDTH"))
6262 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6265 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6267 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6270 else if (lex_match_id (lexer, "UNITS"))
6272 lex_match (lexer, T_EQUALS);
6273 if (lex_match_id (lexer, "POINTS"))
6274 units_per_inch = 72.0;
6275 else if (lex_match_id (lexer, "INCHES"))
6276 units_per_inch = 1.0;
6277 else if (lex_match_id (lexer, "CM"))
6278 units_per_inch = 2.54;
6281 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6285 else if (lex_match_id (lexer, "EMPTY"))
6290 lex_match (lexer, T_EQUALS);
6291 if (lex_match_id (lexer, "ZERO"))
6293 /* Nothing to do. */
6295 else if (lex_match_id (lexer, "BLANK"))
6296 ct->zero = xstrdup ("");
6297 else if (lex_force_string (lexer))
6299 ct->zero = ss_xstrdup (lex_tokss (lexer));
6305 else if (lex_match_id (lexer, "MISSING"))
6307 lex_match (lexer, T_EQUALS);
6308 if (!lex_force_string (lexer))
/* A string consisting of a single period is handled separately from any
   other string; that arm of the conditional is not visible here. */
6312 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6313 ? ss_xstrdup (lex_tokss (lexer))
6319 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6320 "UNITS", "EMPTY", "MISSING");
6325 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6326 && widths[0] > widths[1])
6328 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Store each specified width in the horizontal width range of the look,
   rescaled from the chosen unit to 96 units per inch. */
6332 for (size_t i = 0; i < 2; i++)
6333 if (widths[i] != SYSMIS)
6335 int *wr = ct->look->width_ranges[TABLE_HORZ];
6336 wr[i] = widths[i] / units_per_inch * 96.0;
6341 else if (lex_match_id (lexer, "VLABELS"))
6343 if (!lex_force_match_id (lexer, "VARIABLES"))
6345 lex_match (lexer, T_EQUALS);
6347 struct variable **vars;
6349 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6353 if (!lex_force_match_id (lexer, "DISPLAY"))
6358 lex_match (lexer, T_EQUALS);
6360 enum ctables_vlabel vlabel;
6361 if (lex_match_id (lexer, "DEFAULT"))
6362 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6363 else if (lex_match_id (lexer, "NAME"))
6365 else if (lex_match_id (lexer, "LABEL"))
6366 vlabel = CTVL_LABEL;
6367 else if (lex_match_id (lexer, "BOTH"))
6369 else if (lex_match_id (lexer, "NONE"))
6373 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Apply the chosen display mode to every variable that was listed. */
6379 for (size_t i = 0; i < n_vars; i++)
6380 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6383 else if (lex_match_id (lexer, "MRSETS"))
6385 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6387 lex_match (lexer, T_EQUALS);
6388 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6391 else if (lex_match_id (lexer, "SMISSING"))
6393 if (lex_match_id (lexer, "VARIABLE"))
6394 ct->smissing_listwise = false;
6395 else if (lex_match_id (lexer, "LISTWISE"))
6396 ct->smissing_listwise = true;
6399 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6403 else if (lex_match_id (lexer, "PCOMPUTE"))
6405 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6408 else if (lex_match_id (lexer, "PPROPERTIES"))
6410 if (!ctables_parse_pproperties (lexer, ct))
6413 else if (lex_match_id (lexer, "WEIGHT"))
6415 if (!lex_force_match_id (lexer, "VARIABLE"))
6417 lex_match (lexer, T_EQUALS);
6418 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6422 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
/* With an explicit COUNT the threshold is read from the command.  Without
   one, a threshold of 5 is used unless a nonzero threshold is already set. */
6424 if (lex_match_id (lexer, "COUNT"))
6426 lex_match (lexer, T_EQUALS);
6427 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6430 ct->hide_threshold = lex_integer (lexer);
6433 else if (ct->hide_threshold == 0)
6434 ct->hide_threshold = 5;
6438 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6439 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6440 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6444 if (!lex_force_match (lexer, T_SLASH))
/* One pass through the code below per TABLE subcommand.  A new table is
   created with its defaults and appended to ct->tables before its axes are
   parsed. */
6448 size_t allocated_tables = 0;
6451 if (ct->n_tables >= allocated_tables)
6452 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6453 sizeof *ct->tables);
6455 struct ctables_category *cat = xmalloc (sizeof *cat);
6456 *cat = (struct ctables_category) {
6458 .include_missing = false,
6459 .sort_ascending = true,
6462 struct ctables_categories *c = xmalloc (sizeof *c);
6463 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6464 *c = (struct ctables_categories) {
/* The table has one categories slot per dictionary variable. */
6471 struct ctables_categories **categories = xnmalloc (n_vars,
6472 sizeof *categories);
6473 for (size_t i = 0; i < n_vars; i++)
6476 struct ctables_table *t = xmalloc (sizeof *t);
6477 *t = (struct ctables_table) {
6479 .slabels_axis = PIVOT_AXIS_COLUMN,
6480 .slabels_visible = true,
6481 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6483 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6484 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6485 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6487 .clabels_from_axis = PIVOT_AXIS_LAYER,
6488 .categories = categories,
6489 .n_categories = n_vars,
6492 ct->tables[ct->n_tables++] = t;
/* Parse the table expression: a row axis, optionally followed by BY and a
   column axis, optionally followed by BY and a layer axis.  The token
   offsets of the expression are kept for use by TITLES text. */
6494 lex_match (lexer, T_EQUALS);
6495 int expr_start = lex_ofs (lexer);
6496 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6498 if (lex_match (lexer, T_BY))
6500 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6501 ct, t, PIVOT_AXIS_COLUMN))
6504 if (lex_match (lexer, T_BY))
6506 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6507 ct, t, PIVOT_AXIS_LAYER))
6511 int expr_end = lex_ofs (lexer);
6513 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6514 && !t->axes[PIVOT_AXIS_LAYER])
6516 lex_error (lexer, _("At least one variable must be specified."));
/* Find a scale variable on each axis.  The error below is reported with a
   note for each axis that has one; the counting and the condition are on
   lines not visible here. */
6520 const struct ctables_axis *scales[PIVOT_N_AXES];
6521 size_t n_scales = 0;
6522 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6524 scales[a] = find_scale (t->axes[a]);
6530 msg (SE, _("Scale variables may appear only on one axis."));
6531 if (scales[PIVOT_AXIS_ROW])
6532 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6533 _("This scale variable appears on the rows axis."));
6534 if (scales[PIVOT_AXIS_COLUMN])
6535 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6536 _("This scale variable appears on the columns axis."));
6537 if (scales[PIVOT_AXIS_LAYER])
6538 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6539 _("This scale variable appears on the layer axis."));
/* Likewise find a summary on each axis; more than one axis with a summary is
   an error. */
6543 const struct ctables_axis *summaries[PIVOT_N_AXES];
6544 size_t n_summaries = 0;
6545 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6547 summaries[a] = (scales[a]
6549 : find_categorical_summary_spec (t->axes[a]));
6553 if (n_summaries > 1)
6555 msg (SE, _("Summaries may appear only on one axis."));
6556 if (summaries[PIVOT_AXIS_ROW])
6557 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6558 _("This variable on the rows axis has a summary."));
6559 if (summaries[PIVOT_AXIS_COLUMN])
6560 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6561 _("This variable on the columns axis has a summary."));
6562 if (summaries[PIVOT_AXIS_LAYER])
6563 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6564 _("This variable on the layers axis has a summary."));
/* Choose the summary axis: an axis that has a summary if any axis does,
   otherwise an axis that is present at all. */
6567 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6568 if (n_summaries ? summaries[a] : t->axes[a])
6570 t->summary_axis = a;
/* A table expression that ends the command has no subcommands of its own. */
6574 if (lex_token (lexer) == T_ENDCMD)
6576 if (!ctables_prepare_table (t))
6580 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
6583 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6585 if (lex_match_id (lexer, "SLABELS"))
6587 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6589 if (lex_match_id (lexer, "POSITION"))
6591 lex_match (lexer, T_EQUALS);
6592 if (lex_match_id (lexer, "COLUMN"))
6593 t->slabels_axis = PIVOT_AXIS_COLUMN;
6594 else if (lex_match_id (lexer, "ROW"))
6595 t->slabels_axis = PIVOT_AXIS_ROW;
6596 else if (lex_match_id (lexer, "LAYER"))
6597 t->slabels_axis = PIVOT_AXIS_LAYER;
6600 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6604 else if (lex_match_id (lexer, "VISIBLE"))
6606 lex_match (lexer, T_EQUALS);
6607 if (!parse_bool (lexer, &t->slabels_visible))
6612 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6617 else if (lex_match_id (lexer, "CLABELS"))
/* AUTO keeps row and column category labels on their own axes.  ROWLABELS
   and COLLABELS move them to the opposite axis or to the layer axis. */
6619 if (lex_match_id (lexer, "AUTO"))
6621 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6622 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6624 else if (lex_match_id (lexer, "ROWLABELS"))
6626 lex_match (lexer, T_EQUALS);
6627 if (lex_match_id (lexer, "OPPOSITE"))
6628 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6629 else if (lex_match_id (lexer, "LAYER"))
6630 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6633 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6637 else if (lex_match_id (lexer, "COLLABELS"))
6639 lex_match (lexer, T_EQUALS);
6640 if (lex_match_id (lexer, "OPPOSITE"))
6641 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6642 else if (lex_match_id (lexer, "LAYER"))
6643 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6646 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6652 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6657 else if (lex_match_id (lexer, "CRITERIA"))
6659 if (!lex_force_match_id (lexer, "CILEVEL"))
6661 lex_match (lexer, T_EQUALS);
6663 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6665 t->cilevel = lex_number (lexer);
6668 else if (lex_match_id (lexer, "CATEGORIES"))
6670 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6674 else if (lex_match_id (lexer, "TITLES"))
/* Each of CAPTION, CORNER, and TITLE selects a text field of the table.
   The strings that follow are joined with single spaces, with keywords
   expanded by put_title_text, and stored in the selected field. */
6679 if (lex_match_id (lexer, "CAPTION"))
6680 textp = &t->caption;
6681 else if (lex_match_id (lexer, "CORNER"))
6683 else if (lex_match_id (lexer, "TITLE"))
6687 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6690 lex_match (lexer, T_EQUALS);
6692 struct string s = DS_EMPTY_INITIALIZER;
6693 while (lex_is_string (lexer))
6695 if (!ds_is_empty (&s))
6696 ds_put_byte (&s, ' ');
6697 put_title_text (&s, lex_tokss (lexer), now,
6698 lexer, dataset_dict (ds),
6699 expr_start, expr_end);
6703 *textp = ds_steal_cstr (&s);
6705 while (lex_token (lexer) != T_SLASH
6706 && lex_token (lexer) != T_ENDCMD);
6708 else if (lex_match_id (lexer, "SIGTEST"))
/* Allocate the chi-square settings with their defaults, then parse the
   options that override them. */
6712 t->chisq = xmalloc (sizeof *t->chisq);
6713 *t->chisq = (struct ctables_chisq) {
6715 .include_mrsets = true,
6716 .all_visible = true,
6722 if (lex_match_id (lexer, "TYPE"))
6724 lex_match (lexer, T_EQUALS);
6725 if (!lex_force_match_id (lexer, "CHISQUARE"))
6728 else if (lex_match_id (lexer, "ALPHA"))
6730 lex_match (lexer, T_EQUALS);
6731 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6733 t->chisq->alpha = lex_number (lexer);
6736 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6738 lex_match (lexer, T_EQUALS);
6739 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6742 else if (lex_match_id (lexer, "CATEGORIES"))
6744 lex_match (lexer, T_EQUALS);
6745 if (lex_match_id (lexer, "ALLVISIBLE"))
6746 t->chisq->all_visible = true;
6747 else if (lex_match_id (lexer, "SUBTOTALS"))
6748 t->chisq->all_visible = false;
6751 lex_error_expecting (lexer,
6752 "ALLVISIBLE", "SUBTOTALS");
6758 lex_error_expecting (lexer, "TYPE", "ALPHA",
6759 "INCLUDEMRSETS", "CATEGORIES");
6763 while (lex_token (lexer) != T_SLASH
6764 && lex_token (lexer) != T_ENDCMD);
6766 else if (lex_match_id (lexer, "COMPARETEST"))
/* Allocate the pairwise comparison settings with their defaults, then parse
   the options that override them. */
6770 t->pairwise = xmalloc (sizeof *t->pairwise);
6771 *t->pairwise = (struct ctables_pairwise) {
6773 .alpha = { .05, .05 },
6774 .adjust = BONFERRONI,
6775 .include_mrsets = true,
6776 .meansvariance_allcats = true,
6777 .all_visible = true,
6786 if (lex_match_id (lexer, "TYPE"))
6788 lex_match (lexer, T_EQUALS);
6789 if (lex_match_id (lexer, "PROP"))
6790 t->pairwise->type = PROP;
6791 else if (lex_match_id (lexer, "MEAN"))
6792 t->pairwise->type = MEAN;
6795 lex_error_expecting (lexer, "PROP", "MEAN");
6799 else if (lex_match_id (lexer, "ALPHA"))
/* ALPHA takes one or two values strictly between 0 and 1.  Two values are
   stored in ascending order; a single value is used for both slots. */
6801 lex_match (lexer, T_EQUALS);
6803 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6805 double a0 = lex_number (lexer);
6808 lex_match (lexer, T_COMMA);
6809 if (lex_is_number (lexer))
6811 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6813 double a1 = lex_number (lexer);
6816 t->pairwise->alpha[0] = MIN (a0, a1);
6817 t->pairwise->alpha[1] = MAX (a0, a1);
6820 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6822 else if (lex_match_id (lexer, "ADJUST"))
6824 lex_match (lexer, T_EQUALS);
6825 if (lex_match_id (lexer, "BONFERRONI"))
6826 t->pairwise->adjust = BONFERRONI;
6827 else if (lex_match_id (lexer, "BH"))
6828 t->pairwise->adjust = BH;
6829 else if (lex_match_id (lexer, "NONE"))
6830 t->pairwise->adjust = 0;
6833 lex_error_expecting (lexer, "BONFERRONI", "BH",
6838 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6840 lex_match (lexer, T_EQUALS);
6841 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6844 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6846 lex_match (lexer, T_EQUALS);
6847 if (lex_match_id (lexer, "ALLCATS"))
6848 t->pairwise->meansvariance_allcats = true;
6849 else if (lex_match_id (lexer, "TESTEDCATS"))
6850 t->pairwise->meansvariance_allcats = false;
6853 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6857 else if (lex_match_id (lexer, "CATEGORIES"))
6859 lex_match (lexer, T_EQUALS);
6860 if (lex_match_id (lexer, "ALLVISIBLE"))
6861 t->pairwise->all_visible = true;
6862 else if (lex_match_id (lexer, "SUBTOTALS"))
6863 t->pairwise->all_visible = false;
6866 lex_error_expecting (lexer, "ALLVISIBLE",
6871 else if (lex_match_id (lexer, "MERGE"))
6873 lex_match (lexer, T_EQUALS);
6874 if (!parse_bool (lexer, &t->pairwise->merge))
6877 else if (lex_match_id (lexer, "STYLE"))
6879 lex_match (lexer, T_EQUALS);
6880 if (lex_match_id (lexer, "APA"))
6881 t->pairwise->apa_style = true;
6882 else if (lex_match_id (lexer, "SIMPLE"))
6883 t->pairwise->apa_style = false;
6886 lex_error_expecting (lexer, "APA", "SIMPLE");
6890 else if (lex_match_id (lexer, "SHOWSIG"))
6892 lex_match (lexer, T_EQUALS);
6893 if (!parse_bool (lexer, &t->pairwise->show_sig))
6898 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6899 "INCLUDEMRSETS", "MEANSVARIANCE",
6900 "CATEGORIES", "MERGE", "STYLE",
6905 while (lex_token (lexer) != T_SLASH
6906 && lex_token (lexer) != T_ENDCMD);
6910 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6911 "CRITERIA", "CATEGORIES", "TITLES",
6912 "SIGTEST", "COMPARETEST");
6916 if (!lex_match (lexer, T_SLASH))
/* Category labels can be moved off only one of the row and column axes. */
6920 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6921 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6923 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6927 if (!ctables_prepare_table (t))
6930 while (lex_token (lexer) != T_ENDCMD);
/* Parsing is complete: read the data and produce the tables.  The line just
   before the proc_open call is not visible; presumably it skips the call
   when the reader was already opened for the measure guesser -- confirm. */
6933 input = proc_open (ds);
6934 bool ok = ctables_execute (ds, input, ct);
6935 ok = proc_commit (ds) && ok;
6937 ctables_destroy (ct);
6938 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* The ctables object is also destroyed here, after the success return above;
   this is presumably the shared error exit, whose label and return are not
   visible. */
6943 ctables_destroy (ct);