1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
167 #if 0 /* Multiple response sets not yet implemented. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
448 static void ctables_stack_uninit (struct ctables_stack *);
452 struct hmap_node node;
457 struct ctables_occurrence
459 struct hmap_node node;
463 struct ctables_section
466 struct ctables_table *table;
467 struct ctables_nest *nests[PIVOT_N_AXES];
470 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
471 struct hmap cells; /* Contains "struct ctables_cell"s. */
472 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 static void ctables_section_uninit (struct ctables_section *);
479 struct ctables *ctables;
480 struct ctables_axis *axes[PIVOT_N_AXES];
481 struct ctables_stack stacks[PIVOT_N_AXES];
482 struct ctables_section *sections;
484 enum pivot_axis_type summary_axis;
485 struct ctables_summary_spec_set summary_specs;
486 struct variable **sum_vars;
489 enum pivot_axis_type slabels_axis;
490 bool slabels_visible;
492 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
494 Most commonly, label_axis[a] == a, and in particular we always have
495 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
497 If ROWLABELS or COLLABELS is specified, then one of
498 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
499 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
501 If any category labels are moved, then 'clabels_example' is one of the
502 variables being moved (and it is otherwise NULL). All of the variables
503 being moved have the same width, value labels, and categories, so this
504 example variable can be used to find those out.
506 The remaining members in this group are relevant only if category labels
509 'clabels_values_map' holds a "struct ctables_value" for all the values
510 that appear in all of the variables in the moved categories. It is
511 accumulated as the data is read. Once the data is fully read, its
512 sorted values are put into 'clabels_values' and 'n_clabels_values'.
514 enum pivot_axis_type label_axis[PIVOT_N_AXES];
515 enum pivot_axis_type clabels_from_axis;
516 const struct variable *clabels_example;
517 struct hmap clabels_values_map;
518 struct ctables_value **clabels_values;
519 size_t n_clabels_values;
521 /* Indexed by variable dictionary index. */
522 struct ctables_categories **categories;
531 struct ctables_chisq *chisq;
532 struct ctables_pairwise *pairwise;
535 struct ctables_categories
538 struct ctables_category *cats;
543 struct ctables_category
545 enum ctables_category_type
547 /* Explicit category lists. */
550 CCT_NRANGE, /* Numerical range. */
551 CCT_SRANGE, /* String range. */
556 /* Totals and subtotals. */
560 /* Implicit category lists. */
565 /* For contributing to TOTALN. */
566 CCT_EXCLUDED_MISSING,
570 struct ctables_category *subtotal;
576 double number; /* CCT_NUMBER. */
577 struct substring string; /* CCT_STRING, in dictionary encoding. */
578 double nrange[2]; /* CCT_NRANGE. */
579 struct substring srange[2]; /* CCT_SRANGE. */
583 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
584 bool hide_subcategories; /* CCT_SUBTOTAL. */
587 /* CCT_POSTCOMPUTE. */
590 const struct ctables_postcompute *pc;
591 enum fmt_type parse_format;
594 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
597 bool include_missing;
601 enum ctables_summary_function sort_function;
602 struct variable *sort_var;
607 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
608 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
609 struct msg_location *location;
613 ctables_category_uninit (struct ctables_category *cat)
618 msg_location_destroy (cat->location);
625 case CCT_POSTCOMPUTE:
629 ss_dealloc (&cat->string);
633 ss_dealloc (&cat->srange[0]);
634 ss_dealloc (&cat->srange[1]);
639 free (cat->total_label);
647 case CCT_EXCLUDED_MISSING:
653 nullable_substring_equal (const struct substring *a,
654 const struct substring *b)
656 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
660 ctables_category_equal (const struct ctables_category *a,
661 const struct ctables_category *b)
663 if (a->type != b->type)
669 return a->number == b->number;
672 return ss_equals (a->string, b->string);
675 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
678 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
679 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
685 case CCT_POSTCOMPUTE:
686 return a->pc == b->pc;
690 return !strcmp (a->total_label, b->total_label);
695 return (a->include_missing == b->include_missing
696 && a->sort_ascending == b->sort_ascending
697 && a->sort_function == b->sort_function
698 && a->sort_var == b->sort_var
699 && a->percentile == b->percentile);
701 case CCT_EXCLUDED_MISSING:
709 ctables_categories_unref (struct ctables_categories *c)
714 assert (c->n_refs > 0);
718 for (size_t i = 0; i < c->n_cats; i++)
719 ctables_category_uninit (&c->cats[i]);
725 ctables_categories_equal (const struct ctables_categories *a,
726 const struct ctables_categories *b)
728 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
731 for (size_t i = 0; i < a->n_cats; i++)
732 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
738 /* Chi-square test (SIGTEST). */
746 /* Pairwise comparison test (COMPARETEST). */
747 struct ctables_pairwise
749 enum { PROP, MEAN } type;
752 bool meansvariance_allcats;
754 enum { BONFERRONI = 1, BH } adjust;
778 struct variable *var;
780 struct ctables_summary_spec_set specs[N_CSVS];
784 struct ctables_axis *subs[2];
787 struct msg_location *loc;
790 static void ctables_axis_destroy (struct ctables_axis *);
799 enum ctables_function_availability
801 CTFA_ALL, /* Any variables. */
802 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
803 //CTFA_MRSETS, /* Only multiple-response sets */
806 struct ctables_summary_spec
808 enum ctables_summary_function function;
809 double percentile; /* CTSF_PTILE only. */
812 struct fmt_spec format;
813 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
820 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
821 const struct ctables_summary_spec *src)
824 dst->label = xstrdup_if_nonnull (src->label);
828 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
835 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
836 const struct ctables_summary_spec_set *src)
838 struct ctables_summary_spec *specs
839 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
840 for (size_t i = 0; i < src->n; i++)
841 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
843 *dst = (struct ctables_summary_spec_set) {
848 .is_scale = src->is_scale,
853 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
855 for (size_t i = 0; i < set->n; i++)
856 ctables_summary_spec_uninit (&set->specs[i]);
857 free (set->listwise_vars);
862 parse_col_width (struct lexer *lexer, const char *name, double *width)
864 lex_match (lexer, T_EQUALS);
865 if (lex_match_id (lexer, "DEFAULT"))
867 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
869 *width = lex_number (lexer);
879 parse_bool (struct lexer *lexer, bool *b)
881 if (lex_match_id (lexer, "NO"))
883 else if (lex_match_id (lexer, "YES"))
887 lex_error_expecting (lexer, "YES", "NO");
893 static enum ctables_function_availability
894 ctables_function_availability (enum ctables_summary_function f)
896 static enum ctables_function_availability availability[] = {
897 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
902 return availability[f];
906 ctables_summary_function_is_count (enum ctables_summary_function f)
908 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
912 parse_ctables_summary_function (struct lexer *lexer,
913 enum ctables_summary_function *f)
917 enum ctables_summary_function function;
918 struct substring name;
920 static struct pair names[] = {
921 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
922 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
925 /* The .COUNT suffix may be omitted. */
926 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
927 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
928 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
929 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
930 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
931 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
932 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
936 if (!lex_force_id (lexer))
939 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
940 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
942 *f = names[i].function;
947 lex_error (lexer, _("Expecting summary function name."));
952 ctables_axis_destroy (struct ctables_axis *axis)
960 for (size_t i = 0; i < N_CSVS; i++)
961 ctables_summary_spec_set_uninit (&axis->specs[i]);
966 ctables_axis_destroy (axis->subs[0]);
967 ctables_axis_destroy (axis->subs[1]);
970 msg_location_destroy (axis->loc);
974 static struct ctables_axis *
975 ctables_axis_new_nonterminal (enum ctables_axis_op op,
976 struct ctables_axis *sub0,
977 struct ctables_axis *sub1,
978 struct lexer *lexer, int start_ofs)
980 struct ctables_axis *axis = xmalloc (sizeof *axis);
981 *axis = (struct ctables_axis) {
983 .subs = { sub0, sub1 },
984 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
989 struct ctables_axis_parse_ctx
992 struct dictionary *dict;
994 struct ctables_table *t;
997 static struct fmt_spec
998 ctables_summary_default_format (enum ctables_summary_function function,
999 const struct variable *var)
1001 static const enum ctables_format default_formats[] = {
1002 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1006 switch (default_formats[function])
1009 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1012 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1015 return *var_get_print_format (var);
1022 static struct pivot_value *
1023 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1027 static const char *default_labels[] = {
1028 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1033 return (spec->function == CTSF_PTILE
1034 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1036 : pivot_value_new_text (default_labels[spec->function]));
1040 struct substring in = ss_cstr (spec->label);
1041 struct substring target = ss_cstr (")CILEVEL");
1043 struct string out = DS_EMPTY_INITIALIZER;
1046 size_t chunk = ss_find_substring (in, target);
1047 ds_put_substring (&out, ss_head (in, chunk));
1048 ss_advance (&in, chunk);
1050 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1052 ss_advance (&in, target.length);
1053 ds_put_format (&out, "%g", cilevel);
1059 ctables_summary_function_name (enum ctables_summary_function function)
1061 static const char *names[] = {
1062 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1066 return names[function];
1070 add_summary_spec (struct ctables_axis *axis,
1071 enum ctables_summary_function function, double percentile,
1072 const char *label, const struct fmt_spec *format,
1073 bool is_ctables_format, const struct msg_location *loc,
1074 enum ctables_summary_variant sv)
1076 if (axis->op == CTAO_VAR)
1078 const char *function_name = ctables_summary_function_name (function);
1079 const char *var_name = var_get_name (axis->var);
1080 switch (ctables_function_availability (function))
1084 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1085 "response sets."), function_name);
1086 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1092 if (!axis->scale && sv != CSV_TOTAL)
1095 _("Summary function %s applies only to scale variables."),
1097 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1107 struct ctables_summary_spec_set *set = &axis->specs[sv];
1108 if (set->n >= set->allocated)
1109 set->specs = x2nrealloc (set->specs, &set->allocated,
1110 sizeof *set->specs);
1112 struct ctables_summary_spec *dst = &set->specs[set->n++];
1113 *dst = (struct ctables_summary_spec) {
1114 .function = function,
1115 .percentile = percentile,
1116 .label = xstrdup_if_nonnull (label),
1117 .format = (format ? *format
1118 : ctables_summary_default_format (function, axis->var)),
1119 .is_ctables_format = is_ctables_format,
1125 for (size_t i = 0; i < 2; i++)
1126 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1127 format, is_ctables_format, loc, sv))
1133 static struct ctables_axis *ctables_axis_parse_stack (
1134 struct ctables_axis_parse_ctx *);
1137 static struct ctables_axis *
1138 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1140 if (lex_match (ctx->lexer, T_LPAREN))
1142 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1143 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1145 ctables_axis_destroy (sub);
1151 if (!lex_force_id (ctx->lexer))
1154 int start_ofs = lex_ofs (ctx->lexer);
1155 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1159 struct ctables_axis *axis = xmalloc (sizeof *axis);
1160 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1162 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1163 : lex_match_phrase (ctx->lexer, "[C]") ? false
1164 : var_get_measure (var) == MEASURE_SCALE);
1165 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1166 lex_ofs (ctx->lexer) - 1);
1167 if (axis->scale && var_is_alpha (var))
1169 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1171 var_get_name (var));
1172 ctables_axis_destroy (axis);
1180 has_digit (const char *s)
1182 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into FORMAT.

   The type name is compared case-insensitively against the CTABLES-specific
   formats NEGPAREN, NEQUAL, PAREN, and PCTPAREN.  Any other type name is
   re-parsed as an ordinary format specifier, which must be valid for output
   and compatible with numeric values; in that case *IS_CTABLES_FORMAT is set
   to false.

   For a CTABLES-specific format, the width and decimals are validated (the
   error messages require a width of 2 or greater and a width greater than
   the number of decimals) and *IS_CTABLES_FORMAT is set to true. */
1186 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1187 bool *is_ctables_format)
1189 char type[FMT_TYPE_LEN_MAX + 1];
1190 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1193 if (!strcasecmp (type, "NEGPAREN"))
1194 format->type = CTEF_NEGPAREN;
1195 else if (!strcasecmp (type, "NEQUAL"))
1196 format->type = CTEF_NEQUAL;
1197 else if (!strcasecmp (type, "PAREN"))
1198 format->type = CTEF_PAREN;
1199 else if (!strcasecmp (type, "PCTPAREN"))
1200 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific type: fall back to the regular format parser. */
1203 *is_ctables_format = false;
1204 return (parse_format_specifier (lexer, format)
1205 && fmt_check_output (format)
1206 && fmt_check_type_compat (format, VAL_NUMERIC));
1212 lex_next_error (lexer, -1, -1,
1213 _("Output format %s requires width 2 or greater."), type);
1216 else if (format->d > format->w - 1)
1218 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1219 "greater than decimals."), type);
1224 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function, a percentile in the
   closed range [0,100] if the function is PTILE, an optional quoted label,
   and an optional output format (recognized as an identifier that contains a
   digit).  Every specification is attached to the axis with
   add_summary_spec(), tagged with the current summary variant SV.

   SV starts as CSV_CELL.  The keyword TOTALS followed by a left bracket
   opens a nested list, which must be closed by its own right bracket when SV
   is CSV_TOTAL.  Commas between specifications are optional.

   On the error path the partially built axis is destroyed. */
1229 static struct ctables_axis *
1230 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1232 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1233 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1236 enum ctables_summary_variant sv = CSV_CELL;
1239 int start_ofs = lex_ofs (ctx->lexer);
1241 /* Parse function. */
1242 enum ctables_summary_function function;
1243 if (!parse_ctables_summary_function (ctx->lexer, &function))
1246 /* Parse percentile. */
1247 double percentile = 0;
1248 if (function == CTSF_PTILE)
1250 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1252 percentile = lex_number (ctx->lexer);
1253 lex_get (ctx->lexer);
/* Parse the optional label, a string token. */
1258 if (lex_is_string (ctx->lexer))
1260 label = ss_xstrdup (lex_tokss (ctx->lexer));
1261 lex_get (ctx->lexer);
/* Parse the optional format.  Only an identifier containing a digit is
   treated as a format. */
1265 struct fmt_spec format;
1266 const struct fmt_spec *formatp;
1267 bool is_ctables_format = false;
1268 if (lex_token (ctx->lexer) == T_ID
1269 && has_digit (lex_tokcstr (ctx->lexer)))
1271 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1272 &is_ctables_format))
/* The location spans the whole specification just parsed.  It is destroyed
   right after add_summary_spec() returns. */
1282 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1283 lex_ofs (ctx->lexer) - 1);
1284 add_summary_spec (sub, function, percentile, label, formatp,
1285 is_ctables_format, loc, sv);
1287 msg_location_destroy (loc);
1289 lex_match (ctx->lexer, T_COMMA);
1290 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1292 if (!lex_force_match (ctx->lexer, T_LBRACK))
1296 else if (lex_match (ctx->lexer, T_RBRACK))
1298 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1305 ctables_axis_destroy (sub);
/* Searches the axis expression AXIS for a scale variable.

   For a variable node (CTAO_VAR), returns AXIS itself if it is marked as
   scale and NULL otherwise.  For other nodes, the search recurses into both
   sub-axes. */
1309 static const struct ctables_axis *
1310 find_scale (const struct ctables_axis *axis)
1314 else if (axis->op == CTAO_VAR)
1315 return axis->scale ? axis : NULL;
1318 for (size_t i = 0; i < 2; i++)
1320 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis expression AXIS for a categorical variable that has
   summary specifications.

   For a variable node (CTAO_VAR), returns AXIS itself if it is not scale and
   has at least one CSV_CELL summary specification, and NULL otherwise.  For
   other nodes, the search recurses into both sub-axes. */
1328 static const struct ctables_axis *
1329 find_categorical_summary_spec (const struct ctables_axis *axis)
1333 else if (axis->op == CTAO_VAR)
1334 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1337 for (size_t i = 0; i < 2; i++)
1339 const struct ctables_axis *sum
1340 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix expressions joined by the nesting operator
   ">", combining each new right-hand side with the expression so far into a
   CTAO_NEST node.

   Two constraints are enforced for every nesting, each reported with an
   error plus explanatory notes, after which the new node is destroyed:

   - The outer and inner expressions may not both contain a scale variable.

   - An outer categorical variable may not carry summary specifications;
     summaries are allowed only at the innermost nesting level. */
1348 static struct ctables_axis *
1349 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1351 int start_ofs = lex_ofs (ctx->lexer);
1352 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1356 while (lex_match (ctx->lexer, T_GT))
1358 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1362 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1363 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1365 const struct ctables_axis *outer_scale = find_scale (lhs);
1366 const struct ctables_axis *inner_scale = find_scale (rhs);
1367 if (outer_scale && inner_scale)
1369 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1370 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1371 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1372 ctables_axis_destroy (nest);
1376 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1379 msg_at (SE, nest->loc,
1380 _("Summaries may only be requested for categorical variables "
1381 "at the innermost nesting level."));
1382 msg_at (SN, outer_sum->loc,
1383 _("This outer categorical variable has a summary."));
1384 ctables_axis_destroy (nest);
/* Parses a sequence of nest expressions joined by the stacking operator "+".
   Each additional operand is combined with the expression parsed so far into
   a CTAO_STACK node whose location starts at the first operand. */
1394 static struct ctables_axis *
1395 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1397 int start_ofs = lex_ofs (ctx->lexer);
1398 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1402 while (lex_match (ctx->lexer, T_PLUS))
1404 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1408 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1409 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T from LEXER.

   The tokens BY, "/", and end-of-command are checked first and handled
   separately (NOTE(review): presumably they mean the axis is empty --
   confirm).  Otherwise a full stack expression is parsed into T->axes[A],
   and the return value is true if that produced a non-null axis. */
1416 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1417 struct ctables *ct, struct ctables_table *t,
1418 enum pivot_axis_type a)
1420 if (lex_token (lexer) == T_BY
1421 || lex_token (lexer) == T_SLASH
1422 || lex_token (lexer) == T_ENDCMD)
1425 struct ctables_axis_parse_ctx ctx = {
1431 t->axes[a] = ctables_axis_parse_stack (&ctx);
1432 return t->axes[a] != NULL;
/* Destructor for CHISQ.  ctables_table_destroy() calls this on a table's
   chisq member. */
1436 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE.  ctables_table_destroy() calls this on a table's
   pairwise member. */
1442 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T: its sections, its per-variable
   category references, the axis expression and stack for each pivot axis,
   its summary specs array, the values stored in the clabels hash map, and
   the chisq and pairwise settings. */
1448 ctables_table_destroy (struct ctables_table *t)
1453 for (size_t i = 0; i < t->n_sections; i++)
1454 ctables_section_uninit (&t->sections[i]);
/* Categories are reference-counted, so each slot only drops its own
   reference. */
1457 for (size_t i = 0; i < t->n_categories; i++)
1458 ctables_categories_unref (t->categories[i]);
1459 free (t->categories);
1461 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1463 ctables_axis_destroy (t->axes[a]);
1464 ctables_stack_uninit (&t->stacks[a]);
1466 free (t->summary_specs.specs);
/* The _SAFE iteration allows each node to be deleted while walking the
   map.  Values are destroyed using the width of T->clabels_example. */
1468 struct ctables_value *ctv, *next_ctv;
1469 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1470 &t->clabels_values_map)
1472 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1473 hmap_delete (&t->clabels_values_map, &ctv->node);
1476 hmap_destroy (&t->clabels_values_map);
1477 free (t->clabels_values);
1483 ctables_chisq_destroy (t->chisq);
1484 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT: every postcompute in CT->postcomputes
   (its location, expression, and summary specs), the CTABLES format
   settings, the reference to the pivot table look, and every table in
   CT->tables. */
1489 ctables_destroy (struct ctables *ct)
/* The _SAFE iteration allows each postcompute to be removed from the map
   while walking it. */
1494 struct ctables_postcompute *pc, *next_pc;
1495 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1499 msg_location_destroy (pc->location);
1500 ctables_pcexpr_destroy (pc->expr);
1504 ctables_summary_spec_set_uninit (pc->specs);
1507 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1511 fmt_settings_uninit (&ct->ctables_formats);
1512 pivot_table_look_unref (ct->look);
1516 for (size_t i = 0; i < ct->n_tables; i++)
1517 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose numeric range runs from LOW to
   HIGH. */
1522 static struct ctables_category
1523 cct_nrange (double low, double high)
1525 return (struct ctables_category) {
1527 .nrange = { low, high }
/* Returns a category, by value, whose string range runs from LOW to HIGH.
   The substrings are stored as given, not copied.  Callers in this file pass
   a substring with a null string pointer for an open-ended bound. */
1531 static struct ctables_category
1532 cct_srange (struct substring low, struct substring high)
1534 return (struct ctables_category) {
1536 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category specification
   from LEXER and stores a CCT_SUBTOTAL category in *CAT.

   An optional "=" followed by a string supplies the label; without it, the
   translated default "Subtotal" is used.  HIDE_SUBCATEGORIES is copied into
   the category (callers pass true for HSUBTOTAL and false for SUBTOTAL).
   The label stored in *CAT is a newly allocated string. */
1541 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1542 struct ctables_category *cat)
1545 if (lex_match (lexer, T_EQUALS))
1547 if (!lex_force_string (lexer))
1550 total_label = ss_xstrdup (lex_tokss (lexer));
1554 total_label = xstrdup (_("Subtotal"));
1556 *cat = (struct ctables_category) {
1557 .type = CCT_SUBTOTAL,
1558 .hide_subcategories = hide_subcategories,
1559 .total_label = total_label
/* Converts the current string token in LEXER with recode_substring_pool(),
   passing DICT's encoding and "UTF-8" and no pool, then strips trailing
   spaces from the result.
   NOTE(review): presumably this recodes from UTF-8 to the dictionary
   encoding -- confirm against recode_substring_pool()'s argument order. */
1564 static struct substring
1565 parse_substring (struct lexer *lexer, struct dictionary *dict)
1567 struct substring s = recode_substring_pool (
1568 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1569 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification from LEXER into *CAT.

   The accepted forms, tried in this order, are:

   - OTHERNM and MISSING keywords.
   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal().
   - LO THRU followed by a string or a number (range open at the low end).
   - A number, optionally followed by THRU HI or THRU and another number.
   - A string, optionally followed by THRU HI or THRU and another string.
   - "&" followed by the name of a postcompute, which must already be known
     to ctables_find_postcompute(); otherwise an error is reported.

   Anything else is a syntax error.  Open-ended numeric bounds are
   represented by -DBL_MAX and DBL_MAX, open-ended string bounds by a
   substring with a null string pointer. */
1575 ctables_table_parse_explicit_category (struct lexer *lexer,
1576 struct dictionary *dict,
1578 struct ctables_category *cat)
1580 if (lex_match_id (lexer, "OTHERNM"))
1581 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1582 else if (lex_match_id (lexer, "MISSING"))
1583 *cat = (struct ctables_category) { .type = CCT_MISSING };
1584 else if (lex_match_id (lexer, "SUBTOTAL"))
1585 return ctables_table_parse_subtotal (lexer, false, cat);
1586 else if (lex_match_id (lexer, "HSUBTOTAL"))
1587 return ctables_table_parse_subtotal (lexer, true, cat);
1588 else if (lex_match_id (lexer, "LO"))
1590 if (!lex_force_match_id (lexer, "THRU"))
1592 if (lex_is_string (lexer))
1594 struct substring sr0 = { .string = NULL };
1595 struct substring sr1 = parse_substring (lexer, dict);
1596 *cat = cct_srange (sr0, sr1);
1598 else if (lex_force_num (lexer))
1600 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1606 else if (lex_is_number (lexer))
1608 double number = lex_number (lexer);
1610 if (lex_match_id (lexer, "THRU"))
1612 if (lex_match_id (lexer, "HI"))
1613 *cat = cct_nrange (number, DBL_MAX);
1616 if (!lex_force_num (lexer))
1618 *cat = cct_nrange (number, lex_number (lexer));
1623 *cat = (struct ctables_category) {
1628 else if (lex_is_string (lexer))
1630 struct substring s = parse_substring (lexer, dict);
1631 if (lex_match_id (lexer, "THRU"))
1633 if (lex_match_id (lexer, "HI"))
1635 struct substring sr1 = { .string = NULL };
1636 *cat = cct_srange (s, sr1);
1640 if (!lex_force_string (lexer))
1645 struct substring sr1 = parse_substring (lexer, dict);
1646 *cat = cct_srange (s, sr1);
1650 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1652 else if (lex_match (lexer, T_AND))
1654 if (!lex_force_id (lexer))
1656 struct ctables_postcompute *pc = ctables_find_postcompute (
1657 ct, lex_tokcstr (lexer));
1660 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1661 msg_at (SE, loc, _("Unknown postcompute &%s."),
1662 lex_tokcstr (lexer));
1663 msg_location_destroy (loc);
1668 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1672 lex_error (lexer, NULL);
/* Parses string S as a value in input format FORMAT, using DICT's encoding
   and the global format settings, by calling data_in().  If data_in()
   reports an error, a message attributed to LOCATION names the format and
   includes the error text.  N is the output parameter for the parsed
   number. */
1680 parse_category_string (struct msg_location *location,
1681 struct substring s, const struct dictionary *dict,
1682 enum fmt_type format, double *n)
1685 char *error = data_in (s, dict_get_encoding (dict), format,
1686 settings_get_fmt_settings (), &v, 0, NULL);
1689 msg_at (SE, location,
1690 _("Failed to parse category specification as format %s: %s."),
1691 fmt_name (format), error);
/* Searches CATS for the category that the postcompute category reference E
   refers to.

   Matching depends on E->op: numbers and numeric ranges compare by value,
   strings with ss_equals(), string ranges with nullable_substring_equal()
   on both bounds, and MISSING, OTHERNM, SUBTOTAL, and TOTAL by category
   type.  For subtotals, E->subtotal_index is compared against a running
   count of subtotals; an index of 0 is handled separately, and the final
   check covers the case of an index of 0 with more than one subtotal in
   CATS. */
1700 static struct ctables_category *
1701 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1702 const struct ctables_pcexpr *e)
1704 struct ctables_category *best = NULL;
1705 size_t n_subtotals = 0;
1706 for (size_t i = 0; i < cats->n_cats; i++)
1708 struct ctables_category *cat = &cats->cats[i];
1711 case CTPO_CAT_NUMBER:
1712 if (cat->type == CCT_NUMBER && cat->number == e->number)
1716 case CTPO_CAT_STRING:
1717 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1721 case CTPO_CAT_NRANGE:
1722 if (cat->type == CCT_NRANGE
1723 && cat->nrange[0] == e->nrange[0]
1724 && cat->nrange[1] == e->nrange[1])
1728 case CTPO_CAT_SRANGE:
1729 if (cat->type == CCT_SRANGE
1730 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1731 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1735 case CTPO_CAT_MISSING:
1736 if (cat->type == CCT_MISSING)
1740 case CTPO_CAT_OTHERNM:
1741 if (cat->type == CCT_OTHERNM)
1745 case CTPO_CAT_SUBTOTAL:
1746 if (cat->type == CCT_SUBTOTAL)
1749 if (e->subtotal_index == n_subtotals)
1751 else if (e->subtotal_index == 0)
1756 case CTPO_CAT_TOTAL:
1757 if (cat->type == CCT_TOTAL)
1771 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute reference E refers to, taking
   PARSE_FORMAT into account.

   When PARSE_FORMAT is not FMT_F, string references are first converted to
   numbers with parse_category_string(): a CTPO_CAT_STRING reference becomes
   a temporary CTPO_CAT_NUMBER reference, and a CTPO_CAT_SRANGE reference
   becomes a temporary CTPO_CAT_NRANGE reference in which a bound with a
   null string pointer maps to -DBL_MAX (low) or DBL_MAX (high).  The
   temporary reference keeps E's location.  In all other cases E is looked
   up unchanged. */
1776 static struct ctables_category *
1777 ctables_find_category_for_postcompute (const struct dictionary *dict,
1778 const struct ctables_categories *cats,
1779 enum fmt_type parse_format,
1780 const struct ctables_pcexpr *e)
1782 if (parse_format != FMT_F)
1784 if (e->op == CTPO_CAT_STRING)
1787 if (!parse_category_string (e->location, e->string, dict,
1788 parse_format, &number))
1791 struct ctables_pcexpr e2 = {
1792 .op = CTPO_CAT_NUMBER,
1794 .location = e->location,
1796 return ctables_find_category_for_postcompute__ (cats, &e2);
1798 else if (e->op == CTPO_CAT_SRANGE)
1801 if (!e->srange[0].string)
1802 nrange[0] = -DBL_MAX;
1803 else if (!parse_category_string (e->location, e->srange[0], dict,
1804 parse_format, &nrange[0]))
1807 if (!e->srange[1].string)
1808 nrange[1] = DBL_MAX;
1809 else if (!parse_category_string (e->location, e->srange[1], dict,
1810 parse_format, &nrange[1]))
1813 struct ctables_pcexpr e2 = {
1814 .op = CTPO_CAT_NRANGE,
1815 .nrange = { nrange[0], nrange[1] },
1816 .location = e->location,
1818 return ctables_find_category_for_postcompute__ (cats, &e2);
1821 return ctables_find_category_for_postcompute__ (cats, e);
/* Recursively checks that every category referenced by postcompute
   expression E, used by computed category PC_CAT, can be found in CATS.

   For a category reference, the category is looked up with
   ctables_find_category_for_postcompute() using PC_CAT->parse_format.  Two
   kinds of diagnostics are produced, with follow-up notes:

   - A SUBTOTAL reference without a position (subtotal_index 0) when CATS
     contains more than one subtotal, since such references must name a
     subtotal by position.

   - A reference to a category not included in the category list, with a
     hint on how to fix it that depends on the kind of reference.

   CATS_LOCATION is the syntax location of the category list, used for
   messages.  For other expression nodes, both sub-expressions that are
   present are checked in turn. */
1825 ctables_recursive_check_postcompute (struct dictionary *dict,
1826 const struct ctables_pcexpr *e,
1827 struct ctables_category *pc_cat,
1828 const struct ctables_categories *cats,
1829 const struct msg_location *cats_location)
1833 case CTPO_CAT_NUMBER:
1834 case CTPO_CAT_STRING:
1835 case CTPO_CAT_NRANGE:
1836 case CTPO_CAT_SRANGE:
1837 case CTPO_CAT_MISSING:
1838 case CTPO_CAT_OTHERNM:
1839 case CTPO_CAT_SUBTOTAL:
1840 case CTPO_CAT_TOTAL:
1842 struct ctables_category *cat = ctables_find_category_for_postcompute (
1843 dict, cats, pc_cat->parse_format, e);
1846 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1848 size_t n_subtotals = 0;
1849 for (size_t i = 0; i < cats->n_cats; i++)
1850 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1851 if (n_subtotals > 1)
1853 msg_at (SE, cats_location,
1854 ngettext ("These categories include %zu instance "
1855 "of SUBTOTAL or HSUBTOTAL, so references "
1856 "from computed categories must refer to "
1857 "subtotals by position, "
1858 "e.g. SUBTOTAL[1].",
1859 "These categories include %zu instances "
1860 "of SUBTOTAL or HSUBTOTAL, so references "
1861 "from computed categories must refer to "
1862 "subtotals by position, "
1863 "e.g. SUBTOTAL[1].",
1866 msg_at (SN, e->location,
1867 _("This is the reference that lacks a position."));
1872 msg_at (SE, pc_cat->location,
1873 _("Computed category &%s references a category not included "
1874 "in the category list."),
1876 msg_at (SN, e->location, _("This is the missing category."));
1877 if (e->op == CTPO_CAT_SUBTOTAL)
1878 msg_at (SN, cats_location,
1879 _("To fix the problem, add subtotals to the "
1880 "list of categories here."));
1881 else if (e->op == CTPO_CAT_TOTAL)
1882 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1883 "CATEGORIES specification."));
1885 msg_at (SN, cats_location,
1886 _("To fix the problem, add the missing category to the "
1887 "list of categories here."));
1890 if (pc_cat->pc->hide_source_cats)
1904 for (size_t i = 0; i < 2; i++)
1905 if (e->subs[i] && !ctables_recursive_check_postcompute (
1906 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric variables.  When one is
   found, an error attributed to CAT's location explains that the category
   specification applies only to string variables and names the offending
   numeric variable. */
1915 all_strings (struct variable **vars, size_t n_vars,
1916 const struct ctables_category *cat)
1918 for (size_t j = 0; j < n_vars; j++)
1919 if (var_is_numeric (vars[j]))
1921 msg_at (SE, cat->location,
1922 _("This category specification may be applied only to string "
1923 "variables, but this subcommand tries to apply it to "
1924 "numeric variable %s."),
1925 var_get_name (vars[j]));
/* Parses a categories specification for table T from LEXER.

   The syntax handled here is:

   - The keyword VARIABLES, an optional "=", and a list of variables.  A new
     ctables_categories object is created with one reference per listed
     variable, and the previous categories of each variable are unreferenced.

   - An optional bracketed list of explicit categories, each parsed by
     ctables_table_parse_explicit_category(), optionally separated by commas.

   - Settings, up to the next "/" or end of command.  ORDER, KEY, and MISSING
     are matched only when no explicit categories were given; TOTAL, LABEL,
     POSITION, and EMPTY are always matched.

   A total category, labeled with the LABEL string or the translated default
   "Total", is placed at the start of the category array when POSITION is
   BEFORE and at the end otherwise.

   When explicit categories were given they are then validated against the
   variables: postcomputes are checked with
   ctables_recursive_check_postcompute(), some categories are rejected for
   string variables, and string categories and string ranges are either
   converted to numbers by parsing them in the variables' common print
   format or required to apply to string variables only. */
1932 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1933 struct ctables *ct, struct ctables_table *t)
1935 if (!lex_match_id (lexer, "VARIABLES"))
1937 lex_match (lexer, T_EQUALS);
1939 struct variable **vars;
1941 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all of the variables, or NULL if
   their types differ. */
1944 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1945 for (size_t i = 1; i < n_vars; i++)
1947 const struct fmt_spec *f = var_get_print_format (vars[i]);
1948 if (f->type != common_format->type)
1950 common_format = NULL;
1956 && (fmt_get_category (common_format->type)
1957 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1959 struct ctables_categories *c = xmalloc (sizeof *c);
1960 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1961 for (size_t i = 0; i < n_vars; i++)
1963 struct ctables_categories **cp
1964 = &t->categories[var_get_dict_index (vars[i])];
1965 ctables_categories_unref (*cp);
/* The offsets stay at -1 unless an explicit category list is present. */
1969 size_t allocated_cats = 0;
1970 int cats_start_ofs = -1;
1971 int cats_end_ofs = -1;
1972 if (lex_match (lexer, T_LBRACK))
1974 cats_start_ofs = lex_ofs (lexer);
1977 if (c->n_cats >= allocated_cats)
1978 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1980 int start_ofs = lex_ofs (lexer);
1981 struct ctables_category *cat = &c->cats[c->n_cats];
1982 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1984 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1987 lex_match (lexer, T_COMMA);
1989 while (!lex_match (lexer, T_RBRACK));
1990 cats_end_ofs = lex_ofs (lexer) - 1;
/* Defaults for the implicit category, adjusted by ORDER, KEY, and MISSING
   below. */
1993 struct ctables_category cat = {
1995 .include_missing = false,
1996 .sort_ascending = true,
1998 bool show_totals = false;
1999 char *total_label = NULL;
2000 bool totals_before = false;
2001 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2003 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2005 lex_match (lexer, T_EQUALS);
2006 if (lex_match_id (lexer, "A"))
2007 cat.sort_ascending = true;
2008 else if (lex_match_id (lexer, "D"))
2009 cat.sort_ascending = false;
2012 lex_error_expecting (lexer, "A", "D");
2016 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2018 lex_match (lexer, T_EQUALS);
2019 if (lex_match_id (lexer, "VALUE"))
2020 cat.type = CCT_VALUE;
2021 else if (lex_match_id (lexer, "LABEL"))
2022 cat.type = CCT_LABEL;
2025 cat.type = CCT_FUNCTION;
2026 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2029 if (lex_match (lexer, T_LPAREN))
2031 cat.sort_var = parse_variable (lexer, dict);
2035 if (cat.sort_function == CTSF_PTILE)
2037 lex_match (lexer, T_COMMA);
2038 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2040 cat.percentile = lex_number (lexer);
2044 if (!lex_force_match (lexer, T_RPAREN))
2047 else if (ctables_function_availability (cat.sort_function)
2050 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2055 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2057 lex_match (lexer, T_EQUALS);
2058 if (lex_match_id (lexer, "INCLUDE"))
2059 cat.include_missing = true;
2060 else if (lex_match_id (lexer, "EXCLUDE"))
2061 cat.include_missing = false;
2064 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2068 else if (lex_match_id (lexer, "TOTAL"))
2070 lex_match (lexer, T_EQUALS);
2071 if (!parse_bool (lexer, &show_totals))
2074 else if (lex_match_id (lexer, "LABEL"))
2076 lex_match (lexer, T_EQUALS);
2077 if (!lex_force_string (lexer))
2080 total_label = ss_xstrdup (lex_tokss (lexer));
2083 else if (lex_match_id (lexer, "POSITION"))
2085 lex_match (lexer, T_EQUALS);
2086 if (lex_match_id (lexer, "BEFORE"))
2087 totals_before = true;
2088 else if (lex_match_id (lexer, "AFTER"))
2089 totals_before = false;
2092 lex_error_expecting (lexer, "BEFORE", "AFTER");
2096 else if (lex_match_id (lexer, "EMPTY"))
2098 lex_match (lexer, T_EQUALS);
2099 if (lex_match_id (lexer, "INCLUDE"))
2100 c->show_empty = true;
2101 else if (lex_match_id (lexer, "EXCLUDE"))
2102 c->show_empty = false;
2105 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2112 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2113 "TOTAL", "LABEL", "POSITION", "EMPTY");
2115 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2122 if (c->n_cats >= allocated_cats)
2123 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2124 c->cats[c->n_cats++] = cat;
2129 if (c->n_cats >= allocated_cats)
2130 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
/* The total goes either into a slot opened at index 0 or into the slot just
   past the current last category. */
2132 struct ctables_category *totals;
2135 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2136 totals = &c->cats[0];
2139 totals = &c->cats[c->n_cats];
2142 *totals = (struct ctables_category) {
2144 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, assigning each category its subtotal. */
2148 struct ctables_category *subtotal = NULL;
2149 for (size_t i = totals_before ? 0 : c->n_cats;
2150 totals_before ? i < c->n_cats : i-- > 0;
2151 totals_before ? i++ : 0)
2153 struct ctables_category *cat = &c->cats[i];
2162 cat->subtotal = subtotal;
2165 case CCT_POSTCOMPUTE:
2176 case CCT_EXCLUDED_MISSING:
/* Validate explicit categories against the variables they apply to. */
2181 if (cats_start_ofs != -1)
2183 for (size_t i = 0; i < c->n_cats; i++)
2185 struct ctables_category *cat = &c->cats[i];
2188 case CCT_POSTCOMPUTE:
2189 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2190 struct msg_location *cats_location
2191 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2192 bool ok = ctables_recursive_check_postcompute (
2193 dict, cat->pc->expr, cat, c, cats_location);
2194 msg_location_destroy (cats_location);
2201 for (size_t j = 0; j < n_vars; j++)
2202 if (var_is_alpha (vars[j]))
2204 msg_at (SE, cat->location,
2205 _("This category specification may be applied "
2206 "only to numeric variables, but this "
2207 "subcommand tries to apply it to string "
2209 var_get_name (vars[j]));
2218 if (!parse_category_string (cat->location, cat->string, dict,
2219 common_format->type, &n))
2222 ss_dealloc (&cat->string);
2224 cat->type = CCT_NUMBER;
2227 else if (!all_strings (vars, n_vars, cat))
2236 if (!cat->srange[0].string)
2238 else if (!parse_category_string (cat->location,
2239 cat->srange[0], dict,
2240 common_format->type, &n[0]))
2243 if (!cat->srange[1].string)
2245 else if (!parse_category_string (cat->location,
2246 cat->srange[1], dict,
2247 common_format->type, &n[1]))
2250 ss_dealloc (&cat->srange[0]);
2251 ss_dealloc (&cat->srange[1]);
2253 cat->type = CCT_NRANGE;
2254 cat->nrange[0] = n[0];
2255 cat->nrange[1] = n[1];
2257 else if (!all_strings (vars, n_vars, cat))
2268 case CCT_EXCLUDED_MISSING:
/* Releases resources owned by NEST: the summary spec set for every summary
   variant and the domains array for every domain type. */
2283 ctables_nest_uninit (struct ctables_nest *nest)
2286 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2287 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2288 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2289 free (nest->domains[dt]);
/* Releases resources owned by STACK: each of its STACK->n nests is
   uninitialized with ctables_nest_uninit(), then the nests array is
   freed. */
2293 ctables_stack_uninit (struct ctables_stack *stack)
2297 for (size_t i = 0; i < stack->n; i++)
2298 ctables_nest_uninit (&stack->nests[i]);
2299 free (stack->nests);
/* Combines stacks S0 and S1 by nesting: the result has one nest for every
   pair of a nest A from S0 and a nest B from S1, whose variables are A's
   variables followed by B's.

   The new nest's scale index is A's if A has one, otherwise B's shifted by
   the number of variables in A.  Summary specs for every variant are cloned
   from a source nest chosen according to which of A and B has a CSV_CELL
   summary variable set.

   S0 and S1 are passed by value and are uninitialized before returning, so
   the caller must not use them afterward. */
2303 static struct ctables_stack
2304 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2311 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2312 for (size_t i = 0; i < s0.n; i++)
2313 for (size_t j = 0; j < s1.n; j++)
2315 const struct ctables_nest *a = &s0.nests[i];
2316 const struct ctables_nest *b = &s1.nests[j];
2318 size_t allocate = a->n + b->n;
2319 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2321 for (size_t k = 0; k < a->n; k++)
2322 vars[n++] = a->vars[k];
2323 for (size_t k = 0; k < b->n; k++)
2324 vars[n++] = b->vars[k];
2325 assert (n == allocate);
2327 const struct ctables_nest *summary_src;
2328 if (!a->specs[CSV_CELL].var)
2330 else if (!b->specs[CSV_CELL].var)
2335 struct ctables_nest *new = &stack.nests[stack.n++];
2336 *new = (struct ctables_nest) {
2338 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2339 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2343 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2344 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2346 ctables_stack_uninit (&s0);
2347 ctables_stack_uninit (&s1);
/* Combines stacks S0 and S1 by concatenation: the result holds S0's nests
   followed by S1's.  The nest structures are copied by assignment, not
   cloned.  Each nest taken from S1 has its group_head increased by S0.n to
   account for its new position. */
2351 static struct ctables_stack
2352 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2354 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2355 for (size_t i = 0; i < s0.n; i++)
2356 stack.nests[stack.n++] = s0.nests[i];
2357 for (size_t i = 0; i < s1.n; i++)
2359 stack.nests[stack.n] = s1.nests[i];
2360 stack.nests[stack.n].group_head += s0.n;
2363 assert (stack.n == s0.n + s1.n);
/* Returns a stack that contains a single newly allocated nest for the
   variable axis A.  The nest's scale index is 0 if A is scale and SIZE_MAX
   otherwise.

   If A has CSV_CELL summary specs or is scale, the summary specs for every
   variant are cloned from A into the nest and tagged with A's variable and
   scale flag. */
2369 static struct ctables_stack
2370 var_fts (const struct ctables_axis *a)
2372 struct variable **vars = xmalloc (sizeof *vars);
2375 struct ctables_nest *nest = xmalloc (sizeof *nest);
2376 *nest = (struct ctables_nest) {
2379 .scale_idx = a->scale ? 0 : SIZE_MAX,
2381 if (a->specs[CSV_CELL].n || a->scale)
2382 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2384 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2385 nest->specs[sv].var = a->var;
2386 nest->specs[sv].is_scale = a->scale;
2388 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts the axis expression A into a stack of nests.

   Depending on the kind of node, the result is an empty stack, the
   concatenation via stack_fts() of the stacks of the two sub-axes, or the
   nesting via nest_fts() of the stacks of the two sub-axes.  AXIS_TYPE is
   passed through unchanged to the recursive calls. */
2391 static struct ctables_stack
2392 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2395 return (struct ctables_stack) { .n = 0 };
2403 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2404 enumerate_fts (axis_type, a->subs[1]));
2407 /* This should consider any of the scale variables found in the result to
2408 be linked to each other listwise for SMISSING=LISTWISE. */
2409 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2410 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function.  Each group of members below is
   labeled with the summary functions it serves; ctables_summary_init()
   sets up the state according to the summary spec's function. */
2416 union ctables_summary
2418 /* COUNT, VALIDN, TOTALN. */
2421 /* MINIMUM, MAXIMUM, RANGE. */
2428 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2429 struct moments1 *moments;
2431 /* MEDIAN, MODE, PTILE. */
2434 struct casewriter *writer;
2439 /* XXX multiple response */
/* Initializes accumulator S for the summary function SS->function.

   The switch groups functions by the kind of state they need.  Among the
   visible initializations: the minimum and maximum start out as SYSMIS;
   the group ending in the *_SUM functions creates a moments1 accumulator
   for moments up to the variance; and one group creates a sorting
   casewriter over cases with two numeric values, ordered ascending on the
   first value. */
2443 ctables_summary_init (union ctables_summary *s,
2444 const struct ctables_summary_spec *ss)
2446 switch (ss->function)
2450 case CTSF_ROWPCT_COUNT:
2451 case CTSF_COLPCT_COUNT:
2452 case CTSF_TABLEPCT_COUNT:
2453 case CTSF_SUBTABLEPCT_COUNT:
2454 case CTSF_LAYERPCT_COUNT:
2455 case CTSF_LAYERROWPCT_COUNT:
2456 case CTSF_LAYERCOLPCT_COUNT:
2457 case CTSF_ROWPCT_VALIDN:
2458 case CTSF_COLPCT_VALIDN:
2459 case CTSF_TABLEPCT_VALIDN:
2460 case CTSF_SUBTABLEPCT_VALIDN:
2461 case CTSF_LAYERPCT_VALIDN:
2462 case CTSF_LAYERROWPCT_VALIDN:
2463 case CTSF_LAYERCOLPCT_VALIDN:
2464 case CTSF_ROWPCT_TOTALN:
2465 case CTSF_COLPCT_TOTALN:
2466 case CTSF_TABLEPCT_TOTALN:
2467 case CTSF_SUBTABLEPCT_TOTALN:
2468 case CTSF_LAYERPCT_TOTALN:
2469 case CTSF_LAYERROWPCT_TOTALN:
2470 case CTSF_LAYERCOLPCT_TOTALN:
2477 case CTSF_UROWPCT_COUNT:
2478 case CTSF_UCOLPCT_COUNT:
2479 case CTSF_UTABLEPCT_COUNT:
2480 case CTSF_USUBTABLEPCT_COUNT:
2481 case CTSF_ULAYERPCT_COUNT:
2482 case CTSF_ULAYERROWPCT_COUNT:
2483 case CTSF_ULAYERCOLPCT_COUNT:
2484 case CTSF_UROWPCT_VALIDN:
2485 case CTSF_UCOLPCT_VALIDN:
2486 case CTSF_UTABLEPCT_VALIDN:
2487 case CTSF_USUBTABLEPCT_VALIDN:
2488 case CTSF_ULAYERPCT_VALIDN:
2489 case CTSF_ULAYERROWPCT_VALIDN:
2490 case CTSF_ULAYERCOLPCT_VALIDN:
2491 case CTSF_UROWPCT_TOTALN:
2492 case CTSF_UCOLPCT_TOTALN:
2493 case CTSF_UTABLEPCT_TOTALN:
2494 case CTSF_USUBTABLEPCT_TOTALN:
2495 case CTSF_ULAYERPCT_TOTALN:
2496 case CTSF_ULAYERROWPCT_TOTALN:
2497 case CTSF_ULAYERCOLPCT_TOTALN:
2507 s->min = s->max = SYSMIS;
2515 case CTSF_ROWPCT_SUM:
2516 case CTSF_COLPCT_SUM:
2517 case CTSF_TABLEPCT_SUM:
2518 case CTSF_SUBTABLEPCT_SUM:
2519 case CTSF_LAYERPCT_SUM:
2520 case CTSF_LAYERROWPCT_SUM:
2521 case CTSF_LAYERCOLPCT_SUM:
2526 case CTSF_UVARIANCE:
2527 case CTSF_UROWPCT_SUM:
2528 case CTSF_UCOLPCT_SUM:
2529 case CTSF_UTABLEPCT_SUM:
2530 case CTSF_USUBTABLEPCT_SUM:
2531 case CTSF_ULAYERPCT_SUM:
2532 case CTSF_ULAYERROWPCT_SUM:
2533 case CTSF_ULAYERCOLPCT_SUM:
2534 s->moments = moments1_create (MOMENT_VARIANCE);
/* Each case written holds two numeric fields; ctables_summary_add() stores
   the value in field 0 and the weight in field 1.  The writer sorts on
   field 0.  The prototype and ordering are released once the writer has
   been created. */
2544 struct caseproto *proto = caseproto_create ();
2545 proto = caseproto_add_width (proto, 0);
2546 proto = caseproto_add_width (proto, 0);
2548 struct subcase ordering;
2549 subcase_init (&ordering, 0, 0, SC_ASCEND);
2550 s->writer = sort_create_writer (&ordering, proto);
2551 subcase_uninit (&ordering);
2552 caseproto_unref (proto);
/* Releases the state held by accumulator S, which must have been set up for
   summary function SS->function.  The switch mirrors the grouping used by
   ctables_summary_init(): the group ending in the *_SUM functions destroys
   the moments1 accumulator, and the casewriter is destroyed for the group
   that uses one. */
2562 ctables_summary_uninit (union ctables_summary *s,
2563 const struct ctables_summary_spec *ss)
2565 switch (ss->function)
2569 case CTSF_ROWPCT_COUNT:
2570 case CTSF_COLPCT_COUNT:
2571 case CTSF_TABLEPCT_COUNT:
2572 case CTSF_SUBTABLEPCT_COUNT:
2573 case CTSF_LAYERPCT_COUNT:
2574 case CTSF_LAYERROWPCT_COUNT:
2575 case CTSF_LAYERCOLPCT_COUNT:
2576 case CTSF_ROWPCT_VALIDN:
2577 case CTSF_COLPCT_VALIDN:
2578 case CTSF_TABLEPCT_VALIDN:
2579 case CTSF_SUBTABLEPCT_VALIDN:
2580 case CTSF_LAYERPCT_VALIDN:
2581 case CTSF_LAYERROWPCT_VALIDN:
2582 case CTSF_LAYERCOLPCT_VALIDN:
2583 case CTSF_ROWPCT_TOTALN:
2584 case CTSF_COLPCT_TOTALN:
2585 case CTSF_TABLEPCT_TOTALN:
2586 case CTSF_SUBTABLEPCT_TOTALN:
2587 case CTSF_LAYERPCT_TOTALN:
2588 case CTSF_LAYERROWPCT_TOTALN:
2589 case CTSF_LAYERCOLPCT_TOTALN:
2596 case CTSF_UROWPCT_COUNT:
2597 case CTSF_UCOLPCT_COUNT:
2598 case CTSF_UTABLEPCT_COUNT:
2599 case CTSF_USUBTABLEPCT_COUNT:
2600 case CTSF_ULAYERPCT_COUNT:
2601 case CTSF_ULAYERROWPCT_COUNT:
2602 case CTSF_ULAYERCOLPCT_COUNT:
2603 case CTSF_UROWPCT_VALIDN:
2604 case CTSF_UCOLPCT_VALIDN:
2605 case CTSF_UTABLEPCT_VALIDN:
2606 case CTSF_USUBTABLEPCT_VALIDN:
2607 case CTSF_ULAYERPCT_VALIDN:
2608 case CTSF_ULAYERROWPCT_VALIDN:
2609 case CTSF_ULAYERCOLPCT_VALIDN:
2610 case CTSF_UROWPCT_TOTALN:
2611 case CTSF_UCOLPCT_TOTALN:
2612 case CTSF_UTABLEPCT_TOTALN:
2613 case CTSF_USUBTABLEPCT_TOTALN:
2614 case CTSF_ULAYERPCT_TOTALN:
2615 case CTSF_ULAYERROWPCT_TOTALN:
2616 case CTSF_ULAYERCOLPCT_TOTALN:
2632 case CTSF_ROWPCT_SUM:
2633 case CTSF_COLPCT_SUM:
2634 case CTSF_TABLEPCT_SUM:
2635 case CTSF_SUBTABLEPCT_SUM:
2636 case CTSF_LAYERPCT_SUM:
2637 case CTSF_LAYERROWPCT_SUM:
2638 case CTSF_LAYERCOLPCT_SUM:
2643 case CTSF_UVARIANCE:
2644 case CTSF_UROWPCT_SUM:
2645 case CTSF_UCOLPCT_SUM:
2646 case CTSF_UTABLEPCT_SUM:
2647 case CTSF_USUBTABLEPCT_SUM:
2648 case CTSF_ULAYERPCT_SUM:
2649 case CTSF_ULAYERROWPCT_SUM:
2650 case CTSF_ULAYERCOLPCT_SUM:
2651 moments1_destroy (s->moments);
2660 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for summary function SS->function.

   VALUE is the observation of VAR; it is read as a number for the minimum
   and maximum, moment, and casewriter based functions.  IS_SCALE,
   IS_SCALE_MISSING, IS_MISSING, and EXCLUDED_MISSING describe the
   observation and decide whether it contributes.  D_WEIGHT and E_WEIGHT are
   two case weights: the count functions whose additions are visible use
   D_WEIGHT for the percentage variants and E_WEIGHT elsewhere, the weighted
   moment functions and the casewriter use E_WEIGHT, and the unweighted
   moment functions use a weight of 1.0.

   The inclusion rules are tabulated in the comment inside the function. */
2666 ctables_summary_add (union ctables_summary *s,
2667 const struct ctables_summary_spec *ss,
2668 const struct variable *var, const union value *value,
2669 bool is_scale, bool is_scale_missing,
2670 bool is_missing, bool excluded_missing,
2671 double d_weight, double e_weight)
2673 /* To determine whether a case is included in a given table for a particular
2674 kind of summary, consider the following charts for each variable in the
2675 table. Only if "yes" appears for every variable for the summary is the
2678 Categorical variables: VALIDN COUNT TOTALN
2679 Valid values in included categories yes yes yes
2680 Missing values in included categories --- yes yes
2681 Missing values in excluded categories --- --- yes
2682 Valid values in excluded categories --- --- ---
2684 Scale variables: VALIDN COUNT TOTALN
2685 Valid value yes yes yes
2686 Missing value --- yes yes
2688 Missing values include both user- and system-missing. (The system-missing
2689 value is always in an excluded category.)
2691 switch (ss->function)
2694 case CTSF_ROWPCT_TOTALN:
2695 case CTSF_COLPCT_TOTALN:
2696 case CTSF_TABLEPCT_TOTALN:
2697 case CTSF_SUBTABLEPCT_TOTALN:
2698 case CTSF_LAYERPCT_TOTALN:
2699 case CTSF_LAYERROWPCT_TOTALN:
2700 case CTSF_LAYERCOLPCT_TOTALN:
2701 s->count += d_weight;
2705 case CTSF_UROWPCT_TOTALN:
2706 case CTSF_UCOLPCT_TOTALN:
2707 case CTSF_UTABLEPCT_TOTALN:
2708 case CTSF_USUBTABLEPCT_TOTALN:
2709 case CTSF_ULAYERPCT_TOTALN:
2710 case CTSF_ULAYERROWPCT_TOTALN:
2711 case CTSF_ULAYERCOLPCT_TOTALN:
2716 case CTSF_ROWPCT_COUNT:
2717 case CTSF_COLPCT_COUNT:
2718 case CTSF_TABLEPCT_COUNT:
2719 case CTSF_SUBTABLEPCT_COUNT:
2720 case CTSF_LAYERPCT_COUNT:
2721 case CTSF_LAYERROWPCT_COUNT:
2722 case CTSF_LAYERCOLPCT_COUNT:
2723 if (is_scale || !excluded_missing)
2724 s->count += d_weight;
2728 case CTSF_UROWPCT_COUNT:
2729 case CTSF_UCOLPCT_COUNT:
2730 case CTSF_UTABLEPCT_COUNT:
2731 case CTSF_USUBTABLEPCT_COUNT:
2732 case CTSF_ULAYERPCT_COUNT:
2733 case CTSF_ULAYERROWPCT_COUNT:
2734 case CTSF_ULAYERCOLPCT_COUNT:
2735 if (is_scale || !excluded_missing)
2740 case CTSF_ROWPCT_VALIDN:
2741 case CTSF_COLPCT_VALIDN:
2742 case CTSF_TABLEPCT_VALIDN:
2743 case CTSF_SUBTABLEPCT_VALIDN:
2744 case CTSF_LAYERPCT_VALIDN:
2745 case CTSF_LAYERROWPCT_VALIDN:
2746 case CTSF_LAYERCOLPCT_VALIDN:
2750 s->count += d_weight;
2754 case CTSF_UROWPCT_VALIDN:
2755 case CTSF_UCOLPCT_VALIDN:
2756 case CTSF_UTABLEPCT_VALIDN:
2757 case CTSF_USUBTABLEPCT_VALIDN:
2758 case CTSF_ULAYERPCT_VALIDN:
2759 case CTSF_ULAYERROWPCT_VALIDN:
2760 case CTSF_ULAYERCOLPCT_VALIDN:
2769 s->count += d_weight;
2778 if (is_scale || !excluded_missing)
2779 s->count += e_weight;
2786 s->count += e_weight;
2790 s->count += e_weight;
2796 if (!is_scale_missing)
2798 assert (!var_is_alpha (var)); /* XXX? */
2799 if (s->min == SYSMIS || value->f < s->min)
2801 if (s->max == SYSMIS || value->f > s->max)
2811 case CTSF_ROWPCT_SUM:
2812 case CTSF_COLPCT_SUM:
2813 case CTSF_TABLEPCT_SUM:
2814 case CTSF_SUBTABLEPCT_SUM:
2815 case CTSF_LAYERPCT_SUM:
2816 case CTSF_LAYERROWPCT_SUM:
2817 case CTSF_LAYERCOLPCT_SUM:
2818 if (!is_scale_missing)
2819 moments1_add (s->moments, value->f, e_weight);
2826 case CTSF_UVARIANCE:
2827 case CTSF_UROWPCT_SUM:
2828 case CTSF_UCOLPCT_SUM:
2829 case CTSF_UTABLEPCT_SUM:
2830 case CTSF_USUBTABLEPCT_SUM:
2831 case CTSF_ULAYERPCT_SUM:
2832 case CTSF_ULAYERROWPCT_SUM:
2833 case CTSF_ULAYERCOLPCT_SUM:
2834 if (!is_scale_missing)
2835 moments1_add (s->moments, value->f, 1.0);
2841 d_weight = e_weight = 1.0;
2846 if (!is_scale_missing)
2848 s->ovalid += e_weight;
2850 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2851 *case_num_rw_idx (c, 0) = value->f;
2852 *case_num_rw_idx (c, 1) = e_weight;
2853 casewriter_write (s->writer, c);
/* Returns the domain type associated with summary FUNCTION.

   Each group of case labels below covers one percentage family in both its
   ordinary and its unweighted (U*) form.  In this listing only the returns
   for the LAYERCOLPCT, LAYERROWPCT and SUBTABLEPCT families are visible
   (CTDT_LAYERCOL, CTDT_LAYERROW and CTDT_SUBTABLE respectively); the returns
   for the other groups are on lines that are not shown here. */
2859 static enum ctables_domain_type
2860 ctables_function_domain (enum ctables_summary_function function)
2890 case CTSF_UVARIANCE:
/* COLPCT family. */
2896 case CTSF_COLPCT_COUNT:
2897 case CTSF_COLPCT_SUM:
2898 case CTSF_COLPCT_TOTALN:
2899 case CTSF_COLPCT_VALIDN:
2900 case CTSF_UCOLPCT_COUNT:
2901 case CTSF_UCOLPCT_SUM:
2902 case CTSF_UCOLPCT_TOTALN:
2903 case CTSF_UCOLPCT_VALIDN:
/* LAYERCOLPCT family. */
2906 case CTSF_LAYERCOLPCT_COUNT:
2907 case CTSF_LAYERCOLPCT_SUM:
2908 case CTSF_LAYERCOLPCT_TOTALN:
2909 case CTSF_LAYERCOLPCT_VALIDN:
2910 case CTSF_ULAYERCOLPCT_COUNT:
2911 case CTSF_ULAYERCOLPCT_SUM:
2912 case CTSF_ULAYERCOLPCT_TOTALN:
2913 case CTSF_ULAYERCOLPCT_VALIDN:
2914 return CTDT_LAYERCOL;
/* LAYERPCT family. */
2916 case CTSF_LAYERPCT_COUNT:
2917 case CTSF_LAYERPCT_SUM:
2918 case CTSF_LAYERPCT_TOTALN:
2919 case CTSF_LAYERPCT_VALIDN:
2920 case CTSF_ULAYERPCT_COUNT:
2921 case CTSF_ULAYERPCT_SUM:
2922 case CTSF_ULAYERPCT_TOTALN:
2923 case CTSF_ULAYERPCT_VALIDN:
/* LAYERROWPCT family. */
2926 case CTSF_LAYERROWPCT_COUNT:
2927 case CTSF_LAYERROWPCT_SUM:
2928 case CTSF_LAYERROWPCT_TOTALN:
2929 case CTSF_LAYERROWPCT_VALIDN:
2930 case CTSF_ULAYERROWPCT_COUNT:
2931 case CTSF_ULAYERROWPCT_SUM:
2932 case CTSF_ULAYERROWPCT_TOTALN:
2933 case CTSF_ULAYERROWPCT_VALIDN:
2934 return CTDT_LAYERROW;
/* ROWPCT family. */
2936 case CTSF_ROWPCT_COUNT:
2937 case CTSF_ROWPCT_SUM:
2938 case CTSF_ROWPCT_TOTALN:
2939 case CTSF_ROWPCT_VALIDN:
2940 case CTSF_UROWPCT_COUNT:
2941 case CTSF_UROWPCT_SUM:
2942 case CTSF_UROWPCT_TOTALN:
2943 case CTSF_UROWPCT_VALIDN:
/* SUBTABLEPCT family. */
2946 case CTSF_SUBTABLEPCT_COUNT:
2947 case CTSF_SUBTABLEPCT_SUM:
2948 case CTSF_SUBTABLEPCT_TOTALN:
2949 case CTSF_SUBTABLEPCT_VALIDN:
2950 case CTSF_USUBTABLEPCT_COUNT:
2951 case CTSF_USUBTABLEPCT_SUM:
2952 case CTSF_USUBTABLEPCT_TOTALN:
2953 case CTSF_USUBTABLEPCT_VALIDN:
2954 return CTDT_SUBTABLE;
/* TABLEPCT family. */
2956 case CTSF_TABLEPCT_COUNT:
2957 case CTSF_TABLEPCT_SUM:
2958 case CTSF_TABLEPCT_TOTALN:
2959 case CTSF_TABLEPCT_VALIDN:
2960 case CTSF_UTABLEPCT_COUNT:
2961 case CTSF_UTABLEPCT_SUM:
2962 case CTSF_UTABLEPCT_TOTALN:
2963 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION according to whether it is one of the *PCT_SUM
   functions.  The case labels form two groups: the first ends with every
   *PCT_COUNT, *PCT_TOTALN and *PCT_VALIDN function, the second lists only the
   *PCT_SUM functions.  The return statement for each group is on a line that
   is not shown in this listing.

   NOTE(review): the declared return type is enum ctables_domain_type, which
   looks copied from ctables_function_domain(); the name suggests a boolean
   predicate.  Confirm against the hidden returns before changing it. */
2970 static enum ctables_domain_type
2971 ctables_function_is_pctsum (enum ctables_summary_function function)
3001 case CTSF_UVARIANCE:
/* Percentages of counts: not sums. */
3005 case CTSF_COLPCT_COUNT:
3006 case CTSF_COLPCT_TOTALN:
3007 case CTSF_COLPCT_VALIDN:
3008 case CTSF_UCOLPCT_COUNT:
3009 case CTSF_UCOLPCT_TOTALN:
3010 case CTSF_UCOLPCT_VALIDN:
3011 case CTSF_LAYERCOLPCT_COUNT:
3012 case CTSF_LAYERCOLPCT_TOTALN:
3013 case CTSF_LAYERCOLPCT_VALIDN:
3014 case CTSF_ULAYERCOLPCT_COUNT:
3015 case CTSF_ULAYERCOLPCT_TOTALN:
3016 case CTSF_ULAYERCOLPCT_VALIDN:
3017 case CTSF_LAYERPCT_COUNT:
3018 case CTSF_LAYERPCT_TOTALN:
3019 case CTSF_LAYERPCT_VALIDN:
3020 case CTSF_ULAYERPCT_COUNT:
3021 case CTSF_ULAYERPCT_TOTALN:
3022 case CTSF_ULAYERPCT_VALIDN:
3023 case CTSF_LAYERROWPCT_COUNT:
3024 case CTSF_LAYERROWPCT_TOTALN:
3025 case CTSF_LAYERROWPCT_VALIDN:
3026 case CTSF_ULAYERROWPCT_COUNT:
3027 case CTSF_ULAYERROWPCT_TOTALN:
3028 case CTSF_ULAYERROWPCT_VALIDN:
3029 case CTSF_ROWPCT_COUNT:
3030 case CTSF_ROWPCT_TOTALN:
3031 case CTSF_ROWPCT_VALIDN:
3032 case CTSF_UROWPCT_COUNT:
3033 case CTSF_UROWPCT_TOTALN:
3034 case CTSF_UROWPCT_VALIDN:
3035 case CTSF_SUBTABLEPCT_COUNT:
3036 case CTSF_SUBTABLEPCT_TOTALN:
3037 case CTSF_SUBTABLEPCT_VALIDN:
3038 case CTSF_USUBTABLEPCT_COUNT:
3039 case CTSF_USUBTABLEPCT_TOTALN:
3040 case CTSF_USUBTABLEPCT_VALIDN:
3041 case CTSF_TABLEPCT_COUNT:
3042 case CTSF_TABLEPCT_TOTALN:
3043 case CTSF_TABLEPCT_VALIDN:
3044 case CTSF_UTABLEPCT_COUNT:
3045 case CTSF_UTABLEPCT_TOTALN:
3046 case CTSF_UTABLEPCT_VALIDN:
/* Percentages of sums. */
3049 case CTSF_COLPCT_SUM:
3050 case CTSF_UCOLPCT_SUM:
3051 case CTSF_LAYERCOLPCT_SUM:
3052 case CTSF_ULAYERCOLPCT_SUM:
3053 case CTSF_LAYERPCT_SUM:
3054 case CTSF_ULAYERPCT_SUM:
3055 case CTSF_LAYERROWPCT_SUM:
3056 case CTSF_ULAYERROWPCT_SUM:
3057 case CTSF_ROWPCT_SUM:
3058 case CTSF_UROWPCT_SUM:
3059 case CTSF_SUBTABLEPCT_SUM:
3060 case CTSF_USUBTABLEPCT_SUM:
3061 case CTSF_TABLEPCT_SUM:
3062 case CTSF_UTABLEPCT_SUM:
/* Computes the reportable value of summary S, which belongs to CELL, for the
   summary function described by SS.

   Percentage functions divide the accumulated quantity by the matching total
   of the domain selected by ctables_function_domain() and multiply by 100.
   Several case labels, guards and fallback results are on lines that are not
   shown in this listing, so only the visible computations are described. */
3070 ctables_summary_value (const struct ctables_cell *cell,
3071 union ctables_summary *s,
3072 const struct ctables_summary_spec *ss)
3074 switch (ss->function)
/* s->count as a percentage of the domain's e_count.  A zero e_count takes the
   other arm of the conditional, which is not shown. */
3081 case CTSF_ROWPCT_COUNT:
3082 case CTSF_COLPCT_COUNT:
3083 case CTSF_TABLEPCT_COUNT:
3084 case CTSF_SUBTABLEPCT_COUNT:
3085 case CTSF_LAYERPCT_COUNT:
3086 case CTSF_LAYERROWPCT_COUNT:
3087 case CTSF_LAYERCOLPCT_COUNT:
3089 enum ctables_domain_type d = ctables_function_domain (ss->function);
3090 return (cell->domains[d]->e_count
3091 ? s->count / cell->domains[d]->e_count * 100
/* Same, against the domain's u_count. */
3095 case CTSF_UROWPCT_COUNT:
3096 case CTSF_UCOLPCT_COUNT:
3097 case CTSF_UTABLEPCT_COUNT:
3098 case CTSF_USUBTABLEPCT_COUNT:
3099 case CTSF_ULAYERPCT_COUNT:
3100 case CTSF_ULAYERROWPCT_COUNT:
3101 case CTSF_ULAYERCOLPCT_COUNT:
3103 enum ctables_domain_type d = ctables_function_domain (ss->function);
3104 return (cell->domains[d]->u_count
3105 ? s->count / cell->domains[d]->u_count * 100
/* Same, against the domain's e_valid. */
3109 case CTSF_ROWPCT_VALIDN:
3110 case CTSF_COLPCT_VALIDN:
3111 case CTSF_TABLEPCT_VALIDN:
3112 case CTSF_SUBTABLEPCT_VALIDN:
3113 case CTSF_LAYERPCT_VALIDN:
3114 case CTSF_LAYERROWPCT_VALIDN:
3115 case CTSF_LAYERCOLPCT_VALIDN:
3117 enum ctables_domain_type d = ctables_function_domain (ss->function);
3118 return (cell->domains[d]->e_valid
3119 ? s->count / cell->domains[d]->e_valid * 100
/* Same, against the domain's u_valid. */
3123 case CTSF_UROWPCT_VALIDN:
3124 case CTSF_UCOLPCT_VALIDN:
3125 case CTSF_UTABLEPCT_VALIDN:
3126 case CTSF_USUBTABLEPCT_VALIDN:
3127 case CTSF_ULAYERPCT_VALIDN:
3128 case CTSF_ULAYERROWPCT_VALIDN:
3129 case CTSF_ULAYERCOLPCT_VALIDN:
3131 enum ctables_domain_type d = ctables_function_domain (ss->function);
3132 return (cell->domains[d]->u_valid
3133 ? s->count / cell->domains[d]->u_valid * 100
/* Same, against the domain's e_total. */
3137 case CTSF_ROWPCT_TOTALN:
3138 case CTSF_COLPCT_TOTALN:
3139 case CTSF_TABLEPCT_TOTALN:
3140 case CTSF_SUBTABLEPCT_TOTALN:
3141 case CTSF_LAYERPCT_TOTALN:
3142 case CTSF_LAYERROWPCT_TOTALN:
3143 case CTSF_LAYERCOLPCT_TOTALN:
3145 enum ctables_domain_type d = ctables_function_domain (ss->function);
3146 return (cell->domains[d]->e_total
3147 ? s->count / cell->domains[d]->e_total * 100
/* Same, against the domain's u_total. */
3151 case CTSF_UROWPCT_TOTALN:
3152 case CTSF_UCOLPCT_TOTALN:
3153 case CTSF_UTABLEPCT_TOTALN:
3154 case CTSF_USUBTABLEPCT_TOTALN:
3155 case CTSF_ULAYERPCT_TOTALN:
3156 case CTSF_ULAYERROWPCT_TOTALN:
3157 case CTSF_ULAYERCOLPCT_TOTALN:
3159 enum ctables_domain_type d = ctables_function_domain (ss->function);
3160 return (cell->domains[d]->u_total
3161 ? s->count / cell->domains[d]->u_total * 100
/* Range: max minus min, or SYSMIS if either bound is still SYSMIS. */
3182 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
/* Mean, taken from the accumulated moments. */
3188 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
/* Standard error of the mean, from the variance and total weight. */
3195 double weight, variance;
3196 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3197 return calc_semean (variance, weight);
/* Square root of the variance; a SYSMIS variance yields SYSMIS. */
3204 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3205 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* Sum, reconstructed as weight times mean; SYSMIS if either is SYSMIS. */
3211 double weight, mean;
3212 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3213 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
/* Variance (the return statement is not shown). */
3217 case CTSF_UVARIANCE:
3220 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Percentage of sum: weight * mean over the domain's e_sum for the summed
   variable selected by ss->sum_var_idx.  A zero denominator yields SYSMIS. */
3224 case CTSF_ROWPCT_SUM:
3225 case CTSF_COLPCT_SUM:
3226 case CTSF_TABLEPCT_SUM:
3227 case CTSF_SUBTABLEPCT_SUM:
3228 case CTSF_LAYERPCT_SUM:
3229 case CTSF_LAYERROWPCT_SUM:
3230 case CTSF_LAYERCOLPCT_SUM:
3232 double weight, mean;
3233 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3234 if (weight == SYSMIS || mean == SYSMIS)
3236 enum ctables_domain_type d = ctables_function_domain (ss->function);
3237 double num = weight * mean;
3238 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3239 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Same, against the domain's u_sum. */
3241 case CTSF_UROWPCT_SUM:
3242 case CTSF_UCOLPCT_SUM:
3243 case CTSF_UTABLEPCT_SUM:
3244 case CTSF_USUBTABLEPCT_SUM:
3245 case CTSF_ULAYERPCT_SUM:
3246 case CTSF_ULAYERROWPCT_SUM:
3247 case CTSF_ULAYERCOLPCT_SUM:
3249 double weight, mean;
3250 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3251 if (weight == SYSMIS || mean == SYSMIS)
3253 enum ctables_domain_type d = ctables_function_domain (ss->function);
3254 double num = weight * mean;
3255 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3256 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile: turns the cases collected in s->writer into a reader and feeds
   them to a percentile statistic.  CTSF_PTILE uses ss->percentile; any other
   function reaching this code uses 0.5.  The result is stored in s->ovalue.
   The guard around this code and the return are not shown. */
3265 struct casereader *reader = casewriter_make_reader (s->writer);
3268 struct percentile *ptile = percentile_create (
3269 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3270 struct order_stats *os = &ptile->parent;
3271 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3272 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3273 statistic_destroy (&ptile->parent.parent);
/* Mode: same pattern as above, using a mode statistic. */
3281 struct casereader *reader = casewriter_make_reader (s->writer);
3284 struct mode *mode = mode_create ();
3285 struct order_stats *os = &mode->parent;
3286 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3287 s->ovalue = mode->mode;
3288 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(). */
3296 struct ctables_cell_sort_aux
/* Nest whose variables are compared, in order. */
3298 const struct ctables_nest *nest;
/* Axis of each cell whose category values are compared. */
3299 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   `struct ctables_cell *' elements; AUX_ is a struct ctables_cell_sort_aux.

   The cells are compared on axis AUX->a, one variable of AUX->nest at a time,
   skipping the variable at nest->scale_idx.  Cells in different categories
   are ordered by category pointer.  Cells in the same category are ordered
   according to the category's type; the case labels that select among the
   branches below are mostly on lines not shown in this listing. */
3303 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3305 const struct ctables_cell_sort_aux *aux = aux_;
3306 struct ctables_cell *const *ap = a_;
3307 struct ctables_cell *const *bp = b_;
3308 const struct ctables_cell *a = *ap;
3309 const struct ctables_cell *b = *bp;
3311 const struct ctables_nest *nest = aux->nest;
3312 for (size_t i = 0; i < nest->n; i++)
3313 if (i != nest->scale_idx)
3315 const struct variable *var = nest->vars[i];
3316 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3317 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
3318 if (a_cv->category != b_cv->category)
3319 return a_cv->category > b_cv->category ? 1 : -1;
3321 const union value *a_val = &a_cv->value;
3322 const union value *b_val = &b_cv->value;
3323 switch (a_cv->category->type)
3329 case CCT_POSTCOMPUTE:
3330 case CCT_EXCLUDED_MISSING:
3331 /* Must be equal. */
/* Ordering by value alone. */
3339 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* Ordering by value, reversed when the category is not sort_ascending. */
3347 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3349 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering by value label.  The visible arms compare two labels with
   strcmp(), put a missing B label before A, put a missing A label before B,
   and fall back to comparing values when neither has a label.  The line
   holding the leading condition is not shown; presumably it tests a_label. */
3355 const char *a_label = var_lookup_value_label (var, a_val);
3356 const char *b_label = var_lookup_value_label (var, b_val);
3358 ? (b_label ? strcmp (a_label, b_label) : 1)
3359 : (b_label ? -1 : value_compare_3way (
3360 a_val, b_val, var_get_width (var))));
3362 return a_cv->category->sort_ascending ? cmp : -cmp;
3376 For each ctables_table:
3377 For each combination of row vars:
3378 For each combination of column vars:
3379 For each combination of layer vars:
3381 Make a table of row values:
3382 Sort entries by row values
3383 Assign a 0-based index to each actual value
3384 Construct a dimension
3385 Make a table of column values
3386 Make a table of layer values
3388 Fill the table entry using the indexes from before.
/* Finds the domain of type DOMAIN in section S that CELL belongs to, creating
   and inserting a new one if none matches.

   A domain is identified by the categories, and for ordinary categories also
   the values, of the variables that each axis's nest lists in
   nest->domains[DOMAIN].  The return statements are on lines not shown in
   this listing. */
3391 static struct ctables_domain *
3392 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3393 enum ctables_domain_type domain)
/* Hash the identifying categories and values. */
3396 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3398 const struct ctables_nest *nest = s->nests[a];
3399 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3401 size_t v_idx = nest->domains[domain][i];
3402 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3403 hash = hash_pointer (cv->category, hash);
/* Total, subtotal and postcompute categories are identified by the category
   alone, so their value is left out of the hash. */
3404 if (cv->category->type != CCT_TOTAL
3405 && cv->category->type != CCT_SUBTOTAL
3406 && cv->category->type != CCT_POSTCOMPUTE)
3407 hash = value_hash (&cv->value,
3408 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing domain whose example cell agrees with CELL on every
   identifying variable. */
3412 struct ctables_domain *d;
3413 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3415 const struct ctables_cell *df = d->example;
3416 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3418 const struct ctables_nest *nest = s->nests[a];
3419 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3421 size_t v_idx = nest->domains[domain][i];
3422 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3423 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3424 if (cv1->category != cv2->category
3425 || (cv1->category->type != CCT_TOTAL
3426 && cv1->category->type != CCT_SUBTOTAL
3427 && cv1->category->type != CCT_POSTCOMPUTE
3428 && !value_equal (&cv1->value, &cv2->value,
3429 var_get_width (nest->vars[v_idx]))))
/* No match: create a domain with CELL as its example and, when the table has
   summed variables, a zeroed array with one sum per variable. */
3438 struct ctables_sum *sums = (s->table->n_sum_vars
3439 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3442 d = xmalloc (sizeof *d);
3443 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3444 hmap_insert (&s->domains[domain], &d->node, hash);
/* Makes a substring that covers the string data of V across the full width
   of VAR, then trims trailing spaces from it.  The substring refers to V's
   own storage rather than a copy.  The return statement is on a line not
   shown in this listing. */
3448 static struct substring
3449 rtrim_value (const union value *v, const struct variable *var)
3451 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3452 var_get_width (var));
3453 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether string value V of VAR, with trailing spaces trimmed, lies in
   the inclusive range SRANGE[0]..SRANGE[1].  A bound whose string pointer is
   null is treated as absent, so the range is open on that side. */
3458 in_string_range (const union value *v, const struct variable *var,
3459 const struct substring *srange)
3461 struct substring s = rtrim_value (v, var);
3462 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3463 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   Categories are examined from the last to the first.  Most of the case
   labels and the per-case returns are on lines not shown in this listing, so
   the notes below describe only the visible tests.  If nothing matches
   directly, the result is the category recorded in `othernm' (its assignment
   is not shown), except that a missing value yields NULL instead. */
3466 static const struct ctables_category *
3467 ctables_categories_match (const struct ctables_categories *c,
3468 const union value *v, const struct variable *var)
/* The system-missing value gets special treatment (result not shown). */
3470 if (var_is_numeric (var) && v->f == SYSMIS)
3473 const struct ctables_category *othernm = NULL;
3474 for (size_t i = c->n_cats; i-- > 0; )
3476 const struct ctables_category *cat = &c->cats[i];
/* Exact numeric match. */
3480 if (cat->number == v->f)
/* Exact string match, ignoring trailing spaces in V. */
3485 if (ss_equals (cat->string, rtrim_value (v, var)))
/* Numeric range; -DBL_MAX and DBL_MAX mark an open lower or upper bound. */
3490 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3491 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
/* String range. */
3496 if (in_string_range (v, var, cat->srange))
/* Missing-value test. */
3501 if (var_is_value_missing (var, v))
3505 case CCT_POSTCOMPUTE:
/* Matches when the value is not missing or the category includes missing
   values; the other arm of the conditional is not shown. */
3520 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3523 case CCT_EXCLUDED_MISSING:
3528 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C if it is C's first or last category,
   checking the first one first.  The final fallback arm of the conditional is
   on a line not shown in this listing.

   NOTE(review): this indexes cats[n_cats - 1], so it relies on C having at
   least one category -- confirm that callers guarantee it. */
3531 static const struct ctables_category *
3532 ctables_categories_total (const struct ctables_categories *c)
3534 const struct ctables_category *first = &c->cats[0];
3535 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3536 return (first->type == CCT_TOTAL ? first
3537 : last->type == CCT_TOTAL ? last
/* Looks up the cell in section S for case C whose per-variable categories are
   given in CATS, and creates, initializes and inserts a new cell when the
   lookup finds none.

   A cell is identified by the category of every non-scale variable on every
   axis, plus the case's value for ordinary categories.  The return statements
   and several assignments are on lines not shown in this listing.

   NOTE(review): CATS has a fixed second dimension of 10; nothing visible here
   checks nest->n against it. */
3541 static struct ctables_cell *
3542 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3543 const struct ctables_category *cats[PIVOT_N_AXES][10])
/* Hash the identifying categories and values. */
3546 enum ctables_summary_variant sv = CSV_CELL;
3547 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3549 const struct ctables_nest *nest = s->nests[a];
3550 for (size_t i = 0; i < nest->n; i++)
3551 if (i != nest->scale_idx)
3553 hash = hash_pointer (cats[a][i], hash);
3554 if (cats[a][i]->type != CCT_TOTAL
3555 && cats[a][i]->type != CCT_SUBTOTAL
3556 && cats[a][i]->type != CCT_POSTCOMPUTE)
3557 hash = value_hash (case_data (c, nest->vars[i]),
3558 var_get_width (nest->vars[i]), hash);
/* Search the bucket for a cell that agrees on every identifying variable. */
3564 struct ctables_cell *cell;
3565 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3567 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3569 const struct ctables_nest *nest = s->nests[a];
3570 for (size_t i = 0; i < nest->n; i++)
3571 if (i != nest->scale_idx
3572 && (cats[a][i] != cell->axes[a].cvs[i].category
3573 || (cats[a][i]->type != CCT_TOTAL
3574 && cats[a][i]->type != CCT_SUBTOTAL
3575 && cats[a][i]->type != CCT_POSTCOMPUTE
3576 && !value_equal (case_data (c, nest->vars[i]),
3577 &cell->axes[a].cvs[i].value,
3578 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
3587 cell = xmalloc (sizeof *cell);
3590 cell->omit_domains = 0;
3591 cell->postcompute = false;
3592 //struct string name = DS_EMPTY_INITIALIZER;
3593 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3595 const struct ctables_nest *nest = s->nests[a];
3596 cell->axes[a].cvs = (nest->n
3597 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3599 for (size_t i = 0; i < nest->n; i++)
3601 const struct ctables_category *cat = cats[a][i];
3602 const struct variable *var = nest->vars[i];
3603 const union value *value = case_data (c, var);
3604 if (i != nest->scale_idx)
3606 const struct ctables_category *subtotal = cat->subtotal;
3607 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal or postcompute category on axis A flags, as bits in
   omit_domains, domain types that depend on that axis. */
3610 if (cat->type == CCT_TOTAL
3611 || cat->type == CCT_SUBTOTAL
3612 || cat->type == CCT_POSTCOMPUTE)
3614 /* XXX these should be more encompassing I think.*/
3618 case PIVOT_AXIS_COLUMN:
3619 cell->omit_domains |= ((1u << CTDT_TABLE) |
3620 (1u << CTDT_LAYER) |
3621 (1u << CTDT_LAYERCOL) |
3622 (1u << CTDT_SUBTABLE) |
3625 case PIVOT_AXIS_ROW:
3626 cell->omit_domains |= ((1u << CTDT_TABLE) |
3627 (1u << CTDT_LAYER) |
3628 (1u << CTDT_LAYERROW) |
3629 (1u << CTDT_SUBTABLE) |
3632 case PIVOT_AXIS_LAYER:
3633 cell->omit_domains |= ((1u << CTDT_TABLE) |
3634 (1u << CTDT_LAYER));
3638 if (cat->type == CCT_POSTCOMPUTE)
3639 cell->postcompute = true;
/* Record the category and a private copy of the value. */
3642 cell->axes[a].cvs[i].category = cat;
3643 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the lines below build a debugging name in `name', whose only
   visible declaration is commented out above.  Presumably they are disabled
   by preprocessor lines not shown in this listing -- confirm. */
3646 if (i != nest->scale_idx)
3648 if (!ds_is_empty (&name))
3649 ds_put_cstr (&name, ", ");
3650 char *value_s = data_out (value, var_get_encoding (var),
3651 var_get_print_format (var),
3652 settings_get_fmt_settings ());
3653 if (cat->type == CCT_TOTAL
3654 || cat->type == CCT_SUBTOTAL
3655 || cat->type == CCT_POSTCOMPUTE)
3656 ds_put_format (&name, "%s=total", var_get_name (var));
3658 ds_put_format (&name, "%s=%s", var_get_name (var),
3659 value_s + strspn (value_s, " "));
3665 //cell->name = ds_steal_cstr (&name);
/* Allocate and initialize one summary per spec on the summary axis, attach
   the cell to its domain of every type, and insert it into the section. */
3667 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3668 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3669 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3670 for (size_t i = 0; i < specs->n; i++)
3671 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3672 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3673 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3674 hmap_insert (&s->cells, &cell->node, hash);
/* Decides whether case C counts as missing for the scale summaries in SPECS.
   The visible tests are: SPECS is not for a scale variable; the value of
   SPECS->var in C is missing; the value of any of SPECS's listwise variables
   in C is missing.  The result returned for each test is on a line not shown
   in this listing. */
3679 is_scale_missing (const struct ctables_summary_spec_set *specs,
3680 const struct ccase *c)
3682 if (!specs->is_scale)
3685 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3688 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3690 const struct variable *var = specs->listwise_vars[i];
3691 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell of section S
   selected by CATS (creating the cell if needed), then updates the totals of
   the domains that the cell belongs to.

   IS_MISSING and EXCLUDED_MISSING are passed through to every summary.
   EXCLUDED_MISSING additionally keeps the case out of the domains' counts.
   The condition guarding the d_valid/e_valid update is on a line not shown in
   this listing. */
3699 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3700 const struct ctables_category *cats[PIVOT_N_AXES][10],
3701 bool is_missing, bool excluded_missing,
3702 double d_weight, double e_weight)
3704 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3705 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3707 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
/* Feed the case to every summary of the cell. */
3709 bool scale_missing = is_scale_missing (specs, c);
3710 for (size_t i = 0; i < specs->n; i++)
3711 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3712 specs->var, case_data (c, specs->var), specs->is_scale,
3713 scale_missing, is_missing, excluded_missing,
3714 d_weight, e_weight);
/* omit_domains is a bit mask indexed by domain type (ctables_cell_insert__
   sets `1u << CTDT_*' bits in it), so it has to be tested with bitwise `&'.
   A logical `&&' here is true whenever any bit at all is set, because
   `1u << dt' is never zero, and would skip every domain for such a cell. */
3715 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3716 if (!(cell->omit_domains & (1u << dt)))
3718 struct ctables_domain *d = cell->domains[dt];
3719 d->d_total += d_weight;
3720 d->e_total += e_weight;
3722 if (!excluded_missing)
3724 d->d_count += d_weight;
3725 d->e_count += e_weight;
3730 d->d_valid += d_weight;
3731 d->e_valid += e_weight;
/* Accumulate the weighted and unweighted sum of each summed variable that is
   not missing in this case. */
3734 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3736 /* XXX listwise_missing??? */
3737 const struct variable *var = s->table->sum_vars[i];
3738 double addend = case_num (c, var);
3739 if (!var_is_num_missing (var, addend))
3741 struct ctables_sum *sum = &d->sums[i];
3742 sum->e_sum += addend * e_weight;
3743 sum->u_sum += addend;
/* Adds case C to the cells that involve total categories.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   variable's total category is looked up with ctables_categories_total().
   The visible code then saves the current category for that position, adds
   the case with ctables_cell_add__(), and recurses from position I + 1 so
   that combinations of totals on later variables are covered too.  The lines
   that test for a total, substitute it into CATS and restore the saved
   category are not shown in this listing. */
3751 recurse_totals (struct ctables_section *s, const struct ccase *c,
3752 const struct ctables_category *cats[PIVOT_N_AXES][10],
3753 bool is_missing, bool excluded_missing,
3754 double d_weight, double e_weight,
3755 enum pivot_axis_type start_axis, size_t start_nest)
3757 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3759 const struct ctables_nest *nest = s->nests[a];
3760 for (size_t i = start_nest; i < nest->n; i++)
3762 if (i == nest->scale_idx)
3765 const struct variable *var = nest->vars[i];
3767 const struct ctables_category *total = ctables_categories_total (
3768 s->table->categories[var_get_dict_index (var)]);
3771 const struct ctables_category *save = cats[a][i];
3773 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3774 d_weight, e_weight);
3775 recurse_totals (s, c, cats, is_missing, excluded_missing,
3776 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells that involve subtotal categories.

   Starting at axis START_AXIS and nest position START_NEST, the category at
   each non-scale position is replaced in CATS by its `subtotal' category, the
   case is added with ctables_cell_add__(), and the function recurses from
   position I + 1.  The test for whether a subtotal exists and the code that
   restores the saved category are on lines not shown in this listing. */
3785 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3786 const struct ctables_category *cats[PIVOT_N_AXES][10],
3787 bool is_missing, bool excluded_missing,
3788 double d_weight, double e_weight,
3789 enum pivot_axis_type start_axis, size_t start_nest)
3791 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3793 const struct ctables_nest *nest = s->nests[a];
3794 for (size_t i = start_nest; i < nest->n; i++)
3796 if (i == nest->scale_idx)
3799 const struct ctables_category *save = cats[a][i];
3802 cats[a][i] = save->subtotal;
3803 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3804 d_weight, e_weight);
3805 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3806 d_weight, e_weight, a, i + 1);
/* Records VALUE of variable VAR in the hash map OCCURRENCES.  The map is
   first searched for an equal value; what happens on a hit is on a line not
   shown in this listing (presumably an early return, so that each distinct
   value is stored once).  Otherwise a new entry holding a copy of VALUE is
   inserted. */
3815 ctables_add_occurrence (const struct variable *var,
3816 const union value *value,
3817 struct hmap *occurrences)
3819 int width = var_get_width (var);
3820 unsigned int hash = value_hash (value, width, 0);
3822 struct ctables_occurrence *o;
3823 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3825 if (value_equal (value, &o->value, width))
3828 o = xmalloc (sizeof *o);
3829 value_clone (&o->value, value, width);
3830 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   For each variable on each axis (the position nest->scale_idx is tested for
   separately), ctables_categories_match() chooses the category for the case's
   value.  The visible code can also substitute a static CCT_EXCLUDED_MISSING
   category and set excluded_missing; the condition for doing so is on lines
   not shown in this listing.  When nothing was excluded, each non-scale value
   is recorded in the section's per-variable occurrence maps.  The case is
   then added to its own cell and handed to recurse_totals() and
   recurse_subtotals().

   NOTE(review): the category table has a fixed second dimension of 10 (marked
   XXX below); nothing visible checks nest->n against it. */
3834 ctables_cell_insert (struct ctables_section *s,
3835 const struct ccase *c,
3836 double d_weight, double e_weight)
3838 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3840 /* Does at least one categorical variable have a missing value in an included
3841 or excluded category? */
3842 bool is_missing = false;
3844 /* Does at least one categorical variable have a missing value in an excluded
3846 bool excluded_missing = false;
3848 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3850 const struct ctables_nest *nest = s->nests[a];
3851 for (size_t i = 0; i < nest->n; i++)
3853 if (i == nest->scale_idx)
3856 const struct variable *var = nest->vars[i];
3857 const union value *value = case_data (c, var);
3859 bool var_missing = var_is_value_missing (var, value) != 0;
3863 cats[a][i] = ctables_categories_match (
3864 s->table->categories[var_get_dict_index (var)], value, var);
3870 static const struct ctables_category cct_excluded_missing = {
3871 .type = CCT_EXCLUDED_MISSING,
3874 cats[a][i] = &cct_excluded_missing;
3875 excluded_missing = true;
3880 if (!excluded_missing)
3881 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3883 const struct ctables_nest *nest = s->nests[a];
3884 for (size_t i = 0; i < nest->n; i++)
3885 if (i != nest->scale_idx)
3887 const struct variable *var = nest->vars[i];
3888 const union value *value = case_data (c, var);
3889 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3893 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3894 d_weight, e_weight);
3896 //if (!excluded_missing)
3898 recurse_totals (s, c, cats, is_missing, excluded_missing,
3899 d_weight, e_weight, 0, 0);
3900 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3901 d_weight, e_weight, 0, 0);
/* Set of summary specs that this merge item refers to;
   merge_item_compare_3way() reads the spec at set->specs[ofs]. */
3907 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B refer
   to.  Specs are ordered by function, then by percentile, then by label, with
   a null label treated as the empty string.

   NOTE(review): the percentile tie-break puts the larger percentile first,
   whereas the function and label comparisons are ascending -- confirm that
   the descending order is intended. */
3912 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3914 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3915 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3916 if (as->function != bs->function)
3917 return as->function > bs->function ? 1 : -1;
3918 else if (as->percentile != bs->percentile)
3919 return as->percentile < bs->percentile ? 1 : -1;
3921 const char *as_label = as->label ? as->label : "";
3922 const char *bs_label = bs->label ? bs->label : "";
3923 return strcmp (as_label, bs_label);
/* Creates the pivot-table label for category CAT.  Total and subtotal
   categories use the category's own total_label text; every other category
   is labeled with VALUE as a value of variable VAR. */
3926 static struct pivot_value *
3927 ctables_category_create_label__ (const struct ctables_category *cat,
3928 const struct variable *var,
3929 const union value *value)
3931 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3932 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3933 : pivot_value_new_var_value (var, value));
/* Creates the label for postcompute category CAT by expanding each
   ")LABEL[n]" reference in CAT->pc->label into the label of the N-th category
   (counting from 1) in CATS, as produced by
   ctables_category_create_label__() for VAR and VALUE.

   The loop construct and the handling of a malformed reference are on lines
   not shown in this listing.  The final return, which uses the label text
   unexpanded, is presumably that error fallback. */
3936 static struct pivot_value *
3937 ctables_postcompute_label (const struct ctables_categories *cats,
3938 const struct ctables_category *cat,
3939 const struct variable *var,
3940 const union value *value)
3942 struct substring in = ss_cstr (cat->pc->label);
3943 struct substring target = ss_cstr (")LABEL[");
3945 struct string out = DS_EMPTY_INITIALIZER;
/* No further reference: return the remaining text directly if nothing has
   been expanded yet, otherwise append it to the accumulated output. */
3948 size_t chunk = ss_find_substring (in, target);
3949 if (chunk == SIZE_MAX)
3951 if (ds_is_empty (&out))
3952 return pivot_value_new_user_text (in.string, in.length);
3955 ds_put_substring (&out, in);
3956 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then step past ")LABEL[". */
3960 ds_put_substring (&out, ss_head (in, chunk));
3961 ss_advance (&in, chunk + target.length);
/* Parse the index up to the closing `]'.  It must be a decimal number that
   consumes the whole bracketed text and lies in 1..cats->n_cats. */
3963 struct substring idx_s;
3964 if (!ss_get_until (&in, ']', &idx_s))
3967 long int idx = strtol (idx_s.string, &tail, 10);
3968 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
/* Append the referenced category's label as plain text. */
3971 struct ctables_category *cat2 = &cats->cats[idx - 1];
3972 struct pivot_value *label2
3973 = ctables_category_create_label__ (cat2, var, value);
3974 char *label2_s = pivot_value_to_string_defaults (label2);
3975 ds_put_cstr (&out, label2_s);
3977 pivot_value_destroy (label2);
3982 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the pivot-table label for category CAT of variable VAR with value
   VALUE.  A postcompute category that has its own label goes through
   ctables_postcompute_label(), which also needs the full category list CATS;
   everything else uses ctables_category_create_label__(). */
3985 static struct pivot_value *
3986 ctables_category_create_label (const struct ctables_categories *cats,
3987 const struct ctables_category *cat,
3988 const struct variable *var,
3989 const union value *value)
3991 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3992 ? ctables_postcompute_label (cats, cat, var, value)
3993 : ctables_category_create_label__ (cat, var, value));
/* Searches T's clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and precomputed HASH.  The return statements are on lines not
   shown in this listing. */
3996 static struct ctables_value *
3997 ctables_value_find__ (struct ctables_table *t, const union value *value,
3998 int width, unsigned int hash)
4000 struct ctables_value *clv;
4001 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4002 hash, &t->clabels_values_map)
4003 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T's clabels_values_map.  The map is searched first with
   ctables_value_find__(); the test on that result is on a line not shown in
   this listing (presumably insertion happens only when nothing was found).
   A new entry holds its own copy of VALUE. */
4009 ctables_value_insert (struct ctables_table *t, const union value *value,
4012 unsigned int hash = value_hash (value, width, 0);
4013 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4016 clv = xmalloc (sizeof *clv);
4017 value_clone (&clv->value, value, width);
4018 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's clabels_values_map.  This is a
   convenience wrapper that computes the hash and defers to
   ctables_value_find__(). */
4022 static struct ctables_value *
4023 ctables_value_find (struct ctables_table *t,
4024 const union value *value, int width)
4026 return ctables_value_find__ (t, value, width,
4027 value_hash (value, width, 0));
/* Recursively enumerates every combination of nest indexes across T's axes,
   appending one section to T->sections per combination.

   While A is a valid axis, IX[A] runs over the nests of that axis's stack
   (an empty stack still gets one pass) and the function recurses on the next
   axis.  Once every axis has an index, the next free section is initialized:
   its cell map, one occurrence map per variable of each axis's chosen nest,
   and one domain map per domain type.  Some initializer fields and the
   `else' joining the two halves are on lines not shown in this listing. */
4031 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4032 size_t ix[PIVOT_N_AXES])
4034 if (a < PIVOT_N_AXES)
4036 size_t limit = MAX (t->stacks[a].n, 1);
4037 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4038 ctables_table_add_section (t, a + 1, ix);
4042 struct ctables_section *s = &t->sections[t->n_sections++];
4043 *s = (struct ctables_section) {
4045 .cells = HMAP_INITIALIZER (s->cells),
4047 for (a = 0; a < PIVOT_N_AXES; a++)
4050 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4052 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4053 for (size_t i = 0; i < nest->n; i++)
4054 hmap_init (&s->occurrences[a][i]);
4056 for (size_t i = 0; i < N_CTDTS; i++)
4057 hmap_init (&s->domains[i]);
/* Operator callbacks with the `double (double, double)' shape that
   ctables_pcexpr_evaluate_nonterminal() takes as its EVALUATE argument.
   Going by their names these are addition, subtraction and multiplication;
   their bodies are on lines not shown in this listing. */
4062 ctpo_add (double a, double b)
4068 ctpo_sub (double a, double b)
4074 ctpo_mul (double a, double b)
/* Division callback: returns A / B, or SYSMIS when B is zero. */
4080 ctpo_div (double a, double b)
4082 return b ? a / b : SYSMIS;
/* Exponentiation callback: computes pow (A, B) after saving the current
   errno.  How errno is inspected and restored afterward, and what is returned
   on a math error, is on lines not shown in this listing. */
4086 ctpo_pow (double a, double b)
4088 int save_errno = errno;
4090 double result = pow (a, b);
/* Unary operator callback (negation, going by its name).  B is unused and
   exists only so that the function has the same shape as the binary
   callbacks.  The body is on a line not shown in this listing. */
4098 ctpo_neg (double a, double b UNUSED)
/* Context shared by the postcompute expression evaluation functions.  Other
   code in this file also reads pc_a_idx and summary_idx from it; their
   declarations are on lines not shown in this listing. */
4103 struct ctables_pcexpr_evaluate_ctx
/* Cell that supplies the categories and values for every variable other than
   the one the postcompute applies to. */
4105 const struct ctables_cell *cell;
/* Section whose cells, nests and occurrences are consulted. */
4106 const struct ctables_section *section;
/* Categories passed to category lookup and matching. */
4107 const struct ctables_categories *cats;
/* Axis of the variable that the postcompute applies to. */
4108 enum pivot_axis_type pc_a;
/* Format passed to ctables_find_category_for_postcompute(). */
4111 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate() and
   ctables_pcexpr_evaluate_nonterminal() call each other. */
4114 static double ctables_pcexpr_evaluate (
4115 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E: evaluates its first N_ARGS subexpressions (at
   most two) and applies EVALUATE to the results.  An unused second argument
   is passed as 0.  Each operand is checked for being non-finite or SYSMIS;
   the action taken in that case is on a line not shown in this listing
   (presumably an early return that propagates a missing result). */
4118 ctables_pcexpr_evaluate_nonterminal (
4119 const struct ctables_pcexpr_evaluate_ctx *ctx,
4120 const struct ctables_pcexpr *e, size_t n_args,
4121 double evaluate (double, double))
4123 double args[2] = { 0, 0 };
4124 for (size_t i = 0; i < n_args; i++)
4126 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4127 if (!isfinite (args[i]) || args[i] == SYSMIS)
4130 return evaluate (args[0], args[1]);
/* Evaluates one category operand of a postcompute expression.

   Finds the cell in CTX->section that has the same categories and values as
   CTX->cell on every non-scale variable, except that the variable at
   (CTX->pc_a, CTX->pc_a_idx) takes its category and value from PC_CV.  The
   result is that cell's summary value for CTX->summary_idx.  What is returned
   when no such cell exists is on lines not shown in this listing. */
4134 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4135 const struct ctables_cell_value *pc_cv)
4137 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity in the same way as when cells are
   inserted, with PC_CV substituted at the postcompute position. */
4140 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4142 const struct ctables_nest *nest = s->nests[a];
4143 for (size_t i = 0; i < nest->n; i++)
4144 if (i != nest->scale_idx)
4146 const struct ctables_cell_value *cv
4147 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4148 : &ctx->cell->axes[a].cvs[i]);
4149 hash = hash_pointer (cv->category, hash);
4150 if (cv->category->type != CCT_TOTAL
4151 && cv->category->type != CCT_SUBTOTAL
4152 && cv->category->type != CCT_POSTCOMPUTE)
4153 hash = value_hash (&cv->value,
4154 var_get_width (nest->vars[i]), hash);
/* Search the bucket for a cell that agrees on every identifying variable. */
4158 struct ctables_cell *tc;
4159 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4161 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4163 const struct ctables_nest *nest = s->nests[a];
4164 for (size_t i = 0; i < nest->n; i++)
4165 if (i != nest->scale_idx)
4167 const struct ctables_cell_value *p_cv
4168 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4169 : &ctx->cell->axes[a].cvs[i]);
4170 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4171 if (p_cv->category != t_cv->category
4172 || (p_cv->category->type != CCT_TOTAL
4173 && p_cv->category->type != CCT_SUBTOTAL
4174 && p_cv->category->type != CCT_POSTCOMPUTE
4175 && !value_equal (&p_cv->value,
4177 var_get_width (nest->vars[i]))))
/* Found: compute the requested summary of the matching cell. */
4189 const struct ctables_table *t = s->table;
4190 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4191 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4192 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4193 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns the
   result.

   Category terminals are resolved to a category with
   ctables_find_category_for_postcompute() and evaluated through
   ctables_pcexpr_evaluate_category().  Range terminals add up that
   result over every occurrence recorded for the postcompute variable
   that matches the resolved category.  Operator nodes are delegated to
   ctables_pcexpr_evaluate_nonterminal() with the number of operands and
   the function that combines them.

   NOTE(review): the switch statement and the operator case labels are
   not visible in this excerpt. */
4197 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4198 const struct ctables_pcexpr *e)
4205 case CTPO_CAT_NRANGE:
4206 case CTPO_CAT_SRANGE:
4208 struct ctables_cell_value cv = {
4209 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4211 assert (cv.category != NULL);
4213 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4214 const struct ctables_occurrence *o;
4217 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
/* Accumulate the value for each occurrence that falls in the range
   category. */
4218 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4219 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4221 cv.value = o->value;
4222 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4227 case CTPO_CAT_NUMBER:
4228 case CTPO_CAT_MISSING:
4229 case CTPO_CAT_OTHERNM:
4230 case CTPO_CAT_SUBTOTAL:
4231 case CTPO_CAT_TOTAL:
4233 struct ctables_cell_value cv = {
4234 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4235 .value = { .f = e->number },
4237 assert (cv.category != NULL);
4238 return ctables_pcexpr_evaluate_category (ctx, &cv);
4241 case CTPO_CAT_STRING:
4243 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
/* If the string is shorter than the variable, use a copy padded on the
   right with spaces to the variable width. */
4245 if (width > e->string.length)
4247 s = xmalloc (width);
4248 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4250 struct ctables_cell_value cv = {
4251 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4252 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4254 assert (cv.category != NULL);
4255 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
4261 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4264 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4267 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4270 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4273 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4276 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Searches every axis of CELL, a postcompute cell in section S, for a
   cell value whose category has type CCT_POSTCOMPUTE, and remembers that
   category.  The index of the variable where it was found is stored in
   *PC_A_IDX_P (PC_A_P is presumably filled in alongside it, but that
   store is not visible in this excerpt).

   CELL->postcompute must be set and a postcompute category must be
   found.  Both conditions are asserted.

   NOTE(review): the return statement is not visible here.  The caller in
   this file uses the result as the postcompute category. */
4282 static const struct ctables_category *
4283 ctables_cell_postcompute (const struct ctables_section *s,
4284 const struct ctables_cell *cell,
4285 enum pivot_axis_type *pc_a_p,
4288 assert (cell->postcompute);
4289 const struct ctables_category *pc_cat = NULL;
4290 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4291 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4293 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4294 if (cv->category->type == CCT_POSTCOMPUTE)
4298 /* Multiple postcomputes cross each other. The value is
4303 pc_cat = cv->category;
4307 *pc_a_idx_p = pc_a_idx;
4311 assert (pc_cat != NULL);
// Computes and returns the value of postcompute CELL in section S for
// summary specification SS.  The last argument at the call site in this
// file is the index of SS within its specification set; it is used here
// as summary_idx (its declaration is not visible in this excerpt).
//
// If the postcompute has a summary specification with the same function
// and percentile as SS, the format and is_ctables_format members of that
// specification are copied to *FORMAT and *IS_CTABLES_FORMAT.
//
// The value itself is obtained by evaluating the postcompute expression
// with ctables_pcexpr_evaluate(), in a context that identifies the axis
// position of the postcompute category found by
// ctables_cell_postcompute().
4316 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4317 const struct ctables_cell *cell,
4318 const struct ctables_summary_spec *ss,
4319 struct fmt_spec *format,
4320 bool *is_ctables_format,
4323 enum pivot_axis_type pc_a = 0;
4324 size_t pc_a_idx = 0;
4325 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4326 s, cell, &pc_a, &pc_a_idx);
4330 const struct ctables_postcompute *pc = pc_cat->pc;
// Look for a format override among the specifications attached to the
// postcompute.
4333 for (size_t i = 0; i < pc->specs->n; i++)
4335 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4336 if (ss->function == ss2->function
4337 && ss->percentile == ss2->percentile)
4339 *format = ss2->format;
4340 *is_ctables_format = ss2->is_ctables_format;
// The categories consulted during evaluation are those of the variable
// at the postcompute position.
4346 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4347 const struct ctables_categories *cats = s->table->categories[
4348 var_get_dict_index (var)];
4349 struct ctables_pcexpr_evaluate_ctx ctx = {
4354 .pc_a_idx = pc_a_idx,
4355 .summary_idx = summary_idx,
4356 .parse_format = pc_cat->parse_format,
4358 return ctables_pcexpr_evaluate (&ctx, pc->expr);
// Builds a pivot table for table T of CTABLES command CT and submits it
// with pivot_table_submit().
//
// The visible code works in three stages:
//
// 1. Create the table with its title, caption, and corner text, plus an
//    optional Statistics dimension (one leaf per merged summary
//    specification) and an optional dimension for category labels that
//    were moved from another axis.
//
// 2. For each axis, and each nest stacked on it, collect the cells of the
//    sections that use the nest, sort them with
//    ctables_cell_compare_3way(), and build nested pivot categories from
//    a list of levels (variable label, category, summary).  The leaf
//    index that results is stored in each cell.
//
// 3. For the cells of each section and each of their summary
//    specifications, compute the value (through the postcompute path for
//    postcompute cells) and put it in the table at the dimension indexes
//    gathered for the cell.
//
// NOTE(review): many short lines of this function are missing from this
// excerpt, including the conditions on the title, caption, and corner
// and any filtering of cells.
4362 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4364 struct pivot_table *pt = pivot_table_create__ (
4366 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4367 : pivot_value_new_text (N_("Custom Tables"))),
4370 pivot_table_set_caption (
4371 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4373 pivot_table_set_corner_text (
4374 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
// A separate Statistics dimension is created when summary labels live on
// a different axis from the summaries, or when they are hidden and there
// is more than one summary specification.
4376 bool summary_dimension = (t->summary_axis != t->slabels_axis
4377 || (!t->slabels_visible
4378 && t->summary_specs.n > 1));
4379 if (summary_dimension)
4381 struct pivot_dimension *d = pivot_dimension_create (
4382 pt, t->slabels_axis, N_("Statistics"));
4383 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4384 if (!t->slabels_visible)
4385 d->hide_all_labels = true;
4386 for (size_t i = 0; i < specs->n; i++)
4387 pivot_category_create_leaf (
4388 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
// A category dimension is created when category labels were moved, which
// is signaled by a nonnull clabels_example.
4391 bool categories_dimension = t->clabels_example != NULL;
4392 if (categories_dimension)
4394 struct pivot_dimension *d = pivot_dimension_create (
4395 pt, t->label_axis[t->clabels_from_axis],
4396 t->clabels_from_axis == PIVOT_AXIS_ROW
4397 ? N_("Row Categories")
4398 : N_("Column Categories"));
4399 const struct variable *var = t->clabels_example;
4400 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4401 for (size_t i = 0; i < t->n_clabels_values; i++)
4403 const struct ctables_value *value = t->clabels_values[i];
4404 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4405 assert (cat != NULL);
4406 pivot_category_create_leaf (d->root, ctables_category_create_label (
4407 c, cat, t->clabels_example,
4412 pivot_table_set_look (pt, ct->look);
4413 struct pivot_dimension *d[PIVOT_N_AXES];
4414 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4416 static const char *names[] = {
4417 [PIVOT_AXIS_ROW] = N_("Rows"),
4418 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4419 [PIVOT_AXIS_LAYER] = N_("Layers"),
4421 d[a] = (t->axes[a] || a == t->summary_axis
4422 ? pivot_dimension_create (pt, a, names[a])
4427 assert (t->axes[a]);
4429 for (size_t i = 0; i < t->stacks[a].n; i++)
4431 struct ctables_nest *nest = &t->stacks[a].nests[i];
4432 struct ctables_section **sections = xnmalloc (t->n_sections,
4434 size_t n_sections = 0;
4436 size_t n_total_cells = 0;
4437 size_t max_depth = 0;
// Gather the sections that use this nest on axis A, along with their
// total cell count and the deepest nest among them.
4438 for (size_t j = 0; j < t->n_sections; j++)
4439 if (t->sections[j].nests[a] == nest)
4441 struct ctables_section *s = &t->sections[j];
4442 sections[n_sections++] = s;
4443 n_total_cells += s->cells.count;
4445 size_t depth = s->nests[a]->n;
4446 max_depth = MAX (depth, max_depth);
4449 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4451 size_t n_sorted = 0;
4453 for (size_t j = 0; j < n_sections; j++)
4455 struct ctables_section *s = sections[j];
4457 struct ctables_cell *cell;
4458 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4460 sorted[n_sorted++] = cell;
4461 assert (n_sorted <= n_total_cells);
4464 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4465 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
// NOTE(review): the printf below looks like debugging output.  The
// embedded line numbers skip the lines just before and after it, so it
// may be disabled by a preprocessor conditional that is not visible
// here.  Confirm before relying on it.
4468 for (size_t j = 0; j < n_sorted; j++)
4470 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4475 struct ctables_level
4477 enum ctables_level_type
4479 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4480 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4481 CTL_SUMMARY, /* Summary functions. */
4485 enum settings_value_show vlabel; /* CTL_VAR only. */
4488 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4489 size_t n_levels = 0;
4490 for (size_t k = 0; k < nest->n; k++)
4492 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4493 if (vlabel != CTVL_NONE)
4495 levels[n_levels++] = (struct ctables_level) {
4497 .vlabel = (enum settings_value_show) vlabel,
4502 if (nest->scale_idx != k
4503 && (k != nest->n - 1 || t->label_axis[a] == a))
4505 levels[n_levels++] = (struct ctables_level) {
4506 .type = CTL_CATEGORY,
4512 if (!summary_dimension && a == t->slabels_axis)
4514 levels[n_levels++] = (struct ctables_level) {
4515 .type = CTL_SUMMARY,
4516 .var_idx = SIZE_MAX,
4520 /* Pivot categories:
4522 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4523 - category for nest->vars[0], if nest->scale_idx != 0
4524 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4525 - category for nest->vars[1], if nest->scale_idx != 1
4527 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4528 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4529 - summary function, if 'a == t->slabels_axis && a ==
4532 Additional dimensions:
4534 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4536 - If 't->label_axis[b] == a' for some 'b != a', add a category
4541 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4543 for (size_t j = 0; j < n_sorted; j++)
4545 struct ctables_cell *cell = sorted[j];
4546 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4548 size_t n_common = 0;
4551 for (; n_common < n_levels; n_common++)
4553 const struct ctables_level *level = &levels[n_common];
4554 if (level->type == CTL_CATEGORY)
4556 size_t var_idx = level->var_idx;
4557 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4558 if (prev->axes[a].cvs[var_idx].category != c)
// NOTE(review): the value_equal() call below is given
// var_get_type() as its last argument, whereas the parallel
// comparisons in ctables_pcexpr_evaluate_category() pass
// var_get_width().  If value_equal() expects a width, string values
// would be compared on too few bytes here.  Confirm and, if so, use
// var_get_width().
4560 else if (c->type != CCT_SUBTOTAL
4561 && c->type != CCT_TOTAL
4562 && c->type != CCT_POSTCOMPUTE
4563 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4564 &cell->axes[a].cvs[var_idx].value,
4565 var_get_type (nest->vars[var_idx])))
4571 for (size_t k = n_common; k < n_levels; k++)
4573 const struct ctables_level *level = &levels[k];
4574 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4575 if (level->type == CTL_SUMMARY)
4577 assert (k == n_levels - 1);
4579 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4580 for (size_t m = 0; m < specs->n; m++)
4582 int leaf = pivot_category_create_leaf (
4583 parent, ctables_summary_label (&specs->specs[m],
4591 const struct variable *var = nest->vars[level->var_idx];
4592 struct pivot_value *label;
4593 if (level->type == CTL_VAR)
4595 label = pivot_value_new_variable (var);
4596 label->variable.show = level->vlabel;
4598 else if (level->type == CTL_CATEGORY)
4600 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4601 label = ctables_category_create_label (
4602 t->categories[var_get_dict_index (var)],
4603 cv->category, var, &cv->value);
4608 if (k == n_levels - 1)
4609 prev_leaf = pivot_category_create_leaf (parent, label);
4611 groups[k] = pivot_category_create_group__ (parent, label);
4615 cell->axes[a].leaf = prev_leaf;
4624 for (size_t i = 0; i < t->n_sections; i++)
4626 struct ctables_section *s = &t->sections[i];
4628 struct ctables_cell *cell;
4629 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4634 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4635 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4636 for (size_t j = 0; j < specs->n; j++)
4639 size_t n_dindexes = 0;
4641 if (summary_dimension)
4642 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4644 if (categories_dimension)
4646 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4647 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4648 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4649 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4652 dindexes[n_dindexes++] = ctv->leaf;
4655 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4658 int leaf = cell->axes[a].leaf;
4659 if (a == t->summary_axis && !summary_dimension)
4661 dindexes[n_dindexes++] = leaf;
4664 const struct ctables_summary_spec *ss = &specs->specs[j];
4666 struct fmt_spec format = specs->specs[j].format;
4667 bool is_ctables_format = ss->is_ctables_format;
4668 double d = (cell->postcompute
4669 ? ctables_cell_calculate_postcompute (
4670 s, cell, ss, &format, &is_ctables_format, j)
4671 : ctables_summary_value (cell, &cell->summaries[j],
// Choose how to render the value.  A count below the hide threshold is
// shown as a less-than sign followed by the threshold.  Zero and
// system-missing values use the replacement text configured in CT, if
// any.  CTABLES-specific formats are rendered to text.  Anything else is
// an ordinary formatted number.
4674 struct pivot_value *value;
4675 if (ct->hide_threshold != 0
4676 && d < ct->hide_threshold
4677 && ctables_summary_function_is_count (ss->function))
4679 value = pivot_value_new_user_text_nocopy (
4680 xasprintf ("<%d", ct->hide_threshold));
4682 else if (d == 0 && ct->zero)
4683 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4684 else if (d == SYSMIS && ct->missing)
4685 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4686 else if (is_ctables_format)
4688 char *s = data_out_stretchy (&(union value) { .f = d },
4690 &ct->ctables_formats, NULL);
4691 value = pivot_value_new_user_text_nocopy (s);
4695 value = pivot_value_new_number (d);
4696 value->numeric.format = format;
4698 /* XXX should text values be right-justified? */
4699 pivot_table_put (pt, dindexes, n_dindexes, value);
4704 pivot_table_submit (pt);
/* Validates the category label position recorded for axis A of table T,
   that is, T->label_axis[A].

   The visible code records A in T->clabels_from_axis and the innermost
   variable of the first nest on the axis in T->clabels_example, and
   reports an error with msg() if:

   - a category of that variable has type CCT_FUNCTION (sorting based on
     a summary function),
   - the innermost variable of any nest on the axis is a scale variable,
     or
   - the innermost variable of a later nest differs from that of the
     first nest in width, value labels, or category specifications.

   NOTE(review): the early exits and return statements are not visible in
   this excerpt.  The caller combines two calls with &&, so the function
   presumably returns true on success and false after reporting an
   error. */
4708 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4710 enum pivot_axis_type label_pos = t->label_axis[a];
4714 t->clabels_from_axis = a;
/* Names used in the error messages below. */
4716 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4717 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4719 const struct ctables_stack *stack = &t->stacks[a];
4723 const struct ctables_nest *n0 = &stack->nests[0];
4726 assert (stack->n == 1);
4730 const struct variable *v0 = n0->vars[n0->n - 1];
4731 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4732 t->clabels_example = v0;
4734 for (size_t i = 0; i < c0->n_cats; i++)
4735 if (c0->cats[i].type == CCT_FUNCTION)
4737 msg (SE, _("%s=%s is not allowed with sorting based "
4738 "on a summary function."),
4739 subcommand_name, pos_name);
4742 if (n0->n - 1 == n0->scale_idx)
4744 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4745 "but %s is a scale variable."),
4746 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest must end in a variable compatible with V0. */
4750 for (size_t i = 1; i < stack->n; i++)
4752 const struct ctables_nest *ni = &stack->nests[i];
4754 const struct variable *vi = ni->vars[ni->n - 1];
4755 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4757 if (ni->n - 1 == ni->scale_idx)
4759 msg (SE, _("%s=%s requires the variables to be moved to be "
4760 "categorical, but %s is a scale variable."),
4761 subcommand_name, pos_name, var_get_name (vi));
4764 if (var_get_width (v0) != var_get_width (vi))
4766 msg (SE, _("%s=%s requires the variables to be "
4767 "moved to have the same width, but %s has "
4768 "width %d and %s has width %d."),
4769 subcommand_name, pos_name,
4770 var_get_name (v0), var_get_width (v0),
4771 var_get_name (vi), var_get_width (vi));
4774 if (!val_labs_equal (var_get_value_labels (v0),
4775 var_get_value_labels (vi)))
4777 msg (SE, _("%s=%s requires the variables to be "
4778 "moved to have the same value labels, but %s "
4779 "and %s have different value labels."),
4780 subcommand_name, pos_name,
4781 var_get_name (v0), var_get_name (vi));
4784 if (!ctables_categories_equal (c0, ci))
4786 msg (SE, _("%s=%s requires the variables to be "
4787 "moved to have the same category "
4788 "specifications, but %s and %s have different "
4789 "category specifications."),
4790 subcommand_name, pos_name,
4791 var_get_name (v0), var_get_name (vi));
/* Registers VAR in the array *SUM_VARS, which currently holds *N
   variables and has room for *ALLOCATED.  The array is first searched
   for VAR.  If the search does not end the function, the array is grown
   with x2nrealloc() when it is full and VAR is stored at index *N.

   NOTE(review): the action taken on a match, the update of *N, and the
   return statements are not visible in this excerpt.  The caller stores
   the result as an index into the array. */
4800 add_sum_var (struct variable *var,
4801 struct variable ***sum_vars, size_t *n, size_t *allocated)
4803 for (size_t i = 0; i < *n; i++)
4804 if (var == (*sum_vars)[i])
4807 if (*n >= *allocated)
4808 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4809 (*sum_vars)[*n] = var;
/* Walks axis expression A.  In one branch, every summary specification
   of the node whose function satisfies ctables_function_is_pctsum() gets
   its sum_var_idx set to the value returned by add_sum_var() for the
   node variable.  In the other branch, both subexpressions of the node
   are visited recursively.  SUM_VARS, N, and ALLOCATED are passed
   through to add_sum_var().

   NOTE(review): the code that selects between the two branches is not
   visible in this excerpt. */
4814 enumerate_sum_vars (const struct ctables_axis *a,
4815 struct variable ***sum_vars, size_t *n, size_t *allocated)
4823 for (size_t i = 0; i < N_CSVS; i++)
4824 for (size_t j = 0; j < a->specs[i].n; j++)
4826 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4827 if (ctables_function_is_pctsum (spec->function))
4828 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4834 for (size_t i = 0; i < 2; i++)
4835 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution.  The visible code does the following:

   - Enumerates the nests on each axis with enumerate_fts() and, for each
     nest and domain type, builds the list of variable indexes that make
     up the domain.  An alternative branch (its condition is not visible
     in this excerpt) installs a single empty nest on the axis and resets
     T->label_axis[A] to A.

   - Gives every nest on the summary axis that has no CSV_CELL summary
     specification a default one (CTSF_MEAN if the set is marked as
     scale, otherwise CTSF_COUNT), and clones CSV_CELL into CSV_TOTAL
     when the latter is empty.

   - When listwise handling of scale missing values is enabled, collects
     the scale variables of the other nests in the same group and
     attaches the list to each specification set of the nest.

   - Merges the summary specifications of all nests on the summary axis
     into T->summary_specs and records in each specification its
     position in the merged list (axis_idx).

   - Calls enumerate_sum_vars() on the summary axis expression to fill
     in T->sum_vars.

   Returns true only if ctables_check_label_position() succeeds for both
   the row axis and the column axis. */
4841 ctables_prepare_table (struct ctables_table *t)
4843 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4846 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4848 for (size_t j = 0; j < t->stacks[a].n; j++)
4850 struct ctables_nest *nest = &t->stacks[a].nests[j];
4851 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4853 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4854 nest->n_domains[dt] = 0;
4856 for (size_t k = 0; k < nest->n; k++)
4858 if (k == nest->scale_idx)
4867 if (a != PIVOT_AXIS_LAYER)
4874 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4875 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4876 : a == PIVOT_AXIS_ROW)
4878 if (k == nest->n - 1
4879 || (nest->scale_idx == nest->n - 1
4880 && k == nest->n - 2))
4886 if (a == PIVOT_AXIS_COLUMN)
4891 if (a == PIVOT_AXIS_ROW)
4896 nest->domains[dt][nest->n_domains[dt]++] = k;
4903 struct ctables_nest *nest = xmalloc (sizeof *nest);
4904 *nest = (struct ctables_nest) { .n = 0 };
4905 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4907 /* There's no point in moving labels away from an axis that has no
4908 labels, so avoid dealing with the special cases around that. */
4909 t->label_axis[a] = a;
4912 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4913 for (size_t i = 0; i < stack->n; i++)
4915 struct ctables_nest *nest = &stack->nests[i];
4916 if (!nest->specs[CSV_CELL].n)
4918 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4919 specs->specs = xmalloc (sizeof *specs->specs);
4922 enum ctables_summary_function function
4923 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): the default format below is computed from specs->var
   before the visible assignment to specs->var that follows it.  Confirm
   that specs->var is meaningful at this point. */
4925 *specs->specs = (struct ctables_summary_spec) {
4926 .function = function,
4927 .format = ctables_summary_default_format (function, specs->var),
4930 specs->var = nest->vars[0];
4932 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4933 &nest->specs[CSV_CELL]);
4935 else if (!nest->specs[CSV_TOTAL].n)
4936 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4937 &nest->specs[CSV_CELL]);
4939 if (t->ctables->smissing_listwise)
4941 struct variable **listwise_vars = NULL;
4943 size_t allocated = 0;
/* Collect the scale variable of every other nest that shares this
   group head. */
4945 for (size_t j = nest->group_head; j < stack->n; j++)
4947 const struct ctables_nest *other_nest = &stack->nests[j];
4948 if (other_nest->group_head != nest->group_head)
4951 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4954 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4955 sizeof *listwise_vars);
4956 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4959 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4962 listwise_vars = xmemdup (listwise_vars,
4963 n * sizeof *listwise_vars);
4964 nest->specs[sv].listwise_vars = listwise_vars;
4965 nest->specs[sv].n_listwise_vars = n;
/* Merge the specification sets of all nests: repeatedly take the
   smallest remaining item according to merge_item_compare_3way(), append
   it to the merged list, and advance every item that compares equal to
   it. */
4970 struct ctables_summary_spec_set *merged = &t->summary_specs;
4971 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
4973 for (size_t j = 0; j < stack->n; j++)
4975 const struct ctables_nest *nest = &stack->nests[j];
4977 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4978 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4983 struct merge_item min = items[0];
4984 for (size_t j = 1; j < n_left; j++)
4985 if (merge_item_compare_3way (&items[j], &min) < 0)
4988 if (merged->n >= merged->allocated)
4989 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4990 sizeof *merged->specs);
4991 merged->specs[merged->n++] = min.set->specs[min.ofs];
4993 for (size_t j = 0; j < n_left; )
4995 if (merge_item_compare_3way (&items[j], &min) == 0)
4997 struct merge_item *item = &items[j];
4998 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4999 if (++item->ofs >= item->set->n)
5001 items[j] = items[--n_left];
/* NOTE(review): the printf calls below look like debugging output.  The
   embedded line numbers skip lines around them, so they may be disabled
   by a preprocessor conditional that is not visible here. */
5011 for (size_t j = 0; j < merged->n; j++)
5012 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5014 for (size_t j = 0; j < stack->n; j++)
5016 const struct ctables_nest *nest = &stack->nests[j];
5017 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5019 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5020 for (size_t k = 0; k < specs->n; k++)
5021 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5022 specs->specs[k].axis_idx);
5028 size_t allocated_sum_vars = 0;
5029 enumerate_sum_vars (t->axes[t->summary_axis],
5030 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5032 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5033 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of table T, takes the value in case C of the
   innermost variable of the nest and, depending on the result of
   ctables_categories_match() for that variable, inserts it into T with
   ctables_value_insert().

   Numeric system-missing values are tested for first.  The action taken
   for them is not visible in this excerpt (presumably they are
   skipped). */
5037 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5038 enum pivot_axis_type a)
5040 struct ctables_stack *stack = &t->stacks[a];
5041 for (size_t i = 0; i < stack->n; i++)
5043 const struct ctables_nest *nest = &stack->nests[i];
5044 const struct variable *var = nest->vars[nest->n - 1];
5045 const union value *value = case_data (c, var);
5047 if (var_is_numeric (var) && value->f == SYSMIS)
5050 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5052 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sort().  A_ and B_ each point to an element of
   an array of pointers to struct ctables_value, and WIDTH_ points to an
   int that holds the width of the values.  Returns the result of
   value_compare_3way() applied to the two values with that width. */
5057 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5059 const struct ctables_value *const *ap = a_;
5060 const struct ctables_value *const *bp = b_;
5061 const struct ctables_value *a = *ap;
5062 const struct ctables_value *b = *bp;
5063 const int *width = width_;
5064 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of every value in
   T->clabels_values_map sorted with compare_clabels_values_3way(), sets
   T->n_clabels_values to its length, and assigns each value its position
   in the sorted array as its leaf member.

   Before that, each value that has a value label on T->clabels_example
   and matches the categories of that variable is inserted into the map.
   NOTE(review): the condition that guards this insertion is not visible
   in this excerpt. */
5068 ctables_sort_clabels_values (struct ctables_table *t)
5070 const struct variable *v0 = t->clabels_example;
5071 int width = var_get_width (v0);
5073 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
5076 const struct val_labs *val_labs = var_get_value_labels (v0);
5077 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5078 vl = val_labs_next (val_labs, vl))
5079 if (ctables_categories_match (c0, &vl->value, v0))
5080 ctables_value_insert (t, &vl->value, width);
/* Flatten the map into an array so that it can be sorted. */
5083 size_t n = hmap_count (&t->clabels_values_map);
5084 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5086 struct ctables_value *clv;
5088 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5089 t->clabels_values[i++] = clv;
5090 t->n_clabels_values = n;
5093 sort (t->clabels_values, n, sizeof *t->clabels_values,
5094 compare_clabels_values_3way, &width);
5096 for (size_t i = 0; i < n; i++)
5097 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES, with ctables_add_occurrence(), values of VAR that
   are implied by the categories in CATS.

   Most case labels of the switch on the category type are missing from
   this excerpt.  In order, the visible branches add:

   - an explicit number,
   - an explicit string, padded with spaces to the width of VAR,
   - the labeled values that lie within a numeric range,
   - the labeled values that lie within a string range,
   - the labeled values that are missing values of VAR,
   - every labeled value, and
   - the labeled values that are not missing, or all of them if the
     category has include_missing set.

   The caller visible in this file invokes this function next to a test
   of the show_empty flag of CATS. */
5101 ctables_add_category_occurrences (const struct variable *var,
5102 struct hmap *occurrences,
5103 const struct ctables_categories *cats)
5105 const struct val_labs *val_labs = var_get_value_labels (var);
5107 for (size_t i = 0; i < cats->n_cats; i++)
5109 const struct ctables_category *c = &cats->cats[i];
5113 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5119 int width = var_get_width (var);
5121 value_init (&value, width);
5122 value_copy_buf_rpad (&value, width,
5123 CHAR_CAST (uint8_t *, c->string.string),
5124 c->string.length, ' ');
5125 ctables_add_occurrence (var, &value, occurrences);
5126 value_destroy (&value, width);
5131 assert (var_is_numeric (var));
5132 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5133 vl = val_labs_next (val_labs, vl))
5134 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5135 ctables_add_occurrence (var, &vl->value, occurrences);
5139 assert (var_is_alpha (var));
5140 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5141 vl = val_labs_next (val_labs, vl))
5142 if (in_string_range (&vl->value, var, c->srange))
5143 ctables_add_occurrence (var, &vl->value, occurrences);
5147 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5148 vl = val_labs_next (val_labs, vl))
5149 if (var_is_value_missing (var, &vl->value))
5150 ctables_add_occurrence (var, &vl->value, occurrences);
5154 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5155 vl = val_labs_next (val_labs, vl))
5156 ctables_add_occurrence (var, &vl->value, occurrences);
5159 case CCT_POSTCOMPUTE:
5169 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5170 vl = val_labs_next (val_labs, vl))
5171 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5172 ctables_add_occurrence (var, &vl->value, occurrences);
5175 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Enumerates combinations of the occurrences recorded in section S,
   starting from variable A_IDX on axis A:

   - Past the last axis, the combination held in C and CATS is inserted
     as a cell with ctables_cell_insert__().

   - When axis A has no nest or A_IDX is past its last variable, the
     recursion moves on to the first variable of the next axis.

   - Otherwise, for each occurrence of the variable, its value is stored
     into scratch case C, the matching category is stored into
     CATS[A][A_IDX] (a match is asserted), and the recursion continues
     with the next variable.  Each postcompute category of the variable
     is then tried the same way, without changing the value in C. */
5182 ctables_section_recurse_add_empty_categories (
5183 struct ctables_section *s,
5184 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5185 enum pivot_axis_type a, size_t a_idx)
5187 if (a >= PIVOT_N_AXES)
5188 ctables_cell_insert__ (s, c, cats);
5189 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5190 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5193 const struct variable *var = s->nests[a]->vars[a_idx];
5194 const struct ctables_categories *categories = s->table->categories[
5195 var_get_dict_index (var)];
5196 int width = var_get_width (var);
5197 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5198 const struct ctables_occurrence *o;
5199 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace the value of VAR in the scratch case with this occurrence. */
5201 union value *value = case_data_rw (c, var);
5202 value_destroy (value, width);
5203 value_clone (value, &o->value, width);
5204 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5205 assert (cats[a][a_idx] != NULL);
5206 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
5209 for (size_t i = 0; i < categories->n_cats; i++)
5211 const struct ctables_category *cat = &categories->cats[i];
5212 if (cat->type == CCT_POSTCOMPUTE)
5214 cats[a][a_idx] = cat;
5215 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Visits every non-scale variable on every axis of section S and, in
   connection with the show_empty flag of its categories, adds category
   occurrences with ctables_add_category_occurrences().  Then creates a
   scratch case from the dictionary prototype and enumerates combinations
   of occurrences with ctables_section_recurse_add_empty_categories().

   NOTE(review): CATS is a fixed-size array with room for 10 variables
   per axis (see the XXX marker), and nothing visible here checks a nest
   against that limit.  The use made of the local show_empty flag and the
   release of the scratch case are not visible in this excerpt. */
5222 ctables_section_add_empty_categories (struct ctables_section *s)
5224 bool show_empty = false;
5225 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5227 for (size_t k = 0; k < s->nests[a]->n; k++)
5228 if (k != s->nests[a]->scale_idx)
5230 const struct variable *var = s->nests[a]->vars[k];
5231 const struct ctables_categories *cats = s->table->categories[
5232 var_get_dict_index (var)];
5233 if (cats->show_empty)
5236 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5242 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5243 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5244 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Discards the data accumulated in section S:

   - every occurrence of every non-scale variable, destroying its value
     and removing it from its hash map,
   - every cell, destroying its non-scale values, freeing its value
     arrays, uninitializing and freeing its summaries, and removing it
     from S->cells, and
   - every domain of every domain type, freeing its sums and removing it
     from its hash map.

   The cell and domain hash maps are shrunk rather than destroyed.

   NOTE(review): the statements that free the occurrence, cell, and
   domain structures themselves are not visible in this excerpt. */
5249 ctables_section_clear (struct ctables_section *s)
5251 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5253 const struct ctables_nest *nest = s->nests[a];
5254 for (size_t i = 0; i < nest->n; i++)
5255 if (i != nest->scale_idx)
5257 const struct variable *var = nest->vars[i];
5258 int width = var_get_width (var);
5259 struct ctables_occurrence *o, *next;
5260 struct hmap *map = &s->occurrences[a][i];
5261 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5263 value_destroy (&o->value, width);
5264 hmap_delete (map, &o->node);
5271 struct ctables_cell *cell, *next_cell;
5272 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5274 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5276 const struct ctables_nest *nest = s->nests[a];
5277 for (size_t i = 0; i < nest->n; i++)
5278 if (i != nest->scale_idx)
5279 value_destroy (&cell->axes[a].cvs[i].value,
5280 var_get_width (nest->vars[i]));
5281 free (cell->axes[a].cvs);
/* The summaries of a cell follow the specification set that belongs to
   its variant (cell->sv). */
5284 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5285 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5286 for (size_t i = 0; i < specs->n; i++)
5287 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5288 free (cell->summaries);
5290 hmap_delete (&s->cells, &cell->node);
5293 hmap_shrink (&s->cells);
5295 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5297 struct ctables_domain *domain, *next_domain;
5298 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5301 free (domain->sums);
5302 hmap_delete (&s->domains[dt], &domain->node);
5305 hmap_shrink (&s->domains[dt]);
/* Releases section S: clears its contents with ctables_section_clear(),
   then destroys the occurrence hash map of every variable on every axis,
   frees the per-axis arrays of occurrence maps, and destroys the cell
   and domain hash maps. */
5310 ctables_section_uninit (struct ctables_section *s)
5312 ctables_section_clear (s);
5314 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5316 struct ctables_nest *nest = s->nests[a];
5317 for (size_t i = 0; i < nest->n; i++)
5318 hmap_destroy (&s->occurrences[a][i]);
5319 free (s->occurrences[a]);
5322 hmap_destroy (&s->cells);
5323 for (size_t i = 0; i < N_CTDTS; i++)
5324 hmap_destroy (&s->domains[i]);
/* Clears the accumulated data of table T.  Every section is cleared with
   ctables_section_clear().  If T has a category label example variable,
   every value in T->clabels_values_map is destroyed and removed, the map
   is shrunk, and the T->clabels_values array is freed and reset to
   empty. */
5328 ctables_table_clear (struct ctables_table *t)
5330 for (size_t i = 0; i < t->n_sections; i++)
5331 ctables_section_clear (&t->sections[i]);
5333 if (t->clabels_example)
5335 int width = var_get_width (t->clabels_example);
5336 struct ctables_value *value, *next_value;
5337 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5338 &t->clabels_values_map)
5340 value_destroy (&value->value, width);
5341 hmap_delete (&t->clabels_values_map, &value->node);
5344 hmap_shrink (&t->clabels_values_map);
5346 free (t->clabels_values);
5347 t->clabels_values = NULL;
5348 t->n_clabels_values = 0;
/* Runs the CTABLES analysis over the cases in INPUT, which come from
   dataset DS.  (The parameter that supplies the CTABLES state used below
   as ct is declared on a line that is not visible in this excerpt.)

   First, the section array of every table is allocated, with one slot
   per combination of nests on the row, column, and layer axes, and
   filled in by ctables_table_add_section().

   The cases are then processed in groups: one group per SPLIT FILE
   combination when the split type is SPLIT_SEPARATE, otherwise a single
   group.  For each group, every case is fed to every section of every
   table with its dictionary weight and effective weight, and the values
   for moved category labels are collected.  After the last case of a
   group, each table has its category label values sorted if needed and
   its empty categories added, and is then output and cleared.

   Returns the result of casegrouper_destroy(). */
5353 ctables_execute (struct dataset *ds, struct casereader *input,
5356 for (size_t i = 0; i < ct->n_tables; i++)
5358 struct ctables_table *t = ct->tables[i];
5359 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5360 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5361 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5362 sizeof *t->sections);
5363 size_t ix[PIVOT_N_AXES];
5364 ctables_table_add_section (t, 0, ix);
5367 struct dictionary *dict = dataset_dict (ds);
5368 struct casegrouper *grouper
5369 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5370 ? casegrouper_create_splits (input, dict)
5371 : casegrouper_create_vars (input, NULL, 0));
5372 struct casereader *group;
5373 while (casegrouper_get_next_group (grouper, &group))
5375 /* Output SPLIT FILE variables. */
5376 struct ccase *c = casereader_peek (group, 0);
5379 output_split_file_values (ds, c);
5383 bool warn_on_invalid = true;
5384 for (c = casereader_read (group); c;
5385 case_unref (c), c = casereader_read (group))
5387 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5388 double e_weight = (ct->e_weight
5389 ? var_force_valid_weight (ct->e_weight,
5390 case_num (c, ct->e_weight),
5394 for (size_t i = 0; i < ct->n_tables; i++)
5396 struct ctables_table *t = ct->tables[i];
5398 for (size_t j = 0; j < t->n_sections; j++)
5399 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Collect the values of variables whose category labels are displayed
   on a different axis. */
5401 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5402 if (t->label_axis[a] != a)
5403 ctables_insert_clabels_values (t, c, a);
5406 casereader_destroy (group);
5408 for (size_t i = 0; i < ct->n_tables; i++)
5410 struct ctables_table *t = ct->tables[i];
5412 if (t->clabels_example)
5413 ctables_sort_clabels_values (t);
5415 for (size_t j = 0; j < t->n_sections; j++)
5416 ctables_section_add_empty_categories (&t->sections[j]);
5418 ctables_table_output (ct, t);
5419 ctables_table_clear (t);
5422 return casegrouper_destroy (grouper);
/* Signature of the parsing callbacks that the binary operator parsers in
   this file receive as parse_next_level: given a lexer and a dictionary,
   the callback returns a postcompute expression. */
5427 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5428 struct dictionary *);
/* Releases the resources owned by expression E, according to its type:
   the string of a CTPO_CAT_STRING node, both strings of a
   CTPO_CAT_SRANGE node, and, in a branch whose case labels are not
   visible in this excerpt, both subexpressions (recursively).  The other
   category node types listed below own nothing extra.  The message
   location of E is destroyed in every case.

   NOTE(review): any null check on E and the deallocation of E itself are
   not visible in this excerpt. */
5431 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5437 case CTPO_CAT_STRING:
5438 ss_dealloc (&e->string);
5441 case CTPO_CAT_SRANGE:
5442 for (size_t i = 0; i < 2; i++)
5443 ss_dealloc (&e->srange[i]);
5452 for (size_t i = 0; i < 2; i++)
5453 ctables_pcexpr_destroy (e->subs[i]);
5457 case CTPO_CAT_NUMBER:
5458 case CTPO_CAT_NRANGE:
5459 case CTPO_CAT_MISSING:
5460 case CTPO_CAT_OTHERNM:
5461 case CTPO_CAT_SUBTOTAL:
5462 case CTPO_CAT_TOTAL:
5466 msg_location_destroy (e->location);
/* Allocates a new expression node with SUB0 and SUB1 as its two operands and
   with a location obtained by merging the operands' locations.  Both SUB0 and
   SUB1 are dereferenced, so neither may be null.

   OP is presumably stored as the node's operator; that initializer is not
   visible in this extract (TODO confirm). */
5471 static struct ctables_pcexpr *
5472 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5473 struct ctables_pcexpr *sub0,
5474 struct ctables_pcexpr *sub1)
5476 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5477 *e = (struct ctables_pcexpr) {
5479 .subs = { sub0, sub1 },
5480 .location = msg_location_merged (sub0->location, sub1->location),
5485 /* How to parse an operator: associates a lexer token type with the
   postcompute operation that the token denotes. */
/* Token type that ctables_pcexpr_match_operator() compares against the
   lexer's current token. */
5488 enum token_type token;
/* Operation passed to ctables_pcexpr_allocate_binary() for the node built
   when the token is matched. */
5489 enum ctables_postcompute_op op;
/* Searches the N_OPS entries of OPS, in order, for one whose token type
   equals the lexer's current token type.

   T_NEG_NUM entries are treated differently from the others on a match.
   NOTE(review): the guarded statement and the return statements are not
   visible in this extract; presumably the matched token is consumed unless
   it is T_NEG_NUM, because a negative-number token also supplies the right
   operand (ctables_pcexpr_parse_add() maps T_NEG_NUM to CTPO_ADD) -- confirm
   against the full source. */
5492 static const struct operator *
5493 ctables_pcexpr_match_operator (struct lexer *lexer,
5494 const struct operator ops[], size_t n_ops)
5496 for (const struct operator *op = ops; op < ops + n_ops; op++)
5497 if (lex_token (lexer) == op->token)
5499 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators taken from the N_OPS entries of OPS,
   given an already parsed left operand LHS.

   Each iteration looks for an operator with
   ctables_pcexpr_match_operator(), parses the right operand with
   PARSE_NEXT_LEVEL, and combines the two into a new binary node that becomes
   LHS for the next iteration, so a chain groups from the left.

   If CHAIN_WARNING is nonnull, it is reported as a warning at LHS's location
   when OP_COUNT exceeds 1.  LHS is destroyed on a path that follows the
   parse of the right operand.

   NOTE(review): the conditions guarding the warning, the destruction of LHS,
   and the return statements are not visible in this extract. */
5508 static struct ctables_pcexpr *
5509 ctables_pcexpr_parse_binary_operators__ (
5510 struct lexer *lexer, struct dictionary *dict,
5511 const struct operator ops[], size_t n_ops,
5512 parse_recursively_func *parse_next_level,
5513 const char *chain_warning, struct ctables_pcexpr *lhs)
/* OP_COUNT is the number of loop iterations completed so far. */
5515 for (int op_count = 0; ; op_count++)
5517 const struct operator *op
5518 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5521 if (op_count > 1 && chain_warning)
5522 msg_at (SW, lhs->location, "%s", chain_warning);
5527 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5530 ctables_pcexpr_destroy (lhs);
/* Fold the new operand into the running left-hand side. */
5534 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one precedence level of binary operators: parses the first operand
   with PARSE_NEXT_LEVEL, then hands it to
   ctables_pcexpr_parse_binary_operators__() together with OPS, N_OPS and
   CHAIN_WARNING to parse any operators and further operands that follow.

   NOTE(review): handling of a failed first operand is not visible in this
   extract. */
5538 static struct ctables_pcexpr *
5539 ctables_pcexpr_parse_binary_operators (
5540 struct lexer *lexer, struct dictionary *dict,
5541 const struct operator ops[], size_t n_ops,
5542 parse_recursively_func *parse_next_level, const char *chain_warning)
5544 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5548 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5550 chain_warning, lhs);
/* Forward declaration: ctables_pcexpr_parse_primary() calls this function to
   parse a parenthesized subexpression, before its definition appears. */
5553 static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
5554 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE node for the numeric range from LOW to
   HIGH.  ctables_pcexpr_parse_primary() passes -DBL_MAX as LOW for `LO THRU'
   and DBL_MAX as HIGH for `THRU HI'. */
5556 static struct ctables_pcexpr
5557 ctpo_cat_nrange (double low, double high)
5559 return (struct ctables_pcexpr) {
5560 .op = CTPO_CAT_NRANGE,
5561 .nrange = { low, high },
/* Returns, by value, a CTPO_CAT_SRANGE node for the string range from LOW to
   HIGH.  The substrings are stored as given, without copying.
   ctables_pcexpr_parse_primary() passes a substring whose `string' member is
   null for an endpoint given as LO or HI. */
5565 static struct ctables_pcexpr
5566 ctpo_cat_srange (struct substring low, struct substring high)
5568 return (struct ctables_pcexpr) {
5569 .op = CTPO_CAT_SRANGE,
5570 .srange = { low, high },
/* Parses a primary postcompute expression at the lexer's current position:

     - a number, yielding a CTPO_CONSTANT node;
     - one of the keywords MISSING, OTHERNM, TOTAL, or SUBTOTAL, the last
       optionally followed by a bracketed integer index that is at least 1;
     - a bracketed category: a number, a string, or a range written with
       THRU, where LO may stand for the low end and HI for the high end;
     - a parenthesized subexpression, parsed with ctables_pcexpr_parse_add().

   For every form except the parenthesized one, the node is assembled in the
   local E, given a location that runs from the first token parsed here
   through the last token consumed, and returned as a heap copy.

   NOTE(review): this extract omits short lines, so the statements that
   advance the lexer past numbers and the failure returns are not visible. */
5574 static struct ctables_pcexpr *
5575 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5577 int start_ofs = lex_ofs (lexer);
5578 struct ctables_pcexpr e;
5579 if (lex_is_number (lexer))
5581 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5582 .number = lex_number (lexer) };
5585 else if (lex_match_id (lexer, "MISSING"))
5586 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5587 else if (lex_match_id (lexer, "OTHERNM"))
5588 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5589 else if (lex_match_id (lexer, "TOTAL"))
5590 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5591 else if (lex_match_id (lexer, "SUBTOTAL"))
/* The index stays 0 when no bracketed index follows SUBTOTAL. */
5593 size_t subtotal_index = 0;
5594 if (lex_match (lexer, T_LBRACK))
5596 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5598 subtotal_index = lex_integer (lexer);
5600 if (!lex_force_match (lexer, T_RBRACK))
5603 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5604 .subtotal_index = subtotal_index };
/* Bracketed category reference. */
5606 else if (lex_match (lexer, T_LBRACK))
/* LO THRU followed by a string or a number: range with an open low end. */
5608 if (lex_match_id (lexer, "LO"))
5610 if (!lex_force_match_id (lexer, "THRU"))
5613 if (lex_is_string (lexer))
5615 struct substring low = { .string = NULL };
5616 struct substring high = parse_substring (lexer, dict);
5617 e = ctpo_cat_srange (low, high);
5621 if (!lex_force_num (lexer))
5623 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
/* A number, optionally followed by THRU and then HI or another number. */
5627 else if (lex_is_number (lexer))
5629 double number = lex_number (lexer);
5631 if (lex_match_id (lexer, "THRU"))
5633 if (lex_match_id (lexer, "HI"))
5634 e = ctpo_cat_nrange (number, DBL_MAX);
5637 if (!lex_force_num (lexer))
5639 e = ctpo_cat_nrange (number, lex_number (lexer));
5644 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
/* A string, optionally followed by THRU and then HI or another string. */
5647 else if (lex_is_string (lexer))
5649 struct substring s = parse_substring (lexer, dict);
5651 if (lex_match_id (lexer, "THRU"))
5653 struct substring high;
5655 if (lex_match_id (lexer, "HI"))
5656 high = (struct substring) { .string = NULL };
5659 if (!lex_force_string (lexer))
5664 high = parse_substring (lexer, dict);
5667 e = ctpo_cat_srange (s, high);
5670 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5674 lex_error (lexer, NULL);
/* If the closing bracket is missing, release any strings that E already
   holds. */
5678 if (!lex_force_match (lexer, T_RBRACK))
5680 if (e.op == CTPO_CAT_STRING)
5681 ss_dealloc (&e.string);
5682 else if (e.op == CTPO_CAT_SRANGE)
5684 ss_dealloc (&e.srange[0]);
5685 ss_dealloc (&e.srange[1]);
/* Parenthesized subexpression: restart at the lowest-precedence level. */
5690 else if (lex_match (lexer, T_LPAREN))
5692 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5695 if (!lex_force_match (lexer, T_RPAREN))
5697 ctables_pcexpr_destroy (ep);
5704 lex_error (lexer, NULL);
/* The location ends at the token just before the lexer's current position. */
5708 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5709 return xmemdup (&e, sizeof e);
/* Allocates a new expression node whose location runs from token offset
   START_OFS through the token just before LEXER's current position.

   Judging by the name and the callers, the node negates SUB, but the
   initializers that store the operator and SUB are not visible in this
   extract (TODO confirm). */
5712 static struct ctables_pcexpr *
5713 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5714 struct lexer *lexer, int start_ofs)
5716 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5717 *e = (struct ctables_pcexpr) {
5720 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level: a chain of T_EXP operators (CTPO_POW)
   whose operands are parsed by ctables_pcexpr_parse_primary().  A chain
   warning explains that the operator groups from the left.

   Unless the current token is T_NEG_NUM and the next one is T_EXP, the work
   is delegated to ctables_pcexpr_parse_binary_operators().  Otherwise the
   negative number's sign is stripped to build a CTPO_CONSTANT left operand
   (the negated token value), the chain is parsed from that operand with
   ctables_pcexpr_parse_binary_operators__(), and the result is wrapped with
   ctables_pcexpr_allocate_neg().

   NOTE(review): in this extract the "Special case" comment below is not
   closed within the visible lines, and the statement that advances past the
   number is not visible either; check both against the full source. */
5725 static struct ctables_pcexpr *
5726 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5728 static const struct operator op = { T_EXP, CTPO_POW };
5730 const char *chain_warning =
5731 _("The exponentiation operator (`**') is left-associative: "
5732 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5733 "To disable this warning, insert parentheses.");
5735 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5736 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5737 ctables_pcexpr_parse_primary,
5740 /* Special case for situations like "-5**6", which must be parsed as
5743 int start_ofs = lex_ofs (lexer);
5744 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5745 *lhs = (struct ctables_pcexpr) {
5746 .op = CTPO_CONSTANT,
5747 .number = -lex_tokval (lexer),
5748 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5752 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5753 lexer, dict, &op, 1,
5754 ctables_pcexpr_parse_primary, chain_warning, lhs);
5758 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5761 /* Parses the unary minus level.  Without a leading T_DASH this is just
   the exponentiation level.  With one, the operand is parsed by a recursive
   call, so any number of minus signs may be stacked, and the result is
   wrapped with ctables_pcexpr_allocate_neg() using a location that starts at
   the dash.

   NOTE(review): handling of a failed operand parse is not visible in this
   extract. */
5762 static struct ctables_pcexpr *
5763 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5765 int start_ofs = lex_ofs (lexer);
5766 if (!lex_match (lexer, T_DASH))
5767 return ctables_pcexpr_parse_exp (lexer, dict);
5769 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5773 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5776 /* Parses the multiplication and division level: a chain of T_ASTERISK
   (CTPO_MUL) and T_SLASH (CTPO_DIV) operators whose operands are parsed by
   ctables_pcexpr_parse_neg().  No chain warning is requested. */
5777 static struct ctables_pcexpr *
5778 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5780 static const struct operator ops[] =
5782 { T_ASTERISK, CTPO_MUL },
5783 { T_SLASH, CTPO_DIV },
5786 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5787 sizeof ops / sizeof *ops,
5788 ctables_pcexpr_parse_neg, NULL);
5791 /* Parses the addition and subtraction level, which is the entry point
   for a whole postcompute expression: a chain of T_PLUS (CTPO_ADD) and
   T_DASH (CTPO_SUB) operators whose operands are parsed by
   ctables_pcexpr_parse_mul().  No chain warning is requested.

   A T_NEG_NUM token in operator position is also accepted and mapped to
   CTPO_ADD, so that a negative number following an operand is treated as an
   addition of that number. */
5792 static struct ctables_pcexpr *
5793 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5795 static const struct operator ops[] =
5797 { T_PLUS, CTPO_ADD },
5798 { T_DASH, CTPO_SUB },
5799 { T_NEG_NUM, CTPO_ADD },
5802 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5803 ops, sizeof ops / sizeof *ops,
5804 ctables_pcexpr_parse_mul, NULL);
/* Looks up the postcompute named NAME in CT->postcomputes.  The bucket is
   chosen with utf8_hash_case_string() and candidates are compared with
   utf8_strcasecmp(), so the lookup ignores case.

   NOTE(review): the return statements are not visible in this extract;
   callers test the result for null. */
5807 static struct ctables_postcompute *
5808 ctables_find_postcompute (struct ctables *ct, const char *name)
5810 struct ctables_postcompute *pc;
5811 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5812 utf8_hash_case_string (name, 0), &ct->postcomputes)
5813 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form

       &NAME = EXPR(expression)

   The PCOMPUTE keyword itself has already been consumed by the caller, which
   is why the recorded start offset is one token before the current one.

   The expression is parsed with ctables_pcexpr_parse_add().  If a
   postcompute called NAME already exists, a warning is issued at the new
   definition, a note points at the previous one, and the previous expression
   and location are destroyed; otherwise a new postcompute is allocated and
   inserted into the table's hash map under the case-insensitive hash of its
   name.

   NOTE(review): this extract omits short lines, including the last parameter
   (the `ct' used below), the failure returns, and the statements that store
   the expression and guard the default label. */
5819 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5822 int pcompute_start = lex_ofs (lexer) - 1;
5824 if (!lex_match (lexer, T_AND))
5826 lex_error_expecting (lexer, "&");
5829 if (!lex_force_id (lexer))
/* NAME is a private copy of the identifier token. */
5832 char *name = ss_xstrdup (lex_tokss (lexer));
5835 if (!lex_force_match (lexer, T_EQUALS)
5836 || !lex_force_match_id (lexer, "EXPR")
5837 || !lex_force_match (lexer, T_LPAREN))
/* Token offsets of the first and last tokens of the expression proper. */
5843 int expr_start = lex_ofs (lexer);
5844 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5845 int expr_end = lex_ofs (lexer) - 1;
5846 if (!expr || !lex_force_match (lexer, T_RPAREN))
5848 ctables_pcexpr_destroy (expr);
5852 int pcompute_end = lex_ofs (lexer) - 1;
5854 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5857 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5860 msg_at (SW, location, _("New definition of &%s will override the "
5861 "previous definition."),
5863 msg_at (SN, pc->location, _("This is the previous definition."));
5865 ctables_pcexpr_destroy (pc->expr);
5866 msg_location_destroy (pc->location);
/* First definition under this name: the new postcompute takes NAME. */
5871 pc = xmalloc (sizeof *pc);
5872 *pc = (struct ctables_postcompute) { .name = name };
5873 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5874 utf8_hash_case_string (pc->name, 0));
5877 pc->location = location;
/* The label is the source text of the expression. */
5879 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the list of summary specifications that follows FORMAT in a
   PPROPERTIES subcommand into SSS, which is reset to an empty set first.

   Parsing continues until the end of the command, a slash, or an identifier
   that matches LABEL or HIDESOURCECATS.  Each item is a summary function,
   then for CTSF_PTILE a percentile in the closed range from 0 to 100, then a
   format specifier.  Each parsed item is appended to SSS->specs, which grows
   as needed.

   SSS is uninitialized again by the final visible statement.
   NOTE(review): this extract omits short lines, so the failure jumps and
   return statements, and therefore the path on which that cleanup runs, are
   not visible. */
5884 ctables_parse_pproperties_format (struct lexer *lexer,
5885 struct ctables_summary_spec_set *sss)
5887 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5889 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5890 && !(lex_token (lexer) == T_ID
5891 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5892 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5893 lex_tokss (lexer)))))
5895 /* Parse function. */
5896 enum ctables_summary_function function;
5897 if (!parse_ctables_summary_function (lexer, &function))
5900 /* Parse percentile. */
5901 double percentile = 0;
5902 if (function == CTSF_PTILE)
5904 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5906 percentile = lex_number (lexer);
/* Parse the format specifier; IS_CTABLES_FORMAT is filled in by the
   parser. */
5911 struct fmt_spec format;
5912 bool is_ctables_format;
5913 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Append the new specification, enlarging the array when it is full. */
5916 if (sss->n >= sss->allocated)
5917 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5918 sizeof *sss->specs);
5919 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5920 .function = function,
5921 .percentile = percentile,
5923 .is_ctables_format = is_ctables_format,
5929 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand for CT.

   First comes a list of postcompute references, each written as `&' plus an
   identifier that must name an existing postcompute; an unknown name is
   reported as an error.  The referenced postcomputes are collected in the
   local array PCS.

   Then, until a slash or the end of the command, come settings that are
   applied to every collected postcompute:

     - LABEL, followed by a string that replaces the label;
     - FORMAT, followed by summary specifications parsed by
       ctables_parse_pproperties_format();
     - HIDESOURCECATS, followed by a boolean.

   An equals sign after each keyword is optional.

   NOTE(review): this extract omits short lines, including the counter of
   collected postcomputes (`n_pcs'), the failure jumps, the release of PCS,
   and the return statements. */
5934 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5936 struct ctables_postcompute **pcs = NULL;
5938 size_t allocated_pcs = 0;
5940 while (lex_match (lexer, T_AND))
5942 if (!lex_force_id (lexer))
5944 struct ctables_postcompute *pc
5945 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5948 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5953 if (n_pcs >= allocated_pcs)
5954 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5958 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5960 if (lex_match_id (lexer, "LABEL"))
5962 lex_match (lexer, T_EQUALS);
5963 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string. */
5966 for (size_t i = 0; i < n_pcs; i++)
5968 free (pcs[i]->label);
5969 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5974 else if (lex_match_id (lexer, "FORMAT"))
5976 lex_match (lexer, T_EQUALS);
5978 struct ctables_summary_spec_set sss;
5979 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets its own clone of the parsed set; the parsed set
   itself is released after the loop. */
5982 for (size_t i = 0; i < n_pcs; i++)
5985 ctables_summary_spec_set_uninit (pcs[i]->specs);
5987 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5988 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5990 ctables_summary_spec_set_uninit (&sss);
5992 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5994 lex_match (lexer, T_EQUALS);
5995 bool hide_source_cats;
5996 if (!parse_bool (lexer, &hide_source_cats))
5998 for (size_t i = 0; i < n_pcs; i++)
5999 pcs[i]->hide_source_cats = hide_source_cats;
6003 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends NOW to OUT, converted to local time and formatted by strftime()
   according to FORMAT.

   Appends nothing if NOW cannot be converted to local time, or if strftime()
   returns 0.  In the latter case the contents of the buffer are unspecified,
   so the buffer must not be read. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  /* localtime() returns a null pointer when the conversion fails, and a null
     pointer must not be passed on to strftime(). */
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
/* If *S starts with PREFIX, advances *S past it.  put_title_text() uses the
   result as a condition to tell whether the prefix was found.

   NOTE(review): the return statements are not visible in this extract. */
6025 skip_prefix (struct substring *s, struct substring prefix)
6027 if (ss_starts_with (*s, prefix))
6029 ss_advance (s, prefix.length);
/* Appends to OUT a rendering of the tokens at offsets EXPR_START up to, but
   not including, EXPR_END in LEXER.

   An identifier that names a variable in DICT with a variable label is
   rendered as that label; any other identifier is rendered as written.
   Other tokens are rendered with their source representation, with a space
   inserted before them unless they come first, are a right parenthesis, or
   already follow a space, and a space appended after them unless they come
   last or are a left parenthesis.

   Square brackets are tracked separately through `nest'.
   NOTE(review): this extract omits short lines, including the declaration
   and updates of `nest' and any branch that tests it on its own, so how
   bracketed tokens are treated cannot be confirmed from here. */
6037 put_table_expression (struct string *out, struct lexer *lexer,
6038 struct dictionary *dict, int expr_start, int expr_end)
6041 for (int ofs = expr_start; ofs < expr_end; ofs++)
6043 const struct token *t = lex_ofs_token (lexer, ofs);
6044 if (t->type == T_LBRACK)
6046 else if (t->type == T_RBRACK && nest > 0)
6052 else if (t->type == T_ID)
6054 const struct variable *var
6055 = dict_lookup_var (dict, t->string.string);
6056 const char *label = var ? var_get_label (var) : NULL;
6057 ds_put_cstr (out, label ? label : t->string.string);
6061 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6062 ds_put_byte (out, ' ');
6064 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6065 ds_put_cstr (out, repr);
6068 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6069 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding substitutions that begin with
   a right parenthesis:

     )DATE   becomes NOW formatted with "%x";
     )TIME   becomes NOW formatted with "%X";
     )TABLE  becomes the table expression between token offsets EXPR_START
             and EXPR_END, rendered by put_table_expression().

   Text up to the next right parenthesis is copied as is.  A right
   parenthesis that does not begin one of the substitutions above is copied
   literally and skipped.

   NOTE(review): the enclosing loop and its exit are not visible in this
   extract; presumably the emptiness test ends the function. */
6075 put_title_text (struct string *out, struct substring in, time_t now,
6076 struct lexer *lexer, struct dictionary *dict,
6077 int expr_start, int expr_end)
/* Copy everything before the next right parenthesis. */
6081 size_t chunk = ss_find_byte (in, ')');
6082 ds_put_substring (out, ss_head (in, chunk));
6083 ss_advance (&in, chunk);
6084 if (ss_is_empty (in))
6087 if (skip_prefix (&in, ss_cstr (")DATE")))
6088 put_strftime (out, now, "%x");
6089 else if (skip_prefix (&in, ss_cstr (")TIME")))
6090 put_strftime (out, now, "%X");
6091 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6092 put_table_expression (out, lexer, dict, expr_start, expr_end);
6095 ds_put_byte (out, ')');
6096 ss_advance (&in, 1);
/* Parses and executes the CTABLES command read from LEXER against dataset
   DS.

   The function sets up a `struct ctables' with defaults, parses the global
   subcommands that may precede the first TABLE subcommand (FORMAT, VLABELS,
   MRSETS, SMISSING, PCOMPUTE, PPROPERTIES, WEIGHT, HIDESMALLCOUNTS), then
   parses one or more TABLE subcommands, each followed by its per-table
   subcommands (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST,
   COMPARETEST).  Finally it opens the dataset, runs ctables_execute(),
   commits the procedure, and returns CMD_SUCCESS or CMD_FAILURE.

   NOTE(review): this extract omits short lines (braces, failure jumps, loop
   headers, some assignments), so the comments below describe only the
   statements that are visible. */
6102 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6104 struct casereader *input = NULL;
/* The measure guesser is run over the dataset's cases before parsing.  (Any
   condition guarding these statements is not visible in this extract.) */
6106 struct measure_guesser *mg = measure_guesser_create (ds);
6109 input = proc_open (ds);
6110 measure_guesser_run (mg, input);
6111 measure_guesser_destroy (mg);
/* Every variable starts with the global variable-display setting; the
   VLABELS subcommand below overrides it for individual variables. */
6114 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6115 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6116 enum settings_value_show tvars = settings_get_show_variables ();
6117 for (size_t i = 0; i < n_vars; i++)
6118 vlabels[i] = (enum ctables_vlabel) tvars;
/* The look is derived from the default look, with omit_empty turned off. */
6120 struct pivot_table_look *look = pivot_table_look_unshare (
6121 pivot_table_look_ref (pivot_table_look_get_default ()));
6122 look->omit_empty = false;
6124 struct ctables *ct = xmalloc (sizeof *ct);
6125 *ct = (struct ctables) {
6126 .dict = dataset_dict (ds),
6128 .ctables_formats = FMT_SETTINGS_INIT,
6130 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Taken once and passed to put_title_text() for every title, so that all
   date and time substitutions use the same instant. */
6133 time_t now = time (NULL);
/* Install the four CTABLES-specific custom formats, choosing for each the
   style string that corresponds to the current decimal point character. */
6138 const char *dot_string;
6139 const char *comma_string;
6141 static const struct ctf ctfs[4] = {
6142 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6143 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6144 { CTEF_PAREN, "-,(,),", "-.(.)." },
6145 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6147 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6148 for (size_t i = 0; i < 4; i++)
6150 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6151 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6152 fmt_number_style_from_string (s));
6155 if (!lex_force_match (lexer, T_SLASH))
/* Global subcommands, up to the first TABLE keyword. */
6158 while (!lex_match_id (lexer, "TABLE"))
/* FORMAT: column width limits, their units, and the text shown for empty and
   missing cells. */
6160 if (lex_match_id (lexer, "FORMAT"))
6162 double widths[2] = { SYSMIS, SYSMIS };
6163 double units_per_inch = 72.0;
6165 while (lex_token (lexer) != T_SLASH)
6167 if (lex_match_id (lexer, "MINCOLWIDTH"))
6169 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6172 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6174 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6177 else if (lex_match_id (lexer, "UNITS"))
6179 lex_match (lexer, T_EQUALS);
6180 if (lex_match_id (lexer, "POINTS"))
6181 units_per_inch = 72.0;
6182 else if (lex_match_id (lexer, "INCHES"))
6183 units_per_inch = 1.0;
6184 else if (lex_match_id (lexer, "CM"))
6185 units_per_inch = 2.54;
6188 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6192 else if (lex_match_id (lexer, "EMPTY"))
6197 lex_match (lexer, T_EQUALS);
6198 if (lex_match_id (lexer, "ZERO"))
6200 /* Nothing to do. */
6202 else if (lex_match_id (lexer, "BLANK"))
6203 ct->zero = xstrdup ("");
6204 else if (lex_force_string (lexer))
6206 ct->zero = ss_xstrdup (lex_tokss (lexer));
6212 else if (lex_match_id (lexer, "MISSING"))
6214 lex_match (lexer, T_EQUALS);
6215 if (!lex_force_string (lexer))
/* The string "." is handled differently from any other string; the
   alternative value is not visible in this extract. */
6219 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6220 ? ss_xstrdup (lex_tokss (lexer))
6226 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6227 "UNITS", "EMPTY", "MISSING");
/* SYSMIS marks a width that was not specified. */
6232 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6233 && widths[0] > widths[1])
6235 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Each specified width is divided by the units-per-inch factor and then
   scaled by 96 (presumably 96 units per inch in the look -- confirm). */
6239 for (size_t i = 0; i < 2; i++)
6240 if (widths[i] != SYSMIS)
6242 int *wr = ct->look->width_ranges[TABLE_HORZ];
6243 wr[i] = widths[i] / units_per_inch * 96.0;
/* VLABELS: how the listed variables are labelled. */
6248 else if (lex_match_id (lexer, "VLABELS"))
6250 if (!lex_force_match_id (lexer, "VARIABLES"))
6252 lex_match (lexer, T_EQUALS);
6254 struct variable **vars;
6256 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6260 if (!lex_force_match_id (lexer, "DISPLAY"))
6265 lex_match (lexer, T_EQUALS);
6267 enum ctables_vlabel vlabel;
6268 if (lex_match_id (lexer, "DEFAULT"))
6269 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6270 else if (lex_match_id (lexer, "NAME"))
6272 else if (lex_match_id (lexer, "LABEL"))
6273 vlabel = CTVL_LABEL;
6274 else if (lex_match_id (lexer, "BOTH"))
6276 else if (lex_match_id (lexer, "NONE"))
6280 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* The setting is stored per variable, indexed by dictionary index. */
6286 for (size_t i = 0; i < n_vars; i++)
6287 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
/* MRSETS COUNTDUPLICATES, followed by a boolean. */
6290 else if (lex_match_id (lexer, "MRSETS"))
6292 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6294 lex_match (lexer, T_EQUALS);
6295 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
/* SMISSING, followed by VARIABLE or LISTWISE. */
6298 else if (lex_match_id (lexer, "SMISSING"))
6300 if (lex_match_id (lexer, "VARIABLE"))
6301 ct->smissing_listwise = false;
6302 else if (lex_match_id (lexer, "LISTWISE"))
6303 ct->smissing_listwise = true;
6306 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6310 else if (lex_match_id (lexer, "PCOMPUTE"))
6312 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6315 else if (lex_match_id (lexer, "PPROPERTIES"))
6317 if (!ctables_parse_pproperties (lexer, ct))
/* WEIGHT VARIABLE: the variable is stored in ct->e_weight. */
6320 else if (lex_match_id (lexer, "WEIGHT"))
6322 if (!lex_force_match_id (lexer, "VARIABLE"))
6324 lex_match (lexer, T_EQUALS);
6325 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS: an explicit COUNT sets the threshold; otherwise a
   threshold that is still 0 becomes 5. */
6329 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6331 if (lex_match_id (lexer, "COUNT"))
6333 lex_match (lexer, T_EQUALS);
6334 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6337 ct->hide_threshold = lex_integer (lexer);
6340 else if (ct->hide_threshold == 0)
6341 ct->hide_threshold = 5;
6345 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6346 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6347 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6351 if (!lex_force_match (lexer, T_SLASH))
/* TABLE subcommands.  Each pass creates one table, appends it to
   ct->tables, and parses its axes and per-table subcommands. */
6355 size_t allocated_tables = 0;
6358 if (ct->n_tables >= allocated_tables)
6359 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6360 sizeof *ct->tables);
/* Default category specification for the new table. */
6362 struct ctables_category *cat = xmalloc (sizeof *cat);
6363 *cat = (struct ctables_category) {
6365 .include_missing = false,
6366 .sort_ascending = true,
6369 struct ctables_categories *c = xmalloc (sizeof *c);
6370 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6371 *c = (struct ctables_categories) {
/* One categories slot per dictionary variable. */
6378 struct ctables_categories **categories = xnmalloc (n_vars,
6379 sizeof *categories);
6380 for (size_t i = 0; i < n_vars; i++)
/* Table defaults: summary labels on the column axis and visible; category
   labels for each axis on that same axis. */
6383 struct ctables_table *t = xmalloc (sizeof *t);
6384 *t = (struct ctables_table) {
6386 .slabels_axis = PIVOT_AXIS_COLUMN,
6387 .slabels_visible = true,
6388 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6390 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6391 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6392 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6394 .clabels_from_axis = PIVOT_AXIS_LAYER,
6395 .categories = categories,
6396 .n_categories = n_vars,
6399 ct->tables[ct->n_tables++] = t;
/* Table expression: a row axis, then optional column and layer axes that are
   separated by BY.  The token offsets of the expression are remembered for
   the )TABLE title substitution. */
6401 lex_match (lexer, T_EQUALS);
6402 int expr_start = lex_ofs (lexer);
6403 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6405 if (lex_match (lexer, T_BY))
6407 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6408 ct, t, PIVOT_AXIS_COLUMN))
6411 if (lex_match (lexer, T_BY))
6413 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6414 ct, t, PIVOT_AXIS_LAYER))
6418 int expr_end = lex_ofs (lexer);
6420 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6421 && !t->axes[PIVOT_AXIS_LAYER])
6423 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables may appear on one axis only; when that rule is broken,
   each offending axis is pointed out with a note. */
6427 const struct ctables_axis *scales[PIVOT_N_AXES];
6428 size_t n_scales = 0;
6429 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6431 scales[a] = find_scale (t->axes[a]);
6437 msg (SE, _("Scale variables may appear only on one axis."));
6438 if (scales[PIVOT_AXIS_ROW])
6439 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6440 _("This scale variable appears on the rows axis."));
6441 if (scales[PIVOT_AXIS_COLUMN])
6442 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6443 _("This scale variable appears on the columns axis."));
6444 if (scales[PIVOT_AXIS_LAYER])
6445 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6446 _("This scale variable appears on the layer axis."));
/* Summaries may likewise appear on one axis only. */
6450 const struct ctables_axis *summaries[PIVOT_N_AXES];
6451 size_t n_summaries = 0;
6452 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6454 summaries[a] = (scales[a]
6456 : find_categorical_summary_spec (t->axes[a]));
6460 if (n_summaries > 1)
6462 msg (SE, _("Summaries may appear only on one axis."));
6463 if (summaries[PIVOT_AXIS_ROW])
6464 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6465 _("This variable on the rows axis has a summary."));
6466 if (summaries[PIVOT_AXIS_COLUMN])
6467 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6468 _("This variable on the columns axis has a summary."));
6469 if (summaries[PIVOT_AXIS_LAYER])
6470 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6471 _("This variable on the layers axis has a summary."));
/* The summary axis is an axis that has a summary if there is one, otherwise
   an axis that is present at all. */
6474 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6475 if (n_summaries ? summaries[a] : t->axes[a])
6477 t->summary_axis = a;
6481 if (lex_token (lexer) == T_ENDCMD)
6483 if (!ctables_prepare_table (t))
6487 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE keyword or the end of the
   command. */
6490 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
/* SLABELS: position and visibility of summary labels. */
6492 if (lex_match_id (lexer, "SLABELS"))
6494 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6496 if (lex_match_id (lexer, "POSITION"))
6498 lex_match (lexer, T_EQUALS);
6499 if (lex_match_id (lexer, "COLUMN"))
6500 t->slabels_axis = PIVOT_AXIS_COLUMN;
6501 else if (lex_match_id (lexer, "ROW"))
6502 t->slabels_axis = PIVOT_AXIS_ROW;
6503 else if (lex_match_id (lexer, "LAYER"))
6504 t->slabels_axis = PIVOT_AXIS_LAYER;
6507 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6511 else if (lex_match_id (lexer, "VISIBLE"))
6513 lex_match (lexer, T_EQUALS);
6514 if (!parse_bool (lexer, &t->slabels_visible))
6519 lex_error_expecting (lexer, "POSITION", "VISIBLE");
/* CLABELS: which axis carries the category labels of the row and column
   variables. */
6524 else if (lex_match_id (lexer, "CLABELS"))
6526 if (lex_match_id (lexer, "AUTO"))
6528 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6529 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6531 else if (lex_match_id (lexer, "ROWLABELS"))
6533 lex_match (lexer, T_EQUALS);
6534 if (lex_match_id (lexer, "OPPOSITE"))
6535 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6536 else if (lex_match_id (lexer, "LAYER"))
6537 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6540 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6544 else if (lex_match_id (lexer, "COLLABELS"))
6546 lex_match (lexer, T_EQUALS);
6547 if (lex_match_id (lexer, "OPPOSITE"))
6548 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6549 else if (lex_match_id (lexer, "LAYER"))
6550 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6553 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6559 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
/* CRITERIA CILEVEL: confidence level, validated against the half-open range
   from 0 to 100. */
6564 else if (lex_match_id (lexer, "CRITERIA"))
6566 if (!lex_force_match_id (lexer, "CILEVEL"))
6568 lex_match (lexer, T_EQUALS);
6570 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6572 t->cilevel = lex_number (lexer);
6575 else if (lex_match_id (lexer, "CATEGORIES"))
6577 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
/* TITLES: CAPTION, CORNER, and TITLE texts.  Consecutive string tokens are
   joined with single spaces after substitution by put_title_text(). */
6581 else if (lex_match_id (lexer, "TITLES"))
6586 if (lex_match_id (lexer, "CAPTION"))
6587 textp = &t->caption;
6588 else if (lex_match_id (lexer, "CORNER"))
6590 else if (lex_match_id (lexer, "TITLE"))
6594 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6597 lex_match (lexer, T_EQUALS);
6599 struct string s = DS_EMPTY_INITIALIZER;
6600 while (lex_is_string (lexer))
6602 if (!ds_is_empty (&s))
6603 ds_put_byte (&s, ' ');
6604 put_title_text (&s, lex_tokss (lexer), now,
6605 lexer, dataset_dict (ds),
6606 expr_start, expr_end);
6610 *textp = ds_steal_cstr (&s);
6612 while (lex_token (lexer) != T_SLASH
6613 && lex_token (lexer) != T_ENDCMD);
/* SIGTEST: settings for the chi-square test, stored in a newly allocated
   t->chisq. */
6615 else if (lex_match_id (lexer, "SIGTEST"))
6619 t->chisq = xmalloc (sizeof *t->chisq);
6620 *t->chisq = (struct ctables_chisq) {
6622 .include_mrsets = true,
6623 .all_visible = true,
6629 if (lex_match_id (lexer, "TYPE"))
6631 lex_match (lexer, T_EQUALS);
6632 if (!lex_force_match_id (lexer, "CHISQUARE"))
6635 else if (lex_match_id (lexer, "ALPHA"))
6637 lex_match (lexer, T_EQUALS);
6638 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6640 t->chisq->alpha = lex_number (lexer);
6643 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6645 lex_match (lexer, T_EQUALS);
6646 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6649 else if (lex_match_id (lexer, "CATEGORIES"))
6651 lex_match (lexer, T_EQUALS);
6652 if (lex_match_id (lexer, "ALLVISIBLE"))
6653 t->chisq->all_visible = true;
6654 else if (lex_match_id (lexer, "SUBTOTALS"))
6655 t->chisq->all_visible = false;
6658 lex_error_expecting (lexer,
6659 "ALLVISIBLE", "SUBTOTALS");
6665 lex_error_expecting (lexer, "TYPE", "ALPHA",
6666 "INCLUDEMRSETS", "CATEGORIES");
6670 while (lex_token (lexer) != T_SLASH
6671 && lex_token (lexer) != T_ENDCMD);
/* COMPARETEST: settings for pairwise comparisons, stored in a newly
   allocated t->pairwise. */
6673 else if (lex_match_id (lexer, "COMPARETEST"))
6677 t->pairwise = xmalloc (sizeof *t->pairwise);
6678 *t->pairwise = (struct ctables_pairwise) {
6680 .alpha = { .05, .05 },
6681 .adjust = BONFERRONI,
6682 .include_mrsets = true,
6683 .meansvariance_allcats = true,
6684 .all_visible = true,
6693 if (lex_match_id (lexer, "TYPE"))
6695 lex_match (lexer, T_EQUALS);
6696 if (lex_match_id (lexer, "PROP"))
6697 t->pairwise->type = PROP;
6698 else if (lex_match_id (lexer, "MEAN"))
6699 t->pairwise->type = MEAN;
6702 lex_error_expecting (lexer, "PROP", "MEAN");
/* ALPHA takes one or two values, each validated against the open range from
   0 to 1.  Two values are stored in ascending order; a single value is
   stored in both slots. */
6706 else if (lex_match_id (lexer, "ALPHA"))
6708 lex_match (lexer, T_EQUALS);
6710 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6712 double a0 = lex_number (lexer);
6715 lex_match (lexer, T_COMMA);
6716 if (lex_is_number (lexer))
6718 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6720 double a1 = lex_number (lexer);
6723 t->pairwise->alpha[0] = MIN (a0, a1);
6724 t->pairwise->alpha[1] = MAX (a0, a1);
6727 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6729 else if (lex_match_id (lexer, "ADJUST"))
6731 lex_match (lexer, T_EQUALS);
6732 if (lex_match_id (lexer, "BONFERRONI"))
6733 t->pairwise->adjust = BONFERRONI;
6734 else if (lex_match_id (lexer, "BH"))
6735 t->pairwise->adjust = BH;
6736 else if (lex_match_id (lexer, "NONE"))
6737 t->pairwise->adjust = 0;
6740 lex_error_expecting (lexer, "BONFERRONI", "BH",
6745 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6747 lex_match (lexer, T_EQUALS);
6748 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6751 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6753 lex_match (lexer, T_EQUALS);
6754 if (lex_match_id (lexer, "ALLCATS"))
6755 t->pairwise->meansvariance_allcats = true;
6756 else if (lex_match_id (lexer, "TESTEDCATS"))
6757 t->pairwise->meansvariance_allcats = false;
6760 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6764 else if (lex_match_id (lexer, "CATEGORIES"))
6766 lex_match (lexer, T_EQUALS);
6767 if (lex_match_id (lexer, "ALLVISIBLE"))
6768 t->pairwise->all_visible = true;
6769 else if (lex_match_id (lexer, "SUBTOTALS"))
6770 t->pairwise->all_visible = false;
6773 lex_error_expecting (lexer, "ALLVISIBLE",
6778 else if (lex_match_id (lexer, "MERGE"))
6780 lex_match (lexer, T_EQUALS);
6781 if (!parse_bool (lexer, &t->pairwise->merge))
6784 else if (lex_match_id (lexer, "STYLE"))
6786 lex_match (lexer, T_EQUALS);
6787 if (lex_match_id (lexer, "APA"))
6788 t->pairwise->apa_style = true;
6789 else if (lex_match_id (lexer, "SIMPLE"))
6790 t->pairwise->apa_style = false;
6793 lex_error_expecting (lexer, "APA", "SIMPLE");
6797 else if (lex_match_id (lexer, "SHOWSIG"))
6799 lex_match (lexer, T_EQUALS);
6800 if (!parse_bool (lexer, &t->pairwise->show_sig))
6805 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6806 "INCLUDEMRSETS", "MEANSVARIANCE",
6807 "CATEGORIES", "MERGE", "STYLE",
6812 while (lex_token (lexer) != T_SLASH
6813 && lex_token (lexer) != T_ENDCMD);
6817 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6818 "CRITERIA", "CATEGORIES", "TITLES",
6819 "SIGTEST", "COMPARETEST");
6823 if (!lex_match (lexer, T_SLASH))
/* Category labels may be moved off their own axis for rows or for columns,
   but not for both. */
6827 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6828 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6830 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6834 if (!ctables_prepare_table (t))
6837 while (lex_token (lexer) != T_ENDCMD);
/* Execution: read the data, build and output the tables, then commit the
   procedure.  The command succeeds only if both steps succeed. */
6840 input = proc_open (ds);
6841 bool ok = ctables_execute (ds, input, ct);
6842 ok = proc_commit (ds) && ok;
6844 ctables_destroy (ct);
6845 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Cleanup that follows the success return above; presumably the failure
   path -- confirm against the full source. */
6850 ctables_destroy (ct);