1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
167 #if 0 /* Multiple response sets not yet implemented. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* One postcompute definition.  Instances are kept in struct ctables's
   'postcomputes' hmap. */
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr;
333 struct ctables_summary_spec_set *specs;
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
448 static void ctables_stack_uninit (struct ctables_stack *);
452 struct hmap_node node;
457 struct ctables_occurrence
459 struct hmap_node node;
463 struct ctables_section
466 struct ctables_table *table;
467 struct ctables_nest *nests[PIVOT_N_AXES];
470 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
471 struct hmap cells; /* Contains "struct ctables_cell"s. */
472 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 static void ctables_section_uninit (struct ctables_section *);
479 struct ctables *ctables;
480 struct ctables_axis *axes[PIVOT_N_AXES];
481 struct ctables_stack stacks[PIVOT_N_AXES];
482 struct ctables_section *sections;
484 enum pivot_axis_type summary_axis;
485 struct ctables_summary_spec_set summary_specs;
486 struct variable **sum_vars;
489 enum pivot_axis_type slabels_axis;
490 bool slabels_visible;
492 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
494 Most commonly, label_axis[a] == a, and in particular we always have
495 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
497 If ROWLABELS or COLLABELS is specified, then one of
498 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
499 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
501 If any category labels are moved, then 'clabels_example' is one of the
502 variables being moved (and it is otherwise NULL). All of the variables
503 being moved have the same width, value labels, and categories, so this
504 example variable can be used to find those out.
506 The remaining members in this group are relevant only if category labels
509 'clabels_values_map' holds a "struct ctables_value" for all the values
510 that appear in all of the variables in the moved categories. It is
511 accumulated as the data is read. Once the data is fully read, its
512 sorted values are put into 'clabels_values' and 'n_clabels_values'.
514 enum pivot_axis_type label_axis[PIVOT_N_AXES];
515 enum pivot_axis_type clabels_from_axis;
516 const struct variable *clabels_example;
517 struct hmap clabels_values_map;
518 struct ctables_value **clabels_values;
519 size_t n_clabels_values;
521 /* Indexed by variable dictionary index. */
522 struct ctables_categories **categories;
531 struct ctables_chisq *chisq;
532 struct ctables_pairwise *pairwise;
535 struct ctables_categories
538 struct ctables_category *cats;
543 struct ctables_category
545 enum ctables_category_type
547 /* Explicit category lists. */
550 CCT_NRANGE, /* Numerical range. */
551 CCT_SRANGE, /* String range. */
556 /* Totals and subtotals. */
560 /* Implicit category lists. */
565 /* For contributing to TOTALN. */
566 CCT_EXCLUDED_MISSING,
570 struct ctables_category *subtotal;
576 double number; /* CCT_NUMBER. */
577 struct substring string; /* CCT_STRING, in dictionary encoding. */
578 double nrange[2]; /* CCT_NRANGE. */
579 struct substring srange[2]; /* CCT_SRANGE. */
583 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
584 bool hide_subcategories; /* CCT_SUBTOTAL. */
587 /* CCT_POSTCOMPUTE. */
590 const struct ctables_postcompute *pc;
591 enum fmt_type parse_format;
594 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
597 bool include_missing;
601 enum ctables_summary_function sort_function;
602 struct variable *sort_var;
607 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
608 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
609 struct msg_location *location;
/* Releases the resources held by CAT: its source location and, depending on
   the category type, its string, both endpoints of its string range, or its
   total label. */
613 ctables_category_uninit (struct ctables_category *cat)
618 msg_location_destroy (cat->location);
625 case CCT_POSTCOMPUTE:
629 ss_dealloc (&cat->string);
/* Both endpoints of a string range are released. */
633 ss_dealloc (&cat->srange[0]);
634 ss_dealloc (&cat->srange[1]);
639 free (cat->total_label);
647 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may have a null 'string'
   member.  Two null substrings are equal; a null substring never equals a
   non-null one; two non-null substrings are compared with ss_equals(). */
653 nullable_substring_equal (const struct substring *a,
654 const struct substring *b)
656 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B for equivalence.  The category types are
   compared first.  The remaining comparison depends on the type: numbers and
   numeric ranges by value, strings with ss_equals(), string ranges with
   nullable_substring_equal() on each endpoint, postcomputes by pointer
   identity, total labels with strcmp(), and otherwise the include_missing,
   sort_ascending, sort_function, sort_var and percentile members. */
660 ctables_category_equal (const struct ctables_category *a,
661 const struct ctables_category *b)
663 if (a->type != b->type)
669 return a->number == b->number;
672 return ss_equals (a->string, b->string);
675 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* Range endpoints may be null substrings, hence the nullable comparison. */
678 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
679 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
685 case CCT_POSTCOMPUTE:
/* Postcomputes are equal only if they are the very same object. */
686 return a->pc == b->pc;
690 return !strcmp (a->total_label, b->total_label);
695 return (a->include_missing == b->include_missing
696 && a->sort_ascending == b->sort_ascending
697 && a->sort_function == b->sort_function
698 && a->sort_var == b->sort_var
699 && a->percentile == b->percentile);
701 case CCT_EXCLUDED_MISSING:
/* Handles release of a reference to C.  The reference count 'n_refs' must be
   positive on entry.  Each category in C is uninitialized with
   ctables_category_uninit().
   NOTE(review): presumably the categories are destroyed only when the count
   drops to zero -- confirm against the full function body. */
709 ctables_categories_unref (struct ctables_categories *c)
714 assert (c->n_refs > 0);
718 for (size_t i = 0; i < c->n_cats; i++)
719 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They must agree on the number of
   categories and on 'show_empty', and each pair of categories at the same
   index is compared with ctables_category_equal(). */
725 ctables_categories_equal (const struct ctables_categories *a,
726 const struct ctables_categories *b)
728 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
/* Counts match, so indexing B's array with A's bound is safe. */
731 for (size_t i = 0; i < a->n_cats; i++)
732 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
738 /* Chi-square test (SIGTEST). */
746 /* Pairwise comparison test (COMPARETEST). */
747 struct ctables_pairwise
749 enum { PROP, MEAN } type;
752 bool meansvariance_allcats;
754 enum { BONFERRONI = 1, BH } adjust;
778 struct variable *var;
780 struct ctables_summary_spec_set specs[N_CSVS];
784 struct ctables_axis *subs[2];
787 struct msg_location *loc;
790 static void ctables_axis_destroy (struct ctables_axis *);
799 enum ctables_function_availability
801 CTFA_ALL, /* Any variables. */
802 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
803 //CTFA_MRSETS, /* Only multiple-response sets */
806 struct ctables_summary_spec
808 enum ctables_summary_function function;
809 double percentile; /* CTSF_PTILE only. */
812 struct fmt_spec format;
813 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Copies summary specification SRC into DST.  The label, if any, is
   duplicated with xstrdup_if_nonnull() so that DST gets its own copy of the
   string. */
820 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
821 const struct ctables_summary_spec *src)
824 dst->label = xstrdup_if_nonnull (src->label);
828 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is allocated
   (or none at all if SRC is empty) and each spec is copied with
   ctables_summary_spec_clone().  DST is then overwritten as a whole, carrying
   over 'is_scale' among its members. */
835 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
836 const struct ctables_summary_spec_set *src)
/* An empty set gets a null array rather than a zero-size allocation. */
838 struct ctables_summary_spec *specs
839 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
840 for (size_t i = 0; i < src->n; i++)
841 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
843 *dst = (struct ctables_summary_spec_set) {
848 .is_scale = src->is_scale,
/* Releases the contents of SET: every spec is uninitialized with
   ctables_summary_spec_uninit() and the 'listwise_vars' array is freed. */
853 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
855 for (size_t i = 0; i < set->n; i++)
856 ctables_summary_spec_uninit (&set->specs[i]);
857 free (set->listwise_vars);
/* Parses a column width setting from LEXER.  An equals sign is accepted but
   not required.  The value is either the keyword DEFAULT or a number in the
   closed range [0, DBL_MAX]; NAME identifies the setting in the range check.
   A parsed number is stored in *WIDTH. */
862 parse_col_width (struct lexer *lexer, const char *name, double *width)
/* The result is ignored, so the equals sign is optional. */
864 lex_match (lexer, T_EQUALS);
865 if (lex_match_id (lexer, "DEFAULT"))
867 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
869 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER, for storage in *B.  If neither
   keyword is present, reports an error saying that YES or NO was expected. */
879 parse_bool (struct lexer *lexer, bool *b)
881 if (lex_match_id (lexer, "NO"))
883 else if (lex_match_id (lexer, "YES"))
887 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F, looked up in a table
   that is built from the AVAILABILITY column of the S() entries and indexed
   by summary function. */
893 static enum ctables_function_availability
894 ctables_function_availability (enum ctables_summary_function f)
896 static enum ctables_function_availability availability[] = {
897 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
902 return availability[f];
/* Returns true if F is one of the plain count functions: CTSF_COUNT,
   CTSF_ECOUNT, or CTSF_UCOUNT. */
906 ctables_summary_function_is_count (enum ctables_summary_function f)
908 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
/* Parses a summary function name from LEXER and stores the matching function
   in *F.  The current token must be an identifier.  It is compared with
   ss_equals_case() against a table holding every S() name plus the
   percentage function names with the ".COUNT" suffix omitted.  If no table
   entry matches, reports "Expecting summary function name.". */
912 parse_ctables_summary_function (struct lexer *lexer,
913 enum ctables_summary_function *f)
917 enum ctables_summary_function function;
918 struct substring name;
920 static struct pair names[] = {
921 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
922 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
925 /* The .COUNT suffix may be omitted. */
926 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
927 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
928 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
929 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
930 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
931 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
932 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
936 if (!lex_force_id (lexer))
/* Linear search over the name table. */
939 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
940 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
942 *f = names[i].function;
947 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  The visible steps are: uninitialize the summary spec set
   for each summary variant, recursively destroy both sub-axes, and destroy
   the axis's source location.
   NOTE(review): which steps apply presumably depends on AXIS's operator --
   confirm against the full function body. */
952 ctables_axis_destroy (struct ctables_axis *axis)
960 for (size_t i = 0; i < N_CSVS; i++)
961 ctables_summary_spec_set_uninit (&axis->specs[i]);
966 ctables_axis_destroy (axis->subs[0]);
967 ctables_axis_destroy (axis->subs[1]);
970 msg_location_destroy (axis->loc);
/* Allocates a new axis node for operator OP with SUB0 and SUB1 as its two
   sub-axes.  The node's source location spans the tokens in LEXER from
   offset START_OFS through the token just before the current one. */
974 static struct ctables_axis *
975 ctables_axis_new_nonterminal (enum ctables_axis_op op,
976 struct ctables_axis *sub0,
977 struct ctables_axis *sub1,
978 struct lexer *lexer, int start_ofs)
980 struct ctables_axis *axis = xmalloc (sizeof *axis);
981 *axis = (struct ctables_axis) {
983 .subs = { sub0, sub1 },
984 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
989 struct ctables_axis_parse_ctx
992 struct dictionary *dict;
994 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The format class of FUNCTION is looked up in a table built from the FORMAT
   column of the S() entries.  The possible results are an F format of width
   40, a PCT format of width 40 with 1 decimal, or VAR's own print format.
   NOTE(review): presumably these correspond to CTF_COUNT, CTF_PERCENT and
   CTF_GENERAL respectively -- confirm against the case labels. */
997 static struct fmt_spec
998 ctables_summary_default_format (enum ctables_summary_function function,
999 const struct variable *var)
1001 static const enum ctables_format default_formats[] = {
1002 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1006 switch (default_formats[function])
1009 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1012 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1015 return *var_get_print_format (var);
/* Returns a new pivot value to label summary SPEC.

   One path produces a default label: "Percentile %.2f" for CTSF_PTILE, or
   otherwise the LABEL column of the function's S() entry.

   The other path expands SPEC's own label: the text is copied chunk by chunk
   into an output string, and each occurrence of ")CILEVEL" is replaced by
   CILEVEL formatted with "%g". */
1022 static struct pivot_value *
1023 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1027 static const char *default_labels[] = {
1028 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1033 return (spec->function == CTSF_PTILE
1034 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1036 : pivot_value_new_text (default_labels[spec->function]));
1040 struct substring in = ss_cstr (spec->label);
1041 struct substring target = ss_cstr (")CILEVEL");
1043 struct string out = DS_EMPTY_INITIALIZER;
/* Copy the text that precedes the next ")CILEVEL" and step past it. */
1046 size_t chunk = ss_find_substring (in, target);
1047 ds_put_substring (&out, ss_head (in, chunk));
1048 ss_advance (&in, chunk);
/* The output string's buffer is handed over to the new pivot value. */
1050 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the placeholder itself and emit the confidence level in its place. */
1052 ss_advance (&in, target.length);
1053 ds_put_format (&out, "%g", cilevel);
/* Returns the syntax name of summary FUNCTION, looked up in a table built
   from the NAME column of the S() entries. */
1059 ctables_summary_function_name (enum ctables_summary_function function)
1061 static const char *names[] = {
1062 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1066 return names[function];
/* Adds a summary specification for FUNCTION to AXIS, in the spec set for
   summary variant SV.

   For a variable axis (CTAO_VAR), the function's availability is checked
   first.  A scale-only function on a variable that is not treated as scale is
   reported as an error at LOC unless SV is CSV_TOTAL.  A new spec is then
   appended to AXIS->specs[SV], growing the array as needed.  The spec gets
   its own copy of LABEL, and uses *FORMAT if FORMAT is nonnull or else the
   function's default format for the variable.

   For any other axis, the spec is added recursively to both sub-axes, and
   the result of each recursive call is checked. */
1070 add_summary_spec (struct ctables_axis *axis,
1071 enum ctables_summary_function function, double percentile,
1072 const char *label, const struct fmt_spec *format,
1073 bool is_ctables_format, const struct msg_location *loc,
1074 enum ctables_summary_variant sv)
1076 if (axis->op == CTAO_VAR)
1078 const char *function_name = ctables_summary_function_name (function);
1079 const char *var_name = var_get_name (axis->var);
1080 switch (ctables_function_availability (function))
1084 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1085 "response sets."), function_name);
1086 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
/* Scale-only functions are still accepted for the totals variant. */
1092 if (!axis->scale && sv != CSV_TOTAL)
1095 _("Summary function %s applies only to scale variables."),
1097 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1107 struct ctables_summary_spec_set *set = &axis->specs[sv];
/* Grow the spec array when it is full. */
1108 if (set->n >= set->allocated)
1109 set->specs = x2nrealloc (set->specs, &set->allocated,
1110 sizeof *set->specs);
1112 struct ctables_summary_spec *dst = &set->specs[set->n++];
1113 *dst = (struct ctables_summary_spec) {
1114 .function = function,
1115 .percentile = percentile,
1116 .label = xstrdup_if_nonnull (label),
1117 .format = (format ? *format
1118 : ctables_summary_default_format (function, axis->var)),
1119 .is_ctables_format = is_ctables_format,
/* Not a variable axis: apply the same spec to each of the two sub-axes. */
1125 for (size_t i = 0; i < 2; i++)
1126 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1127 format, is_ctables_format, loc, sv))
1133 static struct ctables_axis *ctables_axis_parse_stack (
1134 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression from CTX's lexer: either a parenthesized
   stack expression or a single variable from CTX's dictionary.

   A variable may be followed by "[S]" to treat it as scale or "[C]" to treat
   it as categorical.  Without either marker, it is scale exactly when its
   measurement level is MEASURE_SCALE.  Using a string variable as scale is
   reported as an error and the new axis is destroyed. */
1137 static struct ctables_axis *
1138 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1140 if (lex_match (ctx->lexer, T_LPAREN))
1142 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* On a failed parse or a missing ')', the sub-axis is destroyed. */
1143 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1145 ctables_axis_destroy (sub);
1151 if (!lex_force_id (ctx->lexer))
/* Remember where the variable starts, for the axis's source location. */
1154 int start_ofs = lex_ofs (ctx->lexer);
1155 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1159 struct ctables_axis *axis = xmalloc (sizeof *axis);
1160 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1162 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1163 : lex_match_phrase (ctx->lexer, "[C]") ? false
1164 : var_get_measure (var) == MEASURE_SCALE);
1165 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1166 lex_ofs (ctx->lexer) - 1);
1167 if (axis->scale && var_is_alpha (var))
1169 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1171 var_get_name (var));
1172 ctables_axis_destroy (axis);
/* Returns true if null-terminated string S contains at least one decimal
   digit.  strcspn() yields the length of the leading run that has no digits,
   so the character at that index is the null terminator exactly when S has
   no digit at all. */
1180 has_digit (const char *s)
1182 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT.

   The type name is first checked, ignoring case, against the CTABLES-specific
   formats NEGPAREN, NEQUAL, PAREN and PCTPAREN, which map to the CTEF_*
   format types.  For one of these, the width and decimals are validated
   (errors: "requires width 2 or greater", "requires width greater than
   decimals") and *IS_CTABLES_FORMAT is set to true.

   For any other type name, *IS_CTABLES_FORMAT is set to false and the
   specifier is parsed as an ordinary format, which must be a valid output
   format compatible with numeric values. */
1186 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1187 bool *is_ctables_format)
1189 char type[FMT_TYPE_LEN_MAX + 1];
1190 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1193 if (!strcasecmp (type, "NEGPAREN"))
1194 format->type = CTEF_NEGPAREN;
1195 else if (!strcasecmp (type, "NEQUAL"))
1196 format->type = CTEF_NEQUAL;
1197 else if (!strcasecmp (type, "PAREN"))
1198 format->type = CTEF_PAREN;
1199 else if (!strcasecmp (type, "PCTPAREN"))
1200 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific format: use the ordinary format parser. */
1203 *is_ctables_format = false;
1204 return (parse_format_specifier (lexer, format)
1205 && fmt_check_output (format)
1206 && fmt_check_type_compat (format, VAL_NUMERIC));
1212 lex_next_error (lexer, -1, -1,
1213 _("Output format %s requires width 2 or greater."), type);
1216 else if (format->d > format->w - 1)
1218 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1219 "greater than decimals."), type);
1224 *is_ctables_format = true;
/* Parses a primary axis expression and, if a '[' follows it, the bracketed
   list of summary specifications attached to it.  Each specification is a
   summary function, a percentile (for PTILE only, in the range 0 to 100), an
   optional string label, and an optional format; it is registered on the
   parsed axis through add_summary_spec().

   NOTE(review): the loop construct, the return statements and the error
   label are elided from this listing; confirm control flow against the full
   file. */
1229 static struct ctables_axis *
1230 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1232 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
/* There is no summary list to parse when the primary failed or when no '['
   follows it. */
1233 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
/* Specifications initially apply to ordinary cells. */
1236 enum ctables_summary_variant sv = CSV_CELL;
/* First token of the current specification, used for its location. */
1239 int start_ofs = lex_ofs (ctx->lexer);
1241 /* Parse function. */
1242 enum ctables_summary_function function;
1243 if (!parse_ctables_summary_function (ctx->lexer, &function))
1246 /* Parse percentile. */
1247 double percentile = 0;
1248 if (function == CTSF_PTILE)
1250 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1252 percentile = lex_number (ctx->lexer);
1253 lex_get (ctx->lexer);
/* Optional label: copy the string token's text and consume the token. */
1258 if (lex_is_string (ctx->lexer))
1260 label = ss_xstrdup (lex_tokss (ctx->lexer));
1261 lex_get (ctx->lexer);
/* Optional format: attempted only when the next token is an identifier that
   contains a digit. */
1265 struct fmt_spec format;
1266 const struct fmt_spec *formatp;
1267 bool is_ctables_format = false;
1268 if (lex_token (ctx->lexer) == T_ID
1269 && has_digit (lex_tokcstr (ctx->lexer)))
1271 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1272 &is_ctables_format))
/* Location covering this specification, from its first token through the
   last token consumed; it is released right after add_summary_spec(). */
1282 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1283 lex_ofs (ctx->lexer) - 1);
1284 add_summary_spec (sub, function, percentile, label, formatp,
1285 is_ctables_format, loc, sv);
1287 msg_location_destroy (loc);
/* A comma between specifications is optional. */
1289 lex_match (ctx->lexer, T_COMMA);
/* While still in the cell list, TOTALS must be followed by '['.
   NOTE(review): the statement that presumably switches SV to CSV_TOTAL is
   elided here. */
1290 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1292 if (!lex_force_match (ctx->lexer, T_LBRACK))
/* ']' closes the current list; when it closed a TOTALS list, a second ']' is
   required as well. */
1296 else if (lex_match (ctx->lexer, T_RBRACK))
1298 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
/* Presumably the shared error exit: discard the partially built axis. */
1305 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a variable marked as scale.  A
   CTAO_VAR leaf yields itself when its `scale' flag is set and NULL
   otherwise; for other nodes both entries of `subs' are searched
   recursively.
   NOTE(review): the first branch of the if-chain and the code that returns
   the recursive result are elided from this listing. */
1309 static const struct ctables_axis *
1310 find_scale (const struct ctables_axis *axis)
1314 else if (axis->op == CTAO_VAR)
1315 return axis->scale ? axis : NULL;
1318 for (size_t i = 0; i < 2; i++)
1320 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a non-scale variable that has at
   least one CSV_CELL summary specification.  A CTAO_VAR leaf yields itself
   when it qualifies and NULL otherwise; for other nodes both entries of
   `subs' are searched recursively.
   NOTE(review): the first branch of the if-chain and the code that returns
   the recursive result are elided from this listing. */
1328 static const struct ctables_axis *
1329 find_categorical_summary_spec (const struct ctables_axis *axis)
1333 else if (axis->op == CTAO_VAR)
1334 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1337 for (size_t i = 0; i < 2; i++)
1339 const struct ctables_axis *sum
1340 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a sequence of postfix expressions joined by '>' (nesting).  For
   each '>' a CTAO_NEST node is built from the operands, after which two
   restrictions are enforced with diagnostics:

   - the outer and inner operand may not both contain a scale variable;
   - the outer operand may not contain a categorical variable that has
     summary specifications.

   NOTE(review): return statements and the update of LHS inside the loop are
   elided from this listing. */
1348 static struct ctables_axis *
1349 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1351 int start_ofs = lex_ofs (ctx->lexer);
1352 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1356 while (lex_match (ctx->lexer, T_GT))
1358 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1362 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1363 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Scale variables on both sides of '>' are rejected; each offender is
   pointed out in a follow-up note. */
1365 const struct ctables_axis *outer_scale = find_scale (lhs);
1366 const struct ctables_axis *inner_scale = find_scale (rhs);
1367 if (outer_scale && inner_scale)
1369 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1370 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1371 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1372 ctables_axis_destroy (nest);
/* Look for a summarized categorical variable in the outer operand; the
   condition guarding the diagnostics below is elided from this listing. */
1376 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1379 msg_at (SE, nest->loc,
1380 _("Summaries may only be requested for categorical variables "
1381 "at the innermost nesting level."));
1382 msg_at (SN, outer_sum->loc,
1383 _("This outer categorical variable has a summary."));
1384 ctables_axis_destroy (nest);
/* Parses a sequence of nest-level expressions joined by '+' (stacking).
   Each '+' combines the expression parsed so far with the next operand into
   a CTAO_STACK node, which becomes the new left-hand side.
   NOTE(review): the failure checks and return statements are elided from
   this listing. */
1394 static struct ctables_axis *
1395 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
/* Start offset of the whole expression; passed to every node built below. */
1397 int start_ofs = lex_ofs (ctx->lexer);
1398 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1402 while (lex_match (ctx->lexer, T_PLUS))
1404 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1408 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1409 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T and stores the result in
   T->axes[A].  Returns true if a non-null axis was stored.
   NOTE(review): the statement executed when the next token is BY, '/' or the
   end of the command is elided from this listing (presumably an early
   success return for an empty axis), as are the initializer fields of CTX. */
1416 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1417 struct ctables *ct, struct ctables_table *t,
1418 enum pivot_axis_type a)
/* These tokens mean that no axis expression is present at this position. */
1420 if (lex_token (lexer) == T_BY
1421 || lex_token (lexer) == T_SLASH
1422 || lex_token (lexer) == T_ENDCMD)
1425 struct ctables_axis_parse_ctx ctx = {
1431 t->axes[a] = ctables_axis_parse_stack (&ctx);
1432 return t->axes[a] != NULL;
/* Destructor for CHISQ; ctables_table_destroy() calls it on T->chisq.
   NOTE(review): the body is elided from this listing -- confirm what it
   releases against the full file. */
1436 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destructor for PAIRWISE; ctables_table_destroy() calls it on T->pairwise.
   NOTE(review): the body is elided from this listing -- confirm what it
   releases against the full file. */
1442 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources held by table T: its sections, category
   references, per-axis expression trees and stacks, summary specifications,
   the category-label value map, and the chi-square and pairwise settings.
   NOTE(review): some statements (e.g. additional free() calls and the final
   release of T itself) are elided from this listing. */
1448 ctables_table_destroy (struct ctables_table *t)
1453 for (size_t i = 0; i < t->n_sections; i++)
1454 ctables_section_uninit (&t->sections[i]);
/* Drop this table's reference to each categories object, then the array that
   held the pointers. */
1457 for (size_t i = 0; i < t->n_categories; i++)
1458 ctables_categories_unref (t->categories[i]);
1459 free (t->categories);
/* Per-axis state: the parsed expression tree and the derived stack. */
1461 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1463 ctables_axis_destroy (t->axes[a]);
1464 ctables_stack_uninit (&t->stacks[a]);
1466 free (t->summary_specs.specs);
/* Empty the map of category-label values.  Each value is destroyed using the
   width of T->clabels_example, and the SAFE iterator is needed because nodes
   are deleted while iterating. */
1468 struct ctables_value *ctv, *next_ctv;
1469 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1470 &t->clabels_values_map)
1472 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1473 hmap_delete (&t->clabels_values_map, &ctv->node);
1476 hmap_destroy (&t->clabels_values_map);
1477 free (t->clabels_values);
1483 ctables_chisq_destroy (t->chisq);
1484 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT: every postcompute (its location,
   expression and summary specifications), the CTABLES format settings, the
   reference to the pivot table look, and every table.
   NOTE(review): several statements, including the map argument of the
   iteration macro and any final free() calls, are elided from this
   listing. */
1489 ctables_destroy (struct ctables *ct)
/* The SAFE iterator is needed because each node is deleted from the map
   during the walk. */
1494 struct ctables_postcompute *pc, *next_pc;
1495 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1499 msg_location_destroy (pc->location);
1500 ctables_pcexpr_destroy (pc->expr);
1504 ctables_summary_spec_set_uninit (pc->specs);
1507 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1511 fmt_settings_uninit (&ct->ctables_formats);
1512 pivot_table_look_unref (ct->look);
1516 for (size_t i = 0; i < ct->n_tables; i++)
1517 ctables_table_destroy (ct->tables[i]);
/* Returns a category, by value, whose `nrange' member holds the numeric
   bounds LOW and HIGH, in that order.
   NOTE(review): at least one initializer line (presumably the category
   type) is elided from this listing. */
1522 static struct ctables_category
1523 cct_nrange (double low, double high)
1525 return (struct ctables_category) {
1527 .nrange = { low, high }
/* Returns a category, by value, whose `srange' member holds the string
   bounds LOW and HIGH, in that order.  The substrings are stored as given,
   without copying.
   NOTE(review): at least one initializer line (presumably the category
   type) is elided from this listing. */
1531 static struct ctables_category
1532 cct_srange (struct substring low, struct substring high)
1534 return (struct ctables_category) {
1536 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category after the
   keyword and stores a CCT_SUBTOTAL category in *CAT.  An optional
   `= "label"' supplies the label; otherwise a translated "Subtotal" is
   used.  HIDE_SUBCATEGORIES is recorded in the category (the caller passes
   true for HSUBTOTAL).
   NOTE(review): the declaration of total_label, token consumption and the
   return statements are elided from this listing. */
1541 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1542 struct ctables_category *cat)
1545 if (lex_match (lexer, T_EQUALS))
1547 if (!lex_force_string (lexer))
1550 total_label = ss_xstrdup (lex_tokss (lexer));
/* Default label, used when none was given explicitly. */
1554 total_label = xstrdup (_("Subtotal"));
1556 *cat = (struct ctables_category) {
1557 .type = CCT_SUBTOTAL,
1558 .hide_subcategories = hide_subcategories,
1559 .total_label = total_label
/* Produces a recoded copy of the current token's string with trailing
   spaces removed.
   NOTE(review): presumably the recoding is from UTF-8 to DICT's encoding --
   confirm against recode_substring_pool()'s parameter order.  The token
   consumption and the return statement are elided from this listing. */
1564 static struct substring
1565 parse_substring (struct lexer *lexer, struct dictionary *dict)
1567 struct substring s = recode_substring_pool (
1568 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1569 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list into *CAT.  The accepted
   forms, in the order tested, are:

   - OTHERNM, MISSING;
   - SUBTOTAL, HSUBTOTAL (delegated to ctables_table_parse_subtotal());
   - LO THRU <string or number>;
   - <number>, <number> THRU HI, <number> THRU <number>;
   - <string>, <string> THRU HI, <string> THRU <string>;
   - &<name>, a reference to a previously defined postcompute.

   Anything else is a syntax error.
   NOTE(review): one parameter line, the token-consuming calls and the
   return statements are elided from this listing. */
1575 ctables_table_parse_explicit_category (struct lexer *lexer,
1576 struct dictionary *dict,
1578 struct ctables_category *cat)
1580 if (lex_match_id (lexer, "OTHERNM"))
1581 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1582 else if (lex_match_id (lexer, "MISSING"))
1583 *cat = (struct ctables_category) { .type = CCT_MISSING };
/* The boolean argument says whether subcategories are hidden (HSUBTOTAL). */
1584 else if (lex_match_id (lexer, "SUBTOTAL"))
1585 return ctables_table_parse_subtotal (lexer, false, cat);
1586 else if (lex_match_id (lexer, "HSUBTOTAL"))
1587 return ctables_table_parse_subtotal (lexer, true, cat);
/* Open-ended low range.  For strings a null lower bound stands for LO; for
   numbers -DBL_MAX does. */
1588 else if (lex_match_id (lexer, "LO"))
1590 if (!lex_force_match_id (lexer, "THRU"))
1592 if (lex_is_string (lexer))
1594 struct substring sr0 = { .string = NULL };
1595 struct substring sr1 = parse_substring (lexer, dict);
1596 *cat = cct_srange (sr0, sr1);
1598 else if (lex_force_num (lexer))
1600 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
/* Single number or numeric range; HI is represented by DBL_MAX. */
1606 else if (lex_is_number (lexer))
1608 double number = lex_number (lexer);
1610 if (lex_match_id (lexer, "THRU"))
1612 if (lex_match_id (lexer, "HI"))
1613 *cat = cct_nrange (number, DBL_MAX);
1616 if (!lex_force_num (lexer))
1618 *cat = cct_nrange (number, lex_number (lexer));
1623 *cat = (struct ctables_category) {
/* Single string or string range; HI is represented by a null upper bound. */
1628 else if (lex_is_string (lexer))
1630 struct substring s = parse_substring (lexer, dict);
1631 if (lex_match_id (lexer, "THRU"))
1633 if (lex_match_id (lexer, "HI"))
1635 struct substring sr1 = { .string = NULL };
1636 *cat = cct_srange (s, sr1);
1640 if (!lex_force_string (lexer))
1645 struct substring sr1 = parse_substring (lexer, dict);
1646 *cat = cct_srange (s, sr1);
1650 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* '&' introduces a postcompute reference, looked up by name in CT. */
1652 else if (lex_match (lexer, T_AND))
1654 if (!lex_force_id (lexer))
1656 struct ctables_postcompute *pc = ctables_find_postcompute (
1657 ct, lex_tokcstr (lexer));
/* Diagnostic for an unknown name (the guarding condition is elided); the
   location spans the preceding token through the current one. */
1660 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1661 msg_at (SE, loc, _("Unknown postcompute &%s."),
1662 lex_tokcstr (lexer));
1663 msg_location_destroy (loc);
1668 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1672 lex_error (lexer, NULL);
/* Converts the text S of a category specification to a number using input
   format FORMAT and DICT's encoding, via data_in().  If data_in() reports
   an error, a diagnostic quoting it is issued at LOCATION.
   NOTE(review): the declaration of V, the store through N, the release of
   the error string and the return statements are elided from this listing;
   presumably *N receives the parsed value and the result is true on
   success. */
1680 parse_category_string (struct msg_location *location,
1681 struct substring s, const struct dictionary *dict,
1682 enum fmt_type format, double *n)
1685 char *error = data_in (s, dict_get_encoding (dict), format,
1686 settings_get_fmt_settings (), &v, 0, NULL);
1689 msg_at (SE, location,
1690 _("Failed to parse category specification as format %s: %s."),
1691 fmt_name (format), error);
/* Scans the categories in CATS for the one that postcompute operand E
   refers to.  The kind of match depends on E->op: an equal number or
   string, identical range bounds, or the category type alone for MISSING,
   OTHERNM and TOTAL.  SUBTOTAL operands are matched using
   E->subtotal_index; an index of 0 is treated as ambiguous when more than
   one subtotal was seen (see the final test).
   NOTE(review): the switch header, the statements executed on a match, the
   subtotal counter update and the return statements are elided from this
   listing. */
1700 static struct ctables_category *
1701 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1702 const struct ctables_pcexpr *e)
1704 struct ctables_category *best = NULL;
1705 size_t n_subtotals = 0;
1706 for (size_t i = 0; i < cats->n_cats; i++)
1708 struct ctables_category *cat = &cats->cats[i];
1711 case CTPO_CAT_NUMBER:
1712 if (cat->type == CCT_NUMBER && cat->number == e->number)
1716 case CTPO_CAT_STRING:
1717 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
/* Ranges match only when both bounds are identical. */
1721 case CTPO_CAT_NRANGE:
1722 if (cat->type == CCT_NRANGE
1723 && cat->nrange[0] == e->nrange[0]
1724 && cat->nrange[1] == e->nrange[1])
1728 case CTPO_CAT_SRANGE:
1729 if (cat->type == CCT_SRANGE
1730 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1731 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1735 case CTPO_CAT_MISSING:
1736 if (cat->type == CCT_MISSING)
1740 case CTPO_CAT_OTHERNM:
1741 if (cat->type == CCT_OTHERNM)
/* Subtotals are selected by comparing E->subtotal_index with the running
   count; index 0 is handled separately. */
1745 case CTPO_CAT_SUBTOTAL:
1746 if (cat->type == CCT_SUBTOTAL)
1749 if (e->subtotal_index == n_subtotals)
1751 else if (e->subtotal_index == 0)
1756 case CTPO_CAT_TOTAL:
1757 if (cat->type == CCT_TOTAL)
/* A subtotal reference without a position is ambiguous when the list
   contains more than one subtotal. */
1771 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Finds the category in CATS that postcompute operand E refers to, allowing
   for operands written as strings that must be read in PARSE_FORMAT.

   When PARSE_FORMAT is not FMT_F, a CTPO_CAT_STRING operand is converted to
   a number and looked up as CTPO_CAT_NUMBER, and a CTPO_CAT_SRANGE operand
   is converted bound by bound and looked up as CTPO_CAT_NRANGE, with a null
   bound becoming -DBL_MAX (lower) or DBL_MAX (upper).  In every other case E
   is looked up unchanged.
   NOTE(review): local declarations and the failure returns are elided from
   this listing. */
1776 static struct ctables_category *
1777 ctables_find_category_for_postcompute (const struct dictionary *dict,
1778 const struct ctables_categories *cats,
1779 enum fmt_type parse_format,
1780 const struct ctables_pcexpr *e)
1782 if (parse_format != FMT_F)
1784 if (e->op == CTPO_CAT_STRING)
1787 if (!parse_category_string (e->location, e->string, dict,
1788 parse_format, &number))
/* Temporary numeric operand that shares E's location. */
1791 struct ctables_pcexpr e2 = {
1792 .op = CTPO_CAT_NUMBER,
1794 .location = e->location,
1796 return ctables_find_category_for_postcompute__ (cats, &e2);
1798 else if (e->op == CTPO_CAT_SRANGE)
/* A null lower bound denotes an open-ended low range. */
1801 if (!e->srange[0].string)
1802 nrange[0] = -DBL_MAX;
1803 else if (!parse_category_string (e->location, e->srange[0], dict,
1804 parse_format, &nrange[0]))
/* A null upper bound denotes an open-ended high range. */
1807 if (!e->srange[1].string)
1808 nrange[1] = DBL_MAX;
1809 else if (!parse_category_string (e->location, e->srange[1], dict,
1810 parse_format, &nrange[1]))
1813 struct ctables_pcexpr e2 = {
1814 .op = CTPO_CAT_NRANGE,
1815 .nrange = { nrange[0], nrange[1] },
1816 .location = e->location,
1818 return ctables_find_category_for_postcompute__ (cats, &e2);
1821 return ctables_find_category_for_postcompute__ (cats, e);
/* Recursively checks that every category operand in postcompute expression
   E, used by computed category PC_CAT, refers to a category present in
   CATS.  When an operand cannot be resolved, diagnostics explain the problem
   and how to fix it; CATS_LOCATION is the source location of the category
   list and is used in those messages.  Other nodes are checked through
   their `subs'.
   NOTE(review): the switch header, the conditions separating the success
   and failure paths, and the return statements are elided from this
   listing. */
1825 ctables_recursive_check_postcompute (struct dictionary *dict,
1826 const struct ctables_pcexpr *e,
1827 struct ctables_category *pc_cat,
1828 const struct ctables_categories *cats,
1829 const struct msg_location *cats_location)
/* Leaf operands that name a category. */
1833 case CTPO_CAT_NUMBER:
1834 case CTPO_CAT_STRING:
1835 case CTPO_CAT_NRANGE:
1836 case CTPO_CAT_SRANGE:
1837 case CTPO_CAT_MISSING:
1838 case CTPO_CAT_OTHERNM:
1839 case CTPO_CAT_SUBTOTAL:
1840 case CTPO_CAT_TOTAL:
1842 struct ctables_category *cat = ctables_find_category_for_postcompute (
1843 dict, cats, pc_cat->parse_format, e);
/* A SUBTOTAL reference without a position is ambiguous when the list has
   several subtotals; count them to give a specific message. */
1846 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1848 size_t n_subtotals = 0;
1849 for (size_t i = 0; i < cats->n_cats; i++)
1850 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1851 if (n_subtotals > 1)
1853 msg_at (SE, cats_location,
1854 ngettext ("These categories include %zu instance "
1855 "of SUBTOTAL or HSUBTOTAL, so references "
1856 "from computed categories must refer to "
1857 "subtotals by position, "
1858 "e.g. SUBTOTAL[1].",
1859 "These categories include %zu instances "
1860 "of SUBTOTAL or HSUBTOTAL, so references "
1861 "from computed categories must refer to "
1862 "subtotals by position, "
1863 "e.g. SUBTOTAL[1].",
1866 msg_at (SN, e->location,
1867 _("This is the reference that lacks a position."));
/* General "category not found" diagnostic, followed by a hint that depends
   on the kind of operand. */
1872 msg_at (SE, pc_cat->location,
1873 _("Computed category &%s references a category not included "
1874 "in the category list."),
1876 msg_at (SN, e->location, _("This is the missing category."));
1877 if (e->op == CTPO_CAT_SUBTOTAL)
1878 msg_at (SN, cats_location,
1879 _("To fix the problem, add subtotals to the "
1880 "list of categories here."));
1881 else if (e->op == CTPO_CAT_TOTAL)
1882 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1883 "CATEGORIES specification."));
1885 msg_at (SN, cats_location,
1886 _("To fix the problem, add the missing category to the "
1887 "list of categories here."));
/* The statement controlled by this test is elided from this listing. */
1890 if (pc_cat->pc->hide_source_cats)
/* Non-leaf nodes: check each non-null operand in turn. */
1904 for (size_t i = 0; i < 2; i++)
1905 if (e->subs[i] && !ctables_recursive_check_postcompute (
1906 dict, e->subs[i], pc_cat, cats, cats_location))
/* Checks the N_VARS variables in VARS for numeric variables and, on finding
   one, reports at CAT->location that the category specification applies
   only to string variables, naming the offending variable.
   NOTE(review): the return statements are elided from this listing;
   presumably the result is false after the first numeric variable and true
   when all variables are strings. */
1915 all_strings (struct variable **vars, size_t n_vars,
1916 const struct ctables_category *cat)
1918 for (size_t j = 0; j < n_vars; j++)
1919 if (var_is_numeric (vars[j]))
1921 msg_at (SE, cat->location,
1922 _("This category specification may be applied only to string "
1923 "variables, but this subcommand tries to apply it to "
1924 "numeric variable %s."),
1925 var_get_name (vars[j]));
/* Parses a categories specification for table T, starting at the VARIABLES
   keyword:

   - VARIABLES [=] <variable list>;
   - an optional explicit category list in square brackets, each entry
     parsed by ctables_table_parse_explicit_category();
   - settings up to the next '/' or the end of the command: ORDER, KEY and
     MISSING (accepted only when no explicit categories were given), plus
     TOTAL, LABEL, POSITION and EMPTY.

   A single categories object is created and installed for every listed
   variable.  After parsing, the explicit categories are validated against
   the types of the variables.

   NOTE(review): many lines (braces, `else', conditions, return statements
   and some declarations) are elided from this listing; comments below
   describe only what the visible lines establish. */
1932 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1933 struct ctables *ct, struct ctables_table *t)
1935 if (!lex_match_id (lexer, "VARIABLES"))
1937 lex_match (lexer, T_EQUALS);
1939 struct variable **vars;
1941 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all the variables share one print format type;
   COMMON_FORMAT is reset to NULL as soon as one differs. */
1944 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1945 for (size_t i = 1; i < n_vars; i++)
1947 const struct fmt_spec *f = var_get_print_format (vars[i]);
1948 if (f->type != common_format->type)
1950 common_format = NULL;
/* Tail of an expression whose start is elided; it tests whether the common
   format belongs to a date, time or date-component category.  Presumably it
   initializes `parse_strings', which is used further below. */
1956 && (fmt_get_category (common_format->type)
1957 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object, with one reference per variable.  Each variable's
   previous categories are unreferenced first. */
1959 struct ctables_categories *c = xmalloc (sizeof *c);
1960 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1961 for (size_t i = 0; i < n_vars; i++)
1963 struct ctables_categories **cp
1964 = &t->categories[var_get_dict_index (vars[i])];
1965 ctables_categories_unref (*cp);
/* Optional explicit category list.  The token offsets of the list are kept
   for diagnostics; -1 means that no list was given. */
1969 size_t allocated_cats = 0;
1970 int cats_start_ofs = -1;
1971 int cats_end_ofs = -1;
1972 if (lex_match (lexer, T_LBRACK))
1974 cats_start_ofs = lex_ofs (lexer);
/* Grow the array as needed, then parse the next entry in place. */
1977 if (c->n_cats >= allocated_cats)
1978 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1980 int start_ofs = lex_ofs (lexer);
1981 struct ctables_category *cat = &c->cats[c->n_cats];
1982 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1984 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
/* Commas between entries are optional; ']' ends the list. */
1987 lex_match (lexer, T_COMMA);
1989 while (!lex_match (lexer, T_RBRACK));
1990 cats_end_ofs = lex_ofs (lexer) - 1;
/* Template for the implicit category, refined by ORDER, KEY and MISSING. */
1993 struct ctables_category cat = {
1995 .include_missing = false,
1996 .sort_ascending = true,
1998 bool show_totals = false;
1999 char *total_label = NULL;
2000 bool totals_before = false;
2001 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* ORDER=A|D: accepted only without an explicit category list. */
2003 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2005 lex_match (lexer, T_EQUALS);
2006 if (lex_match_id (lexer, "A"))
2007 cat.sort_ascending = true;
2008 else if (lex_match_id (lexer, "D"))
2009 cat.sort_ascending = false;
2012 lex_error_expecting (lexer, "A", "D");
/* KEY=VALUE|LABEL|<summary function>: accepted only without an explicit
   category list. */
2016 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2018 lex_match (lexer, T_EQUALS);
2019 if (lex_match_id (lexer, "VALUE"))
2020 cat.type = CCT_VALUE;
2021 else if (lex_match_id (lexer, "LABEL"))
2022 cat.type = CCT_LABEL;
2025 cat.type = CCT_FUNCTION;
2026 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
/* Optional parenthesized argument: a variable, plus a percentile in the
   range 0 to 100 for PTILE. */
2029 if (lex_match (lexer, T_LPAREN))
2031 cat.sort_var = parse_variable (lexer, dict);
2035 if (cat.sort_function == CTSF_PTILE)
2037 lex_match (lexer, T_COMMA);
2038 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2040 cat.percentile = lex_number (lexer);
2044 if (!lex_force_match (lexer, T_RPAREN))
/* Without '(': for some functions (the rest of this condition is elided)
   lex_force_match() is called, evidently just to report the missing '('. */
2047 else if (ctables_function_availability (cat.sort_function)
2050 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
/* MISSING=INCLUDE|EXCLUDE: accepted only without an explicit category
   list. */
2055 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2057 lex_match (lexer, T_EQUALS);
2058 if (lex_match_id (lexer, "INCLUDE"))
2059 cat.include_missing = true;
2060 else if (lex_match_id (lexer, "EXCLUDE"))
2061 cat.include_missing = false;
2064 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* TOTAL=<boolean>. */
2068 else if (lex_match_id (lexer, "TOTAL"))
2070 lex_match (lexer, T_EQUALS);
2071 if (!parse_bool (lexer, &show_totals))
/* LABEL="text": label for the total. */
2074 else if (lex_match_id (lexer, "LABEL"))
2076 lex_match (lexer, T_EQUALS);
2077 if (!lex_force_string (lexer))
2080 total_label = ss_xstrdup (lex_tokss (lexer));
/* POSITION=BEFORE|AFTER: where the total goes. */
2083 else if (lex_match_id (lexer, "POSITION"))
2085 lex_match (lexer, T_EQUALS);
2086 if (lex_match_id (lexer, "BEFORE"))
2087 totals_before = true;
2088 else if (lex_match_id (lexer, "AFTER"))
2089 totals_before = false;
2092 lex_error_expecting (lexer, "BEFORE", "AFTER");
/* EMPTY=INCLUDE|EXCLUDE. */
2096 else if (lex_match_id (lexer, "EMPTY"))
2098 lex_match (lexer, T_EQUALS);
2099 if (lex_match_id (lexer, "INCLUDE"))
2100 c->show_empty = true;
2101 else if (lex_match_id (lexer, "EXCLUDE"))
2102 c->show_empty = false;
2105 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Unknown setting: two alternative "expecting" lists, the shorter one
   omitting the settings that require the absence of explicit categories.
   The condition choosing between them is elided. */
2112 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2113 "TOTAL", "LABEL", "POSITION", "EMPTY");
2115 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built above (guarding condition elided). */
2122 if (c->n_cats >= allocated_cats)
2123 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2124 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, which goes either at the front or at the
   end (the conditions, presumably on show_totals and totals_before, are
   elided). */
2129 if (c->n_cats >= allocated_cats)
2130 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2132 struct ctables_category *totals;
2135 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2136 totals = &c->cats[0];
2139 totals = &c->cats[c->n_cats];
2142 *totals = (struct ctables_category) {
2144 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, recording in each category the subtotal tracked in SUBTOTAL.
   The backward form decrements I inside the loop condition, so the third
   clause is a no-op in that direction. */
2148 struct ctables_category *subtotal = NULL;
2149 for (size_t i = totals_before ? 0 : c->n_cats;
2150 totals_before ? i < c->n_cats : i-- > 0;
2151 totals_before ? i++ : 0)
2153 struct ctables_category *cat = &c->cats[i];
2162 cat->subtotal = subtotal;
2165 case CCT_POSTCOMPUTE:
2176 case CCT_EXCLUDED_MISSING:
/* Validation of explicit categories; runs only if a bracketed list was
   given. */
2181 if (cats_start_ofs != -1)
2183 for (size_t i = 0; i < c->n_cats; i++)
2185 struct ctables_category *cat = &c->cats[i];
/* Postcomputes: choose the format for parsing string operands, then verify
   that every category they reference is in the list. */
2188 case CCT_POSTCOMPUTE:
2189 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2190 struct msg_location *cats_location
2191 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2192 bool ok = ctables_recursive_check_postcompute (
2193 dict, cat->pc->expr, cat, c, cats_location);
2194 msg_location_destroy (cats_location);
/* Category kinds that apply only to numeric variables (their case labels
   are elided) are rejected for string variables. */
2201 for (size_t j = 0; j < n_vars; j++)
2202 if (var_is_alpha (vars[j]))
2204 msg_at (SE, cat->location,
2205 _("This category specification may be applied "
2206 "only to numeric variables, but this "
2207 "subcommand tries to apply it to string "
2209 var_get_name (vars[j]));
/* String category to be read in the common format: convert it to a number
   and retype it as CCT_NUMBER.  Otherwise every variable must be a
   string. */
2218 if (!parse_category_string (cat->location, cat->string, dict,
2219 common_format->type, &n))
2222 ss_dealloc (&cat->string);
2224 cat->type = CCT_NUMBER;
2227 else if (!all_strings (vars, n_vars, cat))
/* String range, handled the same way bound by bound; a null bound is the
   open end of the range. */
2236 if (!cat->srange[0].string)
2238 else if (!parse_category_string (cat->location,
2239 cat->srange[0], dict,
2240 common_format->type, &n[0]))
2243 if (!cat->srange[1].string)
2245 else if (!parse_category_string (cat->location,
2246 cat->srange[1], dict,
2247 common_format->type, &n[1]))
2250 ss_dealloc (&cat->srange[0]);
2251 ss_dealloc (&cat->srange[1]);
2253 cat->type = CCT_NRANGE;
2254 cat->nrange[0] = n[0];
2255 cat->nrange[1] = n[1];
2257 else if (!all_strings (vars, n_vars, cat))
2268 case CCT_EXCLUDED_MISSING:
/* Releases the resources held by NEST: the summary specification set for
   each summary variant and the domain array for each domain type.
   NOTE(review): at least one statement (line 2285 of the original file) is
   elided from this listing. */
2283 ctables_nest_uninit (struct ctables_nest *nest)
2286 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2287 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2288 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2289 free (nest->domains[dt]);
/* Releases the resources held by STACK: uninitializes each of its
   STACK->n nests and then frees the nests array.  STACK itself is not
   freed by the visible lines.
   NOTE(review): a guarding condition may be elided from this listing. */
2293 ctables_stack_uninit (struct ctables_stack *stack)
2297 for (size_t i = 0; i < stack->n; i++)
2298 ctables_nest_uninit (&stack->nests[i]);
2299 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one nest for
   every pair (a, b) with a from S0 and b from S1, whose variables are a's
   followed by b's.  The summary specifications are cloned from whichever
   operand is chosen as `summary_src', and the scale index is taken from a
   or, shifted by a->n, from b.  Both S0 and S1 are uninitialized before
   returning, so the caller gives up ownership of them.
   NOTE(review): early-exit checks, the assignments to summary_src, parts of
   the initializer and the return statement are elided from this listing. */
2303 static struct ctables_stack
2304 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* Room for the full cross product, s0.n * s1.n nests. */
2311 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2312 for (size_t i = 0; i < s0.n; i++)
2313 for (size_t j = 0; j < s1.n; j++)
2315 const struct ctables_nest *a = &s0.nests[i];
2316 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the variable lists: outer (a) first, then inner (b). */
2318 size_t allocate = a->n + b->n;
2319 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2321 for (size_t k = 0; k < a->n; k++)
2322 vars[n++] = a->vars[k];
2323 for (size_t k = 0; k < b->n; k++)
2324 vars[n++] = b->vars[k];
2325 assert (n == allocate);
/* Choose which operand supplies the summary specifications, based on
   which of them has a CSV_CELL summary variable (the assignments are
   elided). */
2327 const struct ctables_nest *summary_src;
2328 if (!a->specs[CSV_CELL].var)
2330 else if (!b->specs[CSV_CELL].var)
2335 struct ctables_nest *new = &stack.nests[stack.n++];
2336 *new = (struct ctables_nest) {
/* SIZE_MAX marks "no scale variable"; b's index is offset by a->n because
   b's variables follow a's in the combined list. */
2338 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2339 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2343 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2344 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2346 ctables_stack_uninit (&s0);
2347 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S0 and S1: S0's nests
   followed by S1's.  The nests are copied by structure assignment, and each
   nest taken from S1 has its group_head increased by s0.n to account for
   its new position.
   NOTE(review): the increment of stack.n in the second loop, the release of
   the input arrays and the return statement are elided from this listing. */
2351 static struct ctables_stack
2352 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2354 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2355 for (size_t i = 0; i < s0.n; i++)
2356 stack.nests[stack.n++] = s0.nests[i];
2357 for (size_t i = 0; i < s1.n; i++)
2359 stack.nests[stack.n] = s1.nests[i];
2360 stack.nests[stack.n].group_head += s0.n;
2363 assert (stack.n == s0.n + s1.n);
/* Builds the one-nest stack for the variable axis A.  The nest's scale
   index is 0 when A is a scale variable and SIZE_MAX otherwise.  When A has
   CSV_CELL summary specifications or is a scale variable, the
   specifications for every summary variant are cloned into the nest and
   tagged with A's variable and scale flag.
   NOTE(review): the store into VARS and parts of the nest initializer are
   elided from this listing. */
2369 static struct ctables_stack
2370 var_fts (const struct ctables_axis *a)
/* One-element variable array for the nest. */
2372 struct variable **vars = xmalloc (sizeof *vars);
2375 struct ctables_nest *nest = xmalloc (sizeof *nest);
2376 *nest = (struct ctables_nest) {
2379 .scale_idx = a->scale ? 0 : SIZE_MAX,
2381 if (a->specs[CSV_CELL].n || a->scale)
2382 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2384 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2385 nest->specs[sv].var = a->var;
2386 nest->specs[sv].is_scale = a->scale;
/* The returned stack takes ownership of NEST as its single-element array. */
2388 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts the axis tree A into a stack of nests.  The visible
   lines return an empty stack in one case, combine the sub-axes' stacks
   with stack_fts() in another, and combine them with nest_fts() in a third.
   NOTE(review): the conditions and case labels are elided from this
   listing; presumably these correspond to a null axis, CTAO_STACK and
   CTAO_NEST, with CTAO_VAR handled by var_fts().  AXIS_TYPE is only passed
   through to the recursive calls in the visible lines. */
2391 static struct ctables_stack
2392 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2395 return (struct ctables_stack) { .n = 0 };
2403 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2404 enumerate_fts (axis_type, a->subs[1]));
2407 /* This should consider any of the scale variables found in the result to
2408 be linked to each other listwise for SMISSING=LISTWISE. */
2409 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2410 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary statistic.  Which member is in use depends on
   the summary function, as the per-member comments indicate; the
   ctables_summary_init(), ctables_summary_add() and
   ctables_summary_uninit() functions switch on the function to choose the
   member.
   NOTE(review): most member declarations are elided from this listing. */
2416 union ctables_summary
2418 /* COUNT, VALIDN, TOTALN. */
2421 /* MINIMUM, MAXIMUM, RANGE. */
2428 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2429 struct moments1 *moments;
2431 /* MEDIAN, MODE, PTILE. */
2434 struct casewriter *writer;
2439 /* XXX multiple response */
/* Initializes accumulator S for the summary function in SS->function:

   - minimum/maximum tracking starts with both values at SYSMIS;
   - moment-based summaries get a moments1 object that tracks up to the
     variance;
   - order statistics get a sorting casewriter over two numeric columns.

   NOTE(review): a number of case labels, the `break' statements and the
   initialization for the count-type functions are elided from this
   listing. */
2443 ctables_summary_init (union ctables_summary *s,
2444 const struct ctables_summary_spec *ss)
2446 switch (ss->function)
/* Weighted count-type functions; their initialization statement is not
   visible in this listing. */
2450 case CTSF_ROWPCT_COUNT:
2451 case CTSF_COLPCT_COUNT:
2452 case CTSF_TABLEPCT_COUNT:
2453 case CTSF_SUBTABLEPCT_COUNT:
2454 case CTSF_LAYERPCT_COUNT:
2455 case CTSF_LAYERROWPCT_COUNT:
2456 case CTSF_LAYERCOLPCT_COUNT:
2457 case CTSF_ROWPCT_VALIDN:
2458 case CTSF_COLPCT_VALIDN:
2459 case CTSF_TABLEPCT_VALIDN:
2460 case CTSF_SUBTABLEPCT_VALIDN:
2461 case CTSF_LAYERPCT_VALIDN:
2462 case CTSF_LAYERROWPCT_VALIDN:
2463 case CTSF_LAYERCOLPCT_VALIDN:
2464 case CTSF_ROWPCT_TOTALN:
2465 case CTSF_COLPCT_TOTALN:
2466 case CTSF_TABLEPCT_TOTALN:
2467 case CTSF_SUBTABLEPCT_TOTALN:
2468 case CTSF_LAYERPCT_TOTALN:
2469 case CTSF_LAYERROWPCT_TOTALN:
2470 case CTSF_LAYERCOLPCT_TOTALN:
/* Unweighted (U-prefixed) count-type functions. */
2477 case CTSF_UROWPCT_COUNT:
2478 case CTSF_UCOLPCT_COUNT:
2479 case CTSF_UTABLEPCT_COUNT:
2480 case CTSF_USUBTABLEPCT_COUNT:
2481 case CTSF_ULAYERPCT_COUNT:
2482 case CTSF_ULAYERROWPCT_COUNT:
2483 case CTSF_ULAYERCOLPCT_COUNT:
2484 case CTSF_UROWPCT_VALIDN:
2485 case CTSF_UCOLPCT_VALIDN:
2486 case CTSF_UTABLEPCT_VALIDN:
2487 case CTSF_USUBTABLEPCT_VALIDN:
2488 case CTSF_ULAYERPCT_VALIDN:
2489 case CTSF_ULAYERROWPCT_VALIDN:
2490 case CTSF_ULAYERCOLPCT_VALIDN:
2491 case CTSF_UROWPCT_TOTALN:
2492 case CTSF_UCOLPCT_TOTALN:
2493 case CTSF_UTABLEPCT_TOTALN:
2494 case CTSF_USUBTABLEPCT_TOTALN:
2495 case CTSF_ULAYERPCT_TOTALN:
2496 case CTSF_ULAYERROWPCT_TOTALN:
2497 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for the running minimum and maximum. */
2507 s->min = s->max = SYSMIS;
/* Moment-based summaries, weighted and unweighted. */
2515 case CTSF_ROWPCT_SUM:
2516 case CTSF_COLPCT_SUM:
2517 case CTSF_TABLEPCT_SUM:
2518 case CTSF_SUBTABLEPCT_SUM:
2519 case CTSF_LAYERPCT_SUM:
2520 case CTSF_LAYERROWPCT_SUM:
2521 case CTSF_LAYERCOLPCT_SUM:
2526 case CTSF_UVARIANCE:
2527 case CTSF_UROWPCT_SUM:
2528 case CTSF_UCOLPCT_SUM:
2529 case CTSF_UTABLEPCT_SUM:
2530 case CTSF_USUBTABLEPCT_SUM:
2531 case CTSF_ULAYERPCT_SUM:
2532 case CTSF_ULAYERROWPCT_SUM:
2533 case CTSF_ULAYERCOLPCT_SUM:
2534 s->moments = moments1_create (MOMENT_VARIANCE);
/* Order statistics: cases with two numeric (width 0) columns, which
   ctables_summary_add() fills with the value and its weight, sorted in
   ascending order on column 0. */
2544 struct caseproto *proto = caseproto_create ();
2545 proto = caseproto_add_width (proto, 0);
2546 proto = caseproto_add_width (proto, 0);
2548 struct subcase ordering;
2549 subcase_init (&ordering, 0, 0, SC_ASCEND);
2550 s->writer = sort_create_writer (&ordering, proto);
/* The local ordering and prototype are released after creating the
   writer. */
2551 subcase_uninit (&ordering);
2552 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the summary
   function in SS->function: the moments object for moment-based summaries
   and the casewriter for order statistics.  No release statement is visible
   for the count-type functions.
   NOTE(review): a number of case labels and the `break' statements are
   elided from this listing. */
2562 ctables_summary_uninit (union ctables_summary *s,
2563 const struct ctables_summary_spec *ss)
2565 switch (ss->function)
/* Weighted count-type functions. */
2569 case CTSF_ROWPCT_COUNT:
2570 case CTSF_COLPCT_COUNT:
2571 case CTSF_TABLEPCT_COUNT:
2572 case CTSF_SUBTABLEPCT_COUNT:
2573 case CTSF_LAYERPCT_COUNT:
2574 case CTSF_LAYERROWPCT_COUNT:
2575 case CTSF_LAYERCOLPCT_COUNT:
2576 case CTSF_ROWPCT_VALIDN:
2577 case CTSF_COLPCT_VALIDN:
2578 case CTSF_TABLEPCT_VALIDN:
2579 case CTSF_SUBTABLEPCT_VALIDN:
2580 case CTSF_LAYERPCT_VALIDN:
2581 case CTSF_LAYERROWPCT_VALIDN:
2582 case CTSF_LAYERCOLPCT_VALIDN:
2583 case CTSF_ROWPCT_TOTALN:
2584 case CTSF_COLPCT_TOTALN:
2585 case CTSF_TABLEPCT_TOTALN:
2586 case CTSF_SUBTABLEPCT_TOTALN:
2587 case CTSF_LAYERPCT_TOTALN:
2588 case CTSF_LAYERROWPCT_TOTALN:
2589 case CTSF_LAYERCOLPCT_TOTALN:
/* Unweighted (U-prefixed) count-type functions. */
2596 case CTSF_UROWPCT_COUNT:
2597 case CTSF_UCOLPCT_COUNT:
2598 case CTSF_UTABLEPCT_COUNT:
2599 case CTSF_USUBTABLEPCT_COUNT:
2600 case CTSF_ULAYERPCT_COUNT:
2601 case CTSF_ULAYERROWPCT_COUNT:
2602 case CTSF_ULAYERCOLPCT_COUNT:
2603 case CTSF_UROWPCT_VALIDN:
2604 case CTSF_UCOLPCT_VALIDN:
2605 case CTSF_UTABLEPCT_VALIDN:
2606 case CTSF_USUBTABLEPCT_VALIDN:
2607 case CTSF_ULAYERPCT_VALIDN:
2608 case CTSF_ULAYERROWPCT_VALIDN:
2609 case CTSF_ULAYERCOLPCT_VALIDN:
2610 case CTSF_UROWPCT_TOTALN:
2611 case CTSF_UCOLPCT_TOTALN:
2612 case CTSF_UTABLEPCT_TOTALN:
2613 case CTSF_USUBTABLEPCT_TOTALN:
2614 case CTSF_ULAYERPCT_TOTALN:
2615 case CTSF_ULAYERROWPCT_TOTALN:
2616 case CTSF_ULAYERCOLPCT_TOTALN:
/* Moment-based summaries own a moments1 object. */
2632 case CTSF_ROWPCT_SUM:
2633 case CTSF_COLPCT_SUM:
2634 case CTSF_TABLEPCT_SUM:
2635 case CTSF_SUBTABLEPCT_SUM:
2636 case CTSF_LAYERPCT_SUM:
2637 case CTSF_LAYERROWPCT_SUM:
2638 case CTSF_LAYERCOLPCT_SUM:
2643 case CTSF_UVARIANCE:
2644 case CTSF_UROWPCT_SUM:
2645 case CTSF_UCOLPCT_SUM:
2646 case CTSF_UTABLEPCT_SUM:
2647 case CTSF_USUBTABLEPCT_SUM:
2648 case CTSF_ULAYERPCT_SUM:
2649 case CTSF_ULAYERROWPCT_SUM:
2650 case CTSF_ULAYERCOLPCT_SUM:
2651 moments1_destroy (s->moments);
/* Order statistics own a casewriter (their case labels are elided). */
2660 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for the summary function in
   SS->function.  VALUE is the observation (its numeric member is used for
   the min/max, moment and order-statistic summaries), and D_WEIGHT and
   E_WEIGHT are two weights for the case.  As far as the visible lines show:

   - TOTALN-type functions add D_WEIGHT to the count unconditionally;
   - COUNT-type functions add D_WEIGHT only if IS_SCALE is true or
     EXCLUDED_MISSING is false;
   - some count-type functions, whose case labels are elided, add E_WEIGHT
     instead;
   - minimum and maximum are updated only if IS_SCALE_MISSING is false;
   - moment-based summaries add VALUE->f with weight E_WEIGHT, or with
     weight 1.0 for the unweighted (U-prefixed) functions, again only if
     IS_SCALE_MISSING is false;
   - order statistics write a (value, weight) case to the sorting writer.

   NOTE(review): many case labels, conditions (including those for the
   VALIDN-type functions, which presumably use IS_MISSING) and `break'
   statements are elided from this listing. */
2666 ctables_summary_add (union ctables_summary *s,
2667 const struct ctables_summary_spec *ss,
2668 const struct variable *var, const union value *value,
2669 bool is_scale, bool is_scale_missing,
2670 bool is_missing, bool excluded_missing,
2671 double d_weight, double e_weight)
2673 /* To determine whether a case is included in a given table for a particular
2674 kind of summary, consider the following charts for each variable in the
2675 table. Only if "yes" appears for every variable for the summary is the
2678 Categorical variables: VALIDN COUNT TOTALN
2679 Valid values in included categories yes yes yes
2680 Missing values in included categories --- yes yes
2681 Missing values in excluded categories --- --- yes
2682 Valid values in excluded categories --- --- ---
2684 Scale variables: VALIDN COUNT TOTALN
2685 Valid value yes yes yes
2686 Missing value --- yes yes
2688 Missing values include both user- and system-missing. (The system-missing
2689 value is always in an excluded category.)
2691 switch (ss->function)
2694 case CTSF_ROWPCT_TOTALN:
2695 case CTSF_COLPCT_TOTALN:
2696 case CTSF_TABLEPCT_TOTALN:
2697 case CTSF_SUBTABLEPCT_TOTALN:
2698 case CTSF_LAYERPCT_TOTALN:
2699 case CTSF_LAYERROWPCT_TOTALN:
2700 case CTSF_LAYERCOLPCT_TOTALN:
2701 s->count += d_weight;
2705 case CTSF_UROWPCT_TOTALN:
2706 case CTSF_UCOLPCT_TOTALN:
2707 case CTSF_UTABLEPCT_TOTALN:
2708 case CTSF_USUBTABLEPCT_TOTALN:
2709 case CTSF_ULAYERPCT_TOTALN:
2710 case CTSF_ULAYERROWPCT_TOTALN:
2711 case CTSF_ULAYERCOLPCT_TOTALN:
2716 case CTSF_ROWPCT_COUNT:
2717 case CTSF_COLPCT_COUNT:
2718 case CTSF_TABLEPCT_COUNT:
2719 case CTSF_SUBTABLEPCT_COUNT:
2720 case CTSF_LAYERPCT_COUNT:
2721 case CTSF_LAYERROWPCT_COUNT:
2722 case CTSF_LAYERCOLPCT_COUNT:
2723 if (is_scale || !excluded_missing)
2724 s->count += d_weight;
2728 case CTSF_UROWPCT_COUNT:
2729 case CTSF_UCOLPCT_COUNT:
2730 case CTSF_UTABLEPCT_COUNT:
2731 case CTSF_USUBTABLEPCT_COUNT:
2732 case CTSF_ULAYERPCT_COUNT:
2733 case CTSF_ULAYERROWPCT_COUNT:
2734 case CTSF_ULAYERCOLPCT_COUNT:
2735 if (is_scale || !excluded_missing)
2740 case CTSF_ROWPCT_VALIDN:
2741 case CTSF_COLPCT_VALIDN:
2742 case CTSF_TABLEPCT_VALIDN:
2743 case CTSF_SUBTABLEPCT_VALIDN:
2744 case CTSF_LAYERPCT_VALIDN:
2745 case CTSF_LAYERROWPCT_VALIDN:
2746 case CTSF_LAYERCOLPCT_VALIDN:
2750 s->count += d_weight;
2754 case CTSF_UROWPCT_VALIDN:
2755 case CTSF_UCOLPCT_VALIDN:
2756 case CTSF_UTABLEPCT_VALIDN:
2757 case CTSF_USUBTABLEPCT_VALIDN:
2758 case CTSF_ULAYERPCT_VALIDN:
2759 case CTSF_ULAYERROWPCT_VALIDN:
2760 case CTSF_ULAYERCOLPCT_VALIDN:
2769 s->count += d_weight;
2778 if (is_scale || !excluded_missing)
2779 s->count += e_weight;
2786 s->count += e_weight;
2790 s->count += e_weight;
2796 if (!is_scale_missing)
2798 assert (!var_is_alpha (var)); /* XXX? */
/* SYSMIS means that no value has been recorded yet. */
2799 if (s->min == SYSMIS || value->f < s->min)
2801 if (s->max == SYSMIS || value->f > s->max)
/* Weighted moment-based summaries: accumulate non-missing values with
   weight E_WEIGHT. */
2811 case CTSF_ROWPCT_SUM:
2812 case CTSF_COLPCT_SUM:
2813 case CTSF_TABLEPCT_SUM:
2814 case CTSF_SUBTABLEPCT_SUM:
2815 case CTSF_LAYERPCT_SUM:
2816 case CTSF_LAYERROWPCT_SUM:
2817 case CTSF_LAYERCOLPCT_SUM:
2818 if (!is_scale_missing)
2819 moments1_add (s->moments, value->f, e_weight);
/* Unweighted moment-based summaries: every case counts with weight 1. */
2826 case CTSF_UVARIANCE:
2827 case CTSF_UROWPCT_SUM:
2828 case CTSF_UCOLPCT_SUM:
2829 case CTSF_UTABLEPCT_SUM:
2830 case CTSF_USUBTABLEPCT_SUM:
2831 case CTSF_ULAYERPCT_SUM:
2832 case CTSF_ULAYERROWPCT_SUM:
2833 case CTSF_ULAYERCOLPCT_SUM:
2834 if (!is_scale_missing)
2835 moments1_add (s->moments, value->f, 1.0);
/* Both weights are forced to 1 here; presumably this is the entry for the
   unweighted order statistics, which then share the code below (the case
   labels are elided). */
2841 d_weight = e_weight = 1.0;
/* Order statistics: accumulate the valid weight and store the value and its
   weight as a two-column case for later sorting. */
2846 if (!is_scale_missing)
2848 s->ovalid += e_weight;
2850 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2851 *case_num_rw_idx (c, 0) = value->f;
2852 *case_num_rw_idx (c, 1) = e_weight;
2853 casewriter_write (s->writer, c);
/* Returns the domain type (CTDT_*) that summary FUNCTION is computed
   relative to.

   The weighted and unweighted ("U"-prefixed) forms of each percentage
   function are grouped together: every LAYERCOLPCT function yields
   CTDT_LAYERCOL, every LAYERROWPCT function yields CTDT_LAYERROW, and every
   SUBTABLEPCT function yields CTDT_SUBTABLE.

   NOTE(review): presumably the COLPCT, LAYERPCT, ROWPCT, and TABLEPCT groups
   map to the analogously named CTDT_* values -- confirm. */
2859 static enum ctables_domain_type
2860 ctables_function_domain (enum ctables_summary_function function)
2890 case CTSF_UVARIANCE:
/* Percentages within a column. */
2896 case CTSF_COLPCT_COUNT:
2897 case CTSF_COLPCT_SUM:
2898 case CTSF_COLPCT_TOTALN:
2899 case CTSF_COLPCT_VALIDN:
2900 case CTSF_UCOLPCT_COUNT:
2901 case CTSF_UCOLPCT_SUM:
2902 case CTSF_UCOLPCT_TOTALN:
2903 case CTSF_UCOLPCT_VALIDN:
/* Percentages within a column of a layer. */
2906 case CTSF_LAYERCOLPCT_COUNT:
2907 case CTSF_LAYERCOLPCT_SUM:
2908 case CTSF_LAYERCOLPCT_TOTALN:
2909 case CTSF_LAYERCOLPCT_VALIDN:
2910 case CTSF_ULAYERCOLPCT_COUNT:
2911 case CTSF_ULAYERCOLPCT_SUM:
2912 case CTSF_ULAYERCOLPCT_TOTALN:
2913 case CTSF_ULAYERCOLPCT_VALIDN:
2914 return CTDT_LAYERCOL;
/* Percentages within a layer. */
2916 case CTSF_LAYERPCT_COUNT:
2917 case CTSF_LAYERPCT_SUM:
2918 case CTSF_LAYERPCT_TOTALN:
2919 case CTSF_LAYERPCT_VALIDN:
2920 case CTSF_ULAYERPCT_COUNT:
2921 case CTSF_ULAYERPCT_SUM:
2922 case CTSF_ULAYERPCT_TOTALN:
2923 case CTSF_ULAYERPCT_VALIDN:
/* Percentages within a row of a layer. */
2926 case CTSF_LAYERROWPCT_COUNT:
2927 case CTSF_LAYERROWPCT_SUM:
2928 case CTSF_LAYERROWPCT_TOTALN:
2929 case CTSF_LAYERROWPCT_VALIDN:
2930 case CTSF_ULAYERROWPCT_COUNT:
2931 case CTSF_ULAYERROWPCT_SUM:
2932 case CTSF_ULAYERROWPCT_TOTALN:
2933 case CTSF_ULAYERROWPCT_VALIDN:
2934 return CTDT_LAYERROW;
/* Percentages within a row. */
2936 case CTSF_ROWPCT_COUNT:
2937 case CTSF_ROWPCT_SUM:
2938 case CTSF_ROWPCT_TOTALN:
2939 case CTSF_ROWPCT_VALIDN:
2940 case CTSF_UROWPCT_COUNT:
2941 case CTSF_UROWPCT_SUM:
2942 case CTSF_UROWPCT_TOTALN:
2943 case CTSF_UROWPCT_VALIDN:
/* Percentages within a subtable. */
2946 case CTSF_SUBTABLEPCT_COUNT:
2947 case CTSF_SUBTABLEPCT_SUM:
2948 case CTSF_SUBTABLEPCT_TOTALN:
2949 case CTSF_SUBTABLEPCT_VALIDN:
2950 case CTSF_USUBTABLEPCT_COUNT:
2951 case CTSF_USUBTABLEPCT_SUM:
2952 case CTSF_USUBTABLEPCT_TOTALN:
2953 case CTSF_USUBTABLEPCT_VALIDN:
2954 return CTDT_SUBTABLE;
/* Percentages within the whole table. */
2956 case CTSF_TABLEPCT_COUNT:
2957 case CTSF_TABLEPCT_SUM:
2958 case CTSF_TABLEPCT_TOTALN:
2959 case CTSF_TABLEPCT_VALIDN:
2960 case CTSF_UTABLEPCT_COUNT:
2961 case CTSF_UTABLEPCT_SUM:
2962 case CTSF_UTABLEPCT_TOTALN:
2963 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION according to whether it is one of the
   percentage-of-sum (*PCT_SUM) functions.

   The first case group below lists the COUNT, TOTALN, and VALIDN percentage
   functions; the second lists exactly the fourteen *PCT_SUM functions
   (weighted and unweighted).

   NOTE(review): the declared return type is `enum ctables_domain_type', the
   same as ctables_function_domain(), although the name reads like a
   predicate.  Presumably this was copied from that function and the result
   is really a boolean -- confirm, and change the return type to `bool' if
   so. */
2970 static enum ctables_domain_type
2971 ctables_function_is_pctsum (enum ctables_summary_function function)
3001 case CTSF_UVARIANCE:
/* Percentages of counts: not percentages of a sum. */
3005 case CTSF_COLPCT_COUNT:
3006 case CTSF_COLPCT_TOTALN:
3007 case CTSF_COLPCT_VALIDN:
3008 case CTSF_UCOLPCT_COUNT:
3009 case CTSF_UCOLPCT_TOTALN:
3010 case CTSF_UCOLPCT_VALIDN:
3011 case CTSF_LAYERCOLPCT_COUNT:
3012 case CTSF_LAYERCOLPCT_TOTALN:
3013 case CTSF_LAYERCOLPCT_VALIDN:
3014 case CTSF_ULAYERCOLPCT_COUNT:
3015 case CTSF_ULAYERCOLPCT_TOTALN:
3016 case CTSF_ULAYERCOLPCT_VALIDN:
3017 case CTSF_LAYERPCT_COUNT:
3018 case CTSF_LAYERPCT_TOTALN:
3019 case CTSF_LAYERPCT_VALIDN:
3020 case CTSF_ULAYERPCT_COUNT:
3021 case CTSF_ULAYERPCT_TOTALN:
3022 case CTSF_ULAYERPCT_VALIDN:
3023 case CTSF_LAYERROWPCT_COUNT:
3024 case CTSF_LAYERROWPCT_TOTALN:
3025 case CTSF_LAYERROWPCT_VALIDN:
3026 case CTSF_ULAYERROWPCT_COUNT:
3027 case CTSF_ULAYERROWPCT_TOTALN:
3028 case CTSF_ULAYERROWPCT_VALIDN:
3029 case CTSF_ROWPCT_COUNT:
3030 case CTSF_ROWPCT_TOTALN:
3031 case CTSF_ROWPCT_VALIDN:
3032 case CTSF_UROWPCT_COUNT:
3033 case CTSF_UROWPCT_TOTALN:
3034 case CTSF_UROWPCT_VALIDN:
3035 case CTSF_SUBTABLEPCT_COUNT:
3036 case CTSF_SUBTABLEPCT_TOTALN:
3037 case CTSF_SUBTABLEPCT_VALIDN:
3038 case CTSF_USUBTABLEPCT_COUNT:
3039 case CTSF_USUBTABLEPCT_TOTALN:
3040 case CTSF_USUBTABLEPCT_VALIDN:
3041 case CTSF_TABLEPCT_COUNT:
3042 case CTSF_TABLEPCT_TOTALN:
3043 case CTSF_TABLEPCT_VALIDN:
3044 case CTSF_UTABLEPCT_COUNT:
3045 case CTSF_UTABLEPCT_TOTALN:
3046 case CTSF_UTABLEPCT_VALIDN:
/* Percentages of a sum. */
3049 case CTSF_COLPCT_SUM:
3050 case CTSF_UCOLPCT_SUM:
3051 case CTSF_LAYERCOLPCT_SUM:
3052 case CTSF_ULAYERCOLPCT_SUM:
3053 case CTSF_LAYERPCT_SUM:
3054 case CTSF_ULAYERPCT_SUM:
3055 case CTSF_LAYERROWPCT_SUM:
3056 case CTSF_ULAYERROWPCT_SUM:
3057 case CTSF_ROWPCT_SUM:
3058 case CTSF_UROWPCT_SUM:
3059 case CTSF_SUBTABLEPCT_SUM:
3060 case CTSF_USUBTABLEPCT_SUM:
3061 case CTSF_TABLEPCT_SUM:
3062 case CTSF_UTABLEPCT_SUM:
/* Computes the reportable value of summary S, which belongs to CELL, for the
   summary function given in SS.

   Percentage functions look up the cell's domain of the type reported by
   ctables_function_domain() and express the cell's accumulated count (or
   sum) as a percentage of the matching domain total.  Moment-based
   functions read their results from S->moments.  Percentile and mode
   functions read back the values that were written to S->writer and cache
   their result in S->ovalue. */
3070 ctables_summary_value (const struct ctables_cell *cell,
3071 union ctables_summary *s,
3072 const struct ctables_summary_spec *ss)
3074 switch (ss->function)
/* Percentage of the domain's e_count. */
3081 case CTSF_ROWPCT_COUNT:
3082 case CTSF_COLPCT_COUNT:
3083 case CTSF_TABLEPCT_COUNT:
3084 case CTSF_SUBTABLEPCT_COUNT:
3085 case CTSF_LAYERPCT_COUNT:
3086 case CTSF_LAYERROWPCT_COUNT:
3087 case CTSF_LAYERCOLPCT_COUNT:
3089 enum ctables_domain_type d = ctables_function_domain (ss->function);
3090 return (cell->domains[d]->e_count
3091 ? s->count / cell->domains[d]->e_count * 100
/* Percentage of the domain's u_count (unweighted variant). */
3095 case CTSF_UROWPCT_COUNT:
3096 case CTSF_UCOLPCT_COUNT:
3097 case CTSF_UTABLEPCT_COUNT:
3098 case CTSF_USUBTABLEPCT_COUNT:
3099 case CTSF_ULAYERPCT_COUNT:
3100 case CTSF_ULAYERROWPCT_COUNT:
3101 case CTSF_ULAYERCOLPCT_COUNT:
3103 enum ctables_domain_type d = ctables_function_domain (ss->function);
3104 return (cell->domains[d]->u_count
3105 ? s->count / cell->domains[d]->u_count * 100
/* Percentage of the domain's e_valid. */
3109 case CTSF_ROWPCT_VALIDN:
3110 case CTSF_COLPCT_VALIDN:
3111 case CTSF_TABLEPCT_VALIDN:
3112 case CTSF_SUBTABLEPCT_VALIDN:
3113 case CTSF_LAYERPCT_VALIDN:
3114 case CTSF_LAYERROWPCT_VALIDN:
3115 case CTSF_LAYERCOLPCT_VALIDN:
3117 enum ctables_domain_type d = ctables_function_domain (ss->function);
3118 return (cell->domains[d]->e_valid
3119 ? s->count / cell->domains[d]->e_valid * 100
/* Percentage of the domain's u_valid (unweighted variant). */
3123 case CTSF_UROWPCT_VALIDN:
3124 case CTSF_UCOLPCT_VALIDN:
3125 case CTSF_UTABLEPCT_VALIDN:
3126 case CTSF_USUBTABLEPCT_VALIDN:
3127 case CTSF_ULAYERPCT_VALIDN:
3128 case CTSF_ULAYERROWPCT_VALIDN:
3129 case CTSF_ULAYERCOLPCT_VALIDN:
3131 enum ctables_domain_type d = ctables_function_domain (ss->function);
3132 return (cell->domains[d]->u_valid
3133 ? s->count / cell->domains[d]->u_valid * 100
/* Percentage of the domain's e_total. */
3137 case CTSF_ROWPCT_TOTALN:
3138 case CTSF_COLPCT_TOTALN:
3139 case CTSF_TABLEPCT_TOTALN:
3140 case CTSF_SUBTABLEPCT_TOTALN:
3141 case CTSF_LAYERPCT_TOTALN:
3142 case CTSF_LAYERROWPCT_TOTALN:
3143 case CTSF_LAYERCOLPCT_TOTALN:
3145 enum ctables_domain_type d = ctables_function_domain (ss->function);
3146 return (cell->domains[d]->e_total
3147 ? s->count / cell->domains[d]->e_total * 100
/* Percentage of the domain's u_total (unweighted variant). */
3151 case CTSF_UROWPCT_TOTALN:
3152 case CTSF_UCOLPCT_TOTALN:
3153 case CTSF_UTABLEPCT_TOTALN:
3154 case CTSF_USUBTABLEPCT_TOTALN:
3155 case CTSF_ULAYERPCT_TOTALN:
3156 case CTSF_ULAYERROWPCT_TOTALN:
3157 case CTSF_ULAYERCOLPCT_TOTALN:
3159 enum ctables_domain_type d = ctables_function_domain (ss->function);
3160 return (cell->domains[d]->u_total
3161 ? s->count / cell->domains[d]->u_total * 100
/* Range: defined only if both a minimum and a maximum were recorded. */
3182 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3188 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
/* Standard error of the mean, from the variance and total weight. */
3195 double weight, variance;
3196 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3197 return calc_semean (variance, weight);
/* Standard deviation is the square root of the variance, if there is one. */
3204 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3205 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The moments object does not report the sum directly, so reconstruct it as
   weight times mean. */
3211 double weight, mean;
3212 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3213 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3217 case CTSF_UVARIANCE:
3220 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Cell sum as a percentage of the domain's e_sum for the summed variable.
   A zero denominator yields SYSMIS. */
3224 case CTSF_ROWPCT_SUM:
3225 case CTSF_COLPCT_SUM:
3226 case CTSF_TABLEPCT_SUM:
3227 case CTSF_SUBTABLEPCT_SUM:
3228 case CTSF_LAYERPCT_SUM:
3229 case CTSF_LAYERROWPCT_SUM:
3230 case CTSF_LAYERCOLPCT_SUM:
3232 double weight, mean;
3233 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3234 if (weight == SYSMIS || mean == SYSMIS)
3236 enum ctables_domain_type d = ctables_function_domain (ss->function);
3237 double num = weight * mean;
3238 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3239 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Same as above, but relative to the domain's u_sum (unweighted variant). */
3241 case CTSF_UROWPCT_SUM:
3242 case CTSF_UCOLPCT_SUM:
3243 case CTSF_UTABLEPCT_SUM:
3244 case CTSF_USUBTABLEPCT_SUM:
3245 case CTSF_ULAYERPCT_SUM:
3246 case CTSF_ULAYERROWPCT_SUM:
3247 case CTSF_ULAYERCOLPCT_SUM:
3249 double weight, mean;
3250 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3251 if (weight == SYSMIS || mean == SYSMIS)
3253 enum ctables_domain_type d = ctables_function_domain (ss->function);
3254 double num = weight * mean;
3255 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3256 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile: converts the writer into a reader and feeds the recorded
   (value, weight) cases to a percentile statistic.  CTSF_PTILE uses the
   percentile requested in SS; any other function that reaches this code
   uses 0.5, i.e. the median. */
3265 struct casereader *reader = casewriter_make_reader (s->writer);
3268 struct percentile *ptile = percentile_create (
3269 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3270 struct order_stats *os = &ptile->parent;
3271 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3272 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3273 statistic_destroy (&ptile->parent.parent);
/* Mode: same approach as for percentiles, using a mode statistic. */
3281 struct casereader *reader = casewriter_make_reader (s->writer);
3284 struct mode *mode = mode_create ();
3285 struct order_stats *os = &mode->parent;
3286 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3287 s->ovalue = mode->mode;
3288 statistic_destroy (&mode->parent.parent);
/* Auxiliary data passed to ctables_cell_compare_3way(). */
3296 struct ctables_cell_sort_aux
/* Nest whose variables determine the sort order. */
3298 const struct ctables_nest *nest;
/* Axis within each cell whose category values are compared. */
3299 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.

   A_ and B_ each point to a `struct ctables_cell *'.  AUX_ points to a
   struct ctables_cell_sort_aux that names the nest and the axis to compare
   on.  For each variable in the nest other than the scale variable, the
   cells are ordered first by category and then, within the same category,
   by value in a way that depends on the category's type. */
3303 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3305 const struct ctables_cell_sort_aux *aux = aux_;
3306 struct ctables_cell *const *ap = a_;
3307 struct ctables_cell *const *bp = b_;
3308 const struct ctables_cell *a = *ap;
3309 const struct ctables_cell *b = *bp;
3311 const struct ctables_nest *nest = aux->nest;
3312 for (size_t i = 0; i < nest->n; i++)
3313 if (i != nest->scale_idx)
3315 const struct variable *var = nest->vars[i];
3316 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3317 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories are ordered by address.  NOTE(review): presumably
   all of a variable's categories live in one array, so that this gives
   their declaration order -- confirm. */
3318 if (a_cv->category != b_cv->category)
3319 return a_cv->category > b_cv->category ? 1 : -1;
3321 const union value *a_val = &a_cv->value;
3322 const union value *b_val = &b_cv->value;
3323 switch (a_cv->category->type)
3329 case CCT_POSTCOMPUTE:
3330 case CCT_EXCLUDED_MISSING:
3331 /* Must be equal. */
3339 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* Sort by value, honoring the category's requested direction. */
3347 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3349 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Sort by value label, with the value itself also available as a sort key. */
3355 const char *a_label = var_lookup_value_label (var, a_val);
3356 const char *b_label = var_lookup_value_label (var, b_val);
3362 cmp = strcmp (a_label, b_label);
3368 cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3371 return a_cv->category->sort_ascending ? cmp : -cmp;
3385 For each ctables_table:
3386 For each combination of row vars:
3387 For each combination of column vars:
3388 For each combination of layer vars:
3390 Make a table of row values:
3391 Sort entries by row values
3392 Assign a 0-based index to each actual value
3393 Construct a dimension
3394 Make a table of column values
3395 Make a table of layer values
3397 Fill the table entry using the indexes from before.
/* Finds the domain of type DOMAIN in section S that CELL belongs to,
   creating it if no matching domain exists yet.

   A domain is identified by the categories and values that CELL has for the
   variables listed in each axis's nest->domains[DOMAIN].  Values are left
   out of the identity for total, subtotal, and postcompute categories. */
3400 static struct ctables_domain *
3401 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3402 enum ctables_domain_type domain)
/* Hash the identifying categories and values. */
3405 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3407 const struct ctables_nest *nest = s->nests[a];
3408 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3410 size_t v_idx = nest->domains[domain][i];
3411 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3412 hash = hash_pointer (cv->category, hash);
3413 if (cv->category->type != CCT_TOTAL
3414 && cv->category->type != CCT_SUBTOTAL
3415 && cv->category->type != CCT_POSTCOMPUTE)
3416 hash = value_hash (&cv->value,
3417 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing domain whose example cell matches CELL on every
   identifying variable. */
3421 struct ctables_domain *d;
3422 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3424 const struct ctables_cell *df = d->example;
3425 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3427 const struct ctables_nest *nest = s->nests[a];
3428 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3430 size_t v_idx = nest->domains[domain][i];
3431 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3432 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3433 if (cv1->category != cv2->category
3434 || (cv1->category->type != CCT_TOTAL
3435 && cv1->category->type != CCT_SUBTOTAL
3436 && cv1->category->type != CCT_POSTCOMPUTE
3437 && !value_equal (&cv1->value, &cv2->value,
3438 var_get_width (nest->vars[v_idx]))))
/* No match: create a new domain with CELL as its example and one zeroed
   sum per summed variable in the table (no array when there are none). */
3447 struct ctables_sum *sums = (s->table->n_sum_vars
3448 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3451 d = xmalloc (sizeof *d);
3452 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3453 hmap_insert (&s->domains[domain], &d->node, hash);
/* Returns a substring covering the string value V, whose length is the width
   of VAR, with trailing spaces trimmed off.  The substring is built directly
   over V->s rather than a copy. */
3457 static struct substring
3458 rtrim_value (const union value *v, const struct variable *var)
3460 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3461 var_get_width (var));
3462 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether string value V of VAR, ignoring trailing spaces, lies within
   the inclusive range from SRANGE[0] to SRANGE[1].  A bound whose `string'
   member is null places no limit on that side. */
3467 in_string_range (const union value *v, const struct variable *var,
3468 const struct substring *srange)
3470 struct substring s = rtrim_value (v, var);
3471 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3472 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   A numeric system-missing value is handled up front, before any category
   is examined.  Categories are then scanned from the last to the first.  If
   no category claims the value explicitly, the result is the "other
   nonmissing" category found during the scan (if any), or a null pointer
   when V is a missing value of VAR. */
3475 static const struct ctables_category *
3476 ctables_categories_match (const struct ctables_categories *c,
3477 const union value *v, const struct variable *var)
3479 if (var_is_numeric (var) && v->f == SYSMIS)
3482 const struct ctables_category *othernm = NULL;
3483 for (size_t i = c->n_cats; i-- > 0; )
3485 const struct ctables_category *cat = &c->cats[i];
/* Single number. */
3489 if (cat->number == v->f)
/* Single string, compared without trailing spaces. */
3494 if (ss_equals (cat->string, rtrim_value (v, var)))
/* Numeric range; -DBL_MAX and DBL_MAX mark an open lower or upper end. */
3499 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3500 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
/* String range. */
3505 if (in_string_range (v, var, cat->srange))
/* Any missing value of VAR. */
3510 if (var_is_value_missing (var, v))
3514 case CCT_POSTCOMPUTE:
/* A category that can match any value, optionally including missing ones. */
3529 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3532 case CCT_EXCLUDED_MISSING:
3537 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the total category in C.  Only the first and the last category are
   examined, with the first taking precedence if both are totals.

   NOTE(review): this assumes C->n_cats is at least 1.  With an empty category
   list, `c->n_cats - 1' wraps around and both element addresses are out of
   bounds -- confirm that callers never pass an empty list. */
3540 static const struct ctables_category *
3541 ctables_categories_total (const struct ctables_categories *c)
3543 const struct ctables_category *first = &c->cats[0];
3544 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3545 return (first->type == CCT_TOTAL ? first
3546 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S that is identified by the categories in CATS
   together with the values in case C, creating the cell if it does not exist
   yet.

   A cell's identity consists, for every variable on every axis except the
   scale variable, of the category and, unless the category is a total,
   subtotal, or postcompute, the variable's value in C.

   A new cell receives its own copy of each value, one initialized summary
   per summary specification, and a domain of each domain type, and is then
   added to S->cells. */
3550 static struct ctables_cell *
3551 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3552 const struct ctables_category *cats[PIVOT_N_AXES][10])
3555 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the cell's identity. */
3556 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3558 const struct ctables_nest *nest = s->nests[a];
3559 for (size_t i = 0; i < nest->n; i++)
3560 if (i != nest->scale_idx)
3562 hash = hash_pointer (cats[a][i], hash);
3563 if (cats[a][i]->type != CCT_TOTAL
3564 && cats[a][i]->type != CCT_SUBTOTAL
3565 && cats[a][i]->type != CCT_POSTCOMPUTE)
3566 hash = value_hash (case_data (c, nest->vars[i]),
3567 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same identity. */
3573 struct ctables_cell *cell;
3574 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3576 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3578 const struct ctables_nest *nest = s->nests[a];
3579 for (size_t i = 0; i < nest->n; i++)
3580 if (i != nest->scale_idx
3581 && (cats[a][i] != cell->axes[a].cvs[i].category
3582 || (cats[a][i]->type != CCT_TOTAL
3583 && cats[a][i]->type != CCT_SUBTOTAL
3584 && cats[a][i]->type != CCT_POSTCOMPUTE
3585 && !value_equal (case_data (c, nest->vars[i]),
3586 &cell->axes[a].cvs[i].value,
3587 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
3596 cell = xmalloc (sizeof *cell);
3599 cell->omit_domains = 0;
3600 cell->postcompute = false;
3601 //struct string name = DS_EMPTY_INITIALIZER;
3602 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3604 const struct ctables_nest *nest = s->nests[a];
3605 cell->axes[a].cvs = (nest->n
3606 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3608 for (size_t i = 0; i < nest->n; i++)
3610 const struct ctables_category *cat = cats[a][i];
3611 const struct variable *var = nest->vars[i];
3612 const union value *value = case_data (c, var);
3613 if (i != nest->scale_idx)
3615 const struct ctables_category *subtotal = cat->subtotal;
3616 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category on axis A marks a set of domain
   types, one bit per type in cell->omit_domains, for ctables_cell_add__() to
   consult when it updates domain totals. */
3619 if (cat->type == CCT_TOTAL
3620 || cat->type == CCT_SUBTOTAL
3621 || cat->type == CCT_POSTCOMPUTE)
3623 /* XXX these should be more encompassing I think.*/
3627 case PIVOT_AXIS_COLUMN:
3628 cell->omit_domains |= ((1u << CTDT_TABLE) |
3629 (1u << CTDT_LAYER) |
3630 (1u << CTDT_LAYERCOL) |
3631 (1u << CTDT_SUBTABLE) |
3634 case PIVOT_AXIS_ROW:
3635 cell->omit_domains |= ((1u << CTDT_TABLE) |
3636 (1u << CTDT_LAYER) |
3637 (1u << CTDT_LAYERROW) |
3638 (1u << CTDT_SUBTABLE) |
3641 case PIVOT_AXIS_LAYER:
3642 cell->omit_domains |= ((1u << CTDT_TABLE) |
3643 (1u << CTDT_LAYER));
3647 if (cat->type == CCT_POSTCOMPUTE)
3648 cell->postcompute = true;
3651 cell->axes[a].cvs[i].category = cat;
3652 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the code below builds a debugging name in `name', whose
   declaration is commented out above.  Presumably this code is compiled out
   (e.g. under `#if 0') -- confirm. */
3655 if (i != nest->scale_idx)
3657 if (!ds_is_empty (&name))
3658 ds_put_cstr (&name, ", ");
3659 char *value_s = data_out (value, var_get_encoding (var),
3660 var_get_print_format (var),
3661 settings_get_fmt_settings ());
3662 if (cat->type == CCT_TOTAL
3663 || cat->type == CCT_SUBTOTAL
3664 || cat->type == CCT_POSTCOMPUTE)
3665 ds_put_format (&name, "%s=total", var_get_name (var));
3667 ds_put_format (&name, "%s=%s", var_get_name (var),
3668 value_s + strspn (value_s, " "));
3674 //cell->name = ds_steal_cstr (&name);
/* Allocate and initialize one summary per specification.  xnmalloc() checks
   the count-times-size multiplication for overflow, which a bare
   `xmalloc (n * size)' does not. */
3676 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3677 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3678 cell->summaries = xnmalloc (specs->n, sizeof *cell->summaries);
3679 for (size_t i = 0; i < specs->n; i++)
3680 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to its domain of every type. */
3681 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3682 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3683 hmap_insert (&s->cells, &cell->node, hash);
/* Determines whether case C counts as missing for the scale summaries in
   SPECS.  This only applies when SPECS summarizes a scale variable.  Both the
   summarized variable SPECS->var and every variable in SPECS->listwise_vars
   are checked for a missing value in C. */
3688 is_scale_missing (const struct ctables_summary_spec_set *specs,
3689 const struct ccase *c)
3691 if (!specs->is_scale)
3694 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3697 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3699 const struct variable *var = specs->listwise_vars[i];
3700 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell in section S identified by CATS, creating the cell
   if necessary.

   Every summary in the cell is updated with the value of the summarized
   variable, and then the totals, counts, valid counts, and per-variable sums
   are updated in each of the cell's domains that the cell does not omit.

   IS_MISSING and EXCLUDED_MISSING describe the missing-value status of the
   case's categorical variables (see ctables_cell_insert()).  D_WEIGHT and
   E_WEIGHT are the two weights accumulated into the "d_" and "e_" members,
   respectively. */
3708 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3709 const struct ctables_category *cats[PIVOT_N_AXES][10],
3710 bool is_missing, bool excluded_missing,
3711 double d_weight, double e_weight)
3713 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3714 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3716 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3718 bool scale_missing = is_scale_missing (specs, c);
3719 for (size_t i = 0; i < specs->n; i++)
3720 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3721 specs->var, case_data (c, specs->var), specs->is_scale,
3722 scale_missing, is_missing, excluded_missing,
3723 d_weight, e_weight);
/* cell->omit_domains is a bitmask with one bit per domain type, so it must
   be tested with bitwise `&'.  A logical `&&' would be true whenever any bit
   at all is set, which would skip every domain, not just the omitted ones. */
3724 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3725 if (!(cell->omit_domains & (1u << dt)))
3727 struct ctables_domain *d = cell->domains[dt];
3728 d->d_total += d_weight;
3729 d->e_total += e_weight;
3731 if (!excluded_missing)
3733 d->d_count += d_weight;
3734 d->e_count += e_weight;
3739 d->d_valid += d_weight;
3740 d->e_valid += e_weight;
/* Accumulate the weighted and unweighted sum of each summed variable,
   skipping missing values. */
3743 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3745 /* XXX listwise_missing??? */
3746 const struct variable *var = s->table->sum_vars[i];
3747 double addend = case_num (c, var);
3748 if (!var_is_num_missing (var, addend))
3750 struct ctables_sum *sum = &d->sums[i];
3751 sum->e_sum += addend * e_weight;
3752 sum->u_sum += addend;
/* Adds case C to the total cells that it contributes to in section S.

   Starting from variable START_NEST on START_AXIS, each non-scale variable
   whose category list has a total category gets that total substituted into
   CATS.  The case is added to the resulting cell, and the function recurses
   from the next variable so that combinations of totals across several
   variables are covered as well.  The category that was replaced is saved in
   `save'. */
3760 recurse_totals (struct ctables_section *s, const struct ccase *c,
3761 const struct ctables_category *cats[PIVOT_N_AXES][10],
3762 bool is_missing, bool excluded_missing,
3763 double d_weight, double e_weight,
3764 enum pivot_axis_type start_axis, size_t start_nest)
3766 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3768 const struct ctables_nest *nest = s->nests[a];
3769 for (size_t i = start_nest; i < nest->n; i++)
3771 if (i == nest->scale_idx)
3774 const struct variable *var = nest->vars[i];
3776 const struct ctables_category *total = ctables_categories_total (
3777 s->table->categories[var_get_dict_index (var)]);
3780 const struct ctables_category *save = cats[a][i];
3782 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3783 d_weight, e_weight);
3784 recurse_totals (s, c, cats, is_missing, excluded_missing,
3785 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells that it contributes to in section S.

   This mirrors recurse_totals(), except that the category substituted for
   each non-scale variable is the subtotal attached to the variable's current
   category (`save->subtotal'). */
3794 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3795 const struct ctables_category *cats[PIVOT_N_AXES][10],
3796 bool is_missing, bool excluded_missing,
3797 double d_weight, double e_weight,
3798 enum pivot_axis_type start_axis, size_t start_nest)
3800 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3802 const struct ctables_nest *nest = s->nests[a];
3803 for (size_t i = start_nest; i < nest->n; i++)
3805 if (i == nest->scale_idx)
3808 const struct ctables_category *save = cats[a][i];
3811 cats[a][i] = save->subtotal;
3812 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3813 d_weight, e_weight);
3814 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3815 d_weight, e_weight, a, i + 1);
/* Records that VALUE occurs for variable VAR by adding it to the hash map
   OCCURRENCES, which is keyed on the value.  The map is first searched for an
   equal value; the new entry holds its own copy of VALUE. */
3824 ctables_add_occurrence (const struct variable *var,
3825 const union value *value,
3826 struct hmap *occurrences)
3828 int width = var_get_width (var);
3829 unsigned int hash = value_hash (value, width, 0);
3831 struct ctables_occurrence *o;
3832 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3834 if (value_equal (value, &o->value, width))
3837 o = xmalloc (sizeof *o);
3838 value_clone (&o->value, value, width);
3839 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First, every non-scale variable on every axis is matched to a category.
   A value that has no matching category is assigned the static
   CCT_EXCLUDED_MISSING category, and the case is flagged as
   `excluded_missing'.  Unless the case was flagged that way, each variable's
   value is recorded in the section's per-variable occurrence maps.  Finally
   the case is added to its own cell and, through recurse_totals() and
   recurse_subtotals(), to the total and subtotal cells.

   NOTE(review): CATS has room for only 10 variables per axis (see the XXX
   below).  Presumably the number of variables in a nest is limited
   elsewhere -- confirm. */
3843 ctables_cell_insert (struct ctables_section *s,
3844 const struct ccase *c,
3845 double d_weight, double e_weight)
3847 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3849 /* Does at least one categorical variable have a missing value in an included
3850 or excluded category? */
3851 bool is_missing = false;
3853 /* Does at least one categorical variable have a missing value in an excluded
3855 bool excluded_missing = false;
3857 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3859 const struct ctables_nest *nest = s->nests[a];
3860 for (size_t i = 0; i < nest->n; i++)
3862 if (i == nest->scale_idx)
3865 const struct variable *var = nest->vars[i];
3866 const union value *value = case_data (c, var);
3868 bool var_missing = var_is_value_missing (var, value) != 0;
3872 cats[a][i] = ctables_categories_match (
3873 s->table->categories[var_get_dict_index (var)], value, var);
3879 static const struct ctables_category cct_excluded_missing = {
3880 .type = CCT_EXCLUDED_MISSING,
3883 cats[a][i] = &cct_excluded_missing;
3884 excluded_missing = true;
3889 if (!excluded_missing)
3890 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3892 const struct ctables_nest *nest = s->nests[a];
3893 for (size_t i = 0; i < nest->n; i++)
3894 if (i != nest->scale_idx)
3896 const struct variable *var = nest->vars[i];
3897 const union value *value = case_data (c, var);
3898 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3902 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3903 d_weight, e_weight);
3905 //if (!excluded_missing)
3907 recurse_totals (s, c, cats, is_missing, excluded_missing,
3908 d_weight, e_weight, 0, 0);
3909 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3910 d_weight, e_weight, 0, 0);
3916 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that merge items A and
   B refer to (element `ofs' of each item's `set').  The keys, in order, are
   the summary function, the percentile, and the label, with a missing label
   treated as an empty string.

   NOTE(review): the percentile key sorts in the opposite direction from the
   function key (a smaller percentile compares as greater).  This is still a
   consistent ordering, but confirm that the direction is intended. */
3921 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3923 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3924 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3925 if (as->function != bs->function)
3926 return as->function > bs->function ? 1 : -1;
3927 else if (as->percentile != bs->percentile)
3928 return as->percentile < bs->percentile ? 1 : -1;
3930 const char *as_label = as->label ? as->label : "";
3931 const char *bs_label = bs->label ? bs->label : "";
3932 return strcmp (as_label, bs_label);
/* Creates and returns a label for category CAT: the category's own
   `total_label' text for a total or subtotal, otherwise the pivot value for
   VALUE of variable VAR. */
3935 static struct pivot_value *
3936 ctables_category_create_label__ (const struct ctables_category *cat,
3937 const struct variable *var,
3938 const union value *value)
3940 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3941 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3942 : pivot_value_new_var_value (var, value));
/* Creates and returns the label for postcompute category CAT, starting from
   the postcompute's label text CAT->pc->label.

   Each occurrence of ")LABEL[n]" in the text is replaced by the label of the
   Nth category (counting from 1) in CATS, rendered for VAR and VALUE.  Text
   with no such reference is used unchanged.

   NOTE(review): a reference with no closing "]", with an index outside
   1...CATS->n_cats, or with characters that are not part of the number
   presumably falls through to the final statement, which returns the label
   text unexpanded -- confirm. */
3945 static struct pivot_value *
3946 ctables_postcompute_label (const struct ctables_categories *cats,
3947 const struct ctables_category *cat,
3948 const struct variable *var,
3949 const union value *value)
3951 struct substring in = ss_cstr (cat->pc->label);
3952 struct substring target = ss_cstr (")LABEL[");
3954 struct string out = DS_EMPTY_INITIALIZER;
/* No (further) reference: finish.  If nothing has been expanded so far, the
   remaining input can be used directly without going through `out'. */
3957 size_t chunk = ss_find_substring (in, target);
3958 if (chunk == SIZE_MAX)
3960 if (ds_is_empty (&out))
3961 return pivot_value_new_user_text (in.string, in.length);
3964 ds_put_substring (&out, in);
3965 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then skip over ")LABEL[". */
3969 ds_put_substring (&out, ss_head (in, chunk));
3970 ss_advance (&in, chunk + target.length);
/* Parse the index up to the closing bracket. */
3972 struct substring idx_s;
3973 if (!ss_get_until (&in, ']', &idx_s))
3976 long int idx = strtol (idx_s.string, &tail, 10);
3977 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
/* Substitute the referenced category's label, rendered as a string. */
3980 struct ctables_category *cat2 = &cats->cats[idx - 1];
3981 struct pivot_value *label2
3982 = ctables_category_create_label__ (cat2, var, value);
3983 char *label2_s = pivot_value_to_string_defaults (label2);
3984 ds_put_cstr (&out, label2_s);
3986 pivot_value_destroy (label2);
3991 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates and returns the label for category CAT, one of the categories in
   CATS, as it applies to VALUE of variable VAR.  A postcompute category that
   has a label of its own is labeled by ctables_postcompute_label(); every
   other category by ctables_category_create_label__(). */
3994 static struct pivot_value *
3995 ctables_category_create_label (const struct ctables_categories *cats,
3996 const struct ctables_category *cat,
3997 const struct variable *var,
3998 const union value *value)
4000 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
4001 ? ctables_postcompute_label (cats, cat, var, value)
4002 : ctables_category_create_label__ (cat, var, value));
/* Searches T->clabels_values_map for an entry whose value equals VALUE, which
   has the given WIDTH.  HASH must be the hash of VALUE as computed by
   ctables_value_insert() and ctables_value_find(), that is,
   value_hash (VALUE, WIDTH, 0). */
4005 static struct ctables_value *
4006 ctables_value_find__ (struct ctables_table *t, const union value *value,
4007 int width, unsigned int hash)
4009 struct ctables_value *clv;
4010 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4011 hash, &t->clabels_values_map)
4012 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The map is searched for VALUE first;
   the entry that is inserted holds its own copy of VALUE.

   NOTE(review): presumably nothing is inserted when the search finds an
   existing entry -- confirm. */
4018 ctables_value_insert (struct ctables_table *t, const union value *value,
4021 unsigned int hash = value_hash (value, width, 0);
4022 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4025 clv = xmalloc (sizeof *clv);
4026 value_clone (&clv->value, value, width);
4027 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, which has the given WIDTH, in T->clabels_values_map,
   computing the hash itself before delegating to ctables_value_find__(). */
4031 static struct ctables_value *
4032 ctables_value_find (struct ctables_table *t,
4033 const union value *value, int width)
4035 return ctables_value_find__ (t, value, width,
4036 value_hash (value, width, 0));
/* Adds to table T one section for each combination of nests across the axes.

   The function recurses over the axes starting from A.  For each axis it
   iterates IX[A] over the nests in that axis's stack, treating an empty
   stack as having a single entry.  Once an index has been chosen for every
   axis (A == PIVOT_N_AXES), a new section is appended to T->sections and its
   per-variable occurrence maps and per-domain-type maps are initialized.

   NOTE(review): presumably the caller has sized T->sections for the product
   of the per-axis limits -- confirm. */
4040 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4041 size_t ix[PIVOT_N_AXES])
4043 if (a < PIVOT_N_AXES)
4045 size_t limit = MAX (t->stacks[a].n, 1);
4046 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4047 ctables_table_add_section (t, a + 1, ix);
4051 struct ctables_section *s = &t->sections[t->n_sections++];
4052 *s = (struct ctables_section) {
4054 .cells = HMAP_INITIALIZER (s->cells),
4056 for (a = 0; a < PIVOT_N_AXES; a++)
4059 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4061 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4062 for (size_t i = 0; i < nest->n; i++)
4063 hmap_init (&s->occurrences[a][i]);
4065 for (size_t i = 0; i < N_CTDTS; i++)
4066 hmap_init (&s->domains[i]);
/* Postcompute operator callback, passed to
   ctables_pcexpr_evaluate_nonterminal() for addition.  Presumably returns
   A + B -- confirm. */
4071 ctpo_add (double a, double b)
/* Postcompute operator callback, passed to
   ctables_pcexpr_evaluate_nonterminal() for subtraction.  Presumably returns
   A - B -- confirm. */
4077 ctpo_sub (double a, double b)
/* Postcompute operator callback, passed to
   ctables_pcexpr_evaluate_nonterminal() for multiplication.  Presumably
   returns A * B -- confirm. */
4083 ctpo_mul (double a, double b)
/* Postcompute operator callback for division.  Returns A / B, or SYSMIS if B
   is zero. */
4089 ctpo_div (double a, double b)
4091 return b ? a / b : SYSMIS;
/* Postcompute operator callback for exponentiation, computed with pow().

   The caller's errno is saved before the call.  NOTE(review): presumably
   errno is used to detect a failed pow() and is restored afterward --
   confirm. */
4095 ctpo_pow (double a, double b)
4097 int save_errno = errno;
4099 double result = pow (a, b);
/* Postcompute operator callback for a unary operator.  It takes two
   arguments so that it has the same signature as the binary operator
   callbacks; B is unused.  Presumably returns -A -- confirm. */
4107 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression. */
4112 struct ctables_pcexpr_evaluate_ctx
/* The cell whose value is being computed. */
4114 const struct ctables_cell *cell;
/* The section that contains the cell; other cells are looked up here. */
4115 const struct ctables_section *section;
/* The categories that category references in the expression refer to. */
4116 const struct ctables_categories *cats;
/* The axis of the variable that has the postcompute category. */
4117 enum pivot_axis_type pc_a;
/* Passed to ctables_find_category_for_postcompute() when resolving category
   references. */
4120 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate() and
   ctables_pcexpr_evaluate_nonterminal() call each other. */
4123 static double ctables_pcexpr_evaluate (
4124 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E, which has N_ARGS operands (at most 2), in
   context CTX.

   Each operand is evaluated recursively and checked for being non-finite or
   SYSMIS.  The operator itself is applied by EVALUATE, which always receives
   two arguments; an operand that E does not have is passed as 0.

   NOTE(review): presumably an operand that fails the check makes the whole
   expression SYSMIS without calling EVALUATE -- confirm. */
4127 ctables_pcexpr_evaluate_nonterminal (
4128 const struct ctables_pcexpr_evaluate_ctx *ctx,
4129 const struct ctables_pcexpr *e, size_t n_args,
4130 double evaluate (double, double))
4132 double args[2] = { 0, 0 };
4133 for (size_t i = 0; i < n_args; i++)
4135 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4136 if (!isfinite (args[i]) || args[i] == SYSMIS)
4139 return evaluate (args[0], args[1]);
/* Returns the value of the summary with index CTX->summary_idx in the cell
   that has the same categories and values as CTX->cell, except that the
   variable at position CTX->pc_a_idx on axis CTX->pc_a takes the category
   and value in PC_CV instead.

   The target cell is located in the section's cell map by hashing and
   comparing identities in the same way as ctables_cell_insert__(). */
4143 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4144 const struct ctables_cell_value *pc_cv)
4146 const struct ctables_section *s = ctx->section;
/* Hash the target cell's identity. */
4149 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4151 const struct ctables_nest *nest = s->nests[a];
4152 for (size_t i = 0; i < nest->n; i++)
4153 if (i != nest->scale_idx)
4155 const struct ctables_cell_value *cv
4156 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4157 : &ctx->cell->axes[a].cvs[i]);
4158 hash = hash_pointer (cv->category, hash);
4159 if (cv->category->type != CCT_TOTAL
4160 && cv->category->type != CCT_SUBTOTAL
4161 && cv->category->type != CCT_POSTCOMPUTE)
4162 hash = value_hash (&cv->value,
4163 var_get_width (nest->vars[i]), hash);
/* Find the cell with that identity. */
4167 struct ctables_cell *tc;
4168 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4170 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4172 const struct ctables_nest *nest = s->nests[a];
4173 for (size_t i = 0; i < nest->n; i++)
4174 if (i != nest->scale_idx)
4176 const struct ctables_cell_value *p_cv
4177 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4178 : &ctx->cell->axes[a].cvs[i]);
4179 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4180 if (p_cv->category != t_cv->category
4181 || (p_cv->category->type != CCT_TOTAL
4182 && p_cv->category->type != CCT_SUBTOTAL
4183 && p_cv->category->type != CCT_POSTCOMPUTE
4184 && !value_equal (&p_cv->value,
4186 var_get_width (nest->vars[i]))))
/* Compute the requested summary from the cell that was found. */
4198 const struct ctables_table *t = s->table;
4199 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4200 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4201 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4202 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in the context CTX and returns the
   result.  Category operands are resolved to a category with
   ctables_find_category_for_postcompute() and evaluated with
   ctables_pcexpr_evaluate_category(); operators are handed to
   ctables_pcexpr_evaluate_nonterminal(). */
4206 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4207 const struct ctables_pcexpr *e)
/* Numeric and string ranges: add up the value for every recorded occurrence
   of the postcompute variable that matches the range's category. */
4214 case CTPO_CAT_NRANGE:
4215 case CTPO_CAT_SRANGE:
4217 struct ctables_cell_value cv = {
4218 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4220 assert (cv.category != NULL);
4222 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4223 const struct ctables_occurrence *o;
4226 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4227 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4228 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4230 cv.value = o->value;
4231 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* These operand types evaluate a single category, with E->number supplied as
   the numeric value. */
4236 case CTPO_CAT_NUMBER:
4237 case CTPO_CAT_MISSING:
4238 case CTPO_CAT_OTHERNM:
4239 case CTPO_CAT_SUBTOTAL:
4240 case CTPO_CAT_TOTAL:
4242 struct ctables_cell_value cv = {
4243 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4244 .value = { .f = e->number },
4246 assert (cv.category != NULL);
4247 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand: when the variable is wider than the string, a copy padded
   on the right with spaces to the variable's width is used as the value. */
4250 case CTPO_CAT_STRING:
4252 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4254 if (width > e->string.length)
4256 s = xmalloc (width);
4257 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4259 struct ctables_cell_value cv = {
4260 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4261 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4263 assert (cv.category != NULL);
4264 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: the integer argument is the number of operands and the last
   argument is the function that combines them. */
4270 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4273 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4276 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4279 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4282 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4285 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Locates the postcompute category among CELL's category values in section
   S.  CELL must be a postcompute cell (asserted).  Every axis and every
   position in that axis's nest is examined for a category whose type is
   CCT_POSTCOMPUTE; the position within the nest is stored in *PC_A_IDX_P.
   NOTE(review): PC_A_P presumably receives the axis in the same way; confirm
   against the full function. */
4291 static const struct ctables_category *
4292 ctables_cell_postcompute (const struct ctables_section *s,
4293 const struct ctables_cell *cell,
4294 enum pivot_axis_type *pc_a_p,
4297 assert (cell->postcompute);
4298 const struct ctables_category *pc_cat = NULL;
4299 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4300 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4302 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4303 if (cv->category->type == CCT_POSTCOMPUTE)
4307 /* Multiple postcomputes cross each other. The value is
4312 pc_cat = cv->category;
4316 *pc_a_idx_p = pc_a_idx;
4320 assert (pc_cat != NULL);
/* Computes the value of postcompute cell CELL in section S for summary
   spec SS.  If the postcompute carries its own summary specs and one of them
   has the same function and percentile as SS, *FORMAT and *IS_CTABLES_FORMAT
   are overwritten from that spec.  The result is the value of the
   postcompute's expression. */
4325 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4326 const struct ctables_cell *cell,
4327 const struct ctables_summary_spec *ss,
4328 struct fmt_spec *format,
4329 bool *is_ctables_format,
/* Find which axis and nest position hold the postcompute category. */
4332 enum pivot_axis_type pc_a = 0;
4333 size_t pc_a_idx = 0;
4334 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4335 s, cell, &pc_a, &pc_a_idx);
4339 const struct ctables_postcompute *pc = pc_cat->pc;
/* Let a matching postcompute-specific summary spec override the format. */
4342 for (size_t i = 0; i < pc->specs->n; i++)
4344 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4345 if (ss->function == ss2->function
4346 && ss->percentile == ss2->percentile)
4348 *format = ss2->format;
4349 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression using the category specification of the variable
   that the postcompute applies to. */
4355 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4356 const struct ctables_categories *cats = s->table->categories[
4357 var_get_dict_index (var)];
4358 struct ctables_pcexpr_evaluate_ctx ctx = {
4363 .pc_a_idx = pc_a_idx,
4364 .summary_idx = summary_idx,
4365 .parse_format = pc_cat->parse_format,
4367 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D as UTF-8 text according to FORMAT and SETTINGS using
   data_out_stretchy(), then adjusts the position of a minus sign in the
   result as explained in the comment below. */
4371 ctables_format (double d, const struct fmt_spec *format,
4372 const struct fmt_settings *settings)
4374 const union value v = { .f = d };
4375 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4377 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4378 produce the results we want for negative numbers, putting the negative
4379 sign in the wrong spot, before the prefix instead of after it. We can't,
4380 in fact, produce the desired results using a custom-currency
4381 specification. Instead, we postprocess the output, moving the negative
4384 NEQUAL: "-N=3" => "N=-3"
4385 PAREN: "-(3)" => "(-3)"
4386 PCTPAREN: "-(3%)" => "(-3%)"
4388 This transformation doesn't affect NEGPAREN. */
/* Only the first '-' in the text is considered, and it is left alone when the
   character just before it is 'E'.  The destination is the '=' of "N=" when
   that substring is present, otherwise the first '('. */
4389 char *minus_src = strchr (s, '-');
4390 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4392 char *n_equals = strstr (s, "N=");
4393 char *lparen = strchr (s, '(');
4394 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4396 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Builds a pivot table from the cells accumulated for table T of CTABLES
   command CT and submits it with pivot_table_submit().

   The title, caption, and corner text come from T.  Optional "Statistics"
   and row/column "Categories" dimensions are created first, then one
   dimension per axis, and finally every cell's summary values are stored
   with pivot_table_put().

   NOTE(review): where adjacent sorted cells are compared to find their common
   label levels, value_equal() is passed var_get_type() as its last argument,
   whereas the other value_equal() and value_hash() calls in this file pass
   var_get_width().  Confirm whether that is intended. */
4402 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4404 struct pivot_table *pt = pivot_table_create__ (
4406 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4407 : pivot_value_new_text (N_("Custom Tables"))),
4410 pivot_table_set_caption (
4411 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4413 pivot_table_set_corner_text (
4414 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* A separate dimension for summary statistics is used when the summary labels
   are on a different axis from the summaries, or when they are hidden and
   there is more than one summary spec. */
4416 bool summary_dimension = (t->summary_axis != t->slabels_axis
4417 || (!t->slabels_visible
4418 && t->summary_specs.n > 1));
4419 if (summary_dimension)
4421 struct pivot_dimension *d = pivot_dimension_create (
4422 pt, t->slabels_axis, N_("Statistics"));
4423 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4424 if (!t->slabels_visible)
4425 d->hide_all_labels = true;
4426 for (size_t i = 0; i < specs->n; i++)
4427 pivot_category_create_leaf (
4428 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* A separate dimension for category labels is used when T has a
   clabels_example variable; it gets one leaf per entry in
   T->clabels_values. */
4431 bool categories_dimension = t->clabels_example != NULL;
4432 if (categories_dimension)
4434 struct pivot_dimension *d = pivot_dimension_create (
4435 pt, t->label_axis[t->clabels_from_axis],
4436 t->clabels_from_axis == PIVOT_AXIS_ROW
4437 ? N_("Row Categories")
4438 : N_("Column Categories"));
4439 const struct variable *var = t->clabels_example;
4440 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4441 for (size_t i = 0; i < t->n_clabels_values; i++)
4443 const struct ctables_value *value = t->clabels_values[i];
4444 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4445 assert (cat != NULL);
4446 pivot_category_create_leaf (d->root, ctables_category_create_label (
4447 c, cat, t->clabels_example,
4452 pivot_table_set_look (pt, ct->look);
/* Create a dimension for each axis that has variables or that carries the
   summaries, and fill in its categories from the cells. */
4453 struct pivot_dimension *d[PIVOT_N_AXES];
4454 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4456 static const char *names[] = {
4457 [PIVOT_AXIS_ROW] = N_("Rows"),
4458 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4459 [PIVOT_AXIS_LAYER] = N_("Layers"),
4461 d[a] = (t->axes[a] || a == t->summary_axis
4462 ? pivot_dimension_create (pt, a, names[a])
4467 assert (t->axes[a]);
4469 for (size_t i = 0; i < t->stacks[a].n; i++)
4471 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis A, totalling their cell
   counts and tracking the largest nest depth. */
4472 struct ctables_section **sections = xnmalloc (t->n_sections,
4474 size_t n_sections = 0;
4476 size_t n_total_cells = 0;
4477 size_t max_depth = 0;
4478 for (size_t j = 0; j < t->n_sections; j++)
4479 if (t->sections[j].nests[a] == nest)
4481 struct ctables_section *s = &t->sections[j];
4482 sections[n_sections++] = s;
4483 n_total_cells += s->cells.count;
4485 size_t depth = s->nests[a]->n;
4486 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into one array and sort it with
   ctables_cell_compare_3way(). */
4489 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4491 size_t n_sorted = 0;
4493 for (size_t j = 0; j < n_sections; j++)
4495 struct ctables_section *s = sections[j];
4497 struct ctables_cell *cell;
4498 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4500 sorted[n_sorted++] = cell;
4501 assert (n_sorted <= n_total_cells);
4504 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4505 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4508 for (size_t j = 0; j < n_sorted; j++)
4510 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4515 struct ctables_level
4517 enum ctables_level_type
4519 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4520 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4521 CTL_SUMMARY, /* Summary functions. */
4525 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Decide which label levels this nest contributes.  The array is sized for
   two levels per variable plus one for the summaries. */
4528 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4529 size_t n_levels = 0;
4530 for (size_t k = 0; k < nest->n; k++)
4532 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4533 if (vlabel != CTVL_NONE)
4535 levels[n_levels++] = (struct ctables_level) {
4537 .vlabel = (enum settings_value_show) vlabel,
4542 if (nest->scale_idx != k
4543 && (k != nest->n - 1 || t->label_axis[a] == a))
4545 levels[n_levels++] = (struct ctables_level) {
4546 .type = CTL_CATEGORY,
4552 if (!summary_dimension && a == t->slabels_axis)
4554 levels[n_levels++] = (struct ctables_level) {
4555 .type = CTL_SUMMARY,
4556 .var_idx = SIZE_MAX,
4560 /* Pivot categories:
4562 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4563 - category for nest->vars[0], if nest->scale_idx != 0
4564 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4565 - category for nest->vars[1], if nest->scale_idx != 1
4567 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4568 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4569 - summary function, if 'a == t->slabels_axis && a ==
4572 Additional dimensions:
4574 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4576 - If 't->label_axis[b] == a' for some 'b != a', add a category
4581 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4583 for (size_t j = 0; j < n_sorted; j++)
4585 struct ctables_cell *cell = sorted[j];
4586 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4588 size_t n_common = 0;
4591 for (; n_common < n_levels; n_common++)
4593 const struct ctables_level *level = &levels[n_common];
4594 if (level->type == CTL_CATEGORY)
4596 size_t var_idx = level->var_idx;
4597 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4598 if (prev->axes[a].cvs[var_idx].category != c)
4600 else if (c->type != CCT_SUBTOTAL
4601 && c->type != CCT_TOTAL
4602 && c->type != CCT_POSTCOMPUTE
4603 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4604 &cell->axes[a].cvs[var_idx].value,
4605 var_get_type (nest->vars[var_idx])))
4611 for (size_t k = n_common; k < n_levels; k++)
4613 const struct ctables_level *level = &levels[k];
4614 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4615 if (level->type == CTL_SUMMARY)
4617 assert (k == n_levels - 1);
4619 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4620 for (size_t m = 0; m < specs->n; m++)
4622 int leaf = pivot_category_create_leaf (
4623 parent, ctables_summary_label (&specs->specs[m],
4631 const struct variable *var = nest->vars[level->var_idx];
4632 struct pivot_value *label;
4633 if (level->type == CTL_VAR)
4635 label = pivot_value_new_variable (var);
4636 label->variable.show = level->vlabel;
4638 else if (level->type == CTL_CATEGORY)
4640 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4641 label = ctables_category_create_label (
4642 t->categories[var_get_dict_index (var)],
4643 cv->category, var, &cv->value);
4648 if (k == n_levels - 1)
4649 prev_leaf = pivot_category_create_leaf (parent, label);
4651 groups[k] = pivot_category_create_group__ (parent, label);
4655 cell->axes[a].leaf = prev_leaf;
4664 for (size_t i = 0; i < t->n_sections; i++)
4666 struct ctables_section *s = &t->sections[i];
4668 struct ctables_cell *cell;
4669 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4674 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4675 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4676 for (size_t j = 0; j < specs->n; j++)
4679 size_t n_dindexes = 0;
4681 if (summary_dimension)
4682 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4684 if (categories_dimension)
4686 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4687 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4688 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4689 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4692 dindexes[n_dindexes++] = ctv->leaf;
4695 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4698 int leaf = cell->axes[a].leaf;
4699 if (a == t->summary_axis && !summary_dimension)
4701 dindexes[n_dindexes++] = leaf;
4704 const struct ctables_summary_spec *ss = &specs->specs[j];
4706 struct fmt_spec format = specs->specs[j].format;
4707 bool is_ctables_format = ss->is_ctables_format;
4708 double d = (cell->postcompute
4709 ? ctables_cell_calculate_postcompute (
4710 s, cell, ss, &format, &is_ctables_format, j)
4711 : ctables_summary_value (cell, &cell->summaries[j],
4714 struct pivot_value *value;
4715 if (ct->hide_threshold != 0
4716 && d < ct->hide_threshold
4717 && ctables_summary_function_is_count (ss->function))
4719 value = pivot_value_new_user_text_nocopy (
4720 xasprintf ("<%d", ct->hide_threshold));
4722 else if (d == 0 && ct->zero)
4723 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4724 else if (d == SYSMIS && ct->missing)
4725 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4726 else if (is_ctables_format)
4727 value = pivot_value_new_user_text_nocopy (
4728 ctables_format (d, &format, &ct->ctables_formats));
4731 value = pivot_value_new_number (d);
4732 value->numeric.format = format;
4734 /* XXX should text values be right-justified? */
4735 pivot_table_put (pt, dindexes, n_dindexes, value);
/* Hand the completed table to the output subsystem. */
4740 pivot_table_submit (pt);
/* Validates the request recorded in T->label_axis[A] to place the category
   labels of axis A somewhere else.  On the path shown, A is recorded in
   T->clabels_from_axis and the innermost variable of the axis's first nest in
   T->clabels_example.  An error is reported with msg() when a category
   specification sorts on a summary function, when an innermost variable is a
   scale variable, or when the innermost variables of the stacked nests differ
   in width, value labels, or category specifications.

   The caller combines the results for rows and columns with &&. */
4744 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4746 enum pivot_axis_type label_pos = t->label_axis[a];
4750 t->clabels_from_axis = a;
/* Names used only in the error messages below. */
4752 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4753 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4755 const struct ctables_stack *stack = &t->stacks[a];
4759 const struct ctables_nest *n0 = &stack->nests[0];
4762 assert (stack->n == 1);
/* The innermost variable of the first nest is the reference that every other
   nest's innermost variable is compared with. */
4766 const struct variable *v0 = n0->vars[n0->n - 1];
4767 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4768 t->clabels_example = v0;
4770 for (size_t i = 0; i < c0->n_cats; i++)
4771 if (c0->cats[i].type == CCT_FUNCTION)
4773 msg (SE, _("%s=%s is not allowed with sorting based "
4774 "on a summary function."),
4775 subcommand_name, pos_name);
4778 if (n0->n - 1 == n0->scale_idx)
4780 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4781 "but %s is a scale variable."),
4782 subcommand_name, pos_name, var_get_name (v0));
/* Check each remaining nest's innermost variable against the reference. */
4786 for (size_t i = 1; i < stack->n; i++)
4788 const struct ctables_nest *ni = &stack->nests[i];
4790 const struct variable *vi = ni->vars[ni->n - 1];
4791 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4793 if (ni->n - 1 == ni->scale_idx)
4795 msg (SE, _("%s=%s requires the variables to be moved to be "
4796 "categorical, but %s is a scale variable."),
4797 subcommand_name, pos_name, var_get_name (vi));
4800 if (var_get_width (v0) != var_get_width (vi))
4802 msg (SE, _("%s=%s requires the variables to be "
4803 "moved to have the same width, but %s has "
4804 "width %d and %s has width %d."),
4805 subcommand_name, pos_name,
4806 var_get_name (v0), var_get_width (v0),
4807 var_get_name (vi), var_get_width (vi));
4810 if (!val_labs_equal (var_get_value_labels (v0),
4811 var_get_value_labels (vi)))
4813 msg (SE, _("%s=%s requires the variables to be "
4814 "moved to have the same value labels, but %s "
4815 "and %s have different value labels."),
4816 subcommand_name, pos_name,
4817 var_get_name (v0), var_get_name (vi));
4820 if (!ctables_categories_equal (c0, ci))
4822 msg (SE, _("%s=%s requires the variables to be "
4823 "moved to have the same category "
4824 "specifications, but %s and %s have different "
4825 "category specifications."),
4826 subcommand_name, pos_name,
4827 var_get_name (v0), var_get_name (vi));
/* Looks for VAR among the *N variables in *SUM_VARS.  Otherwise stores VAR at
   position *N, first growing the array with x2nrealloc() if *N has reached
   *ALLOCATED.  The caller stores the return value as a summary spec's
   sum_var_idx, so it is presumably VAR's index in *SUM_VARS; confirm against
   the full function. */
4836 add_sum_var (struct variable *var,
4837 struct variable ***sum_vars, size_t *n, size_t *allocated)
4839 for (size_t i = 0; i < *n; i++)
4840 if (var == (*sum_vars)[i])
4843 if (*n >= *allocated)
4844 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4845 (*sum_vars)[*n] = var;
/* Walks axis tree A.  For every summary spec, in each of the N_CSVS summary
   variants, whose function satisfies ctables_function_is_pctsum(), registers
   A->var with add_sum_var() and records the result in the spec's
   sum_var_idx.  Also recurses into both of A's sub-axes. */
4850 enumerate_sum_vars (const struct ctables_axis *a,
4851 struct variable ***sum_vars, size_t *n, size_t *allocated)
4859 for (size_t i = 0; i < N_CSVS; i++)
4860 for (size_t j = 0; j < a->specs[i].n; j++)
4862 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4863 if (ctables_function_is_pctsum (spec->function))
4864 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4870 for (size_t i = 0; i < 2; i++)
4871 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution:

   - Enumerates the nests on each axis into T->stacks[] and builds each nest's
     per-domain lists of variable indexes.

   - Supplies a default summary spec (mean for scale, count otherwise) for
     nests on the summary axis that have none, and clones cell specs into the
     total specs where those are missing.

   - With listwise missing-value treatment, records the scale variables of
     the other nests in the same group as listwise variables.

   - Merges the summary specs of all nests into T->summary_specs, assigning
     each spec its axis_idx in the merged list.

   - Enumerates the variables used by percentage-of-sum functions.

   Returns the conjunction of ctables_check_label_position() for the row and
   column axes. */
4877 ctables_prepare_table (struct ctables_table *t)
4879 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4882 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4884 for (size_t j = 0; j < t->stacks[a].n; j++)
4886 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* For each domain type, choose which positions of this nest take part; the
   chosen indexes are appended to nest->domains[dt]. */
4887 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4889 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4890 nest->n_domains[dt] = 0;
4892 for (size_t k = 0; k < nest->n; k++)
4894 if (k == nest->scale_idx)
4903 if (a != PIVOT_AXIS_LAYER)
4910 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4911 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4912 : a == PIVOT_AXIS_ROW)
4914 if (k == nest->n - 1
4915 || (nest->scale_idx == nest->n - 1
4916 && k == nest->n - 2))
4922 if (a == PIVOT_AXIS_COLUMN)
4927 if (a == PIVOT_AXIS_ROW)
4932 nest->domains[dt][nest->n_domains[dt]++] = k;
/* This stack consists of a single nest with no variables. */
4939 struct ctables_nest *nest = xmalloc (sizeof *nest);
4940 *nest = (struct ctables_nest) { .n = 0 };
4941 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4943 /* There's no point in moving labels away from an axis that has no
4944 labels, so avoid dealing with the special cases around that. */
4945 t->label_axis[a] = a;
/* Fill in default summary specs for the nests on the summary axis. */
4948 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4949 for (size_t i = 0; i < stack->n; i++)
4951 struct ctables_nest *nest = &stack->nests[i];
4952 if (!nest->specs[CSV_CELL].n)
4954 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4955 specs->specs = xmalloc (sizeof *specs->specs);
4958 enum ctables_summary_function function
4959 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4961 *specs->specs = (struct ctables_summary_spec) {
4962 .function = function,
4963 .format = ctables_summary_default_format (function, specs->var),
4966 specs->var = nest->vars[0];
4968 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4969 &nest->specs[CSV_CELL]);
4971 else if (!nest->specs[CSV_TOTAL].n)
4972 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4973 &nest->specs[CSV_CELL]);
/* Listwise missing values: gather the scale variables of the other nests that
   share this nest's group_head, and give each summary variant of this nest
   its own copy of that list. */
4975 if (t->ctables->smissing_listwise)
4977 struct variable **listwise_vars = NULL;
4979 size_t allocated = 0;
4981 for (size_t j = nest->group_head; j < stack->n; j++)
4983 const struct ctables_nest *other_nest = &stack->nests[j];
4984 if (other_nest->group_head != nest->group_head)
4987 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4990 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4991 sizeof *listwise_vars);
4992 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4995 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4998 listwise_vars = xmemdup (listwise_vars,
4999 n * sizeof *listwise_vars);
5000 nest->specs[sv].listwise_vars = listwise_vars;
5001 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests and summary variants into
   T->summary_specs.  Each round takes the minimum item according to
   merge_item_compare_3way(), appends its current spec to the merged list, and
   advances every item that compares equal to it, dropping items that are
   exhausted. */
5006 struct ctables_summary_spec_set *merged = &t->summary_specs;
5007 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5009 for (size_t j = 0; j < stack->n; j++)
5011 const struct ctables_nest *nest = &stack->nests[j];
5013 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5014 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5019 struct merge_item min = items[0];
5020 for (size_t j = 1; j < n_left; j++)
5021 if (merge_item_compare_3way (&items[j], &min) < 0)
5024 if (merged->n >= merged->allocated)
5025 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5026 sizeof *merged->specs);
5027 merged->specs[merged->n++] = min.set->specs[min.ofs];
5029 for (size_t j = 0; j < n_left; )
5031 if (merge_item_compare_3way (&items[j], &min) == 0)
5033 struct merge_item *item = &items[j];
5034 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5035 if (++item->ofs >= item->set->n)
5037 items[j] = items[--n_left];
/* Diagnostic printing of the merged specs and of each nest's axis_idx
   assignments. */
5047 for (size_t j = 0; j < merged->n; j++)
5048 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5050 for (size_t j = 0; j < stack->n; j++)
5052 const struct ctables_nest *nest = &stack->nests[j];
5053 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5055 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5056 for (size_t k = 0; k < specs->n; k++)
5057 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5058 specs->specs[k].axis_idx);
/* Collect the variables needed by percentage-of-sum functions. */
5064 size_t allocated_sum_vars = 0;
5065 enumerate_sum_vars (t->axes[t->summary_axis],
5066 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5068 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5069 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest stacked on axis A of table T, takes the value in case C of
   the nest's innermost variable and, subject to the checks below, records it
   with ctables_value_insert().  Numeric system-missing values and the result
   of ctables_categories_match() against the variable's category
   specification are both tested first. */
5073 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5074 enum pivot_axis_type a)
5076 struct ctables_stack *stack = &t->stacks[a];
5077 for (size_t i = 0; i < stack->n; i++)
5079 const struct ctables_nest *nest = &stack->nests[i];
5080 const struct variable *var = nest->vars[nest->n - 1];
5081 const union value *value = case_data (c, var);
5083 if (var_is_numeric (var) && value->f == SYSMIS)
5086 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5088 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sorting an array of pointers to struct
   ctables_value.  A_ and B_ each point to an array element, and WIDTH_ points
   to the int width of the values.  Returns the result of
   value_compare_3way() on the two values. */
5093 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5095 const struct ctables_value *const *ap = a_;
5096 const struct ctables_value *const *bp = b_;
5097 const struct ctables_value *a = *ap;
5098 const struct ctables_value *b = *bp;
5099 const int *width = width_;
5100 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, a sorted array of the values collected in
   T->clabels_values_map, and numbers the values' leaf members in sorted
   order.  T->clabels_example supplies the width and the category
   specification. */
5104 ctables_sort_clabels_values (struct ctables_table *t)
5106 const struct variable *v0 = t->clabels_example;
5107 int width = var_get_width (v0);
5109 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add the variable's labeled values that match a category. */
5112 const struct val_labs *val_labs = var_get_value_labels (v0);
5113 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5114 vl = val_labs_next (val_labs, vl))
5115 if (ctables_categories_match (c0, &vl->value, v0))
5116 ctables_value_insert (t, &vl->value, width);
/* Copy the map's entries into an array and sort the array by value. */
5119 size_t n = hmap_count (&t->clabels_values_map);
5120 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5122 struct ctables_value *clv;
5124 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5125 t->clabels_values[i++] = clv;
5126 t->n_clabels_values = n;
5129 sort (t->clabels_values, n, sizeof *t->clabels_values,
5130 compare_clabels_values_3way, &width);
/* Each value's leaf is its position in the sorted array. */
5132 for (size_t i = 0; i < n; i++)
5133 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR implied by the categories in CATS.
   Explicit numbers and strings are added directly (strings padded on the
   right with spaces to VAR's width).  The remaining visible cases add those
   of VAR's labeled values that fall in a numeric or string range, that are
   missing values, or that pass the category's include_missing setting; one
   case adds every labeled value. */
5137 ctables_add_category_occurrences (const struct variable *var,
5138 struct hmap *occurrences,
5139 const struct ctables_categories *cats)
5141 const struct val_labs *val_labs = var_get_value_labels (var);
5143 for (size_t i = 0; i < cats->n_cats; i++)
5145 const struct ctables_category *c = &cats->cats[i];
5149 ctables_add_occurrence (var, &(const union value) { .f = c->number },
5155 int width = var_get_width (var);
5157 value_init (&value, width);
5158 value_copy_buf_rpad (&value, width,
5159 CHAR_CAST (uint8_t *, c->string.string),
5160 c->string.length, ' ');
5161 ctables_add_occurrence (var, &value, occurrences);
5162 value_destroy (&value, width);
/* Numeric range: both bounds are inclusive. */
5167 assert (var_is_numeric (var));
5168 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5169 vl = val_labs_next (val_labs, vl))
5170 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5171 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range. */
5175 assert (var_is_alpha (var));
5176 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5177 vl = val_labs_next (val_labs, vl))
5178 if (in_string_range (&vl->value, var, c->srange))
5179 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5183 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5184 vl = val_labs_next (val_labs, vl))
5185 if (var_is_value_missing (var, &vl->value))
5186 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5190 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5191 vl = val_labs_next (val_labs, vl))
5192 ctables_add_occurrence (var, &vl->value, occurrences);
5195 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing values unless the category includes
   them. */
5205 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5206 vl = val_labs_next (val_labs, vl))
5207 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5208 ctables_add_occurrence (var, &vl->value, occurrences);
5211 case CCT_EXCLUDED_MISSING:
/* Recursive helper that enumerates combinations of category values for
   section S, one variable at a time, starting from position A_IDX on axis A.
   The value chosen for each variable is written into case C and its category
   into CATS[A][A_IDX].  Once all axes have been handled, the combination is
   inserted with ctables_cell_insert__(). */
5218 ctables_section_recurse_add_empty_categories (
5219 struct ctables_section *s,
5220 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5221 enum pivot_axis_type a, size_t a_idx)
5223 if (a >= PIVOT_N_AXES)
5224 ctables_cell_insert__ (s, c, cats);
/* This axis is exhausted (or has no nest): move on to the next axis. */
5225 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5226 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5229 const struct variable *var = s->nests[a]->vars[a_idx];
5230 const struct ctables_categories *categories = s->table->categories[
5231 var_get_dict_index (var)];
5232 int width = var_get_width (var);
/* Try every recorded occurrence of this variable: replace the variable's
   value in C with it, look up its category, and recurse to the next
   variable. */
5233 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5234 const struct ctables_occurrence *o;
5235 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5237 union value *value = case_data_rw (c, var);
5238 value_destroy (value, width);
5239 value_clone (value, &o->value, width);
5240 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5241 assert (cats[a][a_idx] != NULL);
5242 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are not tied to an occurrence, so recurse once for
   each of them as well. */
5245 for (size_t i = 0; i < categories->n_cats; i++)
5247 const struct ctables_category *cat = &categories->cats[i];
5248 if (cat->type == CCT_POSTCOMPUTE)
5250 cats[a][a_idx] = cat;
5251 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Examines the non-scale variables in section S's nests on every axis.  For
   variables whose category specification has show_empty set, the visible
   code adds occurrences with ctables_add_category_occurrences().  It then
   creates a scratch case from the dictionary's prototype and lets
   ctables_section_recurse_add_empty_categories() enumerate the combinations
   of categories. */
5258 ctables_section_add_empty_categories (struct ctables_section *s)
5260 bool show_empty = false;
5261 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5263 for (size_t k = 0; k < s->nests[a]->n; k++)
5264 if (k != s->nests[a]->scale_idx)
5266 const struct variable *var = s->nests[a]->vars[k];
5267 const struct ctables_categories *cats = s->table->categories[
5268 var_get_dict_index (var)];
5269 if (cats->show_empty)
5272 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension of CATS is a fixed 10 entries, flagged XXX by the
   original author. */
5278 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5279 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5280 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Empties section S of accumulated data: the occurrences of every non-scale
   variable, all cells (with their category values and summaries), and all
   domains.  The hash maps themselves are kept and shrunk, not destroyed. */
5285 ctables_section_clear (struct ctables_section *s)
/* Occurrences: destroy each value and unlink it from its map. */
5287 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5289 const struct ctables_nest *nest = s->nests[a];
5290 for (size_t i = 0; i < nest->n; i++)
5291 if (i != nest->scale_idx)
5293 const struct variable *var = nest->vars[i];
5294 int width = var_get_width (var);
5295 struct ctables_occurrence *o, *next;
5296 struct hmap *map = &s->occurrences[a][i];
5297 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5299 value_destroy (&o->value, width);
5300 hmap_delete (map, &o->node);
/* Cells: release the per-axis values and the summaries, then unlink. */
5307 struct ctables_cell *cell, *next_cell;
5308 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5310 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5312 const struct ctables_nest *nest = s->nests[a];
5313 for (size_t i = 0; i < nest->n; i++)
5314 if (i != nest->scale_idx)
5315 value_destroy (&cell->axes[a].cvs[i].value,
5316 var_get_width (nest->vars[i]));
5317 free (cell->axes[a].cvs);
5320 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5321 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5322 for (size_t i = 0; i < specs->n; i++)
5323 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5324 free (cell->summaries);
5326 hmap_delete (&s->cells, &cell->node);
5329 hmap_shrink (&s->cells);
/* Domains: free each domain's sums and unlink it. */
5331 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5333 struct ctables_domain *domain, *next_domain;
5334 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5337 free (domain->sums);
5338 hmap_delete (&s->domains[dt], &domain->node);
5341 hmap_shrink (&s->domains[dt]);
/* Releases the storage owned by section S: clears its contents with
   ctables_section_clear(), then destroys the occurrence maps of each axis and
   frees their arrays, and destroys the cell and domain maps. */
5346 ctables_section_uninit (struct ctables_section *s)
5348 ctables_section_clear (s);
5350 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5352 struct ctables_nest *nest = s->nests[a];
5353 for (size_t i = 0; i < nest->n; i++)
5354 hmap_destroy (&s->occurrences[a][i]);
5355 free (s->occurrences[a]);
5358 hmap_destroy (&s->cells);
5359 for (size_t i = 0; i < N_CTDTS; i++)
5360 hmap_destroy (&s->domains[i]);
/* Clears the data accumulated in table T: every section is cleared and, if T
   has a clabels_example variable, the collected category-label values are
   destroyed and the sorted array of them is freed and reset. */
5364 ctables_table_clear (struct ctables_table *t)
5366 for (size_t i = 0; i < t->n_sections; i++)
5367 ctables_section_clear (&t->sections[i]);
5369 if (t->clabels_example)
5371 int width = var_get_width (t->clabels_example);
5372 struct ctables_value *value, *next_value;
5373 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5374 &t->clabels_values_map)
5376 value_destroy (&value->value, width);
5377 hmap_delete (&t->clabels_values_map, &value->node);
5380 hmap_shrink (&t->clabels_values_map);
5382 free (t->clabels_values);
5383 t->clabels_values = NULL;
5384 t->n_clabels_values = 0;
/* Runs the CTABLES analysis over the cases in INPUT, which come from dataset
   DS.  Sections are allocated for every table, the input is divided into
   groups (by split-file variables when the split type is SPLIT_SEPARATE,
   otherwise a single grouping with no variables), each case is accumulated
   into every section of every table, and each table is then sorted,
   completed with empty categories, output, and cleared.

   Returns the result of casegrouper_destroy(). */
5389 ctables_execute (struct dataset *ds, struct casereader *input,
/* Allocate room for one section per combination of the nests stacked on the
   three axes (treating an empty axis as one), then create the sections. */
5392 for (size_t i = 0; i < ct->n_tables; i++)
5394 struct ctables_table *t = ct->tables[i];
5395 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5396 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5397 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5398 sizeof *t->sections);
5399 size_t ix[PIVOT_N_AXES];
5400 ctables_table_add_section (t, 0, ix);
5403 struct dictionary *dict = dataset_dict (ds);
5404 struct casegrouper *grouper
5405 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5406 ? casegrouper_create_splits (input, dict)
5407 : casegrouper_create_vars (input, NULL, 0));
5408 struct casereader *group;
5409 while (casegrouper_get_next_group (grouper, &group))
5411 /* Output SPLIT FILE variables. */
5412 struct ccase *c = casereader_peek (group, 0);
5415 output_split_file_values (ds, c);
/* Accumulate every case, with its dictionary weight and its effective
   weight, into each section of each table.  Values for category labels that
   are being moved off their own axis are collected as well. */
5419 bool warn_on_invalid = true;
5420 for (c = casereader_read (group); c;
5421 case_unref (c), c = casereader_read (group))
5423 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5424 double e_weight = (ct->e_weight
5425 ? var_force_valid_weight (ct->e_weight,
5426 case_num (c, ct->e_weight),
5430 for (size_t i = 0; i < ct->n_tables; i++)
5432 struct ctables_table *t = ct->tables[i];
5434 for (size_t j = 0; j < t->n_sections; j++)
5435 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
5437 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5438 if (t->label_axis[a] != a)
5439 ctables_insert_clabels_values (t, c, a);
5442 casereader_destroy (group);
/* Finish and emit each table, then clear its accumulated data. */
5444 for (size_t i = 0; i < ct->n_tables; i++)
5446 struct ctables_table *t = ct->tables[i];
5448 if (t->clabels_example)
5449 ctables_sort_clabels_values (t);
5451 for (size_t j = 0; j < t->n_sections; j++)
5452 ctables_section_add_empty_categories (&t->sections[j]);
5454 ctables_table_output (ct, t);
5455 ctables_table_clear (t);
5458 return casegrouper_destroy (grouper);
/* Type of a function that parses one level of a postcompute expression from
   LEXER, using DICT, and returns the resulting expression node.  The binary
   operator parsers take such a function to parse each of their operands. */
typedef struct ctables_pcexpr *parse_recursively_func (
  struct lexer *lexer, struct dictionary *dict);
5467 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5473 case CTPO_CAT_STRING:
5474 ss_dealloc (&e->string);
5477 case CTPO_CAT_SRANGE:
5478 for (size_t i = 0; i < 2; i++)
5479 ss_dealloc (&e->srange[i]);
5488 for (size_t i = 0; i < 2; i++)
5489 ctables_pcexpr_destroy (e->subs[i]);
5493 case CTPO_CAT_NUMBER:
5494 case CTPO_CAT_NRANGE:
5495 case CTPO_CAT_MISSING:
5496 case CTPO_CAT_OTHERNM:
5497 case CTPO_CAT_SUBTOTAL:
5498 case CTPO_CAT_TOTAL:
5502 msg_location_destroy (e->location);
5507 static struct ctables_pcexpr *
5508 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5509 struct ctables_pcexpr *sub0,
5510 struct ctables_pcexpr *sub1)
5512 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5513 *e = (struct ctables_pcexpr) {
5515 .subs = { sub0, sub1 },
5516 .location = msg_location_merged (sub0->location, sub1->location),
5521 /* How to parse an operator. */
5524 enum token_type token;
5525 enum ctables_postcompute_op op;
5528 static const struct operator *
5529 ctables_pcexpr_match_operator (struct lexer *lexer,
5530 const struct operator ops[], size_t n_ops)
5532 for (const struct operator *op = ops; op < ops + n_ops; op++)
5533 if (lex_token (lexer) == op->token)
5535 if (op->token != T_NEG_NUM)
5544 static struct ctables_pcexpr *
5545 ctables_pcexpr_parse_binary_operators__ (
5546 struct lexer *lexer, struct dictionary *dict,
5547 const struct operator ops[], size_t n_ops,
5548 parse_recursively_func *parse_next_level,
5549 const char *chain_warning, struct ctables_pcexpr *lhs)
5551 for (int op_count = 0; ; op_count++)
5553 const struct operator *op
5554 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5557 if (op_count > 1 && chain_warning)
5558 msg_at (SW, lhs->location, "%s", chain_warning);
5563 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5566 ctables_pcexpr_destroy (lhs);
5570 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5574 static struct ctables_pcexpr *
5575 ctables_pcexpr_parse_binary_operators (
5576 struct lexer *lexer, struct dictionary *dict,
5577 const struct operator ops[], size_t n_ops,
5578 parse_recursively_func *parse_next_level, const char *chain_warning)
5580 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5584 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5586 chain_warning, lhs);
/* Declared ahead of its definition because ctables_pcexpr_parse_primary()
   calls it to parse a parenthesized subexpression. */
static struct ctables_pcexpr *ctables_pcexpr_parse_add (
  struct lexer *lexer, struct dictionary *dict);
5592 static struct ctables_pcexpr
5593 ctpo_cat_nrange (double low, double high)
5595 return (struct ctables_pcexpr) {
5596 .op = CTPO_CAT_NRANGE,
5597 .nrange = { low, high },
5601 static struct ctables_pcexpr
5602 ctpo_cat_srange (struct substring low, struct substring high)
5604 return (struct ctables_pcexpr) {
5605 .op = CTPO_CAT_SRANGE,
5606 .srange = { low, high },
5610 static struct ctables_pcexpr *
5611 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5613 int start_ofs = lex_ofs (lexer);
5614 struct ctables_pcexpr e;
5615 if (lex_is_number (lexer))
5617 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5618 .number = lex_number (lexer) };
5621 else if (lex_match_id (lexer, "MISSING"))
5622 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5623 else if (lex_match_id (lexer, "OTHERNM"))
5624 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5625 else if (lex_match_id (lexer, "TOTAL"))
5626 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5627 else if (lex_match_id (lexer, "SUBTOTAL"))
5629 size_t subtotal_index = 0;
5630 if (lex_match (lexer, T_LBRACK))
5632 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5634 subtotal_index = lex_integer (lexer);
5636 if (!lex_force_match (lexer, T_RBRACK))
5639 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5640 .subtotal_index = subtotal_index };
5642 else if (lex_match (lexer, T_LBRACK))
5644 if (lex_match_id (lexer, "LO"))
5646 if (!lex_force_match_id (lexer, "THRU"))
5649 if (lex_is_string (lexer))
5651 struct substring low = { .string = NULL };
5652 struct substring high = parse_substring (lexer, dict);
5653 e = ctpo_cat_srange (low, high);
5657 if (!lex_force_num (lexer))
5659 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5663 else if (lex_is_number (lexer))
5665 double number = lex_number (lexer);
5667 if (lex_match_id (lexer, "THRU"))
5669 if (lex_match_id (lexer, "HI"))
5670 e = ctpo_cat_nrange (number, DBL_MAX);
5673 if (!lex_force_num (lexer))
5675 e = ctpo_cat_nrange (number, lex_number (lexer));
5680 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5683 else if (lex_is_string (lexer))
5685 struct substring s = parse_substring (lexer, dict);
5687 if (lex_match_id (lexer, "THRU"))
5689 struct substring high;
5691 if (lex_match_id (lexer, "HI"))
5692 high = (struct substring) { .string = NULL };
5695 if (!lex_force_string (lexer))
5700 high = parse_substring (lexer, dict);
5703 e = ctpo_cat_srange (s, high);
5706 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5710 lex_error (lexer, NULL);
5714 if (!lex_force_match (lexer, T_RBRACK))
5716 if (e.op == CTPO_CAT_STRING)
5717 ss_dealloc (&e.string);
5718 else if (e.op == CTPO_CAT_SRANGE)
5720 ss_dealloc (&e.srange[0]);
5721 ss_dealloc (&e.srange[1]);
5726 else if (lex_match (lexer, T_LPAREN))
5728 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5731 if (!lex_force_match (lexer, T_RPAREN))
5733 ctables_pcexpr_destroy (ep);
5740 lex_error (lexer, NULL);
5744 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5745 return xmemdup (&e, sizeof e);
5748 static struct ctables_pcexpr *
5749 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5750 struct lexer *lexer, int start_ofs)
5752 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5753 *e = (struct ctables_pcexpr) {
5756 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5761 static struct ctables_pcexpr *
5762 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5764 static const struct operator op = { T_EXP, CTPO_POW };
5766 const char *chain_warning =
5767 _("The exponentiation operator (`**') is left-associative: "
5768 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5769 "To disable this warning, insert parentheses.");
5771 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5772 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5773 ctables_pcexpr_parse_primary,
5776 /* Special case for situations like "-5**6", which must be parsed as
5779 int start_ofs = lex_ofs (lexer);
5780 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5781 *lhs = (struct ctables_pcexpr) {
5782 .op = CTPO_CONSTANT,
5783 .number = -lex_tokval (lexer),
5784 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5788 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5789 lexer, dict, &op, 1,
5790 ctables_pcexpr_parse_primary, chain_warning, lhs);
5794 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5797 /* Parses the unary minus level. */
5798 static struct ctables_pcexpr *
5799 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5801 int start_ofs = lex_ofs (lexer);
5802 if (!lex_match (lexer, T_DASH))
5803 return ctables_pcexpr_parse_exp (lexer, dict);
5805 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5809 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5812 /* Parses the multiplication and division level. */
5813 static struct ctables_pcexpr *
5814 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5816 static const struct operator ops[] =
5818 { T_ASTERISK, CTPO_MUL },
5819 { T_SLASH, CTPO_DIV },
5822 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5823 sizeof ops / sizeof *ops,
5824 ctables_pcexpr_parse_neg, NULL);
5827 /* Parses the addition and subtraction level. */
5828 static struct ctables_pcexpr *
5829 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5831 static const struct operator ops[] =
5833 { T_PLUS, CTPO_ADD },
5834 { T_DASH, CTPO_SUB },
5835 { T_NEG_NUM, CTPO_ADD },
5838 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5839 ops, sizeof ops / sizeof *ops,
5840 ctables_pcexpr_parse_mul, NULL);
5843 static struct ctables_postcompute *
5844 ctables_find_postcompute (struct ctables *ct, const char *name)
5846 struct ctables_postcompute *pc;
5847 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5848 utf8_hash_case_string (name, 0), &ct->postcomputes)
5849 if (!utf8_strcasecmp (pc->name, name))
5855 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5858 int pcompute_start = lex_ofs (lexer) - 1;
5860 if (!lex_match (lexer, T_AND))
5862 lex_error_expecting (lexer, "&");
5865 if (!lex_force_id (lexer))
5868 char *name = ss_xstrdup (lex_tokss (lexer));
5871 if (!lex_force_match (lexer, T_EQUALS)
5872 || !lex_force_match_id (lexer, "EXPR")
5873 || !lex_force_match (lexer, T_LPAREN))
5879 int expr_start = lex_ofs (lexer);
5880 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5881 int expr_end = lex_ofs (lexer) - 1;
5882 if (!expr || !lex_force_match (lexer, T_RPAREN))
5884 ctables_pcexpr_destroy (expr);
5888 int pcompute_end = lex_ofs (lexer) - 1;
5890 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5893 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5896 msg_at (SW, location, _("New definition of &%s will override the "
5897 "previous definition."),
5899 msg_at (SN, pc->location, _("This is the previous definition."));
5901 ctables_pcexpr_destroy (pc->expr);
5902 msg_location_destroy (pc->location);
5907 pc = xmalloc (sizeof *pc);
5908 *pc = (struct ctables_postcompute) { .name = name };
5909 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5910 utf8_hash_case_string (pc->name, 0));
5913 pc->location = location;
5915 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5920 ctables_parse_pproperties_format (struct lexer *lexer,
5921 struct ctables_summary_spec_set *sss)
5923 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5925 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5926 && !(lex_token (lexer) == T_ID
5927 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5928 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5929 lex_tokss (lexer)))))
5931 /* Parse function. */
5932 enum ctables_summary_function function;
5933 if (!parse_ctables_summary_function (lexer, &function))
5936 /* Parse percentile. */
5937 double percentile = 0;
5938 if (function == CTSF_PTILE)
5940 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5942 percentile = lex_number (lexer);
5947 struct fmt_spec format;
5948 bool is_ctables_format;
5949 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5952 if (sss->n >= sss->allocated)
5953 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5954 sizeof *sss->specs);
5955 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5956 .function = function,
5957 .percentile = percentile,
5959 .is_ctables_format = is_ctables_format,
5965 ctables_summary_spec_set_uninit (sss);
5970 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5972 struct ctables_postcompute **pcs = NULL;
5974 size_t allocated_pcs = 0;
5976 while (lex_match (lexer, T_AND))
5978 if (!lex_force_id (lexer))
5980 struct ctables_postcompute *pc
5981 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5984 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5989 if (n_pcs >= allocated_pcs)
5990 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5994 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5996 if (lex_match_id (lexer, "LABEL"))
5998 lex_match (lexer, T_EQUALS);
5999 if (!lex_force_string (lexer))
6002 for (size_t i = 0; i < n_pcs; i++)
6004 free (pcs[i]->label);
6005 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6010 else if (lex_match_id (lexer, "FORMAT"))
6012 lex_match (lexer, T_EQUALS);
6014 struct ctables_summary_spec_set sss;
6015 if (!ctables_parse_pproperties_format (lexer, &sss))
6018 for (size_t i = 0; i < n_pcs; i++)
6021 ctables_summary_spec_set_uninit (pcs[i]->specs);
6023 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6024 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6026 ctables_summary_spec_set_uninit (&sss);
6028 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6030 lex_match (lexer, T_EQUALS);
6031 bool hide_source_cats;
6032 if (!parse_bool (lexer, &hide_source_cats))
6034 for (size_t i = 0; i < n_pcs; i++)
6035 pcs[i]->hide_source_cats = hide_source_cats;
6039 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats NOW, broken down as local time, according to the strftime()
   template FORMAT, and appends the result to OUT.

   Appends nothing if NOW cannot be converted to local time, or if strftime()
   reports that it produced no output (the result is empty or does not fit in
   the internal buffer).  In the latter case the buffer contents are
   indeterminate, so they must not be read. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  const struct tm *tm = localtime (&now);
  if (tm == NULL)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
6061 skip_prefix (struct substring *s, struct substring prefix)
6063 if (ss_starts_with (*s, prefix))
6065 ss_advance (s, prefix.length);
6073 put_table_expression (struct string *out, struct lexer *lexer,
6074 struct dictionary *dict, int expr_start, int expr_end)
6077 for (int ofs = expr_start; ofs < expr_end; ofs++)
6079 const struct token *t = lex_ofs_token (lexer, ofs);
6080 if (t->type == T_LBRACK)
6082 else if (t->type == T_RBRACK && nest > 0)
6088 else if (t->type == T_ID)
6090 const struct variable *var
6091 = dict_lookup_var (dict, t->string.string);
6092 const char *label = var ? var_get_label (var) : NULL;
6093 ds_put_cstr (out, label ? label : t->string.string);
6097 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6098 ds_put_byte (out, ' ');
6100 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6101 ds_put_cstr (out, repr);
6104 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6105 ds_put_byte (out, ' ');
6111 put_title_text (struct string *out, struct substring in, time_t now,
6112 struct lexer *lexer, struct dictionary *dict,
6113 int expr_start, int expr_end)
6117 size_t chunk = ss_find_byte (in, ')');
6118 ds_put_substring (out, ss_head (in, chunk));
6119 ss_advance (&in, chunk);
6120 if (ss_is_empty (in))
6123 if (skip_prefix (&in, ss_cstr (")DATE")))
6124 put_strftime (out, now, "%x");
6125 else if (skip_prefix (&in, ss_cstr (")TIME")))
6126 put_strftime (out, now, "%X");
6127 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6128 put_table_expression (out, lexer, dict, expr_start, expr_end);
6131 ds_put_byte (out, ')');
6132 ss_advance (&in, 1);
6138 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6140 struct casereader *input = NULL;
6142 struct measure_guesser *mg = measure_guesser_create (ds);
6145 input = proc_open (ds);
6146 measure_guesser_run (mg, input);
6147 measure_guesser_destroy (mg);
6150 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6151 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6152 enum settings_value_show tvars = settings_get_show_variables ();
6153 for (size_t i = 0; i < n_vars; i++)
6154 vlabels[i] = (enum ctables_vlabel) tvars;
6156 struct pivot_table_look *look = pivot_table_look_unshare (
6157 pivot_table_look_ref (pivot_table_look_get_default ()));
6158 look->omit_empty = false;
6160 struct ctables *ct = xmalloc (sizeof *ct);
6161 *ct = (struct ctables) {
6162 .dict = dataset_dict (ds),
6164 .ctables_formats = FMT_SETTINGS_INIT,
6166 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6169 time_t now = time (NULL);
6174 const char *dot_string;
6175 const char *comma_string;
6177 static const struct ctf ctfs[4] = {
6178 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6179 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6180 { CTEF_PAREN, "-,(,),", "-.(.)." },
6181 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6183 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6184 for (size_t i = 0; i < 4; i++)
6186 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6187 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6188 fmt_number_style_from_string (s));
6191 if (!lex_force_match (lexer, T_SLASH))
6194 while (!lex_match_id (lexer, "TABLE"))
6196 if (lex_match_id (lexer, "FORMAT"))
6198 double widths[2] = { SYSMIS, SYSMIS };
6199 double units_per_inch = 72.0;
6201 while (lex_token (lexer) != T_SLASH)
6203 if (lex_match_id (lexer, "MINCOLWIDTH"))
6205 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6208 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6210 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6213 else if (lex_match_id (lexer, "UNITS"))
6215 lex_match (lexer, T_EQUALS);
6216 if (lex_match_id (lexer, "POINTS"))
6217 units_per_inch = 72.0;
6218 else if (lex_match_id (lexer, "INCHES"))
6219 units_per_inch = 1.0;
6220 else if (lex_match_id (lexer, "CM"))
6221 units_per_inch = 2.54;
6224 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6228 else if (lex_match_id (lexer, "EMPTY"))
6233 lex_match (lexer, T_EQUALS);
6234 if (lex_match_id (lexer, "ZERO"))
6236 /* Nothing to do. */
6238 else if (lex_match_id (lexer, "BLANK"))
6239 ct->zero = xstrdup ("");
6240 else if (lex_force_string (lexer))
6242 ct->zero = ss_xstrdup (lex_tokss (lexer));
6248 else if (lex_match_id (lexer, "MISSING"))
6250 lex_match (lexer, T_EQUALS);
6251 if (!lex_force_string (lexer))
6255 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6256 ? ss_xstrdup (lex_tokss (lexer))
6262 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6263 "UNITS", "EMPTY", "MISSING");
6268 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6269 && widths[0] > widths[1])
6271 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6275 for (size_t i = 0; i < 2; i++)
6276 if (widths[i] != SYSMIS)
6278 int *wr = ct->look->width_ranges[TABLE_HORZ];
6279 wr[i] = widths[i] / units_per_inch * 96.0;
6284 else if (lex_match_id (lexer, "VLABELS"))
6286 if (!lex_force_match_id (lexer, "VARIABLES"))
6288 lex_match (lexer, T_EQUALS);
6290 struct variable **vars;
6292 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6296 if (!lex_force_match_id (lexer, "DISPLAY"))
6301 lex_match (lexer, T_EQUALS);
6303 enum ctables_vlabel vlabel;
6304 if (lex_match_id (lexer, "DEFAULT"))
6305 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6306 else if (lex_match_id (lexer, "NAME"))
6308 else if (lex_match_id (lexer, "LABEL"))
6309 vlabel = CTVL_LABEL;
6310 else if (lex_match_id (lexer, "BOTH"))
6312 else if (lex_match_id (lexer, "NONE"))
6316 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6322 for (size_t i = 0; i < n_vars; i++)
6323 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6326 else if (lex_match_id (lexer, "MRSETS"))
6328 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6330 lex_match (lexer, T_EQUALS);
6331 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6334 else if (lex_match_id (lexer, "SMISSING"))
6336 if (lex_match_id (lexer, "VARIABLE"))
6337 ct->smissing_listwise = false;
6338 else if (lex_match_id (lexer, "LISTWISE"))
6339 ct->smissing_listwise = true;
6342 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6346 else if (lex_match_id (lexer, "PCOMPUTE"))
6348 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6351 else if (lex_match_id (lexer, "PPROPERTIES"))
6353 if (!ctables_parse_pproperties (lexer, ct))
6356 else if (lex_match_id (lexer, "WEIGHT"))
6358 if (!lex_force_match_id (lexer, "VARIABLE"))
6360 lex_match (lexer, T_EQUALS);
6361 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6365 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6367 if (lex_match_id (lexer, "COUNT"))
6369 lex_match (lexer, T_EQUALS);
6370 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6373 ct->hide_threshold = lex_integer (lexer);
6376 else if (ct->hide_threshold == 0)
6377 ct->hide_threshold = 5;
6381 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6382 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6383 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6387 if (!lex_force_match (lexer, T_SLASH))
6391 size_t allocated_tables = 0;
6394 if (ct->n_tables >= allocated_tables)
6395 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6396 sizeof *ct->tables);
6398 struct ctables_category *cat = xmalloc (sizeof *cat);
6399 *cat = (struct ctables_category) {
6401 .include_missing = false,
6402 .sort_ascending = true,
6405 struct ctables_categories *c = xmalloc (sizeof *c);
6406 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6407 *c = (struct ctables_categories) {
6414 struct ctables_categories **categories = xnmalloc (n_vars,
6415 sizeof *categories);
6416 for (size_t i = 0; i < n_vars; i++)
6419 struct ctables_table *t = xmalloc (sizeof *t);
6420 *t = (struct ctables_table) {
6422 .slabels_axis = PIVOT_AXIS_COLUMN,
6423 .slabels_visible = true,
6424 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6426 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6427 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6428 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6430 .clabels_from_axis = PIVOT_AXIS_LAYER,
6431 .categories = categories,
6432 .n_categories = n_vars,
6435 ct->tables[ct->n_tables++] = t;
6437 lex_match (lexer, T_EQUALS);
6438 int expr_start = lex_ofs (lexer);
6439 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6441 if (lex_match (lexer, T_BY))
6443 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6444 ct, t, PIVOT_AXIS_COLUMN))
6447 if (lex_match (lexer, T_BY))
6449 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6450 ct, t, PIVOT_AXIS_LAYER))
6454 int expr_end = lex_ofs (lexer);
6456 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6457 && !t->axes[PIVOT_AXIS_LAYER])
6459 lex_error (lexer, _("At least one variable must be specified."));
6463 const struct ctables_axis *scales[PIVOT_N_AXES];
6464 size_t n_scales = 0;
6465 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6467 scales[a] = find_scale (t->axes[a]);
6473 msg (SE, _("Scale variables may appear only on one axis."));
6474 if (scales[PIVOT_AXIS_ROW])
6475 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6476 _("This scale variable appears on the rows axis."));
6477 if (scales[PIVOT_AXIS_COLUMN])
6478 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6479 _("This scale variable appears on the columns axis."));
6480 if (scales[PIVOT_AXIS_LAYER])
6481 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6482 _("This scale variable appears on the layer axis."));
6486 const struct ctables_axis *summaries[PIVOT_N_AXES];
6487 size_t n_summaries = 0;
6488 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6490 summaries[a] = (scales[a]
6492 : find_categorical_summary_spec (t->axes[a]));
6496 if (n_summaries > 1)
6498 msg (SE, _("Summaries may appear only on one axis."));
6499 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6502 msg_at (SN, summaries[a]->loc,
6504 ? _("This variable on the rows axis has a summary.")
6505 : a == PIVOT_AXIS_COLUMN
6506 ? _("This variable on the columns axis has a summary.")
6507 : _("This variable on the layers axis has a summary."));
6509 msg_at (SN, summaries[a]->loc,
6510 _("This is a scale variable, so it always has a "
6511 "summary even if the syntax does not explicitly "
6516 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6517 if (n_summaries ? summaries[a] : t->axes[a])
6519 t->summary_axis = a;
6523 if (lex_token (lexer) == T_ENDCMD)
6525 if (!ctables_prepare_table (t))
6529 if (!lex_force_match (lexer, T_SLASH))
6532 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6534 if (lex_match_id (lexer, "SLABELS"))
6536 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6538 if (lex_match_id (lexer, "POSITION"))
6540 lex_match (lexer, T_EQUALS);
6541 if (lex_match_id (lexer, "COLUMN"))
6542 t->slabels_axis = PIVOT_AXIS_COLUMN;
6543 else if (lex_match_id (lexer, "ROW"))
6544 t->slabels_axis = PIVOT_AXIS_ROW;
6545 else if (lex_match_id (lexer, "LAYER"))
6546 t->slabels_axis = PIVOT_AXIS_LAYER;
6549 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6553 else if (lex_match_id (lexer, "VISIBLE"))
6555 lex_match (lexer, T_EQUALS);
6556 if (!parse_bool (lexer, &t->slabels_visible))
6561 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6566 else if (lex_match_id (lexer, "CLABELS"))
6568 if (lex_match_id (lexer, "AUTO"))
6570 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6571 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6573 else if (lex_match_id (lexer, "ROWLABELS"))
6575 lex_match (lexer, T_EQUALS);
6576 if (lex_match_id (lexer, "OPPOSITE"))
6577 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6578 else if (lex_match_id (lexer, "LAYER"))
6579 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6582 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6586 else if (lex_match_id (lexer, "COLLABELS"))
6588 lex_match (lexer, T_EQUALS);
6589 if (lex_match_id (lexer, "OPPOSITE"))
6590 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6591 else if (lex_match_id (lexer, "LAYER"))
6592 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6595 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6601 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6606 else if (lex_match_id (lexer, "CRITERIA"))
6608 if (!lex_force_match_id (lexer, "CILEVEL"))
6610 lex_match (lexer, T_EQUALS);
6612 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6614 t->cilevel = lex_number (lexer);
6617 else if (lex_match_id (lexer, "CATEGORIES"))
6619 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6623 else if (lex_match_id (lexer, "TITLES"))
6628 if (lex_match_id (lexer, "CAPTION"))
6629 textp = &t->caption;
6630 else if (lex_match_id (lexer, "CORNER"))
6632 else if (lex_match_id (lexer, "TITLE"))
6636 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6639 lex_match (lexer, T_EQUALS);
6641 struct string s = DS_EMPTY_INITIALIZER;
6642 while (lex_is_string (lexer))
6644 if (!ds_is_empty (&s))
6645 ds_put_byte (&s, ' ');
6646 put_title_text (&s, lex_tokss (lexer), now,
6647 lexer, dataset_dict (ds),
6648 expr_start, expr_end);
6652 *textp = ds_steal_cstr (&s);
6654 while (lex_token (lexer) != T_SLASH
6655 && lex_token (lexer) != T_ENDCMD);
6657 else if (lex_match_id (lexer, "SIGTEST"))
6661 t->chisq = xmalloc (sizeof *t->chisq);
6662 *t->chisq = (struct ctables_chisq) {
6664 .include_mrsets = true,
6665 .all_visible = true,
6671 if (lex_match_id (lexer, "TYPE"))
6673 lex_match (lexer, T_EQUALS);
6674 if (!lex_force_match_id (lexer, "CHISQUARE"))
6677 else if (lex_match_id (lexer, "ALPHA"))
6679 lex_match (lexer, T_EQUALS);
6680 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6682 t->chisq->alpha = lex_number (lexer);
6685 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6687 lex_match (lexer, T_EQUALS);
6688 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6691 else if (lex_match_id (lexer, "CATEGORIES"))
6693 lex_match (lexer, T_EQUALS);
6694 if (lex_match_id (lexer, "ALLVISIBLE"))
6695 t->chisq->all_visible = true;
6696 else if (lex_match_id (lexer, "SUBTOTALS"))
6697 t->chisq->all_visible = false;
6700 lex_error_expecting (lexer,
6701 "ALLVISIBLE", "SUBTOTALS");
6707 lex_error_expecting (lexer, "TYPE", "ALPHA",
6708 "INCLUDEMRSETS", "CATEGORIES");
6712 while (lex_token (lexer) != T_SLASH
6713 && lex_token (lexer) != T_ENDCMD);
6715 else if (lex_match_id (lexer, "COMPARETEST"))
6719 t->pairwise = xmalloc (sizeof *t->pairwise);
6720 *t->pairwise = (struct ctables_pairwise) {
6722 .alpha = { .05, .05 },
6723 .adjust = BONFERRONI,
6724 .include_mrsets = true,
6725 .meansvariance_allcats = true,
6726 .all_visible = true,
6735 if (lex_match_id (lexer, "TYPE"))
6737 lex_match (lexer, T_EQUALS);
6738 if (lex_match_id (lexer, "PROP"))
6739 t->pairwise->type = PROP;
6740 else if (lex_match_id (lexer, "MEAN"))
6741 t->pairwise->type = MEAN;
6744 lex_error_expecting (lexer, "PROP", "MEAN");
6748 else if (lex_match_id (lexer, "ALPHA"))
6750 lex_match (lexer, T_EQUALS);
6752 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6754 double a0 = lex_number (lexer);
6757 lex_match (lexer, T_COMMA);
6758 if (lex_is_number (lexer))
6760 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6762 double a1 = lex_number (lexer);
6765 t->pairwise->alpha[0] = MIN (a0, a1);
6766 t->pairwise->alpha[1] = MAX (a0, a1);
6769 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6771 else if (lex_match_id (lexer, "ADJUST"))
6773 lex_match (lexer, T_EQUALS);
6774 if (lex_match_id (lexer, "BONFERRONI"))
6775 t->pairwise->adjust = BONFERRONI;
6776 else if (lex_match_id (lexer, "BH"))
6777 t->pairwise->adjust = BH;
6778 else if (lex_match_id (lexer, "NONE"))
6779 t->pairwise->adjust = 0;
6782 lex_error_expecting (lexer, "BONFERRONI", "BH",
6787 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6789 lex_match (lexer, T_EQUALS);
6790 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6793 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6795 lex_match (lexer, T_EQUALS);
6796 if (lex_match_id (lexer, "ALLCATS"))
6797 t->pairwise->meansvariance_allcats = true;
6798 else if (lex_match_id (lexer, "TESTEDCATS"))
6799 t->pairwise->meansvariance_allcats = false;
6802 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6806 else if (lex_match_id (lexer, "CATEGORIES"))
6808 lex_match (lexer, T_EQUALS);
6809 if (lex_match_id (lexer, "ALLVISIBLE"))
6810 t->pairwise->all_visible = true;
6811 else if (lex_match_id (lexer, "SUBTOTALS"))
6812 t->pairwise->all_visible = false;
6815 lex_error_expecting (lexer, "ALLVISIBLE",
6820 else if (lex_match_id (lexer, "MERGE"))
6822 lex_match (lexer, T_EQUALS);
6823 if (!parse_bool (lexer, &t->pairwise->merge))
6826 else if (lex_match_id (lexer, "STYLE"))
6828 lex_match (lexer, T_EQUALS);
6829 if (lex_match_id (lexer, "APA"))
6830 t->pairwise->apa_style = true;
6831 else if (lex_match_id (lexer, "SIMPLE"))
6832 t->pairwise->apa_style = false;
6835 lex_error_expecting (lexer, "APA", "SIMPLE");
6839 else if (lex_match_id (lexer, "SHOWSIG"))
6841 lex_match (lexer, T_EQUALS);
6842 if (!parse_bool (lexer, &t->pairwise->show_sig))
6847 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6848 "INCLUDEMRSETS", "MEANSVARIANCE",
6849 "CATEGORIES", "MERGE", "STYLE",
6854 while (lex_token (lexer) != T_SLASH
6855 && lex_token (lexer) != T_ENDCMD);
6859 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6860 "CRITERIA", "CATEGORIES", "TITLES",
6861 "SIGTEST", "COMPARETEST");
6865 if (!lex_match (lexer, T_SLASH))
6869 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6870 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6872 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6876 if (!ctables_prepare_table (t))
6879 while (lex_token (lexer) != T_ENDCMD);
6882 input = proc_open (ds);
6883 bool ok = ctables_execute (ds, input, ct);
6884 ok = proc_commit (ds) && ok;
6886 ctables_destroy (ct);
6887 return ok ? CMD_SUCCESS : CMD_FAILURE;
6892 ctables_destroy (ct);