1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casegrouper.h"
23 #include "data/casereader.h"
24 #include "data/casewriter.h"
25 #include "data/data-in.h"
26 #include "data/data-out.h"
27 #include "data/dataset.h"
28 #include "data/dictionary.h"
29 #include "data/mrset.h"
30 #include "data/subcase.h"
31 #include "data/value-labels.h"
32 #include "language/command.h"
33 #include "language/dictionary/split-file.h"
34 #include "language/lexer/format-parser.h"
35 #include "language/lexer/lexer.h"
36 #include "language/lexer/token.h"
37 #include "language/lexer/variable-parser.h"
38 #include "libpspp/array.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/string-array.h"
45 #include "math/mode.h"
46 #include "math/moments.h"
47 #include "math/percentiles.h"
48 #include "math/sort.h"
49 #include "output/pivot-table.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
55 #define _(msgid) gettext (msgid)
56 #define N_(msgid) (msgid)
60 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
61 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
62 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
63 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
67 - unweighted summaries (U*)
68 - lower confidence limits (*.LCL)
69 - upper confidence limits (*.UCL)
70 - standard error (*.SE)
73 /* All variables. */ \
74 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
75 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
76 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
95 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
96 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
98 /* All variables (unweighted). */ \
99 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
100 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
119 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
120 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
122 /* Scale variables, totals, and subtotals. */ \
123 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
132 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
133 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
134 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
137 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
138 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
139 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
144 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
147 /* Scale variables, totals, and subtotals (unweighted). */ \
148 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
154 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
155 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
156 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
157 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
158 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
159 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
164 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
/* Summary functions for multiple response sets.  These entries are compiled
   out until multiple response sets are implemented.  Each entry's NAME string
   spells its enumerator with '.' in place of '_' after the CTSF_ prefix, so
   the *_COUNT_RESPONSES family is named "*.COUNT.RESPONSES" and the
   *_RESPONSES_COUNT family is named "*.RESPONSES.COUNT"; the two families
   must not share a name. */
167 #if 0 /* Multiple response sets not yet implemented. */
168 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
169 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
188 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
189 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
192 enum ctables_summary_function
194 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
200 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
201 N_CTSF_FUNCTIONS = SUMMARIES
205 static bool ctables_summary_function_is_count (enum ctables_summary_function);
207 enum ctables_domain_type
209 /* Within a section, where stacked variables divide one section from
211 CTDT_TABLE, /* All layers of a whole section. */
212 CTDT_LAYER, /* One layer within a section. */
213 CTDT_LAYERROW, /* Row in one layer within a section. */
214 CTDT_LAYERCOL, /* Column in one layer within a section. */
216 /* Within a subtable, where a subtable pairs an innermost row variable with
217 an innermost column variable within a single layer. */
218 CTDT_SUBTABLE, /* Whole subtable. */
219 CTDT_ROW, /* Row within a subtable. */
220 CTDT_COL, /* Column within a subtable. */
224 struct ctables_domain
226 struct hmap_node node;
228 const struct ctables_cell *example;
230 double d_valid; /* Dictionary weight. */
233 double e_valid; /* Effective weight */
236 double u_valid; /* Unweighted. */
239 struct ctables_sum *sums;
248 enum ctables_summary_variant
257 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
258 all the axes (except the scalar variable, if any). */
259 struct hmap_node node;
261 /* The domains that contain this cell. */
262 uint32_t omit_domains;
263 struct ctables_domain *domains[N_CTDTS];
268 enum ctables_summary_variant sv;
270 struct ctables_cell_axis
272 struct ctables_cell_value
274 const struct ctables_category *category;
282 union ctables_summary *summaries;
289 const struct dictionary *dict;
290 struct pivot_table_look *look;
292 /* CTABLES has a number of extra formats that we implement via custom
293 currency specifications on an alternate fmt_settings. */
294 #define CTEF_NEGPAREN FMT_CCA
295 #define CTEF_NEQUAL FMT_CCB
296 #define CTEF_PAREN FMT_CCC
297 #define CTEF_PCTPAREN FMT_CCD
298 struct fmt_settings ctables_formats;
300 /* If this is NULL, zeros are displayed using the normal print format.
301 Otherwise, this string is displayed. */
304 /* If this is NULL, missing values are displayed using the normal print
305 format. Otherwise, this string is displayed. */
308 /* Indexed by variable dictionary index. */
309 enum ctables_vlabel *vlabels;
311 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
313 bool mrsets_count_duplicates; /* MRSETS. */
314 bool smissing_listwise; /* SMISSING. */
315 struct variable *e_weight; /* WEIGHT. */
316 int hide_threshold; /* HIDESMALLCOUNTS. */
318 struct ctables_table **tables;
322 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition.  Instances are kept in the 'postcomputes'
   hmap of struct ctables and looked up with ctables_find_postcompute(). */
325 struct ctables_postcompute
327 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
328 char *name; /* Name, without leading &. */
330 struct msg_location *location; /* Location of definition. */
331 struct ctables_pcexpr *expr; /* Expression tree for the postcompute. */
333 struct ctables_summary_spec_set *specs; /* NOTE(review): presumably optional summary specs -- confirm. */
334 bool hide_source_cats;
337 struct ctables_pcexpr
347 enum ctables_postcompute_op
350 CTPO_CONSTANT, /* 5 */
351 CTPO_CAT_NUMBER, /* [5] */
352 CTPO_CAT_STRING, /* ["STRING"] */
353 CTPO_CAT_NRANGE, /* [LO THRU 5] */
354 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
355 CTPO_CAT_MISSING, /* MISSING */
356 CTPO_CAT_OTHERNM, /* OTHERNM */
357 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
358 CTPO_CAT_TOTAL, /* TOTAL */
372 /* CTPO_CAT_NUMBER. */
375 /* CTPO_CAT_STRING, in dictionary encoding. */
376 struct substring string;
378 /* CTPO_CAT_NRANGE. */
381 /* CTPO_CAT_SRANGE. */
382 struct substring srange[2];
384 /* CTPO_CAT_SUBTOTAL. */
385 size_t subtotal_index;
387 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
388 One element: CTPO_NEG. */
389 struct ctables_pcexpr *subs[2];
392 /* Source location. */
393 struct msg_location *location;
396 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
397 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
398 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
399 struct ctables_pcexpr *sub1);
401 struct ctables_summary_spec_set
403 struct ctables_summary_spec *specs;
407 /* The variable to which the summary specs are applied. */
408 struct variable *var;
410 /* Whether the variable to which the summary specs are applied is a scale
411 variable for the purpose of summarization.
413 (VALIDN and TOTALN act differently for summarizing scale and categorical
417 /* If any of these optional additional scale variables are missing, then
418 treat 'var' as if it's missing too. This is for implementing
419 SMISSING=LISTWISE. */
420 struct variable **listwise_vars;
421 size_t n_listwise_vars;
424 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
425 const struct ctables_summary_spec_set *);
426 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
428 /* A nested sequence of variables, e.g. a > b > c. */
431 struct variable **vars;
434 size_t *domains[N_CTDTS];
435 size_t n_domains[N_CTDTS];
438 struct ctables_summary_spec_set specs[N_CSVS];
441 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
444 struct ctables_nest *nests;
448 static void ctables_stack_uninit (struct ctables_stack *);
452 struct hmap_node node;
457 struct ctables_occurrence
459 struct hmap_node node;
463 struct ctables_section
466 struct ctables_table *table;
467 struct ctables_nest *nests[PIVOT_N_AXES];
470 struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
471 struct hmap cells; /* Contains "struct ctables_cell"s. */
472 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
475 static void ctables_section_uninit (struct ctables_section *);
479 struct ctables *ctables;
480 struct ctables_axis *axes[PIVOT_N_AXES];
481 struct ctables_stack stacks[PIVOT_N_AXES];
482 struct ctables_section *sections;
484 enum pivot_axis_type summary_axis;
485 struct ctables_summary_spec_set summary_specs;
486 struct variable **sum_vars;
489 enum pivot_axis_type slabels_axis;
490 bool slabels_visible;
492 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
494 Most commonly, label_axis[a] == a, and in particular we always have
495 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
497 If ROWLABELS or COLLABELS is specified, then one of
498 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
499 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
501 If any category labels are moved, then 'clabels_example' is one of the
502 variables being moved (and it is otherwise NULL). All of the variables
503 being moved have the same width, value labels, and categories, so this
504 example variable can be used to find those out.
506 The remaining members in this group are relevant only if category labels
509 'clabels_values_map' holds a "struct ctables_value" for all the values
510 that appear in all of the variables in the moved categories. It is
511 accumulated as the data is read. Once the data is fully read, its
512 sorted values are put into 'clabels_values' and 'n_clabels_values'.
514 enum pivot_axis_type label_axis[PIVOT_N_AXES];
515 enum pivot_axis_type clabels_from_axis;
516 const struct variable *clabels_example;
517 struct hmap clabels_values_map;
518 struct ctables_value **clabels_values;
519 size_t n_clabels_values;
521 /* Indexed by variable dictionary index. */
522 struct ctables_categories **categories;
531 struct ctables_chisq *chisq;
532 struct ctables_pairwise *pairwise;
535 struct ctables_categories
538 struct ctables_category *cats;
543 struct ctables_category
545 enum ctables_category_type
547 /* Explicit category lists. */
550 CCT_NRANGE, /* Numerical range. */
551 CCT_SRANGE, /* String range. */
556 /* Totals and subtotals. */
560 /* Implicit category lists. */
565 /* For contributing to TOTALN. */
566 CCT_EXCLUDED_MISSING,
570 struct ctables_category *subtotal;
576 double number; /* CCT_NUMBER. */
577 struct substring string; /* CCT_STRING, in dictionary encoding. */
578 double nrange[2]; /* CCT_NRANGE. */
579 struct substring srange[2]; /* CCT_SRANGE. */
583 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
584 bool hide_subcategories; /* CCT_SUBTOTAL. */
587 /* CCT_POSTCOMPUTE. */
590 const struct ctables_postcompute *pc;
591 enum fmt_type parse_format;
594 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
597 bool include_missing;
601 enum ctables_summary_function sort_function;
602 struct variable *sort_var;
607 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
608 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
609 struct msg_location *location;
/* Releases the resources held by CAT.  The source location is destroyed
   with msg_location_destroy(); the other calls release the category's
   string, both string-range endpoints, and the total label.
   NOTE(review): each release presumably sits under the case label for the
   category type that owns that member -- confirm against the full switch. */
613 ctables_category_uninit (struct ctables_category *cat)
618 msg_location_destroy (cat->location);
625 case CCT_POSTCOMPUTE:
629 ss_dealloc (&cat->string);
633 ss_dealloc (&cat->srange[0]);
634 ss_dealloc (&cat->srange[1]);
639 free (cat->total_label);
647 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, treating a null 'string' pointer as an
   "absent" value: two absent substrings compare equal, an absent substring
   never equals a present one, and two present substrings are compared with
   ss_equals(). */
653 nullable_substring_equal (const struct substring *a,
654 const struct substring *b)
656 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are checked first; after that,
   only the members relevant to the shared type are compared.
   NOTE(review): most case labels are not visible here, so which comparison
   belongs to which category type should be confirmed against the full
   switch. */
660 ctables_category_equal (const struct ctables_category *a,
661 const struct ctables_category *b)
663 if (a->type != b->type)
/* Single number. */
669 return a->number == b->number;
/* Single string. */
672 return ss_equals (a->string, b->string);
/* Numeric range: both endpoints must match. */
675 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* String range: an endpoint may be a null substring, so use the
   null-tolerant comparison for each end. */
678 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
679 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
/* Postcomputes are compared by pointer identity. */
685 case CCT_POSTCOMPUTE:
686 return a->pc == b->pc;
/* Compared by the text of the total label. */
690 return !strcmp (a->total_label, b->total_label);
/* Compared on every sorting and missing-value setting. */
695 return (a->include_missing == b->include_missing
696 && a->sort_ascending == b->sort_ascending
697 && a->sort_function == b->sort_function
698 && a->sort_var == b->sort_var
699 && a->percentile == b->percentile);
701 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count must be positive on entry.
   Each category in C->cats is uninitialized with ctables_category_uninit().
   NOTE(review): the decrement and the last-reference test are not visible
   here; presumably the categories are torn down only when the count reaches
   zero -- confirm. */
709 ctables_categories_unref (struct ctables_categories *c)
714 assert (c->n_refs > 0);
718 for (size_t i = 0; i < c->n_cats; i++)
719 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  The number of categories and the
   'show_empty' setting are checked first; then corresponding categories are
   compared pairwise, in order, with ctables_category_equal(). */
725 ctables_categories_equal (const struct ctables_categories *a,
726 const struct ctables_categories *b)
728 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
731 for (size_t i = 0; i < a->n_cats; i++)
732 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
738 /* Chi-square test (SIGTEST). */
746 /* Pairwise comparison test (COMPARETEST). */
747 struct ctables_pairwise
749 enum { PROP, MEAN } type;
752 bool meansvariance_allcats;
754 enum { BONFERRONI = 1, BH } adjust;
778 struct variable *var;
780 struct ctables_summary_spec_set specs[N_CSVS];
784 struct ctables_axis *subs[2];
787 struct msg_location *loc;
790 static void ctables_axis_destroy (struct ctables_axis *);
/* The kinds of variables to which a summary function may be applied.  This
   is the AVAILABILITY argument of each S() summary entry. */
799 enum ctables_function_availability
801 CTFA_ALL, /* Any variables. */
802 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
803 //CTFA_MRSETS, /* Only multiple-response sets */
/* One summary function to compute, together with the format used to display
   its result. */
806 struct ctables_summary_spec
808 enum ctables_summary_function function;
809 double percentile; /* CTSF_PTILE only. */
812 struct fmt_spec format; /* Output format for the summary. */
813 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Makes DST a copy of SRC.  The label is duplicated with
   xstrdup_if_nonnull(), so DST gets its own copy of the string (or a null
   label if SRC has none).
   NOTE(review): the copy of the remaining members is not visible here. */
820 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
821 const struct ctables_summary_spec *src)
824 dst->label = xstrdup_if_nonnull (src->label);
828 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is
   allocated (or left null when SRC is empty) and each spec is cloned into
   it with ctables_summary_spec_clone(); 'is_scale' is carried over.
   NOTE(review): the other initializers of *DST are not visible here. */
835 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
836 const struct ctables_summary_spec_set *src)
/* Avoid a zero-sized allocation for an empty set. */
838 struct ctables_summary_spec *specs
839 = (src->n ? xnmalloc (src->n, sizeof *specs) : NULL);
840 for (size_t i = 0; i < src->n; i++)
841 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
843 *dst = (struct ctables_summary_spec_set) {
848 .is_scale = src->is_scale,
/* Releases the resources held by SET: every spec is uninitialized with
   ctables_summary_spec_uninit() and the 'listwise_vars' array is freed.
   NOTE(review): freeing of the 'specs' array itself is not visible here. */
853 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
855 for (size_t i = 0; i < set->n; i++)
856 ctables_summary_spec_uninit (&set->specs[i]);
857 free (set->listwise_vars);
/* Parses a column width setting from LEXER: an optional '=' followed by
   either the keyword DEFAULT or a number between 0 and DBL_MAX inclusive.
   NAME is passed to lex_force_num_range_closed() to identify the setting.
   A number is stored in *WIDTH.
   NOTE(review): the value stored for DEFAULT and the return values are not
   visible here. */
862 parse_col_width (struct lexer *lexer, const char *name, double *width)
/* The equals sign is optional. */
864 lex_match (lexer, T_EQUALS);
865 if (lex_match_id (lexer, "DEFAULT"))
867 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
869 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER on behalf of *B.  Any other token
   is reported as a syntax error that names both keywords.
   NOTE(review): the assignments to *B and the return values are not visible
   here; presumably NO stores false and YES stores true -- confirm. */
879 parse_bool (struct lexer *lexer, bool *b)
881 if (lex_match_id (lexer, "NO"))
883 else if (lex_match_id (lexer, "YES"))
887 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F, that is, the kinds
   of variables it may be applied to.  The lookup table is generated from
   the AVAILABILITY argument of the S() summary entries and indexed by the
   function's enumerator. */
893 static enum ctables_function_availability
894 ctables_function_availability (enum ctables_summary_function f)
896 static enum ctables_function_availability availability[] = {
897 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
902 return availability[f];
/* Returns true if F is one of the plain count functions: CTSF_COUNT,
   CTSF_ECOUNT, or CTSF_UCOUNT. */
906 ctables_summary_function_is_count (enum ctables_summary_function f)
908 return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT;
/* Parses a summary function name from LEXER.  The current token must be an
   identifier; it is compared with ss_equals_case() against a table of
   function names, and on a match the corresponding function is stored in
   *F.  If no name matches, a syntax error is reported.
   NOTE(review): token consumption and return values are not visible here. */
912 parse_ctables_summary_function (struct lexer *lexer,
913 enum ctables_summary_function *f)
917 enum ctables_summary_function function;
918 struct substring name;
/* Table of accepted names, built from S() entries. */
920 static struct pair names[] = {
921 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
922 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
925 /* The .COUNT suffix may be omitted. */
926 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
927 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
928 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
929 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
930 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
931 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
932 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
936 if (!lex_force_id (lexer))
/* Linear search of the name table. */
939 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
940 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
942 *f = names[i].function;
947 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  The visible cleanup uninitializes the summary spec set
   for every summary variant, recursively destroys both sub-axes, and
   destroys the source location.
   NOTE(review): the switch on the node's operator, any null check, and the
   final free of AXIS are not visible here. */
952 ctables_axis_destroy (struct ctables_axis *axis)
960 for (size_t i = 0; i < N_CSVS; i++)
961 ctables_summary_spec_set_uninit (&axis->specs[i]);
966 ctables_axis_destroy (axis->subs[0]);
967 ctables_axis_destroy (axis->subs[1]);
970 msg_location_destroy (axis->loc);
/* Allocates and returns a new axis node that combines SUB0 and SUB1 under
   operator OP.  The node's location is obtained from LEXER and spans from
   token offset START_OFS through the token just before the current one. */
974 static struct ctables_axis *
975 ctables_axis_new_nonterminal (enum ctables_axis_op op,
976 struct ctables_axis *sub0,
977 struct ctables_axis *sub1,
978 struct lexer *lexer, int start_ofs)
980 struct ctables_axis *axis = xmalloc (sizeof *axis);
981 *axis = (struct ctables_axis) {
983 .subs = { sub0, sub1 },
984 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Context passed to the ctables_axis_parse_*() functions. */
989 struct ctables_axis_parse_ctx
992 struct dictionary *dict; /* Dictionary used to look up variable names. */
994 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The function's format class is looked up in a table generated from the
   FORMAT argument of the S() summary entries, and one of three formats is
   returned: F with width 40, PCT with width 40 and 1 decimal, or VAR's own
   print format.
   NOTE(review): the case labels are not visible here; presumably these
   correspond to CTF_COUNT, CTF_PERCENT, and CTF_GENERAL in that order --
   confirm. */
997 static struct fmt_spec
998 ctables_summary_default_format (enum ctables_summary_function function,
999 const struct variable *var)
1001 static const enum ctables_format default_formats[] = {
1002 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1006 switch (default_formats[function])
1009 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1012 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1015 return *var_get_print_format (var);
/* Returns a new pivot_value to label the summary described by SPEC.

   One path returns a default label: for CTSF_PTILE, a "Percentile %.2f"
   text, and for any other function the LABEL argument of its S() entry.

   The other path copies SPEC->label into a new string, writing CILEVEL
   (formatted with "%g") wherever the text ")CILEVEL" occurs, and returns
   the result as user text.
   NOTE(review): the condition that selects between the two paths is not
   visible here; presumably the default is used when SPEC->label is null --
   confirm. */
1022 static struct pivot_value *
1023 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1027 static const char *default_labels[] = {
1028 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1033 return (spec->function == CTSF_PTILE
1034 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1036 : pivot_value_new_text (default_labels[spec->function]));
/* 'in' is the unprocessed remainder of the label; 'out' accumulates the
   expanded label. */
1040 struct substring in = ss_cstr (spec->label);
1041 struct substring target = ss_cstr (")CILEVEL");
1043 struct string out = DS_EMPTY_INITIALIZER;
/* Copy everything up to the next occurrence of the target. */
1046 size_t chunk = ss_find_substring (in, target);
1047 ds_put_substring (&out, ss_head (in, chunk));
1048 ss_advance (&in, chunk);
1050 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Skip the target and emit the confidence level in its place. */
1052 ss_advance (&in, target.length);
1053 ds_put_format (&out, "%g", cilevel);
/* Returns the syntax name of summary FUNCTION, taken from a table generated
   from the NAME argument of the S() summary entries. */
1059 ctables_summary_function_name (enum ctables_summary_function function)
1061 static const char *names[] = {
1062 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1066 return names[function];
/* Adds a summary specification for FUNCTION to AXIS, in the spec set for
   summary variant SV.

   For a variable node (CTAO_VAR), the function's availability is checked
   first.  A function that applies only to scale variables is rejected when
   the axis is not scale and SV is not CSV_TOTAL.  The error is reported at
   LOC, with a note at the axis's own location.  The spec is then appended
   to AXIS->specs[SV].  LABEL is duplicated if nonnull.  If FORMAT is null,
   the default format for FUNCTION and the axis variable is used.

   For any other node, the spec is added recursively to both sub-axes.

   The recursive call is tested with '!', so the return value is
   boolean-like with a false value meaning failure.
   NOTE(review): the return statements themselves are not visible here. */
1070 add_summary_spec (struct ctables_axis *axis,
1071 enum ctables_summary_function function, double percentile,
1072 const char *label, const struct fmt_spec *format,
1073 bool is_ctables_format, const struct msg_location *loc,
1074 enum ctables_summary_variant sv)
1076 if (axis->op == CTAO_VAR)
1078 const char *function_name = ctables_summary_function_name (function);
1079 const char *var_name = var_get_name (axis->var);
1080 switch (ctables_function_availability (function))
1084 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1085 "response sets."), function_name);
1086 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1092 if (!axis->scale && sv != CSV_TOTAL)
1095 _("Summary function %s applies only to scale variables."),
1097 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Append the new spec, growing the array when it is full. */
1107 struct ctables_summary_spec_set *set = &axis->specs[sv];
1108 if (set->n >= set->allocated)
1109 set->specs = x2nrealloc (set->specs, &set->allocated,
1110 sizeof *set->specs);
1112 struct ctables_summary_spec *dst = &set->specs[set->n++];
1113 *dst = (struct ctables_summary_spec) {
1114 .function = function,
1115 .percentile = percentile,
1116 .label = xstrdup_if_nonnull (label),
1117 .format = (format ? *format
1118 : ctables_summary_default_format (function, axis->var)),
1119 .is_ctables_format = is_ctables_format,
/* Not a variable node: apply the spec to both operands. */
1125 for (size_t i = 0; i < 2; i++)
1126 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1127 format, is_ctables_format, loc, sv))
1133 static struct ctables_axis *ctables_axis_parse_stack (
1134 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression, which takes one of two forms.

   - A parenthesized expression: "(" followed by a stack expression and a
     required ")".  If parsing the inner expression or matching ")" fails,
     the partial result is destroyed.

   - A variable name, optionally followed by "[S]" to force the variable to
     be treated as scale or "[C]" to force categorical.  Without either, the
     variable's measurement level decides.  A string variable may not be
     used as scale; that is reported as an error and the new node is
     destroyed.

   NOTE(review): the return statements are not visible here. */
1137 static struct ctables_axis *
1138 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1140 if (lex_match (ctx->lexer, T_LPAREN))
1142 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1143 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1145 ctables_axis_destroy (sub);
1151 if (!lex_force_id (ctx->lexer))
/* Remember where the variable reference starts so that the node's location
   can cover the name and any [S]/[C] suffix. */
1154 int start_ofs = lex_ofs (ctx->lexer);
1155 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1159 struct ctables_axis *axis = xmalloc (sizeof *axis);
1160 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1162 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1163 : lex_match_phrase (ctx->lexer, "[C]") ? false
1164 : var_get_measure (var) == MEASURE_SCALE);
1165 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1166 lex_ofs (ctx->lexer) - 1);
1167 if (axis->scale && var_is_alpha (var))
1169 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1171 var_get_name (var));
1172 ctables_axis_destroy (axis);
/* Returns true if S contains at least one decimal digit.  strcspn() yields
   the length of the leading run of non-digit characters, so the character at
   that index is the null terminator exactly when S has no digit. */
1180 has_digit (const char *s)
1182 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into *FORMAT.

   The type name is first checked against the CTABLES-only formats NEGPAREN,
   NEQUAL, PAREN, and PCTPAREN, which map to the CTEF_* format types.  For
   one of these, the width and decimals are validated and
   *IS_CTABLES_FORMAT is set to true.

   Any other type is parsed as an ordinary format specifier, which must be
   valid for output and compatible with numeric values; in that case
   *IS_CTABLES_FORMAT is set to false.

   NOTE(review): the failure returns and the exact width test that precedes
   the "width 2 or greater" error are not visible here. */
1186 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1187 bool *is_ctables_format)
1189 char type[FMT_TYPE_LEN_MAX + 1];
1190 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1193 if (!strcasecmp (type, "NEGPAREN"))
1194 format->type = CTEF_NEGPAREN;
1195 else if (!strcasecmp (type, "NEQUAL"))
1196 format->type = CTEF_NEQUAL;
1197 else if (!strcasecmp (type, "PAREN"))
1198 format->type = CTEF_PAREN;
1199 else if (!strcasecmp (type, "PCTPAREN"))
1200 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific format: fall back to the regular parser. */
1203 *is_ctables_format = false;
1204 return (parse_format_specifier (lexer, format)
1205 && fmt_check_output (format)
1206 && fmt_check_type_compat (format, VAL_NUMERIC));
1212 lex_next_error (lexer, -1, -1,
1213 _("Output format %s requires width 2 or greater."), type);
/* The decimals must leave at least one column of the width unused. */
1216 else if (format->d > format->w - 1)
1218 lex_next_error (lexer, -1, -1, _("Output format %s requires width "
1219 "greater than decimals."), type);
1224 *is_ctables_format = true;
1229 static struct ctables_axis *
1230 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1232 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1233 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1236 enum ctables_summary_variant sv = CSV_CELL;
1239 int start_ofs = lex_ofs (ctx->lexer);
1241 /* Parse function. */
1242 enum ctables_summary_function function;
1243 if (!parse_ctables_summary_function (ctx->lexer, &function))
1246 /* Parse percentile. */
1247 double percentile = 0;
1248 if (function == CTSF_PTILE)
1250 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1252 percentile = lex_number (ctx->lexer);
1253 lex_get (ctx->lexer);
1258 if (lex_is_string (ctx->lexer))
1260 label = ss_xstrdup (lex_tokss (ctx->lexer));
1261 lex_get (ctx->lexer);
1265 struct fmt_spec format;
1266 const struct fmt_spec *formatp;
1267 bool is_ctables_format = false;
1268 if (lex_token (ctx->lexer) == T_ID
1269 && has_digit (lex_tokcstr (ctx->lexer)))
1271 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1272 &is_ctables_format))
1282 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1283 lex_ofs (ctx->lexer) - 1);
1284 add_summary_spec (sub, function, percentile, label, formatp,
1285 is_ctables_format, loc, sv);
1287 msg_location_destroy (loc);
1289 lex_match (ctx->lexer, T_COMMA);
1290 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1292 if (!lex_force_match (ctx->lexer, T_LBRACK))
1296 else if (lex_match (ctx->lexer, T_RBRACK))
1298 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1305 ctables_axis_destroy (sub);
1309 static const struct ctables_axis *
1310 find_scale (const struct ctables_axis *axis)
1314 else if (axis->op == CTAO_VAR)
1315 return axis->scale ? axis : NULL;
1318 for (size_t i = 0; i < 2; i++)
1320 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1328 static const struct ctables_axis *
1329 find_categorical_summary_spec (const struct ctables_axis *axis)
1333 else if (axis->op == CTAO_VAR)
1334 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1337 for (size_t i = 0; i < 2; i++)
1339 const struct ctables_axis *sum
1340 = find_categorical_summary_spec (axis->subs[i]);
1348 static struct ctables_axis *
1349 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1351 int start_ofs = lex_ofs (ctx->lexer);
1352 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1356 while (lex_match (ctx->lexer, T_GT))
1358 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1362 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1363 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1365 const struct ctables_axis *outer_scale = find_scale (lhs);
1366 const struct ctables_axis *inner_scale = find_scale (rhs);
1367 if (outer_scale && inner_scale)
1369 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1370 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1371 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1372 ctables_axis_destroy (nest);
1376 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1379 msg_at (SE, nest->loc,
1380 _("Summaries may only be requested for categorical variables "
1381 "at the innermost nesting level."));
1382 msg_at (SN, outer_sum->loc,
1383 _("This outer categorical variable has a summary."));
1384 ctables_axis_destroy (nest);
1394 static struct ctables_axis *
1395 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1397 int start_ofs = lex_ofs (ctx->lexer);
1398 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1402 while (lex_match (ctx->lexer, T_PLUS))
1404 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1408 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1409 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T from LEXER and stores the
   result in T->axes[A].  The final result is true exactly when a non-null
   axis was parsed. */
1416 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1417 struct ctables *ct, struct ctables_table *t,
1418 enum pivot_axis_type a)
/* BY, '/' or the end of the command at this point means there is no
   expression for this axis.  NOTE(review): presumably that case succeeds
   without parsing anything -- confirm. */
1420 if (lex_token (lexer) == T_BY
1421 || lex_token (lexer) == T_SLASH
1422 || lex_token (lexer) == T_ENDCMD)
/* Bundle the parsing state for the recursive-descent helpers. */
1425 struct ctables_axis_parse_ctx ctx = {
1431 t->axes[a] = ctables_axis_parse_stack (&ctx);
1432 return t->axes[a] != NULL;
/* Destroys CHISQ.  ctables_table_destroy() calls this on a table's `chisq'
   member.  NOTE(review): presumably this just frees CHISQ -- confirm. */
1436 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Destroys PAIRWISE.  ctables_table_destroy() calls this on a table's
   `pairwise' member.  NOTE(review): presumably this just frees PAIRWISE --
   confirm. */
1442 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources owned by table T: its sections, the references held
   in T->categories, the parsed axis and the enumerated stack for each pivot
   axis, the summary specification array, the values in
   T->clabels_values_map, and the chi-square and pairwise settings. */
1448 ctables_table_destroy (struct ctables_table *t)
1453 for (size_t i = 0; i < t->n_sections; i++)
1454 ctables_section_uninit (&t->sections[i]);
/* Each slot of T->categories holds a reference, so drop each one before
   freeing the array itself. */
1457 for (size_t i = 0; i < t->n_categories; i++)
1458 ctables_categories_unref (t->categories[i]);
1459 free (t->categories);
1461 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
1463 ctables_axis_destroy (t->axes[a]);
1464 ctables_stack_uninit (&t->stacks[a]);
1466 free (t->summary_specs.specs);
/* The _SAFE iteration form is needed because every node is deleted from the
   map inside the loop. */
1468 struct ctables_value *ctv, *next_ctv;
1469 HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
1470 &t->clabels_values_map)
/* The stored values are destroyed using the width of T->clabels_example. */
1472 value_destroy (&ctv->value, var_get_width (t->clabels_example));
1473 hmap_delete (&t->clabels_values_map, &ctv->node);
1476 hmap_destroy (&t->clabels_values_map);
1477 free (t->clabels_values);
1483 ctables_chisq_destroy (t->chisq);
1484 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources owned by CT: every postcompute in CT->postcomputes,
   the CTABLES format settings, the reference to the table look, and each of
   the CT->n_tables tables. */
1489 ctables_destroy (struct ctables *ct)
/* The _SAFE iteration form is needed because every postcompute is deleted
   from the map inside the loop. */
1494 struct ctables_postcompute *pc, *next_pc;
1495 HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
1499 msg_location_destroy (pc->location);
1500 ctables_pcexpr_destroy (pc->expr);
1504 ctables_summary_spec_set_uninit (pc->specs);
1507 hmap_delete (&ct->postcomputes, &pc->hmap_node);
1511 fmt_settings_uninit (&ct->ctables_formats);
1512 pivot_table_look_unref (ct->look);
1516 for (size_t i = 0; i < ct->n_tables; i++)
1517 ctables_table_destroy (ct->tables[i]);
1522 static struct ctables_category
1523 cct_nrange (double low, double high)
1525 return (struct ctables_category) {
1527 .nrange = { low, high }
1531 static struct ctables_category
1532 cct_srange (struct substring low, struct substring high)
1534 return (struct ctables_category) {
1536 .srange = { low, high }
1541 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1542 struct ctables_category *cat)
1545 if (lex_match (lexer, T_EQUALS))
1547 if (!lex_force_string (lexer))
1550 total_label = ss_xstrdup (lex_tokss (lexer));
1554 total_label = xstrdup (_("Subtotal"));
1556 *cat = (struct ctables_category) {
1557 .type = CCT_SUBTOTAL,
1558 .hide_subcategories = hide_subcategories,
1559 .total_label = total_label
1564 static struct substring
1565 parse_substring (struct lexer *lexer, struct dictionary *dict)
1567 struct substring s = recode_substring_pool (
1568 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1569 ss_rtrim (&s, ss_cstr (" "));
1575 ctables_table_parse_explicit_category (struct lexer *lexer,
1576 struct dictionary *dict,
1578 struct ctables_category *cat)
1580 if (lex_match_id (lexer, "OTHERNM"))
1581 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1582 else if (lex_match_id (lexer, "MISSING"))
1583 *cat = (struct ctables_category) { .type = CCT_MISSING };
1584 else if (lex_match_id (lexer, "SUBTOTAL"))
1585 return ctables_table_parse_subtotal (lexer, false, cat);
1586 else if (lex_match_id (lexer, "HSUBTOTAL"))
1587 return ctables_table_parse_subtotal (lexer, true, cat);
1588 else if (lex_match_id (lexer, "LO"))
1590 if (!lex_force_match_id (lexer, "THRU"))
1592 if (lex_is_string (lexer))
1594 struct substring sr0 = { .string = NULL };
1595 struct substring sr1 = parse_substring (lexer, dict);
1596 *cat = cct_srange (sr0, sr1);
1598 else if (lex_force_num (lexer))
1600 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1606 else if (lex_is_number (lexer))
1608 double number = lex_number (lexer);
1610 if (lex_match_id (lexer, "THRU"))
1612 if (lex_match_id (lexer, "HI"))
1613 *cat = cct_nrange (number, DBL_MAX);
1616 if (!lex_force_num (lexer))
1618 *cat = cct_nrange (number, lex_number (lexer));
1623 *cat = (struct ctables_category) {
1628 else if (lex_is_string (lexer))
1630 struct substring s = parse_substring (lexer, dict);
1631 if (lex_match_id (lexer, "THRU"))
1633 if (lex_match_id (lexer, "HI"))
1635 struct substring sr1 = { .string = NULL };
1636 *cat = cct_srange (s, sr1);
1640 if (!lex_force_string (lexer))
1645 struct substring sr1 = parse_substring (lexer, dict);
1646 *cat = cct_srange (s, sr1);
1650 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1652 else if (lex_match (lexer, T_AND))
1654 if (!lex_force_id (lexer))
1656 struct ctables_postcompute *pc = ctables_find_postcompute (
1657 ct, lex_tokcstr (lexer));
1660 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1661 msg_at (SE, loc, _("Unknown postcompute &%s."),
1662 lex_tokcstr (lexer));
1663 msg_location_destroy (loc);
1668 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1672 lex_error (lexer, NULL);
1680 parse_category_string (struct msg_location *location,
1681 struct substring s, const struct dictionary *dict,
1682 enum fmt_type format, double *n)
1685 char *error = data_in (s, dict_get_encoding (dict), format,
1686 settings_get_fmt_settings (), &v, 0, NULL);
1689 msg_at (SE, location,
1690 _("Failed to parse category specification as format %s: %s."),
1691 fmt_name (format), error);
/* Searches CATS for the category that the postcompute operand E refers to.

   Each kind of operand is compared against categories of the matching type:
   numbers and numeric ranges by exact equality, strings with ss_equals(),
   and string ranges with nullable_substring_equal() on both endpoints.
   MISSING, OTHERNM and TOTAL operands match on category type alone.

   For a SUBTOTAL operand, E->subtotal_index is compared against the subtotal
   categories in CATS; an index of 0 means the reference gave no position.
   NOTE(review): the final check suggests that an unpositioned reference is
   rejected when CATS has more than one subtotal -- confirm. */
1700 static struct ctables_category *
1701 ctables_find_category_for_postcompute__ (const struct ctables_categories *cats,
1702 const struct ctables_pcexpr *e)
1704 struct ctables_category *best = NULL;
1705 size_t n_subtotals = 0;
1706 for (size_t i = 0; i < cats->n_cats; i++)
1708 struct ctables_category *cat = &cats->cats[i];
1711 case CTPO_CAT_NUMBER:
1712 if (cat->type == CCT_NUMBER && cat->number == e->number)
1716 case CTPO_CAT_STRING:
1717 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1721 case CTPO_CAT_NRANGE:
1722 if (cat->type == CCT_NRANGE
1723 && cat->nrange[0] == e->nrange[0]
1724 && cat->nrange[1] == e->nrange[1])
1728 case CTPO_CAT_SRANGE:
1729 if (cat->type == CCT_SRANGE
1730 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1731 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1735 case CTPO_CAT_MISSING:
1736 if (cat->type == CCT_MISSING)
1740 case CTPO_CAT_OTHERNM:
1741 if (cat->type == CCT_OTHERNM)
1745 case CTPO_CAT_SUBTOTAL:
1746 if (cat->type == CCT_SUBTOTAL)
/* A positioned reference is compared against N_SUBTOTALS; an unpositioned
   one (index 0) is handled separately. */
1749 if (e->subtotal_index == n_subtotals)
1751 else if (e->subtotal_index == 0)
1756 case CTPO_CAT_TOTAL:
1757 if (cat->type == CCT_TOTAL)
/* An unpositioned subtotal reference is ambiguous when there is more than
   one subtotal. */
1771 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1776 static struct ctables_category *
1777 ctables_find_category_for_postcompute (const struct dictionary *dict,
1778 const struct ctables_categories *cats,
1779 enum fmt_type parse_format,
1780 const struct ctables_pcexpr *e)
1782 if (parse_format != FMT_F)
1784 if (e->op == CTPO_CAT_STRING)
1787 if (!parse_category_string (e->location, e->string, dict,
1788 parse_format, &number))
1791 struct ctables_pcexpr e2 = {
1792 .op = CTPO_CAT_NUMBER,
1794 .location = e->location,
1796 return ctables_find_category_for_postcompute__ (cats, &e2);
1798 else if (e->op == CTPO_CAT_SRANGE)
1801 if (!e->srange[0].string)
1802 nrange[0] = -DBL_MAX;
1803 else if (!parse_category_string (e->location, e->srange[0], dict,
1804 parse_format, &nrange[0]))
1807 if (!e->srange[1].string)
1808 nrange[1] = DBL_MAX;
1809 else if (!parse_category_string (e->location, e->srange[1], dict,
1810 parse_format, &nrange[1]))
1813 struct ctables_pcexpr e2 = {
1814 .op = CTPO_CAT_NRANGE,
1815 .nrange = { nrange[0], nrange[1] },
1816 .location = e->location,
1818 return ctables_find_category_for_postcompute__ (cats, &e2);
1821 return ctables_find_category_for_postcompute__ (cats, e);
/* Checks that every category operand in postcompute expression E refers to a
   category that exists in CATS, recursing into E's subexpressions.  PC_CAT
   is the category that invokes the postcompute, and CATS_LOCATION is the
   location of the category list, used for diagnostics. */
1825 ctables_recursive_check_postcompute (struct dictionary *dict,
1826 const struct ctables_pcexpr *e,
1827 struct ctables_category *pc_cat,
1828 const struct ctables_categories *cats,
1829 const struct msg_location *cats_location)
/* These operand kinds name a category, which must be found in CATS. */
1833 case CTPO_CAT_NUMBER:
1834 case CTPO_CAT_STRING:
1835 case CTPO_CAT_NRANGE:
1836 case CTPO_CAT_SRANGE:
1837 case CTPO_CAT_MISSING:
1838 case CTPO_CAT_OTHERNM:
1839 case CTPO_CAT_SUBTOTAL:
1840 case CTPO_CAT_TOTAL:
1842 struct ctables_category *cat = ctables_find_category_for_postcompute (
1843 dict, cats, pc_cat->parse_format, e);
/* A subtotal reference without a position is ambiguous if CATS contains more
   than one subtotal, so count them in order to give a specific message. */
1846 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1848 size_t n_subtotals = 0;
1849 for (size_t i = 0; i < cats->n_cats; i++)
1850 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1851 if (n_subtotals > 1)
1853 msg_at (SE, cats_location,
1854 ngettext ("These categories include %zu instance "
1855 "of SUBTOTAL or HSUBTOTAL, so references "
1856 "from computed categories must refer to "
1857 "subtotals by position, "
1858 "e.g. SUBTOTAL[1].",
1859 "These categories include %zu instances "
1860 "of SUBTOTAL or HSUBTOTAL, so references "
1861 "from computed categories must refer to "
1862 "subtotals by position, "
1863 "e.g. SUBTOTAL[1].",
1866 msg_at (SN, e->location,
1867 _("This is the reference that lacks a position."));
/* General case: the referenced category is simply absent.  The error is
   followed by notes that point at the reference and suggest a fix that
   depends on the kind of operand. */
1872 msg_at (SE, pc_cat->location,
1873 _("Computed category &%s references a category not included "
1874 "in the category list."),
1876 msg_at (SN, e->location, _("This is the missing category."));
1877 if (e->op == CTPO_CAT_SUBTOTAL)
1878 msg_at (SN, cats_location,
1879 _("To fix the problem, add subtotals to the "
1880 "list of categories here."));
1881 else if (e->op == CTPO_CAT_TOTAL)
1882 msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
1883 "CATEGORIES specification."));
1885 msg_at (SN, cats_location,
1886 _("To fix the problem, add the missing category to the "
1887 "list of categories here."));
1890 if (pc_cat->pc->hide_source_cats)
/* Check both operands of an operator; a null operand is skipped. */
1904 for (size_t i = 0; i < 2; i++)
1905 if (e->subs[i] && !ctables_recursive_check_postcompute (
1906 dict, e->subs[i], pc_cat, cats, cats_location))
1915 all_strings (struct variable **vars, size_t n_vars,
1916 const struct ctables_category *cat)
1918 for (size_t j = 0; j < n_vars; j++)
1919 if (var_is_numeric (vars[j]))
1921 msg_at (SE, cat->location,
1922 _("This category specification may be applied only to string "
1923 "variables, but this subcommand tries to apply it to "
1924 "numeric variable %s."),
1925 var_get_name (vars[j]));
/* Parses a CATEGORIES subcommand for table T: VARIABLES=..., then an optional
   bracketed list of explicit categories, then any of the settings ORDER, KEY,
   MISSING, TOTAL, LABEL, POSITION and EMPTY.

   One `struct ctables_categories' is created and shared by all of the listed
   variables: its reference count starts at the number of variables, and it is
   installed in T->categories at each variable's dictionary index. */
1932 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1933 struct ctables *ct, struct ctables_table *t)
1935 if (!lex_match_id (lexer, "VARIABLES"))
1937 lex_match (lexer, T_EQUALS);
1939 struct variable **vars;
1941 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format shared by all of the variables.  COMMON_FORMAT
   becomes NULL if any variable's format type differs from the first's. */
1944 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1945 for (size_t i = 1; i < n_vars; i++)
1947 const struct fmt_spec *f = var_get_print_format (vars[i]);
1948 if (f->type != common_format->type)
1950 common_format = NULL;
/* This tests whether the common format is a date, time, or date component
   format. */
1956 && (fmt_get_category (common_format->type)
1957 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1959 struct ctables_categories *c = xmalloc (sizeof *c);
1960 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
/* Release whatever categories each variable's slot previously referenced. */
1961 for (size_t i = 0; i < n_vars; i++)
1963 struct ctables_categories **cp
1964 = &t->categories[var_get_dict_index (vars[i])];
1965 ctables_categories_unref (*cp);
/* Explicit category list.  The token offsets of the list are recorded so
   that diagnostics can point at it later; -1 means no list was given. */
1969 size_t allocated_cats = 0;
1970 int cats_start_ofs = -1;
1971 int cats_end_ofs = -1;
1972 if (lex_match (lexer, T_LBRACK))
1974 cats_start_ofs = lex_ofs (lexer);
1977 if (c->n_cats >= allocated_cats)
1978 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1980 int start_ofs = lex_ofs (lexer);
1981 struct ctables_category *cat = &c->cats[c->n_cats];
1982 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1984 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1987 lex_match (lexer, T_COMMA);
1989 while (!lex_match (lexer, T_RBRACK));
1990 cats_end_ofs = lex_ofs (lexer) - 1;
/* Settings.  CAT accumulates the ORDER, KEY and MISSING settings. */
1993 struct ctables_category cat = {
1995 .include_missing = false,
1996 .sort_ascending = true,
1998 bool show_totals = false;
1999 char *total_label = NULL;
2000 bool totals_before = false;
2001 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* ORDER, KEY and MISSING are only accepted when no explicit categories were
   listed (C->n_cats is 0). */
2003 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2005 lex_match (lexer, T_EQUALS);
2006 if (lex_match_id (lexer, "A"))
2007 cat.sort_ascending = true;
2008 else if (lex_match_id (lexer, "D"))
2009 cat.sort_ascending = false;
2012 lex_error_expecting (lexer, "A", "D");
2016 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2018 lex_match (lexer, T_EQUALS);
2019 if (lex_match_id (lexer, "VALUE"))
2020 cat.type = CCT_VALUE;
2021 else if (lex_match_id (lexer, "LABEL"))
2022 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function to sort by, optionally
   followed by a parenthesized variable and, for PTILE, a percentile. */
2025 cat.type = CCT_FUNCTION;
2026 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2029 if (lex_match (lexer, T_LPAREN))
2031 cat.sort_var = parse_variable (lexer, dict);
2035 if (cat.sort_function == CTSF_PTILE)
2037 lex_match (lexer, T_COMMA);
2038 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2040 cat.percentile = lex_number (lexer);
2044 if (!lex_force_match (lexer, T_RPAREN))
2047 else if (ctables_function_availability (cat.sort_function)
2050 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2055 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2057 lex_match (lexer, T_EQUALS);
2058 if (lex_match_id (lexer, "INCLUDE"))
2059 cat.include_missing = true;
2060 else if (lex_match_id (lexer, "EXCLUDE"))
2061 cat.include_missing = false;
2064 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2068 else if (lex_match_id (lexer, "TOTAL"))
2070 lex_match (lexer, T_EQUALS);
2071 if (!parse_bool (lexer, &show_totals))
2074 else if (lex_match_id (lexer, "LABEL"))
2076 lex_match (lexer, T_EQUALS);
2077 if (!lex_force_string (lexer))
2080 total_label = ss_xstrdup (lex_tokss (lexer));
2083 else if (lex_match_id (lexer, "POSITION"))
2085 lex_match (lexer, T_EQUALS);
2086 if (lex_match_id (lexer, "BEFORE"))
2087 totals_before = true;
2088 else if (lex_match_id (lexer, "AFTER"))
2089 totals_before = false;
2092 lex_error_expecting (lexer, "BEFORE", "AFTER");
2096 else if (lex_match_id (lexer, "EMPTY"))
2098 lex_match (lexer, T_EQUALS);
2099 if (lex_match_id (lexer, "INCLUDE"))
2100 c->show_empty = true;
2101 else if (lex_match_id (lexer, "EXCLUDE"))
2102 c->show_empty = false;
2105 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two forms of the "expecting" message: one lists every setting, the other
   only those that are always accepted. */
2112 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2113 "TOTAL", "LABEL", "POSITION", "EMPTY");
2115 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the category built from the settings. */
2122 if (c->n_cats >= allocated_cats)
2123 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2124 c->cats[c->n_cats++] = cat;
/* Add the totals category, either inserted at index 0 or placed at the end
   of the array. */
2129 if (c->n_cats >= allocated_cats)
2130 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2132 struct ctables_category *totals;
2135 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2136 totals = &c->cats[0];
2139 totals = &c->cats[c->n_cats];
2142 *totals = (struct ctables_category) {
2144 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Visit the categories forward if totals come first, otherwise backward, and
   record a subtotal on each category. */
2148 struct ctables_category *subtotal = NULL;
2149 for (size_t i = totals_before ? 0 : c->n_cats;
2150 totals_before ? i < c->n_cats : i-- > 0;
2151 totals_before ? i++ : 0)
2153 struct ctables_category *cat = &c->cats[i];
2162 cat->subtotal = subtotal;
2165 case CCT_POSTCOMPUTE:
2176 case CCT_EXCLUDED_MISSING:
/* An explicit category list was given, so check each category against the
   types of the variables it applies to. */
2181 if (cats_start_ofs != -1)
2183 for (size_t i = 0; i < c->n_cats; i++)
2185 struct ctables_category *cat = &c->cats[i];
/* A postcompute records the format used to parse string operands and then
   has each of its category references validated against C. */
2188 case CCT_POSTCOMPUTE:
2189 cat->parse_format = parse_strings ? common_format->type : FMT_F;
2190 struct msg_location *cats_location
2191 = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
2192 bool ok = ctables_recursive_check_postcompute (
2193 dict, cat->pc->expr, cat, c, cats_location);
2194 msg_location_destroy (cats_location);
/* Numeric-only category specifications may not be applied to string
   variables. */
2201 for (size_t j = 0; j < n_vars; j++)
2202 if (var_is_alpha (vars[j]))
2204 msg_at (SE, cat->location,
2205 _("This category specification may be applied "
2206 "only to numeric variables, but this "
2207 "subcommand tries to apply it to string "
2209 var_get_name (vars[j]));
/* A string category parsed in the common format is converted into a numeric
   category. */
2218 if (!parse_category_string (cat->location, cat->string, dict,
2219 common_format->type, &n))
2222 ss_dealloc (&cat->string);
2224 cat->type = CCT_NUMBER;
2227 else if (!all_strings (vars, n_vars, cat))
/* Likewise, a string range parsed in the common format is converted into a
   numeric range.  An endpoint with a null string is an open end. */
2236 if (!cat->srange[0].string)
2238 else if (!parse_category_string (cat->location,
2239 cat->srange[0], dict,
2240 common_format->type, &n[0]))
2243 if (!cat->srange[1].string)
2245 else if (!parse_category_string (cat->location,
2246 cat->srange[1], dict,
2247 common_format->type, &n[1]))
2250 ss_dealloc (&cat->srange[0]);
2251 ss_dealloc (&cat->srange[1]);
2253 cat->type = CCT_NRANGE;
2254 cat->nrange[0] = n[0];
2255 cat->nrange[1] = n[1];
2257 else if (!all_strings (vars, n_vars, cat))
2268 case CCT_EXCLUDED_MISSING:
2283 ctables_nest_uninit (struct ctables_nest *nest)
2286 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2287 ctables_summary_spec_set_uninit (&nest->specs[sv]);
2288 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2289 free (nest->domains[dt]);
2293 ctables_stack_uninit (struct ctables_stack *stack)
2297 for (size_t i = 0; i < stack->n; i++)
2298 ctables_nest_uninit (&stack->nests[i]);
2299 free (stack->nests);
2303 static struct ctables_stack
2304 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2311 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2312 for (size_t i = 0; i < s0.n; i++)
2313 for (size_t j = 0; j < s1.n; j++)
2315 const struct ctables_nest *a = &s0.nests[i];
2316 const struct ctables_nest *b = &s1.nests[j];
2318 size_t allocate = a->n + b->n;
2319 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2321 for (size_t k = 0; k < a->n; k++)
2322 vars[n++] = a->vars[k];
2323 for (size_t k = 0; k < b->n; k++)
2324 vars[n++] = b->vars[k];
2325 assert (n == allocate);
2327 const struct ctables_nest *summary_src;
2328 if (!a->specs[CSV_CELL].var)
2330 else if (!b->specs[CSV_CELL].var)
2335 struct ctables_nest *new = &stack.nests[stack.n++];
2336 *new = (struct ctables_nest) {
2338 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2339 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2343 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2344 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2346 ctables_stack_uninit (&s0);
2347 ctables_stack_uninit (&s1);
2351 static struct ctables_stack
2352 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2354 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2355 for (size_t i = 0; i < s0.n; i++)
2356 stack.nests[stack.n++] = s0.nests[i];
2357 for (size_t i = 0; i < s1.n; i++)
2359 stack.nests[stack.n] = s1.nests[i];
2360 stack.nests[stack.n].group_head += s0.n;
2363 assert (stack.n == s0.n + s1.n);
2369 static struct ctables_stack
2370 var_fts (const struct ctables_axis *a)
2372 struct variable **vars = xmalloc (sizeof *vars);
2375 struct ctables_nest *nest = xmalloc (sizeof *nest);
2376 *nest = (struct ctables_nest) {
2379 .scale_idx = a->scale ? 0 : SIZE_MAX,
2381 if (a->specs[CSV_CELL].n || a->scale)
2382 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2384 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2385 nest->specs[sv].var = a->var;
2386 nest->specs[sv].is_scale = a->scale;
2388 return (struct ctables_stack) { .nests = nest, .n = 1 };
2391 static struct ctables_stack
2392 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2395 return (struct ctables_stack) { .n = 0 };
2403 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2404 enumerate_fts (axis_type, a->subs[1]));
2407 /* This should consider any of the scale variables found in the result to
2408 be linked to each other listwise for SMISSING=LISTWISE. */
2409 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2410 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function in one cell.  This is a union, so
   only the members that belong to the function being computed are
   meaningful; the comments below group the members by function. */
2416 union ctables_summary
2418 /* COUNT, VALIDN, TOTALN. */
2421 /* MINIMUM, MAXIMUM, RANGE. */
2428 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2429 struct moments1 *moments;
2431 /* MEDIAN, MODE, PTILE. */
2434 struct casewriter *writer;
2439 /* XXX multiple response */
/* Initializes the accumulator S for the summary function given in SS.  The
   members that get initialized depend on SS->function. */
2443 ctables_summary_init (union ctables_summary *s,
2444 const struct ctables_summary_spec *ss)
2446 switch (ss->function)
/* Weighted count-based percentages. */
2450 case CTSF_ROWPCT_COUNT:
2451 case CTSF_COLPCT_COUNT:
2452 case CTSF_TABLEPCT_COUNT:
2453 case CTSF_SUBTABLEPCT_COUNT:
2454 case CTSF_LAYERPCT_COUNT:
2455 case CTSF_LAYERROWPCT_COUNT:
2456 case CTSF_LAYERCOLPCT_COUNT:
2457 case CTSF_ROWPCT_VALIDN:
2458 case CTSF_COLPCT_VALIDN:
2459 case CTSF_TABLEPCT_VALIDN:
2460 case CTSF_SUBTABLEPCT_VALIDN:
2461 case CTSF_LAYERPCT_VALIDN:
2462 case CTSF_LAYERROWPCT_VALIDN:
2463 case CTSF_LAYERCOLPCT_VALIDN:
2464 case CTSF_ROWPCT_TOTALN:
2465 case CTSF_COLPCT_TOTALN:
2466 case CTSF_TABLEPCT_TOTALN:
2467 case CTSF_SUBTABLEPCT_TOTALN:
2468 case CTSF_LAYERPCT_TOTALN:
2469 case CTSF_LAYERROWPCT_TOTALN:
2470 case CTSF_LAYERCOLPCT_TOTALN:
/* Unweighted (U*) count-based percentages. */
2477 case CTSF_UROWPCT_COUNT:
2478 case CTSF_UCOLPCT_COUNT:
2479 case CTSF_UTABLEPCT_COUNT:
2480 case CTSF_USUBTABLEPCT_COUNT:
2481 case CTSF_ULAYERPCT_COUNT:
2482 case CTSF_ULAYERROWPCT_COUNT:
2483 case CTSF_ULAYERCOLPCT_COUNT:
2484 case CTSF_UROWPCT_VALIDN:
2485 case CTSF_UCOLPCT_VALIDN:
2486 case CTSF_UTABLEPCT_VALIDN:
2487 case CTSF_USUBTABLEPCT_VALIDN:
2488 case CTSF_ULAYERPCT_VALIDN:
2489 case CTSF_ULAYERROWPCT_VALIDN:
2490 case CTSF_ULAYERCOLPCT_VALIDN:
2491 case CTSF_UROWPCT_TOTALN:
2492 case CTSF_UCOLPCT_TOTALN:
2493 case CTSF_UTABLEPCT_TOTALN:
2494 case CTSF_USUBTABLEPCT_TOTALN:
2495 case CTSF_ULAYERPCT_TOTALN:
2496 case CTSF_ULAYERROWPCT_TOTALN:
2497 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for the running minimum and maximum. */
2507 s->min = s->max = SYSMIS;
/* Moment-based functions, weighted and then unweighted. */
2515 case CTSF_ROWPCT_SUM:
2516 case CTSF_COLPCT_SUM:
2517 case CTSF_TABLEPCT_SUM:
2518 case CTSF_SUBTABLEPCT_SUM:
2519 case CTSF_LAYERPCT_SUM:
2520 case CTSF_LAYERROWPCT_SUM:
2521 case CTSF_LAYERCOLPCT_SUM:
2526 case CTSF_UVARIANCE:
2527 case CTSF_UROWPCT_SUM:
2528 case CTSF_UCOLPCT_SUM:
2529 case CTSF_UTABLEPCT_SUM:
2530 case CTSF_USUBTABLEPCT_SUM:
2531 case CTSF_ULAYERPCT_SUM:
2532 case CTSF_ULAYERROWPCT_SUM:
2533 case CTSF_ULAYERCOLPCT_SUM:
/* Moments are tracked up to the variance. */
2534 s->moments = moments1_create (MOMENT_VARIANCE);
/* Create a sorting casewriter whose cases hold two numeric values and are
   ordered in ascending order of the first.  ctables_summary_add() stores the
   data value in the first and the weight in the second. */
2544 struct caseproto *proto = caseproto_create ();
2545 proto = caseproto_add_width (proto, 0);
2546 proto = caseproto_add_width (proto, 0);
2548 struct subcase ordering;
2549 subcase_init (&ordering, 0, 0, SC_ASCEND);
2550 s->writer = sort_create_writer (&ordering, proto);
2551 subcase_uninit (&ordering);
2552 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for the summary
   function given in SS: the moments for moment-based functions and the
   casewriter for the functions that collect sorted values. */
2562 ctables_summary_uninit (union ctables_summary *s,
2563 const struct ctables_summary_spec *ss)
2565 switch (ss->function)
/* Weighted count-based percentages. */
2569 case CTSF_ROWPCT_COUNT:
2570 case CTSF_COLPCT_COUNT:
2571 case CTSF_TABLEPCT_COUNT:
2572 case CTSF_SUBTABLEPCT_COUNT:
2573 case CTSF_LAYERPCT_COUNT:
2574 case CTSF_LAYERROWPCT_COUNT:
2575 case CTSF_LAYERCOLPCT_COUNT:
2576 case CTSF_ROWPCT_VALIDN:
2577 case CTSF_COLPCT_VALIDN:
2578 case CTSF_TABLEPCT_VALIDN:
2579 case CTSF_SUBTABLEPCT_VALIDN:
2580 case CTSF_LAYERPCT_VALIDN:
2581 case CTSF_LAYERROWPCT_VALIDN:
2582 case CTSF_LAYERCOLPCT_VALIDN:
2583 case CTSF_ROWPCT_TOTALN:
2584 case CTSF_COLPCT_TOTALN:
2585 case CTSF_TABLEPCT_TOTALN:
2586 case CTSF_SUBTABLEPCT_TOTALN:
2587 case CTSF_LAYERPCT_TOTALN:
2588 case CTSF_LAYERROWPCT_TOTALN:
2589 case CTSF_LAYERCOLPCT_TOTALN:
/* Unweighted (U*) count-based percentages. */
2596 case CTSF_UROWPCT_COUNT:
2597 case CTSF_UCOLPCT_COUNT:
2598 case CTSF_UTABLEPCT_COUNT:
2599 case CTSF_USUBTABLEPCT_COUNT:
2600 case CTSF_ULAYERPCT_COUNT:
2601 case CTSF_ULAYERROWPCT_COUNT:
2602 case CTSF_ULAYERCOLPCT_COUNT:
2603 case CTSF_UROWPCT_VALIDN:
2604 case CTSF_UCOLPCT_VALIDN:
2605 case CTSF_UTABLEPCT_VALIDN:
2606 case CTSF_USUBTABLEPCT_VALIDN:
2607 case CTSF_ULAYERPCT_VALIDN:
2608 case CTSF_ULAYERROWPCT_VALIDN:
2609 case CTSF_ULAYERCOLPCT_VALIDN:
2610 case CTSF_UROWPCT_TOTALN:
2611 case CTSF_UCOLPCT_TOTALN:
2612 case CTSF_UTABLEPCT_TOTALN:
2613 case CTSF_USUBTABLEPCT_TOTALN:
2614 case CTSF_ULAYERPCT_TOTALN:
2615 case CTSF_ULAYERROWPCT_TOTALN:
2616 case CTSF_ULAYERCOLPCT_TOTALN:
/* Moment-based functions, weighted and then unweighted. */
2632 case CTSF_ROWPCT_SUM:
2633 case CTSF_COLPCT_SUM:
2634 case CTSF_TABLEPCT_SUM:
2635 case CTSF_SUBTABLEPCT_SUM:
2636 case CTSF_LAYERPCT_SUM:
2637 case CTSF_LAYERROWPCT_SUM:
2638 case CTSF_LAYERCOLPCT_SUM:
2643 case CTSF_UVARIANCE:
2644 case CTSF_UROWPCT_SUM:
2645 case CTSF_UCOLPCT_SUM:
2646 case CTSF_UTABLEPCT_SUM:
2647 case CTSF_USUBTABLEPCT_SUM:
2648 case CTSF_ULAYERPCT_SUM:
2649 case CTSF_ULAYERROWPCT_SUM:
2650 case CTSF_ULAYERCOLPCT_SUM:
2651 moments1_destroy (s->moments);
/* Counterpart of the sort_create_writer() call in ctables_summary_init(). */
2660 casewriter_destroy (s->writer);
/* Adds one case to the accumulator S for the summary function given in SS.

   VAR and VALUE are the summarized variable and its value in the case.
   IS_SCALE, IS_SCALE_MISSING, IS_MISSING and EXCLUDED_MISSING classify the
   case and decide whether it is counted, following the chart in the comment
   inside the function.  D_WEIGHT and E_WEIGHT are two weights for the case;
   the count-based percentages add D_WEIGHT, whereas the moments and the
   sorted-value collection use E_WEIGHT. */
2666 ctables_summary_add (union ctables_summary *s,
2667 const struct ctables_summary_spec *ss,
2668 const struct variable *var, const union value *value,
2669 bool is_scale, bool is_scale_missing,
2670 bool is_missing, bool excluded_missing,
2671 double d_weight, double e_weight)
2673 /* To determine whether a case is included in a given table for a particular
2674 kind of summary, consider the following charts for each variable in the
2675 table. Only if "yes" appears for every variable for the summary is the
2678 Categorical variables: VALIDN COUNT TOTALN
2679 Valid values in included categories yes yes yes
2680 Missing values in included categories --- yes yes
2681 Missing values in excluded categories --- --- yes
2682 Valid values in excluded categories --- --- ---
2684 Scale variables: VALIDN COUNT TOTALN
2685 Valid value yes yes yes
2686 Missing value --- yes yes
2688 Missing values include both user- and system-missing. (The system-missing
2689 value is always in an excluded category.)
2691 switch (ss->function)
2694 case CTSF_ROWPCT_TOTALN:
2695 case CTSF_COLPCT_TOTALN:
2696 case CTSF_TABLEPCT_TOTALN:
2697 case CTSF_SUBTABLEPCT_TOTALN:
2698 case CTSF_LAYERPCT_TOTALN:
2699 case CTSF_LAYERROWPCT_TOTALN:
2700 case CTSF_LAYERCOLPCT_TOTALN:
2701 s->count += d_weight;
2705 case CTSF_UROWPCT_TOTALN:
2706 case CTSF_UCOLPCT_TOTALN:
2707 case CTSF_UTABLEPCT_TOTALN:
2708 case CTSF_USUBTABLEPCT_TOTALN:
2709 case CTSF_ULAYERPCT_TOTALN:
2710 case CTSF_ULAYERROWPCT_TOTALN:
2711 case CTSF_ULAYERCOLPCT_TOTALN:
2716 case CTSF_ROWPCT_COUNT:
2717 case CTSF_COLPCT_COUNT:
2718 case CTSF_TABLEPCT_COUNT:
2719 case CTSF_SUBTABLEPCT_COUNT:
2720 case CTSF_LAYERPCT_COUNT:
2721 case CTSF_LAYERROWPCT_COUNT:
2722 case CTSF_LAYERCOLPCT_COUNT:
2723 if (is_scale || !excluded_missing)
2724 s->count += d_weight;
2728 case CTSF_UROWPCT_COUNT:
2729 case CTSF_UCOLPCT_COUNT:
2730 case CTSF_UTABLEPCT_COUNT:
2731 case CTSF_USUBTABLEPCT_COUNT:
2732 case CTSF_ULAYERPCT_COUNT:
2733 case CTSF_ULAYERROWPCT_COUNT:
2734 case CTSF_ULAYERCOLPCT_COUNT:
2735 if (is_scale || !excluded_missing)
2740 case CTSF_ROWPCT_VALIDN:
2741 case CTSF_COLPCT_VALIDN:
2742 case CTSF_TABLEPCT_VALIDN:
2743 case CTSF_SUBTABLEPCT_VALIDN:
2744 case CTSF_LAYERPCT_VALIDN:
2745 case CTSF_LAYERROWPCT_VALIDN:
2746 case CTSF_LAYERCOLPCT_VALIDN:
2750 s->count += d_weight;
2754 case CTSF_UROWPCT_VALIDN:
2755 case CTSF_UCOLPCT_VALIDN:
2756 case CTSF_UTABLEPCT_VALIDN:
2757 case CTSF_USUBTABLEPCT_VALIDN:
2758 case CTSF_ULAYERPCT_VALIDN:
2759 case CTSF_ULAYERROWPCT_VALIDN:
2760 case CTSF_ULAYERCOLPCT_VALIDN:
2769 s->count += d_weight;
2778 if (is_scale || !excluded_missing)
2779 s->count += e_weight;
2786 s->count += e_weight;
2790 s->count += e_weight;
2796 if (!is_scale_missing)
2798 assert (!var_is_alpha (var)); /* XXX? */
2799 if (s->min == SYSMIS || value->f < s->min)
2801 if (s->max == SYSMIS || value->f > s->max)
2811 case CTSF_ROWPCT_SUM:
2812 case CTSF_COLPCT_SUM:
2813 case CTSF_TABLEPCT_SUM:
2814 case CTSF_SUBTABLEPCT_SUM:
2815 case CTSF_LAYERPCT_SUM:
2816 case CTSF_LAYERROWPCT_SUM:
2817 case CTSF_LAYERCOLPCT_SUM:
/* Weighted moments use E_WEIGHT and skip missing scale values. */
2818 if (!is_scale_missing)
2819 moments1_add (s->moments, value->f, e_weight);
2826 case CTSF_UVARIANCE:
2827 case CTSF_UROWPCT_SUM:
2828 case CTSF_UCOLPCT_SUM:
2829 case CTSF_UTABLEPCT_SUM:
2830 case CTSF_USUBTABLEPCT_SUM:
2831 case CTSF_ULAYERPCT_SUM:
2832 case CTSF_ULAYERROWPCT_SUM:
2833 case CTSF_ULAYERCOLPCT_SUM:
/* Unweighted moments give every valid value a weight of 1. */
2834 if (!is_scale_missing)
2835 moments1_add (s->moments, value->f, 1.0);
/* Force both weights to 1 for what follows. */
2841 d_weight = e_weight = 1.0;
2846 if (!is_scale_missing)
2848 s->ovalid += e_weight;
/* Record the value and its weight as a two-value case in the sorting
   casewriter that ctables_summary_init() created. */
2850 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2851 *case_num_rw_idx (c, 0) = value->f;
2852 *case_num_rw_idx (c, 1) = e_weight;
2853 casewriter_write (s->writer, c);
/* Maps summary FUNCTION to the type of domain that serves as the base of its
   percentage.  ctables_summary_value() uses the result to index
   cell->domains[].

   Only some of the return statements appear in this listing (CTDT_LAYERCOL,
   CTDT_LAYERROW, CTDT_SUBTABLE).  The values returned for the other groups
   are on lines not shown here. */
2859 static enum ctables_domain_type
2860 ctables_function_domain (enum ctables_summary_function function)
/* Last visible label of a preceding group of cases; the rest of that group
   and its return statement are not shown. */
2890 case CTSF_UVARIANCE:
/* Column percentages, in their regular and U* (unweighted) forms. */
2896 case CTSF_COLPCT_COUNT:
2897 case CTSF_COLPCT_SUM:
2898 case CTSF_COLPCT_TOTALN:
2899 case CTSF_COLPCT_VALIDN:
2900 case CTSF_UCOLPCT_COUNT:
2901 case CTSF_UCOLPCT_SUM:
2902 case CTSF_UCOLPCT_TOTALN:
2903 case CTSF_UCOLPCT_VALIDN:
/* Layer-column percentages. */
2906 case CTSF_LAYERCOLPCT_COUNT:
2907 case CTSF_LAYERCOLPCT_SUM:
2908 case CTSF_LAYERCOLPCT_TOTALN:
2909 case CTSF_LAYERCOLPCT_VALIDN:
2910 case CTSF_ULAYERCOLPCT_COUNT:
2911 case CTSF_ULAYERCOLPCT_SUM:
2912 case CTSF_ULAYERCOLPCT_TOTALN:
2913 case CTSF_ULAYERCOLPCT_VALIDN:
2914 return CTDT_LAYERCOL;
/* Layer percentages. */
2916 case CTSF_LAYERPCT_COUNT:
2917 case CTSF_LAYERPCT_SUM:
2918 case CTSF_LAYERPCT_TOTALN:
2919 case CTSF_LAYERPCT_VALIDN:
2920 case CTSF_ULAYERPCT_COUNT:
2921 case CTSF_ULAYERPCT_SUM:
2922 case CTSF_ULAYERPCT_TOTALN:
2923 case CTSF_ULAYERPCT_VALIDN:
/* Layer-row percentages. */
2926 case CTSF_LAYERROWPCT_COUNT:
2927 case CTSF_LAYERROWPCT_SUM:
2928 case CTSF_LAYERROWPCT_TOTALN:
2929 case CTSF_LAYERROWPCT_VALIDN:
2930 case CTSF_ULAYERROWPCT_COUNT:
2931 case CTSF_ULAYERROWPCT_SUM:
2932 case CTSF_ULAYERROWPCT_TOTALN:
2933 case CTSF_ULAYERROWPCT_VALIDN:
2934 return CTDT_LAYERROW;
/* Row percentages. */
2936 case CTSF_ROWPCT_COUNT:
2937 case CTSF_ROWPCT_SUM:
2938 case CTSF_ROWPCT_TOTALN:
2939 case CTSF_ROWPCT_VALIDN:
2940 case CTSF_UROWPCT_COUNT:
2941 case CTSF_UROWPCT_SUM:
2942 case CTSF_UROWPCT_TOTALN:
2943 case CTSF_UROWPCT_VALIDN:
/* Subtable percentages. */
2946 case CTSF_SUBTABLEPCT_COUNT:
2947 case CTSF_SUBTABLEPCT_SUM:
2948 case CTSF_SUBTABLEPCT_TOTALN:
2949 case CTSF_SUBTABLEPCT_VALIDN:
2950 case CTSF_USUBTABLEPCT_COUNT:
2951 case CTSF_USUBTABLEPCT_SUM:
2952 case CTSF_USUBTABLEPCT_TOTALN:
2953 case CTSF_USUBTABLEPCT_VALIDN:
2954 return CTDT_SUBTABLE;
/* Table percentages. */
2956 case CTSF_TABLEPCT_COUNT:
2957 case CTSF_TABLEPCT_SUM:
2958 case CTSF_TABLEPCT_TOTALN:
2959 case CTSF_TABLEPCT_VALIDN:
2960 case CTSF_UTABLEPCT_COUNT:
2961 case CTSF_UTABLEPCT_SUM:
2962 case CTSF_UTABLEPCT_TOTALN:
2963 case CTSF_UTABLEPCT_VALIDN:
/* Classifies summary FUNCTION according to whether it is one of the *PCT_SUM
   (percentage-of-sum) functions.  The switch keeps the count-based
   percentages (COUNT, TOTALN, VALIDN) in one group and the *_SUM percentages
   in another; the values returned for each group are on lines not shown.

   NOTE(review): the declared return type is enum ctables_domain_type, the
   same as ctables_function_domain(), although the name reads as a yes/no
   predicate.  This looks like a copy-and-paste leftover; confirm whether the
   return type should be bool. */
2970 static enum ctables_domain_type
2971 ctables_function_is_pctsum (enum ctables_summary_function function)
/* Last visible label of a preceding group of cases. */
3001 case CTSF_UVARIANCE:
/* Percentages whose base is a count rather than a sum. */
3005 case CTSF_COLPCT_COUNT:
3006 case CTSF_COLPCT_TOTALN:
3007 case CTSF_COLPCT_VALIDN:
3008 case CTSF_UCOLPCT_COUNT:
3009 case CTSF_UCOLPCT_TOTALN:
3010 case CTSF_UCOLPCT_VALIDN:
3011 case CTSF_LAYERCOLPCT_COUNT:
3012 case CTSF_LAYERCOLPCT_TOTALN:
3013 case CTSF_LAYERCOLPCT_VALIDN:
3014 case CTSF_ULAYERCOLPCT_COUNT:
3015 case CTSF_ULAYERCOLPCT_TOTALN:
3016 case CTSF_ULAYERCOLPCT_VALIDN:
3017 case CTSF_LAYERPCT_COUNT:
3018 case CTSF_LAYERPCT_TOTALN:
3019 case CTSF_LAYERPCT_VALIDN:
3020 case CTSF_ULAYERPCT_COUNT:
3021 case CTSF_ULAYERPCT_TOTALN:
3022 case CTSF_ULAYERPCT_VALIDN:
3023 case CTSF_LAYERROWPCT_COUNT:
3024 case CTSF_LAYERROWPCT_TOTALN:
3025 case CTSF_LAYERROWPCT_VALIDN:
3026 case CTSF_ULAYERROWPCT_COUNT:
3027 case CTSF_ULAYERROWPCT_TOTALN:
3028 case CTSF_ULAYERROWPCT_VALIDN:
3029 case CTSF_ROWPCT_COUNT:
3030 case CTSF_ROWPCT_TOTALN:
3031 case CTSF_ROWPCT_VALIDN:
3032 case CTSF_UROWPCT_COUNT:
3033 case CTSF_UROWPCT_TOTALN:
3034 case CTSF_UROWPCT_VALIDN:
3035 case CTSF_SUBTABLEPCT_COUNT:
3036 case CTSF_SUBTABLEPCT_TOTALN:
3037 case CTSF_SUBTABLEPCT_VALIDN:
3038 case CTSF_USUBTABLEPCT_COUNT:
3039 case CTSF_USUBTABLEPCT_TOTALN:
3040 case CTSF_USUBTABLEPCT_VALIDN:
3041 case CTSF_TABLEPCT_COUNT:
3042 case CTSF_TABLEPCT_TOTALN:
3043 case CTSF_TABLEPCT_VALIDN:
3044 case CTSF_UTABLEPCT_COUNT:
3045 case CTSF_UTABLEPCT_TOTALN:
3046 case CTSF_UTABLEPCT_VALIDN:
/* The percentage-of-sum functions. */
3049 case CTSF_COLPCT_SUM:
3050 case CTSF_UCOLPCT_SUM:
3051 case CTSF_LAYERCOLPCT_SUM:
3052 case CTSF_ULAYERCOLPCT_SUM:
3053 case CTSF_LAYERPCT_SUM:
3054 case CTSF_ULAYERPCT_SUM:
3055 case CTSF_LAYERROWPCT_SUM:
3056 case CTSF_ULAYERROWPCT_SUM:
3057 case CTSF_ROWPCT_SUM:
3058 case CTSF_UROWPCT_SUM:
3059 case CTSF_SUBTABLEPCT_SUM:
3060 case CTSF_USUBTABLEPCT_SUM:
3061 case CTSF_TABLEPCT_SUM:
3062 case CTSF_UTABLEPCT_SUM:
/* Computes the reportable value of summary S of CELL according to
   SS->function.  Percentage functions divide by a total taken from the domain
   selected by ctables_function_domain(); moment-based functions query
   S->moments; the order statistics at the end read back the cases collected
   in S->writer.

   The return-type line, many case labels, and the alternatives returned when
   a denominator is zero fall on lines not shown in this listing. */
3070 ctables_summary_value (const struct ctables_cell *cell,
3071 union ctables_summary *s,
3072 const struct ctables_summary_spec *ss)
3074 switch (ss->function)
/* S->count as a percentage of the domain's e_count, guarded against a zero
   denominator. */
3081 case CTSF_ROWPCT_COUNT:
3082 case CTSF_COLPCT_COUNT:
3083 case CTSF_TABLEPCT_COUNT:
3084 case CTSF_SUBTABLEPCT_COUNT:
3085 case CTSF_LAYERPCT_COUNT:
3086 case CTSF_LAYERROWPCT_COUNT:
3087 case CTSF_LAYERCOLPCT_COUNT:
3089 enum ctables_domain_type d = ctables_function_domain (ss->function);
3090 return (cell->domains[d]->e_count
3091 ? s->count / cell->domains[d]->e_count * 100
/* Same, for the U* functions, against the domain's u_count. */
3095 case CTSF_UROWPCT_COUNT:
3096 case CTSF_UCOLPCT_COUNT:
3097 case CTSF_UTABLEPCT_COUNT:
3098 case CTSF_USUBTABLEPCT_COUNT:
3099 case CTSF_ULAYERPCT_COUNT:
3100 case CTSF_ULAYERROWPCT_COUNT:
3101 case CTSF_ULAYERCOLPCT_COUNT:
3103 enum ctables_domain_type d = ctables_function_domain (ss->function);
3104 return (cell->domains[d]->u_count
3105 ? s->count / cell->domains[d]->u_count * 100
/* S->count as a percentage of the domain's e_valid. */
3109 case CTSF_ROWPCT_VALIDN:
3110 case CTSF_COLPCT_VALIDN:
3111 case CTSF_TABLEPCT_VALIDN:
3112 case CTSF_SUBTABLEPCT_VALIDN:
3113 case CTSF_LAYERPCT_VALIDN:
3114 case CTSF_LAYERROWPCT_VALIDN:
3115 case CTSF_LAYERCOLPCT_VALIDN:
3117 enum ctables_domain_type d = ctables_function_domain (ss->function);
3118 return (cell->domains[d]->e_valid
3119 ? s->count / cell->domains[d]->e_valid * 100
/* Same, for the U* functions, against the domain's u_valid. */
3123 case CTSF_UROWPCT_VALIDN:
3124 case CTSF_UCOLPCT_VALIDN:
3125 case CTSF_UTABLEPCT_VALIDN:
3126 case CTSF_USUBTABLEPCT_VALIDN:
3127 case CTSF_ULAYERPCT_VALIDN:
3128 case CTSF_ULAYERROWPCT_VALIDN:
3129 case CTSF_ULAYERCOLPCT_VALIDN:
3131 enum ctables_domain_type d = ctables_function_domain (ss->function);
3132 return (cell->domains[d]->u_valid
3133 ? s->count / cell->domains[d]->u_valid * 100
/* S->count as a percentage of the domain's e_total. */
3137 case CTSF_ROWPCT_TOTALN:
3138 case CTSF_COLPCT_TOTALN:
3139 case CTSF_TABLEPCT_TOTALN:
3140 case CTSF_SUBTABLEPCT_TOTALN:
3141 case CTSF_LAYERPCT_TOTALN:
3142 case CTSF_LAYERROWPCT_TOTALN:
3143 case CTSF_LAYERCOLPCT_TOTALN:
3145 enum ctables_domain_type d = ctables_function_domain (ss->function);
3146 return (cell->domains[d]->e_total
3147 ? s->count / cell->domains[d]->e_total * 100
/* Same, for the U* functions, against the domain's u_total. */
3151 case CTSF_UROWPCT_TOTALN:
3152 case CTSF_UCOLPCT_TOTALN:
3153 case CTSF_UTABLEPCT_TOTALN:
3154 case CTSF_USUBTABLEPCT_TOTALN:
3155 case CTSF_ULAYERPCT_TOTALN:
3156 case CTSF_ULAYERROWPCT_TOTALN:
3157 case CTSF_ULAYERCOLPCT_TOTALN:
3159 enum ctables_domain_type d = ctables_function_domain (ss->function);
3160 return (cell->domains[d]->u_total
3161 ? s->count / cell->domains[d]->u_total * 100
/* Difference between the recorded maximum and minimum, or SYSMIS if either
   is still SYSMIS. */
3182 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
/* Requests only the mean from the accumulated moments. */
3188 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
/* Passes the variance and total weight to calc_semean(). */
3195 double weight, variance;
3196 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3197 return calc_semean (variance, weight);
/* Square root of the variance, or SYSMIS if the variance is SYSMIS. */
3204 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3205 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as weight * mean; SYSMIS if either is SYSMIS. */
3211 double weight, mean;
3212 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3213 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
/* Requests only the variance from the accumulated moments. */
3217 case CTSF_UVARIANCE:
3220 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* This cell's sum (weight * mean) as a percentage of the domain's e_sum for
   sum variable SS->sum_var_idx; SYSMIS if that denominator is zero. */
3224 case CTSF_ROWPCT_SUM:
3225 case CTSF_COLPCT_SUM:
3226 case CTSF_TABLEPCT_SUM:
3227 case CTSF_SUBTABLEPCT_SUM:
3228 case CTSF_LAYERPCT_SUM:
3229 case CTSF_LAYERROWPCT_SUM:
3230 case CTSF_LAYERCOLPCT_SUM:
3232 double weight, mean;
3233 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3234 if (weight == SYSMIS || mean == SYSMIS)
3236 enum ctables_domain_type d = ctables_function_domain (ss->function);
3237 double num = weight * mean;
3238 double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum;
3239 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Same, for the U* functions, against the domain's u_sum. */
3241 case CTSF_UROWPCT_SUM:
3242 case CTSF_UCOLPCT_SUM:
3243 case CTSF_UTABLEPCT_SUM:
3244 case CTSF_USUBTABLEPCT_SUM:
3245 case CTSF_ULAYERPCT_SUM:
3246 case CTSF_ULAYERROWPCT_SUM:
3247 case CTSF_ULAYERCOLPCT_SUM:
3249 double weight, mean;
3250 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3251 if (weight == SYSMIS || mean == SYSMIS)
3253 enum ctables_domain_type d = ctables_function_domain (ss->function);
3254 double num = weight * mean;
3255 double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum;
3256 return denom != 0 ? num / denom * 100 : SYSMIS;
/* Percentile: SS->percentile for CTSF_PTILE, otherwise 0.5.  Computed with
   PC_HAVERAGE from the cases read back from S->writer and stored in
   S->ovalue. */
3265 struct casereader *reader = casewriter_make_reader (s->writer);
3268 struct percentile *ptile = percentile_create (
3269 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3270 struct order_stats *os = &ptile->parent;
3271 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3272 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3273 statistic_destroy (&ptile->parent.parent);
/* Mode of the cases read back from S->writer, stored in S->ovalue. */
3281 struct casereader *reader = casewriter_make_reader (s->writer);
3284 struct mode *mode = mode_create ();
3285 struct order_stats *os = &mode->parent;
3286 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3287 s->ovalue = mode->mode;
3288 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(). */
3296 struct ctables_cell_sort_aux
/* Nest whose variables are compared, in order. */
3298 const struct ctables_nest *nest;
/* Axis whose per-cell values (cell->axes[a].cvs) are compared. */
3299 enum pivot_axis_type a;
/* Three-way comparison of the cells that A_ and B_ point to (each argument is
   a pointer to a `struct ctables_cell *'), along axis AUX_->a.  Walks the
   variables of AUX_->nest other than the scale variable.  Cells in different
   categories are ordered by category address; cells in the same category are
   ordered according to the category's type.

   The lines that test each `cmp' before returning, and the case labels that
   select between the comparisons below, are not shown in this listing. */
3303 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3305 const struct ctables_cell_sort_aux *aux = aux_;
3306 struct ctables_cell *const *ap = a_;
3307 struct ctables_cell *const *bp = b_;
3308 const struct ctables_cell *a = *ap;
3309 const struct ctables_cell *b = *bp;
3311 const struct ctables_nest *nest = aux->nest;
3312 for (size_t i = 0; i < nest->n; i++)
3313 if (i != nest->scale_idx)
3315 const struct variable *var = nest->vars[i];
3316 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3317 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by the category objects' addresses. */
3318 if (a_cv->category != b_cv->category)
3319 return a_cv->category > b_cv->category ? 1 : -1;
3321 const union value *a_val = &a_cv->value;
3322 const union value *b_val = &b_cv->value;
3323 switch (a_cv->category->type)
3329 case CCT_POSTCOMPUTE:
3330 case CCT_EXCLUDED_MISSING:
3331 /* Must be equal. */
/* Plain comparison of the two values. */
3339 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* Comparison of the two values, reversed unless the category sorts
   ascending. */
3347 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3349 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Comparison by value label.  When both values have labels they are compared
   with strcmp(); a labeled value sorts after an unlabeled one; when neither
   has a label the values themselves are compared.  (The line that opens this
   conditional expression is not shown; presumably it tests a_label.) */
3355 const char *a_label = var_lookup_value_label (var, a_val);
3356 const char *b_label = var_lookup_value_label (var, b_val);
3358 ? (b_label ? strcmp (a_label, b_label) : 1)
3359 : (b_label ? -1 : value_compare_3way (
3360 a_val, b_val, var_get_width (var))));
3362 return a_cv->category->sort_ascending ? cmp : -cmp;
3376 For each ctables_table:
3377 For each combination of row vars:
3378 For each combination of column vars:
3379 For each combination of layer vars:
3381 Make a table of row values:
3382 Sort entries by row values
3383 Assign a 0-based index to each actual value
3384 Construct a dimension
3385 Make a table of column values
3386 Make a table of layer values
3388 Fill the table entry using the indexes from before.
/* Looks up, in S->domains[DOMAIN], the domain of type DOMAIN that CELL
   belongs to, and creates one if no existing domain matches.  A new domain
   records CELL as its `example' and gets a zero-filled `sums' array when the
   table has sum variables.

   Two cells share a domain when, for every variable listed in
   nest->domains[DOMAIN] on every axis, they have the same category and,
   except for total, subtotal, and postcompute categories, equal values.  The
   return statements are on lines not shown. */
3391 static struct ctables_domain *
3392 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3393 enum ctables_domain_type domain)
/* Hash the category (and, where it matters, the value) of each variable that
   defines this kind of domain. */
3396 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3398 const struct ctables_nest *nest = s->nests[a];
3399 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3401 size_t v_idx = nest->domains[domain][i];
3402 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3403 hash = hash_pointer (cv->category, hash);
3404 if (cv->category->type != CCT_TOTAL
3405 && cv->category->type != CCT_SUBTOTAL
3406 && cv->category->type != CCT_POSTCOMPUTE)
3407 hash = value_hash (&cv->value,
3408 var_get_width (nest->vars[v_idx]), hash);
/* Compare CELL against the example cell of each domain with the same hash. */
3412 struct ctables_domain *d;
3413 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3415 const struct ctables_cell *df = d->example;
3416 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3418 const struct ctables_nest *nest = s->nests[a];
3419 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3421 size_t v_idx = nest->domains[domain][i];
3422 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3423 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3424 if (cv1->category != cv2->category
3425 || (cv1->category->type != CCT_TOTAL
3426 && cv1->category->type != CCT_SUBTOTAL
3427 && cv1->category->type != CCT_POSTCOMPUTE
3428 && !value_equal (&cv1->value, &cv2->value,
3429 var_get_width (nest->vars[v_idx]))))
/* No match: create a new domain, with every field other than `example' and
   `sums' zero-initialized by the compound literal. */
3438 struct ctables_sum *sums = (s->table->n_sum_vars
3439 ? xzalloc (s->table->n_sum_vars * sizeof *sums)
3442 d = xmalloc (sizeof *d);
3443 *d = (struct ctables_domain) { .example = cell, .sums = sums };
3444 hmap_insert (&s->domains[domain], &d->node, hash);
/* Views string value V of VAR as a substring var_get_width (VAR) bytes long
   and trims trailing spaces from it.  The substring refers to V's own
   storage (no copy is made here).  The return statement is on a line not
   shown; presumably the trimmed substring is returned. */
3448 static struct substring
3449 rtrim_value (const union value *v, const struct variable *var)
3451 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3452 var_get_width (var));
3453 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether string value V of VAR, with trailing spaces removed, lies
   between SRANGE[0] and SRANGE[1] inclusive.  A bound whose `string' member
   is null imposes no limit on that side. */
3458 in_string_range (const union value *v, const struct variable *var,
3459 const struct substring *srange)
3461 struct substring s = rtrim_value (v, var);
3462 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3463 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   The numeric system-missing value is handled up front (its result is on a
   line not shown).  Otherwise the categories are tried from last to first,
   each according to its type.  If the loop selects nothing, the result is
   OTHERNM, or NULL when V is a missing value.

   The case labels and most per-type results are on lines not shown. */
3466 static const struct ctables_category *
3467 ctables_categories_match (const struct ctables_categories *c,
3468 const union value *v, const struct variable *var)
3470 if (var_is_numeric (var) && v->f == SYSMIS)
3473 const struct ctables_category *othernm = NULL;
3474 for (size_t i = c->n_cats; i-- > 0; )
3476 const struct ctables_category *cat = &c->cats[i];
/* Exact numeric match. */
3480 if (cat->number == v->f)
/* Exact string match, ignoring V's trailing spaces. */
3485 if (ss_equals (cat->string, rtrim_value (v, var)))
/* Numeric range; -DBL_MAX and DBL_MAX stand for an open lower and upper
   bound. */
3490 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3491 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
/* String range. */
3496 if (in_string_range (v, var, cat->srange))
/* Category that matches missing values. */
3501 if (var_is_value_missing (var, v))
3505 case CCT_POSTCOMPUTE:
/* Matches any value, but a missing value only if the category includes
   missing values. */
3520 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3523 case CCT_EXCLUDED_MISSING:
3528 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's total category if it is C's first or last category.  The final
   alternative of the conditional is on a line not shown.

   NOTE(review): reads c->cats[0] and c->cats[c->n_cats - 1] unconditionally,
   so this assumes c->n_cats > 0 -- confirm against callers. */
3531 static const struct ctables_category *
3532 ctables_categories_total (const struct ctables_categories *c)
3534 const struct ctables_category *first = &c->cats[0];
3535 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3536 return (first->type == CCT_TOTAL ? first
3537 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S identified by categories CATS and the values
   in case C, creating and initializing it if it does not exist yet.

   A cell is identified, for every non-scale variable on every axis, by its
   category and -- except for total, subtotal, and postcompute categories --
   by the variable's value in C.  A new cell gets a copy of each variable's
   value, one initialized summary per summary specification, and a domain of
   every type (via ctables_domain_insert()), and is added to S->cells.

   CATS allows at most 10 variables per axis, as fixed by the parameter
   type.  The return statements are on lines not shown. */
3541 static struct ctables_cell *
3542 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3543 const struct ctables_category *cats[PIVOT_N_AXES][10])
3546 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3547 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3549 const struct ctables_nest *nest = s->nests[a];
3550 for (size_t i = 0; i < nest->n; i++)
3551 if (i != nest->scale_idx)
3553 hash = hash_pointer (cats[a][i], hash);
3554 if (cats[a][i]->type != CCT_TOTAL
3555 && cats[a][i]->type != CCT_SUBTOTAL
3556 && cats[a][i]->type != CCT_POSTCOMPUTE)
3557 hash = value_hash (case_data (c, nest->vars[i]),
3558 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same categories and values. */
3564 struct ctables_cell *cell;
3565 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3567 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3569 const struct ctables_nest *nest = s->nests[a];
3570 for (size_t i = 0; i < nest->n; i++)
3571 if (i != nest->scale_idx
3572 && (cats[a][i] != cell->axes[a].cvs[i].category
3573 || (cats[a][i]->type != CCT_TOTAL
3574 && cats[a][i]->type != CCT_SUBTOTAL
3575 && cats[a][i]->type != CCT_POSTCOMPUTE
3576 && !value_equal (case_data (c, nest->vars[i]),
3577 &cell->axes[a].cvs[i].value,
3578 var_get_width (nest->vars[i])))))
/* Not found: build a new cell. */
3587 cell = xmalloc (sizeof *cell);
3590 cell->omit_domains = 0;
3591 cell->postcompute = false;
3592 //struct string name = DS_EMPTY_INITIALIZER;
3593 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3595 const struct ctables_nest *nest = s->nests[a];
3596 cell->axes[a].cvs = (nest->n
3597 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3599 for (size_t i = 0; i < nest->n; i++)
3601 const struct ctables_category *cat = cats[a][i];
3602 const struct variable *var = nest->vars[i];
3603 const union value *value = case_data (c, var);
3604 if (i != nest->scale_idx)
3606 const struct ctables_category *subtotal = cat->subtotal;
3607 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category on axis A flags, as bits in
   omit_domains, the domain types listed for that axis.
   ctables_cell_add__() consults this mask. */
3610 if (cat->type == CCT_TOTAL
3611 || cat->type == CCT_SUBTOTAL
3612 || cat->type == CCT_POSTCOMPUTE)
3614 /* XXX these should be more encompassing I think.*/
3618 case PIVOT_AXIS_COLUMN:
3619 cell->omit_domains |= ((1u << CTDT_TABLE) |
3620 (1u << CTDT_LAYER) |
3621 (1u << CTDT_LAYERCOL) |
3622 (1u << CTDT_SUBTABLE) |
3625 case PIVOT_AXIS_ROW:
3626 cell->omit_domains |= ((1u << CTDT_TABLE) |
3627 (1u << CTDT_LAYER) |
3628 (1u << CTDT_LAYERROW) |
3629 (1u << CTDT_SUBTABLE) |
3632 case PIVOT_AXIS_LAYER:
3633 cell->omit_domains |= ((1u << CTDT_TABLE) |
3634 (1u << CTDT_LAYER));
3638 if (cat->type == CCT_POSTCOMPUTE)
3639 cell->postcompute = true;
/* The cell keeps its own copy of the value. */
3642 cell->axes[a].cvs[i].category = cat;
3643 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the span below builds a textual cell name in `name', but the
   only visible declaration of `name' is commented out above and the matching
   ds_steal_cstr() call is commented out below.  Presumably this span is
   compiled out (e.g. with #if 0) on lines not shown -- confirm. */
3646 if (i != nest->scale_idx)
3648 if (!ds_is_empty (&name))
3649 ds_put_cstr (&name, ", ");
3650 char *value_s = data_out (value, var_get_encoding (var),
3651 var_get_print_format (var),
3652 settings_get_fmt_settings ());
3653 if (cat->type == CCT_TOTAL
3654 || cat->type == CCT_SUBTOTAL
3655 || cat->type == CCT_POSTCOMPUTE)
3656 ds_put_format (&name, "%s=total", var_get_name (var));
3658 ds_put_format (&name, "%s=%s", var_get_name (var),
3659 value_s + strspn (value_s, " "));
3665 //cell->name = ds_steal_cstr (&name);
/* One summary per specification in the set selected by the cell's summary
   variant, taken from the nest on the table's summary axis. */
3667 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3668 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3669 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3670 for (size_t i = 0; i < specs->n; i++)
3671 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3672 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3673 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3674 hmap_insert (&s->cells, &cell->node, hash);
/* Examines whether the scale value that SPECS summarizes is missing in case
   C.  It looks at SPECS->var first and then at each of SPECS->listwise_vars,
   and only when SPECS->is_scale is set.  The result is passed to
   ctables_summary_add() as `scale_missing'.  The return statements
   themselves are on lines not shown. */
3679 is_scale_missing (const struct ctables_summary_spec_set *specs,
3680 const struct ccase *c)
3682 if (!specs->is_scale)
3685 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3688 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3690 const struct variable *var = specs->listwise_vars[i];
3691 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell of section S identified by CATS (creating the cell
   if necessary): first to each of the cell's summaries, then to the totals
   of the cell's domains.

   IS_MISSING and EXCLUDED_MISSING describe the categorical variables' values
   in C; D_WEIGHT and E_WEIGHT are the two case weights accumulated into the
   d_* and e_* totals respectively. */
3699 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3700 const struct ctables_category *cats[PIVOT_N_AXES][10],
3701 bool is_missing, bool excluded_missing,
3702 double d_weight, double e_weight)
3704 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3705 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3707 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3709 bool scale_missing = is_scale_missing (specs, c);
3710 for (size_t i = 0; i < specs->n; i++)
3711 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3712 specs->var, case_data (c, specs->var), specs->is_scale,
3713 scale_missing, is_missing, excluded_missing,
3714 d_weight, e_weight);
/* NOTE(review): the test below uses logical `&&', not bitwise `&'.  Since
   (1u << dt) is never zero, the condition reduces to `!cell->omit_domains':
   a cell with any bit set in omit_domains contributes to no domain at all,
   whichever bits are set.  If a per-domain test was intended this should be
   `&', but do not change it without checking expected output: with the masks
   set in ctables_cell_insert__(), a per-bit test would let total cells add
   to domains that ordinary cells already feed. */
3715 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3716 if (!(cell->omit_domains && (1u << dt)))
3718 struct ctables_domain *d = cell->domains[dt];
/* Every case counts toward the domain's totals. */
3719 d->d_total += d_weight;
3720 d->e_total += e_weight;
/* Counts leave out cases with a missing value in an excluded category. */
3722 if (!excluded_missing)
3724 d->d_count += d_weight;
3725 d->e_count += e_weight;
/* Valid totals (the guarding condition is on a line not shown). */
3730 d->d_valid += d_weight;
3731 d->e_valid += e_weight;
/* Per-variable sums for the table's sum variables, skipping missing values:
   e_sum is weighted by E_WEIGHT, u_sum is unweighted. */
3734 for (size_t i = 0; i < s->table->n_sum_vars; i++)
3736 /* XXX listwise_missing??? */
3737 const struct variable *var = s->table->sum_vars[i];
3738 double addend = case_num (c, var);
3739 if (!var_is_num_missing (var, addend))
3741 struct ctables_sum *sum = &d->sums[i];
3742 sum->e_sum += addend * e_weight;
3743 sum->u_sum += addend;
/* Adds case C to the cells that involve total categories.  Visits each
   non-scale variable from axis START_AXIS, position START_NEST onward, looks
   up the total category of the variable's categories, adds the case to the
   resulting cell with ctables_cell_add__(), and recurses from the following
   position so that combinations of totals are covered too.

   The lines that test for a total and that swap CATS[a][i] before the calls
   and restore it from `save' afterwards are not shown; presumably they
   mirror recurse_subtotals(). */
3751 recurse_totals (struct ctables_section *s, const struct ccase *c,
3752 const struct ctables_category *cats[PIVOT_N_AXES][10],
3753 bool is_missing, bool excluded_missing,
3754 double d_weight, double e_weight,
3755 enum pivot_axis_type start_axis, size_t start_nest)
3757 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3759 const struct ctables_nest *nest = s->nests[a];
3760 for (size_t i = start_nest; i < nest->n; i++)
3762 if (i == nest->scale_idx)
3765 const struct variable *var = nest->vars[i];
3767 const struct ctables_category *total = ctables_categories_total (
3768 s->table->categories[var_get_dict_index (var)]);
3771 const struct ctables_category *save = cats[a][i];
3773 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3774 d_weight, e_weight);
3775 recurse_totals (s, c, cats, is_missing, excluded_missing,
3776 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells that involve subtotal categories.  Visits each
   non-scale variable from axis START_AXIS, position START_NEST onward,
   replaces its category in CATS with that category's subtotal, adds the case
   to the resulting cell with ctables_cell_add__(), and recurses from the
   following position.

   The test for whether the category has a subtotal and the restoration of
   CATS[a][i] from `save' are on lines not shown. */
3785 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3786 const struct ctables_category *cats[PIVOT_N_AXES][10],
3787 bool is_missing, bool excluded_missing,
3788 double d_weight, double e_weight,
3789 enum pivot_axis_type start_axis, size_t start_nest)
3791 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3793 const struct ctables_nest *nest = s->nests[a];
3794 for (size_t i = start_nest; i < nest->n; i++)
3796 if (i == nest->scale_idx)
3799 const struct ctables_category *save = cats[a][i];
3802 cats[a][i] = save->subtotal;
3803 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3804 d_weight, e_weight);
3805 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3806 d_weight, e_weight, a, i + 1);
/* Records VALUE of variable VAR in the hash map OCCURRENCES.  The map is
   searched for an entry with an equal value first; what happens on a hit is
   on a line not shown (presumably an early return, so that each distinct
   value is stored once).  A new entry holds its own copy of VALUE. */
3815 ctables_add_occurrence (const struct variable *var,
3816 const union value *value,
3817 struct hmap *occurrences)
3819 int width = var_get_width (var);
3820 unsigned int hash = value_hash (value, width, 0);
3822 struct ctables_occurrence *o;
3823 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3825 if (value_equal (value, &o->value, width))
3828 o = xmalloc (sizeof *o);
3829 value_clone (&o->value, value, width);
3830 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First every non-scale variable on every axis is classified into a category
   with ctables_categories_match().  On one path (its guarding conditions are
   on lines not shown) a static CCT_EXCLUDED_MISSING placeholder category is
   used instead and `excluded_missing' is set.  If no variable ended up in an
   excluded category, each variable's value is recorded in S->occurrences.
   Finally the case is added to its cell with ctables_cell_add__() and to the
   related total and subtotal cells with recurse_totals() and
   recurse_subtotals().

   Comments are added only above the second flag below: in this listing the
   comment on that flag has lost its closing line. */
3834 ctables_cell_insert (struct ctables_section *s,
3835 const struct ccase *c,
3836 double d_weight, double e_weight)
/* Category chosen for each variable, indexed by axis and nesting position.
   The 10 is a hard-coded bound on nesting depth. */
3838 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3840 /* Does at least one categorical variable have a missing value in an included
3841 or excluded category? */
3842 bool is_missing = false;
3844 /* Does at least one categorical variable have a missing value in an excluded
3846 bool excluded_missing = false;
3848 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3850 const struct ctables_nest *nest = s->nests[a];
3851 for (size_t i = 0; i < nest->n; i++)
3853 if (i == nest->scale_idx)
3856 const struct variable *var = nest->vars[i];
3857 const union value *value = case_data (c, var);
3859 bool var_missing = var_is_value_missing (var, value) != 0;
3863 cats[a][i] = ctables_categories_match (
3864 s->table->categories[var_get_dict_index (var)], value, var);
3870 static const struct ctables_category cct_excluded_missing = {
3871 .type = CCT_EXCLUDED_MISSING,
3874 cats[a][i] = &cct_excluded_missing;
3875 excluded_missing = true;
3880 if (!excluded_missing)
3881 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3883 const struct ctables_nest *nest = s->nests[a];
3884 for (size_t i = 0; i < nest->n; i++)
3885 if (i != nest->scale_idx)
3887 const struct variable *var = nest->vars[i];
3888 const union value *value = case_data (c, var);
3889 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3893 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3894 d_weight, e_weight);
3896 //if (!excluded_missing)
3898 recurse_totals (s, c, cats, is_missing, excluded_missing,
3899 d_weight, e_weight, 0, 0);
3900 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3901 d_weight, e_weight, 0, 0);
/* Member of the structure that merge_item_compare_3way() compares (the
   structure's header and its `ofs' member are on lines not shown): the
   summary specification set that `ofs' indexes into. */
3907 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that merge items A and
   B refer to (set->specs[ofs]).  Orders by function, then by percentile with
   the larger percentile first, then by label, treating a null label as an
   empty string. */
3912 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3914 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3915 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3916 if (as->function != bs->function)
3917 return as->function > bs->function ? 1 : -1;
3918 else if (as->percentile != bs->percentile)
3919 return as->percentile < bs->percentile ? 1 : -1;
3921 const char *as_label = as->label ? as->label : "";
3922 const char *bs_label = bs->label ? bs->label : "";
3923 return strcmp (as_label, bs_label);
/* Creates the label for category CAT without postcompute expansion: the
   category's total_label as user text for a total or subtotal, otherwise
   VALUE presented as a value of variable VAR. */
3926 static struct pivot_value *
3927 ctables_category_create_label__ (const struct ctables_category *cat,
3928 const struct variable *var,
3929 const union value *value)
3931 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3932 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3933 : pivot_value_new_var_value (var, value));
/* Builds the label for postcompute category CAT from CAT->pc->label,
   replacing each `)LABEL[N]' with the label of the Nth (1-based) category in
   CATS, created with ctables_category_create_label__() for VAR and VALUE.

   A label that contains no `)LABEL[' is returned unchanged.  N must be a
   decimal number from 1 to CATS->n_cats that fills the brackets exactly.
   The final return, which yields the raw label unexpanded, is presumably the
   fallback for a malformed reference; the jumps that reach it are on lines
   not shown. */
3936 static struct pivot_value *
3937 ctables_postcompute_label (const struct ctables_categories *cats,
3938 const struct ctables_category *cat,
3939 const struct variable *var,
3940 const union value *value)
3942 struct substring in = ss_cstr (cat->pc->label);
3943 struct substring target = ss_cstr (")LABEL[");
3945 struct string out = DS_EMPTY_INITIALIZER;
/* Locate the next reference in what remains of the label. */
3948 size_t chunk = ss_find_substring (in, target);
3949 if (chunk == SIZE_MAX)
/* No (further) reference.  If nothing was expanded, return the label as is;
   otherwise append the remaining text and return the accumulated string. */
3951 if (ds_is_empty (&out))
3952 return pivot_value_new_user_text (in.string, in.length);
3955 ds_put_substring (&out, in);
3956 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text that precedes the reference, then skip past `)LABEL['. */
3960 ds_put_substring (&out, ss_head (in, chunk));
3961 ss_advance (&in, chunk + target.length);
/* Extract and validate the category index between the brackets. */
3963 struct substring idx_s;
3964 if (!ss_get_until (&in, ']', &idx_s))
3967 long int idx = strtol (idx_s.string, &tail, 10);
3968 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
/* Append the referenced category's label as text. */
3971 struct ctables_category *cat2 = &cats->cats[idx - 1];
3972 struct pivot_value *label2
3973 = ctables_category_create_label__ (cat2, var, value);
3974 char *label2_s = pivot_value_to_string_defaults (label2);
3975 ds_put_cstr (&out, label2_s);
3977 pivot_value_destroy (label2);
3982 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates the label for category CAT.  A postcompute category that has a
   label goes through ctables_postcompute_label(), which needs CATS to
   resolve references to other categories; every other category goes through
   ctables_category_create_label__(). */
3985 static struct pivot_value *
3986 ctables_category_create_label (const struct ctables_categories *cats,
3987 const struct ctables_category *cat,
3988 const struct variable *var,
3989 const union value *value)
3991 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3992 ? ctables_postcompute_label (cats, cat, var, value)
3993 : ctables_category_create_label__ (cat, var, value));
/* Searches T->clabels_values_map, among the entries stored under HASH, for
   one whose value equals VALUE at the given WIDTH.  HASH must be the hash of
   VALUE as computed by the callers, value_hash (VALUE, WIDTH, 0).  The
   return statements are on lines not shown. */
3996 static struct ctables_value *
3997 ctables_value_find__ (struct ctables_table *t, const union value *value,
3998 int width, unsigned int hash)
4000 struct ctables_value *clv;
4001 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
4002 hash, &t->clabels_values_map)
4003 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The map is consulted first with
   ctables_value_find__(); presumably a new entry, holding its own copy of
   VALUE, is created only when none was found (that test is on a line not
   shown). */
4009 ctables_value_insert (struct ctables_table *t, const union value *value,
4012 unsigned int hash = value_hash (value, width, 0);
4013 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
4016 clv = xmalloc (sizeof *clv);
4017 value_clone (&clv->value, value, width);
4018 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T->clabels_values_map: computes the
   value's hash and delegates to ctables_value_find__(). */
4022 static struct ctables_value *
4023 ctables_value_find (struct ctables_table *t,
4024 const union value *value, int width)
4026 return ctables_value_find__ (t, value, width,
4027 value_hash (value, width, 0));
/* Recursively enumerates the combinations of one nest per axis of table T
   and appends a section to T->sections for each.

   While A is a valid axis, IX[A] is stepped over that axis's stack
   (T->stacks[A].n entries, or a single pass when the stack is empty) and the
   function recurses on the next axis.  Once every axis is fixed, the next
   free slot of T->sections is initialized: its cell map, one occurrence map
   per variable of each axis's selected nest, and one domain map per domain
   type. */
4031 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
4032 size_t ix[PIVOT_N_AXES])
4034 if (a < PIVOT_N_AXES)
4036 size_t limit = MAX (t->stacks[a].n, 1);
4037 for (ix[a] = 0; ix[a] < limit; ix[a]++)
4038 ctables_table_add_section (t, a + 1, ix);
4042 struct ctables_section *s = &t->sections[t->n_sections++];
4043 *s = (struct ctables_section) {
4045 .cells = HMAP_INITIALIZER (s->cells),
4047 for (a = 0; a < PIVOT_N_AXES; a++)
4050 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
4052 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
4053 for (size_t i = 0; i < nest->n; i++)
4054 hmap_init (&s->occurrences[a][i]);
4056 for (size_t i = 0; i < N_CTDTS; i++)
4057 hmap_init (&s->domains[i]);
/* Postcompute binary operator, passed as the `evaluate' callback to
   ctables_pcexpr_evaluate_nonterminal().  Body not shown in this listing;
   presumably returns A + B. */
4062 ctpo_add (double a, double b)
/* Postcompute binary operator, passed as the `evaluate' callback to
   ctables_pcexpr_evaluate_nonterminal().  Body not shown in this listing;
   presumably returns A - B. */
4068 ctpo_sub (double a, double b)
/* Postcompute binary operator with the same signature as ctpo_add() and
   ctpo_sub().  Body not shown in this listing; presumably returns A * B. */
4074 ctpo_mul (double a, double b)
/* Postcompute division: returns A / B, or SYSMIS when B is zero. */
4080 ctpo_div (double a, double b)
4082 return b ? a / b : SYSMIS;
/* Postcompute exponentiation: computes pow (A, B).  errno is saved before
   the call; the lines that use the saved value and produce the return value
   are not shown (presumably errno is inspected for a math error and then
   restored -- confirm). */
4086 ctpo_pow (double a, double b)
4088 int save_errno = errno;
4090 double result = pow (a, b);
/* Postcompute unary operator.  B is unused, so the function keeps the same
   two-argument signature as the binary operators.  Body not shown in this
   listing; presumably returns -A. */
4098 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression.  Two members that the
   evaluation code reads, pc_a_idx and summary_idx, are declared on lines not
   shown in this listing. */
4103 struct ctables_pcexpr_evaluate_ctx
/* Cell that supplies the categories and values for every position other
   than the postcompute variable's. */
4105 const struct ctables_cell *cell;
/* Section whose cells and occurrences are searched. */
4106 const struct ctables_section *section;
/* Categories in which an operand's category is looked up. */
4107 const struct ctables_categories *cats;
/* Axis of the postcompute variable; pc_a_idx is its position in that axis's
   nest. */
4108 enum pivot_axis_type pc_a;
/* Passed through to ctables_find_category_for_postcompute(). */
4111 enum fmt_type parse_format;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() below calls
   this function before its definition. */
4114 static double ctables_pcexpr_evaluate (
4115 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E of a postcompute expression: evaluates its first
   N_ARGS subexpressions in context CTX and applies EVALUATE to the results.
   `args' has room for two operands, so N_ARGS must be at most 2; an unused
   second operand is passed as 0.

   An operand that is not finite or is SYSMIS is detected before EVALUATE is
   called.  The statement taken in that case is on a line not shown
   (presumably it returns SYSMIS). */
4118 ctables_pcexpr_evaluate_nonterminal (
4119 const struct ctables_pcexpr_evaluate_ctx *ctx,
4120 const struct ctables_pcexpr *e, size_t n_args,
4121 double evaluate (double, double))
4123 double args[2] = { 0, 0 };
4124 for (size_t i = 0; i < n_args; i++)
4126 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
4127 if (!isfinite (args[i]) || args[i] == SYSMIS)
4130 return evaluate (args[0], args[1]);
/* Evaluates one category operand of a postcompute expression.  Finds the
   cell in CTX->section that agrees with CTX->cell in every non-scale
   position except the postcompute variable's (CTX->pc_a, CTX->pc_a_idx),
   where PC_CV is used instead.  Returns that cell's value for summary
   CTX->summary_idx, computed by ctables_summary_value().

   Cells are matched the same way ctables_cell_insert__() identifies them:
   by category and, except for total, subtotal, and postcompute categories,
   by value.  What is returned when no such cell exists is on a line not
   shown. */
4134 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
4135 const struct ctables_cell_value *pc_cv)
4137 const struct ctables_section *s = ctx->section;
/* Hash of the target cell's identifying categories and values. */
4140 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4142 const struct ctables_nest *nest = s->nests[a];
4143 for (size_t i = 0; i < nest->n; i++)
4144 if (i != nest->scale_idx)
4146 const struct ctables_cell_value *cv
4147 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4148 : &ctx->cell->axes[a].cvs[i]);
4149 hash = hash_pointer (cv->category, hash);
4150 if (cv->category->type != CCT_TOTAL
4151 && cv->category->type != CCT_SUBTOTAL
4152 && cv->category->type != CCT_POSTCOMPUTE)
4153 hash = value_hash (&cv->value,
4154 var_get_width (nest->vars[i]), hash);
/* Search the cells with that hash for an exact match. */
4158 struct ctables_cell *tc;
4159 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
4161 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4163 const struct ctables_nest *nest = s->nests[a];
4164 for (size_t i = 0; i < nest->n; i++)
4165 if (i != nest->scale_idx)
4167 const struct ctables_cell_value *p_cv
4168 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
4169 : &ctx->cell->axes[a].cvs[i]);
4170 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
4171 if (p_cv->category != t_cv->category
4172 || (p_cv->category->type != CCT_TOTAL
4173 && p_cv->category->type != CCT_SUBTOTAL
4174 && p_cv->category->type != CCT_POSTCOMPUTE
4175 && !value_equal (&p_cv->value,
4177 var_get_width (nest->vars[i]))))
/* Found: report the matching cell's summary, using the specification set
   selected by that cell's summary variant. */
4189 const struct ctables_table *t = s->table;
4190 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4191 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
4192 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
4193 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in the context described by CTX and
   returns its value.  Category operands are resolved to a category with
   ctables_find_category_for_postcompute() and evaluated with
   ctables_pcexpr_evaluate_category(); operators are evaluated through
   ctables_pcexpr_evaluate_nonterminal(). */
4197 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
4198 const struct ctables_pcexpr *e)
/* Range operands: accumulate the category value over every occurrence on the
   postcompute variable that CTX->cats maps to the range's category. */
4205 case CTPO_CAT_NRANGE:
4206 case CTPO_CAT_SRANGE:
4208 struct ctables_cell_value cv = {
4209 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e)
4211 assert (cv.category != NULL);
4213 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4214 const struct ctables_occurrence *o;
4217 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4218 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4219 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4221 cv.value = o->value;
4222 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands that name a single category: the cell value carries E->number. */
4227 case CTPO_CAT_NUMBER:
4228 case CTPO_CAT_MISSING:
4229 case CTPO_CAT_OTHERNM:
4230 case CTPO_CAT_SUBTOTAL:
4231 case CTPO_CAT_TOTAL:
4233 struct ctables_cell_value cv = {
4234 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4235 .value = { .f = e->number },
4237 assert (cv.category != NULL);
4238 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* String operand: when the variable is wider than the literal, a copy padded
   with spaces to the variable's width is used in place of the literal. */
4241 case CTPO_CAT_STRING:
4243 int width = var_get_width (ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx]);
4245 if (width > e->string.length)
4247 s = xmalloc (width);
4248 buf_copy_rpad (s, width, e->string.string, e->string.length, ' ');
4250 struct ctables_cell_value cv = {
4251 .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e),
4252 .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) },
4254 assert (cv.category != NULL);
4255 double retval = ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five take two operands; the last (ctpo_neg) takes one. */
4261 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4264 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4267 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4270 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4273 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4276 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Scans every axis value of CELL, which must be a postcompute cell in section
   S, for a category of type CCT_POSTCOMPUTE.  The index within the nest of a
   postcompute category that is found is stored in *PC_A_IDX_P.  Asserts that
   at least one such category exists.

   NOTE(review): presumably *PC_A_P receives the matching axis and the
   postcompute category itself is returned; the statements that would do so
   are not visible in this excerpt -- confirm. */
4282 static const struct ctables_category *
4283 ctables_cell_postcompute (const struct ctables_section *s,
4284 const struct ctables_cell *cell,
4285 enum pivot_axis_type *pc_a_p,
4288 assert (cell->postcompute);
4289 const struct ctables_category *pc_cat = NULL;
4290 for (enum pivot_axis_type pc_a = 0; pc_a < PIVOT_N_AXES; pc_a++)
4291 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4293 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4294 if (cv->category->type == CCT_POSTCOMPUTE)
4298 /* Multiple postcomputes cross each other. The value is
4303 pc_cat = cv->category;
4307 *pc_a_idx_p = pc_a_idx;
4311 assert (pc_cat != NULL);
/* Computes the value of postcompute CELL in section S for summary spec SS by
   evaluating the expression of the postcompute category found by
   ctables_cell_postcompute().

   If one of the postcompute's own summary specs has the same function and
   percentile as SS, its format and is_ctables_format flag are stored in
   *FORMAT and *IS_CTABLES_FORMAT.

   NOTE(review): pc->specs is dereferenced in the loop below; whether a null
   check precedes it is not visible in this excerpt -- confirm. */
4316 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4317 const struct ctables_cell *cell,
4318 const struct ctables_summary_spec *ss,
4319 struct fmt_spec *format,
4320 bool *is_ctables_format,
4323 enum pivot_axis_type pc_a = 0;
4324 size_t pc_a_idx = 0;
4325 const struct ctables_category *pc_cat = ctables_cell_postcompute (
4326 s, cell, &pc_a, &pc_a_idx);
4330 const struct ctables_postcompute *pc = pc_cat->pc;
/* Look for a postcompute-specific format override for this summary. */
4333 for (size_t i = 0; i < pc->specs->n; i++)
4335 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4336 if (ss->function == ss2->function
4337 && ss->percentile == ss2->percentile)
4339 *format = ss2->format;
4340 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable on which
   the postcompute was found. */
4346 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4347 const struct ctables_categories *cats = s->table->categories[
4348 var_get_dict_index (var)];
4349 struct ctables_pcexpr_evaluate_ctx ctx = {
4354 .pc_a_idx = pc_a_idx,
4355 .summary_idx = summary_idx,
4356 .parse_format = pc_cat->parse_format,
4358 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Formats D according to FORMAT and SETTINGS as a UTF-8 string obtained from
   data_out_stretchy(), then relocates a minus sign as described in the
   comment below. */
4362 ctables_format (double d, const struct fmt_spec *format,
4363 const struct fmt_settings *settings)
4365 const union value v = { .f = d };
4366 char *s = data_out_stretchy (&v, "UTF-8", format, settings, NULL);
4368 /* The custom-currency specifications for NEQUAL, PAREN, and PCTPAREN don't
4369 produce the results we want for negative numbers, putting the negative
4370 sign in the wrong spot, before the prefix instead of after it. We can't,
4371 in fact, produce the desired results using a custom-currency
4372 specification. Instead, we postprocess the output, moving the negative
4375 NEQUAL: "-N=3" => "N=-3"
4376 PAREN: "-(3)" => "(-3)"
4377 PCTPAREN: "-(3%)" => "(-3%)"
4379 This transformation doesn't affect NEGPAREN. */
4380 char *minus_src = strchr (s, '-');
/* Only the first '-' is considered, and it is left alone when the character
   just before it is 'E' (presumably an exponent sign). */
4381 if (minus_src && (minus_src == s || minus_src[-1] != 'E'))
4383 char *n_equals = strstr (s, "N=");
4384 char *lparen = strchr (s, '(');
/* The destination is the position of the '=' in "N=" if present, otherwise
   the position of the first '('.
   NOTE(review): MINUS_DST is null when neither is present; the guard for
   that case is not visible in this excerpt -- confirm it exists. */
4385 char *minus_dst = n_equals ? n_equals + 1 : lparen;
4387 move_element (s, minus_dst - s + 1, 1, minus_src - s, minus_dst - s);
/* Renders table T of CTABLES command CT as a pivot table and submits it.

   The pivot table gets an optional "Statistics" dimension for summary
   functions, an optional dimension for category labels moved off their own
   axis, and one dimension for each axis that has an expression or is the
   summary axis.  Each axis dimension's category tree is built from that
   axis's cells in sorted order.  Finally, one value is stored for each cell
   and summary spec. */
4393 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4395 struct pivot_table *pt = pivot_table_create__ (
4397 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4398 : pivot_value_new_text (N_("Custom Tables"))),
4401 pivot_table_set_caption (
4402 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4404 pivot_table_set_corner_text (
4405 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summary functions get their own dimension when summary labels are on a
   different axis from the summaries, or when they are hidden and there is
   more than one summary. */
4407 bool summary_dimension = (t->summary_axis != t->slabels_axis
4408 || (!t->slabels_visible
4409 && t->summary_specs.n > 1));
4410 if (summary_dimension)
4412 struct pivot_dimension *d = pivot_dimension_create (
4413 pt, t->slabels_axis, N_("Statistics"));
4414 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4415 if (!t->slabels_visible)
4416 d->hide_all_labels = true;
4417 for (size_t i = 0; i < specs->n; i++)
4418 pivot_category_create_leaf (
4419 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
/* Category labels moved to another axis get a dimension with one leaf per
   collected category-label value. */
4422 bool categories_dimension = t->clabels_example != NULL;
4423 if (categories_dimension)
4425 struct pivot_dimension *d = pivot_dimension_create (
4426 pt, t->label_axis[t->clabels_from_axis],
4427 t->clabels_from_axis == PIVOT_AXIS_ROW
4428 ? N_("Row Categories")
4429 : N_("Column Categories"));
4430 const struct variable *var = t->clabels_example;
4431 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4432 for (size_t i = 0; i < t->n_clabels_values; i++)
4434 const struct ctables_value *value = t->clabels_values[i];
4435 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4436 assert (cat != NULL);
4437 pivot_category_create_leaf (d->root, ctables_category_create_label (
4438 c, cat, t->clabels_example,
4443 pivot_table_set_look (pt, ct->look);
4444 struct pivot_dimension *d[PIVOT_N_AXES];
4445 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4447 static const char *names[] = {
4448 [PIVOT_AXIS_ROW] = N_("Rows"),
4449 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4450 [PIVOT_AXIS_LAYER] = N_("Layers"),
4452 d[a] = (t->axes[a] || a == t->summary_axis
4453 ? pivot_dimension_create (pt, a, names[a])
4458 assert (t->axes[a]);
4460 for (size_t i = 0; i < t->stacks[a].n; i++)
4462 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use this nest on axis A, counting their cells
   and tracking the deepest nest. */
4463 struct ctables_section **sections = xnmalloc (t->n_sections,
4465 size_t n_sections = 0;
4467 size_t n_total_cells = 0;
4468 size_t max_depth = 0;
4469 for (size_t j = 0; j < t->n_sections; j++)
4470 if (t->sections[j].nests[a] == nest)
4472 struct ctables_section *s = &t->sections[j];
4473 sections[n_sections++] = s;
4474 n_total_cells += s->cells.count;
4476 size_t depth = s->nests[a]->n;
4477 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections and sort them for this axis. */
4480 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4482 size_t n_sorted = 0;
4484 for (size_t j = 0; j < n_sections; j++)
4486 struct ctables_section *s = sections[j];
4488 struct ctables_cell *cell;
4489 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4491 sorted[n_sorted++] = cell;
4492 assert (n_sorted <= n_total_cells);
4495 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4496 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4499 for (size_t j = 0; j < n_sorted; j++)
4501 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4506 struct ctables_level
4508 enum ctables_level_type
4510 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4511 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4512 CTL_SUMMARY, /* Summary functions. */
4516 enum settings_value_show vlabel; /* CTL_VAR only. */
4519 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4520 size_t n_levels = 0;
4521 for (size_t k = 0; k < nest->n; k++)
4523 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4524 if (vlabel != CTVL_NONE)
4526 levels[n_levels++] = (struct ctables_level) {
4528 .vlabel = (enum settings_value_show) vlabel,
4533 if (nest->scale_idx != k
4534 && (k != nest->n - 1 || t->label_axis[a] == a))
4536 levels[n_levels++] = (struct ctables_level) {
4537 .type = CTL_CATEGORY,
4543 if (!summary_dimension && a == t->slabels_axis)
4545 levels[n_levels++] = (struct ctables_level) {
4546 .type = CTL_SUMMARY,
4547 .var_idx = SIZE_MAX,
4551 /* Pivot categories:
4553 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4554 - category for nest->vars[0], if nest->scale_idx != 0
4555 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4556 - category for nest->vars[1], if nest->scale_idx != 1
4558 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4559 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4560 - summary function, if 'a == t->slabels_axis && a ==
4563 Additional dimensions:
4565 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4567 - If 't->label_axis[b] == a' for some 'b != a', add a category
4572 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4574 for (size_t j = 0; j < n_sorted; j++)
4576 struct ctables_cell *cell = sorted[j];
4577 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4579 size_t n_common = 0;
4582 for (; n_common < n_levels; n_common++)
4584 const struct ctables_level *level = &levels[n_common];
4585 if (level->type == CTL_CATEGORY)
4587 size_t var_idx = level->var_idx;
4588 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4589 if (prev->axes[a].cvs[var_idx].category != c)
/* value_equal() is given the variable's width here, as in the other value
   comparisons in this file.  var_get_type() yields a value type, not a
   width, so it must not be passed in that position. */
4591 else if (c->type != CCT_SUBTOTAL
4592 && c->type != CCT_TOTAL
4593 && c->type != CCT_POSTCOMPUTE
4594 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4595 &cell->axes[a].cvs[var_idx].value,
4596 var_get_width (nest->vars[var_idx])))
4602 for (size_t k = n_common; k < n_levels; k++)
4604 const struct ctables_level *level = &levels[k];
4605 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4606 if (level->type == CTL_SUMMARY)
4608 assert (k == n_levels - 1);
4610 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4611 for (size_t m = 0; m < specs->n; m++)
4613 int leaf = pivot_category_create_leaf (
4614 parent, ctables_summary_label (&specs->specs[m],
4622 const struct variable *var = nest->vars[level->var_idx];
4623 struct pivot_value *label;
4624 if (level->type == CTL_VAR)
4626 label = pivot_value_new_variable (var);
4627 label->variable.show = level->vlabel;
4629 else if (level->type == CTL_CATEGORY)
4631 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4632 label = ctables_category_create_label (
4633 t->categories[var_get_dict_index (var)],
4634 cv->category, var, &cv->value);
4639 if (k == n_levels - 1)
4640 prev_leaf = pivot_category_create_leaf (parent, label);
4642 groups[k] = pivot_category_create_group__ (parent, label);
4646 cell->axes[a].leaf = prev_leaf;
4655 for (size_t i = 0; i < t->n_sections; i++)
4657 struct ctables_section *s = &t->sections[i];
4659 struct ctables_cell *cell;
4660 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4665 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4666 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4667 for (size_t j = 0; j < specs->n; j++)
4670 size_t n_dindexes = 0;
4672 if (summary_dimension)
4673 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4675 if (categories_dimension)
4677 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4678 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4679 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4680 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4683 dindexes[n_dindexes++] = ctv->leaf;
4686 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4689 int leaf = cell->axes[a].leaf;
4690 if (a == t->summary_axis && !summary_dimension)
4692 dindexes[n_dindexes++] = leaf;
4695 const struct ctables_summary_spec *ss = &specs->specs[j];
4697 struct fmt_spec format = specs->specs[j].format;
4698 bool is_ctables_format = ss->is_ctables_format;
4699 double d = (cell->postcompute
4700 ? ctables_cell_calculate_postcompute (
4701 s, cell, ss, &format, &is_ctables_format, j)
4702 : ctables_summary_value (cell, &cell->summaries[j],
4705 struct pivot_value *value;
4706 if (ct->hide_threshold != 0
4707 && d < ct->hide_threshold
4708 && ctables_summary_function_is_count (ss->function))
4710 value = pivot_value_new_user_text_nocopy (
4711 xasprintf ("<%d", ct->hide_threshold));
4713 else if (d == 0 && ct->zero)
4714 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4715 else if (d == SYSMIS && ct->missing)
4716 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4717 else if (is_ctables_format)
4718 value = pivot_value_new_user_text_nocopy (
4719 ctables_format (d, &format, &ct->ctables_formats));
4722 value = pivot_value_new_number (d);
4723 value->numeric.format = format;
4725 /* XXX should text values be right-justified? */
4726 pivot_table_put (pt, dindexes, n_dindexes, value);
4731 pivot_table_submit (pt);
/* Checks that the category labels of axis A in table T can be moved to the
   position T->label_axis[A], recording A in T->clabels_from_axis and the
   innermost variable of the first nest in T->clabels_example.

   An error is reported if any category of that variable is CCT_FUNCTION
   (sorting based on a summary function), if the innermost variable of any
   nest is the nest's scale variable, or if the innermost variables of the
   nests differ in width, value labels, or category specifications.

   NOTE(review): the early exits and return values are not visible in this
   excerpt; the caller combines the results for rows and columns with &&, so
   the function presumably returns true on success -- confirm. */
4735 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4737 enum pivot_axis_type label_pos = t->label_axis[a];
4741 t->clabels_from_axis = a;
/* Names used in the error messages below. */
4743 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4744 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4746 const struct ctables_stack *stack = &t->stacks[a];
4750 const struct ctables_nest *n0 = &stack->nests[0];
4753 assert (stack->n == 1);
/* The innermost variable of the first nest is the reference that every
   other nest is compared against. */
4757 const struct variable *v0 = n0->vars[n0->n - 1];
4758 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4759 t->clabels_example = v0;
4761 for (size_t i = 0; i < c0->n_cats; i++)
4762 if (c0->cats[i].type == CCT_FUNCTION)
4764 msg (SE, _("%s=%s is not allowed with sorting based "
4765 "on a summary function."),
4766 subcommand_name, pos_name);
4769 if (n0->n - 1 == n0->scale_idx)
4771 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4772 "but %s is a scale variable."),
4773 subcommand_name, pos_name, var_get_name (v0));
/* Compare the innermost variable of each remaining nest with V0. */
4777 for (size_t i = 1; i < stack->n; i++)
4779 const struct ctables_nest *ni = &stack->nests[i];
4781 const struct variable *vi = ni->vars[ni->n - 1];
4782 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4784 if (ni->n - 1 == ni->scale_idx)
4786 msg (SE, _("%s=%s requires the variables to be moved to be "
4787 "categorical, but %s is a scale variable."),
4788 subcommand_name, pos_name, var_get_name (vi));
4791 if (var_get_width (v0) != var_get_width (vi))
4793 msg (SE, _("%s=%s requires the variables to be "
4794 "moved to have the same width, but %s has "
4795 "width %d and %s has width %d."),
4796 subcommand_name, pos_name,
4797 var_get_name (v0), var_get_width (v0),
4798 var_get_name (vi), var_get_width (vi));
4801 if (!val_labs_equal (var_get_value_labels (v0),
4802 var_get_value_labels (vi)))
4804 msg (SE, _("%s=%s requires the variables to be "
4805 "moved to have the same value labels, but %s "
4806 "and %s have different value labels."),
4807 subcommand_name, pos_name,
4808 var_get_name (v0), var_get_name (vi));
4811 if (!ctables_categories_equal (c0, ci))
4813 msg (SE, _("%s=%s requires the variables to be "
4814 "moved to have the same category "
4815 "specifications, but %s and %s have different "
4816 "category specifications."),
4817 subcommand_name, pos_name,
4818 var_get_name (v0), var_get_name (vi));
/* Ensures that VAR is present in the array *SUM_VARS, which has *N elements
   in use and room for *ALLOCATED.  The array is searched linearly; when VAR
   is absent, the array is grown if it is full and VAR is stored at index *N.

   NOTE(review): the return statements are not visible in this excerpt; the
   caller stores the result in sum_var_idx, so this presumably returns VAR's
   index -- confirm. */
4827 add_sum_var (struct variable *var,
4828 struct variable ***sum_vars, size_t *n, size_t *allocated)
4830 for (size_t i = 0; i < *n; i++)
4831 if (var == (*sum_vars)[i])
4834 if (*n >= *allocated)
4835 *sum_vars = x2nrealloc (*sum_vars, allocated, sizeof **sum_vars);
4836 (*sum_vars)[*n] = var;
/* Walks axis expression A.  For every summary spec, in every summary
   variant, whose function satisfies ctables_function_is_pctsum(), registers
   A->var through add_sum_var() and stores the result in the spec's
   sum_var_idx.  Both subexpressions in A->subs are visited recursively.

   NOTE(review): the conditions selecting between the two loops below are not
   visible in this excerpt -- presumably they depend on the node's
   operator. */
4841 enumerate_sum_vars (const struct ctables_axis *a,
4842 struct variable ***sum_vars, size_t *n, size_t *allocated)
4850 for (size_t i = 0; i < N_CSVS; i++)
4851 for (size_t j = 0; j < a->specs[i].n; j++)
4853 struct ctables_summary_spec *spec = &a->specs[i].specs[j];
4854 if (ctables_function_is_pctsum (spec->function))
4855 spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated);
4861 for (size_t i = 0; i < 2; i++)
4862 enumerate_sum_vars (a->subs[i], sum_vars, n, allocated);
/* Prepares table T for execution:

   - Enumerates the nests on each axis and records, for each nest and domain
     type, indexes of the nest's variables in nest->domains[].

   - Supplies default summary specs for nests on the summary axis that have
     none.

   - When T->ctables->smissing_listwise is set, records in each nest's spec
     sets the scale variables of the other nests that share its group head.

   - Merges the summary specs of all the nests on the summary axis into
     T->summary_specs, assigning each spec its axis_idx.

   - Enumerates the variables needed by percentage-of-sum functions.

   Returns the conjunction of ctables_check_label_position() for rows and
   columns. */
4868 ctables_prepare_table (struct ctables_table *t)
4870 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4873 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4875 for (size_t j = 0; j < t->stacks[a].n; j++)
4877 struct ctables_nest *nest = &t->stacks[a].nests[j];
4878 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* At most nest->n indexes can be recorded for a domain type. */
4880 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4881 nest->n_domains[dt] = 0;
4883 for (size_t k = 0; k < nest->n; k++)
4885 if (k == nest->scale_idx)
4894 if (a != PIVOT_AXIS_LAYER)
4901 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4902 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4903 : a == PIVOT_AXIS_ROW)
4905 if (k == nest->n - 1
4906 || (nest->scale_idx == nest->n - 1
4907 && k == nest->n - 2))
4913 if (a == PIVOT_AXIS_COLUMN)
4918 if (a == PIVOT_AXIS_ROW)
4923 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Give the axis a stack that consists of a single nest with no variables. */
4930 struct ctables_nest *nest = xmalloc (sizeof *nest);
4931 *nest = (struct ctables_nest) { .n = 0 };
4932 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4934 /* There's no point in moving labels away from an axis that has no
4935 labels, so avoid dealing with the special cases around that. */
4936 t->label_axis[a] = a;
4939 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4940 for (size_t i = 0; i < stack->n; i++)
4942 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were specified for this nest: default to a single
   summary, the mean for a scale spec set and the count otherwise, and use
   the same specs for totals. */
4943 if (!nest->specs[CSV_CELL].n)
4945 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4946 specs->specs = xmalloc (sizeof *specs->specs);
4949 enum ctables_summary_function function
4950 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): specs->var is read here but assigned only afterward; confirm
   that ctables_summary_default_format() tolerates the value it holds at this
   point. */
4952 *specs->specs = (struct ctables_summary_spec) {
4953 .function = function,
4954 .format = ctables_summary_default_format (function, specs->var),
4957 specs->var = nest->vars[0];
4959 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4960 &nest->specs[CSV_CELL]);
/* Cell summaries were specified but totals were not: totals copy the cell
   specs. */
4962 else if (!nest->specs[CSV_TOTAL].n)
4963 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4964 &nest->specs[CSV_CELL]);
4966 if (t->ctables->smissing_listwise)
4968 struct variable **listwise_vars = NULL;
4970 size_t allocated = 0;
/* Collect the scale variables of the other nests with the same group head,
   starting the scan at the group head itself. */
4972 for (size_t j = nest->group_head; j < stack->n; j++)
4974 const struct ctables_nest *other_nest = &stack->nests[j];
4975 if (other_nest->group_head != nest->group_head)
4978 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4981 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4982 sizeof *listwise_vars);
4983 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4986 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4989 listwise_vars = xmemdup (listwise_vars,
4990 n * sizeof *listwise_vars);
4991 nest->specs[sv].listwise_vars = listwise_vars;
4992 nest->specs[sv].n_listwise_vars = n;
/* Merge the spec sets of all nests and variants: repeatedly pick the least
   remaining item according to merge_item_compare_3way(), append it to
   MERGED, and advance every item that compares equal to it, recording the
   merged index in each such spec's axis_idx. */
4997 struct ctables_summary_spec_set *merged = &t->summary_specs;
4998 struct merge_item *items = xnmalloc (N_CSVS * stack->n, sizeof *items);
5000 for (size_t j = 0; j < stack->n; j++)
5002 const struct ctables_nest *nest = &stack->nests[j];
5004 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5005 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
5010 struct merge_item min = items[0];
5011 for (size_t j = 1; j < n_left; j++)
5012 if (merge_item_compare_3way (&items[j], &min) < 0)
5015 if (merged->n >= merged->allocated)
5016 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
5017 sizeof *merged->specs);
5018 merged->specs[merged->n++] = min.set->specs[min.ofs];
5020 for (size_t j = 0; j < n_left; )
5022 if (merge_item_compare_3way (&items[j], &min) == 0)
5024 struct merge_item *item = &items[j];
5025 item->set->specs[item->ofs].axis_idx = merged->n - 1;
5026 if (++item->ofs >= item->set->n)
5028 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below write diagnostics to stdout;
   confirm that they are compiled out of production builds. */
5038 for (size_t j = 0; j < merged->n; j++)
5039 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
5041 for (size_t j = 0; j < stack->n; j++)
5043 const struct ctables_nest *nest = &stack->nests[j];
5044 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
5046 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
5047 for (size_t k = 0; k < specs->n; k++)
5048 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
5049 specs->specs[k].axis_idx);
5055 size_t allocated_sum_vars = 0;
5056 enumerate_sum_vars (t->axes[t->summary_axis],
5057 &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars);
5059 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
5060 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest on axis A of table T, takes the value in case C of the
   nest's innermost variable and, if the variable's categories match it,
   inserts it into T's set of category-label values.

   NOTE(review): numeric system-missing values are tested for first,
   presumably so that they can be skipped; the statement that follows the
   test is not visible in this excerpt. */
5064 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
5065 enum pivot_axis_type a)
5067 struct ctables_stack *stack = &t->stacks[a];
5068 for (size_t i = 0; i < stack->n; i++)
5070 const struct ctables_nest *nest = &stack->nests[i];
5071 const struct variable *var = nest->vars[nest->n - 1];
5072 const union value *value = case_data (c, var);
5074 if (var_is_numeric (var) && value->f == SYSMIS)
5077 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
5079 ctables_value_insert (t, value, var_get_width (var));
/* Comparison callback for sorting an array of pointers to struct
   ctables_value.  A_ and B_ point to elements of that array, and WIDTH_
   points to an int that holds the width of the values.  Returns the result
   of value_compare_3way() on the two values. */
5084 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
5086 const struct ctables_value *const *ap = a_;
5087 const struct ctables_value *const *bp = b_;
5088 const struct ctables_value *a = *ap;
5089 const struct ctables_value *b = *bp;
5090 const int *width = width_;
5091 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, an array of the values in
   T->clabels_values_map sorted by value, and numbers each value's leaf with
   its position in the sorted array. */
5095 ctables_sort_clabels_values (struct ctables_table *t)
5097 const struct variable *v0 = t->clabels_example;
5098 int width = var_get_width (v0);
5100 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add each value that has a value label and matches the variable's
   categories.
   NOTE(review): presumably this happens only when empty categories are to be
   shown; the controlling condition is not visible in this excerpt. */
5103 const struct val_labs *val_labs = var_get_value_labels (v0);
5104 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5105 vl = val_labs_next (val_labs, vl))
5106 if (ctables_categories_match (c0, &vl->value, v0))
5107 ctables_value_insert (t, &vl->value, width);
/* Copy the map's contents into a flat array. */
5110 size_t n = hmap_count (&t->clabels_values_map);
5111 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
5113 struct ctables_value *clv;
5115 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
5116 t->clabels_values[i++] = clv;
5117 t->n_clabels_values = n;
5120 sort (t->clabels_values, n, sizeof *t->clabels_values,
5121 compare_clabels_values_3way, &width);
5123 for (size_t i = 0; i < n; i++)
5124 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR implied by each category in CATS,
   using ctables_add_occurrence().  Depending on the category, the value is
   the category's own number or string, or is drawn from VAR's value labels
   subject to a filter.

   NOTE(review): most of the case labels of the switch on the category type
   are not visible in this excerpt, so the comments below identify each arm
   only by what its visible code does. */
5128 ctables_add_category_occurrences (const struct variable *var,
5129 struct hmap *occurrences,
5130 const struct ctables_categories *cats)
5132 const struct val_labs *val_labs = var_get_value_labels (var);
5134 for (size_t i = 0; i < cats->n_cats; i++)
5136 const struct ctables_category *c = &cats->cats[i];
/* The category's own number. */
5140 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* The category's own string, padded with spaces to VAR's width. */
5146 int width = var_get_width (var);
5148 value_init (&value, width);
5149 value_copy_buf_rpad (&value, width,
5150 CHAR_CAST (uint8_t *, c->string.string),
5151 c->string.length, ' ');
5152 ctables_add_occurrence (var, &value, occurrences);
5153 value_destroy (&value, width);
/* Labeled values within the category's numeric range, inclusive. */
5158 assert (var_is_numeric (var));
5159 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5160 vl = val_labs_next (val_labs, vl))
5161 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
5162 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values within the category's string range. */
5166 assert (var_is_alpha (var));
5167 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5168 vl = val_labs_next (val_labs, vl))
5169 if (in_string_range (&vl->value, var, c->srange))
5170 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
5174 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5175 vl = val_labs_next (val_labs, vl))
5176 if (var_is_value_missing (var, &vl->value))
5177 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
5181 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5182 vl = val_labs_next (val_labs, vl))
5183 ctables_add_occurrence (var, &vl->value, occurrences);
5186 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing values unless the category includes
   them. */
5196 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
5197 vl = val_labs_next (val_labs, vl))
5198 if (c->include_missing || !var_is_value_missing (var, &vl->value))
5199 ctables_add_occurrence (var, &vl->value, occurrences);
5202 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates combinations of occurrences across the variables of
   section S's nests, starting at variable A_IDX of axis A, and inserts a
   cell for each complete combination.  CATS accumulates the category chosen
   for each variable, and C is a scratch case whose values are overwritten
   with the values being enumerated. */
5209 ctables_section_recurse_add_empty_categories (
5210 struct ctables_section *s,
5211 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
5212 enum pivot_axis_type a, size_t a_idx)
/* All axes are filled in: insert the cell. */
5214 if (a >= PIVOT_N_AXES)
5215 ctables_cell_insert__ (s, c, cats);
/* This axis has no nest or no more variables: move on to the next axis. */
5216 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
5217 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
5220 const struct variable *var = s->nests[a]->vars[a_idx];
5221 const struct ctables_categories *categories = s->table->categories[
5222 var_get_dict_index (var)];
5223 int width = var_get_width (var);
5224 const struct hmap *occurrences = &s->occurrences[a][a_idx];
5225 const struct ctables_occurrence *o;
/* Try each occurrence of this variable: store its value in the scratch
   case, record the matching category, and recurse to the next variable. */
5226 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
5228 union value *value = case_data_rw (c, var);
5229 value_destroy (value, width);
5230 value_clone (value, &o->value, width);
5231 cats[a][a_idx] = ctables_categories_match (categories, value, var);
5232 assert (cats[a][a_idx] != NULL);
5233 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated by category, leaving the scratch
   case's value untouched. */
5236 for (size_t i = 0; i < categories->n_cats; i++)
5238 const struct ctables_category *cat = &categories->cats[i];
5239 if (cat->type == CCT_POSTCOMPUTE)
5241 cats[a][a_idx] = cat;
5242 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds category occurrences to section S for its non-scale variables and
   then inserts cells for the combinations of occurrences, using a scratch
   case created from the dictionary's prototype.

   NOTE(review): how cats->show_empty and the local SHOW_EMPTY flag gate this
   work is only partly visible in this excerpt.  The CATS array below has a
   fixed second dimension of 10 (marked XXX), which bounds the nesting depth
   that can be handled -- confirm that the bound is enforced elsewhere. */
5249 ctables_section_add_empty_categories (struct ctables_section *s)
5251 bool show_empty = false;
5252 for (size_t a = 0; a < PIVOT_N_AXES; a++)
5254 for (size_t k = 0; k < s->nests[a]->n; k++)
5255 if (k != s->nests[a]->scale_idx)
5257 const struct variable *var = s->nests[a]->vars[k];
5258 const struct ctables_categories *cats = s->table->categories[
5259 var_get_dict_index (var)];
5260 if (cats->show_empty)
5263 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
5269 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
5270 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
5271 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Discards the data accumulated in section S: the occurrences of each
   non-scale variable, all cells together with their values and summaries,
   and all domains.  The hash maps themselves remain initialized; the cell
   and domain maps are shrunk afterward. */
5276 ctables_section_clear (struct ctables_section *s)
/* Occurrences. */
5278 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5280 const struct ctables_nest *nest = s->nests[a];
5281 for (size_t i = 0; i < nest->n; i++)
5282 if (i != nest->scale_idx)
5284 const struct variable *var = nest->vars[i];
5285 int width = var_get_width (var);
5286 struct ctables_occurrence *o, *next;
5287 struct hmap *map = &s->occurrences[a][i];
5288 HMAP_FOR_EACH_SAFE (o, next, struct ctables_occurrence, node, map)
5290 value_destroy (&o->value, width);
5291 hmap_delete (map, &o->node);
/* Cells: release each cell's per-axis values and per-spec summaries. */
5298 struct ctables_cell *cell, *next_cell;
5299 HMAP_FOR_EACH_SAFE (cell, next_cell, struct ctables_cell, node, &s->cells)
5301 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5303 const struct ctables_nest *nest = s->nests[a];
5304 for (size_t i = 0; i < nest->n; i++)
5305 if (i != nest->scale_idx)
5306 value_destroy (&cell->axes[a].cvs[i].value,
5307 var_get_width (nest->vars[i]));
5308 free (cell->axes[a].cvs);
5311 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
5312 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
5313 for (size_t i = 0; i < specs->n; i++)
5314 ctables_summary_uninit (&cell->summaries[i], &specs->specs[i]);
5315 free (cell->summaries);
5317 hmap_delete (&s->cells, &cell->node);
5320 hmap_shrink (&s->cells);
/* Domains. */
5322 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
5324 struct ctables_domain *domain, *next_domain;
5325 HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node,
5328 free (domain->sums);
5329 hmap_delete (&s->domains[dt], &domain->node);
5332 hmap_shrink (&s->domains[dt]);
/* Releases everything owned by section S: clears its contents with
   ctables_section_clear(), then destroys the occurrence maps of every axis
   along with the arrays that hold them, the cell map, and the domain
   maps. */
5337 ctables_section_uninit (struct ctables_section *s)
5339 ctables_section_clear (s);
5341 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5343 struct ctables_nest *nest = s->nests[a];
5344 for (size_t i = 0; i < nest->n; i++)
5345 hmap_destroy (&s->occurrences[a][i]);
5346 free (s->occurrences[a]);
5349 hmap_destroy (&s->cells);
5350 for (size_t i = 0; i < N_CTDTS; i++)
5351 hmap_destroy (&s->domains[i]);
/* Clears the accumulated data of every section of table T.  If category
   labels are being moved (T->clabels_example is set), also discards the
   collected category-label values and the array built from them. */
5355 ctables_table_clear (struct ctables_table *t)
5357 for (size_t i = 0; i < t->n_sections; i++)
5358 ctables_section_clear (&t->sections[i]);
5360 if (t->clabels_example)
5362 int width = var_get_width (t->clabels_example);
5363 struct ctables_value *value, *next_value;
5364 HMAP_FOR_EACH_SAFE (value, next_value, struct ctables_value, node,
5365 &t->clabels_values_map)
5367 value_destroy (&value->value, width);
5368 hmap_delete (&t->clabels_values_map, &value->node);
5371 hmap_shrink (&t->clabels_values_map);
5373 free (t->clabels_values);
5374 t->clabels_values = NULL;
5375 t->n_clabels_values = 0;
/* Runs the tables of a CTABLES command over the cases in INPUT, which come
   from dataset DS.  Sections are created for each table, the input is
   divided into groups, every case is tallied into every section of every
   table, and each table is then completed, output, and cleared.  Returns the
   result of destroying the case grouper. */
5380 ctables_execute (struct dataset *ds, struct casereader *input,
5383 for (size_t i = 0; i < ct->n_tables; i++)
5385 struct ctables_table *t = ct->tables[i];
/* Room for one section per combination of row, column, and layer nests,
   counting an axis without nests as one. */
5386 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
5387 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
5388 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
5389 sizeof *t->sections);
5390 size_t ix[PIVOT_N_AXES];
5391 ctables_table_add_section (t, 0, ix);
/* Group by the split variables when the dictionary's split type is
   SPLIT_SEPARATE; otherwise group on no variables. */
5394 struct dictionary *dict = dataset_dict (ds);
5395 struct casegrouper *grouper
5396 = (dict_get_split_type (dict) == SPLIT_SEPARATE
5397 ? casegrouper_create_splits (input, dict)
5398 : casegrouper_create_vars (input, NULL, 0));
5399 struct casereader *group;
5400 while (casegrouper_get_next_group (grouper, &group))
5402 /* Output SPLIT FILE variables. */
5403 struct ccase *c = casereader_peek (group, 0);
5406 output_split_file_values (ds, c);
5410 bool warn_on_invalid = true;
5411 for (c = casereader_read (group); c;
5412 case_unref (c), c = casereader_read (group))
/* D_WEIGHT is the dictionary's case weight.  E_WEIGHT comes from the
   command's own weight variable when one is set. */
5414 double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
5415 double e_weight = (ct->e_weight
5416 ? var_force_valid_weight (ct->e_weight,
5417 case_num (c, ct->e_weight),
5421 for (size_t i = 0; i < ct->n_tables; i++)
5423 struct ctables_table *t = ct->tables[i];
5425 for (size_t j = 0; j < t->n_sections; j++)
5426 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Collect category-label values for axes whose labels are moved elsewhere. */
5428 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5429 if (t->label_axis[a] != a)
5430 ctables_insert_clabels_values (t, c, a);
5433 casereader_destroy (group);
/* Finish each table: order moved category labels, add cells for empty
   categories, output the table, and clear its accumulated data. */
5435 for (size_t i = 0; i < ct->n_tables; i++)
5437 struct ctables_table *t = ct->tables[i];
5439 if (t->clabels_example)
5440 ctables_sort_clabels_values (t);
5442 for (size_t j = 0; j < t->n_sections; j++)
5443 ctables_section_add_empty_categories (&t->sections[j]);
5445 ctables_table_output (ct, t);
5446 ctables_table_clear (t);
5449 return casegrouper_destroy (grouper);
/* Type of a function that parses one level of a postcompute expression from
   a lexer, given a dictionary, and returns the resulting expression node. */
typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
                                                       struct dictionary *);
5458 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5464 case CTPO_CAT_STRING:
5465 ss_dealloc (&e->string);
5468 case CTPO_CAT_SRANGE:
5469 for (size_t i = 0; i < 2; i++)
5470 ss_dealloc (&e->srange[i]);
5479 for (size_t i = 0; i < 2; i++)
5480 ctables_pcexpr_destroy (e->subs[i]);
5484 case CTPO_CAT_NUMBER:
5485 case CTPO_CAT_NRANGE:
5486 case CTPO_CAT_MISSING:
5487 case CTPO_CAT_OTHERNM:
5488 case CTPO_CAT_SUBTOTAL:
5489 case CTPO_CAT_TOTAL:
5493 msg_location_destroy (e->location);
5498 static struct ctables_pcexpr *
5499 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5500 struct ctables_pcexpr *sub0,
5501 struct ctables_pcexpr *sub1)
5503 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5504 *e = (struct ctables_pcexpr) {
5506 .subs = { sub0, sub1 },
5507 .location = msg_location_merged (sub0->location, sub1->location),
5512 /* How to parse an operator. */
5515 enum token_type token;
5516 enum ctables_postcompute_op op;
5519 static const struct operator *
5520 ctables_pcexpr_match_operator (struct lexer *lexer,
5521 const struct operator ops[], size_t n_ops)
5523 for (const struct operator *op = ops; op < ops + n_ops; op++)
5524 if (lex_token (lexer) == op->token)
5526 if (op->token != T_NEG_NUM)
5535 static struct ctables_pcexpr *
5536 ctables_pcexpr_parse_binary_operators__ (
5537 struct lexer *lexer, struct dictionary *dict,
5538 const struct operator ops[], size_t n_ops,
5539 parse_recursively_func *parse_next_level,
5540 const char *chain_warning, struct ctables_pcexpr *lhs)
5542 for (int op_count = 0; ; op_count++)
5544 const struct operator *op
5545 = ctables_pcexpr_match_operator (lexer, ops, n_ops);
5548 if (op_count > 1 && chain_warning)
5549 msg_at (SW, lhs->location, "%s", chain_warning);
5554 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5557 ctables_pcexpr_destroy (lhs);
5561 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5565 static struct ctables_pcexpr *
5566 ctables_pcexpr_parse_binary_operators (
5567 struct lexer *lexer, struct dictionary *dict,
5568 const struct operator ops[], size_t n_ops,
5569 parse_recursively_func *parse_next_level, const char *chain_warning)
5571 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5575 return ctables_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5577 chain_warning, lhs);
/* Forward declaration: the primary-expression parser calls this for
   parenthesized subexpressions before its definition appears. */
static struct ctables_pcexpr *ctables_pcexpr_parse_add (struct lexer *,
                                                        struct dictionary *);
5583 static struct ctables_pcexpr
5584 ctpo_cat_nrange (double low, double high)
5586 return (struct ctables_pcexpr) {
5587 .op = CTPO_CAT_NRANGE,
5588 .nrange = { low, high },
5592 static struct ctables_pcexpr
5593 ctpo_cat_srange (struct substring low, struct substring high)
5595 return (struct ctables_pcexpr) {
5596 .op = CTPO_CAT_SRANGE,
5597 .srange = { low, high },
5601 static struct ctables_pcexpr *
5602 ctables_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5604 int start_ofs = lex_ofs (lexer);
5605 struct ctables_pcexpr e;
5606 if (lex_is_number (lexer))
5608 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5609 .number = lex_number (lexer) };
5612 else if (lex_match_id (lexer, "MISSING"))
5613 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5614 else if (lex_match_id (lexer, "OTHERNM"))
5615 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5616 else if (lex_match_id (lexer, "TOTAL"))
5617 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5618 else if (lex_match_id (lexer, "SUBTOTAL"))
5620 size_t subtotal_index = 0;
5621 if (lex_match (lexer, T_LBRACK))
5623 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5625 subtotal_index = lex_integer (lexer);
5627 if (!lex_force_match (lexer, T_RBRACK))
5630 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5631 .subtotal_index = subtotal_index };
5633 else if (lex_match (lexer, T_LBRACK))
5635 if (lex_match_id (lexer, "LO"))
5637 if (!lex_force_match_id (lexer, "THRU"))
5640 if (lex_is_string (lexer))
5642 struct substring low = { .string = NULL };
5643 struct substring high = parse_substring (lexer, dict);
5644 e = ctpo_cat_srange (low, high);
5648 if (!lex_force_num (lexer))
5650 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5654 else if (lex_is_number (lexer))
5656 double number = lex_number (lexer);
5658 if (lex_match_id (lexer, "THRU"))
5660 if (lex_match_id (lexer, "HI"))
5661 e = ctpo_cat_nrange (number, DBL_MAX);
5664 if (!lex_force_num (lexer))
5666 e = ctpo_cat_nrange (number, lex_number (lexer));
5671 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5674 else if (lex_is_string (lexer))
5676 struct substring s = parse_substring (lexer, dict);
5678 if (lex_match_id (lexer, "THRU"))
5680 struct substring high;
5682 if (lex_match_id (lexer, "HI"))
5683 high = (struct substring) { .string = NULL };
5686 if (!lex_force_string (lexer))
5691 high = parse_substring (lexer, dict);
5694 e = ctpo_cat_srange (s, high);
5697 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5701 lex_error (lexer, NULL);
5705 if (!lex_force_match (lexer, T_RBRACK))
5707 if (e.op == CTPO_CAT_STRING)
5708 ss_dealloc (&e.string);
5709 else if (e.op == CTPO_CAT_SRANGE)
5711 ss_dealloc (&e.srange[0]);
5712 ss_dealloc (&e.srange[1]);
5717 else if (lex_match (lexer, T_LPAREN))
5719 struct ctables_pcexpr *ep = ctables_pcexpr_parse_add (lexer, dict);
5722 if (!lex_force_match (lexer, T_RPAREN))
5724 ctables_pcexpr_destroy (ep);
5731 lex_error (lexer, NULL);
5735 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5736 return xmemdup (&e, sizeof e);
5739 static struct ctables_pcexpr *
5740 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5741 struct lexer *lexer, int start_ofs)
5743 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5744 *e = (struct ctables_pcexpr) {
5747 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5752 static struct ctables_pcexpr *
5753 ctables_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5755 static const struct operator op = { T_EXP, CTPO_POW };
5757 const char *chain_warning =
5758 _("The exponentiation operator (`**') is left-associative: "
5759 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5760 "To disable this warning, insert parentheses.");
5762 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5763 return ctables_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5764 ctables_pcexpr_parse_primary,
5767 /* Special case for situations like "-5**6", which must be parsed as
5770 int start_ofs = lex_ofs (lexer);
5771 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5772 *lhs = (struct ctables_pcexpr) {
5773 .op = CTPO_CONSTANT,
5774 .number = -lex_tokval (lexer),
5775 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5779 struct ctables_pcexpr *node = ctables_pcexpr_parse_binary_operators__ (
5780 lexer, dict, &op, 1,
5781 ctables_pcexpr_parse_primary, chain_warning, lhs);
5785 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5788 /* Parses the unary minus level. */
5789 static struct ctables_pcexpr *
5790 ctables_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5792 int start_ofs = lex_ofs (lexer);
5793 if (!lex_match (lexer, T_DASH))
5794 return ctables_pcexpr_parse_exp (lexer, dict);
5796 struct ctables_pcexpr *inner = ctables_pcexpr_parse_neg (lexer, dict);
5800 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5803 /* Parses the multiplication and division level. */
5804 static struct ctables_pcexpr *
5805 ctables_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5807 static const struct operator ops[] =
5809 { T_ASTERISK, CTPO_MUL },
5810 { T_SLASH, CTPO_DIV },
5813 return ctables_pcexpr_parse_binary_operators (lexer, dict, ops,
5814 sizeof ops / sizeof *ops,
5815 ctables_pcexpr_parse_neg, NULL);
5818 /* Parses the addition and subtraction level. */
5819 static struct ctables_pcexpr *
5820 ctables_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5822 static const struct operator ops[] =
5824 { T_PLUS, CTPO_ADD },
5825 { T_DASH, CTPO_SUB },
5826 { T_NEG_NUM, CTPO_ADD },
5829 return ctables_pcexpr_parse_binary_operators (lexer, dict,
5830 ops, sizeof ops / sizeof *ops,
5831 ctables_pcexpr_parse_mul, NULL);
5834 static struct ctables_postcompute *
5835 ctables_find_postcompute (struct ctables *ct, const char *name)
5837 struct ctables_postcompute *pc;
5838 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5839 utf8_hash_case_string (name, 0), &ct->postcomputes)
5840 if (!utf8_strcasecmp (pc->name, name))
5846 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5849 int pcompute_start = lex_ofs (lexer) - 1;
5851 if (!lex_match (lexer, T_AND))
5853 lex_error_expecting (lexer, "&");
5856 if (!lex_force_id (lexer))
5859 char *name = ss_xstrdup (lex_tokss (lexer));
5862 if (!lex_force_match (lexer, T_EQUALS)
5863 || !lex_force_match_id (lexer, "EXPR")
5864 || !lex_force_match (lexer, T_LPAREN))
5870 int expr_start = lex_ofs (lexer);
5871 struct ctables_pcexpr *expr = ctables_pcexpr_parse_add (lexer, dict);
5872 int expr_end = lex_ofs (lexer) - 1;
5873 if (!expr || !lex_force_match (lexer, T_RPAREN))
5875 ctables_pcexpr_destroy (expr);
5879 int pcompute_end = lex_ofs (lexer) - 1;
5881 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5884 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5887 msg_at (SW, location, _("New definition of &%s will override the "
5888 "previous definition."),
5890 msg_at (SN, pc->location, _("This is the previous definition."));
5892 ctables_pcexpr_destroy (pc->expr);
5893 msg_location_destroy (pc->location);
5898 pc = xmalloc (sizeof *pc);
5899 *pc = (struct ctables_postcompute) { .name = name };
5900 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5901 utf8_hash_case_string (pc->name, 0));
5904 pc->location = location;
5906 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5911 ctables_parse_pproperties_format (struct lexer *lexer,
5912 struct ctables_summary_spec_set *sss)
5914 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5916 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5917 && !(lex_token (lexer) == T_ID
5918 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5919 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5920 lex_tokss (lexer)))))
5922 /* Parse function. */
5923 enum ctables_summary_function function;
5924 if (!parse_ctables_summary_function (lexer, &function))
5927 /* Parse percentile. */
5928 double percentile = 0;
5929 if (function == CTSF_PTILE)
5931 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5933 percentile = lex_number (lexer);
5938 struct fmt_spec format;
5939 bool is_ctables_format;
5940 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5943 if (sss->n >= sss->allocated)
5944 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5945 sizeof *sss->specs);
5946 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5947 .function = function,
5948 .percentile = percentile,
5950 .is_ctables_format = is_ctables_format,
5956 ctables_summary_spec_set_uninit (sss);
5961 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5963 struct ctables_postcompute **pcs = NULL;
5965 size_t allocated_pcs = 0;
5967 while (lex_match (lexer, T_AND))
5969 if (!lex_force_id (lexer))
5971 struct ctables_postcompute *pc
5972 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5975 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5980 if (n_pcs >= allocated_pcs)
5981 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5985 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5987 if (lex_match_id (lexer, "LABEL"))
5989 lex_match (lexer, T_EQUALS);
5990 if (!lex_force_string (lexer))
5993 for (size_t i = 0; i < n_pcs; i++)
5995 free (pcs[i]->label);
5996 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
6001 else if (lex_match_id (lexer, "FORMAT"))
6003 lex_match (lexer, T_EQUALS);
6005 struct ctables_summary_spec_set sss;
6006 if (!ctables_parse_pproperties_format (lexer, &sss))
6009 for (size_t i = 0; i < n_pcs; i++)
6012 ctables_summary_spec_set_uninit (pcs[i]->specs);
6014 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
6015 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
6017 ctables_summary_spec_set_uninit (&sss);
6019 else if (lex_match_id (lexer, "HIDESOURCECATS"))
6021 lex_match (lexer, T_EQUALS);
6022 bool hide_source_cats;
6023 if (!parse_bool (lexer, &hide_source_cats))
6025 for (size_t i = 0; i < n_pcs; i++)
6026 pcs[i]->hide_source_cats = hide_source_cats;
6030 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats time NOW, broken down as local time, according to strftime()
   format string FORMAT and appends the result to OUT.

   Appends nothing if NOW cannot be converted to local time or if
   strftime() produces no output: localtime() may return a null pointer, and
   when strftime() returns 0 the contents of its buffer are indeterminate,
   so neither result may be used.

   NOTE(review): the return type and the declaration of the output buffer
   were missing from the damaged listing and were restored from context
   (the 128-byte size in particular); verify against upstream. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
6052 skip_prefix (struct substring *s, struct substring prefix)
6054 if (ss_starts_with (*s, prefix))
6056 ss_advance (s, prefix.length);
6064 put_table_expression (struct string *out, struct lexer *lexer,
6065 struct dictionary *dict, int expr_start, int expr_end)
6068 for (int ofs = expr_start; ofs < expr_end; ofs++)
6070 const struct token *t = lex_ofs_token (lexer, ofs);
6071 if (t->type == T_LBRACK)
6073 else if (t->type == T_RBRACK && nest > 0)
6079 else if (t->type == T_ID)
6081 const struct variable *var
6082 = dict_lookup_var (dict, t->string.string);
6083 const char *label = var ? var_get_label (var) : NULL;
6084 ds_put_cstr (out, label ? label : t->string.string);
6088 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
6089 ds_put_byte (out, ' ');
6091 char *repr = lex_ofs_representation (lexer, ofs, ofs);
6092 ds_put_cstr (out, repr);
6095 if (ofs + 1 != expr_end && t->type != T_LPAREN)
6096 ds_put_byte (out, ' ');
6102 put_title_text (struct string *out, struct substring in, time_t now,
6103 struct lexer *lexer, struct dictionary *dict,
6104 int expr_start, int expr_end)
6108 size_t chunk = ss_find_byte (in, ')');
6109 ds_put_substring (out, ss_head (in, chunk));
6110 ss_advance (&in, chunk);
6111 if (ss_is_empty (in))
6114 if (skip_prefix (&in, ss_cstr (")DATE")))
6115 put_strftime (out, now, "%x");
6116 else if (skip_prefix (&in, ss_cstr (")TIME")))
6117 put_strftime (out, now, "%X");
6118 else if (skip_prefix (&in, ss_cstr (")TABLE")))
6119 put_table_expression (out, lexer, dict, expr_start, expr_end);
6122 ds_put_byte (out, ')');
6123 ss_advance (&in, 1);
6129 cmd_ctables (struct lexer *lexer, struct dataset *ds)
6131 struct casereader *input = NULL;
6133 struct measure_guesser *mg = measure_guesser_create (ds);
6136 input = proc_open (ds);
6137 measure_guesser_run (mg, input);
6138 measure_guesser_destroy (mg);
6141 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6142 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
6143 enum settings_value_show tvars = settings_get_show_variables ();
6144 for (size_t i = 0; i < n_vars; i++)
6145 vlabels[i] = (enum ctables_vlabel) tvars;
6147 struct pivot_table_look *look = pivot_table_look_unshare (
6148 pivot_table_look_ref (pivot_table_look_get_default ()));
6149 look->omit_empty = false;
6151 struct ctables *ct = xmalloc (sizeof *ct);
6152 *ct = (struct ctables) {
6153 .dict = dataset_dict (ds),
6155 .ctables_formats = FMT_SETTINGS_INIT,
6157 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
6160 time_t now = time (NULL);
6165 const char *dot_string;
6166 const char *comma_string;
6168 static const struct ctf ctfs[4] = {
6169 { CTEF_NEGPAREN, "(,,,)", "(...)" },
6170 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
6171 { CTEF_PAREN, "-,(,),", "-.(.)." },
6172 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
6174 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
6175 for (size_t i = 0; i < 4; i++)
6177 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
6178 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
6179 fmt_number_style_from_string (s));
6182 if (!lex_force_match (lexer, T_SLASH))
6185 while (!lex_match_id (lexer, "TABLE"))
6187 if (lex_match_id (lexer, "FORMAT"))
6189 double widths[2] = { SYSMIS, SYSMIS };
6190 double units_per_inch = 72.0;
6192 while (lex_token (lexer) != T_SLASH)
6194 if (lex_match_id (lexer, "MINCOLWIDTH"))
6196 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
6199 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
6201 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
6204 else if (lex_match_id (lexer, "UNITS"))
6206 lex_match (lexer, T_EQUALS);
6207 if (lex_match_id (lexer, "POINTS"))
6208 units_per_inch = 72.0;
6209 else if (lex_match_id (lexer, "INCHES"))
6210 units_per_inch = 1.0;
6211 else if (lex_match_id (lexer, "CM"))
6212 units_per_inch = 2.54;
6215 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
6219 else if (lex_match_id (lexer, "EMPTY"))
6224 lex_match (lexer, T_EQUALS);
6225 if (lex_match_id (lexer, "ZERO"))
6227 /* Nothing to do. */
6229 else if (lex_match_id (lexer, "BLANK"))
6230 ct->zero = xstrdup ("");
6231 else if (lex_force_string (lexer))
6233 ct->zero = ss_xstrdup (lex_tokss (lexer));
6239 else if (lex_match_id (lexer, "MISSING"))
6241 lex_match (lexer, T_EQUALS);
6242 if (!lex_force_string (lexer))
6246 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
6247 ? ss_xstrdup (lex_tokss (lexer))
6253 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
6254 "UNITS", "EMPTY", "MISSING");
6259 if (widths[0] != SYSMIS && widths[1] != SYSMIS
6260 && widths[0] > widths[1])
6262 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
6266 for (size_t i = 0; i < 2; i++)
6267 if (widths[i] != SYSMIS)
6269 int *wr = ct->look->width_ranges[TABLE_HORZ];
6270 wr[i] = widths[i] / units_per_inch * 96.0;
6275 else if (lex_match_id (lexer, "VLABELS"))
6277 if (!lex_force_match_id (lexer, "VARIABLES"))
6279 lex_match (lexer, T_EQUALS);
6281 struct variable **vars;
6283 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
6287 if (!lex_force_match_id (lexer, "DISPLAY"))
6292 lex_match (lexer, T_EQUALS);
6294 enum ctables_vlabel vlabel;
6295 if (lex_match_id (lexer, "DEFAULT"))
6296 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
6297 else if (lex_match_id (lexer, "NAME"))
6299 else if (lex_match_id (lexer, "LABEL"))
6300 vlabel = CTVL_LABEL;
6301 else if (lex_match_id (lexer, "BOTH"))
6303 else if (lex_match_id (lexer, "NONE"))
6307 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
6313 for (size_t i = 0; i < n_vars; i++)
6314 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
6317 else if (lex_match_id (lexer, "MRSETS"))
6319 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
6321 lex_match (lexer, T_EQUALS);
6322 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
6325 else if (lex_match_id (lexer, "SMISSING"))
6327 if (lex_match_id (lexer, "VARIABLE"))
6328 ct->smissing_listwise = false;
6329 else if (lex_match_id (lexer, "LISTWISE"))
6330 ct->smissing_listwise = true;
6333 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
6337 else if (lex_match_id (lexer, "PCOMPUTE"))
6339 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
6342 else if (lex_match_id (lexer, "PPROPERTIES"))
6344 if (!ctables_parse_pproperties (lexer, ct))
6347 else if (lex_match_id (lexer, "WEIGHT"))
6349 if (!lex_force_match_id (lexer, "VARIABLE"))
6351 lex_match (lexer, T_EQUALS);
6352 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
6356 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
6358 if (lex_match_id (lexer, "COUNT"))
6360 lex_match (lexer, T_EQUALS);
6361 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
6364 ct->hide_threshold = lex_integer (lexer);
6367 else if (ct->hide_threshold == 0)
6368 ct->hide_threshold = 5;
6372 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
6373 "SMISSING", "PCOMPUTE", "PPROPERTIES",
6374 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
6378 if (!lex_force_match (lexer, T_SLASH))
6382 size_t allocated_tables = 0;
6385 if (ct->n_tables >= allocated_tables)
6386 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
6387 sizeof *ct->tables);
6389 struct ctables_category *cat = xmalloc (sizeof *cat);
6390 *cat = (struct ctables_category) {
6392 .include_missing = false,
6393 .sort_ascending = true,
6396 struct ctables_categories *c = xmalloc (sizeof *c);
6397 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
6398 *c = (struct ctables_categories) {
6405 struct ctables_categories **categories = xnmalloc (n_vars,
6406 sizeof *categories);
6407 for (size_t i = 0; i < n_vars; i++)
6410 struct ctables_table *t = xmalloc (sizeof *t);
6411 *t = (struct ctables_table) {
6413 .slabels_axis = PIVOT_AXIS_COLUMN,
6414 .slabels_visible = true,
6415 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
6417 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
6418 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
6419 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
6421 .clabels_from_axis = PIVOT_AXIS_LAYER,
6422 .categories = categories,
6423 .n_categories = n_vars,
6426 ct->tables[ct->n_tables++] = t;
6428 lex_match (lexer, T_EQUALS);
6429 int expr_start = lex_ofs (lexer);
6430 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
6432 if (lex_match (lexer, T_BY))
6434 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6435 ct, t, PIVOT_AXIS_COLUMN))
6438 if (lex_match (lexer, T_BY))
6440 if (!ctables_axis_parse (lexer, dataset_dict (ds),
6441 ct, t, PIVOT_AXIS_LAYER))
6445 int expr_end = lex_ofs (lexer);
6447 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
6448 && !t->axes[PIVOT_AXIS_LAYER])
6450 lex_error (lexer, _("At least one variable must be specified."));
6454 const struct ctables_axis *scales[PIVOT_N_AXES];
6455 size_t n_scales = 0;
6456 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6458 scales[a] = find_scale (t->axes[a]);
6464 msg (SE, _("Scale variables may appear only on one axis."));
6465 if (scales[PIVOT_AXIS_ROW])
6466 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
6467 _("This scale variable appears on the rows axis."));
6468 if (scales[PIVOT_AXIS_COLUMN])
6469 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
6470 _("This scale variable appears on the columns axis."));
6471 if (scales[PIVOT_AXIS_LAYER])
6472 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
6473 _("This scale variable appears on the layer axis."));
6477 const struct ctables_axis *summaries[PIVOT_N_AXES];
6478 size_t n_summaries = 0;
6479 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6481 summaries[a] = (scales[a]
6483 : find_categorical_summary_spec (t->axes[a]));
6487 if (n_summaries > 1)
6489 msg (SE, _("Summaries may appear only on one axis."));
6490 if (summaries[PIVOT_AXIS_ROW])
6491 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
6492 _("This variable on the rows axis has a summary."));
6493 if (summaries[PIVOT_AXIS_COLUMN])
6494 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6495 _("This variable on the columns axis has a summary."));
6496 if (summaries[PIVOT_AXIS_LAYER])
6497 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6498 _("This variable on the layers axis has a summary."));
6501 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6502 if (n_summaries ? summaries[a] : t->axes[a])
6504 t->summary_axis = a;
6508 if (lex_token (lexer) == T_ENDCMD)
6510 if (!ctables_prepare_table (t))
6514 if (!lex_force_match (lexer, T_SLASH))
6517 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6519 if (lex_match_id (lexer, "SLABELS"))
6521 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6523 if (lex_match_id (lexer, "POSITION"))
6525 lex_match (lexer, T_EQUALS);
6526 if (lex_match_id (lexer, "COLUMN"))
6527 t->slabels_axis = PIVOT_AXIS_COLUMN;
6528 else if (lex_match_id (lexer, "ROW"))
6529 t->slabels_axis = PIVOT_AXIS_ROW;
6530 else if (lex_match_id (lexer, "LAYER"))
6531 t->slabels_axis = PIVOT_AXIS_LAYER;
6534 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6538 else if (lex_match_id (lexer, "VISIBLE"))
6540 lex_match (lexer, T_EQUALS);
6541 if (!parse_bool (lexer, &t->slabels_visible))
6546 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6551 else if (lex_match_id (lexer, "CLABELS"))
6553 if (lex_match_id (lexer, "AUTO"))
6555 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6556 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6558 else if (lex_match_id (lexer, "ROWLABELS"))
6560 lex_match (lexer, T_EQUALS);
6561 if (lex_match_id (lexer, "OPPOSITE"))
6562 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6563 else if (lex_match_id (lexer, "LAYER"))
6564 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6567 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6571 else if (lex_match_id (lexer, "COLLABELS"))
6573 lex_match (lexer, T_EQUALS);
6574 if (lex_match_id (lexer, "OPPOSITE"))
6575 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6576 else if (lex_match_id (lexer, "LAYER"))
6577 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6580 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6586 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6591 else if (lex_match_id (lexer, "CRITERIA"))
6593 if (!lex_force_match_id (lexer, "CILEVEL"))
6595 lex_match (lexer, T_EQUALS);
6597 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6599 t->cilevel = lex_number (lexer);
6602 else if (lex_match_id (lexer, "CATEGORIES"))
6604 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6608 else if (lex_match_id (lexer, "TITLES"))
6613 if (lex_match_id (lexer, "CAPTION"))
6614 textp = &t->caption;
6615 else if (lex_match_id (lexer, "CORNER"))
6617 else if (lex_match_id (lexer, "TITLE"))
6621 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6624 lex_match (lexer, T_EQUALS);
6626 struct string s = DS_EMPTY_INITIALIZER;
6627 while (lex_is_string (lexer))
6629 if (!ds_is_empty (&s))
6630 ds_put_byte (&s, ' ');
6631 put_title_text (&s, lex_tokss (lexer), now,
6632 lexer, dataset_dict (ds),
6633 expr_start, expr_end);
6637 *textp = ds_steal_cstr (&s);
6639 while (lex_token (lexer) != T_SLASH
6640 && lex_token (lexer) != T_ENDCMD);
6642 else if (lex_match_id (lexer, "SIGTEST"))
6646 t->chisq = xmalloc (sizeof *t->chisq);
6647 *t->chisq = (struct ctables_chisq) {
6649 .include_mrsets = true,
6650 .all_visible = true,
6656 if (lex_match_id (lexer, "TYPE"))
6658 lex_match (lexer, T_EQUALS);
6659 if (!lex_force_match_id (lexer, "CHISQUARE"))
6662 else if (lex_match_id (lexer, "ALPHA"))
6664 lex_match (lexer, T_EQUALS);
6665 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6667 t->chisq->alpha = lex_number (lexer);
6670 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6672 lex_match (lexer, T_EQUALS);
6673 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6676 else if (lex_match_id (lexer, "CATEGORIES"))
6678 lex_match (lexer, T_EQUALS);
6679 if (lex_match_id (lexer, "ALLVISIBLE"))
6680 t->chisq->all_visible = true;
6681 else if (lex_match_id (lexer, "SUBTOTALS"))
6682 t->chisq->all_visible = false;
6685 lex_error_expecting (lexer,
6686 "ALLVISIBLE", "SUBTOTALS");
6692 lex_error_expecting (lexer, "TYPE", "ALPHA",
6693 "INCLUDEMRSETS", "CATEGORIES");
6697 while (lex_token (lexer) != T_SLASH
6698 && lex_token (lexer) != T_ENDCMD);
6700 else if (lex_match_id (lexer, "COMPARETEST"))
6704 t->pairwise = xmalloc (sizeof *t->pairwise);
6705 *t->pairwise = (struct ctables_pairwise) {
6707 .alpha = { .05, .05 },
6708 .adjust = BONFERRONI,
6709 .include_mrsets = true,
6710 .meansvariance_allcats = true,
6711 .all_visible = true,
6720 if (lex_match_id (lexer, "TYPE"))
6722 lex_match (lexer, T_EQUALS);
6723 if (lex_match_id (lexer, "PROP"))
6724 t->pairwise->type = PROP;
6725 else if (lex_match_id (lexer, "MEAN"))
6726 t->pairwise->type = MEAN;
6729 lex_error_expecting (lexer, "PROP", "MEAN");
6733 else if (lex_match_id (lexer, "ALPHA"))
6735 lex_match (lexer, T_EQUALS);
6737 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6739 double a0 = lex_number (lexer);
6742 lex_match (lexer, T_COMMA);
6743 if (lex_is_number (lexer))
6745 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6747 double a1 = lex_number (lexer);
6750 t->pairwise->alpha[0] = MIN (a0, a1);
6751 t->pairwise->alpha[1] = MAX (a0, a1);
6754 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6756 else if (lex_match_id (lexer, "ADJUST"))
6758 lex_match (lexer, T_EQUALS);
6759 if (lex_match_id (lexer, "BONFERRONI"))
6760 t->pairwise->adjust = BONFERRONI;
6761 else if (lex_match_id (lexer, "BH"))
6762 t->pairwise->adjust = BH;
6763 else if (lex_match_id (lexer, "NONE"))
6764 t->pairwise->adjust = 0;
6767 lex_error_expecting (lexer, "BONFERRONI", "BH",
6772 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6774 lex_match (lexer, T_EQUALS);
6775 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6778 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6780 lex_match (lexer, T_EQUALS);
6781 if (lex_match_id (lexer, "ALLCATS"))
6782 t->pairwise->meansvariance_allcats = true;
6783 else if (lex_match_id (lexer, "TESTEDCATS"))
6784 t->pairwise->meansvariance_allcats = false;
6787 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6791 else if (lex_match_id (lexer, "CATEGORIES"))
6793 lex_match (lexer, T_EQUALS);
6794 if (lex_match_id (lexer, "ALLVISIBLE"))
6795 t->pairwise->all_visible = true;
6796 else if (lex_match_id (lexer, "SUBTOTALS"))
6797 t->pairwise->all_visible = false;
6800 lex_error_expecting (lexer, "ALLVISIBLE",
6805 else if (lex_match_id (lexer, "MERGE"))
6807 lex_match (lexer, T_EQUALS);
6808 if (!parse_bool (lexer, &t->pairwise->merge))
6811 else if (lex_match_id (lexer, "STYLE"))
6813 lex_match (lexer, T_EQUALS);
6814 if (lex_match_id (lexer, "APA"))
6815 t->pairwise->apa_style = true;
6816 else if (lex_match_id (lexer, "SIMPLE"))
6817 t->pairwise->apa_style = false;
6820 lex_error_expecting (lexer, "APA", "SIMPLE");
6824 else if (lex_match_id (lexer, "SHOWSIG"))
6826 lex_match (lexer, T_EQUALS);
6827 if (!parse_bool (lexer, &t->pairwise->show_sig))
6832 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6833 "INCLUDEMRSETS", "MEANSVARIANCE",
6834 "CATEGORIES", "MERGE", "STYLE",
6839 while (lex_token (lexer) != T_SLASH
6840 && lex_token (lexer) != T_ENDCMD);
6844 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6845 "CRITERIA", "CATEGORIES", "TITLES",
6846 "SIGTEST", "COMPARETEST");
6850 if (!lex_match (lexer, T_SLASH))
6854 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6855 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6857 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6861 if (!ctables_prepare_table (t))
6864 while (lex_token (lexer) != T_ENDCMD);
6867 input = proc_open (ds);
6868 bool ok = ctables_execute (ds, input, ct);
6869 ok = proc_commit (ds) && ok;
6871 ctables_destroy (ct);
6872 return ok ? CMD_SUCCESS : CMD_FAILURE;
6877 ctables_destroy (ct);