1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-in.h"
25 #include "data/data-out.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/mrset.h"
29 #include "data/subcase.h"
30 #include "data/value-labels.h"
31 #include "language/command.h"
32 #include "language/lexer/format-parser.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/token.h"
35 #include "language/lexer/variable-parser.h"
36 #include "libpspp/array.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/hash-functions.h"
39 #include "libpspp/hmap.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/message.h"
42 #include "libpspp/string-array.h"
43 #include "math/mode.h"
44 #include "math/moments.h"
45 #include "math/percentiles.h"
46 #include "math/sort.h"
47 #include "output/pivot-table.h"
49 #include "gl/minmax.h"
50 #include "gl/xalloc.h"
53 #define _(msgid) gettext (msgid)
54 #define N_(msgid) (msgid)
58 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
59 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
60 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
61 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
65 - unweighted summaries (U*)
66 - lower confidence limits (*.LCL)
67 - upper confidence limits (*.UCL)
68 - standard error (*.SE)
71 /* All variables. */ \
72 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
73 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
74 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
96 /* All variables (unweighted). */ \
97 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
98 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
99 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
100 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
120 /* Scale variables, totals, and subtotals. */ \
121 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
122 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
123 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
132 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
133 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
134 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
137 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
138 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
139 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 /* Scale variables, totals, and subtotals (unweighted). */ \
146 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
147 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
148 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
154 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
155 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
156 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
157 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
158 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
159 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 #if 0 /* Multiple response sets not yet implemented. */
166 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
167 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
168 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
169 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
190 enum ctables_summary_function
192 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
198 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
199 N_CTSF_FUNCTIONS = SUMMARIES
203 static bool ctables_summary_function_is_count (enum ctables_summary_function);
205 enum ctables_domain_type
207 /* Within a section, where stacked variables divide one section from
209 CTDT_TABLE, /* All layers of a whole section. */
210 CTDT_LAYER, /* One layer within a section. */
211 CTDT_LAYERROW, /* Row in one layer within a section. */
212 CTDT_LAYERCOL, /* Column in one layer within a section. */
214 /* Within a subtable, where a subtable pairs an innermost row variable with
215 an innermost column variable within a single layer. */
216 CTDT_SUBTABLE, /* Whole subtable. */
217 CTDT_ROW, /* Row within a subtable. */
218 CTDT_COL, /* Column within a subtable. */
222 struct ctables_domain
224 struct hmap_node node;
226 const struct ctables_cell *example;
228 double d_valid; /* Dictionary weight. */
231 double e_valid; /* Effective weight */
234 double u_valid; /* Unweighted. */
239 enum ctables_summary_variant
248 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
249 all the axes (except the scalar variable, if any). */
250 struct hmap_node node;
252 /* The domains that contain this cell. */
253 uint32_t omit_domains;
254 struct ctables_domain *domains[N_CTDTS];
259 enum ctables_summary_variant sv;
261 struct ctables_cell_axis
263 struct ctables_cell_value
265 const struct ctables_category *category;
273 union ctables_summary *summaries;
280 const struct dictionary *dict;
281 struct pivot_table_look *look;
283 /* CTABLES has a number of extra formats that we implement via custom
284 currency specifications on an alternate fmt_settings. */
285 #define CTEF_NEGPAREN FMT_CCA
286 #define CTEF_NEQUAL FMT_CCB
287 #define CTEF_PAREN FMT_CCC
288 #define CTEF_PCTPAREN FMT_CCD
289 struct fmt_settings ctables_formats;
291 /* If this is NULL, zeros are displayed using the normal print format.
292 Otherwise, this string is displayed. */
295 /* If this is NULL, missing values are displayed using the normal print
296 format. Otherwise, this string is displayed. */
299 /* Indexed by variable dictionary index. */
300 enum ctables_vlabel *vlabels;
302 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
304 bool mrsets_count_duplicates; /* MRSETS. */
305 bool smissing_listwise; /* SMISSING. */
306 struct variable *e_weight; /* WEIGHT. */
307 int hide_threshold; /* HIDESMALLCOUNTS. */
309 struct ctables_table **tables;
313 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
316 struct ctables_postcompute
318 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
319 char *name; /* Name, without leading &. */
321 struct msg_location *location; /* Location of definition. */
322 struct ctables_pcexpr *expr;
324 struct ctables_summary_spec_set *specs;
325 bool hide_source_cats;
328 struct ctables_pcexpr
338 enum ctables_postcompute_op
341 CTPO_CONSTANT, /* 5 */
342 CTPO_CAT_NUMBER, /* [5] */
343 CTPO_CAT_STRING, /* ["STRING"] */
344 CTPO_CAT_NRANGE, /* [LO THRU 5] */
345 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
346 CTPO_CAT_MISSING, /* MISSING */
347 CTPO_CAT_OTHERNM, /* OTHERNM */
348 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
349 CTPO_CAT_TOTAL, /* TOTAL */
363 /* CTPO_CAT_NUMBER. */
366 /* CTPO_CAT_STRING, in dictionary encoding. */
367 struct substring string;
369 /* CTPO_CAT_NRANGE. */
372 /* CTPO_CAT_SRANGE. */
373 struct substring srange[2];
375 /* CTPO_CAT_SUBTOTAL. */
376 size_t subtotal_index;
378 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
379 One element: CTPO_NEG. */
380 struct ctables_pcexpr *subs[2];
383 /* Source location. */
384 struct msg_location *location;
387 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
388 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
389 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
390 struct ctables_pcexpr *sub1);
392 struct ctables_summary_spec_set
394 struct ctables_summary_spec *specs;
398 /* The variable to which the summary specs are applied. */
399 struct variable *var;
401 /* Whether the variable to which the summary specs are applied is a scale
402 variable for the purpose of summarization.
404 (VALIDN and TOTALN act differently for summarizing scale and categorical
408 /* If any of these optional additional scale variables are missing, then
409 treat 'var' as if it's missing too. This is for implementing
410 SMISSING=LISTWISE. */
411 struct variable **listwise_vars;
412 size_t n_listwise_vars;
415 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
416 const struct ctables_summary_spec_set *);
417 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
419 /* A nested sequence of variables, e.g. a > b > c. */
422 struct variable **vars;
425 size_t *domains[N_CTDTS];
426 size_t n_domains[N_CTDTS];
429 struct ctables_summary_spec_set specs[N_CSVS];
432 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
435 struct ctables_nest *nests;
441 struct hmap_node node;
446 struct ctables_occurrence
448 struct hmap_node node;
452 struct ctables_section
454 struct ctables_table *table;
455 struct ctables_nest *nests[PIVOT_N_AXES];
456 struct hmap *occurrences[PIVOT_N_AXES];
457 struct hmap cells; /* Contains "struct ctables_cell"s. */
458 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
463 struct ctables *ctables;
464 struct ctables_axis *axes[PIVOT_N_AXES];
465 struct ctables_stack stacks[PIVOT_N_AXES];
466 struct ctables_section *sections;
468 enum pivot_axis_type summary_axis;
469 struct ctables_summary_spec_set summary_specs;
471 const struct variable *clabels_example;
472 struct hmap clabels_values_map;
473 struct ctables_value **clabels_values;
474 size_t n_clabels_values;
476 enum pivot_axis_type slabels_axis;
477 bool slabels_visible;
479 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
481 Most commonly, label_axis[a] == a, and in particular we always have
482 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
484 If ROWLABELS or COLLABELS is specified, then one of
485 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
486 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
488 enum pivot_axis_type label_axis[PIVOT_N_AXES];
489 enum pivot_axis_type clabels_from_axis;
491 /* Indexed by variable dictionary index. */
492 struct ctables_categories **categories;
501 struct ctables_chisq *chisq;
502 struct ctables_pairwise *pairwise;
505 struct ctables_categories
508 struct ctables_category *cats;
513 struct ctables_category
515 enum ctables_category_type
517 /* Explicit category lists. */
520 CCT_NRANGE, /* Numerical range. */
521 CCT_SRANGE, /* String range. */
526 /* Totals and subtotals. */
530 /* Implicit category lists. */
535 /* For contributing to TOTALN. */
536 CCT_EXCLUDED_MISSING,
540 struct ctables_category *subtotal;
546 double number; /* CCT_NUMBER. */
547 struct substring string; /* CCT_STRING, in dictionary encoding. */
548 double nrange[2]; /* CCT_NRANGE. */
549 struct substring srange[2]; /* CCT_SRANGE. */
553 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
554 bool hide_subcategories; /* CCT_SUBTOTAL. */
557 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
559 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
562 bool include_missing;
566 enum ctables_summary_function sort_function;
567 struct variable *sort_var;
572 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
573 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
574 struct msg_location *location;
578 ctables_category_uninit (struct ctables_category *cat)
589 case CCT_POSTCOMPUTE:
593 ss_dealloc (&cat->string);
597 ss_dealloc (&cat->srange[0]);
598 ss_dealloc (&cat->srange[1]);
603 free (cat->total_label);
611 case CCT_EXCLUDED_MISSING:
617 nullable_substring_equal (const struct substring *a,
618 const struct substring *b)
620 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
624 ctables_category_equal (const struct ctables_category *a,
625 const struct ctables_category *b)
627 if (a->type != b->type)
633 return a->number == b->number;
636 return ss_equals (a->string, b->string);
639 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
642 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
643 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
649 case CCT_POSTCOMPUTE:
650 return a->pc == b->pc;
654 return !strcmp (a->total_label, b->total_label);
659 return (a->include_missing == b->include_missing
660 && a->sort_ascending == b->sort_ascending
661 && a->sort_function == b->sort_function
662 && a->sort_var == b->sort_var
663 && a->percentile == b->percentile);
665 case CCT_EXCLUDED_MISSING:
673 ctables_categories_unref (struct ctables_categories *c)
678 assert (c->n_refs > 0);
682 for (size_t i = 0; i < c->n_cats; i++)
683 ctables_category_uninit (&c->cats[i]);
689 ctables_categories_equal (const struct ctables_categories *a,
690 const struct ctables_categories *b)
692 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
695 for (size_t i = 0; i < a->n_cats; i++)
696 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
702 /* Chi-square test (SIGTEST). */
710 /* Pairwise comparison test (COMPARETEST). */
711 struct ctables_pairwise
713 enum { PROP, MEAN } type;
716 bool meansvariance_allcats;
718 enum { BONFERRONI = 1, BH } adjust;
742 struct variable *var;
744 struct ctables_summary_spec_set specs[N_CSVS];
748 struct ctables_axis *subs[2];
751 struct msg_location *loc;
754 static void ctables_axis_destroy (struct ctables_axis *);
763 enum ctables_function_availability
765 CTFA_ALL, /* Any variables. */
766 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
767 CTFA_MRSETS, /* Only multiple-response sets */
770 struct ctables_summary_spec
772 enum ctables_summary_function function;
773 double percentile; /* CTSF_PTILE only. */
776 struct fmt_spec format;
777 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
783 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
784 const struct ctables_summary_spec *src)
787 dst->label = xstrdup_if_nonnull (src->label);
791 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
798 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
799 const struct ctables_summary_spec_set *src)
801 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
802 for (size_t i = 0; i < src->n; i++)
803 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
805 *dst = (struct ctables_summary_spec_set) {
810 .is_scale = src->is_scale,
815 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
817 for (size_t i = 0; i < set->n; i++)
818 ctables_summary_spec_uninit (&set->specs[i]);
823 parse_col_width (struct lexer *lexer, const char *name, double *width)
825 lex_match (lexer, T_EQUALS);
826 if (lex_match_id (lexer, "DEFAULT"))
828 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
830 *width = lex_number (lexer);
840 parse_bool (struct lexer *lexer, bool *b)
842 if (lex_match_id (lexer, "NO"))
844 else if (lex_match_id (lexer, "YES"))
848 lex_error_expecting (lexer, "YES", "NO");
854 static enum ctables_function_availability
855 ctables_function_availability (enum ctables_summary_function f)
857 static enum ctables_function_availability availability[] = {
858 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
863 return availability[f];
867 ctables_summary_function_is_count (enum ctables_summary_function f)
873 case CTSF_ROWPCT_COUNT:
874 case CTSF_COLPCT_COUNT:
875 case CTSF_TABLEPCT_COUNT:
876 case CTSF_SUBTABLEPCT_COUNT:
877 case CTSF_LAYERPCT_COUNT:
878 case CTSF_LAYERROWPCT_COUNT:
879 case CTSF_LAYERCOLPCT_COUNT:
881 case CTSF_UROWPCT_COUNT:
882 case CTSF_UCOLPCT_COUNT:
883 case CTSF_UTABLEPCT_COUNT:
884 case CTSF_USUBTABLEPCT_COUNT:
885 case CTSF_ULAYERPCT_COUNT:
886 case CTSF_ULAYERROWPCT_COUNT:
887 case CTSF_ULAYERCOLPCT_COUNT:
890 case CTSF_ROWPCT_VALIDN:
891 case CTSF_COLPCT_VALIDN:
892 case CTSF_TABLEPCT_VALIDN:
893 case CTSF_SUBTABLEPCT_VALIDN:
894 case CTSF_LAYERPCT_VALIDN:
895 case CTSF_LAYERROWPCT_VALIDN:
896 case CTSF_LAYERCOLPCT_VALIDN:
897 case CTSF_ROWPCT_TOTALN:
898 case CTSF_COLPCT_TOTALN:
899 case CTSF_TABLEPCT_TOTALN:
900 case CTSF_SUBTABLEPCT_TOTALN:
901 case CTSF_LAYERPCT_TOTALN:
902 case CTSF_LAYERROWPCT_TOTALN:
903 case CTSF_LAYERCOLPCT_TOTALN:
920 case CTSF_ROWPCT_SUM:
921 case CTSF_COLPCT_SUM:
922 case CTSF_TABLEPCT_SUM:
923 case CTSF_SUBTABLEPCT_SUM:
924 case CTSF_LAYERPCT_SUM:
925 case CTSF_LAYERROWPCT_SUM:
926 case CTSF_LAYERCOLPCT_SUM:
927 case CTSF_UROWPCT_VALIDN:
928 case CTSF_UCOLPCT_VALIDN:
929 case CTSF_UTABLEPCT_VALIDN:
930 case CTSF_USUBTABLEPCT_VALIDN:
931 case CTSF_ULAYERPCT_VALIDN:
932 case CTSF_ULAYERROWPCT_VALIDN:
933 case CTSF_ULAYERCOLPCT_VALIDN:
934 case CTSF_UROWPCT_TOTALN:
935 case CTSF_UCOLPCT_TOTALN:
936 case CTSF_UTABLEPCT_TOTALN:
937 case CTSF_USUBTABLEPCT_TOTALN:
938 case CTSF_ULAYERPCT_TOTALN:
939 case CTSF_ULAYERROWPCT_TOTALN:
940 case CTSF_ULAYERCOLPCT_TOTALN:
952 case CTSF_UROWPCT_SUM:
953 case CTSF_UCOLPCT_SUM:
954 case CTSF_UTABLEPCT_SUM:
955 case CTSF_USUBTABLEPCT_SUM:
956 case CTSF_ULAYERPCT_SUM:
957 case CTSF_ULAYERROWPCT_SUM:
958 case CTSF_ULAYERCOLPCT_SUM:
966 parse_ctables_summary_function (struct lexer *lexer,
967 enum ctables_summary_function *f)
971 enum ctables_summary_function function;
972 struct substring name;
974 static struct pair names[] = {
975 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
976 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
979 /* The .COUNT suffix may be omitted. */
980 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
981 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
982 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
983 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
984 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
985 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
986 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
990 if (!lex_force_id (lexer))
993 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
994 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
996 *f = names[i].function;
1001 lex_error (lexer, _("Expecting summary function name."));
1006 ctables_axis_destroy (struct ctables_axis *axis)
1014 for (size_t i = 0; i < N_CSVS; i++)
1015 ctables_summary_spec_set_uninit (&axis->specs[i]);
1020 ctables_axis_destroy (axis->subs[0]);
1021 ctables_axis_destroy (axis->subs[1]);
1024 msg_location_destroy (axis->loc);
1028 static struct ctables_axis *
1029 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1030 struct ctables_axis *sub0,
1031 struct ctables_axis *sub1,
1032 struct lexer *lexer, int start_ofs)
1034 struct ctables_axis *axis = xmalloc (sizeof *axis);
1035 *axis = (struct ctables_axis) {
1037 .subs = { sub0, sub1 },
1038 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1043 struct ctables_axis_parse_ctx
1045 struct lexer *lexer;
1046 struct dictionary *dict;
1048 struct ctables_table *t;
1051 static struct fmt_spec
1052 ctables_summary_default_format (enum ctables_summary_function function,
1053 const struct variable *var)
1055 static const enum ctables_format default_formats[] = {
1056 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1060 switch (default_formats[function])
1063 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1066 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1069 return *var_get_print_format (var);
1076 static struct pivot_value *
1077 ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
1081 static const char *default_labels[] = {
1082 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1087 return (spec->function == CTSF_PTILE
1088 ? pivot_value_new_text_format (N_("Percentile %.2f"),
1090 : pivot_value_new_text (default_labels[spec->function]));
1094 struct substring in = ss_cstr (spec->label);
1095 struct substring target = ss_cstr (")CILEVEL");
1097 struct string out = DS_EMPTY_INITIALIZER;
1100 size_t chunk = ss_find_substring (in, target);
1101 ds_put_substring (&out, ss_head (in, chunk));
1102 ss_advance (&in, chunk);
1104 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
1106 ss_advance (&in, target.length);
1107 ds_put_format (&out, "%g", cilevel);
1113 ctables_summary_function_name (enum ctables_summary_function function)
1115 static const char *names[] = {
1116 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1120 return names[function];
1124 add_summary_spec (struct ctables_axis *axis,
1125 enum ctables_summary_function function, double percentile,
1126 const char *label, const struct fmt_spec *format,
1127 bool is_ctables_format, const struct msg_location *loc,
1128 enum ctables_summary_variant sv)
1130 if (axis->op == CTAO_VAR)
1132 const char *function_name = ctables_summary_function_name (function);
1133 const char *var_name = var_get_name (axis->var);
1134 switch (ctables_function_availability (function))
1137 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1138 "response sets."), function_name);
1139 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1148 _("Summary function %s applies only to scale variables."),
1150 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1161 struct ctables_summary_spec_set *set = &axis->specs[sv];
1162 if (set->n >= set->allocated)
1163 set->specs = x2nrealloc (set->specs, &set->allocated,
1164 sizeof *set->specs);
1166 struct ctables_summary_spec *dst = &set->specs[set->n++];
1167 *dst = (struct ctables_summary_spec) {
1168 .function = function,
1169 .percentile = percentile,
1170 .label = xstrdup_if_nonnull (label),
1171 .format = (format ? *format
1172 : ctables_summary_default_format (function, axis->var)),
1173 .is_ctables_format = is_ctables_format,
1179 for (size_t i = 0; i < 2; i++)
1180 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1181 format, is_ctables_format, loc, sv))
1187 static struct ctables_axis *ctables_axis_parse_stack (
1188 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression from CTX->lexer.

   Two forms are visible here:

   - A parenthesized sub-expression, which is handed to
     ctables_axis_parse_stack() and must be followed by a right parenthesis;
     the sub-axis is destroyed if that fails.

   - A single variable looked up in CTX->dict, which becomes a newly allocated
     CTAO_VAR axis.  The variable may be followed by "[S]" or "[C]" to force
     scale or categorical treatment.  A string variable that ends up marked as
     scale is reported as an error and the new axis is destroyed.

   NOTE(review): this listing omits some lines of the function (including its
   return statements), so the exact return values are not visible here. */
1191 static struct ctables_axis *
1192 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1194 if (lex_match (ctx->lexer, T_LPAREN))
1196 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1197 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1199 ctables_axis_destroy (sub);
1205 if (!lex_force_id (ctx->lexer))
/* Remember where the variable reference starts so that AXIS->loc can cover
   the name plus any "[S]"/"[C]" marker. */
1208 int start_ofs = lex_ofs (ctx->lexer);
1209 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1213 struct ctables_axis *axis = xmalloc (sizeof *axis);
1214 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1216 /* XXX should figure out default measures by reading data */
/* An explicit marker wins; otherwise fall back to the variable's declared
   measurement level. */
1217 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1218 : lex_match_phrase (ctx->lexer, "[C]") ? false
1219 : var_get_measure (var) == MEASURE_SCALE);
1220 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1221 lex_ofs (ctx->lexer) - 1);
1222 if (axis->scale && var_is_alpha (var))
1224 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1226 var_get_name (var));
1227 ctables_axis_destroy (axis);
/* Tests whether S contains at least one of the characters '0'...'9'.
   strcspn() yields the offset of the first such character, or the offset of
   the terminating null byte if there is none, so the indexed byte is nonzero
   exactly when a digit was found. */
1235 has_digit (const char *s)
1237 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into FORMAT.

   The format's type name is compared, ignoring case, with the four
   CTABLES-specific names NEGPAREN, NEQUAL, PAREN, and PCTPAREN.  If one
   matches, FORMAT->type is set to the corresponding CTEF_* value and
   *IS_CTABLES_FORMAT is set to true after the width and decimals pass the
   checks below.  Otherwise *IS_CTABLES_FORMAT is set to false and the result
   is that of parse_format_specifier(), additionally requiring the format to
   be a valid output format that is compatible with numeric values.

   NOTE(review): the conditions and return statements around the width checks
   are partly missing from this listing; the comments describe only what the
   visible messages and tests indicate. */
1241 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1242 bool *is_ctables_format)
1244 char type[FMT_TYPE_LEN_MAX + 1];
1245 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1248 if (!strcasecmp (type, "NEGPAREN"))
1249 format->type = CTEF_NEGPAREN;
1250 else if (!strcasecmp (type, "NEQUAL"))
1251 format->type = CTEF_NEQUAL;
1252 else if (!strcasecmp (type, "PAREN"))
1253 format->type = CTEF_PAREN;
1254 else if (!strcasecmp (type, "PCTPAREN"))
1255 format->type = CTEF_PCTPAREN;
/* Not a CTABLES-specific name: treat it as an ordinary format. */
1258 *is_ctables_format = false;
1259 return (parse_format_specifier (lexer, format)
1260 && fmt_check_output (format)
1261 && fmt_check_type_compat (format, VAL_NUMERIC));
/* Sanity checks on width and decimals for the CTABLES-specific formats. */
1266 msg (SE, _("Output format %s requires width 2 or greater."), type);
1269 else if (format->d > format->w - 1)
1271 msg (SE, _("Output format %s requires width greater than decimals."),
1277 *is_ctables_format = true;
1282 static struct ctables_axis *
1283 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1285 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1286 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1289 enum ctables_summary_variant sv = CSV_CELL;
1292 int start_ofs = lex_ofs (ctx->lexer);
1294 /* Parse function. */
1295 enum ctables_summary_function function;
1296 if (!parse_ctables_summary_function (ctx->lexer, &function))
1299 /* Parse percentile. */
1300 double percentile = 0;
1301 if (function == CTSF_PTILE)
1303 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1305 percentile = lex_number (ctx->lexer);
1306 lex_get (ctx->lexer);
1311 if (lex_is_string (ctx->lexer))
1313 label = ss_xstrdup (lex_tokss (ctx->lexer));
1314 lex_get (ctx->lexer);
1318 struct fmt_spec format;
1319 const struct fmt_spec *formatp;
1320 bool is_ctables_format = false;
1321 if (lex_token (ctx->lexer) == T_ID
1322 && has_digit (lex_tokcstr (ctx->lexer)))
1324 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1325 &is_ctables_format))
1335 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1336 lex_ofs (ctx->lexer) - 1);
1337 add_summary_spec (sub, function, percentile, label, formatp,
1338 is_ctables_format, loc, sv);
1340 msg_location_destroy (loc);
1342 lex_match (ctx->lexer, T_COMMA);
1343 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1345 if (!lex_force_match (ctx->lexer, T_LBRACK))
1349 else if (lex_match (ctx->lexer, T_RBRACK))
1351 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1358 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A CTAO_VAR
   node yields itself if its 'scale' flag is set and NULL otherwise; for other
   nodes the two sub-axes are searched recursively.

   NOTE(review): the first branch of the if/else chain and the handling of the
   recursive results are missing from this listing. */
1362 static const struct ctables_axis *
1363 find_scale (const struct ctables_axis *axis)
1367 else if (axis->op == CTAO_VAR)
1368 return axis->scale ? axis : NULL;
1371 for (size_t i = 0; i < 2; i++)
1373 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a non-scale (categorical)
   variable that has at least one cell summary specification
   (specs[CSV_CELL].n is nonzero).  A CTAO_VAR node yields itself if it
   qualifies and NULL otherwise; for other nodes the two sub-axes are searched
   recursively.

   NOTE(review): the first branch of the if/else chain and the handling of the
   recursive results are missing from this listing. */
1381 static const struct ctables_axis *
1382 find_categorical_summary_spec (const struct ctables_axis *axis)
1386 else if (axis->op == CTAO_VAR)
1387 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1390 for (size_t i = 0; i < 2; i++)
1392 const struct ctables_axis *sum
1393 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a chain of postfix axis expressions joined by the nesting operator
   (T_GT), combining each pair into a CTAO_NEST node.

   Two restrictions are diagnosed, and the new nest node is destroyed when
   either is violated:

   - Both the outer (left) and inner (right) operand contain a scale
     variable.

   - The outer operand contains a categorical variable with a summary
     specification.

   NOTE(review): return statements and the condition guarding the second
   diagnostic are missing from this listing. */
1401 static struct ctables_axis *
1402 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1404 int start_ofs = lex_ofs (ctx->lexer);
1405 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1409 while (lex_match (ctx->lexer, T_GT))
1411 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1415 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1416 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1418 const struct ctables_axis *outer_scale = find_scale (lhs);
1419 const struct ctables_axis *inner_scale = find_scale (rhs);
1420 if (outer_scale && inner_scale)
1422 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1423 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1424 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1425 ctables_axis_destroy (nest);
1429 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1432 msg_at (SE, nest->loc,
1433 _("Summaries may only be requested for categorical variables "
1434 "at the innermost nesting level."));
1435 msg_at (SN, outer_sum->loc,
1436 _("This outer categorical variable has a summary."));
1437 ctables_axis_destroy (nest);
/* Parses a chain of nest expressions joined by the stacking operator
   (T_PLUS).  Each additional operand is folded into the running result as a
   CTAO_STACK node whose location starts at the first token of the whole
   expression.

   NOTE(review): the failure handling and the return statements are missing
   from this listing. */
1447 static struct ctables_axis *
1448 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1450 int start_ofs = lex_ofs (ctx->lexer);
1451 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1455 while (lex_match (ctx->lexer, T_PLUS))
1457 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1461 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1462 ctx->lexer, start_ofs);
/* Parses the axis expression for axis A of table T and stores the result in
   T->axes[A].  The final visible return yields true if a non-null axis was
   parsed.

   When the next token is BY, a slash, or the end of the command, the function
   takes a separate early path.

   NOTE(review): the body of that early path and the initializer of CTX are
   missing from this listing; presumably the axis is simply left empty, but
   confirm against the full source. */
1469 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1470 struct ctables *ct, struct ctables_table *t,
1471 enum pivot_axis_type a)
1473 if (lex_token (lexer) == T_BY
1474 || lex_token (lexer) == T_SLASH
1475 || lex_token (lexer) == T_ENDCMD)
1478 struct ctables_axis_parse_ctx ctx = {
1484 t->axes[a] = ctables_axis_parse_stack (&ctx);
1485 return t->axes[a] != NULL;
/* NOTE(review): only the signature is visible in this listing.  Judging by
   the name and by its use in ctables_table_destroy(), this presumably
   releases CHISQ; confirm against the full body. */
1489 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* NOTE(review): only the signature is visible in this listing.  Judging by
   the name and by its use in ctables_table_destroy(), this presumably
   releases PAIRWISE; confirm against the full body. */
1495 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases the resources visible here that table T holds: one reference to
   each entry in T->categories plus the array itself, the column, row, and
   layer axes, and the chi-square and pairwise settings.

   NOTE(review): some lines of this function are missing from this listing, so
   it may release more than what is listed above. */
1501 ctables_table_destroy (struct ctables_table *t)
1506 for (size_t i = 0; i < t->n_categories; i++)
1507 ctables_categories_unref (t->categories[i]);
1508 free (t->categories);
1510 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1511 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1512 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1516 ctables_chisq_destroy (t->chisq);
1517 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources visible here that CT holds: its reference to the
   pivot table look and every table in CT->tables.

   NOTE(review): some lines of this function are missing from this listing, so
   it may release more than what is listed above. */
1522 ctables_destroy (struct ctables *ct)
1527 pivot_table_look_unref (ct->look);
1531 for (size_t i = 0; i < ct->n_tables; i++)
1532 ctables_table_destroy (ct->tables[i]);
/* Returns a category whose 'nrange' member holds the numeric bounds LOW and
   HIGH.

   NOTE(review): the line that sets the category type is missing from this
   listing; presumably it is CCT_NRANGE. */
1537 static struct ctables_category
1538 cct_nrange (double low, double high)
1540 return (struct ctables_category) {
1542 .nrange = { low, high }
/* Returns a category whose 'srange' member holds the string bounds LOW and
   HIGH.  The substrings are stored as given, not copied.

   NOTE(review): the line that sets the category type is missing from this
   listing; presumably it is CCT_SRANGE. */
1546 static struct ctables_category
1547 cct_srange (struct substring low, struct substring high)
1549 return (struct ctables_category) {
1551 .srange = { low, high }
/* Parses the remainder of a SUBTOTAL or HSUBTOTAL category, after the
   keyword itself, and stores a CCT_SUBTOTAL category in *CAT.

   An optional "=" followed by a string supplies the label; otherwise the
   translated default "Subtotal" is used.  The label is newly allocated in
   both cases.  HIDE_SUBCATEGORIES is copied into the category unchanged.

   NOTE(review): return statements are missing from this listing. */
1556 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1557 struct ctables_category *cat)
1560 if (lex_match (lexer, T_EQUALS))
1562 if (!lex_force_string (lexer))
1565 total_label = ss_xstrdup (lex_tokss (lexer));
1569 total_label = xstrdup (_("Subtotal"));
1571 *cat = (struct ctables_category) {
1572 .type = CCT_SUBTOTAL,
1573 .hide_subcategories = hide_subcategories,
1574 .total_label = total_label
/* Recodes the current token's string from UTF-8 into DICT's encoding and
   trims trailing spaces from the result.

   NOTE(review): token consumption and the return statement are missing from
   this listing.  The null pool argument suggests the recoded string is
   allocated for the caller to free; confirm against
   recode_substring_pool(). */
1579 static struct substring
1580 parse_substring (struct lexer *lexer, struct dictionary *dict)
1582 struct substring s = recode_substring_pool (
1583 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1584 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification from LEXER into *CAT.

   Forms visible here:

   - OTHERNM and MISSING keywords.
   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal().
   - "LO THRU" followed by a string or a number, an open-ended range with no
     lower bound (a null string or -DBL_MAX).
   - A number, optionally followed by "THRU HI" or "THRU" and a number.
   - A string, optionally followed by "THRU HI" or "THRU" and a string.
   - "&" followed by the name of a postcompute, which must already exist.

   Anything else is a syntax error.

   NOTE(review): the parameter list is partly missing from this listing (the
   body uses 'ct', which is not among the visible parameters), as are return
   statements and several token-consuming calls. */
1590 ctables_table_parse_explicit_category (struct lexer *lexer,
1591 struct dictionary *dict,
1593 struct ctables_category *cat)
1595 if (lex_match_id (lexer, "OTHERNM"))
1596 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1597 else if (lex_match_id (lexer, "MISSING"))
1598 *cat = (struct ctables_category) { .type = CCT_MISSING };
1599 else if (lex_match_id (lexer, "SUBTOTAL"))
1600 return ctables_table_parse_subtotal (lexer, false, cat);
1601 else if (lex_match_id (lexer, "HSUBTOTAL"))
1602 return ctables_table_parse_subtotal (lexer, true, cat);
1603 else if (lex_match_id (lexer, "LO"))
1605 if (!lex_force_match_id (lexer, "THRU"))
1607 if (lex_is_string (lexer))
/* A null lower bound stands for "LO" in a string range. */
1609 struct substring sr0 = { .string = NULL };
1610 struct substring sr1 = parse_substring (lexer, dict);
1611 *cat = cct_srange (sr0, sr1);
1613 else if (lex_force_num (lexer))
1615 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1621 else if (lex_is_number (lexer))
1623 double number = lex_number (lexer);
1625 if (lex_match_id (lexer, "THRU"))
1627 if (lex_match_id (lexer, "HI"))
1628 *cat = cct_nrange (number, DBL_MAX);
1631 if (!lex_force_num (lexer))
1633 *cat = cct_nrange (number, lex_number (lexer));
1638 *cat = (struct ctables_category) {
1643 else if (lex_is_string (lexer))
1645 struct substring s = parse_substring (lexer, dict);
1646 if (lex_match_id (lexer, "THRU"))
1648 if (lex_match_id (lexer, "HI"))
/* A null upper bound stands for "HI" in a string range. */
1650 struct substring sr1 = { .string = NULL };
1651 *cat = cct_srange (s, sr1);
1655 if (!lex_force_string (lexer))
1657 struct substring sr1 = parse_substring (lexer, dict);
1658 *cat = cct_srange (s, sr1);
1662 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1664 else if (lex_match (lexer, T_AND))
1666 if (!lex_force_id (lexer))
1668 struct ctables_postcompute *pc = ctables_find_postcompute (
1669 ct, lex_tokcstr (lexer));
1672 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1673 msg_at (SE, loc, _("Unknown postcompute &%s."),
1674 lex_tokcstr (lexer));
1675 msg_location_destroy (loc);
1680 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1684 lex_error (lexer, NULL);
/* Searches CATS for the category that postcompute expression node E refers
   to.  The comparison depends on E's operator:

   - CTPO_CAT_NUMBER, CTPO_CAT_STRING: same type and equal value.
   - CTPO_CAT_NRANGE, CTPO_CAT_SRANGE: same type and both bounds equal (string
     bounds may be null).
   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: same type.
   - CTPO_CAT_SUBTOTAL: subtotals are matched against E->subtotal_index, with
     index 0 handled separately.

   After the loop, a CTPO_CAT_SUBTOTAL reference with index 0 gets special
   treatment when CATS contains more than one subtotal.

   NOTE(review): the switch header, the statements executed on a match, and
   the return statements are missing from this listing; presumably the
   index-0 case with several subtotals is rejected as ambiguous, but confirm
   against the full source. */
1691 static struct ctables_category *
1692 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1693 const struct ctables_pcexpr *e)
1695 struct ctables_category *best = NULL;
1696 size_t n_subtotals = 0;
1697 for (size_t i = 0; i < cats->n_cats; i++)
1699 struct ctables_category *cat = &cats->cats[i];
1702 case CTPO_CAT_NUMBER:
1703 if (cat->type == CCT_NUMBER && cat->number == e->number)
1707 case CTPO_CAT_STRING:
1708 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1712 case CTPO_CAT_NRANGE:
1713 if (cat->type == CCT_NRANGE
1714 && cat->nrange[0] == e->nrange[0]
1715 && cat->nrange[1] == e->nrange[1])
1719 case CTPO_CAT_SRANGE:
1720 if (cat->type == CCT_SRANGE
1721 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1722 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1726 case CTPO_CAT_MISSING:
1727 if (cat->type == CCT_MISSING)
1731 case CTPO_CAT_OTHERNM:
1732 if (cat->type == CCT_OTHERNM)
1736 case CTPO_CAT_SUBTOTAL:
1737 if (cat->type == CCT_SUBTOTAL)
1740 if (e->subtotal_index == n_subtotals)
1742 else if (e->subtotal_index == 0)
1747 case CTPO_CAT_TOTAL:
1748 if (cat->type == CCT_TOTAL)
1762 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Checks the postcompute expression tree rooted at E, which belongs to the
   computed category PC_CAT, against the category list CATS.

   For a node that references a category, the category is looked up with
   ctables_find_category_for_postcompute().  Two problems are diagnosed:

   - A SUBTOTAL reference without a position (subtotal_index of 0) when CATS
     contains more than one subtotal.

   - A reference to a category that is not in the category list.

   CATS_LOCATION is used as the location for diagnostics about the list as a
   whole.  Other node types are checked by recursing into both operands.

   NOTE(review): the switch header, several conditions, the body of the
   hide_source_cats branch, and return statements are missing from this
   listing. */
1768 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1769 struct ctables_category *pc_cat,
1770 const struct ctables_categories *cats,
1771 const struct msg_location *cats_location)
1775 case CTPO_CAT_NUMBER:
1776 case CTPO_CAT_STRING:
1777 case CTPO_CAT_NRANGE:
1778 case CTPO_CAT_MISSING:
1779 case CTPO_CAT_OTHERNM:
1780 case CTPO_CAT_SUBTOTAL:
1781 case CTPO_CAT_TOTAL:
1783 struct ctables_category *cat = ctables_find_category_for_postcompute (
1787 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals to tell an ambiguous reference from a missing one. */
1789 size_t n_subtotals = 0;
1790 for (size_t i = 0; i < cats->n_cats; i++)
1791 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1792 if (n_subtotals > 1)
1794 msg_at (SE, cats_location,
1795 ngettext ("These categories include %zu instance "
1796 "of SUBTOTAL or HSUBTOTAL, so references "
1797 "from computed categories must refer to "
1798 "subtotals by position.",
1799 "These categories include %zu instances "
1800 "of SUBTOTAL or HSUBTOTAL, so references "
1801 "from computed categories must refer to "
1802 "subtotals by position.",
1805 msg_at (SN, e->location,
1806 _("This is the reference that lacks a position."));
1811 msg_at (SE, pc_cat->location,
1812 _("Computed category &%s references a category not included "
1813 "in the category list."),
1815 msg_at (SN, e->location, _("This is the missing category."));
1816 msg_at (SN, cats_location,
1817 _("To fix the problem, add the missing category to the "
1818 "list of categories here."));
1821 if (pc_cat->pc->hide_source_cats)
/* Non-leaf nodes: check both operands, skipping null ones. */
1835 for (size_t i = 0; i < 2; i++)
1836 if (e->subs[i] && !ctables_recursive_check_postcompute (
1837 e->subs[i], pc_cat, cats, cats_location))
/* Parses string S, in DICT's encoding, as a value in input format FORMAT
   using data_in().  If data_in() reports an error, it is issued as a
   diagnostic at CAT's location.

   NOTE(review): the declaration of 'v', the store through N, cleanup of the
   error string, and return statements are missing from this listing;
   presumably the parsed number is stored in *N on success. */
1847 parse_category_string (const struct ctables_category *cat,
1848 struct substring s, struct dictionary *dict,
1849 enum fmt_type format, double *n)
1852 char *error = data_in (s, dict_get_encoding (dict), format,
1853 settings_get_fmt_settings (), &v, 0, NULL);
1856 msg_at (SE, cat->location,
1857 _("Failed to parse category specification as format %s: %s."),
1858 fmt_name (format), error);
/* Checks the N_VARS variables in VARS and issues a diagnostic at CAT's
   location, naming the offending variable, when a numeric variable is found.

   NOTE(review): return statements are missing from this listing; presumably
   the result is true only if every variable is a string variable. */
1868 all_strings (struct variable **vars, size_t n_vars,
1869 const struct ctables_category *cat)
1871 for (size_t j = 0; j < n_vars; j++)
1872 if (var_is_numeric (vars[j]))
1874 msg_at (SE, cat->location,
1875 _("This category specification may be applied only to string "
1876 "variables, but this subcommand tries to apply it to "
1877 "numeric variable %s."),
1878 var_get_name (vars[j]));
/* Parses a category specification that begins with the keyword VARIABLES and
   a variable list, and attaches the result to table T.

   A single new ctables_categories object is created with one reference per
   listed variable.  It replaces (after unreferencing) whatever T->categories
   held at each variable's dictionary index.

   Then one of two forms follows:

   - An explicit category list in brackets.  After the list is read, each
     category is checked against the variables' types: postcomputes are
     verified with ctables_recursive_check_postcompute(), and string and
     string-range categories are either converted to numbers with
     parse_category_string() or required to apply only to string variables.

   - Implicit categories, controlled by the ORDER, KEY, and MISSING options.
     These three are accepted only when no explicit categories were given.

   The options TOTAL, LABEL, POSITION, and EMPTY are accepted in both cases.
   Options are read until a slash or the end of the command.  Finally a totals
   category is inserted before or after the others, and the categories are
   walked in the order that matches the totals position.

   NOTE(review): many lines are missing from this listing (loop headers,
   switch headers and case labels, return statements, and the full condition
   under which string categories are parsed as numbers), so the comments below
   describe only the visible statements. */
1885 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1886 struct ctables *ct, struct ctables_table *t)
1888 if (!lex_match_id (lexer, "VARIABLES"))
1890 lex_match (lexer, T_EQUALS);
1892 struct variable **vars;
1894 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find the print format type shared by all the variables; COMMON_FORMAT
   becomes null when the types differ. */
1897 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1898 for (size_t i = 1; i < n_vars; i++)
1900 const struct fmt_spec *f = var_get_print_format (vars[i]);
1901 if (f->type != common_format->type)
1903 common_format = NULL;
1909 && (fmt_get_category (common_format->type)
1910 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One shared object, with one reference per variable. */
1912 struct ctables_categories *c = xmalloc (sizeof *c);
1913 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1914 for (size_t i = 0; i < n_vars; i++)
1916 struct ctables_categories **cp
1917 = &t->categories[var_get_dict_index (vars[i])];
1918 ctables_categories_unref (*cp);
1922 size_t allocated_cats = 0;
1923 if (lex_match (lexer, T_LBRACK))
1925 int cats_start_ofs = lex_ofs (lexer);
1928 if (c->n_cats >= allocated_cats)
1929 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1931 int start_ofs = lex_ofs (lexer);
1932 struct ctables_category *cat = &c->cats[c->n_cats];
1933 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1935 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1938 lex_match (lexer, T_COMMA);
1940 while (!lex_match (lexer, T_RBRACK));
/* Second pass: validate each explicit category against the variables. */
1942 struct msg_location *cats_location
1943 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1944 for (size_t i = 0; i < c->n_cats; i++)
1946 struct ctables_category *cat = &c->cats[i];
1949 case CCT_POSTCOMPUTE:
1950 if (!ctables_recursive_check_postcompute (cat->pc->expr, cat,
1957 for (size_t j = 0; j < n_vars; j++)
1958 if (var_is_alpha (vars[j]))
1960 msg_at (SE, cat->location,
1961 _("This category specification may be applied "
1962 "only to numeric variables, but this "
1963 "subcommand tries to apply it to string "
1965 var_get_name (vars[j]));
/* A string category parsed as a number is converted in place to
   CCT_NUMBER. */
1974 if (!parse_category_string (cat, cat->string, dict,
1975 common_format->type, &n))
1978 ss_dealloc (&cat->string);
1980 cat->type = CCT_NUMBER;
1983 else if (!all_strings (vars, n_vars, cat))
/* Likewise for string ranges, which become CCT_NRANGE.  A null bound is
   handled separately from a bound that needs parsing. */
1992 if (!cat->srange[0].string)
1994 else if (!parse_category_string (cat, cat->srange[0], dict,
1995 common_format->type, &n[0]))
1998 if (!cat->srange[1].string)
2000 else if (!parse_category_string (cat, cat->srange[1], dict,
2001 common_format->type, &n[1]))
2004 ss_dealloc (&cat->srange[0]);
2005 ss_dealloc (&cat->srange[1]);
2007 cat->type = CCT_NRANGE;
2008 cat->nrange[0] = n[0];
2009 cat->nrange[1] = n[1];
2011 else if (!all_strings (vars, n_vars, cat))
2022 case CCT_EXCLUDED_MISSING:
/* Defaults for implicit categories and totals, adjusted by the options
   parsed below. */
2028 struct ctables_category cat = {
2030 .include_missing = false,
2031 .sort_ascending = true,
2033 bool show_totals = false;
2034 char *total_label = NULL;
2035 bool totals_before = false;
2036 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
2038 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2040 lex_match (lexer, T_EQUALS);
2041 if (lex_match_id (lexer, "A"))
2042 cat.sort_ascending = true;
2043 else if (lex_match_id (lexer, "D"))
2044 cat.sort_ascending = false;
2047 lex_error_expecting (lexer, "A", "D");
2051 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2053 lex_match (lexer, T_EQUALS);
2054 if (lex_match_id (lexer, "VALUE"))
2055 cat.type = CCT_VALUE;
2056 else if (lex_match_id (lexer, "LABEL"))
2057 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0...100. */
2060 cat.type = CCT_FUNCTION;
2061 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2064 if (lex_match (lexer, T_LPAREN))
2066 cat.sort_var = parse_variable (lexer, dict);
2070 if (cat.sort_function == CTSF_PTILE)
2072 lex_match (lexer, T_COMMA);
2073 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2075 cat.percentile = lex_number (lexer);
2079 if (!lex_force_match (lexer, T_RPAREN))
2082 else if (ctables_function_availability (cat.sort_function)
2085 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2090 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2092 lex_match (lexer, T_EQUALS);
2093 if (lex_match_id (lexer, "INCLUDE"))
2094 cat.include_missing = true;
2095 else if (lex_match_id (lexer, "EXCLUDE"))
2096 cat.include_missing = false;
2099 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2103 else if (lex_match_id (lexer, "TOTAL"))
2105 lex_match (lexer, T_EQUALS);
2106 if (!parse_bool (lexer, &show_totals))
2109 else if (lex_match_id (lexer, "LABEL"))
2111 lex_match (lexer, T_EQUALS);
2112 if (!lex_force_string (lexer))
2115 total_label = ss_xstrdup (lex_tokss (lexer));
2118 else if (lex_match_id (lexer, "POSITION"))
2120 lex_match (lexer, T_EQUALS);
2121 if (lex_match_id (lexer, "BEFORE"))
2122 totals_before = true;
2123 else if (lex_match_id (lexer, "AFTER"))
2124 totals_before = false;
2127 lex_error_expecting (lexer, "BEFORE", "AFTER");
2131 else if (lex_match_id (lexer, "EMPTY"))
2133 lex_match (lexer, T_EQUALS);
2134 if (lex_match_id (lexer, "INCLUDE"))
2135 c->show_empty = true;
2136 else if (lex_match_id (lexer, "EXCLUDE"))
2137 c->show_empty = false;
2140 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the "expecting" message: the longer list includes the
   options that are valid only without explicit categories. */
2147 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2148 "TOTAL", "LABEL", "POSITION", "EMPTY");
2150 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2157 if (c->n_cats >= allocated_cats)
2158 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2159 c->cats[c->n_cats++] = cat;
/* Make room for the totals category, at the front or at the end. */
2164 if (c->n_cats >= allocated_cats)
2165 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2167 struct ctables_category *totals;
2170 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2171 totals = &c->cats[0];
2174 totals = &c->cats[c->n_cats];
2177 *totals = (struct ctables_category) {
2179 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise, tracking the current subtotal along the way. */
2183 struct ctables_category *subtotal = NULL;
2184 for (size_t i = totals_before ? 0 : c->n_cats;
2185 totals_before ? i < c->n_cats : i-- > 0;
2186 totals_before ? i++ : 0)
2188 struct ctables_category *cat = &c->cats[i];
2197 cat->subtotal = subtotal;
2200 case CCT_POSTCOMPUTE:
2211 case CCT_EXCLUDED_MISSING:
/* NOTE(review): only the signature is visible in this listing.  Judging by
   the name and by its use in ctables_stack_uninit(), this presumably releases
   the storage that NEST owns without freeing NEST itself; confirm against the
   full body. */
2220 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes every nest in STACK and frees the array of nests.  STACK
   itself is not freed by the visible statements.

   NOTE(review): some lines are missing from this listing, possibly a null
   check on STACK. */
2227 ctables_stack_uninit (struct ctables_stack *stack)
2231 for (size_t i = 0; i < stack->n; i++)
2232 ctables_nest_uninit (&stack->nests[i]);
2233 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one nest for each
   pair (A, B), with A taken from S0 and B from S1.

   For each pair, the new nest's variables are A's followed by B's.  The scale
   index is A's if A has one, otherwise B's shifted by A's variable count.
   The summary specifications are cloned from whichever of the two was chosen
   as SUMMARY_SRC.

   Both inputs are passed by value and uninitialized before returning, so the
   caller must not use them afterward.

   NOTE(review): early-exit handling, the assignment of SUMMARY_SRC, part of
   the new nest's initializer, and the return statement are missing from this
   listing. */
2237 static struct ctables_stack
2238 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* Room for S0.n * S1.n nests. */
2245 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2246 for (size_t i = 0; i < s0.n; i++)
2247 for (size_t j = 0; j < s1.n; j++)
2249 const struct ctables_nest *a = &s0.nests[i];
2250 const struct ctables_nest *b = &s1.nests[j];
2252 size_t allocate = a->n + b->n;
2253 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2254 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2256 for (size_t k = 0; k < a->n; k++)
2257 vars[n++] = a->vars[k];
2258 for (size_t k = 0; k < b->n; k++)
2259 vars[n++] = b->vars[k];
2260 assert (n == allocate);
/* Choose which operand supplies the summary specifications, based on which
   one lacks a cell summary variable. */
2262 const struct ctables_nest *summary_src;
2263 if (!a->specs[CSV_CELL].var)
2265 else if (!b->specs[CSV_CELL].var)
2270 struct ctables_nest *new = &stack.nests[stack.n++];
2271 *new = (struct ctables_nest) {
2273 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2274 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2278 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2279 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2281 ctables_stack_uninit (&s0);
2282 ctables_stack_uninit (&s1);
/* Builds the stack that results from stacking S0 and S1: S0's nests followed
   by S1's.  The nest structures are copied by plain assignment, so the new
   stack shares whatever they point to with the inputs.  Each nest that came
   from S1 has S0.n added to its 'group_head'.

   NOTE(review): the increment of stack.n in the second loop, disposal of the
   input arrays, and the return statement are missing from this listing. */
2286 static struct ctables_stack
2287 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2289 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2290 for (size_t i = 0; i < s0.n; i++)
2291 stack.nests[stack.n++] = s0.nests[i];
2292 for (size_t i = 0; i < s1.n; i++)
2294 stack.nests[stack.n] = s1.nests[i];
2295 stack.nests[stack.n].group_head += s0.n;
2298 assert (stack.n == s0.n + s1.n);
/* Returns a stack that holds a single nest for the variable axis A.

   The nest's scale index is 0 if A is a scale variable and SIZE_MAX
   otherwise.  If A has cell summary specifications or is a scale variable,
   every summary spec set is cloned from A into the nest and tagged with A's
   variable and scale flag.

   NOTE(review): part of the nest initializer is missing from this listing. */
2304 static struct ctables_stack
2305 var_fts (const struct ctables_axis *a)
2307 struct variable **vars = xmalloc (sizeof *vars);
2310 struct ctables_nest *nest = xmalloc (sizeof *nest);
2311 *nest = (struct ctables_nest) {
2314 .scale_idx = a->scale ? 0 : SIZE_MAX,
2316 if (a->specs[CSV_CELL].n || a->scale)
2317 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2319 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2320 nest->specs[sv].var = a->var;
2321 nest->specs[sv].is_scale = a->scale;
2323 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts the axis expression A into a stack of nests.

   Three outcomes are visible: an empty stack, the result of stack_fts() on
   the two sub-axes, and the result of nest_fts() on the two sub-axes.
   AXIS_TYPE is passed down unchanged to the recursive calls.

   NOTE(review): the conditions and case labels that select each outcome are
   missing from this listing; presumably a null axis gives the empty stack and
   the stacking and nesting operators select the other two. */
2326 static struct ctables_stack
2327 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2330 return (struct ctables_stack) { .n = 0 };
2338 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2339 enumerate_fts (axis_type, a->subs[1]));
2342 /* This should consider any of the scale variables found in the result to
2343 be linked to each other listwise for SMISSING=LISTWISE. */
2344 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2345 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary statistic.  It is a union, so only the member
   group that matches the summary function in use is meaningful; the existing
   comments name the functions that each group serves.

   NOTE(review): several member declarations are missing from this listing.
   Other functions in this file use 'count', 'min', 'max', and 'ovalid' in
   addition to the two members visible here. */
2351 union ctables_summary
2353 /* COUNT, VALIDN, TOTALN. */
2356 /* MINIMUM, MAXIMUM, RANGE. */
2363 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2364 struct moments1 *moments;
2366 /* MEDIAN, MODE, PTILE. */
2369 struct casewriter *writer;
2374 /* XXX multiple response */
/* Prepares accumulator S for the summary function SS->function.

   Initializations visible here:

   - One group sets both 'min' and 'max' to SYSMIS.

   - The *_SUM percentage functions and CTSF_UVARIANCE, among others, create a
     moments1 accumulator at MOMENT_VARIANCE.

   - One group creates a sorting casewriter over two-column cases, ordered
     ascending on column 0.  ctables_summary_add() stores the value in column
     0 and its weight in column 1.

   NOTE(review): many case labels and statements are missing from this
   listing, including whatever the count-based groups at the top do;
   presumably they zero a counter. */
2378 ctables_summary_init (union ctables_summary *s,
2379 const struct ctables_summary_spec *ss)
2381 switch (ss->function)
2385 case CTSF_ROWPCT_COUNT:
2386 case CTSF_COLPCT_COUNT:
2387 case CTSF_TABLEPCT_COUNT:
2388 case CTSF_SUBTABLEPCT_COUNT:
2389 case CTSF_LAYERPCT_COUNT:
2390 case CTSF_LAYERROWPCT_COUNT:
2391 case CTSF_LAYERCOLPCT_COUNT:
2392 case CTSF_ROWPCT_VALIDN:
2393 case CTSF_COLPCT_VALIDN:
2394 case CTSF_TABLEPCT_VALIDN:
2395 case CTSF_SUBTABLEPCT_VALIDN:
2396 case CTSF_LAYERPCT_VALIDN:
2397 case CTSF_LAYERROWPCT_VALIDN:
2398 case CTSF_LAYERCOLPCT_VALIDN:
2399 case CTSF_ROWPCT_TOTALN:
2400 case CTSF_COLPCT_TOTALN:
2401 case CTSF_TABLEPCT_TOTALN:
2402 case CTSF_SUBTABLEPCT_TOTALN:
2403 case CTSF_LAYERPCT_TOTALN:
2404 case CTSF_LAYERROWPCT_TOTALN:
2405 case CTSF_LAYERCOLPCT_TOTALN:
2412 case CTSF_UROWPCT_COUNT:
2413 case CTSF_UCOLPCT_COUNT:
2414 case CTSF_UTABLEPCT_COUNT:
2415 case CTSF_USUBTABLEPCT_COUNT:
2416 case CTSF_ULAYERPCT_COUNT:
2417 case CTSF_ULAYERROWPCT_COUNT:
2418 case CTSF_ULAYERCOLPCT_COUNT:
2419 case CTSF_UROWPCT_VALIDN:
2420 case CTSF_UCOLPCT_VALIDN:
2421 case CTSF_UTABLEPCT_VALIDN:
2422 case CTSF_USUBTABLEPCT_VALIDN:
2423 case CTSF_ULAYERPCT_VALIDN:
2424 case CTSF_ULAYERROWPCT_VALIDN:
2425 case CTSF_ULAYERCOLPCT_VALIDN:
2426 case CTSF_UROWPCT_TOTALN:
2427 case CTSF_UCOLPCT_TOTALN:
2428 case CTSF_UTABLEPCT_TOTALN:
2429 case CTSF_USUBTABLEPCT_TOTALN:
2430 case CTSF_ULAYERPCT_TOTALN:
2431 case CTSF_ULAYERROWPCT_TOTALN:
2432 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for ctables_summary_add(). */
2442 s->min = s->max = SYSMIS;
2450 case CTSF_ROWPCT_SUM:
2451 case CTSF_COLPCT_SUM:
2452 case CTSF_TABLEPCT_SUM:
2453 case CTSF_SUBTABLEPCT_SUM:
2454 case CTSF_LAYERPCT_SUM:
2455 case CTSF_LAYERROWPCT_SUM:
2456 case CTSF_LAYERCOLPCT_SUM:
2461 case CTSF_UVARIANCE:
2462 case CTSF_UROWPCT_SUM:
2463 case CTSF_UCOLPCT_SUM:
2464 case CTSF_UTABLEPCT_SUM:
2465 case CTSF_USUBTABLEPCT_SUM:
2466 case CTSF_ULAYERPCT_SUM:
2467 case CTSF_ULAYERROWPCT_SUM:
2468 case CTSF_ULAYERCOLPCT_SUM:
2469 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two-column case prototype for the sorting writer. */
2479 struct caseproto *proto = caseproto_create ();
2480 proto = caseproto_add_width (proto, 0);
2481 proto = caseproto_add_width (proto, 0);
2483 struct subcase ordering;
2484 subcase_init (&ordering, 0, 0, SC_ASCEND);
2485 s->writer = sort_create_writer (&ordering, proto);
/* The ordering and the local prototype reference are released as soon as the
   writer exists. */
2486 subcase_uninit (&ordering);
2487 caseproto_unref (proto);
/* Releases what ctables_summary_init() allocated in S for the summary
   function SS->function.  Two releases are visible: the moments1 accumulator
   for the moments-based group and the casewriter for another group.

   NOTE(review): many case labels and statements are missing from this
   listing; presumably the count-based and minimum/maximum groups have nothing
   to free. */
2497 ctables_summary_uninit (union ctables_summary *s,
2498 const struct ctables_summary_spec *ss)
2500 switch (ss->function)
2504 case CTSF_ROWPCT_COUNT:
2505 case CTSF_COLPCT_COUNT:
2506 case CTSF_TABLEPCT_COUNT:
2507 case CTSF_SUBTABLEPCT_COUNT:
2508 case CTSF_LAYERPCT_COUNT:
2509 case CTSF_LAYERROWPCT_COUNT:
2510 case CTSF_LAYERCOLPCT_COUNT:
2511 case CTSF_ROWPCT_VALIDN:
2512 case CTSF_COLPCT_VALIDN:
2513 case CTSF_TABLEPCT_VALIDN:
2514 case CTSF_SUBTABLEPCT_VALIDN:
2515 case CTSF_LAYERPCT_VALIDN:
2516 case CTSF_LAYERROWPCT_VALIDN:
2517 case CTSF_LAYERCOLPCT_VALIDN:
2518 case CTSF_ROWPCT_TOTALN:
2519 case CTSF_COLPCT_TOTALN:
2520 case CTSF_TABLEPCT_TOTALN:
2521 case CTSF_SUBTABLEPCT_TOTALN:
2522 case CTSF_LAYERPCT_TOTALN:
2523 case CTSF_LAYERROWPCT_TOTALN:
2524 case CTSF_LAYERCOLPCT_TOTALN:
2531 case CTSF_UROWPCT_COUNT:
2532 case CTSF_UCOLPCT_COUNT:
2533 case CTSF_UTABLEPCT_COUNT:
2534 case CTSF_USUBTABLEPCT_COUNT:
2535 case CTSF_ULAYERPCT_COUNT:
2536 case CTSF_ULAYERROWPCT_COUNT:
2537 case CTSF_ULAYERCOLPCT_COUNT:
2538 case CTSF_UROWPCT_VALIDN:
2539 case CTSF_UCOLPCT_VALIDN:
2540 case CTSF_UTABLEPCT_VALIDN:
2541 case CTSF_USUBTABLEPCT_VALIDN:
2542 case CTSF_ULAYERPCT_VALIDN:
2543 case CTSF_ULAYERROWPCT_VALIDN:
2544 case CTSF_ULAYERCOLPCT_VALIDN:
2545 case CTSF_UROWPCT_TOTALN:
2546 case CTSF_UCOLPCT_TOTALN:
2547 case CTSF_UTABLEPCT_TOTALN:
2548 case CTSF_USUBTABLEPCT_TOTALN:
2549 case CTSF_ULAYERPCT_TOTALN:
2550 case CTSF_ULAYERROWPCT_TOTALN:
2551 case CTSF_ULAYERCOLPCT_TOTALN:
2567 case CTSF_ROWPCT_SUM:
2568 case CTSF_COLPCT_SUM:
2569 case CTSF_TABLEPCT_SUM:
2570 case CTSF_SUBTABLEPCT_SUM:
2571 case CTSF_LAYERPCT_SUM:
2572 case CTSF_LAYERROWPCT_SUM:
2573 case CTSF_LAYERCOLPCT_SUM:
2578 case CTSF_UVARIANCE:
2579 case CTSF_UROWPCT_SUM:
2580 case CTSF_UCOLPCT_SUM:
2581 case CTSF_UTABLEPCT_SUM:
2582 case CTSF_USUBTABLEPCT_SUM:
2583 case CTSF_ULAYERPCT_SUM:
2584 case CTSF_ULAYERROWPCT_SUM:
2585 case CTSF_ULAYERCOLPCT_SUM:
2586 moments1_destroy (s->moments);
2595 casewriter_destroy (s->writer);
/* Adds one case's contribution to accumulator S for the summary function
   SS->function.

   VALUE is the case's value of VAR.  The flags IS_SCALE, IS_SCALE_MISSING,
   IS_MISSING, and EXCLUDED_MISSING describe how that value is classified;
   the table in the comment inside the function explains which cases each
   kind of count includes.  D_WEIGHT and E_WEIGHT are two case weights, and
   which of them is used depends on the function.

   Updates visible here:

   - Count-based groups add D_WEIGHT or E_WEIGHT to S->count.  Some do so only
     when IS_SCALE is set or EXCLUDED_MISSING is clear.

   - The minimum/maximum group compares VALUE->f with the running extremes
     when IS_SCALE_MISSING is clear, treating SYSMIS as "unset".

   - Moments-based groups add VALUE->f when IS_SCALE_MISSING is clear, using
     weight E_WEIGHT, or 1.0 for the groups with U-prefixed functions.

   - The group backed by a casewriter writes a two-column case holding
     VALUE->f and E_WEIGHT, and adds E_WEIGHT to S->ovalid.

   NOTE(review): many case labels and statements are missing from this
   listing, so the grouping above is approximate. */
2601 ctables_summary_add (union ctables_summary *s,
2602 const struct ctables_summary_spec *ss,
2603 const struct variable *var, const union value *value,
2604 bool is_scale, bool is_scale_missing,
2605 bool is_missing, bool excluded_missing,
2606 double d_weight, double e_weight)
2608 /* To determine whether a case is included in a given table for a particular
2609 kind of summary, consider the following charts for each variable in the
2610 table. Only if "yes" appears for every variable for the summary is the
2613 Categorical variables: VALIDN COUNT TOTALN
2614 Valid values in included categories yes yes yes
2615 Missing values in included categories --- yes yes
2616 Missing values in excluded categories --- --- yes
2617 Valid values in excluded categories --- --- ---
2619 Scale variables: VALIDN COUNT TOTALN
2620 Valid value yes yes yes
2621 Missing value --- yes yes
2623 Missing values include both user- and system-missing. (The system-missing
2624 value is always in an excluded category.)
2626 switch (ss->function)
2629 case CTSF_ROWPCT_TOTALN:
2630 case CTSF_COLPCT_TOTALN:
2631 case CTSF_TABLEPCT_TOTALN:
2632 case CTSF_SUBTABLEPCT_TOTALN:
2633 case CTSF_LAYERPCT_TOTALN:
2634 case CTSF_LAYERROWPCT_TOTALN:
2635 case CTSF_LAYERCOLPCT_TOTALN:
2636 s->count += d_weight;
2640 case CTSF_UROWPCT_TOTALN:
2641 case CTSF_UCOLPCT_TOTALN:
2642 case CTSF_UTABLEPCT_TOTALN:
2643 case CTSF_USUBTABLEPCT_TOTALN:
2644 case CTSF_ULAYERPCT_TOTALN:
2645 case CTSF_ULAYERROWPCT_TOTALN:
2646 case CTSF_ULAYERCOLPCT_TOTALN:
2651 case CTSF_ROWPCT_COUNT:
2652 case CTSF_COLPCT_COUNT:
2653 case CTSF_TABLEPCT_COUNT:
2654 case CTSF_SUBTABLEPCT_COUNT:
2655 case CTSF_LAYERPCT_COUNT:
2656 case CTSF_LAYERROWPCT_COUNT:
2657 case CTSF_LAYERCOLPCT_COUNT:
2658 if (is_scale || !excluded_missing)
2659 s->count += d_weight;
2663 case CTSF_UROWPCT_COUNT:
2664 case CTSF_UCOLPCT_COUNT:
2665 case CTSF_UTABLEPCT_COUNT:
2666 case CTSF_USUBTABLEPCT_COUNT:
2667 case CTSF_ULAYERPCT_COUNT:
2668 case CTSF_ULAYERROWPCT_COUNT:
2669 case CTSF_ULAYERCOLPCT_COUNT:
2670 if (is_scale || !excluded_missing)
2675 case CTSF_ROWPCT_VALIDN:
2676 case CTSF_COLPCT_VALIDN:
2677 case CTSF_TABLEPCT_VALIDN:
2678 case CTSF_SUBTABLEPCT_VALIDN:
2679 case CTSF_LAYERPCT_VALIDN:
2680 case CTSF_LAYERROWPCT_VALIDN:
2681 case CTSF_LAYERCOLPCT_VALIDN:
2685 s->count += d_weight;
2689 case CTSF_UROWPCT_VALIDN:
2690 case CTSF_UCOLPCT_VALIDN:
2691 case CTSF_UTABLEPCT_VALIDN:
2692 case CTSF_USUBTABLEPCT_VALIDN:
2693 case CTSF_ULAYERPCT_VALIDN:
2694 case CTSF_ULAYERROWPCT_VALIDN:
2695 case CTSF_ULAYERCOLPCT_VALIDN:
2704 s->count += d_weight;
2713 if (is_scale || !excluded_missing)
2714 s->count += e_weight;
2721 s->count += e_weight;
2725 s->count += e_weight;
2731 if (!is_scale_missing)
2733 assert (!var_is_alpha (var)); /* XXX? */
2734 if (s->min == SYSMIS || value->f < s->min)
2736 if (s->max == SYSMIS || value->f > s->max)
2746 case CTSF_ROWPCT_SUM:
2747 case CTSF_COLPCT_SUM:
2748 case CTSF_TABLEPCT_SUM:
2749 case CTSF_SUBTABLEPCT_SUM:
2750 case CTSF_LAYERPCT_SUM:
2751 case CTSF_LAYERROWPCT_SUM:
2752 case CTSF_LAYERCOLPCT_SUM:
2753 if (!is_scale_missing)
2754 moments1_add (s->moments, value->f, e_weight);
2761 case CTSF_UVARIANCE:
2762 case CTSF_UROWPCT_SUM:
2763 case CTSF_UCOLPCT_SUM:
2764 case CTSF_UTABLEPCT_SUM:
2765 case CTSF_USUBTABLEPCT_SUM:
2766 case CTSF_ULAYERPCT_SUM:
2767 case CTSF_ULAYERROWPCT_SUM:
2768 case CTSF_ULAYERCOLPCT_SUM:
2769 if (!is_scale_missing)
2770 moments1_add (s->moments, value->f, 1.0);
2776 d_weight = e_weight = 1.0;
2781 if (!is_scale_missing)
2783 s->ovalid += e_weight;
2785 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2786 *case_num_rw_idx (c, 0) = value->f;
2787 *case_num_rw_idx (c, 1) = e_weight;
2788 casewriter_write (s->writer, c);
/* Maps summary function FUNCTION to the domain type (a CTDT_* value) that
   its percentage is computed over.  Each family of percentage functions
   shares one domain across its COUNT, SUM, TOTALN, and VALIDN forms,
   including the U-prefixed forms.

   Three mappings are visible here: the LAYERCOLPCT family yields
   CTDT_LAYERCOL, the LAYERROWPCT family yields CTDT_LAYERROW, and the
   SUBTABLEPCT family yields CTDT_SUBTABLE.

   NOTE(review): the switch header, the case labels for the non-percentage
   functions, and the return statements for the other families are missing
   from this listing. */
2794 static enum ctables_domain_type
2795 ctables_function_domain (enum ctables_summary_function function)
2825 case CTSF_UVARIANCE:
2831 case CTSF_COLPCT_COUNT:
2832 case CTSF_COLPCT_SUM:
2833 case CTSF_COLPCT_TOTALN:
2834 case CTSF_COLPCT_VALIDN:
2835 case CTSF_UCOLPCT_COUNT:
2836 case CTSF_UCOLPCT_SUM:
2837 case CTSF_UCOLPCT_TOTALN:
2838 case CTSF_UCOLPCT_VALIDN:
2841 case CTSF_LAYERCOLPCT_COUNT:
2842 case CTSF_LAYERCOLPCT_SUM:
2843 case CTSF_LAYERCOLPCT_TOTALN:
2844 case CTSF_LAYERCOLPCT_VALIDN:
2845 case CTSF_ULAYERCOLPCT_COUNT:
2846 case CTSF_ULAYERCOLPCT_SUM:
2847 case CTSF_ULAYERCOLPCT_TOTALN:
2848 case CTSF_ULAYERCOLPCT_VALIDN:
2849 return CTDT_LAYERCOL;
2851 case CTSF_LAYERPCT_COUNT:
2852 case CTSF_LAYERPCT_SUM:
2853 case CTSF_LAYERPCT_TOTALN:
2854 case CTSF_LAYERPCT_VALIDN:
2855 case CTSF_ULAYERPCT_COUNT:
2856 case CTSF_ULAYERPCT_SUM:
2857 case CTSF_ULAYERPCT_TOTALN:
2858 case CTSF_ULAYERPCT_VALIDN:
2861 case CTSF_LAYERROWPCT_COUNT:
2862 case CTSF_LAYERROWPCT_SUM:
2863 case CTSF_LAYERROWPCT_TOTALN:
2864 case CTSF_LAYERROWPCT_VALIDN:
2865 case CTSF_ULAYERROWPCT_COUNT:
2866 case CTSF_ULAYERROWPCT_SUM:
2867 case CTSF_ULAYERROWPCT_TOTALN:
2868 case CTSF_ULAYERROWPCT_VALIDN:
2869 return CTDT_LAYERROW;
2871 case CTSF_ROWPCT_COUNT:
2872 case CTSF_ROWPCT_SUM:
2873 case CTSF_ROWPCT_TOTALN:
2874 case CTSF_ROWPCT_VALIDN:
2875 case CTSF_UROWPCT_COUNT:
2876 case CTSF_UROWPCT_SUM:
2877 case CTSF_UROWPCT_TOTALN:
2878 case CTSF_UROWPCT_VALIDN:
2881 case CTSF_SUBTABLEPCT_COUNT:
2882 case CTSF_SUBTABLEPCT_SUM:
2883 case CTSF_SUBTABLEPCT_TOTALN:
2884 case CTSF_SUBTABLEPCT_VALIDN:
2885 case CTSF_USUBTABLEPCT_COUNT:
2886 case CTSF_USUBTABLEPCT_SUM:
2887 case CTSF_USUBTABLEPCT_TOTALN:
2888 case CTSF_USUBTABLEPCT_VALIDN:
2889 return CTDT_SUBTABLE;
2891 case CTSF_TABLEPCT_COUNT:
2892 case CTSF_TABLEPCT_SUM:
2893 case CTSF_TABLEPCT_TOTALN:
2894 case CTSF_TABLEPCT_VALIDN:
2895 case CTSF_UTABLEPCT_COUNT:
2896 case CTSF_UTABLEPCT_SUM:
2897 case CTSF_UTABLEPCT_TOTALN:
2898 case CTSF_UTABLEPCT_VALIDN:
/* Computes the value of summary S, which is described by spec SS, for CELL.

   The percentage functions divide by a total taken from the cell's domain
   of the type chosen by ctables_function_domain(), and multiply by 100; each
   visible division is guarded by a test that the denominator is nonzero (the
   result used otherwise is not visible in this excerpt).  Moment-based
   functions read S->moments through moments1_calculate().  The order
   statistics at the end read back the cases buffered in S->writer and store
   their result in S->ovalue. */
2906 ctables_summary_value (const struct ctables_cell *cell,
2907 union ctables_summary *s,
2908 const struct ctables_summary_spec *ss)
2910 switch (ss->function)
/* Percentage of count: denominator is the domain's e_count. */
2917 case CTSF_ROWPCT_COUNT:
2918 case CTSF_COLPCT_COUNT:
2919 case CTSF_TABLEPCT_COUNT:
2920 case CTSF_SUBTABLEPCT_COUNT:
2921 case CTSF_LAYERPCT_COUNT:
2922 case CTSF_LAYERROWPCT_COUNT:
2923 case CTSF_LAYERCOLPCT_COUNT:
2925 enum ctables_domain_type d = ctables_function_domain (ss->function);
2926 return (cell->domains[d]->e_count
2927 ? s->count / cell->domains[d]->e_count * 100
/* Unweighted percentage of count: denominator is the domain's u_count. */
2931 case CTSF_UROWPCT_COUNT:
2932 case CTSF_UCOLPCT_COUNT:
2933 case CTSF_UTABLEPCT_COUNT:
2934 case CTSF_USUBTABLEPCT_COUNT:
2935 case CTSF_ULAYERPCT_COUNT:
2936 case CTSF_ULAYERROWPCT_COUNT:
2937 case CTSF_ULAYERCOLPCT_COUNT:
2939 enum ctables_domain_type d = ctables_function_domain (ss->function);
2940 return (cell->domains[d]->u_count
2941 ? s->count / cell->domains[d]->u_count * 100
/* Percentage of valid N: denominator is the domain's e_valid.
   NOTE(review): the numerator is s->count for the VALIDN and TOTALN groups
   too; S is a union, so confirm this is the intended member. */
2945 case CTSF_ROWPCT_VALIDN:
2946 case CTSF_COLPCT_VALIDN:
2947 case CTSF_TABLEPCT_VALIDN:
2948 case CTSF_SUBTABLEPCT_VALIDN:
2949 case CTSF_LAYERPCT_VALIDN:
2950 case CTSF_LAYERROWPCT_VALIDN:
2951 case CTSF_LAYERCOLPCT_VALIDN:
2953 enum ctables_domain_type d = ctables_function_domain (ss->function);
2954 return (cell->domains[d]->e_valid
2955 ? s->count / cell->domains[d]->e_valid * 100
/* Unweighted percentage of valid N: denominator is the domain's u_valid. */
2959 case CTSF_UROWPCT_VALIDN:
2960 case CTSF_UCOLPCT_VALIDN:
2961 case CTSF_UTABLEPCT_VALIDN:
2962 case CTSF_USUBTABLEPCT_VALIDN:
2963 case CTSF_ULAYERPCT_VALIDN:
2964 case CTSF_ULAYERROWPCT_VALIDN:
2965 case CTSF_ULAYERCOLPCT_VALIDN:
2967 enum ctables_domain_type d = ctables_function_domain (ss->function);
2968 return (cell->domains[d]->u_valid
2969 ? s->count / cell->domains[d]->u_valid * 100
/* Percentage of total N: denominator is the domain's e_total. */
2973 case CTSF_ROWPCT_TOTALN:
2974 case CTSF_COLPCT_TOTALN:
2975 case CTSF_TABLEPCT_TOTALN:
2976 case CTSF_SUBTABLEPCT_TOTALN:
2977 case CTSF_LAYERPCT_TOTALN:
2978 case CTSF_LAYERROWPCT_TOTALN:
2979 case CTSF_LAYERCOLPCT_TOTALN:
2981 enum ctables_domain_type d = ctables_function_domain (ss->function);
2982 return (cell->domains[d]->e_total
2983 ? s->count / cell->domains[d]->e_total * 100
/* Unweighted percentage of total N: denominator is the domain's u_total. */
2987 case CTSF_UROWPCT_TOTALN:
2988 case CTSF_UCOLPCT_TOTALN:
2989 case CTSF_UTABLEPCT_TOTALN:
2990 case CTSF_USUBTABLEPCT_TOTALN:
2991 case CTSF_ULAYERPCT_TOTALN:
2992 case CTSF_ULAYERROWPCT_TOTALN:
2993 case CTSF_ULAYERCOLPCT_TOTALN:
2995 enum ctables_domain_type d = ctables_function_domain (ss->function);
2996 return (cell->domains[d]->u_total
2997 ? s->count / cell->domains[d]->u_total * 100
/* Range: SYSMIS unless both extremes are known. */
3018 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3024 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
/* Standard error of the mean, from the accumulated weight and variance. */
3031 double weight, variance;
3032 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3033 return calc_semean (variance, weight);
/* Standard deviation: square root of the variance, or SYSMIS if the variance
   is SYSMIS. */
3040 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3041 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* Sum, recovered as weight times mean. */
3047 double weight, mean;
3048 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
3049 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3053 case CTSF_UVARIANCE:
3056 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Percentage-of-sum functions (their computation is not visible in this
   excerpt). */
3060 case CTSF_ROWPCT_SUM:
3061 case CTSF_COLPCT_SUM:
3062 case CTSF_TABLEPCT_SUM:
3063 case CTSF_SUBTABLEPCT_SUM:
3064 case CTSF_LAYERPCT_SUM:
3065 case CTSF_LAYERROWPCT_SUM:
3066 case CTSF_LAYERCOLPCT_SUM:
3067 case CTSF_UROWPCT_SUM:
3068 case CTSF_UCOLPCT_SUM:
3069 case CTSF_UTABLEPCT_SUM:
3070 case CTSF_USUBTABLEPCT_SUM:
3071 case CTSF_ULAYERPCT_SUM:
3072 case CTSF_ULAYERROWPCT_SUM:
3073 case CTSF_ULAYERCOLPCT_SUM:
/* Percentile: converts S->writer into a reader and accumulates it into a
   percentile statistic.  The requested percentile is SS->percentile for
   CTSF_PTILE and 0.5 for any other function that reaches this code. */
3082 struct casereader *reader = casewriter_make_reader (s->writer);
3085 struct percentile *ptile = percentile_create (
3086 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3087 struct order_stats *os = &ptile->parent;
3088 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3089 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3090 statistic_destroy (&ptile->parent.parent);
/* Mode: same reader-based approach, using a mode statistic. */
3098 struct casereader *reader = casewriter_make_reader (s->writer);
3101 struct mode *mode = mode_create ();
3102 struct order_stats *os = &mode->parent;
3103 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3104 s->ovalue = mode->mode;
3105 statistic_destroy (&mode->parent.parent);
/* Auxiliary data that ctables_cell_compare_3way() receives through its AUX_
   argument. */
3113 struct ctables_cell_sort_aux
/* Nest whose variables supply the sort keys. */
3115 const struct ctables_nest *nest;
/* Axis whose per-cell values are compared. */
3116 enum pivot_axis_type a;
/* Three-way comparison callback for sorting an array of pointers to
   struct ctables_cell.  A_ and B_ point to two array elements and AUX_ to a
   struct ctables_cell_sort_aux.

   For each variable in the nest other than the scale variable, the cells'
   values on axis AUX->a are compared: first by category, then in a way that
   depends on the category type (most of that switch is not visible in this
   excerpt). */
3120 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3122 const struct ctables_cell_sort_aux *aux = aux_;
3123 struct ctables_cell *const *ap = a_;
3124 struct ctables_cell *const *bp = b_;
3125 const struct ctables_cell *a = *ap;
3126 const struct ctables_cell *b = *bp;
3128 const struct ctables_nest *nest = aux->nest;
3129 for (size_t i = 0; i < nest->n; i++)
3130 if (i != nest->scale_idx)
3132 const struct variable *var = nest->vars[i];
3133 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3134 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories are ordered by comparing the category pointers
   themselves. */
3135 if (a_cv->category != b_cv->category)
3136 return a_cv->category > b_cv->category ? 1 : -1;
3138 const union value *a_val = &a_cv->value;
3139 const union value *b_val = &b_cv->value;
3140 switch (a_cv->category->type)
3146 case CCT_POSTCOMPUTE:
3147 case CCT_EXCLUDED_MISSING:
3148 /* Must be equal. */
3156 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3164 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
/* The category's sort_ascending flag selects the direction. */
3166 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Ordering by value label.  The visible branches use strcmp() when B has a
   label, return 1 or -1 when only one value has a label, and fall back to
   comparing the values themselves when B has none; the condition choosing
   between the two rows is not visible here (presumably a_label). */
3172 const char *a_label = var_lookup_value_label (var, a_val);
3173 const char *b_label = var_lookup_value_label (var, b_val);
3175 ? (b_label ? strcmp (a_label, b_label) : 1)
3176 : (b_label ? -1 : value_compare_3way (
3177 a_val, b_val, var_get_width (var))));
3179 return a_cv->category->sort_ascending ? cmp : -cmp;
3193 For each ctables_table:
3194 For each combination of row vars:
3195 For each combination of column vars:
3196 For each combination of layer vars:
3198 Make a table of row values:
3199 Sort entries by row values
3200 Assign a 0-based index to each actual value
3201 Construct a dimension
3202 Make a table of column values
3203 Make a table of layer values
3205 Fill the table entry using the indexes from before.
/* Finds or creates the domain of type DOMAIN in section S that CELL falls
   into.

   A domain is identified by the categories (and, except for total, subtotal
   and postcompute categories, the values) that CELL has for the variables
   listed in nest->domains[DOMAIN] on every axis.  Those are hashed, the hash
   is looked up in S->domains[DOMAIN], and candidates are compared against
   each existing domain's example cell.  If the search does not succeed, a new
   domain with CELL as its example is inserted.  (The statements that leave
   the search loop and return the result are not visible in this excerpt.) */
3208 static struct ctables_domain *
3209 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3210 enum ctables_domain_type domain)
/* Hash the identifying categories and values. */
3213 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3215 const struct ctables_nest *nest = s->nests[a];
3216 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3218 size_t v_idx = nest->domains[domain][i];
3219 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3220 hash = hash_pointer (cv->category, hash);
3221 if (cv->category->type != CCT_TOTAL
3222 && cv->category->type != CCT_SUBTOTAL
3223 && cv->category->type != CCT_POSTCOMPUTE)
3224 hash = value_hash (&cv->value,
3225 var_get_width (nest->vars[v_idx]), hash);
/* Search the domains with the same hash, comparing CELL against each
   domain's example cell on exactly the components that were hashed. */
3229 struct ctables_domain *d;
3230 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3232 const struct ctables_cell *df = d->example;
3233 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3235 const struct ctables_nest *nest = s->nests[a];
3236 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3238 size_t v_idx = nest->domains[domain][i];
3239 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3240 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3241 if (cv1->category != cv2->category
3242 || (cv1->category->type != CCT_TOTAL
3243 && cv1->category->type != CCT_SUBTOTAL
3244 && cv1->category->type != CCT_POSTCOMPUTE
3245 && !value_equal (&cv1->value, &cv2->value,
3246 var_get_width (nest->vars[v_idx]))))
/* Create a new domain; every member other than `example' starts out
   zero-initialized by the compound literal. */
3255 d = xmalloc (sizeof *d);
3256 *d = (struct ctables_domain) { .example = cell };
3257 hmap_insert (&s->domains[domain], &d->node, hash);
/* Builds a substring over the string contents of V, var_get_width (VAR)
   bytes long, and strips trailing spaces from it.  The substring is
   constructed directly on V->s (presumably without copying -- confirm
   against ss_buffer()). */
3261 static struct substring
3262 rtrim_value (const union value *v, const struct variable *var)
3264 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3265 var_get_width (var));
3266 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether the string value V of VAR, with trailing spaces trimmed, lies
   within the inclusive range from SRANGE[0] to SRANGE[1].  An endpoint whose
   `string' member is null places no bound on that side. */
3271 in_string_range (const union value *v, const struct variable *var,
3272 const struct substring *srange)
3274 struct substring s = rtrim_value (v, var);
3275 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3276 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR belongs to.

   The categories are scanned from the last to the first.  Most of the
   per-type switch, including its case labels and what each successful test
   returns, is not visible in this excerpt.  If the scan finishes without
   returning, the result is null for a missing value and OTHERNM otherwise. */
3279 static const struct ctables_category *
3280 ctables_categories_match (const struct ctables_categories *c,
3281 const union value *v, const struct variable *var)
/* Special case for a system-missing numeric value (its handling is not
   visible here). */
3283 if (var_is_numeric (var) && v->f == SYSMIS)
3286 const struct ctables_category *othernm = NULL;
3287 for (size_t i = c->n_cats; i-- > 0; )
3289 const struct ctables_category *cat = &c->cats[i];
/* Exact number. */
3293 if (cat->number == v->f)
/* Exact string, ignoring V's trailing spaces. */
3298 if (ss_equals (cat->string, rtrim_value (v, var)))
/* Numeric range; an endpoint of -DBL_MAX or DBL_MAX leaves that side open. */
3303 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3304 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
/* String range. */
3309 if (in_string_range (v, var, cat->srange))
3314 if (var_is_value_missing (var, v))
3318 case CCT_POSTCOMPUTE:
3333 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3336 case CCT_EXCLUDED_MISSING:
3341 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the CCT_TOTAL category of C if it is C's first or last category.
   (The result when neither is a total is not visible in this excerpt.)

   NOTE(review): cats[0] and cats[n_cats - 1] are addressed unconditionally,
   so this assumes C has at least one category -- confirm with the callers. */
3344 static const struct ctables_category *
3345 ctables_categories_total (const struct ctables_categories *c)
3347 const struct ctables_category *first = &c->cats[0];
3348 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3349 return (first->type == CCT_TOTAL ? first
3350 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S that corresponds to case C categorized as
   CATS, creating and initializing it if it does not exist yet.

   A cell is identified, for every non-scale variable on every axis, by its
   category and (except for total, subtotal and postcompute categories) by
   the variable's value in C.  A new cell gets a copy of each value, its
   summaries initialized from the summary spec set selected by cell->sv, and
   one domain of each type from ctables_domain_insert().

   The statements that leave the lookup loop and return the result are not
   visible in this excerpt. */
3354 static struct ctables_cell *
3355 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3356 const struct ctables_category *cats[PIVOT_N_AXES][10])
3359 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3360 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3362 const struct ctables_nest *nest = s->nests[a];
3363 for (size_t i = 0; i < nest->n; i++)
3364 if (i != nest->scale_idx)
3366 hash = hash_pointer (cats[a][i], hash);
3367 if (cats[a][i]->type != CCT_TOTAL
3368 && cats[a][i]->type != CCT_SUBTOTAL
3369 && cats[a][i]->type != CCT_POSTCOMPUTE)
3370 hash = value_hash (case_data (c, nest->vars[i]),
3371 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same categories and values. */
3377 struct ctables_cell *cell;
3378 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3380 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3382 const struct ctables_nest *nest = s->nests[a];
3383 for (size_t i = 0; i < nest->n; i++)
3384 if (i != nest->scale_idx
3385 && (cats[a][i] != cell->axes[a].cvs[i].category
3386 || (cats[a][i]->type != CCT_TOTAL
3387 && cats[a][i]->type != CCT_SUBTOTAL
3388 && cats[a][i]->type != CCT_POSTCOMPUTE
3389 && !value_equal (case_data (c, nest->vars[i]),
3390 &cell->axes[a].cvs[i].value,
3391 var_get_width (nest->vars[i])))))
/* Create a new cell. */
3400 cell = xmalloc (sizeof *cell);
3403 cell->omit_domains = 0;
3404 cell->postcompute = false;
3405 //struct string name = DS_EMPTY_INITIALIZER;
3406 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3408 const struct ctables_nest *nest = s->nests[a];
3409 cell->axes[a].cvs = (nest->n
3410 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3412 for (size_t i = 0; i < nest->n; i++)
3414 const struct ctables_category *cat = cats[a][i];
3415 const struct variable *var = nest->vars[i];
3416 const union value *value = case_data (c, var);
3417 if (i != nest->scale_idx)
3419 const struct ctables_category *subtotal = cat->subtotal;
3420 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal or postcompute category marks, per axis, the domain
   types (as bits numbered by enum ctables_domain_type) that this cell is to
   be left out of. */
3423 if (cat->type == CCT_TOTAL
3424 || cat->type == CCT_SUBTOTAL
3425 || cat->type == CCT_POSTCOMPUTE)
3427 /* XXX these should be more encompassing I think.*/
3431 case PIVOT_AXIS_COLUMN:
3432 cell->omit_domains |= ((1u << CTDT_TABLE) |
3433 (1u << CTDT_LAYER) |
3434 (1u << CTDT_LAYERCOL) |
3435 (1u << CTDT_SUBTABLE) |
3438 case PIVOT_AXIS_ROW:
3439 cell->omit_domains |= ((1u << CTDT_TABLE) |
3440 (1u << CTDT_LAYER) |
3441 (1u << CTDT_LAYERROW) |
3442 (1u << CTDT_SUBTABLE) |
3445 case PIVOT_AXIS_LAYER:
3446 cell->omit_domains |= ((1u << CTDT_TABLE) |
3447 (1u << CTDT_LAYER));
3451 if (cat->type == CCT_POSTCOMPUTE)
3452 cell->postcompute = true;
/* The cell keeps its own copy of the value. */
3455 cell->axes[a].cvs[i].category = cat;
3456 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): `name' is declared only in the commented-out line above, so
   the fragment below is presumably compiled out by a guard that is not
   visible in this excerpt -- confirm. */
3459 if (i != nest->scale_idx)
3461 if (!ds_is_empty (&name))
3462 ds_put_cstr (&name, ", ");
3463 char *value_s = data_out (value, var_get_encoding (var),
3464 var_get_print_format (var),
3465 settings_get_fmt_settings ());
3466 if (cat->type == CCT_TOTAL
3467 || cat->type == CCT_SUBTOTAL
3468 || cat->type == CCT_POSTCOMPUTE)
3469 ds_put_format (&name, "%s=total", var_get_name (var));
3471 ds_put_format (&name, "%s=%s", var_get_name (var),
3472 value_s + strspn (value_s, " "));
3478 //cell->name = ds_steal_cstr (&name);
/* Allocate one summary per spec.  xnmalloc() checks the element count times
   element size for overflow, matching the allocation of `cvs' above. */
3480 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3481 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3482 cell->summaries = xnmalloc (specs->n, sizeof *cell->summaries);
3483 for (size_t i = 0; i < specs->n; i++)
3484 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3485 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3486 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3487 hmap_insert (&s->cells, &cell->node, hash);
/* Decides whether case C counts as missing for the scale summaries in SPECS.
   Three tests are made in order: whether SPECS is a scale spec set at all,
   whether SPECS->var is numerically missing in C, and whether any of the
   SPECS->listwise_vars is numerically missing in C.  (The value returned
   after each test is not visible in this excerpt.) */
3492 is_scale_missing (const struct ctables_summary_spec_set *specs,
3493 const struct ccase *c)
3495 if (!specs->is_scale)
3498 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3501 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3503 const struct variable *var = specs->listwise_vars[i];
3504 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, categorized as CATS, to its cell in section S, creating the
   cell if necessary.

   Every summary of the cell is updated through ctables_summary_add(), and
   the totals of the cell's domains are increased by D_WEIGHT and E_WEIGHT.
   IS_MISSING and EXCLUDED_MISSING are passed on to the summaries;
   EXCLUDED_MISSING additionally keeps the case out of the domains' counts.
   (The condition guarding the `valid' totals is not visible in this
   excerpt.) */
3512 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3513 const struct ctables_category *cats[PIVOT_N_AXES][10],
3514 bool is_missing, bool excluded_missing,
3515 double d_weight, double e_weight)
3517 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3518 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3520 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3522 bool scale_missing = is_scale_missing (specs, c);
3523 for (size_t i = 0; i < specs->n; i++)
3524 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3525 specs->var, case_data (c, specs->var), specs->is_scale,
3526 scale_missing, is_missing, excluded_missing,
3527 d_weight, e_weight);
/* omit_domains is a bit mask with one bit per domain type (it is assembled
   from `1u << CTDT_...' terms in ctables_cell_insert__), so membership must
   be tested with bitwise AND.  A logical AND here would be true whenever any
   bit at all is set, which would make a cell that omits one domain type skip
   all of them. */
3528 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3529 if (!(cell->omit_domains & (1u << dt)))
3531 struct ctables_domain *d = cell->domains[dt];
3532 d->d_total += d_weight;
3533 d->e_total += e_weight;
3535 if (!excluded_missing)
3537 d->d_count += d_weight;
3538 d->e_count += e_weight;
3543 d->d_valid += d_weight;
3544 d->e_valid += e_weight;
/* Adds case C to the cells obtained by substituting total categories into
   CATS.

   Walks the non-scale variables starting at axis START_AXIS and nest
   position START_NEST.  For each one, the variable's total category is
   looked up with ctables_categories_total(); the case is then added with
   ctables_cell_add__() and the function recurses from the next nest position
   so that combinations of totals are covered as well.  The current entry of
   CATS is saved in `save' first (the lines that overwrite and restore it,
   and the handling of a variable without a total, are not visible in this
   excerpt). */
3551 recurse_totals (struct ctables_section *s, const struct ccase *c,
3552 const struct ctables_category *cats[PIVOT_N_AXES][10],
3553 bool is_missing, bool excluded_missing,
3554 double d_weight, double e_weight,
3555 enum pivot_axis_type start_axis, size_t start_nest)
3557 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3559 const struct ctables_nest *nest = s->nests[a];
3560 for (size_t i = start_nest; i < nest->n; i++)
3562 if (i == nest->scale_idx)
3565 const struct variable *var = nest->vars[i];
3567 const struct ctables_category *total = ctables_categories_total (
3568 s->table->categories[var_get_dict_index (var)]);
3571 const struct ctables_category *save = cats[a][i];
3573 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3574 d_weight, e_weight);
3575 recurse_totals (s, c, cats, is_missing, excluded_missing,
3576 d_weight, e_weight, a, i + 1);
/* Adds case C to the cells obtained by substituting subtotal categories into
   CATS.

   Walks the non-scale variables starting at axis START_AXIS and nest
   position START_NEST.  For each one, the category in CATS is replaced by
   its `subtotal' member, the case is added with ctables_cell_add__(), and
   the function recurses from the next nest position.  The original entry is
   kept in `save' (the test for a category without a subtotal and the line
   restoring CATS are not visible in this excerpt). */
3585 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3586 const struct ctables_category *cats[PIVOT_N_AXES][10],
3587 bool is_missing, bool excluded_missing,
3588 double d_weight, double e_weight,
3589 enum pivot_axis_type start_axis, size_t start_nest)
3591 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3593 const struct ctables_nest *nest = s->nests[a];
3594 for (size_t i = start_nest; i < nest->n; i++)
3596 if (i == nest->scale_idx)
3599 const struct ctables_category *save = cats[a][i];
3602 cats[a][i] = save->subtotal;
3603 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3604 d_weight, e_weight);
3605 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3606 d_weight, e_weight, a, i + 1);
/* Records in OCCURRENCES that VALUE was seen for variable VAR.

   OCCURRENCES is a hash map of struct ctables_occurrence keyed on the value.
   The map is first searched for an equal value (what happens on a match is
   not visible in this excerpt; presumably the function returns without
   adding a duplicate).  Otherwise a new entry holding a clone of VALUE is
   inserted. */
3615 ctables_add_occurrence (const struct variable *var,
3616 const union value *value,
3617 struct hmap *occurrences)
3619 int width = var_get_width (var);
3620 unsigned int hash = value_hash (value, width, 0);
3622 struct ctables_occurrence *o;
3623 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3625 if (value_equal (value, &o->value, width))
3628 o = xmalloc (sizeof *o);
3629 value_clone (&o->value, value, width);
3630 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   Each non-scale variable of each axis is first matched to a category with
   ctables_categories_match(); a shared CCT_EXCLUDED_MISSING placeholder
   category is substituted and `excluded_missing' is set in a branch whose
   condition is not visible in this excerpt.  Unless `excluded_missing' is
   set, the value of every non-scale variable is recorded in
   S->occurrences.  Finally the case is added to its own cell and, through
   recurse_totals() and recurse_subtotals(), to the related total and
   subtotal cells.

   NOTE(review): the second dimension of `cats' fixes a limit of 10 variables
   per nest, and the loops index it by nest position without a visible bounds
   check -- confirm that nest->n is capped elsewhere. */
3634 ctables_cell_insert (struct ctables_section *s,
3635 const struct ccase *c,
3636 double d_weight, double e_weight)
3638 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3640 /* Does at least one categorical variable have a missing value in an included
3641 or excluded category? */
3642 bool is_missing = false;
3644 /* Does at least one categorical variable have a missing value in an excluded
3646 bool excluded_missing = false;
3648 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3650 const struct ctables_nest *nest = s->nests[a];
3651 for (size_t i = 0; i < nest->n; i++)
3653 if (i == nest->scale_idx)
3656 const struct variable *var = nest->vars[i];
3657 const union value *value = case_data (c, var);
3659 bool var_missing = var_is_value_missing (var, value) != 0;
3663 cats[a][i] = ctables_categories_match (
3664 s->table->categories[var_get_dict_index (var)], value, var);
3670 static const struct ctables_category cct_excluded_missing = {
3671 .type = CCT_EXCLUDED_MISSING,
3674 cats[a][i] = &cct_excluded_missing;
3675 excluded_missing = true;
3680 if (!excluded_missing)
3681 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3683 const struct ctables_nest *nest = s->nests[a];
3684 for (size_t i = 0; i < nest->n; i++)
3685 if (i != nest->scale_idx)
3687 const struct variable *var = nest->vars[i];
3688 const union value *value = case_data (c, var);
3689 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3693 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3694 d_weight, e_weight);
3696 //if (!excluded_missing)
3698 recurse_totals (s, c, cats, is_missing, excluded_missing,
3699 d_weight, e_weight, 0, 0);
3700 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3701 d_weight, e_weight, 0, 0);
/* Summary spec set that a merge item refers to; merge_item_compare_3way()
   reads the spec at index `ofs' within it. */
3707 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B refer
   to.  Specs are ordered by function (ascending enumeration value), then by
   percentile, then by label using strcmp(), with a null label treated as the
   empty string.

   NOTE(review): the percentile comparison is inverted relative to the
   function comparison (a smaller percentile compares greater) -- confirm
   that descending order is intended. */
3712 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3714 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3715 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3716 if (as->function != bs->function)
3717 return as->function > bs->function ? 1 : -1;
3718 else if (as->percentile != bs->percentile)
3719 return as->percentile < bs->percentile ? 1 : -1;
3721 const char *as_label = as->label ? as->label : "";
3722 const char *bs_label = bs->label ? bs->label : "";
3723 return strcmp (as_label, bs_label);
/* Creates and returns a pivot_value labeling category CAT: CAT's total_label
   as user text for a total or subtotal category, otherwise VALUE displayed
   as a value of VAR. */
3726 static struct pivot_value *
3727 ctables_category_create_label__ (const struct ctables_category *cat,
3728 const struct variable *var,
3729 const union value *value)
3731 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3732 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3733 : pivot_value_new_var_value (var, value));
/* Creates and returns the label for postcompute category CAT, expanding each
   ")LABEL[n]" reference in CAT->pc->label into the label of the n'th
   category (1-based) of CATS.

   The text is processed piece by piece: everything up to the next reference
   is copied to the output, then the index up to the closing ']' is parsed
   and the referenced category's label, rendered as a string, is appended.
   When no further reference is found, the remaining text ends the label.
   The final statement returns the label unexpanded; it is presumably the
   fallback for a malformed or out-of-range reference (the enclosing loop and
   the jumps to it are not visible in this excerpt). */
3736 static struct pivot_value *
3737 ctables_postcompute_label (const struct ctables_categories *cats,
3738 const struct ctables_category *cat,
3739 const struct variable *var,
3740 const union value *value)
3742 struct substring in = ss_cstr (cat->pc->label);
3743 struct substring target = ss_cstr (")LABEL[");
3745 struct string out = DS_EMPTY_INITIALIZER;
3748 size_t chunk = ss_find_substring (in, target);
/* No (more) references.  If nothing has been written to OUT yet, the rest of
   the input is used directly; otherwise it is appended to OUT and OUT's
   buffer is handed over to the new pivot_value. */
3749 if (chunk == SIZE_MAX)
3751 if (ds_is_empty (&out))
3752 return pivot_value_new_user_text (in.string, in.length);
3755 ds_put_substring (&out, in);
3756 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text that precedes the reference, then skip past ")LABEL[". */
3760 ds_put_substring (&out, ss_head (in, chunk));
3761 ss_advance (&in, chunk + target.length);
3763 struct substring idx_s;
3764 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a decimal number that fills the whole bracketed text and
   lies within 1...n_cats. */
3767 long int idx = strtol (idx_s.string, &tail, 10);
3768 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3771 struct ctables_category *cat2 = &cats->cats[idx - 1];
3772 struct pivot_value *label2
3773 = ctables_category_create_label__ (cat2, var, value);
3774 char *label2_s = pivot_value_to_string_defaults (label2);
3775 ds_put_cstr (&out, label2_s);
3777 pivot_value_destroy (label2);
3782 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates and returns the label for category CAT, one of the categories in
   CATS, for VALUE of variable VAR.  A postcompute category that has its own
   label goes through ctables_postcompute_label(); every other category gets
   the label from ctables_category_create_label__(). */
3785 static struct pivot_value *
3786 ctables_category_create_label (const struct ctables_categories *cats,
3787 const struct ctables_category *cat,
3788 const struct variable *var,
3789 const union value *value)
3791 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3792 ? ctables_postcompute_label (cats, cat, var, value)
3793 : ctables_category_create_label__ (cat, var, value));
/* Searches T->clabels_values_map for an entry whose value equals VALUE,
   which has the given WIDTH.  HASH must be VALUE's hash as computed by
   value_hash (VALUE, WIDTH, 0), which is what both callers pass.  (The return
   statements are not visible in this excerpt.) */
3796 static struct ctables_value *
3797 ctables_value_find__ (struct ctables_table *t, const union value *value,
3798 int width, unsigned int hash)
3800 struct ctables_value *clv;
3801 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3802 hash, &t->clabels_values_map)
3803 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The map is searched first with
   ctables_value_find__(); the new entry, which holds a clone of VALUE, is
   presumably inserted only if that search finds nothing (the test is not
   visible in this excerpt). */
3809 ctables_value_insert (struct ctables_table *t, const union value *value,
3812 unsigned int hash = value_hash (value, width, 0);
3813 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3816 clv = xmalloc (sizeof *clv);
3817 value_clone (&clv->value, value, width);
3818 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, which has the given WIDTH, in T->clabels_values_map.
   Convenience wrapper that computes the hash and returns whatever
   ctables_value_find__() returns. */
3822 static struct ctables_value *
3823 ctables_value_find (struct ctables_table *t,
3824 const union value *value, int width)
3826 return ctables_value_find__ (t, value, width,
3827 value_hash (value, width, 0));
/* Recursively adds to T one section for every combination of nests across
   the axes.

   While A is a valid axis, IX[A] runs over the nests stacked on that axis
   (a single iteration if the stack is empty) and the function recurses to
   the next axis.  The remaining code initializes the next unused element of
   T->sections for the combination in IX: an empty cell map, one occurrence
   map per nest variable on each axis, and one domain map per domain type.
   It is presumably the alternative taken once every axis has an index (its
   `else' is not visible in this excerpt). */
3831 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3832 size_t ix[PIVOT_N_AXES])
3834 if (a < PIVOT_N_AXES)
3836 size_t limit = MAX (t->stacks[a].n, 1);
3837 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3838 ctables_table_add_section (t, a + 1, ix);
3842 struct ctables_section *s = &t->sections[t->n_sections++];
3843 *s = (struct ctables_section) {
3845 .cells = HMAP_INITIALIZER (s->cells),
3847 for (a = 0; a < PIVOT_N_AXES; a++)
3850 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3852 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3853 for (size_t i = 0; i < nest->n; i++)
3854 hmap_init (&s->occurrences[a][i]);
3856 for (size_t i = 0; i < N_CTDTS; i++)
3857 hmap_init (&s->domains[i]);
/* Operator callbacks for postcompute expressions.  Each has the
   double (double, double) shape that ctables_pcexpr_evaluate_nonterminal()
   takes as its EVALUATE argument.  The bodies of ctpo_add, ctpo_sub,
   ctpo_mul and ctpo_neg are not visible in this excerpt. */
3862 ctpo_add (double a, double b)
3868 ctpo_sub (double a, double b)
3874 ctpo_mul (double a, double b)
/* Division: yields SYSMIS for a zero divisor. */
3880 ctpo_div (double a, double b)
3882 return b ? a / b : SYSMIS;
/* Exponentiation with pow().  errno is saved beforehand (the error check and
   the restore are not visible in this excerpt). */
3886 ctpo_pow (double a, double b)
3888 int save_errno = errno;
3890 double result = pow (a, b);
/* Negation; takes a second, unused operand only to fit the common
   signature. */
3898 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression.  Besides the members
   visible here, the evaluation code also reads `pc_a_idx' and `summary_idx'
   from this structure. */
3903 struct ctables_pcexpr_evaluate_ctx
/* Cell whose postcompute value is being calculated. */
3905 const struct ctables_cell *cell;
/* Section that contains the cell. */
3906 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute. */
3907 const struct ctables_categories *cats;
/* Axis on which the postcompute category lies. */
3908 enum pivot_axis_type pc_a;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function before its definition appears. */
3913 static double ctables_pcexpr_evaluate (
3914 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E: evaluates its first N_ARGS subexpressions in
   context CTX and combines them with EVALUATE.  N_ARGS must be at most 2,
   the size of the local operand array; an operand that is not supplied is
   passed as 0.

   Each operand is checked for being non-finite or SYSMIS (the action taken
   on such an operand is not visible in this excerpt; presumably the result
   is SYSMIS). */
3917 ctables_pcexpr_evaluate_nonterminal (
3918 const struct ctables_pcexpr_evaluate_ctx *ctx,
3919 const struct ctables_pcexpr *e, size_t n_args,
3920 double evaluate (double, double))
3922 double args[2] = { 0, 0 };
3923 for (size_t i = 0; i < n_args; i++)
3925 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3926 if (!isfinite (args[i]) || args[i] == SYSMIS)
3929 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that matches CTX->cell in every
   non-scale variable except the postcompute position (axis CTX->pc_a, index
   CTX->pc_a_idx), where PC_CV is substituted.

   The target cell is located in the section's cell map with the same hash
   and comparison that are used when cells are inserted; summary
   CTX->summary_idx of that cell is then evaluated with
   ctables_summary_value().  (What is returned when no such cell exists is
   not visible in this excerpt.) */
3933 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3934 const struct ctables_cell_value *pc_cv)
3936 const struct ctables_section *s = ctx->section;
/* Hash the target cell's categories and values. */
3939 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3941 const struct ctables_nest *nest = s->nests[a];
3942 for (size_t i = 0; i < nest->n; i++)
3943 if (i != nest->scale_idx)
3945 const struct ctables_cell_value *cv
3946 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3947 : &ctx->cell->axes[a].cvs[i]);
3948 hash = hash_pointer (cv->category, hash);
3949 if (cv->category->type != CCT_TOTAL
3950 && cv->category->type != CCT_SUBTOTAL
3951 && cv->category->type != CCT_POSTCOMPUTE)
3952 hash = value_hash (&cv->value,
3953 var_get_width (nest->vars[i]), hash);
/* Search the cells with that hash for one that matches exactly. */
3957 struct ctables_cell *tc;
3958 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3960 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3962 const struct ctables_nest *nest = s->nests[a];
3963 for (size_t i = 0; i < nest->n; i++)
3964 if (i != nest->scale_idx)
3966 const struct ctables_cell_value *p_cv
3967 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3968 : &ctx->cell->axes[a].cvs[i]);
3969 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3970 if (p_cv->category != t_cv->category
3971 || (p_cv->category->type != CCT_TOTAL
3972 && p_cv->category->type != CCT_SUBTOTAL
3973 && p_cv->category->type != CCT_POSTCOMPUTE
3974 && !value_equal (&p_cv->value,
3976 var_get_width (nest->vars[i]))))
/* Evaluate the requested summary of the cell that was found. */
3988 const struct ctables_table *t = s->table;
3989 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3990 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3991 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3992 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX.

   Range operands add up the values of all observed occurrences of the
   postcompute variable that match the operand's category.  The other
   category operands evaluate a single cell.  Operator nodes are delegated to
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_* callback.
   (The switch header, the remaining case labels and the return for range
   operands are not visible in this excerpt.) */
3996 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3997 const struct ctables_pcexpr *e)
4004 case CTPO_CAT_NRANGE:
4005 case CTPO_CAT_SRANGE:
4007 struct ctables_cell_value cv = {
4008 .category = ctables_find_category_for_postcompute (ctx->cats, e)
4010 assert (cv.category != NULL);
/* A range can cover several distinct values, so each recorded occurrence
   that falls into the category is evaluated separately and accumulated in
   `sum'. */
4012 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
4013 const struct ctables_occurrence *o;
4016 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
4017 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4018 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
4020 cv.value = o->value;
4021 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operands that name a single category. */
4026 case CTPO_CAT_NUMBER:
4027 case CTPO_CAT_STRING:
4028 case CTPO_CAT_MISSING:
4029 case CTPO_CAT_OTHERNM:
4030 case CTPO_CAT_SUBTOTAL:
4031 case CTPO_CAT_TOTAL:
4033 struct ctables_cell_value cv = {
4034 .category = ctables_find_category_for_postcompute (ctx->cats, e),
4035 .value = { .f = e->number },
4037 assert (cv.category != NULL);
4038 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary, then unary negation. */
4042 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4045 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4048 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4051 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4054 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4057 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Finds the postcompute that applies to CELL, which must have its
   `postcompute' flag set.  The axes are searched in order, and within each
   axis the nest positions in order, for a cell value whose category is a
   CCT_POSTCOMPUTE; that category's postcompute is returned and its nest
   index is stored in *PC_A_IDX_P.  (The store to *PC_A_P is not visible in
   this excerpt.)  Only the first postcompute found is reported. */
4063 /* XXX what if there is a postcompute in more than one dimension?? */
4064 static const struct ctables_postcompute *
4065 ctables_cell_postcompute (const struct ctables_section *s,
4066 const struct ctables_cell *cell,
4067 enum pivot_axis_type *pc_a_p,
4070 assert (cell->postcompute);
/* The loop has no exit condition of its own; the assertion below fails if
   the axes run out before a postcompute category is found. */
4071 for (enum pivot_axis_type pc_a = 0; ; pc_a++)
4073 assert (pc_a < PIVOT_N_AXES);
4074 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4076 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4077 if (cv->category->type == CCT_POSTCOMPUTE)
4082 *pc_a_idx_p = pc_a_idx;
4083 return cv->category->pc;
/* Calculates the value of postcompute CELL in section S for summary spec SS.

   If the postcompute's own specs contain one with the same function and
   percentile as SS, its format is copied into *FORMAT and
   *IS_CTABLES_FORMAT.  (The condition guarding that search is not visible in
   this excerpt.)  The postcompute expression is then evaluated in a context
   built from the cell, the section, the categories of the variable carrying
   the postcompute, and the summary index. */
4092 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4093 const struct ctables_cell *cell,
4094 const struct ctables_summary_spec *ss,
4095 struct fmt_spec *format,
4096 bool *is_ctables_format,
4099 enum pivot_axis_type pc_a;
4101 const struct ctables_postcompute *pc = ctables_cell_postcompute (
4102 s, cell, &pc_a, &pc_a_idx);
4106 for (size_t i = 0; i < pc->specs->n; i++)
4108 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4109 if (ss->function == ss2->function
4110 && ss->percentile == ss2->percentile)
4112 *format = ss2->format;
4113 *is_ctables_format = ss2->is_ctables_format;
4119 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4120 const struct ctables_categories *cats = s->table->categories[
4121 var_get_dict_index (var)];
4122 struct ctables_pcexpr_evaluate_ctx ctx = {
4127 .pc_a_idx = pc_a_idx,
4128 .summary_idx = summary_idx,
4130 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4134 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4136 struct pivot_table *pt = pivot_table_create__ (
4138 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4139 : pivot_value_new_text (N_("Custom Tables"))),
4142 pivot_table_set_caption (
4143 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4145 pivot_table_set_corner_text (
4146 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4148 bool summary_dimension = (t->summary_axis != t->slabels_axis
4149 || (!t->slabels_visible
4150 && t->summary_specs.n > 1));
4151 if (summary_dimension)
4153 struct pivot_dimension *d = pivot_dimension_create (
4154 pt, t->slabels_axis, N_("Statistics"));
4155 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4156 if (!t->slabels_visible)
4157 d->hide_all_labels = true;
4158 for (size_t i = 0; i < specs->n; i++)
4159 pivot_category_create_leaf (
4160 d->root, ctables_summary_label (&specs->specs[i], t->cilevel));
4163 bool categories_dimension = t->clabels_example != NULL;
4164 if (categories_dimension)
4166 struct pivot_dimension *d = pivot_dimension_create (
4167 pt, t->label_axis[t->clabels_from_axis],
4168 t->clabels_from_axis == PIVOT_AXIS_ROW
4169 ? N_("Row Categories")
4170 : N_("Column Categories"));
4171 const struct variable *var = t->clabels_example;
4172 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4173 for (size_t i = 0; i < t->n_clabels_values; i++)
4175 const struct ctables_value *value = t->clabels_values[i];
4176 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4177 assert (cat != NULL);
4178 pivot_category_create_leaf (d->root, ctables_category_create_label (
4179 c, cat, t->clabels_example,
4184 pivot_table_set_look (pt, ct->look);
4185 struct pivot_dimension *d[PIVOT_N_AXES];
4186 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4188 static const char *names[] = {
4189 [PIVOT_AXIS_ROW] = N_("Rows"),
4190 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4191 [PIVOT_AXIS_LAYER] = N_("Layers"),
4193 d[a] = (t->axes[a] || a == t->summary_axis
4194 ? pivot_dimension_create (pt, a, names[a])
4199 assert (t->axes[a]);
4201 for (size_t i = 0; i < t->stacks[a].n; i++)
4203 struct ctables_nest *nest = &t->stacks[a].nests[i];
4204 struct ctables_section **sections = xnmalloc (t->n_sections,
4206 size_t n_sections = 0;
4208 size_t n_total_cells = 0;
4209 size_t max_depth = 0;
4210 for (size_t j = 0; j < t->n_sections; j++)
4211 if (t->sections[j].nests[a] == nest)
4213 struct ctables_section *s = &t->sections[j];
4214 sections[n_sections++] = s;
4215 n_total_cells += s->cells.count;
4217 size_t depth = s->nests[a]->n;
4218 max_depth = MAX (depth, max_depth);
4221 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4223 size_t n_sorted = 0;
4225 for (size_t j = 0; j < n_sections; j++)
4227 struct ctables_section *s = sections[j];
4229 struct ctables_cell *cell;
4230 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4232 sorted[n_sorted++] = cell;
4233 assert (n_sorted <= n_total_cells);
4236 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4237 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4240 for (size_t j = 0; j < n_sorted; j++)
4242 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4247 struct ctables_level
4249 enum ctables_level_type
4251 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4252 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4253 CTL_SUMMARY, /* Summary functions. */
4257 enum settings_value_show vlabel; /* CTL_VAR only. */
4260 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4261 size_t n_levels = 0;
4262 for (size_t k = 0; k < nest->n; k++)
4264 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4265 if (vlabel != CTVL_NONE)
4267 levels[n_levels++] = (struct ctables_level) {
4269 .vlabel = (enum settings_value_show) vlabel,
4274 if (nest->scale_idx != k
4275 && (k != nest->n - 1 || t->label_axis[a] == a))
4277 levels[n_levels++] = (struct ctables_level) {
4278 .type = CTL_CATEGORY,
4284 if (!summary_dimension && a == t->slabels_axis)
4286 levels[n_levels++] = (struct ctables_level) {
4287 .type = CTL_SUMMARY,
4288 .var_idx = SIZE_MAX,
4292 /* Pivot categories:
4294 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4295 - category for nest->vars[0], if nest->scale_idx != 0
4296 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4297 - category for nest->vars[1], if nest->scale_idx != 1
4299 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4300 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4301 - summary function, if 'a == t->slabels_axis && a ==
4304 Additional dimensions:
4306 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4308 - If 't->label_axis[b] == a' for some 'b != a', add a category
4313 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4315 for (size_t j = 0; j < n_sorted; j++)
4317 struct ctables_cell *cell = sorted[j];
4318 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4320 size_t n_common = 0;
4323 for (; n_common < n_levels; n_common++)
4325 const struct ctables_level *level = &levels[n_common];
4326 if (level->type == CTL_CATEGORY)
4328 size_t var_idx = level->var_idx;
4329 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4330 if (prev->axes[a].cvs[var_idx].category != c)
4332 else if (c->type != CCT_SUBTOTAL
4333 && c->type != CCT_TOTAL
4334 && c->type != CCT_POSTCOMPUTE
4335 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4336 &cell->axes[a].cvs[var_idx].value,
4337 var_get_type (nest->vars[var_idx])))
4343 for (size_t k = n_common; k < n_levels; k++)
4345 const struct ctables_level *level = &levels[k];
4346 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4347 if (level->type == CTL_SUMMARY)
4349 assert (k == n_levels - 1);
4351 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4352 for (size_t m = 0; m < specs->n; m++)
4354 int leaf = pivot_category_create_leaf (
4355 parent, ctables_summary_label (&specs->specs[m],
4363 const struct variable *var = nest->vars[level->var_idx];
4364 struct pivot_value *label;
4365 if (level->type == CTL_VAR)
4367 label = pivot_value_new_variable (var);
4368 label->variable.show = level->vlabel;
4370 else if (level->type == CTL_CATEGORY)
4372 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4373 label = ctables_category_create_label (
4374 t->categories[var_get_dict_index (var)],
4375 cv->category, var, &cv->value);
4380 if (k == n_levels - 1)
4381 prev_leaf = pivot_category_create_leaf (parent, label);
4383 groups[k] = pivot_category_create_group__ (parent, label);
4387 cell->axes[a].leaf = prev_leaf;
4394 for (size_t i = 0; i < t->n_sections; i++)
4396 struct ctables_section *s = &t->sections[i];
4398 struct ctables_cell *cell;
4399 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4404 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4405 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4406 for (size_t j = 0; j < specs->n; j++)
4409 size_t n_dindexes = 0;
4411 if (summary_dimension)
4412 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4414 if (categories_dimension)
4416 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4417 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4418 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4419 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4422 dindexes[n_dindexes++] = ctv->leaf;
4425 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4428 int leaf = cell->axes[a].leaf;
4429 if (a == t->summary_axis && !summary_dimension)
4431 dindexes[n_dindexes++] = leaf;
4434 const struct ctables_summary_spec *ss = &specs->specs[j];
4436 struct fmt_spec format = specs->specs[j].format;
4437 bool is_ctables_format = ss->is_ctables_format;
4438 double d = (cell->postcompute
4439 ? ctables_cell_calculate_postcompute (
4440 s, cell, ss, &format, &is_ctables_format, j)
4441 : ctables_summary_value (cell, &cell->summaries[j],
4444 struct pivot_value *value;
4445 if (ct->hide_threshold != 0
4446 && d < ct->hide_threshold
4447 && ctables_summary_function_is_count (ss->function))
4449 value = pivot_value_new_user_text_nocopy (
4450 xasprintf ("<%d", ct->hide_threshold));
4452 else if (d == 0 && ct->zero)
4453 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4454 else if (d == SYSMIS && ct->missing)
4455 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4456 else if (is_ctables_format)
4458 char *s = data_out_stretchy (&(union value) { .f = d },
4460 &ct->ctables_formats, NULL);
4461 value = pivot_value_new_user_text_nocopy (s);
4465 value = pivot_value_new_number (d);
4466 value->numeric.format = format;
4468 pivot_table_put (pt, dindexes, n_dindexes, value);
4473 pivot_table_submit (pt);
/* Checks whether the category labels on axis A of table T can be displayed
   on the axis named by T->label_axis[A].

   Records A in T->clabels_from_axis and the last variable of the first nest
   stacked on A in T->clabels_example, then verifies that the last variable
   of every nest stacked on A is not the nest's scale variable and agrees
   with that example variable in width, value labels, and category
   specifications.  Each violation is reported with msg (SE, ...).

   NOTE(review): the return statements are not visible in this listing.
   ctables_prepare_table() combines two calls with &&, so presumably this
   yields true on success and false after reporting an error -- confirm. */
4477 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4479 enum pivot_axis_type label_pos = t->label_axis[a];
4483 t->clabels_from_axis = a;
/* These two names are used only to build the error messages below. */
4485 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4486 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4488 const struct ctables_stack *stack = &t->stacks[a];
/* The last variable of the first nest is the reference that the other nests
   are compared against. */
4492 const struct ctables_nest *n0 = &stack->nests[0];
4494 const struct variable *v0 = n0->vars[n0->n - 1];
4495 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4496 t->clabels_example = v0;
/* A CCT_FUNCTION category (sorting based on a summary function) is rejected. */
4498 for (size_t i = 0; i < c0->n_cats; i++)
4499 if (c0->cats[i].type == CCT_FUNCTION)
4501 msg (SE, _("%s=%s is not allowed with sorting based "
4502 "on a summary function."),
4503 subcommand_name, pos_name);
/* The variable whose labels move must not be the nest's scale variable. */
4506 if (n0->n - 1 == n0->scale_idx)
4508 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4509 "but %s is a scale variable."),
4510 subcommand_name, pos_name, var_get_name (v0));
/* Compare the last variable of each remaining nest with the reference. */
4514 for (size_t i = 1; i < stack->n; i++)
4516 const struct ctables_nest *ni = &stack->nests[i];
4518 const struct variable *vi = ni->vars[ni->n - 1];
4519 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4521 if (ni->n - 1 == ni->scale_idx)
4523 msg (SE, _("%s=%s requires the variables to be moved to be "
4524 "categorical, but %s is a scale variable."),
4525 subcommand_name, pos_name, var_get_name (vi));
4528 if (var_get_width (v0) != var_get_width (vi))
4530 msg (SE, _("%s=%s requires the variables to be "
4531 "moved to have the same width, but %s has "
4532 "width %d and %s has width %d."),
4533 subcommand_name, pos_name,
4534 var_get_name (v0), var_get_width (v0),
4535 var_get_name (vi), var_get_width (vi));
4538 if (!val_labs_equal (var_get_value_labels (v0),
4539 var_get_value_labels (vi)))
4541 msg (SE, _("%s=%s requires the variables to be "
4542 "moved to have the same value labels, but %s "
4543 "and %s have different value labels."),
4544 subcommand_name, pos_name,
4545 var_get_name (v0), var_get_name (vi));
4548 if (!ctables_categories_equal (c0, ci))
4550 msg (SE, _("%s=%s requires the variables to be "
4551 "moved to have the same category "
4552 "specifications, but %s and %s have different "
4553 "category specifications."),
4554 subcommand_name, pos_name,
4555 var_get_name (v0), var_get_name (vi));
/* Prepares table T for execution:

   - Builds T->stacks[] for each axis and fills in each nest's per-domain
     lists of variable indexes.

   - On the summary axis, supplies default summary specifications for nests
     that have none and, when T->ctables->smissing_listwise is set, attaches
     the list of other nests' scale variables to each nest's spec sets.

   - Merges the spec sets of all nests on the summary axis into
     T->summary_specs, storing each spec's position there in its axis_idx.

   Returns the logical AND of ctables_check_label_position() for the row and
   column axes. */
4564 ctables_prepare_table (struct ctables_table *t)
4566 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
/* The stack for this axis comes from enumerate_fts() on the axis
   expression. */
4569 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4571 for (size_t j = 0; j < t->stacks[a].n; j++)
4573 struct ctables_nest *nest = &t->stacks[a].nests[j];
4574 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Room for one index per variable in the nest; indexes are appended at the
   bottom of the loop over K. */
4576 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4577 nest->n_domains[dt] = 0;
4579 for (size_t k = 0; k < nest->n; k++)
/* The tests below depend on the domain type, the axis, and the position of
   K within the nest.  NOTE(review): the statements they guard are not
   visible in this listing; presumably they skip K for this domain. */
4581 if (k == nest->scale_idx)
4590 if (a != PIVOT_AXIS_LAYER)
4597 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4598 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4599 : a == PIVOT_AXIS_ROW)
4601 if (k == nest->n - 1
4602 || (nest->scale_idx == nest->n - 1
4603 && k == nest->n - 2))
4609 if (a == PIVOT_AXIS_COLUMN)
4614 if (a == PIVOT_AXIS_ROW)
4619 nest->domains[dt][nest->n_domains[dt]++] = k;
/* A stack that consists of a single nest with no variables.
   NOTE(review): presumably the fallback for an axis without an expression;
   the controlling condition is not visible here. */
4626 struct ctables_nest *nest = xmalloc (sizeof *nest);
4627 *nest = (struct ctables_nest) { .n = 0 };
4628 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4631 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4632 for (size_t i = 0; i < stack->n; i++)
4634 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries for this nest: install a single default one, CTSF_MEAN
   for a scale spec set and CTSF_COUNT otherwise, and reuse it for totals. */
4635 if (!nest->specs[CSV_CELL].n)
4637 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4638 specs->specs = xmalloc (sizeof *specs->specs);
4641 enum ctables_summary_function function
4642 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): specs->var is read here, before the assignment to
   specs->var below -- confirm that it is already valid at this point or that
   ctables_summary_default_format() tolerates a null variable. */
4644 *specs->specs = (struct ctables_summary_spec) {
4645 .function = function,
4646 .format = ctables_summary_default_format (function, specs->var),
4649 specs->var = nest->vars[0];
4651 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4652 &nest->specs[CSV_CELL]);
/* Cell summaries exist but total summaries do not: totals get a clone of
   the cell set. */
4654 else if (!nest->specs[CSV_TOTAL].n)
4655 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4656 &nest->specs[CSV_CELL]);
4658 if (t->ctables->smissing_listwise)
4660 struct variable **listwise_vars = NULL;
4662 size_t allocated = 0;
/* Starting at this nest's group_head, collect the scale variable of every
   other nest that has one (scale_idx < n). */
4664 for (size_t j = nest->group_head; j < stack->n; j++)
4666 const struct ctables_nest *other_nest = &stack->nests[j];
4667 if (other_nest->group_head != nest->group_head)
4670 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4673 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4674 sizeof *listwise_vars);
4675 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Every summary variant of the nest points at the same array. */
4678 for (size_t j = 0; j < N_CSVS; j++)
4680 nest->specs[j].listwise_vars = listwise_vars;
4681 nest->specs[j].n_listwise_vars = n;
/* Merge: ITEMS holds one cursor per (nest, summary variant) spec set.  Each
   round picks the least current item according to
   merge_item_compare_3way(), appends its spec to MERGED once, and advances
   every cursor whose current item compares equal to it. */
4686 struct ctables_summary_spec_set *merged = &t->summary_specs;
4687 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4689 for (size_t j = 0; j < stack->n; j++)
4691 const struct ctables_nest *nest = &stack->nests[j];
4693 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4694 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4699 struct merge_item min = items[0];
4700 for (size_t j = 1; j < n_left; j++)
4701 if (merge_item_compare_3way (&items[j], &min) < 0)
4704 if (merged->n >= merged->allocated)
4705 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4706 sizeof *merged->specs);
4707 merged->specs[merged->n++] = min.set->specs[min.ofs];
4709 for (size_t j = 0; j < n_left; )
4711 if (merge_item_compare_3way (&items[j], &min) == 0)
4713 struct merge_item *item = &items[j];
/* Remember where this spec landed in the merged set. */
4714 item->set->specs[item->ofs].axis_idx = merged->n - 1;
/* A cursor that has run off the end of its set is dropped by moving the
   last cursor into its slot. */
4715 if (++item->ofs >= item->set->n)
4717 items[j] = items[--n_left];
/* Writes the merged function names and each nest's (function, axis_idx)
   pairs to stdout.  NOTE(review): this looks like debugging output; any
   conditional-compilation guard around it is not visible in this listing. */
4726 for (size_t j = 0; j < merged->n; j++)
4727 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4729 for (size_t j = 0; j < stack->n; j++)
4731 const struct ctables_nest *nest = &stack->nests[j];
4732 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4734 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4735 for (size_t k = 0; k < specs->n; k++)
4736 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4737 specs->specs[k].axis_idx);
4743 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4744 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest stacked on axis A of table T, takes the value that case C
   has for the nest's last variable and, if ctables_categories_match() finds
   a category for it, passes it to ctables_value_insert() for T. */
4748 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4749 enum pivot_axis_type a)
4751 struct ctables_stack *stack = &t->stacks[a];
4752 for (size_t i = 0; i < stack->n; i++)
4754 const struct ctables_nest *nest = &stack->nests[i];
4755 const struct variable *var = nest->vars[nest->n - 1];
4756 const union value *value = case_data (c, var);
/* System-missing numeric values get separate treatment.  NOTE(review): the
   guarded statement is not visible here; presumably the value is skipped. */
4758 if (var_is_numeric (var) && value->f == SYSMIS)
4761 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4763 ctables_value_insert (t, value, var_get_width (var));
/* Three-way comparison callback used by ctables_sort_clabels_values().

   A_ and B_ each point to a `struct ctables_value *' array element.  WIDTH_
   points to an int that gives the width of the values being compared.
   Returns the result of value_compare_3way() on the two values. */
4768 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4770 const struct ctables_value *const *ap = a_;
4771 const struct ctables_value *const *bp = b_;
4772 const struct ctables_value *a = *ap;
4773 const struct ctables_value *b = *bp;
4774 const int *width = width_;
4775 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values as an array of the entries in
   T->clabels_values_map, sorted by value using the width of
   T->clabels_example, and sets each entry's leaf to its position in that
   sorted array. */
4779 ctables_sort_clabels_values (struct ctables_table *t)
4781 const struct variable *v0 = t->clabels_example;
4782 int width = var_get_width (v0);
4784 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Insert the labeled values of the example variable that match one of its
   categories.  NOTE(review): the condition that guards this loop is not
   visible in this listing. */
4787 const struct val_labs *val_labs = var_get_value_labels (v0);
4788 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4789 vl = val_labs_next (val_labs, vl))
4790 if (ctables_categories_match (c0, &vl->value, v0))
4791 ctables_value_insert (t, &vl->value, width);
/* Copy the map's entries into a flat array so that they can be sorted. */
4794 size_t n = hmap_count (&t->clabels_values_map);
4795 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4797 struct ctables_value *clv;
4799 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4800 t->clabels_values[i++] = clv;
4801 t->n_clabels_values = n;
4804 sort (t->clabels_values, n, sizeof *t->clabels_values,
4805 compare_clabels_values_3way, &width);
/* The leaf index of a value is its rank in sorted order. */
4807 for (size_t i = 0; i < n; i++)
4808 t->clabels_values[i]->leaf = i;
/* Adds values of VAR to OCCURRENCES according to the category
   specifications in CATS.  Values written explicitly in a category are added
   directly; for the other kinds of category, the labeled values of VAR that
   satisfy the category's test are added.

   NOTE(review): most of the `case' labels of the dispatch on the category
   type are not visible in this listing, so the comments below describe each
   group of statements by what it does rather than by its category type. */
4812 ctables_add_category_occurrences (const struct variable *var,
4813 struct hmap *occurrences,
4814 const struct ctables_categories *cats)
4816 const struct val_labs *val_labs = var_get_value_labels (var);
4818 for (size_t i = 0; i < cats->n_cats; i++)
4820 const struct ctables_category *c = &cats->cats[i];
/* An explicit number. */
4824 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* An explicit string, copied into a temporary value and padded on the right
   with spaces to VAR's width. */
4830 int width = var_get_width (var);
4832 value_init (&value, width);
4833 value_copy_buf_rpad (&value, width,
4834 CHAR_CAST (uint8_t *, c->string.string),
4835 c->string.length, ' ');
4836 ctables_add_occurrence (var, &value, occurrences);
4837 value_destroy (&value, width);
/* Labeled values between nrange[0] and nrange[1], both ends included. */
4842 assert (var_is_numeric (var));
4843 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4844 vl = val_labs_next (val_labs, vl))
4845 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4846 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values accepted by in_string_range() for the category's srange. */
4850 assert (var_is_alpha (var));
4851 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4852 vl = val_labs_next (val_labs, vl))
4853 if (in_string_range (&vl->value, var, c->srange))
4854 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
4858 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4859 vl = val_labs_next (val_labs, vl))
4860 if (var_is_value_missing (var, &vl->value))
4861 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
4865 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4866 vl = val_labs_next (val_labs, vl))
4867 ctables_add_occurrence (var, &vl->value, occurrences);
4870 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing values unless the category has
   include_missing set. */
4880 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4881 vl = val_labs_next (val_labs, vl))
4882 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4883 ctables_add_occurrence (var, &vl->value, occurrences);
4886 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Visits the variable positions of section S in order, axis by axis; (A,
   A_IDX) is the position currently being assigned.  For the variable at that
   position, each value recorded in S->occurrences[A][A_IDX] is written into
   the scratch case C, its category is stored in CATS[A][A_IDX], and the
   recursion continues with the next position.  Each CCT_POSTCOMPUTE category
   of the variable is then tried the same way, without changing C.  Once
   every position has been assigned, ctables_cell_insert__() is called with
   the resulting combination. */
4893 ctables_section_recurse_add_empty_categories (
4894 struct ctables_section *s,
4895 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4896 enum pivot_axis_type a, size_t a_idx)
/* Past the last axis: the combination is complete. */
4898 if (a >= PIVOT_N_AXES)
4899 ctables_cell_insert__ (s, c, cats);
/* No nest on this axis, or no more variables in it: go on to the next
   axis. */
4900 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4901 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4904 const struct variable *var = s->nests[a]->vars[a_idx];
4905 const struct ctables_categories *categories = s->table->categories[
4906 var_get_dict_index (var)];
4907 int width = var_get_width (var);
4908 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4909 const struct ctables_occurrence *o;
4910 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace C's value for VAR with a copy of this occurrence's value. */
4912 union value *value = case_data_rw (c, var);
4913 value_destroy (value, width);
4914 value_clone (value, &o->value, width);
4915 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4916 assert (cats[a][a_idx] != NULL);
4917 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated separately from the occurrences. */
4920 for (size_t i = 0; i < categories->n_cats; i++)
4922 const struct ctables_category *cat = &categories->cats[i];
4923 if (cat->type == CCT_POSTCOMPUTE)
4925 cats[a][a_idx] = cat;
4926 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds to section S the cells for category combinations that are implied by
   the category specifications of its variables.  For each non-scale variable
   whose categories have show_empty set, the values implied by the
   specifications are added to the section's occurrences; a scratch case is
   then created and ctables_section_recurse_add_empty_categories() inserts
   the combinations.

   NOTE(review): what is done with the show_empty flag declared below is not
   visible in this listing. */
4933 ctables_section_add_empty_categories (struct ctables_section *s)
4935 bool show_empty = false;
4936 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4938 for (size_t k = 0; k < s->nests[a]->n; k++)
4939 if (k != s->nests[a]->scale_idx)
4941 const struct variable *var = s->nests[a]->vars[k];
4942 const struct ctables_categories *cats = s->table->categories[
4943 var_get_dict_index (var)];
4944 if (cats->show_empty)
4947 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension is a fixed 10 entries, which caps the number of
   variables per nest that can be handled here. */
4953 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
/* Scratch case with the dictionary's prototype, overwritten during the
   recursion. */
4954 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4955 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Runs the tables in CT against the active dataset DS.

   Allocates the sections of each table, makes one pass over the cases that
   accumulates every case into every section of every table (and collects
   category-label values for axes whose labels are shown on another axis),
   then finishes each table and outputs it.  Returns the result of
   proc_commit(). */
4960 ctables_execute (struct dataset *ds, struct ctables *ct)
4962 for (size_t i = 0; i < ct->n_tables; i++)
4964 struct ctables_table *t = ct->tables[i];
/* Room for one section per combination of a row, a column, and a layer
   nest, counting an empty stack as one. */
4965 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4966 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4967 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4968 sizeof *t->sections);
4969 size_t ix[PIVOT_N_AXES];
4970 ctables_table_add_section (t, 0, ix);
4973 struct casereader *input = proc_open (ds);
4974 bool warn_on_invalid = true;
4975 for (struct ccase *c = casereader_read (input); c;
4976 case_unref (c), c = casereader_read (input))
/* D_WEIGHT is the dictionary's case weight; E_WEIGHT comes from
   CT->e_weight when that variable is set. */
4978 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4980 double e_weight = (ct->e_weight
4981 ? var_force_valid_weight (ct->e_weight,
4982 case_num (c, ct->e_weight),
4986 for (size_t i = 0; i < ct->n_tables; i++)
4988 struct ctables_table *t = ct->tables[i];
4990 for (size_t j = 0; j < t->n_sections; j++)
4991 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Only axes whose labels are displayed on a different axis need their
   category-label values collected. */
4993 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4994 if (t->label_axis[a] != a)
4995 ctables_insert_clabels_values (t, c, a);
4998 casereader_destroy (input);
5000 for (size_t i = 0; i < ct->n_tables; i++)
5002 struct ctables_table *t = ct->tables[i];
5004 if (t->clabels_example)
5005 ctables_sort_clabels_values (t);
5007 for (size_t j = 0; j < t->n_sections; j++)
5008 ctables_section_add_empty_categories (&t->sections[j]);
5010 ctables_table_output (ct, ct->tables[i]);
5012 return proc_commit (ds);
/* Type of a function that parses one precedence level of a postcompute
   expression from the lexer and returns the parsed expression node.  The
   binary-operator parsers take one of these to parse their operands. */
5017 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
5018 struct dictionary *);
/* Releases the resources held by postcompute expression E: the string of a
   CTPO_CAT_STRING node, both strings of a CTPO_CAT_SRANGE node, both
   subexpressions (recursively) of the operator nodes, and E's message
   location.  The remaining category node types listed below own nothing
   else.

   NOTE(review): the `case' labels for the operator nodes, any null check on
   E, and the release of E itself are not visible in this listing. */
5021 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5027 case CTPO_CAT_STRING:
5028 ss_dealloc (&e->string);
5031 case CTPO_CAT_SRANGE:
5032 for (size_t i = 0; i < 2; i++)
5033 ss_dealloc (&e->srange[i]);
5042 for (size_t i = 0; i < 2; i++)
5043 ctables_pcexpr_destroy (e->subs[i]);
5047 case CTPO_CAT_NUMBER:
5048 case CTPO_CAT_NRANGE:
5049 case CTPO_CAT_MISSING:
5050 case CTPO_CAT_OTHERNM:
5051 case CTPO_CAT_SUBTOTAL:
5052 case CTPO_CAT_TOTAL:
5056 msg_location_destroy (e->location);
/* Allocates a new expression node that has SUB0 and SUB1 as its two
   subexpressions and whose location is the merger of their locations.

   NOTE(review): the initializer line that stores OP is not visible in this
   listing; presumably OP becomes the node's operation. */
5061 static struct ctables_pcexpr *
5062 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5063 struct ctables_pcexpr *sub0,
5064 struct ctables_pcexpr *sub1)
5066 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5067 *e = (struct ctables_pcexpr) {
5069 .subs = { sub0, sub1 },
5070 .location = msg_location_merged (sub0->location, sub1->location),
5075 /* How to parse an operator. */
/* Token that the lexer reports for the operator. */
5078 enum token_type token;
/* Postcompute operation that the token stands for. */
5079 enum ctables_postcompute_op op;
/* Searches the N_OPS operators in OPS for one whose token equals LEXER's
   current token.

   NOTE(review): the statements that consume the token and return are not
   visible in this listing.  A T_NEG_NUM token is singled out from the other
   tokens; presumably it is left in the lexer so that it can be parsed as the
   right-hand operand -- confirm. */
5082 static const struct operator *
5083 ctable_pcexpr_match_operator (struct lexer *lexer,
5084 const struct operator ops[], size_t n_ops)
5086 for (const struct operator *op = ops; op < ops + n_ops; op++)
5087 if (lex_token (lexer) == op->token)
5089 if (op->token != T_NEG_NUM)
/* Parses the rest of a chain of binary operators whose first operand LHS has
   already been parsed.  In each iteration an operator from the N_OPS entries
   of OPS is matched, its right-hand operand is parsed with PARSE_NEXT_LEVEL,
   and the two are combined into a binary node that becomes the new LHS, so
   the chain groups from the left.

   If CHAIN_WARNING is nonnull, it is issued as a warning at LHS's location
   when OP_COUNT exceeds 1.  LHS is destroyed on the path that follows the
   parse of the right-hand operand (presumably when that parse fails).

   NOTE(review): the tests for "no operator matched" and "operand failed",
   and the return statements, are not visible in this listing. */
5098 static struct ctables_pcexpr *
5099 ctable_pcexpr_parse_binary_operators__ (
5100 struct lexer *lexer, struct dictionary *dict,
5101 const struct operator ops[], size_t n_ops,
5102 parse_recursively_func *parse_next_level,
5103 const char *chain_warning, struct ctables_pcexpr *lhs)
5105 for (int op_count = 0; ; op_count++)
5107 const struct operator *op
5108 = ctable_pcexpr_match_operator (lexer, ops, n_ops);
5111 if (op_count > 1 && chain_warning)
5112 msg_at (SW, lhs->location, "%s", chain_warning);
5117 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5120 ctables_pcexpr_destroy (lhs);
5124 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses a chain of binary operators taken from the N_OPS entries of OPS:
   the first operand is parsed with PARSE_NEXT_LEVEL and the remainder of the
   chain is handled by ctable_pcexpr_parse_binary_operators__(), which also
   receives CHAIN_WARNING. */
5128 static struct ctables_pcexpr *
5129 ctable_pcexpr_parse_binary_operators (struct lexer *lexer,
5130 struct dictionary *dict,
5131 const struct operator ops[], size_t n_ops,
5132 parse_recursively_func *parse_next_level,
5133 const char *chain_warning)
5135 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5139 return ctable_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5141 chain_warning, lhs);
/* Forward declaration: the primary-expression parser calls the additive
   level to parse a parenthesized subexpression. */
5144 static struct ctables_pcexpr *ctable_pcexpr_parse_add (struct lexer *,
5145 struct dictionary *);
/* Returns, by value, a CTPO_CAT_NRANGE expression node whose numeric range
   has LOW and HIGH as its two bounds.  Fields not named in the initializer
   are zero. */
5147 static struct ctables_pcexpr
5148 ctpo_cat_nrange (double low, double high)
5150 return (struct ctables_pcexpr) {
5151 .op = CTPO_CAT_NRANGE,
5152 .nrange = { low, high },
5156 static struct ctables_pcexpr *
5157 ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5159 int start_ofs = lex_ofs (lexer);
5160 struct ctables_pcexpr e;
5161 if (lex_is_number (lexer))
5163 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5164 .number = lex_number (lexer) };
5167 else if (lex_match_id (lexer, "MISSING"))
5168 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5169 else if (lex_match_id (lexer, "OTHERNM"))
5170 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5171 else if (lex_match_id (lexer, "TOTAL"))
5172 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5173 else if (lex_match_id (lexer, "SUBTOTAL"))
5175 size_t subtotal_index = 0;
5176 if (lex_match (lexer, T_LBRACK))
5178 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5180 subtotal_index = lex_integer (lexer);
5182 if (!lex_force_match (lexer, T_RBRACK))
5185 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5186 .subtotal_index = subtotal_index };
5188 else if (lex_match (lexer, T_LBRACK))
5190 if (lex_match_id (lexer, "LO"))
5192 if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
5194 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5197 else if (lex_is_number (lexer))
5199 double number = lex_number (lexer);
5201 if (lex_match_id (lexer, "THRU"))
5203 if (lex_match_id (lexer, "HI"))
5204 e = ctpo_cat_nrange (number, DBL_MAX);
5207 if (!lex_force_num (lexer))
5209 e = ctpo_cat_nrange (number, lex_number (lexer));
5214 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5217 else if (lex_is_string (lexer))
5219 struct substring s = recode_substring_pool (
5220 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
5221 ss_rtrim (&s, ss_cstr (" "));
5223 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5228 lex_error (lexer, NULL);
5232 if (!lex_force_match (lexer, T_RBRACK))
5234 if (e.op == CTPO_CAT_STRING)
5235 ss_dealloc (&e.string);
5239 else if (lex_match (lexer, T_LPAREN))
5241 struct ctables_pcexpr *ep = ctable_pcexpr_parse_add (lexer, dict);
5244 if (!lex_force_match (lexer, T_RPAREN))
5246 ctables_pcexpr_destroy (ep);
5253 lex_error (lexer, NULL);
5257 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5258 return xmemdup (&e, sizeof e);
/* Allocates a new expression node for SUB whose location runs from token
   offset START_OFS through the token just before LEXER's current one.

   NOTE(review): the initializer lines that set the operation and attach SUB
   are not visible in this listing; going by the function's name, the node is
   presumably the negation of SUB. */
5261 static struct ctables_pcexpr *
5262 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5263 struct lexer *lexer, int start_ofs)
5265 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5266 *e = (struct ctables_pcexpr) {
5269 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level, whose operands are primary expressions.

   In the usual case this is a chain of T_EXP operators handled by
   ctable_pcexpr_parse_binary_operators(), with a warning about
   left-associativity passed along as the chain warning.

   When the current token is a negative number that is immediately followed
   by T_EXP, the number is instead turned into a constant of the opposite
   sign, the chain is parsed with that constant as its first operand, and the
   whole result is wrapped by ctables_pcexpr_allocate_neg(). */
5274 static struct ctables_pcexpr *
5275 ctable_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5277 static const struct operator op = { T_EXP, CTPO_POW };
5279 const char *chain_warning =
5280 _("The exponentiation operator (`**') is left-associative: "
5281 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5282 "To disable this warning, insert parentheses.");
5284 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5285 return ctable_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5286 ctable_pcexpr_parse_primary,
5289 /* Special case for situations like "-5**6", which must be parsed as
5292 int start_ofs = lex_ofs (lexer);
5293 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5294 *lhs = (struct ctables_pcexpr) {
5295 .op = CTPO_CONSTANT,
5296 .number = -lex_tokval (lexer),
5297 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5301 struct ctables_pcexpr *node = ctable_pcexpr_parse_binary_operators__ (
5302 lexer, dict, &op, 1,
5303 ctable_pcexpr_parse_primary, chain_warning, lhs);
5307 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5310 /* Parses the unary minus level. */
5311 static struct ctables_pcexpr *
5312 ctable_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5314 int start_ofs = lex_ofs (lexer);
/* Without a leading minus sign this level adds nothing: defer to the
   exponentiation level. */
5315 if (!lex_match (lexer, T_DASH))
5316 return ctable_pcexpr_parse_exp (lexer, dict);
/* Recursing at the same level accepts repeated minus signs. */
5318 struct ctables_pcexpr *inner = ctable_pcexpr_parse_neg (lexer, dict);
/* START_OFS was taken before the minus sign was consumed, so the new node's
   location includes it. */
5322 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5325 /* Parses the multiplication and division level. */
5326 static struct ctables_pcexpr *
5327 ctable_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5329 static const struct operator ops[] =
5331 { T_ASTERISK, CTPO_MUL },
5332 { T_SLASH, CTPO_DIV },
/* Operands come from the unary minus level; no chain warning is given at
   this level. */
5335 return ctable_pcexpr_parse_binary_operators (lexer, dict, ops,
5336 sizeof ops / sizeof *ops,
5337 ctable_pcexpr_parse_neg, NULL);
5340 /* Parses the addition and subtraction level. */
5341 static struct ctables_pcexpr *
5342 ctable_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5344 static const struct operator ops[] =
5346 { T_PLUS, CTPO_ADD },
5347 { T_DASH, CTPO_SUB },
/* A negative-number token in operator position maps to addition.
   NOTE(review): presumably the token itself is then parsed as the (negative)
   right-hand operand, so that "a -5" means a + (-5) -- confirm against
   ctable_pcexpr_match_operator(). */
5348 { T_NEG_NUM, CTPO_ADD },
/* Operands come from the multiplication level; no chain warning is given at
   this level. */
5351 return ctable_pcexpr_parse_binary_operators (lexer, dict,
5352 ops, sizeof ops / sizeof *ops,
5353 ctable_pcexpr_parse_mul, NULL);
/* Searches CT->postcomputes for a postcompute named NAME.  The hash is
   computed with utf8_hash_case_string() and the names are compared with
   utf8_strcasecmp(), so the match ignores case.

   NOTE(review): the return statements are not visible in this listing;
   callers test the result against null, so presumably the match is returned
   and a null pointer means "not found". */
5356 static struct ctables_postcompute *
5357 ctables_find_postcompute (struct ctables *ct, const char *name)
5359 struct ctables_postcompute *pc;
5360 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5361 utf8_hash_case_string (name, 0), &ct->postcomputes)
5362 if (!utf8_strcasecmp (pc->name, name))
/* Parses a postcompute definition of the form

       &name = EXPR(expression)

   from LEXER and stores it in the table of postcomputes.  If a postcompute
   with the same name already exists, a warning is issued together with a
   note that points at the previous definition, and the old expression and
   location are destroyed; otherwise a new entry is allocated and inserted.
   Either way the entry receives the new location and a label that is the
   source text of the expression.

   NOTE(review): the third parameter, the failure paths, the assignment of
   the parsed expression to the entry, and the return statements are not
   visible in this listing. */
5368 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* One token before the current one.  NOTE(review): presumably the
   subcommand keyword that the caller has already consumed -- confirm. */
5371 int pcompute_start = lex_ofs (lexer) - 1;
5373 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
5376 char *name = ss_xstrdup (lex_tokss (lexer));
5379 if (!lex_force_match (lexer, T_EQUALS)
5380 || !lex_force_match_id (lexer, "EXPR")
5381 || !lex_force_match (lexer, T_LPAREN))
/* EXPR_START through EXPR_END delimit the tokens of the expression alone,
   without the surrounding parentheses. */
5387 int expr_start = lex_ofs (lexer);
5388 struct ctables_pcexpr *expr = ctable_pcexpr_parse_add (lexer, dict);
5389 int expr_end = lex_ofs (lexer) - 1;
5390 if (!expr || !lex_force_match (lexer, T_RPAREN))
5395 int pcompute_end = lex_ofs (lexer) - 1;
5397 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5400 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5403 msg_at (SW, location, _("New definition of &%s will override the "
5404 "previous definition."),
5406 msg_at (SN, pc->location, _("This is the previous definition."));
5408 ctables_pcexpr_destroy (pc->expr);
5409 msg_location_destroy (pc->location);
/* First definition under this name: the new entry takes ownership of
   NAME. */
5414 pc = xmalloc (sizeof *pc);
5415 *pc = (struct ctables_postcompute) { .name = name };
5416 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5417 utf8_hash_case_string (pc->name, 0));
5420 pc->location = location;
/* NOTE(review): on a redefinition, confirm that the previous label is
   released before it is overwritten here. */
5422 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses a list of summary specifications, each consisting of a summary
   function, a percentile when the function is CTSF_PTILE, and a format
   specifier, appending them to *SSS, which is first reset to an empty set.
   Parsing stops at the end of the command, at a slash, or at the identifier
   LABEL or HIDESOURCECATS.

   NOTE(review): the return statements are not visible in this listing; the
   caller treats a false result as failure.  The final statement releases
   *SSS, presumably on the error path only. */
5427 ctables_parse_pproperties_format (struct lexer *lexer,
5428 struct ctables_summary_spec_set *sss)
5430 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5432 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5433 && !(lex_token (lexer) == T_ID
5434 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5435 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5436 lex_tokss (lexer)))))
5438 /* Parse function. */
5439 enum ctables_summary_function function;
5440 if (!parse_ctables_summary_function (lexer, &function))
5443 /* Parse percentile. */
5444 double percentile = 0;
5445 if (function == CTSF_PTILE)
5447 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5449 percentile = lex_number (lexer);
5454 struct fmt_spec format;
5455 bool is_ctables_format;
5456 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
/* Grow the array if it is full, then append the new specification. */
5459 if (sss->n >= sss->allocated)
5460 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5461 sizeof *sss->specs);
5462 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5463 .function = function,
5464 .percentile = percentile,
5466 .is_ctables_format = is_ctables_format,
5472 ctables_summary_spec_set_uninit (sss);
/* Parses a list of `&name' references to postcomputes that already exist in
   CT, followed by any number of LABEL, FORMAT, and HIDESOURCECATS settings,
   each of which is applied to every postcompute in the list.  An unknown
   name or an unexpected keyword is reported as an error.

   NOTE(review): the cleanup of PCS and the return statements are not visible
   in this listing. */
5477 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5479 struct ctables_postcompute **pcs = NULL;
5481 size_t allocated_pcs = 0;
/* Collect the postcomputes that the settings below apply to. */
5483 while (lex_match (lexer, T_AND))
5485 if (!lex_force_id (lexer))
5487 struct ctables_postcompute *pc
5488 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5491 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5496 if (n_pcs >= allocated_pcs)
5497 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5501 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5503 if (lex_match_id (lexer, "LABEL"))
5505 lex_match (lexer, T_EQUALS);
5506 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string. */
5509 for (size_t i = 0; i < n_pcs; i++)
5511 free (pcs[i]->label);
5512 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5517 else if (lex_match_id (lexer, "FORMAT"))
5519 lex_match (lexer, T_EQUALS);
5521 struct ctables_summary_spec_set sss;
5522 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets its own clone of the parsed set; the parsed set
   itself is released after the loop. */
5525 for (size_t i = 0; i < n_pcs; i++)
5528 ctables_summary_spec_set_uninit (pcs[i]->specs);
5530 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5531 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5533 ctables_summary_spec_set_uninit (&sss);
5535 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5537 lex_match (lexer, T_EQUALS);
5538 bool hide_source_cats;
5539 if (!parse_bool (lexer, &hide_source_cats))
5541 for (size_t i = 0; i < n_pcs; i++)
5542 pcs[i]->hide_source_cats = hide_source_cats;
5546 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Converts NOW to local time, formats it with strftime() according to
   FORMAT, and appends the resulting text to OUT.

   NOTE(review): in the visible lines neither the localtime() result nor
   the strftime() return value is checked; if strftime() returns 0 the
   contents of VALUE are unspecified -- confirm that the buffer is large
   enough for every format passed in. */
5559 put_strftime (struct string *out, time_t now, const char *format)
5561 const struct tm *tm = localtime (&now);
5563 strftime (value, sizeof value, format, tm);
5564 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   NOTE(review): the return statements are not visible in this excerpt;
   put_title_text() uses the result as a truth value, so this presumably
   reports whether the prefix was found and skipped. */
5568 skip_prefix (struct substring *s, struct substring prefix)
5570 if (ss_starts_with (*s, prefix))
5572 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens at lexer offsets
   EXPR_START (inclusive) through EXPR_END (exclusive).  Identifiers that
   name a variable in DICT with a variable label are written as that
   label; other tokens are written using their source representation.

   NOTE(review): the handling of bracketed tokens (T_LBRACK/T_RBRACK and
   the NEST counter) is only partly visible in this excerpt. */
5580 put_table_expression (struct string *out, struct lexer *lexer,
5581 struct dictionary *dict, int expr_start, int expr_end)
5584 for (int ofs = expr_start; ofs < expr_end; ofs++)
5586 const struct token *t = lex_ofs_token (lexer, ofs);
5587 if (t->type == T_LBRACK)
5589 else if (t->type == T_RBRACK && nest > 0)
/* Prefer the variable's label; fall back to the identifier text when the
   name is not a variable or the variable has no label. */
5595 else if (t->type == T_ID)
5597 const struct variable *var
5598 = dict_lookup_var (dict, t->string.string);
5599 const char *label = var ? var_get_label (var) : NULL;
5600 ds_put_cstr (out, label ? label : t->string.string);
/* Separate tokens with single spaces, but put no space before a right
   parenthesis, after a left parenthesis, or after the last token. */
5604 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5605 ds_put_byte (out, ' ');
5607 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5608 ds_put_cstr (out, repr);
5611 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5612 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the placeholders that begin
   with a right parenthesis:

     )DATE   NOW formatted with strftime "%x".
     )TIME   NOW formatted with strftime "%X".
     )TABLE  the table expression spanning lexer offsets EXPR_START to
             EXPR_END, as rendered by put_table_expression().

   Any other right parenthesis is copied through literally. */
5618 put_title_text (struct string *out, struct substring in, time_t now,
5619 struct lexer *lexer, struct dictionary *dict,
5620 int expr_start, int expr_end)
/* Copy everything up to the next ')' verbatim, then stop if nothing
   remains. */
5624 size_t chunk = ss_find_byte (in, ')');
5625 ds_put_substring (out, ss_head (in, chunk));
5626 ss_advance (&in, chunk);
5627 if (ss_is_empty (in))
5630 if (skip_prefix (&in, ss_cstr (")DATE")))
5631 put_strftime (out, now, "%x");
5632 else if (skip_prefix (&in, ss_cstr (")TIME")))
5633 put_strftime (out, now, "%X");
5634 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5635 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a recognized placeholder: emit the ')' itself and move on. */
5638 ds_put_byte (out, ')');
5639 ss_advance (&in, 1);
5645 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5647 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5648 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5649 enum settings_value_show tvars = settings_get_show_variables ();
5650 for (size_t i = 0; i < n_vars; i++)
5651 vlabels[i] = (enum ctables_vlabel) tvars;
5653 struct pivot_table_look *look = pivot_table_look_unshare (
5654 pivot_table_look_ref (pivot_table_look_get_default ()));
5655 look->omit_empty = false;
5657 struct ctables *ct = xmalloc (sizeof *ct);
5658 *ct = (struct ctables) {
5659 .dict = dataset_dict (ds),
5661 .ctables_formats = FMT_SETTINGS_INIT,
5663 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5666 time_t now = time (NULL);
5671 const char *dot_string;
5672 const char *comma_string;
5674 static const struct ctf ctfs[4] = {
5675 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5676 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5677 { CTEF_PAREN, "-,(,),", "-.(.)." },
5678 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5680 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5681 for (size_t i = 0; i < 4; i++)
5683 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5684 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5685 fmt_number_style_from_string (s));
5688 if (!lex_force_match (lexer, T_SLASH))
5691 while (!lex_match_id (lexer, "TABLE"))
5693 if (lex_match_id (lexer, "FORMAT"))
5695 double widths[2] = { SYSMIS, SYSMIS };
5696 double units_per_inch = 72.0;
5698 while (lex_token (lexer) != T_SLASH)
5700 if (lex_match_id (lexer, "MINCOLWIDTH"))
5702 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5705 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5707 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5710 else if (lex_match_id (lexer, "UNITS"))
5712 lex_match (lexer, T_EQUALS);
5713 if (lex_match_id (lexer, "POINTS"))
5714 units_per_inch = 72.0;
5715 else if (lex_match_id (lexer, "INCHES"))
5716 units_per_inch = 1.0;
5717 else if (lex_match_id (lexer, "CM"))
5718 units_per_inch = 2.54;
5721 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
5725 else if (lex_match_id (lexer, "EMPTY"))
5730 lex_match (lexer, T_EQUALS);
5731 if (lex_match_id (lexer, "ZERO"))
5733 /* Nothing to do. */
5735 else if (lex_match_id (lexer, "BLANK"))
5736 ct->zero = xstrdup ("");
5737 else if (lex_force_string (lexer))
5739 ct->zero = ss_xstrdup (lex_tokss (lexer));
5745 else if (lex_match_id (lexer, "MISSING"))
5747 lex_match (lexer, T_EQUALS);
5748 if (!lex_force_string (lexer))
5752 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
5753 ? ss_xstrdup (lex_tokss (lexer))
5759 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
5760 "UNITS", "EMPTY", "MISSING");
5765 if (widths[0] != SYSMIS && widths[1] != SYSMIS
5766 && widths[0] > widths[1])
5768 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
5772 for (size_t i = 0; i < 2; i++)
5773 if (widths[i] != SYSMIS)
5775 int *wr = ct->look->width_ranges[TABLE_HORZ];
5776 wr[i] = widths[i] / units_per_inch * 96.0;
5781 else if (lex_match_id (lexer, "VLABELS"))
5783 if (!lex_force_match_id (lexer, "VARIABLES"))
5785 lex_match (lexer, T_EQUALS);
5787 struct variable **vars;
5789 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
5793 if (!lex_force_match_id (lexer, "DISPLAY"))
5798 lex_match (lexer, T_EQUALS);
5800 enum ctables_vlabel vlabel;
5801 if (lex_match_id (lexer, "DEFAULT"))
5802 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
5803 else if (lex_match_id (lexer, "NAME"))
5805 else if (lex_match_id (lexer, "LABEL"))
5806 vlabel = CTVL_LABEL;
5807 else if (lex_match_id (lexer, "BOTH"))
5809 else if (lex_match_id (lexer, "NONE"))
5813 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
5819 for (size_t i = 0; i < n_vars; i++)
5820 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5823 else if (lex_match_id (lexer, "MRSETS"))
5825 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5827 lex_match (lexer, T_EQUALS);
5828 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5831 else if (lex_match_id (lexer, "SMISSING"))
5833 if (lex_match_id (lexer, "VARIABLE"))
5834 ct->smissing_listwise = false;
5835 else if (lex_match_id (lexer, "LISTWISE"))
5836 ct->smissing_listwise = true;
5839 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5843 else if (lex_match_id (lexer, "PCOMPUTE"))
5845 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
5848 else if (lex_match_id (lexer, "PPROPERTIES"))
5850 if (!ctables_parse_pproperties (lexer, ct))
5853 else if (lex_match_id (lexer, "WEIGHT"))
5855 if (!lex_force_match_id (lexer, "VARIABLE"))
5857 lex_match (lexer, T_EQUALS);
5858 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* HIDESMALLCOUNTS [COUNT=n].  The keyword given to lex_match_id() must not
   contain whitespace: no identifier token can match " HIDESMALLCOUNTS", so
   with a stray leading space this branch was unreachable and the
   subcommand always ended in the "expecting ..." error that follows. */
5862 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
5864 if (lex_match_id (lexer, "COUNT"))
5866 lex_match (lexer, T_EQUALS);
5867 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5870 ct->hide_threshold = lex_integer (lexer);
/* Without an explicit COUNT, use a threshold of 5 unless one has already
   been set. */
5873 else if (ct->hide_threshold == 0)
5874 ct->hide_threshold = 5;
5878 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5879 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5880 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5884 if (!lex_force_match (lexer, T_SLASH))
5888 size_t allocated_tables = 0;
5891 if (ct->n_tables >= allocated_tables)
5892 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5893 sizeof *ct->tables);
5895 struct ctables_category *cat = xmalloc (sizeof *cat);
5896 *cat = (struct ctables_category) {
5898 .include_missing = false,
5899 .sort_ascending = true,
5902 struct ctables_categories *c = xmalloc (sizeof *c);
5903 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5904 *c = (struct ctables_categories) {
5911 struct ctables_categories **categories = xnmalloc (n_vars,
5912 sizeof *categories);
5913 for (size_t i = 0; i < n_vars; i++)
5916 struct ctables_table *t = xmalloc (sizeof *t);
5917 *t = (struct ctables_table) {
5919 .slabels_axis = PIVOT_AXIS_COLUMN,
5920 .slabels_visible = true,
5921 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5923 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5924 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5925 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5927 .clabels_from_axis = PIVOT_AXIS_LAYER,
5928 .categories = categories,
5929 .n_categories = n_vars,
5932 ct->tables[ct->n_tables++] = t;
5934 lex_match (lexer, T_EQUALS);
5935 int expr_start = lex_ofs (lexer);
5936 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5938 if (lex_match (lexer, T_BY))
5940 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5941 ct, t, PIVOT_AXIS_COLUMN))
5944 if (lex_match (lexer, T_BY))
5946 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5947 ct, t, PIVOT_AXIS_LAYER))
5951 int expr_end = lex_ofs (lexer);
5953 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5954 && !t->axes[PIVOT_AXIS_LAYER])
5956 lex_error (lexer, _("At least one variable must be specified."));
5960 const struct ctables_axis *scales[PIVOT_N_AXES];
5961 size_t n_scales = 0;
5962 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5964 scales[a] = find_scale (t->axes[a]);
5970 msg (SE, _("Scale variables may appear only on one axis."));
5971 if (scales[PIVOT_AXIS_ROW])
5972 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5973 _("This scale variable appears on the rows axis."));
5974 if (scales[PIVOT_AXIS_COLUMN])
5975 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5976 _("This scale variable appears on the columns axis."));
5977 if (scales[PIVOT_AXIS_LAYER])
5978 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5979 _("This scale variable appears on the layer axis."));
5983 const struct ctables_axis *summaries[PIVOT_N_AXES];
5984 size_t n_summaries = 0;
5985 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5987 summaries[a] = (scales[a]
5989 : find_categorical_summary_spec (t->axes[a]));
5993 if (n_summaries > 1)
5995 msg (SE, _("Summaries may appear only on one axis."));
5996 if (summaries[PIVOT_AXIS_ROW])
5997 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5998 _("This variable on the rows axis has a summary."));
5999 if (summaries[PIVOT_AXIS_COLUMN])
6000 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
6001 _("This variable on the columns axis has a summary."));
6002 if (summaries[PIVOT_AXIS_LAYER])
6003 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
6004 _("This variable on the layers axis has a summary."));
6007 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
6008 if (n_summaries ? summaries[a] : t->axes[a])
6010 t->summary_axis = a;
6014 if (lex_token (lexer) == T_ENDCMD)
6016 if (!ctables_prepare_table (t))
6020 if (!lex_force_match (lexer, T_SLASH))
6023 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6025 if (lex_match_id (lexer, "SLABELS"))
6027 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6029 if (lex_match_id (lexer, "POSITION"))
6031 lex_match (lexer, T_EQUALS);
6032 if (lex_match_id (lexer, "COLUMN"))
6033 t->slabels_axis = PIVOT_AXIS_COLUMN;
6034 else if (lex_match_id (lexer, "ROW"))
6035 t->slabels_axis = PIVOT_AXIS_ROW;
6036 else if (lex_match_id (lexer, "LAYER"))
6037 t->slabels_axis = PIVOT_AXIS_LAYER;
6040 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6044 else if (lex_match_id (lexer, "VISIBLE"))
6046 lex_match (lexer, T_EQUALS);
6047 if (!parse_bool (lexer, &t->slabels_visible))
6052 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6057 else if (lex_match_id (lexer, "CLABELS"))
6059 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6061 if (lex_match_id (lexer, "AUTO"))
6063 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6064 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6066 else if (lex_match_id (lexer, "ROWLABELS"))
6068 lex_match (lexer, T_EQUALS);
6069 if (lex_match_id (lexer, "OPPOSITE"))
6070 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6071 else if (lex_match_id (lexer, "LAYER"))
6072 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6075 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6079 else if (lex_match_id (lexer, "COLLABELS"))
6081 lex_match (lexer, T_EQUALS);
6082 if (lex_match_id (lexer, "OPPOSITE"))
6083 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6084 else if (lex_match_id (lexer, "LAYER"))
6085 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6088 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6094 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6100 else if (lex_match_id (lexer, "CRITERIA"))
6102 if (!lex_force_match_id (lexer, "CILEVEL"))
6104 lex_match (lexer, T_EQUALS);
6106 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6108 t->cilevel = lex_number (lexer);
6111 else if (lex_match_id (lexer, "CATEGORIES"))
6113 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6117 else if (lex_match_id (lexer, "TITLES"))
6122 if (lex_match_id (lexer, "CAPTION"))
6123 textp = &t->caption;
6124 else if (lex_match_id (lexer, "CORNER"))
6126 else if (lex_match_id (lexer, "TITLE"))
6130 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6133 lex_match (lexer, T_EQUALS);
6135 struct string s = DS_EMPTY_INITIALIZER;
6136 while (lex_is_string (lexer))
6138 if (!ds_is_empty (&s))
6139 ds_put_byte (&s, ' ');
6140 put_title_text (&s, lex_tokss (lexer), now,
6141 lexer, dataset_dict (ds),
6142 expr_start, expr_end);
6146 *textp = ds_steal_cstr (&s);
6148 while (lex_token (lexer) != T_SLASH
6149 && lex_token (lexer) != T_ENDCMD);
6151 else if (lex_match_id (lexer, "SIGTEST"))
6155 t->chisq = xmalloc (sizeof *t->chisq);
6156 *t->chisq = (struct ctables_chisq) {
6158 .include_mrsets = true,
6159 .all_visible = true,
6165 if (lex_match_id (lexer, "TYPE"))
6167 lex_match (lexer, T_EQUALS);
6168 if (!lex_force_match_id (lexer, "CHISQUARE"))
6171 else if (lex_match_id (lexer, "ALPHA"))
6173 lex_match (lexer, T_EQUALS);
6174 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6176 t->chisq->alpha = lex_number (lexer);
/* SIGTEST INCLUDEMRSETS=<boolean>.  Every other parse_bool() call in this
   parser treats a false result as a parse failure; the test here lacked
   the negation, so a valid setting took the failure path and an invalid
   one was silently accepted. */
6179 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6181 lex_match (lexer, T_EQUALS);
6182 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6185 else if (lex_match_id (lexer, "CATEGORIES"))
6187 lex_match (lexer, T_EQUALS);
6188 if (lex_match_id (lexer, "ALLVISIBLE"))
6189 t->chisq->all_visible = true;
6190 else if (lex_match_id (lexer, "SUBTOTALS"))
6191 t->chisq->all_visible = false;
6194 lex_error_expecting (lexer,
6195 "ALLVISIBLE", "SUBTOTALS");
6201 lex_error_expecting (lexer, "TYPE", "ALPHA",
6202 "INCLUDEMRSETS", "CATEGORIES");
6206 while (lex_token (lexer) != T_SLASH
6207 && lex_token (lexer) != T_ENDCMD);
6209 else if (lex_match_id (lexer, "COMPARETEST"))
6213 t->pairwise = xmalloc (sizeof *t->pairwise);
6214 *t->pairwise = (struct ctables_pairwise) {
6216 .alpha = { .05, .05 },
6217 .adjust = BONFERRONI,
6218 .include_mrsets = true,
6219 .meansvariance_allcats = true,
6220 .all_visible = true,
6229 if (lex_match_id (lexer, "TYPE"))
6231 lex_match (lexer, T_EQUALS);
6232 if (lex_match_id (lexer, "PROP"))
6233 t->pairwise->type = PROP;
6234 else if (lex_match_id (lexer, "MEAN"))
6235 t->pairwise->type = MEAN;
6238 lex_error_expecting (lexer, "PROP", "MEAN");
6242 else if (lex_match_id (lexer, "ALPHA"))
6244 lex_match (lexer, T_EQUALS);
6246 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6248 double a0 = lex_number (lexer);
6251 lex_match (lexer, T_COMMA);
6252 if (lex_is_number (lexer))
6254 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6256 double a1 = lex_number (lexer);
6259 t->pairwise->alpha[0] = MIN (a0, a1);
6260 t->pairwise->alpha[1] = MAX (a0, a1);
6263 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6265 else if (lex_match_id (lexer, "ADJUST"))
6267 lex_match (lexer, T_EQUALS);
6268 if (lex_match_id (lexer, "BONFERRONI"))
6269 t->pairwise->adjust = BONFERRONI;
6270 else if (lex_match_id (lexer, "BH"))
6271 t->pairwise->adjust = BH;
6272 else if (lex_match_id (lexer, "NONE"))
6273 t->pairwise->adjust = 0;
6276 lex_error_expecting (lexer, "BONFERRONI", "BH",
6281 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6283 lex_match (lexer, T_EQUALS);
6284 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6287 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6289 lex_match (lexer, T_EQUALS);
6290 if (lex_match_id (lexer, "ALLCATS"))
6291 t->pairwise->meansvariance_allcats = true;
6292 else if (lex_match_id (lexer, "TESTEDCATS"))
6293 t->pairwise->meansvariance_allcats = false;
6296 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6300 else if (lex_match_id (lexer, "CATEGORIES"))
6302 lex_match (lexer, T_EQUALS);
6303 if (lex_match_id (lexer, "ALLVISIBLE"))
6304 t->pairwise->all_visible = true;
6305 else if (lex_match_id (lexer, "SUBTOTALS"))
6306 t->pairwise->all_visible = false;
6309 lex_error_expecting (lexer, "ALLVISIBLE",
6314 else if (lex_match_id (lexer, "MERGE"))
6316 lex_match (lexer, T_EQUALS);
6317 if (!parse_bool (lexer, &t->pairwise->merge))
6320 else if (lex_match_id (lexer, "STYLE"))
6322 lex_match (lexer, T_EQUALS);
6323 if (lex_match_id (lexer, "APA"))
6324 t->pairwise->apa_style = true;
6325 else if (lex_match_id (lexer, "SIMPLE"))
6326 t->pairwise->apa_style = false;
6329 lex_error_expecting (lexer, "APA", "SIMPLE");
6333 else if (lex_match_id (lexer, "SHOWSIG"))
6335 lex_match (lexer, T_EQUALS);
6336 if (!parse_bool (lexer, &t->pairwise->show_sig))
6341 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6342 "INCLUDEMRSETS", "MEANSVARIANCE",
6343 "CATEGORIES", "MERGE", "STYLE",
6348 while (lex_token (lexer) != T_SLASH
6349 && lex_token (lexer) != T_ENDCMD);
6353 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6354 "CRITERIA", "CATEGORIES", "TITLES",
6355 "SIGTEST", "COMPARETEST");
6359 if (!lex_match (lexer, T_SLASH))
6363 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6364 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6366 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6370 if (!ctables_prepare_table (t))
6373 while (lex_token (lexer) != T_ENDCMD);
6375 bool ok = ctables_execute (ds, ct);
6376 ctables_destroy (ct);
6377 return ok ? CMD_SUCCESS : CMD_FAILURE;
6380 ctables_destroy (ct);