1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-in.h"
25 #include "data/data-out.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/mrset.h"
29 #include "data/subcase.h"
30 #include "data/value-labels.h"
31 #include "language/command.h"
32 #include "language/lexer/format-parser.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/token.h"
35 #include "language/lexer/variable-parser.h"
36 #include "libpspp/array.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/hash-functions.h"
39 #include "libpspp/hmap.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/message.h"
42 #include "libpspp/string-array.h"
43 #include "math/mode.h"
44 #include "math/moments.h"
45 #include "math/percentiles.h"
46 #include "math/sort.h"
47 #include "output/pivot-table.h"
49 #include "gl/minmax.h"
50 #include "gl/xalloc.h"
53 #define _(msgid) gettext (msgid)
54 #define N_(msgid) (msgid)
58 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
59 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
60 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
61 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
65 - unweighted summaries (U*)
66 - lower confidence limits (*.LCL)
67 - upper confidence limits (*.UCL)
68 - standard error (*.SE)
71 /* All variables. */ \
72 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
73 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
74 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
96 /* All variables (unweighted.) */ \
97 S(CTSF_UCOUNT, "UCOUNT", N_("Unweighted Count"), CTF_COUNT, CTFA_ALL) \
98 S(CTSF_UROWPCT_COUNT, "UROWPCT.COUNT", N_("Unweighted Row %"), CTF_PERCENT, CTFA_ALL) \
99 S(CTSF_UCOLPCT_COUNT, "UCOLPCT.COUNT", N_("Unweighted Column %"), CTF_PERCENT, CTFA_ALL) \
100 S(CTSF_UTABLEPCT_COUNT, "UTABLEPCT.COUNT", N_("Unweighted Table %"), CTF_PERCENT, CTFA_ALL) \
101 S(CTSF_USUBTABLEPCT_COUNT, "USUBTABLEPCT.COUNT", N_("Unweighted Subtable %"), CTF_PERCENT, CTFA_ALL) \
102 S(CTSF_ULAYERPCT_COUNT, "ULAYERPCT.COUNT", N_("Unweighted Layer %"), CTF_PERCENT, CTFA_ALL) \
103 S(CTSF_ULAYERROWPCT_COUNT, "ULAYERROWPCT.COUNT", N_("Unweighted Layer Row %"), CTF_PERCENT, CTFA_ALL) \
104 S(CTSF_ULAYERCOLPCT_COUNT, "ULAYERCOLPCT.COUNT", N_("Unweighted Layer Column %"), CTF_PERCENT, CTFA_ALL) \
105 S(CTSF_UROWPCT_VALIDN, "UROWPCT.VALIDN", N_("Unweighted Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
106 S(CTSF_UCOLPCT_VALIDN, "UCOLPCT.VALIDN", N_("Unweighted Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
107 S(CTSF_UTABLEPCT_VALIDN, "UTABLEPCT.VALIDN", N_("Unweighted Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
108 S(CTSF_USUBTABLEPCT_VALIDN, "USUBTABLEPCT.VALIDN", N_("Unweighted Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
109 S(CTSF_ULAYERPCT_VALIDN, "ULAYERPCT.VALIDN", N_("Unweighted Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
110 S(CTSF_ULAYERROWPCT_VALIDN, "ULAYERROWPCT.VALIDN", N_("Unweighted Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
111 S(CTSF_ULAYERCOLPCT_VALIDN, "ULAYERCOLPCT.VALIDN", N_("Unweighted Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
112 S(CTSF_UROWPCT_TOTALN, "UROWPCT.TOTALN", N_("Unweighted Row Total N %"), CTF_PERCENT, CTFA_ALL) \
113 S(CTSF_UCOLPCT_TOTALN, "UCOLPCT.TOTALN", N_("Unweighted Column Total N %"), CTF_PERCENT, CTFA_ALL) \
114 S(CTSF_UTABLEPCT_TOTALN, "UTABLEPCT.TOTALN", N_("Unweighted Table Total N %"), CTF_PERCENT, CTFA_ALL) \
115 S(CTSF_USUBTABLEPCT_TOTALN, "USUBTABLEPCT.TOTALN", N_("Unweighted Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
116 S(CTSF_ULAYERPCT_TOTALN, "ULAYERPCT.TOTALN", N_("Unweighted Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
117 S(CTSF_ULAYERROWPCT_TOTALN, "ULAYERROWPCT.TOTALN", N_("Unweighted Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
118 S(CTSF_ULAYERCOLPCT_TOTALN, "ULAYERCOLPCT.TOTALN", N_("Unweighted Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
120 /* Scale variables, totals, and subtotals. */ \
121 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
122 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
123 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
124 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
125 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
126 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
127 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
128 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
129 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
130 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
131 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
132 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
133 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
134 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
135 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
136 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
137 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
138 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
139 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
140 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
141 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
142 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
143 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
145 /* Scale variables, totals, and subtotals (unweighted). */ \
146 S(CTSF_UMEAN, "UMEAN", N_("Unweighted Mean"), CTF_GENERAL, CTFA_SCALE) \
147 S(CTSF_UMEDIAN, "UMEDIAN", N_("Unweighted Median"), CTF_GENERAL, CTFA_SCALE) \
148 S(CTSF_UMISSING, "UMISSING", N_("Unweighted Missing"), CTF_GENERAL, CTFA_SCALE) \
149 S(CTSF_UMODE, "UMODE", N_("Unweighted Mode"), CTF_GENERAL, CTFA_SCALE) \
150 S(CTSF_UPTILE, "UPTILE", N_("Unweighted Percentile"), CTF_GENERAL, CTFA_SCALE) \
151 S(CTSF_USEMEAN, "USEMEAN", N_("Unweighted Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
152 S(CTSF_USTDDEV, "USTDDEV", N_("Unweighted Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
153 S(CTSF_USUM, "USUM", N_("Unweighted Sum"), CTF_GENERAL, CTFA_SCALE) \
154 S(CSTF_UTOTALN, "UTOTALN", N_("Unweighted Total N"), CTF_COUNT, CTFA_SCALE) \
155 S(CTSF_UVALIDN, "UVALIDN", N_("Unweighted Valid N"), CTF_COUNT, CTFA_SCALE) \
156 S(CTSF_UVARIANCE, "UVARIANCE", N_("Unweighted Variance"), CTF_GENERAL, CTFA_SCALE) \
157 S(CTSF_UROWPCT_SUM, "UROWPCT.SUM", N_("Unweighted Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
158 S(CTSF_UCOLPCT_SUM, "UCOLPCT.SUM", N_("Unweighted Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
159 S(CTSF_UTABLEPCT_SUM, "UTABLEPCT.SUM", N_("Unweighted Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
160 S(CTSF_USUBTABLEPCT_SUM, "USUBTABLEPCT.SUM", N_("Unweighted Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
161 S(CTSF_ULAYERPCT_SUM, "ULAYERPCT.SUM", N_("Unweighted Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
162 S(CTSF_ULAYERROWPCT_SUM, "ULAYERROWPCT.SUM", N_("Unweighted Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
163 S(CTSF_ULAYERCOLPCT_SUM, "ULAYERCOLPCT.SUM", N_("Unweighted Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
165 #if 0 /* Multiple response sets not yet implemented. */
166 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
167 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
168 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
169 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
170 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
171 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
172 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
173 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
174 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
175 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
176 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
177 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
178 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
179 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
180 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
181 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
182 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
183 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
184 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
185 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
186 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
187 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
190 enum ctables_summary_function
192 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
198 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
199 N_CTSF_FUNCTIONS = SUMMARIES
203 static bool ctables_summary_function_is_count (enum ctables_summary_function);
205 enum ctables_domain_type
207 /* Within a section, where stacked variables divide one section from
209 CTDT_TABLE, /* All layers of a whole section. */
210 CTDT_LAYER, /* One layer within a section. */
211 CTDT_LAYERROW, /* Row in one layer within a section. */
212 CTDT_LAYERCOL, /* Column in one layer within a section. */
214 /* Within a subtable, where a subtable pairs an innermost row variable with
215 an innermost column variable within a single layer. */
216 CTDT_SUBTABLE, /* Whole subtable. */
217 CTDT_ROW, /* Row within a subtable. */
218 CTDT_COL, /* Column within a subtable. */
222 struct ctables_domain
224 struct hmap_node node;
226 const struct ctables_cell *example;
228 double d_valid; /* Dictionary weight. */
231 double e_valid; /* Effective weight */
234 double u_valid; /* Unweighted. */
239 enum ctables_summary_variant
248 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
249 all the axes (except the scalar variable, if any). */
250 struct hmap_node node;
252 /* The domains that contain this cell. */
253 uint32_t omit_domains;
254 struct ctables_domain *domains[N_CTDTS];
259 enum ctables_summary_variant sv;
261 struct ctables_cell_axis
263 struct ctables_cell_value
265 const struct ctables_category *category;
273 union ctables_summary *summaries;
280 const struct dictionary *dict;
281 struct pivot_table_look *look;
283 /* CTABLES has a number of extra formats that we implement via custom
284 currency specifications on an alternate fmt_settings. */
285 #define CTEF_NEGPAREN FMT_CCA
286 #define CTEF_NEQUAL FMT_CCB
287 #define CTEF_PAREN FMT_CCC
288 #define CTEF_PCTPAREN FMT_CCD
289 struct fmt_settings ctables_formats;
291 /* If this is NULL, zeros are displayed using the normal print format.
292 Otherwise, this string is displayed. */
295 /* If this is NULL, missing values are displayed using the normal print
296 format. Otherwise, this string is displayed. */
299 /* Indexed by variable dictionary index. */
300 enum ctables_vlabel *vlabels;
302 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
304 bool mrsets_count_duplicates; /* MRSETS. */
305 bool smissing_listwise; /* SMISSING. */
306 struct variable *e_weight; /* WEIGHT. */
307 int hide_threshold; /* HIDESMALLCOUNTS. */
309 struct ctables_table **tables;
313 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* One postcompute definition.  These are kept in struct ctables's
   'postcomputes' hmap. */
316 struct ctables_postcompute
318 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
319 char *name; /* Name, without leading &. */
321 struct msg_location *location; /* Location of definition. */
322 struct ctables_pcexpr *expr; /* Expression (see struct ctables_pcexpr). */
324 struct ctables_summary_spec_set *specs;
325 bool hide_source_cats;
328 struct ctables_pcexpr
338 enum ctables_postcompute_op
341 CTPO_CONSTANT, /* 5 */
342 CTPO_CAT_NUMBER, /* [5] */
343 CTPO_CAT_STRING, /* ["STRING"] */
344 CTPO_CAT_NRANGE, /* [LO THRU 5] */
345 CTPO_CAT_SRANGE, /* ["A" THRU "B"] */
346 CTPO_CAT_MISSING, /* MISSING */
347 CTPO_CAT_OTHERNM, /* OTHERNM */
348 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
349 CTPO_CAT_TOTAL, /* TOTAL */
363 /* CTPO_CAT_NUMBER. */
366 /* CTPO_CAT_STRING, in dictionary encoding. */
367 struct substring string;
369 /* CTPO_CAT_NRANGE. */
372 /* CTPO_CAT_SRANGE. */
373 struct substring srange[2];
375 /* CTPO_CAT_SUBTOTAL. */
376 size_t subtotal_index;
378 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
379 One element: CTPO_NEG. */
380 struct ctables_pcexpr *subs[2];
383 /* Source location. */
384 struct msg_location *location;
387 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
388 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
389 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
390 struct ctables_pcexpr *sub1);
392 struct ctables_summary_spec_set
394 struct ctables_summary_spec *specs;
398 /* The variable to which the summary specs are applied. */
399 struct variable *var;
401 /* Whether the variable to which the summary specs are applied is a scale
402 variable for the purpose of summarization.
404 (VALIDN and TOTALN act differently for summarizing scale and categorical
408 /* If any of these optional additional scale variables are missing, then
409 treat 'var' as if it's missing too. This is for implementing
410 SMISSING=LISTWISE. */
411 struct variable **listwise_vars;
412 size_t n_listwise_vars;
415 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
416 const struct ctables_summary_spec_set *);
417 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
419 /* A nested sequence of variables, e.g. a > b > c. */
422 struct variable **vars;
425 size_t *domains[N_CTDTS];
426 size_t n_domains[N_CTDTS];
429 struct ctables_summary_spec_set specs[N_CSVS];
432 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
435 struct ctables_nest *nests;
441 struct hmap_node node;
446 struct ctables_occurrence
448 struct hmap_node node;
452 struct ctables_section
454 struct ctables_table *table;
455 struct ctables_nest *nests[PIVOT_N_AXES];
456 struct hmap *occurrences[PIVOT_N_AXES];
457 struct hmap cells; /* Contains "struct ctables_cell"s. */
458 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
463 struct ctables *ctables;
464 struct ctables_axis *axes[PIVOT_N_AXES];
465 struct ctables_stack stacks[PIVOT_N_AXES];
466 struct ctables_section *sections;
468 enum pivot_axis_type summary_axis;
469 struct ctables_summary_spec_set summary_specs;
471 const struct variable *clabels_example;
472 struct hmap clabels_values_map;
473 struct ctables_value **clabels_values;
474 size_t n_clabels_values;
476 enum pivot_axis_type slabels_axis;
477 bool slabels_visible;
479 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
481 Most commonly, label_axis[a] == a, and in particular we always have
482 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
484 If ROWLABELS or COLLABELS is specified, then one of
485 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
486 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
488 enum pivot_axis_type label_axis[PIVOT_N_AXES];
489 enum pivot_axis_type clabels_from_axis;
491 /* Indexed by variable dictionary index. */
492 struct ctables_categories **categories;
501 struct ctables_chisq *chisq;
502 struct ctables_pairwise *pairwise;
505 struct ctables_categories
508 struct ctables_category *cats;
513 struct ctables_category
515 enum ctables_category_type
517 /* Explicit category lists. */
520 CCT_NRANGE, /* Numerical range. */
521 CCT_SRANGE, /* String range. */
526 /* Totals and subtotals. */
530 /* Implicit category lists. */
535 /* For contributing to TOTALN. */
536 CCT_EXCLUDED_MISSING,
540 struct ctables_category *subtotal;
546 double number; /* CCT_NUMBER. */
547 struct substring string; /* CCT_STRING, in dictionary encoding. */
548 double nrange[2]; /* CCT_NRANGE. */
549 struct substring srange[2]; /* CCT_SRANGE. */
553 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
554 bool hide_subcategories; /* CCT_SUBTOTAL. */
557 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
559 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
562 bool include_missing;
566 enum ctables_summary_function sort_function;
567 struct variable *sort_var;
572 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
573 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
574 struct msg_location *location;
578 ctables_category_uninit (struct ctables_category *cat)
589 case CCT_POSTCOMPUTE:
593 ss_dealloc (&cat->string);
597 ss_dealloc (&cat->srange[0]);
598 ss_dealloc (&cat->srange[1]);
603 free (cat->total_label);
611 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be null (that is, have a
   null 'string' pointer).  Two null substrings compare equal, a null
   substring never equals a non-null one, and two non-null substrings are
   equal exactly when ss_equals() says they are. */
617 nullable_substring_equal (const struct substring *a,
618 const struct substring *b)
620 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
624 ctables_category_equal (const struct ctables_category *a,
625 const struct ctables_category *b)
627 if (a->type != b->type)
633 return a->number == b->number;
636 return ss_equals (a->string, b->string);
639 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
642 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
643 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
649 case CCT_POSTCOMPUTE:
650 return a->pc == b->pc;
654 return !strcmp (a->total_label, b->total_label);
659 return (a->include_missing == b->include_missing
660 && a->sort_ascending == b->sort_ascending
661 && a->sort_function == b->sort_function
662 && a->sort_var == b->sort_var
663 && a->percentile == b->percentile);
665 case CCT_EXCLUDED_MISSING:
673 ctables_categories_unref (struct ctables_categories *c)
678 assert (c->n_refs > 0);
682 for (size_t i = 0; i < c->n_cats; i++)
683 ctables_category_uninit (&c->cats[i]);
/* Compares category sets A and B.  Their category counts and 'show_empty'
   settings are checked first; then each pair of corresponding categories is
   compared, in order, with ctables_category_equal(). */
689 ctables_categories_equal (const struct ctables_categories *a,
690 const struct ctables_categories *b)
692 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
695 for (size_t i = 0; i < a->n_cats; i++)
696 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
702 /* Chi-square test (SIGTEST). */
710 /* Pairwise comparison test (COMPARETEST). */
711 struct ctables_pairwise
713 enum { PROP, MEAN } type;
716 bool meansvariance_allcats;
718 enum { BONFERRONI = 1, BH } adjust;
742 struct variable *var;
744 struct ctables_summary_spec_set specs[N_CSVS];
748 struct ctables_axis *subs[2];
751 struct msg_location *loc;
754 static void ctables_axis_destroy (struct ctables_axis *);
763 enum ctables_function_availability
765 CTFA_ALL, /* Any variables. */
766 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
767 CTFA_MRSETS, /* Only multiple-response sets */
770 struct ctables_summary_spec
772 enum ctables_summary_function function;
773 double percentile; /* CTSF_PTILE only. */
776 struct fmt_spec format;
777 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
783 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
784 const struct ctables_summary_spec *src)
787 dst->label = xstrdup_if_nonnull (src->label);
791 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Overwrites *DST with a copy of SRC.  The SRC->n summary specs are cloned
   one by one, with ctables_summary_spec_clone(), into a newly allocated
   array; scalar members such as 'is_scale' are copied directly. */
798 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
799 const struct ctables_summary_spec_set *src)
801 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
802 for (size_t i = 0; i < src->n; i++)
803 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
805 *dst = (struct ctables_summary_spec_set) {
810 .is_scale = src->is_scale,
/* Releases the contents of SET, calling ctables_summary_spec_uninit() on each
   of its SET->n summary specs. */
815 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
817 for (size_t i = 0; i < set->n; i++)
818 ctables_summary_spec_uninit (&set->specs[i]);
823 parse_col_width (struct lexer *lexer, const char *name, double *width)
825 lex_match (lexer, T_EQUALS);
826 if (lex_match_id (lexer, "DEFAULT"))
828 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
830 *width = lex_number (lexer);
840 parse_bool (struct lexer *lexer, bool *b)
842 if (lex_match_id (lexer, "NO"))
844 else if (lex_match_id (lexer, "YES"))
848 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class (one of CTFA_*) of summary function F.  The
   value comes from a read-only table, indexed by function, that is built from
   the AVAILABILITY argument of the S() summary entries. */
854 static enum ctables_function_availability
855 ctables_function_availability (enum ctables_summary_function f)
857 static const enum ctables_function_availability availability[] = {
858 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
863 return availability[f];
867 ctables_summary_function_is_count (enum ctables_summary_function f)
873 case CTSF_ROWPCT_COUNT:
874 case CTSF_COLPCT_COUNT:
875 case CTSF_TABLEPCT_COUNT:
876 case CTSF_SUBTABLEPCT_COUNT:
877 case CTSF_LAYERPCT_COUNT:
878 case CTSF_LAYERROWPCT_COUNT:
879 case CTSF_LAYERCOLPCT_COUNT:
881 case CTSF_UROWPCT_COUNT:
882 case CTSF_UCOLPCT_COUNT:
883 case CTSF_UTABLEPCT_COUNT:
884 case CTSF_USUBTABLEPCT_COUNT:
885 case CTSF_ULAYERPCT_COUNT:
886 case CTSF_ULAYERROWPCT_COUNT:
887 case CTSF_ULAYERCOLPCT_COUNT:
890 case CTSF_ROWPCT_VALIDN:
891 case CTSF_COLPCT_VALIDN:
892 case CTSF_TABLEPCT_VALIDN:
893 case CTSF_SUBTABLEPCT_VALIDN:
894 case CTSF_LAYERPCT_VALIDN:
895 case CTSF_LAYERROWPCT_VALIDN:
896 case CTSF_LAYERCOLPCT_VALIDN:
897 case CTSF_ROWPCT_TOTALN:
898 case CTSF_COLPCT_TOTALN:
899 case CTSF_TABLEPCT_TOTALN:
900 case CTSF_SUBTABLEPCT_TOTALN:
901 case CTSF_LAYERPCT_TOTALN:
902 case CTSF_LAYERROWPCT_TOTALN:
903 case CTSF_LAYERCOLPCT_TOTALN:
920 case CTSF_ROWPCT_SUM:
921 case CTSF_COLPCT_SUM:
922 case CTSF_TABLEPCT_SUM:
923 case CTSF_SUBTABLEPCT_SUM:
924 case CTSF_LAYERPCT_SUM:
925 case CTSF_LAYERROWPCT_SUM:
926 case CTSF_LAYERCOLPCT_SUM:
927 case CTSF_UROWPCT_VALIDN:
928 case CTSF_UCOLPCT_VALIDN:
929 case CTSF_UTABLEPCT_VALIDN:
930 case CTSF_USUBTABLEPCT_VALIDN:
931 case CTSF_ULAYERPCT_VALIDN:
932 case CTSF_ULAYERROWPCT_VALIDN:
933 case CTSF_ULAYERCOLPCT_VALIDN:
934 case CTSF_UROWPCT_TOTALN:
935 case CTSF_UCOLPCT_TOTALN:
936 case CTSF_UTABLEPCT_TOTALN:
937 case CTSF_USUBTABLEPCT_TOTALN:
938 case CTSF_ULAYERPCT_TOTALN:
939 case CTSF_ULAYERROWPCT_TOTALN:
940 case CTSF_ULAYERCOLPCT_TOTALN:
952 case CTSF_UROWPCT_SUM:
953 case CTSF_UCOLPCT_SUM:
954 case CTSF_UTABLEPCT_SUM:
955 case CTSF_USUBTABLEPCT_SUM:
956 case CTSF_ULAYERPCT_SUM:
957 case CTSF_ULAYERROWPCT_SUM:
958 case CTSF_ULAYERCOLPCT_SUM:
/* Parses the name of a summary function from LEXER's current token, which
   must be an identifier.  The token is compared, with ss_equals_case(),
   against every name in a read-only table that holds the NAME of each S()
   summary entry plus the *PCT aliases that omit the .COUNT suffix.  On a
   match the function is stored in *F; if no name matches, the error
   "Expecting summary function name." is reported. */
966 parse_ctables_summary_function (struct lexer *lexer,
967 enum ctables_summary_function *f)
971 enum ctables_summary_function function;
972 struct substring name;
974 static const struct pair names[] = {
975 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
976 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
979 /* The .COUNT suffix may be omitted. */
980 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
981 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
982 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
983 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
984 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
985 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
986 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
990 if (!lex_force_id (lexer))
/* Linear search: the table is small and this runs once per parsed name. */
993 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
994 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
996 *f = names[i].function;
1001 lex_error (lexer, _("Expecting summary function name."));
1006 ctables_axis_destroy (struct ctables_axis *axis)
1014 for (size_t i = 0; i < N_CSVS; i++)
1015 ctables_summary_spec_set_uninit (&axis->specs[i]);
1020 ctables_axis_destroy (axis->subs[0]);
1021 ctables_axis_destroy (axis->subs[1]);
1024 msg_location_destroy (axis->loc);
/* Allocates and returns a new axis node whose two sub-axes are SUB0 and
   SUB1.  The node's source location covers the tokens from START_OFS through
   the one just before LEXER's current token offset. */
1028 static struct ctables_axis *
1029 ctables_axis_new_nonterminal (enum ctables_axis_op op,
1030 struct ctables_axis *sub0,
1031 struct ctables_axis *sub1,
1032 struct lexer *lexer, int start_ofs)
1034 struct ctables_axis *axis = xmalloc (sizeof *axis);
1035 *axis = (struct ctables_axis) {
1037 .subs = { sub0, sub1 },
1038 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
1043 struct ctables_axis_parse_ctx
1045 struct lexer *lexer;
1046 struct dictionary *dict;
1048 struct ctables_table *t;
1051 static struct fmt_spec
1052 ctables_summary_default_format (enum ctables_summary_function function,
1053 const struct variable *var)
1055 static const enum ctables_format default_formats[] = {
1056 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
1060 switch (default_formats[function])
1063 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
1066 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
1069 return *var_get_print_format (var);
/* Returns the default label for summary FUNCTION as a newly allocated string
   (from xasprintf() or xstrdup()).  For CTSF_PTILE the label is
   "Percentile %.2f" formatted with the percentile value; for every other
   function it is the translation, via gettext(), of the LABEL argument of the
   function's S() summary entry. */
1077 ctables_summary_default_label (enum ctables_summary_function function,
1080 static const char *default_labels[] = {
1081 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
1086 return (function == CTSF_PTILE
1087 ? xasprintf (_("Percentile %.2f"), percentile)
1088 : xstrdup (gettext (default_labels[function])));
/* Returns the syntax name of summary FUNCTION, that is, the NAME argument of
   its S() summary entry (for example "COUNT").  The string lives in a static
   table and is not a copy. */
1092 ctables_summary_function_name (enum ctables_summary_function function)
1094 static const char *names[] = {
1095 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1099 return names[function];
1103 add_summary_spec (struct ctables_axis *axis,
1104 enum ctables_summary_function function, double percentile,
1105 const char *label, const struct fmt_spec *format,
1106 bool is_ctables_format, const struct msg_location *loc,
1107 enum ctables_summary_variant sv)
1109 if (axis->op == CTAO_VAR)
1111 const char *function_name = ctables_summary_function_name (function);
1112 const char *var_name = var_get_name (axis->var);
1113 switch (ctables_function_availability (function))
1116 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1117 "response sets."), function_name);
1118 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1127 _("Summary function %s applies only to scale variables."),
1129 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1140 struct ctables_summary_spec_set *set = &axis->specs[sv];
1141 if (set->n >= set->allocated)
1142 set->specs = x2nrealloc (set->specs, &set->allocated,
1143 sizeof *set->specs);
1145 struct ctables_summary_spec *dst = &set->specs[set->n++];
1146 *dst = (struct ctables_summary_spec) {
1147 .function = function,
1148 .percentile = percentile,
1149 .label = xstrdup (label),
1150 .format = (format ? *format
1151 : ctables_summary_default_format (function, axis->var)),
1152 .is_ctables_format = is_ctables_format,
1158 for (size_t i = 0; i < 2; i++)
1159 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1160 format, is_ctables_format, loc, sv))
1166 static struct ctables_axis *ctables_axis_parse_stack (
1167 struct ctables_axis_parse_ctx *);
1170 static struct ctables_axis *
1171 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1173 if (lex_match (ctx->lexer, T_LPAREN))
1175 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1176 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1178 ctables_axis_destroy (sub);
1184 if (!lex_force_id (ctx->lexer))
1187 int start_ofs = lex_ofs (ctx->lexer);
1188 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1192 struct ctables_axis *axis = xmalloc (sizeof *axis);
1193 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1195 /* XXX should figure out default measures by reading data */
1196 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1197 : lex_match_phrase (ctx->lexer, "[C]") ? false
1198 : var_get_measure (var) == MEASURE_SCALE);
1199 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1200 lex_ofs (ctx->lexer) - 1);
1201 if (axis->scale && var_is_alpha (var))
1203 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1205 var_get_name (var));
1206 ctables_axis_destroy (axis);
/* Tests whether S contains at least one of the ASCII digits 0...9.
   strcspn() yields the length of the leading digit-free part of S, so the
   byte at that offset is a digit unless it is the terminating null. */
1214 has_digit (const char *s)
1216 return s[strcspn (s, "0123456789")] != '\0';
/* Parses a format specifier for a summary and stores it in *FORMAT.

   The abstract type name, width, and decimals are obtained first with
   parse_abstract_format_specifier__().  The CTABLES-only type names NEGPAREN,
   NEQUAL, PAREN, and PCTPAREN (compared with strcasecmp()) select the
   matching CTEF_* format type.  Any other name sets *IS_CTABLES_FORMAT to
   false and is handed to parse_format_specifier(); the result must also pass
   fmt_check_output() and fmt_check_type_compat() for VAL_NUMERIC.

   For the CTABLES-only formats, width and decimals are validated here (see
   the two error messages below) before *IS_CTABLES_FORMAT is set to true. */
1220 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1221 bool *is_ctables_format)
1223 char type[FMT_TYPE_LEN_MAX + 1];
1224 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1227 if (!strcasecmp (type, "NEGPAREN"))
1228 format->type = CTEF_NEGPAREN;
1229 else if (!strcasecmp (type, "NEQUAL"))
1230 format->type = CTEF_NEQUAL;
1231 else if (!strcasecmp (type, "PAREN"))
1232 format->type = CTEF_PAREN;
1233 else if (!strcasecmp (type, "PCTPAREN"))
1234 format->type = CTEF_PCTPAREN;
1237 *is_ctables_format = false;
1238 return (parse_format_specifier (lexer, format)
1239 && fmt_check_output (format)
1240 && fmt_check_type_compat (format, VAL_NUMERIC));
1245 msg (SE, _("Output format %s requires width 2 or greater."), type);
1248 else if (format->d > format->w - 1)
1250 msg (SE, _("Output format %s requires width greater than decimals."),
1256 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by "[" and a list of
   summary specifications.  Each specification consists of a summary
   function, a percentile in the closed range 0...100 when the function is
   CTSF_PTILE, an optional quoted label (otherwise a default label is
   generated), and an optional format, recognized as an identifier that
   contains a digit.  Each specification is added to the parsed axis with
   add_summary_spec.

   Specifications may be separated by commas.  While SV is CSV_CELL the
   keyword TOTALS followed by "[" is accepted.
   NOTE(review): the statement that changes SV and the loop construct are on
   lines not visible in this excerpt. */
1261 static struct ctables_axis *
1262 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1264 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1265 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1268 enum ctables_summary_variant sv = CSV_CELL;
1271 int start_ofs = lex_ofs (ctx->lexer);
1273 /* Parse function. */
1274 enum ctables_summary_function function;
1275 if (!parse_ctables_summary_function (ctx->lexer, &function))
1278 /* Parse percentile. */
1279 double percentile = 0;
1280 if (function == CTSF_PTILE)
1282 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1284 percentile = lex_number (ctx->lexer);
1285 lex_get (ctx->lexer);
/* Label: an explicit string token if present, else the default label for the
   function and percentile. */
1290 if (lex_is_string (ctx->lexer))
1292 label = ss_xstrdup (lex_tokss (ctx->lexer));
1293 lex_get (ctx->lexer);
1296 label = ctables_summary_default_label (function, percentile);
/* Format: only attempted when the next token is an identifier containing a
   digit. */
1299 struct fmt_spec format;
1300 const struct fmt_spec *formatp;
1301 bool is_ctables_format = false;
1302 if (lex_token (ctx->lexer) == T_ID
1303 && has_digit (lex_tokcstr (ctx->lexer)))
1305 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1306 &is_ctables_format))
1316 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1317 lex_ofs (ctx->lexer) - 1);
/* NOTE(review): the result of add_summary_spec is discarded here. */
1318 add_summary_spec (sub, function, percentile, label, formatp,
1319 is_ctables_format, loc, sv);
1321 msg_location_destroy (loc);
1323 lex_match (ctx->lexer, T_COMMA);
1324 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1326 if (!lex_force_match (ctx->lexer, T_LBRACK))
1330 else if (lex_match (ctx->lexer, T_RBRACK))
/* When inside a TOTALS list, a second "]" is required. */
1332 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1339 ctables_axis_destroy (sub);
/* Searches the axis tree AXIS for a scale variable.  A CTAO_VAR node yields
   itself when its scale flag is set and NULL otherwise; for other nodes both
   entries of subs[] are searched recursively.
   NOTE(review): the handling of a null AXIS and of the recursive result is
   on lines not visible here; presumably the first match found is returned. */
1343 static const struct ctables_axis *
1344 find_scale (const struct ctables_axis *axis)
1348 else if (axis->op == CTAO_VAR)
1349 return axis->scale ? axis : NULL;
1352 for (size_t i = 0; i < 2; i++)
1354 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree AXIS for a categorical (non-scale) variable that has
   at least one CSV_CELL summary specification.  A CTAO_VAR node yields itself
   when it qualifies and NULL otherwise; for other nodes both entries of
   subs[] are searched recursively.
   NOTE(review): the handling of a null AXIS and of the recursive result is
   on lines not visible here; presumably the first match found is returned. */
1362 static const struct ctables_axis *
1363 find_categorical_summary_spec (const struct ctables_axis *axis)
1367 else if (axis->op == CTAO_VAR)
1368 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1371 for (size_t i = 0; i < 2; i++)
1373 const struct ctables_axis *sum
1374 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix axis expressions joined by the T_GT token,
   combining each pair into a CTAO_NEST node.  Two constraints are enforced
   for every nest that is built:

   - The outer and inner operands may not both contain a scale variable.

   - The outer operand may not contain a categorical variable that has
     summaries.

   On a violation, an error plus explanatory notes are reported and the new
   nest node is destroyed.
   NOTE(review): the return statements and the update of LHS are on lines not
   visible in this excerpt. */
1382 static struct ctables_axis *
1383 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1385 int start_ofs = lex_ofs (ctx->lexer);
1386 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1390 while (lex_match (ctx->lexer, T_GT))
1392 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1396 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1397 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Reject nesting one scale variable inside another. */
1399 const struct ctables_axis *outer_scale = find_scale (lhs);
1400 const struct ctables_axis *inner_scale = find_scale (rhs);
1401 if (outer_scale && inner_scale)
1403 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1404 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1405 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1406 ctables_axis_destroy (nest);
/* Reject summaries on a categorical variable in the outer operand. */
1410 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1413 msg_at (SE, nest->loc,
1414 _("Summaries may only be requested for categorical variables "
1415 "at the innermost nesting level."));
1416 msg_at (SN, outer_sum->loc,
1417 _("This outer categorical variable has a summary."));
1418 ctables_axis_destroy (nest);
/* Parses one or more nest expressions joined by T_PLUS.  Each additional
   operand is folded into a CTAO_STACK node whose left child is everything
   parsed so far, so the resulting tree is left-associative.
   NOTE(review): the failure paths and the final return are on lines not
   visible in this excerpt. */
1428 static struct ctables_axis *
1429 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1431 int start_ofs = lex_ofs (ctx->lexer);
1432 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1436 while (lex_match (ctx->lexer, T_PLUS))
1438 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1442 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1443 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER and stores the
   resulting tree in T->axes[A].  Returns true if a tree was produced.

   The tokens BY, "/" and end-of-command are tested first; presumably they
   mean the axis is empty (the statement taken in that case is on a line not
   visible in this excerpt). */
1450 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1451 struct ctables *ct, struct ctables_table *t,
1452 enum pivot_axis_type a)
1454 if (lex_token (lexer) == T_BY
1455 || lex_token (lexer) == T_SLASH
1456 || lex_token (lexer) == T_ENDCMD)
1459 struct ctables_axis_parse_ctx ctx = {
1465 t->axes[a] = ctables_axis_parse_stack (&ctx);
1466 return t->axes[a] != NULL;
/* Presumably releases CHISQ; ctables_table_destroy calls this on T->chisq.
   NOTE(review): the body is not visible in this excerpt. */
1470 ctables_chisq_destroy (struct ctables_chisq *chisq)
/* Presumably releases PAIRWISE; ctables_table_destroy calls this on
   T->pairwise.
   NOTE(review): the body is not visible in this excerpt. */
1476 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Releases resources held by table T.  The visible steps drop one reference
   on each of the T->n_categories category sets and free the array, destroy
   the column, row and layer axis trees, and destroy T->chisq and
   T->pairwise.
   NOTE(review): further cleanup steps are on lines not visible here. */
1482 ctables_table_destroy (struct ctables_table *t)
1487 for (size_t i = 0; i < t->n_categories; i++)
1488 ctables_categories_unref (t->categories[i]);
1489 free (t->categories);
1491 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1492 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1493 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1497 ctables_chisq_destroy (t->chisq);
1498 ctables_pairwise_destroy (t->pairwise);
/* Releases resources held by CT.  The visible steps drop a reference on
   CT->look and destroy each of the CT->n_tables tables.
   NOTE(review): further cleanup steps are on lines not visible here. */
1503 ctables_destroy (struct ctables *ct)
1508 pivot_table_look_unref (ct->look);
1512 for (size_t i = 0; i < ct->n_tables; i++)
1513 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose numeric range nrange[] is
   { LOW, HIGH }.
   NOTE(review): the initializer for the category type is on a line not
   visible in this excerpt. */
1518 static struct ctables_category
1519 cct_nrange (double low, double high)
1521 return (struct ctables_category) {
1523 .nrange = { low, high }
/* Returns, by value, a category whose string range srange[] is
   { LOW, HIGH }.  The substrings are stored as given, not copied.
   NOTE(review): the initializer for the category type is on a line not
   visible in this excerpt. */
1527 static struct ctables_category
1528 cct_srange (struct substring low, struct substring high)
1530 return (struct ctables_category) {
1532 .srange = { low, high }
/* Parses the optional label that may follow SUBTOTAL or HSUBTOTAL and fills
   in *CAT as a CCT_SUBTOTAL category.  If "=" follows, a string token is
   required and a copy of it becomes the label; otherwise the translated
   default "Subtotal" is used.  HIDE_SUBCATEGORIES is copied into *CAT
   unchanged.
   NOTE(review): the return statements are not visible in this excerpt. */
1537 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1538 struct ctables_category *cat)
1541 if (lex_match (lexer, T_EQUALS))
1543 if (!lex_force_string (lexer))
1546 total_label = ss_xstrdup (lex_tokss (lexer));
1550 total_label = xstrdup (_("Subtotal"));
1552 *cat = (struct ctables_category) {
1553 .type = CCT_SUBTOTAL,
1554 .hide_subcategories = hide_subcategories,
1555 .total_label = total_label
/* Recodes the lexer's current string token between "UTF-8" and the encoding
   of DICT, then applies ss_rtrim with a single space to the result.
   NOTE(review): presumably this converts from UTF-8 to the dictionary
   encoding and strips trailing spaces; confirm against the parameter order
   of recode_substring_pool.  The token advance and the return are on lines
   not visible in this excerpt. */
1560 static struct substring
1561 parse_substring (struct lexer *lexer, struct dictionary *dict)
1563 struct substring s = recode_substring_pool (
1564 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1565 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list into *CAT.  The forms
   recognized by the visible code are:

   - OTHERNM and MISSING keywords.
   - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal
     (HSUBTOTAL passes true for hide_subcategories).
   - LO THRU followed by a string or a number.
   - A number, optionally followed by THRU and either HI or another number.
   - A string, optionally followed by THRU and either HI or another string.
   - T_AND followed by the name of a postcompute, which must already exist.

   Open-ended numeric ranges use -DBL_MAX or DBL_MAX for the missing bound;
   open-ended string ranges use a substring whose string member is NULL.
   Anything else is a syntax error.
   NOTE(review): the return statements and some token advances are on lines
   not visible in this excerpt. */
1571 ctables_table_parse_explicit_category (struct lexer *lexer,
1572 struct dictionary *dict,
1574 struct ctables_category *cat)
1576 if (lex_match_id (lexer, "OTHERNM"))
1577 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1578 else if (lex_match_id (lexer, "MISSING"))
1579 *cat = (struct ctables_category) { .type = CCT_MISSING };
1580 else if (lex_match_id (lexer, "SUBTOTAL"))
1581 return ctables_table_parse_subtotal (lexer, false, cat);
1582 else if (lex_match_id (lexer, "HSUBTOTAL"))
1583 return ctables_table_parse_subtotal (lexer, true, cat);
/* Range open at the low end. */
1584 else if (lex_match_id (lexer, "LO"))
1586 if (!lex_force_match_id (lexer, "THRU"))
1588 if (lex_is_string (lexer))
1590 struct substring sr0 = { .string = NULL };
1591 struct substring sr1 = parse_substring (lexer, dict);
1592 *cat = cct_srange (sr0, sr1);
1594 else if (lex_force_num (lexer))
1596 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
/* Single number or numeric range. */
1602 else if (lex_is_number (lexer))
1604 double number = lex_number (lexer);
1606 if (lex_match_id (lexer, "THRU"))
1608 if (lex_match_id (lexer, "HI"))
1609 *cat = cct_nrange (number, DBL_MAX);
1612 if (!lex_force_num (lexer))
1614 *cat = cct_nrange (number, lex_number (lexer));
1619 *cat = (struct ctables_category) {
/* Single string or string range. */
1624 else if (lex_is_string (lexer))
1626 struct substring s = parse_substring (lexer, dict);
1627 if (lex_match_id (lexer, "THRU"))
1629 if (lex_match_id (lexer, "HI"))
1631 struct substring sr1 = { .string = NULL };
1632 *cat = cct_srange (s, sr1);
1636 if (!lex_force_string (lexer))
1638 struct substring sr1 = parse_substring (lexer, dict);
1639 *cat = cct_srange (s, sr1);
1643 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
/* Reference to a postcompute by name. */
1645 else if (lex_match (lexer, T_AND))
1647 if (!lex_force_id (lexer))
1649 struct ctables_postcompute *pc = ctables_find_postcompute (
1650 ct, lex_tokcstr (lexer));
1653 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1654 msg_at (SE, loc, _("Unknown postcompute &%s."),
1655 lex_tokcstr (lexer));
1656 msg_location_destroy (loc);
1661 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1665 lex_error (lexer, NULL);
/* Searches CATS for the category that postcompute operand E refers to.  The
   matching rule depends on E->op:

   - CTPO_CAT_NUMBER: a CCT_NUMBER category with an equal number.
   - CTPO_CAT_STRING: a CCT_STRING category with an equal string.
   - CTPO_CAT_NRANGE and CTPO_CAT_SRANGE: a range category whose two
     endpoints both equal those of E.
   - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: a category of the
     corresponding type.
   - CTPO_CAT_SUBTOTAL: subtotals are counted as they are encountered so that
     E->subtotal_index can select one by position; index 0 is treated
     separately.

   After the scan there is a dedicated check for a subtotal reference with
   index 0 when CATS holds more than one subtotal.
   NOTE(review): the assignments to BEST and the return statements are on
   lines not visible in this excerpt. */
1672 static struct ctables_category *
1673 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1674 const struct ctables_pcexpr *e)
1676 struct ctables_category *best = NULL;
1677 size_t n_subtotals = 0;
1678 for (size_t i = 0; i < cats->n_cats; i++)
1680 struct ctables_category *cat = &cats->cats[i];
1683 case CTPO_CAT_NUMBER:
1684 if (cat->type == CCT_NUMBER && cat->number == e->number)
1688 case CTPO_CAT_STRING:
1689 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1693 case CTPO_CAT_NRANGE:
1694 if (cat->type == CCT_NRANGE
1695 && cat->nrange[0] == e->nrange[0]
1696 && cat->nrange[1] == e->nrange[1])
1700 case CTPO_CAT_SRANGE:
1701 if (cat->type == CCT_SRANGE
1702 && nullable_substring_equal (&cat->srange[0], &e->srange[0])
1703 && nullable_substring_equal (&cat->srange[1], &e->srange[1]))
1707 case CTPO_CAT_MISSING:
1708 if (cat->type == CCT_MISSING)
1712 case CTPO_CAT_OTHERNM:
1713 if (cat->type == CCT_OTHERNM)
1717 case CTPO_CAT_SUBTOTAL:
1718 if (cat->type == CCT_SUBTOTAL)
1721 if (e->subtotal_index == n_subtotals)
1723 else if (e->subtotal_index == 0)
1728 case CTPO_CAT_TOTAL:
1729 if (cat->type == CCT_TOTAL)
1743 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Recursively checks postcompute expression E, which belongs to computed
   category PC_CAT, against the category list CATS.

   For operand nodes that name a category, the category is looked up with
   ctables_find_category_for_postcompute.  Two distinct errors are reported
   by the visible code: a subtotal reference without a position when CATS
   contains more than one subtotal, and a reference to a category that is
   not in the list.  CATS_LOCATION is where the category list appears in the
   syntax and is used in the diagnostics.

   For the remaining node types, each non-null entry of E->subs[] is checked
   recursively.
   NOTE(review): the return statements and the body executed when
   PC_CAT->pc->hide_source_cats is set are on lines not visible here. */
1749 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1750 struct ctables_category *pc_cat,
1751 const struct ctables_categories *cats,
1752 const struct msg_location *cats_location)
1756 case CTPO_CAT_NUMBER:
1757 case CTPO_CAT_STRING:
1758 case CTPO_CAT_NRANGE:
1759 case CTPO_CAT_MISSING:
1760 case CTPO_CAT_OTHERNM:
1761 case CTPO_CAT_SUBTOTAL:
1762 case CTPO_CAT_TOTAL:
1764 struct ctables_category *cat = ctables_find_category_for_postcompute (
/* Ambiguous subtotal reference: count the subtotals to word the message. */
1768 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1770 size_t n_subtotals = 0;
1771 for (size_t i = 0; i < cats->n_cats; i++)
1772 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1773 if (n_subtotals > 1)
1775 msg_at (SE, cats_location,
1776 ngettext ("These categories include %zu instance "
1777 "of SUBTOTAL or HSUBTOTAL, so references "
1778 "from computed categories must refer to "
1779 "subtotals by position.",
1780 "These categories include %zu instances "
1781 "of SUBTOTAL or HSUBTOTAL, so references "
1782 "from computed categories must refer to "
1783 "subtotals by position.",
1786 msg_at (SN, e->location,
1787 _("This is the reference that lacks a position."));
/* Referenced category is absent from the list. */
1792 msg_at (SE, pc_cat->location,
1793 _("Computed category &%s references a category not included "
1794 "in the category list."),
1796 msg_at (SN, e->location, _("This is the missing category."));
1797 msg_at (SN, cats_location,
1798 _("To fix the problem, add the missing category to the "
1799 "list of categories here."));
1802 if (pc_cat->pc->hide_source_cats)
/* Non-operand nodes: check both children. */
1816 for (size_t i = 0; i < 2; i++)
1817 if (e->subs[i] && !ctables_recursive_check_postcompute (
1818 e->subs[i], pc_cat, cats, cats_location))
/* Parses S as a value in input format FORMAT by calling data_in with the
   encoding of DICT and the current format settings.  If data_in reports an
   error, a message that includes the format name and the error text is
   issued at CAT->location.
   NOTE(review): presumably *N receives the parsed number and the function
   returns whether parsing succeeded; those statements are on lines not
   visible in this excerpt. */
1828 parse_category_string (const struct ctables_category *cat,
1829 struct substring s, struct dictionary *dict,
1830 enum fmt_type format, double *n)
1833 char *error = data_in (s, dict_get_encoding (dict), format,
1834 settings_get_fmt_settings (), &v, 0, NULL);
1837 msg_at (SE, cat->location,
1838 _("Failed to parse category specification as format %s: %s."),
1839 fmt_name (format), error);
/* Checks the N_VARS variables in VARS for a numeric variable.  When one is
   found, an error naming that variable is reported at CAT->location.
   NOTE(review): presumably returns false in that case and true when every
   variable is a string variable; the return statements are on lines not
   visible in this excerpt. */
1849 all_strings (struct variable **vars, size_t n_vars,
1850 const struct ctables_category *cat)
1852 for (size_t j = 0; j < n_vars; j++)
1853 if (var_is_numeric (vars[j]))
1855 msg_at (SE, cat->location,
1856 _("This category specification may be applied only to string "
1857 "variables, but this subcommand tries to apply it to "
1858 "numeric variable %s."),
1859 var_get_name (vars[j]));
/* Parses the body of a categories specification for table T: the keyword
   VARIABLES, an optional "=", and a variable list, followed either by an
   explicit category list in brackets or by ORDER, KEY and MISSING settings,
   and then by TOTAL, LABEL, POSITION and EMPTY settings.

   One struct ctables_categories is allocated with a reference count equal to
   the number of variables listed, and the previous entry for each variable
   in T->categories is unreferenced.

   Explicit categories are validated after parsing: postcompute references
   are checked against the list, numeric categories are rejected for string
   variables, and string categories are either converted to numbers with the
   variables' common print format or required to apply only to string
   variables.

   NOTE(review): many control-flow lines (returns, braces, some conditions)
   are not visible in this excerpt, so the comments below describe only what
   the visible statements do. */
1866 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1867 struct ctables *ct, struct ctables_table *t)
1869 if (!lex_match_id (lexer, "VARIABLES"))
1871 lex_match (lexer, T_EQUALS);
1873 struct variable **vars;
1875 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all the variables share one print format type;
   common_format becomes NULL as soon as one differs. */
1878 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1879 for (size_t i = 1; i < n_vars; i++)
1881 const struct fmt_spec *f = var_get_print_format (vars[i]);
1882 if (f->type != common_format->type)
1884 common_format = NULL;
1890 && (fmt_get_category (common_format->type)
1891 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One shared category set for all the listed variables. */
1893 struct ctables_categories *c = xmalloc (sizeof *c);
1894 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1895 for (size_t i = 0; i < n_vars; i++)
1897 struct ctables_categories **cp
1898 = &t->categories[var_get_dict_index (vars[i])];
1899 ctables_categories_unref (*cp);
/* Explicit category list in brackets. */
1903 size_t allocated_cats = 0;
1904 if (lex_match (lexer, T_LBRACK))
1906 int cats_start_ofs = lex_ofs (lexer);
1909 if (c->n_cats >= allocated_cats)
1910 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1912 int start_ofs = lex_ofs (lexer);
1913 struct ctables_category *cat = &c->cats[c->n_cats];
1914 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1916 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1919 lex_match (lexer, T_COMMA);
1921 while (!lex_match (lexer, T_RBRACK));
/* Validate each explicit category against the variables it applies to. */
1923 struct msg_location *cats_location
1924 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1925 for (size_t i = 0; i < c->n_cats; i++)
1927 struct ctables_category *cat = &c->cats[i];
1930 case CCT_POSTCOMPUTE:
1931 if (!ctables_recursive_check_postcompute (cat->pc->expr, cat,
1938 for (size_t j = 0; j < n_vars; j++)
1939 if (var_is_alpha (vars[j]))
1941 msg_at (SE, cat->location,
1942 _("This category specification may be applied "
1943 "only to numeric variables, but this "
1944 "subcommand tries to apply it to string "
1946 var_get_name (vars[j]));
/* A single string is converted into a CCT_NUMBER category using the common
   format, or else must apply to string variables only. */
1955 if (!parse_category_string (cat, cat->string, dict,
1956 common_format->type, &n))
1959 ss_dealloc (&cat->string);
1961 cat->type = CCT_NUMBER;
1964 else if (!all_strings (vars, n_vars, cat))
/* A string range is converted endpoint by endpoint into a CCT_NRANGE
   category; an endpoint whose string member is NULL is an open end. */
1973 if (!cat->srange[0].string)
1975 else if (!parse_category_string (cat, cat->srange[0], dict,
1976 common_format->type, &n[0]))
1979 if (!cat->srange[1].string)
1981 else if (!parse_category_string (cat, cat->srange[1], dict,
1982 common_format->type, &n[1]))
1985 ss_dealloc (&cat->srange[0]);
1986 ss_dealloc (&cat->srange[1]);
1988 cat->type = CCT_NRANGE;
1989 cat->nrange[0] = n[0];
1990 cat->nrange[1] = n[1];
1992 else if (!all_strings (vars, n_vars, cat))
2003 case CCT_EXCLUDED_MISSING:
/* Settings for implicit categories, starting from ascending order with
   missing values excluded. */
2009 struct ctables_category cat = {
2011 .include_missing = false,
2012 .sort_ascending = true,
2014 bool show_totals = false;
2015 char *total_label = NULL;
2016 bool totals_before = false;
2017 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* ORDER, KEY and MISSING are only matched when no explicit categories were
   given (c->n_cats is zero). */
2019 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
2021 lex_match (lexer, T_EQUALS);
2022 if (lex_match_id (lexer, "A"))
2023 cat.sort_ascending = true;
2024 else if (lex_match_id (lexer, "D"))
2025 cat.sort_ascending = false;
2028 lex_error_expecting (lexer, "A", "D");
2032 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
2034 lex_match (lexer, T_EQUALS);
2035 if (lex_match_id (lexer, "VALUE"))
2036 cat.type = CCT_VALUE;
2037 else if (lex_match_id (lexer, "LABEL"))
2038 cat.type = CCT_LABEL;
/* Otherwise the key is a summary function, optionally followed by a
   parenthesized variable and, for PTILE, a percentile in 0...100. */
2041 cat.type = CCT_FUNCTION;
2042 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
2045 if (lex_match (lexer, T_LPAREN))
2047 cat.sort_var = parse_variable (lexer, dict);
2051 if (cat.sort_function == CTSF_PTILE)
2053 lex_match (lexer, T_COMMA);
2054 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
2056 cat.percentile = lex_number (lexer);
2060 if (!lex_force_match (lexer, T_RPAREN))
2063 else if (ctables_function_availability (cat.sort_function)
2066 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
2071 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
2073 lex_match (lexer, T_EQUALS);
2074 if (lex_match_id (lexer, "INCLUDE"))
2075 cat.include_missing = true;
2076 else if (lex_match_id (lexer, "EXCLUDE"))
2077 cat.include_missing = false;
2080 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* TOTAL, LABEL, POSITION and EMPTY are matched in either mode. */
2084 else if (lex_match_id (lexer, "TOTAL"))
2086 lex_match (lexer, T_EQUALS);
2087 if (!parse_bool (lexer, &show_totals))
2090 else if (lex_match_id (lexer, "LABEL"))
2092 lex_match (lexer, T_EQUALS);
2093 if (!lex_force_string (lexer))
2096 total_label = ss_xstrdup (lex_tokss (lexer));
2099 else if (lex_match_id (lexer, "POSITION"))
2101 lex_match (lexer, T_EQUALS);
2102 if (lex_match_id (lexer, "BEFORE"))
2103 totals_before = true;
2104 else if (lex_match_id (lexer, "AFTER"))
2105 totals_before = false;
2108 lex_error_expecting (lexer, "BEFORE", "AFTER");
2112 else if (lex_match_id (lexer, "EMPTY"))
2114 lex_match (lexer, T_EQUALS);
2115 if (lex_match_id (lexer, "INCLUDE"))
2116 c->show_empty = true;
2117 else if (lex_match_id (lexer, "EXCLUDE"))
2118 c->show_empty = false;
2121 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the "expecting" error: the full keyword list, and the
   shorter list without ORDER, KEY and MISSING. */
2128 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2129 "TOTAL", "LABEL", "POSITION", "EMPTY");
2131 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category built from the settings above. */
2138 if (c->n_cats >= allocated_cats)
2139 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2140 c->cats[c->n_cats++] = cat;
/* Make room for a totals category, placed either at index 0 or after the
   last existing category. */
2145 if (c->n_cats >= allocated_cats)
2146 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2148 struct ctables_category *totals;
2151 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2152 totals = &c->cats[0];
2155 totals = &c->cats[c->n_cats];
2158 *totals = (struct ctables_category) {
2160 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward otherwise,
   linking categories to a subtotal through cat->subtotal. */
2164 struct ctables_category *subtotal = NULL;
2165 for (size_t i = totals_before ? 0 : c->n_cats;
2166 totals_before ? i < c->n_cats : i-- > 0;
2167 totals_before ? i++ : 0)
2169 struct ctables_category *cat = &c->cats[i];
2178 cat->subtotal = subtotal;
2181 case CCT_POSTCOMPUTE:
2192 case CCT_EXCLUDED_MISSING:
/* Presumably releases the resources held by NEST; ctables_stack_uninit calls
   this on every nest in a stack.
   NOTE(review): the body is not visible in this excerpt. */
2201 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes each of the STACK->n nests in STACK and then frees the nests
   array itself.  STACK is not freed by the visible code.
   NOTE(review): any guard on STACK is on lines not visible here. */
2208 ctables_stack_uninit (struct ctables_stack *stack)
2212 for (size_t i = 0; i < stack->n; i++)
2213 ctables_nest_uninit (&stack->nests[i]);
2214 free (stack->nests);
/* Builds the stack that results from nesting every nest of S1 inside every
   nest of S0, giving room for s0.n * s1.n nests.  For each pair (a, b):

   - The new variable list is a's variables followed by b's.
   - The scale index is a's if it has one, else b's offset by a->n.
   - Summary specifications are cloned from one of the two source nests,
     chosen by which of them has specs[CSV_CELL].var unset.

   Both S0 and S1 are uninitialized before returning, so the caller gives up
   ownership of them.
   NOTE(review): early returns and several initializers are on lines not
   visible in this excerpt. */
2218 static struct ctables_stack
2219 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2226 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2227 for (size_t i = 0; i < s0.n; i++)
2228 for (size_t j = 0; j < s1.n; j++)
2230 const struct ctables_nest *a = &s0.nests[i];
2231 const struct ctables_nest *b = &s1.nests[j];
2233 size_t allocate = a->n + b->n;
2234 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2235 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
/* Concatenate the variable lists, outer nest first. */
2237 for (size_t k = 0; k < a->n; k++)
2238 vars[n++] = a->vars[k];
2239 for (size_t k = 0; k < b->n; k++)
2240 vars[n++] = b->vars[k];
2241 assert (n == allocate);
/* Pick which source nest supplies the summary specifications. */
2243 const struct ctables_nest *summary_src;
2244 if (!a->specs[CSV_CELL].var)
2246 else if (!b->specs[CSV_CELL].var)
2251 struct ctables_nest *new = &stack.nests[stack.n++];
2252 *new = (struct ctables_nest) {
2254 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2255 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2259 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2260 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2262 ctables_stack_uninit (&s0);
2263 ctables_stack_uninit (&s1);
/* Concatenates two stacks into a newly allocated one: the nests of S0 are
   copied first, then those of S1 with their group_head shifted by s0.n so
   that it still refers to the right position in the combined array.  The
   nests are copied by value, not cloned.
   NOTE(review): the release of the input arrays and the return are on lines
   not visible in this excerpt. */
2267 static struct ctables_stack
2268 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2270 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2271 for (size_t i = 0; i < s0.n; i++)
2272 stack.nests[stack.n++] = s0.nests[i];
2273 for (size_t i = 0; i < s1.n; i++)
2275 stack.nests[stack.n] = s1.nests[i];
2276 stack.nests[stack.n].group_head += s0.n;
2279 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing a single newly allocated nest for the variable
   axis A.  The nest's scale index is 0 when A is a scale variable and
   SIZE_MAX otherwise.  If A has CSV_CELL summary specifications or is scale,
   the specifications for every summary variant are cloned from A and tagged
   with A's variable and scale flag.
   NOTE(review): the remaining nest initializers are on lines not visible in
   this excerpt. */
2285 static struct ctables_stack
2286 var_fts (const struct ctables_axis *a)
2288 struct variable **vars = xmalloc (sizeof *vars);
2291 struct ctables_nest *nest = xmalloc (sizeof *nest);
2292 *nest = (struct ctables_nest) {
2295 .scale_idx = a->scale ? 0 : SIZE_MAX,
2297 if (a->specs[CSV_CELL].n || a->scale)
2298 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2300 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2301 nest->specs[sv].var = a->var;
2302 nest->specs[sv].is_scale = a->scale;
2304 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Converts the axis tree A into a stack of nests by recursion: one branch
   combines the stacks of both children with stack_fts and another combines
   them with nest_fts.  An empty stack (n == 0) is returned in a case whose
   condition is not visible here, presumably when A is null.
   NOTE(review): the switch on the node type, including the variable case, is
   on lines not visible in this excerpt. */
2307 static struct ctables_stack
2308 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2311 return (struct ctables_stack) { .n = 0 };
2319 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2320 enumerate_fts (axis_type, a->subs[1]));
2323 /* This should consider any of the scale variables found in the result to
2324 be linked to each other listwise for SMISSING=LISTWISE. */
2325 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2326 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator state for one summary function in one cell.  Which member is
   in use depends on the summary function, as grouped by the comments below;
   ctables_summary_init, ctables_summary_add and ctables_summary_uninit switch
   on the function to pick the matching member.
   NOTE(review): most member declarations are on lines not visible in this
   excerpt. */
2332 union ctables_summary
2334 /* COUNT, VALIDN, TOTALN. */
2337 /* MINIMUM, MAXIMUM, RANGE. */
2344 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2345 struct moments1 *moments;
2347 /* MEDIAN, MODE, PTILE. */
2350 struct casewriter *writer;
2355 /* XXX multiple response */
/* Initializes the accumulator *S for the summary function SS->function.
   The visible initializations are:

   - Minimum and maximum are both set to SYSMIS.
   - Moment-based functions get a moments1 object created with
     MOMENT_VARIANCE.
   - Order-based functions get a sorting casewriter over a two-column
     prototype, sorted ascending on column 0.  ctables_summary_add stores the
     value in column 0 and the weight in column 1.

   NOTE(review): the initialization for the count-type cases and several case
   labels are on lines not visible in this excerpt. */
2359 ctables_summary_init (union ctables_summary *s,
2360 const struct ctables_summary_spec *ss)
2362 switch (ss->function)
2366 case CTSF_ROWPCT_COUNT:
2367 case CTSF_COLPCT_COUNT:
2368 case CTSF_TABLEPCT_COUNT:
2369 case CTSF_SUBTABLEPCT_COUNT:
2370 case CTSF_LAYERPCT_COUNT:
2371 case CTSF_LAYERROWPCT_COUNT:
2372 case CTSF_LAYERCOLPCT_COUNT:
2373 case CTSF_ROWPCT_VALIDN:
2374 case CTSF_COLPCT_VALIDN:
2375 case CTSF_TABLEPCT_VALIDN:
2376 case CTSF_SUBTABLEPCT_VALIDN:
2377 case CTSF_LAYERPCT_VALIDN:
2378 case CTSF_LAYERROWPCT_VALIDN:
2379 case CTSF_LAYERCOLPCT_VALIDN:
2380 case CTSF_ROWPCT_TOTALN:
2381 case CTSF_COLPCT_TOTALN:
2382 case CTSF_TABLEPCT_TOTALN:
2383 case CTSF_SUBTABLEPCT_TOTALN:
2384 case CTSF_LAYERPCT_TOTALN:
2385 case CTSF_LAYERROWPCT_TOTALN:
2386 case CTSF_LAYERCOLPCT_TOTALN:
2393 case CTSF_UROWPCT_COUNT:
2394 case CTSF_UCOLPCT_COUNT:
2395 case CTSF_UTABLEPCT_COUNT:
2396 case CTSF_USUBTABLEPCT_COUNT:
2397 case CTSF_ULAYERPCT_COUNT:
2398 case CTSF_ULAYERROWPCT_COUNT:
2399 case CTSF_ULAYERCOLPCT_COUNT:
2400 case CTSF_UROWPCT_VALIDN:
2401 case CTSF_UCOLPCT_VALIDN:
2402 case CTSF_UTABLEPCT_VALIDN:
2403 case CTSF_USUBTABLEPCT_VALIDN:
2404 case CTSF_ULAYERPCT_VALIDN:
2405 case CTSF_ULAYERROWPCT_VALIDN:
2406 case CTSF_ULAYERCOLPCT_VALIDN:
2407 case CTSF_UROWPCT_TOTALN:
2408 case CTSF_UCOLPCT_TOTALN:
2409 case CTSF_UTABLEPCT_TOTALN:
2410 case CTSF_USUBTABLEPCT_TOTALN:
2411 case CTSF_ULAYERPCT_TOTALN:
2412 case CTSF_ULAYERROWPCT_TOTALN:
2413 case CTSF_ULAYERCOLPCT_TOTALN:
/* SYSMIS marks "no value seen yet" for the running minimum and maximum;
   ctables_summary_add tests for it before comparing. */
2423 s->min = s->max = SYSMIS;
2431 case CTSF_ROWPCT_SUM:
2432 case CTSF_COLPCT_SUM:
2433 case CTSF_TABLEPCT_SUM:
2434 case CTSF_SUBTABLEPCT_SUM:
2435 case CTSF_LAYERPCT_SUM:
2436 case CTSF_LAYERROWPCT_SUM:
2437 case CTSF_LAYERCOLPCT_SUM:
2442 case CTSF_UVARIANCE:
2443 case CTSF_UROWPCT_SUM:
2444 case CTSF_UCOLPCT_SUM:
2445 case CTSF_UTABLEPCT_SUM:
2446 case CTSF_USUBTABLEPCT_SUM:
2447 case CTSF_ULAYERPCT_SUM:
2448 case CTSF_ULAYERROWPCT_SUM:
2449 case CTSF_ULAYERCOLPCT_SUM:
2450 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two-column prototype and an ascending sort on column 0.  The temporary
   ordering and prototype are released once the writer has been created. */
2460 struct caseproto *proto = caseproto_create ();
2461 proto = caseproto_add_width (proto, 0);
2462 proto = caseproto_add_width (proto, 0);
2464 struct subcase ordering;
2465 subcase_init (&ordering, 0, 0, SC_ASCEND);
2466 s->writer = sort_create_writer (&ordering, proto);
2467 subcase_uninit (&ordering);
2468 caseproto_unref (proto);
/* Releases whatever ctables_summary_init allocated in *S for the summary
   function SS->function.  In the visible code the moment-based functions
   destroy S->moments and the order-based functions destroy S->writer.
   NOTE(review): the bodies of the other case groups are on lines not visible
   in this excerpt; presumably the count-type groups need no cleanup. */
2478 ctables_summary_uninit (union ctables_summary *s,
2479 const struct ctables_summary_spec *ss)
2481 switch (ss->function)
2485 case CTSF_ROWPCT_COUNT:
2486 case CTSF_COLPCT_COUNT:
2487 case CTSF_TABLEPCT_COUNT:
2488 case CTSF_SUBTABLEPCT_COUNT:
2489 case CTSF_LAYERPCT_COUNT:
2490 case CTSF_LAYERROWPCT_COUNT:
2491 case CTSF_LAYERCOLPCT_COUNT:
2492 case CTSF_ROWPCT_VALIDN:
2493 case CTSF_COLPCT_VALIDN:
2494 case CTSF_TABLEPCT_VALIDN:
2495 case CTSF_SUBTABLEPCT_VALIDN:
2496 case CTSF_LAYERPCT_VALIDN:
2497 case CTSF_LAYERROWPCT_VALIDN:
2498 case CTSF_LAYERCOLPCT_VALIDN:
2499 case CTSF_ROWPCT_TOTALN:
2500 case CTSF_COLPCT_TOTALN:
2501 case CTSF_TABLEPCT_TOTALN:
2502 case CTSF_SUBTABLEPCT_TOTALN:
2503 case CTSF_LAYERPCT_TOTALN:
2504 case CTSF_LAYERROWPCT_TOTALN:
2505 case CTSF_LAYERCOLPCT_TOTALN:
2512 case CTSF_UROWPCT_COUNT:
2513 case CTSF_UCOLPCT_COUNT:
2514 case CTSF_UTABLEPCT_COUNT:
2515 case CTSF_USUBTABLEPCT_COUNT:
2516 case CTSF_ULAYERPCT_COUNT:
2517 case CTSF_ULAYERROWPCT_COUNT:
2518 case CTSF_ULAYERCOLPCT_COUNT:
2519 case CTSF_UROWPCT_VALIDN:
2520 case CTSF_UCOLPCT_VALIDN:
2521 case CTSF_UTABLEPCT_VALIDN:
2522 case CTSF_USUBTABLEPCT_VALIDN:
2523 case CTSF_ULAYERPCT_VALIDN:
2524 case CTSF_ULAYERROWPCT_VALIDN:
2525 case CTSF_ULAYERCOLPCT_VALIDN:
2526 case CTSF_UROWPCT_TOTALN:
2527 case CTSF_UCOLPCT_TOTALN:
2528 case CTSF_UTABLEPCT_TOTALN:
2529 case CTSF_USUBTABLEPCT_TOTALN:
2530 case CTSF_ULAYERPCT_TOTALN:
2531 case CTSF_ULAYERROWPCT_TOTALN:
2532 case CTSF_ULAYERCOLPCT_TOTALN:
2548 case CTSF_ROWPCT_SUM:
2549 case CTSF_COLPCT_SUM:
2550 case CTSF_TABLEPCT_SUM:
2551 case CTSF_SUBTABLEPCT_SUM:
2552 case CTSF_LAYERPCT_SUM:
2553 case CTSF_LAYERROWPCT_SUM:
2554 case CTSF_LAYERCOLPCT_SUM:
2559 case CTSF_UVARIANCE:
2560 case CTSF_UROWPCT_SUM:
2561 case CTSF_UCOLPCT_SUM:
2562 case CTSF_UTABLEPCT_SUM:
2563 case CTSF_USUBTABLEPCT_SUM:
2564 case CTSF_ULAYERPCT_SUM:
2565 case CTSF_ULAYERROWPCT_SUM:
2566 case CTSF_ULAYERCOLPCT_SUM:
/* Moment-based summaries own a moments1 object. */
2567 moments1_destroy (s->moments);
/* Order-based summaries own a sorting casewriter. */
2576 casewriter_destroy (s->writer);
/* Adds the contribution of one case to the accumulator *S for the summary
   function SS->function.

   VAR and VALUE are the summarized variable and its value in this case.
   IS_SCALE says whether VAR is used as a scale variable; IS_SCALE_MISSING,
   IS_MISSING and EXCLUDED_MISSING describe the missing-value status of the
   case.  D_WEIGHT and E_WEIGHT are two case weights; which one a function
   uses is decided per case group.

   What the visible statements do:

   - Count-type functions add a weight to S->count; the COUNT groups do so
     only if IS_SCALE is set or EXCLUDED_MISSING is clear.
   - Minimum and maximum are updated from VALUE->f when IS_SCALE_MISSING is
     clear, with SYSMIS meaning that no value has been recorded yet.
   - Moment-based functions add VALUE->f with weight E_WEIGHT, or with weight
     1.0 for the unweighted variants, when IS_SCALE_MISSING is clear.
   - Order-based functions add E_WEIGHT to S->ovalid and write a two-column
     case (value, weight) to S->writer when IS_SCALE_MISSING is clear; on one
     path both weights are first forced to 1.0.

   NOTE(review): many case labels and conditions are on lines not visible in
   this excerpt, and the in-body comment below has lost some of its lines,
   including its terminator. */
2582 ctables_summary_add (union ctables_summary *s,
2583 const struct ctables_summary_spec *ss,
2584 const struct variable *var, const union value *value,
2585 bool is_scale, bool is_scale_missing,
2586 bool is_missing, bool excluded_missing,
2587 double d_weight, double e_weight)
2589 /* To determine whether a case is included in a given table for a particular
2590 kind of summary, consider the following charts for each variable in the
2591 table. Only if "yes" appears for every variable for the summary is the
2594 Categorical variables: VALIDN COUNT TOTALN
2595 Valid values in included categories yes yes yes
2596 Missing values in included categories --- yes yes
2597 Missing values in excluded categories --- --- yes
2598 Valid values in excluded categories --- --- ---
2600 Scale variables: VALIDN COUNT TOTALN
2601 Valid value yes yes yes
2602 Missing value --- yes yes
2604 Missing values include both user- and system-missing. (The system-missing
2605 value is always in an excluded category.)
2607 switch (ss->function)
2610 case CTSF_ROWPCT_TOTALN:
2611 case CTSF_COLPCT_TOTALN:
2612 case CTSF_TABLEPCT_TOTALN:
2613 case CTSF_SUBTABLEPCT_TOTALN:
2614 case CTSF_LAYERPCT_TOTALN:
2615 case CTSF_LAYERROWPCT_TOTALN:
2616 case CTSF_LAYERCOLPCT_TOTALN:
2617 s->count += d_weight;
2621 case CTSF_UROWPCT_TOTALN:
2622 case CTSF_UCOLPCT_TOTALN:
2623 case CTSF_UTABLEPCT_TOTALN:
2624 case CTSF_USUBTABLEPCT_TOTALN:
2625 case CTSF_ULAYERPCT_TOTALN:
2626 case CTSF_ULAYERROWPCT_TOTALN:
2627 case CTSF_ULAYERCOLPCT_TOTALN:
2632 case CTSF_ROWPCT_COUNT:
2633 case CTSF_COLPCT_COUNT:
2634 case CTSF_TABLEPCT_COUNT:
2635 case CTSF_SUBTABLEPCT_COUNT:
2636 case CTSF_LAYERPCT_COUNT:
2637 case CTSF_LAYERROWPCT_COUNT:
2638 case CTSF_LAYERCOLPCT_COUNT:
2639 if (is_scale || !excluded_missing)
2640 s->count += d_weight;
2644 case CTSF_UROWPCT_COUNT:
2645 case CTSF_UCOLPCT_COUNT:
2646 case CTSF_UTABLEPCT_COUNT:
2647 case CTSF_USUBTABLEPCT_COUNT:
2648 case CTSF_ULAYERPCT_COUNT:
2649 case CTSF_ULAYERROWPCT_COUNT:
2650 case CTSF_ULAYERCOLPCT_COUNT:
2651 if (is_scale || !excluded_missing)
2656 case CTSF_ROWPCT_VALIDN:
2657 case CTSF_COLPCT_VALIDN:
2658 case CTSF_TABLEPCT_VALIDN:
2659 case CTSF_SUBTABLEPCT_VALIDN:
2660 case CTSF_LAYERPCT_VALIDN:
2661 case CTSF_LAYERROWPCT_VALIDN:
2662 case CTSF_LAYERCOLPCT_VALIDN:
2666 s->count += d_weight;
2670 case CTSF_UROWPCT_VALIDN:
2671 case CTSF_UCOLPCT_VALIDN:
2672 case CTSF_UTABLEPCT_VALIDN:
2673 case CTSF_USUBTABLEPCT_VALIDN:
2674 case CTSF_ULAYERPCT_VALIDN:
2675 case CTSF_ULAYERROWPCT_VALIDN:
2676 case CTSF_ULAYERCOLPCT_VALIDN:
2685 s->count += d_weight;
2694 if (is_scale || !excluded_missing)
2695 s->count += e_weight;
2702 s->count += e_weight;
2706 s->count += e_weight;
2712 if (!is_scale_missing)
2714 assert (!var_is_alpha (var)); /* XXX? */
2715 if (s->min == SYSMIS || value->f < s->min)
2717 if (s->max == SYSMIS || value->f > s->max)
2727 case CTSF_ROWPCT_SUM:
2728 case CTSF_COLPCT_SUM:
2729 case CTSF_TABLEPCT_SUM:
2730 case CTSF_SUBTABLEPCT_SUM:
2731 case CTSF_LAYERPCT_SUM:
2732 case CTSF_LAYERROWPCT_SUM:
2733 case CTSF_LAYERCOLPCT_SUM:
2734 if (!is_scale_missing)
2735 moments1_add (s->moments, value->f, e_weight);
2742 case CTSF_UVARIANCE:
2743 case CTSF_UROWPCT_SUM:
2744 case CTSF_UCOLPCT_SUM:
2745 case CTSF_UTABLEPCT_SUM:
2746 case CTSF_USUBTABLEPCT_SUM:
2747 case CTSF_ULAYERPCT_SUM:
2748 case CTSF_ULAYERROWPCT_SUM:
2749 case CTSF_ULAYERCOLPCT_SUM:
2750 if (!is_scale_missing)
2751 moments1_add (s->moments, value->f, 1.0);
2757 d_weight = e_weight = 1.0;
2762 if (!is_scale_missing)
2764 s->ovalid += e_weight;
2766 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2767 *case_num_rw_idx (c, 0) = value->f;
2768 *case_num_rw_idx (c, 1) = e_weight;
2769 casewriter_write (s->writer, c);
/* Maps summary function FUNCTION to the domain type over which its
   percentage is computed.  The case labels are grouped by prefix (COLPCT,
   LAYERCOLPCT, LAYERPCT, LAYERROWPCT, ROWPCT, SUBTABLEPCT, TABLEPCT), each
   group also covering the U-prefixed unweighted variants.  ctables_summary_value
   uses the result as an index into cell->domains[].

   The visible return statements are CTDT_LAYERCOL, CTDT_LAYERROW and
   CTDT_SUBTABLE.
   NOTE(review): the returns for the other groups, and the handling of
   functions that are not percentages, are on lines not visible in this
   excerpt. */
2775 static enum ctables_domain_type
2776 ctables_function_domain (enum ctables_summary_function function)
2806 case CTSF_UVARIANCE:
2812 case CTSF_COLPCT_COUNT:
2813 case CTSF_COLPCT_SUM:
2814 case CTSF_COLPCT_TOTALN:
2815 case CTSF_COLPCT_VALIDN:
2816 case CTSF_UCOLPCT_COUNT:
2817 case CTSF_UCOLPCT_SUM:
2818 case CTSF_UCOLPCT_TOTALN:
2819 case CTSF_UCOLPCT_VALIDN:
2822 case CTSF_LAYERCOLPCT_COUNT:
2823 case CTSF_LAYERCOLPCT_SUM:
2824 case CTSF_LAYERCOLPCT_TOTALN:
2825 case CTSF_LAYERCOLPCT_VALIDN:
2826 case CTSF_ULAYERCOLPCT_COUNT:
2827 case CTSF_ULAYERCOLPCT_SUM:
2828 case CTSF_ULAYERCOLPCT_TOTALN:
2829 case CTSF_ULAYERCOLPCT_VALIDN:
2830 return CTDT_LAYERCOL;
2832 case CTSF_LAYERPCT_COUNT:
2833 case CTSF_LAYERPCT_SUM:
2834 case CTSF_LAYERPCT_TOTALN:
2835 case CTSF_LAYERPCT_VALIDN:
2836 case CTSF_ULAYERPCT_COUNT:
2837 case CTSF_ULAYERPCT_SUM:
2838 case CTSF_ULAYERPCT_TOTALN:
2839 case CTSF_ULAYERPCT_VALIDN:
2842 case CTSF_LAYERROWPCT_COUNT:
2843 case CTSF_LAYERROWPCT_SUM:
2844 case CTSF_LAYERROWPCT_TOTALN:
2845 case CTSF_LAYERROWPCT_VALIDN:
2846 case CTSF_ULAYERROWPCT_COUNT:
2847 case CTSF_ULAYERROWPCT_SUM:
2848 case CTSF_ULAYERROWPCT_TOTALN:
2849 case CTSF_ULAYERROWPCT_VALIDN:
2850 return CTDT_LAYERROW;
2852 case CTSF_ROWPCT_COUNT:
2853 case CTSF_ROWPCT_SUM:
2854 case CTSF_ROWPCT_TOTALN:
2855 case CTSF_ROWPCT_VALIDN:
2856 case CTSF_UROWPCT_COUNT:
2857 case CTSF_UROWPCT_SUM:
2858 case CTSF_UROWPCT_TOTALN:
2859 case CTSF_UROWPCT_VALIDN:
2862 case CTSF_SUBTABLEPCT_COUNT:
2863 case CTSF_SUBTABLEPCT_SUM:
2864 case CTSF_SUBTABLEPCT_TOTALN:
2865 case CTSF_SUBTABLEPCT_VALIDN:
2866 case CTSF_USUBTABLEPCT_COUNT:
2867 case CTSF_USUBTABLEPCT_SUM:
2868 case CTSF_USUBTABLEPCT_TOTALN:
2869 case CTSF_USUBTABLEPCT_VALIDN:
2870 return CTDT_SUBTABLE;
2872 case CTSF_TABLEPCT_COUNT:
2873 case CTSF_TABLEPCT_SUM:
2874 case CTSF_TABLEPCT_TOTALN:
2875 case CTSF_TABLEPCT_VALIDN:
2876 case CTSF_UTABLEPCT_COUNT:
2877 case CTSF_UTABLEPCT_SUM:
2878 case CTSF_UTABLEPCT_TOTALN:
2879 case CTSF_UTABLEPCT_VALIDN:
/* Computes the value of summary S, which belongs to CELL, as specified by
   SS->function.

   The *PCT_COUNT, *PCT_VALIDN, and *PCT_TOTALN families divide S->count by
   the corresponding accumulator (e_count, u_count, e_valid, u_valid,
   e_total, u_total) of the cell's domain, selected with
   ctables_function_domain(), and multiply by 100.  The denominator is tested
   for zero first; the value produced in that case is not visible in this
   excerpt (presumably SYSMIS -- TODO confirm).

   Moment-based statistics are obtained from moments1_calculate() on
   S->moments.  Order statistics read back the cases written to S->writer. */
2887 ctables_summary_value (const struct ctables_cell *cell,
2888 union ctables_summary *s,
2889 const struct ctables_summary_spec *ss)
2891 switch (ss->function)
/* Percentage of count, relative to the domain's e_count. */
2898 case CTSF_ROWPCT_COUNT:
2899 case CTSF_COLPCT_COUNT:
2900 case CTSF_TABLEPCT_COUNT:
2901 case CTSF_SUBTABLEPCT_COUNT:
2902 case CTSF_LAYERPCT_COUNT:
2903 case CTSF_LAYERROWPCT_COUNT:
2904 case CTSF_LAYERCOLPCT_COUNT:
2906 enum ctables_domain_type d = ctables_function_domain (ss->function);
2907 return (cell->domains[d]->e_count
2908 ? s->count / cell->domains[d]->e_count * 100
/* Percentage of count, relative to the domain's u_count. */
2912 case CTSF_UROWPCT_COUNT:
2913 case CTSF_UCOLPCT_COUNT:
2914 case CTSF_UTABLEPCT_COUNT:
2915 case CTSF_USUBTABLEPCT_COUNT:
2916 case CTSF_ULAYERPCT_COUNT:
2917 case CTSF_ULAYERROWPCT_COUNT:
2918 case CTSF_ULAYERCOLPCT_COUNT:
2920 enum ctables_domain_type d = ctables_function_domain (ss->function);
2921 return (cell->domains[d]->u_count
2922 ? s->count / cell->domains[d]->u_count * 100
/* Percentage relative to the domain's e_valid. */
2926 case CTSF_ROWPCT_VALIDN:
2927 case CTSF_COLPCT_VALIDN:
2928 case CTSF_TABLEPCT_VALIDN:
2929 case CTSF_SUBTABLEPCT_VALIDN:
2930 case CTSF_LAYERPCT_VALIDN:
2931 case CTSF_LAYERROWPCT_VALIDN:
2932 case CTSF_LAYERCOLPCT_VALIDN:
2934 enum ctables_domain_type d = ctables_function_domain (ss->function);
2935 return (cell->domains[d]->e_valid
2936 ? s->count / cell->domains[d]->e_valid * 100
/* Percentage relative to the domain's u_valid. */
2940 case CTSF_UROWPCT_VALIDN:
2941 case CTSF_UCOLPCT_VALIDN:
2942 case CTSF_UTABLEPCT_VALIDN:
2943 case CTSF_USUBTABLEPCT_VALIDN:
2944 case CTSF_ULAYERPCT_VALIDN:
2945 case CTSF_ULAYERROWPCT_VALIDN:
2946 case CTSF_ULAYERCOLPCT_VALIDN:
2948 enum ctables_domain_type d = ctables_function_domain (ss->function);
2949 return (cell->domains[d]->u_valid
2950 ? s->count / cell->domains[d]->u_valid * 100
/* Percentage relative to the domain's e_total. */
2954 case CTSF_ROWPCT_TOTALN:
2955 case CTSF_COLPCT_TOTALN:
2956 case CTSF_TABLEPCT_TOTALN:
2957 case CTSF_SUBTABLEPCT_TOTALN:
2958 case CTSF_LAYERPCT_TOTALN:
2959 case CTSF_LAYERROWPCT_TOTALN:
2960 case CTSF_LAYERCOLPCT_TOTALN:
2962 enum ctables_domain_type d = ctables_function_domain (ss->function);
2963 return (cell->domains[d]->e_total
2964 ? s->count / cell->domains[d]->e_total * 100
/* Percentage relative to the domain's u_total. */
2968 case CTSF_UROWPCT_TOTALN:
2969 case CTSF_UCOLPCT_TOTALN:
2970 case CTSF_UTABLEPCT_TOTALN:
2971 case CTSF_USUBTABLEPCT_TOTALN:
2972 case CTSF_ULAYERPCT_TOTALN:
2973 case CTSF_ULAYERROWPCT_TOTALN:
2974 case CTSF_ULAYERCOLPCT_TOTALN:
2976 enum ctables_domain_type d = ctables_function_domain (ss->function);
2977 return (cell->domains[d]->u_total
2978 ? s->count / cell->domains[d]->u_total * 100
/* Range: SYSMIS unless both extremes have been recorded. */
2999 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
3005 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
3012 double weight, variance;
3013 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
3014 return calc_semean (variance, weight);
3021 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
/* Square root of the variance, propagating SYSMIS. */
3022 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
3028 double weight, mean;
3029 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
/* Sum is reconstructed as weight times mean, propagating SYSMIS. */
3030 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
3034 case CTSF_UVARIANCE:
3037 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
3041 case CTSF_ROWPCT_SUM:
3042 case CTSF_COLPCT_SUM:
3043 case CTSF_TABLEPCT_SUM:
3044 case CTSF_SUBTABLEPCT_SUM:
3045 case CTSF_LAYERPCT_SUM:
3046 case CTSF_LAYERROWPCT_SUM:
3047 case CTSF_LAYERCOLPCT_SUM:
3048 case CTSF_UROWPCT_SUM:
3049 case CTSF_UCOLPCT_SUM:
3050 case CTSF_UTABLEPCT_SUM:
3051 case CTSF_USUBTABLEPCT_SUM:
3052 case CTSF_ULAYERPCT_SUM:
3053 case CTSF_ULAYERROWPCT_SUM:
3054 case CTSF_ULAYERCOLPCT_SUM:
/* Percentile: converts the accumulated case writer into a reader and feeds
   it to a percentile order statistic.  Any function other than CTSF_PTILE
   that reaches this code uses the 0.5 quantile.  The result is stored in
   s->ovalue. */
3063 struct casereader *reader = casewriter_make_reader (s->writer);
3066 struct percentile *ptile = percentile_create (
3067 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
3068 struct order_stats *os = &ptile->parent;
3069 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3070 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
3071 statistic_destroy (&ptile->parent.parent);
/* Mode: same approach as above, using a mode order statistic. */
3079 struct casereader *reader = casewriter_make_reader (s->writer);
3082 struct mode *mode = mode_create ();
3083 struct order_stats *os = &mode->parent;
3084 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
3085 s->ovalue = mode->mode;
3086 statistic_destroy (&mode->parent.parent);
/* Auxiliary data passed to ctables_cell_compare_3way(). */
3094 struct ctables_cell_sort_aux
3096 const struct ctables_nest *nest; /* Nest whose variables are compared. */
3097 enum pivot_axis_type a; /* Axis of each cell to compare along. */
/* Three-way comparison callback for sorting an array of pointers to cells.
   A_ and B_ each point to a "struct ctables_cell *"; AUX_ points to a
   struct ctables_cell_sort_aux that names the nest and axis to compare on.

   Variables are compared in nest order, skipping the nest's scale variable.
   Cells whose values fall in different categories are ordered by the
   addresses of the category objects.  Within the same category, the
   ordering depends on the category's type: by data value, or by value label,
   with the result negated when the category is not sort_ascending. */
3101 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
3103 const struct ctables_cell_sort_aux *aux = aux_;
3104 struct ctables_cell *const *ap = a_;
3105 struct ctables_cell *const *bp = b_;
3106 const struct ctables_cell *a = *ap;
3107 const struct ctables_cell *b = *bp;
3109 const struct ctables_nest *nest = aux->nest;
3110 for (size_t i = 0; i < nest->n; i++)
3111 if (i != nest->scale_idx)
3113 const struct variable *var = nest->vars[i];
3114 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
3115 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
/* Different categories: order by category pointer. */
3116 if (a_cv->category != b_cv->category)
3117 return a_cv->category > b_cv->category ? 1 : -1;
3119 const union value *a_val = &a_cv->value;
3120 const union value *b_val = &b_cv->value;
3121 switch (a_cv->category->type)
3127 case CCT_POSTCOMPUTE:
3128 case CCT_EXCLUDED_MISSING:
3129 /* Must be equal. */
3137 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3145 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
3147 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Sort by value label.  NOTE(review): the head of the conditional below is
   not visible here; it appears that a labeled value compares greater than
   an unlabeled one, and two unlabeled values fall back to comparing the
   values themselves -- confirm. */
3153 const char *a_label = var_lookup_value_label (var, a_val);
3154 const char *b_label = var_lookup_value_label (var, b_val);
3156 ? (b_label ? strcmp (a_label, b_label) : 1)
3157 : (b_label ? -1 : value_compare_3way (
3158 a_val, b_val, var_get_width (var))));
3160 return a_cv->category->sort_ascending ? cmp : -cmp;
3174 For each ctables_table:
3175 For each combination of row vars:
3176 For each combination of column vars:
3177 For each combination of layer vars:
3179 Make a table of row values:
3180 Sort entries by row values
3181 Assign a 0-based index to each actual value
3182 Construct a dimension
3183 Make a table of column values
3184 Make a table of layer values
3186 Fill the table entry using the indexes from before.
/* Looks up, in section S, the domain of type DOMAIN that CELL belongs to,
   and adds a new one with CELL as its example if there is none.

   A domain is identified by the variables listed in nest->domains[DOMAIN] on
   each axis.  For each such variable the category pointer always takes part
   in the hash and in the comparison; the data value takes part only when the
   category is not a total, subtotal, or postcompute category.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the found or newly created domain is returned. */
3189 static struct ctables_domain *
3190 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
3191 enum ctables_domain_type domain)
/* Hash the identifying categories and values of CELL. */
3194 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3196 const struct ctables_nest *nest = s->nests[a];
3197 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3199 size_t v_idx = nest->domains[domain][i];
3200 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
3201 hash = hash_pointer (cv->category, hash);
3202 if (cv->category->type != CCT_TOTAL
3203 && cv->category->type != CCT_SUBTOTAL
3204 && cv->category->type != CCT_POSTCOMPUTE)
3205 hash = value_hash (&cv->value,
3206 var_get_width (nest->vars[v_idx]), hash);
/* Search the hash bucket, comparing CELL against each domain's example
   cell. */
3210 struct ctables_domain *d;
3211 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
3213 const struct ctables_cell *df = d->example;
3214 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3216 const struct ctables_nest *nest = s->nests[a];
3217 for (size_t i = 0; i < nest->n_domains[domain]; i++)
3219 size_t v_idx = nest->domains[domain][i];
3220 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
3221 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
3222 if (cv1->category != cv2->category
3223 || (cv1->category->type != CCT_TOTAL
3224 && cv1->category->type != CCT_SUBTOTAL
3225 && cv1->category->type != CCT_POSTCOMPUTE
3226 && !value_equal (&cv1->value, &cv2->value,
3227 var_get_width (nest->vars[v_idx]))))
/* No match: create a domain with all other members zeroed and CELL as its
   example. */
3236 d = xmalloc (sizeof *d);
3237 *d = (struct ctables_domain) { .example = cell };
3238 hmap_insert (&s->domains[domain], &d->node, hash);
/* Returns the string contents of V, a value of string variable VAR, as a
   substring with trailing spaces removed.  The substring is built over
   V->s with VAR's width as its initial length. */
3242 static struct substring
3243 rtrim_value (const union value *v, const struct variable *var)
3245 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
3246 var_get_width (var));
3247 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether V, a value of string variable VAR, lies within the range
   SRANGE[0]...SRANGE[1], inclusive, after trimming trailing spaces from V.
   A bound whose "string" member is null is treated as unbounded. */
3252 in_string_range (const union value *v, const struct variable *var,
3253 const struct substring *srange)
3255 struct substring s = rtrim_value (v, var);
3256 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
3257 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   Categories are scanned from the last to the first.  Exact numbers and
   strings, numeric ranges (where -DBL_MAX and DBL_MAX denote open ends),
   string ranges, and missing values are tested according to each category's
   type.  If no category matches directly, the final fallback is the
   "othernm" category recorded during the scan, unless V is a missing value
   of VAR, in which case the result is NULL.

   NOTE(review): a system-missing numeric value is special-cased at the top;
   the statement it guards is not visible here. */
3260 static const struct ctables_category *
3261 ctables_categories_match (const struct ctables_categories *c,
3262 const union value *v, const struct variable *var)
3264 if (var_is_numeric (var) && v->f == SYSMIS)
3267 const struct ctables_category *othernm = NULL;
/* Iterate in reverse order, from index n_cats - 1 down to 0. */
3268 for (size_t i = c->n_cats; i-- > 0; )
3270 const struct ctables_category *cat = &c->cats[i];
3274 if (cat->number == v->f)
3279 if (ss_equals (cat->string, rtrim_value (v, var)))
3284 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
3285 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
3290 if (in_string_range (v, var, cat->srange))
3295 if (var_is_value_missing (var, v))
3299 case CCT_POSTCOMPUTE:
3314 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
3317 case CCT_EXCLUDED_MISSING:
3322 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the total category in C, which is looked for only at the first
   and the last position of C->cats.

   The code reads C->cats[0] and C->cats[C->n_cats - 1] unconditionally, so
   it relies on C having at least one category.

   NOTE(review): the result when neither position holds a CCT_TOTAL category
   is not visible in this excerpt (presumably NULL). */
3325 static const struct ctables_category *
3326 ctables_categories_total (const struct ctables_categories *c)
3328 const struct ctables_category *first = &c->cats[0];
3329 const struct ctables_category *last = &c->cats[c->n_cats - 1];
3330 return (first->type == CCT_TOTAL ? first
3331 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell in section S that case C falls into, given that
   CATS[a][i] is the category selected for variable i of the nest on axis a.

   A cell is identified, for each non-scale variable of each axis, by its
   category pointer and, unless the category is a total, subtotal, or
   postcompute category, by the variable's value in C.

   A newly created cell gets a copy of each variable's value, one initialized
   summary per summary specification, and a link to its domain of every
   domain type, and is then inserted into S->cells.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the found or new cell is returned. */
3335 static struct ctables_cell *
3336 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3337 const struct ctables_category *cats[PIVOT_N_AXES][10])
3340 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the identifying categories and values. */
3341 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3343 const struct ctables_nest *nest = s->nests[a];
3344 for (size_t i = 0; i < nest->n; i++)
3345 if (i != nest->scale_idx)
3347 hash = hash_pointer (cats[a][i], hash);
3348 if (cats[a][i]->type != CCT_TOTAL
3349 && cats[a][i]->type != CCT_SUBTOTAL
3350 && cats[a][i]->type != CCT_POSTCOMPUTE)
3351 hash = value_hash (case_data (c, nest->vars[i]),
3352 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell with the same categories and values. */
3358 struct ctables_cell *cell;
3359 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3361 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3363 const struct ctables_nest *nest = s->nests[a];
3364 for (size_t i = 0; i < nest->n; i++)
3365 if (i != nest->scale_idx
3366 && (cats[a][i] != cell->axes[a].cvs[i].category
3367 || (cats[a][i]->type != CCT_TOTAL
3368 && cats[a][i]->type != CCT_SUBTOTAL
3369 && cats[a][i]->type != CCT_POSTCOMPUTE
3370 && !value_equal (case_data (c, nest->vars[i]),
3371 &cell->axes[a].cvs[i].value,
3372 var_get_width (nest->vars[i])))))
/* Not found: create a new cell. */
3381 cell = xmalloc (sizeof *cell);
3384 cell->omit_domains = 0;
3385 cell->postcompute = false;
3386 //struct string name = DS_EMPTY_INITIALIZER;
3387 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3389 const struct ctables_nest *nest = s->nests[a];
3390 cell->axes[a].cvs = (nest->n
3391 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3393 for (size_t i = 0; i < nest->n; i++)
3395 const struct ctables_category *cat = cats[a][i];
3396 const struct variable *var = nest->vars[i];
3397 const union value *value = case_data (c, var);
3398 if (i != nest->scale_idx)
3400 const struct ctables_category *subtotal = cat->subtotal;
3401 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3404 if (cat->type == CCT_TOTAL
3405 || cat->type == CCT_SUBTOTAL
3406 || cat->type == CCT_POSTCOMPUTE)
3408 /* XXX these should be more encompassing I think.*/
3412 case PIVOT_AXIS_COLUMN:
3413 cell->omit_domains |= ((1u << CTDT_TABLE) |
3414 (1u << CTDT_LAYER) |
3415 (1u << CTDT_LAYERCOL) |
3416 (1u << CTDT_SUBTABLE) |
3419 case PIVOT_AXIS_ROW:
3420 cell->omit_domains |= ((1u << CTDT_TABLE) |
3421 (1u << CTDT_LAYER) |
3422 (1u << CTDT_LAYERROW) |
3423 (1u << CTDT_SUBTABLE) |
3426 case PIVOT_AXIS_LAYER:
3427 cell->omit_domains |= ((1u << CTDT_TABLE) |
3428 (1u << CTDT_LAYER));
3432 if (cat->type == CCT_POSTCOMPUTE)
3433 cell->postcompute = true;
3436 cell->axes[a].cvs[i].category = cat;
3437 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the following code builds a debugging name for the cell in
   "name", whose declaration is commented out above; presumably it is
   compiled out by a preprocessor conditional not visible here. */
3440 if (i != nest->scale_idx)
3442 if (!ds_is_empty (&name))
3443 ds_put_cstr (&name, ", ");
3444 char *value_s = data_out (value, var_get_encoding (var),
3445 var_get_print_format (var),
3446 settings_get_fmt_settings ());
3447 if (cat->type == CCT_TOTAL
3448 || cat->type == CCT_SUBTOTAL
3449 || cat->type == CCT_POSTCOMPUTE)
3450 ds_put_format (&name, "%s=total", var_get_name (var));
3452 ds_put_format (&name, "%s=%s", var_get_name (var),
3453 value_s + strspn (value_s, " "));
3459 //cell->name = ds_steal_cstr (&name);
/* Allocate and initialize one summary per specification on the summary
   axis.  xnmalloc() checks the element-count multiplication for overflow,
   matching the allocation of "cvs" above. */
3461 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3462 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3463 cell->summaries = xnmalloc (specs->n, sizeof *cell->summaries);
3464 for (size_t i = 0; i < specs->n; i++)
3465 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
/* Attach the cell to its domain of each type. */
3466 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3467 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3468 hmap_insert (&s->cells, &cell->node, hash);
/* Tests whether case C should be treated as missing for the scale summaries
   in SPECS.  The test applies only when SPECS->is_scale is set, and it
   considers both SPECS->var and every variable in SPECS->listwise_vars.

   NOTE(review): the values returned from each branch are not visible in this
   excerpt; presumably false for non-scale specs and true as soon as any of
   the tested variables is missing. */
3473 is_scale_missing (const struct ctables_summary_spec_set *specs,
3474 const struct ccase *c)
3476 if (!specs->is_scale)
3479 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3482 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3484 const struct variable *var = specs->listwise_vars[i];
3485 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C to the cell in section S selected by CATS, creating the cell
   if necessary.

   Each of the cell's summaries is updated with ctables_summary_add().  Then,
   for every domain type that the cell does not omit, the domain's
   accumulators are updated: the totals always, the counts unless
   EXCLUDED_MISSING, and the valid counts under a further condition that is
   not visible in this excerpt (presumably IS_MISSING -- TODO confirm).

   D_WEIGHT and E_WEIGHT are added to the d_* and e_* accumulators,
   respectively. */
3493 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3494 const struct ctables_category *cats[PIVOT_N_AXES][10],
3495 bool is_missing, bool excluded_missing,
3496 double d_weight, double e_weight)
3498 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3499 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3501 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3503 bool scale_missing = is_scale_missing (specs, c);
3504 for (size_t i = 0; i < specs->n; i++)
3505 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3506 specs->var, case_data (c, specs->var), specs->is_scale,
3507 scale_missing, is_missing, excluded_missing,
3508 d_weight, e_weight);
/* omit_domains is a bit mask indexed by domain type (it is built with
   "1u << CTDT_..." in ctables_cell_insert__), so the test must be a bitwise
   AND.  A logical AND would skip every domain whenever any bit was set. */
3509 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3510 if (!(cell->omit_domains & (1u << dt)))
3512 struct ctables_domain *d = cell->domains[dt];
3513 d->d_total += d_weight;
3514 d->e_total += e_weight;
3516 if (!excluded_missing)
3518 d->d_count += d_weight;
3519 d->e_count += e_weight;
3524 d->d_valid += d_weight;
3525 d->e_valid += e_weight;
/* Adds case C to the total cells that it contributes to in section S.

   Starting from axis START_AXIS and nest index START_NEST, for each
   non-scale variable this looks up the total category of the variable's
   categories, adds C to the resulting cell with ctables_cell_add__(), and
   recurses from the following nest index so that combinations of totals
   across several variables are covered as well.

   NOTE(review): the lines that test "total", substitute it into CATS, and
   restore the saved category afterward are not visible in this excerpt. */
3532 recurse_totals (struct ctables_section *s, const struct ccase *c,
3533 const struct ctables_category *cats[PIVOT_N_AXES][10],
3534 bool is_missing, bool excluded_missing,
3535 double d_weight, double e_weight,
3536 enum pivot_axis_type start_axis, size_t start_nest)
3538 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3540 const struct ctables_nest *nest = s->nests[a];
3541 for (size_t i = start_nest; i < nest->n; i++)
3543 if (i == nest->scale_idx)
3546 const struct variable *var = nest->vars[i];
3548 const struct ctables_category *total = ctables_categories_total (
3549 s->table->categories[var_get_dict_index (var)]);
/* Remember the current category for this variable. */
3552 const struct ctables_category *save = cats[a][i];
3554 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3555 d_weight, e_weight);
3556 recurse_totals (s, c, cats, is_missing, excluded_missing,
3557 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells that it contributes to in section S.

   Starting from axis START_AXIS and nest index START_NEST, for each
   non-scale variable this replaces the variable's current category in CATS
   by that category's subtotal, adds C to the resulting cell with
   ctables_cell_add__(), and recurses from the following nest index.

   NOTE(review): the test for whether the category has a subtotal, and the
   restoration of the saved category, are not visible in this excerpt. */
3566 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3567 const struct ctables_category *cats[PIVOT_N_AXES][10],
3568 bool is_missing, bool excluded_missing,
3569 double d_weight, double e_weight,
3570 enum pivot_axis_type start_axis, size_t start_nest)
3572 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3574 const struct ctables_nest *nest = s->nests[a];
3575 for (size_t i = start_nest; i < nest->n; i++)
3577 if (i == nest->scale_idx)
3580 const struct ctables_category *save = cats[a][i];
/* Substitute the subtotal that the current category belongs to. */
3583 cats[a][i] = save->subtotal;
3584 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3585 d_weight, e_weight);
3586 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3587 d_weight, e_weight, a, i + 1);
/* Records that VALUE, a value of variable VAR, occurs in the data, by
   inserting a copy of it into the hash map OCCURRENCES.

   The map is searched for an equal value first.  NOTE(review): the action
   taken when one is found is not visible in this excerpt; presumably the
   function returns without inserting a duplicate. */
3596 ctables_add_occurrence (const struct variable *var,
3597 const union value *value,
3598 struct hmap *occurrences)
3600 int width = var_get_width (var);
3601 unsigned int hash = value_hash (value, width, 0);
3603 struct ctables_occurrence *o;
3604 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3606 if (value_equal (value, &o->value, width))
/* Insert a new occurrence that owns a clone of VALUE. */
3609 o = xmalloc (sizeof *o);
3610 value_clone (&o->value, value, width);
3611 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First, the category of every non-scale variable on every axis is
   determined with ctables_categories_match().  A static placeholder category
   of type CCT_EXCLUDED_MISSING is substituted, and "excluded_missing" is
   set, under a condition that is not visible in this excerpt (presumably
   when a missing value matches no category -- TODO confirm).

   Unless a value was excluded, each non-scale variable's value is recorded
   in S->occurrences.  Finally the case is added to its own cell and to the
   total and subtotal cells that it contributes to. */
3615 ctables_cell_insert (struct ctables_section *s,
3616 const struct ccase *c,
3617 double d_weight, double e_weight)
/* The second dimension bounds the number of variables per nest at 10. */
3619 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3621 /* Does at least one categorical variable have a missing value in an included
3622 or excluded category? */
3623 bool is_missing = false;
3625 /* Does at least one categorical variable have a missing value in an excluded
3627 bool excluded_missing = false;
3629 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3631 const struct ctables_nest *nest = s->nests[a];
3632 for (size_t i = 0; i < nest->n; i++)
3634 if (i == nest->scale_idx)
3637 const struct variable *var = nest->vars[i];
3638 const union value *value = case_data (c, var);
3640 bool var_missing = var_is_value_missing (var, value) != 0;
3644 cats[a][i] = ctables_categories_match (
3645 s->table->categories[var_get_dict_index (var)], value, var);
3651 static const struct ctables_category cct_excluded_missing = {
3652 .type = CCT_EXCLUDED_MISSING,
3655 cats[a][i] = &cct_excluded_missing;
3656 excluded_missing = true;
/* Record the values seen for each non-scale variable. */
3661 if (!excluded_missing)
3662 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3664 const struct ctables_nest *nest = s->nests[a];
3665 for (size_t i = 0; i < nest->n; i++)
3666 if (i != nest->scale_idx)
3668 const struct variable *var = nest->vars[i];
3669 const union value *value = case_data (c, var);
3670 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add to the case's own cell, then to totals and subtotals. */
3674 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3675 d_weight, e_weight);
3677 //if (!excluded_missing)
3679 recurse_totals (s, c, cats, is_missing, excluded_missing,
3680 d_weight, e_weight, 0, 0);
3681 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3682 d_weight, e_weight, 0, 0);
3688 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that merge items A and
   B refer to (element "ofs" of each item's set).

   Specifications are ordered by function in ascending order, then by
   percentile with the larger percentile first, and finally by label using
   strcmp(). */
3693 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3695 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3696 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3697 if (as->function != bs->function)
3698 return as->function > bs->function ? 1 : -1;
3699 else if (as->percentile != bs->percentile)
3700 return as->percentile < bs->percentile ? 1 : -1;
3701 return strcmp (as->label, bs->label);
/* Creates and returns a label for category CAT of variable VAR.  For total
   and subtotal categories the label is CAT->total_label as user text; for
   any other category it is a pivot value built from VAR and VALUE. */
3704 static struct pivot_value *
3705 ctables_category_create_label__ (const struct ctables_category *cat,
3706 const struct variable *var,
3707 const union value *value)
3709 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3710 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3711 : pivot_value_new_var_value (var, value));
/* Creates and returns the label for postcompute category CAT, taken from
   CAT->pc->label.

   Each occurrence of ")LABEL[n]" in the label, where n is a 1-based index
   into CATS->cats, is replaced by the label of that category as produced by
   ctables_category_create_label__() for VAR and VALUE.  A label that
   contains no such reference is returned as is.

   NOTE(review): the jumps taken on a malformed reference (missing ']', or an
   index that is not a number in 1...CATS->n_cats) are not visible in this
   excerpt; it appears that control reaches the final statement, which
   returns the label without any substitution. */
3714 static struct pivot_value *
3715 ctables_postcompute_label (const struct ctables_categories *cats,
3716 const struct ctables_category *cat,
3717 const struct variable *var,
3718 const union value *value)
3720 struct substring in = ss_cstr (cat->pc->label);
3721 struct substring target = ss_cstr (")LABEL[");
3723 struct string out = DS_EMPTY_INITIALIZER;
3726 size_t chunk = ss_find_substring (in, target);
/* No further reference: emit the remaining text and finish. */
3727 if (chunk == SIZE_MAX)
3729 if (ds_is_empty (&out))
3730 return pivot_value_new_user_text (in.string, in.length);
3733 ds_put_substring (&out, in);
3734 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference and skip past ")LABEL[". */
3738 ds_put_substring (&out, ss_head (in, chunk));
3739 ss_advance (&in, chunk + target.length);
3741 struct substring idx_s;
3742 if (!ss_get_until (&in, ']', &idx_s))
/* The index must consist entirely of a decimal number in range. */
3745 long int idx = strtol (idx_s.string, &tail, 10);
3746 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
/* Substitute the referenced category's label. */
3749 struct ctables_category *cat2 = &cats->cats[idx - 1];
3750 struct pivot_value *label2
3751 = ctables_category_create_label__ (cat2, var, value);
3752 char *label2_s = pivot_value_to_string_defaults (label2);
3753 ds_put_cstr (&out, label2_s);
3755 pivot_value_destroy (label2);
3760 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates and returns a label for category CAT, one of the categories in
   CATS, for VALUE of variable VAR.  A postcompute category that has a label
   is handled by ctables_postcompute_label(); everything else is handled by
   ctables_category_create_label__(). */
3763 static struct pivot_value *
3764 ctables_category_create_label (const struct ctables_categories *cats,
3765 const struct ctables_category *cat,
3766 const struct variable *var,
3767 const union value *value)
3769 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3770 ? ctables_postcompute_label (cats, cat, var, value)
3771 : ctables_category_create_label__ (cat, var, value));
/* Searches T->clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and whose hash value is HASH.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the matching entry is returned, or NULL if there is none. */
3774 static struct ctables_value *
3775 ctables_value_find__ (struct ctables_table *t, const union value *value,
3776 int width, unsigned int hash)
3778 struct ctables_value *clv;
3779 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3780 hash, &t->clabels_values_map)
3781 if (value_equal (value, &clv->value, width))
/* Adds a copy of VALUE to T->clabels_values_map.  The map is searched with
   ctables_value_find__() first.

   NOTE(review): the test on the search result is not visible in this
   excerpt; presumably a new entry is created only if none was found. */
3787 ctables_value_insert (struct ctables_table *t, const union value *value,
3790 unsigned int hash = value_hash (value, width, 0);
3791 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3794 clv = xmalloc (sizeof *clv);
3795 value_clone (&clv->value, value, width);
3796 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, which has the given WIDTH, in T's category-label values.
   This computes the hash of VALUE and passes it on to
   ctables_value_find__(), whose result is returned. */
3800 static struct ctables_value *
3801 ctables_value_find (struct ctables_table *t,
3802 const union value *value, int width)
3804 return ctables_value_find__ (t, value, width,
3805 value_hash (value, width, 0));
/* Recursively adds to T one section for each combination of nests across
   the axes.

   While A is a valid axis, IX[A] iterates over the nests stacked on that
   axis (a single iteration is made for an axis that has no nests) and the
   function recurses to axis A + 1.  Once every axis has an index, a new
   section is appended to T->sections and its cell map, per-variable
   occurrence maps, and per-domain-type maps are initialized.

   The caller must have allocated T->sections with enough room; this
   function only increments T->n_sections. */
3809 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3810 size_t ix[PIVOT_N_AXES])
3812 if (a < PIVOT_N_AXES)
3814 size_t limit = MAX (t->stacks[a].n, 1);
3815 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3816 ctables_table_add_section (t, a + 1, ix);
/* All axes are fixed: create the section. */
3820 struct ctables_section *s = &t->sections[t->n_sections++];
3821 *s = (struct ctables_section) {
3823 .cells = HMAP_INITIALIZER (s->cells),
3825 for (a = 0; a < PIVOT_N_AXES; a++)
3828 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3830 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3831 for (size_t i = 0; i < nest->n; i++)
3832 hmap_init (&s->occurrences[a][i]);
3834 for (size_t i = 0; i < N_CTDTS; i++)
3835 hmap_init (&s->domains[i]);
/* Binary operator callback passed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns A + B. */
3840 ctpo_add (double a, double b)
/* Binary operator callback passed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns A - B. */
3846 ctpo_sub (double a, double b)
/* Binary operator callback passed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns A * B. */
3852 ctpo_mul (double a, double b)
/* Binary operator callback for division in postcompute expressions.
   Returns A / B, or SYSMIS if B is zero. */
3858 ctpo_div (double a, double b)
3860 return b ? a / b : SYSMIS;
/* Binary operator callback for exponentiation in postcompute expressions.
   Computes pow (A, B).  The value of errno on entry is saved first.
   NOTE(review): the rest of the body is not visible in this excerpt;
   presumably errno is inspected to detect a failed computation and then
   restored. */
3864 ctpo_pow (double a, double b)
3866 int save_errno = errno;
3868 double result = pow (a, b);
/* Unary operator callback passed to ctables_pcexpr_evaluate_nonterminal()
   with one argument; B is unused.  NOTE(review): the body is not visible in
   this excerpt; presumably it returns -A. */
3876 ctpo_neg (double a, double b UNUSED)
/* Context for evaluating a postcompute expression. */
3881 struct ctables_pcexpr_evaluate_ctx
3883 const struct ctables_cell *cell; /* Cell being computed. */
3884 const struct ctables_section *section; /* Section that contains the cell. */
3885 const struct ctables_categories *cats; /* Categories of the postcompute's variable. */
3886 enum pivot_axis_type pc_a; /* Axis that holds the postcompute category. */
/* Forward declaration, needed because ctables_pcexpr_evaluate() and
   ctables_pcexpr_evaluate_nonterminal() call each other. */
3891 static double ctables_pcexpr_evaluate (
3892 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E, which has N_ARGS operands (at most 2), in
   context CTX.  Each operand E->subs[i] is evaluated in turn, and then
   EVALUATE is applied to the results; an operand that is not supplied is
   passed as 0.

   An operand that is not finite or that equals SYSMIS short-circuits the
   evaluation.  NOTE(review): the value returned in that case is not visible
   in this excerpt (presumably SYSMIS). */
3895 ctables_pcexpr_evaluate_nonterminal (
3896 const struct ctables_pcexpr_evaluate_ctx *ctx,
3897 const struct ctables_pcexpr *e, size_t n_args,
3898 double evaluate (double, double))
3900 double args[2] = { 0, 0 };
3901 for (size_t i = 0; i < n_args; i++)
3903 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3904 if (!isfinite (args[i]) || args[i] == SYSMIS)
3907 return evaluate (args[0], args[1]);
/* Returns the summary value, for summary index CTX->summary_idx, of the cell
   that is identical to CTX->cell except that, at position CTX->pc_a_idx on
   axis CTX->pc_a, it has category and value PC_CV instead of the
   postcompute category.

   The target cell is found by hashing and comparing categories and values
   in the same way that cells are keyed when they are inserted.
   NOTE(review): what is returned when no such cell exists is not visible in
   this excerpt. */
3911 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3912 const struct ctables_cell_value *pc_cv)
3914 const struct ctables_section *s = ctx->section;
/* Compute the hash of the target cell. */
3917 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3919 const struct ctables_nest *nest = s->nests[a];
3920 for (size_t i = 0; i < nest->n; i++)
3921 if (i != nest->scale_idx)
3923 const struct ctables_cell_value *cv
3924 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3925 : &ctx->cell->axes[a].cvs[i]);
3926 hash = hash_pointer (cv->category, hash);
3927 if (cv->category->type != CCT_TOTAL
3928 && cv->category->type != CCT_SUBTOTAL
3929 && cv->category->type != CCT_POSTCOMPUTE)
3930 hash = value_hash (&cv->value,
3931 var_get_width (nest->vars[i]), hash);
/* Search the bucket for the target cell. */
3935 struct ctables_cell *tc;
3936 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3938 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3940 const struct ctables_nest *nest = s->nests[a];
3941 for (size_t i = 0; i < nest->n; i++)
3942 if (i != nest->scale_idx)
3944 const struct ctables_cell_value *p_cv
3945 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3946 : &ctx->cell->axes[a].cvs[i]);
3947 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3948 if (p_cv->category != t_cv->category
3949 || (p_cv->category->type != CCT_TOTAL
3950 && p_cv->category->type != CCT_SUBTOTAL
3951 && p_cv->category->type != CCT_POSTCOMPUTE
3952 && !value_equal (&p_cv->value,
3954 var_get_width (nest->vars[i]))))
/* Evaluate the requested summary of the target cell. */
3966 const struct ctables_table *t = s->table;
3967 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3968 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3969 return ctables_summary_value (tc, &tc->summaries[ctx->summary_idx],
3970 &specs->specs[ctx->summary_idx]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Range category references add up the value of
   ctables_pcexpr_evaluate_category() over every occurring value of the
   postcompute's variable that falls into the referenced category.  The
   other category references evaluate a single cell whose value is
   E->number.  Operator nodes are evaluated with
   ctables_pcexpr_evaluate_nonterminal(), with two operands for the binary
   operators and one for negation. */
3974 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3975 const struct ctables_pcexpr *e)
3982 case CTPO_CAT_NRANGE:
3983 case CTPO_CAT_SRANGE:
3985 struct ctables_cell_value cv = {
3986 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3988 assert (cv.category != NULL);
3990 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3991 const struct ctables_occurrence *o;
3994 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
/* Sum over the occurring values that match the category. */
3995 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3996 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3998 cv.value = o->value;
3999 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
4004 case CTPO_CAT_NUMBER:
4005 case CTPO_CAT_STRING:
4006 case CTPO_CAT_MISSING:
4007 case CTPO_CAT_OTHERNM:
4008 case CTPO_CAT_SUBTOTAL:
4009 case CTPO_CAT_TOTAL:
4011 struct ctables_cell_value cv = {
4012 .category = ctables_find_category_for_postcompute (ctx->cats, e),
4013 .value = { .f = e->number },
4015 assert (cv.category != NULL);
4016 return ctables_pcexpr_evaluate_category (ctx, &cv);
4020 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
4023 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
4026 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
4029 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
4032 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
4035 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Returns the postcompute for CELL in section S, which must have its
   "postcompute" flag set.  The axes are scanned in order, and within each
   axis the cell's values, for the first category of type CCT_POSTCOMPUTE.
   The index of that value within its axis is stored in *PC_A_IDX_P.
   NOTE(review): the store through PC_A_P is not visible in this excerpt;
   presumably it receives the axis. */
4041 /* XXX what if there is a postcompute in more than one dimension?? */
4042 static const struct ctables_postcompute *
4043 ctables_cell_postcompute (const struct ctables_section *s,
4044 const struct ctables_cell *cell,
4045 enum pivot_axis_type *pc_a_p,
4048 assert (cell->postcompute);
/* The loop has no termination test of its own; the assertion below guards
   against running past the last axis. */
4049 for (enum pivot_axis_type pc_a = 0; ; pc_a++)
4051 assert (pc_a < PIVOT_N_AXES);
4052 for (size_t pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
4054 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
4055 if (cv->category->type == CCT_POSTCOMPUTE)
4060 *pc_a_idx_p = pc_a_idx;
4061 return cv->category->pc;
/* Computes the value of postcompute cell CELL in section S for summary
   specification SS, by evaluating the postcompute's expression.

   The postcompute's own summary specifications are searched for one whose
   function and percentile equal those of SS; if one is found, its format is
   stored in *FORMAT and *IS_CTABLES_FORMAT.  NOTE(review): the condition
   that guards this search is not visible in this excerpt. */
4070 ctables_cell_calculate_postcompute (const struct ctables_section *s,
4071 const struct ctables_cell *cell,
4072 const struct ctables_summary_spec *ss,
4073 struct fmt_spec *format,
4074 bool *is_ctables_format,
4077 enum pivot_axis_type pc_a;
4079 const struct ctables_postcompute *pc = ctables_cell_postcompute (
4080 s, cell, &pc_a, &pc_a_idx);
/* Look for a format override among the postcompute's specifications. */
4084 for (size_t i = 0; i < pc->specs->n; i++)
4086 const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
4087 if (ss->function == ss2->function
4088 && ss->percentile == ss2->percentile)
4090 *format = ss2->format;
4091 *is_ctables_format = ss2->is_ctables_format;
/* Evaluate the expression against the categories of the variable that
   carries the postcompute. */
4097 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
4098 const struct ctables_categories *cats = s->table->categories[
4099 var_get_dict_index (var)];
4100 struct ctables_pcexpr_evaluate_ctx ctx = {
4105 .pc_a_idx = pc_a_idx,
4106 .summary_idx = summary_idx,
4108 return ctables_pcexpr_evaluate (&ctx, pc->expr);
4112 ctables_table_output (struct ctables *ct, struct ctables_table *t)
4114 struct pivot_table *pt = pivot_table_create__ (
4116 ? pivot_value_new_user_text (t->title, SIZE_MAX)
4117 : pivot_value_new_text (N_("Custom Tables"))),
4120 pivot_table_set_caption (
4121 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
4123 pivot_table_set_corner_text (
4124 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
4126 bool summary_dimension = (t->summary_axis != t->slabels_axis
4127 || (!t->slabels_visible
4128 && t->summary_specs.n > 1));
4129 if (summary_dimension)
4131 struct pivot_dimension *d = pivot_dimension_create (
4132 pt, t->slabels_axis, N_("Statistics"));
4133 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4134 if (!t->slabels_visible)
4135 d->hide_all_labels = true;
4136 for (size_t i = 0; i < specs->n; i++)
4137 pivot_category_create_leaf (
4138 d->root, pivot_value_new_text (specs->specs[i].label));
4141 bool categories_dimension = t->clabels_example != NULL;
4142 if (categories_dimension)
4144 struct pivot_dimension *d = pivot_dimension_create (
4145 pt, t->label_axis[t->clabels_from_axis],
4146 t->clabels_from_axis == PIVOT_AXIS_ROW
4147 ? N_("Row Categories")
4148 : N_("Column Categories"));
4149 const struct variable *var = t->clabels_example;
4150 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
4151 for (size_t i = 0; i < t->n_clabels_values; i++)
4153 const struct ctables_value *value = t->clabels_values[i];
4154 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
4155 assert (cat != NULL);
4156 pivot_category_create_leaf (d->root, ctables_category_create_label (
4157 c, cat, t->clabels_example,
4162 pivot_table_set_look (pt, ct->look);
4163 struct pivot_dimension *d[PIVOT_N_AXES];
4164 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4166 static const char *names[] = {
4167 [PIVOT_AXIS_ROW] = N_("Rows"),
4168 [PIVOT_AXIS_COLUMN] = N_("Columns"),
4169 [PIVOT_AXIS_LAYER] = N_("Layers"),
4171 d[a] = (t->axes[a] || a == t->summary_axis
4172 ? pivot_dimension_create (pt, a, names[a])
4177 assert (t->axes[a]);
4179 for (size_t i = 0; i < t->stacks[a].n; i++)
4181 struct ctables_nest *nest = &t->stacks[a].nests[i];
4182 struct ctables_section **sections = xnmalloc (t->n_sections,
4184 size_t n_sections = 0;
4186 size_t n_total_cells = 0;
4187 size_t max_depth = 0;
4188 for (size_t j = 0; j < t->n_sections; j++)
4189 if (t->sections[j].nests[a] == nest)
4191 struct ctables_section *s = &t->sections[j];
4192 sections[n_sections++] = s;
4193 n_total_cells += s->cells.count;
4195 size_t depth = s->nests[a]->n;
4196 max_depth = MAX (depth, max_depth);
4199 struct ctables_cell **sorted = xnmalloc (n_total_cells,
4201 size_t n_sorted = 0;
4203 for (size_t j = 0; j < n_sections; j++)
4205 struct ctables_section *s = sections[j];
4207 struct ctables_cell *cell;
4208 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4210 sorted[n_sorted++] = cell;
4211 assert (n_sorted <= n_total_cells);
4214 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
4215 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
4218 for (size_t j = 0; j < n_sorted; j++)
4220 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
4225 struct ctables_level
4227 enum ctables_level_type
4229 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
4230 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
4231 CTL_SUMMARY, /* Summary functions. */
4235 enum settings_value_show vlabel; /* CTL_VAR only. */
4238 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
4239 size_t n_levels = 0;
4240 for (size_t k = 0; k < nest->n; k++)
4242 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
4243 if (vlabel != CTVL_NONE)
4245 levels[n_levels++] = (struct ctables_level) {
4247 .vlabel = (enum settings_value_show) vlabel,
4252 if (nest->scale_idx != k
4253 && (k != nest->n - 1 || t->label_axis[a] == a))
4255 levels[n_levels++] = (struct ctables_level) {
4256 .type = CTL_CATEGORY,
4262 if (!summary_dimension && a == t->slabels_axis)
4264 levels[n_levels++] = (struct ctables_level) {
4265 .type = CTL_SUMMARY,
4266 .var_idx = SIZE_MAX,
4270 /* Pivot categories:
4272 - variable label for nest->vars[0], if vlabel != CTVL_NONE
4273 - category for nest->vars[0], if nest->scale_idx != 0
4274 - variable label for nest->vars[1], if vlabel != CTVL_NONE
4275 - category for nest->vars[1], if nest->scale_idx != 1
4277 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
4278 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
4279 - summary function, if 'a == t->slabels_axis && a ==
4282 Additional dimensions:
4284 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
4286 - If 't->label_axis[b] == a' for some 'b != a', add a category
4291 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
4293 for (size_t j = 0; j < n_sorted; j++)
4295 struct ctables_cell *cell = sorted[j];
4296 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
4298 size_t n_common = 0;
4301 for (; n_common < n_levels; n_common++)
4303 const struct ctables_level *level = &levels[n_common];
4304 if (level->type == CTL_CATEGORY)
4306 size_t var_idx = level->var_idx;
4307 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
4308 if (prev->axes[a].cvs[var_idx].category != c)
4310 else if (c->type != CCT_SUBTOTAL
4311 && c->type != CCT_TOTAL
4312 && c->type != CCT_POSTCOMPUTE
4313 && !value_equal (&prev->axes[a].cvs[var_idx].value,
4314 &cell->axes[a].cvs[var_idx].value,
4315 var_get_type (nest->vars[var_idx])))
4321 for (size_t k = n_common; k < n_levels; k++)
4323 const struct ctables_level *level = &levels[k];
4324 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
4325 if (level->type == CTL_SUMMARY)
4327 assert (k == n_levels - 1);
4329 const struct ctables_summary_spec_set *specs = &t->summary_specs;
4330 for (size_t m = 0; m < specs->n; m++)
4332 int leaf = pivot_category_create_leaf (
4333 parent, pivot_value_new_text (specs->specs[m].label));
4340 const struct variable *var = nest->vars[level->var_idx];
4341 struct pivot_value *label;
4342 if (level->type == CTL_VAR)
4344 label = pivot_value_new_variable (var);
4345 label->variable.show = level->vlabel;
4347 else if (level->type == CTL_CATEGORY)
4349 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
4350 label = ctables_category_create_label (
4351 t->categories[var_get_dict_index (var)],
4352 cv->category, var, &cv->value);
4357 if (k == n_levels - 1)
4358 prev_leaf = pivot_category_create_leaf (parent, label);
4360 groups[k] = pivot_category_create_group__ (parent, label);
4364 cell->axes[a].leaf = prev_leaf;
4371 for (size_t i = 0; i < t->n_sections; i++)
4373 struct ctables_section *s = &t->sections[i];
4375 struct ctables_cell *cell;
4376 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4381 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4382 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4383 for (size_t j = 0; j < specs->n; j++)
4386 size_t n_dindexes = 0;
4388 if (summary_dimension)
4389 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4391 if (categories_dimension)
4393 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4394 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4395 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4396 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4399 dindexes[n_dindexes++] = ctv->leaf;
4402 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4405 int leaf = cell->axes[a].leaf;
4406 if (a == t->summary_axis && !summary_dimension)
4408 dindexes[n_dindexes++] = leaf;
4411 const struct ctables_summary_spec *ss = &specs->specs[j];
4413 struct fmt_spec format = specs->specs[j].format;
4414 bool is_ctables_format = ss->is_ctables_format;
4415 double d = (cell->postcompute
4416 ? ctables_cell_calculate_postcompute (
4417 s, cell, ss, &format, &is_ctables_format, j)
4418 : ctables_summary_value (cell, &cell->summaries[j],
4421 struct pivot_value *value;
4422 if (ct->hide_threshold != 0
4423 && d < ct->hide_threshold
4424 && ctables_summary_function_is_count (ss->function))
4426 value = pivot_value_new_user_text_nocopy (
4427 xasprintf ("<%d", ct->hide_threshold));
4429 else if (d == 0 && ct->zero)
4430 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4431 else if (d == SYSMIS && ct->missing)
4432 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4433 else if (is_ctables_format)
4435 char *s = data_out_stretchy (&(union value) { .f = d },
4437 &ct->ctables_formats, NULL);
4438 value = pivot_value_new_user_text_nocopy (s);
4442 value = pivot_value_new_number (d);
4443 value->numeric.format = format;
4445 pivot_table_put (pt, dindexes, n_dindexes, value);
4450 pivot_table_submit (pt);
4454 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4456 enum pivot_axis_type label_pos = t->label_axis[a];
4460 t->clabels_from_axis = a;
4462 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4463 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4465 const struct ctables_stack *stack = &t->stacks[a];
4469 const struct ctables_nest *n0 = &stack->nests[0];
4471 const struct variable *v0 = n0->vars[n0->n - 1];
4472 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4473 t->clabels_example = v0;
4475 for (size_t i = 0; i < c0->n_cats; i++)
4476 if (c0->cats[i].type == CCT_FUNCTION)
4478 msg (SE, _("%s=%s is not allowed with sorting based "
4479 "on a summary function."),
4480 subcommand_name, pos_name);
4483 if (n0->n - 1 == n0->scale_idx)
4485 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4486 "but %s is a scale variable."),
4487 subcommand_name, pos_name, var_get_name (v0));
4491 for (size_t i = 1; i < stack->n; i++)
4493 const struct ctables_nest *ni = &stack->nests[i];
4495 const struct variable *vi = ni->vars[ni->n - 1];
4496 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4498 if (ni->n - 1 == ni->scale_idx)
4500 msg (SE, _("%s=%s requires the variables to be moved to be "
4501 "categorical, but %s is a scale variable."),
4502 subcommand_name, pos_name, var_get_name (vi));
4505 if (var_get_width (v0) != var_get_width (vi))
4507 msg (SE, _("%s=%s requires the variables to be "
4508 "moved to have the same width, but %s has "
4509 "width %d and %s has width %d."),
4510 subcommand_name, pos_name,
4511 var_get_name (v0), var_get_width (v0),
4512 var_get_name (vi), var_get_width (vi));
4515 if (!val_labs_equal (var_get_value_labels (v0),
4516 var_get_value_labels (vi)))
4518 msg (SE, _("%s=%s requires the variables to be "
4519 "moved to have the same value labels, but %s "
4520 "and %s have different value labels."),
4521 subcommand_name, pos_name,
4522 var_get_name (v0), var_get_name (vi));
4525 if (!ctables_categories_equal (c0, ci))
4527 msg (SE, _("%s=%s requires the variables to be "
4528 "moved to have the same category "
4529 "specifications, but %s and %s have different "
4530 "category specifications."),
4531 subcommand_name, pos_name,
4532 var_get_name (v0), var_get_name (vi));
/* Prepares table T for execution: enumerates the nests stacked on each axis
   and their per-domain variable indexes, supplies default summary
   specifications on the summary axis, gathers listwise-missing variables
   when T->ctables->smissing_listwise is set, merges all summary
   specifications into T->summary_specs, and finally checks the category
   label positions for the row and column axes, returning the result of
   that check. */
4541 ctables_prepare_table (struct ctables_table *t)
4543 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
/* Enumerate the nests stacked on axis A from its axis expression. */
4546 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4548 for (size_t j = 0; j < t->stacks[a].n; j++)
4550 struct ctables_nest *nest = &t->stacks[a].nests[j];
/* For each domain type, build the list of variable indexes within the nest
   that belong to that domain. */
4551 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
4553 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4554 nest->n_domains[dt] = 0;
4556 for (size_t k = 0; k < nest->n; k++)
4558 if (k == nest->scale_idx)
4567 if (a != PIVOT_AXIS_LAYER)
4574 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4575 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4576 : a == PIVOT_AXIS_ROW)
4578 if (k == nest->n - 1
4579 || (nest->scale_idx == nest->n - 1
4580 && k == nest->n - 2))
4586 if (a == PIVOT_AXIS_COLUMN)
4591 if (a == PIVOT_AXIS_ROW)
4596 nest->domains[dt][nest->n_domains[dt]++] = k;
/* A stack consisting of one nest that has no variables; presumably used for
   an axis that has no expression -- TODO confirm against the enclosing
   condition. */
4603 struct ctables_nest *nest = xmalloc (sizeof *nest);
4604 *nest = (struct ctables_nest) { .n = 0 };
4605 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
/* Fill in summary specifications for each nest on the summary axis. */
4608 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4609 for (size_t i = 0; i < stack->n; i++)
4611 struct ctables_nest *nest = &stack->nests[i];
/* No cell summaries were given: default to a single summary, the mean for a
   scale specification set and the count otherwise. */
4612 if (!nest->specs[CSV_CELL].n)
4614 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4615 specs->specs = xmalloc (sizeof *specs->specs);
4618 enum ctables_summary_function function
4619 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): specs->var is read for the default format here, before the
   assignment from nest->vars[0] just below -- confirm that it is already
   set whenever the format depends on it. */
4621 *specs->specs = (struct ctables_summary_spec) {
4622 .function = function,
4623 .format = ctables_summary_default_format (function, specs->var),
4624 .label = ctables_summary_default_label (function, 0),
4627 specs->var = nest->vars[0];
/* The total summaries start out as a copy of the cell summaries. */
4629 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4630 &nest->specs[CSV_CELL]);
/* Cell summaries exist but total summaries do not: copy the cell ones. */
4632 else if (!nest->specs[CSV_TOTAL].n)
4633 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4634 &nest->specs[CSV_CELL]);
/* Listwise missing-value handling: collect the scale variables of the other
   nests that share this nest's group head, and attach the list to every
   summary variant of this nest. */
4636 if (t->ctables->smissing_listwise)
4638 struct variable **listwise_vars = NULL;
4640 size_t allocated = 0;
4642 for (size_t j = nest->group_head; j < stack->n; j++)
4644 const struct ctables_nest *other_nest = &stack->nests[j];
4645 if (other_nest->group_head != nest->group_head)
4648 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4651 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4652 sizeof *listwise_vars);
4653 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
4656 for (size_t j = 0; j < N_CSVS; j++)
4658 nest->specs[j].listwise_vars = listwise_vars;
4659 nest->specs[j].n_listwise_vars = n;
/* Merge the summary specifications of every nest and variant into
   T->summary_specs.  Each round appends the smallest remaining
   specification (per merge_item_compare_3way) to the merged list and stores
   its merged index in the axis_idx of every specification that compares
   equal to it. */
4664 struct ctables_summary_spec_set *merged = &t->summary_specs;
4665 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4667 for (size_t j = 0; j < stack->n; j++)
4669 const struct ctables_nest *nest = &stack->nests[j];
4671 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4672 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4677 struct merge_item min = items[0];
4678 for (size_t j = 1; j < n_left; j++)
4679 if (merge_item_compare_3way (&items[j], &min) < 0)
4682 if (merged->n >= merged->allocated)
4683 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4684 sizeof *merged->specs);
4685 merged->specs[merged->n++] = min.set->specs[min.ofs];
4687 for (size_t j = 0; j < n_left; )
4689 if (merge_item_compare_3way (&items[j], &min) == 0)
4691 struct merge_item *item = &items[j];
4692 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4693 if (++item->ofs >= item->set->n)
4695 items[j] = items[--n_left];
/* NOTE(review): the printf calls below write the merged summary functions
   and per-nest axis indexes to stdout; they look like debugging output --
   confirm that they are compiled out in normal builds. */
4704 for (size_t j = 0; j < merged->n; j++)
4705 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4707 for (size_t j = 0; j < stack->n; j++)
4709 const struct ctables_nest *nest = &stack->nests[j];
4710 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4712 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4713 for (size_t k = 0; k < specs->n; k++)
4714 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4715 specs->specs[k].axis_idx);
/* The table is usable only if category labels can be placed as requested for
   rows and, if the row check passes, for columns. */
4721 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4722 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
4726 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4727 enum pivot_axis_type a)
4729 struct ctables_stack *stack = &t->stacks[a];
4730 for (size_t i = 0; i < stack->n; i++)
4732 const struct ctables_nest *nest = &stack->nests[i];
4733 const struct variable *var = nest->vars[nest->n - 1];
4734 const union value *value = case_data (c, var);
4736 if (var_is_numeric (var) && value->f == SYSMIS)
4739 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4741 ctables_value_insert (t, value, var_get_width (var));
4746 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4748 const struct ctables_value *const *ap = a_;
4749 const struct ctables_value *const *bp = b_;
4750 const struct ctables_value *a = *ap;
4751 const struct ctables_value *b = *bp;
4752 const int *width = width_;
4753 return value_compare_3way (&a->value, &b->value, *width);
4757 ctables_sort_clabels_values (struct ctables_table *t)
4759 const struct variable *v0 = t->clabels_example;
4760 int width = var_get_width (v0);
4762 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4765 const struct val_labs *val_labs = var_get_value_labels (v0);
4766 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4767 vl = val_labs_next (val_labs, vl))
4768 if (ctables_categories_match (c0, &vl->value, v0))
4769 ctables_value_insert (t, &vl->value, width);
4772 size_t n = hmap_count (&t->clabels_values_map);
4773 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4775 struct ctables_value *clv;
4777 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4778 t->clabels_values[i++] = clv;
4779 t->n_clabels_values = n;
4782 sort (t->clabels_values, n, sizeof *t->clabels_values,
4783 compare_clabels_values_3way, &width);
4785 for (size_t i = 0; i < n; i++)
4786 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES values of VAR selected by the category specifications
   in CATS.  Explicit numbers and strings in CATS are added directly; the
   remaining kinds of category draw their candidates from VAR's value
   labels. */
4790 ctables_add_category_occurrences (const struct variable *var,
4791 struct hmap *occurrences,
4792 const struct ctables_categories *cats)
4794 const struct val_labs *val_labs = var_get_value_labels (var);
4796 for (size_t i = 0; i < cats->n_cats; i++)
4798 const struct ctables_category *c = &cats->cats[i];
/* Explicit number: the number itself is the occurrence. */
4802 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* Explicit string: copy it into a value of VAR's width, padded on the right
   with spaces, add that value, and then release the temporary. */
4808 int width = var_get_width (var);
4810 value_init (&value, width);
4811 value_copy_buf_rpad (&value, width,
4812 CHAR_CAST (uint8_t *, c->string.string),
4813 c->string.length, ' ');
4814 ctables_add_occurrence (var, &value, occurrences);
4815 value_destroy (&value, width);
/* Numeric range: add each labeled value that lies within
   [c->nrange[0], c->nrange[1]], bounds included. */
4820 assert (var_is_numeric (var));
4821 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4822 vl = val_labs_next (val_labs, vl))
4823 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4824 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: add each labeled value that in_string_range() accepts for
   c->srange. */
4828 assert (var_is_alpha (var));
4829 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4830 vl = val_labs_next (val_labs, vl))
4831 if (in_string_range (&vl->value, var, c->srange))
4832 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add each labeled value that is a missing value for VAR. */
4836 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4837 vl = val_labs_next (val_labs, vl))
4838 if (var_is_value_missing (var, &vl->value))
4839 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add every labeled value, without filtering. */
4843 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4844 vl = val_labs_next (val_labs, vl))
4845 ctables_add_occurrence (var, &vl->value, occurrences);
4848 case CCT_POSTCOMPUTE:
/* Add each labeled value, leaving out missing values unless the category
   has include_missing set. */
4858 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4859 vl = val_labs_next (val_labs, vl))
4860 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4861 ctables_add_occurrence (var, &vl->value, occurrences);
4864 case CCT_EXCLUDED_MISSING:
4871 ctables_section_recurse_add_empty_categories (
4872 struct ctables_section *s,
4873 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4874 enum pivot_axis_type a, size_t a_idx)
4876 if (a >= PIVOT_N_AXES)
4877 ctables_cell_insert__ (s, c, cats);
4878 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4879 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4882 const struct variable *var = s->nests[a]->vars[a_idx];
4883 const struct ctables_categories *categories = s->table->categories[
4884 var_get_dict_index (var)];
4885 int width = var_get_width (var);
4886 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4887 const struct ctables_occurrence *o;
4888 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4890 union value *value = case_data_rw (c, var);
4891 value_destroy (value, width);
4892 value_clone (value, &o->value, width);
4893 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4894 assert (cats[a][a_idx] != NULL);
4895 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4898 for (size_t i = 0; i < categories->n_cats; i++)
4900 const struct ctables_category *cat = &categories->cats[i];
4901 if (cat->type == CCT_POSTCOMPUTE)
4903 cats[a][a_idx] = cat;
4904 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4911 ctables_section_add_empty_categories (struct ctables_section *s)
4913 bool show_empty = false;
4914 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4916 for (size_t k = 0; k < s->nests[a]->n; k++)
4917 if (k != s->nests[a]->scale_idx)
4919 const struct variable *var = s->nests[a]->vars[k];
4920 const struct ctables_categories *cats = s->table->categories[
4921 var_get_dict_index (var)];
4922 if (cats->show_empty)
4925 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
4931 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4932 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4933 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
4938 ctables_execute (struct dataset *ds, struct ctables *ct)
4940 for (size_t i = 0; i < ct->n_tables; i++)
4942 struct ctables_table *t = ct->tables[i];
4943 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4944 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4945 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4946 sizeof *t->sections);
4947 size_t ix[PIVOT_N_AXES];
4948 ctables_table_add_section (t, 0, ix);
4951 struct casereader *input = proc_open (ds);
4952 bool warn_on_invalid = true;
4953 for (struct ccase *c = casereader_read (input); c;
4954 case_unref (c), c = casereader_read (input))
4956 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4958 double e_weight = (ct->e_weight
4959 ? var_force_valid_weight (ct->e_weight,
4960 case_num (c, ct->e_weight),
4964 for (size_t i = 0; i < ct->n_tables; i++)
4966 struct ctables_table *t = ct->tables[i];
4968 for (size_t j = 0; j < t->n_sections; j++)
4969 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
4971 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4972 if (t->label_axis[a] != a)
4973 ctables_insert_clabels_values (t, c, a);
4976 casereader_destroy (input);
4978 for (size_t i = 0; i < ct->n_tables; i++)
4980 struct ctables_table *t = ct->tables[i];
4982 if (t->clabels_example)
4983 ctables_sort_clabels_values (t);
4985 for (size_t j = 0; j < t->n_sections; j++)
4986 ctables_section_add_empty_categories (&t->sections[j]);
4988 ctables_table_output (ct, ct->tables[i]);
4990 return proc_commit (ds);
/* Type of the recursive-descent functions that parse one precedence level of
   a postcompute expression.  The binary-operator parsing loop takes a
   function of this type to parse each operand. */
typedef struct ctables_pcexpr *parse_recursively_func (
  struct lexer *lexer, struct dictionary *dict);
4999 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
5005 case CTPO_CAT_STRING:
5006 ss_dealloc (&e->string);
5009 case CTPO_CAT_SRANGE:
5010 for (size_t i = 0; i < 2; i++)
5011 ss_dealloc (&e->srange[i]);
5020 for (size_t i = 0; i < 2; i++)
5021 ctables_pcexpr_destroy (e->subs[i]);
5025 case CTPO_CAT_NUMBER:
5026 case CTPO_CAT_NRANGE:
5027 case CTPO_CAT_MISSING:
5028 case CTPO_CAT_OTHERNM:
5029 case CTPO_CAT_SUBTOTAL:
5030 case CTPO_CAT_TOTAL:
5034 msg_location_destroy (e->location);
5039 static struct ctables_pcexpr *
5040 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
5041 struct ctables_pcexpr *sub0,
5042 struct ctables_pcexpr *sub1)
5044 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5045 *e = (struct ctables_pcexpr) {
5047 .subs = { sub0, sub1 },
5048 .location = msg_location_merged (sub0->location, sub1->location),
5053 /* How to parse an operator. */
5056 enum token_type token;
5057 enum ctables_postcompute_op op;
5060 static const struct operator *
5061 ctable_pcexpr_match_operator (struct lexer *lexer,
5062 const struct operator ops[], size_t n_ops)
5064 for (const struct operator *op = ops; op < ops + n_ops; op++)
5065 if (lex_token (lexer) == op->token)
5067 if (op->token != T_NEG_NUM)
5076 static struct ctables_pcexpr *
5077 ctable_pcexpr_parse_binary_operators__ (
5078 struct lexer *lexer, struct dictionary *dict,
5079 const struct operator ops[], size_t n_ops,
5080 parse_recursively_func *parse_next_level,
5081 const char *chain_warning, struct ctables_pcexpr *lhs)
5083 for (int op_count = 0; ; op_count++)
5085 const struct operator *op
5086 = ctable_pcexpr_match_operator (lexer, ops, n_ops);
5089 if (op_count > 1 && chain_warning)
5090 msg_at (SW, lhs->location, "%s", chain_warning);
5095 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
5098 ctables_pcexpr_destroy (lhs);
5102 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
5106 static struct ctables_pcexpr *
5107 ctable_pcexpr_parse_binary_operators (struct lexer *lexer,
5108 struct dictionary *dict,
5109 const struct operator ops[], size_t n_ops,
5110 parse_recursively_func *parse_next_level,
5111 const char *chain_warning)
5113 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
5117 return ctable_pcexpr_parse_binary_operators__ (lexer, dict, ops, n_ops,
5119 chain_warning, lhs);
/* Declared ahead of its definition because the primary-expression parser
   calls it for parenthesized subexpressions. */
static struct ctables_pcexpr *ctable_pcexpr_parse_add (
  struct lexer *lexer, struct dictionary *dict);
5125 static struct ctables_pcexpr
5126 ctpo_cat_nrange (double low, double high)
5128 return (struct ctables_pcexpr) {
5129 .op = CTPO_CAT_NRANGE,
5130 .nrange = { low, high },
5134 static struct ctables_pcexpr *
5135 ctable_pcexpr_parse_primary (struct lexer *lexer, struct dictionary *dict)
5137 int start_ofs = lex_ofs (lexer);
5138 struct ctables_pcexpr e;
5139 if (lex_is_number (lexer))
5141 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
5142 .number = lex_number (lexer) };
5145 else if (lex_match_id (lexer, "MISSING"))
5146 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
5147 else if (lex_match_id (lexer, "OTHERNM"))
5148 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
5149 else if (lex_match_id (lexer, "TOTAL"))
5150 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
5151 else if (lex_match_id (lexer, "SUBTOTAL"))
5153 size_t subtotal_index = 0;
5154 if (lex_match (lexer, T_LBRACK))
5156 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
5158 subtotal_index = lex_integer (lexer);
5160 if (!lex_force_match (lexer, T_RBRACK))
5163 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
5164 .subtotal_index = subtotal_index };
5166 else if (lex_match (lexer, T_LBRACK))
5168 if (lex_match_id (lexer, "LO"))
5170 if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
5172 e = ctpo_cat_nrange (-DBL_MAX, lex_number (lexer));
5175 else if (lex_is_number (lexer))
5177 double number = lex_number (lexer);
5179 if (lex_match_id (lexer, "THRU"))
5181 if (lex_match_id (lexer, "HI"))
5182 e = ctpo_cat_nrange (number, DBL_MAX);
5185 if (!lex_force_num (lexer))
5187 e = ctpo_cat_nrange (number, lex_number (lexer));
5192 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
5195 else if (lex_is_string (lexer))
5197 struct substring s = recode_substring_pool (
5198 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
5199 ss_rtrim (&s, ss_cstr (" "));
5201 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
5206 lex_error (lexer, NULL);
5210 if (!lex_force_match (lexer, T_RBRACK))
5212 if (e.op == CTPO_CAT_STRING)
5213 ss_dealloc (&e.string);
5217 else if (lex_match (lexer, T_LPAREN))
5219 struct ctables_pcexpr *ep = ctable_pcexpr_parse_add (lexer, dict);
5222 if (!lex_force_match (lexer, T_RPAREN))
5224 ctables_pcexpr_destroy (ep);
5231 lex_error (lexer, NULL);
5235 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
5236 return xmemdup (&e, sizeof e);
5239 static struct ctables_pcexpr *
5240 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
5241 struct lexer *lexer, int start_ofs)
5243 struct ctables_pcexpr *e = xmalloc (sizeof *e);
5244 *e = (struct ctables_pcexpr) {
5247 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
5252 static struct ctables_pcexpr *
5253 ctable_pcexpr_parse_exp (struct lexer *lexer, struct dictionary *dict)
5255 static const struct operator op = { T_EXP, CTPO_POW };
5257 const char *chain_warning =
5258 _("The exponentiation operator (`**') is left-associative: "
5259 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
5260 "To disable this warning, insert parentheses.");
5262 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
5263 return ctable_pcexpr_parse_binary_operators (lexer, dict, &op, 1,
5264 ctable_pcexpr_parse_primary,
5267 /* Special case for situations like "-5**6", which must be parsed as
5270 int start_ofs = lex_ofs (lexer);
5271 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
5272 *lhs = (struct ctables_pcexpr) {
5273 .op = CTPO_CONSTANT,
5274 .number = -lex_tokval (lexer),
5275 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
5279 struct ctables_pcexpr *node = ctable_pcexpr_parse_binary_operators__ (
5280 lexer, dict, &op, 1,
5281 ctable_pcexpr_parse_primary, chain_warning, lhs);
5285 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
5288 /* Parses the unary minus level. */
5289 static struct ctables_pcexpr *
5290 ctable_pcexpr_parse_neg (struct lexer *lexer, struct dictionary *dict)
5292 int start_ofs = lex_ofs (lexer);
5293 if (!lex_match (lexer, T_DASH))
5294 return ctable_pcexpr_parse_exp (lexer, dict);
5296 struct ctables_pcexpr *inner = ctable_pcexpr_parse_neg (lexer, dict);
5300 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
5303 /* Parses the multiplication and division level. */
5304 static struct ctables_pcexpr *
5305 ctable_pcexpr_parse_mul (struct lexer *lexer, struct dictionary *dict)
5307 static const struct operator ops[] =
5309 { T_ASTERISK, CTPO_MUL },
5310 { T_SLASH, CTPO_DIV },
5313 return ctable_pcexpr_parse_binary_operators (lexer, dict, ops,
5314 sizeof ops / sizeof *ops,
5315 ctable_pcexpr_parse_neg, NULL);
5318 /* Parses the addition and subtraction level. */
5319 static struct ctables_pcexpr *
5320 ctable_pcexpr_parse_add (struct lexer *lexer, struct dictionary *dict)
5322 static const struct operator ops[] =
5324 { T_PLUS, CTPO_ADD },
5325 { T_DASH, CTPO_SUB },
5326 { T_NEG_NUM, CTPO_ADD },
5329 return ctable_pcexpr_parse_binary_operators (lexer, dict,
5330 ops, sizeof ops / sizeof *ops,
5331 ctable_pcexpr_parse_mul, NULL);
5334 static struct ctables_postcompute *
5335 ctables_find_postcompute (struct ctables *ct, const char *name)
5337 struct ctables_postcompute *pc;
5338 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
5339 utf8_hash_case_string (name, 0), &ct->postcomputes)
5340 if (!utf8_strcasecmp (pc->name, name))
5346 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
5349 int pcompute_start = lex_ofs (lexer) - 1;
5351 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
5354 char *name = ss_xstrdup (lex_tokss (lexer));
5357 if (!lex_force_match (lexer, T_EQUALS)
5358 || !lex_force_match_id (lexer, "EXPR")
5359 || !lex_force_match (lexer, T_LPAREN))
5365 int expr_start = lex_ofs (lexer);
5366 struct ctables_pcexpr *expr = ctable_pcexpr_parse_add (lexer, dict);
5367 int expr_end = lex_ofs (lexer) - 1;
5368 if (!expr || !lex_force_match (lexer, T_RPAREN))
5373 int pcompute_end = lex_ofs (lexer) - 1;
5375 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
5378 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
5381 msg_at (SW, location, _("New definition of &%s will override the "
5382 "previous definition."),
5384 msg_at (SN, pc->location, _("This is the previous definition."));
5386 ctables_pcexpr_destroy (pc->expr);
5387 msg_location_destroy (pc->location);
5392 pc = xmalloc (sizeof *pc);
5393 *pc = (struct ctables_postcompute) { .name = name };
5394 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5395 utf8_hash_case_string (pc->name, 0));
5398 pc->location = location;
5400 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
5405 ctables_parse_pproperties_format (struct lexer *lexer,
5406 struct ctables_summary_spec_set *sss)
5408 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5410 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5411 && !(lex_token (lexer) == T_ID
5412 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5413 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5414 lex_tokss (lexer)))))
5416 /* Parse function. */
5417 enum ctables_summary_function function;
5418 if (!parse_ctables_summary_function (lexer, &function))
5421 /* Parse percentile. */
5422 double percentile = 0;
5423 if (function == CTSF_PTILE)
5425 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5427 percentile = lex_number (lexer);
5432 struct fmt_spec format;
5433 bool is_ctables_format;
5434 if (!parse_ctables_format_specifier (lexer, &format, &is_ctables_format))
5437 if (sss->n >= sss->allocated)
5438 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5439 sizeof *sss->specs);
5440 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5441 .function = function,
5442 .percentile = percentile,
5444 .is_ctables_format = is_ctables_format,
5450 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand from LEXER.

   The syntax handled here is a list of postcompute references, each
   introduced by a T_AND token and naming a postcompute that must already
   exist in CT (the error message writes these as "&name"), followed by any
   number of LABEL, FORMAT, and HIDESOURCECATS settings.  Each setting is
   applied to every postcompute in the list.

   The caller in cmd_ctables() treats a false result as a parse failure. */
5455 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
/* PCS collects the postcomputes named on this subcommand; the array holds
   borrowed pointers into CT and grows on demand. */
5457 struct ctables_postcompute **pcs = NULL;
5459 size_t allocated_pcs = 0;
/* Phase 1: gather the referenced postcomputes. */
5461 while (lex_match (lexer, T_AND))
5463 if (!lex_force_id (lexer))
5465 struct ctables_postcompute *pc
5466 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5469 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5474 if (n_pcs >= allocated_pcs)
5475 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
/* Phase 2: apply settings until the next subcommand or the end of the
   command. */
5479 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5481 if (lex_match_id (lexer, "LABEL"))
5483 lex_match (lexer, T_EQUALS);
5484 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string, replacing any
   label it already had. */
5487 for (size_t i = 0; i < n_pcs; i++)
5489 free (pcs[i]->label);
5490 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5495 else if (lex_match_id (lexer, "FORMAT"))
5497 lex_match (lexer, T_EQUALS);
/* Parse the format list once into SSS, then give every postcompute a clone
   of it; SSS itself is released afterward. */
5499 struct ctables_summary_spec_set sss;
5500 if (!ctables_parse_pproperties_format (lexer, &sss))
5503 for (size_t i = 0; i < n_pcs; i++)
5506 ctables_summary_spec_set_uninit (pcs[i]->specs);
5508 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5509 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5511 ctables_summary_spec_set_uninit (&sss);
5513 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5515 lex_match (lexer, T_EQUALS);
5516 bool hide_source_cats;
5517 if (!parse_bool (lexer, &hide_source_cats))
5519 for (size_t i = 0; i < n_pcs; i++)
5520 pcs[i]->hide_source_cats = hide_source_cats;
/* Anything else is a syntax error. */
5524 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats the time NOW, broken down in the local time zone, according to
   the strftime() format string FORMAT, and appends the result to OUT.

   NOTE(review): neither the localtime() result nor the strftime() return
   value is checked in the visible code; this relies on the local buffer
   VALUE being large enough for the formats used by callers. */
5537 put_strftime (struct string *out, time_t now, const char *format)
5539 const struct tm *tm = localtime (&now);
5541 strftime (value, sizeof value, format, tm);
5542 ds_put_cstr (out, value);
/* If *S begins with PREFIX, advances *S past it.

   put_title_text() uses the result as a condition, so this presumably
   reports whether the prefix was present and consumed. */
5546 skip_prefix (struct substring *s, struct substring prefix)
5548 if (ss_starts_with (*s, prefix))
5550 ss_advance (s, prefix.length);
/* Appends to OUT a human-readable rendering of the tokens in LEXER at
   offsets EXPR_START (inclusive) through EXPR_END (exclusive), which the
   caller passes as the extent of a TABLE expression.

   Identifiers that name a variable in DICT with a variable label are
   rendered as that label; other identifiers are rendered as written.
   Bracket tokens are tracked separately through NEST (presumably a
   bracket nesting depth; its updates are not shown here). */
5558 put_table_expression (struct string *out, struct lexer *lexer,
5559 struct dictionary *dict, int expr_start, int expr_end)
5562 for (int ofs = expr_start; ofs < expr_end; ofs++)
5564 const struct token *t = lex_ofs_token (lexer, ofs);
5565 if (t->type == T_LBRACK)
5567 else if (t->type == T_RBRACK && nest > 0)
5573 else if (t->type == T_ID)
/* Prefer the variable's label, if the identifier is a variable and it has
   one. */
5575 const struct variable *var
5576 = dict_lookup_var (dict, t->string.string);
5577 const char *label = var ? var_get_label (var) : NULL;
5578 ds_put_cstr (out, label ? label : t->string.string);
/* Put a space before the token unless it is the first token, it is a right
   parenthesis, or OUT already ends in a space. */
5582 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5583 ds_put_byte (out, ' ');
/* Emit the token's source representation. */
5585 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5586 ds_put_cstr (out, repr);
/* Put a space after the token unless it is the last token or a left
   parenthesis. */
5589 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5590 ds_put_byte (out, ' ');
/* Appends the title text IN to OUT, expanding the substitution keywords
   that begin with a right parenthesis:

     )DATE   the date of NOW, formatted with strftime "%x"
     )TIME   the time of NOW, formatted with strftime "%X"
     )TABLE  the TABLE expression, i.e. the tokens of LEXER from EXPR_START
             up to EXPR_END, rendered by put_table_expression() using DICT

   Any other ')' is copied through literally. */
5596 put_title_text (struct string *out, struct substring in, time_t now,
5597 struct lexer *lexer, struct dictionary *dict,
5598 int expr_start, int expr_end)
/* Copy everything up to the next ')' verbatim, then drop it from IN. */
5602 size_t chunk = ss_find_byte (in, ')');
5603 ds_put_substring (out, ss_head (in, chunk));
5604 ss_advance (&in, chunk);
/* Nothing left means there was no further ')'. */
5605 if (ss_is_empty (in))
/* IN now starts with ')': try each keyword in turn. */
5608 if (skip_prefix (&in, ss_cstr (")DATE")))
5609 put_strftime (out, now, "%x");
5610 else if (skip_prefix (&in, ss_cstr (")TIME")))
5611 put_strftime (out, now, "%X");
5612 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5613 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the ')' itself and move past it. */
5616 ds_put_byte (out, ')');
5617 ss_advance (&in, 1);
/* Entry point for the CTABLES command.

   Parses, from LEXER, the command-level subcommands that may precede the
   first TABLE (FORMAT, VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES,
   WEIGHT, HIDESMALLCOUNTS), then one or more TABLE specifications, each
   optionally followed by its own SLABELS, CLABELS, CRITERIA, CATEGORIES,
   TITLES, SIGTEST, and COMPARETEST subcommands.  The parsed tables are then
   run against dataset DS by ctables_execute().

   Returns CMD_SUCCESS if ctables_execute() succeeds and CMD_FAILURE if it
   does not.  The parse-error path destroys the partially built
   `struct ctables'. */
5623 cmd_ctables (struct lexer *lexer, struct dataset *ds)
/* Every variable starts out with the display style currently reported by
   settings_get_show_variables(); VLABELS may override it per variable. */
5625 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5626 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5627 enum settings_value_show tvars = settings_get_show_variables ();
5628 for (size_t i = 0; i < n_vars; i++)
5629 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on a private copy of the default look so that it can be modified;
   empty rows and columns are not omitted. */
5631 struct pivot_table_look *look = pivot_table_look_unshare (
5632 pivot_table_look_ref (pivot_table_look_get_default ()));
5633 look->omit_empty = false;
5635 struct ctables *ct = xmalloc (sizeof *ct);
5636 *ct = (struct ctables) {
5637 .dict = dataset_dict (ds),
5639 .ctables_formats = FMT_SETTINGS_INIT,
5641 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Captured once so that every )DATE and )TIME in titles agrees. */
5644 time_t now = time (NULL);
/* Install the CTABLES-specific custom formats, choosing the punctuation
   variant that matches the current decimal point character. */
5649 const char *dot_string;
5650 const char *comma_string;
5652 static const struct ctf ctfs[4] = {
5653 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5654 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5655 { CTEF_PAREN, "-,(,),", "-.(.)." },
5656 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5658 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5659 for (size_t i = 0; i < 4; i++)
5661 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5662 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5663 fmt_number_style_from_string (s));
5666 if (!lex_force_match (lexer, T_SLASH))
/* Command-level subcommands, up to the first TABLE. */
5669 while (!lex_match_id (lexer, "TABLE"))
5671 if (lex_match_id (lexer, "FORMAT"))
/* SYSMIS marks a width that was not specified. */
5673 double widths[2] = { SYSMIS, SYSMIS };
5674 double units_per_inch = 72.0;
5676 while (lex_token (lexer) != T_SLASH)
5678 if (lex_match_id (lexer, "MINCOLWIDTH"))
5680 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5683 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5685 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5688 else if (lex_match_id (lexer, "UNITS"))
5690 lex_match (lexer, T_EQUALS);
5691 if (lex_match_id (lexer, "POINTS"))
5692 units_per_inch = 72.0;
5693 else if (lex_match_id (lexer, "INCHES"))
5694 units_per_inch = 1.0;
5695 else if (lex_match_id (lexer, "CM"))
5696 units_per_inch = 2.54;
5699 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
5703 else if (lex_match_id (lexer, "EMPTY"))
5708 lex_match (lexer, T_EQUALS);
5709 if (lex_match_id (lexer, "ZERO"))
5711 /* Nothing to do. */
5713 else if (lex_match_id (lexer, "BLANK"))
5714 ct->zero = xstrdup ("");
5715 else if (lex_force_string (lexer))
5717 ct->zero = ss_xstrdup (lex_tokss (lexer));
5723 else if (lex_match_id (lexer, "MISSING"))
5725 lex_match (lexer, T_EQUALS);
5726 if (!lex_force_string (lexer))
5730 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
5731 ? ss_xstrdup (lex_tokss (lexer))
5737 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
5738 "UNITS", "EMPTY", "MISSING");
5743 if (widths[0] != SYSMIS && widths[1] != SYSMIS
5744 && widths[0] > widths[1])
5746 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Widths are converted from the selected unit to inches and then scaled by
   96 before being stored in the look (presumably 1/96-inch units; confirm
   against pivot_table_look). */
5750 for (size_t i = 0; i < 2; i++)
5751 if (widths[i] != SYSMIS)
5753 int *wr = ct->look->width_ranges[TABLE_HORZ];
5754 wr[i] = widths[i] / units_per_inch * 96.0;
5759 else if (lex_match_id (lexer, "VLABELS"))
5761 if (!lex_force_match_id (lexer, "VARIABLES"))
5763 lex_match (lexer, T_EQUALS);
5765 struct variable **vars;
5767 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
5771 if (!lex_force_match_id (lexer, "DISPLAY"))
5776 lex_match (lexer, T_EQUALS);
5778 enum ctables_vlabel vlabel;
5779 if (lex_match_id (lexer, "DEFAULT"))
5780 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
5781 else if (lex_match_id (lexer, "NAME"))
5783 else if (lex_match_id (lexer, "LABEL"))
5784 vlabel = CTVL_LABEL;
5785 else if (lex_match_id (lexer, "BOTH"))
5787 else if (lex_match_id (lexer, "NONE"))
5791 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
/* Record the chosen style for each listed variable, indexed by its position
   in the dictionary. */
5797 for (size_t i = 0; i < n_vars; i++)
5798 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5801 else if (lex_match_id (lexer, "MRSETS"))
5803 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5805 lex_match (lexer, T_EQUALS);
5806 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5809 else if (lex_match_id (lexer, "SMISSING"))
5811 if (lex_match_id (lexer, "VARIABLE"))
5812 ct->smissing_listwise = false;
5813 else if (lex_match_id (lexer, "LISTWISE"))
5814 ct->smissing_listwise = true;
5817 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5821 else if (lex_match_id (lexer, "PCOMPUTE"))
5823 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
5826 else if (lex_match_id (lexer, "PPROPERTIES"))
5828 if (!ctables_parse_pproperties (lexer, ct))
5831 else if (lex_match_id (lexer, "WEIGHT"))
5833 if (!lex_force_match_id (lexer, "VARIABLE"))
5835 lex_match (lexer, T_EQUALS);
5836 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* The keyword is spelled exactly as in the "expecting" message below.  An
   identifier token never contains whitespace, so the literal must not
   either, or this subcommand could never be matched. */
5840 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
5842 if (lex_match_id (lexer, "COUNT"))
5844 lex_match (lexer, T_EQUALS);
5845 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5848 ct->hide_threshold = lex_integer (lexer);
/* Without COUNT, a threshold of 5 is used unless one was already set. */
5851 else if (ct->hide_threshold == 0)
5852 ct->hide_threshold = 5;
5856 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5857 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5858 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5862 if (!lex_force_match (lexer, T_SLASH))
/* One iteration per TABLE: allocate the table with its default categories,
   parse its axes, then its table-level subcommands. */
5866 size_t allocated_tables = 0;
5869 if (ct->n_tables >= allocated_tables)
5870 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5871 sizeof *ct->tables);
5873 struct ctables_category *cat = xmalloc (sizeof *cat);
5874 *cat = (struct ctables_category) {
5876 .include_missing = false,
5877 .sort_ascending = true,
5880 struct ctables_categories *c = xmalloc (sizeof *c);
5881 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5882 *c = (struct ctables_categories) {
5889 struct ctables_categories **categories = xnmalloc (n_vars,
5890 sizeof *categories);
5891 for (size_t i = 0; i < n_vars; i++)
5894 struct ctables_table *t = xmalloc (sizeof *t);
5895 *t = (struct ctables_table) {
5897 .slabels_axis = PIVOT_AXIS_COLUMN,
5898 .slabels_visible = true,
5899 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5901 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5902 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5903 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5905 .clabels_from_axis = PIVOT_AXIS_LAYER,
5906 .categories = categories,
5907 .n_categories = n_vars,
5910 ct->tables[ct->n_tables++] = t;
/* Parse "rows [BY columns [BY layers]]", remembering the token range of the
   whole expression so that )TABLE in titles can reproduce it. */
5912 lex_match (lexer, T_EQUALS);
5913 int expr_start = lex_ofs (lexer);
5914 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5916 if (lex_match (lexer, T_BY))
5918 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5919 ct, t, PIVOT_AXIS_COLUMN))
5922 if (lex_match (lexer, T_BY))
5924 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5925 ct, t, PIVOT_AXIS_LAYER))
5929 int expr_end = lex_ofs (lexer);
5931 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5932 && !t->axes[PIVOT_AXIS_LAYER])
5934 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables are located on each axis; the diagnostics below report
   the case where they appear on more than one. */
5938 const struct ctables_axis *scales[PIVOT_N_AXES];
5939 size_t n_scales = 0;
5940 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5942 scales[a] = find_scale (t->axes[a]);
5948 msg (SE, _("Scale variables may appear only on one axis."));
5949 if (scales[PIVOT_AXIS_ROW])
5950 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5951 _("This scale variable appears on the rows axis."));
5952 if (scales[PIVOT_AXIS_COLUMN])
5953 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5954 _("This scale variable appears on the columns axis."));
5955 if (scales[PIVOT_AXIS_LAYER])
5956 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5957 _("This scale variable appears on the layer axis."));
/* Likewise, summaries may be specified on at most one axis. */
5961 const struct ctables_axis *summaries[PIVOT_N_AXES];
5962 size_t n_summaries = 0;
5963 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5965 summaries[a] = (scales[a]
5967 : find_categorical_summary_spec (t->axes[a]));
5971 if (n_summaries > 1)
5973 msg (SE, _("Summaries may appear only on one axis."));
5974 if (summaries[PIVOT_AXIS_ROW])
5975 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5976 _("This variable on the rows axis has a summary."));
5977 if (summaries[PIVOT_AXIS_COLUMN])
5978 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5979 _("This variable on the columns axis has a summary."));
5980 if (summaries[PIVOT_AXIS_LAYER])
5981 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5982 _("This variable on the layers axis has a summary."));
/* The summary axis is an axis that has a summary, if any axis does;
   otherwise it is an axis that is present at all. */
5985 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5986 if (n_summaries ? summaries[a] : t->axes[a])
5988 t->summary_axis = a;
5992 if (lex_token (lexer) == T_ENDCMD)
5994 if (!ctables_prepare_table (t))
5998 if (!lex_force_match (lexer, T_SLASH))
/* Table-level subcommands, up to the next TABLE or the end of the
   command. */
6001 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
6003 if (lex_match_id (lexer, "SLABELS"))
6005 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6007 if (lex_match_id (lexer, "POSITION"))
6009 lex_match (lexer, T_EQUALS);
6010 if (lex_match_id (lexer, "COLUMN"))
6011 t->slabels_axis = PIVOT_AXIS_COLUMN;
6012 else if (lex_match_id (lexer, "ROW"))
6013 t->slabels_axis = PIVOT_AXIS_ROW;
6014 else if (lex_match_id (lexer, "LAYER"))
6015 t->slabels_axis = PIVOT_AXIS_LAYER;
6018 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
6022 else if (lex_match_id (lexer, "VISIBLE"))
6024 lex_match (lexer, T_EQUALS);
6025 if (!parse_bool (lexer, &t->slabels_visible))
6030 lex_error_expecting (lexer, "POSITION", "VISIBLE");
6035 else if (lex_match_id (lexer, "CLABELS"))
6037 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
6039 if (lex_match_id (lexer, "AUTO"))
6041 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
6042 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
6044 else if (lex_match_id (lexer, "ROWLABELS"))
6046 lex_match (lexer, T_EQUALS);
6047 if (lex_match_id (lexer, "OPPOSITE"))
6048 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
6049 else if (lex_match_id (lexer, "LAYER"))
6050 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
6053 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6057 else if (lex_match_id (lexer, "COLLABELS"))
6059 lex_match (lexer, T_EQUALS);
6060 if (lex_match_id (lexer, "OPPOSITE"))
6061 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
6062 else if (lex_match_id (lexer, "LAYER"))
6063 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
6066 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
6072 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
6078 else if (lex_match_id (lexer, "CRITERIA"))
6080 if (!lex_force_match_id (lexer, "CILEVEL"))
6082 lex_match (lexer, T_EQUALS);
6084 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
6086 t->cilevel = lex_number (lexer);
6089 else if (lex_match_id (lexer, "CATEGORIES"))
6091 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
6095 else if (lex_match_id (lexer, "TITLES"))
6100 if (lex_match_id (lexer, "CAPTION"))
6101 textp = &t->caption;
6102 else if (lex_match_id (lexer, "CORNER"))
6104 else if (lex_match_id (lexer, "TITLE"))
6108 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
6111 lex_match (lexer, T_EQUALS);
/* Consecutive string tokens are joined with single spaces, each with its
   )DATE, )TIME, and )TABLE keywords expanded. */
6113 struct string s = DS_EMPTY_INITIALIZER;
6114 while (lex_is_string (lexer))
6116 if (!ds_is_empty (&s))
6117 ds_put_byte (&s, ' ');
6118 put_title_text (&s, lex_tokss (lexer), now,
6119 lexer, dataset_dict (ds),
6120 expr_start, expr_end);
6124 *textp = ds_steal_cstr (&s);
6126 while (lex_token (lexer) != T_SLASH
6127 && lex_token (lexer) != T_ENDCMD);
6129 else if (lex_match_id (lexer, "SIGTEST"))
6133 t->chisq = xmalloc (sizeof *t->chisq);
6134 *t->chisq = (struct ctables_chisq) {
6136 .include_mrsets = true,
6137 .all_visible = true,
6143 if (lex_match_id (lexer, "TYPE"))
6145 lex_match (lexer, T_EQUALS);
6146 if (!lex_force_match_id (lexer, "CHISQUARE"))
6149 else if (lex_match_id (lexer, "ALPHA"))
6151 lex_match (lexer, T_EQUALS);
6152 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
6154 t->chisq->alpha = lex_number (lexer);
6157 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6159 lex_match (lexer, T_EQUALS);
/* Negated like every other parse_bool() call in this file: a false return
   is the failure case. */
6160 if (!parse_bool (lexer, &t->chisq->include_mrsets))
6163 else if (lex_match_id (lexer, "CATEGORIES"))
6165 lex_match (lexer, T_EQUALS);
6166 if (lex_match_id (lexer, "ALLVISIBLE"))
6167 t->chisq->all_visible = true;
6168 else if (lex_match_id (lexer, "SUBTOTALS"))
6169 t->chisq->all_visible = false;
6172 lex_error_expecting (lexer,
6173 "ALLVISIBLE", "SUBTOTALS");
6179 lex_error_expecting (lexer, "TYPE", "ALPHA",
6180 "INCLUDEMRSETS", "CATEGORIES");
6184 while (lex_token (lexer) != T_SLASH
6185 && lex_token (lexer) != T_ENDCMD);
6187 else if (lex_match_id (lexer, "COMPARETEST"))
6191 t->pairwise = xmalloc (sizeof *t->pairwise);
6192 *t->pairwise = (struct ctables_pairwise) {
6194 .alpha = { .05, .05 },
6195 .adjust = BONFERRONI,
6196 .include_mrsets = true,
6197 .meansvariance_allcats = true,
6198 .all_visible = true,
6207 if (lex_match_id (lexer, "TYPE"))
6209 lex_match (lexer, T_EQUALS);
6210 if (lex_match_id (lexer, "PROP"))
6211 t->pairwise->type = PROP;
6212 else if (lex_match_id (lexer, "MEAN"))
6213 t->pairwise->type = MEAN;
6216 lex_error_expecting (lexer, "PROP", "MEAN");
6220 else if (lex_match_id (lexer, "ALPHA"))
6222 lex_match (lexer, T_EQUALS);
6224 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6226 double a0 = lex_number (lexer);
/* An optional second value may follow; the pair is stored in ascending
   order.  With a single value, both entries are set to it. */
6229 lex_match (lexer, T_COMMA);
6230 if (lex_is_number (lexer))
6232 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
6234 double a1 = lex_number (lexer);
6237 t->pairwise->alpha[0] = MIN (a0, a1);
6238 t->pairwise->alpha[1] = MAX (a0, a1);
6241 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
6243 else if (lex_match_id (lexer, "ADJUST"))
6245 lex_match (lexer, T_EQUALS);
6246 if (lex_match_id (lexer, "BONFERRONI"))
6247 t->pairwise->adjust = BONFERRONI;
6248 else if (lex_match_id (lexer, "BH"))
6249 t->pairwise->adjust = BH;
6250 else if (lex_match_id (lexer, "NONE"))
6251 t->pairwise->adjust = 0;
6254 lex_error_expecting (lexer, "BONFERRONI", "BH",
6259 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
6261 lex_match (lexer, T_EQUALS);
6262 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
6265 else if (lex_match_id (lexer, "MEANSVARIANCE"))
6267 lex_match (lexer, T_EQUALS);
6268 if (lex_match_id (lexer, "ALLCATS"))
6269 t->pairwise->meansvariance_allcats = true;
6270 else if (lex_match_id (lexer, "TESTEDCATS"))
6271 t->pairwise->meansvariance_allcats = false;
6274 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
6278 else if (lex_match_id (lexer, "CATEGORIES"))
6280 lex_match (lexer, T_EQUALS);
6281 if (lex_match_id (lexer, "ALLVISIBLE"))
6282 t->pairwise->all_visible = true;
6283 else if (lex_match_id (lexer, "SUBTOTALS"))
6284 t->pairwise->all_visible = false;
6287 lex_error_expecting (lexer, "ALLVISIBLE",
6292 else if (lex_match_id (lexer, "MERGE"))
6294 lex_match (lexer, T_EQUALS);
6295 if (!parse_bool (lexer, &t->pairwise->merge))
6298 else if (lex_match_id (lexer, "STYLE"))
6300 lex_match (lexer, T_EQUALS);
6301 if (lex_match_id (lexer, "APA"))
6302 t->pairwise->apa_style = true;
6303 else if (lex_match_id (lexer, "SIMPLE"))
6304 t->pairwise->apa_style = false;
6307 lex_error_expecting (lexer, "APA", "SIMPLE");
6311 else if (lex_match_id (lexer, "SHOWSIG"))
6313 lex_match (lexer, T_EQUALS);
6314 if (!parse_bool (lexer, &t->pairwise->show_sig))
6319 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
6320 "INCLUDEMRSETS", "MEANSVARIANCE",
6321 "CATEGORIES", "MERGE", "STYLE",
6326 while (lex_token (lexer) != T_SLASH
6327 && lex_token (lexer) != T_ENDCMD);
6331 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
6332 "CRITERIA", "CATEGORIES", "TITLES",
6333 "SIGTEST", "COMPARETEST");
6337 if (!lex_match (lexer, T_SLASH))
/* Category labels may be moved off at most one of the row and column
   axes. */
6341 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
6342 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
6344 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
6348 if (!ctables_prepare_table (t))
6351 while (lex_token (lexer) != T_ENDCMD);
/* Parsing succeeded: run the tables, then release everything. */
6353 bool ok = ctables_execute (ds, ct);
6354 ctables_destroy (ct);
6355 return ok ? CMD_SUCCESS : CMD_FAILURE;
/* Error path: release the partially built state. */
6358 ctables_destroy (ct);