1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-in.h"
25 #include "data/data-out.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/mrset.h"
29 #include "data/subcase.h"
30 #include "data/value-labels.h"
31 #include "language/command.h"
32 #include "language/lexer/format-parser.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/token.h"
35 #include "language/lexer/variable-parser.h"
36 #include "libpspp/array.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/hash-functions.h"
39 #include "libpspp/hmap.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/message.h"
42 #include "libpspp/string-array.h"
43 #include "math/mode.h"
44 #include "math/moments.h"
45 #include "math/percentiles.h"
46 #include "math/sort.h"
47 #include "output/pivot-table.h"
49 #include "gl/minmax.h"
50 #include "gl/xalloc.h"
/* Translation helpers.  _() looks MSGID up through gettext() at run time;
   N_() expands to MSGID unchanged, which lets a string sit in a static
   initializer and be handed to gettext() later. */
53 #define _(msgid) gettext (msgid)
54 #define N_(msgid) (msgid)
/* Value-label display choices, each defined as one of the
   SETTINGS_VALUE_SHOW_* constants.
   NOTE(review): the enclosing enum declaration is not present above these
   lines; presumably this is enum ctables_vlabel -- confirm. */
58 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
59 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
60 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
61 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
65 - unweighted summaries (U*)
66 - lower confidence limits (*.LCL)
67 - upper confidence limits (*.UCL)
68 - standard error (*.SE)
71 /* Table of summary functions, one S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) entry each: enum constant, name used in syntax, default label, default format class, and availability class. */ /* All variables. */ \
72 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
73 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
74 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
96 /* Scale variables, totals, and subtotals. */ \
97 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
106 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
107 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
108 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) /* NOTE(review): spelled CSTF_, unlike the CTSF_ prefix of every other entry; other code may depend on this spelling, so confirm all uses before renaming. */ \
109 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
111 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
112 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
113 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
118 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
119 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
121 #if 0 /* Multiple response sets not yet implemented. */
/* Summary functions for multiple response sets, kept disabled until they are
   implemented.  Entries use the S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY)
   layout, and each NAME is its ENUM (minus the CTSF_ prefix) with '.' in
   place of '_', so that every name is distinct. */
122 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
123 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
124 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
125 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
126 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
127 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
128 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
129 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
130 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
131 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
132 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
133 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
134 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
135 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
136 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
137 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
138 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
139 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
140 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
141 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
142 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
143 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
/* Identifies a summary function.  The S definition below makes each
   SUMMARIES entry contribute its ENUM argument as an enumerator. */
146 enum ctables_summary_function
148 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
/* Redefining S as "+1" turns SUMMARIES into a sum with one term per entry, so
   N_CTSF_FUNCTIONS is the number of summary functions. */
154 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
155 N_CTSF_FUNCTIONS = SUMMARIES
/* Forward declaration; the definition appears further down in this file. */
159 static bool ctables_summary_function_is_count (enum ctables_summary_function);
/* The kinds of domain that a cell can belong to: first the scopes defined
   relative to a section, then the scopes defined relative to a subtable. */
161 enum ctables_domain_type
163 /* Within a section, where stacked variables divide one section from
165 CTDT_TABLE, /* All layers of a whole section. */
166 CTDT_LAYER, /* One layer within a section. */
167 CTDT_LAYERROW, /* Row in one layer within a section. */
168 CTDT_LAYERCOL, /* Column in one layer within a section. */
170 /* Within a subtable, where a subtable pairs an innermost row variable with
171 an innermost column variable within a single layer. */
172 CTDT_SUBTABLE, /* Whole subtable. */
173 CTDT_ROW, /* Row within a subtable. */
174 CTDT_COL, /* Column within a subtable. */
/* A domain: its hash-map linkage, an example cell, and its weights. */
178 struct ctables_domain
180 struct hmap_node node; /* Hash map linkage. */
182 const struct ctables_cell *example; /* A cell associated with this domain (presumably one of its members -- confirm). */
184 double d_valid; /* Dictionary weight. */
187 double e_valid; /* Effective weight. */
192 enum ctables_summary_variant
/* NOTE(review): the enclosing struct declaration is not present above these
   lines; they look like the members of struct ctables_cell -- confirm. */
201 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
202 all the axes (except the scalar variable, if any). */
203 struct hmap_node node;
205 /* The domains that contain this cell. */
206 uint32_t omit_domains; /* NOTE(review): presumably a bit mask over domain types; confirm. */
207 struct ctables_domain *domains[N_CTDTS]; /* One slot per domain type, assuming N_CTDTS counts the CTDT_* values. */
212 enum ctables_summary_variant sv; /* Which summary variant this cell uses. */
214 struct ctables_cell_axis
216 struct ctables_cell_value
218 const struct ctables_category *category;
226 union ctables_summary *summaries;
/* NOTE(review): the enclosing struct declaration is not present above these
   lines; they look like the members of struct ctables, the state shared by
   all the tables of one command -- confirm. */
233 const struct dictionary *dict;
234 struct pivot_table_look *look; /* Released with pivot_table_look_unref() in ctables_destroy(). */
236 /* CTABLES has a number of extra formats that we implement via custom
237 currency specifications on an alternate fmt_settings. */
238 #define CTEF_NEGPAREN FMT_CCA
239 #define CTEF_NEQUAL FMT_CCB
240 #define CTEF_PAREN FMT_CCC
241 #define CTEF_PCTPAREN FMT_CCD
242 struct fmt_settings ctables_formats; /* The alternate settings that define the CTEF_* formats. */
244 /* If this is NULL, zeros are displayed using the normal print format.
245 Otherwise, this string is displayed. */
248 /* If this is NULL, missing values are displayed using the normal print
249 format. Otherwise, this string is displayed. */
252 /* Indexed by variable dictionary index. */
253 enum ctables_vlabel *vlabels;
255 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
257 bool mrsets_count_duplicates; /* MRSETS. */
258 bool smissing_listwise; /* SMISSING. */
259 struct variable *e_weight; /* WEIGHT. */
260 int hide_threshold; /* HIDESMALLCOUNTS. */
262 struct ctables_table **tables; /* The tables; ctables_destroy() destroys each one. */
266 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A postcompute definition: a named expression plus the settings attached to
   it. */
269 struct ctables_postcompute
271 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
272 char *name; /* Name, without leading &. */
274 struct msg_location *location; /* Location of definition. */
275 struct ctables_pcexpr *expr; /* The expression tree. */
277 struct ctables_summary_spec_set *specs; /* Summary specs associated with this postcompute. */
278 bool hide_source_cats;
/* A node in a postcompute expression.  The operator determines which of the
   operator-specific members below is meaningful; arithmetic nodes keep their
   operands in 'subs'. */
281 struct ctables_pcexpr
291 enum ctables_postcompute_op
294 CTPO_CONSTANT, /* 5 */
295 CTPO_CAT_NUMBER, /* [5] */
296 CTPO_CAT_STRING, /* ["STRING"] */
297 CTPO_CAT_RANGE, /* [LO THRU 5] */
298 CTPO_CAT_MISSING, /* MISSING */
299 CTPO_CAT_OTHERNM, /* OTHERNM */
300 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
301 CTPO_CAT_TOTAL, /* TOTAL */
315 /* CTPO_CAT_NUMBER. */
318 /* CTPO_CAT_STRING, in dictionary encoding. */
319 struct substring string;
321 /* CTPO_CAT_RANGE. */
324 /* CTPO_CAT_SUBTOTAL. */
325 size_t subtotal_index;
327 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
328 One element: CTPO_NEG. */
329 struct ctables_pcexpr *subs[2];
332 /* Source location. */
333 struct msg_location *location;
/* Forward declarations of the expression-node helpers: one destroys a node,
   the other allocates a binary node for an operator and its two operands. */
336 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
337 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
338 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
339 struct ctables_pcexpr *sub1);
/* A set of summary specs together with the variable they summarize. */
341 struct ctables_summary_spec_set
343 struct ctables_summary_spec *specs; /* Array of specs; add_summary_spec() grows it with x2nrealloc(). */
347 /* The variable to which the summary specs are applied. */
348 struct variable *var;
350 /* Whether the variable to which the summary specs are applied is a scale
351 variable for the purpose of summarization.
353 (VALIDN and TOTALN act differently for summarizing scale and categorical
357 /* If any of these optional additional scale variables are missing, then
358 treat 'var' as if it's missing too. This is for implementing
359 SMISSING=LISTWISE. */
360 struct variable **listwise_vars;
361 size_t n_listwise_vars;
/* Forward declarations; both functions are defined further down in this
   file. */
364 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
365 const struct ctables_summary_spec_set *);
366 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
368 /* A nested sequence of variables, e.g. a > b > c. */
371 struct variable **vars; /* The variables in the nest. */
374 size_t *domains[N_CTDTS]; /* One array per domain type... */
375 size_t n_domains[N_CTDTS]; /* ...and the number of elements in each (presumably; confirm). */
378 struct ctables_summary_spec_set specs[N_CSVS]; /* One spec set per summary variant. */
381 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
384 struct ctables_nest *nests;
390 struct hmap_node node;
395 struct ctables_occurrence
397 struct hmap_node node;
/* One section of a table: the nest chosen on each pivot axis, plus the cells
   and domains that belong to it. */
401 struct ctables_section
403 struct ctables_table *table; /* The table that contains this section. */
404 struct ctables_nest *nests[PIVOT_N_AXES]; /* Indexed by pivot axis type. */
405 struct hmap *occurrences[PIVOT_N_AXES];
406 struct hmap cells; /* Contains "struct ctables_cell"s. */
407 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
/* NOTE(review): the enclosing struct declaration is not present above these
   lines; they look like the members of struct ctables_table -- confirm. */
412 struct ctables *ctables;
413 struct ctables_axis *axes[PIVOT_N_AXES]; /* Parsed axis expressions, indexed by pivot axis type. */
414 struct ctables_stack stacks[PIVOT_N_AXES];
415 struct ctables_section *sections;
417 enum pivot_axis_type summary_axis;
418 struct ctables_summary_spec_set summary_specs;
420 const struct variable *clabels_example;
421 struct hmap clabels_values_map;
422 struct ctables_value **clabels_values;
423 size_t n_clabels_values;
425 enum pivot_axis_type slabels_axis;
426 bool slabels_visible;
428 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
430 Most commonly, label_axis[a] == a, and in particular we always have
431 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
433 If ROWLABELS or COLLABELS is specified, then one of
434 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
435 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
437 enum pivot_axis_type label_axis[PIVOT_N_AXES];
438 enum pivot_axis_type clabels_from_axis;
440 /* Indexed by variable dictionary index. */
441 struct ctables_categories **categories; /* Each element is released with ctables_categories_unref() in ctables_table_destroy(). */
450 struct ctables_chisq *chisq; /* Destroyed by ctables_table_destroy(). */
451 struct ctables_pairwise *pairwise; /* Destroyed by ctables_table_destroy(). */
/* A reference-counted list of categories (see ctables_categories_unref()). */
454 struct ctables_categories
457 struct ctables_category *cats; /* Array of categories. */
/* One category in a category list.  The category's type determines which of
   the type-specific members is in use, as each member's comment notes. */
462 struct ctables_category
464 enum ctables_category_type
466 /* Explicit category lists. */
469 CCT_NRANGE, /* Numerical range. */
470 CCT_SRANGE, /* String range. */
475 /* Totals and subtotals. */
479 /* Implicit category lists. */
484 /* For contributing to TOTALN. */
485 CCT_EXCLUDED_MISSING,
489 struct ctables_category *subtotal;
495 double number; /* CCT_NUMBER. */
496 struct substring string; /* CCT_STRING, in dictionary encoding. */
497 double nrange[2]; /* CCT_NRANGE. */
498 struct substring srange[2]; /* CCT_SRANGE. */
502 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
503 bool hide_subcategories; /* CCT_SUBTOTAL. */
506 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
508 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
511 bool include_missing;
515 enum ctables_summary_function sort_function;
516 struct variable *sort_var;
521 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
522 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
523 struct msg_location *location;
/* Releases the storage owned by CAT's type-specific members: the string, both
   string-range endpoints, or the total label, depending on the category type.
   NOTE(review): most of the case labels that select each call are not present
   in these lines; confirm the type-to-member mapping. */
527 ctables_category_uninit (struct ctables_category *cat)
538 case CCT_POSTCOMPUTE:
542 ss_dealloc (&cat->string);
546 ss_dealloc (&cat->srange[0]);
547 ss_dealloc (&cat->srange[1]);
552 free (cat->total_label);
560 case CCT_EXCLUDED_MISSING:
/* Compares substrings A and B, either of which may be null (that is, have a
   null 'string' pointer).  Two null substrings are equal, a null and a
   non-null substring are unequal, and two non-null substrings are compared
   with ss_equals(). */
566 nullable_substring_equal (const struct substring *a,
567 const struct substring *b)
569 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are compared first; categories of
   the same type are then compared on the members that matter for that type
   (number, string, numeric or string range, postcompute, total label, or the
   sorting and missing-value settings).
   NOTE(review): the case labels and the result of the type-mismatch branch
   are not all present in these lines; presumably a mismatch yields false. */
573 ctables_category_equal (const struct ctables_category *a,
574 const struct ctables_category *b)
576 if (a->type != b->type)
582 return a->number == b->number;
585 return ss_equals (a->string, b->string);
588 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
/* Range endpoints may be null substrings, hence the nullable comparison. */
591 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
592 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
598 case CCT_POSTCOMPUTE:
/* Postcomputes are compared by identity, not by content. */
599 return a->pc == b->pc;
603 return !strcmp (a->total_label, b->total_label);
608 return (a->include_missing == b->include_missing
609 && a->sort_ascending == b->sort_ascending
610 && a->sort_function == b->sort_function
611 && a->sort_var == b->sort_var
612 && a->percentile == b->percentile);
614 case CCT_EXCLUDED_MISSING:
/* Releases a reference to C.  The visible code asserts that the reference
   count is positive and uninitializes every category in C.
   NOTE(review): the decrement and the condition under which the categories
   are torn down are not present in these lines; presumably that happens only
   when the count drops to zero -- confirm. */
622 ctables_categories_unref (struct ctables_categories *c)
627 assert (c->n_refs > 0);
631 for (size_t i = 0; i < c->n_cats; i++)
632 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  They differ if their category counts or
   'show_empty' settings differ, or if any pair of categories at the same
   index fails ctables_category_equal(). */
638 ctables_categories_equal (const struct ctables_categories *a,
639 const struct ctables_categories *b)
641 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
644 for (size_t i = 0; i < a->n_cats; i++)
645 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
651 /* Chi-square test (SIGTEST). */
659 /* Pairwise comparison test (COMPARETEST). */
660 struct ctables_pairwise
662 enum { PROP, MEAN } type; /* NOTE(review): presumably proportions versus means; confirm. */
665 bool meansvariance_allcats;
667 enum { BONFERRONI = 1, BH } adjust; /* Adjustment method; no enumerator has the value 0. */
/* NOTE(review): the enclosing struct declaration is not present above these
   lines; they look like the members of struct ctables_axis -- confirm. */
691 struct variable *var; /* The variable, for a CTAO_VAR node. */
693 struct ctables_summary_spec_set specs[N_CSVS]; /* Indexed by summary variant. */
697 struct ctables_axis *subs[2]; /* The two operands of a nonterminal node. */
700 struct msg_location *loc; /* Source location; released in ctables_axis_destroy(). */
703 static void ctables_axis_destroy (struct ctables_axis *);
/* The kinds of variables to which a summary function may be applied.  This is
   the AVAILABILITY column of the SUMMARIES table. */
712 enum ctables_function_availability
714 CTFA_ALL, /* Any variables. */
715 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
716 CTFA_MRSETS, /* Only multiple-response sets. */
/* One requested summary: the function to compute and how to present it. */
719 struct ctables_summary_spec
721 enum ctables_summary_function function; /* What to compute. */
722 double percentile; /* CTSF_PTILE only. */
725 struct fmt_spec format; /* Output format. */
726 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Makes DST a copy of SRC.  The label is duplicated with xstrdup() so that
   DST holds its own string rather than sharing SRC's. */
732 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
733 const struct ctables_summary_spec *src)
736 dst->label = xstrdup (src->label);
740 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array with room for SRC's 'n'
   specs is allocated and each spec is cloned into it, so the two sets do not
   share spec storage; the remaining members are filled in from SRC. */
747 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
748 const struct ctables_summary_spec_set *src)
750 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
751 for (size_t i = 0; i < src->n; i++)
752 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
754 *dst = (struct ctables_summary_spec_set) {
759 .is_scale = src->is_scale,
/* Uninitializes each of the 'n' specs in SET. */
764 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
766 for (size_t i = 0; i < set->n; i++)
767 ctables_summary_spec_uninit (&set->specs[i]);
/* Parses a column width setting: an optional '=' followed by either DEFAULT
   or a number in the closed range [0, DBL_MAX].  NAME is passed to the range
   check.  A number is stored in *WIDTH.
   NOTE(review): what DEFAULT stores and the return statements are not present
   in these lines. */
772 parse_col_width (struct lexer *lexer, const char *name, double *width)
/* The '=' is optional, so the result of lex_match() is ignored. */
774 lex_match (lexer, T_EQUALS);
775 if (lex_match_id (lexer, "DEFAULT"))
777 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
779 *width = lex_number (lexer);
/* Parses the keyword NO or YES, presumably storing the matching value in *B
   (the assignments are not present in these lines).  Any other token is
   reported as an error that names the two expected keywords. */
789 parse_bool (struct lexer *lexer, bool *b)
791 if (lex_match_id (lexer, "NO"))
793 else if (lex_match_id (lexer, "YES"))
797 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F, looked up in a table
   built from the AVAILABILITY column of the SUMMARIES entries. */
803 static enum ctables_function_availability
804 ctables_function_availability (enum ctables_summary_function f)
806 static enum ctables_function_availability availability[] = {
807 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
812 return availability[f];
/* Judging by its name and its bool prototype, tells whether F is a
   count-based summary function.
   NOTE(review): only case labels are present in these lines, not the return
   statement that ends each group, so which groups yield true cannot be
   confirmed from here. */
816 ctables_summary_function_is_count (enum ctables_summary_function f)
822 case CTSF_ROWPCT_COUNT:
823 case CTSF_COLPCT_COUNT:
824 case CTSF_TABLEPCT_COUNT:
825 case CTSF_SUBTABLEPCT_COUNT:
826 case CTSF_LAYERPCT_COUNT:
827 case CTSF_LAYERROWPCT_COUNT:
828 case CTSF_LAYERCOLPCT_COUNT:
831 case CTSF_ROWPCT_VALIDN:
832 case CTSF_COLPCT_VALIDN:
833 case CTSF_TABLEPCT_VALIDN:
834 case CTSF_SUBTABLEPCT_VALIDN:
835 case CTSF_LAYERPCT_VALIDN:
836 case CTSF_LAYERROWPCT_VALIDN:
837 case CTSF_LAYERCOLPCT_VALIDN:
838 case CTSF_ROWPCT_TOTALN:
839 case CTSF_COLPCT_TOTALN:
840 case CTSF_TABLEPCT_TOTALN:
841 case CTSF_SUBTABLEPCT_TOTALN:
842 case CTSF_LAYERPCT_TOTALN:
843 case CTSF_LAYERROWPCT_TOTALN:
844 case CTSF_LAYERCOLPCT_TOTALN:
861 case CTSF_ROWPCT_SUM:
862 case CTSF_COLPCT_SUM:
863 case CTSF_TABLEPCT_SUM:
864 case CTSF_SUBTABLEPCT_SUM:
865 case CTSF_LAYERPCT_SUM:
866 case CTSF_LAYERROWPCT_SUM:
867 case CTSF_LAYERCOLPCT_SUM:
/* Parses the name of a summary function from LEXER.  The current token must
   be an identifier; it is compared with ss_equals_case() against a table of
   (function, name) pairs that holds every SUMMARIES name plus the short
   percentage names that omit ".COUNT".  The first matching function is stored
   in *F.  If nothing matches, an error is reported. */
875 parse_ctables_summary_function (struct lexer *lexer,
876 enum ctables_summary_function *f)
880 enum ctables_summary_function function;
881 struct substring name;
883 static struct pair names[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
885 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
888 /* The .COUNT suffix may be omitted. */
889 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
890 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
891 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
892 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
893 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
894 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
895 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
899 if (!lex_force_id (lexer))
/* Linear search; the table is small. */
902 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
903 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
905 *f = names[i].function;
910 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  Depending on the kind of node, this uninitializes each of
   its summary spec sets or recursively destroys both sub-axes; the node's
   source location is released as well. */
915 ctables_axis_destroy (struct ctables_axis *axis)
923 for (size_t i = 0; i < N_CSVS; i++)
924 ctables_summary_spec_set_uninit (&axis->specs[i]);
929 ctables_axis_destroy (axis->subs[0]);
930 ctables_axis_destroy (axis->subs[1]);
933 msg_location_destroy (axis->loc);
/* Allocates a nonterminal axis node for operator OP with operands SUB0 and
   SUB1.  Its source location runs from token offset START_OFS through the
   token just before LEXER's current position. */
937 static struct ctables_axis *
938 ctables_axis_new_nonterminal (enum ctables_axis_op op,
939 struct ctables_axis *sub0,
940 struct ctables_axis *sub1,
941 struct lexer *lexer, int start_ofs)
943 struct ctables_axis *axis = xmalloc (sizeof *axis);
944 *axis = (struct ctables_axis) {
946 .subs = { sub0, sub1 },
947 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* State handed down through the recursive-descent axis parser. */
952 struct ctables_axis_parse_ctx
955 struct dictionary *dict; /* Dictionary in which variable names are looked up. */
957 struct ctables_table *t; /* The table being parsed. */
/* Returns the default output format for FUNCTION, chosen by the FORMAT column
   of its SUMMARIES entry.  Three results are possible: F with width 40, PCT
   with width 40 and 1 decimal, or VAR's own print format.
   NOTE(review): the case labels are not present in these lines; presumably
   they are CTF_COUNT, CTF_PERCENT, and CTF_GENERAL in that order. */
960 static struct fmt_spec
961 ctables_summary_default_format (enum ctables_summary_function function,
962 const struct variable *var)
964 static const enum ctables_format default_formats[] = {
965 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
969 switch (default_formats[function])
972 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
975 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
978 return *var_get_print_format (var);
/* Returns a newly allocated default label for FUNCTION.  For CTSF_PTILE the
   label includes the percentile, formatted with two decimals; for every other
   function it is the translation of the LABEL column of the function's
   SUMMARIES entry. */
986 ctables_summary_default_label (enum ctables_summary_function function,
989 static const char *default_labels[] = {
990 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
995 return (function == CTSF_PTILE
996 ? xasprintf (_("Percentile %.2f"), percentile)
997 : xstrdup (gettext (default_labels[function])));
/* Returns the syntax name of FUNCTION, that is, the NAME column of its
   SUMMARIES entry.  The string is part of a static table. */
1001 ctables_summary_function_name (enum ctables_summary_function function)
1003 static const char *names[] = {
1004 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1008 return names[function];
/* Adds a summary spec for FUNCTION to the SV spec set of AXIS.

   If AXIS is a variable node, the function's availability is checked first;
   the visible diagnostics cover functions that apply only to multiple
   response sets or only to scale variables, each followed by a note at the
   variable's location.  The new spec gets its own copy of LABEL, and uses
   *FORMAT if FORMAT is nonnull, otherwise the default format for FUNCTION and
   the variable.

   For any other node, the spec is added recursively to both sub-axes.

   LOC is the location of the summary spec, used for error messages. */
1012 add_summary_spec (struct ctables_axis *axis,
1013 enum ctables_summary_function function, double percentile,
1014 const char *label, const struct fmt_spec *format,
1015 bool is_ctables_format, const struct msg_location *loc,
1016 enum ctables_summary_variant sv)
1018 if (axis->op == CTAO_VAR)
1020 const char *function_name = ctables_summary_function_name (function);
1021 const char *var_name = var_get_name (axis->var);
1022 switch (ctables_function_availability (function))
1025 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1026 "response sets."), function_name);
1027 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1036 _("Summary function %s applies only to scale variables."),
1038 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1049 struct ctables_summary_spec_set *set = &axis->specs[sv];
/* Grow the array when it is full. */
1050 if (set->n >= set->allocated)
1051 set->specs = x2nrealloc (set->specs, &set->allocated,
1052 sizeof *set->specs);
1054 struct ctables_summary_spec *dst = &set->specs[set->n++];
1055 *dst = (struct ctables_summary_spec) {
1056 .function = function,
1057 .percentile = percentile,
1058 .label = xstrdup (label),
1059 .format = (format ? *format
1060 : ctables_summary_default_format (function, axis->var)),
1061 .is_ctables_format = is_ctables_format,
/* Nonterminal: apply the same spec to both operands. */
1067 for (size_t i = 0; i < 2; i++)
1068 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1069 format, is_ctables_format, loc, sv))
/* Forward declaration, needed because a parenthesized primary expression
   recurses into the stack parser. */
1075 static struct ctables_axis *ctables_axis_parse_stack (
1076 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name.

   A variable may be followed by [S] or [C] to force it to be treated as scale
   or as categorical, respectively; otherwise it counts as scale exactly when
   its measurement level is MEASURE_SCALE.  A string variable may not be used
   as scale: that case is reported as an error and the new node is
   destroyed. */
1079 static struct ctables_axis *
1080 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1082 if (lex_match (ctx->lexer, T_LPAREN))
1084 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1085 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1087 ctables_axis_destroy (sub);
1093 if (!lex_force_id (ctx->lexer))
1096 int start_ofs = lex_ofs (ctx->lexer);
1097 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1101 struct ctables_axis *axis = xmalloc (sizeof *axis);
1102 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1104 /* XXX should figure out default measures by reading data */
1105 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1106 : lex_match_phrase (ctx->lexer, "[C]") ? false
1107 : var_get_measure (var) == MEASURE_SCALE);
/* The location covers the variable name and any [S] or [C] marker. */
1108 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1109 lex_ofs (ctx->lexer) - 1);
1110 if (axis->scale && var_is_alpha (var))
1112 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1114 var_get_name (var));
1115 ctables_axis_destroy (axis);
/* Returns true if S contains at least one ASCII decimal digit.  strcspn()
   yields the length of the leading part of S that has no digit, so the
   character at that index is the null terminator only if S has no digit at
   all. */
1123 has_digit (const char *s)
1125 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format for a summary into *FORMAT.

   The format's type name, width, and decimals are read first.  The names
   NEGPAREN, NEQUAL, PAREN, and PCTPAREN (in any letter case) select the
   corresponding CTEF_* type.  These are checked against the constraints named
   in the two error messages below, and on success *IS_CTABLES_FORMAT is set
   to true.

   Any other name is handed to parse_format_specifier(), with
   *IS_CTABLES_FORMAT set to false; the result must also be valid as an output
   format and compatible with numeric values. */
1129 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1130 bool *is_ctables_format)
1132 char type[FMT_TYPE_LEN_MAX + 1];
1133 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1136 if (!strcasecmp (type, "NEGPAREN"))
1137 format->type = CTEF_NEGPAREN;
1138 else if (!strcasecmp (type, "NEQUAL"))
1139 format->type = CTEF_NEQUAL;
1140 else if (!strcasecmp (type, "PAREN"))
1141 format->type = CTEF_PAREN;
1142 else if (!strcasecmp (type, "PCTPAREN"))
1143 format->type = CTEF_PCTPAREN;
1146 *is_ctables_format = false;
1147 return (parse_format_specifier (lexer, format)
1148 && fmt_check_output (format)
1149 && fmt_check_type_compat (format, VAL_NUMERIC));
1154 msg (SE, _("Output format %s requires width 2 or greater."), type);
1157 else if (format->d > format->w - 1)
1159 msg (SE, _("Output format %s requires width greater than decimals."),
1165 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list of
   summary specifications.

   Each specification is a summary function name, then for PTILE a percentile
   in the range [0, 100], then an optional quoted label (the function's
   default label is used otherwise), then an optional format, recognized as an
   identifier that contains a digit.  Each one is added to the expression with
   add_summary_spec().  Specifications may be separated by commas.

   Specifications start out in the CSV_CELL variant.  The keyword TOTALS
   followed by '[' introduces a nested list, which is expected to end with its
   own ']' (presumably the CSV_TOTAL variant; the assignment is not present in
   these lines).

   On the failure path the parsed sub-expression is destroyed. */
1170 static struct ctables_axis *
1171 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1173 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1174 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1177 enum ctables_summary_variant sv = CSV_CELL;
1180 int start_ofs = lex_ofs (ctx->lexer);
1182 /* Parse function. */
1183 enum ctables_summary_function function;
1184 if (!parse_ctables_summary_function (ctx->lexer, &function))
1187 /* Parse percentile. */
1188 double percentile = 0;
1189 if (function == CTSF_PTILE)
1191 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1193 percentile = lex_number (ctx->lexer);
1194 lex_get (ctx->lexer);
1199 if (lex_is_string (ctx->lexer))
1201 label = ss_xstrdup (lex_tokss (ctx->lexer));
1202 lex_get (ctx->lexer);
1205 label = ctables_summary_default_label (function, percentile);
1208 struct fmt_spec format;
1209 const struct fmt_spec *formatp;
1210 bool is_ctables_format = false;
/* A format specifier always includes a width, so an identifier without any
   digit cannot be one. */
1211 if (lex_token (ctx->lexer) == T_ID
1212 && has_digit (lex_tokcstr (ctx->lexer)))
1214 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1215 &is_ctables_format))
1225 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1226 lex_ofs (ctx->lexer) - 1);
1227 add_summary_spec (sub, function, percentile, label, formatp,
1228 is_ctables_format, loc, sv);
1230 msg_location_destroy (loc);
/* The separating comma is optional. */
1232 lex_match (ctx->lexer, T_COMMA);
1233 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1235 if (!lex_force_match (ctx->lexer, T_LBRACK))
1239 else if (lex_match (ctx->lexer, T_RBRACK))
1241 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1248 ctables_axis_destroy (sub);
/* Searches the axis expression AXIS for a variable node that is treated as
   scale.  A variable node yields itself if it is scale and NULL otherwise;
   any other node searches its two sub-axes in order.
   NOTE(review): the handling of a null AXIS and the return statements inside
   the loop are not present in these lines; presumably the first hit is
   returned and NULL means "none". */
1252 static const struct ctables_axis *
1253 find_scale (const struct ctables_axis *axis)
1257 else if (axis->op == CTAO_VAR)
1258 return axis->scale ? axis : NULL;
1261 for (size_t i = 0; i < 2; i++)
1263 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis expression AXIS for a variable node that is not scale and
   has at least one CSV_CELL summary spec.  A variable node yields itself if
   it qualifies and NULL otherwise; any other node searches its two sub-axes
   in order.
   NOTE(review): the handling of a null AXIS and the return statements inside
   the loop are not present in these lines. */
1271 static const struct ctables_axis *
1272 find_categorical_summary_spec (const struct ctables_axis *axis)
1276 else if (axis->op == CTAO_VAR)
1277 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1280 for (size_t i = 0; i < 2; i++)
1282 const struct ctables_axis *sum
1283 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: one or more postfix expressions joined by '>',
   each pair combined into a CTAO_NEST node.

   Two restrictions are enforced for each new nest node.  The outer and inner
   operands may not both contain a scale variable, and the outer operand may
   not contain a categorical variable with a summary, since summaries for
   categorical variables are allowed only at the innermost nesting level.  A
   violation is reported with notes that point at the offending variables, and
   the nest node is destroyed. */
1291 static struct ctables_axis *
1292 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1294 int start_ofs = lex_ofs (ctx->lexer);
1295 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1299 while (lex_match (ctx->lexer, T_GT))
1301 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1305 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1306 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1308 const struct ctables_axis *outer_scale = find_scale (lhs);
1309 const struct ctables_axis *inner_scale = find_scale (rhs);
1310 if (outer_scale && inner_scale)
1312 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1313 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1314 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1315 ctables_axis_destroy (nest);
1319 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1322 msg_at (SE, nest->loc,
1323 _("Summaries may only be requested for categorical variables "
1324 "at the innermost nesting level."));
1325 msg_at (SN, outer_sum->loc,
1326 _("This outer categorical variable has a summary."));
1327 ctables_axis_destroy (nest);
/* Parses a stacking expression: one or more nesting expressions joined by
   '+'.  Each '+' wraps the expression parsed so far and the new operand in a
   CTAO_STACK node, so the resulting tree leans to the left. */
1337 static struct ctables_axis *
1338 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1340 int start_ofs = lex_ofs (ctx->lexer);
1341 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1345 while (lex_match (ctx->lexer, T_PLUS))
1347 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1351 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1352 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T from LEXER.

   If the next token is BY, '/', or the end of the command, there is no
   expression for this axis (the result of that branch is not present in these
   lines; presumably it is success with the axis left unset).  Otherwise a
   stack expression is parsed into T->axes[A], and the result is true exactly
   when that yields a non-null axis. */
1359 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1360 struct ctables *ct, struct ctables_table *t,
1361 enum pivot_axis_type a)
1363 if (lex_token (lexer) == T_BY
1364 || lex_token (lexer) == T_SLASH
1365 || lex_token (lexer) == T_ENDCMD)
1368 struct ctables_axis_parse_ctx ctx = {
1374 t->axes[a] = ctables_axis_parse_stack (&ctx);
1375 return t->axes[a] != NULL;
1379 ctables_chisq_destroy (struct ctables_chisq *chisq)
1385 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Destroys table T.  The visible steps release each entry of T's categories
   array and then the array itself, destroy the column, row, and layer axis
   expressions, and destroy the chi-square and pairwise test settings. */
1391 ctables_table_destroy (struct ctables_table *t)
1396 for (size_t i = 0; i < t->n_categories; i++)
1397 ctables_categories_unref (t->categories[i]);
1398 free (t->categories);
1400 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1401 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1402 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1406 ctables_chisq_destroy (t->chisq);
1407 ctables_pairwise_destroy (t->pairwise);
/* Tears down CT.  As part of that, its reference to the pivot table look is
   released and each of its CT->n_tables tables is destroyed with
   ctables_table_destroy(). */
1412 ctables_destroy (struct ctables *ct)
1417 pivot_table_look_unref (ct->look);
1421 for (size_t i = 0; i < ct->n_tables; i++)
1422 ctables_table_destroy (ct->tables[i]);
/* Returns a category, built as a compound literal, whose numeric range runs
   from LOW to HIGH.  Callers in this file pass -DBL_MAX or DBL_MAX for a
   bound that is open (LO THRU x, x THRU HI). */
1427 static struct ctables_category
1428 cct_nrange (double low, double high)
1430 return (struct ctables_category) {
1432 .nrange = { low, high }
/* Returns a category, built as a compound literal, whose string range runs
   from LOW to HIGH.  Callers in this file pass a substring with a null
   string member for a bound that is open. */
1436 static struct ctables_category
1437 cct_srange (struct substring low, struct substring high)
1439 return (struct ctables_category) {
1441 .srange = { low, high }
/* Parses the optional label of a SUBTOTAL or HSUBTOTAL category and stores
   the finished category in *CAT.

   If an equals sign follows, a string token is required and a copy of it
   becomes the label; otherwise a copy of the translated default label is
   used.  *CAT receives type CCT_SUBTOTAL together with HIDE_SUBCATEGORIES
   and the label. */
1446 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1447 struct ctables_category *cat)
1450 if (lex_match (lexer, T_EQUALS))
1452 if (!lex_force_string (lexer))
1455 total_label = ss_xstrdup (lex_tokss (lexer));
1459 total_label = xstrdup (_("Subtotal"));
1461 *cat = (struct ctables_category) {
1462 .type = CCT_SUBTOTAL,
1463 .hide_subcategories = hide_subcategories,
1464 .total_label = total_label
/* Builds a substring from the current string token of LEXER: the token text
   is recoded by recode_substring_pool() between UTF-8 and the encoding of
   DICT, and trailing spaces are then trimmed.
   NOTE(review): presumably the conversion is from UTF-8 to the dictionary
   encoding and, because the pool argument is NULL, the result is
   heap-allocated and owned by the caller -- confirm against
   recode_substring_pool(). */
1469 static struct substring
1470 parse_substring (struct lexer *lexer, struct dictionary *dict)
1472 struct substring s = recode_substring_pool (
1473 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1474 ss_rtrim (&s, ss_cstr (" "));
/* Parses one entry of an explicit category list and stores it in *CAT.

   The forms recognized, in the order tested, are:

     OTHERNM                    CCT_OTHERNM
     MISSING                    CCT_MISSING
     SUBTOTAL, HSUBTOTAL        handed to ctables_table_parse_subtotal()
     LO THRU string-or-number   range whose lower bound is open
     number [THRU HI | number]  numeric range, or a category built from the
                                number alone
     string [THRU HI | string]  string range, or CCT_STRING
     & identifier               CCT_POSTCOMPUTE, looked up with
                                ctables_find_postcompute(); an unknown name
                                is reported as an error

   Anything else is reported with lex_error(). */
1480 ctables_table_parse_explicit_category (struct lexer *lexer,
1481 struct dictionary *dict,
1483 struct ctables_category *cat)
1485 if (lex_match_id (lexer, "OTHERNM"))
1486 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1487 else if (lex_match_id (lexer, "MISSING"))
1488 *cat = (struct ctables_category) { .type = CCT_MISSING };
1489 else if (lex_match_id (lexer, "SUBTOTAL"))
1490 return ctables_table_parse_subtotal (lexer, false, cat);
1491 else if (lex_match_id (lexer, "HSUBTOTAL"))
1492 return ctables_table_parse_subtotal (lexer, true, cat);
1493 else if (lex_match_id (lexer, "LO"))
1495 if (!lex_force_match_id (lexer, "THRU"))
1497 if (lex_is_string (lexer))
1499 struct substring sr0 = { .string = NULL };
1500 struct substring sr1 = parse_substring (lexer, dict);
1501 *cat = cct_srange (sr0, sr1);
1503 else if (lex_force_num (lexer))
1505 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1511 else if (lex_is_number (lexer))
1513 double number = lex_number (lexer);
1515 if (lex_match_id (lexer, "THRU"))
1517 if (lex_match_id (lexer, "HI"))
1518 *cat = cct_nrange (number, DBL_MAX);
1521 if (!lex_force_num (lexer))
1523 *cat = cct_nrange (number, lex_number (lexer));
1528 *cat = (struct ctables_category) {
1533 else if (lex_is_string (lexer))
1535 struct substring s = parse_substring (lexer, dict);
1536 if (lex_match_id (lexer, "THRU"))
1538 if (lex_match_id (lexer, "HI"))
1540 struct substring sr1 = { .string = NULL };
1541 *cat = cct_srange (s, sr1);
1545 if (!lex_force_string (lexer))
1547 struct substring sr1 = parse_substring (lexer, dict);
1548 *cat = cct_srange (s, sr1);
1552 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1554 else if (lex_match (lexer, T_AND))
1556 if (!lex_force_id (lexer))
1558 struct ctables_postcompute *pc = ctables_find_postcompute (
1559 ct, lex_tokcstr (lexer));
1562 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1563 msg_at (SE, loc, _("Unknown postcompute &%s."),
1564 lex_tokcstr (lexer));
1565 msg_location_destroy (loc);
1570 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1574 lex_error (lexer, NULL);
/* Searches CATS for the category that postcompute expression node E refers
   to.  The test applied to each category depends on the kind of reference:

     CTPO_CAT_NUMBER    CCT_NUMBER with the same number
     CTPO_CAT_STRING    CCT_STRING with an equal string
     CTPO_CAT_RANGE     CCT_NRANGE with both bounds equal
     CTPO_CAT_MISSING   CCT_MISSING
     CTPO_CAT_OTHERNM   CCT_OTHERNM
     CTPO_CAT_SUBTOTAL  CCT_SUBTOTAL, selected through E->subtotal_index
     CTPO_CAT_TOTAL     CCT_TOTAL

   After the scan, a subtotal reference whose index is 0 gets special
   treatment when CATS holds more than one subtotal.
   NOTE(review): presumably that case is rejected as ambiguous and NULL is
   returned instead of the best match -- confirm. */
1581 static struct ctables_category *
1582 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1583 const struct ctables_pcexpr *e)
1585 struct ctables_category *best = NULL;
1586 size_t n_subtotals = 0;
1587 for (size_t i = 0; i < cats->n_cats; i++)
1589 struct ctables_category *cat = &cats->cats[i];
1592 case CTPO_CAT_NUMBER:
1593 if (cat->type == CCT_NUMBER && cat->number == e->number)
1597 case CTPO_CAT_STRING:
1598 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1602 case CTPO_CAT_RANGE:
1603 if (cat->type == CCT_NRANGE
1604 && cat->nrange[0] == e->range[0]
1605 && cat->nrange[1] == e->range[1])
1609 case CTPO_CAT_MISSING:
1610 if (cat->type == CCT_MISSING)
1614 case CTPO_CAT_OTHERNM:
1615 if (cat->type == CCT_OTHERNM)
1619 case CTPO_CAT_SUBTOTAL:
1620 if (cat->type == CCT_SUBTOTAL)
1623 if (e->subtotal_index == n_subtotals)
1625 else if (e->subtotal_index == 0)
1630 case CTPO_CAT_TOTAL:
1631 if (cat->type == CCT_TOTAL)
1645 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Checks the category references made by postcompute expression E, and
   recursively by its subexpressions, against the category list CATS.  PC_CAT
   is the computed category that E belongs to; CATS_LOCATION is the syntax
   location of the category list and is used in diagnostics.

   For a category-reference node the category is looked up with
   ctables_find_category_for_postcompute().  Two diagnostics are visible for
   a reference that is not accepted: a SUBTOTAL reference without a position
   when CATS contains more than one subtotal is told to refer to subtotals by
   position; any other is reported as a category not included in the list.
   NOTE(review): presumably both are issued only when the lookup returns
   NULL -- confirm.

   For other nodes, each non-null operand in E->subs[0..1] is checked in
   turn, and a failed operand check stops the recursion. */
1651 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1652 struct ctables_category *pc_cat,
1653 const struct ctables_categories *cats,
1654 const struct msg_location *cats_location)
1658 case CTPO_CAT_NUMBER:
1659 case CTPO_CAT_STRING:
1660 case CTPO_CAT_RANGE:
1661 case CTPO_CAT_MISSING:
1662 case CTPO_CAT_OTHERNM:
1663 case CTPO_CAT_SUBTOTAL:
1664 case CTPO_CAT_TOTAL:
1666 struct ctables_category *cat = ctables_find_category_for_postcompute (
1670 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1672 size_t n_subtotals = 0;
1673 for (size_t i = 0; i < cats->n_cats; i++)
1674 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1675 if (n_subtotals > 1)
1677 msg_at (SE, cats_location,
1678 ngettext ("These categories include %zu instance "
1679 "of SUBTOTAL or HSUBTOTAL, so references "
1680 "from computed categories must refer to "
1681 "subtotals by position.",
1682 "These categories include %zu instances "
1683 "of SUBTOTAL or HSUBTOTAL, so references "
1684 "from computed categories must refer to "
1685 "subtotals by position.",
1688 msg_at (SN, e->location,
1689 _("This is the reference that lacks a position."));
1694 msg_at (SE, pc_cat->location,
1695 _("Computed category &%s references a category not included "
1696 "in the category list."),
1698 msg_at (SN, e->location, _("This is the missing category."));
1699 msg_at (SN, cats_location,
1700 _("To fix the problem, add the missing category to the "
1701 "list of categories here."));
1704 if (pc_cat->pc->hide_source_cats)
1718 for (size_t i = 0; i < 2; i++)
1719 if (e->subs[i] && !ctables_recursive_check_postcompute (
1720 e->subs[i], pc_cat, cats, cats_location))
/* Parses S as data in FORMAT with data_in(), using the encoding of DICT and
   the current format settings.  If data_in() reports an error, a message
   that names FORMAT and includes the error text is issued at the location of
   CAT.
   NOTE(review): callers test the result with a logical not and pass the
   address of a double, so presumably the return value indicates success and
   the parsed number is stored in *N -- confirm. */
1730 parse_category_string (const struct ctables_category *cat,
1731 struct substring s, struct dictionary *dict,
1732 enum fmt_type format, double *n)
1735 char *error = data_in (s, dict_get_encoding (dict), format,
1736 settings_get_fmt_settings (), &v, 0, NULL);
1739 msg_at (SE, cat->location,
1740 _("Failed to parse category specification as format %s: %s."),
1741 fmt_name (format), error);
/* Examines each of the N_VARS variables in VARS and, for one that is
   numeric, issues an error at the location of CAT explaining that the
   category specification applies only to string variables.
   NOTE(review): callers test the result with a logical not, so presumably
   false is returned after the error and true when every variable is a
   string -- confirm. */
1751 all_strings (struct variable **vars, size_t n_vars,
1752 const struct ctables_category *cat)
1754 for (size_t j = 0; j < n_vars; j++)
1755 if (var_is_numeric (vars[j]))
1757 msg_at (SE, cat->location,
1758 _("This category specification may be applied only to string "
1759 "variables, but this subcommand tries to apply it to "
1760 "numeric variable %s."),
1761 var_get_name (vars[j]));
/* Parses the category settings for one list of variables of table T.

   The syntax starts with VARIABLES, an optional equals sign and a variable
   list.  One ctables_categories object is created with a reference count
   equal to the number of variables, and the entry that T->categories[] held
   for each listed variable is unreferenced.

   An explicit category list in square brackets may follow.  Each entry is
   parsed by ctables_table_parse_explicit_category() and tagged with its
   syntax location; afterward the entries are checked against the types of
   the variables.  String and string-range entries may be converted to
   CCT_NUMBER and CCT_NRANGE by parsing them in the common print format of
   the variables.  NOTE(review): the condition that selects this conversion
   is only partly visible; it involves the date and time format categories --
   confirm.

   The remaining tokens up to T_SLASH or T_ENDCMD are settings.  ORDER, KEY
   and MISSING are accepted only while no explicit categories exist; TOTAL,
   LABEL, POSITION and EMPTY are always accepted.

   Finally a totals category is stored at the start or at the end of the
   list (NOTE(review): presumably only when TOTAL enabled it -- confirm), and
   a pass over the list assigns the subtotal pointer of categories. */
1768 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1769 struct ctables *ct, struct ctables_table *t)
1771 if (!lex_match_id (lexer, "VARIABLES"))
1773 lex_match (lexer, T_EQUALS);
1775 struct variable **vars;
1777 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine the print format type shared by all the variables; it becomes
   NULL as soon as one variable differs from the first. */
1780 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1781 for (size_t i = 1; i < n_vars; i++)
1783 const struct fmt_spec *f = var_get_print_format (vars[i]);
1784 if (f->type != common_format->type)
1786 common_format = NULL;
1792 && (fmt_get_category (common_format->type)
1793 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* A single object, with one reference counted per variable. */
1795 struct ctables_categories *c = xmalloc (sizeof *c);
1796 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1797 for (size_t i = 0; i < n_vars; i++)
1799 struct ctables_categories **cp
1800 = &t->categories[var_get_dict_index (vars[i])];
1801 ctables_categories_unref (*cp);
1805 size_t allocated_cats = 0;
1806 if (lex_match (lexer, T_LBRACK))
1808 int cats_start_ofs = lex_ofs (lexer);
/* Grow the array when it is full, then parse into the next free slot. */
1811 if (c->n_cats >= allocated_cats)
1812 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1814 int start_ofs = lex_ofs (lexer);
1815 struct ctables_category *cat = &c->cats[c->n_cats];
1816 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1818 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1821 lex_match (lexer, T_COMMA);
1823 while (!lex_match (lexer, T_RBRACK));
1825 struct msg_location *cats_location
1826 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1827 for (size_t i = 0; i < c->n_cats; i++)
1829 struct ctables_category *cat = &c->cats[i];
1832 case CCT_POSTCOMPUTE:
1833 if (!ctables_recursive_check_postcompute (cat->pc->expr, cat,
1840 for (size_t j = 0; j < n_vars; j++)
1841 if (var_is_alpha (vars[j]))
1843 msg_at (SE, cat->location,
1844 _("This category specification may be applied "
1845 "only to numeric variables, but this "
1846 "subcommand tries to apply it to string "
1848 var_get_name (vars[j]));
1857 if (!parse_category_string (cat, cat->string, dict,
1858 common_format->type, &n))
1861 ss_dealloc (&cat->string);
1863 cat->type = CCT_NUMBER;
1866 else if (!all_strings (vars, n_vars, cat))
1875 if (!cat->srange[0].string)
1877 else if (!parse_category_string (cat, cat->srange[0], dict,
1878 common_format->type, &n[0]))
1881 if (!cat->srange[1].string)
1883 else if (!parse_category_string (cat, cat->srange[1], dict,
1884 common_format->type, &n[1]))
1887 ss_dealloc (&cat->srange[0]);
1888 ss_dealloc (&cat->srange[1]);
1890 cat->type = CCT_NRANGE;
1891 cat->nrange[0] = n[0];
1892 cat->nrange[1] = n[1];
1894 else if (!all_strings (vars, n_vars, cat))
1905 case CCT_EXCLUDED_MISSING:
1911 struct ctables_category cat = {
1913 .include_missing = false,
1914 .sort_ascending = true,
1916 bool show_totals = false;
1917 char *total_label = NULL;
1918 bool totals_before = false;
1919 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1921 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1923 lex_match (lexer, T_EQUALS);
1924 if (lex_match_id (lexer, "A"))
1925 cat.sort_ascending = true;
1926 else if (lex_match_id (lexer, "D"))
1927 cat.sort_ascending = false;
1930 lex_error_expecting (lexer, "A", "D");
1934 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1936 lex_match (lexer, T_EQUALS);
1937 if (lex_match_id (lexer, "VALUE"))
1938 cat.type = CCT_VALUE;
1939 else if (lex_match_id (lexer, "LABEL"))
1940 cat.type = CCT_LABEL;
1943 cat.type = CCT_FUNCTION;
1944 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1947 if (lex_match (lexer, T_LPAREN))
1949 cat.sort_var = parse_variable (lexer, dict);
1953 if (cat.sort_function == CTSF_PTILE)
1955 lex_match (lexer, T_COMMA);
1956 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1958 cat.percentile = lex_number (lexer);
1962 if (!lex_force_match (lexer, T_RPAREN))
1965 else if (ctables_function_availability (cat.sort_function)
1968 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1973 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1975 lex_match (lexer, T_EQUALS);
1976 if (lex_match_id (lexer, "INCLUDE"))
1977 cat.include_missing = true;
1978 else if (lex_match_id (lexer, "EXCLUDE"))
1979 cat.include_missing = false;
1982 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1986 else if (lex_match_id (lexer, "TOTAL"))
1988 lex_match (lexer, T_EQUALS);
1989 if (!parse_bool (lexer, &show_totals))
1992 else if (lex_match_id (lexer, "LABEL"))
1994 lex_match (lexer, T_EQUALS);
1995 if (!lex_force_string (lexer))
1998 total_label = ss_xstrdup (lex_tokss (lexer));
2001 else if (lex_match_id (lexer, "POSITION"))
2003 lex_match (lexer, T_EQUALS);
2004 if (lex_match_id (lexer, "BEFORE"))
2005 totals_before = true;
2006 else if (lex_match_id (lexer, "AFTER"))
2007 totals_before = false;
2010 lex_error_expecting (lexer, "BEFORE", "AFTER");
2014 else if (lex_match_id (lexer, "EMPTY"))
2016 lex_match (lexer, T_EQUALS);
2017 if (lex_match_id (lexer, "INCLUDE"))
2018 c->show_empty = true;
2019 else if (lex_match_id (lexer, "EXCLUDE"))
2020 c->show_empty = false;
2023 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
2030 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2031 "TOTAL", "LABEL", "POSITION", "EMPTY");
2033 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
2040 if (c->n_cats >= allocated_cats)
2041 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2042 c->cats[c->n_cats++] = cat;
2047 if (c->n_cats >= allocated_cats)
2048 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2050 struct ctables_category *totals;
/* Open a slot at index 0 so that the totals category can come first. */
2053 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2054 totals = &c->cats[0];
2057 totals = &c->cats[c->n_cats];
2060 *totals = (struct ctables_category) {
2062 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Scan the list forward when totals come first and backward otherwise. */
2066 struct ctables_category *subtotal = NULL;
2067 for (size_t i = totals_before ? 0 : c->n_cats;
2068 totals_before ? i < c->n_cats : i-- > 0;
2069 totals_before ? i++ : 0)
2071 struct ctables_category *cat = &c->cats[i];
2080 cat->subtotal = subtotal;
2083 case CCT_POSTCOMPUTE:
2094 case CCT_EXCLUDED_MISSING:
2103 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes every nest in STACK with ctables_nest_uninit(), then frees
   the array of nests. */
2110 ctables_stack_uninit (struct ctables_stack *stack)
2114 for (size_t i = 0; i < stack->n; i++)
2115 ctables_nest_uninit (&stack->nests[i]);
2116 free (stack->nests);
/* Builds the stack that results from nesting S1 within S0.  Room is
   allocated for S0.n * S1.n nests and one nest is produced for each pair of
   a nest A from S0 and a nest B from S1.  The new nest lists the variables
   of A followed by the variables of B.  Its scale index is that of A if A
   has one; otherwise that of B, shifted by the number of variables in A.
   Its summary specs are cloned from a source nest chosen by testing which of
   A and B lacks a variable in its CSV_CELL specs.

   S0 and S1 are uninitialized at the end, so the caller gives up both.
   NOTE(review): confirm whether empty inputs are handled before the
   allocation. */
2120 static struct ctables_stack
2121 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2128 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2129 for (size_t i = 0; i < s0.n; i++)
2130 for (size_t j = 0; j < s1.n; j++)
2132 const struct ctables_nest *a = &s0.nests[i];
2133 const struct ctables_nest *b = &s1.nests[j];
2135 size_t allocate = a->n + b->n;
2136 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2137 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
/* Concatenate the variable lists, those of A first. */
2139 for (size_t k = 0; k < a->n; k++)
2140 vars[n++] = a->vars[k];
2141 for (size_t k = 0; k < b->n; k++)
2142 vars[n++] = b->vars[k];
2143 assert (n == allocate);
2145 const struct ctables_nest *summary_src;
2146 if (!a->specs[CSV_CELL].var)
2148 else if (!b->specs[CSV_CELL].var)
2153 struct ctables_nest *new = &stack.nests[stack.n++];
2154 *new = (struct ctables_nest) {
2156 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2157 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2161 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2162 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2164 ctables_stack_uninit (&s0);
2165 ctables_stack_uninit (&s1);
/* Builds a stack that holds the nests of S0 followed by the nests of S1.
   The nest structures are copied by value.  The group_head of each nest that
   came from S1 is increased by S0.n, the number of nests now placed ahead of
   it. */
2169 static struct ctables_stack
2170 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2172 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2173 for (size_t i = 0; i < s0.n; i++)
2174 stack.nests[stack.n++] = s0.nests[i];
2175 for (size_t i = 0; i < s1.n; i++)
2177 stack.nests[stack.n] = s1.nests[i];
2178 stack.nests[stack.n].group_head += s0.n;
2181 assert (stack.n == s0.n + s1.n);
/* Returns a stack that contains exactly one newly allocated nest, built for
   axis node A with room for a single variable.  The scale index of the nest
   is 0 if A is a scale variable and SIZE_MAX otherwise.  If A has CSV_CELL
   summary specs or is a scale variable, every summary spec set of A is
   cloned into the nest and tagged with the variable and scale flag of A. */
2187 static struct ctables_stack
2188 var_fts (const struct ctables_axis *a)
2190 struct variable **vars = xmalloc (sizeof *vars);
2193 struct ctables_nest *nest = xmalloc (sizeof *nest);
2194 *nest = (struct ctables_nest) {
2197 .scale_idx = a->scale ? 0 : SIZE_MAX,
2199 if (a->specs[CSV_CELL].n || a->scale)
2200 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2202 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2203 nest->specs[sv].var = a->var;
2204 nest->specs[sv].is_scale = a->scale;
2206 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts the axis expression A into a stack of nests.  The
   paths visible here return an empty stack, combine the results for the two
   operands of A with stack_fts(), or combine them with nest_fts().
   NOTE(review): the tests that select each path are not shown; presumably a
   null A gives the empty stack, CTAO_STACK uses stack_fts() and CTAO_NEST
   uses nest_fts() -- confirm. */
2209 static struct ctables_stack
2210 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2213 return (struct ctables_stack) { .n = 0 };
2221 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2222 enumerate_fts (axis_type, a->subs[1]));
2225 /* This should consider any of the scale variables found in the result to
2226 be linked to each other listwise for SMISSING=LISTWISE. */
2227 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2228 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for a single summary statistic.  Only the members that belong
   to the summary function in use are meaningful; the comments on the member
   groups name the functions that use them. */
2234 union ctables_summary
2236 /* COUNT, VALIDN, TOTALN. */
2239 /* MINIMUM, MAXIMUM, RANGE. */
2246 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2247 struct moments1 *moments;
2249 /* MEDIAN, MODE, PTILE. */
2252 struct casewriter *writer;
2257 /* XXX multiple response */
/* Prepares S to accumulate the summary function SS->function.

   The minimum and maximum are started at SYSMIS for the functions that track
   them.  For the group of functions that includes the *PCT_SUM functions, a
   moments1 accumulator for moments up to the variance is created.  For the
   functions that need the individual observations, a sorting casewriter is
   created; its cases hold two numeric (width 0) values and are sorted in
   ascending order on the first. */
2261 ctables_summary_init (union ctables_summary *s,
2262 const struct ctables_summary_spec *ss)
2264 switch (ss->function)
2268 case CTSF_ROWPCT_COUNT:
2269 case CTSF_COLPCT_COUNT:
2270 case CTSF_TABLEPCT_COUNT:
2271 case CTSF_SUBTABLEPCT_COUNT:
2272 case CTSF_LAYERPCT_COUNT:
2273 case CTSF_LAYERROWPCT_COUNT:
2274 case CTSF_LAYERCOLPCT_COUNT:
2275 case CTSF_ROWPCT_VALIDN:
2276 case CTSF_COLPCT_VALIDN:
2277 case CTSF_TABLEPCT_VALIDN:
2278 case CTSF_SUBTABLEPCT_VALIDN:
2279 case CTSF_LAYERPCT_VALIDN:
2280 case CTSF_LAYERROWPCT_VALIDN:
2281 case CTSF_LAYERCOLPCT_VALIDN:
2282 case CTSF_ROWPCT_TOTALN:
2283 case CTSF_COLPCT_TOTALN:
2284 case CTSF_TABLEPCT_TOTALN:
2285 case CTSF_SUBTABLEPCT_TOTALN:
2286 case CTSF_LAYERPCT_TOTALN:
2287 case CTSF_LAYERROWPCT_TOTALN:
2288 case CTSF_LAYERCOLPCT_TOTALN:
2300 s->min = s->max = SYSMIS;
2308 case CTSF_ROWPCT_SUM:
2309 case CTSF_COLPCT_SUM:
2310 case CTSF_TABLEPCT_SUM:
2311 case CTSF_SUBTABLEPCT_SUM:
2312 case CTSF_LAYERPCT_SUM:
2313 case CTSF_LAYERROWPCT_SUM:
2314 case CTSF_LAYERCOLPCT_SUM:
2315 s->moments = moments1_create (MOMENT_VARIANCE);
2322 struct caseproto *proto = caseproto_create ();
2323 proto = caseproto_add_width (proto, 0);
2324 proto = caseproto_add_width (proto, 0);
2326 struct subcase ordering;
2327 subcase_init (&ordering, 0, 0, SC_ASCEND);
2328 s->writer = sort_create_writer (&ordering, proto);
2329 subcase_uninit (&ordering);
2330 caseproto_unref (proto);
/* Releases what ctables_summary_init() allocated in S for SS->function: the
   moments1 accumulator is destroyed for the group of functions that includes
   the *PCT_SUM functions, and the casewriter is destroyed for the functions
   that use one. */
2340 ctables_summary_uninit (union ctables_summary *s,
2341 const struct ctables_summary_spec *ss)
2343 switch (ss->function)
2347 case CTSF_ROWPCT_COUNT:
2348 case CTSF_COLPCT_COUNT:
2349 case CTSF_TABLEPCT_COUNT:
2350 case CTSF_SUBTABLEPCT_COUNT:
2351 case CTSF_LAYERPCT_COUNT:
2352 case CTSF_LAYERROWPCT_COUNT:
2353 case CTSF_LAYERCOLPCT_COUNT:
2354 case CTSF_ROWPCT_VALIDN:
2355 case CTSF_COLPCT_VALIDN:
2356 case CTSF_TABLEPCT_VALIDN:
2357 case CTSF_SUBTABLEPCT_VALIDN:
2358 case CTSF_LAYERPCT_VALIDN:
2359 case CTSF_LAYERROWPCT_VALIDN:
2360 case CTSF_LAYERCOLPCT_VALIDN:
2361 case CTSF_ROWPCT_TOTALN:
2362 case CTSF_COLPCT_TOTALN:
2363 case CTSF_TABLEPCT_TOTALN:
2364 case CTSF_SUBTABLEPCT_TOTALN:
2365 case CTSF_LAYERPCT_TOTALN:
2366 case CTSF_LAYERROWPCT_TOTALN:
2367 case CTSF_LAYERCOLPCT_TOTALN:
2385 case CTSF_ROWPCT_SUM:
2386 case CTSF_COLPCT_SUM:
2387 case CTSF_TABLEPCT_SUM:
2388 case CTSF_SUBTABLEPCT_SUM:
2389 case CTSF_LAYERPCT_SUM:
2390 case CTSF_LAYERROWPCT_SUM:
2391 case CTSF_LAYERCOLPCT_SUM:
2392 moments1_destroy (s->moments);
2398 casewriter_destroy (s->writer);
/* Adds the VALUE that one case has for VAR to summary S, which accumulates
   the function SS->function.

   D_WEIGHT and E_WEIGHT are two weights for the case; each function adds one
   or the other.  IS_SCALE, IS_SCALE_MISSING, IS_MISSING and EXCLUDED_MISSING
   describe the status of the value and decide whether the case is counted at
   all, following the charts in the comment at the top of the body.  Moment
   and order statistics skip a value for which IS_SCALE_MISSING is set; order
   statistics store the value and E_WEIGHT as a two-value case in the
   casewriter. */
2404 ctables_summary_add (union ctables_summary *s,
2405 const struct ctables_summary_spec *ss,
2406 const struct variable *var, const union value *value,
2407 bool is_scale, bool is_scale_missing,
2408 bool is_missing, bool excluded_missing,
2409 double d_weight, double e_weight)
2411 /* To determine whether a case is included in a given table for a particular
2412 kind of summary, consider the following charts for each variable in the
2413 table. Only if "yes" appears for every variable for the summary is the
2416 Categorical variables: VALIDN COUNT TOTALN
2417 Valid values in included categories yes yes yes
2418 Missing values in included categories --- yes yes
2419 Missing values in excluded categories --- --- yes
2420 Valid values in excluded categories --- --- ---
2422 Scale variables: VALIDN COUNT TOTALN
2423 Valid value yes yes yes
2424 Missing value --- yes yes
2426 Missing values include both user- and system-missing. (The system-missing
2427 value is always in an excluded category.)
2429 switch (ss->function)
2432 case CTSF_ROWPCT_TOTALN:
2433 case CTSF_COLPCT_TOTALN:
2434 case CTSF_TABLEPCT_TOTALN:
2435 case CTSF_SUBTABLEPCT_TOTALN:
2436 case CTSF_LAYERPCT_TOTALN:
2437 case CTSF_LAYERROWPCT_TOTALN:
2438 case CTSF_LAYERCOLPCT_TOTALN:
2439 s->count += d_weight;
2443 case CTSF_ROWPCT_COUNT:
2444 case CTSF_COLPCT_COUNT:
2445 case CTSF_TABLEPCT_COUNT:
2446 case CTSF_SUBTABLEPCT_COUNT:
2447 case CTSF_LAYERPCT_COUNT:
2448 case CTSF_LAYERROWPCT_COUNT:
2449 case CTSF_LAYERCOLPCT_COUNT:
2450 if (is_scale || !excluded_missing)
2451 s->count += d_weight;
2455 case CTSF_ROWPCT_VALIDN:
2456 case CTSF_COLPCT_VALIDN:
2457 case CTSF_TABLEPCT_VALIDN:
2458 case CTSF_SUBTABLEPCT_VALIDN:
2459 case CTSF_LAYERPCT_VALIDN:
2460 case CTSF_LAYERROWPCT_VALIDN:
2461 case CTSF_LAYERCOLPCT_VALIDN:
2465 s->count += d_weight;
2470 s->count += d_weight;
2474 if (is_scale || !excluded_missing)
2475 s->count += e_weight;
2482 s->count += e_weight;
2486 s->count += e_weight;
2492 if (!is_scale_missing)
2494 assert (!var_is_alpha (var)); /* XXX? */
2495 if (s->min == SYSMIS || value->f < s->min)
2497 if (s->max == SYSMIS || value->f > s->max)
2507 case CTSF_ROWPCT_SUM:
2508 case CTSF_COLPCT_SUM:
2509 case CTSF_TABLEPCT_SUM:
2510 case CTSF_SUBTABLEPCT_SUM:
2511 case CTSF_LAYERPCT_SUM:
2512 case CTSF_LAYERROWPCT_SUM:
2513 case CTSF_LAYERCOLPCT_SUM:
2514 if (!is_scale_missing)
2515 moments1_add (s->moments, value->f, e_weight);
2521 if (!is_scale_missing)
2523 s->ovalid += e_weight;
2525 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2526 *case_num_rw_idx (c, 0) = value->f;
2527 *case_num_rw_idx (c, 1) = e_weight;
2528 casewriter_write (s->writer, c);
/* Maps summary FUNCTION to the type of domain that it is computed relative
   to.  The COUNT, SUM, TOTALN and VALIDN variants of one percentage family
   all map to the same domain type; for example, the four CTSF_LAYERCOLPCT_*
   functions yield CTDT_LAYERCOL. */
2534 static enum ctables_domain_type
2535 ctables_function_domain (enum ctables_summary_function function)
2559 case CTSF_COLPCT_COUNT:
2560 case CTSF_COLPCT_SUM:
2561 case CTSF_COLPCT_TOTALN:
2562 case CTSF_COLPCT_VALIDN:
2565 case CTSF_LAYERCOLPCT_COUNT:
2566 case CTSF_LAYERCOLPCT_SUM:
2567 case CTSF_LAYERCOLPCT_TOTALN:
2568 case CTSF_LAYERCOLPCT_VALIDN:
2569 return CTDT_LAYERCOL;
2571 case CTSF_LAYERPCT_COUNT:
2572 case CTSF_LAYERPCT_SUM:
2573 case CTSF_LAYERPCT_TOTALN:
2574 case CTSF_LAYERPCT_VALIDN:
2577 case CTSF_LAYERROWPCT_COUNT:
2578 case CTSF_LAYERROWPCT_SUM:
2579 case CTSF_LAYERROWPCT_TOTALN:
2580 case CTSF_LAYERROWPCT_VALIDN:
2581 return CTDT_LAYERROW;
2583 case CTSF_ROWPCT_COUNT:
2584 case CTSF_ROWPCT_SUM:
2585 case CTSF_ROWPCT_TOTALN:
2586 case CTSF_ROWPCT_VALIDN:
2589 case CTSF_SUBTABLEPCT_COUNT:
2590 case CTSF_SUBTABLEPCT_SUM:
2591 case CTSF_SUBTABLEPCT_TOTALN:
2592 case CTSF_SUBTABLEPCT_VALIDN:
2593 return CTDT_SUBTABLE;
2595 case CTSF_TABLEPCT_COUNT:
2596 case CTSF_TABLEPCT_SUM:
2597 case CTSF_TABLEPCT_TOTALN:
2598 case CTSF_TABLEPCT_VALIDN:
/* Computes the final value of summary S, which accumulated the function
   SS->function, for CELL.

   The count-based percentage functions divide S->count by a total of the
   domain chosen by ctables_function_domain() -- e_count, e_valid or e_total
   depending on the family -- and multiply by 100, provided that the total is
   nonzero.  Moment-based functions derive their result from the moments1
   accumulator.  Order statistics turn the casewriter into a reader and feed
   it to a percentile calculation (at SS->percentile for CTSF_PTILE and at
   0.5 otherwise) or to a mode calculation, storing the result in
   S->ovalue. */
2606 ctables_summary_value (const struct ctables_cell *cell,
2607 union ctables_summary *s,
2608 const struct ctables_summary_spec *ss)
2610 switch (ss->function)
2616 case CTSF_ROWPCT_COUNT:
2617 case CTSF_COLPCT_COUNT:
2618 case CTSF_TABLEPCT_COUNT:
2619 case CTSF_SUBTABLEPCT_COUNT:
2620 case CTSF_LAYERPCT_COUNT:
2621 case CTSF_LAYERROWPCT_COUNT:
2622 case CTSF_LAYERCOLPCT_COUNT:
2624 enum ctables_domain_type d = ctables_function_domain (ss->function);
2625 return (cell->domains[d]->e_count
2626 ? s->count / cell->domains[d]->e_count * 100
2630 case CTSF_ROWPCT_VALIDN:
2631 case CTSF_COLPCT_VALIDN:
2632 case CTSF_TABLEPCT_VALIDN:
2633 case CTSF_SUBTABLEPCT_VALIDN:
2634 case CTSF_LAYERPCT_VALIDN:
2635 case CTSF_LAYERROWPCT_VALIDN:
2636 case CTSF_LAYERCOLPCT_VALIDN:
2638 enum ctables_domain_type d = ctables_function_domain (ss->function);
2639 return (cell->domains[d]->e_valid
2640 ? s->count / cell->domains[d]->e_valid * 100
2644 case CTSF_ROWPCT_TOTALN:
2645 case CTSF_COLPCT_TOTALN:
2646 case CTSF_TABLEPCT_TOTALN:
2647 case CTSF_SUBTABLEPCT_TOTALN:
2648 case CTSF_LAYERPCT_TOTALN:
2649 case CTSF_LAYERROWPCT_TOTALN:
2650 case CTSF_LAYERCOLPCT_TOTALN:
2652 enum ctables_domain_type d = ctables_function_domain (ss->function);
2653 return (cell->domains[d]->e_total
2654 ? s->count / cell->domains[d]->e_total * 100
2678 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2683 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2689 double weight, variance;
2690 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2691 return calc_semean (variance, weight);
2697 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2698 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2703 double weight, mean;
2704 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2705 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2711 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2715 case CTSF_ROWPCT_SUM:
2716 case CTSF_COLPCT_SUM:
2717 case CTSF_TABLEPCT_SUM:
2718 case CTSF_SUBTABLEPCT_SUM:
2719 case CTSF_LAYERPCT_SUM:
2720 case CTSF_LAYERROWPCT_SUM:
2721 case CTSF_LAYERCOLPCT_SUM:
2728 struct casereader *reader = casewriter_make_reader (s->writer);
2731 struct percentile *ptile = percentile_create (
2732 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2733 struct order_stats *os = &ptile->parent;
2734 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2735 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2736 statistic_destroy (&ptile->parent.parent);
2743 struct casereader *reader = casewriter_make_reader (s->writer);
2746 struct mode *mode = mode_create ();
2747 struct order_stats *os = &mode->parent;
2748 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2749 s->ovalue = mode->mode;
2750 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis whose cell values are examined. */
2758 struct ctables_cell_sort_aux
2760 const struct ctables_nest *nest;
2761 enum pivot_axis_type a;
/* Three-way comparison callback for an array of pointers to cells.  A_ and
   B_ each point to a cell pointer; AUX_ points to a struct
   ctables_cell_sort_aux.

   Cells are compared variable by variable in nest order, skipping the scale
   variable.  Cells that fall into different categories are ordered by the
   addresses of their category structures.  Within one category the order
   depends on the category type: some types always compare equal, others
   compare the values or the value labels (a value with a label sorts after
   one without; two unlabeled values compare by value), and the result is
   negated when the category is not sort_ascending. */
2765 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2767 const struct ctables_cell_sort_aux *aux = aux_;
2768 struct ctables_cell *const *ap = a_;
2769 struct ctables_cell *const *bp = b_;
2770 const struct ctables_cell *a = *ap;
2771 const struct ctables_cell *b = *bp;
2773 const struct ctables_nest *nest = aux->nest;
2774 for (size_t i = 0; i < nest->n; i++)
2775 if (i != nest->scale_idx)
2777 const struct variable *var = nest->vars[i];
2778 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2779 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2780 if (a_cv->category != b_cv->category)
2781 return a_cv->category > b_cv->category ? 1 : -1;
2783 const union value *a_val = &a_cv->value;
2784 const union value *b_val = &b_cv->value;
2785 switch (a_cv->category->type)
2791 case CCT_POSTCOMPUTE:
2792 case CCT_EXCLUDED_MISSING:
2793 /* Must be equal. */
2801 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2809 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2811 return a_cv->category->sort_ascending ? cmp : -cmp;
2817 const char *a_label = var_lookup_value_label (var, a_val);
2818 const char *b_label = var_lookup_value_label (var, b_val);
2820 ? (b_label ? strcmp (a_label, b_label) : 1)
2821 : (b_label ? -1 : value_compare_3way (
2822 a_val, b_val, var_get_width (var))));
2824 return a_cv->category->sort_ascending ? cmp : -cmp;
2838 For each ctables_table:
2839 For each combination of row vars:
2840 For each combination of column vars:
2841 For each combination of layer vars:
2843 Make a table of row values:
2844 Sort entries by row values
2845 Assign a 0-based index to each actual value
2846 Construct a dimension
2847 Make a table of column values
2848 Make a table of layer values
2850 Fill the table entry using the indexes from before.
/* Looks up the domain of type DOMAIN in section S that CELL belongs to and
   creates it, with CELL as its example, if the lookup finds none.

   Two cells share a domain if, for every variable that the nests of S list
   for this domain type, they have the same category and -- unless that
   category is a total, subtotal or postcompute -- equal values.  The hash is
   computed from exactly the same fields, so equal keys hash alike. */
2853 static struct ctables_domain *
2854 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2855 enum ctables_domain_type domain)
2858 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2860 const struct ctables_nest *nest = s->nests[a];
2861 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2863 size_t v_idx = nest->domains[domain][i];
2864 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2865 hash = hash_pointer (cv->category, hash);
2866 if (cv->category->type != CCT_TOTAL
2867 && cv->category->type != CCT_SUBTOTAL
2868 && cv->category->type != CCT_POSTCOMPUTE)
2869 hash = value_hash (&cv->value,
2870 var_get_width (nest->vars[v_idx]), hash);
/* Compare the cell against the example cell of each candidate domain. */
2874 struct ctables_domain *d;
2875 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2877 const struct ctables_cell *df = d->example;
2878 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2880 const struct ctables_nest *nest = s->nests[a];
2881 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2883 size_t v_idx = nest->domains[domain][i];
2884 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
2885 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
2886 if (cv1->category != cv2->category
2887 || (cv1->category->type != CCT_TOTAL
2888 && cv1->category->type != CCT_SUBTOTAL
2889 && cv1->category->type != CCT_POSTCOMPUTE
2890 && !value_equal (&cv1->value, &cv2->value,
2891 var_get_width (nest->vars[v_idx]))))
/* No existing domain matched: create one with CELL as its example. */
2900 d = xmalloc (sizeof *d);
2901 *d = (struct ctables_domain) { .example = cell };
2902 hmap_insert (&s->domains[domain], &d->node, hash);
/* Wraps the string value V of variable VAR in a substring as long as the
   width of the variable, then trims trailing spaces from it.
   NOTE(review): presumably the substring refers to the bytes of V rather
   than to a copy, so it is valid only as long as V is -- confirm against
   ss_buffer(). */
2906 static struct substring
2907 rtrim_value (const union value *v, const struct variable *var)
2909 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2910 var_get_width (var));
2911 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if the string value V of VAR, with trailing spaces removed,
   lies within SRANGE.  SRANGE[0] is the lower bound and SRANGE[1] the upper
   bound; a bound whose string member is null is open.  Both comparisons,
   made with ss_compare(), are inclusive. */
2916 in_string_range (const union value *v, const struct variable *var,
2917 const struct substring *srange)
2919 struct substring s = rtrim_value (v, var);
2920 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2921 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.

   The system-missing value of a numeric variable is tested for first
   (NOTE(review): presumably it never matches a category -- confirm).  The
   categories are then scanned from the last to the first.  Depending on its
   type, a category is tested by number, by string with trailing spaces
   removed, by numeric range (where -DBL_MAX and DBL_MAX mark open bounds),
   by string range, or by whether V is a missing value of VAR.

   If the scan ends without a result, a missing value yields NULL and any
   other value yields the OTHERNM category noted during the scan, which may
   be NULL. */
2924 static const struct ctables_category *
2925 ctables_categories_match (const struct ctables_categories *c,
2926 const union value *v, const struct variable *var)
2928 if (var_is_numeric (var) && v->f == SYSMIS)
2931 const struct ctables_category *othernm = NULL;
2932 for (size_t i = c->n_cats; i-- > 0; )
2934 const struct ctables_category *cat = &c->cats[i];
2938 if (cat->number == v->f)
2943 if (ss_equals (cat->string, rtrim_value (v, var)))
2948 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2949 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2954 if (in_string_range (v, var, cat->srange))
2959 if (var_is_value_missing (var, v))
2963 case CCT_POSTCOMPUTE:
2978 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2981 case CCT_EXCLUDED_MISSING:
2986 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the total category of C.  Only the first and the last position of
   the category list are examined, and the first takes precedence.
   NOTE(review): the last element is addressed as C->cats[C->n_cats - 1]
   without a check, so C is assumed to hold at least one category -- confirm
   that every caller guarantees this. */
2989 static const struct ctables_category *
2990 ctables_categories_total (const struct ctables_categories *c)
2992 const struct ctables_category *first = &c->cats[0];
2993 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2994 return (first->type == CCT_TOTAL ? first
2995 : last->type == CCT_TOTAL ? last
/* Finds the cell in section S that corresponds to case C and the per-axis,
   per-variable categories in CATS, creating and inserting a new cell into
   S->cells when no equal cell is found.

   Scale variables (index nest->scale_idx) never take part in the key.  For
   every other variable the key consists of the category pointer plus, unless
   the category is a total, subtotal, or postcompute, the variable's value in
   C. */
2999 static struct ctables_cell *
3000 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3001 const struct ctables_category *cats[PIVOT_N_AXES][10])
3004 enum ctables_summary_variant sv = CSV_CELL;
/* Step 1: hash the key. */
3005 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3007 const struct ctables_nest *nest = s->nests[a];
3008 for (size_t i = 0; i < nest->n; i++)
3009 if (i != nest->scale_idx)
3011 hash = hash_pointer (cats[a][i], hash);
3012 if (cats[a][i]->type != CCT_TOTAL
3013 && cats[a][i]->type != CCT_SUBTOTAL
3014 && cats[a][i]->type != CCT_POSTCOMPUTE)
3015 hash = value_hash (case_data (c, nest->vars[i]),
3016 var_get_width (nest->vars[i]), hash);
/* Step 2: among cells with the same hash, look for one whose categories (and
   values, for ordinary categories) all agree with the key. */
3022 struct ctables_cell *cell;
3023 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3025 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3027 const struct ctables_nest *nest = s->nests[a];
3028 for (size_t i = 0; i < nest->n; i++)
3029 if (i != nest->scale_idx
3030 && (cats[a][i] != cell->axes[a].cvs[i].category
3031 || (cats[a][i]->type != CCT_TOTAL
3032 && cats[a][i]->type != CCT_SUBTOTAL
3033 && cats[a][i]->type != CCT_POSTCOMPUTE
3034 && !value_equal (case_data (c, nest->vars[i]),
3035 &cell->axes[a].cvs[i].value,
3036 var_get_width (nest->vars[i])))))
/* Step 3: build a new cell. */
3045 cell = xmalloc (sizeof *cell);
3048 cell->omit_domains = 0;
3049 cell->postcompute = false;
3050 //struct string name = DS_EMPTY_INITIALIZER;
3051 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3053 const struct ctables_nest *nest = s->nests[a];
3054 cell->axes[a].cvs = (nest->n
3055 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3057 for (size_t i = 0; i < nest->n; i++)
3059 const struct ctables_category *cat = cats[a][i];
3060 const struct variable *var = nest->vars[i];
3061 const union value *value = case_data (c, var);
3062 if (i != nest->scale_idx)
3064 const struct ctables_category *subtotal = cat->subtotal;
3065 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* A total, subtotal, or postcompute category marks, depending on the axis it
   appears on, a set of domain types in cell->omit_domains. */
3068 if (cat->type == CCT_TOTAL
3069 || cat->type == CCT_SUBTOTAL
3070 || cat->type == CCT_POSTCOMPUTE)
3072 /* XXX these should be more encompassing I think.*/
3076 case PIVOT_AXIS_COLUMN:
3077 cell->omit_domains |= ((1u << CTDT_TABLE) |
3078 (1u << CTDT_LAYER) |
3079 (1u << CTDT_LAYERCOL) |
3080 (1u << CTDT_SUBTABLE) |
3083 case PIVOT_AXIS_ROW:
3084 cell->omit_domains |= ((1u << CTDT_TABLE) |
3085 (1u << CTDT_LAYER) |
3086 (1u << CTDT_LAYERROW) |
3087 (1u << CTDT_SUBTABLE) |
3090 case PIVOT_AXIS_LAYER:
3091 cell->omit_domains |= ((1u << CTDT_TABLE) |
3092 (1u << CTDT_LAYER));
3096 if (cat->type == CCT_POSTCOMPUTE)
3097 cell->postcompute = true;
/* The cell keeps the category pointer and its own copy of the value. */
3100 cell->axes[a].cvs[i].category = cat;
3101 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below build a descriptive string in `name',
   whose declaration above is commented out, so they presumably belong to
   disabled debugging code -- confirm. */
3104 if (i != nest->scale_idx)
3106 if (!ds_is_empty (&name))
3107 ds_put_cstr (&name, ", ");
3108 char *value_s = data_out (value, var_get_encoding (var),
3109 var_get_print_format (var),
3110 settings_get_fmt_settings ());
3111 if (cat->type == CCT_TOTAL
3112 || cat->type == CCT_SUBTOTAL
3113 || cat->type == CCT_POSTCOMPUTE)
3114 ds_put_format (&name, "%s=total", var_get_name (var));
3116 ds_put_format (&name, "%s=%s", var_get_name (var),
3117 value_s + strspn (value_s, " "));
3123 //cell->name = ds_steal_cstr (&name);
/* Allocate and initialize one summary per spec of the summary-axis nest for
   this cell's variant, attach the cell to a domain of every type, and
   finally add the cell to the section's hash map. */
3125 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3126 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3127 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3128 for (size_t i = 0; i < specs->n; i++)
3129 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3130 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3131 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3132 hmap_insert (&s->cells, &cell->node, hash);
/* Tests the scale variable of SPECS, and then each of its listwise
   variables, for a missing numeric value in case C.  Spec sets that are not
   for a scale variable (!specs->is_scale) are handled first, before any
   value is read. */
3137 is_scale_missing (const struct ctables_summary_spec_set *specs,
3138 const struct ccase *c)
3140 if (!specs->is_scale)
3143 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3146 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3148 const struct variable *var = specs->listwise_vars[i];
3149 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell that CATS
   selects in section S.  The cell is obtained from ctables_cell_insert__();
   each of its summaries is updated with the value of the spec set's
   variable, and then the accumulators of the cell's domains are updated. */
3157 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3158 const struct ctables_category *cats[PIVOT_N_AXES][10],
3159 bool is_missing, bool excluded_missing,
3160 double d_weight, double e_weight)
3162 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3163 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3165 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3167 bool scale_missing = is_scale_missing (specs, c);
3168 for (size_t i = 0; i < specs->n; i++)
3169 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3170 specs->var, case_data (c, specs->var), specs->is_scale,
3171 scale_missing, is_missing, excluded_missing,
3172 d_weight, e_weight);
3173 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* NOTE(review): this is a logical `&&', not a bitwise `&'.  As written, the
   test does not depend on DT: a cell with any bit set in omit_domains
   updates no domain at all.  With `&', such a cell would start updating the
   domains whose bits are clear, so confirm which behavior is intended
   before changing the operator. */
3174 if (!(cell->omit_domains && (1u << dt)))
3176 struct ctables_domain *d = cell->domains[dt];
3177 d->d_total += d_weight;
3178 d->e_total += e_weight;
/* The counts leave out cases that have an excluded missing value. */
3179 if (!excluded_missing)
3181 d->d_count += d_weight;
3182 d->e_count += e_weight;
3186 d->d_valid += d_weight;
3187 d->e_valid += e_weight;
/* Adds case C to cells for totals.  Starting at axis START_AXIS and variable
   index START_NEST, each non-scale variable's total category is looked up
   with ctables_categories_total(); the case is then added with
   ctables_cell_add__(), and the function calls itself for the variables
   after the current one (same axis, index I + 1).

   SAVE holds the variable's current entry in CATS (presumably so that it
   can be restored after the total has been substituted -- confirm). */
3193 recurse_totals (struct ctables_section *s, const struct ccase *c,
3194 const struct ctables_category *cats[PIVOT_N_AXES][10],
3195 bool is_missing, bool excluded_missing,
3196 double d_weight, double e_weight,
3197 enum pivot_axis_type start_axis, size_t start_nest)
3199 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3201 const struct ctables_nest *nest = s->nests[a];
3202 for (size_t i = start_nest; i < nest->n; i++)
3204 if (i == nest->scale_idx)
3207 const struct variable *var = nest->vars[i];
3209 const struct ctables_category *total = ctables_categories_total (
3210 s->table->categories[var_get_dict_index (var)]);
3213 const struct ctables_category *save = cats[a][i];
3215 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3216 d_weight, e_weight);
3217 recurse_totals (s, c, cats, is_missing, excluded_missing,
3218 d_weight, e_weight, a, i + 1);
/* Adds case C to cells for subtotals.  Starting at axis START_AXIS and
   variable index START_NEST, the entry for each non-scale variable in CATS
   is replaced by that category's subtotal (SAVE->subtotal), the case is
   added with ctables_cell_add__(), and the function calls itself for the
   variables after the current one (same axis, index I + 1).  SAVE holds the
   entry that was in CATS before the replacement. */
3227 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3228 const struct ctables_category *cats[PIVOT_N_AXES][10],
3229 bool is_missing, bool excluded_missing,
3230 double d_weight, double e_weight,
3231 enum pivot_axis_type start_axis, size_t start_nest)
3233 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3235 const struct ctables_nest *nest = s->nests[a];
3236 for (size_t i = start_nest; i < nest->n; i++)
3238 if (i == nest->scale_idx)
3241 const struct ctables_category *save = cats[a][i];
3244 cats[a][i] = save->subtotal;
3245 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3246 d_weight, e_weight);
3247 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3248 d_weight, e_weight, a, i + 1);
/* Records VALUE, a value of variable VAR, in the hash map OCCURRENCES.
   Entries with the same hash are first compared against VALUE using VAR's
   width; the code that follows allocates a new entry holding a clone of
   VALUE and inserts it under that hash. */
3257 ctables_add_occurrence (const struct variable *var,
3258 const union value *value,
3259 struct hmap *occurrences)
3261 int width = var_get_width (var);
3262 unsigned int hash = value_hash (value, width, 0);
3264 struct ctables_occurrence *o;
3265 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3267 if (value_equal (value, &o->value, width))
3270 o = xmalloc (sizeof *o);
3271 value_clone (&o->value, value, width);
3272 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   A category is chosen for every non-scale variable of every axis with
   ctables_categories_match(); a static CCT_EXCLUDED_MISSING placeholder
   category can be used instead, in which case EXCLUDED_MISSING is set.  The
   case is then added to its own cell and to the cells for totals and
   subtotals. */
3276 ctables_cell_insert (struct ctables_section *s,
3277 const struct ccase *c,
3278 double d_weight, double e_weight)
3280 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3282 /* Does at least one categorical variable have a missing value in an included
3283 or excluded category? */
3284 bool is_missing = false;
3286 /* Does at least one categorical variable have a missing value in an excluded
3288 bool excluded_missing = false;
// Choose a category for each non-scale variable.
3290 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3292 const struct ctables_nest *nest = s->nests[a];
3293 for (size_t i = 0; i < nest->n; i++)
3295 if (i == nest->scale_idx)
3298 const struct variable *var = nest->vars[i];
3299 const union value *value = case_data (c, var);
3301 bool var_missing = var_is_value_missing (var, value) != 0;
3305 cats[a][i] = ctables_categories_match (
3306 s->table->categories[var_get_dict_index (var)], value, var);
3312 static const struct ctables_category cct_excluded_missing = {
3313 .type = CCT_EXCLUDED_MISSING,
3316 cats[a][i] = &cct_excluded_missing;
3317 excluded_missing = true;
// Occurrences are recorded only when no variable had an excluded missing
// value.
3322 if (!excluded_missing)
3323 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3325 const struct ctables_nest *nest = s->nests[a];
3326 for (size_t i = 0; i < nest->n; i++)
3327 if (i != nest->scale_idx)
3329 const struct variable *var = nest->vars[i];
3330 const union value *value = case_data (c, var);
3331 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
// Add the case to its own cell, then to the total and subtotal cells, both
// starting from axis 0, variable 0.
3335 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3336 d_weight, e_weight);
3338 //if (!excluded_missing)
3340 recurse_totals (s, c, cats, is_missing, excluded_missing,
3341 d_weight, e_weight, 0, 0);
3342 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3343 d_weight, e_weight, 0, 0);
/* Summary spec set that this merge item reads from; merge_item_compare_3way()
   looks at set->specs[ofs]. */
3349 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge items A and B
   currently point at (set->specs[ofs]).  Specs are ordered by function in
   ascending order, then by percentile, then by label with strcmp().

   NOTE(review): the percentile test returns 1 when A's percentile is the
   smaller one, i.e. larger percentiles sort first, which is the opposite
   direction from the function test -- confirm that this is intended. */
3354 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3356 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3357 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3358 if (as->function != bs->function)
3359 return as->function > bs->function ? 1 : -1;
3360 else if (as->percentile != bs->percentile)
3361 return as->percentile < bs->percentile ? 1 : -1;
3362 return strcmp (as->label, bs->label);
/* Creates and returns a pivot value to label category CAT:

   - For a total or subtotal, user text from CAT->total_label.

   - For a postcompute that has a label, user text from CAT->pc->label.

   - Otherwise (including a postcompute without a label), a value label
     built from VALUE of variable VAR. */
3365 static struct pivot_value *
3366 ctables_category_create_label (const struct ctables_category *cat,
3367 const struct variable *var,
3368 const union value *value)
3370 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3371 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3372 : cat->type == CCT_POSTCOMPUTE && cat->pc->label
3373 ? pivot_value_new_user_text (cat->pc->label, SIZE_MAX)
3374 : pivot_value_new_var_value (var, value));
/* Searches T->clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH.  HASH must be VALUE's hash as the callers in this file
   compute it, value_hash (value, width, 0). */
3377 static struct ctables_value *
3378 ctables_value_find__ (struct ctables_table *t, const union value *value,
3379 int width, unsigned int hash)
3381 struct ctables_value *clv;
3382 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3383 hash, &t->clabels_values_map)
3384 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The map is searched for VALUE first
   with ctables_value_find__(); the insertion path allocates an entry, clones
   VALUE into it, and inserts it under VALUE's hash. */
3390 ctables_value_insert (struct ctables_table *t, const union value *value,
3393 unsigned int hash = value_hash (value, width, 0);
3394 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3397 clv = xmalloc (sizeof *clv);
3398 value_clone (&clv->value, value, width);
3399 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Convenience wrapper around ctables_value_find__() that computes the hash
   of VALUE (with the given WIDTH) itself. */
3403 static struct ctables_value *
3404 ctables_value_find (struct ctables_table *t,
3405 const union value *value, int width)
3407 return ctables_value_find__ (t, value, width,
3408 value_hash (value, width, 0));
/* Recursively enumerates one nest index per axis into IX, starting from axis
   A, and appends one section to T for each complete combination.

   While A is a valid axis, every index below MAX (t->stacks[a].n, 1) is
   tried, so an axis with an empty stack still contributes one iteration.
   Once all axes have an index, a new section is initialized in place at
   t->sections[t->n_sections++], with empty cell, occurrence and domain hash
   maps.

   NOTE(review): nothing visible here grows t->sections, so the array is
   presumably sized by the caller beforehand -- confirm. */
3412 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3413 size_t ix[PIVOT_N_AXES])
3415 if (a < PIVOT_N_AXES)
3417 size_t limit = MAX (t->stacks[a].n, 1);
3418 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3419 ctables_table_add_section (t, a + 1, ix);
3423 struct ctables_section *s = &t->sections[t->n_sections++];
3424 *s = (struct ctables_section) {
3426 .cells = HMAP_INITIALIZER (s->cells),
3428 for (a = 0; a < PIVOT_N_AXES; a++)
3431 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence map per variable in the nest. */
3433 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3434 for (size_t i = 0; i < nest->n; i++)
3435 hmap_init (&s->occurrences[a][i]);
3437 for (size_t i = 0; i < N_CTDTS; i++)
3438 hmap_init (&s->domains[i]);
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() for addition (presumably returns
   A + B -- confirm). */
3443 ctpo_add (double a, double b)
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() for subtraction (presumably returns
   A - B -- confirm). */
3449 ctpo_sub (double a, double b)
/* Operator callback that ctables_pcexpr_evaluate() passes to
   ctables_pcexpr_evaluate_nonterminal() for multiplication (presumably
   returns A * B -- confirm). */
3455 ctpo_mul (double a, double b)
/* Division operator callback: returns A / B, or SYSMIS when B is zero. */
3461 ctpo_div (double a, double b)
3463 return b ? a / b : SYSMIS;
/* Exponentiation operator callback, computed with pow (A, B).  errno is
   saved in SAVE_ERRNO before pow() is called (presumably so that it can be
   inspected for a pow() failure and then restored -- confirm). */
3467 ctpo_pow (double a, double b)
3469 int save_errno = errno;
3471 double result = pow (a, b);
/* Negation operator callback.  It takes two arguments only to fit the
   common (double, double) callback signature; B is unused. */
3479 ctpo_neg (double a, double b UNUSED)
/* Context shared by the postcompute expression evaluation functions.  It is
   filled in by ctables_cell_calculate_postcompute(). */
3484 struct ctables_pcexpr_evaluate_ctx
/* Cell whose postcompute is being evaluated. */
3486 const struct ctables_cell *cell;
/* Section whose cells and occurrences are searched. */
3487 const struct ctables_section *section;
/* Categories in which the expression's category references are looked
   up. */
3488 const struct ctables_categories *cats;
/* Axis of the variable that carries the postcompute category. */
3489 enum pivot_axis_type pc_a;
/* Forward declaration: ctables_pcexpr_evaluate() and
   ctables_pcexpr_evaluate_nonterminal() call each other. */
3493 static double ctables_pcexpr_evaluate (
3494 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E: the first N_ARGS subexpressions of E are
   evaluated in order and EVALUATE is applied to the results.  ARGS starts
   out as { 0, 0 }, so a unary operator (N_ARGS == 1) receives 0 as its
   second argument.

   An argument that is not finite or that equals SYSMIS is special-cased
   before EVALUATE is reached (presumably by returning SYSMIS -- confirm). */
3497 ctables_pcexpr_evaluate_nonterminal (
3498 const struct ctables_pcexpr_evaluate_ctx *ctx,
3499 const struct ctables_pcexpr *e, size_t n_args,
3500 double evaluate (double, double))
3502 double args[2] = { 0, 0 };
3503 for (size_t i = 0; i < n_args; i++)
3505 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3506 if (!isfinite (args[i]) || args[i] == SYSMIS)
3509 return evaluate (args[0], args[1]);
/* Looks up the cell that has the same category and value as CTX->cell for
   every non-scale variable, except that PC_CV is used in place of the entry
   at axis CTX->pc_a, index CTX->pc_a_idx, and returns the value of that
   cell's first summary.

   The hash and the comparison follow the same rule as cell insertion: the
   category pointer always takes part, and the value only when the category
   is not a total, subtotal, or postcompute. */
3513 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3514 const struct ctables_cell_value *pc_cv)
3516 const struct ctables_section *s = ctx->section;
/* Hash the key of the cell being sought. */
3519 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3521 const struct ctables_nest *nest = s->nests[a];
3522 for (size_t i = 0; i < nest->n; i++)
3523 if (i != nest->scale_idx)
3525 const struct ctables_cell_value *cv
3526 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3527 : &ctx->cell->axes[a].cvs[i]);
3528 hash = hash_pointer (cv->category, hash);
3529 if (cv->category->type != CCT_TOTAL
3530 && cv->category->type != CCT_SUBTOTAL
3531 && cv->category->type != CCT_POSTCOMPUTE)
3532 hash = value_hash (&cv->value,
3533 var_get_width (nest->vars[i]), hash);
/* Compare candidate cell TC against the key, variable by variable. */
3537 struct ctables_cell *tc;
3538 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3540 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3542 const struct ctables_nest *nest = s->nests[a];
3543 for (size_t i = 0; i < nest->n; i++)
3544 if (i != nest->scale_idx)
3546 const struct ctables_cell_value *p_cv
3547 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3548 : &ctx->cell->axes[a].cvs[i]);
3549 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3550 if (p_cv->category != t_cv->category
3551 || (p_cv->category->type != CCT_TOTAL
3552 && p_cv->category->type != CCT_SUBTOTAL
3553 && p_cv->category->type != CCT_POSTCOMPUTE
3554 && !value_equal (&p_cv->value,
3556 var_get_width (nest->vars[i]))))
/* Only summary 0 of the matching cell is used (J is fixed at 0 and marked
   XXX). */
3568 const struct ctables_table *t = s->table;
3569 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3570 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3571 size_t j = 0 /* XXX */;
3572 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
/* Evaluates postcompute expression E in context CTX and returns the result.

   - A category range (CTPO_CAT_RANGE) is the sum, over every value that
     occurred for the postcompute's variable and that matches the range's
     category, of the value from ctables_pcexpr_evaluate_category().

   - Any other category reference is the value from
     ctables_pcexpr_evaluate_category() for that one category.

   - Operators are delegated to ctables_pcexpr_evaluate_nonterminal() with
     the matching ctpo_*() callback. */
3576 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3577 const struct ctables_pcexpr *e)
3584 case CTPO_CAT_RANGE:
3586 struct ctables_cell_value cv = {
3587 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3589 assert (cv.category != NULL);
3591 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3592 const struct ctables_occurrence *o;
3595 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3596 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3597 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3599 cv.value = o->value;
3600 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3605 case CTPO_CAT_NUMBER:
3606 case CTPO_CAT_STRING:
3607 case CTPO_CAT_MISSING:
3608 case CTPO_CAT_OTHERNM:
3609 case CTPO_CAT_SUBTOTAL:
3610 case CTPO_CAT_TOTAL:
/* NOTE(review): the lookup value is always built from e->number, including
   for CTPO_CAT_STRING, CTPO_CAT_MISSING and CTPO_CAT_OTHERNM.  The cell
   lookup hashes and compares this value for ordinary categories, so confirm
   that a numeric value is what those cases need. */
3612 struct ctables_cell_value cv = {
3613 .category = ctables_find_category_for_postcompute (ctx->cats, e),
3614 .value = { .f = e->number },
3616 assert (cv.category != NULL);
3617 return ctables_pcexpr_evaluate_category (ctx, &cv);
3621 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3624 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3627 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3630 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3633 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3636 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Computes the value of CELL in section S from a postcompute expression.

   The axes are searched in order, and within each axis the variables in
   order, for a cell value whose category is CCT_POSTCOMPUTE.  The assertion
   inside the loop fails if no axis contains one.  The expression of the
   postcompute that is found is then evaluated against the categories of the
   variable that carries it. */
3643 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3644 const struct ctables_cell *cell)
3646 enum pivot_axis_type pc_a;
3648 const struct ctables_postcompute *pc;
3649 for (pc_a = 0; ; pc_a++)
3651 assert (pc_a < PIVOT_N_AXES);
3652 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3654 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3655 if (cv->category->type == CCT_POSTCOMPUTE)
3657 pc = cv->category->pc;
3664 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3665 const struct ctables_categories *cats = s->table->categories[
3666 var_get_dict_index (var)];
3667 struct ctables_pcexpr_evaluate_ctx ctx = {
3672 .pc_a_idx = pc_a_idx,
3674 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Builds a pivot table from table T of CTABLES command CT and submits it for
   output.

   The visible steps are: create the pivot table and set its title, caption
   and corner text; create the optional "Statistics" dimension and the
   optional category-label dimension; for each axis, sort that axis's cells
   and turn them into a tree of pivot categories; and finally put one value
   into the pivot table per cell and summary. */
3678 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3680 struct pivot_table *pt = pivot_table_create__ (
3682 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3683 : pivot_value_new_text (N_("Custom Tables"))),
3686 pivot_table_set_caption (
3687 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
3689 pivot_table_set_corner_text (
3690 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summaries get a dimension of their own when summary labels go on a
   different axis from the summaries, or when the labels are hidden and
   there is more than one summary. */
3692 bool summary_dimension = (t->summary_axis != t->slabels_axis
3693 || (!t->slabels_visible
3694 && t->summary_specs.n > 1));
3695 if (summary_dimension)
3697 struct pivot_dimension *d = pivot_dimension_create (
3698 pt, t->slabels_axis, N_("Statistics"));
3699 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3700 if (!t->slabels_visible)
3701 d->hide_all_labels = true;
3702 for (size_t i = 0; i < specs->n; i++)
3703 pivot_category_create_leaf (
3704 d->root, pivot_value_new_text (specs->specs[i].label));
/* Category labels get a dimension of their own when t->clabels_example is
   set; it has one leaf per entry of t->clabels_values. */
3707 bool categories_dimension = t->clabels_example != NULL;
3708 if (categories_dimension)
3710 struct pivot_dimension *d = pivot_dimension_create (
3711 pt, t->label_axis[t->clabels_from_axis],
3712 t->clabels_from_axis == PIVOT_AXIS_ROW
3713 ? N_("Row Categories")
3714 : N_("Column Categories"));
3715 const struct variable *var = t->clabels_example;
3716 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3717 for (size_t i = 0; i < t->n_clabels_values; i++)
3719 const struct ctables_value *value = t->clabels_values[i];
3720 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3721 assert (cat != NULL);
3722 pivot_category_create_leaf (d->root, ctables_category_create_label (
3723 cat, t->clabels_example, &value->value));
/* One dimension per axis that has an expression or that holds the
   summaries. */
3727 pivot_table_set_look (pt, ct->look);
3728 struct pivot_dimension *d[PIVOT_N_AXES];
3729 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3731 static const char *names[] = {
3732 [PIVOT_AXIS_ROW] = N_("Rows"),
3733 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3734 [PIVOT_AXIS_LAYER] = N_("Layers"),
3736 d[a] = (t->axes[a] || a == t->summary_axis
3737 ? pivot_dimension_create (pt, a, names[a])
3742 assert (t->axes[a]);
3744 for (size_t i = 0; i < t->stacks[a].n; i++)
3746 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Collect the sections that use NEST on axis A, counting their cells and
   tracking the deepest nest. */
3747 struct ctables_section **sections = xnmalloc (t->n_sections,
3749 size_t n_sections = 0;
3751 size_t n_total_cells = 0;
3752 size_t max_depth = 0;
3753 for (size_t j = 0; j < t->n_sections; j++)
3754 if (t->sections[j].nests[a] == nest)
3756 struct ctables_section *s = &t->sections[j];
3757 sections[n_sections++] = s;
3758 n_total_cells += s->cells.count;
3760 size_t depth = s->nests[a]->n;
3761 max_depth = MAX (depth, max_depth);
/* Gather cells from those sections into SORTED and sort them for this nest
   and axis. */
3764 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3766 size_t n_sorted = 0;
3768 for (size_t j = 0; j < n_sections; j++)
3770 struct ctables_section *s = sections[j];
3772 struct ctables_cell *cell;
3773 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3775 sorted[n_sorted++] = cell;
3776 assert (n_sorted <= n_total_cells);
3779 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3780 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* NOTE(review): the loop below prints debugging output to stdout and refers
   to a cell `name' member, so it is presumably disabled code -- confirm. */
3783 for (size_t j = 0; j < n_sorted; j++)
3785 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
3790 struct ctables_level
3792 enum ctables_level_type
3794 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3795 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3796 CTL_SUMMARY, /* Summary functions. */
3800 enum settings_value_show vlabel; /* CTL_VAR only. */
/* Each variable adds at most a CTL_VAR and a CTL_CATEGORY level, and at most
   one CTL_SUMMARY level follows, hence the 1 + 2 * max_depth capacity. */
3803 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3804 size_t n_levels = 0;
3805 for (size_t k = 0; k < nest->n; k++)
3807 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3808 if (vlabel != CTVL_NONE)
3810 levels[n_levels++] = (struct ctables_level) {
3812 .vlabel = (enum settings_value_show) vlabel,
3817 if (nest->scale_idx != k
3818 && (k != nest->n - 1 || t->label_axis[a] == a))
3820 levels[n_levels++] = (struct ctables_level) {
3821 .type = CTL_CATEGORY,
3827 if (!summary_dimension && a == t->slabels_axis)
3829 levels[n_levels++] = (struct ctables_level) {
3830 .type = CTL_SUMMARY,
3831 .var_idx = SIZE_MAX,
3835 /* Pivot categories:
3837 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3838 - category for nest->vars[0], if nest->scale_idx != 0
3839 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3840 - category for nest->vars[1], if nest->scale_idx != 1
3842 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3843 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3844 - summary function, if 'a == t->slabels_axis && a ==
3847 Additional dimensions:
3849 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3851 - If 't->label_axis[b] == a' for some 'b != a', add a category
// GROUPS has the same capacity as LEVELS; groups[k] is the pivot group
// created at level k and serves as the parent for level k + 1.
3856 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3858 for (size_t j = 0; j < n_sorted; j++)
3860 struct ctables_cell *cell = sorted[j];
3861 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
// N_COMMON counts the leading levels that CELL shares with PREV.
3863 size_t n_common = 0;
3866 for (; n_common < n_levels; n_common++)
3868 const struct ctables_level *level = &levels[n_common];
3869 if (level->type == CTL_CATEGORY)
3871 size_t var_idx = level->var_idx;
3872 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3873 if (prev->axes[a].cvs[var_idx].category != c)
// NOTE(review): the third argument of value_equal() is a width in every
// other call in this file (var_get_width), but var_get_type() is passed
// here.  For string variables that is presumably not the width, so only
// part of the value may be compared -- confirm and use var_get_width() if
// so.
3875 else if (c->type != CCT_SUBTOTAL
3876 && c->type != CCT_TOTAL
3877 && c->type != CCT_POSTCOMPUTE
3878 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3879 &cell->axes[a].cvs[var_idx].value,
3880 var_get_type (nest->vars[var_idx])))
// Create pivot categories for the levels that are not shared with PREV.
3886 for (size_t k = n_common; k < n_levels; k++)
3888 const struct ctables_level *level = &levels[k];
3889 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3890 if (level->type == CTL_SUMMARY)
3892 assert (k == n_levels - 1);
3894 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3895 for (size_t m = 0; m < specs->n; m++)
3897 int leaf = pivot_category_create_leaf (
3898 parent, pivot_value_new_text (specs->specs[m].label));
3905 const struct variable *var = nest->vars[level->var_idx];
3906 struct pivot_value *label;
3907 if (level->type == CTL_VAR)
3909 label = pivot_value_new_variable (var);
3910 label->variable.show = level->vlabel;
3912 else if (level->type == CTL_CATEGORY)
3914 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3915 label = ctables_category_create_label (cv->category,
// The last level becomes a leaf; every earlier level becomes a group.
3921 if (k == n_levels - 1)
3922 prev_leaf = pivot_category_create_leaf (parent, label);
3924 groups[k] = pivot_category_create_group__ (parent, label);
3928 cell->axes[a].leaf = prev_leaf;
// Fill in the data: one pivot value per cell and summary spec.
3935 for (size_t i = 0; i < t->n_sections; i++)
3937 struct ctables_section *s = &t->sections[i];
3939 struct ctables_cell *cell;
3940 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3945 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3946 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
3947 for (size_t j = 0; j < specs->n; j++)
// DINDEXES receives the summary index first (if summaries have their own
// dimension), then the category-label index (if any), then axis leaves.
3950 size_t n_dindexes = 0;
3952 if (summary_dimension)
3953 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
3955 if (categories_dimension)
3957 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3958 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3959 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3960 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
3963 dindexes[n_dindexes++] = ctv->leaf;
3966 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3969 int leaf = cell->axes[a].leaf;
3970 if (a == t->summary_axis && !summary_dimension)
3972 dindexes[n_dindexes++] = leaf;
3975 const struct ctables_summary_spec *ss = &specs->specs[j];
// Postcompute cells are calculated from their expression; all other cells
// report the accumulated summary.
3977 double d = (cell->postcompute
3978 ? ctables_cell_calculate_postcompute (s, cell)
3979 : ctables_summary_value (cell, &cell->summaries[j], ss));
3980 struct pivot_value *value;
// Formatting precedence: hidden small values, then the zero substitute,
// then the missing substitute, then CTABLES-specific formats, then a plain
// number in the spec's format.
3981 if (ct->hide_threshold != 0
3982 && d < ct->hide_threshold
3983 && (cell->postcompute
3985 : ctables_summary_function_is_count (ss->function)))
3987 value = pivot_value_new_user_text_nocopy (
3988 xasprintf ("<%d", ct->hide_threshold));
3990 else if (d == 0 && ct->zero)
3991 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
3992 else if (d == SYSMIS && ct->missing)
3993 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
3994 else if (specs->specs[j].is_ctables_format)
3996 char *s = data_out_stretchy (&(union value) { .f = d },
3998 &specs->specs[j].format,
3999 &ct->ctables_formats, NULL);
4000 value = pivot_value_new_user_text_nocopy (s);
4004 value = pivot_value_new_number (d);
4005 value->numeric.format = specs->specs[j].format;
4007 pivot_table_put (pt, dindexes, n_dindexes, value);
4012 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T can be moved to
   the position given by t->label_axis[a], and records in T which axis the
   labels come from (clabels_from_axis) and an example variable
   (clabels_example, the innermost variable of the first nest).

   An error is reported with msg (SE, ...) when:

   - the example variable's categories include a CCT_FUNCTION category,
     i.e. sorting is based on a summary function;

   - the innermost variable of any nest on the axis is the scale variable;

   - or the innermost variable of a later nest differs from that of the
     first nest in width, value labels, or category specifications.

   SUBCOMMAND_NAME and POS_NAME are the syntax keywords used in those
   messages. */
4016 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4018 enum pivot_axis_type label_pos = t->label_axis[a];
4022 t->clabels_from_axis = a;
4024 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4025 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4027 const struct ctables_stack *stack = &t->stacks[a];
/* The first nest supplies the reference variable V0 and its categories
   C0. */
4031 const struct ctables_nest *n0 = &stack->nests[0];
4033 const struct variable *v0 = n0->vars[n0->n - 1];
4034 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4035 t->clabels_example = v0;
4037 for (size_t i = 0; i < c0->n_cats; i++)
4038 if (c0->cats[i].type == CCT_FUNCTION)
4040 msg (SE, _("%s=%s is not allowed with sorting based "
4041 "on a summary function."),
4042 subcommand_name, pos_name);
4045 if (n0->n - 1 == n0->scale_idx)
4047 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4048 "but %s is a scale variable."),
4049 subcommand_name, pos_name, var_get_name (v0));
/* Every remaining nest's innermost variable must be compatible with V0. */
4053 for (size_t i = 1; i < stack->n; i++)
4055 const struct ctables_nest *ni = &stack->nests[i];
4057 const struct variable *vi = ni->vars[ni->n - 1];
4058 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4060 if (ni->n - 1 == ni->scale_idx)
4062 msg (SE, _("%s=%s requires the variables to be moved to be "
4063 "categorical, but %s is a scale variable."),
4064 subcommand_name, pos_name, var_get_name (vi));
4067 if (var_get_width (v0) != var_get_width (vi))
4069 msg (SE, _("%s=%s requires the variables to be "
4070 "moved to have the same width, but %s has "
4071 "width %d and %s has width %d."),
4072 subcommand_name, pos_name,
4073 var_get_name (v0), var_get_width (v0),
4074 var_get_name (vi), var_get_width (vi));
4077 if (!val_labs_equal (var_get_value_labels (v0),
4078 var_get_value_labels (vi)))
4080 msg (SE, _("%s=%s requires the variables to be "
4081 "moved to have the same value labels, but %s "
4082 "and %s have different value labels."),
4083 subcommand_name, pos_name,
4084 var_get_name (v0), var_get_name (vi));
4087 if (!ctables_categories_equal (c0, ci))
4089 msg (SE, _("%s=%s requires the variables to be "
4090 "moved to have the same category "
4091 "specifications, but %s and %s have different "
4092 "category specifications."),
4093 subcommand_name, pos_name,
4094 var_get_name (v0), var_get_name (vi));
/* Prepares table T for data processing:

   - Each axis gets its stack of nests from enumerate_fts(); an axis without
     one gets a stack containing a single empty nest.  For each nest and
     domain type, the indexes of the variables that define the domain are
     collected in nest->domains[dt].

   - Each nest on the summary axis that has no summary specs gets a default
     one (CTSF_MEAN for a scale variable, CTSF_COUNT otherwise), and its
     CSV_TOTAL specs are cloned from its CSV_CELL specs when missing.

   - With listwise missing-value handling (smissing_listwise), each nest
     learns the scale variables of the other nests in its group.

   - The summary specs of all nests are merged into t->summary_specs, and
     every spec records the index of its merged counterpart in axis_idx.

   The result is that of the label position checks for the row and column
   axes. */
4103 ctables_prepare_table (struct ctables_table *t)
4105 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4108 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4110 for (size_t j = 0; j < t->stacks[a].n; j++)
4112 struct ctables_nest *nest = &t->stacks[a].nests[j];
4113 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* A domain can list at most every variable in the nest, hence nest->n
   slots. */
4115 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4116 nest->n_domains[dt] = 0;
4118 for (size_t k = 0; k < nest->n; k++)
4120 if (k == nest->scale_idx)
4129 if (a != PIVOT_AXIS_LAYER)
4136 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4137 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4138 : a == PIVOT_AXIS_ROW)
/* This test is true for the innermost variable, or for the one before it
   when the innermost variable is the scale variable. */
4140 if (k == nest->n - 1
4141 || (nest->scale_idx == nest->n - 1
4142 && k == nest->n - 2))
4148 if (a == PIVOT_AXIS_COLUMN)
4153 if (a == PIVOT_AXIS_ROW)
4158 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Placeholder stack: one nest with no variables. */
4165 struct ctables_nest *nest = xmalloc (sizeof *nest);
4166 *nest = (struct ctables_nest) { .n = 0 };
4167 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4170 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4171 for (size_t i = 0; i < stack->n; i++)
4173 struct ctables_nest *nest = &stack->nests[i];
4174 if (!nest->specs[CSV_CELL].n)
4176 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4177 specs->specs = xmalloc (sizeof *specs->specs);
4180 enum ctables_summary_function function
4181 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): specs->var is read here for the default format, but it is
   assigned from nest->vars[0] only after this initializer -- confirm that
   it already holds the intended variable (or that NULL is acceptable) at
   this point. */
4183 *specs->specs = (struct ctables_summary_spec) {
4184 .function = function,
4185 .format = ctables_summary_default_format (function, specs->var),
4186 .label = ctables_summary_default_label (function, 0),
4189 specs->var = nest->vars[0];
4191 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4192 &nest->specs[CSV_CELL]);
4194 else if (!nest->specs[CSV_TOTAL].n)
4195 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4196 &nest->specs[CSV_CELL]);
4198 if (t->ctables->smissing_listwise)
4200 struct variable **listwise_vars = NULL;
4202 size_t allocated = 0;
/* Walk the nests starting at this nest's group head and collect the scale
   variable of every other nest that has one. */
4204 for (size_t j = nest->group_head; j < stack->n; j++)
4206 const struct ctables_nest *other_nest = &stack->nests[j];
4207 if (other_nest->group_head != nest->group_head)
4210 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4213 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4214 sizeof *listwise_vars);
4215 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Every summary variant of the nest points at the same array. */
4218 for (size_t j = 0; j < N_CSVS; j++)
4220 nest->specs[j].listwise_vars = listwise_vars;
4221 nest->specs[j].n_listwise_vars = n;
/* Merge the spec sets.  NOTE(review): ITEMS has room for 2 * stack->n
   entries while the loop below adds N_CSVS entries per nest, so this relies
   on N_CSVS being at most 2 -- confirm. */
4226 struct ctables_summary_spec_set *merged = &t->summary_specs;
4227 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4229 for (size_t j = 0; j < stack->n; j++)
4231 const struct ctables_nest *nest = &stack->nests[j];
4233 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4234 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Repeatedly pick the smallest pending spec, append it to MERGED, and
   advance every item whose current spec compares equal to it; an item that
   reaches the end of its set is replaced by the last item. */
4239 struct merge_item min = items[0];
4240 for (size_t j = 1; j < n_left; j++)
4241 if (merge_item_compare_3way (&items[j], &min) < 0)
4244 if (merged->n >= merged->allocated)
4245 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4246 sizeof *merged->specs);
4247 merged->specs[merged->n++] = min.set->specs[min.ofs];
4249 for (size_t j = 0; j < n_left; )
4251 if (merge_item_compare_3way (&items[j], &min) == 0)
4253 struct merge_item *item = &items[j];
4254 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4255 if (++item->ofs >= item->set->n)
4257 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below write the merge result to stdout,
   so they are presumably disabled debugging code -- confirm. */
4266 for (size_t j = 0; j < merged->n; j++)
4267 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4269 for (size_t j = 0; j < stack->n; j++)
4271 const struct ctables_nest *nest = &stack->nests[j];
4272 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4274 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4275 for (size_t k = 0; k < specs->n; k++)
4276 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4277 specs->specs[k].axis_idx);
/* The row axis is checked first; the column axis is checked only if the row
   check succeeds. */
4283 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4284 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* For each nest in axis A's stack of table T, takes the value in case C of
   the nest's last variable and, if the value matches one of that variable's
   categories, adds it to T's category-label values with
   ctables_value_insert().  System-missing numeric values are special-cased
   before the category match. */
4288 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4289 enum pivot_axis_type a)
4291 struct ctables_stack *stack = &t->stacks[a];
4292 for (size_t i = 0; i < stack->n; i++)
4294 const struct ctables_nest *nest = &stack->nests[i];
4295 const struct variable *var = nest->vars[nest->n - 1];
4296 const union value *value = case_data (c, var);
4298 if (var_is_numeric (var) && value->f == SYSMIS)
4301 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4303 ctables_value_insert (t, value, var_get_width (var));
4308 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4310 const struct ctables_value *const *ap = a_;
4311 const struct ctables_value *const *bp = b_;
4312 const struct ctables_value *a = *ap;
4313 const struct ctables_value *b = *bp;
4314 const int *width = width_;
4315 return value_compare_3way (&a->value, &b->value, *width);
4319 ctables_sort_clabels_values (struct ctables_table *t)
4321 const struct variable *v0 = t->clabels_example;
4322 int width = var_get_width (v0);
4324 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4327 const struct val_labs *val_labs = var_get_value_labels (v0);
4328 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4329 vl = val_labs_next (val_labs, vl))
4330 if (ctables_categories_match (c0, &vl->value, v0))
4331 ctables_value_insert (t, &vl->value, width);
4334 size_t n = hmap_count (&t->clabels_values_map);
4335 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4337 struct ctables_value *clv;
4339 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4340 t->clabels_values[i++] = clv;
4341 t->n_clabels_values = n;
4344 sort (t->clabels_values, n, sizeof *t->clabels_values,
4345 compare_clabels_values_3way, &width);
4347 for (size_t i = 0; i < n; i++)
4348 t->clabels_values[i]->leaf = i;
4352 ctables_add_category_occurrences (const struct variable *var,
4353 struct hmap *occurrences,
4354 const struct ctables_categories *cats)
4356 const struct val_labs *val_labs = var_get_value_labels (var);
4358 for (size_t i = 0; i < cats->n_cats; i++)
4360 const struct ctables_category *c = &cats->cats[i];
4364 ctables_add_occurrence (var, &(const union value) { .f = c->number },
4370 int width = var_get_width (var);
4372 value_init (&value, width);
4373 value_copy_buf_rpad (&value, width,
4374 CHAR_CAST (uint8_t *, c->string.string),
4375 c->string.length, ' ');
4376 ctables_add_occurrence (var, &value, occurrences);
4377 value_destroy (&value, width);
4382 assert (var_is_numeric (var));
4383 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4384 vl = val_labs_next (val_labs, vl))
4385 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4386 ctables_add_occurrence (var, &vl->value, occurrences);
4390 assert (var_is_alpha (var));
4391 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4392 vl = val_labs_next (val_labs, vl))
4393 if (in_string_range (&vl->value, var, c->srange))
4394 ctables_add_occurrence (var, &vl->value, occurrences);
4398 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4399 vl = val_labs_next (val_labs, vl))
4400 if (var_is_value_missing (var, &vl->value))
4401 ctables_add_occurrence (var, &vl->value, occurrences);
4405 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4406 vl = val_labs_next (val_labs, vl))
4407 ctables_add_occurrence (var, &vl->value, occurrences);
4410 case CCT_POSTCOMPUTE:
4420 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4421 vl = val_labs_next (val_labs, vl))
4422 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4423 ctables_add_occurrence (var, &vl->value, occurrences);
4426 case CCT_EXCLUDED_MISSING:
4433 ctables_section_recurse_add_empty_categories (
4434 struct ctables_section *s,
4435 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4436 enum pivot_axis_type a, size_t a_idx)
4438 if (a >= PIVOT_N_AXES)
4439 ctables_cell_insert__ (s, c, cats);
4440 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4441 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4444 const struct variable *var = s->nests[a]->vars[a_idx];
4445 const struct ctables_categories *categories = s->table->categories[
4446 var_get_dict_index (var)];
4447 int width = var_get_width (var);
4448 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4449 const struct ctables_occurrence *o;
4450 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4452 union value *value = case_data_rw (c, var);
4453 value_destroy (value, width);
4454 value_clone (value, &o->value, width);
4455 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4456 assert (cats[a][a_idx] != NULL);
4457 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4460 for (size_t i = 0; i < categories->n_cats; i++)
4462 const struct ctables_category *cat = &categories->cats[i];
4463 if (cat->type == CCT_POSTCOMPUTE)
4465 cats[a][a_idx] = cat;
4466 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4473 ctables_section_add_empty_categories (struct ctables_section *s)
4475 bool show_empty = false;
4476 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4478 for (size_t k = 0; k < s->nests[a]->n; k++)
4479 if (k != s->nests[a]->scale_idx)
4481 const struct variable *var = s->nests[a]->vars[k];
4482 const struct ctables_categories *cats = s->table->categories[
4483 var_get_dict_index (var)];
4484 if (cats->show_empty)
4487 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
4493 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4494 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4495 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
4500 ctables_execute (struct dataset *ds, struct ctables *ct)
4502 for (size_t i = 0; i < ct->n_tables; i++)
4504 struct ctables_table *t = ct->tables[i];
4505 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4506 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4507 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4508 sizeof *t->sections);
4509 size_t ix[PIVOT_N_AXES];
4510 ctables_table_add_section (t, 0, ix);
4513 struct casereader *input = proc_open (ds);
4514 bool warn_on_invalid = true;
4515 for (struct ccase *c = casereader_read (input); c;
4516 case_unref (c), c = casereader_read (input))
4518 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4520 double e_weight = (ct->e_weight
4521 ? var_force_valid_weight (ct->e_weight,
4522 case_num (c, ct->e_weight),
4526 for (size_t i = 0; i < ct->n_tables; i++)
4528 struct ctables_table *t = ct->tables[i];
4530 for (size_t j = 0; j < t->n_sections; j++)
4531 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
4533 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4534 if (t->label_axis[a] != a)
4535 ctables_insert_clabels_values (t, c, a);
4538 casereader_destroy (input);
4540 for (size_t i = 0; i < ct->n_tables; i++)
4542 struct ctables_table *t = ct->tables[i];
4544 if (t->clabels_example)
4545 ctables_sort_clabels_values (t);
4547 for (size_t j = 0; j < t->n_sections; j++)
4548 ctables_section_add_empty_categories (&t->sections[j]);
4550 ctables_table_output (ct, ct->tables[i]);
4552 return proc_commit (ds);
/* Type of a function that parses one precedence level of a postcompute
   expression.  The functions of this type in this file return a newly
   allocated expression on success and a null pointer on failure. */
typedef struct ctables_pcexpr *parse_recursively_func (
  struct lexer *lexer, struct dictionary *dict);
4561 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4567 case CTPO_CAT_STRING:
4568 ss_dealloc (&e->string);
4577 for (size_t i = 0; i < 2; i++)
4578 ctables_pcexpr_destroy (e->subs[i]);
4582 case CTPO_CAT_NUMBER:
4583 case CTPO_CAT_RANGE:
4584 case CTPO_CAT_MISSING:
4585 case CTPO_CAT_OTHERNM:
4586 case CTPO_CAT_SUBTOTAL:
4587 case CTPO_CAT_TOTAL:
4591 msg_location_destroy (e->location);
4596 static struct ctables_pcexpr *
4597 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4598 struct ctables_pcexpr *sub0,
4599 struct ctables_pcexpr *sub1)
4601 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4602 *e = (struct ctables_pcexpr) {
4604 .subs = { sub0, sub1 },
4605 .location = msg_location_merged (sub0->location, sub1->location),
4610 /* How to parse an operator. */
4613 enum token_type token;
4614 enum ctables_postcompute_op op;
4617 static const struct operator *
4618 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4620 for (const struct operator *op = ops; op < ops + n_ops; op++)
4621 if (lex_token (lexer) == op->token)
4623 if (op->token != T_NEG_NUM)
4632 static struct ctables_pcexpr *
4633 parse_binary_operators__ (struct lexer *lexer, struct dictionary *dict,
4634 const struct operator ops[], size_t n_ops,
4635 parse_recursively_func *parse_next_level,
4636 const char *chain_warning,
4637 struct ctables_pcexpr *lhs)
4639 for (int op_count = 0; ; op_count++)
4641 const struct operator *op = match_operator (lexer, ops, n_ops);
4644 if (op_count > 1 && chain_warning)
4645 msg_at (SW, lhs->location, "%s", chain_warning);
4650 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
4653 ctables_pcexpr_destroy (lhs);
4657 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
4661 static struct ctables_pcexpr *
4662 parse_binary_operators (struct lexer *lexer, struct dictionary *dict,
4663 const struct operator ops[], size_t n_ops,
4664 parse_recursively_func *parse_next_level,
4665 const char *chain_warning)
4667 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
4671 return parse_binary_operators__ (lexer, dict, ops, n_ops, parse_next_level,
4672 chain_warning, lhs);
4675 static struct ctables_pcexpr *parse_add (struct lexer *, struct dictionary *);
4677 static struct ctables_pcexpr
4678 ctpo_cat_range (double low, double high)
4680 return (struct ctables_pcexpr) {
4681 .op = CTPO_CAT_RANGE,
4682 .range = { low, high },
4686 static struct ctables_pcexpr *
4687 parse_primary (struct lexer *lexer, struct dictionary *dict)
4689 int start_ofs = lex_ofs (lexer);
4690 struct ctables_pcexpr e;
4691 if (lex_is_number (lexer))
4693 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4694 .number = lex_number (lexer) };
4697 else if (lex_match_id (lexer, "MISSING"))
4698 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4699 else if (lex_match_id (lexer, "OTHERNM"))
4700 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4701 else if (lex_match_id (lexer, "TOTAL"))
4702 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4703 else if (lex_match_id (lexer, "SUBTOTAL"))
4705 size_t subtotal_index = 0;
4706 if (lex_match (lexer, T_LBRACK))
4708 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4710 subtotal_index = lex_integer (lexer);
4712 if (!lex_force_match (lexer, T_RBRACK))
4715 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4716 .subtotal_index = subtotal_index };
4718 else if (lex_match (lexer, T_LBRACK))
4720 if (lex_match_id (lexer, "LO"))
4722 if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
4724 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4727 else if (lex_is_number (lexer))
4729 double number = lex_number (lexer);
4731 if (lex_match_id (lexer, "THRU"))
4733 if (lex_match_id (lexer, "HI"))
4734 e = ctpo_cat_range (number, DBL_MAX);
4737 if (!lex_force_num (lexer))
4739 e = ctpo_cat_range (number, lex_number (lexer));
4744 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4747 else if (lex_is_string (lexer))
4749 struct substring s = recode_substring_pool (
4750 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
4751 ss_rtrim (&s, ss_cstr (" "));
4753 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
4758 lex_error (lexer, NULL);
4762 if (!lex_force_match (lexer, T_RBRACK))
4764 if (e.op == CTPO_CAT_STRING)
4765 ss_dealloc (&e.string);
4769 else if (lex_match (lexer, T_LPAREN))
4771 struct ctables_pcexpr *ep = parse_add (lexer, dict);
4774 if (!lex_force_match (lexer, T_RPAREN))
4776 ctables_pcexpr_destroy (ep);
4783 lex_error (lexer, NULL);
4787 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4788 return xmemdup (&e, sizeof e);
4791 static struct ctables_pcexpr *
4792 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4793 struct lexer *lexer, int start_ofs)
4795 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4796 *e = (struct ctables_pcexpr) {
4799 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
4804 static struct ctables_pcexpr *
4805 parse_exp (struct lexer *lexer, struct dictionary *dict)
4807 static const struct operator op = { T_EXP, CTPO_POW };
4809 const char *chain_warning =
4810 _("The exponentiation operator (`**') is left-associative: "
4811 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4812 "To disable this warning, insert parentheses.");
4814 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4815 return parse_binary_operators (lexer, dict, &op, 1,
4816 parse_primary, chain_warning);
4818 /* Special case for situations like "-5**6", which must be parsed as
4821 int start_ofs = lex_ofs (lexer);
4822 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4823 *lhs = (struct ctables_pcexpr) {
4824 .op = CTPO_CONSTANT,
4825 .number = -lex_tokval (lexer),
4826 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4830 struct ctables_pcexpr *node = parse_binary_operators__ (
4831 lexer, dict, &op, 1, parse_primary, chain_warning, lhs);
4835 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4838 /* Parses the unary minus level. */
4839 static struct ctables_pcexpr *
4840 parse_neg (struct lexer *lexer, struct dictionary *dict)
4842 int start_ofs = lex_ofs (lexer);
4843 if (!lex_match (lexer, T_DASH))
4844 return parse_exp (lexer, dict);
4846 struct ctables_pcexpr *inner = parse_neg (lexer, dict);
4850 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4853 /* Parses the multiplication and division level. */
4854 static struct ctables_pcexpr *
4855 parse_mul (struct lexer *lexer, struct dictionary *dict)
4857 static const struct operator ops[] =
4859 { T_ASTERISK, CTPO_MUL },
4860 { T_SLASH, CTPO_DIV },
4863 return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
4867 /* Parses the addition and subtraction level. */
4868 static struct ctables_pcexpr *
4869 parse_add (struct lexer *lexer, struct dictionary *dict)
4871 static const struct operator ops[] =
4873 { T_PLUS, CTPO_ADD },
4874 { T_DASH, CTPO_SUB },
4875 { T_NEG_NUM, CTPO_ADD },
4878 return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
4882 static struct ctables_postcompute *
4883 ctables_find_postcompute (struct ctables *ct, const char *name)
4885 struct ctables_postcompute *pc;
4886 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4887 utf8_hash_case_string (name, 0), &ct->postcomputes)
4888 if (!utf8_strcasecmp (pc->name, name))
4894 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
4897 int pcompute_start = lex_ofs (lexer) - 1;
4899 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
4902 char *name = ss_xstrdup (lex_tokss (lexer));
4905 if (!lex_force_match (lexer, T_EQUALS)
4906 || !lex_force_match_id (lexer, "EXPR")
4907 || !lex_force_match (lexer, T_LPAREN))
4913 int expr_start = lex_ofs (lexer);
4914 struct ctables_pcexpr *expr = parse_add (lexer, dict);
4915 int expr_end = lex_ofs (lexer) - 1;
4916 if (!expr || !lex_force_match (lexer, T_RPAREN))
4921 int pcompute_end = lex_ofs (lexer) - 1;
4923 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
4926 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
4929 msg_at (SW, location, _("New definition of &%s will override the "
4930 "previous definition."),
4932 msg_at (SN, pc->location, _("This is the previous definition."));
4934 ctables_pcexpr_destroy (pc->expr);
4935 msg_location_destroy (pc->location);
4940 pc = xmalloc (sizeof *pc);
4941 *pc = (struct ctables_postcompute) { .name = name };
4942 hmap_insert (&ct->postcomputes, &pc->hmap_node,
4943 utf8_hash_case_string (pc->name, 0));
4946 pc->location = location;
4948 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
4953 ctables_parse_pproperties_format (struct lexer *lexer,
4954 struct ctables_summary_spec_set *sss)
4956 *sss = (struct ctables_summary_spec_set) { .n = 0 };
4958 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
4959 && !(lex_token (lexer) == T_ID
4960 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
4961 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
4962 lex_tokss (lexer)))))
4964 /* Parse function. */
4965 enum ctables_summary_function function;
4966 if (!parse_ctables_summary_function (lexer, &function))
4969 /* Parse percentile. */
4970 double percentile = 0;
4971 if (function == CTSF_PTILE)
4973 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4975 percentile = lex_number (lexer);
4980 struct fmt_spec format;
4981 if (!parse_format_specifier (lexer, &format)
4982 || !fmt_check_output (&format)
4983 || !fmt_check_type_compat (&format, VAL_NUMERIC))
4986 if (sss->n >= sss->allocated)
4987 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
4988 sizeof *sss->specs);
4989 sss->specs[sss->n++] = (struct ctables_summary_spec) {
4990 .function = function,
4991 .percentile = percentile,
4998 ctables_summary_spec_set_uninit (sss);
5003 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5005 struct ctables_postcompute **pcs = NULL;
5007 size_t allocated_pcs = 0;
5009 while (lex_match (lexer, T_AND))
5011 if (!lex_force_id (lexer))
5013 struct ctables_postcompute *pc
5014 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5017 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5022 if (n_pcs >= allocated_pcs)
5023 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5027 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5029 if (lex_match_id (lexer, "LABEL"))
5031 lex_match (lexer, T_EQUALS);
5032 if (!lex_force_string (lexer))
5035 for (size_t i = 0; i < n_pcs; i++)
5037 free (pcs[i]->label);
5038 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5043 else if (lex_match_id (lexer, "FORMAT"))
5045 lex_match (lexer, T_EQUALS);
5047 struct ctables_summary_spec_set sss;
5048 if (!ctables_parse_pproperties_format (lexer, &sss))
5051 for (size_t i = 0; i < n_pcs; i++)
5054 ctables_summary_spec_set_uninit (pcs[i]->specs);
5056 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5057 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5059 ctables_summary_spec_set_uninit (&sss);
5061 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5063 lex_match (lexer, T_EQUALS);
5064 bool hide_source_cats;
5065 if (!parse_bool (lexer, &hide_source_cats))
5067 for (size_t i = 0; i < n_pcs; i++)
5068 pcs[i]->hide_source_cats = hide_source_cats;
5072 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Appends to OUT the local time corresponding to NOW, formatted according to
   strftime() template FORMAT.

   Appends nothing if NOW cannot be converted to local time, or if strftime()
   returns 0 (the result does not fit in the internal buffer, or is empty), in
   which case the buffer's contents are not guaranteed to be a valid
   string. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
5094 skip_prefix (struct substring *s, struct substring prefix)
5096 if (ss_starts_with (*s, prefix))
5098 ss_advance (s, prefix.length);
5106 put_table_expression (struct string *out, struct lexer *lexer,
5107 struct dictionary *dict, int expr_start, int expr_end)
5110 for (int ofs = expr_start; ofs < expr_end; ofs++)
5112 const struct token *t = lex_ofs_token (lexer, ofs);
5113 if (t->type == T_LBRACK)
5115 else if (t->type == T_RBRACK && nest > 0)
5121 else if (t->type == T_ID)
5123 const struct variable *var
5124 = dict_lookup_var (dict, t->string.string);
5125 const char *label = var ? var_get_label (var) : NULL;
5126 ds_put_cstr (out, label ? label : t->string.string);
5130 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5131 ds_put_byte (out, ' ');
5133 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5134 ds_put_cstr (out, repr);
5137 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5138 ds_put_byte (out, ' ');
5144 put_title_text (struct string *out, struct substring in, time_t now,
5145 struct lexer *lexer, struct dictionary *dict,
5146 int expr_start, int expr_end)
5150 size_t chunk = ss_find_byte (in, ')');
5151 ds_put_substring (out, ss_head (in, chunk));
5152 ss_advance (&in, chunk);
5153 if (ss_is_empty (in))
5156 if (skip_prefix (&in, ss_cstr (")DATE")))
5157 put_strftime (out, now, "%x");
5158 else if (skip_prefix (&in, ss_cstr (")TIME")))
5159 put_strftime (out, now, "%X");
5160 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5161 put_table_expression (out, lexer, dict, expr_start, expr_end);
5164 ds_put_byte (out, ')');
5165 ss_advance (&in, 1);
5171 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5173 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5174 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5175 enum settings_value_show tvars = settings_get_show_variables ();
5176 for (size_t i = 0; i < n_vars; i++)
5177 vlabels[i] = (enum ctables_vlabel) tvars;
5179 struct pivot_table_look *look = pivot_table_look_unshare (
5180 pivot_table_look_ref (pivot_table_look_get_default ()));
5181 look->omit_empty = false;
5183 struct ctables *ct = xmalloc (sizeof *ct);
5184 *ct = (struct ctables) {
5185 .dict = dataset_dict (ds),
5187 .ctables_formats = FMT_SETTINGS_INIT,
5189 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5192 time_t now = time (NULL);
5197 const char *dot_string;
5198 const char *comma_string;
5200 static const struct ctf ctfs[4] = {
5201 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5202 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5203 { CTEF_PAREN, "-,(,),", "-.(.)." },
5204 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5206 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5207 for (size_t i = 0; i < 4; i++)
5209 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5210 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5211 fmt_number_style_from_string (s));
5214 if (!lex_force_match (lexer, T_SLASH))
5217 while (!lex_match_id (lexer, "TABLE"))
5219 if (lex_match_id (lexer, "FORMAT"))
5221 double widths[2] = { SYSMIS, SYSMIS };
5222 double units_per_inch = 72.0;
5224 while (lex_token (lexer) != T_SLASH)
5226 if (lex_match_id (lexer, "MINCOLWIDTH"))
5228 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5231 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5233 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5236 else if (lex_match_id (lexer, "UNITS"))
5238 lex_match (lexer, T_EQUALS);
5239 if (lex_match_id (lexer, "POINTS"))
5240 units_per_inch = 72.0;
5241 else if (lex_match_id (lexer, "INCHES"))
5242 units_per_inch = 1.0;
5243 else if (lex_match_id (lexer, "CM"))
5244 units_per_inch = 2.54;
5247 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
5251 else if (lex_match_id (lexer, "EMPTY"))
5256 lex_match (lexer, T_EQUALS);
5257 if (lex_match_id (lexer, "ZERO"))
5259 /* Nothing to do. */
5261 else if (lex_match_id (lexer, "BLANK"))
5262 ct->zero = xstrdup ("");
5263 else if (lex_force_string (lexer))
5265 ct->zero = ss_xstrdup (lex_tokss (lexer));
5271 else if (lex_match_id (lexer, "MISSING"))
5273 lex_match (lexer, T_EQUALS);
5274 if (!lex_force_string (lexer))
5278 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
5279 ? ss_xstrdup (lex_tokss (lexer))
5285 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
5286 "UNITS", "EMPTY", "MISSING");
5291 if (widths[0] != SYSMIS && widths[1] != SYSMIS
5292 && widths[0] > widths[1])
5294 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
5298 for (size_t i = 0; i < 2; i++)
5299 if (widths[i] != SYSMIS)
5301 int *wr = ct->look->width_ranges[TABLE_HORZ];
5302 wr[i] = widths[i] / units_per_inch * 96.0;
5307 else if (lex_match_id (lexer, "VLABELS"))
5309 if (!lex_force_match_id (lexer, "VARIABLES"))
5311 lex_match (lexer, T_EQUALS);
5313 struct variable **vars;
5315 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
5319 if (!lex_force_match_id (lexer, "DISPLAY"))
5324 lex_match (lexer, T_EQUALS);
5326 enum ctables_vlabel vlabel;
5327 if (lex_match_id (lexer, "DEFAULT"))
5328 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
5329 else if (lex_match_id (lexer, "NAME"))
5331 else if (lex_match_id (lexer, "LABEL"))
5332 vlabel = CTVL_LABEL;
5333 else if (lex_match_id (lexer, "BOTH"))
5335 else if (lex_match_id (lexer, "NONE"))
5339 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
5345 for (size_t i = 0; i < n_vars; i++)
5346 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5349 else if (lex_match_id (lexer, "MRSETS"))
5351 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5353 lex_match (lexer, T_EQUALS);
5354 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5357 else if (lex_match_id (lexer, "SMISSING"))
5359 if (lex_match_id (lexer, "VARIABLE"))
5360 ct->smissing_listwise = false;
5361 else if (lex_match_id (lexer, "LISTWISE"))
5362 ct->smissing_listwise = true;
5365 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5369 else if (lex_match_id (lexer, "PCOMPUTE"))
5371 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
5374 else if (lex_match_id (lexer, "PPROPERTIES"))
5376 if (!ctables_parse_pproperties (lexer, ct))
5379 else if (lex_match_id (lexer, "WEIGHT"))
5381 if (!lex_force_match_id (lexer, "VARIABLE"))
5383 lex_match (lexer, T_EQUALS);
5384 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
5388 else if (lex_match_id (lexer, " HIDESMALLCOUNTS"))
5390 if (lex_match_id (lexer, "COUNT"))
5392 lex_match (lexer, T_EQUALS);
5393 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5396 ct->hide_threshold = lex_integer (lexer);
5399 else if (ct->hide_threshold == 0)
5400 ct->hide_threshold = 5;
5404 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5405 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5406 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5410 if (!lex_force_match (lexer, T_SLASH))
5414 size_t allocated_tables = 0;
5417 if (ct->n_tables >= allocated_tables)
5418 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5419 sizeof *ct->tables);
5421 struct ctables_category *cat = xmalloc (sizeof *cat);
5422 *cat = (struct ctables_category) {
5424 .include_missing = false,
5425 .sort_ascending = true,
5428 struct ctables_categories *c = xmalloc (sizeof *c);
5429 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5430 *c = (struct ctables_categories) {
5437 struct ctables_categories **categories = xnmalloc (n_vars,
5438 sizeof *categories);
5439 for (size_t i = 0; i < n_vars; i++)
5442 struct ctables_table *t = xmalloc (sizeof *t);
5443 *t = (struct ctables_table) {
5445 .slabels_axis = PIVOT_AXIS_COLUMN,
5446 .slabels_visible = true,
5447 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5449 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5450 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5451 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5453 .clabels_from_axis = PIVOT_AXIS_LAYER,
5454 .categories = categories,
5455 .n_categories = n_vars,
5458 ct->tables[ct->n_tables++] = t;
5460 lex_match (lexer, T_EQUALS);
5461 int expr_start = lex_ofs (lexer);
5462 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5464 if (lex_match (lexer, T_BY))
5466 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5467 ct, t, PIVOT_AXIS_COLUMN))
5470 if (lex_match (lexer, T_BY))
5472 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5473 ct, t, PIVOT_AXIS_LAYER))
5477 int expr_end = lex_ofs (lexer);
5479 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5480 && !t->axes[PIVOT_AXIS_LAYER])
5482 lex_error (lexer, _("At least one variable must be specified."));
5486 const struct ctables_axis *scales[PIVOT_N_AXES];
5487 size_t n_scales = 0;
5488 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5490 scales[a] = find_scale (t->axes[a]);
5496 msg (SE, _("Scale variables may appear only on one axis."));
5497 if (scales[PIVOT_AXIS_ROW])
5498 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5499 _("This scale variable appears on the rows axis."));
5500 if (scales[PIVOT_AXIS_COLUMN])
5501 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5502 _("This scale variable appears on the columns axis."));
5503 if (scales[PIVOT_AXIS_LAYER])
5504 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5505 _("This scale variable appears on the layer axis."));
5509 const struct ctables_axis *summaries[PIVOT_N_AXES];
5510 size_t n_summaries = 0;
5511 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5513 summaries[a] = (scales[a]
5515 : find_categorical_summary_spec (t->axes[a]));
5519 if (n_summaries > 1)
5521 msg (SE, _("Summaries may appear only on one axis."));
5522 if (summaries[PIVOT_AXIS_ROW])
5523 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5524 _("This variable on the rows axis has a summary."));
5525 if (summaries[PIVOT_AXIS_COLUMN])
5526 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5527 _("This variable on the columns axis has a summary."));
5528 if (summaries[PIVOT_AXIS_LAYER])
5529 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5530 _("This variable on the layers axis has a summary."));
5533 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5534 if (n_summaries ? summaries[a] : t->axes[a])
5536 t->summary_axis = a;
5540 if (lex_token (lexer) == T_ENDCMD)
5542 if (!ctables_prepare_table (t))
5546 if (!lex_force_match (lexer, T_SLASH))
5549 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5551 if (lex_match_id (lexer, "SLABELS"))
5553 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5555 if (lex_match_id (lexer, "POSITION"))
5557 lex_match (lexer, T_EQUALS);
5558 if (lex_match_id (lexer, "COLUMN"))
5559 t->slabels_axis = PIVOT_AXIS_COLUMN;
5560 else if (lex_match_id (lexer, "ROW"))
5561 t->slabels_axis = PIVOT_AXIS_ROW;
5562 else if (lex_match_id (lexer, "LAYER"))
5563 t->slabels_axis = PIVOT_AXIS_LAYER;
5566 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5570 else if (lex_match_id (lexer, "VISIBLE"))
5572 lex_match (lexer, T_EQUALS);
5573 if (!parse_bool (lexer, &t->slabels_visible))
5578 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5583 else if (lex_match_id (lexer, "CLABELS"))
5585 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5587 if (lex_match_id (lexer, "AUTO"))
5589 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5590 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5592 else if (lex_match_id (lexer, "ROWLABELS"))
5594 lex_match (lexer, T_EQUALS);
5595 if (lex_match_id (lexer, "OPPOSITE"))
5596 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5597 else if (lex_match_id (lexer, "LAYER"))
5598 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5601 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5605 else if (lex_match_id (lexer, "COLLABELS"))
5607 lex_match (lexer, T_EQUALS);
5608 if (lex_match_id (lexer, "OPPOSITE"))
5609 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5610 else if (lex_match_id (lexer, "LAYER"))
5611 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5614 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5620 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5626 else if (lex_match_id (lexer, "CRITERIA"))
5628 if (!lex_force_match_id (lexer, "CILEVEL"))
5630 lex_match (lexer, T_EQUALS);
5632 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5634 t->cilevel = lex_number (lexer);
5637 else if (lex_match_id (lexer, "CATEGORIES"))
5639 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5643 else if (lex_match_id (lexer, "TITLES"))
5648 if (lex_match_id (lexer, "CAPTION"))
5649 textp = &t->caption;
5650 else if (lex_match_id (lexer, "CORNER"))
5652 else if (lex_match_id (lexer, "TITLE"))
5656 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5659 lex_match (lexer, T_EQUALS);
5661 struct string s = DS_EMPTY_INITIALIZER;
5662 while (lex_is_string (lexer))
5664 if (!ds_is_empty (&s))
5665 ds_put_byte (&s, ' ');
5666 put_title_text (&s, lex_tokss (lexer), now,
5667 lexer, dataset_dict (ds),
5668 expr_start, expr_end);
5672 *textp = ds_steal_cstr (&s);
5674 while (lex_token (lexer) != T_SLASH
5675 && lex_token (lexer) != T_ENDCMD);
5677 else if (lex_match_id (lexer, "SIGTEST"))
5681 t->chisq = xmalloc (sizeof *t->chisq);
5682 *t->chisq = (struct ctables_chisq) {
5684 .include_mrsets = true,
5685 .all_visible = true,
5691 if (lex_match_id (lexer, "TYPE"))
5693 lex_match (lexer, T_EQUALS);
5694 if (!lex_force_match_id (lexer, "CHISQUARE"))
5697 else if (lex_match_id (lexer, "ALPHA"))
5699 lex_match (lexer, T_EQUALS);
5700 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5702 t->chisq->alpha = lex_number (lexer);
5705 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5707 lex_match (lexer, T_EQUALS);
5708 if (parse_bool (lexer, &t->chisq->include_mrsets))
5711 else if (lex_match_id (lexer, "CATEGORIES"))
5713 lex_match (lexer, T_EQUALS);
5714 if (lex_match_id (lexer, "ALLVISIBLE"))
5715 t->chisq->all_visible = true;
5716 else if (lex_match_id (lexer, "SUBTOTALS"))
5717 t->chisq->all_visible = false;
5720 lex_error_expecting (lexer,
5721 "ALLVISIBLE", "SUBTOTALS");
5727 lex_error_expecting (lexer, "TYPE", "ALPHA",
5728 "INCLUDEMRSETS", "CATEGORIES");
5732 while (lex_token (lexer) != T_SLASH
5733 && lex_token (lexer) != T_ENDCMD);
5735 else if (lex_match_id (lexer, "COMPARETEST"))
5739 t->pairwise = xmalloc (sizeof *t->pairwise);
5740 *t->pairwise = (struct ctables_pairwise) {
5742 .alpha = { .05, .05 },
5743 .adjust = BONFERRONI,
5744 .include_mrsets = true,
5745 .meansvariance_allcats = true,
5746 .all_visible = true,
5755 if (lex_match_id (lexer, "TYPE"))
5757 lex_match (lexer, T_EQUALS);
5758 if (lex_match_id (lexer, "PROP"))
5759 t->pairwise->type = PROP;
5760 else if (lex_match_id (lexer, "MEAN"))
5761 t->pairwise->type = MEAN;
5764 lex_error_expecting (lexer, "PROP", "MEAN");
5768 else if (lex_match_id (lexer, "ALPHA"))
5770 lex_match (lexer, T_EQUALS);
5772 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5774 double a0 = lex_number (lexer);
5777 lex_match (lexer, T_COMMA);
5778 if (lex_is_number (lexer))
5780 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5782 double a1 = lex_number (lexer);
5785 t->pairwise->alpha[0] = MIN (a0, a1);
5786 t->pairwise->alpha[1] = MAX (a0, a1);
5789 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5791 else if (lex_match_id (lexer, "ADJUST"))
5793 lex_match (lexer, T_EQUALS);
5794 if (lex_match_id (lexer, "BONFERRONI"))
5795 t->pairwise->adjust = BONFERRONI;
5796 else if (lex_match_id (lexer, "BH"))
5797 t->pairwise->adjust = BH;
5798 else if (lex_match_id (lexer, "NONE"))
5799 t->pairwise->adjust = 0;
5802 lex_error_expecting (lexer, "BONFERRONI", "BH",
5807 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5809 lex_match (lexer, T_EQUALS);
5810 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5813 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5815 lex_match (lexer, T_EQUALS);
5816 if (lex_match_id (lexer, "ALLCATS"))
5817 t->pairwise->meansvariance_allcats = true;
5818 else if (lex_match_id (lexer, "TESTEDCATS"))
5819 t->pairwise->meansvariance_allcats = false;
5822 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5826 else if (lex_match_id (lexer, "CATEGORIES"))
5828 lex_match (lexer, T_EQUALS);
5829 if (lex_match_id (lexer, "ALLVISIBLE"))
5830 t->pairwise->all_visible = true;
5831 else if (lex_match_id (lexer, "SUBTOTALS"))
5832 t->pairwise->all_visible = false;
5835 lex_error_expecting (lexer, "ALLVISIBLE",
5840 else if (lex_match_id (lexer, "MERGE"))
5842 lex_match (lexer, T_EQUALS);
5843 if (!parse_bool (lexer, &t->pairwise->merge))
5846 else if (lex_match_id (lexer, "STYLE"))
5848 lex_match (lexer, T_EQUALS);
5849 if (lex_match_id (lexer, "APA"))
5850 t->pairwise->apa_style = true;
5851 else if (lex_match_id (lexer, "SIMPLE"))
5852 t->pairwise->apa_style = false;
5855 lex_error_expecting (lexer, "APA", "SIMPLE");
5859 else if (lex_match_id (lexer, "SHOWSIG"))
5861 lex_match (lexer, T_EQUALS);
5862 if (!parse_bool (lexer, &t->pairwise->show_sig))
5867 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5868 "INCLUDEMRSETS", "MEANSVARIANCE",
5869 "CATEGORIES", "MERGE", "STYLE",
5874 while (lex_token (lexer) != T_SLASH
5875 && lex_token (lexer) != T_ENDCMD);
5879 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5880 "CRITERIA", "CATEGORIES", "TITLES",
5881 "SIGTEST", "COMPARETEST");
5885 if (!lex_match (lexer, T_SLASH))
5889 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5890 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5892 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5896 if (!ctables_prepare_table (t))
5899 while (lex_token (lexer) != T_ENDCMD);
5901 bool ok = ctables_execute (ds, ct);
5902 ctables_destroy (ct);
5903 return ok ? CMD_SUCCESS : CMD_FAILURE;
5906 ctables_destroy (ct);