1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-in.h"
25 #include "data/data-out.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/mrset.h"
29 #include "data/subcase.h"
30 #include "data/value-labels.h"
31 #include "language/command.h"
32 #include "language/lexer/format-parser.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/token.h"
35 #include "language/lexer/variable-parser.h"
36 #include "libpspp/array.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/hash-functions.h"
39 #include "libpspp/hmap.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/message.h"
42 #include "libpspp/string-array.h"
43 #include "math/mode.h"
44 #include "math/moments.h"
45 #include "math/percentiles.h"
46 #include "math/sort.h"
47 #include "output/pivot-table.h"
49 #include "gl/minmax.h"
50 #include "gl/xalloc.h"
/* Shorthand for gettext (): expands to a call that looks up MSGID. */
53 #define _(msgid) gettext (msgid)
/* Expands to MSGID itself, parenthesized, with no gettext () call. */
54 #define N_(msgid) (msgid)
/* Each constant below is defined as a SETTINGS_VALUE_SHOW_* value; note that
   CTVL_NAME is the one that maps to SETTINGS_VALUE_SHOW_VALUE. */
58 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
59 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
60 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
61 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
65 - unweighted summaries (U*)
66 - lower confidence limits (*.LCL)
67 - upper confidence limits (*.UCL)
68 - standard error (*.SE)
71 /* Functions available for all variables (CTFA_ALL). */ \
72 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
73 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
74 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
94 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
96 /* Scale variables, totals, and subtotals (CTFA_SCALE). \
   NOTE(review): the TOTALN entry below is spelled CSTF_TOTALN, while every \
   other entry uses the CTSF_ prefix; confirm whether that is intentional \
   before relying on the name. */ \
97 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
106 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
107 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
108 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
109 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
111 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
112 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
113 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
118 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
119 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
121 #if 0 /* Multiple response sets not yet implemented, so the CTFA_MRSETS
           entries below are compiled out.  Each NAME mirrors its enumerator:
           xPCT.RESPONSES.COUNT for ..._RESPONSES_COUNT and
           xPCT.COUNT.RESPONSES for ..._COUNT_RESPONSES. */
122 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
123 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
124 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
125 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
126 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
127 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
128 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
129 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
130 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
131 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
132 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
133 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
134 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
135 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
136 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
137 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
138 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
139 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
140 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
141 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
142 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
143 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
146 enum ctables_summary_function
/* Here S() keeps only the ENUM field of a summary table entry. */
148 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
/* Here S() turns each summary table entry into "+1", so that SUMMARIES adds
   up to the number of entries. */
154 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
155 N_CTSF_FUNCTIONS = SUMMARIES
159 static bool ctables_summary_function_is_count (enum ctables_summary_function);
/* Kinds of domains, in two groups: section-level domains (CTDT_TABLE through
   CTDT_LAYERCOL) and subtable-level domains (CTDT_SUBTABLE through
   CTDT_COL). */
161 enum ctables_domain_type
163 /* Within a section, where stacked variables divide one section from
165 CTDT_TABLE, /* All layers of a whole section. */
166 CTDT_LAYER, /* One layer within a section. */
167 CTDT_LAYERROW, /* Row in one layer within a section. */
168 CTDT_LAYERCOL, /* Column in one layer within a section. */
170 /* Within a subtable, where a subtable pairs an innermost row variable with
171 an innermost column variable within a single layer. */
172 CTDT_SUBTABLE, /* Whole subtable. */
173 CTDT_ROW, /* Row within a subtable. */
174 CTDT_COL, /* Column within a subtable. */
/* One domain.  It is linked into an hmap through 'node' (struct
   ctables_section keeps one 'domains' hmap per domain type) and points to an
   example cell. */
178 struct ctables_domain
180 struct hmap_node node;
182 const struct ctables_cell *example;
184 double d_valid; /* Dictionary weight. */
187 double e_valid; /* Effective weight */
192 enum ctables_summary_variant
201 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
202 all the axes (except the scalar variable, if any). */
203 struct hmap_node node;
205 /* The domains that contain this cell. */
206 uint32_t omit_domains;
/* One pointer per domain type, hence the N_CTDTS bound. */
207 struct ctables_domain *domains[N_CTDTS];
212 enum ctables_summary_variant sv;
214 struct ctables_cell_axis
216 struct ctables_cell_value
218 const struct ctables_category *category;
226 union ctables_summary *summaries;
233 const struct dictionary *dict;
/* Table look; ctables_destroy () releases it with pivot_table_look_unref (). */
234 struct pivot_table_look *look;
236 /* CTABLES has a number of extra formats that we implement via custom
237 currency specifications on an alternate fmt_settings. */
238 #define CTEF_NEGPAREN FMT_CCA
239 #define CTEF_NEQUAL FMT_CCB
240 #define CTEF_PAREN FMT_CCC
241 #define CTEF_PCTPAREN FMT_CCD
242 struct fmt_settings ctables_formats;
244 /* If this is NULL, zeros are displayed using the normal print format.
245 Otherwise, this string is displayed. */
248 /* If this is NULL, missing values are displayed using the normal print
249 format. Otherwise, this string is displayed. */
252 /* Indexed by variable dictionary index. */
253 enum ctables_vlabel *vlabels;
255 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
257 bool mrsets_count_duplicates; /* MRSETS. */
258 bool smissing_listwise; /* SMISSING. */
259 struct variable *e_weight; /* WEIGHT. */
260 int hide_threshold; /* HIDESMALLCOUNTS. */
/* Array of pointers to tables; ctables_destroy () walks n_tables of them. */
262 struct ctables_table **tables;
266 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute: its defining expression, where it was defined, and
   optional summary specs. */
269 struct ctables_postcompute
271 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
272 char *name; /* Name, without leading &. */
274 struct msg_location *location; /* Location of definition. */
275 struct ctables_pcexpr *expr;
277 struct ctables_summary_spec_set *specs;
278 bool hide_source_cats;
/* A node in a postcompute expression.  The operation selects which of the
   members below applies; operator nodes point to their operands through
   'subs'. */
281 struct ctables_pcexpr
291 enum ctables_postcompute_op
294 CTPO_CONSTANT, /* 5 */
295 CTPO_CAT_NUMBER, /* [5] */
296 CTPO_CAT_STRING, /* ["STRING"] */
297 CTPO_CAT_RANGE, /* [LO THRU 5] */
298 CTPO_CAT_MISSING, /* MISSING */
299 CTPO_CAT_OTHERNM, /* OTHERNM */
300 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
301 CTPO_CAT_TOTAL, /* TOTAL */
315 /* CTPO_CAT_NUMBER. */
318 /* CTPO_CAT_STRING, in dictionary encoding. */
319 struct substring string;
321 /* CTPO_CAT_RANGE. */
324 /* CTPO_CAT_SUBTOTAL. */
325 size_t subtotal_index;
327 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
328 One element: CTPO_NEG. */
329 struct ctables_pcexpr *subs[2];
332 /* Source location. */
333 struct msg_location *location;
336 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
337 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
338 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
339 struct ctables_pcexpr *sub1);
/* An array of summary specifications together with the variable they are
   applied to and the settings that affect how that variable is
   summarized. */
341 struct ctables_summary_spec_set
343 struct ctables_summary_spec *specs;
347 /* The variable to which the summary specs are applied. */
348 struct variable *var;
350 /* Whether the variable to which the summary specs are applied is a scale
351 variable for the purpose of summarization.
353 (VALIDN and TOTALN act differently for summarizing scale and categorical
357 /* If any of these optional additional scale variables are missing, then
358 treat 'var' as if it's missing too. This is for implementing
359 SMISSING=LISTWISE. */
360 struct variable **listwise_vars;
361 size_t n_listwise_vars;
364 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
365 const struct ctables_summary_spec_set *);
366 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
368 /* A nested sequence of variables, e.g. a > b > c. */
371 struct variable **vars;
/* One array and one count per domain type (N_CTDTS of each). */
374 size_t *domains[N_CTDTS];
375 size_t n_domains[N_CTDTS];
/* One summary spec set per summary variant (N_CSVS of them). */
378 struct ctables_summary_spec_set specs[N_CSVS];
381 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
384 struct ctables_nest *nests;
390 struct hmap_node node;
395 struct ctables_occurrence
397 struct hmap_node node;
/* A section of a table: one nest and one occurrences map per pivot axis,
   plus the cells and domains that belong to the section. */
401 struct ctables_section
403 struct ctables_table *table;
404 struct ctables_nest *nests[PIVOT_N_AXES];
405 struct hmap *occurrences[PIVOT_N_AXES];
406 struct hmap cells; /* Contains "struct ctables_cell"s. */
407 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
/* Back-pointer to the owning CTABLES state, followed by the per-axis
   expressions and stacks of this table. */
412 struct ctables *ctables;
413 struct ctables_axis *axes[PIVOT_N_AXES];
414 struct ctables_stack stacks[PIVOT_N_AXES];
415 struct ctables_section *sections;
417 enum pivot_axis_type summary_axis;
418 struct ctables_summary_spec_set summary_specs;
420 const struct variable *clabels_example;
421 struct hmap clabels_values_map;
422 struct ctables_value **clabels_values;
423 size_t n_clabels_values;
425 enum pivot_axis_type slabels_axis;
426 bool slabels_visible;
428 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
430 Most commonly, label_axis[a] == a, and in particular we always have
431 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
433 If ROWLABELS or COLLABELS is specified, then one of
434 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
435 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
437 enum pivot_axis_type label_axis[PIVOT_N_AXES];
438 enum pivot_axis_type clabels_from_axis;
440 /* Indexed by variable dictionary index. */
441 struct ctables_categories **categories;
450 struct ctables_chisq *chisq;
451 struct ctables_pairwise *pairwise;
/* A reference-counted list of categories (see ctables_categories_unref ()). */
454 struct ctables_categories
/* Array of categories; the functions below iterate over n_cats of them. */
457 struct ctables_category *cats;
/* One category in a category list.  The category type determines which of
   the members below are meaningful, as the comment on each member says. */
462 struct ctables_category
464 enum ctables_category_type
466 /* Explicit category lists. */
469 CCT_NRANGE, /* Numerical range. */
470 CCT_SRANGE, /* String range. */
475 /* Totals and subtotals. */
479 /* Implicit category lists. */
484 /* For contributing to TOTALN. */
485 CCT_EXCLUDED_MISSING,
489 struct ctables_category *subtotal;
495 double number; /* CCT_NUMBER. */
496 struct substring string; /* CCT_STRING, in dictionary encoding. */
497 double nrange[2]; /* CCT_NRANGE. */
498 struct substring srange[2]; /* CCT_SRANGE. */
502 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
503 bool hide_subcategories; /* CCT_SUBTOTAL. */
506 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
508 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
511 bool include_missing;
515 enum ctables_summary_function sort_function;
516 struct variable *sort_var;
521 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
522 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
523 struct msg_location *location;
/* Releases the data held inside CAT.  Depending on the kind of category this
   deallocates its string, both bounds of its string range, or its total
   label. */
527 ctables_category_uninit (struct ctables_category *cat)
538 case CCT_POSTCOMPUTE:
542 ss_dealloc (&cat->string);
546 ss_dealloc (&cat->srange[0]);
547 ss_dealloc (&cat->srange[1]);
552 free (cat->total_label);
560 case CCT_EXCLUDED_MISSING:
/* Returns true if A and B are both null (neither has a 'string' pointer) or
   are both non-null and equal according to ss_equals ().  A null substring
   never equals a non-null one. */
566 nullable_substring_equal (const struct substring *a,
567 const struct substring *b)
569 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
/* Compares categories A and B.  The types are checked first; after that the
   comparison depends on the kind of category: numbers and numeric ranges
   compare by value, strings with ss_equals (), string ranges bound by bound
   (a null bound only matches another null bound), postcomputes by pointer
   identity, total labels with strcmp (), and the last group by its
   missing-value, sort, and percentile settings. */
573 ctables_category_equal (const struct ctables_category *a,
574 const struct ctables_category *b)
576 if (a->type != b->type)
582 return a->number == b->number;
585 return ss_equals (a->string, b->string);
588 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
591 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
592 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
598 case CCT_POSTCOMPUTE:
599 return a->pc == b->pc;
603 return !strcmp (a->total_label, b->total_label);
608 return (a->include_missing == b->include_missing
609 && a->sort_ascending == b->sort_ascending
610 && a->sort_function == b->sort_function
611 && a->sort_var == b->sort_var
612 && a->percentile == b->percentile);
614 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  The reference count must still be positive on
   entry.  Every category in C is uninitialized with
   ctables_category_uninit () -- NOTE(review): presumably only once the last
   reference is gone; confirm. */
622 ctables_categories_unref (struct ctables_categories *c)
627 assert (c->n_refs > 0);
631 for (size_t i = 0; i < c->n_cats; i++)
632 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B.  The category counts and show_empty
   settings are checked first, then the categories are compared pairwise, in
   order, with ctables_category_equal (). */
638 ctables_categories_equal (const struct ctables_categories *a,
639 const struct ctables_categories *b)
641 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
644 for (size_t i = 0; i < a->n_cats; i++)
645 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
651 /* Chi-square test (SIGTEST). */
659 /* Pairwise comparison test (COMPARETEST). */
660 struct ctables_pairwise
662 enum { PROP, MEAN } type;
665 bool meansvariance_allcats;
/* The named adjustments start at 1, so 0 is none of them. */
667 enum { BONFERRONI = 1, BH } adjust;
/* The variable of a CTAO_VAR axis. */
691 struct variable *var;
/* One summary spec set per summary variant (N_CSVS of them). */
693 struct ctables_summary_spec_set specs[N_CSVS];
/* The two operands of an axis built by ctables_axis_new_nonterminal (). */
697 struct ctables_axis *subs[2];
/* Source location; ctables_axis_destroy () releases it. */
700 struct msg_location *loc;
703 static void ctables_axis_destroy (struct ctables_axis *);
/* The kinds of variables that a summary function may be applied to.  This is
   the AVAILABILITY field of each summary table entry. */
712 enum ctables_function_availability
714 CTFA_ALL, /* Any variables. */
715 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
716 CTFA_MRSETS, /* Only multiple-response sets */
/* One requested summary: the function, its percentile argument, and the
   output format to display it with. */
719 struct ctables_summary_spec
721 enum ctables_summary_function function;
722 double percentile; /* CTSF_PTILE only. */
725 struct fmt_spec format;
726 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Makes DST a copy of SRC.  The label is duplicated with xstrdup () so that
   DST holds its own string rather than sharing SRC's. */
732 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
733 const struct ctables_summary_spec *src)
736 dst->label = xstrdup (src->label);
740 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is allocated
   and each spec is cloned individually; DST is then assigned as a whole from
   a compound literal that, among other members, carries over SRC's is_scale
   flag. */
747 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
748 const struct ctables_summary_spec_set *src)
750 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
751 for (size_t i = 0; i < src->n; i++)
752 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
754 *dst = (struct ctables_summary_spec_set) {
759 .is_scale = src->is_scale,
/* Uninitializes each of the SET->n summary specs in SET. */
764 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
766 for (size_t i = 0; i < set->n; i++)
767 ctables_summary_spec_uninit (&set->specs[i]);
/* Parses a column width for the setting called NAME: an optional equals sign
   followed by either DEFAULT or a number between 0 and DBL_MAX.  A number is
   stored in *WIDTH. */
772 parse_col_width (struct lexer *lexer, const char *name, double *width)
774 lex_match (lexer, T_EQUALS);
775 if (lex_match_id (lexer, "DEFAULT"))
777 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
779 *width = lex_number (lexer);
/* Parses the keyword NO or YES (presumably storing false or true in *B --
   TODO confirm).  Any other token is reported with
   lex_error_expecting (). */
789 parse_bool (struct lexer *lexer, bool *b)
791 if (lex_match_id (lexer, "NO"))
793 else if (lex_match_id (lexer, "YES"))
797 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability of summary function F.  The lookup table is
   generated from the summary table, indexed by each entry's ENUM and holding
   its AVAILABILITY field. */
803 static enum ctables_function_availability
804 ctables_function_availability (enum ctables_summary_function f)
806 static enum ctables_function_availability availability[] = {
807 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
812 return availability[f];
/* Classifies summary function F (its prototype declares a bool result).  The
   case labels are grouped by family: percentages of counts, percentages of
   valid N and total N, and percentages of sums.
   NOTE(review): which groups yield true is decided by the return statement
   that follows each group; check those when changing the grouping. */
816 ctables_summary_function_is_count (enum ctables_summary_function f)
822 case CTSF_ROWPCT_COUNT:
823 case CTSF_COLPCT_COUNT:
824 case CTSF_TABLEPCT_COUNT:
825 case CTSF_SUBTABLEPCT_COUNT:
826 case CTSF_LAYERPCT_COUNT:
827 case CTSF_LAYERROWPCT_COUNT:
828 case CTSF_LAYERCOLPCT_COUNT:
831 case CTSF_ROWPCT_VALIDN:
832 case CTSF_COLPCT_VALIDN:
833 case CTSF_TABLEPCT_VALIDN:
834 case CTSF_SUBTABLEPCT_VALIDN:
835 case CTSF_LAYERPCT_VALIDN:
836 case CTSF_LAYERROWPCT_VALIDN:
837 case CTSF_LAYERCOLPCT_VALIDN:
838 case CTSF_ROWPCT_TOTALN:
839 case CTSF_COLPCT_TOTALN:
840 case CTSF_TABLEPCT_TOTALN:
841 case CTSF_SUBTABLEPCT_TOTALN:
842 case CTSF_LAYERPCT_TOTALN:
843 case CTSF_LAYERROWPCT_TOTALN:
844 case CTSF_LAYERCOLPCT_TOTALN:
861 case CTSF_ROWPCT_SUM:
862 case CTSF_COLPCT_SUM:
863 case CTSF_TABLEPCT_SUM:
864 case CTSF_SUBTABLEPCT_SUM:
865 case CTSF_LAYERPCT_SUM:
866 case CTSF_LAYERROWPCT_SUM:
867 case CTSF_LAYERCOLPCT_SUM:
/* Parses a summary function name from LEXER.  The current token must be an
   identifier; it is compared with ss_equals_case () against every NAME in the
   summary table plus the seven aliases that omit the .COUNT suffix.  On a
   match the corresponding function is stored in *F; otherwise an error is
   reported. */
875 parse_ctables_summary_function (struct lexer *lexer,
876 enum ctables_summary_function *f)
880 enum ctables_summary_function function;
881 struct substring name;
883 static struct pair names[] = {
884 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
885 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
888 /* The .COUNT suffix may be omitted. */
889 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
890 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
891 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
892 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
893 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
894 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
895 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
899 if (!lex_force_id (lexer))
902 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
903 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
905 *f = names[i].function;
910 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  Depending on the kind of axis, this uninitializes its
   N_CSVS summary spec sets or recursively destroys its two sub-axes; the
   axis's source location is destroyed as well. */
915 ctables_axis_destroy (struct ctables_axis *axis)
923 for (size_t i = 0; i < N_CSVS; i++)
924 ctables_summary_spec_set_uninit (&axis->specs[i]);
929 ctables_axis_destroy (axis->subs[0]);
930 ctables_axis_destroy (axis->subs[1]);
933 msg_location_destroy (axis->loc);
/* Allocates a new axis with xmalloc () whose operands are SUB0 and SUB1.  Its
   location covers the tokens from offset START_OFS up to the one just before
   LEXER's current token. */
937 static struct ctables_axis *
938 ctables_axis_new_nonterminal (enum ctables_axis_op op,
939 struct ctables_axis *sub0,
940 struct ctables_axis *sub1,
941 struct lexer *lexer, int start_ofs)
943 struct ctables_axis *axis = xmalloc (sizeof *axis);
944 *axis = (struct ctables_axis) {
946 .subs = { sub0, sub1 },
947 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Context handed to each of the ctables_axis_parse_*() functions. */
952 struct ctables_axis_parse_ctx
955 struct dictionary *dict;
957 struct ctables_table *t;
/* Returns the default output format for FUNCTION applied to VAR.  The choice
   is driven by the FORMAT field of FUNCTION's summary table entry and is one
   of: F with width 40, PCT with width 40 and 1 decimal, or VAR's own print
   format. */
960 static struct fmt_spec
961 ctables_summary_default_format (enum ctables_summary_function function,
962 const struct variable *var)
964 static const enum ctables_format default_formats[] = {
965 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
969 switch (default_formats[function])
972 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
975 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
978 return *var_get_print_format (var);
/* Returns the default label for FUNCTION as a string produced by xasprintf ()
   or xstrdup ().  For CTSF_PTILE the label is "Percentile" followed by the
   percentile with two decimals; for every other function it is the
   translation of the LABEL field of the function's summary table entry. */
986 ctables_summary_default_label (enum ctables_summary_function function,
989 static const char *default_labels[] = {
990 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
995 return (function == CTSF_PTILE
996 ? xasprintf (_("Percentile %.2f"), percentile)
997 : xstrdup (gettext (default_labels[function])));
/* Returns the NAME field of FUNCTION's summary table entry.  The result
   points into a static table. */
1001 ctables_summary_function_name (enum ctables_summary_function function)
1003 static const char *names[] = {
1004 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1008 return names[function];
/* Adds a summary specification (FUNCTION, PERCENTILE, LABEL, FORMAT) for
   summary variant SV to AXIS.

   For a CTAO_VAR axis, the availability of FUNCTION is examined first; the
   error paths report at LOC and add a note at AXIS's own location.  The spec
   is then appended to AXIS->specs[SV], growing the array with x2nrealloc ()
   when it is full.  LABEL is copied with xstrdup (), and a null FORMAT
   selects ctables_summary_default_format ().

   For any other axis, the spec is added recursively to both sub-axes, and
   the result of each recursive call is checked. */
1012 add_summary_spec (struct ctables_axis *axis,
1013 enum ctables_summary_function function, double percentile,
1014 const char *label, const struct fmt_spec *format,
1015 bool is_ctables_format, const struct msg_location *loc,
1016 enum ctables_summary_variant sv)
1018 if (axis->op == CTAO_VAR)
1020 const char *function_name = ctables_summary_function_name (function);
1021 const char *var_name = var_get_name (axis->var);
1022 switch (ctables_function_availability (function))
1025 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1026 "response sets."), function_name);
1027 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1036 _("Summary function %s applies only to scale variables."),
1038 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1049 struct ctables_summary_spec_set *set = &axis->specs[sv];
1050 if (set->n >= set->allocated)
1051 set->specs = x2nrealloc (set->specs, &set->allocated,
1052 sizeof *set->specs);
1054 struct ctables_summary_spec *dst = &set->specs[set->n++];
1055 *dst = (struct ctables_summary_spec) {
1056 .function = function,
1057 .percentile = percentile,
1058 .label = xstrdup (label),
1059 .format = (format ? *format
1060 : ctables_summary_default_format (function, axis->var)),
1061 .is_ctables_format = is_ctables_format,
1067 for (size_t i = 0; i < 2; i++)
1068 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1069 format, is_ctables_format, loc, sv))
1075 static struct ctables_axis *ctables_axis_parse_stack (
1076 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a variable name.  A variable may be followed by [S] or [C] to force it
   to be treated as scale or as categorical; without either marker, the
   variable counts as scale exactly when its measurement level is
   MEASURE_SCALE.  Using a string variable as scale is reported as an error
   and the newly created axis is destroyed. */
1079 static struct ctables_axis *
1080 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1082 if (lex_match (ctx->lexer, T_LPAREN))
1084 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1085 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1087 ctables_axis_destroy (sub);
1093 if (!lex_force_id (ctx->lexer))
1096 int start_ofs = lex_ofs (ctx->lexer);
1097 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1101 struct ctables_axis *axis = xmalloc (sizeof *axis);
1102 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1104 /* XXX should figure out default measures by reading data */
1105 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1106 : lex_match_phrase (ctx->lexer, "[C]") ? false
1107 : var_get_measure (var) == MEASURE_SCALE);
1108 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1109 lex_ofs (ctx->lexer) - 1);
1110 if (axis->scale && var_is_alpha (var))
1112 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1114 var_get_name (var));
1115 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the characters '0' through '9'.
   strcspn () yields the index of the first digit, or of the terminating null
   byte when there is none. */
1123 has_digit (const char *s)
1125 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format for a summary from LEXER into *FORMAT.

   The type names NEGPAREN, NEQUAL, PAREN, and PCTPAREN (compared with
   strcasecmp ()) select the corresponding CTEF_* format and set
   *IS_CTABLES_FORMAT to true.  For these, a width below 2 or a decimal count
   above width - 1 is reported as an error.

   Any other type name is handed to parse_format_specifier () and must pass
   fmt_check_output () and be compatible with numeric values; in that case
   *IS_CTABLES_FORMAT is set to false. */
1129 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1130 bool *is_ctables_format)
1132 char type[FMT_TYPE_LEN_MAX + 1];
1133 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1136 if (!strcasecmp (type, "NEGPAREN"))
1137 format->type = CTEF_NEGPAREN;
1138 else if (!strcasecmp (type, "NEQUAL"))
1139 format->type = CTEF_NEQUAL;
1140 else if (!strcasecmp (type, "PAREN"))
1141 format->type = CTEF_PAREN;
1142 else if (!strcasecmp (type, "PCTPAREN"))
1143 format->type = CTEF_PCTPAREN;
1146 *is_ctables_format = false;
1147 return (parse_format_specifier (lexer, format)
1148 && fmt_check_output (format)
1149 && fmt_check_type_compat (format, VAL_NUMERIC));
1154 msg (SE, _("Output format %s requires width 2 or greater."), type);
1157 else if (format->d > format->w - 1)
1159 msg (SE, _("Output format %s requires width greater than decimals."),
1165 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification consists of a summary function name; for PTILE, a
   percentile between 0 and 100; an optional quoted label (otherwise the
   function's default label is used); and an optional format, recognized as
   an identifier that contains a digit.  A comma may follow each
   specification.  While parsing cell summaries (CSV_CELL), the keyword
   TOTALS followed by '[' is also accepted.

   NOTE(review): the result of add_summary_spec () is not checked here, so a
   rejected summary function does not by itself stop the parse -- confirm
   that this is intended. */
1170 static struct ctables_axis *
1171 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1173 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1174 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1177 enum ctables_summary_variant sv = CSV_CELL;
1180 int start_ofs = lex_ofs (ctx->lexer);
1182 /* Parse function. */
1183 enum ctables_summary_function function;
1184 if (!parse_ctables_summary_function (ctx->lexer, &function))
1187 /* Parse percentile. */
1188 double percentile = 0;
1189 if (function == CTSF_PTILE)
1191 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1193 percentile = lex_number (ctx->lexer);
1194 lex_get (ctx->lexer);
1199 if (lex_is_string (ctx->lexer))
1201 label = ss_xstrdup (lex_tokss (ctx->lexer));
1202 lex_get (ctx->lexer);
1205 label = ctables_summary_default_label (function, percentile);
1208 struct fmt_spec format;
1209 const struct fmt_spec *formatp;
1210 bool is_ctables_format = false;
1211 if (lex_token (ctx->lexer) == T_ID
1212 && has_digit (lex_tokcstr (ctx->lexer)))
1214 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1215 &is_ctables_format))
1225 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1226 lex_ofs (ctx->lexer) - 1);
1227 add_summary_spec (sub, function, percentile, label, formatp,
1228 is_ctables_format, loc, sv);
1230 msg_location_destroy (loc);
1232 lex_match (ctx->lexer, T_COMMA);
1233 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1235 if (!lex_force_match (ctx->lexer, T_LBRACK))
1239 else if (lex_match (ctx->lexer, T_RBRACK))
1241 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1248 ctables_axis_destroy (sub);
/* Searches AXIS for a scale variable.  A CTAO_VAR axis yields itself if it is
   scale and NULL otherwise; any other axis is searched recursively, first
   subs[0] and then subs[1]. */
1252 static const struct ctables_axis *
1253 find_scale (const struct ctables_axis *axis)
1257 else if (axis->op == CTAO_VAR)
1258 return axis->scale ? axis : NULL;
1261 for (size_t i = 0; i < 2; i++)
1263 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches AXIS for a categorical variable that has summary specifications.
   A CTAO_VAR axis yields itself if it is not scale and has at least one
   CSV_CELL summary spec, and NULL otherwise; any other axis is searched
   recursively, first subs[0] and then subs[1]. */
1271 static const struct ctables_axis *
1272 find_categorical_summary_spec (const struct ctables_axis *axis)
1276 else if (axis->op == CTAO_VAR)
1277 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1280 for (size_t i = 0; i < 2; i++)
1282 const struct ctables_axis *sum
1283 = find_categorical_summary_spec (axis->subs[i]);
/* Parses postfix expressions joined by '>' and combines them into CTAO_NEST
   axes.  Two restrictions are enforced on each nesting, and on a violation
   the new nest axis is destroyed:

   - A scale variable may not appear both on the outer and on the inner side.

   - An outer categorical variable may not carry summaries; those are only
     allowed at the innermost nesting level. */
1291 static struct ctables_axis *
1292 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1294 int start_ofs = lex_ofs (ctx->lexer);
1295 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1299 while (lex_match (ctx->lexer, T_GT))
1301 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1305 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1306 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1308 const struct ctables_axis *outer_scale = find_scale (lhs);
1309 const struct ctables_axis *inner_scale = find_scale (rhs);
1310 if (outer_scale && inner_scale)
1312 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1313 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1314 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1315 ctables_axis_destroy (nest);
1319 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1322 msg_at (SE, nest->loc,
1323 _("Summaries may only be requested for categorical variables "
1324 "at the innermost nesting level."));
1325 msg_at (SN, outer_sum->loc,
1326 _("This outer categorical variable has a summary."));
1327 ctables_axis_destroy (nest);
/* Parses nest expressions joined by '+'.  Each '+' combines the axis built
   so far, as the left operand, with the next nest expression into a new
   CTAO_STACK axis, so the result associates to the left. */
1337 static struct ctables_axis *
1338 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1340 int start_ofs = lex_ofs (ctx->lexer);
1341 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1345 while (lex_match (ctx->lexer, T_PLUS))
1347 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1351 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1352 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T.  The tokens BY, '/', and end
   of command are checked for first (presumably they mean that the axis is
   empty -- TODO confirm).  Otherwise a stack expression is parsed and stored
   in T->axes[A], and the result says whether that produced an axis. */
1359 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1360 struct ctables *ct, struct ctables_table *t,
1361 enum pivot_axis_type a)
1363 if (lex_token (lexer) == T_BY
1364 || lex_token (lexer) == T_SLASH
1365 || lex_token (lexer) == T_ENDCMD)
1368 struct ctables_axis_parse_ctx ctx = {
1374 t->axes[a] = ctables_axis_parse_stack (&ctx);
1375 return t->axes[a] != NULL;
1379 ctables_chisq_destroy (struct ctables_chisq *chisq)
1385 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Destroys T: drops the reference to each of its n_categories category lists
   and frees the array that holds them, destroys the column, row, and layer
   axes, and destroys the chi-square and pairwise test settings. */
1391 ctables_table_destroy (struct ctables_table *t)
1396 for (size_t i = 0; i < t->n_categories; i++)
1397 ctables_categories_unref (t->categories[i]);
1398 free (t->categories);
1400 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1401 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1402 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1406 ctables_chisq_destroy (t->chisq);
1407 ctables_pairwise_destroy (t->pairwise);
/* Tears down CT.  The visible lines drop a reference on CT->look and destroy
   each of the CT->n_tables tables.  NOTE(review): further cleanup may exist
   on lines not shown here. */
1412 ctables_destroy (struct ctables *ct)
1417 pivot_table_look_unref (ct->look);
1421 for (size_t i = 0; i < ct->n_tables; i++)
1422 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose nrange member holds the pair
   LOW, HIGH.  NOTE(review): any other initialized member is on a line not
   visible here. */
1427 static struct ctables_category
1428 cct_nrange (double low, double high)
1430 return (struct ctables_category) {
1432 .nrange = { low, high }
/* Returns, by value, a category whose srange member holds the pair
   LOW, HIGH.  The substrings are stored as given, not copied.
   NOTE(review): any other initialized member is on a line not visible
   here. */
1436 static struct ctables_category
1437 cct_srange (struct substring low, struct substring high)
1439 return (struct ctables_category) {
1441 .srange = { low, high }
/* Parses the optional label of a subtotal and stores a CCT_SUBTOTAL category
   in *CAT, with its hide_subcategories member taken from
   HIDE_SUBCATEGORIES.  When an equals sign follows, a string token is
   required and its text is duplicated as the label; the other visible
   assignment uses a duplicate of the translated default label instead. */
1446 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1447 struct ctables_category *cat)
1450 if (lex_match (lexer, T_EQUALS))
1452 if (!lex_force_string (lexer))
1455 total_label = ss_xstrdup (lex_tokss (lexer));
1459 total_label = xstrdup (_("Subtotal"));
/* The category stores the label pointer itself, not another copy. */
1461 *cat = (struct ctables_category) {
1462 .type = CCT_SUBTOTAL,
1463 .hide_subcategories = hide_subcategories,
1464 .total_label = total_label
/* Converts the text of the current token with recode_substring_pool, passing
   the encoding of DICT and UTF-8 as the two encoding arguments and a null
   pool, then strips trailing spaces from the result.
   NOTE(review): which encoding is the source and which is the target depends
   on the argument order of recode_substring_pool -- confirm against its
   declaration. */
1469 static struct substring
1470 parse_substring (struct lexer *lexer, struct dictionary *dict)
1472 struct substring s = recode_substring_pool (
1473 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1474 ss_rtrim (&s, ss_cstr (" "));
/* Parses one explicit category specification from LEXER into *CAT.

   Forms handled by the visible branches:
     - OTHERNM and MISSING keywords.
     - SUBTOTAL and HSUBTOTAL, delegated to ctables_table_parse_subtotal
       with hide_subcategories false and true respectively.
     - LO THRU followed by a string or a number.
     - A number, optionally followed by THRU HI or THRU and another number.
     - A string, optionally followed by THRU HI or THRU and another string.
     - A T_AND token followed by an identifier that names a postcompute
       known to CT.
   Any other token is reported through lex_error. */
1480 ctables_table_parse_explicit_category (struct lexer *lexer,
1481 struct dictionary *dict,
1483 struct ctables_category *cat)
1485 if (lex_match_id (lexer, "OTHERNM"))
1486 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1487 else if (lex_match_id (lexer, "MISSING"))
1488 *cat = (struct ctables_category) { .type = CCT_MISSING };
1489 else if (lex_match_id (lexer, "SUBTOTAL"))
1490 return ctables_table_parse_subtotal (lexer, false, cat);
1491 else if (lex_match_id (lexer, "HSUBTOTAL"))
1492 return ctables_table_parse_subtotal (lexer, true, cat);
1493 else if (lex_match_id (lexer, "LO"))
1495 if (!lex_force_match_id (lexer, "THRU"))
1497 if (lex_is_string (lexer))
/* An open lower bound of a string range is represented by a substring whose
   string pointer is null. */
1499 struct substring sr0 = { .string = NULL };
1500 struct substring sr1 = parse_substring (lexer, dict);
1501 *cat = cct_srange (sr0, sr1);
1503 else if (lex_force_num (lexer))
/* An open lower bound of a numeric range is represented by -DBL_MAX. */
1505 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1511 else if (lex_is_number (lexer))
1513 double number = lex_number (lexer);
1515 if (lex_match_id (lexer, "THRU"))
/* An open upper bound of a numeric range is represented by DBL_MAX. */
1517 if (lex_match_id (lexer, "HI"))
1518 *cat = cct_nrange (number, DBL_MAX);
1521 if (!lex_force_num (lexer))
1523 *cat = cct_nrange (number, lex_number (lexer));
1528 *cat = (struct ctables_category) {
1533 else if (lex_is_string (lexer))
1535 struct substring s = parse_substring (lexer, dict);
1536 if (lex_match_id (lexer, "THRU"))
1538 if (lex_match_id (lexer, "HI"))
/* An open upper bound of a string range is a substring with a null string
   pointer. */
1540 struct substring sr1 = { .string = NULL };
1541 *cat = cct_srange (s, sr1);
1545 if (!lex_force_string (lexer))
1547 struct substring sr1 = parse_substring (lexer, dict);
1548 *cat = cct_srange (s, sr1);
1552 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1554 else if (lex_match (lexer, T_AND))
1556 if (!lex_force_id (lexer))
/* Look the identifier up among the postcomputes of CT. */
1558 struct ctables_postcompute *pc = ctables_find_postcompute (
1559 ct, lex_tokcstr (lexer));
/* Error path for a failed lookup: the location passed to msg_at starts one
   token before the current one so that it also covers the T_AND token. */
1562 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1563 msg_at (SE, loc, _("Unknown postcompute &%s."),
1564 lex_tokcstr (lexer));
1565 msg_location_destroy (loc);
1570 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1574 lex_error (lexer, NULL);
/* Searches CATS for the category that postcompute expression node E refers
   to.  The matching rule depends on E->op:
     - CTPO_CAT_NUMBER: a CCT_NUMBER category with the same number.
     - CTPO_CAT_STRING: a CCT_STRING category with an equal string.
     - CTPO_CAT_RANGE: a CCT_NRANGE category with both bounds equal.
     - CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL: a category of the
       corresponding type.
     - CTPO_CAT_SUBTOTAL: a CCT_SUBTOTAL category, selected with the help of
       E->subtotal_index.
   NOTE(review): the statements that record a match in BEST and that update
   N_SUBTOTALS, and the final return, are on lines not visible here. */
1581 static struct ctables_category *
1582 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1583 const struct ctables_pcexpr *e)
1585 struct ctables_category *best = NULL;
1586 size_t n_subtotals = 0;
1587 for (size_t i = 0; i < cats->n_cats; i++)
1589 struct ctables_category *cat = &cats->cats[i];
1592 case CTPO_CAT_NUMBER:
1593 if (cat->type == CCT_NUMBER && cat->number == e->number)
1597 case CTPO_CAT_STRING:
1598 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1602 case CTPO_CAT_RANGE:
1603 if (cat->type == CCT_NRANGE
1604 && cat->nrange[0] == e->range[0]
1605 && cat->nrange[1] == e->range[1])
1609 case CTPO_CAT_MISSING:
1610 if (cat->type == CCT_MISSING)
1614 case CTPO_CAT_OTHERNM:
1615 if (cat->type == CCT_OTHERNM)
1619 case CTPO_CAT_SUBTOTAL:
1620 if (cat->type == CCT_SUBTOTAL)
/* A subtotal reference matches either by position, when its index equals the
   running subtotal count, or without a position when its index is zero. */
1623 if (e->subtotal_index == n_subtotals)
1625 else if (e->subtotal_index == 0)
1630 case CTPO_CAT_TOTAL:
1631 if (cat->type == CCT_TOTAL)
/* A subtotal reference without a position is tested specially when more than
   one subtotal was counted.  NOTE(review): the action taken is not visible;
   presumably the reference is treated as ambiguous -- confirm. */
1645 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Checks postcompute expression E, which belongs to the computed category
   PC_CAT, against the category list CATS.  For the category-reference
   operators the referenced category is looked up with
   ctables_find_category_for_postcompute; problems are reported at
   PC_CAT->location, E->location, and CATS_LOCATION.  For other nodes the
   check recurses into both entries of E->subs.
   NOTE(review): the return statements are on lines not visible here. */
1651 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1652 struct ctables_category *pc_cat,
1653 const struct ctables_categories *cats,
1654 const struct msg_location *cats_location)
1658 case CTPO_CAT_NUMBER:
1659 case CTPO_CAT_STRING:
1660 case CTPO_CAT_RANGE:
1661 case CTPO_CAT_MISSING:
1662 case CTPO_CAT_OTHERNM:
1663 case CTPO_CAT_SUBTOTAL:
1664 case CTPO_CAT_TOTAL:
1666 struct ctables_category *cat = ctables_find_category_for_postcompute (
/* For a subtotal reference without a position, count the subtotals in CATS;
   more than one leads to the message about referring to subtotals by
   position. */
1670 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1672 size_t n_subtotals = 0;
1673 for (size_t i = 0; i < cats->n_cats; i++)
1674 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1675 if (n_subtotals > 1)
1677 msg_at (SE, cats_location,
1678 ngettext ("These categories include %zu instance "
1679 "of SUBTOTAL or HSUBTOTAL, so references "
1680 "from computed categories must refer to "
1681 "subtotals by position.",
1682 "These categories include %zu instances "
1683 "of SUBTOTAL or HSUBTOTAL, so references "
1684 "from computed categories must refer to "
1685 "subtotals by position.",
1688 msg_at (SN, e->location,
1689 _("This is the reference that lacks a position."));
/* General diagnostic for a reference to a category that is not in the list,
   followed by notes that point at the reference and at the list. */
1694 msg_at (SE, pc_cat->location,
1695 _("Computed category &%s references a category not included "
1696 "in the category list."),
1698 msg_at (SN, e->location, _("This is the missing category."));
1699 msg_at (SN, cats_location,
1700 _("To fix the problem, add the missing category to the "
1701 "list of categories here."));
/* NOTE(review): the statement guarded by hide_source_cats is not visible. */
1704 if (pc_cat->pc->hide_source_cats)
/* Recurse into both operands; a null operand is skipped. */
1718 for (size_t i = 0; i < 2; i++)
1719 if (e->subs[i] && !ctables_recursive_check_postcompute (
1720 e->subs[i], pc_cat, cats, cats_location))
/* Parses substring S with data_in as a value of type FORMAT, using the
   encoding of DICT and the current format settings.  The visible error path
   reports the message returned by data_in at CAT->location together with the
   name of FORMAT.
   NOTE(review): the store through N and the return statements are on lines
   not visible here. */
1730 parse_category_string (const struct ctables_category *cat,
1731 struct substring s, struct dictionary *dict,
1732 enum fmt_type format, double *n)
1735 char *error = data_in (s, dict_get_encoding (dict), format,
1736 settings_get_fmt_settings (), &v, 0, NULL);
1739 msg_at (SE, cat->location,
1740 _("Failed to parse category specification as format %s: %s."),
1741 fmt_name (format), error);
/* Scans the N_VARS variables in VARS for a numeric variable.  When one is
   found, an error naming it is reported at CAT->location.
   NOTE(review): the return statements are on lines not visible here;
   presumably the result is true only when every variable is a string
   variable -- confirm. */
1751 all_strings (struct variable **vars, size_t n_vars,
1752 const struct ctables_category *cat)
1754 for (size_t j = 0; j < n_vars; j++)
1755 if (var_is_numeric (vars[j]))
1757 msg_at (SE, cat->location,
1758 _("This category specification may be applied only to string "
1759 "variables, but this subcommand tries to apply it to "
1760 "numeric variable %s."),
1761 var_get_name (vars[j]));
/* Parses a categories specification for table T.

   The specification starts with the VARIABLES keyword and a variable list.
   It continues either with an explicit category list in square brackets or
   with the keyword settings ORDER, KEY, and MISSING, and may further contain
   TOTAL, LABEL, POSITION, and EMPTY settings, up to the next slash or the
   end of the command.  A single ctables_categories object is created for
   all of the listed variables; T->categories is indexed by dictionary
   index. */
1768 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1769 struct ctables *ct, struct ctables_table *t)
1771 if (!lex_match_id (lexer, "VARIABLES"))
1773 lex_match (lexer, T_EQUALS);
1775 struct variable **vars;
1777 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Find out whether every variable has the same print format type;
   common_format becomes null as soon as one differs from the first. */
1780 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1781 for (size_t i = 1; i < n_vars; i++)
1783 const struct fmt_spec *f = var_get_print_format (vars[i]);
1784 if (f->type != common_format->type)
1786 common_format = NULL;
/* Tests whether the common format belongs to a date, time, or date component
   format category.  NOTE(review): the variable that receives this result is
   declared on a line not visible here. */
1792 && (fmt_get_category (common_format->type)
1793 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* The new object starts with one reference per listed variable and with
   empty categories shown.  Each variable slot first drops its reference on
   whatever it held before.  NOTE(review): the store of C into the slot is
   presumably on a line not visible here. */
1795 struct ctables_categories *c = xmalloc (sizeof *c);
1796 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1797 for (size_t i = 0; i < n_vars; i++)
1799 struct ctables_categories **cp
1800 = &t->categories[var_get_dict_index (vars[i])];
1801 ctables_categories_unref (*cp);
1805 size_t allocated_cats = 0;
/* Explicit category list in square brackets. */
1806 if (lex_match (lexer, T_LBRACK))
1808 int cats_start_ofs = lex_ofs (lexer);
/* Grow the array when it is full, parse the next category into the first
   unused slot, and record the range of tokens that it spans.  Commas between
   categories are optional. */
1811 if (c->n_cats >= allocated_cats)
1812 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1814 int start_ofs = lex_ofs (lexer);
1815 struct ctables_category *cat = &c->cats[c->n_cats];
1816 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1818 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1821 lex_match (lexer, T_COMMA);
1823 while (!lex_match (lexer, T_RBRACK));
/* With the whole list parsed, check each category against the variables that
   it will be applied to. */
1825 struct msg_location *cats_location
1826 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1827 for (size_t i = 0; i < c->n_cats; i++)
1829 struct ctables_category *cat = &c->cats[i];
1832 case CCT_POSTCOMPUTE:
1833 if (!ctables_recursive_check_postcompute (cat->pc->expr, cat,
/* A specification that only makes sense for numeric variables is rejected
   when any listed variable is a string variable. */
1840 for (size_t j = 0; j < n_vars; j++)
1841 if (var_is_alpha (vars[j]))
1843 msg_at (SE, cat->location,
1844 _("This category specification may be applied "
1845 "only to numeric variables, but this "
1846 "subcommand tries to apply it to string "
1848 var_get_name (vars[j]));
/* On this path a string category is parsed as a number in the common format;
   the string is then freed and the category becomes CCT_NUMBER.  Otherwise
   every variable is required to be a string variable. */
1857 if (!parse_category_string (cat, cat->string, dict,
1858 common_format->type, &n))
1861 ss_dealloc (&cat->string);
1863 cat->type = CCT_NUMBER;
1866 else if (!all_strings (vars, n_vars, cat))
/* The same conversion for a string range: each bound with a non-null string
   is parsed as a number, a null bound is handled on a line not visible here,
   and the category becomes CCT_NRANGE with the parsed bounds. */
1875 if (!cat->srange[0].string)
1877 else if (!parse_category_string (cat, cat->srange[0], dict,
1878 common_format->type, &n[0]))
1881 if (!cat->srange[1].string)
1883 else if (!parse_category_string (cat, cat->srange[1], dict,
1884 common_format->type, &n[1]))
1887 ss_dealloc (&cat->srange[0]);
1888 ss_dealloc (&cat->srange[1]);
1890 cat->type = CCT_NRANGE;
1891 cat->nrange[0] = n[0];
1892 cat->nrange[1] = n[1];
1894 else if (!all_strings (vars, n_vars, cat))
1905 case CCT_EXCLUDED_MISSING:
/* Template category that the ORDER, KEY, and MISSING settings below modify;
   it starts out excluding missing values and sorting in ascending order. */
1911 struct ctables_category cat = {
1913 .include_missing = false,
1914 .sort_ascending = true,
1916 bool show_totals = false;
1917 char *total_label = NULL;
1918 bool totals_before = false;
/* Keyword settings, up to the next slash or the end of the command.  ORDER,
   KEY, and MISSING are only recognized while there are no explicit
   categories. */
1919 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1921 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1923 lex_match (lexer, T_EQUALS);
1924 if (lex_match_id (lexer, "A"))
1925 cat.sort_ascending = true;
1926 else if (lex_match_id (lexer, "D"))
1927 cat.sort_ascending = false;
1930 lex_error_expecting (lexer, "A", "D");
1934 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1936 lex_match (lexer, T_EQUALS);
1937 if (lex_match_id (lexer, "VALUE"))
1938 cat.type = CCT_VALUE;
1939 else if (lex_match_id (lexer, "LABEL"))
1940 cat.type = CCT_LABEL;
/* Any other key is parsed as a summary function to sort by, optionally
   followed by a parenthesized variable and, for PTILE, a percentile between
   0 and 100. */
1943 cat.type = CCT_FUNCTION;
1944 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1947 if (lex_match (lexer, T_LPAREN))
1949 cat.sort_var = parse_variable (lexer, dict);
1953 if (cat.sort_function == CTSF_PTILE)
1955 lex_match (lexer, T_COMMA);
1956 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1958 cat.percentile = lex_number (lexer);
1962 if (!lex_force_match (lexer, T_RPAREN))
1965 else if (ctables_function_availability (cat.sort_function)
/* The result is deliberately unused; the call is made for the error that it
   reports about the missing left parenthesis. */
1968 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1973 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1975 lex_match (lexer, T_EQUALS);
1976 if (lex_match_id (lexer, "INCLUDE"))
1977 cat.include_missing = true;
1978 else if (lex_match_id (lexer, "EXCLUDE"))
1979 cat.include_missing = false;
1982 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1986 else if (lex_match_id (lexer, "TOTAL"))
1988 lex_match (lexer, T_EQUALS);
1989 if (!parse_bool (lexer, &show_totals))
1992 else if (lex_match_id (lexer, "LABEL"))
1994 lex_match (lexer, T_EQUALS);
1995 if (!lex_force_string (lexer))
1998 total_label = ss_xstrdup (lex_tokss (lexer));
2001 else if (lex_match_id (lexer, "POSITION"))
2003 lex_match (lexer, T_EQUALS);
2004 if (lex_match_id (lexer, "BEFORE"))
2005 totals_before = true;
2006 else if (lex_match_id (lexer, "AFTER"))
2007 totals_before = false;
2010 lex_error_expecting (lexer, "BEFORE", "AFTER");
2014 else if (lex_match_id (lexer, "EMPTY"))
2016 lex_match (lexer, T_EQUALS);
2017 if (lex_match_id (lexer, "INCLUDE"))
2018 c->show_empty = true;
2019 else if (lex_match_id (lexer, "EXCLUDE"))
2020 c->show_empty = false;
2023 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* Two variants of the error for an unrecognized keyword: the longer list
   includes the keywords that are only valid without explicit categories. */
2030 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2031 "TOTAL", "LABEL", "POSITION", "EMPTY");
2033 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the template category.  NOTE(review): the guarding condition is on
   a line not visible here. */
2040 if (c->n_cats >= allocated_cats)
2041 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2042 c->cats[c->n_cats++] = cat;
/* Make room for one totals category, placed either in a slot opened at the
   front of the array or in the slot just past the current last element.
   Without a LABEL setting the translated default label is used. */
2047 if (c->n_cats >= allocated_cats)
2048 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2050 struct ctables_category *totals;
2053 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2054 totals = &c->cats[0];
2057 totals = &c->cats[c->n_cats];
2060 *totals = (struct ctables_category) {
2062 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward otherwise,
   recording in each category the subtotal that currently applies to it. */
2066 struct ctables_category *subtotal = NULL;
2067 for (size_t i = totals_before ? 0 : c->n_cats;
2068 totals_before ? i < c->n_cats : i-- > 0;
2069 totals_before ? i++ : 0)
2071 struct ctables_category *cat = &c->cats[i];
2080 cat->subtotal = subtotal;
2083 case CCT_POSTCOMPUTE:
2094 case CCT_EXCLUDED_MISSING:
/* NOTE(review): only the signature is visible in this excerpt; judging by
   the name this releases what NEST owns -- confirm against the full body. */
2103 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes each of the STACK->n nests in STACK and then frees the array
   that holds them.  STACK itself is not freed by the visible lines. */
2110 ctables_stack_uninit (struct ctables_stack *stack)
2114 for (size_t i = 0; i < stack->n; i++)
2115 ctables_nest_uninit (&stack->nests[i]);
2116 free (stack->nests);
/* Builds the stack that results from nesting S1 inside S0: one new nest for
   every pair of a nest from S0 and a nest from S1, whose variable list is the
   variables of the S0 nest followed by those of the S1 nest.  Both input
   stacks are uninitialized before the visible code ends, so the function
   consumes its arguments.
   NOTE(review): handling of an empty S0 or S1 and the return statement are
   on lines not visible here. */
2120 static struct ctables_stack
2121 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
/* NOTE(review): the product in the second argument is computed before
   xnmalloc is called, so any overflow check xnmalloc performs would not
   cover it -- confirm whether that matters. */
2128 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2129 for (size_t i = 0; i < s0.n; i++)
2130 for (size_t j = 0; j < s1.n; j++)
2132 const struct ctables_nest *a = &s0.nests[i];
2133 const struct ctables_nest *b = &s1.nests[j];
/* NOTE(review): axes is allocated here and is not referenced again in any
   visible line; confirm that it is used or freed on a line not shown,
   otherwise it leaks once per pair. */
2135 size_t allocate = a->n + b->n;
2136 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2137 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2139 for (size_t k = 0; k < a->n; k++)
2140 vars[n++] = a->vars[k];
2141 for (size_t k = 0; k < b->n; k++)
2142 vars[n++] = b->vars[k];
2143 assert (n == allocate);
/* The summary specifications are taken from one of the two nests, chosen by
   which one has no variable in its CSV_CELL specification set. */
2145 const struct ctables_nest *summary_src;
2146 if (!a->specs[CSV_CELL].var)
2148 else if (!b->specs[CSV_CELL].var)
/* The scale index of the outer nest is kept as is; one from the inner nest
   is shifted by the number of outer variables. */
2153 struct ctables_nest *new = &stack.nests[stack.n++];
2154 *new = (struct ctables_nest) {
2156 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2157 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2161 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2162 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2164 ctables_stack_uninit (&s0);
2165 ctables_stack_uninit (&s1);
/* Builds the stack that holds the nests of S0 followed by the nests of S1.
   The nest structures are copied by value into a newly allocated array, and
   the group_head of each nest that comes from S1 is shifted by the number of
   nests in S0.
   NOTE(review): the increment of stack.n in the second loop, the release of
   the input arrays, and the return statement are on lines not visible
   here. */
2169 static struct ctables_stack
2170 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2172 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2173 for (size_t i = 0; i < s0.n; i++)
2174 stack.nests[stack.n++] = s0.nests[i];
2175 for (size_t i = 0; i < s1.n; i++)
2177 stack.nests[stack.n] = s1.nests[i];
2178 stack.nests[stack.n].group_head += s0.n;
2181 assert (stack.n == s0.n + s1.n);
/* Builds a stack that contains a single nest for the variable axis node A.
   The scale index of the nest is 0 when A is a scale variable and SIZE_MAX
   otherwise.  When A has cell summary specifications or is a scale variable,
   every summary variant of A is cloned into the nest and tagged with the
   variable of A and its scale flag. */
2187 static struct ctables_stack
2188 var_fts (const struct ctables_axis *a)
2190 struct variable **vars = xmalloc (sizeof *vars);
2193 struct ctables_nest *nest = xmalloc (sizeof *nest);
2194 *nest = (struct ctables_nest) {
2197 .scale_idx = a->scale ? 0 : SIZE_MAX,
2199 if (a->specs[CSV_CELL].n || a->scale)
2200 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2202 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2203 nest->specs[sv].var = a->var;
2204 nest->specs[sv].is_scale = a->scale;
/* The single allocated nest serves as a one-element array. */
2206 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts the axis tree A into a stack of nests.  The visible
   returns cover three cases: an empty stack, the stack_fts combination of
   the stacks of both subtrees, and the nest_fts combination of the stacks of
   both subtrees.
   NOTE(review): the conditions that select each case, and any case for a
   plain variable node, are on lines not visible here. */
2209 static struct ctables_stack
2210 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2213 return (struct ctables_stack) { .n = 0 };
2221 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2222 enumerate_fts (axis_type, a->subs[1]));
2225 /* This should consider any of the scale variables found in the result to
2226 be linked to each other listwise for SMISSING=LISTWISE. */
2227 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2228 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function.  Which member is in use depends on
   the summary function, as the group comments below indicate.
   NOTE(review): several members are declared on lines not visible here. */
2234 union ctables_summary
2236 /* COUNT, VALIDN, TOTALN. */
2239 /* MINIMUM, MAXIMUM, RANGE. */
2246 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2247 struct moments1 *moments;
2249 /* MEDIAN, MODE, PTILE. */
2252 struct casewriter *writer;
2257 /* XXX multiple response */
/* Prepares accumulator S for the summary function in SS.  The visible
   branches set the minimum and maximum to SYSMIS, create a moments object
   that computes up to the variance, or create a sorting case writer.
   NOTE(review): the initialization for the count-based percentage cases is
   on a line not visible here. */
2261 ctables_summary_init (union ctables_summary *s,
2262 const struct ctables_summary_spec *ss)
2264 switch (ss->function)
2268 case CTSF_ROWPCT_COUNT:
2269 case CTSF_COLPCT_COUNT:
2270 case CTSF_TABLEPCT_COUNT:
2271 case CTSF_SUBTABLEPCT_COUNT:
2272 case CTSF_LAYERPCT_COUNT:
2273 case CTSF_LAYERROWPCT_COUNT:
2274 case CTSF_LAYERCOLPCT_COUNT:
2275 case CTSF_ROWPCT_VALIDN:
2276 case CTSF_COLPCT_VALIDN:
2277 case CTSF_TABLEPCT_VALIDN:
2278 case CTSF_SUBTABLEPCT_VALIDN:
2279 case CTSF_LAYERPCT_VALIDN:
2280 case CTSF_LAYERROWPCT_VALIDN:
2281 case CTSF_LAYERCOLPCT_VALIDN:
2282 case CTSF_ROWPCT_TOTALN:
2283 case CTSF_COLPCT_TOTALN:
2284 case CTSF_TABLEPCT_TOTALN:
2285 case CTSF_SUBTABLEPCT_TOTALN:
2286 case CTSF_LAYERPCT_TOTALN:
2287 case CTSF_LAYERROWPCT_TOTALN:
2288 case CTSF_LAYERCOLPCT_TOTALN:
2300 s->min = s->max = SYSMIS;
2308 case CTSF_ROWPCT_SUM:
2309 case CTSF_COLPCT_SUM:
2310 case CTSF_TABLEPCT_SUM:
2311 case CTSF_SUBTABLEPCT_SUM:
2312 case CTSF_LAYERPCT_SUM:
2313 case CTSF_LAYERROWPCT_SUM:
2314 case CTSF_LAYERCOLPCT_SUM:
2315 s->moments = moments1_create (MOMENT_VARIANCE);
/* The writer stores cases with two numeric columns and sorts them in
   ascending order on column 0.  The prototype and the ordering are only
   needed while the writer is created, so both are released right after. */
2322 struct caseproto *proto = caseproto_create ();
2323 proto = caseproto_add_width (proto, 0);
2324 proto = caseproto_add_width (proto, 0);
2326 struct subcase ordering;
2327 subcase_init (&ordering, 0, 0, SC_ASCEND);
2328 s->writer = sort_create_writer (&ordering, proto);
2329 subcase_uninit (&ordering);
2330 caseproto_unref (proto);
/* Releases whatever accumulator S holds for the summary function in SS.
   The visible branches destroy the moments object or the case writer.
   NOTE(review): the action for the count-based percentage cases is on a line
   not visible here; presumably nothing needs to be released -- confirm. */
2340 ctables_summary_uninit (union ctables_summary *s,
2341 const struct ctables_summary_spec *ss)
2343 switch (ss->function)
2347 case CTSF_ROWPCT_COUNT:
2348 case CTSF_COLPCT_COUNT:
2349 case CTSF_TABLEPCT_COUNT:
2350 case CTSF_SUBTABLEPCT_COUNT:
2351 case CTSF_LAYERPCT_COUNT:
2352 case CTSF_LAYERROWPCT_COUNT:
2353 case CTSF_LAYERCOLPCT_COUNT:
2354 case CTSF_ROWPCT_VALIDN:
2355 case CTSF_COLPCT_VALIDN:
2356 case CTSF_TABLEPCT_VALIDN:
2357 case CTSF_SUBTABLEPCT_VALIDN:
2358 case CTSF_LAYERPCT_VALIDN:
2359 case CTSF_LAYERROWPCT_VALIDN:
2360 case CTSF_LAYERCOLPCT_VALIDN:
2361 case CTSF_ROWPCT_TOTALN:
2362 case CTSF_COLPCT_TOTALN:
2363 case CTSF_TABLEPCT_TOTALN:
2364 case CTSF_SUBTABLEPCT_TOTALN:
2365 case CTSF_LAYERPCT_TOTALN:
2366 case CTSF_LAYERROWPCT_TOTALN:
2367 case CTSF_LAYERCOLPCT_TOTALN:
2385 case CTSF_ROWPCT_SUM:
2386 case CTSF_COLPCT_SUM:
2387 case CTSF_TABLEPCT_SUM:
2388 case CTSF_SUBTABLEPCT_SUM:
2389 case CTSF_LAYERPCT_SUM:
2390 case CTSF_LAYERROWPCT_SUM:
2391 case CTSF_LAYERCOLPCT_SUM:
2392 moments1_destroy (s->moments);
2398 casewriter_destroy (s->writer);
/* Adds one observation to accumulator S for the summary function in SS.
   VALUE is the value of VAR.  In the visible lines, count accumulators add
   D_WEIGHT or E_WEIGHT, sometimes only when IS_SCALE is set or
   EXCLUDED_MISSING is clear; minimum, maximum, moments, and the case writer
   are only updated when IS_SCALE_MISSING is clear, and the latter two use
   E_WEIGHT as the weight.
   NOTE(review): the conditions on the count branches whose guards are not
   visible, and the use of IS_MISSING, are on lines not shown here. */
2404 ctables_summary_add (union ctables_summary *s,
2405 const struct ctables_summary_spec *ss,
2406 const struct variable *var, const union value *value,
2407 bool is_scale, bool is_scale_missing,
2408 bool is_missing, bool excluded_missing,
2409 double d_weight, double e_weight)
2411 /* To determine whether a case is included in a given table for a particular
2412 kind of summary, consider the following charts for each variable in the
2413 table. Only if "yes" appears for every variable for the summary is the
2416 Categorical variables: VALIDN COUNT TOTALN
2417 Valid values in included categories yes yes yes
2418 Missing values in included categories --- yes yes
2419 Missing values in excluded categories --- --- yes
2420 Valid values in excluded categories --- --- ---
2422 Scale variables: VALIDN COUNT TOTALN
2423 Valid value yes yes yes
2424 Missing value --- yes yes
2426 Missing values include both user- and system-missing. (The system-missing
2427 value is always in an excluded category.)
2429 switch (ss->function)
2432 case CTSF_ROWPCT_TOTALN:
2433 case CTSF_COLPCT_TOTALN:
2434 case CTSF_TABLEPCT_TOTALN:
2435 case CTSF_SUBTABLEPCT_TOTALN:
2436 case CTSF_LAYERPCT_TOTALN:
2437 case CTSF_LAYERROWPCT_TOTALN:
2438 case CTSF_LAYERCOLPCT_TOTALN:
2439 s->count += d_weight;
2443 case CTSF_ROWPCT_COUNT:
2444 case CTSF_COLPCT_COUNT:
2445 case CTSF_TABLEPCT_COUNT:
2446 case CTSF_SUBTABLEPCT_COUNT:
2447 case CTSF_LAYERPCT_COUNT:
2448 case CTSF_LAYERROWPCT_COUNT:
2449 case CTSF_LAYERCOLPCT_COUNT:
2450 if (is_scale || !excluded_missing)
2451 s->count += d_weight;
2455 case CTSF_ROWPCT_VALIDN:
2456 case CTSF_COLPCT_VALIDN:
2457 case CTSF_TABLEPCT_VALIDN:
2458 case CTSF_SUBTABLEPCT_VALIDN:
2459 case CTSF_LAYERPCT_VALIDN:
2460 case CTSF_LAYERROWPCT_VALIDN:
2461 case CTSF_LAYERCOLPCT_VALIDN:
2465 s->count += d_weight;
2470 s->count += d_weight;
2474 if (is_scale || !excluded_missing)
2475 s->count += e_weight;
2482 s->count += e_weight;
2486 s->count += e_weight;
2492 if (!is_scale_missing)
2494 assert (!var_is_alpha (var)); /* XXX? */
2495 if (s->min == SYSMIS || value->f < s->min)
2497 if (s->max == SYSMIS || value->f > s->max)
2507 case CTSF_ROWPCT_SUM:
2508 case CTSF_COLPCT_SUM:
2509 case CTSF_TABLEPCT_SUM:
2510 case CTSF_SUBTABLEPCT_SUM:
2511 case CTSF_LAYERPCT_SUM:
2512 case CTSF_LAYERROWPCT_SUM:
2513 case CTSF_LAYERCOLPCT_SUM:
2514 if (!is_scale_missing)
2515 moments1_add (s->moments, value->f, e_weight);
/* Order statistics: add the weight to the running valid total and write a
   case with the value in column 0 and its weight in column 1. */
2521 if (!is_scale_missing)
2523 s->ovalid += e_weight;
2525 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2526 *case_num_rw_idx (c, 0) = value->f;
2527 *case_num_rw_idx (c, 1) = e_weight;
2528 casewriter_write (s->writer, c);
/* Maps summary function FUNCTION to the type of domain that serves as the
   base of its percentage.  The visible returns map the LAYERCOLPCT functions
   to CTDT_LAYERCOL, the LAYERROWPCT functions to CTDT_LAYERROW, and the
   SUBTABLEPCT functions to CTDT_SUBTABLE.
   NOTE(review): the returns for the COLPCT, LAYERPCT, ROWPCT, and TABLEPCT
   groups and for all other functions are on lines not visible here. */
2534 static enum ctables_domain_type
2535 ctables_function_domain (enum ctables_summary_function function)
2559 case CTSF_COLPCT_COUNT:
2560 case CTSF_COLPCT_SUM:
2561 case CTSF_COLPCT_TOTALN:
2562 case CTSF_COLPCT_VALIDN:
2565 case CTSF_LAYERCOLPCT_COUNT:
2566 case CTSF_LAYERCOLPCT_SUM:
2567 case CTSF_LAYERCOLPCT_TOTALN:
2568 case CTSF_LAYERCOLPCT_VALIDN:
2569 return CTDT_LAYERCOL;
2571 case CTSF_LAYERPCT_COUNT:
2572 case CTSF_LAYERPCT_SUM:
2573 case CTSF_LAYERPCT_TOTALN:
2574 case CTSF_LAYERPCT_VALIDN:
2577 case CTSF_LAYERROWPCT_COUNT:
2578 case CTSF_LAYERROWPCT_SUM:
2579 case CTSF_LAYERROWPCT_TOTALN:
2580 case CTSF_LAYERROWPCT_VALIDN:
2581 return CTDT_LAYERROW;
2583 case CTSF_ROWPCT_COUNT:
2584 case CTSF_ROWPCT_SUM:
2585 case CTSF_ROWPCT_TOTALN:
2586 case CTSF_ROWPCT_VALIDN:
2589 case CTSF_SUBTABLEPCT_COUNT:
2590 case CTSF_SUBTABLEPCT_SUM:
2591 case CTSF_SUBTABLEPCT_TOTALN:
2592 case CTSF_SUBTABLEPCT_VALIDN:
2593 return CTDT_SUBTABLE;
2595 case CTSF_TABLEPCT_COUNT:
2596 case CTSF_TABLEPCT_SUM:
2597 case CTSF_TABLEPCT_TOTALN:
2598 case CTSF_TABLEPCT_VALIDN:
/* Computes the final value of accumulator S for the summary function in SS,
   for the cell CELL.  Percentages divide the accumulated count by a total of
   the domain that ctables_function_domain selects for the function; moment
   statistics come from moments1_calculate; order statistics read back the
   cases collected by the writer. */
2606 ctables_summary_value (const struct ctables_cell *cell,
2607 union ctables_summary *s,
2608 const struct ctables_summary_spec *ss)
2610 switch (ss->function)
2616 case CTSF_ROWPCT_COUNT:
2617 case CTSF_COLPCT_COUNT:
2618 case CTSF_TABLEPCT_COUNT:
2619 case CTSF_SUBTABLEPCT_COUNT:
2620 case CTSF_LAYERPCT_COUNT:
2621 case CTSF_LAYERROWPCT_COUNT:
2622 case CTSF_LAYERCOLPCT_COUNT:
/* The domain total is tested before dividing so that a zero total does not
   cause a division by zero; the alternative result is on a line not visible
   here. */
2624 enum ctables_domain_type d = ctables_function_domain (ss->function);
2625 return (cell->domains[d]->e_count
2626 ? s->count / cell->domains[d]->e_count * 100
2630 case CTSF_ROWPCT_VALIDN:
2631 case CTSF_COLPCT_VALIDN:
2632 case CTSF_TABLEPCT_VALIDN:
2633 case CTSF_SUBTABLEPCT_VALIDN:
2634 case CTSF_LAYERPCT_VALIDN:
2635 case CTSF_LAYERROWPCT_VALIDN:
2636 case CTSF_LAYERCOLPCT_VALIDN:
2638 enum ctables_domain_type d = ctables_function_domain (ss->function);
2639 return (cell->domains[d]->e_valid
2640 ? s->count / cell->domains[d]->e_valid * 100
2644 case CTSF_ROWPCT_TOTALN:
2645 case CTSF_COLPCT_TOTALN:
2646 case CTSF_TABLEPCT_TOTALN:
2647 case CTSF_SUBTABLEPCT_TOTALN:
2648 case CTSF_LAYERPCT_TOTALN:
2649 case CTSF_LAYERROWPCT_TOTALN:
2650 case CTSF_LAYERCOLPCT_TOTALN:
2652 enum ctables_domain_type d = ctables_function_domain (ss->function);
2653 return (cell->domains[d]->e_total
2654 ? s->count / cell->domains[d]->e_total * 100
/* The range is only defined when both extremes were observed. */
2678 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2683 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2689 double weight, variance;
2690 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2691 return calc_semean (variance, weight);
2697 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2698 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
/* The sum is recovered as total weight times mean. */
2703 double weight, mean;
2704 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2705 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2711 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2715 case CTSF_ROWPCT_SUM:
2716 case CTSF_COLPCT_SUM:
2717 case CTSF_TABLEPCT_SUM:
2718 case CTSF_SUBTABLEPCT_SUM:
2719 case CTSF_LAYERPCT_SUM:
2720 case CTSF_LAYERROWPCT_SUM:
2721 case CTSF_LAYERCOLPCT_SUM:
/* Percentile or median: turn the writer into a reader and accumulate a
   percentile statistic over it, with values in column 0 and weights in
   column 1; the result is stored in the accumulator.
   NOTE(review): the median passes 0.5 while PTILE passes ss->percentile
   unchanged; confirm that both are on the scale that percentile_create
   expects. */
2728 struct casereader *reader = casewriter_make_reader (s->writer);
2731 struct percentile *ptile = percentile_create (
2732 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2733 struct order_stats *os = &ptile->parent;
2734 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2735 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2736 statistic_destroy (&ptile->parent.parent);
/* Mode: the same reader-based accumulation with a mode statistic. */
2743 struct casereader *reader = casewriter_make_reader (s->writer);
2746 struct mode *mode = mode_create ();
2747 struct order_stats *os = &mode->parent;
2748 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2749 s->ovalue = mode->mode;
2750 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way: the nest whose variables are
   compared and the axis whose cell values are read. */
2758 struct ctables_cell_sort_aux
2760 const struct ctables_nest *nest;
2761 enum pivot_axis_type a;
/* Three-way comparison callback for sorting an array of pointers to cells.
   A_ and B_ each point to a pointer to a cell; AUX_ points to a struct
   ctables_cell_sort_aux.  The cells are compared variable by variable along
   axis AUX->a, skipping the scale variable of the nest.  Cells in different
   categories are ordered by the addresses of their category structures;
   within one category the order depends on the category type and, for the
   sorted types, on its sort_ascending flag. */
2765 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2767 const struct ctables_cell_sort_aux *aux = aux_;
2768 struct ctables_cell *const *ap = a_;
2769 struct ctables_cell *const *bp = b_;
2770 const struct ctables_cell *a = *ap;
2771 const struct ctables_cell *b = *bp;
2773 const struct ctables_nest *nest = aux->nest;
2774 for (size_t i = 0; i < nest->n; i++)
2775 if (i != nest->scale_idx)
2777 const struct variable *var = nest->vars[i];
2778 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2779 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2780 if (a_cv->category != b_cv->category)
2781 return a_cv->category > b_cv->category ? 1 : -1;
2783 const union value *a_val = &a_cv->value;
2784 const union value *b_val = &b_cv->value;
2785 switch (a_cv->category->type)
2791 case CCT_POSTCOMPUTE:
2792 case CCT_EXCLUDED_MISSING:
2793 /* Must be equal. */
2801 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2809 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2811 return a_cv->category->sort_ascending ? cmp : -cmp;
/* Sorting by label: two labels are compared with strcmp, a value that has a
   label sorts after one that has none, and two unlabeled values fall back to
   comparing the values themselves. */
2817 const char *a_label = var_lookup_value_label (var, a_val);
2818 const char *b_label = var_lookup_value_label (var, b_val);
2820 ? (b_label ? strcmp (a_label, b_label) : 1)
2821 : (b_label ? -1 : value_compare_3way (
2822 a_val, b_val, var_get_width (var))));
2824 return a_cv->category->sort_ascending ? cmp : -cmp;
2838 For each ctables_table:
2839 For each combination of row vars:
2840 For each combination of column vars:
2841 For each combination of layer vars:
2843 Make a table of row values:
2844 Sort entries by row values
2845 Assign a 0-based index to each actual value
2846 Construct a dimension
2847 Make a table of column values
2848 Make a table of layer values
2850 Fill the table entry using the indexes from before.
/* Looks up the domain of type DOMAIN in section S that CELL belongs to and
   creates it when no existing domain matches.  A domain is identified by the
   categories, and for ordinary categories also the values, of the variables
   that each nest lists in its domains[DOMAIN] array.  A newly created domain
   records CELL as its example cell.
   NOTE(review): the initial hash value and the return statements are on
   lines not visible here. */
2853 static struct ctables_domain *
2854 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2855 enum ctables_domain_type domain)
/* Hash the category pointer of every relevant variable; the value is mixed
   in as well unless the category is a total, subtotal, or postcompute. */
2858 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2860 const struct ctables_nest *nest = s->nests[a];
2861 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2863 size_t v_idx = nest->domains[domain][i];
2864 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2865 hash = hash_pointer (cv->category, hash);
2866 if (cv->category->type != CCT_TOTAL
2867 && cv->category->type != CCT_SUBTOTAL
2868 && cv->category->type != CCT_POSTCOMPUTE)
2869 hash = value_hash (&cv->value,
2870 var_get_width (nest->vars[v_idx]), hash);
/* Compare CELL against the example cell of each domain with the same hash,
   using the same rule as the hash computation. */
2874 struct ctables_domain *d;
2875 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2877 const struct ctables_cell *df = d->example;
2878 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2880 const struct ctables_nest *nest = s->nests[a];
2881 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2883 size_t v_idx = nest->domains[domain][i];
2884 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
2885 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
2886 if (cv1->category != cv2->category
2887 || (cv1->category->type != CCT_TOTAL
2888 && cv1->category->type != CCT_SUBTOTAL
2889 && cv1->category->type != CCT_POSTCOMPUTE
2890 && !value_equal (&cv1->value, &cv2->value,
2891 var_get_width (nest->vars[v_idx]))))
/* No match: create a new domain with CELL as its example and insert it. */
2900 d = xmalloc (sizeof *d);
2901 *d = (struct ctables_domain) { .example = cell };
2902 hmap_insert (&s->domains[domain], &d->node, hash);
/* Wraps the string contents of V, using the width of VAR as the length, in a
   substring and strips trailing spaces from it.  The substring refers to the
   bytes of V; nothing is copied by the visible lines. */
2906 static struct substring
2907 rtrim_value (const union value *v, const struct variable *var)
2909 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2910 var_get_width (var));
2911 ss_rtrim (&s, ss_cstr (" "));
/* Tests whether the string value V of VAR, with trailing spaces removed,
   lies within the inclusive range given by SRANGE[0] and SRANGE[1].  A bound
   whose string pointer is null is open and always satisfied. */
2916 in_string_range (const union value *v, const struct variable *var,
2917 const struct substring *srange)
2919 struct substring s = rtrim_value (v, var);
2920 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2921 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Finds the category in C that value V of variable VAR falls into.  The
   categories are examined from the last to the first.  The visible tests
   cover an equal number, an equal string after trimming trailing spaces, a
   numeric range in which -DBL_MAX and DBL_MAX stand for open bounds, a
   string range, and missing values.  When the loop ends without a result,
   the fallback OTHERNM is returned unless V is a missing value of VAR, in
   which case the result is null.
   NOTE(review): the case labels, the returns inside the loop, and the
   handling of the system-missing value are on lines not visible here. */
2924 static const struct ctables_category *
2925 ctables_categories_match (const struct ctables_categories *c,
2926 const union value *v, const struct variable *var)
2928 if (var_is_numeric (var) && v->f == SYSMIS)
2931 const struct ctables_category *othernm = NULL;
2932 for (size_t i = c->n_cats; i-- > 0; )
2934 const struct ctables_category *cat = &c->cats[i];
2938 if (cat->number == v->f)
2943 if (ss_equals (cat->string, rtrim_value (v, var)))
2948 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2949 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2954 if (in_string_range (v, var, cat->srange))
2959 if (var_is_value_missing (var, v))
2963 case CCT_POSTCOMPUTE:
/* This category accepts the value when it includes missing values or when
   the value is not missing. */
2978 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2981 case CCT_EXCLUDED_MISSING:
2986 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns the totals category of C, which is looked for only in the first
   and the last position of the category array.  The code indexes the array
   unconditionally, so it relies on C having at least one category.
   NOTE(review): the result when neither position holds a total is on a line
   not visible here. */
2989 static const struct ctables_category *
2990 ctables_categories_total (const struct ctables_categories *c)
2992 const struct ctables_category *first = &c->cats[0];
2993 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2994 return (first->type == CCT_TOTAL ? first
2995 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell of section S keyed by the categories in CATS
   and, for categories that are not totals, subtotals or postcomputes, by the
   corresponding data values in case C.  The visible code hashes the key,
   scans S->cells for an entry with the same hash and matching key, and
   otherwise allocates a cell, fills in its per-axis category/value pairs,
   initializes its summaries and domains, and inserts it into S->cells.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the found or newly created cell is returned -- confirm. */
2999 static struct ctables_cell *
3000 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3001 const struct ctables_category *cats[PIVOT_N_AXES][10])
3004 enum ctables_summary_variant sv = CSV_CELL;
/* Hash the key.  The category pointer always takes part; the data value only
   takes part for ordinary categories.  Scale variables are skipped. */
3005 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3007 const struct ctables_nest *nest = s->nests[a];
3008 for (size_t i = 0; i < nest->n; i++)
3009 if (i != nest->scale_idx)
3011 hash = hash_pointer (cats[a][i], hash);
3012 if (cats[a][i]->type != CCT_TOTAL
3013 && cats[a][i]->type != CCT_SUBTOTAL
3014 && cats[a][i]->type != CCT_POSTCOMPUTE)
3015 hash = value_hash (case_data (c, nest->vars[i]),
3016 var_get_width (nest->vars[i]), hash);
/* Look for an existing cell.  The condition below is true when a candidate
   differs from the key, using the same category/value rule as the hash. */
3022 struct ctables_cell *cell;
3023 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3025 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3027 const struct ctables_nest *nest = s->nests[a];
3028 for (size_t i = 0; i < nest->n; i++)
3029 if (i != nest->scale_idx
3030 && (cats[a][i] != cell->axes[a].cvs[i].category
3031 || (cats[a][i]->type != CCT_TOTAL
3032 && cats[a][i]->type != CCT_SUBTOTAL
3033 && cats[a][i]->type != CCT_POSTCOMPUTE
3034 && !value_equal (case_data (c, nest->vars[i]),
3035 &cell->axes[a].cvs[i].value,
3036 var_get_width (nest->vars[i])))))
/* Set up a new cell. */
3045 cell = xmalloc (sizeof *cell);
3048 cell->omit_domains = 0;
3049 cell->postcompute = false;
3050 //struct string name = DS_EMPTY_INITIALIZER;
3051 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3053 const struct ctables_nest *nest = s->nests[a];
3054 cell->axes[a].cvs = (nest->n
3055 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3057 for (size_t i = 0; i < nest->n; i++)
3059 const struct ctables_category *cat = cats[a][i];
3060 const struct variable *var = nest->vars[i];
3061 const union value *value = case_data (c, var);
3062 if (i != nest->scale_idx)
3064 const struct ctables_category *subtotal = cat->subtotal;
/* Hidden categories, and categories whose subtotal hides its subcategories,
   get special treatment (the action is not visible here). */
3065 if (cat->hide || (subtotal && subtotal->hide_subcategories))
/* For total, subtotal and postcompute categories, set bits in omit_domains;
   the set of CTDT_* bits depends on which pivot axis the category is on. */
3068 if (cat->type == CCT_TOTAL
3069 || cat->type == CCT_SUBTOTAL
3070 || cat->type == CCT_POSTCOMPUTE)
3072 /* XXX these should be more encompassing I think.*/
3076 case PIVOT_AXIS_COLUMN:
3077 cell->omit_domains |= ((1u << CTDT_TABLE) |
3078 (1u << CTDT_LAYER) |
3079 (1u << CTDT_LAYERCOL) |
3080 (1u << CTDT_SUBTABLE) |
3083 case PIVOT_AXIS_ROW:
3084 cell->omit_domains |= ((1u << CTDT_TABLE) |
3085 (1u << CTDT_LAYER) |
3086 (1u << CTDT_LAYERROW) |
3087 (1u << CTDT_SUBTABLE) |
3090 case PIVOT_AXIS_LAYER:
3091 cell->omit_domains |= ((1u << CTDT_TABLE) |
3092 (1u << CTDT_LAYER));
3096 if (cat->type == CCT_POSTCOMPUTE)
3097 cell->postcompute = true;
/* Record the key: the category pointer plus a private copy of the value. */
3100 cell->axes[a].cvs[i].category = cat;
3101 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): the statements below build a human-readable cell name in
   `name', whose only visible declaration (3050) is commented out, as is the
   final assignment (3123).  Presumably this region is compiled out as debug
   code -- confirm. */
3104 if (i != nest->scale_idx)
3106 if (!ds_is_empty (&name))
3107 ds_put_cstr (&name, ", ");
3108 char *value_s = data_out (value, var_get_encoding (var),
3109 var_get_print_format (var),
3110 settings_get_fmt_settings ());
3111 if (cat->type == CCT_TOTAL
3112 || cat->type == CCT_SUBTOTAL
3113 || cat->type == CCT_POSTCOMPUTE)
3114 ds_put_format (&name, "%s=total", var_get_name (var));
3116 ds_put_format (&name, "%s=%s", var_get_name (var),
3117 value_s + strspn (value_s, " "));
3123 //cell->name = ds_steal_cstr (&name);
/* Allocate and initialize one summary per spec on the summary axis, attach
   the cell to one domain of every type, and publish it in the hash map. */
3125 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3126 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3127 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3128 for (size_t i = 0; i < specs->n; i++)
3129 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3130 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3131 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3132 hmap_insert (&s->cells, &cell->node, hash);
/* Checks case C for missing scale data relevant to SPECS.  Specs that are
   not for a scale variable are handled first; otherwise the summary variable
   specs->var and then each of the specs->n_listwise_vars listwise variables
   is tested with var_is_num_missing().

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the result is false for non-scale specs and true as soon as any
   tested variable is missing -- confirm. */
3137 is_scale_missing (const struct ctables_summary_spec_set *specs,
3138 const struct ccase *c)
3140 if (!specs->is_scale)
3143 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3146 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3148 const struct variable *var = specs->listwise_vars[i];
3149 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with dictionary weight D_WEIGHT and effective weight
   E_WEIGHT, to the cell of S keyed by CATS (found or created through
   ctables_cell_insert__()).  Every summary of the cell is updated through
   ctables_summary_add(), and then the totals, counts and valid counts of the
   cell's domains are accumulated.  IS_MISSING and EXCLUDED_MISSING are
   passed through to the summaries; EXCLUDED_MISSING also gates the domain
   count updates. */
3157 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3158 const struct ctables_category *cats[PIVOT_N_AXES][10],
3159 bool is_missing, bool excluded_missing,
3160 double d_weight, double e_weight)
3162 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3163 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3165 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3167 bool scale_missing = is_scale_missing (specs, c);
3168 for (size_t i = 0; i < specs->n; i++)
3169 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3170 specs->var, case_data (c, specs->var), specs->is_scale,
3171 scale_missing, is_missing, excluded_missing,
3172 d_weight, e_weight);
3173 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* NOTE(review): `&&' is a logical AND and (1u << dt) is never zero, so this
   test reduces to `cell->omit_domains == 0': a cell with any omit bit set
   skips every domain, and the individual CTDT_* bits stored by
   ctables_cell_insert__() are never examined.  A bitwise `&' may have been
   intended, but switching would make total/subtotal/postcompute cells start
   contributing to the domains outside their mask -- confirm the intended
   semantics before changing. */
3174 if (!(cell->omit_domains && (1u << dt)))
3176 struct ctables_domain *d = cell->domains[dt];
/* Totals are accumulated for every case that reaches this point. */
3177 d->d_total += d_weight;
3178 d->e_total += e_weight;
3179 if (!excluded_missing)
3181 d->d_count += d_weight;
3182 d->e_count += e_weight;
/* NOTE(review): the condition guarding the valid counts is not visible in
   this excerpt (presumably `!is_missing') -- confirm. */
3186 d->d_valid += d_weight;
3187 d->e_valid += e_weight;
/* Adds case C to cells in which one or more category positions of CATS are
   replaced by their variable's total category.  Starting at axis START_AXIS
   and nest position START_NEST, each non-scale variable's total category is
   looked up with ctables_categories_total(), the current entry cats[a][i] is
   saved, the case is added with ctables_cell_add__(), and the function
   recurses from position i + 1 so that combinations of totals are covered.

   NOTE(review): the lines that store TOTAL into cats[a][i], restore SAVE
   afterwards, and skip variables without a total are not visible in this
   excerpt -- confirm against the full function. */
3193 recurse_totals (struct ctables_section *s, const struct ccase *c,
3194 const struct ctables_category *cats[PIVOT_N_AXES][10],
3195 bool is_missing, bool excluded_missing,
3196 double d_weight, double e_weight,
3197 enum pivot_axis_type start_axis, size_t start_nest)
3199 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3201 const struct ctables_nest *nest = s->nests[a];
3202 for (size_t i = start_nest; i < nest->n; i++)
/* Scale variables have no categories. */
3204 if (i == nest->scale_idx)
3207 const struct variable *var = nest->vars[i];
3209 const struct ctables_category *total = ctables_categories_total (
3210 s->table->categories[var_get_dict_index (var)]);
3213 const struct ctables_category *save = cats[a][i];
3215 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3216 d_weight, e_weight);
3217 recurse_totals (s, c, cats, is_missing, excluded_missing,
3218 d_weight, e_weight, a, i + 1);
/* Counterpart of recurse_totals() for subtotals.  Starting at axis
   START_AXIS and nest position START_NEST, each non-scale position of CATS
   is temporarily replaced by its category's `subtotal' member, the case is
   added with ctables_cell_add__(), and the function recurses from position
   i + 1.

   NOTE(review): the test that skips categories without a subtotal and the
   statement that restores SAVE into cats[a][i] are not visible in this
   excerpt -- confirm against the full function. */
3227 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3228 const struct ctables_category *cats[PIVOT_N_AXES][10],
3229 bool is_missing, bool excluded_missing,
3230 double d_weight, double e_weight,
3231 enum pivot_axis_type start_axis, size_t start_nest)
3233 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3235 const struct ctables_nest *nest = s->nests[a];
3236 for (size_t i = start_nest; i < nest->n; i++)
/* Scale variables have no categories. */
3238 if (i == nest->scale_idx)
3241 const struct ctables_category *save = cats[a][i];
3244 cats[a][i] = save->subtotal;
3245 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3246 d_weight, e_weight);
3247 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3248 d_weight, e_weight, a, i + 1);
/* Records that VALUE of variable VAR occurred, in the hash map OCCURRENCES.
   Values are hashed and compared using VAR's width.  The map is first
   searched for an equal value; the code after the search allocates a new
   entry holding a private copy of VALUE and inserts it.

   NOTE(review): the statement executed when an equal value is found is not
   visible in this excerpt (presumably an early return, so that each distinct
   value is stored once) -- confirm. */
3257 ctables_add_occurrence (const struct variable *var,
3258 const union value *value,
3259 struct hmap *occurrences)
3261 int width = var_get_width (var);
3262 unsigned int hash = value_hash (value, width, 0);
3264 struct ctables_occurrence *o;
3265 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3267 if (value_equal (value, &o->value, width))
3270 o = xmalloc (sizeof *o);
3271 value_clone (&o->value, value, width);
3272 hmap_insert (occurrences, &o->node, hash);
/* Tabulates case C, with dictionary weight D_WEIGHT and effective weight
   E_WEIGHT, into section S.  For each non-scale variable on each axis the
   case's value is matched against the variable's categories with
   ctables_categories_match(); a shared CCT_EXCLUDED_MISSING placeholder
   category is used on one path, which also sets EXCLUDED_MISSING.  Unless a
   value was excluded, every categorical value is recorded in
   S->occurrences.  Finally the case is added to its own cell and, through
   recurse_totals() and recurse_subtotals(), to total and subtotal cells.

   NOTE(review): the branches that decide when the placeholder is used, when
   IS_MISSING is set, and whether the case is dropped are not visible in
   this excerpt -- confirm against the full function. */
3276 ctables_cell_insert (struct ctables_section *s,
3277 const struct ccase *c,
3278 double d_weight, double e_weight)
/* One category pointer per axis and nesting level; the second dimension is
   a fixed bound of 10, marked XXX by the author. */
3280 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3282 /* Does at least one categorical variable have a missing value in an included
3283 or excluded category? */
3284 bool is_missing = false;
3286 /* Does at least one categorical variable have a missing value in an excluded
3288 bool excluded_missing = false;
// First pass: choose a category for every categorical variable.
3290 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3292 const struct ctables_nest *nest = s->nests[a];
3293 for (size_t i = 0; i < nest->n; i++)
3295 if (i == nest->scale_idx)
3298 const struct variable *var = nest->vars[i];
3299 const union value *value = case_data (c, var);
3301 bool var_missing = var_is_value_missing (var, value) != 0;
3305 cats[a][i] = ctables_categories_match (
3306 s->table->categories[var_get_dict_index (var)], value, var);
// Static placeholder category, shared by all cases that take this path.
3312 static const struct ctables_category cct_excluded_missing = {
3313 .type = CCT_EXCLUDED_MISSING,
3316 cats[a][i] = &cct_excluded_missing;
3317 excluded_missing = true;
// Second pass: remember which values occurred, but only for cases in which
// no value was excluded.
3322 if (!excluded_missing)
3323 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3325 const struct ctables_nest *nest = s->nests[a];
3326 for (size_t i = 0; i < nest->n; i++)
3327 if (i != nest->scale_idx)
3329 const struct variable *var = nest->vars[i];
3330 const union value *value = case_data (c, var);
3331 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3335 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3336 d_weight, e_weight);
3338 //if (!excluded_missing)
3340 recurse_totals (s, c, cats, is_missing, excluded_missing,
3341 d_weight, e_weight, 0, 0);
3342 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3343 d_weight, e_weight, 0, 0);
/* Summary spec set that a merge cursor reads from.  merge_item_compare_3way()
   reaches it as `item->set' and indexes set->specs[] with the cursor's `ofs'
   member.  (Only this member of the enclosing declaration is visible in this
   excerpt.) */
3349 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specs that merge cursors A and B
   currently point at (set->specs[ofs]).  Specs are ordered first by
   `function' (ascending), then by `percentile' (a larger percentile sorts
   first, since a smaller one yields 1), and finally by strcmp() on `label'.
   Both labels are passed to strcmp() without a null check.  Returns a
   negative, zero, or positive value. */
3354 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3356 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3357 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3358 if (as->function != bs->function)
3359 return as->function > bs->function ? 1 : -1;
3360 else if (as->percentile != bs->percentile)
3361 return as->percentile < bs->percentile ? 1 : -1;
3362 return strcmp (as->label, bs->label);
/* Creates and returns a new pivot value to label category CAT.  Total and
   subtotal categories are labeled with the user text cat->total_label; any
   other category is labeled with VALUE as a value of variable VAR. */
3365 static struct pivot_value *
3366 ctables_category_create_label__ (const struct ctables_category *cat,
3367 const struct variable *var,
3368 const union value *value)
3370 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3371 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3372 : pivot_value_new_var_value (var, value));
/* Builds the label for postcompute category CAT from cat->pc->label,
   expanding each occurrence of ")LABEL[idx]" into the label text of the
   category with 1-based index IDX in CATS (as produced by
   ctables_category_create_label__() for VAR and VALUE).  Text outside such
   references is copied unchanged.  If the label contains no reference at
   all, it is returned as user text without going through the string buffer.

   NOTE(review): the handling of a malformed reference (no closing `]',
   index outside 1...n_cats, or trailing characters in the index) is not
   visible in this excerpt; presumably it jumps to the final statement,
   which returns the label text verbatim -- confirm, including whether OUT
   and LABEL2_S are released on every path. */
3375 static struct pivot_value *
3376 ctables_postcompute_label (const struct ctables_categories *cats,
3377 const struct ctables_category *cat,
3378 const struct variable *var,
3379 const union value *value)
3381 struct substring in = ss_cstr (cat->pc->label);
3382 struct substring target = ss_cstr (")LABEL[");
3384 struct string out = DS_EMPTY_INITIALIZER;
3387 size_t chunk = ss_find_substring (in, target);
/* No further reference: finish with whatever input remains. */
3388 if (chunk == SIZE_MAX)
3390 if (ds_is_empty (&out))
3391 return pivot_value_new_user_text (in.string, in.length);
3394 ds_put_substring (&out, in);
3395 return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
/* Copy the text before the reference, then step past ")LABEL[". */
3399 ds_put_substring (&out, ss_head (in, chunk));
3400 ss_advance (&in, chunk + target.length);
3402 struct substring idx_s;
3403 if (!ss_get_until (&in, ']', &idx_s))
/* The index must be a whole decimal number within 1...n_cats. */
3406 long int idx = strtol (idx_s.string, &tail, 10);
3407 if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
3410 struct ctables_category *cat2 = &cats->cats[idx - 1];
3411 struct pivot_value *label2
3412 = ctables_category_create_label__ (cat2, var, value);
3413 char *label2_s = pivot_value_to_string_defaults (label2);
3414 ds_put_cstr (&out, label2_s);
3416 pivot_value_destroy (label2);
3421 return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
/* Creates and returns a new pivot value to label category CAT of variable
   VAR.  A postcompute category that has its own label (cat->pc->label) is
   labeled through ctables_postcompute_label(), which may refer to other
   categories in CATS; everything else goes through
   ctables_category_create_label__(). */
3424 static struct pivot_value *
3425 ctables_category_create_label (const struct ctables_categories *cats,
3426 const struct ctables_category *cat,
3427 const struct variable *var,
3428 const union value *value)
3430 return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
3431 ? ctables_postcompute_label (cats, cat, var, value)
3432 : ctables_category_create_label__ (cat, var, value));
/* Searches T->clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and the precomputed HASH.

   NOTE(review): the return statements are not visible in this excerpt;
   presumably the matching entry is returned, or a null pointer if there is
   none (ctables_value_insert() uses the result that way) -- confirm. */
3435 static struct ctables_value *
3436 ctables_value_find__ (struct ctables_table *t, const union value *value,
3437 int width, unsigned int hash)
3439 struct ctables_value *clv;
3440 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3441 hash, &t->clabels_values_map)
3442 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The value is looked up first with
   ctables_value_find__(); the allocation below stores a private copy of
   VALUE under its hash.

   NOTE(review): the test on the lookup result and the remaining parameter
   (the value's width, used below) are not visible in this excerpt;
   presumably the allocation only runs when the value is not yet present --
   confirm. */
3448 ctables_value_insert (struct ctables_table *t, const union value *value,
3451 unsigned int hash = value_hash (value, width, 0);
3452 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3455 clv = xmalloc (sizeof *clv);
3456 value_clone (&clv->value, value, width);
3457 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, in T's map of category-label values.
   This is a convenience wrapper that computes the hash of VALUE (with basis
   0, matching ctables_value_insert()) and returns whatever
   ctables_value_find__() returns. */
3461 static struct ctables_value *
3462 ctables_value_find (struct ctables_table *t,
3463 const union value *value, int width)
3465 return ctables_value_find__ (t, value, width,
3466 value_hash (value, width, 0));
/* Recursively enumerates every combination of one nest per axis of T and
   appends a section for each.  While A is a valid axis, ix[A] steps through
   the nests stacked on that axis (at least one iteration even if the stack
   is empty) and the function recurses on the next axis.  The remaining code
   claims the next slot of t->sections, initializes it, and sets up one
   occurrence map per nest variable on each axis plus one domain map per
   domain type.

   NOTE(review): the `else' separating the recursive step from the section
   setup, and the assignment of the section's nests, are not visible in this
   excerpt -- confirm.  t->sections must already have room for the new
   entry; no growth happens here. */
3470 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3471 size_t ix[PIVOT_N_AXES])
3473 if (a < PIVOT_N_AXES)
3475 size_t limit = MAX (t->stacks[a].n, 1);
3476 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3477 ctables_table_add_section (t, a + 1, ix);
3481 struct ctables_section *s = &t->sections[t->n_sections++];
3482 *s = (struct ctables_section) {
3484 .cells = HMAP_INITIALIZER (s->cells),
3486 for (a = 0; a < PIVOT_N_AXES; a++)
3489 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3491 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3492 for (size_t i = 0; i < nest->n; i++)
3493 hmap_init (&s->occurrences[a][i]);
3495 for (size_t i = 0; i < N_CTDTS; i++)
3496 hmap_init (&s->domains[i]);
/* Binary operator callback that ctables_pcexpr_evaluate() hands to
   ctables_pcexpr_evaluate_nonterminal() with two arguments.
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns A + B -- confirm. */
3501 ctpo_add (double a, double b)
/* Binary operator callback that ctables_pcexpr_evaluate() hands to
   ctables_pcexpr_evaluate_nonterminal() with two arguments.
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns A - B -- confirm. */
3507 ctpo_sub (double a, double b)
/* Binary operator callback that ctables_pcexpr_evaluate() hands to
   ctables_pcexpr_evaluate_nonterminal() with two arguments.
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns A * B -- confirm. */
3513 ctpo_mul (double a, double b)
/* Division callback for postcompute expressions: returns A / B, or SYSMIS
   when B is zero, so that division by zero yields the system-missing value
   rather than an infinity or NaN. */
3519 ctpo_div (double a, double b)
3521 return b ? a / b : SYSMIS;
/* Exponentiation callback for postcompute expressions: computes pow (A, B).
   errno is saved before the call.
   NOTE(review): the rest of the body is not visible in this excerpt;
   presumably errno is inspected to detect a failed pow() and then restored
   from SAVE_ERRNO -- confirm. */
3525 ctpo_pow (double a, double b)
3527 int save_errno = errno;
3529 double result = pow (a, b);
/* Unary operator callback: ctables_pcexpr_evaluate() passes it to
   ctables_pcexpr_evaluate_nonterminal() with a single argument, so B is
   unused and only present to match the callback signature.
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns -A -- confirm. */
3537 ctpo_neg (double a, double b UNUSED)
/* Context shared by the postcompute expression evaluator functions.  It is
   filled in by ctables_cell_calculate_postcompute(), which also sets a
   `pc_a_idx' member that is not visible in this excerpt. */
3542 struct ctables_pcexpr_evaluate_ctx
/* Cell whose value is being computed. */
3544 const struct ctables_cell *cell;
/* Section whose cell map and occurrence maps are searched. */
3545 const struct ctables_section *section;
/* Categories of the variable that carries the postcompute category. */
3546 const struct ctables_categories *cats;
/* Axis of the postcompute category; together with pc_a_idx it selects the
   position whose category and value are substituted during evaluation. */
3547 enum pivot_axis_type pc_a;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function on its subexpressions before the definition appears. */
3551 static double ctables_pcexpr_evaluate (
3552 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E in context CTX.  The first N_ARGS
   subexpressions e->subs[] are evaluated into ARGS, and the result of
   EVALUATE (args[0], args[1]) is returned.  ARGS has room for two values, so
   N_ARGS must be 1 or 2; for a unary operator args[1] stays 0.

   NOTE(review): the statement executed when an argument is non-finite or
   SYSMIS is not visible in this excerpt; presumably SYSMIS is returned
   without calling EVALUATE -- confirm. */
3555 ctables_pcexpr_evaluate_nonterminal (
3556 const struct ctables_pcexpr_evaluate_ctx *ctx,
3557 const struct ctables_pcexpr *e, size_t n_args,
3558 double evaluate (double, double))
3560 double args[2] = { 0, 0 };
3561 for (size_t i = 0; i < n_args; i++)
3563 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3564 if (!isfinite (args[i]) || args[i] == SYSMIS)
3567 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that has the same key as ctx->cell
   except that, at the postcompute position (ctx->pc_a, ctx->pc_a_idx), the
   category and value come from PC_CV.  The key is hashed and compared with
   the same rule as ctables_cell_insert__(): the category pointer always
   takes part, the value only for categories that are not totals, subtotals
   or postcomputes.

   NOTE(review): the result when no such cell exists is not visible in this
   excerpt -- confirm.  Only the first summary (index 0, marked XXX by the
   author) of the located cell is evaluated. */
3571 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3572 const struct ctables_cell_value *pc_cv)
3574 const struct ctables_section *s = ctx->section;
/* Hash the substituted key. */
3577 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3579 const struct ctables_nest *nest = s->nests[a];
3580 for (size_t i = 0; i < nest->n; i++)
3581 if (i != nest->scale_idx)
3583 const struct ctables_cell_value *cv
3584 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3585 : &ctx->cell->axes[a].cvs[i]);
3586 hash = hash_pointer (cv->category, hash);
3587 if (cv->category->type != CCT_TOTAL
3588 && cv->category->type != CCT_SUBTOTAL
3589 && cv->category->type != CCT_POSTCOMPUTE)
3590 hash = value_hash (&cv->value,
3591 var_get_width (nest->vars[i]), hash);
/* Scan the cells with that hash for one whose key matches at every
   non-scale position. */
3595 struct ctables_cell *tc;
3596 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3598 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3600 const struct ctables_nest *nest = s->nests[a];
3601 for (size_t i = 0; i < nest->n; i++)
3602 if (i != nest->scale_idx)
3604 const struct ctables_cell_value *p_cv
3605 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3606 : &ctx->cell->axes[a].cvs[i]);
3607 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3608 if (p_cv->category != t_cv->category
3609 || (p_cv->category->type != CCT_TOTAL
3610 && p_cv->category->type != CCT_SUBTOTAL
3611 && p_cv->category->type != CCT_POSTCOMPUTE
3612 && !value_equal (&p_cv->value,
3614 var_get_width (nest->vars[i]))))
/* Evaluate the first summary of the located cell. */
3626 const struct ctables_table *t = s->table;
3627 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3628 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3629 size_t j = 0 /* XXX */;
3630 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
/* Evaluates postcompute expression E in context CTX and returns its value.
   Category operands are resolved to a category of ctx->cats with
   ctables_find_category_for_postcompute() and evaluated with
   ctables_pcexpr_evaluate_category(); operator nodes are delegated to
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*() callback.

   NOTE(review): the switch header, several case labels (including those of
   the operator nodes) and the handling of constants are not visible in this
   excerpt -- confirm against the full function. */
3634 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3635 const struct ctables_pcexpr *e)
3642 case CTPO_CAT_RANGE:
3644 struct ctables_cell_value cv = {
3645 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3647 assert (cv.category != NULL);
/* A range can cover many values: add up the cells of every value that
   occurred at the postcompute position and that maps to the range's
   category. */
3649 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3650 const struct ctables_occurrence *o;
3653 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3654 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3655 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3657 cv.value = o->value;
3658 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* These operands name a single category; its cell is looked up using the
   numeric value e->number. */
3663 case CTPO_CAT_NUMBER:
3664 case CTPO_CAT_STRING:
3665 case CTPO_CAT_MISSING:
3666 case CTPO_CAT_OTHERNM:
3667 case CTPO_CAT_SUBTOTAL:
3668 case CTPO_CAT_TOTAL:
3670 struct ctables_cell_value cv = {
3671 .category = ctables_find_category_for_postcompute (ctx->cats, e),
3672 .value = { .f = e->number },
3674 assert (cv.category != NULL);
3675 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: five binary ones and unary negation. */
3679 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3682 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3685 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3688 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3691 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3694 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Computes the value of postcompute cell CELL in section S.  The axes and
   nest positions of the cell are scanned in order for a category of type
   CCT_POSTCOMPUTE; its postcompute definition is then evaluated with
   ctables_pcexpr_evaluate() against the categories of the variable at that
   position.  The scan has no end condition of its own: the assertion fires
   if no axis holds a postcompute category, so CELL must contain one.

   NOTE(review): the statements that leave the scan once a postcompute
   category is found are not visible in this excerpt -- confirm. */
3701 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3702 const struct ctables_cell *cell)
3704 enum pivot_axis_type pc_a;
3706 const struct ctables_postcompute *pc;
3707 for (pc_a = 0; ; pc_a++)
3709 assert (pc_a < PIVOT_N_AXES);
3710 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3712 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3713 if (cv->category->type == CCT_POSTCOMPUTE)
3715 pc = cv->category->pc;
/* Evaluate the expression with the located position as context. */
3722 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3723 const struct ctables_categories *cats = s->table->categories[
3724 var_get_dict_index (var)];
3725 struct ctables_pcexpr_evaluate_ctx ctx = {
3730 .pc_a_idx = pc_a_idx,
3732 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Builds a pivot table from the tabulated contents of table T of command CT
   and submits it for output.  The visible steps are:

   - Create the pivot table with T's title (or "Custom Tables"), caption,
     corner text and CT's look.

   - Create a separate "Statistics" dimension when summary labels are not on
     the summary axis (or are hidden with more than one summary), and a
     separate categories dimension when category labels were moved.

   - For each axis, create its dimension; then for each nest stacked on the
     axis, gather the cells of the sections that use the nest, sort them,
     derive the list of label levels (variable label, category, summary),
     and create pivot categories for each cell, reusing the leading levels
     that the cell shares with the previous cell.

   - For every cell and summary, compute the value (through the postcompute
     evaluator for postcompute cells), format it, and store it in the table.

   NOTE(review): several statements, including releases of the temporary
   arrays allocated here, are not visible in this excerpt -- confirm against
   the full function. */
3736 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3738 struct pivot_table *pt = pivot_table_create__ (
3740 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3741 : pivot_value_new_text (N_("Custom Tables"))),
3744 pivot_table_set_caption (
3745 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
3747 pivot_table_set_corner_text (
3748 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* Summary statistics get a dimension of their own when their labels are not
   on the summary axis, or when the labels are hidden and there is more than
   one summary. */
3750 bool summary_dimension = (t->summary_axis != t->slabels_axis
3751 || (!t->slabels_visible
3752 && t->summary_specs.n > 1));
3753 if (summary_dimension)
3755 struct pivot_dimension *d = pivot_dimension_create (
3756 pt, t->slabels_axis, N_("Statistics"));
3757 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3758 if (!t->slabels_visible)
3759 d->hide_all_labels = true;
3760 for (size_t i = 0; i < specs->n; i++)
3761 pivot_category_create_leaf (
3762 d->root, pivot_value_new_text (specs->specs[i].label));
/* A non-null clabels_example means category labels were moved to another
   axis; they then get a dimension with one leaf per collected value. */
3765 bool categories_dimension = t->clabels_example != NULL;
3766 if (categories_dimension)
3768 struct pivot_dimension *d = pivot_dimension_create (
3769 pt, t->label_axis[t->clabels_from_axis],
3770 t->clabels_from_axis == PIVOT_AXIS_ROW
3771 ? N_("Row Categories")
3772 : N_("Column Categories"));
3773 const struct variable *var = t->clabels_example;
3774 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3775 for (size_t i = 0; i < t->n_clabels_values; i++)
3777 const struct ctables_value *value = t->clabels_values[i];
3778 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3779 assert (cat != NULL);
3780 pivot_category_create_leaf (d->root, ctables_category_create_label (
3781 c, cat, t->clabels_example,
3786 pivot_table_set_look (pt, ct->look);
3787 struct pivot_dimension *d[PIVOT_N_AXES];
3788 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3790 static const char *names[] = {
3791 [PIVOT_AXIS_ROW] = N_("Rows"),
3792 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3793 [PIVOT_AXIS_LAYER] = N_("Layers"),
/* An axis gets a dimension if it has variables or carries the summaries. */
3795 d[a] = (t->axes[a] || a == t->summary_axis
3796 ? pivot_dimension_create (pt, a, names[a])
3801 assert (t->axes[a]);
3803 for (size_t i = 0; i < t->stacks[a].n; i++)
3805 struct ctables_nest *nest = &t->stacks[a].nests[i];
/* Gather the sections that use this nest on axis A, counting their cells
   and the deepest nesting. */
3806 struct ctables_section **sections = xnmalloc (t->n_sections,
3808 size_t n_sections = 0;
3810 size_t n_total_cells = 0;
3811 size_t max_depth = 0;
3812 for (size_t j = 0; j < t->n_sections; j++)
3813 if (t->sections[j].nests[a] == nest)
3815 struct ctables_section *s = &t->sections[j];
3816 sections[n_sections++] = s;
3817 n_total_cells += s->cells.count;
3819 size_t depth = s->nests[a]->n;
3820 max_depth = MAX (depth, max_depth);
/* Collect the cells of those sections and sort them for this nest and
   axis. */
3823 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3825 size_t n_sorted = 0;
3827 for (size_t j = 0; j < n_sections; j++)
3829 struct ctables_section *s = sections[j];
3831 struct ctables_cell *cell;
3832 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3834 sorted[n_sorted++] = cell;
3835 assert (n_sorted <= n_total_cells);
3838 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3839 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* NOTE(review): the loop below prints debugging output and refers to cell
   members (`name', `contributes_to_domains') that are not set anywhere in
   the visible code; presumably it is compiled out -- confirm. */
3842 for (size_t j = 0; j < n_sorted; j++)
3844 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
/* One label level in the category hierarchy built for this nest. */
3849 struct ctables_level
3851 enum ctables_level_type
3853 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3854 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3855 CTL_SUMMARY, /* Summary functions. */
3859 enum settings_value_show vlabel; /* CTL_VAR only. */
/* At most two levels per variable (label and category) plus one summary
   level. */
3862 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3863 size_t n_levels = 0;
3864 for (size_t k = 0; k < nest->n; k++)
3866 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3867 if (vlabel != CTVL_NONE)
3869 levels[n_levels++] = (struct ctables_level) {
3871 .vlabel = (enum settings_value_show) vlabel,
3876 if (nest->scale_idx != k
3877 && (k != nest->n - 1 || t->label_axis[a] == a))
3879 levels[n_levels++] = (struct ctables_level) {
3880 .type = CTL_CATEGORY,
3886 if (!summary_dimension && a == t->slabels_axis)
3888 levels[n_levels++] = (struct ctables_level) {
3889 .type = CTL_SUMMARY,
3890 .var_idx = SIZE_MAX,
3894 /* Pivot categories:
3896 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3897 - category for nest->vars[0], if nest->scale_idx != 0
3898 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3899 - category for nest->vars[1], if nest->scale_idx != 1
3901 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3902 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3903 - summary function, if 'a == t->slabels_axis && a ==
3906 Additional dimensions:
3908 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3910 - If 't->label_axis[b] == a' for some 'b != a', add a category
3915 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3917 for (size_t j = 0; j < n_sorted; j++)
3919 struct ctables_cell *cell = sorted[j];
3920 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
// Count the leading label levels that this cell shares with the previous
// cell; those levels reuse the groups that already exist.
3922 size_t n_common = 0;
3925 for (; n_common < n_levels; n_common++)
3927 const struct ctables_level *level = &levels[n_common];
3928 if (level->type == CTL_CATEGORY)
3930 size_t var_idx = level->var_idx;
3931 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3932 if (prev->axes[a].cvs[var_idx].category != c)
3934 else if (c->type != CCT_SUBTOTAL
3935 && c->type != CCT_TOTAL
3936 && c->type != CCT_POSTCOMPUTE
// value_equal() takes the width of the values as its third argument, as at
// every other call in this file.  Passing the variable's type instead would
// compare string values on a prefix only, so distinct categories could be
// treated as the same label group.
3937 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3938 &cell->axes[a].cvs[var_idx].value,
3939 var_get_width (nest->vars[var_idx])))
// Create pivot categories for the levels that differ from the previous
// cell: the last level becomes a leaf, the others become groups.
3945 for (size_t k = n_common; k < n_levels; k++)
3947 const struct ctables_level *level = &levels[k];
3948 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3949 if (level->type == CTL_SUMMARY)
3951 assert (k == n_levels - 1);
3953 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3954 for (size_t m = 0; m < specs->n; m++)
3956 int leaf = pivot_category_create_leaf (
3957 parent, pivot_value_new_text (specs->specs[m].label));
3964 const struct variable *var = nest->vars[level->var_idx];
3965 struct pivot_value *label;
3966 if (level->type == CTL_VAR)
3968 label = pivot_value_new_variable (var);
3969 label->variable.show = level->vlabel;
3971 else if (level->type == CTL_CATEGORY)
3973 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3974 label = ctables_category_create_label (
3975 t->categories[var_get_dict_index (var)],
3976 cv->category, var, &cv->value);
3981 if (k == n_levels - 1)
3982 prev_leaf = pivot_category_create_leaf (parent, label);
3984 groups[k] = pivot_category_create_group__ (parent, label);
3988 cell->axes[a].leaf = prev_leaf;
// Fill in the data: one value per cell and summary, addressed by the leaf
// indexes of the dimensions created above.
3995 for (size_t i = 0; i < t->n_sections; i++)
3997 struct ctables_section *s = &t->sections[i];
3999 struct ctables_cell *cell;
4000 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
4005 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
4006 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
4007 for (size_t j = 0; j < specs->n; j++)
4010 size_t n_dindexes = 0;
4012 if (summary_dimension)
4013 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
4015 if (categories_dimension)
4017 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
4018 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
4019 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
4020 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
4023 dindexes[n_dindexes++] = ctv->leaf;
4026 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4029 int leaf = cell->axes[a].leaf;
4030 if (a == t->summary_axis && !summary_dimension)
4032 dindexes[n_dindexes++] = leaf;
4035 const struct ctables_summary_spec *ss = &specs->specs[j];
4037 double d = (cell->postcompute
4038 ? ctables_cell_calculate_postcompute (s, cell)
4039 : ctables_summary_value (cell, &cell->summaries[j], ss));
// Choose the presentation: a "<threshold" marker for small values when a
// hide threshold is set, the configured text for zero or system-missing
// values, a CTABLES-specific format, or a plain number.
4040 struct pivot_value *value;
4041 if (ct->hide_threshold != 0
4042 && d < ct->hide_threshold
4043 && (cell->postcompute
4045 : ctables_summary_function_is_count (ss->function)))
4047 value = pivot_value_new_user_text_nocopy (
4048 xasprintf ("<%d", ct->hide_threshold));
4050 else if (d == 0 && ct->zero)
4051 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
4052 else if (d == SYSMIS && ct->missing)
4053 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
4054 else if (specs->specs[j].is_ctables_format)
4056 char *s = data_out_stretchy (&(union value) { .f = d },
4058 &specs->specs[j].format,
4059 &ct->ctables_formats, NULL);
4060 value = pivot_value_new_user_text_nocopy (s);
4064 value = pivot_value_new_number (d);
4065 value->numeric.format = specs->specs[j].format;
4067 pivot_table_put (pt, dindexes, n_dindexes, value);
4072 pivot_table_submit (pt);
/* Validates the category label position requested for axis A of table T
   (t->label_axis[a]) and records A as t->clabels_from_axis and the innermost
   variable of the first nest on the axis as t->clabels_example.  Moving
   labels is rejected, with an error message naming the subcommand
   (ROWLABELS or COLLABELS) and position (LAYER or OPPOSITE), when:

   - the first nest's innermost variable has a CCT_FUNCTION category, that
     is, sorting based on a summary function;

   - the innermost variable of any nest on the axis is a scale variable;

   - the innermost variables of the nests differ in width, value labels, or
     category specifications.

   NOTE(review): the early exits and return statements are not visible in
   this excerpt; the caller combines the results with `&&', so presumably
   true means the position is acceptable -- confirm. */
4076 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4078 enum pivot_axis_type label_pos = t->label_axis[a];
4082 t->clabels_from_axis = a;
4084 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4085 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4087 const struct ctables_stack *stack = &t->stacks[a];
/* The first nest supplies the reference variable V0 and its categories C0,
   against which all other nests are compared. */
4091 const struct ctables_nest *n0 = &stack->nests[0];
4093 const struct variable *v0 = n0->vars[n0->n - 1];
4094 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4095 t->clabels_example = v0;
4097 for (size_t i = 0; i < c0->n_cats; i++)
4098 if (c0->cats[i].type == CCT_FUNCTION)
4100 msg (SE, _("%s=%s is not allowed with sorting based "
4101 "on a summary function."),
4102 subcommand_name, pos_name);
/* The innermost variable of the first nest must not be the scale
   variable. */
4105 if (n0->n - 1 == n0->scale_idx)
4107 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4108 "but %s is a scale variable."),
4109 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest's innermost variable must be compatible with V0. */
4113 for (size_t i = 1; i < stack->n; i++)
4115 const struct ctables_nest *ni = &stack->nests[i];
4117 const struct variable *vi = ni->vars[ni->n - 1];
4118 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4120 if (ni->n - 1 == ni->scale_idx)
4122 msg (SE, _("%s=%s requires the variables to be moved to be "
4123 "categorical, but %s is a scale variable."),
4124 subcommand_name, pos_name, var_get_name (vi));
4127 if (var_get_width (v0) != var_get_width (vi))
4129 msg (SE, _("%s=%s requires the variables to be "
4130 "moved to have the same width, but %s has "
4131 "width %d and %s has width %d."),
4132 subcommand_name, pos_name,
4133 var_get_name (v0), var_get_width (v0),
4134 var_get_name (vi), var_get_width (vi));
4137 if (!val_labs_equal (var_get_value_labels (v0),
4138 var_get_value_labels (vi)))
4140 msg (SE, _("%s=%s requires the variables to be "
4141 "moved to have the same value labels, but %s "
4142 "and %s have different value labels."),
4143 subcommand_name, pos_name,
4144 var_get_name (v0), var_get_name (vi));
4147 if (!ctables_categories_equal (c0, ci))
4149 msg (SE, _("%s=%s requires the variables to be "
4150 "moved to have the same category "
4151 "specifications, but %s and %s have different "
4152 "category specifications."),
4153 subcommand_name, pos_name,
4154 var_get_name (v0), var_get_name (vi));
/* Prepares table T for tabulation.  The visible steps are:

   - For each axis, expand the axis expression into a stack of nests with
     enumerate_fts() and, for each nest, build the per-domain-type lists of
     variable indexes (nest->domains[] and nest->n_domains[]).  One path
     instead gives the axis a single empty nest.

   - On the summary axis, give each nest default summary specs if it has
     none (mean for scale, count otherwise), make sure the CSV_TOTAL specs
     exist by cloning the CSV_CELL specs, and, if listwise deletion of scale
     missing values is enabled, collect the scale variables of the other
     nests in the same group.

   - Merge the summary specs of all nests and variants into
     t->summary_specs, in the order defined by merge_item_compare_3way(),
     and record in each spec's axis_idx its position in the merged set.

   - Check the ROW and COLUMN label positions.

   Returns the conjunction of the two ctables_check_label_position()
   results.

   NOTE(review): many control-flow lines are not visible in this excerpt, so
   the exact conditions under which each step runs should be confirmed
   against the full function. */
4163 ctables_prepare_table (struct ctables_table *t)
4165 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4168 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4170 for (size_t j = 0; j < t->stacks[a].n; j++)
4172 struct ctables_nest *nest = &t->stacks[a].nests[j];
4173 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Each list can hold up to nest->n indexes; it starts empty. */
4175 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4176 nest->n_domains[dt] = 0;
4178 for (size_t k = 0; k < nest->n; k++)
4180 if (k == nest->scale_idx)
4189 if (a != PIVOT_AXIS_LAYER)
4196 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4197 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4198 : a == PIVOT_AXIS_ROW)
/* True for the innermost categorical variable: the last variable, or the
   one before it when the last variable is the scale variable. */
4200 if (k == nest->n - 1
4201 || (nest->scale_idx == nest->n - 1
4202 && k == nest->n - 2))
4208 if (a == PIVOT_AXIS_COLUMN)
4213 if (a == PIVOT_AXIS_ROW)
4218 nest->domains[dt][nest->n_domains[dt]++] = k;
/* Give the axis a stack holding one nest with no variables (presumably when
   the axis has no expression -- the condition is not visible here). */
4225 struct ctables_nest *nest = xmalloc (sizeof *nest);
4226 *nest = (struct ctables_nest) { .n = 0 };
4227 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4230 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4231 for (size_t i = 0; i < stack->n; i++)
4233 struct ctables_nest *nest = &stack->nests[i];
/* No summary specs were given for this nest: supply a default one. */
4234 if (!nest->specs[CSV_CELL].n)
4236 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4237 specs->specs = xmalloc (sizeof *specs->specs);
4240 enum ctables_summary_function function
4241 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
/* NOTE(review): specs->var is read here to choose the default format but is
   only assigned a few lines below (4249) -- confirm that it already holds
   the intended variable at this point. */
4243 *specs->specs = (struct ctables_summary_spec) {
4244 .function = function,
4245 .format = ctables_summary_default_format (function, specs->var),
4246 .label = ctables_summary_default_label (function, 0),
4249 specs->var = nest->vars[0];
4251 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4252 &nest->specs[CSV_CELL]);
/* Cell specs were given but total specs were not: totals use the same
   specs. */
4254 else if (!nest->specs[CSV_TOTAL].n)
4255 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4256 &nest->specs[CSV_CELL]);
/* Listwise deletion: gather the scale variables of the other nests that
   share this nest's group head. */
4258 if (t->ctables->smissing_listwise)
4260 struct variable **listwise_vars = NULL;
4262 size_t allocated = 0;
4264 for (size_t j = nest->group_head; j < stack->n; j++)
4266 const struct ctables_nest *other_nest = &stack->nests[j];
4267 if (other_nest->group_head != nest->group_head)
/* scale_idx < n means the other nest has a scale variable. */
4270 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4273 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4274 sizeof *listwise_vars);
4275 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* All summary variants of the nest share the same array. */
4278 for (size_t j = 0; j < N_CSVS; j++)
4280 nest->specs[j].listwise_vars = listwise_vars;
4281 nest->specs[j].n_listwise_vars = n;
/* Merge the spec sets of all nests and variants into t->summary_specs.
   NOTE(review): ITEMS has room for 2 * stack->n cursors while the loop
   below adds N_CSVS per nest, so this relies on N_CSVS being at most 2 --
   confirm. */
4286 struct ctables_summary_spec_set *merged = &t->summary_specs;
4287 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4289 for (size_t j = 0; j < stack->n; j++)
4291 const struct ctables_nest *nest = &stack->nests[j];
4293 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4294 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Find the smallest spec among the cursors, append it to the merged set,
   then advance every cursor whose current spec compares equal to it,
   recording the merged index in that spec's axis_idx.  Exhausted cursors
   are removed by swapping in the last one. */
4299 struct merge_item min = items[0];
4300 for (size_t j = 1; j < n_left; j++)
4301 if (merge_item_compare_3way (&items[j], &min) < 0)
4304 if (merged->n >= merged->allocated)
4305 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4306 sizeof *merged->specs);
4307 merged->specs[merged->n++] = min.set->specs[min.ofs];
4309 for (size_t j = 0; j < n_left; )
4311 if (merge_item_compare_3way (&items[j], &min) == 0)
4313 struct merge_item *item = &items[j];
4314 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4315 if (++item->ofs >= item->set->n)
4317 items[j] = items[--n_left];
/* NOTE(review): the printf calls below dump the merged specs to stdout;
   presumably they are debugging output that is compiled out -- confirm. */
4326 for (size_t j = 0; j < merged->n; j++)
4327 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4329 for (size_t j = 0; j < stack->n; j++)
4331 const struct ctables_nest *nest = &stack->nests[j];
4332 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4334 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4335 for (size_t k = 0; k < specs->n; k++)
4336 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4337 specs->specs[k].axis_idx);
4343 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4344 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* Scans the nests on axis A of table T for case C.  For every nest, takes the
   nest's last variable and that variable's value in C; if the value matches
   one of the categories that T keeps for the variable, the value is handed to
   ctables_value_insert() together with the variable's width. */
4348 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4349 enum pivot_axis_type a)
4351 struct ctables_stack *stack = &t->stacks[a];
4352 for (size_t i = 0; i < stack->n; i++)
4354 const struct ctables_nest *nest = &stack->nests[i];
/* Only the last variable of the nest is examined. */
4355 const struct variable *var = nest->vars[nest->n - 1];
4356 const union value *value = case_data (c, var);
/* NOTE(review): the statement guarded by this system-missing test is not
   shown here; presumably such values are skipped -- confirm. */
4358 if (var_is_numeric (var) && value->f == SYSMIS)
/* T->categories is indexed by the variable's dictionary index. */
4361 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4363 ctables_value_insert (t, value, var_get_width (var));
4368 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4370 const struct ctables_value *const *ap = a_;
4371 const struct ctables_value *const *bp = b_;
4372 const struct ctables_value *a = *ap;
4373 const struct ctables_value *b = *bp;
4374 const int *width = width_;
4375 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T's sorted array of category-label values.

   Value labels of T->clabels_example that match the variable's categories
   are inserted with ctables_value_insert().  Then every entry of
   T->clabels_values_map is copied into a newly allocated T->clabels_values
   array, the array is sorted by value, and each entry's position in the
   sorted array is stored in its `leaf' member. */
4379 ctables_sort_clabels_values (struct ctables_table *t)
4381 const struct variable *v0 = t->clabels_example;
4382 int width = var_get_width (v0);
4384 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* NOTE(review): whatever condition guards the following value-label loop is
   not shown here -- confirm whether it always runs. */
4387 const struct val_labs *val_labs = var_get_value_labels (v0);
4388 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4389 vl = val_labs_next (val_labs, vl))
4390 if (ctables_categories_match (c0, &vl->value, v0))
4391 ctables_value_insert (t, &vl->value, width);
/* Flatten the hash map into an array of pointers to its entries. */
4394 size_t n = hmap_count (&t->clabels_values_map);
4395 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4397 struct ctables_value *clv;
4399 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4400 t->clabels_values[i++] = clv;
4401 t->n_clabels_values = n;
/* The width is passed through as auxiliary data for the comparison
   callback. */
4404 sort (t->clabels_values, n, sizeof *t->clabels_values,
4405 compare_clabels_values_3way, &width);
/* Record each value's index in sorted order. */
4407 for (size_t i = 0; i < n; i++)
4408 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR selected by each category in CATS,
   using ctables_add_occurrence().

   Depending on the kind of category, the value added is the category's own
   number, the category's string padded with spaces to VAR's width, or those
   of VAR's value labels that fall in a numeric range, fall in a string range,
   are missing values, or pass the category's include_missing filter.

   NOTE(review): most of the case labels that select among these branches are
   not shown here, so the mapping of category type to branch should be
   confirmed against the full switch statement. */
4412 ctables_add_category_occurrences (const struct variable *var,
4413 struct hmap *occurrences,
4414 const struct ctables_categories *cats)
4416 const struct val_labs *val_labs = var_get_value_labels (var);
4418 for (size_t i = 0; i < cats->n_cats; i++)
4420 const struct ctables_category *c = &cats->cats[i];
/* A single numeric value. */
4424 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A single string value: build a temporary value of VAR's width, padded on
   the right with spaces, and release it after adding the occurrence. */
4430 int width = var_get_width (var);
4432 value_init (&value, width);
4433 value_copy_buf_rpad (&value, width,
4434 CHAR_CAST (uint8_t *, c->string.string),
4435 c->string.length, ' ');
4436 ctables_add_occurrence (var, &value, occurrences);
4437 value_destroy (&value, width);
/* Numeric range: labeled values between nrange[0] and nrange[1],
   inclusive. */
4442 assert (var_is_numeric (var));
4443 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4444 vl = val_labs_next (val_labs, vl))
4445 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4446 ctables_add_occurrence (var, &vl->value, occurrences);
/* String range: labeled values accepted by in_string_range(). */
4450 assert (var_is_alpha (var));
4451 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4452 vl = val_labs_next (val_labs, vl))
4453 if (in_string_range (&vl->value, var, c->srange))
4454 ctables_add_occurrence (var, &vl->value, occurrences);
/* Labeled values that are missing values of VAR. */
4458 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4459 vl = val_labs_next (val_labs, vl))
4460 if (var_is_value_missing (var, &vl->value))
4461 ctables_add_occurrence (var, &vl->value, occurrences);
/* Every labeled value. */
4465 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4466 vl = val_labs_next (val_labs, vl))
4467 ctables_add_occurrence (var, &vl->value, occurrences);
4470 case CCT_POSTCOMPUTE:
/* Labeled values, leaving out missing values unless the category has
   include_missing set. */
4480 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4481 vl = val_labs_next (val_labs, vl))
4482 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4483 ctables_add_occurrence (var, &vl->value, occurrences);
4486 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates combinations of categories for the variables in
   section S's nests, one axis (A) and one variable within the axis (A_IDX)
   at a time.

   For the current variable, every value recorded in S->occurrences[A][A_IDX]
   is written into scratch case C, the matching category is stored in
   CATS[A][A_IDX], and the function recurses to the next variable.  Each
   postcompute category of the variable is then tried the same way.  Once
   every axis has been handled, ctables_cell_insert__() is called for the
   combination assembled in C and CATS. */
4493 ctables_section_recurse_add_empty_categories (
4494 struct ctables_section *s,
4495 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4496 enum pivot_axis_type a, size_t a_idx)
/* Base case: all axes processed. */
4498 if (a >= PIVOT_N_AXES)
4499 ctables_cell_insert__ (s, c, cats);
/* No nest on this axis, or all of its variables are done: go on to the first
   variable of the next axis. */
4500 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4501 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4504 const struct variable *var = s->nests[a]->vars[a_idx];
4505 const struct ctables_categories *categories = s->table->categories[
4506 var_get_dict_index (var)];
4507 int width = var_get_width (var);
4508 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4509 const struct ctables_occurrence *o;
4510 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Replace VAR's value in the scratch case with this occurrence. */
4512 union value *value = case_data_rw (c, var);
4513 value_destroy (value, width);
4514 value_clone (value, &o->value, width);
4515 cats[a][a_idx] = ctables_categories_match (categories, value, var);
/* Every recorded occurrence is expected to match some category. */
4516 assert (cats[a][a_idx] != NULL);
4517 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are enumerated directly from the category list. */
4520 for (size_t i = 0; i < categories->n_cats; i++)
4522 const struct ctables_category *cat = &categories->cats[i];
4523 if (cat->type == CCT_POSTCOMPUTE)
4525 cats[a][a_idx] = cat;
4526 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells for empty categories to section S.

   For each axis, every variable in the section's nest other than the one at
   scale_idx is looked up in the table's categories; when those categories
   have show_empty set, ctables_add_category_occurrences() adds their
   occurrences to S->occurrences.  A scratch case built from the dictionary's
   prototype is then passed to ctables_section_recurse_add_empty_categories()
   to insert the cells.

   NOTE(review): the statements that update and test `show_empty', and the
   release of the scratch case, are not shown here -- confirm. */
4533 ctables_section_add_empty_categories (struct ctables_section *s)
4535 bool show_empty = false;
4536 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4538 for (size_t k = 0; k < s->nests[a]->n; k++)
4539 if (k != s->nests[a]->scale_idx)
4541 const struct variable *var = s->nests[a]->vars[k];
4542 const struct ctables_categories *cats = s->table->categories[
4543 var_get_dict_index (var)];
4544 if (cats->show_empty)
4547 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* The second dimension is a fixed capacity of 10 variables per axis, which
   the original author flagged with XXX. */
4553 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4554 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4555 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Executes the tables in CT against the cases in dataset DS.

   For each table, allocates room for one section per combination of row,
   column, and layer nests (at least one per axis) and populates the sections
   with ctables_table_add_section().  Then reads every case from DS and
   inserts it, with its weights, into every section of every table; for each
   axis whose label_axis differs from the axis itself, the case's
   category-label values are collected as well.  Finally, per table, sorts the
   collected label values (when there is a clabels_example variable), adds
   empty categories to each section, and outputs the table.

   Returns the result of proc_commit(). */
4560 ctables_execute (struct dataset *ds, struct ctables *ct)
4562 for (size_t i = 0; i < ct->n_tables; i++)
4564 struct ctables_table *t = ct->tables[i];
/* An axis with no nests still counts as 1 in the product. */
4565 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4566 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4567 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4568 sizeof *t->sections);
4569 size_t ix[PIVOT_N_AXES];
4570 ctables_table_add_section (t, 0, ix);
4573 struct casereader *input = proc_open (ds);
4574 bool warn_on_invalid = true;
/* The loop releases each case before reading the next one. */
4575 for (struct ccase *c = casereader_read (input); c;
4576 case_unref (c), c = casereader_read (input))
/* d_weight comes from the dictionary's case weight.  e_weight is taken from
   CT->e_weight when that variable is set; the alternative is not shown here. */
4578 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4580 double e_weight = (ct->e_weight
4581 ? var_force_valid_weight (ct->e_weight,
4582 case_num (c, ct->e_weight),
4586 for (size_t i = 0; i < ct->n_tables; i++)
4588 struct ctables_table *t = ct->tables[i];
4590 for (size_t j = 0; j < t->n_sections; j++)
4591 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Collect label values only for axes whose labels are placed on another
   axis. */
4593 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4594 if (t->label_axis[a] != a)
4595 ctables_insert_clabels_values (t, c, a);
4598 casereader_destroy (input);
4600 for (size_t i = 0; i < ct->n_tables; i++)
4602 struct ctables_table *t = ct->tables[i];
4604 if (t->clabels_example)
4605 ctables_sort_clabels_values (t);
4607 for (size_t j = 0; j < t->n_sections; j++)
4608 ctables_section_add_empty_categories (&t->sections[j]);
4610 ctables_table_output (ct, ct->tables[i]);
4612 return proc_commit (ds);
/* Signature shared by the recursive-descent functions that parse one level of
   a postcompute expression.  Functions of this type are passed as the
   `parse_next_level' argument of parse_binary_operators(). */
4617 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
4618 struct dictionary *);
/* Releases the resources held by postcompute expression E.

   A CTPO_CAT_STRING node frees its string.  Nodes with subexpressions destroy
   both entries of E->subs recursively.  The remaining category node types
   listed below have nothing extra to release.  E's location is destroyed in
   every case.

   NOTE(review): the case labels for the operator nodes and the statements
   after msg_location_destroy() are not shown here -- confirm that E itself is
   freed. */
4621 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4627 case CTPO_CAT_STRING:
4628 ss_dealloc (&e->string);
/* Destroy both operands. */
4637 for (size_t i = 0; i < 2; i++)
4638 ctables_pcexpr_destroy (e->subs[i]);
4642 case CTPO_CAT_NUMBER:
4643 case CTPO_CAT_RANGE:
4644 case CTPO_CAT_MISSING:
4645 case CTPO_CAT_OTHERNM:
4646 case CTPO_CAT_SUBTOTAL:
4647 case CTPO_CAT_TOTAL:
4651 msg_location_destroy (e->location);
/* Allocates a new expression node for binary operator OP with operands SUB0
   and SUB1.  The node's location is the merger of the operands' locations
   (msg_location_merged()).  Both operands must be nonnull because their
   locations are read.

   NOTE(review): the initializer line that stores OP and the return statement
   are not shown here; the node is presumably returned to the caller, which
   then owns it -- confirm. */
4656 static struct ctables_pcexpr *
4657 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4658 struct ctables_pcexpr *sub0,
4659 struct ctables_pcexpr *sub1)
4661 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4662 *e = (struct ctables_pcexpr) {
4664 .subs = { sub0, sub1 },
4665 .location = msg_location_merged (sub0->location, sub1->location),
4670 /* How to parse an operator. */
/* `token' is the token type that match_operator() compares against the
   lexer's current token; `op' is the postcompute operation that
   parse_binary_operators__() builds when the token matches. */
4673 enum token_type token;
4674 enum ctables_postcompute_op op;
/* Searches the N_OPS entries of OPS, in order, for one whose token type is
   the type of LEXER's current token.

   NOTE(review): the statements executed on a match are not shown here.
   Presumably the matching entry is returned (NULL if none matches) and the
   token is consumed unless it is T_NEG_NUM, which is tested separately
   below -- confirm. */
4677 static const struct operator *
4678 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4680 for (const struct operator *op = ops; op < ops + n_ops; op++)
4681 if (lex_token (lexer) == op->token)
4683 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators at one precedence level, given that the
   leftmost operand LHS has already been parsed.

   OPS (N_OPS entries) lists the operators accepted at this level and
   PARSE_NEXT_LEVEL parses each right-hand operand.  Each operator matched
   combines the expression so far with the new operand into a binary node,
   which becomes the left operand for the next iteration, so chains associate
   to the left.  LHS is destroyed on the path where the right operand fails to
   parse.

   If CHAIN_WARNING is nonnull, it is issued as a warning at the expression's
   location when the operator counter exceeds 1.

   NOTE(review): the loop-exit and return statements are not shown here. */
4692 static struct ctables_pcexpr *
4693 parse_binary_operators__ (struct lexer *lexer, struct dictionary *dict,
4694 const struct operator ops[], size_t n_ops,
4695 parse_recursively_func *parse_next_level,
4696 const char *chain_warning,
4697 struct ctables_pcexpr *lhs)
/* op_count is the number of operators consumed so far. */
4699 for (int op_count = 0; ; op_count++)
4701 const struct operator *op = match_operator (lexer, ops, n_ops);
4704 if (op_count > 1 && chain_warning)
4705 msg_at (SW, lhs->location, "%s", chain_warning);
4710 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
4713 ctables_pcexpr_destroy (lhs);
/* Fold the new operand into the left operand. */
4717 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
4721 static struct ctables_pcexpr *
4722 parse_binary_operators (struct lexer *lexer, struct dictionary *dict,
4723 const struct operator ops[], size_t n_ops,
4724 parse_recursively_func *parse_next_level,
4725 const char *chain_warning)
4727 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
4731 return parse_binary_operators__ (lexer, dict, ops, n_ops, parse_next_level,
4732 chain_warning, lhs);
/* Forward declaration: parse_primary() calls parse_add() to parse a
   parenthesized subexpression. */
4735 static struct ctables_pcexpr *parse_add (struct lexer *, struct dictionary *);
4737 static struct ctables_pcexpr
4738 ctpo_cat_range (double low, double high)
4740 return (struct ctables_pcexpr) {
4741 .op = CTPO_CAT_RANGE,
4742 .range = { low, high },
4746 static struct ctables_pcexpr *
4747 parse_primary (struct lexer *lexer, struct dictionary *dict)
4749 int start_ofs = lex_ofs (lexer);
4750 struct ctables_pcexpr e;
4751 if (lex_is_number (lexer))
4753 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4754 .number = lex_number (lexer) };
4757 else if (lex_match_id (lexer, "MISSING"))
4758 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4759 else if (lex_match_id (lexer, "OTHERNM"))
4760 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4761 else if (lex_match_id (lexer, "TOTAL"))
4762 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4763 else if (lex_match_id (lexer, "SUBTOTAL"))
4765 size_t subtotal_index = 0;
4766 if (lex_match (lexer, T_LBRACK))
4768 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4770 subtotal_index = lex_integer (lexer);
4772 if (!lex_force_match (lexer, T_RBRACK))
4775 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4776 .subtotal_index = subtotal_index };
4778 else if (lex_match (lexer, T_LBRACK))
4780 if (lex_match_id (lexer, "LO"))
4782 if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
4784 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4787 else if (lex_is_number (lexer))
4789 double number = lex_number (lexer);
4791 if (lex_match_id (lexer, "THRU"))
4793 if (lex_match_id (lexer, "HI"))
4794 e = ctpo_cat_range (number, DBL_MAX);
4797 if (!lex_force_num (lexer))
4799 e = ctpo_cat_range (number, lex_number (lexer));
4804 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4807 else if (lex_is_string (lexer))
4809 struct substring s = recode_substring_pool (
4810 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
4811 ss_rtrim (&s, ss_cstr (" "));
4813 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
4818 lex_error (lexer, NULL);
4822 if (!lex_force_match (lexer, T_RBRACK))
4824 if (e.op == CTPO_CAT_STRING)
4825 ss_dealloc (&e.string);
4829 else if (lex_match (lexer, T_LPAREN))
4831 struct ctables_pcexpr *ep = parse_add (lexer, dict);
4834 if (!lex_force_match (lexer, T_RPAREN))
4836 ctables_pcexpr_destroy (ep);
4843 lex_error (lexer, NULL);
4847 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4848 return xmemdup (&e, sizeof e);
/* Allocates a new expression node that wraps operand SUB.  The node's
   location runs from token offset START_OFS through the token just before
   LEXER's current offset.

   NOTE(review): the initializer lines that set the operation and store SUB,
   and the return statement, are not shown here; judging by the function name
   the node is a negation -- confirm. */
4851 static struct ctables_pcexpr *
4852 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4853 struct lexer *lexer, int start_ofs)
4855 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4856 *e = (struct ctables_pcexpr) {
4859 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
/* Parses the exponentiation level of a postcompute expression.

   In the ordinary case this is a chain of `**' operators over primaries,
   parsed with parse_binary_operators(); chained use triggers the warning
   text below because the operator is left-associative.

   When the current token is a negative number immediately followed by `**',
   the number's sign is stripped to form a constant left operand, the chain is
   parsed from there with parse_binary_operators__(), and the whole result is
   passed to ctables_pcexpr_allocate_neg(), so that the negation applies to
   the result of the exponentiation rather than to its base. */
4864 static struct ctables_pcexpr *
4865 parse_exp (struct lexer *lexer, struct dictionary *dict)
4867 static const struct operator op = { T_EXP, CTPO_POW };
4869 const char *chain_warning =
4870 _("The exponentiation operator (`**') is left-associative: "
4871 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4872 "To disable this warning, insert parentheses.");
4874 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4875 return parse_binary_operators (lexer, dict, &op, 1,
4876 parse_primary, chain_warning);
4878 /* Special case for situations like "-5**6", which must be parsed as
4881 int start_ofs = lex_ofs (lexer);
4882 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4883 *lhs = (struct ctables_pcexpr) {
4884 .op = CTPO_CONSTANT,
4885 .number = -lex_tokval (lexer),
4886 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4890 struct ctables_pcexpr *node = parse_binary_operators__ (
4891 lexer, dict, &op, 1, parse_primary, chain_warning, lhs);
4895 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4898 /* Parses the unary minus level. */
4899 static struct ctables_pcexpr *
4900 parse_neg (struct lexer *lexer, struct dictionary *dict)
4902 int start_ofs = lex_ofs (lexer);
4903 if (!lex_match (lexer, T_DASH))
4904 return parse_exp (lexer, dict);
4906 struct ctables_pcexpr *inner = parse_neg (lexer, dict);
4910 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4913 /* Parses the multiplication and division level. */
/* Delegates to parse_binary_operators() with a table that maps T_ASTERISK to
   CTPO_MUL and T_SLASH to CTPO_DIV.  Returns whatever that call returns.

   NOTE(review): the arguments naming the operand parser and the chain
   warning are not shown here. */
4914 static struct ctables_pcexpr *
4915 parse_mul (struct lexer *lexer, struct dictionary *dict)
4917 static const struct operator ops[] =
4919 { T_ASTERISK, CTPO_MUL },
4920 { T_SLASH, CTPO_DIV },
/* The table length is computed from the array itself. */
4923 return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
4927 /* Parses the addition and subtraction level. */
/* Delegates to parse_binary_operators() with a table that maps T_PLUS to
   CTPO_ADD and T_DASH to CTPO_SUB.  Returns whatever that call returns.
   This is also the entry point used for whole expressions, both inside
   parentheses and in ctables_parse_pcompute().

   NOTE(review): the arguments naming the operand parser and the chain
   warning are not shown here. */
4928 static struct ctables_pcexpr *
4929 parse_add (struct lexer *lexer, struct dictionary *dict)
4931 static const struct operator ops[] =
4933 { T_PLUS, CTPO_ADD },
4934 { T_DASH, CTPO_SUB },
/* A negative-number token in operator position is mapped to addition;
   presumably the number keeps its sign as the right operand, so that the
   effect is subtraction -- confirm against match_operator(). */
4935 { T_NEG_NUM, CTPO_ADD },
4938 return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
4942 static struct ctables_postcompute *
4943 ctables_find_postcompute (struct ctables *ct, const char *name)
4945 struct ctables_postcompute *pc;
4946 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4947 utf8_hash_case_string (name, 0), &ct->postcomputes)
4948 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, which has the form

       &NAME = EXPR(expression)

   and records the resulting postcompute in CT.

   If CT already has a postcompute called NAME, a warning is issued at the new
   definition, a note points at the previous one, and the old expression and
   location are destroyed before being replaced.  Otherwise a new postcompute
   is allocated and inserted into CT->postcomputes under NAME's
   case-insensitive hash.  The postcompute's label is set to the source text
   of the expression.

   NOTE(review): the error-path statements, the final parameter of the
   signature, and the return statements are not shown here. */
4954 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The offset just before the current token, so that the recorded location
   also covers the preceding token. */
4957 int pcompute_start = lex_ofs (lexer) - 1;
4959 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
4962 char *name = ss_xstrdup (lex_tokss (lexer));
4965 if (!lex_force_match (lexer, T_EQUALS)
4966 || !lex_force_match_id (lexer, "EXPR")
4967 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression for use as the label. */
4973 int expr_start = lex_ofs (lexer);
4974 struct ctables_pcexpr *expr = parse_add (lexer, dict);
4975 int expr_end = lex_ofs (lexer) - 1;
4976 if (!expr || !lex_force_match (lexer, T_RPAREN))
4981 int pcompute_end = lex_ofs (lexer) - 1;
4983 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
4986 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
/* Redefinition: warn, then discard the old expression and location. */
4989 msg_at (SW, location, _("New definition of &%s will override the "
4990 "previous definition."),
4992 msg_at (SN, pc->location, _("This is the previous definition."));
4994 ctables_pcexpr_destroy (pc->expr);
4995 msg_location_destroy (pc->location);
/* First definition: the new postcompute takes over NAME. */
5000 pc = xmalloc (sizeof *pc);
5001 *pc = (struct ctables_postcompute) { .name = name };
5002 hmap_insert (&ct->postcomputes, &pc->hmap_node,
5003 utf8_hash_case_string (pc->name, 0));
5006 pc->location = location;
5008 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the argument of PPROPERTIES FORMAT into SSS, which is reset to an
   empty set first.

   The argument is a sequence of summary specifications, each a summary
   function followed by an output format; the PTILE function additionally
   takes a percentile between 0 and 100 before the format.  Parsing stops at
   the end of the command, at a slash, or at the identifier LABEL or
   HIDESOURCECATS.  Each format must pass fmt_check_output() and be compatible
   with numeric values.

   SSS is passed to ctables_summary_spec_set_uninit() near the end of the
   function; NOTE(review): presumably that is the error path and the caller
   owns SSS on success, but the jumps and return statements are not shown
   here -- confirm. */
5013 ctables_parse_pproperties_format (struct lexer *lexer,
5014 struct ctables_summary_spec_set *sss)
5016 *sss = (struct ctables_summary_spec_set) { .n = 0 };
5018 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
5019 && !(lex_token (lexer) == T_ID
5020 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
5021 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
5022 lex_tokss (lexer)))))
5024 /* Parse function. */
5025 enum ctables_summary_function function;
5026 if (!parse_ctables_summary_function (lexer, &function))
5029 /* Parse percentile. */
5030 double percentile = 0;
5031 if (function == CTSF_PTILE)
5033 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
5035 percentile = lex_number (lexer);
/* Parse and validate the output format. */
5040 struct fmt_spec format;
5041 if (!parse_format_specifier (lexer, &format)
5042 || !fmt_check_output (&format)
5043 || !fmt_check_type_compat (&format, VAL_NUMERIC))
/* Grow the array when it is full, then append the new specification. */
5046 if (sss->n >= sss->allocated)
5047 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
5048 sizeof *sss->specs);
5049 sss->specs[sss->n++] = (struct ctables_summary_spec) {
5050 .function = function,
5051 .percentile = percentile,
5058 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand.

   The body begins with one or more `&NAME' references, each of which must
   name a postcompute already defined in CT (an error is reported otherwise).
   It continues, until a slash or the end of the command, with any of these
   settings, each applied to every referenced postcompute:

     - LABEL=string replaces the postcompute's label with a copy of the
       string;
     - FORMAT=... parses a summary specification set and gives each
       postcompute its own clone of it;
     - HIDESOURCECATS=boolean sets hide_source_cats.

   The `=' after each keyword is optional.

   NOTE(review): the error-path statements, the code that records each
   referenced postcompute in `pcs', and the return statements are not shown
   here. */
5063 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5065 struct ctables_postcompute **pcs = NULL;
5067 size_t allocated_pcs = 0;
5069 while (lex_match (lexer, T_AND))
5071 if (!lex_force_id (lexer))
5073 struct ctables_postcompute *pc
5074 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5077 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5082 if (n_pcs >= allocated_pcs)
5083 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5087 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5089 if (lex_match_id (lexer, "LABEL"))
5091 lex_match (lexer, T_EQUALS);
5092 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label, replacing any old one. */
5095 for (size_t i = 0; i < n_pcs; i++)
5097 free (pcs[i]->label);
5098 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5103 else if (lex_match_id (lexer, "FORMAT"))
5105 lex_match (lexer, T_EQUALS);
5107 struct ctables_summary_spec_set sss;
5108 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets its own clone; the parsed original is released
   afterward. */
5111 for (size_t i = 0; i < n_pcs; i++)
5114 ctables_summary_spec_set_uninit (pcs[i]->specs);
5116 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5117 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5119 ctables_summary_spec_set_uninit (&sss);
5121 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5123 lex_match (lexer, T_EQUALS);
5124 bool hide_source_cats;
5125 if (!parse_bool (lexer, &hide_source_cats))
5127 for (size_t i = 0; i < n_pcs; i++)
5128 pcs[i]->hide_source_cats = hide_source_cats;
5132 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Formats time NOW, broken down as local time, according to the strftime()
   conversion specification FORMAT, and appends the result to OUT.

   Nothing is appended if NOW cannot be converted to local time or if
   strftime() reports that it produced no output, because in the latter case
   the contents of the buffer are indeterminate. */
static void
put_strftime (struct string *out, time_t now, const char *format)
{
  const struct tm *tm = localtime (&now);
  if (!tm)
    return;

  char value[128];
  if (strftime (value, sizeof value, format, tm) > 0)
    ds_put_cstr (out, value);
}
5154 skip_prefix (struct substring *s, struct substring prefix)
5156 if (ss_starts_with (*s, prefix))
5158 ss_advance (s, prefix.length);
/* Appends to OUT a readable rendering of the tokens of LEXER at offsets
   EXPR_START up to, but not including, EXPR_END.

   An identifier that names a variable in DICT that has a variable label is
   replaced by that label; any other identifier is copied as written.  Any
   other token is copied using its source representation, with a space before
   it (except at the start, before a right parenthesis, or when OUT already
   ends in a space) and a space after it (except at the end or after a left
   parenthesis).

   Left and right bracket tokens are tested separately from the cases above.
   NOTE(review): the statements they guard are not shown here; presumably
   they maintain the `nest' counter so that bracketed text is left out --
   confirm. */
5166 put_table_expression (struct string *out, struct lexer *lexer,
5167 struct dictionary *dict, int expr_start, int expr_end)
5170 for (int ofs = expr_start; ofs < expr_end; ofs++)
5172 const struct token *t = lex_ofs_token (lexer, ofs);
5173 if (t->type == T_LBRACK)
5175 else if (t->type == T_RBRACK && nest > 0)
5181 else if (t->type == T_ID)
/* Prefer the variable label when the identifier is a labeled variable. */
5183 const struct variable *var
5184 = dict_lookup_var (dict, t->string.string);
5185 const char *label = var ? var_get_label (var) : NULL;
5186 ds_put_cstr (out, label ? label : t->string.string);
5190 if (ofs != expr_start && t->type != T_RPAREN && ds_last (out) != ' ')
5191 ds_put_byte (out, ' ');
5193 char *repr = lex_ofs_representation (lexer, ofs, ofs);
5194 ds_put_cstr (out, repr);
5197 if (ofs + 1 != expr_end && t->type != T_LPAREN)
5198 ds_put_byte (out, ' ');
/* Appends title text IN to OUT, expanding the following keywords:

     )DATE   NOW formatted with the strftime() conversion "%x"
     )TIME   NOW formatted with the strftime() conversion "%X"
     )TABLE  the table expression made of LEXER's tokens from EXPR_START up
             to EXPR_END, rendered by put_table_expression() using DICT

   A right parenthesis that does not begin one of these keywords is copied
   unchanged, as is all other text. */
5204 put_title_text (struct string *out, struct substring in, time_t now,
5205 struct lexer *lexer, struct dictionary *dict,
5206 int expr_start, int expr_end)
/* Copy everything up to the next right parenthesis verbatim. */
5210 size_t chunk = ss_find_byte (in, ')');
5211 ds_put_substring (out, ss_head (in, chunk));
5212 ss_advance (&in, chunk);
/* Nothing left means there was no further right parenthesis. */
5213 if (ss_is_empty (in))
5216 if (skip_prefix (&in, ss_cstr (")DATE")))
5217 put_strftime (out, now, "%x");
5218 else if (skip_prefix (&in, ss_cstr (")TIME")))
5219 put_strftime (out, now, "%X");
5220 else if (skip_prefix (&in, ss_cstr (")TABLE")))
5221 put_table_expression (out, lexer, dict, expr_start, expr_end);
/* Not a keyword: emit the parenthesis itself and move past it. */
5224 ds_put_byte (out, ')');
5225 ss_advance (&in, 1);
5231 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5233 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5234 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5235 enum settings_value_show tvars = settings_get_show_variables ();
5236 for (size_t i = 0; i < n_vars; i++)
5237 vlabels[i] = (enum ctables_vlabel) tvars;
5239 struct pivot_table_look *look = pivot_table_look_unshare (
5240 pivot_table_look_ref (pivot_table_look_get_default ()));
5241 look->omit_empty = false;
5243 struct ctables *ct = xmalloc (sizeof *ct);
5244 *ct = (struct ctables) {
5245 .dict = dataset_dict (ds),
5247 .ctables_formats = FMT_SETTINGS_INIT,
5249 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5252 time_t now = time (NULL);
5257 const char *dot_string;
5258 const char *comma_string;
5260 static const struct ctf ctfs[4] = {
5261 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5262 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5263 { CTEF_PAREN, "-,(,),", "-.(.)." },
5264 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5266 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5267 for (size_t i = 0; i < 4; i++)
5269 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5270 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5271 fmt_number_style_from_string (s));
5274 if (!lex_force_match (lexer, T_SLASH))
5277 while (!lex_match_id (lexer, "TABLE"))
5279 if (lex_match_id (lexer, "FORMAT"))
5281 double widths[2] = { SYSMIS, SYSMIS };
5282 double units_per_inch = 72.0;
5284 while (lex_token (lexer) != T_SLASH)
5286 if (lex_match_id (lexer, "MINCOLWIDTH"))
5288 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5291 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5293 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5296 else if (lex_match_id (lexer, "UNITS"))
5298 lex_match (lexer, T_EQUALS);
5299 if (lex_match_id (lexer, "POINTS"))
5300 units_per_inch = 72.0;
5301 else if (lex_match_id (lexer, "INCHES"))
5302 units_per_inch = 1.0;
5303 else if (lex_match_id (lexer, "CM"))
5304 units_per_inch = 2.54;
5307 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
5311 else if (lex_match_id (lexer, "EMPTY"))
5316 lex_match (lexer, T_EQUALS);
5317 if (lex_match_id (lexer, "ZERO"))
5319 /* Nothing to do. */
5321 else if (lex_match_id (lexer, "BLANK"))
5322 ct->zero = xstrdup ("");
5323 else if (lex_force_string (lexer))
5325 ct->zero = ss_xstrdup (lex_tokss (lexer));
5331 else if (lex_match_id (lexer, "MISSING"))
5333 lex_match (lexer, T_EQUALS);
5334 if (!lex_force_string (lexer))
5338 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
5339 ? ss_xstrdup (lex_tokss (lexer))
5345 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
5346 "UNITS", "EMPTY", "MISSING");
5351 if (widths[0] != SYSMIS && widths[1] != SYSMIS
5352 && widths[0] > widths[1])
5354 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
5358 for (size_t i = 0; i < 2; i++)
5359 if (widths[i] != SYSMIS)
5361 int *wr = ct->look->width_ranges[TABLE_HORZ];
5362 wr[i] = widths[i] / units_per_inch * 96.0;
5367 else if (lex_match_id (lexer, "VLABELS"))
5369 if (!lex_force_match_id (lexer, "VARIABLES"))
5371 lex_match (lexer, T_EQUALS);
5373 struct variable **vars;
5375 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
5379 if (!lex_force_match_id (lexer, "DISPLAY"))
5384 lex_match (lexer, T_EQUALS);
5386 enum ctables_vlabel vlabel;
5387 if (lex_match_id (lexer, "DEFAULT"))
5388 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
5389 else if (lex_match_id (lexer, "NAME"))
5391 else if (lex_match_id (lexer, "LABEL"))
5392 vlabel = CTVL_LABEL;
5393 else if (lex_match_id (lexer, "BOTH"))
5395 else if (lex_match_id (lexer, "NONE"))
5399 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
5405 for (size_t i = 0; i < n_vars; i++)
5406 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5409 else if (lex_match_id (lexer, "MRSETS"))
5411 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5413 lex_match (lexer, T_EQUALS);
5414 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5417 else if (lex_match_id (lexer, "SMISSING"))
5419 if (lex_match_id (lexer, "VARIABLE"))
5420 ct->smissing_listwise = false;
5421 else if (lex_match_id (lexer, "LISTWISE"))
5422 ct->smissing_listwise = true;
5425 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5429 else if (lex_match_id (lexer, "PCOMPUTE"))
5431 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
5434 else if (lex_match_id (lexer, "PPROPERTIES"))
5436 if (!ctables_parse_pproperties (lexer, ct))
5439 else if (lex_match_id (lexer, "WEIGHT"))
5441 if (!lex_force_match_id (lexer, "VARIABLE"))
5443 lex_match (lexer, T_EQUALS);
5444 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
5448 else if (lex_match_id (lexer, " HIDESMALLCOUNTS"))
5450 if (lex_match_id (lexer, "COUNT"))
5452 lex_match (lexer, T_EQUALS);
5453 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5456 ct->hide_threshold = lex_integer (lexer);
5459 else if (ct->hide_threshold == 0)
5460 ct->hide_threshold = 5;
5464 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5465 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5466 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5470 if (!lex_force_match (lexer, T_SLASH))
5474 size_t allocated_tables = 0;
5477 if (ct->n_tables >= allocated_tables)
5478 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5479 sizeof *ct->tables);
5481 struct ctables_category *cat = xmalloc (sizeof *cat);
5482 *cat = (struct ctables_category) {
5484 .include_missing = false,
5485 .sort_ascending = true,
5488 struct ctables_categories *c = xmalloc (sizeof *c);
5489 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5490 *c = (struct ctables_categories) {
5497 struct ctables_categories **categories = xnmalloc (n_vars,
5498 sizeof *categories);
5499 for (size_t i = 0; i < n_vars; i++)
5502 struct ctables_table *t = xmalloc (sizeof *t);
5503 *t = (struct ctables_table) {
5505 .slabels_axis = PIVOT_AXIS_COLUMN,
5506 .slabels_visible = true,
5507 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5509 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5510 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5511 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5513 .clabels_from_axis = PIVOT_AXIS_LAYER,
5514 .categories = categories,
5515 .n_categories = n_vars,
5518 ct->tables[ct->n_tables++] = t;
5520 lex_match (lexer, T_EQUALS);
5521 int expr_start = lex_ofs (lexer);
5522 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5524 if (lex_match (lexer, T_BY))
5526 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5527 ct, t, PIVOT_AXIS_COLUMN))
5530 if (lex_match (lexer, T_BY))
5532 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5533 ct, t, PIVOT_AXIS_LAYER))
5537 int expr_end = lex_ofs (lexer);
5539 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5540 && !t->axes[PIVOT_AXIS_LAYER])
5542 lex_error (lexer, _("At least one variable must be specified."));
5546 const struct ctables_axis *scales[PIVOT_N_AXES];
5547 size_t n_scales = 0;
5548 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5550 scales[a] = find_scale (t->axes[a]);
5556 msg (SE, _("Scale variables may appear only on one axis."));
5557 if (scales[PIVOT_AXIS_ROW])
5558 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5559 _("This scale variable appears on the rows axis."));
5560 if (scales[PIVOT_AXIS_COLUMN])
5561 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5562 _("This scale variable appears on the columns axis."));
5563 if (scales[PIVOT_AXIS_LAYER])
5564 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5565 _("This scale variable appears on the layer axis."));
5569 const struct ctables_axis *summaries[PIVOT_N_AXES];
5570 size_t n_summaries = 0;
5571 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5573 summaries[a] = (scales[a]
5575 : find_categorical_summary_spec (t->axes[a]));
5579 if (n_summaries > 1)
5581 msg (SE, _("Summaries may appear only on one axis."));
5582 if (summaries[PIVOT_AXIS_ROW])
5583 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5584 _("This variable on the rows axis has a summary."));
5585 if (summaries[PIVOT_AXIS_COLUMN])
5586 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5587 _("This variable on the columns axis has a summary."));
5588 if (summaries[PIVOT_AXIS_LAYER])
5589 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5590 _("This variable on the layers axis has a summary."));
5593 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5594 if (n_summaries ? summaries[a] : t->axes[a])
5596 t->summary_axis = a;
5600 if (lex_token (lexer) == T_ENDCMD)
5602 if (!ctables_prepare_table (t))
5606 if (!lex_force_match (lexer, T_SLASH))
5609 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5611 if (lex_match_id (lexer, "SLABELS"))
5613 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5615 if (lex_match_id (lexer, "POSITION"))
5617 lex_match (lexer, T_EQUALS);
5618 if (lex_match_id (lexer, "COLUMN"))
5619 t->slabels_axis = PIVOT_AXIS_COLUMN;
5620 else if (lex_match_id (lexer, "ROW"))
5621 t->slabels_axis = PIVOT_AXIS_ROW;
5622 else if (lex_match_id (lexer, "LAYER"))
5623 t->slabels_axis = PIVOT_AXIS_LAYER;
5626 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5630 else if (lex_match_id (lexer, "VISIBLE"))
5632 lex_match (lexer, T_EQUALS);
5633 if (!parse_bool (lexer, &t->slabels_visible))
5638 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5643 else if (lex_match_id (lexer, "CLABELS"))
5645 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5647 if (lex_match_id (lexer, "AUTO"))
5649 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5650 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5652 else if (lex_match_id (lexer, "ROWLABELS"))
5654 lex_match (lexer, T_EQUALS);
5655 if (lex_match_id (lexer, "OPPOSITE"))
5656 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5657 else if (lex_match_id (lexer, "LAYER"))
5658 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5661 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5665 else if (lex_match_id (lexer, "COLLABELS"))
5667 lex_match (lexer, T_EQUALS);
5668 if (lex_match_id (lexer, "OPPOSITE"))
5669 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5670 else if (lex_match_id (lexer, "LAYER"))
5671 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5674 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5680 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5686 else if (lex_match_id (lexer, "CRITERIA"))
5688 if (!lex_force_match_id (lexer, "CILEVEL"))
5690 lex_match (lexer, T_EQUALS);
5692 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5694 t->cilevel = lex_number (lexer);
5697 else if (lex_match_id (lexer, "CATEGORIES"))
5699 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5703 else if (lex_match_id (lexer, "TITLES"))
5708 if (lex_match_id (lexer, "CAPTION"))
5709 textp = &t->caption;
5710 else if (lex_match_id (lexer, "CORNER"))
5712 else if (lex_match_id (lexer, "TITLE"))
5716 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5719 lex_match (lexer, T_EQUALS);
5721 struct string s = DS_EMPTY_INITIALIZER;
5722 while (lex_is_string (lexer))
5724 if (!ds_is_empty (&s))
5725 ds_put_byte (&s, ' ');
5726 put_title_text (&s, lex_tokss (lexer), now,
5727 lexer, dataset_dict (ds),
5728 expr_start, expr_end);
5732 *textp = ds_steal_cstr (&s);
5734 while (lex_token (lexer) != T_SLASH
5735 && lex_token (lexer) != T_ENDCMD);
5737 else if (lex_match_id (lexer, "SIGTEST"))
5741 t->chisq = xmalloc (sizeof *t->chisq);
5742 *t->chisq = (struct ctables_chisq) {
5744 .include_mrsets = true,
5745 .all_visible = true,
5751 if (lex_match_id (lexer, "TYPE"))
5753 lex_match (lexer, T_EQUALS);
5754 if (!lex_force_match_id (lexer, "CHISQUARE"))
5757 else if (lex_match_id (lexer, "ALPHA"))
5759 lex_match (lexer, T_EQUALS);
5760 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5762 t->chisq->alpha = lex_number (lexer);
5765 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5767 lex_match (lexer, T_EQUALS);
5768 if (parse_bool (lexer, &t->chisq->include_mrsets))
5771 else if (lex_match_id (lexer, "CATEGORIES"))
5773 lex_match (lexer, T_EQUALS);
5774 if (lex_match_id (lexer, "ALLVISIBLE"))
5775 t->chisq->all_visible = true;
5776 else if (lex_match_id (lexer, "SUBTOTALS"))
5777 t->chisq->all_visible = false;
5780 lex_error_expecting (lexer,
5781 "ALLVISIBLE", "SUBTOTALS");
5787 lex_error_expecting (lexer, "TYPE", "ALPHA",
5788 "INCLUDEMRSETS", "CATEGORIES");
5792 while (lex_token (lexer) != T_SLASH
5793 && lex_token (lexer) != T_ENDCMD);
5795 else if (lex_match_id (lexer, "COMPARETEST"))
5799 t->pairwise = xmalloc (sizeof *t->pairwise);
5800 *t->pairwise = (struct ctables_pairwise) {
5802 .alpha = { .05, .05 },
5803 .adjust = BONFERRONI,
5804 .include_mrsets = true,
5805 .meansvariance_allcats = true,
5806 .all_visible = true,
5815 if (lex_match_id (lexer, "TYPE"))
5817 lex_match (lexer, T_EQUALS);
5818 if (lex_match_id (lexer, "PROP"))
5819 t->pairwise->type = PROP;
5820 else if (lex_match_id (lexer, "MEAN"))
5821 t->pairwise->type = MEAN;
5824 lex_error_expecting (lexer, "PROP", "MEAN");
5828 else if (lex_match_id (lexer, "ALPHA"))
5830 lex_match (lexer, T_EQUALS);
5832 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5834 double a0 = lex_number (lexer);
5837 lex_match (lexer, T_COMMA);
5838 if (lex_is_number (lexer))
5840 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5842 double a1 = lex_number (lexer);
5845 t->pairwise->alpha[0] = MIN (a0, a1);
5846 t->pairwise->alpha[1] = MAX (a0, a1);
5849 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5851 else if (lex_match_id (lexer, "ADJUST"))
5853 lex_match (lexer, T_EQUALS);
5854 if (lex_match_id (lexer, "BONFERRONI"))
5855 t->pairwise->adjust = BONFERRONI;
5856 else if (lex_match_id (lexer, "BH"))
5857 t->pairwise->adjust = BH;
5858 else if (lex_match_id (lexer, "NONE"))
5859 t->pairwise->adjust = 0;
5862 lex_error_expecting (lexer, "BONFERRONI", "BH",
5867 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5869 lex_match (lexer, T_EQUALS);
5870 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5873 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5875 lex_match (lexer, T_EQUALS);
5876 if (lex_match_id (lexer, "ALLCATS"))
5877 t->pairwise->meansvariance_allcats = true;
5878 else if (lex_match_id (lexer, "TESTEDCATS"))
5879 t->pairwise->meansvariance_allcats = false;
5882 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5886 else if (lex_match_id (lexer, "CATEGORIES"))
5888 lex_match (lexer, T_EQUALS);
5889 if (lex_match_id (lexer, "ALLVISIBLE"))
5890 t->pairwise->all_visible = true;
5891 else if (lex_match_id (lexer, "SUBTOTALS"))
5892 t->pairwise->all_visible = false;
5895 lex_error_expecting (lexer, "ALLVISIBLE",
5900 else if (lex_match_id (lexer, "MERGE"))
5902 lex_match (lexer, T_EQUALS);
5903 if (!parse_bool (lexer, &t->pairwise->merge))
5906 else if (lex_match_id (lexer, "STYLE"))
5908 lex_match (lexer, T_EQUALS);
5909 if (lex_match_id (lexer, "APA"))
5910 t->pairwise->apa_style = true;
5911 else if (lex_match_id (lexer, "SIMPLE"))
5912 t->pairwise->apa_style = false;
5915 lex_error_expecting (lexer, "APA", "SIMPLE");
5919 else if (lex_match_id (lexer, "SHOWSIG"))
5921 lex_match (lexer, T_EQUALS);
5922 if (!parse_bool (lexer, &t->pairwise->show_sig))
5927 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5928 "INCLUDEMRSETS", "MEANSVARIANCE",
5929 "CATEGORIES", "MERGE", "STYLE",
5934 while (lex_token (lexer) != T_SLASH
5935 && lex_token (lexer) != T_ENDCMD);
5939 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5940 "CRITERIA", "CATEGORIES", "TITLES",
5941 "SIGTEST", "COMPARETEST");
5945 if (!lex_match (lexer, T_SLASH))
5949 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5950 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5952 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5956 if (!ctables_prepare_table (t))
5959 while (lex_token (lexer) != T_ENDCMD);
5961 bool ok = ctables_execute (ds, ct);
5962 ctables_destroy (ct);
5963 return ok ? CMD_SUCCESS : CMD_FAILURE;
5966 ctables_destroy (ct);