1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-in.h"
25 #include "data/data-out.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/mrset.h"
29 #include "data/subcase.h"
30 #include "data/value-labels.h"
31 #include "language/command.h"
32 #include "language/lexer/format-parser.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/hash-functions.h"
38 #include "libpspp/hmap.h"
39 #include "libpspp/i18n.h"
40 #include "libpspp/message.h"
41 #include "libpspp/string-array.h"
42 #include "math/mode.h"
43 #include "math/moments.h"
44 #include "math/percentiles.h"
45 #include "math/sort.h"
46 #include "output/pivot-table.h"
48 #include "gl/minmax.h"
49 #include "gl/xalloc.h"
52 #define _(msgid) gettext (msgid)
53 #define N_(msgid) (msgid)
/* Each CTVL_* member is defined as the like-named SETTINGS_VALUE_SHOW_*
   constant, so the two sets of values are numerically interchangeable. */
57 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
58 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
59 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
60 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
64 - unweighted summaries (U*)
65 - lower confidence limits (*.LCL)
66 - upper confidence limits (*.UCL)
67 - standard error (*.SE)
70 /* All variables. */ \
71 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
72 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
73 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
74 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
95 /* Scale variables, totals, and subtotals. */ \
96 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
97 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
106 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
107 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
108 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
109 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
111 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
112 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
113 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
118 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
120 #if 0 /* Multiple response sets not yet implemented. */
121 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
122 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
123 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
124 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
125 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
126 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
127 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
128 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
129 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
130 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
131 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
132 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
133 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
134 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
135 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
136 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
137 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
138 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
139 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
140 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
141 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
142 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
145 enum ctables_summary_function
147 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
153 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
154 N_CTSF_FUNCTIONS = SUMMARIES
158 static bool ctables_summary_function_is_count (enum ctables_summary_function);
160 enum ctables_domain_type
162 /* Within a section, where stacked variables divide one section from
164 CTDT_TABLE, /* All layers of a whole section. */
165 CTDT_LAYER, /* One layer within a section. */
166 CTDT_LAYERROW, /* Row in one layer within a section. */
167 CTDT_LAYERCOL, /* Column in one layer within a section. */
169 /* Within a subtable, where a subtable pairs an innermost row variable with
170 an innermost column variable within a single layer. */
171 CTDT_SUBTABLE, /* Whole subtable. */
172 CTDT_ROW, /* Row within a subtable. */
173 CTDT_COL, /* Column within a subtable. */
177 struct ctables_domain
179 struct hmap_node node;
181 const struct ctables_cell *example;
183 double d_valid; /* Dictionary weight. */
186 double e_valid; /* Effective weight */
191 enum ctables_summary_variant
200 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
201 all the axes (except the scalar variable, if any). */
202 struct hmap_node node;
204 /* The domains that contain this cell. */
205 uint32_t omit_domains;
206 struct ctables_domain *domains[N_CTDTS];
211 enum ctables_summary_variant sv;
213 struct ctables_cell_axis
215 struct ctables_cell_value
217 const struct ctables_category *category;
225 union ctables_summary *summaries;
232 const struct dictionary *dict;
233 struct pivot_table_look *look;
235 /* CTABLES has a number of extra formats that we implement via custom
236 currency specifications on an alternate fmt_settings. */
237 #define CTEF_NEGPAREN FMT_CCA
238 #define CTEF_NEQUAL FMT_CCB
239 #define CTEF_PAREN FMT_CCC
240 #define CTEF_PCTPAREN FMT_CCD
241 struct fmt_settings ctables_formats;
243 /* If this is NULL, zeros are displayed using the normal print format.
244 Otherwise, this string is displayed. */
247 /* If this is NULL, missing values are displayed using the normal print
248 format. Otherwise, this string is displayed. */
251 /* Indexed by variable dictionary index. */
252 enum ctables_vlabel *vlabels;
254 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
256 bool mrsets_count_duplicates; /* MRSETS. */
257 bool smissing_listwise; /* SMISSING. */
258 struct variable *e_weight; /* WEIGHT. */
259 int hide_threshold; /* HIDESMALLCOUNTS. */
261 struct ctables_table **tables;
265 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* A named postcompute definition.  Instances live in the 'postcomputes'
   hmap of struct ctables. */
268 struct ctables_postcompute
270 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
271 char *name; /* Name, without leading &. */
273 struct msg_location *location; /* Location of definition. */
274 struct ctables_pcexpr *expr; /* Expression (a struct ctables_pcexpr). */
276 struct ctables_summary_spec_set *specs;
277 bool hide_source_cats;
280 struct ctables_pcexpr
290 enum ctables_postcompute_op
293 CTPO_CONSTANT, /* 5 */
294 CTPO_CAT_NUMBER, /* [5] */
295 CTPO_CAT_STRING, /* ["STRING"] */
296 CTPO_CAT_RANGE, /* [LO THRU 5] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_RANGE. */
323 /* CTPO_CAT_SUBTOTAL. */
324 size_t subtotal_index;
326 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
327 One element: CTPO_NEG. */
328 struct ctables_pcexpr *subs[2];
331 /* Source location. */
332 struct msg_location *location;
335 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
336 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
337 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
338 struct ctables_pcexpr *sub1);
340 struct ctables_summary_spec_set
342 struct ctables_summary_spec *specs;
346 /* The variable to which the summary specs are applied. */
347 struct variable *var;
349 /* Whether the variable to which the summary specs are applied is a scale
350 variable for the purpose of summarization.
352 (VALIDN and TOTALN act differently for summarizing scale and categorical
356 /* If any of these optional additional scale variables are missing, then
357 treat 'var' as if it's missing too. This is for implementing
358 SMISSING=LISTWISE. */
359 struct variable **listwise_vars;
360 size_t n_listwise_vars;
363 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
364 const struct ctables_summary_spec_set *);
365 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
367 /* A nested sequence of variables, e.g. a > b > c. */
370 struct variable **vars;
373 size_t *domains[N_CTDTS];
374 size_t n_domains[N_CTDTS];
377 struct ctables_summary_spec_set specs[N_CSVS];
380 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
383 struct ctables_nest *nests;
389 struct hmap_node node;
394 struct ctables_occurrence
396 struct hmap_node node;
/* One section of a table: points back at its table and holds, per pivot
   axis, a nest and an occurrences map, plus the cell and domain maps. */
400 struct ctables_section
402 struct ctables_table *table;
403 struct ctables_nest *nests[PIVOT_N_AXES];
404 struct hmap *occurrences[PIVOT_N_AXES];
405 struct hmap cells; /* Contains "struct ctables_cell"s. */
406 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
411 struct ctables *ctables;
412 struct ctables_axis *axes[PIVOT_N_AXES];
413 struct ctables_stack stacks[PIVOT_N_AXES];
414 struct ctables_section *sections;
416 enum pivot_axis_type summary_axis;
417 struct ctables_summary_spec_set summary_specs;
419 const struct variable *clabels_example;
420 struct hmap clabels_values_map;
421 struct ctables_value **clabels_values;
422 size_t n_clabels_values;
424 enum pivot_axis_type slabels_axis;
425 bool slabels_visible;
427 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
429 Most commonly, label_axis[a] == a, and in particular we always have
430 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
432 If ROWLABELS or COLLABELS is specified, then one of
433 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
434 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
436 enum pivot_axis_type label_axis[PIVOT_N_AXES];
437 enum pivot_axis_type clabels_from_axis;
439 /* Indexed by variable dictionary index. */
440 struct ctables_categories **categories;
449 struct ctables_chisq *chisq;
450 struct ctables_pairwise *pairwise;
453 struct ctables_categories
456 struct ctables_category *cats;
461 struct ctables_category
463 enum ctables_category_type
465 /* Explicit category lists. */
473 /* Totals and subtotals. */
477 /* Implicit category lists. */
482 /* For contributing to TOTALN. */
483 CCT_EXCLUDED_MISSING,
487 struct ctables_category *subtotal;
493 double number; /* CCT_NUMBER. */
494 struct substring string; /* CCT_STRING, in dictionary encoding. */
495 double range[2]; /* CCT_RANGE. */
499 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
500 bool hide_subcategories; /* CCT_SUBTOTAL. */
503 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
505 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
508 bool include_missing;
512 enum ctables_summary_function sort_function;
513 struct variable *sort_var;
518 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
519 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
520 struct msg_location *location;
/* Releases the type-specific storage owned by CAT.  The branches visible
   here deallocate the 'string' substring and free 'total_label'.
   NOTE(review): the case labels guarding each branch are only partly
   visible -- confirm which category types own which member. */
524 ctables_category_uninit (struct ctables_category *cat)
535 case CCT_POSTCOMPUTE:
539 ss_dealloc (&cat->string);
544 free (cat->total_label);
552 case CCT_EXCLUDED_MISSING:
/* Compares categories A and B.  The types are compared first; after that
   only the members relevant to a given type are compared (number, string,
   range bounds, postcompute, total label, or the sort/missing settings). */
558 ctables_category_equal (const struct ctables_category *a,
559 const struct ctables_category *b)
561 if (a->type != b->type)
567 return a->number == b->number;
570 return ss_equals (a->string, b->string);
573 return a->range[0] == b->range[0] && a->range[1] == b->range[1];
579 case CCT_POSTCOMPUTE:
580 return a->pc == b->pc; /* Pointer identity, not a deep comparison. */
584 return !strcmp (a->total_label, b->total_label); /* Labels compared by content. */
/* Every sort/missing setting must match. */
589 return (a->include_missing == b->include_missing
590 && a->sort_ascending == b->sort_ascending
591 && a->sort_function == b->sort_function
592 && a->sort_var == b->sort_var
593 && a->percentile == b->percentile);
595 case CCT_EXCLUDED_MISSING:
/* Drops a reference to C.  Asserts that the reference count is positive,
   and uninitializes every category in C->cats.
   NOTE(review): the decrement and the condition under which the categories
   are torn down are not visible here -- presumably only when the count
   reaches zero; confirm. */
603 ctables_categories_unref (struct ctables_categories *c)
608 assert (c->n_refs > 0);
612 for (size_t i = 0; i < c->n_cats; i++)
613 ctables_category_uninit (&c->cats[i]);
/* Compares category lists A and B: the category counts and 'show_empty'
   settings are checked first, then the categories are compared pairwise,
   in order, with ctables_category_equal(). */
619 ctables_categories_equal (const struct ctables_categories *a,
620 const struct ctables_categories *b)
622 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
625 for (size_t i = 0; i < a->n_cats; i++)
626 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
632 /* Chi-square test (SIGTEST). */
640 /* Pairwise comparison test (COMPARETEST). */
641 struct ctables_pairwise
643 enum { PROP, MEAN } type;
646 bool meansvariance_allcats;
648 enum { BONFERRONI = 1, BH } adjust;
672 struct variable *var;
674 struct ctables_summary_spec_set specs[N_CSVS];
678 struct ctables_axis *subs[2];
681 struct msg_location *loc;
684 static void ctables_axis_destroy (struct ctables_axis *);
693 enum ctables_function_availability
695 CTFA_ALL, /* Any variables. */
696 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
697 CTFA_MRSETS, /* Only multiple-response sets */
700 struct ctables_summary_spec
702 enum ctables_summary_function function;
703 double percentile; /* CTSF_PTILE only. */
706 struct fmt_spec format;
707 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
/* Copies SRC into DST.  The label is duplicated with xstrdup(), so DST gets
   its own copy of the string rather than sharing SRC's pointer. */
713 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
714 const struct ctables_summary_spec *src)
717 dst->label = xstrdup (src->label);
721 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
/* Initializes DST as a copy of SRC.  A new array of SRC->n specs is
   allocated and each element is cloned individually with
   ctables_summary_spec_clone(); DST is then overwritten wholesale with a
   compound literal (only the 'is_scale' initializer is visible here). */
728 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
729 const struct ctables_summary_spec_set *src)
731 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
732 for (size_t i = 0; i < src->n; i++)
733 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
735 *dst = (struct ctables_summary_spec_set) {
740 .is_scale = src->is_scale,
/* Uninitializes each of the SET->n summary specs in SET.
   NOTE(review): release of the 'specs' array itself is not visible here. */
745 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
747 for (size_t i = 0; i < set->n; i++)
748 ctables_summary_spec_uninit (&set->specs[i]);
/* Parses a column-width setting: an optional '=' followed by either the
   keyword DEFAULT or a number in the closed range [0, DBL_MAX].  NAME is
   passed to the range check.  A number is stored in *WIDTH.
   NOTE(review): the value stored for DEFAULT is not visible here. */
753 parse_col_width (struct lexer *lexer, const char *name, double *width)
755 lex_match (lexer, T_EQUALS); /* The '=' is optional; the result is ignored. */
756 if (lex_match_id (lexer, "DEFAULT"))
758 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
760 *width = lex_number (lexer);
/* Parses the keyword NO or YES from LEXER; the result is presumably stored
   in *B (the assignments are not visible here).  If neither keyword is
   present, reports an error saying that YES or NO was expected. */
770 parse_bool (struct lexer *lexer, bool *b)
772 if (lex_match_id (lexer, "NO"))
774 else if (lex_match_id (lexer, "YES"))
778 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class of summary function F.  The lookup table
   is indexed by function enumerator; the S() macro below fills each slot
   with the AVAILABILITY column of a summary-function entry. */
784 static enum ctables_function_availability
785 ctables_function_availability (enum ctables_summary_function f)
787 static enum ctables_function_availability availability[] = {
788 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
793 return availability[f];
797 ctables_summary_function_is_count (enum ctables_summary_function f)
803 case CTSF_ROWPCT_COUNT:
804 case CTSF_COLPCT_COUNT:
805 case CTSF_TABLEPCT_COUNT:
806 case CTSF_SUBTABLEPCT_COUNT:
807 case CTSF_LAYERPCT_COUNT:
808 case CTSF_LAYERROWPCT_COUNT:
809 case CTSF_LAYERCOLPCT_COUNT:
812 case CTSF_ROWPCT_VALIDN:
813 case CTSF_COLPCT_VALIDN:
814 case CTSF_TABLEPCT_VALIDN:
815 case CTSF_SUBTABLEPCT_VALIDN:
816 case CTSF_LAYERPCT_VALIDN:
817 case CTSF_LAYERROWPCT_VALIDN:
818 case CTSF_LAYERCOLPCT_VALIDN:
819 case CTSF_ROWPCT_TOTALN:
820 case CTSF_COLPCT_TOTALN:
821 case CTSF_TABLEPCT_TOTALN:
822 case CTSF_SUBTABLEPCT_TOTALN:
823 case CTSF_LAYERPCT_TOTALN:
824 case CTSF_LAYERROWPCT_TOTALN:
825 case CTSF_LAYERCOLPCT_TOTALN:
842 case CTSF_ROWPCT_SUM:
843 case CTSF_COLPCT_SUM:
844 case CTSF_TABLEPCT_SUM:
845 case CTSF_SUBTABLEPCT_SUM:
846 case CTSF_LAYERPCT_SUM:
847 case CTSF_LAYERROWPCT_SUM:
848 case CTSF_LAYERCOLPCT_SUM:
/* Parses a summary function name from LEXER and stores the matching
   enumerator in *F.  The current token must be an identifier; it is compared
   case-insensitively against a static table of (function, name) pairs.  The
   table also lists the seven *PCT names without their ".COUNT" suffix as
   aliases.  If no entry matches, an error is reported. */
856 parse_ctables_summary_function (struct lexer *lexer,
857 enum ctables_summary_function *f)
861 enum ctables_summary_function function;
862 struct substring name;
864 static struct pair names[] = {
865 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
866 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
869 /* The .COUNT suffix may be omitted. */
870 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
871 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
872 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
873 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
874 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
875 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
876 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
880 if (!lex_force_id (lexer))
/* Linear scan over the name table. */
883 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
884 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
886 *f = names[i].function;
891 lex_error (lexer, _("Expecting summary function name."));
/* Destroys AXIS.  The visible statements uninitialize each of the N_CSVS
   summary spec sets, recursively destroy both sub-axes, and destroy the
   source location.
   NOTE(review): which statements run for which axis->op, and the null check
   that callers appear to rely on (e.g. ctables_axis_parse_primary() may pass
   a null pointer), are not visible here -- confirm. */
896 ctables_axis_destroy (struct ctables_axis *axis)
904 for (size_t i = 0; i < N_CSVS; i++)
905 ctables_summary_spec_set_uninit (&axis->specs[i]);
910 ctables_axis_destroy (axis->subs[0]);
911 ctables_axis_destroy (axis->subs[1]);
914 msg_location_destroy (axis->loc);
/* Allocates a new axis node with SUB0 and SUB1 as its two sub-axes.  The
   node's source location spans the tokens from START_OFS up to the token
   just before LEXER's current offset.
   NOTE(review): OP is presumably stored in the node's 'op' member; that
   initializer is not visible here. */
918 static struct ctables_axis *
919 ctables_axis_new_nonterminal (enum ctables_axis_op op,
920 struct ctables_axis *sub0,
921 struct ctables_axis *sub1,
922 struct lexer *lexer, int start_ofs)
924 struct ctables_axis *axis = xmalloc (sizeof *axis);
925 *axis = (struct ctables_axis) {
927 .subs = { sub0, sub1 },
928 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
933 struct ctables_axis_parse_ctx
936 struct dictionary *dict;
938 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION applied to VAR.
   The FORMAT column of the summary-function table selects among three
   results: F with width 40, PCT with width 40 and 1 decimal, or VAR's own
   print format.
   NOTE(review): the case labels are not visible here; presumably they are
   CTF_COUNT, CTF_PERCENT and CTF_GENERAL in that order -- confirm. */
941 static struct fmt_spec
942 ctables_summary_default_format (enum ctables_summary_function function,
943 const struct variable *var)
945 static const enum ctables_format default_formats[] = {
946 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
950 switch (default_formats[function])
953 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
956 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
959 return *var_get_print_format (var);
/* Returns a newly allocated default label for summary FUNCTION.  For
   CTSF_PTILE the label is formatted to include the percentile value with two
   decimals; for every other function it is the translated LABEL column of
   the summary-function table.  The result comes from xasprintf() or
   xstrdup(), so the caller owns it. */
967 ctables_summary_default_label (enum ctables_summary_function function,
970 static const char *default_labels[] = {
971 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
976 return (function == CTSF_PTILE
977 ? xasprintf (_("Percentile %.2f"), percentile)
978 : xstrdup (gettext (default_labels[function])));
/* Returns the syntax name of summary FUNCTION, i.e. the NAME column of the
   summary-function table.  The string is statically allocated and must not
   be freed. */
982 ctables_summary_function_name (enum ctables_summary_function function)
984 static const char *names[] = {
985 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
989 return names[function];
/* Adds a summary specification (FUNCTION, PERCENTILE, LABEL, FORMAT) to
   variant SV of AXIS.

   If AXIS is a variable, the function's availability class is examined
   first.  Diagnostics are issued at LOC and at the axis's own location when
   the function applies only to multiple response sets or only to scale
   variables (the exact conditions are not fully visible here).  Otherwise
   the spec is appended to AXIS->specs[SV].  LABEL is copied, so the caller
   keeps ownership of its string.  A null FORMAT selects the function's
   default format for the variable.

   If AXIS is not a variable, the spec is added recursively to both
   sub-axes, stopping at the first failure. */
993 add_summary_spec (struct ctables_axis *axis,
994 enum ctables_summary_function function, double percentile,
995 const char *label, const struct fmt_spec *format,
996 bool is_ctables_format, const struct msg_location *loc,
997 enum ctables_summary_variant sv)
999 if (axis->op == CTAO_VAR)
1001 const char *function_name = ctables_summary_function_name (function);
1002 const char *var_name = var_get_name (axis->var);
1003 switch (ctables_function_availability (function))
1006 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1007 "response sets."), function_name);
1008 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1017 _("Summary function %s applies only to scale variables."),
1019 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1030 struct ctables_summary_spec_set *set = &axis->specs[sv];
/* Grow the spec array when it is full. */
1031 if (set->n >= set->allocated)
1032 set->specs = x2nrealloc (set->specs, &set->allocated,
1033 sizeof *set->specs);
1035 struct ctables_summary_spec *dst = &set->specs[set->n++];
1036 *dst = (struct ctables_summary_spec) {
1037 .function = function,
1038 .percentile = percentile,
1039 .label = xstrdup (label),
1040 .format = (format ? *format
1041 : ctables_summary_default_format (function, axis->var)),
1042 .is_ctables_format = is_ctables_format,
/* Non-variable axis: apply the spec to both children. */
1048 for (size_t i = 0; i < 2; i++)
1049 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1050 format, is_ctables_format, loc, sv))
1056 static struct ctables_axis *ctables_axis_parse_stack (
1057 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack expression
   or a single variable name.

   For a variable, an optional "[S]" marks it as scale and an optional "[C]"
   as categorical; with neither, it is scale exactly when its measurement
   level is MEASURE_SCALE.  A string variable may not be scale: that case is
   reported as an error and the new axis is destroyed. */
1060 static struct ctables_axis *
1061 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1063 if (lex_match (ctx->lexer, T_LPAREN))
1065 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
/* SUB may be null here, so the destroy call below must tolerate that. */
1066 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1068 ctables_axis_destroy (sub);
1074 if (!lex_force_id (ctx->lexer))
1077 int start_ofs = lex_ofs (ctx->lexer);
1078 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1082 struct ctables_axis *axis = xmalloc (sizeof *axis);
1083 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1085 /* XXX should figure out default measures by reading data */
1086 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1087 : lex_match_phrase (ctx->lexer, "[C]") ? false
1088 : var_get_measure (var) == MEASURE_SCALE);
/* The location covers the variable name and any [S]/[C] marker. */
1089 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1090 lex_ofs (ctx->lexer) - 1);
1091 if (axis->scale && var_is_alpha (var))
1093 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1095 var_get_name (var));
1096 ctables_axis_destroy (axis);
/* Returns true if S contains at least one decimal digit.  strcspn() yields
   the length of the leading run of non-digits, so the character at that
   index is either the first digit or the terminating null. */
1104 has_digit (const char *s)
1106 return s[strcspn (s, "0123456789")] != '\0';
/* Parses an output format specifier from LEXER into FORMAT.

   The type name is first checked, case-insensitively, against the
   CTABLES-specific formats NEGPAREN, NEQUAL, PAREN and PCTPAREN, which map
   to the CTEF_* types.  Any other name sets *IS_CTABLES_FORMAT to false and
   is parsed as an ordinary format specifier, which must also be a valid
   output format compatible with numeric values.

   A CTABLES-specific format is rejected with an error if its width is too
   small or if its decimals exceed width minus one; otherwise
   *IS_CTABLES_FORMAT is set to true. */
1110 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1111 bool *is_ctables_format)
1113 char type[FMT_TYPE_LEN_MAX + 1];
1114 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1117 if (!strcasecmp (type, "NEGPAREN"))
1118 format->type = CTEF_NEGPAREN;
1119 else if (!strcasecmp (type, "NEQUAL"))
1120 format->type = CTEF_NEQUAL;
1121 else if (!strcasecmp (type, "PAREN"))
1122 format->type = CTEF_PAREN;
1123 else if (!strcasecmp (type, "PCTPAREN"))
1124 format->type = CTEF_PCTPAREN;
1127 *is_ctables_format = false;
1128 return (parse_format_specifier (lexer, format)
1129 && fmt_check_output (format)
1130 && fmt_check_type_compat (format, VAL_NUMERIC));
1135 msg (SE, _("Output format %s requires width 2 or greater."), type);
1138 else if (format->d > format->w - 1)
1140 msg (SE, _("Output format %s requires width greater than decimals."),
1146 *is_ctables_format = true;
/* Parses a primary axis expression optionally followed by a bracketed list
   of summary specifications.

   Each specification is a function name; a percentile in [0, 100] for PTILE
   only; an optional quoted label, with the function's default label used
   otherwise; and an optional format, recognized as an identifier that
   contains a digit.  Each one is added to the parsed axis through
   add_summary_spec().  Specifications may be separated by commas.

   Parsing starts in the CSV_CELL variant.  The keyword TOTALS followed by
   '[' introduces a nested list; once in CSV_TOTAL, an extra ']' is required
   to close it.  The error path visible at the end destroys the parsed axis. */
1151 static struct ctables_axis *
1152 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1154 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1155 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1158 enum ctables_summary_variant sv = CSV_CELL;
1161 int start_ofs = lex_ofs (ctx->lexer);
1163 /* Parse function. */
1164 enum ctables_summary_function function;
1165 if (!parse_ctables_summary_function (ctx->lexer, &function))
1168 /* Parse percentile. */
1169 double percentile = 0;
1170 if (function == CTSF_PTILE)
1172 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1174 percentile = lex_number (ctx->lexer);
1175 lex_get (ctx->lexer);
1180 if (lex_is_string (ctx->lexer))
1182 label = ss_xstrdup (lex_tokss (ctx->lexer));
1183 lex_get (ctx->lexer);
1186 label = ctables_summary_default_label (function, percentile);
1189 struct fmt_spec format;
1190 const struct fmt_spec *formatp;
1191 bool is_ctables_format = false;
/* A format looks like an identifier with a digit in it, e.g. F8.2. */
1192 if (lex_token (ctx->lexer) == T_ID
1193 && has_digit (lex_tokcstr (ctx->lexer)))
1195 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1196 &is_ctables_format))
/* LOC covers the whole specification; it is needed only for diagnostics. */
1206 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1207 lex_ofs (ctx->lexer) - 1);
1208 add_summary_spec (sub, function, percentile, label, formatp,
1209 is_ctables_format, loc, sv);
1211 msg_location_destroy (loc);
1213 lex_match (ctx->lexer, T_COMMA); /* The comma separator is optional. */
1214 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1216 if (!lex_force_match (ctx->lexer, T_LBRACK))
1220 else if (lex_match (ctx->lexer, T_RBRACK))
1222 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1229 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   node yields itself if it is scale and null otherwise; any other node is
   searched recursively through its two sub-axes.
   NOTE(review): the leading null check and the loop's return statements are
   not visible here. */
1233 static const struct ctables_axis *
1234 find_scale (const struct ctables_axis *axis)
1238 else if (axis->op == CTAO_VAR)
1239 return axis->scale ? axis : NULL;
1242 for (size_t i = 0; i < 2; i++)
1244 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a categorical (non-scale)
   variable that has at least one CSV_CELL summary specification.  A variable
   node yields itself if it qualifies and null otherwise; any other node is
   searched recursively through its two sub-axes.
   NOTE(review): the leading null check and the loop's return statements are
   not visible here. */
1252 static const struct ctables_axis *
1253 find_categorical_summary_spec (const struct ctables_axis *axis)
1257 else if (axis->op == CTAO_VAR)
1258 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1261 for (size_t i = 0; i < 2; i++)
1263 const struct ctables_axis *sum
1264 = find_categorical_summary_spec (axis->subs[i]);
/* Parses a nesting expression: one or more postfix expressions separated by
   '>'.  Each '>' combines the expression so far (LHS) with the next operand
   (RHS) into a CTAO_NEST node.

   Two constraints are checked on every new nest node; on violation the node
   is destroyed after the diagnostics:
   - The outer and inner operands may not both contain a scale variable.
   - An outer categorical variable may not carry summary specifications,
     because those are allowed only at the innermost nesting level. */
1272 static struct ctables_axis *
1273 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1275 int start_ofs = lex_ofs (ctx->lexer);
1276 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1280 while (lex_match (ctx->lexer, T_GT))
1282 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1286 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1287 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1289 const struct ctables_axis *outer_scale = find_scale (lhs);
1290 const struct ctables_axis *inner_scale = find_scale (rhs);
1291 if (outer_scale && inner_scale)
1293 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1294 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1295 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1296 ctables_axis_destroy (nest);
1300 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1303 msg_at (SE, nest->loc,
1304 _("Summaries may only be requested for categorical variables "
1305 "at the innermost nesting level."));
1306 msg_at (SN, outer_sum->loc,
1307 _("This outer categorical variable has a summary."));
1308 ctables_axis_destroy (nest);
/* Parses a stacking expression: one or more nesting expressions separated
   by '+'.  Each '+' wraps the expression parsed so far and the next operand
   in a new CTAO_STACK node, so the resulting tree is left-associative. */
1318 static struct ctables_axis *
1319 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1321 int start_ofs = lex_ofs (ctx->lexer);
1322 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1326 while (lex_match (ctx->lexer, T_PLUS))
1328 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1332 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1333 ctx->lexer, start_ofs);
/* Parses the expression for axis A of table T and stores it in T->axes[A].
   When the next token is BY, '/', or the end of the command, the axis is
   treated as empty and handled by an early path (its result is not visible
   here).  Otherwise the return value tells whether a non-null axis was
   parsed. */
1340 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1341 struct ctables *ct, struct ctables_table *t,
1342 enum pivot_axis_type a)
1344 if (lex_token (lexer) == T_BY
1345 || lex_token (lexer) == T_SLASH
1346 || lex_token (lexer) == T_ENDCMD)
1349 struct ctables_axis_parse_ctx ctx = {
1355 t->axes[a] = ctables_axis_parse_stack (&ctx);
1356 return t->axes[a] != NULL;
1360 ctables_chisq_destroy (struct ctables_chisq *chisq)
1366 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
/* Destroys table T.  The visible statements drop the reference held on each
   entry of T->categories and free that array, destroy the column, row and
   layer axes, and destroy the chi-square and pairwise test settings.
   NOTE(review): further cleanup, and the final release of T itself, are not
   visible here. */
1372 ctables_table_destroy (struct ctables_table *t)
1377 for (size_t i = 0; i < t->n_categories; i++)
1378 ctables_categories_unref (t->categories[i]);
1379 free (t->categories);
1381 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1382 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1383 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1387 ctables_chisq_destroy (t->chisq);
1388 ctables_pairwise_destroy (t->pairwise);
/* Destroys CT.  The visible statements release the reference on the pivot
   table look and destroy each of the CT->n_tables tables.
   NOTE(review): release of the remaining members is not visible here. */
1393 ctables_destroy (struct ctables *ct)
1398 pivot_table_look_unref (ct->look);
1402 for (size_t i = 0; i < ct->n_tables; i++)
1403 ctables_table_destroy (ct->tables[i]);
/* Returns, by value, a category whose 'range' member is { LOW, HIGH }.
   NOTE(review): the 'type' initializer is not visible here; presumably it is
   CCT_RANGE. */
1408 static struct ctables_category
1409 cct_range (double low, double high)
1411 return (struct ctables_category) {
1413 .range = { low, high }
/* Parses the optional "= 'label'" part of a SUBTOTAL or HSUBTOTAL category
   and stores a CCT_SUBTOTAL category in *CAT.

   If T_EQUALS is matched, a string token is required and a copy of its text
   becomes the label; the translated default "Subtotal" is the alternative.
   HIDE_SUBCATEGORIES is copied into the category unchanged.  The label is
   heap-allocated and handed over to *CAT. */
1418 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1419 struct ctables_category *cat)
1422 if (lex_match (lexer, T_EQUALS))
1424 if (!lex_force_string (lexer))
1427 total_label = ss_xstrdup (lex_tokss (lexer));
1431 total_label = xstrdup (_("Subtotal"));
1433 *cat = (struct ctables_category) {
1434 .type = CCT_SUBTOTAL,
1435 .hide_subcategories = hide_subcategories,
1436 .total_label = total_label
/* Parses one explicit category specification from the bracketed list of a
   CATEGORIES subcommand and stores the result in *CAT.

   The forms recognized are: OTHERNM; MISSING; SUBTOTAL and HSUBTOTAL (both
   delegated to ctables_table_parse_subtotal()); "LO THRU number"; a number,
   optionally followed by "THRU HI" or "THRU number"; a string, which is
   recoded between UTF-8 and DICT's encoding with recode_substring_pool() and
   stripped of trailing spaces; and "&name", which must name a postcompute
   that ctables_find_postcompute() can find in CT.  Any other token is
   reported with lex_error().

   The result is a success flag: the caller in
   ctables_table_parse_categories() treats a false result as failure. */
1442 ctables_table_parse_explicit_category (struct lexer *lexer,
1443 struct dictionary *dict,
1445 struct ctables_category *cat)
1447 if (lex_match_id (lexer, "OTHERNM"))
1448 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1449 else if (lex_match_id (lexer, "MISSING"))
1450 *cat = (struct ctables_category) { .type = CCT_MISSING };
1451 else if (lex_match_id (lexer, "SUBTOTAL"))
1452 return ctables_table_parse_subtotal (lexer, false, cat);
1453 else if (lex_match_id (lexer, "HSUBTOTAL"))
1454 return ctables_table_parse_subtotal (lexer, true, cat);
1455 else if (lex_match_id (lexer, "LO"))
/* Both THRU and a following number are required.  The number check must be
   negated, as in the "number THRU number" branch below: otherwise valid
   syntax takes the failure path and lex_number() is reached only when the
   token is not a number. */
1457 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
1459 *cat = cct_range (-DBL_MAX, lex_number (lexer));
1462 else if (lex_is_number (lexer))
1464 double number = lex_number (lexer);
1466 if (lex_match_id (lexer, "THRU"))
1468 if (lex_match_id (lexer, "HI"))
1469 *cat = cct_range (number, DBL_MAX);
1472 if (!lex_force_num (lexer))
1474 *cat = cct_range (number, lex_number (lexer));
1479 *cat = (struct ctables_category) {
1484 else if (lex_is_string (lexer))
1486 struct substring s = recode_substring_pool (
1487 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1488 ss_rtrim (&s, ss_cstr (" "));
1490 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1493 else if (lex_match (lexer, T_AND))
1495 if (!lex_force_id (lexer))
1497 struct ctables_postcompute *pc = ctables_find_postcompute (
1498 ct, lex_tokcstr (lexer));
/* Unknown postcompute: report it at the location spanning the "&" and the
   name. */
1501 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1502 msg_at (SE, loc, _("Unknown postcompute &%s."),
1503 lex_tokcstr (lexer));
1504 msg_location_destroy (loc);
1509 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1513 lex_error (lexer, NULL);
/* Searches CATS for the category that postcompute expression leaf E refers
   to.

   Each category is compared according to E's operator: numbers by value,
   strings with ss_equals(), ranges by both endpoints, and MISSING, OTHERNM,
   SUBTOTAL, and TOTAL by category type.  For subtotals, E->subtotal_index
   is additionally compared against the local subtotal counter, with index 0
   handled as its own case.  After the scan, an index-0 subtotal reference
   gets special treatment when more than one subtotal was counted. */
1520 static struct ctables_category *
1521 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1522 const struct ctables_pcexpr *e)
1524 struct ctables_category *best = NULL;
1525 size_t n_subtotals = 0;
1526 for (size_t i = 0; i < cats->n_cats; i++)
1528 struct ctables_category *cat = &cats->cats[i];
1531 case CTPO_CAT_NUMBER:
1532 if (cat->type == CCT_NUMBER && cat->number == e->number)
1536 case CTPO_CAT_STRING:
1537 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1541 case CTPO_CAT_RANGE:
1542 if (cat->type == CCT_RANGE
1543 && cat->range[0] == e->range[0]
1544 && cat->range[1] == e->range[1])
1548 case CTPO_CAT_MISSING:
1549 if (cat->type == CCT_MISSING)
1553 case CTPO_CAT_OTHERNM:
1554 if (cat->type == CCT_OTHERNM)
1558 case CTPO_CAT_SUBTOTAL:
1559 if (cat->type == CCT_SUBTOTAL)
1562 if (e->subtotal_index == n_subtotals)
1564 else if (e->subtotal_index == 0)
1569 case CTPO_CAT_TOTAL:
1570 if (cat->type == CCT_TOTAL)
1584 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Checks that the categories referenced by postcompute expression E, which
   belongs to computed category PC_CAT, can be resolved against CATS.

   For the category-leaf operators, the reference is looked up with
   ctables_find_category_for_postcompute().  Two kinds of error are reported
   here, each followed by explanatory notes: a subtotal reference without a
   position (subtotal_index 0) when CATS contains more than one subtotal,
   and a reference to a category that is not in the category list.
   CATS_LOCATION is the location used for messages about the category list
   as a whole.  For the remaining operators, E->subs[0] and E->subs[1] are
   checked recursively, skipping null sub-expressions. */
1590 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1591 struct ctables_category *pc_cat,
1592 const struct ctables_categories *cats,
1593 const struct msg_location *cats_location)
1597 case CTPO_CAT_NUMBER:
1598 case CTPO_CAT_STRING:
1599 case CTPO_CAT_RANGE:
1600 case CTPO_CAT_MISSING:
1601 case CTPO_CAT_OTHERNM:
1602 case CTPO_CAT_SUBTOTAL:
1603 case CTPO_CAT_TOTAL:
1605 struct ctables_category *cat = ctables_find_category_for_postcompute (
1609 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals so that the message can say how many there are. */
1611 size_t n_subtotals = 0;
1612 for (size_t i = 0; i < cats->n_cats; i++)
1613 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1614 if (n_subtotals > 1)
1616 msg_at (SE, cats_location,
1617 ngettext ("These categories include %zu instance "
1618 "of SUBTOTAL or HSUBTOTAL, so references "
1619 "from computed categories must refer to "
1620 "subtotals by position.",
1621 "These categories include %zu instances "
1622 "of SUBTOTAL or HSUBTOTAL, so references "
1623 "from computed categories must refer to "
1624 "subtotals by position.",
1627 msg_at (SN, e->location,
1628 _("This is the reference that lacks a position."));
1633 msg_at (SE, pc_cat->location,
1634 _("Computed category &%s references a category not included "
1635 "in the category list."),
1637 msg_at (SN, e->location, _("This is the missing category."));
1638 msg_at (SN, cats_location,
1639 _("To fix the problem, add the missing category to the "
1640 "list of categories here."));
1643 if (pc_cat->pc->hide_source_cats)
1657 for (size_t i = 0; i < 2; i++)
1658 if (e->subs[i] && !ctables_recursive_check_postcompute (
1659 e->subs[i], pc_cat, cats, cats_location))
/* Parses a CATEGORIES subcommand for table T.

   The syntax begins with VARIABLES, an optional equals sign, and a variable
   list (scratch variables excluded).  A new ctables_categories object is
   created with one reference per listed variable and show_empty enabled,
   and the existing T->categories entry at each variable's dictionary index
   is unreferenced.

   An optional bracketed list supplies explicit categories.  Each one is
   parsed with ctables_table_parse_explicit_category() and tagged with its
   source location.  Afterward every category is validated against the
   variables: postcomputes through ctables_recursive_check_postcompute(),
   numeric-only specifications against string variables, and string
   specifications against numeric variables.  A string category may instead
   be converted with data_in(), using the print format type common to all
   the variables, into a CCT_NUMBER category.

   The settings that follow, up to T_SLASH or T_ENDCMD, are ORDER, KEY,
   MISSING, TOTAL, LABEL, POSITION, and EMPTY.  ORDER, KEY, and MISSING are
   matched only when no explicit categories were given.

   Finally, a totals category labeled with the LABEL string or the translated
   default "Total" is placed at the front or the back of the list depending
   on POSITION, and a last pass over the categories links each one to a
   subtotal through its subtotal member. */
1669 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1670 struct ctables *ct, struct ctables_table *t)
1672 if (!lex_match_id (lexer, "VARIABLES"))
1674 lex_match (lexer, T_EQUALS);
1676 struct variable **vars;
1678 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* Determine whether all of the variables share one print format type;
   common_format becomes NULL if they do not. */
1681 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1682 for (size_t i = 1; i < n_vars; i++)
1684 const struct fmt_spec *f = var_get_print_format (vars[i]);
1685 if (f->type != common_format->type)
1687 common_format = NULL;
1693 && (fmt_get_category (common_format->type)
1694 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
1696 struct ctables_categories *c = xmalloc (sizeof *c);
1697 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1698 for (size_t i = 0; i < n_vars; i++)
1700 struct ctables_categories **cp
1701 = &t->categories[var_get_dict_index (vars[i])];
1702 ctables_categories_unref (*cp);
1706 size_t allocated_cats = 0;
1707 if (lex_match (lexer, T_LBRACK))
1709 int cats_start_ofs = lex_ofs (lexer);
1712 if (c->n_cats >= allocated_cats)
1713 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1715 int start_ofs = lex_ofs (lexer);
1716 struct ctables_category *cat = &c->cats[c->n_cats];
1717 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1719 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1722 lex_match (lexer, T_COMMA);
1724 while (!lex_match (lexer, T_RBRACK));
1726 struct msg_location *cats_location
1727 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1728 for (size_t i = 0; i < c->n_cats; i++)
1730 struct ctables_category *cat = &c->cats[i];
1733 case CCT_POSTCOMPUTE:
1734 if (!ctables_recursive_check_postcompute (cat->pc->expr, cat,
1741 for (size_t j = 0; j < n_vars; j++)
1742 if (var_is_alpha (vars[j]))
1744 msg_at (SE, cat->location,
1745 _("This category specification may be applied "
1746 "only to numeric variables, but this "
1747 "subcommand tries to apply it to string "
1749 var_get_name (vars[j]));
1758 char *error = data_in (cat->string, dict_get_encoding (dict),
1759 common_format->type,
1760 settings_get_fmt_settings (),
1764 msg_at (SE, cat->location,
1765 _("Failed to parse category specification as "
1767 fmt_name (common_format->type), error);
1772 ss_dealloc (&cat->string);
1774 cat->type = CCT_NUMBER;
1779 for (size_t j = 0; j < n_vars; j++)
1780 if (var_is_numeric (vars[j]))
1782 msg_at (SE, cat->location,
1783 _("This category specification may be applied "
1784 "only to string variables, but this "
1785 "subcommand tries to apply it to numeric "
1787 var_get_name (vars[j]));
1800 case CCT_EXCLUDED_MISSING:
/* Settings collected from the keywords below, starting from their
   defaults. */
1806 struct ctables_category cat = {
1808 .include_missing = false,
1809 .sort_ascending = true,
1811 bool show_totals = false;
1812 char *total_label = NULL;
1813 bool totals_before = false;
1814 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1816 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1818 lex_match (lexer, T_EQUALS);
1819 if (lex_match_id (lexer, "A"))
1820 cat.sort_ascending = true;
1821 else if (lex_match_id (lexer, "D"))
1822 cat.sort_ascending = false;
1825 lex_error_expecting (lexer, "A", "D");
1829 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1831 lex_match (lexer, T_EQUALS);
1832 if (lex_match_id (lexer, "VALUE"))
1833 cat.type = CCT_VALUE;
1834 else if (lex_match_id (lexer, "LABEL"))
1835 cat.type = CCT_LABEL;
1838 cat.type = CCT_FUNCTION;
1839 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1842 if (lex_match (lexer, T_LPAREN))
1844 cat.sort_var = parse_variable (lexer, dict);
1848 if (cat.sort_function == CTSF_PTILE)
1850 lex_match (lexer, T_COMMA);
1851 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1853 cat.percentile = lex_number (lexer);
1857 if (!lex_force_match (lexer, T_RPAREN))
1860 else if (ctables_function_availability (cat.sort_function)
1863 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1868 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1870 lex_match (lexer, T_EQUALS);
1871 if (lex_match_id (lexer, "INCLUDE"))
1872 cat.include_missing = true;
1873 else if (lex_match_id (lexer, "EXCLUDE"))
1874 cat.include_missing = false;
1877 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1881 else if (lex_match_id (lexer, "TOTAL"))
1883 lex_match (lexer, T_EQUALS);
1884 if (!parse_bool (lexer, &show_totals))
1887 else if (lex_match_id (lexer, "LABEL"))
1889 lex_match (lexer, T_EQUALS);
1890 if (!lex_force_string (lexer))
1893 total_label = ss_xstrdup (lex_tokss (lexer));
1896 else if (lex_match_id (lexer, "POSITION"))
1898 lex_match (lexer, T_EQUALS);
1899 if (lex_match_id (lexer, "BEFORE"))
1900 totals_before = true;
1901 else if (lex_match_id (lexer, "AFTER"))
1902 totals_before = false;
1905 lex_error_expecting (lexer, "BEFORE", "AFTER");
1909 else if (lex_match_id (lexer, "EMPTY"))
1911 lex_match (lexer, T_EQUALS);
1912 if (lex_match_id (lexer, "INCLUDE"))
1913 c->show_empty = true;
1914 else if (lex_match_id (lexer, "EXCLUDE"))
1915 c->show_empty = false;
1918 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1925 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1926 "TOTAL", "LABEL", "POSITION", "EMPTY");
1928 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
1935 if (c->n_cats >= allocated_cats)
1936 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1937 c->cats[c->n_cats++] = cat;
1942 if (c->n_cats >= allocated_cats)
1943 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1945 struct ctables_category *totals;
1948 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
1949 totals = &c->cats[0];
1952 totals = &c->cats[c->n_cats];
1955 *totals = (struct ctables_category) {
1957 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Walk the categories forward when totals come first and backward
   otherwise; the direction is selected by totals_before in each of the
   three loop clauses. */
1961 struct ctables_category *subtotal = NULL;
1962 for (size_t i = totals_before ? 0 : c->n_cats;
1963 totals_before ? i < c->n_cats : i-- > 0;
1964 totals_before ? i++ : 0)
1966 struct ctables_category *cat = &c->cats[i];
1974 cat->subtotal = subtotal;
1977 case CCT_POSTCOMPUTE:
1988 case CCT_EXCLUDED_MISSING:
/* Uninitializes NEST; ctables_stack_uninit() calls it on every nest in a
   stack.  NOTE(review): presumably frees the arrays owned by NEST -- confirm
   against the function body. */
1997 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes each of the STACK->n nests in STACK->nests with
   ctables_nest_uninit(), then frees the nests array itself. */
2004 ctables_stack_uninit (struct ctables_stack *stack)
2008 for (size_t i = 0; i < stack->n; i++)
2009 ctables_nest_uninit (&stack->nests[i]);
2010 free (stack->nests);
/* Returns the stack formed by combining every nest in S0 with every nest in
   S1, so the result has room for S0.n * S1.n nests.

   For each pair (a, b), the new nest's variable list is a's variables
   followed by b's.  Its summary specs are cloned, for every summary variant,
   from whichever of a and b is chosen as SUMMARY_SRC; the choice depends on
   which of them lacks a CSV_CELL spec variable.  Its scale_idx is a's if a
   has one, or else b's offset by a->n.

   Takes ownership of S0 and S1: both are uninitialized here. */
2014 static struct ctables_stack
2015 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2022 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2023 for (size_t i = 0; i < s0.n; i++)
2024 for (size_t j = 0; j < s1.n; j++)
2026 const struct ctables_nest *a = &s0.nests[i];
2027 const struct ctables_nest *b = &s1.nests[j];
2029 size_t allocate = a->n + b->n;
2030 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2031 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2033 for (size_t k = 0; k < a->n; k++)
2034 vars[n++] = a->vars[k];
2035 for (size_t k = 0; k < b->n; k++)
2036 vars[n++] = b->vars[k];
2037 assert (n == allocate);
2039 const struct ctables_nest *summary_src;
2040 if (!a->specs[CSV_CELL].var)
2042 else if (!b->specs[CSV_CELL].var)
2047 struct ctables_nest *new = &stack.nests[stack.n++];
2048 *new = (struct ctables_nest) {
2050 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2051 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2055 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2056 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2058 ctables_stack_uninit (&s0);
2059 ctables_stack_uninit (&s1);
/* Returns the concatenation of S0 and S1 as a new stack of S0.n + S1.n
   nests.  S0's nests are copied first by structure assignment; S1's follow,
   each with its group_head advanced by S0.n to account for its new position
   in the combined array. */
2063 static struct ctables_stack
2064 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2066 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2067 for (size_t i = 0; i < s0.n; i++)
2068 stack.nests[stack.n++] = s0.nests[i];
2069 for (size_t i = 0; i < s1.n; i++)
2071 stack.nests[stack.n] = s1.nests[i];
2072 stack.nests[stack.n].group_head += s0.n;
2075 assert (stack.n == s0.n + s1.n);
/* Returns a stack holding exactly one newly allocated nest for axis node A.

   The nest's scale_idx is 0 if A->scale is set and SIZE_MAX otherwise.  If A
   has any CSV_CELL summary specs or is a scale variable, then every summary
   variant's spec set is cloned from A and tagged with A's variable and
   scale flag. */
2081 static struct ctables_stack
2082 var_fts (const struct ctables_axis *a)
2084 struct variable **vars = xmalloc (sizeof *vars);
2087 struct ctables_nest *nest = xmalloc (sizeof *nest);
2088 *nest = (struct ctables_nest) {
2091 .scale_idx = a->scale ? 0 : SIZE_MAX,
2093 if (a->specs[CSV_CELL].n || a->scale)
2094 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2096 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2097 nest->specs[sv].var = a->var;
2098 nest->specs[sv].is_scale = a->scale;
2100 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Recursively converts axis expression A into a stack of nests.

   One case yields an empty stack (n == 0).  For nonterminals, the stacks for
   A->subs[0] and A->subs[1] are enumerated recursively and then combined,
   with stack_fts() in one case and nest_fts() in the other.  AXIS_TYPE is
   passed down unchanged to the recursive calls. */
2103 static struct ctables_stack
2104 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2107 return (struct ctables_stack) { .n = 0 };
2115 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2116 enumerate_fts (axis_type, a->subs[1]));
2119 /* This should consider any of the scale variables found in the result to
2120 be linked to each other listwise for SMISSING=LISTWISE. */
2121 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2122 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function.  Which members are in use depends
   on the summary function, as grouped by the comments below; see
   ctables_summary_init() for how each group is set up. */
2128 union ctables_summary
2130 /* COUNT, VALIDN, TOTALN. */
2133 /* MINIMUM, MAXIMUM, RANGE. */
2140 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2141 struct moments1 *moments;
2143 /* MEDIAN, MODE, PTILE. */
2146 struct casewriter *writer;
2151 /* XXX multiple response */
/* Initializes accumulator S for summary spec SS, according to SS->function.

   Among the cases visible here: minimum and maximum start at SYSMIS; the
   sum-based functions, including the percentage-of-sum family, get a
   moments1 accumulator created with MOMENT_VARIANCE; and the order
   statistics get a sorting casewriter over a two-column numeric case
   prototype, ordered ascending on column 0. */
2155 ctables_summary_init (union ctables_summary *s,
2156 const struct ctables_summary_spec *ss)
2158 switch (ss->function)
2162 case CTSF_ROWPCT_COUNT:
2163 case CTSF_COLPCT_COUNT:
2164 case CTSF_TABLEPCT_COUNT:
2165 case CTSF_SUBTABLEPCT_COUNT:
2166 case CTSF_LAYERPCT_COUNT:
2167 case CTSF_LAYERROWPCT_COUNT:
2168 case CTSF_LAYERCOLPCT_COUNT:
2169 case CTSF_ROWPCT_VALIDN:
2170 case CTSF_COLPCT_VALIDN:
2171 case CTSF_TABLEPCT_VALIDN:
2172 case CTSF_SUBTABLEPCT_VALIDN:
2173 case CTSF_LAYERPCT_VALIDN:
2174 case CTSF_LAYERROWPCT_VALIDN:
2175 case CTSF_LAYERCOLPCT_VALIDN:
2176 case CTSF_ROWPCT_TOTALN:
2177 case CTSF_COLPCT_TOTALN:
2178 case CTSF_TABLEPCT_TOTALN:
2179 case CTSF_SUBTABLEPCT_TOTALN:
2180 case CTSF_LAYERPCT_TOTALN:
2181 case CTSF_LAYERROWPCT_TOTALN:
2182 case CTSF_LAYERCOLPCT_TOTALN:
2194 s->min = s->max = SYSMIS;
2202 case CTSF_ROWPCT_SUM:
2203 case CTSF_COLPCT_SUM:
2204 case CTSF_TABLEPCT_SUM:
2205 case CTSF_SUBTABLEPCT_SUM:
2206 case CTSF_LAYERPCT_SUM:
2207 case CTSF_LAYERROWPCT_SUM:
2208 case CTSF_LAYERCOLPCT_SUM:
2209 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric columns; ctables_summary_add() writes the value to column 0
   and its weight to column 1. */
2216 struct caseproto *proto = caseproto_create ();
2217 proto = caseproto_add_width (proto, 0);
2218 proto = caseproto_add_width (proto, 0);
2220 struct subcase ordering;
2221 subcase_init (&ordering, 0, 0, SC_ASCEND);
2222 s->writer = sort_create_writer (&ordering, proto);
2223 subcase_uninit (&ordering);
2224 caseproto_unref (proto);
/* Releases whatever ctables_summary_init() allocated in S for summary spec
   SS.  In the cases visible here, the sum-based functions destroy their
   moments1 accumulator and the order statistics destroy their
   casewriter. */
2234 ctables_summary_uninit (union ctables_summary *s,
2235 const struct ctables_summary_spec *ss)
2237 switch (ss->function)
2241 case CTSF_ROWPCT_COUNT:
2242 case CTSF_COLPCT_COUNT:
2243 case CTSF_TABLEPCT_COUNT:
2244 case CTSF_SUBTABLEPCT_COUNT:
2245 case CTSF_LAYERPCT_COUNT:
2246 case CTSF_LAYERROWPCT_COUNT:
2247 case CTSF_LAYERCOLPCT_COUNT:
2248 case CTSF_ROWPCT_VALIDN:
2249 case CTSF_COLPCT_VALIDN:
2250 case CTSF_TABLEPCT_VALIDN:
2251 case CTSF_SUBTABLEPCT_VALIDN:
2252 case CTSF_LAYERPCT_VALIDN:
2253 case CTSF_LAYERROWPCT_VALIDN:
2254 case CTSF_LAYERCOLPCT_VALIDN:
2255 case CTSF_ROWPCT_TOTALN:
2256 case CTSF_COLPCT_TOTALN:
2257 case CTSF_TABLEPCT_TOTALN:
2258 case CTSF_SUBTABLEPCT_TOTALN:
2259 case CTSF_LAYERPCT_TOTALN:
2260 case CTSF_LAYERROWPCT_TOTALN:
2261 case CTSF_LAYERCOLPCT_TOTALN:
2279 case CTSF_ROWPCT_SUM:
2280 case CTSF_COLPCT_SUM:
2281 case CTSF_TABLEPCT_SUM:
2282 case CTSF_SUBTABLEPCT_SUM:
2283 case CTSF_LAYERPCT_SUM:
2284 case CTSF_LAYERROWPCT_SUM:
2285 case CTSF_LAYERCOLPCT_SUM:
2286 moments1_destroy (s->moments);
2292 casewriter_destroy (s->writer);
/* Adds one case's contribution to accumulator S for summary spec SS.

   VAR is the summary variable and VALUE its value in the case.  D_WEIGHT
   and E_WEIGHT are two weights for the case: the percentage TOTALN, COUNT,
   and VALIDN families shown here add D_WEIGHT to S->count, while the later
   count cases, the moments, and the order statistics use E_WEIGHT.

   The flags decide whether the case counts at all.  The percentage TOTALN
   family always counts it.  The COUNT-type cases count it only if IS_SCALE
   is set or EXCLUDED_MISSING is clear.  Minimum and maximum, the moments,
   and the order statistics are updated only when IS_SCALE_MISSING is
   clear.  Order statistics also add E_WEIGHT to S->ovalid and write a
   (value, weight) case to S->writer. */
2298 ctables_summary_add (union ctables_summary *s,
2299 const struct ctables_summary_spec *ss,
2300 const struct variable *var, const union value *value,
2301 bool is_scale, bool is_scale_missing,
2302 bool is_missing, bool excluded_missing,
2303 double d_weight, double e_weight)
2305 /* To determine whether a case is included in a given table for a particular
2306 kind of summary, consider the following charts for each variable in the
2307 table. Only if "yes" appears for every variable for the summary is the
2310 Categorical variables: VALIDN COUNT TOTALN
2311 Valid values in included categories yes yes yes
2312 Missing values in included categories --- yes yes
2313 Missing values in excluded categories --- --- yes
2314 Valid values in excluded categories --- --- ---
2316 Scale variables: VALIDN COUNT TOTALN
2317 Valid value yes yes yes
2318 Missing value --- yes yes
2320 Missing values include both user- and system-missing. (The system-missing
2321 value is always in an excluded category.)
2323 switch (ss->function)
2326 case CTSF_ROWPCT_TOTALN:
2327 case CTSF_COLPCT_TOTALN:
2328 case CTSF_TABLEPCT_TOTALN:
2329 case CTSF_SUBTABLEPCT_TOTALN:
2330 case CTSF_LAYERPCT_TOTALN:
2331 case CTSF_LAYERROWPCT_TOTALN:
2332 case CTSF_LAYERCOLPCT_TOTALN:
2333 s->count += d_weight;
2337 case CTSF_ROWPCT_COUNT:
2338 case CTSF_COLPCT_COUNT:
2339 case CTSF_TABLEPCT_COUNT:
2340 case CTSF_SUBTABLEPCT_COUNT:
2341 case CTSF_LAYERPCT_COUNT:
2342 case CTSF_LAYERROWPCT_COUNT:
2343 case CTSF_LAYERCOLPCT_COUNT:
2344 if (is_scale || !excluded_missing)
2345 s->count += d_weight;
2349 case CTSF_ROWPCT_VALIDN:
2350 case CTSF_COLPCT_VALIDN:
2351 case CTSF_TABLEPCT_VALIDN:
2352 case CTSF_SUBTABLEPCT_VALIDN:
2353 case CTSF_LAYERPCT_VALIDN:
2354 case CTSF_LAYERROWPCT_VALIDN:
2355 case CTSF_LAYERCOLPCT_VALIDN:
2359 s->count += d_weight;
2364 s->count += d_weight;
2368 if (is_scale || !excluded_missing)
2369 s->count += e_weight;
2376 s->count += e_weight;
2380 s->count += e_weight;
2386 if (!is_scale_missing)
2388 assert (!var_is_alpha (var)); /* XXX? */
2389 if (s->min == SYSMIS || value->f < s->min)
2391 if (s->max == SYSMIS || value->f > s->max)
2401 case CTSF_ROWPCT_SUM:
2402 case CTSF_COLPCT_SUM:
2403 case CTSF_TABLEPCT_SUM:
2404 case CTSF_SUBTABLEPCT_SUM:
2405 case CTSF_LAYERPCT_SUM:
2406 case CTSF_LAYERROWPCT_SUM:
2407 case CTSF_LAYERCOLPCT_SUM:
2408 if (!is_scale_missing)
2409 moments1_add (s->moments, value->f, e_weight);
2415 if (!is_scale_missing)
2417 s->ovalid += e_weight;
2419 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2420 *case_num_rw_idx (c, 0) = value->f;
2421 *case_num_rw_idx (c, 1) = e_weight;
2422 casewriter_write (s->writer, c);
/* Maps summary FUNCTION to the type of domain it is computed over.  The
   percentage functions are grouped by prefix, with the COUNT, SUM, TOTALN,
   and VALIDN variants of each prefix sharing one domain type; for example,
   the LAYERCOLPCT functions map to CTDT_LAYERCOL, the LAYERROWPCT
   functions to CTDT_LAYERROW, and the SUBTABLEPCT functions to
   CTDT_SUBTABLE. */
2428 static enum ctables_domain_type
2429 ctables_function_domain (enum ctables_summary_function function)
2453 case CTSF_COLPCT_COUNT:
2454 case CTSF_COLPCT_SUM:
2455 case CTSF_COLPCT_TOTALN:
2456 case CTSF_COLPCT_VALIDN:
2459 case CTSF_LAYERCOLPCT_COUNT:
2460 case CTSF_LAYERCOLPCT_SUM:
2461 case CTSF_LAYERCOLPCT_TOTALN:
2462 case CTSF_LAYERCOLPCT_VALIDN:
2463 return CTDT_LAYERCOL;
2465 case CTSF_LAYERPCT_COUNT:
2466 case CTSF_LAYERPCT_SUM:
2467 case CTSF_LAYERPCT_TOTALN:
2468 case CTSF_LAYERPCT_VALIDN:
2471 case CTSF_LAYERROWPCT_COUNT:
2472 case CTSF_LAYERROWPCT_SUM:
2473 case CTSF_LAYERROWPCT_TOTALN:
2474 case CTSF_LAYERROWPCT_VALIDN:
2475 return CTDT_LAYERROW;
2477 case CTSF_ROWPCT_COUNT:
2478 case CTSF_ROWPCT_SUM:
2479 case CTSF_ROWPCT_TOTALN:
2480 case CTSF_ROWPCT_VALIDN:
2483 case CTSF_SUBTABLEPCT_COUNT:
2484 case CTSF_SUBTABLEPCT_SUM:
2485 case CTSF_SUBTABLEPCT_TOTALN:
2486 case CTSF_SUBTABLEPCT_VALIDN:
2487 return CTDT_SUBTABLE;
2489 case CTSF_TABLEPCT_COUNT:
2490 case CTSF_TABLEPCT_SUM:
2491 case CTSF_TABLEPCT_TOTALN:
2492 case CTSF_TABLEPCT_VALIDN:
/* Computes the final value of accumulator S for summary spec SS in CELL.

   Percentage counts divide S->count by the e_count, e_valid, or e_total of
   the cell's domain for the function (see ctables_function_domain()) and
   multiply by 100; a zero denominator is special-cased.  The range is
   max - min, or SYSMIS if either is SYSMIS.  The moment-based functions read
   their results from moments1_calculate(): the standard error of the mean
   via calc_semean(), the standard deviation as the square root of the
   variance, and the sum as weight times mean, each yielding SYSMIS when an
   input is SYSMIS.

   The order statistics turn S->writer into a reader, which consumes the
   writer.  Percentiles use percentile_create() with SS->percentile for
   CTSF_PTILE and 0.5 otherwise, evaluated with PC_HAVERAGE; the mode uses
   mode_create().  The result is cached in S->ovalue. */
2500 ctables_summary_value (const struct ctables_cell *cell,
2501 union ctables_summary *s,
2502 const struct ctables_summary_spec *ss)
2504 switch (ss->function)
2510 case CTSF_ROWPCT_COUNT:
2511 case CTSF_COLPCT_COUNT:
2512 case CTSF_TABLEPCT_COUNT:
2513 case CTSF_SUBTABLEPCT_COUNT:
2514 case CTSF_LAYERPCT_COUNT:
2515 case CTSF_LAYERROWPCT_COUNT:
2516 case CTSF_LAYERCOLPCT_COUNT:
2518 enum ctables_domain_type d = ctables_function_domain (ss->function);
2519 return (cell->domains[d]->e_count
2520 ? s->count / cell->domains[d]->e_count * 100
2524 case CTSF_ROWPCT_VALIDN:
2525 case CTSF_COLPCT_VALIDN:
2526 case CTSF_TABLEPCT_VALIDN:
2527 case CTSF_SUBTABLEPCT_VALIDN:
2528 case CTSF_LAYERPCT_VALIDN:
2529 case CTSF_LAYERROWPCT_VALIDN:
2530 case CTSF_LAYERCOLPCT_VALIDN:
2532 enum ctables_domain_type d = ctables_function_domain (ss->function);
2533 return (cell->domains[d]->e_valid
2534 ? s->count / cell->domains[d]->e_valid * 100
2538 case CTSF_ROWPCT_TOTALN:
2539 case CTSF_COLPCT_TOTALN:
2540 case CTSF_TABLEPCT_TOTALN:
2541 case CTSF_SUBTABLEPCT_TOTALN:
2542 case CTSF_LAYERPCT_TOTALN:
2543 case CTSF_LAYERROWPCT_TOTALN:
2544 case CTSF_LAYERCOLPCT_TOTALN:
2546 enum ctables_domain_type d = ctables_function_domain (ss->function);
2547 return (cell->domains[d]->e_total
2548 ? s->count / cell->domains[d]->e_total * 100
2572 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2577 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2583 double weight, variance;
2584 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2585 return calc_semean (variance, weight);
2591 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2592 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2597 double weight, mean;
2598 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2599 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2605 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2609 case CTSF_ROWPCT_SUM:
2610 case CTSF_COLPCT_SUM:
2611 case CTSF_TABLEPCT_SUM:
2612 case CTSF_SUBTABLEPCT_SUM:
2613 case CTSF_LAYERPCT_SUM:
2614 case CTSF_LAYERROWPCT_SUM:
2615 case CTSF_LAYERCOLPCT_SUM:
2622 struct casereader *reader = casewriter_make_reader (s->writer);
/* NOTE(review): SS->percentile is parsed elsewhere in this file with a
   0...100 range check; confirm the scale percentile_create() expects. */
2625 struct percentile *ptile = percentile_create (
2626 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2627 struct order_stats *os = &ptile->parent;
2628 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2629 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2630 statistic_destroy (&ptile->parent.parent);
2637 struct casereader *reader = casewriter_make_reader (s->writer);
2640 struct mode *mode = mode_create ();
2641 struct order_stats *os = &mode->parent;
2642 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2643 s->ovalue = mode->mode;
2644 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis whose cell values are read. */
2652 struct ctables_cell_sort_aux
2654 const struct ctables_nest *nest;
2655 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting.  A_ and B_ point to
   pointers to struct ctables_cell, and AUX_ points to a struct
   ctables_cell_sort_aux.

   The cells are compared one variable at a time over AUX's nest, skipping
   the scale variable.  Values in different categories are ordered by the
   categories' addresses.  Within one category the ordering depends on the
   category type: some types need no further comparison, some compare the
   values with value_compare_3way(), and some compare value labels with
   strcmp(), where a labeled value sorts after an unlabeled one and two
   unlabeled values fall back to value comparison.  The two sortable cases
   shown negate the result when the category's sort_ascending is false. */
2659 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2661 const struct ctables_cell_sort_aux *aux = aux_;
2662 struct ctables_cell *const *ap = a_;
2663 struct ctables_cell *const *bp = b_;
2664 const struct ctables_cell *a = *ap;
2665 const struct ctables_cell *b = *bp;
2667 const struct ctables_nest *nest = aux->nest;
2668 for (size_t i = 0; i < nest->n; i++)
2669 if (i != nest->scale_idx)
2671 const struct variable *var = nest->vars[i];
2672 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2673 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2674 if (a_cv->category != b_cv->category)
2675 return a_cv->category > b_cv->category ? 1 : -1;
2677 const union value *a_val = &a_cv->value;
2678 const union value *b_val = &b_cv->value;
2679 switch (a_cv->category->type)
2685 case CCT_POSTCOMPUTE:
2686 case CCT_EXCLUDED_MISSING:
2687 /* Must be equal. */
2694 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2702 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2704 return a_cv->category->sort_ascending ? cmp : -cmp;
2710 const char *a_label = var_lookup_value_label (var, a_val);
2711 const char *b_label = var_lookup_value_label (var, b_val);
2713 ? (b_label ? strcmp (a_label, b_label) : 1)
2714 : (b_label ? -1 : value_compare_3way (
2715 a_val, b_val, var_get_width (var))));
2717 return a_cv->category->sort_ascending ? cmp : -cmp;
2731 For each ctables_table:
2732 For each combination of row vars:
2733 For each combination of column vars:
2734 For each combination of layer vars:
2736 Make a table of row values:
2737 Sort entries by row values
2738 Assign a 0-based index to each actual value
2739 Construct a dimension
2740 Make a table of column values
2741 Make a table of layer values
2743 Fill the table entry using the indexes from before.
/* Finds or creates the domain of type DOMAIN in section S that CELL belongs
   to.

   The domain is identified by CELL's category and value for each variable
   that the nests list under nest->domains[DOMAIN], across all axes.  Values
   are left out of both the hash and the comparison for CCT_TOTAL,
   CCT_SUBTOTAL, and CCT_POSTCOMPUTE categories, so only the category
   pointer matters for those.  Candidates with the same hash in
   S->domains[DOMAIN] are compared against CELL using each domain's example
   cell.  A new domain is allocated with CELL as its example and inserted
   into the map. */
2746 static struct ctables_domain *
2747 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2748 enum ctables_domain_type domain)
2751 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2753 const struct ctables_nest *nest = s->nests[a];
2754 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2756 size_t v_idx = nest->domains[domain][i];
2757 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2758 hash = hash_pointer (cv->category, hash);
2759 if (cv->category->type != CCT_TOTAL
2760 && cv->category->type != CCT_SUBTOTAL
2761 && cv->category->type != CCT_POSTCOMPUTE)
2762 hash = value_hash (&cv->value,
2763 var_get_width (nest->vars[v_idx]), hash);
2767 struct ctables_domain *d;
2768 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2770 const struct ctables_cell *df = d->example;
2771 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2773 const struct ctables_nest *nest = s->nests[a];
2774 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2776 size_t v_idx = nest->domains[domain][i];
2777 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
2778 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
2779 if (cv1->category != cv2->category
2780 || (cv1->category->type != CCT_TOTAL
2781 && cv1->category->type != CCT_SUBTOTAL
2782 && cv1->category->type != CCT_POSTCOMPUTE
2783 && !value_equal (&cv1->value, &cv2->value,
2784 var_get_width (nest->vars[v_idx]))))
2793 d = xmalloc (sizeof *d);
2794 *d = (struct ctables_domain) { .example = cell };
2795 hmap_insert (&s->domains[domain], &d->node, hash);
/* Finds the category in C that value V of variable VAR falls into.

   A system-missing numeric value is handled before any category is
   examined.  Otherwise the categories are scanned from last to first:
   number categories match on equality, string categories match V's
   contents with trailing spaces removed, range categories match when V lies
   between the bounds (with -DBL_MAX and DBL_MAX meaning unbounded), and
   missing categories match values that are missing for VAR.  A category
   with include_missing set, or any non-missing value, can also select the
   category directly.  If the scan finds nothing, the result is NULL for a
   missing value and otherwise OTHERNM, which is NULL unless an OTHERNM
   category was seen. */
2799 static const struct ctables_category *
2800 ctables_categories_match (const struct ctables_categories *c,
2801 const union value *v, const struct variable *var)
2803 if (var_is_numeric (var) && v->f == SYSMIS)
2806 const struct ctables_category *othernm = NULL;
2807 for (size_t i = c->n_cats; i-- > 0; )
2809 const struct ctables_category *cat = &c->cats[i];
2813 if (cat->number == v->f)
/* Category strings were right-trimmed when they were parsed, so trim the
   value the same way before comparing.  (A leftover debugging printf()
   that dumped every comparison to stdout was removed from here.) */
2819 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2820 var_get_width (var));
2821 ss_rtrim (&s, ss_cstr (" "));
2826 if (ss_equals (cat->string, s))
2832 if ((cat->range[0] == -DBL_MAX || v->f >= cat->range[0])
2833 && (cat->range[1] == DBL_MAX || v->f <= cat->range[1]))
2838 if (var_is_value_missing (var, v))
2842 case CCT_POSTCOMPUTE:
2857 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2860 case CCT_EXCLUDED_MISSING:
2865 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's totals category if it has one.  Only the first and the last
   category in C are examined, and whichever has type CCT_TOTAL is returned,
   with the first taking precedence.

   NOTE(review): both C->cats[0] and C->cats[C->n_cats - 1] are read
   unconditionally, so this appears to assume C->n_cats > 0 -- confirm that
   callers guarantee it. */
2868 static const struct ctables_category *
2869 ctables_categories_total (const struct ctables_categories *c)
2871 const struct ctables_category *first = &c->cats[0];
2872 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2873 return (first->type == CCT_TOTAL ? first
2874 : last->type == CCT_TOTAL ? last
/* Finds or creates the cell in section S for case C, where CATS[a][i] is the
   category already chosen for variable i of axis a's nest.

   A cell is identified by the category of each non-scale variable on every
   axis, plus the variable's value in C unless the category is CCT_TOTAL,
   CCT_SUBTOTAL, or CCT_POSTCOMPUTE.  The hash and the equality test over
   S->cells both follow that rule.

   A new cell copies each variable's category and value.  For every
   total-like category it ORs a group of domain bits, chosen by axis, into
   omit_domains, and it sets the postcompute flag for CCT_POSTCOMPUTE
   categories.  Its summaries are then allocated and initialized from the
   spec set of the summary axis's nest, it is attached to a domain of each
   type through ctables_domain_insert(), and it is inserted into
   S->cells. */
2878 static struct ctables_cell *
2879 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
2880 const struct ctables_category *cats[PIVOT_N_AXES][10])
2883 enum ctables_summary_variant sv = CSV_CELL;
2884 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2886 const struct ctables_nest *nest = s->nests[a];
2887 for (size_t i = 0; i < nest->n; i++)
2888 if (i != nest->scale_idx)
2890 hash = hash_pointer (cats[a][i], hash);
2891 if (cats[a][i]->type != CCT_TOTAL
2892 && cats[a][i]->type != CCT_SUBTOTAL
2893 && cats[a][i]->type != CCT_POSTCOMPUTE)
2894 hash = value_hash (case_data (c, nest->vars[i]),
2895 var_get_width (nest->vars[i]), hash);
2901 struct ctables_cell *cell;
2902 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
2904 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2906 const struct ctables_nest *nest = s->nests[a];
2907 for (size_t i = 0; i < nest->n; i++)
2908 if (i != nest->scale_idx
2909 && (cats[a][i] != cell->axes[a].cvs[i].category
2910 || (cats[a][i]->type != CCT_TOTAL
2911 && cats[a][i]->type != CCT_SUBTOTAL
2912 && cats[a][i]->type != CCT_POSTCOMPUTE
2913 && !value_equal (case_data (c, nest->vars[i]),
2914 &cell->axes[a].cvs[i].value,
2915 var_get_width (nest->vars[i])))))
/* No existing cell matched, so build a new one. */
2924 cell = xmalloc (sizeof *cell);
2927 cell->omit_domains = 0;
2928 cell->postcompute = false;
2929 //struct string name = DS_EMPTY_INITIALIZER;
2930 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2932 const struct ctables_nest *nest = s->nests[a];
2933 cell->axes[a].cvs = (nest->n
2934 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
2936 for (size_t i = 0; i < nest->n; i++)
2938 const struct ctables_category *cat = cats[a][i];
2939 const struct variable *var = nest->vars[i];
2940 const union value *value = case_data (c, var);
2941 if (i != nest->scale_idx)
2943 const struct ctables_category *subtotal = cat->subtotal;
2944 if (cat->hide || (subtotal && subtotal->hide_subcategories))
2947 if (cat->type == CCT_TOTAL
2948 || cat->type == CCT_SUBTOTAL
2949 || cat->type == CCT_POSTCOMPUTE)
2951 /* XXX these should be more encompassing I think.*/
2955 case PIVOT_AXIS_COLUMN:
2956 cell->omit_domains |= ((1u << CTDT_TABLE) |
2957 (1u << CTDT_LAYER) |
2958 (1u << CTDT_LAYERCOL) |
2959 (1u << CTDT_SUBTABLE) |
2962 case PIVOT_AXIS_ROW:
2963 cell->omit_domains |= ((1u << CTDT_TABLE) |
2964 (1u << CTDT_LAYER) |
2965 (1u << CTDT_LAYERROW) |
2966 (1u << CTDT_SUBTABLE) |
2969 case PIVOT_AXIS_LAYER:
2970 cell->omit_domains |= ((1u << CTDT_TABLE) |
2971 (1u << CTDT_LAYER));
2975 if (cat->type == CCT_POSTCOMPUTE)
2976 cell->postcompute = true;
2979 cell->axes[a].cvs[i].category = cat;
2980 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
2983 if (i != nest->scale_idx)
2985 if (!ds_is_empty (&name))
2986 ds_put_cstr (&name, ", ");
2987 char *value_s = data_out (value, var_get_encoding (var),
2988 var_get_print_format (var),
2989 settings_get_fmt_settings ());
2990 if (cat->type == CCT_TOTAL
2991 || cat->type == CCT_SUBTOTAL
2992 || cat->type == CCT_POSTCOMPUTE)
2993 ds_put_format (&name, "%s=total", var_get_name (var));
2995 ds_put_format (&name, "%s=%s", var_get_name (var),
2996 value_s + strspn (value_s, " "));
3002 //cell->name = ds_steal_cstr (&name);
3004 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3005 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3006 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3007 for (size_t i = 0; i < specs->n; i++)
3008 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3009 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3010 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3011 hmap_insert (&s->cells, &cell->node, hash);
3016 is_scale_missing (const struct ctables_summary_spec_set *specs,
3017 const struct ccase *c)
3019 if (!specs->is_scale)
3022 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3025 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3027 const struct variable *var = specs->listwise_vars[i];
3028 if (var_is_num_missing (var, case_num (c, var)))
3036 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3037 const struct ctables_category *cats[PIVOT_N_AXES][10],
3038 bool is_missing, bool excluded_missing,
3039 double d_weight, double e_weight)
3041 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3042 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3044 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3046 bool scale_missing = is_scale_missing (specs, c);
3047 for (size_t i = 0; i < specs->n; i++)
3048 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3049 specs->var, case_data (c, specs->var), specs->is_scale,
3050 scale_missing, is_missing, excluded_missing,
3051 d_weight, e_weight);
3052 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3053 if (!(cell->omit_domains && (1u << dt)))
3055 struct ctables_domain *d = cell->domains[dt];
3056 d->d_total += d_weight;
3057 d->e_total += e_weight;
3058 if (!excluded_missing)
3060 d->d_count += d_weight;
3061 d->e_count += e_weight;
3065 d->d_valid += d_weight;
3066 d->e_valid += e_weight;
3072 recurse_totals (struct ctables_section *s, const struct ccase *c,
3073 const struct ctables_category *cats[PIVOT_N_AXES][10],
3074 bool is_missing, bool excluded_missing,
3075 double d_weight, double e_weight,
3076 enum pivot_axis_type start_axis, size_t start_nest)
3078 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3080 const struct ctables_nest *nest = s->nests[a];
3081 for (size_t i = start_nest; i < nest->n; i++)
3083 if (i == nest->scale_idx)
3086 const struct variable *var = nest->vars[i];
3088 const struct ctables_category *total = ctables_categories_total (
3089 s->table->categories[var_get_dict_index (var)]);
3092 const struct ctables_category *save = cats[a][i];
3094 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3095 d_weight, e_weight);
3096 recurse_totals (s, c, cats, is_missing, excluded_missing,
3097 d_weight, e_weight, a, i + 1);
3106 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3107 const struct ctables_category *cats[PIVOT_N_AXES][10],
3108 bool is_missing, bool excluded_missing,
3109 double d_weight, double e_weight,
3110 enum pivot_axis_type start_axis, size_t start_nest)
3112 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3114 const struct ctables_nest *nest = s->nests[a];
3115 for (size_t i = start_nest; i < nest->n; i++)
3117 if (i == nest->scale_idx)
3120 const struct ctables_category *save = cats[a][i];
3123 cats[a][i] = save->subtotal;
3124 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3125 d_weight, e_weight);
3126 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3127 d_weight, e_weight, a, i + 1);
3136 ctables_add_occurrence (const struct variable *var,
3137 const union value *value,
3138 struct hmap *occurrences)
3140 int width = var_get_width (var);
3141 unsigned int hash = value_hash (value, width, 0);
3143 struct ctables_occurrence *o;
3144 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3146 if (value_equal (value, &o->value, width))
3149 o = xmalloc (sizeof *o);
3150 value_clone (&o->value, value, width);
3151 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   Each non-scale variable in each of the section's nests is matched against
   its category specification to fill in CATS.  The case is then added to the
   matching cell and to the total and subtotal cells derived from it.

   NOTE(review): several short lines of this function (braces, early returns,
   "continue") are not visible in the reviewed text, so the exact conditions
   under which the case is skipped cannot be confirmed from here. */
3155 ctables_cell_insert (struct ctables_section *s,
3156 const struct ccase *c,
3157 double d_weight, double e_weight)
/* One category per (axis, nest position).  The fixed inner bound of 10 is a
   known limitation (see the XXX marker). */
3159 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3161 /* Does at least one categorical variable have a missing value in an included
3162 or excluded category? */
3163 bool is_missing = false;
3165 /* Does at least one categorical variable have a missing value in an excluded category? */
3167 bool excluded_missing = false;
/* Pass 1: choose a category for every categorical variable. */
3169 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3171 const struct ctables_nest *nest = s->nests[a];
3172 for (size_t i = 0; i < nest->n; i++)
3174 if (i == nest->scale_idx)
3177 const struct variable *var = nest->vars[i];
3178 const union value *value = case_data (c, var);
3180 bool var_missing = var_is_value_missing (var, value) != 0;
/* NOTE(review): this writes debugging text to stdout; whether it is guarded
   by a condition or preprocessor directive is not visible here -- confirm
   that it is not live in production builds. */
3184 printf ("ctables_cell_insert %s: ", var_get_name (var));
3185 cats[a][i] = ctables_categories_match (
3186 s->table->categories[var_get_dict_index (var)], value, var);
/* Placeholder category assigned when no category matched; it also sets
   excluded_missing. */
3192 static const struct ctables_category cct_excluded_missing = {
3193 .type = CCT_EXCLUDED_MISSING,
3196 cats[a][i] = &cct_excluded_missing;
3197 excluded_missing = true;
/* Pass 2: unless some value fell in an excluded category, record each
   categorical value as an occurrence for its (axis, position). */
3202 if (!excluded_missing)
3203 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3205 const struct ctables_nest *nest = s->nests[a];
3206 for (size_t i = 0; i < nest->n; i++)
3207 if (i != nest->scale_idx)
3209 const struct variable *var = nest->vars[i];
3210 const union value *value = case_data (c, var);
3211 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
/* Add the case to its own cell, then to total and subtotal cells. */
3215 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3216 d_weight, e_weight);
3218 //if (!excluded_missing)
3220 recurse_totals (s, c, cats, is_missing, excluded_missing,
3221 d_weight, e_weight, 0, 0);
3222 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3223 d_weight, e_weight, 0, 0);
/* Summary spec set that this merge item reads from;
   merge_item_compare_3way() compares set->specs[ofs] of two items. */
3229 const struct ctables_summary_spec_set *set;
3234 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3236 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3237 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3238 if (as->function != bs->function)
3239 return as->function > bs->function ? 1 : -1;
3240 else if (as->percentile != bs->percentile)
3241 return as->percentile < bs->percentile ? 1 : -1;
3242 return strcmp (as->label, bs->label);
3245 static struct pivot_value *
3246 ctables_category_create_label (const struct ctables_category *cat,
3247 const struct variable *var,
3248 const union value *value)
3250 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3251 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3252 : cat->type == CCT_POSTCOMPUTE && cat->pc->label
3253 ? pivot_value_new_user_text (cat->pc->label, SIZE_MAX)
3254 : pivot_value_new_var_value (var, value));
3257 static struct ctables_value *
3258 ctables_value_find__ (struct ctables_table *t, const union value *value,
3259 int width, unsigned int hash)
3261 struct ctables_value *clv;
3262 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3263 hash, &t->clabels_values_map)
3264 if (value_equal (value, &clv->value, width))
3270 ctables_value_insert (struct ctables_table *t, const union value *value,
3273 unsigned int hash = value_hash (value, width, 0);
3274 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3277 clv = xmalloc (sizeof *clv);
3278 value_clone (&clv->value, value, width);
3279 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, which has the given WIDTH, in T->clabels_values_map.
   Returns the matching entry, or whatever ctables_value_find__() reports when
   there is none. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  const unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
3292 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3293 size_t ix[PIVOT_N_AXES])
3295 if (a < PIVOT_N_AXES)
3297 size_t limit = MAX (t->stacks[a].n, 1);
3298 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3299 ctables_table_add_section (t, a + 1, ix);
3303 struct ctables_section *s = &t->sections[t->n_sections++];
3304 *s = (struct ctables_section) {
3306 .cells = HMAP_INITIALIZER (s->cells),
3308 for (a = 0; a < PIVOT_N_AXES; a++)
3311 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3313 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3314 for (size_t i = 0; i < nest->n; i++)
3315 hmap_init (&s->occurrences[a][i]);
3317 for (size_t i = 0; i < N_CTDTS; i++)
3318 hmap_init (&s->domains[i]);
/* Postcompute "+" operator: returns A plus B. */
static double
ctpo_add (double a, double b)
{
  const double sum = a + b;
  return sum;
}
/* Postcompute binary "-" operator: returns A minus B. */
static double
ctpo_sub (double a, double b)
{
  const double difference = a - b;
  return difference;
}
/* Postcompute "*" operator: returns A times B. */
static double
ctpo_mul (double a, double b)
{
  const double product = a * b;
  return product;
}
3341 ctpo_div (double a, double b)
3343 return b ? a / b : SYSMIS;
3347 ctpo_pow (double a, double b)
3349 int save_errno = errno;
3351 double result = pow (a, b);
3359 ctpo_neg (double a, double b UNUSED)
/* Context passed down through the postcompute expression evaluators. */
3364 struct ctables_pcexpr_evaluate_ctx
/* Cell whose postcomputed value is being calculated; its category values are
   used for every position other than the postcompute's own. */
3366 const struct ctables_cell *cell;
/* Section that contains 'cell'; its cells and occurrences are searched. */
3367 const struct ctables_section *section;
/* Category specification of the variable that carries the postcompute. */
3368 const struct ctables_categories *cats;
/* Axis on which the postcompute category appears.  Together with the
   pc_a_idx member read by the evaluators, this locates the postcompute's
   position within the nest. */
3369 enum pivot_axis_type pc_a;
/* Forward declaration: the evaluators for operators and for expressions call
   each other recursively. */
3373 static double ctables_pcexpr_evaluate (
3374 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3377 ctables_pcexpr_evaluate_nonterminal (
3378 const struct ctables_pcexpr_evaluate_ctx *ctx,
3379 const struct ctables_pcexpr *e, size_t n_args,
3380 double evaluate (double, double))
3382 double args[2] = { 0, 0 };
3383 for (size_t i = 0; i < n_args; i++)
3385 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3386 if (!isfinite (args[i]) || args[i] == SYSMIS)
3389 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that has the same categories as
   CTX->cell, except that the position holding the postcompute (axis
   CTX->pc_a, index CTX->pc_a_idx) is replaced by PC_CV.

   NOTE(review): the value returned when no such cell exists is on a line that
   is not visible in the reviewed text -- confirm before relying on it. */
3393 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3394 const struct ctables_cell_value *pc_cv)
3396 const struct ctables_section *s = ctx->section;
/* Step 1: compute the hash of the target cell.  Each non-scale position
   contributes its category pointer and, except for totals, subtotals and
   postcomputes, its data value as well. */
3399 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3401 const struct ctables_nest *nest = s->nests[a];
3402 for (size_t i = 0; i < nest->n; i++)
3403 if (i != nest->scale_idx)
3405 const struct ctables_cell_value *cv
3406 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3407 : &ctx->cell->axes[a].cvs[i]);
3408 hash = hash_pointer (cv->category, hash);
3409 if (cv->category->type != CCT_TOTAL
3410 && cv->category->type != CCT_SUBTOTAL
3411 && cv->category->type != CCT_POSTCOMPUTE)
3412 hash = value_hash (&cv->value,
3413 var_get_width (nest->vars[i]), hash);
/* Step 2: among the cells with that hash, find the one whose categories (and
   values, where they were hashed above) all match. */
3417 struct ctables_cell *tc;
3418 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3420 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3422 const struct ctables_nest *nest = s->nests[a];
3423 for (size_t i = 0; i < nest->n; i++)
3424 if (i != nest->scale_idx)
3426 const struct ctables_cell_value *p_cv
3427 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3428 : &ctx->cell->axes[a].cvs[i]);
3429 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3430 if (p_cv->category != t_cv->category
3431 || (p_cv->category->type != CCT_TOTAL
3432 && p_cv->category->type != CCT_SUBTOTAL
3433 && p_cv->category->type != CCT_POSTCOMPUTE
3434 && !value_equal (&p_cv->value,
3436 var_get_width (nest->vars[i]))))
/* Step 3: report the matching cell's summary.  Only the first summary
   (j == 0) is used for now, as the XXX marker acknowledges. */
3448 const struct ctables_table *t = s->table;
3449 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3450 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3451 size_t j = 0 /* XXX */;
3452 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   Category references are resolved to cell summary values through
   ctables_pcexpr_evaluate_category().  Operators are evaluated through
   ctables_pcexpr_evaluate_nonterminal() with the matching ctpo_*()
   function. */
3456 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3457 const struct ctables_pcexpr *e)
/* A range can cover several values, so the result accumulates the value of
   every occurrence recorded for the postcompute's variable that the category
   specification maps to the range's category. */
3464 case CTPO_CAT_RANGE:
3466 struct ctables_cell_value cv = {
3467 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3469 assert (cv.category != NULL);
3471 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3472 const struct ctables_occurrence *o;
3475 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3476 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3477 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3479 cv.value = o->value;
3480 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* These references each resolve to a single category, evaluated directly
   with E->number as the cell value. */
3485 case CTPO_CAT_NUMBER:
3486 case CTPO_CAT_STRING:
3487 case CTPO_CAT_MISSING:
3488 case CTPO_CAT_OTHERNM:
3489 case CTPO_CAT_SUBTOTAL:
3490 case CTPO_CAT_TOTAL:
3492 struct ctables_cell_value cv = {
3493 .category = ctables_find_category_for_postcompute (ctx->cats, e),
3494 .value = { .f = e->number },
3496 assert (cv.category != NULL);
3497 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* Binary arithmetic operators take two operands; negation takes one. */
3501 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3504 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3507 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3510 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3513 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3516 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
3523 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3524 const struct ctables_cell *cell)
3526 enum pivot_axis_type pc_a;
3528 const struct ctables_postcompute *pc;
3529 for (pc_a = 0; ; pc_a++)
3531 assert (pc_a < PIVOT_N_AXES);
3532 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3534 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3535 if (cv->category->type == CCT_POSTCOMPUTE)
3537 pc = cv->category->pc;
3544 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3545 const struct ctables_categories *cats = s->table->categories[
3546 var_get_dict_index (var)];
3547 struct ctables_pcexpr_evaluate_ctx ctx = {
3552 .pc_a_idx = pc_a_idx,
3554 return ctables_pcexpr_evaluate (&ctx, pc->expr);
3558 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3560 struct pivot_table *pt = pivot_table_create__ (
3562 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3563 : pivot_value_new_text (N_("Custom Tables"))),
3566 pivot_table_set_caption (
3567 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
3569 pivot_table_set_caption (
3570 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
3572 bool summary_dimension = (t->summary_axis != t->slabels_axis
3573 || (!t->slabels_visible
3574 && t->summary_specs.n > 1));
3575 if (summary_dimension)
3577 struct pivot_dimension *d = pivot_dimension_create (
3578 pt, t->slabels_axis, N_("Statistics"));
3579 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3580 if (!t->slabels_visible)
3581 d->hide_all_labels = true;
3582 for (size_t i = 0; i < specs->n; i++)
3583 pivot_category_create_leaf (
3584 d->root, pivot_value_new_text (specs->specs[i].label));
3587 bool categories_dimension = t->clabels_example != NULL;
3588 if (categories_dimension)
3590 struct pivot_dimension *d = pivot_dimension_create (
3591 pt, t->label_axis[t->clabels_from_axis],
3592 t->clabels_from_axis == PIVOT_AXIS_ROW
3593 ? N_("Row Categories")
3594 : N_("Column Categories"));
3595 const struct variable *var = t->clabels_example;
3596 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3597 for (size_t i = 0; i < t->n_clabels_values; i++)
3599 const struct ctables_value *value = t->clabels_values[i];
3600 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3601 assert (cat != NULL);
3602 pivot_category_create_leaf (d->root, ctables_category_create_label (
3603 cat, t->clabels_example, &value->value));
3607 pivot_table_set_look (pt, ct->look);
3608 struct pivot_dimension *d[PIVOT_N_AXES];
3609 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3611 static const char *names[] = {
3612 [PIVOT_AXIS_ROW] = N_("Rows"),
3613 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3614 [PIVOT_AXIS_LAYER] = N_("Layers"),
3616 d[a] = (t->axes[a] || a == t->summary_axis
3617 ? pivot_dimension_create (pt, a, names[a])
3622 assert (t->axes[a]);
3624 for (size_t i = 0; i < t->stacks[a].n; i++)
3626 struct ctables_nest *nest = &t->stacks[a].nests[i];
3627 struct ctables_section **sections = xnmalloc (t->n_sections,
3629 size_t n_sections = 0;
3631 size_t n_total_cells = 0;
3632 size_t max_depth = 0;
3633 for (size_t j = 0; j < t->n_sections; j++)
3634 if (t->sections[j].nests[a] == nest)
3636 struct ctables_section *s = &t->sections[j];
3637 sections[n_sections++] = s;
3638 n_total_cells += s->cells.count;
3640 size_t depth = s->nests[a]->n;
3641 max_depth = MAX (depth, max_depth);
3644 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3646 size_t n_sorted = 0;
3648 for (size_t j = 0; j < n_sections; j++)
3650 struct ctables_section *s = sections[j];
3652 struct ctables_cell *cell;
3653 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3655 sorted[n_sorted++] = cell;
3656 assert (n_sorted <= n_total_cells);
3659 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3660 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
3663 for (size_t j = 0; j < n_sorted; j++)
3665 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
3670 struct ctables_level
3672 enum ctables_level_type
3674 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3675 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3676 CTL_SUMMARY, /* Summary functions. */
3680 enum settings_value_show vlabel; /* CTL_VAR only. */
3683 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3684 size_t n_levels = 0;
3685 for (size_t k = 0; k < nest->n; k++)
3687 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3688 if (vlabel != CTVL_NONE)
3690 levels[n_levels++] = (struct ctables_level) {
3692 .vlabel = (enum settings_value_show) vlabel,
3697 if (nest->scale_idx != k
3698 && (k != nest->n - 1 || t->label_axis[a] == a))
3700 levels[n_levels++] = (struct ctables_level) {
3701 .type = CTL_CATEGORY,
3707 if (!summary_dimension && a == t->slabels_axis)
3709 levels[n_levels++] = (struct ctables_level) {
3710 .type = CTL_SUMMARY,
3711 .var_idx = SIZE_MAX,
3715 /* Pivot categories:
3717 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3718 - category for nest->vars[0], if nest->scale_idx != 0
3719 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3720 - category for nest->vars[1], if nest->scale_idx != 1
3722 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3723 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3724 - summary function, if 'a == t->slabels_axis && a ==
3727 Additional dimensions:
3729 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3731 - If 't->label_axis[b] == a' for some 'b != a', add a category
3736 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3738 for (size_t j = 0; j < n_sorted; j++)
3740 struct ctables_cell *cell = sorted[j];
3741 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
3743 size_t n_common = 0;
3746 for (; n_common < n_levels; n_common++)
3748 const struct ctables_level *level = &levels[n_common];
3749 if (level->type == CTL_CATEGORY)
3751 size_t var_idx = level->var_idx;
3752 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3753 if (prev->axes[a].cvs[var_idx].category != c)
3755 else if (c->type != CCT_SUBTOTAL
3756 && c->type != CCT_TOTAL
3757 && c->type != CCT_POSTCOMPUTE
3758 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3759 &cell->axes[a].cvs[var_idx].value,
3760 var_get_type (nest->vars[var_idx])))
3766 for (size_t k = n_common; k < n_levels; k++)
3768 const struct ctables_level *level = &levels[k];
3769 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3770 if (level->type == CTL_SUMMARY)
3772 assert (k == n_levels - 1);
3774 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3775 for (size_t m = 0; m < specs->n; m++)
3777 int leaf = pivot_category_create_leaf (
3778 parent, pivot_value_new_text (specs->specs[m].label));
3785 const struct variable *var = nest->vars[level->var_idx];
3786 struct pivot_value *label;
3787 if (level->type == CTL_VAR)
3789 label = pivot_value_new_variable (var);
3790 label->variable.show = level->vlabel;
3792 else if (level->type == CTL_CATEGORY)
3794 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3795 label = ctables_category_create_label (cv->category,
3801 if (k == n_levels - 1)
3802 prev_leaf = pivot_category_create_leaf (parent, label);
3804 groups[k] = pivot_category_create_group__ (parent, label);
3808 cell->axes[a].leaf = prev_leaf;
3815 for (size_t i = 0; i < t->n_sections; i++)
3817 struct ctables_section *s = &t->sections[i];
3819 struct ctables_cell *cell;
3820 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3825 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3826 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
3827 for (size_t j = 0; j < specs->n; j++)
3830 size_t n_dindexes = 0;
3832 if (summary_dimension)
3833 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
3835 if (categories_dimension)
3837 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3838 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3839 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3840 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
3843 dindexes[n_dindexes++] = ctv->leaf;
3846 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3849 int leaf = cell->axes[a].leaf;
3850 if (a == t->summary_axis && !summary_dimension)
3852 dindexes[n_dindexes++] = leaf;
3855 const struct ctables_summary_spec *ss = &specs->specs[j];
3857 double d = (cell->postcompute
3858 ? ctables_cell_calculate_postcompute (s, cell)
3859 : ctables_summary_value (cell, &cell->summaries[j], ss));
3860 struct pivot_value *value;
3861 if (ct->hide_threshold != 0
3862 && d < ct->hide_threshold
3863 && (cell->postcompute
3865 : ctables_summary_function_is_count (ss->function)))
3867 value = pivot_value_new_user_text_nocopy (
3868 xasprintf ("<%d", ct->hide_threshold));
3870 else if (d == 0 && ct->zero)
3871 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
3872 else if (d == SYSMIS && ct->missing)
3873 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
3874 else if (specs->specs[j].is_ctables_format)
3876 char *s = data_out_stretchy (&(union value) { .f = d },
3878 &specs->specs[j].format,
3879 &ct->ctables_formats, NULL);
3880 value = pivot_value_new_user_text_nocopy (s);
3884 value = pivot_value_new_number (d);
3885 value->numeric.format = specs->specs[j].format;
3887 pivot_table_put (pt, dindexes, n_dindexes, value);
3892 pivot_table_submit (pt);
/* Validates the request, if any, to move the category labels of axis A of
   table T to another axis (ROWLABELS or COLLABELS with OPPOSITE or LAYER).

   On the way it records A in T->clabels_from_axis and the innermost variable
   of the axis's first nest in T->clabels_example.  Each failed check reports
   an error with msg(SE, ...).

   NOTE(review): the return statements are on lines that are not visible in
   the reviewed text; presumably each error path returns false and success
   returns true -- confirm. */
3896 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
3898 enum pivot_axis_type label_pos = t->label_axis[a];
3902 t->clabels_from_axis = a;
/* Names used to refer to the offending setting in error messages. */
3904 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
3905 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
3907 const struct ctables_stack *stack = &t->stacks[a];
/* The innermost variable of the first nest is the reference that every other
   nest's innermost variable is compared against. */
3911 const struct ctables_nest *n0 = &stack->nests[0];
3913 const struct variable *v0 = n0->vars[n0->n - 1];
3914 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3915 t->clabels_example = v0;
/* Categories sorted by a summary function cannot be moved. */
3917 for (size_t i = 0; i < c0->n_cats; i++)
3918 if (c0->cats[i].type == CCT_FUNCTION)
3920 msg (SE, _("%s=%s is not allowed with sorting based "
3921 "on a summary function."),
3922 subcommand_name, pos_name);
/* The variable to be moved must be categorical. */
3925 if (n0->n - 1 == n0->scale_idx)
3927 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
3928 "but %s is a scale variable."),
3929 subcommand_name, pos_name, var_get_name (v0));
/* Every other nest's innermost variable must be categorical and must agree
   with the reference in width, value labels, and category specification. */
3933 for (size_t i = 1; i < stack->n; i++)
3935 const struct ctables_nest *ni = &stack->nests[i];
3937 const struct variable *vi = ni->vars[ni->n - 1];
3938 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
3940 if (ni->n - 1 == ni->scale_idx)
3942 msg (SE, _("%s=%s requires the variables to be moved to be "
3943 "categorical, but %s is a scale variable."),
3944 subcommand_name, pos_name, var_get_name (vi));
3947 if (var_get_width (v0) != var_get_width (vi))
3949 msg (SE, _("%s=%s requires the variables to be "
3950 "moved to have the same width, but %s has "
3951 "width %d and %s has width %d."),
3952 subcommand_name, pos_name,
3953 var_get_name (v0), var_get_width (v0),
3954 var_get_name (vi), var_get_width (vi));
3957 if (!val_labs_equal (var_get_value_labels (v0),
3958 var_get_value_labels (vi)))
3960 msg (SE, _("%s=%s requires the variables to be "
3961 "moved to have the same value labels, but %s "
3962 "and %s have different value labels."),
3963 subcommand_name, pos_name,
3964 var_get_name (v0), var_get_name (vi));
3967 if (!ctables_categories_equal (c0, ci))
3969 msg (SE, _("%s=%s requires the variables to be "
3970 "moved to have the same category "
3971 "specifications, but %s and %s have different "
3972 "category specifications."),
3973 subcommand_name, pos_name,
3974 var_get_name (v0), var_get_name (vi));
/* Prepares table T for data collection:

   - Expands each axis into its stack of nests and works out, for each nest
     and domain type, which nest variables define the domain.

   - Supplies default summary specifications for nests that have none, and
     sets up listwise-missing variables when T->ctables->smissing_listwise.

   - Merges all of the nests' summary specifications into T->summary_specs,
     assigning each specification its index in the merged list.

   Returns the conjunction of ctables_check_label_position() for rows and
   columns.

   NOTE(review): many short lines (braces, case labels, "continue"/"break",
   and any cleanup such as releasing 'items') are not visible in the reviewed
   text; the comments below describe only what is visible. */
3983 ctables_prepare_table (struct ctables_table *t)
3985 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3988 t->stacks[a] = enumerate_fts (a, t->axes[a]);
3990 for (size_t j = 0; j < t->stacks[a].n; j++)
3992 struct ctables_nest *nest = &t->stacks[a].nests[j];
3993 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* domains[dt] lists the indexes of the nest variables that take part in
   domain type DT; at most nest->n of them. */
3995 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
3996 nest->n_domains[dt] = 0;
3998 for (size_t k = 0; k < nest->n; k++)
4000 if (k == nest->scale_idx)
4009 if (a != PIVOT_AXIS_LAYER)
4016 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4017 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4018 : a == PIVOT_AXIS_ROW)
/* True for the innermost categorical variable: the last variable, or the one
   before it when the last variable is the scale variable. */
4020 if (k == nest->n - 1
4021 || (nest->scale_idx == nest->n - 1
4022 && k == nest->n - 2))
4028 if (a == PIVOT_AXIS_COLUMN)
4033 if (a == PIVOT_AXIS_ROW)
4038 nest->domains[dt][nest->n_domains[dt]++] = k;
/* A single empty nest, so that the stack always has one element. */
4045 struct ctables_nest *nest = xmalloc (sizeof *nest);
4046 *nest = (struct ctables_nest) { .n = 0 };
4047 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
4050 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4051 for (size_t i = 0; i < stack->n; i++)
4053 struct ctables_nest *nest = &stack->nests[i];
/* No summaries were specified for this nest: default to the mean for scale
   variables and the count otherwise. */
4054 if (!nest->specs[CSV_CELL].n)
4056 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4057 specs->specs = xmalloc (sizeof *specs->specs);
4060 enum ctables_summary_function function
4061 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4063 *specs->specs = (struct ctables_summary_spec) {
4064 .function = function,
4065 .format = ctables_summary_default_format (function, specs->var),
4066 .label = ctables_summary_default_label (function, 0),
4069 specs->var = nest->vars[0];
/* Totals use the same summaries as ordinary cells unless they were given
   their own. */
4071 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4072 &nest->specs[CSV_CELL]);
4074 else if (!nest->specs[CSV_TOTAL].n)
4075 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4076 &nest->specs[CSV_CELL]);
/* Listwise missing: collect the scale variables of the other nests in the
   same group (same group_head). */
4078 if (t->ctables->smissing_listwise)
4080 struct variable **listwise_vars = NULL;
4082 size_t allocated = 0;
4084 for (size_t j = nest->group_head; j < stack->n; j++)
4086 const struct ctables_nest *other_nest = &stack->nests[j];
4087 if (other_nest->group_head != nest->group_head)
4090 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4093 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4094 sizeof *listwise_vars);
4095 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* Every summary variant of the nest gets the same array. */
4098 for (size_t j = 0; j < N_CSVS; j++)
4100 nest->specs[j].listwise_vars = listwise_vars;
4101 nest->specs[j].n_listwise_vars = n;
/* Merge the summary spec sets of all nests.  Each item walks one set; the
   smallest current spec is repeatedly appended to 'merged', and every item
   currently at an equal spec records the merged index and advances. */
4106 struct ctables_summary_spec_set *merged = &t->summary_specs;
4107 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4109 for (size_t j = 0; j < stack->n; j++)
4111 const struct ctables_nest *nest = &stack->nests[j];
4113 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4114 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4119 struct merge_item min = items[0];
4120 for (size_t j = 1; j < n_left; j++)
4121 if (merge_item_compare_3way (&items[j], &min) < 0)
4124 if (merged->n >= merged->allocated)
4125 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4126 sizeof *merged->specs);
4127 merged->specs[merged->n++] = min.set->specs[min.ofs];
4129 for (size_t j = 0; j < n_left; )
4131 if (merge_item_compare_3way (&items[j], &min) == 0)
4133 struct merge_item *item = &items[j];
4134 item->set->specs[item->ofs].axis_idx = merged->n - 1;
/* An exhausted item is replaced by the last remaining one. */
4135 if (++item->ofs >= item->set->n)
4137 items[j] = items[--n_left];
/* NOTE(review): the printf() calls below dump the merged summaries for
   debugging; whether they are compiled in is not visible here -- confirm. */
4146 for (size_t j = 0; j < merged->n; j++)
4147 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4149 for (size_t j = 0; j < stack->n; j++)
4151 const struct ctables_nest *nest = &stack->nests[j];
4152 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4154 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4155 for (size_t k = 0; k < specs->n; k++)
4156 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4157 specs->specs[k].axis_idx);
4163 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4164 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
4168 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4169 enum pivot_axis_type a)
4171 struct ctables_stack *stack = &t->stacks[a];
4172 for (size_t i = 0; i < stack->n; i++)
4174 const struct ctables_nest *nest = &stack->nests[i];
4175 const struct variable *var = nest->vars[nest->n - 1];
4176 const union value *value = case_data (c, var);
4178 if (var_is_numeric (var) && value->f == SYSMIS)
4181 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4183 ctables_value_insert (t, value, var_get_width (var));
4188 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4190 const struct ctables_value *const *ap = a_;
4191 const struct ctables_value *const *bp = b_;
4192 const struct ctables_value *a = *ap;
4193 const struct ctables_value *b = *bp;
4194 const int *width = width_;
4195 return value_compare_3way (&a->value, &b->value, *width);
/* Builds T->clabels_values, the sorted array of category-label values, from
   the values collected in T->clabels_values_map, and numbers each value's
   'leaf' with its position in the sorted array.

   NOTE(review): the condition under which value-labeled values are added
   before sorting is on a line not visible in the reviewed text -- confirm. */
4199 ctables_sort_clabels_values (struct ctables_table *t)
4201 const struct variable *v0 = t->clabels_example;
4202 int width = var_get_width (v0);
4204 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
/* Add every value-labeled value that the category specification matches. */
4207 const struct val_labs *val_labs = var_get_value_labels (v0);
4208 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4209 vl = val_labs_next (val_labs, vl))
4210 if (ctables_categories_match (c0, &vl->value, v0))
4211 ctables_value_insert (t, &vl->value, width);
/* Flatten the map into an array of pointers to its entries. */
4214 size_t n = hmap_count (&t->clabels_values_map);
4215 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4217 struct ctables_value *clv;
4219 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4220 t->clabels_values[i++] = clv;
4221 t->n_clabels_values = n;
/* Sort by value, then record each entry's position as its leaf index. */
4224 sort (t->clabels_values, n, sizeof *t->clabels_values,
4225 compare_clabels_values_3way, &width);
4227 for (size_t i = 0; i < n; i++)
4228 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that are implied by category
   specification CATS rather than observed in the data: explicitly listed
   values, and value-labeled values selected by the other kinds of category.

   NOTE(review): most of the switch's case labels are on lines that are not
   visible in the reviewed text; the comments below infer each branch's
   category kind from the members it reads -- confirm. */
4232 ctables_add_category_occurrences (const struct variable *var,
4233 struct hmap *occurrences,
4234 const struct ctables_categories *cats)
4236 const struct val_labs *val_labs = var_get_value_labels (var);
4238 for (size_t i = 0; i < cats->n_cats; i++)
4240 const struct ctables_category *c = &cats->cats[i];
/* An explicit number: add it directly. */
4244 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* An explicit string: pad it with spaces to the variable's width in a
   temporary value, add that, and release the temporary. */
4250 int width = var_get_width (var);
4252 value_init (&value, width);
4253 value_copy_buf_rpad (&value, width,
4254 CHAR_CAST (uint8_t *, c->string.string),
4255 c->string.length, ' ');
4256 ctables_add_occurrence (var, &value, occurrences);
4257 value_destroy (&value, width);
/* A numeric range: add each value-labeled value inside it, bounds
   included. */
4262 assert (var_is_numeric (var));
4263 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4264 vl = val_labs_next (val_labs, vl))
4265 if (vl->value.f >= c->range[0] && vl->value.f <= c->range[1])
4266 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add each value-labeled value that is missing for VAR. */
4270 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4271 vl = val_labs_next (val_labs, vl))
4272 if (var_is_value_missing (var, &vl->value))
4273 ctables_add_occurrence (var, &vl->value, occurrences);
/* Add every value-labeled value. */
4277 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4278 vl = val_labs_next (val_labs, vl))
4279 ctables_add_occurrence (var, &vl->value, occurrences);
4282 case CCT_POSTCOMPUTE:
/* Add every value-labeled value, leaving out missing values unless the
   category includes them. */
4292 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4293 vl = val_labs_next (val_labs, vl))
4294 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4295 ctables_add_occurrence (var, &vl->value, occurrences);
4298 case CCT_EXCLUDED_MISSING:
/* Recursive helper for ctables_section_add_empty_categories().

   Visits the variables of section S's nests in order of axis A and index
   A_IDX within the axis.  For the current variable it tries every value
   recorded in S->occurrences[A][A_IDX]: the value is stored into case C, the
   matching category is stored into CATS[A][A_IDX], and the function
   recurses to the next variable.  It also recurses once for each
   CCT_POSTCOMPUTE category of the variable.  When all axes are exhausted it
   calls ctables_cell_insert__() for the combination built so far. */
4305 ctables_section_recurse_add_empty_categories (
4306 struct ctables_section *s,
4307 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4308 enum pivot_axis_type a, size_t a_idx)
/* Past the last axis: a complete combination is available. */
4310 if (a >= PIVOT_N_AXES)
4311 ctables_cell_insert__ (s, c, cats);
/* No nest on this axis, or past its last variable: move to the next axis. */
4312 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4313 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4316 const struct variable *var = s->nests[a]->vars[a_idx];
4317 const struct ctables_categories *categories = s->table->categories[
4318 var_get_dict_index (var)];
4319 int width = var_get_width (var);
4320 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4321 const struct ctables_occurrence *o;
4322 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Overwrite VAR's value in the scratch case with this occurrence. */
4324 union value *value = case_data_rw (c, var);
4325 value_destroy (value, width);
4326 value_clone (value, &o->value, width);
4327 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4328 assert (cats[a][a_idx] != NULL);
4329 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are tried directly, without a data value. */
4332 for (size_t i = 0; i < categories->n_cats; i++)
4334 const struct ctables_category *cat = &categories->cats[i];
4335 if (cat->type == CCT_POSTCOMPUTE)
4337 cats[a][a_idx] = cat;
4338 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for category combinations that should be shown
   even though they might not occur in the data.  Category occurrences are
   collected for the non-scale variables on each axis, and then every
   combination is enumerated with
   ctables_section_recurse_add_empty_categories() using a scratch case.

   NOTE(review): several short lines are not shown here; the
   `cats->show_empty' test presumably guards the call that collects
   occurrences and sets `show_empty' -- confirm. */
4345 ctables_section_add_empty_categories (struct ctables_section *s)
4347 bool show_empty = false;
4348 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4350 for (size_t k = 0; k < s->nests[a]->n; k++)
/* The scale variable, if any, has no categories to enumerate. */
4351 if (k != s->nests[a]->scale_idx)
4353 const struct variable *var = s->nests[a]->vars[k];
4354 const struct ctables_categories *cats = s->table->categories[
4355 var_get_dict_index (var)];
4356 if (cats->show_empty)
4359 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* CATS is a fixed-size scratch array handed to the recursive helper; the
   case is created from the dictionary's prototype. */
4365 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4366 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4367 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
/* Executes the tables in CT against the active dataset DS.

   Allocates and initializes the sections of each table, reads every case
   from DS and inserts it into every section of every table, then, per
   table, sorts the category-label values (when the table has a
   clabels_example), adds empty categories to each section, and outputs the
   table.  Returns the result of proc_commit(). */
4372 ctables_execute (struct dataset *ds, struct ctables *ct)
4374 for (size_t i = 0; i < ct->n_tables; i++)
4376 struct ctables_table *t = ct->tables[i];
/* One section per combination of row, column and layer stacks; an axis
   with no stacks still counts as one. */
4377 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4378 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4379 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4380 sizeof *t->sections);
4381 size_t ix[PIVOT_N_AXES];
4382 ctables_table_add_section (t, 0, ix);
/* Data pass. */
4385 struct casereader *input = proc_open (ds);
4386 bool warn_on_invalid = true;
4387 for (struct ccase *c = casereader_read (input); c;
4388 case_unref (c), c = casereader_read (input))
/* d_weight comes from the dictionary's case weight; e_weight is derived
   from CT->e_weight when that variable is set. */
4390 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4392 double e_weight = (ct->e_weight
4393 ? var_force_valid_weight (ct->e_weight,
4394 case_num (c, ct->e_weight),
4398 for (size_t i = 0; i < ct->n_tables; i++)
4400 struct ctables_table *t = ct->tables[i];
4402 for (size_t j = 0; j < t->n_sections; j++)
4403 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
/* Axes whose category labels were moved to another axis also record the
   values seen in this case. */
4405 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4406 if (t->label_axis[a] != a)
4407 ctables_insert_clabels_values (t, c, a);
4410 casereader_destroy (input);
/* Post-processing and output. */
4412 for (size_t i = 0; i < ct->n_tables; i++)
4414 struct ctables_table *t = ct->tables[i];
4416 if (t->clabels_example)
4417 ctables_sort_clabels_values (t);
4419 for (size_t j = 0; j < t->n_sections; j++)
4420 ctables_section_add_empty_categories (&t->sections[j]);
4422 ctables_table_output (ct, ct->tables[i]);
4424 return proc_commit (ds);
/* Type of a function that parses one precedence level of a postcompute
   expression from the lexer and returns the resulting expression.  Used
   as the `parse_next_level' argument of parse_binary_operators(). */
4429 typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
4430 struct dictionary *);
/* Destroys postcompute expression E.  A CTPO_CAT_STRING node releases its
   string, a node with subexpressions destroys both of them recursively, and
   the listed category node types need no type-specific cleanup.  E's
   location is destroyed as well.

   NOTE(review): the switch header, the case labels of the binary and unary
   operators, and any null check or final free of E are not shown here. */
4433 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4439 case CTPO_CAT_STRING:
4440 ss_dealloc (&e->string);
4449 for (size_t i = 0; i < 2; i++)
4450 ctables_pcexpr_destroy (e->subs[i]);
4454 case CTPO_CAT_NUMBER:
4455 case CTPO_CAT_RANGE:
4456 case CTPO_CAT_MISSING:
4457 case CTPO_CAT_OTHERNM:
4458 case CTPO_CAT_SUBTOTAL:
4459 case CTPO_CAT_TOTAL:
4463 msg_location_destroy (e->location);
/* Allocates and returns a new expression node for binary operation OP with
   operands SUB0 and SUB1.  The node's location is the merge of the two
   operands' locations. */
4468 static struct ctables_pcexpr *
4469 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4470 struct ctables_pcexpr *sub0,
4471 struct ctables_pcexpr *sub1)
4473 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4474 *e = (struct ctables_pcexpr) {
4476 .subs = { sub0, sub1 },
4477 .location = msg_location_merged (sub0->location, sub1->location),
4482 /* How to parse an operator. */
/* Lexer token that match_operator() compares against the current token. */
4485 enum token_type token;
/* Operation used for the expression node built when the token matches. */
4486 enum ctables_postcompute_op op;
/* Searches the N_OPS elements of OPS for one whose token equals the lexer's
   current token.

   NOTE(review): the statements guarded by the T_NEG_NUM test and the
   return statements are not shown here.  Presumably a matched token is
   consumed unless it is T_NEG_NUM (so that the negative number can still be
   parsed as an operand), and the function returns the matching operator or
   NULL -- confirm. */
4489 static const struct operator *
4490 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4492 for (const struct operator *op = ops; op < ops + n_ops; op++)
4493 if (lex_token (lexer) == op->token)
4495 if (op->token != T_NEG_NUM)
/* Parses a chain of binary operators drawn from the N_OPS elements of OPS,
   starting from the already-parsed left operand LHS.  Each right operand is
   parsed with PARSE_NEXT_LEVEL and combined with the accumulated left side
   by ctables_pcexpr_allocate_binary(), so chains associate to the left.

   If CHAIN_WARNING is nonnull it is issued, at LHS's location, once
   `op_count' exceeds 1.  If a right operand fails to parse, the
   accumulated LHS is destroyed.

   NOTE(review): the `if' lines around the warning and the failure path, and
   the return statements, are not shown here. */
4504 static struct ctables_pcexpr *
4505 parse_binary_operators__ (struct lexer *lexer, struct dictionary *dict,
4506 const struct operator ops[], size_t n_ops,
4507 parse_recursively_func *parse_next_level,
4508 const char *chain_warning,
4509 struct ctables_pcexpr *lhs)
4511 for (int op_count = 0; ; op_count++)
4513 const struct operator *op = match_operator (lexer, ops, n_ops);
4516 if (op_count > 1 && chain_warning)
4517 msg_at (SW, lhs->location, "%s", chain_warning);
4522 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
4525 ctables_pcexpr_destroy (lhs);
4529 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
/* Parses one operand with PARSE_NEXT_LEVEL and then any chain of binary
   operators from OPS that follows it, by passing that operand as the left
   side to parse_binary_operators__().  CHAIN_WARNING is forwarded
   unchanged. */
4533 static struct ctables_pcexpr *
4534 parse_binary_operators (struct lexer *lexer, struct dictionary *dict,
4535 const struct operator ops[], size_t n_ops,
4536 parse_recursively_func *parse_next_level,
4537 const char *chain_warning)
4539 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
4543 return parse_binary_operators__ (lexer, dict, ops, n_ops, parse_next_level,
4544 chain_warning, lhs);
/* Forward declaration: parse_primary() calls parse_add() for a
   parenthesized subexpression. */
4547 static struct ctables_pcexpr *parse_add (struct lexer *, struct dictionary *);
/* Returns, by value, a CTPO_CAT_RANGE expression node whose range is
   LOW..HIGH.  No other member is explicitly initialized in the visible
   initializer. */
4549 static struct ctables_pcexpr
4550 ctpo_cat_range (double low, double high)
4552 return (struct ctables_pcexpr) {
4553 .op = CTPO_CAT_RANGE,
4554 .range = { low, high },
/* Parses a primary postcompute expression and returns it as a newly
   allocated node whose location spans the tokens consumed.  The visible
   alternatives are:

     - a number (CTPO_CONSTANT);
     - MISSING, OTHERNM, TOTAL;
     - SUBTOTAL, optionally followed by a bracketed index of at least 1;
     - a bracketed category: [LO THRU n], [n THRU HI], [n THRU m], [n], or
       a string, which is recoded from UTF-8 to DICT's encoding and has
       trailing spaces trimmed;
     - a parenthesized subexpression, parsed with parse_add().

   Anything else is a syntax error.

   NOTE(review): short lines (braces, `else', the returns on the failure
   paths, token advances) are not shown in this listing. */
4558 static struct ctables_pcexpr *
4559 parse_primary (struct lexer *lexer, struct dictionary *dict)
4561 int start_ofs = lex_ofs (lexer);
4562 struct ctables_pcexpr e;
4563 if (lex_is_number (lexer))
4565 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4566 .number = lex_number (lexer) };
4569 else if (lex_match_id (lexer, "MISSING"))
4570 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4571 else if (lex_match_id (lexer, "OTHERNM"))
4572 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4573 else if (lex_match_id (lexer, "TOTAL"))
4574 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4575 else if (lex_match_id (lexer, "SUBTOTAL"))
/* An index of 0 means that no [n] was given. */
4577 size_t subtotal_index = 0;
4578 if (lex_match (lexer, T_LBRACK))
4580 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4582 subtotal_index = lex_integer (lexer);
4584 if (!lex_force_match (lexer, T_RBRACK))
4587 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4588 .subtotal_index = subtotal_index };
4590 else if (lex_match (lexer, T_LBRACK))
4592 if (lex_match_id (lexer, "LO"))
/* Both THRU and a number are required after LO; fail when either one is
   absent.  lex_force_num() is true when a number is present, so it must be
   negated here just like the THRU test. */
4594 if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
4596 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4599 else if (lex_is_number (lexer))
4601 double number = lex_number (lexer);
4603 if (lex_match_id (lexer, "THRU"))
4605 if (lex_match_id (lexer, "HI"))
4606 e = ctpo_cat_range (number, DBL_MAX);
4609 if (!lex_force_num (lexer))
4611 e = ctpo_cat_range (number, lex_number (lexer));
4616 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4619 else if (lex_is_string (lexer))
4621 struct substring s = recode_substring_pool (
4622 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
4623 ss_rtrim (&s, ss_cstr (" "));
4625 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
4630 lex_error (lexer, NULL);
/* On a missing `]' the string owned by E must be released, because E has
   not been handed to the caller yet. */
4634 if (!lex_force_match (lexer, T_RBRACK))
4636 if (e.op == CTPO_CAT_STRING)
4637 ss_dealloc (&e.string);
4641 else if (lex_match (lexer, T_LPAREN))
4643 struct ctables_pcexpr *ep = parse_add (lexer, dict);
4646 if (!lex_force_match (lexer, T_RPAREN))
4648 ctables_pcexpr_destroy (ep);
4655 lex_error (lexer, NULL);
4659 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4660 return xmemdup (&e, sizeof e);
/* Allocates and returns a new expression node that wraps SUB.  Its
   location runs from token offset START_OFS to the token just before
   LEXER's current one.

   NOTE(review): the initializer lines that set the operation and store SUB
   are not shown here; callers use this for unary negation. */
4663 static struct ctables_pcexpr *
4664 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4665 struct lexer *lexer, int start_ofs)
4667 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4668 *e = (struct ctables_pcexpr) {
4671 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
</full_update_placeholder_removed>
/* Parses the exponentiation level: a primary followed by any chain of `**'
   operators.  CHAIN_WARNING is passed down so that chained exponentiation
   can be warned about.

   A negative number immediately followed by `**' gets special handling,
   described below. */
4676 static struct ctables_pcexpr *
4677 parse_exp (struct lexer *lexer, struct dictionary *dict)
4679 static const struct operator op = { T_EXP, CTPO_POW };
4681 const char *chain_warning =
4682 _("The exponentiation operator (`**') is left-associative: "
4683 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4684 "To disable this warning, insert parentheses.");
4686 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4687 return parse_binary_operators (lexer, dict, &op, 1,
4688 parse_primary, chain_warning);
4690 /* Special case for situations like "-5**6", which must be parsed as
   -(5**6): the token's value is negated to form the left operand, the
   `**' chain is parsed from it, and the result is wrapped in a negation. */
4693 int start_ofs = lex_ofs (lexer);
4694 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4695 *lhs = (struct ctables_pcexpr) {
4696 .op = CTPO_CONSTANT,
4697 .number = -lex_tokval (lexer),
4698 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4702 struct ctables_pcexpr *node = parse_binary_operators__ (
4703 lexer, dict, &op, 1, parse_primary, chain_warning, lhs);
4707 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4710 /* Parses the unary minus level. */
/* Without a leading `-' this defers to parse_exp().  Otherwise it consumes
   the `-', parses the operand recursively (so repeated minus signs nest),
   and wraps the operand with ctables_pcexpr_allocate_neg(). */
4711 static struct ctables_pcexpr *
4712 parse_neg (struct lexer *lexer, struct dictionary *dict)
4714 int start_ofs = lex_ofs (lexer);
4715 if (!lex_match (lexer, T_DASH))
4716 return parse_exp (lexer, dict);
4718 struct ctables_pcexpr *inner = parse_neg (lexer, dict);
4722 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4725 /* Parses the multiplication and division level. */
/* `*' maps to CTPO_MUL and `/' to CTPO_DIV.  NOTE(review): the trailing
   arguments of the call (next-level parser and chain warning) are not shown
   here. */
4726 static struct ctables_pcexpr *
4727 parse_mul (struct lexer *lexer, struct dictionary *dict)
4729 static const struct operator ops[] =
4731 { T_ASTERISK, CTPO_MUL },
4732 { T_SLASH, CTPO_DIV },
4735 return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
4739 /* Parses the addition and subtraction level. */
/* `+' maps to CTPO_ADD and `-' to CTPO_SUB.  A negative-number token is
   also accepted as an operator and mapped to CTPO_ADD; presumably this
   lets input such as `a -5' be read as `a + (-5)' -- confirm against
   match_operator().  NOTE(review): the trailing arguments of the call are
   not shown here. */
4740 static struct ctables_pcexpr *
4741 parse_add (struct lexer *lexer, struct dictionary *dict)
4743 static const struct operator ops[] =
4745 { T_PLUS, CTPO_ADD },
4746 { T_DASH, CTPO_SUB },
4747 { T_NEG_NUM, CTPO_ADD },
4750 return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
/* Searches CT->postcomputes for a postcompute named NAME.  Both the hash
   (utf8_hash_case_string) and the comparison (utf8_strcasecmp) ignore
   case.  NOTE(review): the return statements are not shown here;
   callers treat a null result as "not found". */
4754 static struct ctables_postcompute *
4755 ctables_find_postcompute (struct ctables *ct, const char *name)
4757 struct ctables_postcompute *pc;
4758 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4759 utf8_hash_case_string (name, 0), &ct->postcomputes)
4760 if (!utf8_strcasecmp (pc->name, name))
/* Parses the body of a PCOMPUTE subcommand, `&NAME = EXPR(expression)',
   and records the postcompute in CT->postcomputes.

   If a postcompute called NAME already exists, a warning is issued at the
   new definition and a note at the previous one, and the previous
   expression and location are destroyed before the new ones are stored.
   Otherwise a new entry is allocated and inserted under the
   case-insensitive hash of its name.

   NOTE(review): failure paths, the trailing parameter(s) and the
   assignment of the expression are on lines not shown here. */
4766 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
/* The PCOMPUTE keyword has already been consumed, hence the `- 1'. */
4769 int pcompute_start = lex_ofs (lexer) - 1;
4771 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
4774 char *name = ss_xstrdup (lex_tokss (lexer));
4777 if (!lex_force_match (lexer, T_EQUALS)
4778 || !lex_force_match_id (lexer, "EXPR")
4779 || !lex_force_match (lexer, T_LPAREN))
/* Remember the token range of the expression so that its source text can
   serve as the label below. */
4785 int expr_start = lex_ofs (lexer);
4786 struct ctables_pcexpr *expr = parse_add (lexer, dict);
4787 int expr_end = lex_ofs (lexer) - 1;
4788 if (!expr || !lex_force_match (lexer, T_RPAREN))
4793 int pcompute_end = lex_ofs (lexer) - 1;
4795 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
4798 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
4801 msg_at (SW, location, _("New definition of &%s will override the "
4802 "previous definition."),
4804 msg_at (SN, pc->location, _("This is the previous definition."));
4806 ctables_pcexpr_destroy (pc->expr);
4807 msg_location_destroy (pc->location);
4812 pc = xmalloc (sizeof *pc);
4813 *pc = (struct ctables_postcompute) { .name = name };
4814 hmap_insert (&ct->postcomputes, &pc->hmap_node,
4815 utf8_hash_case_string (pc->name, 0));
4818 pc->location = location;
4820 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
/* Parses the FORMAT setting of PPROPERTIES into *SSS, which is
   reinitialized to empty first.  Each item is a summary function, a
   percentile in 0..100 when the function is CTSF_PTILE, and a numeric
   output format.  Parsing stops at the end of the command, at `/', or at
   the LABEL or HIDESOURCECATS keyword.  The visible failure path
   uninitializes *SSS.

   NOTE(review): the return statements are not shown here;
   the caller treats a false result as failure. */
4825 ctables_parse_pproperties_format (struct lexer *lexer,
4826 struct ctables_summary_spec_set *sss)
4828 *sss = (struct ctables_summary_spec_set) { .n = 0 };
4830 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
4831 && !(lex_token (lexer) == T_ID
4832 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
4833 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
4834 lex_tokss (lexer)))))
4836 /* Parse function. */
4837 enum ctables_summary_function function;
4838 if (!parse_ctables_summary_function (lexer, &function))
4841 /* Parse percentile. */
4842 double percentile = 0;
4843 if (function == CTSF_PTILE)
4845 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4847 percentile = lex_number (lexer);
/* The format must be a valid output format that is compatible with
   numeric values. */
4852 struct fmt_spec format;
4853 if (!parse_format_specifier (lexer, &format)
4854 || !fmt_check_output (&format)
4855 || !fmt_check_type_compat (&format, VAL_NUMERIC))
/* Grow the array as needed and append the new specification. */
4858 if (sss->n >= sss->allocated)
4859 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
4860 sizeof *sss->specs);
4861 sss->specs[sss->n++] = (struct ctables_summary_spec) {
4862 .function = function,
4863 .percentile = percentile,
4870 ctables_summary_spec_set_uninit (sss);
/* Parses the body of a PPROPERTIES subcommand: a list of `&NAME'
   references to postcomputes already defined in CT, followed by LABEL,
   FORMAT and HIDESOURCECATS settings that are applied to every listed
   postcompute.  Referring to an undefined postcompute is an error.

   NOTE(review): failure paths, cleanup of PCS and the return statements
   are on lines not shown here. */
4875 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
4877 struct ctables_postcompute **pcs = NULL;
4879 size_t allocated_pcs = 0;
/* Collect the postcomputes that the settings will apply to. */
4881 while (lex_match (lexer, T_AND))
4883 if (!lex_force_id (lexer))
4885 struct ctables_postcompute *pc
4886 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
4889 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
4894 if (n_pcs >= allocated_pcs)
4895 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
4899 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4901 if (lex_match_id (lexer, "LABEL"))
4903 lex_match (lexer, T_EQUALS);
4904 if (!lex_force_string (lexer))
/* Each postcompute gets its own copy of the label string. */
4907 for (size_t i = 0; i < n_pcs; i++)
4909 free (pcs[i]->label);
4910 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
4915 else if (lex_match_id (lexer, "FORMAT"))
4917 lex_match (lexer, T_EQUALS);
4919 struct ctables_summary_spec_set sss;
4920 if (!ctables_parse_pproperties_format (lexer, &sss))
/* Each postcompute gets a clone of the parsed set; the parsed original is
   released afterward. */
4923 for (size_t i = 0; i < n_pcs; i++)
4926 ctables_summary_spec_set_uninit (pcs[i]->specs);
4928 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
4929 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
4931 ctables_summary_spec_set_uninit (&sss);
4933 else if (lex_match_id (lexer, "HIDESOURCECATS"))
4935 lex_match (lexer, T_EQUALS);
4936 bool hide_source_cats;
4937 if (!parse_bool (lexer, &hide_source_cats))
4939 for (size_t i = 0; i < n_pcs; i++)
4940 pcs[i]->hide_source_cats = hide_source_cats;
4944 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
/* Parses and executes the CTABLES command.

   Sets up a `struct ctables' for DS's dictionary, with default variable
   label display taken from the settings and a private copy of the default
   pivot table look.  It then parses the subcommands that may precede the
   first TABLE (FORMAT, VLABELS, MRSETS, SMISSING, PCOMPUTE, PPROPERTIES,
   WEIGHT, HIDESMALLCOUNTS), followed by each TABLE with its per-table
   subcommands (SLABELS, CLABELS, CRITERIA, CATEGORIES, TITLES, SIGTEST,
   COMPARETEST).  Finally it runs ctables_execute() and destroys the
   structure.

   Returns CMD_SUCCESS if ctables_execute() succeeds and CMD_FAILURE if it
   fails.  The error path at the end also destroys the structure.

   NOTE(review): short lines (braces, `else', `goto error;', some
   initializer members) are not shown in this listing. */
4957 cmd_ctables (struct lexer *lexer, struct dataset *ds)
4959 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
4960 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
4961 enum settings_value_show tvars = settings_get_show_variables ();
4962 for (size_t i = 0; i < n_vars; i++)
4963 vlabels[i] = (enum ctables_vlabel) tvars;
/* Work on an unshared copy of the default look so that it can be
   modified. */
4965 struct pivot_table_look *look = pivot_table_look_unshare (
4966 pivot_table_look_ref (pivot_table_look_get_default ()));
4967 look->omit_empty = false;
4969 struct ctables *ct = xmalloc (sizeof *ct);
4970 *ct = (struct ctables) {
4971 .dict = dataset_dict (ds),
4973 .ctables_formats = FMT_SETTINGS_INIT,
4975 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
/* Custom currency styles for the CTABLES-specific formats, in one variant
   for a `.' decimal point and one for `,'. */
4981 const char *dot_string;
4982 const char *comma_string;
4984 static const struct ctf ctfs[4] = {
4985 { CTEF_NEGPAREN, "(,,,)", "(...)" },
4986 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
4987 { CTEF_PAREN, "-,(,),", "-.(.)." },
4988 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
4990 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
4991 for (size_t i = 0; i < 4; i++)
4993 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
4994 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
4995 fmt_number_style_from_string (s));
4998 if (!lex_force_match (lexer, T_SLASH))
/* Subcommands that precede the first TABLE. */
5001 while (!lex_match_id (lexer, "TABLE"))
5003 if (lex_match_id (lexer, "FORMAT"))
5005 double widths[2] = { SYSMIS, SYSMIS };
5006 double units_per_inch = 72.0;
5008 while (lex_token (lexer) != T_SLASH)
5010 if (lex_match_id (lexer, "MINCOLWIDTH"))
5012 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5015 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5017 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5020 else if (lex_match_id (lexer, "UNITS"))
5022 lex_match (lexer, T_EQUALS);
5023 if (lex_match_id (lexer, "POINTS"))
5024 units_per_inch = 72.0;
5025 else if (lex_match_id (lexer, "INCHES"))
5026 units_per_inch = 1.0;
5027 else if (lex_match_id (lexer, "CM"))
5028 units_per_inch = 2.54;
5031 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
5035 else if (lex_match_id (lexer, "EMPTY"))
5040 lex_match (lexer, T_EQUALS);
5041 if (lex_match_id (lexer, "ZERO"))
5043 /* Nothing to do. */
5045 else if (lex_match_id (lexer, "BLANK"))
5046 ct->zero = xstrdup ("");
5047 else if (lex_force_string (lexer))
5049 ct->zero = ss_xstrdup (lex_tokss (lexer));
5055 else if (lex_match_id (lexer, "MISSING"))
5057 lex_match (lexer, T_EQUALS);
5058 if (!lex_force_string (lexer))
/* The string "." selects the alternative on the elided line instead of a
   copy of the token. */
5062 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
5063 ? ss_xstrdup (lex_tokss (lexer))
5069 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
5070 "UNITS", "EMPTY", "MISSING");
5075 if (widths[0] != SYSMIS && widths[1] != SYSMIS
5076 && widths[0] > widths[1])
5078 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
/* Convert each width that was specified from the chosen unit to inches and
   scale by 96 before storing it in the look's horizontal width range. */
5082 for (size_t i = 0; i < 2; i++)
5083 if (widths[i] != SYSMIS)
5085 int *wr = ct->look->width_ranges[TABLE_HORZ];
5086 wr[i] = widths[i] / units_per_inch * 96.0;
5091 else if (lex_match_id (lexer, "VLABELS"))
5093 if (!lex_force_match_id (lexer, "VARIABLES"))
5095 lex_match (lexer, T_EQUALS);
5097 struct variable **vars;
5099 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
5103 if (!lex_force_match_id (lexer, "DISPLAY"))
5108 lex_match (lexer, T_EQUALS);
5110 enum ctables_vlabel vlabel;
5111 if (lex_match_id (lexer, "DEFAULT"))
5112 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
5113 else if (lex_match_id (lexer, "NAME"))
5115 else if (lex_match_id (lexer, "LABEL"))
5116 vlabel = CTVL_LABEL;
5117 else if (lex_match_id (lexer, "BOTH"))
5119 else if (lex_match_id (lexer, "NONE"))
5123 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
5129 for (size_t i = 0; i < n_vars; i++)
5130 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5133 else if (lex_match_id (lexer, "MRSETS"))
5135 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5137 lex_match (lexer, T_EQUALS);
5138 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5141 else if (lex_match_id (lexer, "SMISSING"))
5143 if (lex_match_id (lexer, "VARIABLE"))
5144 ct->smissing_listwise = false;
5145 else if (lex_match_id (lexer, "LISTWISE"))
5146 ct->smissing_listwise = true;
5149 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5153 else if (lex_match_id (lexer, "PCOMPUTE"))
5155 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
5158 else if (lex_match_id (lexer, "PPROPERTIES"))
5160 if (!ctables_parse_pproperties (lexer, ct))
5163 else if (lex_match_id (lexer, "WEIGHT"))
5165 if (!lex_force_match_id (lexer, "VARIABLE"))
5167 lex_match (lexer, T_EQUALS);
5168 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
/* The keyword must be spelled without surrounding whitespace: it is
   compared against an identifier token. */
5172 else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
5174 if (lex_match_id (lexer, "COUNT"))
5176 lex_match (lexer, T_EQUALS);
5177 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5180 ct->hide_threshold = lex_integer (lexer);
/* Without COUNT, a threshold of 5 is used unless one is already set. */
5183 else if (ct->hide_threshold == 0)
5184 ct->hide_threshold = 5;
5188 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5189 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5190 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5194 if (!lex_force_match (lexer, T_SLASH))
/* One iteration per TABLE subcommand. */
5198 size_t allocated_tables = 0;
5201 if (ct->n_tables >= allocated_tables)
5202 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5203 sizeof *ct->tables);
/* Default categories, shared initially by every variable of the table. */
5205 struct ctables_category *cat = xmalloc (sizeof *cat);
5206 *cat = (struct ctables_category) {
5208 .include_missing = false,
5209 .sort_ascending = true,
5212 struct ctables_categories *c = xmalloc (sizeof *c);
5213 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5214 *c = (struct ctables_categories) {
5221 struct ctables_categories **categories = xnmalloc (n_vars,
5222 sizeof *categories);
5223 for (size_t i = 0; i < n_vars; i++)
5226 struct ctables_table *t = xmalloc (sizeof *t);
5227 *t = (struct ctables_table) {
5229 .slabels_axis = PIVOT_AXIS_COLUMN,
5230 .slabels_visible = true,
5231 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5233 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5234 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5235 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5237 .clabels_from_axis = PIVOT_AXIS_LAYER,
5238 .categories = categories,
5239 .n_categories = n_vars,
5242 ct->tables[ct->n_tables++] = t;
/* Axis expressions: rows [BY columns [BY layers]]. */
5244 lex_match (lexer, T_EQUALS);
5245 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5247 if (lex_match (lexer, T_BY))
5249 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5250 ct, t, PIVOT_AXIS_COLUMN))
5253 if (lex_match (lexer, T_BY))
5255 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5256 ct, t, PIVOT_AXIS_LAYER))
5261 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5262 && !t->axes[PIVOT_AXIS_LAYER])
5264 lex_error (lexer, _("At least one variable must be specified."));
/* Scale variables may appear on at most one axis. */
5268 const struct ctables_axis *scales[PIVOT_N_AXES];
5269 size_t n_scales = 0;
5270 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5272 scales[a] = find_scale (t->axes[a]);
5278 msg (SE, _("Scale variables may appear only on one axis."));
5279 if (scales[PIVOT_AXIS_ROW])
5280 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5281 _("This scale variable appears on the rows axis."));
5282 if (scales[PIVOT_AXIS_COLUMN])
5283 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5284 _("This scale variable appears on the columns axis."));
5285 if (scales[PIVOT_AXIS_LAYER])
5286 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5287 _("This scale variable appears on the layer axis."));
/* Summaries may likewise appear on at most one axis. */
5291 const struct ctables_axis *summaries[PIVOT_N_AXES];
5292 size_t n_summaries = 0;
5293 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5295 summaries[a] = (scales[a]
5297 : find_categorical_summary_spec (t->axes[a]));
5301 if (n_summaries > 1)
5303 msg (SE, _("Summaries may appear only on one axis."));
5304 if (summaries[PIVOT_AXIS_ROW])
5305 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5306 _("This variable on the rows axis has a summary."));
5307 if (summaries[PIVOT_AXIS_COLUMN])
5308 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5309 _("This variable on the columns axis has a summary."));
5310 if (summaries[PIVOT_AXIS_LAYER])
5311 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5312 _("This variable on the layers axis has a summary."));
/* The summary axis is taken from the axes that carry a summary when any
   does, and otherwise from the axes that have variables. */
5315 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5316 if (n_summaries ? summaries[a] : t->axes[a])
5318 t->summary_axis = a;
5322 if (lex_token (lexer) == T_ENDCMD)
5324 if (!ctables_prepare_table (t))
5328 if (!lex_force_match (lexer, T_SLASH))
/* Per-table subcommands, up to the next TABLE or the end of the command. */
5331 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5333 if (lex_match_id (lexer, "SLABELS"))
5335 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5337 if (lex_match_id (lexer, "POSITION"))
5339 lex_match (lexer, T_EQUALS);
5340 if (lex_match_id (lexer, "COLUMN"))
5341 t->slabels_axis = PIVOT_AXIS_COLUMN;
5342 else if (lex_match_id (lexer, "ROW"))
5343 t->slabels_axis = PIVOT_AXIS_ROW;
5344 else if (lex_match_id (lexer, "LAYER"))
5345 t->slabels_axis = PIVOT_AXIS_LAYER;
5348 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5352 else if (lex_match_id (lexer, "VISIBLE"))
5354 lex_match (lexer, T_EQUALS);
5355 if (!parse_bool (lexer, &t->slabels_visible))
5360 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5365 else if (lex_match_id (lexer, "CLABELS"))
5367 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5369 if (lex_match_id (lexer, "AUTO"))
5371 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5372 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5374 else if (lex_match_id (lexer, "ROWLABELS"))
5376 lex_match (lexer, T_EQUALS);
5377 if (lex_match_id (lexer, "OPPOSITE"))
5378 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5379 else if (lex_match_id (lexer, "LAYER"))
5380 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5383 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5387 else if (lex_match_id (lexer, "COLLABELS"))
5389 lex_match (lexer, T_EQUALS);
5390 if (lex_match_id (lexer, "OPPOSITE"))
5391 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5392 else if (lex_match_id (lexer, "LAYER"))
5393 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5396 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5402 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5408 else if (lex_match_id (lexer, "CRITERIA"))
5410 if (!lex_force_match_id (lexer, "CILEVEL"))
5412 lex_match (lexer, T_EQUALS);
5414 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5416 t->cilevel = lex_number (lexer);
5419 else if (lex_match_id (lexer, "CATEGORIES"))
5421 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5425 else if (lex_match_id (lexer, "TITLES"))
5430 if (lex_match_id (lexer, "CAPTION"))
5431 textp = &t->caption;
5432 else if (lex_match_id (lexer, "CORNER"))
5434 else if (lex_match_id (lexer, "TITLE"))
5438 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5441 lex_match (lexer, T_EQUALS);
/* Consecutive strings are joined with single spaces. */
5443 struct string s = DS_EMPTY_INITIALIZER;
5444 while (lex_is_string (lexer))
5446 if (!ds_is_empty (&s))
5447 ds_put_byte (&s, ' ');
5448 ds_put_substring (&s, lex_tokss (lexer));
5452 *textp = ds_steal_cstr (&s);
5454 while (lex_token (lexer) != T_SLASH
5455 && lex_token (lexer) != T_ENDCMD);
5457 else if (lex_match_id (lexer, "SIGTEST"))
5461 t->chisq = xmalloc (sizeof *t->chisq);
5462 *t->chisq = (struct ctables_chisq) {
5464 .include_mrsets = true,
5465 .all_visible = true,
5471 if (lex_match_id (lexer, "TYPE"))
5473 lex_match (lexer, T_EQUALS);
5474 if (!lex_force_match_id (lexer, "CHISQUARE"))
5477 else if (lex_match_id (lexer, "ALPHA"))
5479 lex_match (lexer, T_EQUALS);
5480 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5482 t->chisq->alpha = lex_number (lexer);
5485 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5487 lex_match (lexer, T_EQUALS);
/* parse_bool() is true on success, so failure is its negation, as for the
   other parse_bool() calls in this function. */
5488 if (!parse_bool (lexer, &t->chisq->include_mrsets))
5491 else if (lex_match_id (lexer, "CATEGORIES"))
5493 lex_match (lexer, T_EQUALS);
5494 if (lex_match_id (lexer, "ALLVISIBLE"))
5495 t->chisq->all_visible = true;
5496 else if (lex_match_id (lexer, "SUBTOTALS"))
5497 t->chisq->all_visible = false;
5500 lex_error_expecting (lexer,
5501 "ALLVISIBLE", "SUBTOTALS");
5507 lex_error_expecting (lexer, "TYPE", "ALPHA",
5508 "INCLUDEMRSETS", "CATEGORIES");
5512 while (lex_token (lexer) != T_SLASH
5513 && lex_token (lexer) != T_ENDCMD);
5515 else if (lex_match_id (lexer, "COMPARETEST"))
5519 t->pairwise = xmalloc (sizeof *t->pairwise);
5520 *t->pairwise = (struct ctables_pairwise) {
5522 .alpha = { .05, .05 },
5523 .adjust = BONFERRONI,
5524 .include_mrsets = true,
5525 .meansvariance_allcats = true,
5526 .all_visible = true,
5535 if (lex_match_id (lexer, "TYPE"))
5537 lex_match (lexer, T_EQUALS);
5538 if (lex_match_id (lexer, "PROP"))
5539 t->pairwise->type = PROP;
5540 else if (lex_match_id (lexer, "MEAN"))
5541 t->pairwise->type = MEAN;
5544 lex_error_expecting (lexer, "PROP", "MEAN");
5548 else if (lex_match_id (lexer, "ALPHA"))
5550 lex_match (lexer, T_EQUALS);
5552 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5554 double a0 = lex_number (lexer);
/* An optional second value may follow; the two are stored in ascending
   order.  A single value is used for both. */
5557 lex_match (lexer, T_COMMA);
5558 if (lex_is_number (lexer))
5560 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5562 double a1 = lex_number (lexer);
5565 t->pairwise->alpha[0] = MIN (a0, a1);
5566 t->pairwise->alpha[1] = MAX (a0, a1);
5569 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5571 else if (lex_match_id (lexer, "ADJUST"))
5573 lex_match (lexer, T_EQUALS);
5574 if (lex_match_id (lexer, "BONFERRONI"))
5575 t->pairwise->adjust = BONFERRONI;
5576 else if (lex_match_id (lexer, "BH"))
5577 t->pairwise->adjust = BH;
5578 else if (lex_match_id (lexer, "NONE"))
5579 t->pairwise->adjust = 0;
5582 lex_error_expecting (lexer, "BONFERRONI", "BH",
5587 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5589 lex_match (lexer, T_EQUALS);
5590 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5593 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5595 lex_match (lexer, T_EQUALS);
5596 if (lex_match_id (lexer, "ALLCATS"))
5597 t->pairwise->meansvariance_allcats = true;
5598 else if (lex_match_id (lexer, "TESTEDCATS"))
5599 t->pairwise->meansvariance_allcats = false;
5602 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5606 else if (lex_match_id (lexer, "CATEGORIES"))
5608 lex_match (lexer, T_EQUALS);
5609 if (lex_match_id (lexer, "ALLVISIBLE"))
5610 t->pairwise->all_visible = true;
5611 else if (lex_match_id (lexer, "SUBTOTALS"))
5612 t->pairwise->all_visible = false;
5615 lex_error_expecting (lexer, "ALLVISIBLE",
5620 else if (lex_match_id (lexer, "MERGE"))
5622 lex_match (lexer, T_EQUALS);
5623 if (!parse_bool (lexer, &t->pairwise->merge))
5626 else if (lex_match_id (lexer, "STYLE"))
5628 lex_match (lexer, T_EQUALS);
5629 if (lex_match_id (lexer, "APA"))
5630 t->pairwise->apa_style = true;
5631 else if (lex_match_id (lexer, "SIMPLE"))
5632 t->pairwise->apa_style = false;
5635 lex_error_expecting (lexer, "APA", "SIMPLE");
5639 else if (lex_match_id (lexer, "SHOWSIG"))
5641 lex_match (lexer, T_EQUALS);
5642 if (!parse_bool (lexer, &t->pairwise->show_sig))
5647 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5648 "INCLUDEMRSETS", "MEANSVARIANCE",
5649 "CATEGORIES", "MERGE", "STYLE",
5654 while (lex_token (lexer) != T_SLASH
5655 && lex_token (lexer) != T_ENDCMD);
5659 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5660 "CRITERIA", "CATEGORIES", "TITLES",
5661 "SIGTEST", "COMPARETEST");
5665 if (!lex_match (lexer, T_SLASH))
/* Category labels can be moved away from only one of the two axes. */
5669 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5670 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5672 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5676 if (!ctables_prepare_table (t))
5679 while (lex_token (lexer) != T_ENDCMD);
/* Run the procedure, then release everything. */
5681 bool ok = ctables_execute (ds, ct);
5682 ctables_destroy (ct);
5683 return ok ? CMD_SUCCESS : CMD_FAILURE;
5686 ctables_destroy (ct);