1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-out.h"
25 #include "data/dataset.h"
26 #include "data/dictionary.h"
27 #include "data/mrset.h"
28 #include "data/subcase.h"
29 #include "data/value-labels.h"
30 #include "language/command.h"
31 #include "language/lexer/format-parser.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/hash-functions.h"
37 #include "libpspp/hmap.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/message.h"
40 #include "libpspp/string-array.h"
41 #include "math/mode.h"
42 #include "math/moments.h"
43 #include "math/percentiles.h"
44 #include "math/sort.h"
45 #include "output/pivot-table.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
51 #define _(msgid) gettext (msgid)
52 #define N_(msgid) (msgid)
56 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
57 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
58 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
59 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
63 - unweighted summaries (U*)
64 - lower confidence limits (*.LCL)
65 - upper confidence limits (*.UCL)
66 - standard error (*.SE)
69 /* All variables. */ \
70 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
71 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
72 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
73 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
74 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
94 /* Scale variables, totals, and subtotals. */ \
95 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
96 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
97 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
106 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
107 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
108 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
109 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
111 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
112 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
113 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
#if 0         /* Multiple response sets not yet implemented. */
/* Disabled summary functions for multiple response sets.  Each S() entry
   lists, in order: enum constant, syntax name, default label, default format
   class, and availability (matching the S(ENUM, NAME, LABEL, FORMAT,
   AVAILABILITY) expansions used elsewhere in this file).  The syntax name of
   every entry must be unique and must agree with its enum constant. */
  S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
  S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
#endif
144 enum ctables_summary_function
146 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
152 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
153 N_CTSF_FUNCTIONS = SUMMARIES
157 static bool ctables_summary_function_is_count (enum ctables_summary_function);
159 enum ctables_domain_type
161 /* Within a section, where stacked variables divide one section from
163 CTDT_TABLE, /* All layers of a whole section. */
164 CTDT_LAYER, /* One layer within a section. */
165 CTDT_LAYERROW, /* Row in one layer within a section. */
166 CTDT_LAYERCOL, /* Column in one layer within a section. */
168 /* Within a subtable, where a subtable pairs an innermost row variable with
169 an innermost column variable within a single layer. */
170 CTDT_SUBTABLE, /* Whole subtable. */
171 CTDT_ROW, /* Row within a subtable. */
172 CTDT_COL, /* Column within a subtable. */
176 struct ctables_domain
178 struct hmap_node node;
180 const struct ctables_cell *example;
182 double d_valid; /* Dictionary weight. */
185 double e_valid; /* Effective weight */
190 enum ctables_summary_variant
199 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
200 all the axes (except the scalar variable, if any). */
201 struct hmap_node node;
203 /* The domains that contain this cell. */
204 uint32_t omit_domains;
205 struct ctables_domain *domains[N_CTDTS];
210 enum ctables_summary_variant sv;
212 struct ctables_cell_axis
214 struct ctables_cell_value
216 const struct ctables_category *category;
224 union ctables_summary *summaries;
231 const struct dictionary *dict;
232 struct pivot_table_look *look;
234 /* CTABLES has a number of extra formats that we implement via custom
235 currency specifications on an alternate fmt_settings. */
236 #define CTEF_NEGPAREN FMT_CCA
237 #define CTEF_NEQUAL FMT_CCB
238 #define CTEF_PAREN FMT_CCC
239 #define CTEF_PCTPAREN FMT_CCD
240 struct fmt_settings ctables_formats;
242 /* If this is NULL, zeros are displayed using the normal print format.
243 Otherwise, this string is displayed. */
246 /* If this is NULL, missing values are displayed using the normal print
247 format. Otherwise, this string is displayed. */
250 /* Indexed by variable dictionary index. */
251 enum ctables_vlabel *vlabels;
253 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
255 bool mrsets_count_duplicates; /* MRSETS. */
256 bool smissing_listwise; /* SMISSING. */
257 struct variable *e_weight; /* WEIGHT. */
258 int hide_threshold; /* HIDESMALLCOUNTS. */
260 struct ctables_table **tables;
264 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
267 struct ctables_postcompute
269 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
270 char *name; /* Name, without leading &. */
272 struct msg_location *location; /* Location of definition. */
273 struct ctables_pcexpr *expr;
275 struct ctables_summary_spec_set *specs;
276 bool hide_source_cats;
279 struct ctables_pcexpr
289 enum ctables_postcompute_op
292 CTPO_CONSTANT, /* 5 */
293 CTPO_CAT_NUMBER, /* [5] */
294 CTPO_CAT_STRING, /* ["STRING"] */
295 CTPO_CAT_RANGE, /* [LO THRU 5] */
296 CTPO_CAT_MISSING, /* MISSING */
297 CTPO_CAT_OTHERNM, /* OTHERNM */
298 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
299 CTPO_CAT_TOTAL, /* TOTAL */
313 /* CTPO_CAT_NUMBER. */
316 /* CTPO_CAT_STRING. */
319 /* CTPO_CAT_RANGE. */
322 /* CTPO_CAT_SUBTOTAL. */
323 size_t subtotal_index;
325 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
326 One element: CTPO_NEG. */
327 struct ctables_pcexpr *subs[2];
330 /* Source location. */
331 struct msg_location *location;
334 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
335 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
336 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
337 struct ctables_pcexpr *sub1);
339 struct ctables_summary_spec_set
341 struct ctables_summary_spec *specs;
345 /* The variable to which the summary specs are applied. */
346 struct variable *var;
348 /* Whether the variable to which the summary specs are applied is a scale
349 variable for the purpose of summarization.
351 (VALIDN and TOTALN act differently for summarizing scale and categorical
355 /* If any of these optional additional scale variables are missing, then
356 treat 'var' as if it's missing too. This is for implementing
357 SMISSING=LISTWISE. */
358 struct variable **listwise_vars;
359 size_t n_listwise_vars;
362 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
363 const struct ctables_summary_spec_set *);
364 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
366 /* A nested sequence of variables, e.g. a > b > c. */
369 struct variable **vars;
372 size_t *domains[N_CTDTS];
373 size_t n_domains[N_CTDTS];
376 struct ctables_summary_spec_set specs[N_CSVS];
379 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
382 struct ctables_nest *nests;
388 struct hmap_node node;
393 struct ctables_occurrence
395 struct hmap_node node;
399 struct ctables_section
401 struct ctables_table *table;
402 struct ctables_nest *nests[PIVOT_N_AXES];
403 struct hmap *occurrences[PIVOT_N_AXES];
404 struct hmap cells; /* Contains "struct ctables_cell"s. */
405 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
410 struct ctables *ctables;
411 struct ctables_axis *axes[PIVOT_N_AXES];
412 struct ctables_stack stacks[PIVOT_N_AXES];
413 struct ctables_section *sections;
415 enum pivot_axis_type summary_axis;
416 struct ctables_summary_spec_set summary_specs;
418 const struct variable *clabels_example;
419 struct hmap clabels_values_map;
420 struct ctables_value **clabels_values;
421 size_t n_clabels_values;
423 enum pivot_axis_type slabels_axis;
424 bool slabels_visible;
426 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
428 Most commonly, label_axis[a] == a, and in particular we always have
429 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
431 If ROWLABELS or COLLABELS is specified, then one of
432 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
433 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
435 enum pivot_axis_type label_axis[PIVOT_N_AXES];
436 enum pivot_axis_type clabels_from_axis;
438 /* Indexed by variable dictionary index. */
439 struct ctables_categories **categories;
448 struct ctables_chisq *chisq;
449 struct ctables_pairwise *pairwise;
452 struct ctables_categories
455 struct ctables_category *cats;
460 struct ctables_category
462 enum ctables_category_type
464 /* Explicit category lists. */
472 /* Totals and subtotals. */
476 /* Implicit category lists. */
481 /* For contributing to TOTALN. */
482 CCT_EXCLUDED_MISSING,
486 struct ctables_category *subtotal;
492 double number; /* CCT_NUMBER. */
493 char *string; /* CCT_STRING. In dictionary encoding. */
494 double range[2]; /* CCT_RANGE. */
498 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
499 bool hide_subcategories; /* CCT_SUBTOTAL. */
502 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
504 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
507 bool include_missing;
511 enum ctables_summary_function sort_function;
512 struct variable *sort_var;
517 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
518 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
519 struct msg_location *location;
523 ctables_category_uninit (struct ctables_category *cat)
534 case CCT_POSTCOMPUTE:
543 free (cat->total_label);
551 case CCT_EXCLUDED_MISSING:
557 ctables_category_equal (const struct ctables_category *a,
558 const struct ctables_category *b)
560 if (a->type != b->type)
566 return a->number == b->number;
569 return strcmp (a->string, b->string);
572 return a->range[0] == b->range[0] && a->range[1] == b->range[1];
578 case CCT_POSTCOMPUTE:
579 return a->pc == b->pc;
583 return !strcmp (a->total_label, b->total_label);
588 return (a->include_missing == b->include_missing
589 && a->sort_ascending == b->sort_ascending
590 && a->sort_function == b->sort_function
591 && a->sort_var == b->sort_var
592 && a->percentile == b->percentile);
594 case CCT_EXCLUDED_MISSING:
602 ctables_categories_unref (struct ctables_categories *c)
607 assert (c->n_refs > 0);
611 for (size_t i = 0; i < c->n_cats; i++)
612 ctables_category_uninit (&c->cats[i]);
618 ctables_categories_equal (const struct ctables_categories *a,
619 const struct ctables_categories *b)
621 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
624 for (size_t i = 0; i < a->n_cats; i++)
625 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
631 /* Chi-square test (SIGTEST). */
639 /* Pairwise comparison test (COMPARETEST). */
640 struct ctables_pairwise
642 enum { PROP, MEAN } type;
645 bool meansvariance_allcats;
647 enum { BONFERRONI = 1, BH } adjust;
671 struct variable *var;
673 struct ctables_summary_spec_set specs[N_CSVS];
677 struct ctables_axis *subs[2];
680 struct msg_location *loc;
683 static void ctables_axis_destroy (struct ctables_axis *);
692 enum ctables_function_availability
694 CTFA_ALL, /* Any variables. */
695 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
696 CTFA_MRSETS, /* Only multiple-response sets */
699 struct ctables_summary_spec
701 enum ctables_summary_function function;
702 double percentile; /* CTSF_PTILE only. */
705 struct fmt_spec format;
706 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
712 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
713 const struct ctables_summary_spec *src)
716 dst->label = xstrdup (src->label);
720 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
727 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
728 const struct ctables_summary_spec_set *src)
730 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
731 for (size_t i = 0; i < src->n; i++)
732 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
734 *dst = (struct ctables_summary_spec_set) {
739 .is_scale = src->is_scale,
744 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
746 for (size_t i = 0; i < set->n; i++)
747 ctables_summary_spec_uninit (&set->specs[i]);
752 parse_col_width (struct lexer *lexer, const char *name, double *width)
754 lex_match (lexer, T_EQUALS);
755 if (lex_match_id (lexer, "DEFAULT"))
757 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
759 *width = lex_number (lexer);
769 parse_bool (struct lexer *lexer, bool *b)
771 if (lex_match_id (lexer, "NO"))
773 else if (lex_match_id (lexer, "YES"))
777 lex_error_expecting (lexer, "YES", "NO");
783 static enum ctables_function_availability
784 ctables_function_availability (enum ctables_summary_function f)
786 static enum ctables_function_availability availability[] = {
787 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
792 return availability[f];
796 ctables_summary_function_is_count (enum ctables_summary_function f)
802 case CTSF_ROWPCT_COUNT:
803 case CTSF_COLPCT_COUNT:
804 case CTSF_TABLEPCT_COUNT:
805 case CTSF_SUBTABLEPCT_COUNT:
806 case CTSF_LAYERPCT_COUNT:
807 case CTSF_LAYERROWPCT_COUNT:
808 case CTSF_LAYERCOLPCT_COUNT:
811 case CTSF_ROWPCT_VALIDN:
812 case CTSF_COLPCT_VALIDN:
813 case CTSF_TABLEPCT_VALIDN:
814 case CTSF_SUBTABLEPCT_VALIDN:
815 case CTSF_LAYERPCT_VALIDN:
816 case CTSF_LAYERROWPCT_VALIDN:
817 case CTSF_LAYERCOLPCT_VALIDN:
818 case CTSF_ROWPCT_TOTALN:
819 case CTSF_COLPCT_TOTALN:
820 case CTSF_TABLEPCT_TOTALN:
821 case CTSF_SUBTABLEPCT_TOTALN:
822 case CTSF_LAYERPCT_TOTALN:
823 case CTSF_LAYERROWPCT_TOTALN:
824 case CTSF_LAYERCOLPCT_TOTALN:
841 case CTSF_ROWPCT_SUM:
842 case CTSF_COLPCT_SUM:
843 case CTSF_TABLEPCT_SUM:
844 case CTSF_SUBTABLEPCT_SUM:
845 case CTSF_LAYERPCT_SUM:
846 case CTSF_LAYERROWPCT_SUM:
847 case CTSF_LAYERCOLPCT_SUM:
855 parse_ctables_summary_function (struct lexer *lexer,
856 enum ctables_summary_function *f)
860 enum ctables_summary_function function;
861 struct substring name;
863 static struct pair names[] = {
864 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
865 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
868 /* The .COUNT suffix may be omitted. */
869 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
870 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
871 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
872 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
873 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
874 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
875 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
879 if (!lex_force_id (lexer))
882 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
883 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
885 *f = names[i].function;
890 lex_error (lexer, _("Expecting summary function name."));
895 ctables_axis_destroy (struct ctables_axis *axis)
903 for (size_t i = 0; i < N_CSVS; i++)
904 ctables_summary_spec_set_uninit (&axis->specs[i]);
909 ctables_axis_destroy (axis->subs[0]);
910 ctables_axis_destroy (axis->subs[1]);
913 msg_location_destroy (axis->loc);
917 static struct ctables_axis *
918 ctables_axis_new_nonterminal (enum ctables_axis_op op,
919 struct ctables_axis *sub0,
920 struct ctables_axis *sub1,
921 struct lexer *lexer, int start_ofs)
923 struct ctables_axis *axis = xmalloc (sizeof *axis);
924 *axis = (struct ctables_axis) {
926 .subs = { sub0, sub1 },
927 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
932 struct ctables_axis_parse_ctx
935 struct dictionary *dict;
937 struct ctables_table *t;
940 static struct fmt_spec
941 ctables_summary_default_format (enum ctables_summary_function function,
942 const struct variable *var)
944 static const enum ctables_format default_formats[] = {
945 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
949 switch (default_formats[function])
952 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
955 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
958 return *var_get_print_format (var);
966 ctables_summary_default_label (enum ctables_summary_function function,
969 static const char *default_labels[] = {
970 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
975 return (function == CTSF_PTILE
976 ? xasprintf (_("Percentile %.2f"), percentile)
977 : xstrdup (gettext (default_labels[function])));
981 ctables_summary_function_name (enum ctables_summary_function function)
983 static const char *names[] = {
984 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
988 return names[function];
992 add_summary_spec (struct ctables_axis *axis,
993 enum ctables_summary_function function, double percentile,
994 const char *label, const struct fmt_spec *format,
995 bool is_ctables_format, const struct msg_location *loc,
996 enum ctables_summary_variant sv)
998 if (axis->op == CTAO_VAR)
1000 const char *function_name = ctables_summary_function_name (function);
1001 const char *var_name = var_get_name (axis->var);
1002 switch (ctables_function_availability (function))
1005 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1006 "response sets."), function_name);
1007 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1016 _("Summary function %s applies only to scale variables."),
1018 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
1029 struct ctables_summary_spec_set *set = &axis->specs[sv];
1030 if (set->n >= set->allocated)
1031 set->specs = x2nrealloc (set->specs, &set->allocated,
1032 sizeof *set->specs);
1034 struct ctables_summary_spec *dst = &set->specs[set->n++];
1035 *dst = (struct ctables_summary_spec) {
1036 .function = function,
1037 .percentile = percentile,
1038 .label = xstrdup (label),
1039 .format = (format ? *format
1040 : ctables_summary_default_format (function, axis->var)),
1041 .is_ctables_format = is_ctables_format,
1047 for (size_t i = 0; i < 2; i++)
1048 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1049 format, is_ctables_format, loc, sv))
1055 static struct ctables_axis *ctables_axis_parse_stack (
1056 struct ctables_axis_parse_ctx *);
1059 static struct ctables_axis *
1060 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
1062 if (lex_match (ctx->lexer, T_LPAREN))
1064 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1065 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1067 ctables_axis_destroy (sub);
1073 if (!lex_force_id (ctx->lexer))
1076 int start_ofs = lex_ofs (ctx->lexer);
1077 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1081 struct ctables_axis *axis = xmalloc (sizeof *axis);
1082 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1084 /* XXX should figure out default measures by reading data */
1085 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1086 : lex_match_phrase (ctx->lexer, "[C]") ? false
1087 : var_get_measure (var) == MEASURE_SCALE);
1088 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1089 lex_ofs (ctx->lexer) - 1);
1090 if (axis->scale && var_is_alpha (var))
1092 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1094 var_get_name (var));
1095 ctables_axis_destroy (axis);
/* Returns true if S contains at least one of the ASCII decimal digits
   '0'...'9', false otherwise (including when S is the empty string). */
static bool
has_digit (const char *s)
{
  for (const char *p = s; *p != '\0'; p++)
    if (*p >= '0' && *p <= '9')
      return true;

  return false;
}
1109 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1110 bool *is_ctables_format)
1112 char type[FMT_TYPE_LEN_MAX + 1];
1113 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
1116 if (!strcasecmp (type, "NEGPAREN"))
1117 format->type = CTEF_NEGPAREN;
1118 else if (!strcasecmp (type, "NEQUAL"))
1119 format->type = CTEF_NEQUAL;
1120 else if (!strcasecmp (type, "PAREN"))
1121 format->type = CTEF_PAREN;
1122 else if (!strcasecmp (type, "PCTPAREN"))
1123 format->type = CTEF_PCTPAREN;
1126 *is_ctables_format = false;
1127 return (parse_format_specifier (lexer, format)
1128 && fmt_check_output (format)
1129 && fmt_check_type_compat (format, VAL_NUMERIC));
1134 msg (SE, _("Output format %s requires width 2 or greater."), type);
1137 else if (format->d > format->w - 1)
1139 msg (SE, _("Output format %s requires width greater than decimals."),
1145 *is_ctables_format = true;
1150 static struct ctables_axis *
1151 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1153 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1154 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1157 enum ctables_summary_variant sv = CSV_CELL;
1160 int start_ofs = lex_ofs (ctx->lexer);
1162 /* Parse function. */
1163 enum ctables_summary_function function;
1164 if (!parse_ctables_summary_function (ctx->lexer, &function))
1167 /* Parse percentile. */
1168 double percentile = 0;
1169 if (function == CTSF_PTILE)
1171 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1173 percentile = lex_number (ctx->lexer);
1174 lex_get (ctx->lexer);
1179 if (lex_is_string (ctx->lexer))
1181 label = ss_xstrdup (lex_tokss (ctx->lexer));
1182 lex_get (ctx->lexer);
1185 label = ctables_summary_default_label (function, percentile);
1188 struct fmt_spec format;
1189 const struct fmt_spec *formatp;
1190 bool is_ctables_format = false;
1191 if (lex_token (ctx->lexer) == T_ID
1192 && has_digit (lex_tokcstr (ctx->lexer)))
1194 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1195 &is_ctables_format))
1205 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1206 lex_ofs (ctx->lexer) - 1);
1207 add_summary_spec (sub, function, percentile, label, formatp,
1208 is_ctables_format, loc, sv);
1210 msg_location_destroy (loc);
1212 lex_match (ctx->lexer, T_COMMA);
1213 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1215 if (!lex_force_match (ctx->lexer, T_LBRACK))
1219 else if (lex_match (ctx->lexer, T_RBRACK))
1221 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1228 ctables_axis_destroy (sub);
1232 static const struct ctables_axis *
1233 find_scale (const struct ctables_axis *axis)
1237 else if (axis->op == CTAO_VAR)
1238 return axis->scale ? axis : NULL;
1241 for (size_t i = 0; i < 2; i++)
1243 const struct ctables_axis *scale = find_scale (axis->subs[i]);
1251 static const struct ctables_axis *
1252 find_categorical_summary_spec (const struct ctables_axis *axis)
1256 else if (axis->op == CTAO_VAR)
1257 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1260 for (size_t i = 0; i < 2; i++)
1262 const struct ctables_axis *sum
1263 = find_categorical_summary_spec (axis->subs[i]);
1271 static struct ctables_axis *
1272 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1274 int start_ofs = lex_ofs (ctx->lexer);
1275 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1279 while (lex_match (ctx->lexer, T_GT))
1281 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
1285 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1286 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
1288 const struct ctables_axis *outer_scale = find_scale (lhs);
1289 const struct ctables_axis *inner_scale = find_scale (rhs);
1290 if (outer_scale && inner_scale)
1292 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1293 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1294 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1295 ctables_axis_destroy (nest);
1299 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1302 msg_at (SE, nest->loc,
1303 _("Summaries may only be requested for categorical variables "
1304 "at the innermost nesting level."));
1305 msg_at (SN, outer_sum->loc,
1306 _("This outer categorical variable has a summary."));
1307 ctables_axis_destroy (nest);
1317 static struct ctables_axis *
1318 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1320 int start_ofs = lex_ofs (ctx->lexer);
1321 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1325 while (lex_match (ctx->lexer, T_PLUS))
1327 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
1331 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1332 ctx->lexer, start_ofs);
1339 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1340 struct ctables *ct, struct ctables_table *t,
1341 enum pivot_axis_type a)
1343 if (lex_token (lexer) == T_BY
1344 || lex_token (lexer) == T_SLASH
1345 || lex_token (lexer) == T_ENDCMD)
1348 struct ctables_axis_parse_ctx ctx = {
1354 t->axes[a] = ctables_axis_parse_stack (&ctx);
1355 return t->axes[a] != NULL;
1359 ctables_chisq_destroy (struct ctables_chisq *chisq)
1365 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1371 ctables_table_destroy (struct ctables_table *t)
1376 for (size_t i = 0; i < t->n_categories; i++)
1377 ctables_categories_unref (t->categories[i]);
1378 free (t->categories);
1380 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1381 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1382 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
1386 ctables_chisq_destroy (t->chisq);
1387 ctables_pairwise_destroy (t->pairwise);
1392 ctables_destroy (struct ctables *ct)
1397 pivot_table_look_unref (ct->look);
1401 for (size_t i = 0; i < ct->n_tables; i++)
1402 ctables_table_destroy (ct->tables[i]);
1407 static struct ctables_category
1408 cct_range (double low, double high)
1410 return (struct ctables_category) {
1412 .range = { low, high }
/* Parses the optional "='label'" that may follow SUBTOTAL or HSUBTOTAL and
   stores a CCT_SUBTOTAL category in *CAT.  When T_EQUALS is present a string
   token is required, and a copy of it becomes the category's total_label; the
   default label is a copy of the translated string "Subtotal".
   HIDE_SUBCATEGORIES is stored in the category unchanged.

   The category takes ownership of the label string allocated here. */
1417 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1418 struct ctables_category *cat)
1421 if (lex_match (lexer, T_EQUALS))
1423 if (!lex_force_string (lexer))
1426 total_label = ss_xstrdup (lex_tokss (lexer));
1430 total_label = xstrdup (_("Subtotal"));
1432 *cat = (struct ctables_category) {
1433 .type = CCT_SUBTOTAL,
1434 .hide_subcategories = hide_subcategories,
1435 .total_label = total_label
1441 ctables_table_parse_explicit_category (struct lexer *lexer, struct ctables *ct,
1442 struct ctables_category *cat)
1444 if (lex_match_id (lexer, "OTHERNM"))
1445 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1446 else if (lex_match_id (lexer, "MISSING"))
1447 *cat = (struct ctables_category) { .type = CCT_MISSING };
1448 else if (lex_match_id (lexer, "SUBTOTAL"))
1449 return ctables_table_parse_subtotal (lexer, false, cat);
1450 else if (lex_match_id (lexer, "HSUBTOTAL"))
1451 return ctables_table_parse_subtotal (lexer, true, cat);
1452 else if (lex_match_id (lexer, "LO"))
1454 if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
1456 *cat = cct_range (-DBL_MAX, lex_number (lexer));
1459 else if (lex_is_number (lexer))
1461 double number = lex_number (lexer);
1463 if (lex_match_id (lexer, "THRU"))
1465 if (lex_match_id (lexer, "HI"))
1466 *cat = cct_range (number, DBL_MAX);
1469 if (!lex_force_num (lexer))
1471 *cat = cct_range (number, lex_number (lexer));
1476 *cat = (struct ctables_category) {
1481 else if (lex_is_string (lexer))
1483 *cat = (struct ctables_category) {
1485 .string = ss_xstrdup (lex_tokss (lexer)),
1489 else if (lex_match (lexer, T_AND))
1491 if (!lex_force_id (lexer))
1493 struct ctables_postcompute *pc = ctables_find_postcompute (
1494 ct, lex_tokcstr (lexer));
1497 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1498 msg_at (SE, loc, _("Unknown postcompute &%s."),
1499 lex_tokcstr (lexer));
1500 msg_location_destroy (loc);
1505 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1509 lex_error (lexer, NULL);
/* Searches CATS for the category that the leaf postcompute expression E
   refers to.  The kind of match depends on E->op:

     CTPO_CAT_NUMBER    a CCT_NUMBER category with the same number.
     CTPO_CAT_STRING    a CCT_STRING category with an identical string.
     CTPO_CAT_RANGE     a CCT_RANGE category with both bounds equal.
     CTPO_CAT_MISSING, CTPO_CAT_OTHERNM, CTPO_CAT_TOTAL
                        a category of the corresponding CCT_* type.
     CTPO_CAT_SUBTOTAL  a CCT_SUBTOTAL category, selected by comparing
                        E->subtotal_index against a running count of the
                        subtotals seen; an index of 0 is handled separately.

   NOTE(review): the statements executed on a match and the function's final
   return are not visible in this excerpt.  The last test below singles out an
   index-0 subtotal reference when CATS holds more than one subtotal --
   presumably to reject it as ambiguous; confirm. */
1516 static struct ctables_category *
1517 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1518 const struct ctables_pcexpr *e)
1520 struct ctables_category *best = NULL;
1521 size_t n_subtotals = 0;
1522 for (size_t i = 0; i < cats->n_cats; i++)
1524 struct ctables_category *cat = &cats->cats[i];
1527 case CTPO_CAT_NUMBER:
1528 if (cat->type == CCT_NUMBER && cat->number == e->number)
1532 case CTPO_CAT_STRING:
1533 if (cat->type == CCT_STRING && !strcmp (cat->string, e->string))
1537 case CTPO_CAT_RANGE:
1538 if (cat->type == CCT_RANGE
1539 && cat->range[0] == e->range[0]
1540 && cat->range[1] == e->range[1])
1544 case CTPO_CAT_MISSING:
1545 if (cat->type == CCT_MISSING)
1549 case CTPO_CAT_OTHERNM:
1550 if (cat->type == CCT_OTHERNM)
1554 case CTPO_CAT_SUBTOTAL:
1555 if (cat->type == CCT_SUBTOTAL)
1558 if (e->subtotal_index == n_subtotals)
1560 else if (e->subtotal_index == 0)
1565 case CTPO_CAT_TOTAL:
1566 if (cat->type == CCT_TOTAL)
1580 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
1586 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1587 struct ctables_category *pc_cat,
1588 const struct ctables_categories *cats,
1589 const struct msg_location *cats_location)
1593 case CTPO_CAT_NUMBER:
1594 case CTPO_CAT_STRING:
1595 case CTPO_CAT_RANGE:
1596 case CTPO_CAT_MISSING:
1597 case CTPO_CAT_OTHERNM:
1598 case CTPO_CAT_SUBTOTAL:
1599 case CTPO_CAT_TOTAL:
1601 struct ctables_category *cat = ctables_find_category_for_postcompute (
1605 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
1607 size_t n_subtotals = 0;
1608 for (size_t i = 0; i < cats->n_cats; i++)
1609 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1610 if (n_subtotals > 1)
1612 msg_at (SE, cats_location,
1613 ngettext ("These categories include %zu instance "
1614 "of SUBTOTAL or HSUBTOTAL, so references "
1615 "from computed categories must refer to "
1616 "subtotals by position.",
1617 "These categories include %zu instances "
1618 "of SUBTOTAL or HSUBTOTAL, so references "
1619 "from computed categories must refer to "
1620 "subtotals by position.",
1623 msg_at (SN, e->location,
1624 _("This is the reference that lacks a position."));
1629 msg_at (SE, pc_cat->location,
1630 _("Computed category &%s references a category not included "
1631 "in the category list."),
1633 msg_at (SN, e->location, _("This is the missing category."));
1634 msg_at (SN, cats_location,
1635 _("To fix the problem, add the missing category to the "
1636 "list of categories here."));
1639 if (pc_cat->pc->hide_source_cats)
1653 for (size_t i = 0; i < 2; i++)
1654 if (e->subs[i] && !ctables_recursive_check_postcompute (
1655 e->subs[i], pc_cat, cats, cats_location))
1665 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1666 struct ctables *ct, struct ctables_table *t)
1668 if (!lex_match_id (lexer, "VARIABLES"))
1670 lex_match (lexer, T_EQUALS);
1672 struct variable **vars;
1674 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
1677 struct ctables_categories *c = xmalloc (sizeof *c);
1678 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1679 for (size_t i = 0; i < n_vars; i++)
1681 struct ctables_categories **cp
1682 = &t->categories[var_get_dict_index (vars[i])];
1683 ctables_categories_unref (*cp);
1688 size_t allocated_cats = 0;
1689 if (lex_match (lexer, T_LBRACK))
1691 int cats_start_ofs = lex_ofs (lexer);
1694 if (c->n_cats >= allocated_cats)
1695 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1697 int start_ofs = lex_ofs (lexer);
1698 struct ctables_category *cat = &c->cats[c->n_cats];
1699 if (!ctables_table_parse_explicit_category (lexer, ct, cat))
1701 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1704 lex_match (lexer, T_COMMA);
1706 while (!lex_match (lexer, T_RBRACK));
1708 struct msg_location *cats_location
1709 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1710 for (size_t i = 0; i < c->n_cats; i++)
1712 struct ctables_category *cat = &c->cats[i];
1713 if (cat->type == CCT_POSTCOMPUTE
1714 && !ctables_recursive_check_postcompute (cat->pc->expr, cat,
1720 struct ctables_category cat = {
1722 .include_missing = false,
1723 .sort_ascending = true,
1725 bool show_totals = false;
1726 char *total_label = NULL;
1727 bool totals_before = false;
1728 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1730 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1732 lex_match (lexer, T_EQUALS);
1733 if (lex_match_id (lexer, "A"))
1734 cat.sort_ascending = true;
1735 else if (lex_match_id (lexer, "D"))
1736 cat.sort_ascending = false;
1739 lex_error_expecting (lexer, "A", "D");
1743 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1745 lex_match (lexer, T_EQUALS);
1746 if (lex_match_id (lexer, "VALUE"))
1747 cat.type = CCT_VALUE;
1748 else if (lex_match_id (lexer, "LABEL"))
1749 cat.type = CCT_LABEL;
1752 cat.type = CCT_FUNCTION;
1753 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1756 if (lex_match (lexer, T_LPAREN))
1758 cat.sort_var = parse_variable (lexer, dict);
1762 if (cat.sort_function == CTSF_PTILE)
1764 lex_match (lexer, T_COMMA);
1765 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1767 cat.percentile = lex_number (lexer);
1771 if (!lex_force_match (lexer, T_RPAREN))
1774 else if (ctables_function_availability (cat.sort_function)
1777 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1782 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1784 lex_match (lexer, T_EQUALS);
1785 if (lex_match_id (lexer, "INCLUDE"))
1786 cat.include_missing = true;
1787 else if (lex_match_id (lexer, "EXCLUDE"))
1788 cat.include_missing = false;
1791 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1795 else if (lex_match_id (lexer, "TOTAL"))
1797 lex_match (lexer, T_EQUALS);
1798 if (!parse_bool (lexer, &show_totals))
1801 else if (lex_match_id (lexer, "LABEL"))
1803 lex_match (lexer, T_EQUALS);
1804 if (!lex_force_string (lexer))
1807 total_label = ss_xstrdup (lex_tokss (lexer));
1810 else if (lex_match_id (lexer, "POSITION"))
1812 lex_match (lexer, T_EQUALS);
1813 if (lex_match_id (lexer, "BEFORE"))
1814 totals_before = true;
1815 else if (lex_match_id (lexer, "AFTER"))
1816 totals_before = false;
1819 lex_error_expecting (lexer, "BEFORE", "AFTER");
1823 else if (lex_match_id (lexer, "EMPTY"))
1825 lex_match (lexer, T_EQUALS);
1826 if (lex_match_id (lexer, "INCLUDE"))
1827 c->show_empty = true;
1828 else if (lex_match_id (lexer, "EXCLUDE"))
1829 c->show_empty = false;
1832 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1839 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
1840 "TOTAL", "LABEL", "POSITION", "EMPTY");
1842 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
1849 if (c->n_cats >= allocated_cats)
1850 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1851 c->cats[c->n_cats++] = cat;
1856 if (c->n_cats >= allocated_cats)
1857 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1859 struct ctables_category *totals;
1862 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
1863 totals = &c->cats[0];
1866 totals = &c->cats[c->n_cats];
1869 *totals = (struct ctables_category) {
1871 .total_label = total_label ? total_label : xstrdup (_("Total")),
1875 struct ctables_category *subtotal = NULL;
1876 for (size_t i = totals_before ? 0 : c->n_cats;
1877 totals_before ? i < c->n_cats : i-- > 0;
1878 totals_before ? i++ : 0)
1880 struct ctables_category *cat = &c->cats[i];
1888 cat->subtotal = subtotal;
1891 case CCT_POSTCOMPUTE:
1902 case CCT_EXCLUDED_MISSING:
1911 ctables_nest_uninit (struct ctables_nest *nest)
/* Releases the contents of STACK: calls ctables_nest_uninit() on each of its
   STACK->n nests and then frees the nests array itself.  STACK itself is not
   freed by the lines visible here. */
1918 ctables_stack_uninit (struct ctables_stack *stack)
1922 for (size_t i = 0; i < stack->n; i++)
1923 ctables_nest_uninit (&stack->nests[i]);
1924 free (stack->nests);
1928 static struct ctables_stack
1929 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
1936 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
1937 for (size_t i = 0; i < s0.n; i++)
1938 for (size_t j = 0; j < s1.n; j++)
1940 const struct ctables_nest *a = &s0.nests[i];
1941 const struct ctables_nest *b = &s1.nests[j];
1943 size_t allocate = a->n + b->n;
1944 struct variable **vars = xnmalloc (allocate, sizeof *vars);
1945 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
1947 for (size_t k = 0; k < a->n; k++)
1948 vars[n++] = a->vars[k];
1949 for (size_t k = 0; k < b->n; k++)
1950 vars[n++] = b->vars[k];
1951 assert (n == allocate);
1953 const struct ctables_nest *summary_src;
1954 if (!a->specs[CSV_CELL].var)
1956 else if (!b->specs[CSV_CELL].var)
1961 struct ctables_nest *new = &stack.nests[stack.n++];
1962 *new = (struct ctables_nest) {
1964 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
1965 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
1969 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
1970 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
1972 ctables_stack_uninit (&s0);
1973 ctables_stack_uninit (&s1);
/* Concatenates S0 and S1 into a newly allocated stack holding S0's nests
   followed by S1's.  Nests are copied by structure assignment, so anything
   they point to is shared with the source arrays rather than duplicated.

   Each nest taken from S1 has its group_head increased by s0.n -- presumably
   because group_head is an index into the nests array, which shifts when S0's
   nests are placed in front; TODO confirm. */
1977 static struct ctables_stack
1978 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
1980 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
1981 for (size_t i = 0; i < s0.n; i++)
1982 stack.nests[stack.n++] = s0.nests[i];
1983 for (size_t i = 0; i < s1.n; i++)
1985 stack.nests[stack.n] = s1.nests[i];
1986 stack.nests[stack.n].group_head += s0.n;
/* Every source nest must have been copied exactly once. */
1989 assert (stack.n == s0.n + s1.n);
1995 static struct ctables_stack
1996 var_fts (const struct ctables_axis *a)
1998 struct variable **vars = xmalloc (sizeof *vars);
2001 struct ctables_nest *nest = xmalloc (sizeof *nest);
2002 *nest = (struct ctables_nest) {
2005 .scale_idx = a->scale ? 0 : SIZE_MAX,
2007 if (a->specs[CSV_CELL].n || a->scale)
2008 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2010 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2011 nest->specs[sv].var = a->var;
2012 nest->specs[sv].is_scale = a->scale;
2014 return (struct ctables_stack) { .nests = nest, .n = 1 };
2017 static struct ctables_stack
2018 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2021 return (struct ctables_stack) { .n = 0 };
2029 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2030 enumerate_fts (axis_type, a->subs[1]));
2033 /* This should consider any of the scale variables found in the result to
2034 be linked to each other listwise for SMISSING=LISTWISE. */
2035 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2036 enumerate_fts (axis_type, a->subs[1]));
2042 union ctables_summary
2044 /* COUNT, VALIDN, TOTALN. */
2047 /* MINIMUM, MAXIMUM, RANGE. */
2054 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2055 struct moments1 *moments;
2057 /* MEDIAN, MODE, PTILE. */
2060 struct casewriter *writer;
2065 /* XXX multiple response */
2069 ctables_summary_init (union ctables_summary *s,
2070 const struct ctables_summary_spec *ss)
2072 switch (ss->function)
2076 case CTSF_ROWPCT_COUNT:
2077 case CTSF_COLPCT_COUNT:
2078 case CTSF_TABLEPCT_COUNT:
2079 case CTSF_SUBTABLEPCT_COUNT:
2080 case CTSF_LAYERPCT_COUNT:
2081 case CTSF_LAYERROWPCT_COUNT:
2082 case CTSF_LAYERCOLPCT_COUNT:
2083 case CTSF_ROWPCT_VALIDN:
2084 case CTSF_COLPCT_VALIDN:
2085 case CTSF_TABLEPCT_VALIDN:
2086 case CTSF_SUBTABLEPCT_VALIDN:
2087 case CTSF_LAYERPCT_VALIDN:
2088 case CTSF_LAYERROWPCT_VALIDN:
2089 case CTSF_LAYERCOLPCT_VALIDN:
2090 case CTSF_ROWPCT_TOTALN:
2091 case CTSF_COLPCT_TOTALN:
2092 case CTSF_TABLEPCT_TOTALN:
2093 case CTSF_SUBTABLEPCT_TOTALN:
2094 case CTSF_LAYERPCT_TOTALN:
2095 case CTSF_LAYERROWPCT_TOTALN:
2096 case CTSF_LAYERCOLPCT_TOTALN:
2108 s->min = s->max = SYSMIS;
2116 case CTSF_ROWPCT_SUM:
2117 case CTSF_COLPCT_SUM:
2118 case CTSF_TABLEPCT_SUM:
2119 case CTSF_SUBTABLEPCT_SUM:
2120 case CTSF_LAYERPCT_SUM:
2121 case CTSF_LAYERROWPCT_SUM:
2122 case CTSF_LAYERCOLPCT_SUM:
2123 s->moments = moments1_create (MOMENT_VARIANCE);
2130 struct caseproto *proto = caseproto_create ();
2131 proto = caseproto_add_width (proto, 0);
2132 proto = caseproto_add_width (proto, 0);
2134 struct subcase ordering;
2135 subcase_init (&ordering, 0, 0, SC_ASCEND);
2136 s->writer = sort_create_writer (&ordering, proto);
2137 subcase_uninit (&ordering);
2138 caseproto_unref (proto);
2148 ctables_summary_uninit (union ctables_summary *s,
2149 const struct ctables_summary_spec *ss)
2151 switch (ss->function)
2155 case CTSF_ROWPCT_COUNT:
2156 case CTSF_COLPCT_COUNT:
2157 case CTSF_TABLEPCT_COUNT:
2158 case CTSF_SUBTABLEPCT_COUNT:
2159 case CTSF_LAYERPCT_COUNT:
2160 case CTSF_LAYERROWPCT_COUNT:
2161 case CTSF_LAYERCOLPCT_COUNT:
2162 case CTSF_ROWPCT_VALIDN:
2163 case CTSF_COLPCT_VALIDN:
2164 case CTSF_TABLEPCT_VALIDN:
2165 case CTSF_SUBTABLEPCT_VALIDN:
2166 case CTSF_LAYERPCT_VALIDN:
2167 case CTSF_LAYERROWPCT_VALIDN:
2168 case CTSF_LAYERCOLPCT_VALIDN:
2169 case CTSF_ROWPCT_TOTALN:
2170 case CTSF_COLPCT_TOTALN:
2171 case CTSF_TABLEPCT_TOTALN:
2172 case CTSF_SUBTABLEPCT_TOTALN:
2173 case CTSF_LAYERPCT_TOTALN:
2174 case CTSF_LAYERROWPCT_TOTALN:
2175 case CTSF_LAYERCOLPCT_TOTALN:
2193 case CTSF_ROWPCT_SUM:
2194 case CTSF_COLPCT_SUM:
2195 case CTSF_TABLEPCT_SUM:
2196 case CTSF_SUBTABLEPCT_SUM:
2197 case CTSF_LAYERPCT_SUM:
2198 case CTSF_LAYERROWPCT_SUM:
2199 case CTSF_LAYERCOLPCT_SUM:
2200 moments1_destroy (s->moments);
2206 casewriter_destroy (s->writer);
2212 ctables_summary_add (union ctables_summary *s,
2213 const struct ctables_summary_spec *ss,
2214 const struct variable *var, const union value *value,
2215 bool is_scale, bool is_scale_missing,
2216 bool is_missing, bool excluded_missing,
2217 double d_weight, double e_weight)
2219 /* To determine whether a case is included in a given table for a particular
2220 kind of summary, consider the following charts for each variable in the
2221 table. Only if "yes" appears for every variable for the summary is the
2224 Categorical variables: VALIDN COUNT TOTALN
2225 Valid values in included categories yes yes yes
2226 Missing values in included categories --- yes yes
2227 Missing values in excluded categories --- --- yes
2228 Valid values in excluded categories --- --- ---
2230 Scale variables: VALIDN COUNT TOTALN
2231 Valid value yes yes yes
2232 Missing value --- yes yes
2234 Missing values include both user- and system-missing. (The system-missing
2235 value is always in an excluded category.)
2237 switch (ss->function)
2240 case CTSF_ROWPCT_TOTALN:
2241 case CTSF_COLPCT_TOTALN:
2242 case CTSF_TABLEPCT_TOTALN:
2243 case CTSF_SUBTABLEPCT_TOTALN:
2244 case CTSF_LAYERPCT_TOTALN:
2245 case CTSF_LAYERROWPCT_TOTALN:
2246 case CTSF_LAYERCOLPCT_TOTALN:
2247 s->count += d_weight;
2251 case CTSF_ROWPCT_COUNT:
2252 case CTSF_COLPCT_COUNT:
2253 case CTSF_TABLEPCT_COUNT:
2254 case CTSF_SUBTABLEPCT_COUNT:
2255 case CTSF_LAYERPCT_COUNT:
2256 case CTSF_LAYERROWPCT_COUNT:
2257 case CTSF_LAYERCOLPCT_COUNT:
2258 if (is_scale || !excluded_missing)
2259 s->count += d_weight;
2263 case CTSF_ROWPCT_VALIDN:
2264 case CTSF_COLPCT_VALIDN:
2265 case CTSF_TABLEPCT_VALIDN:
2266 case CTSF_SUBTABLEPCT_VALIDN:
2267 case CTSF_LAYERPCT_VALIDN:
2268 case CTSF_LAYERROWPCT_VALIDN:
2269 case CTSF_LAYERCOLPCT_VALIDN:
2273 s->count += d_weight;
2278 s->count += d_weight;
2282 if (is_scale || !excluded_missing)
2283 s->count += e_weight;
2290 s->count += e_weight;
2294 s->count += e_weight;
2300 if (!is_scale_missing)
2302 assert (!var_is_alpha (var)); /* XXX? */
2303 if (s->min == SYSMIS || value->f < s->min)
2305 if (s->max == SYSMIS || value->f > s->max)
2315 case CTSF_ROWPCT_SUM:
2316 case CTSF_COLPCT_SUM:
2317 case CTSF_TABLEPCT_SUM:
2318 case CTSF_SUBTABLEPCT_SUM:
2319 case CTSF_LAYERPCT_SUM:
2320 case CTSF_LAYERROWPCT_SUM:
2321 case CTSF_LAYERCOLPCT_SUM:
2322 if (!is_scale_missing)
2323 moments1_add (s->moments, value->f, e_weight);
2329 if (!is_scale_missing)
2331 s->ovalid += e_weight;
2333 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2334 *case_num_rw_idx (c, 0) = value->f;
2335 *case_num_rw_idx (c, 1) = e_weight;
2336 casewriter_write (s->writer, c);
2342 static enum ctables_domain_type
2343 ctables_function_domain (enum ctables_summary_function function)
2367 case CTSF_COLPCT_COUNT:
2368 case CTSF_COLPCT_SUM:
2369 case CTSF_COLPCT_TOTALN:
2370 case CTSF_COLPCT_VALIDN:
2373 case CTSF_LAYERCOLPCT_COUNT:
2374 case CTSF_LAYERCOLPCT_SUM:
2375 case CTSF_LAYERCOLPCT_TOTALN:
2376 case CTSF_LAYERCOLPCT_VALIDN:
2377 return CTDT_LAYERCOL;
2379 case CTSF_LAYERPCT_COUNT:
2380 case CTSF_LAYERPCT_SUM:
2381 case CTSF_LAYERPCT_TOTALN:
2382 case CTSF_LAYERPCT_VALIDN:
2385 case CTSF_LAYERROWPCT_COUNT:
2386 case CTSF_LAYERROWPCT_SUM:
2387 case CTSF_LAYERROWPCT_TOTALN:
2388 case CTSF_LAYERROWPCT_VALIDN:
2389 return CTDT_LAYERROW;
2391 case CTSF_ROWPCT_COUNT:
2392 case CTSF_ROWPCT_SUM:
2393 case CTSF_ROWPCT_TOTALN:
2394 case CTSF_ROWPCT_VALIDN:
2397 case CTSF_SUBTABLEPCT_COUNT:
2398 case CTSF_SUBTABLEPCT_SUM:
2399 case CTSF_SUBTABLEPCT_TOTALN:
2400 case CTSF_SUBTABLEPCT_VALIDN:
2401 return CTDT_SUBTABLE;
2403 case CTSF_TABLEPCT_COUNT:
2404 case CTSF_TABLEPCT_SUM:
2405 case CTSF_TABLEPCT_TOTALN:
2406 case CTSF_TABLEPCT_VALIDN:
2414 ctables_summary_value (const struct ctables_cell *cell,
2415 union ctables_summary *s,
2416 const struct ctables_summary_spec *ss)
2418 switch (ss->function)
2424 case CTSF_ROWPCT_COUNT:
2425 case CTSF_COLPCT_COUNT:
2426 case CTSF_TABLEPCT_COUNT:
2427 case CTSF_SUBTABLEPCT_COUNT:
2428 case CTSF_LAYERPCT_COUNT:
2429 case CTSF_LAYERROWPCT_COUNT:
2430 case CTSF_LAYERCOLPCT_COUNT:
2432 enum ctables_domain_type d = ctables_function_domain (ss->function);
2433 return (cell->domains[d]->e_count
2434 ? s->count / cell->domains[d]->e_count * 100
2438 case CTSF_ROWPCT_VALIDN:
2439 case CTSF_COLPCT_VALIDN:
2440 case CTSF_TABLEPCT_VALIDN:
2441 case CTSF_SUBTABLEPCT_VALIDN:
2442 case CTSF_LAYERPCT_VALIDN:
2443 case CTSF_LAYERROWPCT_VALIDN:
2444 case CTSF_LAYERCOLPCT_VALIDN:
2446 enum ctables_domain_type d = ctables_function_domain (ss->function);
2447 return (cell->domains[d]->e_valid
2448 ? s->count / cell->domains[d]->e_valid * 100
2452 case CTSF_ROWPCT_TOTALN:
2453 case CTSF_COLPCT_TOTALN:
2454 case CTSF_TABLEPCT_TOTALN:
2455 case CTSF_SUBTABLEPCT_TOTALN:
2456 case CTSF_LAYERPCT_TOTALN:
2457 case CTSF_LAYERROWPCT_TOTALN:
2458 case CTSF_LAYERCOLPCT_TOTALN:
2460 enum ctables_domain_type d = ctables_function_domain (ss->function);
2461 return (cell->domains[d]->e_total
2462 ? s->count / cell->domains[d]->e_total * 100
2486 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2491 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2497 double weight, variance;
2498 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2499 return calc_semean (variance, weight);
2505 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2506 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2511 double weight, mean;
2512 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2513 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2519 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2523 case CTSF_ROWPCT_SUM:
2524 case CTSF_COLPCT_SUM:
2525 case CTSF_TABLEPCT_SUM:
2526 case CTSF_SUBTABLEPCT_SUM:
2527 case CTSF_LAYERPCT_SUM:
2528 case CTSF_LAYERROWPCT_SUM:
2529 case CTSF_LAYERCOLPCT_SUM:
2536 struct casereader *reader = casewriter_make_reader (s->writer);
2539 struct percentile *ptile = percentile_create (
2540 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2541 struct order_stats *os = &ptile->parent;
2542 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2543 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2544 statistic_destroy (&ptile->parent.parent);
2551 struct casereader *reader = casewriter_make_reader (s->writer);
2554 struct mode *mode = mode_create ();
2555 struct order_stats *os = &mode->parent;
2556 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2557 s->ovalue = mode->mode;
2558 statistic_destroy (&mode->parent.parent);
2566 struct ctables_cell_sort_aux
2568 const struct ctables_nest *nest;
2569 enum pivot_axis_type a;
2573 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2575 const struct ctables_cell_sort_aux *aux = aux_;
2576 struct ctables_cell *const *ap = a_;
2577 struct ctables_cell *const *bp = b_;
2578 const struct ctables_cell *a = *ap;
2579 const struct ctables_cell *b = *bp;
2581 const struct ctables_nest *nest = aux->nest;
2582 for (size_t i = 0; i < nest->n; i++)
2583 if (i != nest->scale_idx)
2585 const struct variable *var = nest->vars[i];
2586 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2587 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2588 if (a_cv->category != b_cv->category)
2589 return a_cv->category > b_cv->category ? 1 : -1;
2591 const union value *a_val = &a_cv->value;
2592 const union value *b_val = &b_cv->value;
2593 switch (a_cv->category->type)
2599 case CCT_POSTCOMPUTE:
2600 case CCT_EXCLUDED_MISSING:
2601 /* Must be equal. */
2608 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2616 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2618 return a_cv->category->sort_ascending ? cmp : -cmp;
2624 const char *a_label = var_lookup_value_label (var, a_val);
2625 const char *b_label = var_lookup_value_label (var, b_val);
2627 ? (b_label ? strcmp (a_label, b_label) : 1)
2628 : (b_label ? -1 : value_compare_3way (
2629 a_val, b_val, var_get_width (var))));
2631 return a_cv->category->sort_ascending ? cmp : -cmp;
2645 For each ctables_table:
2646 For each combination of row vars:
2647 For each combination of column vars:
2648 For each combination of layer vars:
2650 Make a table of row values:
2651 Sort entries by row values
2652 Assign a 0-based index to each actual value
2653 Construct a dimension
2654 Make a table of column values
2655 Make a table of layer values
2657 Fill the table entry using the indexes from before.
/* Finds the domain of type DOMAIN in section S that CELL belongs to, creating
   it if no matching domain is found in S->domains[DOMAIN].

   Only the variables listed in nest->domains[DOMAIN], on each axis, take part
   in the lookup.  For each of them the hash combines the cell's category
   pointer and, unless the category is a total, subtotal or postcompute, the
   cell's data value.  Candidates with the same hash are then compared against
   CELL field by field using the same rule, with each existing domain
   represented by its `example' cell.

   A newly created domain is zero-initialized apart from `example', which is
   set to CELL. */
2660 static struct ctables_domain *
2661 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2662 enum ctables_domain_type domain)
2665 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2667 const struct ctables_nest *nest = s->nests[a];
2668 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2670 size_t v_idx = nest->domains[domain][i];
2671 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2672 hash = hash_pointer (cv->category, hash);
/* Totals, subtotals and postcomputes are identified by category alone. */
2673 if (cv->category->type != CCT_TOTAL
2674 && cv->category->type != CCT_SUBTOTAL
2675 && cv->category->type != CCT_POSTCOMPUTE)
2676 hash = value_hash (&cv->value,
2677 var_get_width (nest->vars[v_idx]), hash);
2681 struct ctables_domain *d;
2682 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2684 const struct ctables_cell *df = d->example;
2685 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2687 const struct ctables_nest *nest = s->nests[a];
2688 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2690 size_t v_idx = nest->domains[domain][i];
2691 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
2692 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
/* A mismatch in category, or in value where the value matters, rules this
   candidate out. */
2693 if (cv1->category != cv2->category
2694 || (cv1->category->type != CCT_TOTAL
2695 && cv1->category->type != CCT_SUBTOTAL
2696 && cv1->category->type != CCT_POSTCOMPUTE
2697 && !value_equal (&cv1->value, &cv2->value,
2698 var_get_width (nest->vars[v_idx]))))
/* No existing domain matched: create one with CELL as its example. */
2707 d = xmalloc (sizeof *d);
2708 *d = (struct ctables_domain) { .example = cell };
2709 hmap_insert (&s->domains[domain], &d->node, hash);
2713 static const struct ctables_category *
2714 ctables_categories_match (const struct ctables_categories *c,
2715 const union value *v, const struct variable *var)
2717 if (var_is_numeric (var) && v->f == SYSMIS)
2720 const struct ctables_category *othernm = NULL;
2721 for (size_t i = c->n_cats; i-- > 0; )
2723 const struct ctables_category *cat = &c->cats[i];
2727 if (cat->number == v->f)
2735 if ((cat->range[0] == -DBL_MAX || v->f >= cat->range[0])
2736 && (cat->range[1] == DBL_MAX || v->f <= cat->range[1]))
2741 if (var_is_value_missing (var, v))
2745 case CCT_POSTCOMPUTE:
2760 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2763 case CCT_EXCLUDED_MISSING:
2768 return var_is_value_missing (var, v) ? NULL : othernm;
/* Looks for a CCT_TOTAL category in C.  Only the first and the last entries
   of C->cats are examined, the first taking precedence.

   C->cats[0] and C->cats[C->n_cats - 1] are read unconditionally, so C must
   contain at least one category. */
2771 static const struct ctables_category *
2772 ctables_categories_total (const struct ctables_categories *c)
2774 const struct ctables_category *first = &c->cats[0];
2775 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2776 return (first->type == CCT_TOTAL ? first
2777 : last->type == CCT_TOTAL ? last
2781 static struct ctables_cell *
2782 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
2783 const struct ctables_category *cats[PIVOT_N_AXES][10])
2786 enum ctables_summary_variant sv = CSV_CELL;
2787 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2789 const struct ctables_nest *nest = s->nests[a];
2790 for (size_t i = 0; i < nest->n; i++)
2791 if (i != nest->scale_idx)
2793 hash = hash_pointer (cats[a][i], hash);
2794 if (cats[a][i]->type != CCT_TOTAL
2795 && cats[a][i]->type != CCT_SUBTOTAL
2796 && cats[a][i]->type != CCT_POSTCOMPUTE)
2797 hash = value_hash (case_data (c, nest->vars[i]),
2798 var_get_width (nest->vars[i]), hash);
2804 struct ctables_cell *cell;
2805 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
2807 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2809 const struct ctables_nest *nest = s->nests[a];
2810 for (size_t i = 0; i < nest->n; i++)
2811 if (i != nest->scale_idx
2812 && (cats[a][i] != cell->axes[a].cvs[i].category
2813 || (cats[a][i]->type != CCT_TOTAL
2814 && cats[a][i]->type != CCT_SUBTOTAL
2815 && cats[a][i]->type != CCT_POSTCOMPUTE
2816 && !value_equal (case_data (c, nest->vars[i]),
2817 &cell->axes[a].cvs[i].value,
2818 var_get_width (nest->vars[i])))))
2827 cell = xmalloc (sizeof *cell);
2830 cell->omit_domains = 0;
2831 cell->postcompute = false;
2832 //struct string name = DS_EMPTY_INITIALIZER;
2833 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2835 const struct ctables_nest *nest = s->nests[a];
2836 cell->axes[a].cvs = (nest->n
2837 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
2839 for (size_t i = 0; i < nest->n; i++)
2841 const struct ctables_category *cat = cats[a][i];
2842 const struct variable *var = nest->vars[i];
2843 const union value *value = case_data (c, var);
2844 if (i != nest->scale_idx)
2846 const struct ctables_category *subtotal = cat->subtotal;
2847 if (cat->hide || (subtotal && subtotal->hide_subcategories))
2850 if (cat->type == CCT_TOTAL
2851 || cat->type == CCT_SUBTOTAL
2852 || cat->type == CCT_POSTCOMPUTE)
2854 /* XXX these should be more encompassing I think.*/
2858 case PIVOT_AXIS_COLUMN:
2859 cell->omit_domains |= ((1u << CTDT_TABLE) |
2860 (1u << CTDT_LAYER) |
2861 (1u << CTDT_LAYERCOL) |
2862 (1u << CTDT_SUBTABLE) |
2865 case PIVOT_AXIS_ROW:
2866 cell->omit_domains |= ((1u << CTDT_TABLE) |
2867 (1u << CTDT_LAYER) |
2868 (1u << CTDT_LAYERROW) |
2869 (1u << CTDT_SUBTABLE) |
2872 case PIVOT_AXIS_LAYER:
2873 cell->omit_domains |= ((1u << CTDT_TABLE) |
2874 (1u << CTDT_LAYER));
2878 if (cat->type == CCT_POSTCOMPUTE)
2879 cell->postcompute = true;
2882 cell->axes[a].cvs[i].category = cat;
2883 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
2886 if (i != nest->scale_idx)
2888 if (!ds_is_empty (&name))
2889 ds_put_cstr (&name, ", ");
2890 char *value_s = data_out (value, var_get_encoding (var),
2891 var_get_print_format (var),
2892 settings_get_fmt_settings ());
2893 if (cat->type == CCT_TOTAL
2894 || cat->type == CCT_SUBTOTAL
2895 || cat->type == CCT_POSTCOMPUTE)
2896 ds_put_format (&name, "%s=total", var_get_name (var));
2898 ds_put_format (&name, "%s=%s", var_get_name (var),
2899 value_s + strspn (value_s, " "));
2905 //cell->name = ds_steal_cstr (&name);
2907 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2908 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2909 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
2910 for (size_t i = 0; i < specs->n; i++)
2911 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
2912 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2913 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
2914 hmap_insert (&s->cells, &cell->node, hash);
/* Checks case C for missing values in the scale variable of SPECS.  The
   function first tests SPECS->is_scale, then tests SPECS->var with
   var_is_num_missing(), and finally applies the same test to each of the
   SPECS->n_listwise_vars variables in SPECS->listwise_vars.

   NOTE(review): the value returned from each branch is not visible in this
   excerpt; presumably a non-scale spec set is never "scale missing" and any
   missing value makes the result true -- confirm. */
2919 is_scale_missing (const struct ctables_summary_spec_set *specs,
2920 const struct ccase *c)
2922 if (!specs->is_scale)
2925 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
2928 for (size_t i = 0; i < specs->n_listwise_vars; i++)
2930 const struct variable *var = specs->listwise_vars[i];
2931 if (var_is_num_missing (var, case_num (c, var)))
2939 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
2940 const struct ctables_category *cats[PIVOT_N_AXES][10],
2941 bool is_missing, bool excluded_missing,
2942 double d_weight, double e_weight)
2944 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
2945 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
2947 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
2949 bool scale_missing = is_scale_missing (specs, c);
2950 for (size_t i = 0; i < specs->n; i++)
2951 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
2952 specs->var, case_data (c, specs->var), specs->is_scale,
2953 scale_missing, is_missing, excluded_missing,
2954 d_weight, e_weight);
2955 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
2956 if (!(cell->omit_domains && (1u << dt)))
2958 struct ctables_domain *d = cell->domains[dt];
2959 d->d_total += d_weight;
2960 d->e_total += e_weight;
2961 if (!excluded_missing)
2963 d->d_count += d_weight;
2964 d->e_count += e_weight;
2968 d->d_valid += d_weight;
2969 d->e_valid += e_weight;
/* Adds case C to the cells in S that are obtained by substituting total
   categories into CATS.  Axes are visited from START_AXIS onward and, within
   an axis, nest variables from START_NEST onward; the scale variable
   (nest->scale_idx) is singled out by a test whose effect is not shown here.
   For each remaining variable the total category of its categories is looked
   up with ctables_categories_total(); the current CATS entry is saved, the
   case is added with ctables_cell_add__(), and the function recurses starting
   just after the current variable so that combinations of totals across
   several variables are covered as well.

   NOTE(review): the lines that test whether a total exists, overwrite
   cats[a][i] with it, and restore the saved entry are not visible in this
   excerpt -- confirm. */
2975 recurse_totals (struct ctables_section *s, const struct ccase *c,
2976 const struct ctables_category *cats[PIVOT_N_AXES][10],
2977 bool is_missing, bool excluded_missing,
2978 double d_weight, double e_weight,
2979 enum pivot_axis_type start_axis, size_t start_nest)
2981 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
2983 const struct ctables_nest *nest = s->nests[a];
2984 for (size_t i = start_nest; i < nest->n; i++)
2986 if (i == nest->scale_idx)
2989 const struct variable *var = nest->vars[i];
2991 const struct ctables_category *total = ctables_categories_total (
2992 s->table->categories[var_get_dict_index (var)]);
2995 const struct ctables_category *save = cats[a][i];
2997 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
2998 d_weight, e_weight);
2999 recurse_totals (s, c, cats, is_missing, excluded_missing,
3000 d_weight, e_weight, a, i + 1);
3009 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3010 const struct ctables_category *cats[PIVOT_N_AXES][10],
3011 bool is_missing, bool excluded_missing,
3012 double d_weight, double e_weight,
3013 enum pivot_axis_type start_axis, size_t start_nest)
3015 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3017 const struct ctables_nest *nest = s->nests[a];
3018 for (size_t i = start_nest; i < nest->n; i++)
3020 if (i == nest->scale_idx)
3023 const struct ctables_category *save = cats[a][i];
3026 cats[a][i] = save->subtotal;
3027 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3028 d_weight, e_weight);
3029 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3030 d_weight, e_weight, a, i + 1);
3039 ctables_add_occurrence (const struct variable *var,
3040 const union value *value,
3041 struct hmap *occurrences)
3043 int width = var_get_width (var);
3044 unsigned int hash = value_hash (value, width, 0);
3046 struct ctables_occurrence *o;
3047 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3049 if (value_equal (value, &o->value, width))
3052 o = xmalloc (sizeof *o);
3053 value_clone (&o->value, value, width);
3054 hmap_insert (occurrences, &o->node, hash);
/* Classifies case C against the categorical variables on every axis of
   section S, then adds it, with weights D_WEIGHT and E_WEIGHT, to the
   matching cell through ctables_cell_add__() and to the related total and
   subtotal cells through recurse_totals() and recurse_subtotals(). */
3058 ctables_cell_insert (struct ctables_section *s,
3059 const struct ccase *c,
3060 double d_weight, double e_weight)
/* NOTE(review): the second dimension is fixed at 10 entries but is indexed
   by nest position below; nothing visible here bounds nest->n -- confirm
   that deeper nests are rejected elsewhere. */
3062 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3064 /* Does at least one categorical variable have a missing value in an included
3065 or excluded category? */
3066 bool is_missing = false;
3068 /* Does at least one categorical variable have a missing value in an excluded category? */
3070 bool excluded_missing = false;
/* First pass: find the category for each categorical variable. */
3072 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3074 const struct ctables_nest *nest = s->nests[a];
3075 for (size_t i = 0; i < nest->n; i++)
3077 if (i == nest->scale_idx)
3080 const struct variable *var = nest->vars[i];
3081 const union value *value = case_data (c, var);
3083 bool var_missing = var_is_value_missing (var, value) != 0;
/* Look up the category that VALUE falls into for VAR and remember it at this
   axis and nest position. */
3087 cats[a][i] = ctables_categories_match (
3088 s->table->categories[var_get_dict_index (var)], value, var);
/* Shared placeholder of type CCT_EXCLUDED_MISSING; it is stored in CATS on
   the path that sets EXCLUDED_MISSING. */
3094 static const struct ctables_category cct_excluded_missing = {
3095 .type = CCT_EXCLUDED_MISSING,
3098 cats[a][i] = &cct_excluded_missing;
3099 excluded_missing = true;
/* Second pass: unless an excluded missing value was seen, record each
   categorical variable's value as an occurrence for its axis and position. */
3104 if (!excluded_missing)
3105 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3107 const struct ctables_nest *nest = s->nests[a];
3108 for (size_t i = 0; i < nest->n; i++)
3109 if (i != nest->scale_idx)
3111 const struct variable *var = nest->vars[i];
3112 const union value *value = case_data (c, var);
3113 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3117 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3118 d_weight, e_weight);
/* NOTE(review): the guard below is disabled by being commented out; decide
   whether totals and subtotals should skip cases with an excluded missing
   value, then either restore the condition or delete the remnant. */
3120 //if (!excluded_missing)
3122 recurse_totals (s, c, cats, is_missing, excluded_missing,
3123 d_weight, e_weight, 0, 0);
3124 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3125 d_weight, e_weight, 0, 0);
/* The summary spec set that this merge item draws from; the item's current
   spec is set->specs[ofs] (see merge_item_compare_3way()). */
3131 const struct ctables_summary_spec_set *set;
3136 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3138 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3139 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3140 if (as->function != bs->function)
3141 return as->function > bs->function ? 1 : -1;
3142 else if (as->percentile != bs->percentile)
3143 return as->percentile < bs->percentile ? 1 : -1;
3144 return strcmp (as->label, bs->label);
3147 static struct pivot_value *
3148 ctables_category_create_label (const struct ctables_category *cat,
3149 const struct variable *var,
3150 const union value *value)
3152 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3153 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3154 : cat->type == CCT_POSTCOMPUTE && cat->pc->label
3155 ? pivot_value_new_user_text (cat->pc->label, SIZE_MAX)
3156 : pivot_value_new_var_value (var, value));
3159 static struct ctables_value *
3160 ctables_value_find__ (struct ctables_table *t, const union value *value,
3161 int width, unsigned int hash)
3163 struct ctables_value *clv;
3164 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3165 hash, &t->clabels_values_map)
3166 if (value_equal (value, &clv->value, width))
3172 ctables_value_insert (struct ctables_table *t, const union value *value,
3175 unsigned int hash = value_hash (value, width, 0);
3176 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3179 clv = xmalloc (sizeof *clv);
3180 value_clone (&clv->value, value, width);
3181 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Returns the entry of T's clabels_values_map that equals VALUE, which has
   the given WIDTH, or whatever ctables_value_find__() returns when no such
   entry exists. */
static struct ctables_value *
ctables_value_find (struct ctables_table *t,
                    const union value *value, int width)
{
  const unsigned int hash = value_hash (value, width, 0);
  return ctables_value_find__ (t, value, width, hash);
}
/* Recursively visits every combination of nest indexes, one per axis
   starting at axis A, keeping the index chosen for each axis in IX.  Once an
   index has been chosen for every axis, appends a new section to
   T->sections and initializes its cells, occurrences, and domains maps. */
3194 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3195 size_t ix[PIVOT_N_AXES])
3197 if (a < PIVOT_N_AXES)
/* MAX (..., 1) makes the loop below run once even for an axis whose stack
   has no nests. */
3199 size_t limit = MAX (t->stacks[a].n, 1);
3200 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3201 ctables_table_add_section (t, a + 1, ix);
/* All axes have an index: claim the next slot in T->sections.  The caller
   must have allocated room for it (see ctables_execute()). */
3205 struct ctables_section *s = &t->sections[t->n_sections++];
3206 *s = (struct ctables_section) {
3208 .cells = HMAP_INITIALIZER (s->cells),
3210 for (a = 0; a < PIVOT_N_AXES; a++)
3213 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
/* One occurrence map per variable position in the nest. */
3215 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3216 for (size_t i = 0; i < nest->n; i++)
3217 hmap_init (&s->occurrences[a][i]);
3219 for (size_t i = 0; i < N_CTDTS; i++)
3220 hmap_init (&s->domains[i]);
/* Postcompute expression operator: returns A plus B. */
static double
ctpo_add (double a, double b)
{
  const double sum = a + b;
  return sum;
}
/* Postcompute expression operator: returns A minus B. */
static double
ctpo_sub (double a, double b)
{
  const double difference = a - b;
  return difference;
}
/* Postcompute expression operator: returns A times B. */
static double
ctpo_mul (double a, double b)
{
  const double product = a * b;
  return product;
}
3243 ctpo_div (double a, double b)
3245 return b ? a / b : SYSMIS;
3249 ctpo_pow (double a, double b)
3251 int save_errno = errno;
3253 double result = pow (a, b);
3261 ctpo_neg (double a, double b UNUSED)
/* Context shared by the functions that evaluate a postcompute expression for
   one cell. */
3266 struct ctables_pcexpr_evaluate_ctx
/* The cell being computed; it supplies the category and value for every
   position other than the postcompute's own. */
3268 const struct ctables_cell *cell;
/* The section whose cells and occurrences are searched. */
3269 const struct ctables_section *section;
/* Categories used to resolve the category operands of the expression. */
3270 const struct ctables_categories *cats;
/* Axis that holds the postcompute category. */
3271 enum pivot_axis_type pc_a;
3275 static double ctables_pcexpr_evaluate (
3276 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
3279 ctables_pcexpr_evaluate_nonterminal (
3280 const struct ctables_pcexpr_evaluate_ctx *ctx,
3281 const struct ctables_pcexpr *e, size_t n_args,
3282 double evaluate (double, double))
3284 double args[2] = { 0, 0 };
3285 for (size_t i = 0; i < n_args; i++)
3287 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3288 if (!isfinite (args[i]) || args[i] == SYSMIS)
3291 return evaluate (args[0], args[1]);
/* Looks up, in CTX's section, the cell that has the same category (and, for
   categories other than totals, subtotals, and postcomputes, the same value)
   as CTX's cell at every categorical position, except that PC_CV is
   substituted at axis CTX->pc_a, position CTX->pc_a_idx.  For the cell
   found, returns the value of its first summary. */
3295 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3296 const struct ctables_cell_value *pc_cv)
3298 const struct ctables_section *s = ctx->section;
/* Compute the hash of the cell being sought from its category pointers and,
   where applicable, its values. */
3301 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3303 const struct ctables_nest *nest = s->nests[a];
3304 for (size_t i = 0; i < nest->n; i++)
3305 if (i != nest->scale_idx)
3307 const struct ctables_cell_value *cv
3308 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3309 : &ctx->cell->axes[a].cvs[i]);
3310 hash = hash_pointer (cv->category, hash);
3311 if (cv->category->type != CCT_TOTAL
3312 && cv->category->type != CCT_SUBTOTAL
3313 && cv->category->type != CCT_POSTCOMPUTE)
3314 hash = value_hash (&cv->value,
3315 var_get_width (nest->vars[i]), hash);
/* Check each cell in the hash bucket position by position, using the same
   substitution of PC_CV as above. */
3319 struct ctables_cell *tc;
3320 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3322 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3324 const struct ctables_nest *nest = s->nests[a];
3325 for (size_t i = 0; i < nest->n; i++)
3326 if (i != nest->scale_idx)
3328 const struct ctables_cell_value *p_cv
3329 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3330 : &ctx->cell->axes[a].cvs[i]);
3331 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3332 if (p_cv->category != t_cv->category
3333 || (p_cv->category->type != CCT_TOTAL
3334 && p_cv->category->type != CCT_SUBTOTAL
3335 && p_cv->category->type != CCT_POSTCOMPUTE
3336 && !value_equal (&p_cv->value,
3338 var_get_width (nest->vars[i]))))
3350 const struct ctables_table *t = s->table;
3351 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3352 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
/* Only summary 0 of the matching cell is consulted, whichever summary the
   caller is computing; the XXX marks this as unfinished. */
3353 size_t j = 0 /* XXX */;
3354 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
/* Evaluates postcompute expression E in context CTX and returns the
   result. */
3358 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3359 const struct ctables_pcexpr *e)
/* A category range: add up the value obtained for every occurrence, at the
   postcompute's position, that matches the range's category. */
3366 case CTPO_CAT_RANGE:
3368 struct ctables_cell_value cv = {
3369 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3371 assert (cv.category != NULL);
3373 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3374 const struct ctables_occurrence *o;
3377 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
3378 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3379 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3381 cv.value = o->value;
3382 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
/* A single category: evaluate the one cell it identifies.  The value is
   always initialized from e->number, for every operand type listed here. */
3387 case CTPO_CAT_NUMBER:
3388 case CTPO_CAT_STRING:
3389 case CTPO_CAT_MISSING:
3390 case CTPO_CAT_OTHERNM:
3391 case CTPO_CAT_SUBTOTAL:
3392 case CTPO_CAT_TOTAL:
3394 struct ctables_cell_value cv = {
3395 .category = ctables_find_category_for_postcompute (ctx->cats, e),
3396 .value = { .f = e->number },
3398 assert (cv.category != NULL);
3399 return ctables_pcexpr_evaluate_category (ctx, &cv);
/* Operators: evaluate the operands, then apply the matching ctpo_*()
   function.  The numeric argument is the operand count. */
3403 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3406 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3409 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3412 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3415 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3418 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Computes the value of CELL, a cell in section S that has a postcompute
   category.  Searches CELL's axes in order for a position whose category has
   type CCT_POSTCOMPUTE, then evaluates that category's postcompute
   expression and returns the result.  It is an assertion failure for CELL to
   have no postcompute category. */
3425 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3426 const struct ctables_cell *cell)
3428 enum pivot_axis_type pc_a;
3430 const struct ctables_postcompute *pc;
/* The loop has no termination condition of its own; the assertion catches
   running off the end of the axes. */
3431 for (pc_a = 0; ; pc_a++)
3433 assert (pc_a < PIVOT_N_AXES);
3434 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3436 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3437 if (cv->category->type == CCT_POSTCOMPUTE)
3439 pc = cv->category->pc;
/* Evaluate against the categories of the variable at the postcompute's
   position. */
3446 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3447 const struct ctables_categories *cats = s->table->categories[
3448 var_get_dict_index (var)];
3449 struct ctables_pcexpr_evaluate_ctx ctx = {
3454 .pc_a_idx = pc_a_idx,
3456 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Builds a pivot table from the cells accumulated for table T of CT and
   submits it for output.

   The work is done in two passes.  The first creates the dimensions and,
   for each axis and nest, sorts the visible cells and creates the pivot
   categories (variable labels, categories, summary functions) that label
   them, recording in each cell the leaf index it maps to on that axis.  The
   second pass computes each cell's summary values and puts them into the
   table at the recorded indexes. */
3460 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3462 struct pivot_table *pt = pivot_table_create__ (
3464 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3465 : pivot_value_new_text (N_("Custom Tables"))),
3468 pivot_table_set_caption (
3469 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
/* NOTE(review): this passes t->corner to pivot_table_set_caption(), the same
   setter used for t->caption just above, so the corner text would end up as
   the caption.  Presumably pivot_table_set_corner_text() was intended --
   confirm against output/pivot-table.h and switch. */
3471 pivot_table_set_caption (
3472 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
/* The summary functions get a dimension of their own when their labels go on
   a different axis than the summaries themselves, or when the labels are
   hidden and there is more than one summary spec. */
3474 bool summary_dimension = (t->summary_axis != t->slabels_axis
3475 || (!t->slabels_visible
3476 && t->summary_specs.n > 1));
3477 if (summary_dimension)
3479 struct pivot_dimension *d = pivot_dimension_create (
3480 pt, t->slabels_axis, N_("Statistics"));
3481 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3482 if (!t->slabels_visible)
3483 d->hide_all_labels = true;
3484 for (size_t i = 0; i < specs->n; i++)
3485 pivot_category_create_leaf (
3486 d->root, pivot_value_new_text (specs->specs[i].label));
/* Category labels get a dimension of their own when t->clabels_example is
   set; its leaves are the values in t->clabels_values, in order. */
3489 bool categories_dimension = t->clabels_example != NULL;
3490 if (categories_dimension)
3492 struct pivot_dimension *d = pivot_dimension_create (
3493 pt, t->label_axis[t->clabels_from_axis],
3494 t->clabels_from_axis == PIVOT_AXIS_ROW
3495 ? N_("Row Categories")
3496 : N_("Column Categories"));
3497 const struct variable *var = t->clabels_example;
3498 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3499 for (size_t i = 0; i < t->n_clabels_values; i++)
3501 const struct ctables_value *value = t->clabels_values[i];
3502 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3503 assert (cat != NULL);
3504 pivot_category_create_leaf (d->root, ctables_category_create_label (
3505 cat, t->clabels_example, &value->value));
3509 pivot_table_set_look (pt, ct->look);
/* First pass: one dimension per axis that has an expression or that carries
   the summaries. */
3510 struct pivot_dimension *d[PIVOT_N_AXES];
3511 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3513 static const char *names[] = {
3514 [PIVOT_AXIS_ROW] = N_("Rows"),
3515 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3516 [PIVOT_AXIS_LAYER] = N_("Layers"),
3518 d[a] = (t->axes[a] || a == t->summary_axis
3519 ? pivot_dimension_create (pt, a, names[a])
3524 assert (t->axes[a]);
3526 for (size_t i = 0; i < t->stacks[a].n; i++)
/* Gather the sections that use this nest on axis A, along with the total
   number of their cells and the deepest nest among them. */
3528 struct ctables_nest *nest = &t->stacks[a].nests[i];
3529 struct ctables_section **sections = xnmalloc (t->n_sections,
3531 size_t n_sections = 0;
3533 size_t n_total_cells = 0;
3534 size_t max_depth = 0;
3535 for (size_t j = 0; j < t->n_sections; j++)
3536 if (t->sections[j].nests[a] == nest)
3538 struct ctables_section *s = &t->sections[j];
3539 sections[n_sections++] = s;
3540 n_total_cells += s->cells.count;
3542 size_t depth = s->nests[a]->n;
3543 max_depth = MAX (depth, max_depth);
/* Collect the cells of those sections and sort them into display order for
   this axis. */
3546 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3548 size_t n_sorted = 0;
3550 for (size_t j = 0; j < n_sections; j++)
3552 struct ctables_section *s = sections[j];
3554 struct ctables_cell *cell;
3555 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3557 sorted[n_sorted++] = cell;
3558 assert (n_sorted <= n_total_cells);
3561 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3562 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
/* NOTE(review): debugging output written straight to stdout; confirm that it
   is compiled out in normal builds. */
3565 for (size_t j = 0; j < n_sorted; j++)
3567 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
/* A "level" is one layer of pivot categories under this axis's dimension. */
3572 struct ctables_level
3574 enum ctables_level_type
3576 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3577 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3578 CTL_SUMMARY, /* Summary functions. */
3582 enum settings_value_show vlabel; /* CTL_VAR only. */
/* At most two levels per variable (label and category) plus one for the
   summary functions. */
3585 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3586 size_t n_levels = 0;
3587 for (size_t k = 0; k < nest->n; k++)
3589 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3590 if (vlabel != CTVL_NONE)
3592 levels[n_levels++] = (struct ctables_level) {
3594 .vlabel = (enum settings_value_show) vlabel,
3599 if (nest->scale_idx != k
3600 && (k != nest->n - 1 || t->label_axis[a] == a))
3602 levels[n_levels++] = (struct ctables_level) {
3603 .type = CTL_CATEGORY,
3609 if (!summary_dimension && a == t->slabels_axis)
3611 levels[n_levels++] = (struct ctables_level) {
3612 .type = CTL_SUMMARY,
3613 .var_idx = SIZE_MAX,
3617 /* Pivot categories:
3619 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3620 - category for nest->vars[0], if nest->scale_idx != 0
3621 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3622 - category for nest->vars[1], if nest->scale_idx != 1
3624 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3625 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3626 - summary function, if 'a == t->slabels_axis && a ==
3629 Additional dimensions:
3631 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3633 - If 't->label_axis[b] == a' for some 'b != a', add a category
3638 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3640 for (size_t j = 0; j < n_sorted; j++)
3642 struct ctables_cell *cell = sorted[j];
3643 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
/* Count the leading levels at which this cell has the same category (and
   value) as the previous sorted cell; the groups already created for those
   levels are reused. */
3645 size_t n_common = 0;
3648 for (; n_common < n_levels; n_common++)
3650 const struct ctables_level *level = &levels[n_common];
3651 if (level->type == CTL_CATEGORY)
3653 size_t var_idx = level->var_idx;
3654 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3655 if (prev->axes[a].cvs[var_idx].category != c)
3657 else if (c->type != CCT_SUBTOTAL
3658 && c->type != CCT_TOTAL
3659 && c->type != CCT_POSTCOMPUTE
3660 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3661 &cell->axes[a].cvs[var_idx].value,
/* NOTE(review): every other value_equal() call in this file passes the
   variable's width (var_get_width()) as the third argument; this one passes
   var_get_type().  That looks like it compares string values at the wrong
   width -- confirm and use var_get_width(). */
3662 var_get_type (nest->vars[var_idx])))
/* Create the categories for the levels that differ from the previous
   cell. */
3668 for (size_t k = n_common; k < n_levels; k++)
3670 const struct ctables_level *level = &levels[k];
3671 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3672 if (level->type == CTL_SUMMARY)
3674 assert (k == n_levels - 1);
3676 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3677 for (size_t m = 0; m < specs->n; m++)
3679 int leaf = pivot_category_create_leaf (
3680 parent, pivot_value_new_text (specs->specs[m].label));
3687 const struct variable *var = nest->vars[level->var_idx];
3688 struct pivot_value *label;
3689 if (level->type == CTL_VAR)
3691 label = pivot_value_new_variable (var);
3692 label->variable.show = level->vlabel;
3694 else if (level->type == CTL_CATEGORY)
3696 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3697 label = ctables_category_create_label (cv->category,
/* The last level becomes a leaf; earlier levels become groups that later
   levels are nested inside. */
3703 if (k == n_levels - 1)
3704 prev_leaf = pivot_category_create_leaf (parent, label);
3706 groups[k] = pivot_category_create_group__ (parent, label);
3710 cell->axes[a].leaf = prev_leaf;
/* Second pass: put each cell's summary values into the table. */
3717 for (size_t i = 0; i < t->n_sections; i++)
3719 struct ctables_section *s = &t->sections[i];
3721 struct ctables_cell *cell;
3722 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3727 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3728 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
3729 for (size_t j = 0; j < specs->n; j++)
/* Build the index of this value along each dimension, in the order in which
   the dimensions were created above. */
3732 size_t n_dindexes = 0;
3734 if (summary_dimension)
3735 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
3737 if (categories_dimension)
3739 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3740 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3741 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3742 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
3745 dindexes[n_dindexes++] = ctv->leaf;
3748 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3751 int leaf = cell->axes[a].leaf;
3752 if (a == t->summary_axis && !summary_dimension)
3754 dindexes[n_dindexes++] = leaf;
3757 const struct ctables_summary_spec *ss = &specs->specs[j];
3759 double d = (cell->postcompute
3760 ? ctables_cell_calculate_postcompute (s, cell)
3761 : ctables_summary_value (cell, &cell->summaries[j], ss));
/* Choose how to present D: values under the hide threshold appear as "<N",
   zeros and SYSMIS may be replaced by configured text, and summaries with a
   CTABLES-specific format are rendered to text here. */
3762 struct pivot_value *value;
3763 if (ct->hide_threshold != 0
3764 && d < ct->hide_threshold
3765 && (cell->postcompute
3767 : ctables_summary_function_is_count (ss->function)))
3769 value = pivot_value_new_user_text_nocopy (
3770 xasprintf ("<%d", ct->hide_threshold));
3772 else if (d == 0 && ct->zero)
3773 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
3774 else if (d == SYSMIS && ct->missing)
3775 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
3776 else if (specs->specs[j].is_ctables_format)
3778 char *s = data_out_stretchy (&(union value) { .f = d },
3780 &specs->specs[j].format,
3781 &ct->ctables_formats, NULL);
3782 value = pivot_value_new_user_text_nocopy (s);
3786 value = pivot_value_new_number (d);
3787 value->numeric.format = specs->specs[j].format;
3789 pivot_table_put (pt, dindexes, n_dindexes, value);
3794 pivot_table_submit (pt);
/* Checks whether the category labels of axis A in table T can be displayed
   at the position that t->label_axis[A] asks for, reporting an error with
   msg() for each requirement that fails.  ctables_prepare_table() uses the
   result as a boolean.

   The requirements visible here are: no category may sort on a summary
   function; the innermost variable of every nest on the axis must be
   categorical rather than scale; and those variables must all agree in
   width, value labels, and category specifications.

   The function also records A in t->clabels_from_axis and the innermost
   variable of the first nest in t->clabels_example. */
3798 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
3800 enum pivot_axis_type label_pos = t->label_axis[a];
3804 t->clabels_from_axis = a;
/* Names used only in error messages. */
3806 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
3807 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
3809 const struct ctables_stack *stack = &t->stacks[a];
/* The first nest's innermost variable is the reference that the others are
   compared against. */
3813 const struct ctables_nest *n0 = &stack->nests[0];
3815 const struct variable *v0 = n0->vars[n0->n - 1];
3816 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
3817 t->clabels_example = v0;
3819 for (size_t i = 0; i < c0->n_cats; i++)
3820 if (c0->cats[i].type == CCT_FUNCTION)
3822 msg (SE, _("%s=%s is not allowed with sorting based "
3823 "on a summary function."),
3824 subcommand_name, pos_name);
3827 if (n0->n - 1 == n0->scale_idx)
3829 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
3830 "but %s is a scale variable."),
3831 subcommand_name, pos_name, var_get_name (v0));
/* Compare each remaining nest's innermost variable against the first. */
3835 for (size_t i = 1; i < stack->n; i++)
3837 const struct ctables_nest *ni = &stack->nests[i];
3839 const struct variable *vi = ni->vars[ni->n - 1];
3840 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
3842 if (ni->n - 1 == ni->scale_idx)
3844 msg (SE, _("%s=%s requires the variables to be moved to be "
3845 "categorical, but %s is a scale variable."),
3846 subcommand_name, pos_name, var_get_name (vi));
3849 if (var_get_width (v0) != var_get_width (vi))
3851 msg (SE, _("%s=%s requires the variables to be "
3852 "moved to have the same width, but %s has "
3853 "width %d and %s has width %d."),
3854 subcommand_name, pos_name,
3855 var_get_name (v0), var_get_width (v0),
3856 var_get_name (vi), var_get_width (vi));
3859 if (!val_labs_equal (var_get_value_labels (v0),
3860 var_get_value_labels (vi)))
3862 msg (SE, _("%s=%s requires the variables to be "
3863 "moved to have the same value labels, but %s "
3864 "and %s have different value labels."),
3865 subcommand_name, pos_name,
3866 var_get_name (v0), var_get_name (vi));
3869 if (!ctables_categories_equal (c0, ci))
3871 msg (SE, _("%s=%s requires the variables to be "
3872 "moved to have the same category "
3873 "specifications, but %s and %s have different "
3874 "category specifications."),
3875 subcommand_name, pos_name,
3876 var_get_name (v0), var_get_name (vi));
/* Gets table T ready for data to be added.  In order, this:

     - enumerates the nests on each axis into t->stacks[] and, for each nest
       and domain type, works out which variable positions define the domain;
     - gives each nest on the summary axis a default summary spec if it has
       none, and a set of total specs if it has none;
     - when listwise deletion of scale missing values is in effect, collects
       the scale variables of the other nests in the same group;
     - merges the summary specs of all nests into t->summary_specs, storing in
       each spec its index in the merged list;
     - checks the row and column label positions.

   The value returned is the result of the label position checks. */
3885 ctables_prepare_table (struct ctables_table *t)
3887 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3890 t->stacks[a] = enumerate_fts (a, t->axes[a]);
3892 for (size_t j = 0; j < t->stacks[a].n; j++)
3894 struct ctables_nest *nest = &t->stacks[a].nests[j];
3895 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Room for every variable position in the nest; only those that pass the
   tests below are stored. */
3897 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
3898 nest->n_domains[dt] = 0;
3900 for (size_t k = 0; k < nest->n; k++)
3902 if (k == nest->scale_idx)
3911 if (a != PIVOT_AXIS_LAYER)
3918 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
3919 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
3920 : a == PIVOT_AXIS_ROW)
3922 if (k == nest->n - 1
3923 || (nest->scale_idx == nest->n - 1
3924 && k == nest->n - 2))
3930 if (a == PIVOT_AXIS_COLUMN)
3935 if (a == PIVOT_AXIS_ROW)
3940 nest->domains[dt][nest->n_domains[dt]++] = k;
/* A stack consisting of a single empty nest. */
3947 struct ctables_nest *nest = xmalloc (sizeof *nest);
3948 *nest = (struct ctables_nest) { .n = 0 };
3949 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
3952 struct ctables_stack *stack = &t->stacks[t->summary_axis];
3953 for (size_t i = 0; i < stack->n; i++)
3955 struct ctables_nest *nest = &stack->nests[i];
/* No summary function was specified for this nest: default to the mean for
   a scale variable and to the count otherwise. */
3956 if (!nest->specs[CSV_CELL].n)
3958 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
3959 specs->specs = xmalloc (sizeof *specs->specs);
3962 enum ctables_summary_function function
3963 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
3965 *specs->specs = (struct ctables_summary_spec) {
3966 .function = function,
/* NOTE(review): specs->var is passed here before the assignment to
   specs->var a few lines below -- confirm that it is already set whenever
   the default format depends on the variable. */
3967 .format = ctables_summary_default_format (function, specs->var),
3968 .label = ctables_summary_default_label (function, 0),
3971 specs->var = nest->vars[0];
3973 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
3974 &nest->specs[CSV_CELL]);
/* Totals use the same summaries as ordinary cells unless they were given
   their own. */
3976 else if (!nest->specs[CSV_TOTAL].n)
3977 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
3978 &nest->specs[CSV_CELL]);
3980 if (t->ctables->smissing_listwise)
3982 struct variable **listwise_vars = NULL;
3984 size_t allocated = 0;
/* Walk the nests that share this nest's group_head, collecting the scale
   variable of each nest other than this one. */
3986 for (size_t j = nest->group_head; j < stack->n; j++)
3988 const struct ctables_nest *other_nest = &stack->nests[j];
3989 if (other_nest->group_head != nest->group_head)
3992 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
3995 listwise_vars = x2nrealloc (listwise_vars, &allocated,
3996 sizeof *listwise_vars);
3997 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* The same array is shared by all of the nest's summary variants. */
4000 for (size_t j = 0; j < N_CSVS; j++)
4002 nest->specs[j].listwise_vars = listwise_vars;
4003 nest->specs[j].n_listwise_vars = n;
/* Merge the spec sets of all nests and variants into one list.  Each item
   tracks a position in one spec set; the smallest current spec is appended
   repeatedly, and every item whose current spec compares equal to it is
   advanced. */
4008 struct ctables_summary_spec_set *merged = &t->summary_specs;
4009 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4011 for (size_t j = 0; j < stack->n; j++)
4013 const struct ctables_nest *nest = &stack->nests[j];
4015 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4016 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
4021 struct merge_item min = items[0];
4022 for (size_t j = 1; j < n_left; j++)
4023 if (merge_item_compare_3way (&items[j], &min) < 0)
4026 if (merged->n >= merged->allocated)
4027 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4028 sizeof *merged->specs);
4029 merged->specs[merged->n++] = min.set->specs[min.ofs];
4031 for (size_t j = 0; j < n_left; )
4033 if (merge_item_compare_3way (&items[j], &min) == 0)
4035 struct merge_item *item = &items[j];
/* Remember where in the merged list this spec ended up. */
4036 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4037 if (++item->ofs >= item->set->n)
/* This spec set is used up: replace its item with the last one. */
4039 items[j] = items[--n_left];
/* NOTE(review): debugging output written straight to stdout; confirm that it
   is compiled out in normal builds. */
4048 for (size_t j = 0; j < merged->n; j++)
4049 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4051 for (size_t j = 0; j < stack->n; j++)
4053 const struct ctables_nest *nest = &stack->nests[j];
4054 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4056 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4057 for (size_t k = 0; k < specs->n; k++)
4058 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4059 specs->specs[k].axis_idx);
4065 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4066 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
4070 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4071 enum pivot_axis_type a)
4073 struct ctables_stack *stack = &t->stacks[a];
4074 for (size_t i = 0; i < stack->n; i++)
4076 const struct ctables_nest *nest = &stack->nests[i];
4077 const struct variable *var = nest->vars[nest->n - 1];
4078 const union value *value = case_data (c, var);
4080 if (var_is_numeric (var) && value->f == SYSMIS)
4083 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4085 ctables_value_insert (t, value, var_get_width (var));
4090 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4092 const struct ctables_value *const *ap = a_;
4093 const struct ctables_value *const *bp = b_;
4094 const struct ctables_value *a = *ap;
4095 const struct ctables_value *b = *bp;
4096 const int *width = width_;
4097 return value_compare_3way (&a->value, &b->value, *width);
/* Turns the set of category label values collected in
   T->clabels_values_map into the sorted array T->clabels_values, and stores
   each value's position in the sorted order in its 'leaf' member.

   Before that, labeled values of T->clabels_example that match its
   categories can be added to the set as well; the condition that guards
   this step is not visible in this excerpt. */
4101 ctables_sort_clabels_values (struct ctables_table *t)
4103 const struct variable *v0 = t->clabels_example;
4104 int width = var_get_width (v0);
4106 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4109 const struct val_labs *val_labs = var_get_value_labels (v0);
4110 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4111 vl = val_labs_next (val_labs, vl))
4112 if (ctables_categories_match (c0, &vl->value, v0))
4113 ctables_value_insert (t, &vl->value, width);
/* Copy the set's members into an array so that they can be sorted. */
4116 size_t n = hmap_count (&t->clabels_values_map);
4117 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4119 struct ctables_value *clv;
4121 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4122 t->clabels_values[i++] = clv;
4123 t->n_clabels_values = n;
4126 sort (t->clabels_values, n, sizeof *t->clabels_values,
4127 compare_clabels_values_3way, &width);
/* A value's leaf is its index in the sorted array. */
4129 for (size_t i = 0; i < n; i++)
4130 t->clabels_values[i]->leaf = i;
/* Adds to OCCURRENCES the values of VAR that the categories in CATS call
   for, independent of any data.  Which values a category contributes depends
   on its type; most of the case labels are not visible in this excerpt, so
   the comments below describe only what each visible loop does. */
4134 ctables_add_category_occurrences (const struct variable *var,
4135 struct hmap *occurrences,
4136 const struct ctables_categories *cats)
4138 const struct val_labs *val_labs = var_get_value_labels (var);
4140 for (size_t i = 0; i < cats->n_cats; i++)
4142 const struct ctables_category *c = &cats->cats[i];
/* A category with an explicit number contributes that number. */
4146 ctables_add_occurrence (var, &(const union value) { .f = c->number },
/* A range contributes the labeled values that lie within it, bounds
   included. */
4154 assert (var_is_numeric (var));
4155 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4156 vl = val_labs_next (val_labs, vl))
4157 if (vl->value.f >= c->range[0] && vl->value.f <= c->range[1])
4158 ctables_add_occurrence (var, &vl->value, occurrences);
/* Contributes the labeled values that are missing values of VAR. */
4162 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4163 vl = val_labs_next (val_labs, vl))
4164 if (var_is_value_missing (var, &vl->value))
4165 ctables_add_occurrence (var, &vl->value, occurrences);
/* Contributes every labeled value. */
4169 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4170 vl = val_labs_next (val_labs, vl))
4171 ctables_add_occurrence (var, &vl->value, occurrences);
4174 case CCT_POSTCOMPUTE:
/* Contributes the labeled values, leaving out missing ones unless the
   category has include_missing set. */
4184 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4185 vl = val_labs_next (val_labs, vl))
4186 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4187 ctables_add_occurrence (var, &vl->value, occurrences);
4190 case CCT_EXCLUDED_MISSING:
/* Recursively enumerates the combinations of category values across the axes
   and nest positions of section S, starting from axis A, position A_IDX,
   and inserts a cell for every complete combination with
   ctables_cell_insert__().

   At each position, every value recorded in the position's occurrences is
   tried, followed by every postcompute category of the variable.  CATS and
   the scratch case C are overwritten as the recursion proceeds. */
4197 ctables_section_recurse_add_empty_categories (
4198 struct ctables_section *s,
4199 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4200 enum pivot_axis_type a, size_t a_idx)
/* All axes are filled in: make sure that the cell exists. */
4202 if (a >= PIVOT_N_AXES)
4203 ctables_cell_insert__ (s, c, cats);
/* This axis is finished (or has no nest): move on to the next one. */
4204 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4205 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4208 const struct variable *var = s->nests[a]->vars[a_idx];
4209 const struct ctables_categories *categories = s->table->categories[
4210 var_get_dict_index (var)];
4211 int width = var_get_width (var);
4212 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4213 const struct ctables_occurrence *o;
4214 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
/* Store the occurrence's value in the scratch case, replacing the value
   that was there, so that the cell is built from it. */
4216 union value *value = case_data_rw (c, var);
4217 value_destroy (value, width);
4218 value_clone (value, &o->value, width);
4219 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4220 assert (cats[a][a_idx] != NULL);
4221 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Postcompute categories are tried in addition to the occurrences. */
4224 for (size_t i = 0; i < categories->n_cats; i++)
4226 const struct ctables_category *cat = &categories->cats[i];
4227 if (cat->type == CCT_POSTCOMPUTE)
4229 cats[a][a_idx] = cat;
4230 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
/* Adds cells to section S for categories that should appear even though no
   case falls into them.

   For every categorical variable in S whose categories have show_empty set,
   the values called for by its category specification are added to its
   occurrences.  Cells are then created for the combinations of occurrences
   using a scratch case; presumably this happens only if some variable had
   show_empty set, but the guard is not visible in this excerpt. */
4237 ctables_section_add_empty_categories (struct ctables_section *s)
4239 bool show_empty = false;
4240 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4242 for (size_t k = 0; k < s->nests[a]->n; k++)
4243 if (k != s->nests[a]->scale_idx)
4245 const struct variable *var = s->nests[a]->vars[k];
4246 const struct ctables_categories *cats = s->table->categories[
4247 var_get_dict_index (var)];
4248 if (cats->show_empty)
4251 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
/* NOTE(review): same fixed bound of 10 nest positions as in
   ctables_cell_insert(). */
4257 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
/* Scratch case with the dictionary's layout; the recursion overwrites its
   values. */
4258 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4259 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
4264 ctables_execute (struct dataset *ds, struct ctables *ct)
4266 for (size_t i = 0; i < ct->n_tables; i++)
4268 struct ctables_table *t = ct->tables[i];
4269 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4270 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4271 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4272 sizeof *t->sections);
4273 size_t ix[PIVOT_N_AXES];
4274 ctables_table_add_section (t, 0, ix);
4277 struct casereader *input = proc_open (ds);
4278 bool warn_on_invalid = true;
4279 for (struct ccase *c = casereader_read (input); c;
4280 case_unref (c), c = casereader_read (input))
4282 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4284 double e_weight = (ct->e_weight
4285 ? var_force_valid_weight (ct->e_weight,
4286 case_num (c, ct->e_weight),
4290 for (size_t i = 0; i < ct->n_tables; i++)
4292 struct ctables_table *t = ct->tables[i];
4294 for (size_t j = 0; j < t->n_sections; j++)
4295 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
4297 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4298 if (t->label_axis[a] != a)
4299 ctables_insert_clabels_values (t, c, a);
4302 casereader_destroy (input);
4304 for (size_t i = 0; i < ct->n_tables; i++)
4306 struct ctables_table *t = ct->tables[i];
4308 if (t->clabels_example)
4309 ctables_sort_clabels_values (t);
4311 for (size_t j = 0; j < t->n_sections; j++)
4312 ctables_section_add_empty_categories (&t->sections[j]);
4314 ctables_table_output (ct, ct->tables[i]);
4316 return proc_commit (ds);
/* Type of a function that parses one level of a postcompute expression from a
   lexer and returns the resulting expression node.  Functions of this type
   are passed to parse_binary_operators() and parse_binary_operators__() as
   the parser for operands. */
typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *);
4324 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4330 case CTPO_CAT_STRING:
4340 for (size_t i = 0; i < 2; i++)
4341 ctables_pcexpr_destroy (e->subs[i]);
4345 case CTPO_CAT_NUMBER:
4346 case CTPO_CAT_RANGE:
4347 case CTPO_CAT_MISSING:
4348 case CTPO_CAT_OTHERNM:
4349 case CTPO_CAT_SUBTOTAL:
4350 case CTPO_CAT_TOTAL:
4354 msg_location_destroy (e->location);
4359 static struct ctables_pcexpr *
4360 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4361 struct ctables_pcexpr *sub0,
4362 struct ctables_pcexpr *sub1)
4364 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4365 *e = (struct ctables_pcexpr) {
4367 .subs = { sub0, sub1 },
4368 .location = msg_location_merged (sub0->location, sub1->location),
4373 /* How to parse an operator. */
4376 enum token_type token;
4377 enum ctables_postcompute_op op;
4380 static const struct operator *
4381 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4383 for (const struct operator *op = ops; op < ops + n_ops; op++)
4384 if (lex_token (lexer) == op->token)
4386 if (op->token != T_NEG_NUM)
4395 static struct ctables_pcexpr *
4396 parse_binary_operators__ (struct lexer *lexer,
4397 const struct operator ops[], size_t n_ops,
4398 parse_recursively_func *parse_next_level,
4399 const char *chain_warning,
4400 struct ctables_pcexpr *lhs)
4402 for (int op_count = 0; ; op_count++)
4404 const struct operator *op = match_operator (lexer, ops, n_ops);
4407 if (op_count > 1 && chain_warning)
4408 msg_at (SW, lhs->location, "%s", chain_warning);
4413 struct ctables_pcexpr *rhs = parse_next_level (lexer);
4416 ctables_pcexpr_destroy (lhs);
4420 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
4424 static struct ctables_pcexpr *
4425 parse_binary_operators (struct lexer *lexer,
4426 const struct operator ops[], size_t n_ops,
4427 parse_recursively_func *parse_next_level,
4428 const char *chain_warning)
4430 struct ctables_pcexpr *lhs = parse_next_level (lexer);
4434 return parse_binary_operators__ (lexer, ops, n_ops, parse_next_level,
4435 chain_warning, lhs);
/* Forward declaration: parse_primary() calls parse_add() to parse a
   parenthesized subexpression. */
static struct ctables_pcexpr *parse_add (struct lexer *);
4440 static struct ctables_pcexpr
4441 ctpo_cat_range (double low, double high)
4443 return (struct ctables_pcexpr) {
4444 .op = CTPO_CAT_RANGE,
4445 .range = { low, high },
4449 static struct ctables_pcexpr *
4450 parse_primary (struct lexer *lexer)
4452 int start_ofs = lex_ofs (lexer);
4453 struct ctables_pcexpr e;
4454 if (lex_is_number (lexer))
4456 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4457 .number = lex_number (lexer) };
4460 else if (lex_match_id (lexer, "MISSING"))
4461 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4462 else if (lex_match_id (lexer, "OTHERNM"))
4463 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4464 else if (lex_match_id (lexer, "TOTAL"))
4465 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4466 else if (lex_match_id (lexer, "SUBTOTAL"))
4468 size_t subtotal_index = 0;
4469 if (lex_match (lexer, T_LBRACK))
4471 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4473 subtotal_index = lex_integer (lexer);
4475 if (!lex_force_match (lexer, T_RBRACK))
4478 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4479 .subtotal_index = subtotal_index };
4481 else if (lex_match (lexer, T_LBRACK))
4483 if (lex_match_id (lexer, "LO"))
4485 if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
4487 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4490 else if (lex_is_number (lexer))
4492 double number = lex_number (lexer);
4494 if (lex_match_id (lexer, "THRU"))
4496 if (lex_match_id (lexer, "HI"))
4497 e = ctpo_cat_range (number, DBL_MAX);
4500 if (!lex_force_num (lexer))
4502 e = ctpo_cat_range (number, lex_number (lexer));
4507 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4510 else if (lex_is_string (lexer))
4512 e = (struct ctables_pcexpr) {
4513 .op = CTPO_CAT_STRING,
4514 .string = ss_xstrdup (lex_tokss (lexer)),
4520 lex_error (lexer, NULL);
4524 if (!lex_force_match (lexer, T_RBRACK))
4526 if (e.op == CTPO_CAT_STRING)
4531 else if (lex_match (lexer, T_LPAREN))
4533 struct ctables_pcexpr *ep = parse_add (lexer);
4536 if (!lex_force_match (lexer, T_RPAREN))
4538 ctables_pcexpr_destroy (ep);
4545 lex_error (lexer, NULL);
4549 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4550 return xmemdup (&e, sizeof e);
4553 static struct ctables_pcexpr *
4554 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4555 struct lexer *lexer, int start_ofs)
4557 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4558 *e = (struct ctables_pcexpr) {
4561 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
4566 static struct ctables_pcexpr *
4567 parse_exp (struct lexer *lexer)
4569 static const struct operator op = { T_EXP, CTPO_POW };
4571 const char *chain_warning =
4572 _("The exponentiation operator (`**') is left-associative: "
4573 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4574 "To disable this warning, insert parentheses.");
4576 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4577 return parse_binary_operators (lexer, &op, 1,
4578 parse_primary, chain_warning);
4580 /* Special case for situations like "-5**6", which must be parsed as
4583 int start_ofs = lex_ofs (lexer);
4584 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4585 *lhs = (struct ctables_pcexpr) {
4586 .op = CTPO_CONSTANT,
4587 .number = -lex_tokval (lexer),
4588 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4592 struct ctables_pcexpr *node = parse_binary_operators__ (
4593 lexer, &op, 1, parse_primary, chain_warning, lhs);
4597 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4600 /* Parses the unary minus level. */
4601 static struct ctables_pcexpr *
4602 parse_neg (struct lexer *lexer)
4604 int start_ofs = lex_ofs (lexer);
4605 if (!lex_match (lexer, T_DASH))
4606 return parse_exp (lexer);
4608 struct ctables_pcexpr *inner = parse_neg (lexer);
4612 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4615 /* Parses the multiplication and division level. */
4616 static struct ctables_pcexpr *
4617 parse_mul (struct lexer *lexer)
4619 static const struct operator ops[] =
4621 { T_ASTERISK, CTPO_MUL },
4622 { T_SLASH, CTPO_DIV },
4625 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
4629 /* Parses the addition and subtraction level. */
4630 static struct ctables_pcexpr *
4631 parse_add (struct lexer *lexer)
4633 static const struct operator ops[] =
4635 { T_PLUS, CTPO_ADD },
4636 { T_DASH, CTPO_SUB },
4637 { T_NEG_NUM, CTPO_ADD },
4640 return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
4644 static struct ctables_postcompute *
4645 ctables_find_postcompute (struct ctables *ct, const char *name)
4647 struct ctables_postcompute *pc;
4648 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4649 utf8_hash_case_string (name, 0), &ct->postcomputes)
4650 if (!utf8_strcasecmp (pc->name, name))
4656 ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct)
4658 int pcompute_start = lex_ofs (lexer) - 1;
4660 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
4663 char *name = ss_xstrdup (lex_tokss (lexer));
4666 if (!lex_force_match (lexer, T_EQUALS)
4667 || !lex_force_match_id (lexer, "EXPR")
4668 || !lex_force_match (lexer, T_LPAREN))
4674 int expr_start = lex_ofs (lexer);
4675 struct ctables_pcexpr *expr = parse_add (lexer);
4676 int expr_end = lex_ofs (lexer) - 1;
4677 if (!expr || !lex_force_match (lexer, T_RPAREN))
4682 int pcompute_end = lex_ofs (lexer) - 1;
4684 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
4687 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
4690 msg_at (SW, location, _("New definition of &%s will override the "
4691 "previous definition."),
4693 msg_at (SN, pc->location, _("This is the previous definition."));
4695 ctables_pcexpr_destroy (pc->expr);
4696 msg_location_destroy (pc->location);
4701 pc = xmalloc (sizeof *pc);
4702 *pc = (struct ctables_postcompute) { .name = name };
4703 hmap_insert (&ct->postcomputes, &pc->hmap_node,
4704 utf8_hash_case_string (pc->name, 0));
4707 pc->location = location;
4709 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
4714 ctables_parse_pproperties_format (struct lexer *lexer,
4715 struct ctables_summary_spec_set *sss)
4717 *sss = (struct ctables_summary_spec_set) { .n = 0 };
4719 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
4720 && !(lex_token (lexer) == T_ID
4721 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
4722 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
4723 lex_tokss (lexer)))))
4725 /* Parse function. */
4726 enum ctables_summary_function function;
4727 if (!parse_ctables_summary_function (lexer, &function))
4730 /* Parse percentile. */
4731 double percentile = 0;
4732 if (function == CTSF_PTILE)
4734 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4736 percentile = lex_number (lexer);
4741 struct fmt_spec format;
4742 if (!parse_format_specifier (lexer, &format)
4743 || !fmt_check_output (&format)
4744 || !fmt_check_type_compat (&format, VAL_NUMERIC))
4747 if (sss->n >= sss->allocated)
4748 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
4749 sizeof *sss->specs);
4750 sss->specs[sss->n++] = (struct ctables_summary_spec) {
4751 .function = function,
4752 .percentile = percentile,
4759 ctables_summary_spec_set_uninit (sss);
4764 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
4766 struct ctables_postcompute **pcs = NULL;
4768 size_t allocated_pcs = 0;
4770 while (lex_match (lexer, T_AND))
4772 if (!lex_force_id (lexer))
4774 struct ctables_postcompute *pc
4775 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
4778 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
4783 if (n_pcs >= allocated_pcs)
4784 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
4788 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
4790 if (lex_match_id (lexer, "LABEL"))
4792 lex_match (lexer, T_EQUALS);
4793 if (!lex_force_string (lexer))
4796 for (size_t i = 0; i < n_pcs; i++)
4798 free (pcs[i]->label);
4799 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
4804 else if (lex_match_id (lexer, "FORMAT"))
4806 lex_match (lexer, T_EQUALS);
4808 struct ctables_summary_spec_set sss;
4809 if (!ctables_parse_pproperties_format (lexer, &sss))
4812 for (size_t i = 0; i < n_pcs; i++)
4815 ctables_summary_spec_set_uninit (pcs[i]->specs);
4817 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
4818 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
4820 ctables_summary_spec_set_uninit (&sss);
4822 else if (lex_match_id (lexer, "HIDESOURCECATS"))
4824 lex_match (lexer, T_EQUALS);
4825 bool hide_source_cats;
4826 if (!parse_bool (lexer, &hide_source_cats))
4828 for (size_t i = 0; i < n_pcs; i++)
4829 pcs[i]->hide_source_cats = hide_source_cats;
4833 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
4846 cmd_ctables (struct lexer *lexer, struct dataset *ds)
4848 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
4849 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
4850 enum settings_value_show tvars = settings_get_show_variables ();
4851 for (size_t i = 0; i < n_vars; i++)
4852 vlabels[i] = (enum ctables_vlabel) tvars;
4854 struct pivot_table_look *look = pivot_table_look_unshare (
4855 pivot_table_look_ref (pivot_table_look_get_default ()));
4856 look->omit_empty = false;
4858 struct ctables *ct = xmalloc (sizeof *ct);
4859 *ct = (struct ctables) {
4860 .dict = dataset_dict (ds),
4862 .ctables_formats = FMT_SETTINGS_INIT,
4864 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
4870 const char *dot_string;
4871 const char *comma_string;
4873 static const struct ctf ctfs[4] = {
4874 { CTEF_NEGPAREN, "(,,,)", "(...)" },
4875 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
4876 { CTEF_PAREN, "-,(,),", "-.(.)." },
4877 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
4879 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
4880 for (size_t i = 0; i < 4; i++)
4882 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
4883 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
4884 fmt_number_style_from_string (s));
4887 if (!lex_force_match (lexer, T_SLASH))
4890 while (!lex_match_id (lexer, "TABLE"))
4892 if (lex_match_id (lexer, "FORMAT"))
4894 double widths[2] = { SYSMIS, SYSMIS };
4895 double units_per_inch = 72.0;
4897 while (lex_token (lexer) != T_SLASH)
4899 if (lex_match_id (lexer, "MINCOLWIDTH"))
4901 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
4904 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
4906 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
4909 else if (lex_match_id (lexer, "UNITS"))
4911 lex_match (lexer, T_EQUALS);
4912 if (lex_match_id (lexer, "POINTS"))
4913 units_per_inch = 72.0;
4914 else if (lex_match_id (lexer, "INCHES"))
4915 units_per_inch = 1.0;
4916 else if (lex_match_id (lexer, "CM"))
4917 units_per_inch = 2.54;
4920 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
4924 else if (lex_match_id (lexer, "EMPTY"))
4929 lex_match (lexer, T_EQUALS);
4930 if (lex_match_id (lexer, "ZERO"))
4932 /* Nothing to do. */
4934 else if (lex_match_id (lexer, "BLANK"))
4935 ct->zero = xstrdup ("");
4936 else if (lex_force_string (lexer))
4938 ct->zero = ss_xstrdup (lex_tokss (lexer));
4944 else if (lex_match_id (lexer, "MISSING"))
4946 lex_match (lexer, T_EQUALS);
4947 if (!lex_force_string (lexer))
4951 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
4952 ? ss_xstrdup (lex_tokss (lexer))
4958 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
4959 "UNITS", "EMPTY", "MISSING");
4964 if (widths[0] != SYSMIS && widths[1] != SYSMIS
4965 && widths[0] > widths[1])
4967 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
4971 for (size_t i = 0; i < 2; i++)
4972 if (widths[i] != SYSMIS)
4974 int *wr = ct->look->width_ranges[TABLE_HORZ];
4975 wr[i] = widths[i] / units_per_inch * 96.0;
4980 else if (lex_match_id (lexer, "VLABELS"))
4982 if (!lex_force_match_id (lexer, "VARIABLES"))
4984 lex_match (lexer, T_EQUALS);
4986 struct variable **vars;
4988 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
4992 if (!lex_force_match_id (lexer, "DISPLAY"))
4997 lex_match (lexer, T_EQUALS);
4999 enum ctables_vlabel vlabel;
5000 if (lex_match_id (lexer, "DEFAULT"))
5001 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
5002 else if (lex_match_id (lexer, "NAME"))
5004 else if (lex_match_id (lexer, "LABEL"))
5005 vlabel = CTVL_LABEL;
5006 else if (lex_match_id (lexer, "BOTH"))
5008 else if (lex_match_id (lexer, "NONE"))
5012 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
5018 for (size_t i = 0; i < n_vars; i++)
5019 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5022 else if (lex_match_id (lexer, "MRSETS"))
5024 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5026 lex_match (lexer, T_EQUALS);
5027 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5030 else if (lex_match_id (lexer, "SMISSING"))
5032 if (lex_match_id (lexer, "VARIABLE"))
5033 ct->smissing_listwise = false;
5034 else if (lex_match_id (lexer, "LISTWISE"))
5035 ct->smissing_listwise = true;
5038 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5042 else if (lex_match_id (lexer, "PCOMPUTE"))
5044 if (!ctables_parse_pcompute (lexer, ct))
5047 else if (lex_match_id (lexer, "PPROPERTIES"))
5049 if (!ctables_parse_pproperties (lexer, ct))
5052 else if (lex_match_id (lexer, "WEIGHT"))
5054 if (!lex_force_match_id (lexer, "VARIABLE"))
5056 lex_match (lexer, T_EQUALS);
5057 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
5061 else if (lex_match_id (lexer, " HIDESMALLCOUNTS"))
5063 if (lex_match_id (lexer, "COUNT"))
5065 lex_match (lexer, T_EQUALS);
5066 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5069 ct->hide_threshold = lex_integer (lexer);
5072 else if (ct->hide_threshold == 0)
5073 ct->hide_threshold = 5;
5077 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5078 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5079 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5083 if (!lex_force_match (lexer, T_SLASH))
5087 size_t allocated_tables = 0;
5090 if (ct->n_tables >= allocated_tables)
5091 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5092 sizeof *ct->tables);
5094 struct ctables_category *cat = xmalloc (sizeof *cat);
5095 *cat = (struct ctables_category) {
5097 .include_missing = false,
5098 .sort_ascending = true,
5101 struct ctables_categories *c = xmalloc (sizeof *c);
5102 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5103 *c = (struct ctables_categories) {
5110 struct ctables_categories **categories = xnmalloc (n_vars,
5111 sizeof *categories);
5112 for (size_t i = 0; i < n_vars; i++)
5115 struct ctables_table *t = xmalloc (sizeof *t);
5116 *t = (struct ctables_table) {
5118 .slabels_axis = PIVOT_AXIS_COLUMN,
5119 .slabels_visible = true,
5120 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5122 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5123 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5124 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5126 .clabels_from_axis = PIVOT_AXIS_LAYER,
5127 .categories = categories,
5128 .n_categories = n_vars,
5131 ct->tables[ct->n_tables++] = t;
5133 lex_match (lexer, T_EQUALS);
5134 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5136 if (lex_match (lexer, T_BY))
5138 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5139 ct, t, PIVOT_AXIS_COLUMN))
5142 if (lex_match (lexer, T_BY))
5144 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5145 ct, t, PIVOT_AXIS_LAYER))
5150 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5151 && !t->axes[PIVOT_AXIS_LAYER])
5153 lex_error (lexer, _("At least one variable must be specified."));
5157 const struct ctables_axis *scales[PIVOT_N_AXES];
5158 size_t n_scales = 0;
5159 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5161 scales[a] = find_scale (t->axes[a]);
5167 msg (SE, _("Scale variables may appear only on one axis."));
5168 if (scales[PIVOT_AXIS_ROW])
5169 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5170 _("This scale variable appears on the rows axis."));
5171 if (scales[PIVOT_AXIS_COLUMN])
5172 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5173 _("This scale variable appears on the columns axis."));
5174 if (scales[PIVOT_AXIS_LAYER])
5175 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5176 _("This scale variable appears on the layer axis."));
5180 const struct ctables_axis *summaries[PIVOT_N_AXES];
5181 size_t n_summaries = 0;
5182 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5184 summaries[a] = (scales[a]
5186 : find_categorical_summary_spec (t->axes[a]));
5190 if (n_summaries > 1)
5192 msg (SE, _("Summaries may appear only on one axis."));
5193 if (summaries[PIVOT_AXIS_ROW])
5194 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5195 _("This variable on the rows axis has a summary."));
5196 if (summaries[PIVOT_AXIS_COLUMN])
5197 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5198 _("This variable on the columns axis has a summary."));
5199 if (summaries[PIVOT_AXIS_LAYER])
5200 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5201 _("This variable on the layers axis has a summary."));
5204 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5205 if (n_summaries ? summaries[a] : t->axes[a])
5207 t->summary_axis = a;
5211 if (lex_token (lexer) == T_ENDCMD)
5213 if (!ctables_prepare_table (t))
5217 if (!lex_force_match (lexer, T_SLASH))
5220 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5222 if (lex_match_id (lexer, "SLABELS"))
5224 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5226 if (lex_match_id (lexer, "POSITION"))
5228 lex_match (lexer, T_EQUALS);
5229 if (lex_match_id (lexer, "COLUMN"))
5230 t->slabels_axis = PIVOT_AXIS_COLUMN;
5231 else if (lex_match_id (lexer, "ROW"))
5232 t->slabels_axis = PIVOT_AXIS_ROW;
5233 else if (lex_match_id (lexer, "LAYER"))
5234 t->slabels_axis = PIVOT_AXIS_LAYER;
5237 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5241 else if (lex_match_id (lexer, "VISIBLE"))
5243 lex_match (lexer, T_EQUALS);
5244 if (!parse_bool (lexer, &t->slabels_visible))
5249 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5254 else if (lex_match_id (lexer, "CLABELS"))
5256 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5258 if (lex_match_id (lexer, "AUTO"))
5260 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5261 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5263 else if (lex_match_id (lexer, "ROWLABELS"))
5265 lex_match (lexer, T_EQUALS);
5266 if (lex_match_id (lexer, "OPPOSITE"))
5267 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5268 else if (lex_match_id (lexer, "LAYER"))
5269 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5272 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5276 else if (lex_match_id (lexer, "COLLABELS"))
5278 lex_match (lexer, T_EQUALS);
5279 if (lex_match_id (lexer, "OPPOSITE"))
5280 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5281 else if (lex_match_id (lexer, "LAYER"))
5282 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5285 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5291 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5297 else if (lex_match_id (lexer, "CRITERIA"))
5299 if (!lex_force_match_id (lexer, "CILEVEL"))
5301 lex_match (lexer, T_EQUALS);
5303 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5305 t->cilevel = lex_number (lexer);
5308 else if (lex_match_id (lexer, "CATEGORIES"))
5310 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5314 else if (lex_match_id (lexer, "TITLES"))
5319 if (lex_match_id (lexer, "CAPTION"))
5320 textp = &t->caption;
5321 else if (lex_match_id (lexer, "CORNER"))
5323 else if (lex_match_id (lexer, "TITLE"))
5327 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5330 lex_match (lexer, T_EQUALS);
5332 struct string s = DS_EMPTY_INITIALIZER;
5333 while (lex_is_string (lexer))
5335 if (!ds_is_empty (&s))
5336 ds_put_byte (&s, ' ');
5337 ds_put_substring (&s, lex_tokss (lexer));
5341 *textp = ds_steal_cstr (&s);
5343 while (lex_token (lexer) != T_SLASH
5344 && lex_token (lexer) != T_ENDCMD);
5346 else if (lex_match_id (lexer, "SIGTEST"))
5350 t->chisq = xmalloc (sizeof *t->chisq);
5351 *t->chisq = (struct ctables_chisq) {
5353 .include_mrsets = true,
5354 .all_visible = true,
5360 if (lex_match_id (lexer, "TYPE"))
5362 lex_match (lexer, T_EQUALS);
5363 if (!lex_force_match_id (lexer, "CHISQUARE"))
5366 else if (lex_match_id (lexer, "ALPHA"))
5368 lex_match (lexer, T_EQUALS);
5369 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5371 t->chisq->alpha = lex_number (lexer);
5374 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5376 lex_match (lexer, T_EQUALS);
5377 if (parse_bool (lexer, &t->chisq->include_mrsets))
5380 else if (lex_match_id (lexer, "CATEGORIES"))
5382 lex_match (lexer, T_EQUALS);
5383 if (lex_match_id (lexer, "ALLVISIBLE"))
5384 t->chisq->all_visible = true;
5385 else if (lex_match_id (lexer, "SUBTOTALS"))
5386 t->chisq->all_visible = false;
5389 lex_error_expecting (lexer,
5390 "ALLVISIBLE", "SUBTOTALS");
5396 lex_error_expecting (lexer, "TYPE", "ALPHA",
5397 "INCLUDEMRSETS", "CATEGORIES");
5401 while (lex_token (lexer) != T_SLASH
5402 && lex_token (lexer) != T_ENDCMD);
5404 else if (lex_match_id (lexer, "COMPARETEST"))
5408 t->pairwise = xmalloc (sizeof *t->pairwise);
5409 *t->pairwise = (struct ctables_pairwise) {
5411 .alpha = { .05, .05 },
5412 .adjust = BONFERRONI,
5413 .include_mrsets = true,
5414 .meansvariance_allcats = true,
5415 .all_visible = true,
5424 if (lex_match_id (lexer, "TYPE"))
5426 lex_match (lexer, T_EQUALS);
5427 if (lex_match_id (lexer, "PROP"))
5428 t->pairwise->type = PROP;
5429 else if (lex_match_id (lexer, "MEAN"))
5430 t->pairwise->type = MEAN;
5433 lex_error_expecting (lexer, "PROP", "MEAN");
5437 else if (lex_match_id (lexer, "ALPHA"))
5439 lex_match (lexer, T_EQUALS);
5441 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5443 double a0 = lex_number (lexer);
5446 lex_match (lexer, T_COMMA);
5447 if (lex_is_number (lexer))
5449 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5451 double a1 = lex_number (lexer);
5454 t->pairwise->alpha[0] = MIN (a0, a1);
5455 t->pairwise->alpha[1] = MAX (a0, a1);
5458 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5460 else if (lex_match_id (lexer, "ADJUST"))
5462 lex_match (lexer, T_EQUALS);
5463 if (lex_match_id (lexer, "BONFERRONI"))
5464 t->pairwise->adjust = BONFERRONI;
5465 else if (lex_match_id (lexer, "BH"))
5466 t->pairwise->adjust = BH;
5467 else if (lex_match_id (lexer, "NONE"))
5468 t->pairwise->adjust = 0;
5471 lex_error_expecting (lexer, "BONFERRONI", "BH",
5476 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5478 lex_match (lexer, T_EQUALS);
5479 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5482 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5484 lex_match (lexer, T_EQUALS);
5485 if (lex_match_id (lexer, "ALLCATS"))
5486 t->pairwise->meansvariance_allcats = true;
5487 else if (lex_match_id (lexer, "TESTEDCATS"))
5488 t->pairwise->meansvariance_allcats = false;
5491 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5495 else if (lex_match_id (lexer, "CATEGORIES"))
5497 lex_match (lexer, T_EQUALS);
5498 if (lex_match_id (lexer, "ALLVISIBLE"))
5499 t->pairwise->all_visible = true;
5500 else if (lex_match_id (lexer, "SUBTOTALS"))
5501 t->pairwise->all_visible = false;
5504 lex_error_expecting (lexer, "ALLVISIBLE",
5509 else if (lex_match_id (lexer, "MERGE"))
5511 lex_match (lexer, T_EQUALS);
5512 if (!parse_bool (lexer, &t->pairwise->merge))
5515 else if (lex_match_id (lexer, "STYLE"))
5517 lex_match (lexer, T_EQUALS);
5518 if (lex_match_id (lexer, "APA"))
5519 t->pairwise->apa_style = true;
5520 else if (lex_match_id (lexer, "SIMPLE"))
5521 t->pairwise->apa_style = false;
5524 lex_error_expecting (lexer, "APA", "SIMPLE");
5528 else if (lex_match_id (lexer, "SHOWSIG"))
5530 lex_match (lexer, T_EQUALS);
5531 if (!parse_bool (lexer, &t->pairwise->show_sig))
5536 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5537 "INCLUDEMRSETS", "MEANSVARIANCE",
5538 "CATEGORIES", "MERGE", "STYLE",
5543 while (lex_token (lexer) != T_SLASH
5544 && lex_token (lexer) != T_ENDCMD);
5548 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5549 "CRITERIA", "CATEGORIES", "TITLES",
5550 "SIGTEST", "COMPARETEST");
5554 if (!lex_match (lexer, T_SLASH))
5558 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5559 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5561 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5565 if (!ctables_prepare_table (t))
5568 while (lex_token (lexer) != T_ENDCMD);
5570 bool ok = ctables_execute (ds, ct);
5571 ctables_destroy (ct);
5572 return ok ? CMD_SUCCESS : CMD_FAILURE;
5575 ctables_destroy (ct);