1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "data/casereader.h"
23 #include "data/casewriter.h"
24 #include "data/data-in.h"
25 #include "data/data-out.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/mrset.h"
29 #include "data/subcase.h"
30 #include "data/value-labels.h"
31 #include "language/command.h"
32 #include "language/lexer/format-parser.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/hash-functions.h"
38 #include "libpspp/hmap.h"
39 #include "libpspp/i18n.h"
40 #include "libpspp/message.h"
41 #include "libpspp/string-array.h"
42 #include "math/mode.h"
43 #include "math/moments.h"
44 #include "math/percentiles.h"
45 #include "math/sort.h"
46 #include "output/pivot-table.h"
48 #include "gl/minmax.h"
49 #include "gl/xalloc.h"
52 #define _(msgid) gettext (msgid)
53 #define N_(msgid) (msgid)
57 CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
58 CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
59 CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
60 CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
64 - unweighted summaries (U*)
65 - lower confidence limits (*.LCL)
66 - upper confidence limits (*.UCL)
67 - standard error (*.SE)
70 /* All variables. */ \
71 S(CTSF_COUNT, "COUNT", N_("Count"), CTF_COUNT, CTFA_ALL) \
72 S(CTSF_ECOUNT, "ECOUNT", N_("Adjusted Count"), CTF_COUNT, CTFA_ALL) \
73 S(CTSF_ROWPCT_COUNT, "ROWPCT.COUNT", N_("Row %"), CTF_PERCENT, CTFA_ALL) \
74 S(CTSF_COLPCT_COUNT, "COLPCT.COUNT", N_("Column %"), CTF_PERCENT, CTFA_ALL) \
75 S(CTSF_TABLEPCT_COUNT, "TABLEPCT.COUNT", N_("Table %"), CTF_PERCENT, CTFA_ALL) \
76 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT.COUNT", N_("Subtable %"), CTF_PERCENT, CTFA_ALL) \
77 S(CTSF_LAYERPCT_COUNT, "LAYERPCT.COUNT", N_("Layer %"), CTF_PERCENT, CTFA_ALL) \
78 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT.COUNT", N_("Layer Row %"), CTF_PERCENT, CTFA_ALL) \
79 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT.COUNT", N_("Layer Column %"), CTF_PERCENT, CTFA_ALL) \
80 S(CTSF_ROWPCT_VALIDN, "ROWPCT.VALIDN", N_("Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
81 S(CTSF_COLPCT_VALIDN, "COLPCT.VALIDN", N_("Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
82 S(CTSF_TABLEPCT_VALIDN, "TABLEPCT.VALIDN", N_("Table Valid N %"), CTF_PERCENT, CTFA_ALL) \
83 S(CTSF_SUBTABLEPCT_VALIDN, "SUBTABLEPCT.VALIDN", N_("Subtable Valid N %"), CTF_PERCENT, CTFA_ALL) \
84 S(CTSF_LAYERPCT_VALIDN, "LAYERPCT.VALIDN", N_("Layer Valid N %"), CTF_PERCENT, CTFA_ALL) \
85 S(CTSF_LAYERROWPCT_VALIDN, "LAYERROWPCT.VALIDN", N_("Layer Row Valid N %"), CTF_PERCENT, CTFA_ALL) \
86 S(CTSF_LAYERCOLPCT_VALIDN, "LAYERCOLPCT.VALIDN", N_("Layer Column Valid N %"), CTF_PERCENT, CTFA_ALL) \
87 S(CTSF_ROWPCT_TOTALN, "ROWPCT.TOTALN", N_("Row Total N %"), CTF_PERCENT, CTFA_ALL) \
88 S(CTSF_COLPCT_TOTALN, "COLPCT.TOTALN", N_("Column Total N %"), CTF_PERCENT, CTFA_ALL) \
89 S(CTSF_TABLEPCT_TOTALN, "TABLEPCT.TOTALN", N_("Table Total N %"), CTF_PERCENT, CTFA_ALL) \
90 S(CTSF_SUBTABLEPCT_TOTALN, "SUBTABLEPCT.TOTALN", N_("Subtable Total N %"), CTF_PERCENT, CTFA_ALL) \
91 S(CTSF_LAYERPCT_TOTALN, "LAYERPCT.TOTALN", N_("Layer Total N %"), CTF_PERCENT, CTFA_ALL) \
92 S(CTSF_LAYERROWPCT_TOTALN, "LAYERROWPCT.TOTALN", N_("Layer Row Total N %"), CTF_PERCENT, CTFA_ALL) \
93 S(CTSF_LAYERCOLPCT_TOTALN, "LAYERCOLPCT.TOTALN", N_("Layer Column Total N %"), CTF_PERCENT, CTFA_ALL) \
95 /* Scale variables, totals, and subtotals. */ \
96 S(CTSF_MAXIMUM, "MAXIMUM", N_("Maximum"), CTF_GENERAL, CTFA_SCALE) \
97 S(CTSF_MEAN, "MEAN", N_("Mean"), CTF_GENERAL, CTFA_SCALE) \
98 S(CTSF_MEDIAN, "MEDIAN", N_("Median"), CTF_GENERAL, CTFA_SCALE) \
99 S(CTSF_MINIMUM, "MINIMUM", N_("Minimum"), CTF_GENERAL, CTFA_SCALE) \
100 S(CTSF_MISSING, "MISSING", N_("Missing"), CTF_GENERAL, CTFA_SCALE) \
101 S(CTSF_MODE, "MODE", N_("Mode"), CTF_GENERAL, CTFA_SCALE) \
102 S(CTSF_PTILE, "PTILE", N_("Percentile"), CTF_GENERAL, CTFA_SCALE) \
103 S(CTSF_RANGE, "RANGE", N_("Range"), CTF_GENERAL, CTFA_SCALE) \
104 S(CTSF_SEMEAN, "SEMEAN", N_("Std Error of Mean"), CTF_GENERAL, CTFA_SCALE) \
105 S(CTSF_STDDEV, "STDDEV", N_("Std Deviation"), CTF_GENERAL, CTFA_SCALE) \
106 S(CTSF_SUM, "SUM", N_("Sum"), CTF_GENERAL, CTFA_SCALE) \
107 S(CSTF_TOTALN, "TOTALN", N_("Total N"), CTF_COUNT, CTFA_SCALE) \
108 S(CTSF_ETOTALN, "ETOTALN", N_("Adjusted Total N"), CTF_COUNT, CTFA_SCALE) \
109 S(CTSF_VALIDN, "VALIDN", N_("Valid N"), CTF_COUNT, CTFA_SCALE) \
110 S(CTSF_EVALIDN, "EVALIDN", N_("Adjusted Valid N"), CTF_COUNT, CTFA_SCALE) \
111 S(CTSF_VARIANCE, "VARIANCE", N_("Variance"), CTF_GENERAL, CTFA_SCALE) \
112 S(CTSF_ROWPCT_SUM, "ROWPCT.SUM", N_("Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
113 S(CTSF_COLPCT_SUM, "COLPCT.SUM", N_("Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
114 S(CTSF_TABLEPCT_SUM, "TABLEPCT.SUM", N_("Table Sum %"), CTF_PERCENT, CTFA_SCALE) \
115 S(CTSF_SUBTABLEPCT_SUM, "SUBTABLEPCT.SUM", N_("Subtable Sum %"), CTF_PERCENT, CTFA_SCALE) \
116 S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
117 S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
118 S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
120 #if 0 /* Multiple response sets not yet implemented. */
121 S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
122 S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
123 S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
124 S(CTSF_TABLEPCT_RESPONSES, "TABLEPCT.RESPONSES", N_("Table Responses %"), CTF_PERCENT, CTFA_MRSETS) \
125 S(CTSF_SUBTABLEPCT_RESPONSES, "SUBTABLEPCT.RESPONSES", N_("Subtable Responses %"), CTF_PERCENT, CTFA_MRSETS) \
126 S(CTSF_LAYERPCT_RESPONSES, "LAYERPCT.RESPONSES", N_("Layer Responses %"), CTF_PERCENT, CTFA_MRSETS) \
127 S(CTSF_LAYERROWPCT_RESPONSES, "LAYERROWPCT.RESPONSES", N_("Layer Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
128 S(CTSF_LAYERCOLPCT_RESPONSES, "LAYERCOLPCT.RESPONSES", N_("Layer Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
129 S(CTSF_ROWPCT_RESPONSES_COUNT, "ROWPCT.RESPONSES.COUNT", N_("Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
130 S(CTSF_COLPCT_RESPONSES_COUNT, "COLPCT.RESPONSES.COUNT", N_("Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
131 S(CTSF_TABLEPCT_RESPONSES_COUNT, "TABLEPCT.RESPONSES.COUNT", N_("Table Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
132 S(CTSF_SUBTABLEPCT_RESPONSES_COUNT, "SUBTABLEPCT.RESPONSES.COUNT", N_("Subtable Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
133 S(CTSF_LAYERPCT_RESPONSES_COUNT, "LAYERPCT.RESPONSES.COUNT", N_("Layer Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
134 S(CTSF_LAYERROWPCT_RESPONSES_COUNT, "LAYERROWPCT.RESPONSES.COUNT", N_("Layer Row Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
135 S(CTSF_LAYERCOLPCT_RESPONSES_COUNT, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Responses % (Base: Count)"), CTF_PERCENT, CTFA_MRSETS) \
136 S(CTSF_ROWPCT_COUNT_RESPONSES, "ROWPCT.COUNT.RESPONSES", N_("Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
137 S(CTSF_COLPCT_COUNT_RESPONSES, "COLPCT.COUNT.RESPONSES", N_("Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
138 S(CTSF_TABLEPCT_COUNT_RESPONSES, "TABLEPCT.COUNT.RESPONSES", N_("Table Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
139 S(CTSF_SUBTABLEPCT_COUNT_RESPONSES, "SUBTABLEPCT.COUNT.RESPONSES", N_("Subtable Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
140 S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
141 S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
/* Syntax name follows the <DOMAIN>PCT.COUNT.RESPONSES pattern of the six entries above; "LAYERCOLPCT.RESPONSES.COUNT" already belongs to CTSF_LAYERCOLPCT_RESPONSES_COUNT. */ \
142 S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.COUNT.RESPONSES", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
145 enum ctables_summary_function
147 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM,
153 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1
154 N_CTSF_FUNCTIONS = SUMMARIES
158 static bool ctables_summary_function_is_count (enum ctables_summary_function);
160 enum ctables_domain_type
162 /* Within a section, where stacked variables divide one section from
164 CTDT_TABLE, /* All layers of a whole section. */
165 CTDT_LAYER, /* One layer within a section. */
166 CTDT_LAYERROW, /* Row in one layer within a section. */
167 CTDT_LAYERCOL, /* Column in one layer within a section. */
169 /* Within a subtable, where a subtable pairs an innermost row variable with
170 an innermost column variable within a single layer. */
171 CTDT_SUBTABLE, /* Whole subtable. */
172 CTDT_ROW, /* Row within a subtable. */
173 CTDT_COL, /* Column within a subtable. */
177 struct ctables_domain
179 struct hmap_node node;
181 const struct ctables_cell *example;
183 double d_valid; /* Dictionary weight. */
186 double e_valid; /* Effective weight */
191 enum ctables_summary_variant
200 /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
201 all the axes (except the scalar variable, if any). */
202 struct hmap_node node;
204 /* The domains that contain this cell. */
205 uint32_t omit_domains;
206 struct ctables_domain *domains[N_CTDTS];
211 enum ctables_summary_variant sv;
213 struct ctables_cell_axis
215 struct ctables_cell_value
217 const struct ctables_category *category;
225 union ctables_summary *summaries;
232 const struct dictionary *dict;
233 struct pivot_table_look *look;
235 /* CTABLES has a number of extra formats that we implement via custom
236 currency specifications on an alternate fmt_settings. */
237 #define CTEF_NEGPAREN FMT_CCA
238 #define CTEF_NEQUAL FMT_CCB
239 #define CTEF_PAREN FMT_CCC
240 #define CTEF_PCTPAREN FMT_CCD
241 struct fmt_settings ctables_formats;
243 /* If this is NULL, zeros are displayed using the normal print format.
244 Otherwise, this string is displayed. */
247 /* If this is NULL, missing values are displayed using the normal print
248 format. Otherwise, this string is displayed. */
251 /* Indexed by variable dictionary index. */
252 enum ctables_vlabel *vlabels;
254 struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
256 bool mrsets_count_duplicates; /* MRSETS. */
257 bool smissing_listwise; /* SMISSING. */
258 struct variable *e_weight; /* WEIGHT. */
259 int hide_threshold; /* HIDESMALLCOUNTS. */
261 struct ctables_table **tables;
265 static struct ctables_postcompute *ctables_find_postcompute (struct ctables *,
/* One postcompute definition, kept in struct ctables's 'postcomputes'
   hmap. */
268 struct ctables_postcompute
270 struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
271 char *name; /* Name, without leading &. */
273 struct msg_location *location; /* Location of definition. */
274 struct ctables_pcexpr *expr; /* Expression, a struct ctables_pcexpr. */
276 struct ctables_summary_spec_set *specs; /* NOTE(review): presumably optional summary specs for the postcompute -- confirm. */
277 bool hide_source_cats; /* NOTE(review): presumably hides the categories that the expression refers to -- confirm. */
280 struct ctables_pcexpr
290 enum ctables_postcompute_op
293 CTPO_CONSTANT, /* 5 */
294 CTPO_CAT_NUMBER, /* [5] */
295 CTPO_CAT_STRING, /* ["STRING"] */
296 CTPO_CAT_RANGE, /* [LO THRU 5] */
297 CTPO_CAT_MISSING, /* MISSING */
298 CTPO_CAT_OTHERNM, /* OTHERNM */
299 CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
300 CTPO_CAT_TOTAL, /* TOTAL */
314 /* CTPO_CAT_NUMBER. */
317 /* CTPO_CAT_STRING, in dictionary encoding. */
318 struct substring string;
320 /* CTPO_CAT_RANGE. */
323 /* CTPO_CAT_SUBTOTAL. */
324 size_t subtotal_index;
326 /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
327 One element: CTPO_NEG. */
328 struct ctables_pcexpr *subs[2];
331 /* Source location. */
332 struct msg_location *location;
335 static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
336 static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
337 enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
338 struct ctables_pcexpr *sub1);
340 struct ctables_summary_spec_set
342 struct ctables_summary_spec *specs;
346 /* The variable to which the summary specs are applied. */
347 struct variable *var;
349 /* Whether the variable to which the summary specs are applied is a scale
350 variable for the purpose of summarization.
352 (VALIDN and TOTALN act differently for summarizing scale and categorical
356 /* If any of these optional additional scale variables are missing, then
357 treat 'var' as if it's missing too. This is for implementing
358 SMISSING=LISTWISE. */
359 struct variable **listwise_vars;
360 size_t n_listwise_vars;
363 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
364 const struct ctables_summary_spec_set *);
365 static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
367 /* A nested sequence of variables, e.g. a > b > c. */
370 struct variable **vars;
373 size_t *domains[N_CTDTS];
374 size_t n_domains[N_CTDTS];
377 struct ctables_summary_spec_set specs[N_CSVS];
380 /* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
383 struct ctables_nest *nests;
389 struct hmap_node node;
394 struct ctables_occurrence
396 struct hmap_node node;
/* One section of a table.  The per-axis members are arrays with one
   element per pivot axis (PIVOT_N_AXES). */
400 struct ctables_section
402 struct ctables_table *table; /* NOTE(review): presumably the table that owns this section -- confirm. */
403 struct ctables_nest *nests[PIVOT_N_AXES]; /* One nest pointer per axis. */
404 struct hmap *occurrences[PIVOT_N_AXES]; /* One hmap pointer per axis. */
405 struct hmap cells; /* Contains "struct ctables_cell"s. */
406 struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */
411 struct ctables *ctables;
412 struct ctables_axis *axes[PIVOT_N_AXES];
413 struct ctables_stack stacks[PIVOT_N_AXES];
414 struct ctables_section *sections;
416 enum pivot_axis_type summary_axis;
417 struct ctables_summary_spec_set summary_specs;
419 const struct variable *clabels_example;
420 struct hmap clabels_values_map;
421 struct ctables_value **clabels_values;
422 size_t n_clabels_values;
424 enum pivot_axis_type slabels_axis;
425 bool slabels_visible;
427 /* The innermost category labels for axis 'a' appear on axis label_axis[a].
429 Most commonly, label_axis[a] == a, and in particular we always have
430 label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
432 If ROWLABELS or COLLABELS is specified, then one of
433 label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
434 opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
436 enum pivot_axis_type label_axis[PIVOT_N_AXES];
437 enum pivot_axis_type clabels_from_axis;
439 /* Indexed by variable dictionary index. */
440 struct ctables_categories **categories;
449 struct ctables_chisq *chisq;
450 struct ctables_pairwise *pairwise;
453 struct ctables_categories
456 struct ctables_category *cats;
461 struct ctables_category
463 enum ctables_category_type
465 /* Explicit category lists. */
468 CCT_NRANGE, /* Numerical range. */
469 CCT_SRANGE, /* String range. */
474 /* Totals and subtotals. */
478 /* Implicit category lists. */
483 /* For contributing to TOTALN. */
484 CCT_EXCLUDED_MISSING,
488 struct ctables_category *subtotal;
494 double number; /* CCT_NUMBER. */
495 struct substring string; /* CCT_STRING, in dictionary encoding. */
496 double nrange[2]; /* CCT_NRANGE. */
497 struct substring srange[2]; /* CCT_SRANGE. */
501 char *total_label; /* CCT_SUBTOTAL, CCT_TOTAL. */
502 bool hide_subcategories; /* CCT_SUBTOTAL. */
505 const struct ctables_postcompute *pc; /* CCT_POSTCOMPUTE. */
507 /* CCT_VALUE, CCT_LABEL, CCT_FUNCTION. */
510 bool include_missing;
514 enum ctables_summary_function sort_function;
515 struct variable *sort_var;
520 /* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
521 CCT_FUNCTION, CCT_EXCLUDED_MISSING. */
522 struct msg_location *location;
526 ctables_category_uninit (struct ctables_category *cat)
537 case CCT_POSTCOMPUTE:
541 ss_dealloc (&cat->string);
545 ss_dealloc (&cat->srange[0]);
546 ss_dealloc (&cat->srange[1]);
551 free (cat->total_label);
559 case CCT_EXCLUDED_MISSING:
565 nullable_substring_equal (const struct substring *a,
566 const struct substring *b)
/* A and B compare equal when both have a null 'string' pointer, or when
   both are non-null and ss_equals() says their contents match.  A null
   substring never equals a non-null one. */
568 return !a->string ? !b->string : b->string && ss_equals (*a, *b);
572 ctables_category_equal (const struct ctables_category *a,
573 const struct ctables_category *b)
575 if (a->type != b->type)
581 return a->number == b->number;
584 return ss_equals (a->string, b->string);
587 return a->nrange[0] == b->nrange[0] && a->nrange[1] == b->nrange[1];
590 return (nullable_substring_equal (&a->srange[0], &b->srange[0])
591 && nullable_substring_equal (&a->srange[1], &b->srange[1]));
597 case CCT_POSTCOMPUTE:
598 return a->pc == b->pc;
602 return !strcmp (a->total_label, b->total_label);
607 return (a->include_missing == b->include_missing
608 && a->sort_ascending == b->sort_ascending
609 && a->sort_function == b->sort_function
610 && a->sort_var == b->sort_var
611 && a->percentile == b->percentile);
613 case CCT_EXCLUDED_MISSING:
621 ctables_categories_unref (struct ctables_categories *c)
/* At least one reference must still be outstanding. */
626 assert (c->n_refs > 0);
/* Uninitialize each of the C->n_cats categories in turn. */
630 for (size_t i = 0; i < c->n_cats; i++)
631 ctables_category_uninit (&c->cats[i]);
637 ctables_categories_equal (const struct ctables_categories *a,
638 const struct ctables_categories *b)
/* Cheap checks first: the number of categories and the 'show_empty'
   setting must both agree. */
640 if (a->n_cats != b->n_cats || a->show_empty != b->show_empty)
/* Then compare the categories pairwise, position by position, with
   ctables_category_equal(). */
643 for (size_t i = 0; i < a->n_cats; i++)
644 if (!ctables_category_equal (&a->cats[i], &b->cats[i]))
650 /* Chi-square test (SIGTEST). */
658 /* Pairwise comparison test (COMPARETEST). */
659 struct ctables_pairwise
661 enum { PROP, MEAN } type;
664 bool meansvariance_allcats;
666 enum { BONFERRONI = 1, BH } adjust;
690 struct variable *var;
692 struct ctables_summary_spec_set specs[N_CSVS];
696 struct ctables_axis *subs[2];
699 struct msg_location *loc;
702 static void ctables_axis_destroy (struct ctables_axis *);
711 enum ctables_function_availability
713 CTFA_ALL, /* Any variables. */
714 CTFA_SCALE, /* Only scale variables, totals, and subtotals. */
715 CTFA_MRSETS, /* Only multiple-response sets */
718 struct ctables_summary_spec
720 enum ctables_summary_function function;
721 double percentile; /* CTSF_PTILE only. */
724 struct fmt_spec format;
725 bool is_ctables_format; /* Is 'format' one of CTEF_*? */
731 ctables_summary_spec_clone (struct ctables_summary_spec *dst,
732 const struct ctables_summary_spec *src)
/* DST receives its own xstrdup()'d copy of the label instead of sharing
   SRC's pointer. */
735 dst->label = xstrdup (src->label);
739 ctables_summary_spec_uninit (struct ctables_summary_spec *s)
746 ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
747 const struct ctables_summary_spec_set *src)
/* Allocate an array of SRC->n specs and clone each source spec into the
   matching slot. */
749 struct ctables_summary_spec *specs = xnmalloc (src->n, sizeof *specs);
750 for (size_t i = 0; i < src->n; i++)
751 ctables_summary_spec_clone (&specs[i], &src->specs[i]);
/* Overwrite *DST wholesale with a newly built set; 'is_scale' is carried
   over from SRC unchanged. */
753 *dst = (struct ctables_summary_spec_set) {
758 .is_scale = src->is_scale,
763 ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
/* Uninitialize each of the SET->n specs in turn. */
765 for (size_t i = 0; i < set->n; i++)
766 ctables_summary_spec_uninit (&set->specs[i]);
771 parse_col_width (struct lexer *lexer, const char *name, double *width)
/* An equals sign is optional here: the result of lex_match() is
   ignored. */
773 lex_match (lexer, T_EQUALS);
/* Accept either the keyword DEFAULT or a number in the closed range
   [0, DBL_MAX].  NAME is passed to the range check (presumably so that
   its error message can name the setting -- confirm). */
774 if (lex_match_id (lexer, "DEFAULT"))
776 else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
/* Store the number that passed the range check. */
778 *width = lex_number (lexer);
788 parse_bool (struct lexer *lexer, bool *b)
/* Recognizes the keywords NO and YES.  NOTE(review): the two branches
   presumably store false and true in *B respectively -- confirm. */
790 if (lex_match_id (lexer, "NO"))
792 else if (lex_match_id (lexer, "YES"))
/* Neither keyword matched: report the keywords that were expected. */
796 lex_error_expecting (lexer, "YES", "NO");
/* Returns the availability class recorded for summary function F. */
802 static enum ctables_function_availability
803 ctables_function_availability (enum ctables_summary_function f)
/* Lookup table indexed by function enum value.  S() emits one designated
   initializer per summary function, keeping only the AVAILABILITY
   column. */
805 static enum ctables_function_availability availability[] = {
806 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY,
811 return availability[f];
815 ctables_summary_function_is_count (enum ctables_summary_function f)
821 case CTSF_ROWPCT_COUNT:
822 case CTSF_COLPCT_COUNT:
823 case CTSF_TABLEPCT_COUNT:
824 case CTSF_SUBTABLEPCT_COUNT:
825 case CTSF_LAYERPCT_COUNT:
826 case CTSF_LAYERROWPCT_COUNT:
827 case CTSF_LAYERCOLPCT_COUNT:
830 case CTSF_ROWPCT_VALIDN:
831 case CTSF_COLPCT_VALIDN:
832 case CTSF_TABLEPCT_VALIDN:
833 case CTSF_SUBTABLEPCT_VALIDN:
834 case CTSF_LAYERPCT_VALIDN:
835 case CTSF_LAYERROWPCT_VALIDN:
836 case CTSF_LAYERCOLPCT_VALIDN:
837 case CTSF_ROWPCT_TOTALN:
838 case CTSF_COLPCT_TOTALN:
839 case CTSF_TABLEPCT_TOTALN:
840 case CTSF_SUBTABLEPCT_TOTALN:
841 case CTSF_LAYERPCT_TOTALN:
842 case CTSF_LAYERROWPCT_TOTALN:
843 case CTSF_LAYERCOLPCT_TOTALN:
860 case CTSF_ROWPCT_SUM:
861 case CTSF_COLPCT_SUM:
862 case CTSF_TABLEPCT_SUM:
863 case CTSF_SUBTABLEPCT_SUM:
864 case CTSF_LAYERPCT_SUM:
865 case CTSF_LAYERROWPCT_SUM:
866 case CTSF_LAYERCOLPCT_SUM:
874 parse_ctables_summary_function (struct lexer *lexer,
875 enum ctables_summary_function *f)
879 enum ctables_summary_function function;
880 struct substring name;
/* Table pairing summary functions with the names accepted for them in
   syntax.  S() emits one { function, name } entry per invocation. */
882 static struct pair names[] = {
883 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \
884 { ENUM, SS_LITERAL_INITIALIZER (NAME) },
887 /* The .COUNT suffix may be omitted. */
888 S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _)
889 S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _)
890 S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _)
891 S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _)
892 S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _)
893 S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _)
894 S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _)
/* The current token must be an identifier. */
898 if (!lex_force_id (lexer))
/* Scan the whole table for an entry whose name matches the token under
   ss_equals_case() (presumably a case-insensitive comparison --
   confirm).  A match stores the entry's function in *F. */
901 for (size_t i = 0; i < sizeof names / sizeof *names; i++)
902 if (ss_equals_case (names[i].name, lex_tokss (lexer)))
904 *f = names[i].function;
/* No table entry matched the token. */
909 lex_error (lexer, _("Expecting summary function name."));
914 ctables_axis_destroy (struct ctables_axis *axis)
922 for (size_t i = 0; i < N_CSVS; i++)
923 ctables_summary_spec_set_uninit (&axis->specs[i]);
928 ctables_axis_destroy (axis->subs[0]);
929 ctables_axis_destroy (axis->subs[1]);
932 msg_location_destroy (axis->loc);
/* Creates a new interior axis node with children SUB0 and SUB1.  The node
   is allocated with xmalloc().  Its source location runs from token offset
   START_OFS through the token just before LEXER's current offset. */
936 static struct ctables_axis *
937 ctables_axis_new_nonterminal (enum ctables_axis_op op,
938 struct ctables_axis *sub0,
939 struct ctables_axis *sub1,
940 struct lexer *lexer, int start_ofs)
942 struct ctables_axis *axis = xmalloc (sizeof *axis);
943 *axis = (struct ctables_axis) {
945 .subs = { sub0, sub1 },
946 .loc = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
951 struct ctables_axis_parse_ctx
954 struct dictionary *dict;
956 struct ctables_table *t;
/* Returns the default output format for summary FUNCTION.  Depending on
   the function's format class, the result is F with width 40, PCT with
   width 40 and 1 decimal, or VAR's own print format.
   NOTE(review): presumably these correspond to CTF_COUNT, CTF_PERCENT and
   CTF_GENERAL in that order -- confirm against the case labels. */
959 static struct fmt_spec
960 ctables_summary_default_format (enum ctables_summary_function function,
961 const struct variable *var)
/* Table indexed by function enum value; S() keeps only the FORMAT
   column. */
963 static const enum ctables_format default_formats[] = {
964 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
968 switch (default_formats[function])
971 return (struct fmt_spec) { .type = FMT_F, .w = 40 };
974 return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
977 return *var_get_print_format (var);
985 ctables_summary_default_label (enum ctables_summary_function function,
/* Table of untranslated default labels, indexed by function enum value;
   S() keeps only the LABEL column. */
988 static const char *default_labels[] = {
989 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
/* Both arms produce a newly allocated string.  CTSF_PTILE formats the
   percentile value into its label; every other function gets a copy of
   its table entry translated through gettext(). */
994 return (function == CTSF_PTILE
995 ? xasprintf (_("Percentile %.2f"), percentile)
996 : xstrdup (gettext (default_labels[function])));
1000 ctables_summary_function_name (enum ctables_summary_function function)
/* Table of syntax names, indexed by function enum value; S() keeps only
   the NAME column.  The returned pointer refers to a string literal in
   this static table, so the caller must not free it. */
1002 static const char *names[] = {
1003 #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
1007 return names[function];
1011 add_summary_spec (struct ctables_axis *axis,
1012 enum ctables_summary_function function, double percentile,
1013 const char *label, const struct fmt_spec *format,
1014 bool is_ctables_format, const struct msg_location *loc,
1015 enum ctables_summary_variant sv)
/* Leaf case: AXIS is a single variable.  FUNCTION is checked against the
   variable, then appended to the variable's spec set for variant SV. */
1017 if (axis->op == CTAO_VAR)
1019 const char *function_name = ctables_summary_function_name (function);
1020 const char *var_name = var_get_name (axis->var);
/* Dispatch on the function's availability class.  Each rejection reports
   an error at LOC (the summary spec) followed by a note at the
   variable's own location. */
1021 switch (ctables_function_availability (function))
1024 msg_at (SE, loc, _("Summary function %s applies only to multiple "
1025 "response sets."), function_name);
1026 msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
1035 _("Summary function %s applies only to scale variables."),
1037 msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
/* Select the spec set for this variant and grow its array with
   x2nrealloc() when it is full. */
1048 struct ctables_summary_spec_set *set = &axis->specs[sv];
1049 if (set->n >= set->allocated)
1050 set->specs = x2nrealloc (set->specs, &set->allocated,
1051 sizeof *set->specs);
/* Fill in the next free slot.  The spec keeps its own copy of LABEL.
   When FORMAT is null, the function's default format for this variable
   is used instead. */
1053 struct ctables_summary_spec *dst = &set->specs[set->n++];
1054 *dst = (struct ctables_summary_spec) {
1055 .function = function,
1056 .percentile = percentile,
1057 .label = xstrdup (label),
1058 .format = (format ? *format
1059 : ctables_summary_default_format (function, axis->var)),
1060 .is_ctables_format = is_ctables_format,
/* Interior case: apply the same spec to both children recursively.
   NOTE(review): a failure in either child presumably makes the whole
   call fail -- confirm. */
1066 for (size_t i = 0; i < 2; i++)
1067 if (!add_summary_spec (axis->subs[i], function, percentile, label,
1068 format, is_ctables_format, loc, sv))
1074 static struct ctables_axis *ctables_axis_parse_stack (
1075 struct ctables_axis_parse_ctx *);
/* Parses a primary axis expression: either a parenthesized stack
   expression or a single variable name, optionally followed by [S] or
   [C]. */
1078 static struct ctables_axis *
1079 ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx)
/* Parenthesized sub-expression: parse a full stack expression and require
   the closing parenthesis.  On failure the partial tree is destroyed. */
1081 if (lex_match (ctx->lexer, T_LPAREN))
1083 struct ctables_axis *sub = ctables_axis_parse_stack (ctx);
1084 if (!sub || !lex_force_match (ctx->lexer, T_RPAREN))
1086 ctables_axis_destroy (sub);
/* Otherwise the primary must be a variable name.  START_OFS remembers
   where it begins so that the node's location can be computed below. */
1092 if (!lex_force_id (ctx->lexer))
1095 int start_ofs = lex_ofs (ctx->lexer);
1096 struct variable *var = parse_variable (ctx->lexer, ctx->dict);
1100 struct ctables_axis *axis = xmalloc (sizeof *axis);
1101 *axis = (struct ctables_axis) { .op = CTAO_VAR, .var = var };
1103 /* XXX should figure out default measures by reading data */
/* An explicit [S] marks the variable as scale and an explicit [C] as not
   scale.  Without either, the variable is scale exactly when its
   measurement level is MEASURE_SCALE. */
1104 axis->scale = (lex_match_phrase (ctx->lexer, "[S]") ? true
1105 : lex_match_phrase (ctx->lexer, "[C]") ? false
1106 : var_get_measure (var) == MEASURE_SCALE);
1107 axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
1108 lex_ofs (ctx->lexer) - 1);
/* A string variable cannot be used as a scale variable: report the error
   and discard the node. */
1109 if (axis->scale && var_is_alpha (var))
1111 msg_at (SE, axis->loc, _("Cannot use string variable %s as a scale "
1113 var_get_name (var));
1114 ctables_axis_destroy (axis);
1122 has_digit (const char *s)
/* strcspn() gives the offset of the first decimal digit in S, or the
   offset of the terminating null byte if S has no digit.  A non-null byte
   at that offset therefore means that S contains a digit. */
1124 return s[strcspn (s, "0123456789")] != '\0';
1128 parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format,
1129 bool *is_ctables_format)
/* Split the token into a type name, width, and decimals.  The width and
   decimals go straight into FORMAT. */
1131 char type[FMT_TYPE_LEN_MAX + 1];
1132 if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d))
/* The four CTABLES-only format names, compared case-insensitively, map to
   the CTEF_* types. */
1135 if (!strcasecmp (type, "NEGPAREN"))
1136 format->type = CTEF_NEGPAREN;
1137 else if (!strcasecmp (type, "NEQUAL"))
1138 format->type = CTEF_NEQUAL;
1139 else if (!strcasecmp (type, "PAREN"))
1140 format->type = CTEF_PAREN;
1141 else if (!strcasecmp (type, "PCTPAREN"))
1142 format->type = CTEF_PCTPAREN;
/* NOTE(review): presumably the fallback for any other type name.  It is
   handled as an ordinary format, which must parse, pass
   fmt_check_output(), and be compatible with numeric values. */
1145 *is_ctables_format = false;
1146 return (parse_format_specifier (lexer, format)
1147 && fmt_check_output (format)
1148 && fmt_check_type_compat (format, VAL_NUMERIC));
/* Width and decimals checks, with their error messages. */
1153 msg (SE, _("Output format %s requires width 2 or greater."), type);
1156 else if (format->d > format->w - 1)
1158 msg (SE, _("Output format %s requires width greater than decimals."),
1164 *is_ctables_format = true;
1169 static struct ctables_axis *
1170 ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
1172 struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
1173 if (!sub || !lex_match (ctx->lexer, T_LBRACK))
1176 enum ctables_summary_variant sv = CSV_CELL;
1179 int start_ofs = lex_ofs (ctx->lexer);
1181 /* Parse function. */
1182 enum ctables_summary_function function;
1183 if (!parse_ctables_summary_function (ctx->lexer, &function))
1186 /* Parse percentile. */
1187 double percentile = 0;
1188 if (function == CTSF_PTILE)
1190 if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
1192 percentile = lex_number (ctx->lexer);
1193 lex_get (ctx->lexer);
1198 if (lex_is_string (ctx->lexer))
1200 label = ss_xstrdup (lex_tokss (ctx->lexer));
1201 lex_get (ctx->lexer);
1204 label = ctables_summary_default_label (function, percentile);
1207 struct fmt_spec format;
1208 const struct fmt_spec *formatp;
1209 bool is_ctables_format = false;
1210 if (lex_token (ctx->lexer) == T_ID
1211 && has_digit (lex_tokcstr (ctx->lexer)))
1213 if (!parse_ctables_format_specifier (ctx->lexer, &format,
1214 &is_ctables_format))
1224 struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
1225 lex_ofs (ctx->lexer) - 1);
1226 add_summary_spec (sub, function, percentile, label, formatp,
1227 is_ctables_format, loc, sv);
1229 msg_location_destroy (loc);
1231 lex_match (ctx->lexer, T_COMMA);
1232 if (sv == CSV_CELL && lex_match_id (ctx->lexer, "TOTALS"))
1234 if (!lex_force_match (ctx->lexer, T_LBRACK))
1238 else if (lex_match (ctx->lexer, T_RBRACK))
1240 if (sv == CSV_TOTAL && !lex_force_match (ctx->lexer, T_RBRACK))
1247 ctables_axis_destroy (sub);
/* Searches the axis tree rooted at AXIS for a scale variable.  A variable
   node yields itself when its 'scale' flag is set and NULL otherwise.  An
   interior node searches each of its two children recursively.
   NOTE(review): presumably the first non-null result from a child is
   returned, and NULL when neither child has one -- confirm. */
1251 static const struct ctables_axis *
1252 find_scale (const struct ctables_axis *axis)
1256 else if (axis->op == CTAO_VAR)
1257 return axis->scale ? axis : NULL;
1260 for (size_t i = 0; i < 2; i++)
1262 const struct ctables_axis *scale = find_scale (axis->subs[i]);
/* Searches the axis tree rooted at AXIS for a variable that is not scale
   and has at least one CSV_CELL summary spec.  A variable node yields
   itself when both conditions hold and NULL otherwise.  An interior node
   searches each of its two children recursively.
   NOTE(review): presumably the first non-null result from a child is
   returned -- confirm. */
1270 static const struct ctables_axis *
1271 find_categorical_summary_spec (const struct ctables_axis *axis)
1275 else if (axis->op == CTAO_VAR)
1276 return !axis->scale && axis->specs[CSV_CELL].n ? axis : NULL;
1279 for (size_t i = 0; i < 2; i++)
1281 const struct ctables_axis *sum
1282 = find_categorical_summary_spec (axis->subs[i]);
/* Parses one or more postfix expressions joined by '>' into CTAO_NEST
   nodes.  Two kinds of nest are rejected with an error: one with a scale
   variable on both sides, and one where an outer categorical variable
   carries summary specs. */
1290 static struct ctables_axis *
1291 ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
1293 int start_ofs = lex_ofs (ctx->lexer);
1294 struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
1298 while (lex_match (ctx->lexer, T_GT))
1300 struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
/* Build the nest node first so that its location is available for the
   error messages below. */
1304 struct ctables_axis *nest = ctables_axis_new_nonterminal (
1305 CTAO_NEST, lhs, rhs, ctx->lexer, start_ofs);
/* Scale variables cannot be nested inside each other: a scale variable on
   both sides is an error, with a note pointing at each one. */
1307 const struct ctables_axis *outer_scale = find_scale (lhs);
1308 const struct ctables_axis *inner_scale = find_scale (rhs);
1309 if (outer_scale && inner_scale)
1311 msg_at (SE, nest->loc, _("Cannot nest scale variables."));
1312 msg_at (SN, outer_scale->loc, _("This is an outer scale variable."));
1313 msg_at (SN, inner_scale->loc, _("This is an inner scale variable."));
1314 ctables_axis_destroy (nest);
/* Categorical variables may carry summaries only at the innermost level,
   so a categorical summary found on the outer side is an error. */
1318 const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
1321 msg_at (SE, nest->loc,
1322 _("Summaries may only be requested for categorical variables "
1323 "at the innermost nesting level."));
1324 msg_at (SN, outer_sum->loc,
1325 _("This outer categorical variable has a summary."));
1326 ctables_axis_destroy (nest);
/* Parses one or more nest expressions joined by '+', folding them left to
   right into CTAO_STACK nodes.  Every stack node's location starts at the
   token offset where the first operand began. */
1336 static struct ctables_axis *
1337 ctables_axis_parse_stack (struct ctables_axis_parse_ctx *ctx)
1339 int start_ofs = lex_ofs (ctx->lexer);
1340 struct ctables_axis *lhs = ctables_axis_parse_nest (ctx);
1344 while (lex_match (ctx->lexer, T_PLUS))
1346 struct ctables_axis *rhs = ctables_axis_parse_nest (ctx);
/* The tree built so far becomes the left child of the new node. */
1350 lhs = ctables_axis_new_nonterminal (CTAO_STACK, lhs, rhs,
1351 ctx->lexer, start_ofs);
1358 ctables_axis_parse (struct lexer *lexer, struct dictionary *dict,
1359 struct ctables *ct, struct ctables_table *t,
1360 enum pivot_axis_type a)
1362 if (lex_token (lexer) == T_BY
1363 || lex_token (lexer) == T_SLASH
1364 || lex_token (lexer) == T_ENDCMD)
1367 struct ctables_axis_parse_ctx ctx = {
1373 t->axes[a] = ctables_axis_parse_stack (&ctx);
1374 return t->axes[a] != NULL;
1378 ctables_chisq_destroy (struct ctables_chisq *chisq)
1384 ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
1390 ctables_table_destroy (struct ctables_table *t)
/* Drop this table's reference to each categories object, then free the
   array of pointers itself. */
1395 for (size_t i = 0; i < t->n_categories; i++)
1396 ctables_categories_unref (t->categories[i]);
1397 free (t->categories);
/* Destroy the axis tree for the column, row, and layer axes. */
1399 ctables_axis_destroy (t->axes[PIVOT_AXIS_COLUMN]);
1400 ctables_axis_destroy (t->axes[PIVOT_AXIS_ROW]);
1401 ctables_axis_destroy (t->axes[PIVOT_AXIS_LAYER]);
/* Destroy the chi-square and pairwise-comparison test settings. */
1405 ctables_chisq_destroy (t->chisq);
1406 ctables_pairwise_destroy (t->pairwise);
/* Releases the resources held by CT.

   The steps visible here drop CT's reference to its pivot table look and
   destroy each of the CT->n_tables tables in CT->tables.  Any further
   cleanup is not visible in this excerpt. */
1411 ctables_destroy (struct ctables *ct)
1416 pivot_table_look_unref (ct->look);
1420 for (size_t i = 0; i < ct->n_tables; i++)
1421 ctables_table_destroy (ct->tables[i]);
/* Returns a category whose numeric range is LOW through HIGH.  Callers in
   this file pass -DBL_MAX or DBL_MAX for an open-ended bound.  (Any other
   members set by the initializer are not visible in this excerpt.) */
1426 static struct ctables_category
1427 cct_nrange (double low, double high)
1429 return (struct ctables_category) {
1431 .nrange = { low, high }
/* Returns a category whose string range is LOW through HIGH.  Callers in
   this file pass a substring with a null `string' member for an open-ended
   bound.  (Any other members set by the initializer are not visible in this
   excerpt.) */
1435 static struct ctables_category
1436 cct_srange (struct substring low, struct substring high)
1438 return (struct ctables_category) {
1440 .srange = { low, high }
/* Parses the optional label that follows SUBTOTAL or HSUBTOTAL and stores
   a CCT_SUBTOTAL category in *CAT.

   If T_EQUALS follows, a string token is required and a copy of it becomes
   the label; otherwise the translated default "Subtotal" is used.
   HIDE_SUBCATEGORIES is recorded in the category unchanged; callers pass
   true for HSUBTOTAL and false for SUBTOTAL.

   (The failure path and return statements are not visible in this
   excerpt.) */
1445 ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories,
1446 struct ctables_category *cat)
1449 if (lex_match (lexer, T_EQUALS))
1451 if (!lex_force_string (lexer))
1454 total_label = ss_xstrdup (lex_tokss (lexer));
1458 total_label = xstrdup (_("Subtotal"));
1460 *cat = (struct ctables_category) {
1461 .type = CCT_SUBTOTAL,
1462 .hide_subcategories = hide_subcategories,
1463 .total_label = total_label
/* Returns the current string token of LEXER, passed through
   recode_substring_pool() with DICT's encoding and "UTF-8" as the two
   encoding arguments and then right-trimmed of spaces.

   The pool argument is NULL, so the result is presumably heap-allocated and
   owned by the caller -- confirm against recode_substring_pool().  (Token
   advancement and the return statement are not visible in this excerpt.) */
1468 static struct substring
1469 parse_substring (struct lexer *lexer, struct dictionary *dict)
1471 struct substring s = recode_substring_pool (
1472 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
1473 ss_rtrim (&s, ss_cstr (" "));
/* Parses one element of an explicit category list from LEXER into *CAT.

   The forms recognized by the visible code are:

     OTHERNM
     MISSING
     SUBTOTAL[=string] and HSUBTOTAL[=string]
     LO THRU string, LO THRU number
     number, number THRU HI, number THRU number
     string, string THRU HI, string THRU string
     &name (a reference to a postcompute looked up by name)

   Anything else is reported with lex_error().  An open lower or upper
   numeric bound is represented as -DBL_MAX or DBL_MAX; an open string bound
   is a substring whose `string' member is null.

   (Several lines, including return statements and token advancement, are
   not visible in this excerpt.) */
1479 ctables_table_parse_explicit_category (struct lexer *lexer,
1480 struct dictionary *dict,
1482 struct ctables_category *cat)
1484 if (lex_match_id (lexer, "OTHERNM"))
1485 *cat = (struct ctables_category) { .type = CCT_OTHERNM };
1486 else if (lex_match_id (lexer, "MISSING"))
1487 *cat = (struct ctables_category) { .type = CCT_MISSING };
1488 else if (lex_match_id (lexer, "SUBTOTAL"))
1489 return ctables_table_parse_subtotal (lexer, false, cat);
1490 else if (lex_match_id (lexer, "HSUBTOTAL"))
1491 return ctables_table_parse_subtotal (lexer, true, cat);
1492 else if (lex_match_id (lexer, "LO"))
/* LO must be followed by THRU and then a string or numeric upper bound. */
1494 if (!lex_force_match_id (lexer, "THRU"))
1496 if (lex_is_string (lexer))
1498 struct substring sr0 = { .string = NULL };
1499 struct substring sr1 = parse_substring (lexer, dict);
1500 *cat = cct_srange (sr0, sr1);
1502 else if (lex_force_num (lexer))
1504 *cat = cct_nrange (-DBL_MAX, lex_number (lexer));
1510 else if (lex_is_number (lexer))
/* A number alone, or the lower bound of a numeric range. */
1512 double number = lex_number (lexer);
1514 if (lex_match_id (lexer, "THRU"))
1516 if (lex_match_id (lexer, "HI"))
1517 *cat = cct_nrange (number, DBL_MAX);
1520 if (!lex_force_num (lexer))
1522 *cat = cct_nrange (number, lex_number (lexer));
1527 *cat = (struct ctables_category) {
1532 else if (lex_is_string (lexer))
/* A string alone, or the lower bound of a string range. */
1534 struct substring s = parse_substring (lexer, dict);
1535 if (lex_match_id (lexer, "THRU"))
1537 if (lex_match_id (lexer, "HI"))
1539 struct substring sr1 = { .string = NULL };
1540 *cat = cct_srange (s, sr1);
1544 if (!lex_force_string (lexer))
1546 struct substring sr1 = parse_substring (lexer, dict);
1547 *cat = cct_srange (s, sr1);
1551 *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
1553 else if (lex_match (lexer, T_AND))
/* "&name" refers to a postcompute, which must already be known to CT. */
1555 if (!lex_force_id (lexer))
1557 struct ctables_postcompute *pc = ctables_find_postcompute (
1558 ct, lex_tokcstr (lexer));
1561 struct msg_location *loc = lex_get_location (lexer, -1, 0);
1562 msg_at (SE, loc, _("Unknown postcompute &%s."),
1563 lex_tokcstr (lexer));
1564 msg_location_destroy (loc);
1569 *cat = (struct ctables_category) { .type = CCT_POSTCOMPUTE, .pc = pc };
1573 lex_error (lexer, NULL);
/* Searches CATS for the category that the postcompute expression leaf E
   refers to.

   The kind of leaf selects the comparison made against each category:
   a number matches a CCT_NUMBER category with the same value, a string
   matches a CCT_STRING category with equal text, a range matches a
   CCT_NRANGE category with identical bounds, and MISSING, OTHERNM, SUBTOTAL
   and TOTAL match categories of the corresponding type.  For SUBTOTAL the
   subtotals are counted and E->subtotal_index is compared against the
   count; an index of 0 is treated specially.

   The final test concerns a SUBTOTAL reference with index 0 when CATS
   contains more than one subtotal; the value returned in that case, and the
   way matches are recorded in BEST, are not visible in this excerpt. */
1580 static struct ctables_category *
1581 ctables_find_category_for_postcompute (const struct ctables_categories *cats,
1582 const struct ctables_pcexpr *e)
1584 struct ctables_category *best = NULL;
1585 size_t n_subtotals = 0;
1586 for (size_t i = 0; i < cats->n_cats; i++)
1588 struct ctables_category *cat = &cats->cats[i];
1591 case CTPO_CAT_NUMBER:
1592 if (cat->type == CCT_NUMBER && cat->number == e->number)
1596 case CTPO_CAT_STRING:
1597 if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
1601 case CTPO_CAT_RANGE:
1602 if (cat->type == CCT_NRANGE
1603 && cat->nrange[0] == e->range[0]
1604 && cat->nrange[1] == e->range[1])
1608 case CTPO_CAT_MISSING:
1609 if (cat->type == CCT_MISSING)
1613 case CTPO_CAT_OTHERNM:
1614 if (cat->type == CCT_OTHERNM)
1618 case CTPO_CAT_SUBTOTAL:
1619 if (cat->type == CCT_SUBTOTAL)
1622 if (e->subtotal_index == n_subtotals)
1624 else if (e->subtotal_index == 0)
1629 case CTPO_CAT_TOTAL:
1630 if (cat->type == CCT_TOTAL)
1644 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0 && n_subtotals > 1)
/* Checks that every category referenced by postcompute expression E, which
   belongs to computed category PC_CAT, can be found in CATS.

   For a leaf that names a category, the category is looked up with
   ctables_find_category_for_postcompute().  Two diagnostics are visible:

     - A SUBTOTAL reference without a position (subtotal_index 0) when CATS
       contains more than one SUBTOTAL or HSUBTOTAL: an error at
       CATS_LOCATION, with a note at the reference.

     - A reference to a category not in the list: an error at PC_CAT's
       location, with notes at the reference and at CATS_LOCATION.

   PC_CAT->pc->hide_source_cats is also consulted for a found category; the
   action taken is not visible in this excerpt.  For other nodes the function
   recurses into the non-null entries of E->subs[0] and E->subs[1] and fails
   if either check fails.

   (The exact conditions guarding each diagnostic and the return statements
   are not visible in this excerpt.) */
1650 ctables_recursive_check_postcompute (const struct ctables_pcexpr *e,
1651 struct ctables_category *pc_cat,
1652 const struct ctables_categories *cats,
1653 const struct msg_location *cats_location)
1657 case CTPO_CAT_NUMBER:
1658 case CTPO_CAT_STRING:
1659 case CTPO_CAT_RANGE:
1660 case CTPO_CAT_MISSING:
1661 case CTPO_CAT_OTHERNM:
1662 case CTPO_CAT_SUBTOTAL:
1663 case CTPO_CAT_TOTAL:
1665 struct ctables_category *cat = ctables_find_category_for_postcompute (
1669 if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
/* Count the subtotals so that the message can report how many there are. */
1671 size_t n_subtotals = 0;
1672 for (size_t i = 0; i < cats->n_cats; i++)
1673 n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
1674 if (n_subtotals > 1)
1676 msg_at (SE, cats_location,
1677 ngettext ("These categories include %zu instance "
1678 "of SUBTOTAL or HSUBTOTAL, so references "
1679 "from computed categories must refer to "
1680 "subtotals by position.",
1681 "These categories include %zu instances "
1682 "of SUBTOTAL or HSUBTOTAL, so references "
1683 "from computed categories must refer to "
1684 "subtotals by position.",
1687 msg_at (SN, e->location,
1688 _("This is the reference that lacks a position."));
1693 msg_at (SE, pc_cat->location,
1694 _("Computed category &%s references a category not included "
1695 "in the category list."),
1697 msg_at (SN, e->location, _("This is the missing category."));
1698 msg_at (SN, cats_location,
1699 _("To fix the problem, add the missing category to the "
1700 "list of categories here."));
1703 if (pc_cat->pc->hide_source_cats)
/* Interior nodes: check both operands. */
1717 for (size_t i = 0; i < 2; i++)
1718 if (e->subs[i] && !ctables_recursive_check_postcompute (
1719 e->subs[i], pc_cat, cats, cats_location))
/* Parses S as a value in FORMAT with data_in(), using DICT's encoding and
   the current format settings.

   When data_in() reports an error, a message that names FORMAT and includes
   the error text is issued at CAT's location.  The callers in this file test
   the result for truth and pass the address of a double as N to receive the
   parsed number.  (The declaration of `v', the test of `error', the store
   to *N and the return statements are not visible in this excerpt.)

   This function must not write to stdout: a debugging printf() that echoed
   every string being parsed has been removed. */
1729 parse_category_string (const struct ctables_category *cat,
1730 struct substring s, struct dictionary *dict,
1731 enum fmt_type format, double *n)
1735 char *error = data_in (s, dict_get_encoding (dict), format,
1736 settings_get_fmt_settings (), &v, 0, NULL);
1739 msg_at (SE, cat->location,
1740 _("Failed to parse category specification as format %s: %s."),
1741 fmt_name (format), error);
/* Checks that all N_VARS variables in VARS are string variables.

   For a numeric variable, an error naming that variable is issued at CAT's
   location.  Callers treat a false result as failure.  (The return
   statements are not visible in this excerpt.) */
1751 all_strings (struct variable **vars, size_t n_vars,
1752 const struct ctables_category *cat)
1754 for (size_t j = 0; j < n_vars; j++)
1755 if (var_is_numeric (vars[j]))
1757 msg_at (SE, cat->location,
1758 _("This category specification may be applied only to string "
1759 "variables, but this subcommand tries to apply it to "
1760 "numeric variable %s."),
1761 var_get_name (vars[j]));
/* Parses a category specification for table T from LEXER:

     VARIABLES=varlist, followed by either an explicit category list in
     square brackets or implicit-category settings, followed by the totals
     settings.

   The visible steps are:

   1. Parse the variable list and determine whether all the variables share
      one print format type whose category is date, time, or date component.

   2. Allocate one ctables_categories object with one reference per variable
      and install it in T->categories for each variable, dropping the
      reference previously held there.

   3. If a T_LBRACK follows, parse explicit categories until T_RBRACK, then
      validate each: postcompute references are checked against the list,
      numeric specifications are rejected for string variables, and string
      specifications are either converted to numbers with the common format
      or required to apply only to string variables.

   4. Parse the remaining settings up to T_SLASH or T_ENDCMD.  ORDER, KEY and
      MISSING are accepted only when no explicit categories were given;
      TOTAL, LABEL, POSITION and EMPTY are always accepted.

   5. Without explicit categories, append the implicit category built from
      those settings.

   6. Add a totals category, before or after the others depending on
      POSITION, labelled with LABEL or the translated default "Total".

   7. Walk the categories, starting from the end where the totals are, and
      link each category to its subtotal.

   (Many short lines -- braces, returns, breaks, some conditions -- are not
   visible in this excerpt, so the exact guards on steps 5 to 7 should be
   confirmed against the full source.) */
1768 ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
1769 struct ctables *ct, struct ctables_table *t)
1771 if (!lex_match_id (lexer, "VARIABLES"))
1773 lex_match (lexer, T_EQUALS);
1775 struct variable **vars;
1777 if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
/* COMMON_FORMAT stays non-null only if every variable has the same print
   format type as the first. */
1780 const struct fmt_spec *common_format = var_get_print_format (vars[0]);
1781 for (size_t i = 1; i < n_vars; i++)
1783 const struct fmt_spec *f = var_get_print_format (vars[i]);
1784 if (f->type != common_format->type)
1786 common_format = NULL;
1792 && (fmt_get_category (common_format->type)
1793 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
/* One categories object is shared by all the listed variables. */
1795 struct ctables_categories *c = xmalloc (sizeof *c);
1796 *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
1797 for (size_t i = 0; i < n_vars; i++)
1799 struct ctables_categories **cp
1800 = &t->categories[var_get_dict_index (vars[i])];
1801 ctables_categories_unref (*cp);
1805 size_t allocated_cats = 0;
1806 if (lex_match (lexer, T_LBRACK))
/* Explicit category list. */
1808 int cats_start_ofs = lex_ofs (lexer);
1811 if (c->n_cats >= allocated_cats)
1812 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
1814 int start_ofs = lex_ofs (lexer);
1815 struct ctables_category *cat = &c->cats[c->n_cats];
1816 if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
1818 cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
1821 lex_match (lexer, T_COMMA);
1823 while (!lex_match (lexer, T_RBRACK));
/* Validate each explicit category against the variables' types. */
1825 struct msg_location *cats_location
1826 = lex_ofs_location (lexer, cats_start_ofs, lex_ofs (lexer) - 1);
1827 for (size_t i = 0; i < c->n_cats; i++)
1829 struct ctables_category *cat = &c->cats[i];
1832 case CCT_POSTCOMPUTE:
1833 if (!ctables_recursive_check_postcompute (cat->pc->expr, cat,
1840 for (size_t j = 0; j < n_vars; j++)
1841 if (var_is_alpha (vars[j]))
1843 msg_at (SE, cat->location,
1844 _("This category specification may be applied "
1845 "only to numeric variables, but this "
1846 "subcommand tries to apply it to string "
1848 var_get_name (vars[j]));
/* A string category is converted to a number when the common format allows
   it; otherwise all the variables must be strings. */
1857 if (!parse_category_string (cat, cat->string, dict,
1858 common_format->type, &n))
1861 ss_dealloc (&cat->string);
1863 cat->type = CCT_NUMBER;
1866 else if (!all_strings (vars, n_vars, cat))
/* The same for string ranges; a bound with a null string is open-ended. */
1875 if (!cat->srange[0].string)
1877 else if (!parse_category_string (cat, cat->srange[0], dict,
1878 common_format->type, &n[0]))
1881 if (!cat->srange[1].string)
1883 else if (!parse_category_string (cat, cat->srange[1], dict,
1884 common_format->type, &n[1]))
1887 ss_dealloc (&cat->srange[0]);
1888 ss_dealloc (&cat->srange[1]);
1890 cat->type = CCT_NRANGE;
1891 cat->nrange[0] = n[0];
1892 cat->nrange[1] = n[1];
1894 else if (!all_strings (vars, n_vars, cat))
1905 case CCT_EXCLUDED_MISSING:
/* Settings for the implicit category and for totals. */
1911 struct ctables_category cat = {
1913 .include_missing = false,
1914 .sort_ascending = true,
1916 bool show_totals = false;
1917 char *total_label = NULL;
1918 bool totals_before = false;
1919 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
1921 if (!c->n_cats && lex_match_id (lexer, "ORDER"))
1923 lex_match (lexer, T_EQUALS);
1924 if (lex_match_id (lexer, "A"))
1925 cat.sort_ascending = true;
1926 else if (lex_match_id (lexer, "D"))
1927 cat.sort_ascending = false;
1930 lex_error_expecting (lexer, "A", "D");
1934 else if (!c->n_cats && lex_match_id (lexer, "KEY"))
1936 lex_match (lexer, T_EQUALS);
1937 if (lex_match_id (lexer, "VALUE"))
1938 cat.type = CCT_VALUE;
1939 else if (lex_match_id (lexer, "LABEL"))
1940 cat.type = CCT_LABEL;
/* Any other key is a summary function, optionally with a variable (and a
   percentile for PTILE) in parentheses. */
1943 cat.type = CCT_FUNCTION;
1944 if (!parse_ctables_summary_function (lexer, &cat.sort_function))
1947 if (lex_match (lexer, T_LPAREN))
1949 cat.sort_var = parse_variable (lexer, dict);
1953 if (cat.sort_function == CTSF_PTILE)
1955 lex_match (lexer, T_COMMA);
1956 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
1958 cat.percentile = lex_number (lexer);
1962 if (!lex_force_match (lexer, T_RPAREN))
1965 else if (ctables_function_availability (cat.sort_function)
1968 bool UNUSED b = lex_force_match (lexer, T_LPAREN);
1973 else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
1975 lex_match (lexer, T_EQUALS);
1976 if (lex_match_id (lexer, "INCLUDE"))
1977 cat.include_missing = true;
1978 else if (lex_match_id (lexer, "EXCLUDE"))
1979 cat.include_missing = false;
1982 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
1986 else if (lex_match_id (lexer, "TOTAL"))
1988 lex_match (lexer, T_EQUALS);
1989 if (!parse_bool (lexer, &show_totals))
1992 else if (lex_match_id (lexer, "LABEL"))
1994 lex_match (lexer, T_EQUALS);
1995 if (!lex_force_string (lexer))
1998 total_label = ss_xstrdup (lex_tokss (lexer));
2001 else if (lex_match_id (lexer, "POSITION"))
2003 lex_match (lexer, T_EQUALS);
2004 if (lex_match_id (lexer, "BEFORE"))
2005 totals_before = true;
2006 else if (lex_match_id (lexer, "AFTER"))
2007 totals_before = false;
2010 lex_error_expecting (lexer, "BEFORE", "AFTER");
2014 else if (lex_match_id (lexer, "EMPTY"))
2016 lex_match (lexer, T_EQUALS);
2017 if (lex_match_id (lexer, "INCLUDE"))
2018 c->show_empty = true;
2019 else if (lex_match_id (lexer, "EXCLUDE"))
2020 c->show_empty = false;
2023 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
/* The list of expected keywords depends on whether explicit categories were
   given. */
2030 lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
2031 "TOTAL", "LABEL", "POSITION", "EMPTY");
2033 lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
/* Append the implicit category. */
2040 if (c->n_cats >= allocated_cats)
2041 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2042 c->cats[c->n_cats++] = cat;
/* Make room for, then fill in, the totals category at the front or back. */
2047 if (c->n_cats >= allocated_cats)
2048 c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
2050 struct ctables_category *totals;
2053 insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
2054 totals = &c->cats[0];
2057 totals = &c->cats[c->n_cats];
2060 *totals = (struct ctables_category) {
2062 .total_label = total_label ? total_label : xstrdup (_("Total")),
/* Iterate forward when totals come first and backward otherwise, so that a
   subtotal is seen before the categories it covers. */
2066 struct ctables_category *subtotal = NULL;
2067 for (size_t i = totals_before ? 0 : c->n_cats;
2068 totals_before ? i < c->n_cats : i-- > 0;
2069 totals_before ? i++ : 0)
2071 struct ctables_category *cat = &c->cats[i];
2080 cat->subtotal = subtotal;
2083 case CCT_POSTCOMPUTE:
2094 case CCT_EXCLUDED_MISSING:
/* Presumably releases the resources held by NEST without freeing NEST
   itself; ctables_stack_uninit() calls it on array elements.
   NOTE(review): the body is not visible in this excerpt. */
2103 ctables_nest_uninit (struct ctables_nest *nest)
/* Uninitializes each of the STACK->n nests in STACK->nests and then frees
   the array.  STACK itself is not freed by the visible code. */
2110 ctables_stack_uninit (struct ctables_stack *stack)
2114 for (size_t i = 0; i < stack->n; i++)
2115 ctables_nest_uninit (&stack->nests[i]);
2116 free (stack->nests);
/* Returns the stack formed by nesting every nest of S1 inside every nest
   of S0.

   The result holds one nest per (S0 nest, S1 nest) pair, S0.n * S1.n in all.
   Each new nest's variables are the outer nest's variables followed by the
   inner nest's.  Its scale index is the outer nest's if it has one,
   otherwise the inner nest's shifted by the number of outer variables.  Its
   summary specifications are cloned from whichever source nest is chosen as
   SUMMARY_SRC, based on which nest has a variable in its CSV_CELL
   specifications.

   S0 and S1 are uninitialized before returning, so this function consumes
   its arguments.

   NOTE(review): the allocation passes `s1.n * sizeof *stack.nests' as the
   element size, so that product is computed before xnmalloc() can check it
   for overflow. */
2120 static struct ctables_stack
2121 nest_fts (struct ctables_stack s0, struct ctables_stack s1)
2128 struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
2129 for (size_t i = 0; i < s0.n; i++)
2130 for (size_t j = 0; j < s1.n; j++)
2132 const struct ctables_nest *a = &s0.nests[i];
2133 const struct ctables_nest *b = &s1.nests[j];
/* Concatenate the outer and inner variable lists. */
2135 size_t allocate = a->n + b->n;
2136 struct variable **vars = xnmalloc (allocate, sizeof *vars);
2137 enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
2139 for (size_t k = 0; k < a->n; k++)
2140 vars[n++] = a->vars[k];
2141 for (size_t k = 0; k < b->n; k++)
2142 vars[n++] = b->vars[k];
2143 assert (n == allocate);
2145 const struct ctables_nest *summary_src;
2146 if (!a->specs[CSV_CELL].var)
2148 else if (!b->specs[CSV_CELL].var)
2153 struct ctables_nest *new = &stack.nests[stack.n++];
2154 *new = (struct ctables_nest) {
2156 .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
2157 : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
2161 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2162 ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
2164 ctables_stack_uninit (&s0);
2165 ctables_stack_uninit (&s1);
/* Returns the stack formed by the nests of S0 followed by the nests of S1.

   The nest structures are copied by assignment into a new array of
   S0.n + S1.n elements.  The group_head of each nest taken from S1 is
   increased by S0.n to account for its new position.  (The disposal of the
   original arrays and the return statement are not visible in this
   excerpt.) */
2169 static struct ctables_stack
2170 stack_fts (struct ctables_stack s0, struct ctables_stack s1)
2172 struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
2173 for (size_t i = 0; i < s0.n; i++)
2174 stack.nests[stack.n++] = s0.nests[i];
2175 for (size_t i = 0; i < s1.n; i++)
2177 stack.nests[stack.n] = s1.nests[i];
2178 stack.nests[stack.n].group_head += s0.n;
2181 assert (stack.n == s0.n + s1.n);
/* Returns a stack containing a single newly allocated nest for axis node A.

   The nest's scale index is 0 if A is a scale variable and SIZE_MAX
   otherwise.  If A has CSV_CELL summary specifications or is a scale
   variable, all of A's summary specification sets are cloned into the nest
   and tagged with A's variable and scale flag.  (The contents of the
   one-element variable array and the other nest members are not visible in
   this excerpt.) */
2187 static struct ctables_stack
2188 var_fts (const struct ctables_axis *a)
2190 struct variable **vars = xmalloc (sizeof *vars);
2193 struct ctables_nest *nest = xmalloc (sizeof *nest);
2194 *nest = (struct ctables_nest) {
2197 .scale_idx = a->scale ? 0 : SIZE_MAX,
2199 if (a->specs[CSV_CELL].n || a->scale)
2200 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
2202 ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
2203 nest->specs[sv].var = a->var;
2204 nest->specs[sv].is_scale = a->scale;
2206 return (struct ctables_stack) { .nests = nest, .n = 1 };
/* Expands axis expression A into a stack of nests.

   One case yields an empty stack.  Another combines the expansions of A's
   two operands with stack_fts(), and a third combines them with nest_fts().
   (The conditions that select each case, presumably a null axis and the
   axis operator, and the case for a single variable are not visible in this
   excerpt.) */
2209 static struct ctables_stack
2210 enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
2213 return (struct ctables_stack) { .n = 0 };
2221 return stack_fts (enumerate_fts (axis_type, a->subs[0]),
2222 enumerate_fts (axis_type, a->subs[1]));
2225 /* This should consider any of the scale variables found in the result to
2226 be linked to each other listwise for SMISSING=LISTWISE. */
2227 return nest_fts (enumerate_fts (axis_type, a->subs[0]),
2228 enumerate_fts (axis_type, a->subs[1]));
/* Accumulator for one summary function in one cell.  Which member is in use
   depends on the summary function, as the member comments indicate;
   ctables_summary_init(), ctables_summary_add() and ctables_summary_value()
   switch on the function to choose it.  (Most member declarations are not
   visible in this excerpt.) */
2234 union ctables_summary
2236 /* COUNT, VALIDN, TOTALN. */
2239 /* MINIMUM, MAXIMUM, RANGE. */
2246 /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
2247 struct moments1 *moments;
2249 /* MEDIAN, MODE, PTILE. */
2252 struct casewriter *writer;
2257 /* XXX multiple response */
/* Initializes accumulator S for the summary function SS->function.

   Visible behavior per group of functions:

     - Count, valid-N and total-N percentages: initialization not visible
       (presumably the count is zeroed -- confirm).

     - Minimum/maximum style summaries: both extremes start as SYSMIS.

     - Sum percentages (and presumably the other moment-based summaries whose
       case labels are not visible): a moments1 accumulator for up to the
       variance.

     - Order statistics: a sorting casewriter over two numeric columns,
       sorted ascending on column 0.  ctables_summary_add() stores the value
       in column 0 and the weight in column 1. */
2261 ctables_summary_init (union ctables_summary *s,
2262 const struct ctables_summary_spec *ss)
2264 switch (ss->function)
2268 case CTSF_ROWPCT_COUNT:
2269 case CTSF_COLPCT_COUNT:
2270 case CTSF_TABLEPCT_COUNT:
2271 case CTSF_SUBTABLEPCT_COUNT:
2272 case CTSF_LAYERPCT_COUNT:
2273 case CTSF_LAYERROWPCT_COUNT:
2274 case CTSF_LAYERCOLPCT_COUNT:
2275 case CTSF_ROWPCT_VALIDN:
2276 case CTSF_COLPCT_VALIDN:
2277 case CTSF_TABLEPCT_VALIDN:
2278 case CTSF_SUBTABLEPCT_VALIDN:
2279 case CTSF_LAYERPCT_VALIDN:
2280 case CTSF_LAYERROWPCT_VALIDN:
2281 case CTSF_LAYERCOLPCT_VALIDN:
2282 case CTSF_ROWPCT_TOTALN:
2283 case CTSF_COLPCT_TOTALN:
2284 case CTSF_TABLEPCT_TOTALN:
2285 case CTSF_SUBTABLEPCT_TOTALN:
2286 case CTSF_LAYERPCT_TOTALN:
2287 case CTSF_LAYERROWPCT_TOTALN:
2288 case CTSF_LAYERCOLPCT_TOTALN:
2300 s->min = s->max = SYSMIS;
2308 case CTSF_ROWPCT_SUM:
2309 case CTSF_COLPCT_SUM:
2310 case CTSF_TABLEPCT_SUM:
2311 case CTSF_SUBTABLEPCT_SUM:
2312 case CTSF_LAYERPCT_SUM:
2313 case CTSF_LAYERROWPCT_SUM:
2314 case CTSF_LAYERCOLPCT_SUM:
2315 s->moments = moments1_create (MOMENT_VARIANCE);
/* Two numeric (width 0) columns. */
2322 struct caseproto *proto = caseproto_create ();
2323 proto = caseproto_add_width (proto, 0);
2324 proto = caseproto_add_width (proto, 0);
2326 struct subcase ordering;
2327 subcase_init (&ordering, 0, 0, SC_ASCEND);
2328 s->writer = sort_create_writer (&ordering, proto);
2329 subcase_uninit (&ordering);
2330 caseproto_unref (proto);
/* Releases whatever accumulator S holds for the summary function
   SS->function.

   In the visible code, the sum-percentage group destroys S->moments and
   another group (presumably the order statistics -- its case labels are not
   visible) destroys S->writer.  No cleanup is visible for the count,
   valid-N and total-N percentage group. */
2340 ctables_summary_uninit (union ctables_summary *s,
2341 const struct ctables_summary_spec *ss)
2343 switch (ss->function)
2347 case CTSF_ROWPCT_COUNT:
2348 case CTSF_COLPCT_COUNT:
2349 case CTSF_TABLEPCT_COUNT:
2350 case CTSF_SUBTABLEPCT_COUNT:
2351 case CTSF_LAYERPCT_COUNT:
2352 case CTSF_LAYERROWPCT_COUNT:
2353 case CTSF_LAYERCOLPCT_COUNT:
2354 case CTSF_ROWPCT_VALIDN:
2355 case CTSF_COLPCT_VALIDN:
2356 case CTSF_TABLEPCT_VALIDN:
2357 case CTSF_SUBTABLEPCT_VALIDN:
2358 case CTSF_LAYERPCT_VALIDN:
2359 case CTSF_LAYERROWPCT_VALIDN:
2360 case CTSF_LAYERCOLPCT_VALIDN:
2361 case CTSF_ROWPCT_TOTALN:
2362 case CTSF_COLPCT_TOTALN:
2363 case CTSF_TABLEPCT_TOTALN:
2364 case CTSF_SUBTABLEPCT_TOTALN:
2365 case CTSF_LAYERPCT_TOTALN:
2366 case CTSF_LAYERROWPCT_TOTALN:
2367 case CTSF_LAYERCOLPCT_TOTALN:
2385 case CTSF_ROWPCT_SUM:
2386 case CTSF_COLPCT_SUM:
2387 case CTSF_TABLEPCT_SUM:
2388 case CTSF_SUBTABLEPCT_SUM:
2389 case CTSF_LAYERPCT_SUM:
2390 case CTSF_LAYERROWPCT_SUM:
2391 case CTSF_LAYERCOLPCT_SUM:
2392 moments1_destroy (s->moments);
2398 casewriter_destroy (s->writer);
/* Adds one case's contribution to accumulator S for summary function
   SS->function.

   VALUE is the case's value of VAR.  IS_SCALE, IS_SCALE_MISSING, IS_MISSING
   and EXCLUDED_MISSING describe the case as laid out in the chart in the
   function body.  D_WEIGHT and E_WEIGHT are two weights for the case; some
   counts add one and some the other (NOTE(review): the case labels that
   select E_WEIGHT for counts are not visible in this excerpt).

   Visible behavior:

     - Total-N percentages add D_WEIGHT unconditionally.

     - Count percentages add D_WEIGHT if IS_SCALE is set or EXCLUDED_MISSING
       is clear.

     - Valid-N percentages add D_WEIGHT under conditions that are not
       visible here.

     - Minimum/maximum style summaries update the extremes from VALUE->f
       unless IS_SCALE_MISSING.

     - Moment-based summaries pass VALUE->f with E_WEIGHT to moments1_add()
       unless IS_SCALE_MISSING.

     - Order statistics, unless IS_SCALE_MISSING, add E_WEIGHT to S->ovalid
       and write a case holding VALUE->f and E_WEIGHT to S->writer. */
2404 ctables_summary_add (union ctables_summary *s,
2405 const struct ctables_summary_spec *ss,
2406 const struct variable *var, const union value *value,
2407 bool is_scale, bool is_scale_missing,
2408 bool is_missing, bool excluded_missing,
2409 double d_weight, double e_weight)
2411 /* To determine whether a case is included in a given table for a particular
2412 kind of summary, consider the following charts for each variable in the
2413 table. Only if "yes" appears for every variable for the summary is the
2416 Categorical variables: VALIDN COUNT TOTALN
2417 Valid values in included categories yes yes yes
2418 Missing values in included categories --- yes yes
2419 Missing values in excluded categories --- --- yes
2420 Valid values in excluded categories --- --- ---
2422 Scale variables: VALIDN COUNT TOTALN
2423 Valid value yes yes yes
2424 Missing value --- yes yes
2426 Missing values include both user- and system-missing. (The system-missing
2427 value is always in an excluded category.)
2429 switch (ss->function)
2432 case CTSF_ROWPCT_TOTALN:
2433 case CTSF_COLPCT_TOTALN:
2434 case CTSF_TABLEPCT_TOTALN:
2435 case CTSF_SUBTABLEPCT_TOTALN:
2436 case CTSF_LAYERPCT_TOTALN:
2437 case CTSF_LAYERROWPCT_TOTALN:
2438 case CTSF_LAYERCOLPCT_TOTALN:
2439 s->count += d_weight;
2443 case CTSF_ROWPCT_COUNT:
2444 case CTSF_COLPCT_COUNT:
2445 case CTSF_TABLEPCT_COUNT:
2446 case CTSF_SUBTABLEPCT_COUNT:
2447 case CTSF_LAYERPCT_COUNT:
2448 case CTSF_LAYERROWPCT_COUNT:
2449 case CTSF_LAYERCOLPCT_COUNT:
2450 if (is_scale || !excluded_missing)
2451 s->count += d_weight;
2455 case CTSF_ROWPCT_VALIDN:
2456 case CTSF_COLPCT_VALIDN:
2457 case CTSF_TABLEPCT_VALIDN:
2458 case CTSF_SUBTABLEPCT_VALIDN:
2459 case CTSF_LAYERPCT_VALIDN:
2460 case CTSF_LAYERROWPCT_VALIDN:
2461 case CTSF_LAYERCOLPCT_VALIDN:
2465 s->count += d_weight;
2470 s->count += d_weight;
2474 if (is_scale || !excluded_missing)
2475 s->count += e_weight;
2482 s->count += e_weight;
2486 s->count += e_weight;
2492 if (!is_scale_missing)
2494 assert (!var_is_alpha (var)); /* XXX? */
2495 if (s->min == SYSMIS || value->f < s->min)
2497 if (s->max == SYSMIS || value->f > s->max)
2507 case CTSF_ROWPCT_SUM:
2508 case CTSF_COLPCT_SUM:
2509 case CTSF_TABLEPCT_SUM:
2510 case CTSF_SUBTABLEPCT_SUM:
2511 case CTSF_LAYERPCT_SUM:
2512 case CTSF_LAYERROWPCT_SUM:
2513 case CTSF_LAYERCOLPCT_SUM:
2514 if (!is_scale_missing)
2515 moments1_add (s->moments, value->f, e_weight);
2521 if (!is_scale_missing)
2523 s->ovalid += e_weight;
2525 struct ccase *c = case_create (casewriter_get_proto (s->writer));
2526 *case_num_rw_idx (c, 0) = value->f;
2527 *case_num_rw_idx (c, 1) = e_weight;
2528 casewriter_write (s->writer, c);
/* Returns the domain type over which percentage summary FUNCTION is
   computed.

   The four variants of each percentage family (count, sum, total-N,
   valid-N) share a domain.  In the visible code the LAYERCOLPCT family maps
   to CTDT_LAYERCOL, LAYERROWPCT to CTDT_LAYERROW and SUBTABLEPCT to
   CTDT_SUBTABLE.  The return values for the COLPCT, LAYERPCT, ROWPCT and
   TABLEPCT families, and the handling of non-percentage functions, are not
   visible in this excerpt. */
2534 static enum ctables_domain_type
2535 ctables_function_domain (enum ctables_summary_function function)
2559 case CTSF_COLPCT_COUNT:
2560 case CTSF_COLPCT_SUM:
2561 case CTSF_COLPCT_TOTALN:
2562 case CTSF_COLPCT_VALIDN:
2565 case CTSF_LAYERCOLPCT_COUNT:
2566 case CTSF_LAYERCOLPCT_SUM:
2567 case CTSF_LAYERCOLPCT_TOTALN:
2568 case CTSF_LAYERCOLPCT_VALIDN:
2569 return CTDT_LAYERCOL;
2571 case CTSF_LAYERPCT_COUNT:
2572 case CTSF_LAYERPCT_SUM:
2573 case CTSF_LAYERPCT_TOTALN:
2574 case CTSF_LAYERPCT_VALIDN:
2577 case CTSF_LAYERROWPCT_COUNT:
2578 case CTSF_LAYERROWPCT_SUM:
2579 case CTSF_LAYERROWPCT_TOTALN:
2580 case CTSF_LAYERROWPCT_VALIDN:
2581 return CTDT_LAYERROW;
2583 case CTSF_ROWPCT_COUNT:
2584 case CTSF_ROWPCT_SUM:
2585 case CTSF_ROWPCT_TOTALN:
2586 case CTSF_ROWPCT_VALIDN:
2589 case CTSF_SUBTABLEPCT_COUNT:
2590 case CTSF_SUBTABLEPCT_SUM:
2591 case CTSF_SUBTABLEPCT_TOTALN:
2592 case CTSF_SUBTABLEPCT_VALIDN:
2593 return CTDT_SUBTABLE;
2595 case CTSF_TABLEPCT_COUNT:
2596 case CTSF_TABLEPCT_SUM:
2597 case CTSF_TABLEPCT_TOTALN:
2598 case CTSF_TABLEPCT_VALIDN:
/* Computes the final value of summary function SS->function for CELL from
   accumulator S.

   Visible behavior:

     - Count, valid-N and total-N percentages divide S->count by the
       e_count, e_valid or e_total of the cell's domain for the function and
       multiply by 100, provided the denominator is nonzero (the result
       otherwise is not visible here).

     - The range is the maximum minus the minimum, or SYSMIS if either is
       SYSMIS.

     - Moment-based summaries are derived with moments1_calculate(): the
       standard error of the mean from the variance and weight via
       calc_semean(), the standard deviation as the square root of the
       variance (SYSMIS if the variance is), and the sum as weight times
       mean (SYSMIS if either is).

     - Percentile-type summaries turn S->writer into a reader and feed it to
       a percentile order statistic, using SS->percentile for CTSF_PTILE and
       0.5 otherwise; the result is cached in S->ovalue.  The mode is
       computed in the same way with a mode order statistic.  Because the
       writer is converted into a reader, this path is presumably meant to
       run once per accumulator (the guard is not visible -- confirm). */
2606 ctables_summary_value (const struct ctables_cell *cell,
2607 union ctables_summary *s,
2608 const struct ctables_summary_spec *ss)
2610 switch (ss->function)
2616 case CTSF_ROWPCT_COUNT:
2617 case CTSF_COLPCT_COUNT:
2618 case CTSF_TABLEPCT_COUNT:
2619 case CTSF_SUBTABLEPCT_COUNT:
2620 case CTSF_LAYERPCT_COUNT:
2621 case CTSF_LAYERROWPCT_COUNT:
2622 case CTSF_LAYERCOLPCT_COUNT:
2624 enum ctables_domain_type d = ctables_function_domain (ss->function);
2625 return (cell->domains[d]->e_count
2626 ? s->count / cell->domains[d]->e_count * 100
2630 case CTSF_ROWPCT_VALIDN:
2631 case CTSF_COLPCT_VALIDN:
2632 case CTSF_TABLEPCT_VALIDN:
2633 case CTSF_SUBTABLEPCT_VALIDN:
2634 case CTSF_LAYERPCT_VALIDN:
2635 case CTSF_LAYERROWPCT_VALIDN:
2636 case CTSF_LAYERCOLPCT_VALIDN:
2638 enum ctables_domain_type d = ctables_function_domain (ss->function);
2639 return (cell->domains[d]->e_valid
2640 ? s->count / cell->domains[d]->e_valid * 100
2644 case CTSF_ROWPCT_TOTALN:
2645 case CTSF_COLPCT_TOTALN:
2646 case CTSF_TABLEPCT_TOTALN:
2647 case CTSF_SUBTABLEPCT_TOTALN:
2648 case CTSF_LAYERPCT_TOTALN:
2649 case CTSF_LAYERROWPCT_TOTALN:
2650 case CTSF_LAYERCOLPCT_TOTALN:
2652 enum ctables_domain_type d = ctables_function_domain (ss->function);
2653 return (cell->domains[d]->e_total
2654 ? s->count / cell->domains[d]->e_total * 100
2678 return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
2683 moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
2689 double weight, variance;
2690 moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
2691 return calc_semean (variance, weight);
2697 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2698 return variance != SYSMIS ? sqrt (variance) : SYSMIS;
2703 double weight, mean;
2704 moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
2705 return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
2711 moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
2715 case CTSF_ROWPCT_SUM:
2716 case CTSF_COLPCT_SUM:
2717 case CTSF_TABLEPCT_SUM:
2718 case CTSF_SUBTABLEPCT_SUM:
2719 case CTSF_LAYERPCT_SUM:
2720 case CTSF_LAYERROWPCT_SUM:
2721 case CTSF_LAYERCOLPCT_SUM:
2728 struct casereader *reader = casewriter_make_reader (s->writer);
2731 struct percentile *ptile = percentile_create (
2732 ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
2733 struct order_stats *os = &ptile->parent;
2734 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2735 s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
2736 statistic_destroy (&ptile->parent.parent);
2743 struct casereader *reader = casewriter_make_reader (s->writer);
2746 struct mode *mode = mode_create ();
2747 struct order_stats *os = &mode->parent;
2748 order_stats_accumulate_idx (&os, 1, reader, 1, 0);
2749 s->ovalue = mode->mode;
2750 statistic_destroy (&mode->parent.parent);
/* Auxiliary data for ctables_cell_compare_3way(): the nest whose variables
   are compared and the axis whose cell values are examined. */
2758 struct ctables_cell_sort_aux
2760 const struct ctables_nest *nest;
2761 enum pivot_axis_type a;
/* Three-way comparison of two cells for sorting along one axis.

   A_ and B_ point to `struct ctables_cell *' elements.  AUX_ points to a
   struct ctables_cell_sort_aux that names the nest and axis to compare on.

   Each variable of the nest other than the scale variable is compared in
   order.  Cells in different categories are ordered by the addresses of
   their category structures.  Within the same category the ordering depends
   on the category type: some types always compare equal, some compare the
   values with value_compare_3way(), and one compares value labels with
   strcmp() (a value with a label sorts after one without, and two unlabeled
   values are compared by value).  Where shown, the result is negated when
   the category's sort_ascending flag is false.

   (The case labels that select each ordering and the final return are not
   visible in this excerpt.) */
2765 ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
2767 const struct ctables_cell_sort_aux *aux = aux_;
2768 struct ctables_cell *const *ap = a_;
2769 struct ctables_cell *const *bp = b_;
2770 const struct ctables_cell *a = *ap;
2771 const struct ctables_cell *b = *bp;
2773 const struct ctables_nest *nest = aux->nest;
2774 for (size_t i = 0; i < nest->n; i++)
2775 if (i != nest->scale_idx)
2777 const struct variable *var = nest->vars[i];
2778 const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
2779 const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
2780 if (a_cv->category != b_cv->category)
2781 return a_cv->category > b_cv->category ? 1 : -1;
2783 const union value *a_val = &a_cv->value;
2784 const union value *b_val = &b_cv->value;
2785 switch (a_cv->category->type)
2791 case CCT_POSTCOMPUTE:
2792 case CCT_EXCLUDED_MISSING:
2793 /* Must be equal. */
2801 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2809 int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
2811 return a_cv->category->sort_ascending ? cmp : -cmp;
2817 const char *a_label = var_lookup_value_label (var, a_val);
2818 const char *b_label = var_lookup_value_label (var, b_val);
2820 ? (b_label ? strcmp (a_label, b_label) : 1)
2821 : (b_label ? -1 : value_compare_3way (
2822 a_val, b_val, var_get_width (var))));
2824 return a_cv->category->sort_ascending ? cmp : -cmp;
2838 For each ctables_table:
2839 For each combination of row vars:
2840 For each combination of column vars:
2841 For each combination of layer vars:
2843 Make a table of row values:
2844 Sort entries by row values
2845 Assign a 0-based index to each actual value
2846 Construct a dimension
2847 Make a table of column values
2848 Make a table of layer values
2850 Fill the table entry using the indexes from before.
/* Finds or creates the domain of type DOMAIN in section S that CELL belongs
   to.

   A domain is identified by the categories, and for ordinary categories
   also the values, of the variables listed in each axis nest's
   domains[DOMAIN] index array.  Values are left out of both the hash and
   the comparison for total, subtotal and postcompute categories.

   The hash table S->domains[DOMAIN] is searched for a domain whose example
   cell matches CELL on all of those variables.  If none is found, a new
   domain with CELL as its example is allocated and inserted.  (The return
   statements are not visible in this excerpt.) */
2853 static struct ctables_domain *
2854 ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell,
2855 enum ctables_domain_type domain)
2858 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2860 const struct ctables_nest *nest = s->nests[a];
2861 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2863 size_t v_idx = nest->domains[domain][i];
2864 struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
2865 hash = hash_pointer (cv->category, hash);
2866 if (cv->category->type != CCT_TOTAL
2867 && cv->category->type != CCT_SUBTOTAL
2868 && cv->category->type != CCT_POSTCOMPUTE)
2869 hash = value_hash (&cv->value,
2870 var_get_width (nest->vars[v_idx]), hash);
/* Look for an existing domain that agrees with CELL on every variable. */
2874 struct ctables_domain *d;
2875 HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain])
2877 const struct ctables_cell *df = d->example;
2878 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
2880 const struct ctables_nest *nest = s->nests[a];
2881 for (size_t i = 0; i < nest->n_domains[domain]; i++)
2883 size_t v_idx = nest->domains[domain][i];
2884 struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
2885 struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
2886 if (cv1->category != cv2->category
2887 || (cv1->category->type != CCT_TOTAL
2888 && cv1->category->type != CCT_SUBTOTAL
2889 && cv1->category->type != CCT_POSTCOMPUTE
2890 && !value_equal (&cv1->value, &cv2->value,
2891 var_get_width (nest->vars[v_idx]))))
/* Not found: create a new domain represented by CELL. */
2900 d = xmalloc (sizeof *d);
2901 *d = (struct ctables_domain) { .example = cell };
2902 hmap_insert (&s->domains[domain], &d->node, hash);
/* Returns a substring that views the string value V of VAR, using VAR's
   width as the length, with trailing spaces trimmed.  The substring is built
   with ss_buffer() directly over V->s, so no copy is made here.  (The
   return statement is not visible in this excerpt.) */
2906 static struct substring
2907 rtrim_value (const union value *v, const struct variable *var)
2909 struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
2910 var_get_width (var));
2911 ss_rtrim (&s, ss_cstr (" "));
/* Returns true if the string value V of VAR, with trailing spaces trimmed,
   lies within the range SRANGE[0] through SRANGE[1] inclusive, as ordered
   by ss_compare().  A bound whose `string' member is null is open-ended and
   always satisfied. */
2916 in_string_range (const union value *v, const struct variable *var,
2917 const struct substring *srange)
2919 struct substring s = rtrim_value (v, var);
2920 return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
2921 && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
/* Returns the category in C that value V of variable VAR falls into.

   The system-missing value of a numeric variable is handled first (the
   result is not visible in this excerpt).  The categories are then examined
   from last to first.  Visible tests are: equality with a category's
   number, equality of the right-trimmed string with its string, membership
   in a numeric range where a bound of -DBL_MAX or DBL_MAX is open-ended,
   membership in a string range, and whether the value is missing for VAR.
   One category kind matches the value when the category includes missing
   values or the value is not missing.

   If the loop finishes without returning, the result is null for a missing
   value and OTHERNM otherwise.  OTHERNM starts out null and is presumably
   set when an OTHERNM category is seen -- that assignment is not visible
   here. */
2924 static const struct ctables_category *
2925 ctables_categories_match (const struct ctables_categories *c,
2926 const union value *v, const struct variable *var)
2928 if (var_is_numeric (var) && v->f == SYSMIS)
2931 const struct ctables_category *othernm = NULL;
2932 for (size_t i = c->n_cats; i-- > 0; )
2934 const struct ctables_category *cat = &c->cats[i];
2938 if (cat->number == v->f)
2943 if (ss_equals (cat->string, rtrim_value (v, var)))
2948 if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
2949 && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
2954 if (in_string_range (v, var, cat->srange))
2959 if (var_is_value_missing (var, v))
2963 case CCT_POSTCOMPUTE:
2978 return (cat->include_missing || !var_is_value_missing (var, v) ? cat
2981 case CCT_EXCLUDED_MISSING:
2986 return var_is_value_missing (var, v) ? NULL : othernm;
/* Returns C's totals category.  Only the first and last categories are
   examined: the first is returned if it has type CCT_TOTAL, otherwise the
   last if it does.  The result when neither is a total is not visible in
   this excerpt.

   NOTE(review): the last element is addressed as cats[n_cats - 1], so this
   assumes that C has at least one category -- confirm that callers
   guarantee it. */
2989 static const struct ctables_category *
2990 ctables_categories_total (const struct ctables_categories *c)
2992 const struct ctables_category *first = &c->cats[0];
2993 const struct ctables_category *last = &c->cats[c->n_cats - 1];
2994 return (first->type == CCT_TOTAL ? first
2995 : last->type == CCT_TOTAL ? last
/* Looks up, and creates if necessary, the cell in section S that corresponds
   to case C with the per-axis categories CATS.

   Step 1 hashes, for every non-scale variable on every axis, the category
   pointer and -- unless the category is a total, subtotal, or postcompute --
   the case's value for that variable.  Step 2 scans the cells in S->cells that
   have this hash, comparing categories and values.  Step 3 builds a new cell:
   it copies categories and values into cell->axes[], ORs bits into
   cell->omit_domains for total-like categories, flags postcompute cells, sets
   up one summary per summary specification and one domain per domain type, and
   inserts the cell into S->cells.

   NOTE(review): the numbering embedded in this listing has gaps, so some
   statements (presumably including the return for a cell that was found and
   the final return of the new cell) are not shown here. */
2999 static struct ctables_cell *
3000 ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
3001 const struct ctables_category *cats[PIVOT_N_AXES][10])
3004 enum ctables_summary_variant sv = CSV_CELL;
/* Step 1: compute the hash of the cell's key. */
3005 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3007 const struct ctables_nest *nest = s->nests[a];
3008 for (size_t i = 0; i < nest->n; i++)
3009 if (i != nest->scale_idx)
3011 hash = hash_pointer (cats[a][i], hash);
/* Total-like categories are keyed by the category pointer alone; every other
   category also contributes the case's value. */
3012 if (cats[a][i]->type != CCT_TOTAL
3013 && cats[a][i]->type != CCT_SUBTOTAL
3014 && cats[a][i]->type != CCT_POSTCOMPUTE)
3015 hash = value_hash (case_data (c, nest->vars[i]),
3016 var_get_width (nest->vars[i]), hash);
/* Step 2: search the hash bucket for a cell with the same key.  The condition
   below is true when position (a, i) differs between CATS/C and the candidate
   cell. */
3022 struct ctables_cell *cell;
3023 HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
3025 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3027 const struct ctables_nest *nest = s->nests[a];
3028 for (size_t i = 0; i < nest->n; i++)
3029 if (i != nest->scale_idx
3030 && (cats[a][i] != cell->axes[a].cvs[i].category
3031 || (cats[a][i]->type != CCT_TOTAL
3032 && cats[a][i]->type != CCT_SUBTOTAL
3033 && cats[a][i]->type != CCT_POSTCOMPUTE
3034 && !value_equal (case_data (c, nest->vars[i]),
3035 &cell->axes[a].cvs[i].value,
3036 var_get_width (nest->vars[i])))))
/* Step 3: no existing cell was returned, so create one. */
3045 cell = xmalloc (sizeof *cell);
3048 cell->omit_domains = 0;
3049 cell->postcompute = false;
3050 //struct string name = DS_EMPTY_INITIALIZER;
3051 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3053 const struct ctables_nest *nest = s->nests[a];
3054 cell->axes[a].cvs = (nest->n
3055 ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
3057 for (size_t i = 0; i < nest->n; i++)
3059 const struct ctables_category *cat = cats[a][i];
3060 const struct variable *var = nest->vars[i];
3061 const union value *value = case_data (c, var);
3062 if (i != nest->scale_idx)
3064 const struct ctables_category *subtotal = cat->subtotal;
3065 if (cat->hide || (subtotal && subtotal->hide_subcategories))
3068 if (cat->type == CCT_TOTAL
3069 || cat->type == CCT_SUBTOTAL
3070 || cat->type == CCT_POSTCOMPUTE)
3072 /* XXX these should be more encompassing I think.*/
3076 case PIVOT_AXIS_COLUMN:
3077 cell->omit_domains |= ((1u << CTDT_TABLE) |
3078 (1u << CTDT_LAYER) |
3079 (1u << CTDT_LAYERCOL) |
3080 (1u << CTDT_SUBTABLE) |
3083 case PIVOT_AXIS_ROW:
3084 cell->omit_domains |= ((1u << CTDT_TABLE) |
3085 (1u << CTDT_LAYER) |
3086 (1u << CTDT_LAYERROW) |
3087 (1u << CTDT_SUBTABLE) |
3090 case PIVOT_AXIS_LAYER:
3091 cell->omit_domains |= ((1u << CTDT_TABLE) |
3092 (1u << CTDT_LAYER));
3096 if (cat->type == CCT_POSTCOMPUTE)
3097 cell->postcompute = true;
/* The cell owns a clone of the case's value for every variable in the nest. */
3100 cell->axes[a].cvs[i].category = cat;
3101 value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
/* NOTE(review): 'name' is declared only in a commented-out line above, so the
   name-building code below is presumably disabled by a guard that is not shown
   in this listing -- confirm. */
3104 if (i != nest->scale_idx)
3106 if (!ds_is_empty (&name))
3107 ds_put_cstr (&name, ", ");
3108 char *value_s = data_out (value, var_get_encoding (var),
3109 var_get_print_format (var),
3110 settings_get_fmt_settings ());
3111 if (cat->type == CCT_TOTAL
3112 || cat->type == CCT_SUBTOTAL
3113 || cat->type == CCT_POSTCOMPUTE)
3114 ds_put_format (&name, "%s=total", var_get_name (var));
3116 ds_put_format (&name, "%s=%s", var_get_name (var),
3117 value_s + strspn (value_s, " "));
3123 //cell->name = ds_steal_cstr (&name);
/* One summary per specification in the set selected by cell->sv on the
   summary axis, then one domain per domain type. */
3125 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3126 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3127 cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
3128 for (size_t i = 0; i < specs->n; i++)
3129 ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
3130 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
3131 cell->domains[dt] = ctables_domain_insert (s, cell, dt);
3132 hmap_insert (&s->cells, &cell->node, hash);
/* Checks whether case C lacks a scale value that SPECS needs.  The test on
   specs->is_scale comes first; the later tests apply var_is_num_missing() to
   specs->var and to each of the specs->n_listwise_vars variables in
   specs->listwise_vars.

   NOTE(review): the return statements guarded by these tests are not shown in
   this listing. */
3137 is_scale_missing (const struct ctables_summary_spec_set *specs,
3138 const struct ccase *c)
3140 if (!specs->is_scale)
3143 if (var_is_num_missing (specs->var, case_num (c, specs->var)))
3146 for (size_t i = 0; i < specs->n_listwise_vars; i++)
3148 const struct variable *var = specs->listwise_vars[i];
3149 if (var_is_num_missing (var, case_num (c, var)))
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to the cell of section S
   addressed by CATS.  The cell is obtained from ctables_cell_insert__(); the
   case is then fed to every summary of the cell and accumulated into the
   cell's domains.

   A domain of type DT is skipped when bit DT is set in cell->omit_domains.
   That test must use bitwise '&'.  The earlier logical '&&' was true for every
   DT as soon as any bit at all was set (1u << dt is never zero), so a cell that
   omitted one domain silently contributed to none of them.

   NOTE(review): the numbering embedded in this listing has gaps; the
   condition guarding the d_valid/e_valid updates is not shown here. */
3157 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
3158 const struct ctables_category *cats[PIVOT_N_AXES][10],
3159 bool is_missing, bool excluded_missing,
3160 double d_weight, double e_weight)
3162 struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
3163 const struct ctables_nest *ss = s->nests[s->table->summary_axis];
3165 const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
3167 bool scale_missing = is_scale_missing (specs, c);
3168 for (size_t i = 0; i < specs->n; i++)
3169 ctables_summary_add (&cell->summaries[i], &specs->specs[i],
3170 specs->var, case_data (c, specs->var), specs->is_scale,
3171 scale_missing, is_missing, excluded_missing,
3172 d_weight, e_weight);
3173 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Bit test, not a truth test: only the domains named in the mask are
   skipped. */
3174 if (!(cell->omit_domains & (1u << dt)))
3176 struct ctables_domain *d = cell->domains[dt];
3177 d->d_total += d_weight;
3178 d->e_total += e_weight;
3179 if (!excluded_missing)
3181 d->d_count += d_weight;
3182 d->e_count += e_weight;
3186 d->d_valid += d_weight;
3187 d->e_valid += e_weight;
/* Adds case C to the total cells that it contributes to in section S.

   Starting at axis START_AXIS and nest position START_NEST, each non-scale
   variable's total category is looked up with ctables_categories_total().
   The visible code saves the current cats[a][i], calls ctables_cell_add__()
   with CATS, and recurses from position I + 1 on the same axis so that
   combinations of totals across several variables are covered as well.

   NOTE(review): the numbering embedded in this listing has gaps; the
   statements that substitute the total into CATS and restore the saved
   category afterward are not shown here. */
3193 recurse_totals (struct ctables_section *s, const struct ccase *c,
3194 const struct ctables_category *cats[PIVOT_N_AXES][10],
3195 bool is_missing, bool excluded_missing,
3196 double d_weight, double e_weight,
3197 enum pivot_axis_type start_axis, size_t start_nest)
3199 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3201 const struct ctables_nest *nest = s->nests[a];
3202 for (size_t i = start_nest; i < nest->n; i++)
3204 if (i == nest->scale_idx)
3207 const struct variable *var = nest->vars[i];
/* The category specification for VAR is found through its dictionary index. */
3209 const struct ctables_category *total = ctables_categories_total (
3210 s->table->categories[var_get_dict_index (var)]);
3213 const struct ctables_category *save = cats[a][i];
3215 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3216 d_weight, e_weight);
3217 recurse_totals (s, c, cats, is_missing, excluded_missing,
3218 d_weight, e_weight, a, i + 1);
/* Adds case C to the subtotal cells that it contributes to in section S.

   Starting at axis START_AXIS and nest position START_NEST, the category at
   each non-scale position is saved, replaced by its 'subtotal' member, and
   the case is added with ctables_cell_add__(); the function then recurses
   from position I + 1 on the same axis.

   NOTE(review): the numbering embedded in this listing has gaps; any test
   that save->subtotal is non-null and the statement restoring the saved
   category are not shown here. */
3227 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
3228 const struct ctables_category *cats[PIVOT_N_AXES][10],
3229 bool is_missing, bool excluded_missing,
3230 double d_weight, double e_weight,
3231 enum pivot_axis_type start_axis, size_t start_nest)
3233 for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
3235 const struct ctables_nest *nest = s->nests[a];
3236 for (size_t i = start_nest; i < nest->n; i++)
3238 if (i == nest->scale_idx)
3241 const struct ctables_category *save = cats[a][i];
3244 cats[a][i] = save->subtotal;
3245 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3246 d_weight, e_weight);
3247 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3248 d_weight, e_weight, a, i + 1);
/* Records that VALUE occurred for variable VAR in the hash map OCCURRENCES.
   The map is keyed by value_hash() of VALUE at VAR's width.  Existing entries
   with that hash are compared with value_equal(); the code after the loop
   allocates a new entry holding a clone of VALUE and inserts it.

   NOTE(review): the statement executed when an equal entry is found is not
   shown in this listing (presumably an early return, so that each value is
   stored once). */
3257 ctables_add_occurrence (const struct variable *var,
3258 const union value *value,
3259 struct hmap *occurrences)
3261 int width = var_get_width (var);
3262 unsigned int hash = value_hash (value, width, 0);
3264 struct ctables_occurrence *o;
3265 HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
3267 if (value_equal (value, &o->value, width))
3270 o = xmalloc (sizeof *o);
3271 value_clone (&o->value, value, width);
3272 hmap_insert (occurrences, &o->node, hash);
/* Adds case C, with weights D_WEIGHT and E_WEIGHT, to section S.

   First pass: for every non-scale variable on every axis, the case's value is
   matched against the variable's category specification with
   ctables_categories_match() and the result is stored in CATS.  A static
   CCT_EXCLUDED_MISSING placeholder category is substituted, and
   'excluded_missing' set, under a condition not shown in this listing.

   Second pass (only when no excluded missing value was seen): every non-scale
   value is recorded in S->occurrences[a][i] with ctables_add_occurrence().

   Finally the case is added to its own cell with ctables_cell_add__() and to
   the relevant total and subtotal cells through recurse_totals() and
   recurse_subtotals(), both started at axis 0, nest position 0.

   NOTE(review): the numbering embedded in this listing has gaps.  Among the
   lines not shown are whatever guards the printf() call and the updates to
   'is_missing'. */
3276 ctables_cell_insert (struct ctables_section *s,
3277 const struct ccase *c,
3278 double d_weight, double e_weight)
/* NOTE(review): the second dimension is a fixed 10 while the loops below index
   it up to nest->n, so this appears to assume that no nest has more than 10
   variables -- confirm that the limit is enforced elsewhere. */
3280 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
3282 /* Does at least one categorical variable have a missing value in an included
3283 or excluded category? */
3284 bool is_missing = false;
3286 /* Does at least one categorical variable have a missing value in an excluded
3288 bool excluded_missing = false;
3290 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3292 const struct ctables_nest *nest = s->nests[a];
3293 for (size_t i = 0; i < nest->n; i++)
3295 if (i == nest->scale_idx)
3298 const struct variable *var = nest->vars[i];
3299 const union value *value = case_data (c, var);
3301 bool var_missing = var_is_value_missing (var, value) != 0;
3305 printf ("ctables_cell_insert %s: ", var_get_name (var));
3306 cats[a][i] = ctables_categories_match (
3307 s->table->categories[var_get_dict_index (var)], value, var);
3313 static const struct ctables_category cct_excluded_missing = {
3314 .type = CCT_EXCLUDED_MISSING,
3317 cats[a][i] = &cct_excluded_missing;
3318 excluded_missing = true;
3323 if (!excluded_missing)
3324 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3326 const struct ctables_nest *nest = s->nests[a];
3327 for (size_t i = 0; i < nest->n; i++)
3328 if (i != nest->scale_idx)
3330 const struct variable *var = nest->vars[i];
3331 const union value *value = case_data (c, var);
3332 ctables_add_occurrence (var, value, &s->occurrences[a][i]);
3336 ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
3337 d_weight, e_weight);
3339 //if (!excluded_missing)
3341 recurse_totals (s, c, cats, is_missing, excluded_missing,
3342 d_weight, e_weight, 0, 0);
3343 recurse_subtotals (s, c, cats, is_missing, excluded_missing,
3344 d_weight, e_weight, 0, 0);
/* Member of a structure whose opening line is not shown in this listing
   (presumably struct merge_item, which is accessed elsewhere in this file as
   item->set->specs[item->ofs]): the summary specification set that a merge
   item reads from. */
3350 const struct ctables_summary_spec_set *set;
/* Three-way comparison of the summary specifications that merge items A and B
   currently point at (set->specs[ofs]).  The ordering is by 'function' in
   ascending order, then by 'percentile' with the larger percentile sorting
   first, and finally by strcmp() of the labels.

   NOTE(review): strcmp() is called on both labels unconditionally, so this
   appears to assume that neither label is null. */
3355 merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
3357 const struct ctables_summary_spec *as = &a->set->specs[a->ofs];
3358 const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
3359 if (as->function != bs->function)
3360 return as->function > bs->function ? 1 : -1;
3361 else if (as->percentile != bs->percentile)
3362 return as->percentile < bs->percentile ? 1 : -1;
3363 return strcmp (as->label, bs->label);
/* Creates and returns the pivot value used to label category CAT of variable
   VAR:

   - For a total or subtotal, user text taken from cat->total_label.

   - For a postcompute that has a label, user text taken from cat->pc->label.

   - Otherwise (including a postcompute without a label), the variable value
     VALUE as produced by pivot_value_new_var_value(). */
3366 static struct pivot_value *
3367 ctables_category_create_label (const struct ctables_category *cat,
3368 const struct variable *var,
3369 const union value *value)
3371 return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
3372 ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
3373 : cat->type == CCT_POSTCOMPUTE && cat->pc->label
3374 ? pivot_value_new_user_text (cat->pc->label, SIZE_MAX)
3375 : pivot_value_new_var_value (var, value));
/* Searches T->clabels_values_map for an entry equal to VALUE, which has the
   given WIDTH and precomputed HASH.  Candidates with that hash are compared
   with value_equal().

   NOTE(review): the return statements are not shown in this listing
   (presumably the matching entry, or NULL when there is none). */
3378 static struct ctables_value *
3379 ctables_value_find__ (struct ctables_table *t, const union value *value,
3380 int width, unsigned int hash)
3382 struct ctables_value *clv;
3383 HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
3384 hash, &t->clabels_values_map)
3385 if (value_equal (value, &clv->value, width))
/* Adds VALUE to T->clabels_values_map.  The value is hashed with value_hash()
   and looked up with ctables_value_find__(); the remaining visible lines
   allocate a new entry, clone VALUE into it, and insert it under the same
   hash.

   NOTE(review): the rest of the parameter list and the test on the lookup
   result are not shown in this listing (presumably the new entry is created
   only when the lookup finds nothing). */
3391 ctables_value_insert (struct ctables_table *t, const union value *value,
3394 unsigned int hash = value_hash (value, width, 0);
3395 struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
3398 clv = xmalloc (sizeof *clv);
3399 value_clone (&clv->value, value, width);
3400 hmap_insert (&t->clabels_values_map, &clv->node, hash);
/* Looks up VALUE, of the given WIDTH, among the category-label values of T.
   This is a convenience wrapper that computes value_hash (VALUE, WIDTH, 0)
   and passes it to ctables_value_find__(), whose result it returns. */
3404 static struct ctables_value *
3405 ctables_value_find (struct ctables_table *t,
3406 const union value *value, int width)
3408 return ctables_value_find__ (t, value, width,
3409 value_hash (value, width, 0));
/* Recursively adds sections to table T, one for each combination of nest
   indexes across the axes.

   While A is a valid axis, IX[A] runs from 0 up to the number of nests in
   T->stacks[A] (at least one iteration, because of MAX (..., 1)) and the
   function recurses on axis A + 1.  The remaining code takes the next free
   element of T->sections, initializes it, allocates and initializes one
   occurrence map per variable of the nest selected by IX on each axis, and
   initializes one domain map per domain type.

   NOTE(review): the numbering embedded in this listing has gaps; the 'else'
   that separates the recursive case from the base case and some members of
   the section initializer are not shown here.  T->sections is indexed without
   a visible bounds check, so it must already be large enough -- confirm. */
3413 ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
3414 size_t ix[PIVOT_N_AXES])
3416 if (a < PIVOT_N_AXES)
3418 size_t limit = MAX (t->stacks[a].n, 1);
3419 for (ix[a] = 0; ix[a] < limit; ix[a]++)
3420 ctables_table_add_section (t, a + 1, ix);
3424 struct ctables_section *s = &t->sections[t->n_sections++];
3425 *s = (struct ctables_section) {
3427 .cells = HMAP_INITIALIZER (s->cells),
3429 for (a = 0; a < PIVOT_N_AXES; a++)
3432 struct ctables_nest *nest = &t->stacks[a].nests[ix[a]];
3434 s->occurrences[a] = xnmalloc (nest->n, sizeof *s->occurrences[a]);
3435 for (size_t i = 0; i < nest->n; i++)
3436 hmap_init (&s->occurrences[a][i]);
3438 for (size_t i = 0; i < N_CTDTS; i++)
3439 hmap_init (&s->domains[i]);
/* Binary operator callback handed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not shown in this listing (presumably it returns
   A + B). */
3444 ctpo_add (double a, double b)
/* Binary operator callback handed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not shown in this listing (presumably it returns
   A - B). */
3450 ctpo_sub (double a, double b)
/* Binary operator callback handed to ctables_pcexpr_evaluate_nonterminal().
   NOTE(review): the body is not shown in this listing (presumably it returns
   A * B). */
3456 ctpo_mul (double a, double b)
/* Division operator callback handed to ctables_pcexpr_evaluate_nonterminal().
   Returns A / B, or SYSMIS when B is zero, so that division by zero yields
   the system-missing value instead of an infinity or NaN. */
3462 ctpo_div (double a, double b)
3464 return b ? a / b : SYSMIS;
/* Exponentiation operator callback handed to
   ctables_pcexpr_evaluate_nonterminal().  It saves the caller's errno and
   computes pow (A, B).

   NOTE(review): the statements that inspect or restore errno and the return
   statement are not shown in this listing. */
3468 ctpo_pow (double a, double b)
3470 int save_errno = errno;
3472 double result = pow (a, b);
/* Unary operator callback handed to ctables_pcexpr_evaluate_nonterminal();
   the second parameter exists only to match the two-argument callback
   signature and is marked UNUSED.
   NOTE(review): the body is not shown in this listing (presumably it returns
   -A). */
3480 ctpo_neg (double a, double b UNUSED)
/* Context passed down while a postcompute expression is evaluated.

   NOTE(review): at least one member (pc_a_idx, which other code in this file
   reads and initializes) is not shown in this listing. */
3485 struct ctables_pcexpr_evaluate_ctx
/* The cell whose value is being computed. */
3487 const struct ctables_cell *cell;
/* The section whose cells and occurrences are consulted. */
3488 const struct ctables_section *section;
/* Category specification in which the expression's categories are found. */
3489 const struct ctables_categories *cats;
/* Axis that holds the postcompute category. */
3490 enum pivot_axis_type pc_a;
/* Forward declaration: ctables_pcexpr_evaluate_nonterminal() calls this
   function before its definition appears. */
3494 static double ctables_pcexpr_evaluate (
3495 const struct ctables_pcexpr_evaluate_ctx *, const struct ctables_pcexpr *);
/* Evaluates operator node E, which has N_ARGS operands, in context CTX.
   Each operand E->subs[i] is evaluated with ctables_pcexpr_evaluate() and
   checked for being non-finite or SYSMIS; the result is then
   EVALUATE (args[0], args[1]).  Unused argument slots remain 0.

   NOTE(review): the statement executed when an operand is non-finite or
   SYSMIS is not shown in this listing (presumably it returns SYSMIS).  The
   argument array has two elements, so N_ARGS must not exceed 2. */
3498 ctables_pcexpr_evaluate_nonterminal (
3499 const struct ctables_pcexpr_evaluate_ctx *ctx,
3500 const struct ctables_pcexpr *e, size_t n_args,
3501 double evaluate (double, double))
3503 double args[2] = { 0, 0 };
3504 for (size_t i = 0; i < n_args; i++)
3506 args[i] = ctables_pcexpr_evaluate (ctx, e->subs[i]);
3507 if (!isfinite (args[i]) || args[i] == SYSMIS)
3510 return evaluate (args[0], args[1]);
/* Returns the summary value of the cell that matches CTX->cell in every
   position except the postcompute position (CTX->pc_a, CTX->pc_a_idx), where
   PC_CV is used instead.

   The cell key is hashed in the same way as when cells are inserted: the
   category pointer, plus the value unless the category is a total, subtotal,
   or postcompute.  The cells in the section with that hash are then compared
   position by position.  For the cell that is found, the value of its first
   summary (index 0, marked XXX) is returned.

   NOTE(review): the numbering embedded in this listing has gaps; the handling
   of a mismatch inside the search loop and of a search that finds no cell is
   not shown here. */
3514 ctables_pcexpr_evaluate_category (const struct ctables_pcexpr_evaluate_ctx *ctx,
3515 const struct ctables_cell_value *pc_cv)
3517 const struct ctables_section *s = ctx->section;
/* Compute the hash of the substituted key. */
3520 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3522 const struct ctables_nest *nest = s->nests[a];
3523 for (size_t i = 0; i < nest->n; i++)
3524 if (i != nest->scale_idx)
3526 const struct ctables_cell_value *cv
3527 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3528 : &ctx->cell->axes[a].cvs[i]);
3529 hash = hash_pointer (cv->category, hash);
3530 if (cv->category->type != CCT_TOTAL
3531 && cv->category->type != CCT_SUBTOTAL
3532 && cv->category->type != CCT_POSTCOMPUTE)
3533 hash = value_hash (&cv->value,
3534 var_get_width (nest->vars[i]), hash);
/* Search the cells that share the hash for one with an identical key. */
3538 struct ctables_cell *tc;
3539 HMAP_FOR_EACH_WITH_HASH (tc, struct ctables_cell, node, hash, &s->cells)
3541 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3543 const struct ctables_nest *nest = s->nests[a];
3544 for (size_t i = 0; i < nest->n; i++)
3545 if (i != nest->scale_idx)
3547 const struct ctables_cell_value *p_cv
3548 = (a == ctx->pc_a && i == ctx->pc_a_idx ? pc_cv
3549 : &ctx->cell->axes[a].cvs[i]);
3550 const struct ctables_cell_value *t_cv = &tc->axes[a].cvs[i];
3551 if (p_cv->category != t_cv->category
3552 || (p_cv->category->type != CCT_TOTAL
3553 && p_cv->category->type != CCT_SUBTOTAL
3554 && p_cv->category->type != CCT_POSTCOMPUTE
3555 && !value_equal (&p_cv->value,
3557 var_get_width (nest->vars[i]))))
/* Evaluate the matching cell's first summary. */
3569 const struct ctables_table *t = s->table;
3570 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3571 const struct ctables_summary_spec_set *specs = &specs_nest->specs[tc->sv];
3572 size_t j = 0 /* XXX */;
3573 return ctables_summary_value (tc, &tc->summaries[j], &specs->specs[j]);
/* Evaluates postcompute expression E in context CTX and returns its value.

   - CTPO_CAT_RANGE: finds the category for E, then visits every value
     recorded in the section's occurrence map for the postcompute variable;
     each value that ctables_categories_match() assigns to that category is
     evaluated with ctables_pcexpr_evaluate_category() and added to 'sum'.

   - The other CTPO_CAT_* kinds: finds the category for E and evaluates it
     once, with E->number as the value.

   - Operator nodes are delegated to ctables_pcexpr_evaluate_nonterminal()
     with two operands (add, subtract, multiply, divide, power) or one
     (negate).

   NOTE(review): the numbering embedded in this listing has gaps; the switch
   statement, several case labels, the declaration of 'sum', and the return of
   the range sum are not shown here. */
3577 ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx,
3578 const struct ctables_pcexpr *e)
3585 case CTPO_CAT_RANGE:
3587 struct ctables_cell_value cv = {
3588 .category = ctables_find_category_for_postcompute (ctx->cats, e)
3590 assert (cv.category != NULL);
3592 struct hmap *occurrences = &ctx->section->occurrences[ctx->pc_a][ctx->pc_a_idx];
3593 const struct ctables_occurrence *o;
3596 const struct variable *var = ctx->section->nests[ctx->pc_a]->vars[ctx->pc_a_idx];
/* Only occurrences that fall into the range's own category contribute. */
3597 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
3598 if (ctables_categories_match (ctx->cats, &o->value, var) == cv.category)
3600 cv.value = o->value;
3601 sum += ctables_pcexpr_evaluate_category (ctx, &cv);
3606 case CTPO_CAT_NUMBER:
3607 case CTPO_CAT_STRING:
3608 case CTPO_CAT_MISSING:
3609 case CTPO_CAT_OTHERNM:
3610 case CTPO_CAT_SUBTOTAL:
3611 case CTPO_CAT_TOTAL:
3613 struct ctables_cell_value cv = {
3614 .category = ctables_find_category_for_postcompute (ctx->cats, e),
3615 .value = { .f = e->number },
3617 assert (cv.category != NULL);
3618 return ctables_pcexpr_evaluate_category (ctx, &cv);
3622 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_add);
3625 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_sub);
3628 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_mul);
3631 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_div);
3634 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 2, ctpo_pow);
3637 return ctables_pcexpr_evaluate_nonterminal (ctx, e, 1, ctpo_neg);
/* Computes the value of postcompute cell CELL in section S.

   The axes, and the variables within each axis, are scanned for the position
   whose category has type CCT_POSTCOMPUTE; the assertion inside the outer
   loop fires if every axis is exhausted without finding one.  The postcompute
   found there supplies the expression, which is evaluated with
   ctables_pcexpr_evaluate() in a context built from the cell, the section,
   the postcompute variable's category specification, and the position.

   NOTE(review): the numbering embedded in this listing has gaps; the exit
   from the search loops, the declaration of pc_a_idx, and some members of the
   context initializer are not shown here. */
3644 ctables_cell_calculate_postcompute (const struct ctables_section *s,
3645 const struct ctables_cell *cell)
3647 enum pivot_axis_type pc_a;
3649 const struct ctables_postcompute *pc;
3650 for (pc_a = 0; ; pc_a++)
3652 assert (pc_a < PIVOT_N_AXES);
3653 for (pc_a_idx = 0; pc_a_idx < s->nests[pc_a]->n; pc_a_idx++)
3655 const struct ctables_cell_value *cv = &cell->axes[pc_a].cvs[pc_a_idx];
3656 if (cv->category->type == CCT_POSTCOMPUTE)
3658 pc = cv->category->pc;
3665 const struct variable *var = s->nests[pc_a]->vars[pc_a_idx];
3666 const struct ctables_categories *cats = s->table->categories[
3667 var_get_dict_index (var)];
3668 struct ctables_pcexpr_evaluate_ctx ctx = {
3673 .pc_a_idx = pc_a_idx,
3675 return ctables_pcexpr_evaluate (&ctx, pc->expr);
/* Builds the pivot table for table T of CTABLES command CT and submits it.

   The visible steps are:

   - Create the pivot table with T's title (or "Custom Tables") and set its
     caption and corner text.

   - Create a "Statistics" dimension when the summary labels do not live on
     the summary axis (or are hidden while there are several summaries), and a
     "Row Categories"/"Column Categories" dimension when category labels were
     moved to another axis (T->clabels_example is set).

   - For each axis, create its dimension and, for each nest in the axis's
     stack, collect the cells of the sections that use the nest, sort them,
     and create pivot groups and leaves level by level.  Consecutive sorted
     cells share the leading levels on which their categories and values
     agree.

   - For every cell and summary specification, compute the value (through
     ctables_cell_calculate_postcompute() for postcompute cells), apply the
     hide-threshold, zero, and missing substitutions, and put it in the table.

   Two defects of the previous revision are fixed here:

   - T->corner was passed to pivot_table_set_caption(), the same call that is
     used for T->caption just before it, so the corner text was never set as
     such.  It is now passed to pivot_table_set_corner_text().

   - In the comparison of consecutive sorted cells, value_equal() was given
     var_get_type() as its last argument.  Every other value_equal() call in
     this file passes var_get_width() there, and a type code is not a width,
     so string values could be compared incorrectly.  It now receives
     var_get_width().

   NOTE(review): the numbering embedded in this listing has gaps; many
   statements of this function (conditions, braces, declarations such as
   'prev_leaf' and 'dindexes', and the cleanup code) are not shown here. */
3679 ctables_table_output (struct ctables *ct, struct ctables_table *t)
3681 struct pivot_table *pt = pivot_table_create__ (
3683 ? pivot_value_new_user_text (t->title, SIZE_MAX)
3684 : pivot_value_new_text (N_("Custom Tables"))),
3687 pivot_table_set_caption (
3688 pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
/* The corner text has its own setter; it must not overwrite the caption. */
3690 pivot_table_set_corner_text (
3691 pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
3693 bool summary_dimension = (t->summary_axis != t->slabels_axis
3694 || (!t->slabels_visible
3695 && t->summary_specs.n > 1));
3696 if (summary_dimension)
3698 struct pivot_dimension *d = pivot_dimension_create (
3699 pt, t->slabels_axis, N_("Statistics"));
3700 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3701 if (!t->slabels_visible)
3702 d->hide_all_labels = true;
3703 for (size_t i = 0; i < specs->n; i++)
3704 pivot_category_create_leaf (
3705 d->root, pivot_value_new_text (specs->specs[i].label));
3708 bool categories_dimension = t->clabels_example != NULL;
3709 if (categories_dimension)
3711 struct pivot_dimension *d = pivot_dimension_create (
3712 pt, t->label_axis[t->clabels_from_axis],
3713 t->clabels_from_axis == PIVOT_AXIS_ROW
3714 ? N_("Row Categories")
3715 : N_("Column Categories"));
3716 const struct variable *var = t->clabels_example;
3717 const struct ctables_categories *c = t->categories[var_get_dict_index (var)];
3718 for (size_t i = 0; i < t->n_clabels_values; i++)
3720 const struct ctables_value *value = t->clabels_values[i];
3721 const struct ctables_category *cat = ctables_categories_match (c, &value->value, var);
3722 assert (cat != NULL);
3723 pivot_category_create_leaf (d->root, ctables_category_create_label (
3724 cat, t->clabels_example, &value->value));
3728 pivot_table_set_look (pt, ct->look);
3729 struct pivot_dimension *d[PIVOT_N_AXES];
3730 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3732 static const char *names[] = {
3733 [PIVOT_AXIS_ROW] = N_("Rows"),
3734 [PIVOT_AXIS_COLUMN] = N_("Columns"),
3735 [PIVOT_AXIS_LAYER] = N_("Layers"),
3737 d[a] = (t->axes[a] || a == t->summary_axis
3738 ? pivot_dimension_create (pt, a, names[a])
3743 assert (t->axes[a]);
3745 for (size_t i = 0; i < t->stacks[a].n; i++)
3747 struct ctables_nest *nest = &t->stacks[a].nests[i];
3748 struct ctables_section **sections = xnmalloc (t->n_sections,
3750 size_t n_sections = 0;
3752 size_t n_total_cells = 0;
3753 size_t max_depth = 0;
3754 for (size_t j = 0; j < t->n_sections; j++)
3755 if (t->sections[j].nests[a] == nest)
3757 struct ctables_section *s = &t->sections[j];
3758 sections[n_sections++] = s;
3759 n_total_cells += s->cells.count;
3761 size_t depth = s->nests[a]->n;
3762 max_depth = MAX (depth, max_depth);
3765 struct ctables_cell **sorted = xnmalloc (n_total_cells,
3767 size_t n_sorted = 0;
3769 for (size_t j = 0; j < n_sections; j++)
3771 struct ctables_section *s = sections[j];
3773 struct ctables_cell *cell;
3774 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3776 sorted[n_sorted++] = cell;
3777 assert (n_sorted <= n_total_cells);
3780 struct ctables_cell_sort_aux aux = { .nest = nest, .a = a };
3781 sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux);
3784 for (size_t j = 0; j < n_sorted; j++)
3786 printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0);
3791 struct ctables_level
3793 enum ctables_level_type
3795 CTL_VAR, /* Variable label for nest->vars[var_idx]. */
3796 CTL_CATEGORY, /* Category for nest->vars[var_idx]. */
3797 CTL_SUMMARY, /* Summary functions. */
3801 enum settings_value_show vlabel; /* CTL_VAR only. */
3804 struct ctables_level *levels = xnmalloc (1 + 2 * max_depth, sizeof *levels);
3805 size_t n_levels = 0;
3806 for (size_t k = 0; k < nest->n; k++)
3808 enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])];
3809 if (vlabel != CTVL_NONE)
3811 levels[n_levels++] = (struct ctables_level) {
3813 .vlabel = (enum settings_value_show) vlabel,
3818 if (nest->scale_idx != k
3819 && (k != nest->n - 1 || t->label_axis[a] == a))
3821 levels[n_levels++] = (struct ctables_level) {
3822 .type = CTL_CATEGORY,
3828 if (!summary_dimension && a == t->slabels_axis)
3830 levels[n_levels++] = (struct ctables_level) {
3831 .type = CTL_SUMMARY,
3832 .var_idx = SIZE_MAX,
3836 /* Pivot categories:
3838 - variable label for nest->vars[0], if vlabel != CTVL_NONE
3839 - category for nest->vars[0], if nest->scale_idx != 0
3840 - variable label for nest->vars[1], if vlabel != CTVL_NONE
3841 - category for nest->vars[1], if nest->scale_idx != 1
3843 - variable label for nest->vars[n - 1], if vlabel != CTVL_NONE
3844 - category for nest->vars[n - 1], if t->label_axis[a] == a && nest->scale_idx != n - 1.
3845 - summary function, if 'a == t->slabels_axis && a ==
3848 Additional dimensions:
3850 - If 'a == t->slabels_axis && a != t->summary_axis', add a summary
3852 - If 't->label_axis[b] == a' for some 'b != a', add a category
3857 struct pivot_category **groups = xnmalloc (1 + 2 * max_depth, sizeof *groups);
3859 for (size_t j = 0; j < n_sorted; j++)
3861 struct ctables_cell *cell = sorted[j];
3862 struct ctables_cell *prev = j > 0 ? sorted[j - 1] : NULL;
3864 size_t n_common = 0;
3867 for (; n_common < n_levels; n_common++)
3869 const struct ctables_level *level = &levels[n_common];
3870 if (level->type == CTL_CATEGORY)
3872 size_t var_idx = level->var_idx;
3873 const struct ctables_category *c = cell->axes[a].cvs[var_idx].category;
3874 if (prev->axes[a].cvs[var_idx].category != c)
3876 else if (c->type != CCT_SUBTOTAL
3877 && c->type != CCT_TOTAL
3878 && c->type != CCT_POSTCOMPUTE
3879 && !value_equal (&prev->axes[a].cvs[var_idx].value,
3880 &cell->axes[a].cvs[var_idx].value,
3881 var_get_width (nest->vars[var_idx])))
3887 for (size_t k = n_common; k < n_levels; k++)
3889 const struct ctables_level *level = &levels[k];
3890 struct pivot_category *parent = k ? groups[k - 1] : d[a]->root;
3891 if (level->type == CTL_SUMMARY)
3893 assert (k == n_levels - 1);
3895 const struct ctables_summary_spec_set *specs = &t->summary_specs;
3896 for (size_t m = 0; m < specs->n; m++)
3898 int leaf = pivot_category_create_leaf (
3899 parent, pivot_value_new_text (specs->specs[m].label));
3906 const struct variable *var = nest->vars[level->var_idx];
3907 struct pivot_value *label;
3908 if (level->type == CTL_VAR)
3910 label = pivot_value_new_variable (var);
3911 label->variable.show = level->vlabel;
3913 else if (level->type == CTL_CATEGORY)
3915 const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx];
3916 label = ctables_category_create_label (cv->category,
3922 if (k == n_levels - 1)
3923 prev_leaf = pivot_category_create_leaf (parent, label);
3925 groups[k] = pivot_category_create_group__ (parent, label);
3929 cell->axes[a].leaf = prev_leaf;
3936 for (size_t i = 0; i < t->n_sections; i++)
3938 struct ctables_section *s = &t->sections[i];
3940 struct ctables_cell *cell;
3941 HMAP_FOR_EACH (cell, struct ctables_cell, node, &s->cells)
3946 const struct ctables_nest *specs_nest = s->nests[t->summary_axis];
3947 const struct ctables_summary_spec_set *specs = &specs_nest->specs[cell->sv];
3948 for (size_t j = 0; j < specs->n; j++)
3951 size_t n_dindexes = 0;
3953 if (summary_dimension)
3954 dindexes[n_dindexes++] = specs->specs[j].axis_idx;
3956 if (categories_dimension)
3958 const struct ctables_nest *clabels_nest = s->nests[t->clabels_from_axis];
3959 const struct variable *var = clabels_nest->vars[clabels_nest->n - 1];
3960 const union value *value = &cell->axes[t->clabels_from_axis].cvs[clabels_nest->n - 1].value;
3961 const struct ctables_value *ctv = ctables_value_find (t, value, var_get_width (var));
3964 dindexes[n_dindexes++] = ctv->leaf;
3967 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
3970 int leaf = cell->axes[a].leaf;
3971 if (a == t->summary_axis && !summary_dimension)
3973 dindexes[n_dindexes++] = leaf;
3976 const struct ctables_summary_spec *ss = &specs->specs[j];
3978 double d = (cell->postcompute
3979 ? ctables_cell_calculate_postcompute (s, cell)
3980 : ctables_summary_value (cell, &cell->summaries[j], ss));
3981 struct pivot_value *value;
3982 if (ct->hide_threshold != 0
3983 && d < ct->hide_threshold
3984 && (cell->postcompute
3986 : ctables_summary_function_is_count (ss->function)))
3988 value = pivot_value_new_user_text_nocopy (
3989 xasprintf ("<%d", ct->hide_threshold));
3991 else if (d == 0 && ct->zero)
3992 value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
3993 else if (d == SYSMIS && ct->missing)
3994 value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
3995 else if (specs->specs[j].is_ctables_format)
3997 char *s = data_out_stretchy (&(union value) { .f = d },
3999 &specs->specs[j].format,
4000 &ct->ctables_formats, NULL);
4001 value = pivot_value_new_user_text_nocopy (s);
4005 value = pivot_value_new_number (d);
4006 value->numeric.format = specs->specs[j].format;
4008 pivot_table_put (pt, dindexes, n_dindexes, value);
4013 pivot_table_submit (pt);
/* Validates the category-label position T->label_axis[A] requested for axis A
   of table T.

   The visible code records A in T->clabels_from_axis and the innermost
   variable of the first nest on the axis in T->clabels_example.  It reports
   an error with msg (SE, ...) when:

   - a category of that variable has type CCT_FUNCTION (sorting based on a
     summary function);

   - the innermost variable of a nest is the nest's scale variable; or

   - the innermost variable of a later nest differs from the first nest's in
     width, value labels, or category specifications.

   NOTE(review): the numbering embedded in this listing has gaps; the early
   exits (for example, when the labels stay on their own axis or the stack is
   empty) and all return statements are not shown here. */
4017 ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a)
4019 enum pivot_axis_type label_pos = t->label_axis[a];
4023 t->clabels_from_axis = a;
/* Names used only in the error messages below. */
4025 const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS";
4026 const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE";
4028 const struct ctables_stack *stack = &t->stacks[a];
/* The innermost variable of the first nest serves as the reference that all
   other nests are compared against. */
4032 const struct ctables_nest *n0 = &stack->nests[0];
4034 const struct variable *v0 = n0->vars[n0->n - 1];
4035 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4036 t->clabels_example = v0;
4038 for (size_t i = 0; i < c0->n_cats; i++)
4039 if (c0->cats[i].type == CCT_FUNCTION)
4041 msg (SE, _("%s=%s is not allowed with sorting based "
4042 "on a summary function."),
4043 subcommand_name, pos_name);
4046 if (n0->n - 1 == n0->scale_idx)
4048 msg (SE, _("%s=%s requires the variables to be moved to be categorical, "
4049 "but %s is a scale variable."),
4050 subcommand_name, pos_name, var_get_name (v0));
/* Every remaining nest must agree with the reference variable. */
4054 for (size_t i = 1; i < stack->n; i++)
4056 const struct ctables_nest *ni = &stack->nests[i];
4058 const struct variable *vi = ni->vars[ni->n - 1];
4059 struct ctables_categories *ci = t->categories[var_get_dict_index (vi)];
4061 if (ni->n - 1 == ni->scale_idx)
4063 msg (SE, _("%s=%s requires the variables to be moved to be "
4064 "categorical, but %s is a scale variable."),
4065 subcommand_name, pos_name, var_get_name (vi));
4068 if (var_get_width (v0) != var_get_width (vi))
4070 msg (SE, _("%s=%s requires the variables to be "
4071 "moved to have the same width, but %s has "
4072 "width %d and %s has width %d."),
4073 subcommand_name, pos_name,
4074 var_get_name (v0), var_get_width (v0),
4075 var_get_name (vi), var_get_width (vi));
4078 if (!val_labs_equal (var_get_value_labels (v0),
4079 var_get_value_labels (vi)))
4081 msg (SE, _("%s=%s requires the variables to be "
4082 "moved to have the same value labels, but %s "
4083 "and %s have different value labels."),
4084 subcommand_name, pos_name,
4085 var_get_name (v0), var_get_name (vi));
4088 if (!ctables_categories_equal (c0, ci))
4090 msg (SE, _("%s=%s requires the variables to be "
4091 "moved to have the same category "
4092 "specifications, but %s and %s have different "
4093 "category specifications."),
4094 subcommand_name, pos_name,
4095 var_get_name (v0), var_get_name (vi));
/* Prepares table T for processing data.

   For each axis, the visible code fills T->stacks[a] from enumerate_fts()
   and, for every nest and domain type, allocates nest->domains[dt] and
   records the indexes K of the nest's variables that belong to the domain.
   An empty nest is installed as a one-element stack under a condition that
   is not shown (presumably for an axis that has no variables).

   Then, for each nest on the summary axis:

   - when no CSV_CELL summary was specified, a single default specification is
     created (CTSF_MEAN for a scale nest, CTSF_COUNT otherwise) and cloned
     into CSV_TOTAL;

   - otherwise, CSV_TOTAL is cloned from CSV_CELL when it is empty;

   - with smissing_listwise, the scale variables of the other nests in the
     same group are gathered into the listwise variable list of every
     specification set of the nest.

   After that the specification sets of all the nests are merged into
   T->summary_specs in merge_item_compare_3way() order, and each merged
   specification's position is stored in the axis_idx of the specifications
   that compare equal to it.

   The return value is true only if ctables_check_label_position() succeeds
   for both the row and the column axis.

   NOTE(review): the numbering embedded in this listing has gaps; switch and
   case lines in the domain selection, loop headers in the merge, and
   whatever guards the printf() calls are not shown here.  Also, specs->var is
   passed to ctables_summary_default_format() before the visible assignment
   'specs->var = nest->vars[0]' -- confirm that it is already set, or that the
   callee tolerates a null variable. */
4104 ctables_prepare_table (struct ctables_table *t)
4106 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4109 t->stacks[a] = enumerate_fts (a, t->axes[a]);
4111 for (size_t j = 0; j < t->stacks[a].n; j++)
4113 struct ctables_nest *nest = &t->stacks[a].nests[j];
4114 for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
/* Room for every variable in the nest; n_domains[dt] counts those actually
   stored. */
4116 nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]);
4117 nest->n_domains[dt] = 0;
4119 for (size_t k = 0; k < nest->n; k++)
4121 if (k == nest->scale_idx)
4130 if (a != PIVOT_AXIS_LAYER)
4137 if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
4138 : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
4139 : a == PIVOT_AXIS_ROW)
4141 if (k == nest->n - 1
4142 || (nest->scale_idx == nest->n - 1
4143 && k == nest->n - 2))
4149 if (a == PIVOT_AXIS_COLUMN)
4154 if (a == PIVOT_AXIS_ROW)
4159 nest->domains[dt][nest->n_domains[dt]++] = k;
4166 struct ctables_nest *nest = xmalloc (sizeof *nest);
4167 *nest = (struct ctables_nest) { .n = 0 };
4168 t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
/* Summary specifications: defaults, totals, and listwise variables. */
4171 struct ctables_stack *stack = &t->stacks[t->summary_axis];
4172 for (size_t i = 0; i < stack->n; i++)
4174 struct ctables_nest *nest = &stack->nests[i];
4175 if (!nest->specs[CSV_CELL].n)
4177 struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
4178 specs->specs = xmalloc (sizeof *specs->specs);
4181 enum ctables_summary_function function
4182 = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
4184 *specs->specs = (struct ctables_summary_spec) {
4185 .function = function,
4186 .format = ctables_summary_default_format (function, specs->var),
4187 .label = ctables_summary_default_label (function, 0),
4190 specs->var = nest->vars[0];
4192 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4193 &nest->specs[CSV_CELL]);
4195 else if (!nest->specs[CSV_TOTAL].n)
4196 ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
4197 &nest->specs[CSV_CELL]);
4199 if (t->ctables->smissing_listwise)
4201 struct variable **listwise_vars = NULL;
4203 size_t allocated = 0;
/* Nests that share a group_head form one group; the scan starts at the
   group's head. */
4205 for (size_t j = nest->group_head; j < stack->n; j++)
4207 const struct ctables_nest *other_nest = &stack->nests[j];
4208 if (other_nest->group_head != nest->group_head)
4211 if (nest != other_nest && other_nest->scale_idx < other_nest->n)
4214 listwise_vars = x2nrealloc (listwise_vars, &allocated,
4215 sizeof *listwise_vars);
4216 listwise_vars[n++] = other_nest->vars[other_nest->scale_idx];
/* The same array is shared by all of the nest's specification sets. */
4219 for (size_t j = 0; j < N_CSVS; j++)
4221 nest->specs[j].listwise_vars = listwise_vars;
4222 nest->specs[j].n_listwise_vars = n;
/* Merge the specification sets of all nests into T->summary_specs. */
4227 struct ctables_summary_spec_set *merged = &t->summary_specs;
4228 struct merge_item *items = xnmalloc (2 * stack->n, sizeof *items);
4230 for (size_t j = 0; j < stack->n; j++)
4232 const struct ctables_nest *nest = &stack->nests[j];
4234 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4235 items[n_left++] = (struct merge_item) { .set = &nest->specs[sv] };
/* Pick the smallest remaining item, append it to the merged set, then advance
   every item that compares equal to it. */
4240 struct merge_item min = items[0];
4241 for (size_t j = 1; j < n_left; j++)
4242 if (merge_item_compare_3way (&items[j], &min) < 0)
4245 if (merged->n >= merged->allocated)
4246 merged->specs = x2nrealloc (merged->specs, &merged->allocated,
4247 sizeof *merged->specs);
4248 merged->specs[merged->n++] = min.set->specs[min.ofs];
4250 for (size_t j = 0; j < n_left; )
4252 if (merge_item_compare_3way (&items[j], &min) == 0)
4254 struct merge_item *item = &items[j];
4255 item->set->specs[item->ofs].axis_idx = merged->n - 1;
4256 if (++item->ofs >= item->set->n)
4258 items[j] = items[--n_left];
4267 for (size_t j = 0; j < merged->n; j++)
4268 printf ("%s\n", ctables_summary_function_name (merged->specs[j].function));
4270 for (size_t j = 0; j < stack->n; j++)
4272 const struct ctables_nest *nest = &stack->nests[j];
4273 for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
4275 const struct ctables_summary_spec_set *specs = &nest->specs[sv];
4276 for (size_t k = 0; k < specs->n; k++)
4277 printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function),
4278 specs->specs[k].axis_idx);
4284 return (ctables_check_label_position (t, PIVOT_AXIS_ROW)
4285 && ctables_check_label_position (t, PIVOT_AXIS_COLUMN));
/* Records the category-label values that case C contributes on axis A of
   table T.  For each nest in the axis's stack, the value of the nest's
   innermost variable is taken from C; system-missing numeric values are
   singled out, and a value that ctables_categories_match() assigns to a
   category is inserted with ctables_value_insert().

   NOTE(review): the statement guarded by the system-missing test and the
   remaining arguments of the ctables_categories_match() call are not shown in
   this listing. */
4289 ctables_insert_clabels_values (struct ctables_table *t, const struct ccase *c,
4290 enum pivot_axis_type a)
4292 struct ctables_stack *stack = &t->stacks[a];
4293 for (size_t i = 0; i < stack->n; i++)
4295 const struct ctables_nest *nest = &stack->nests[i];
4296 const struct variable *var = nest->vars[nest->n - 1];
4297 const union value *value = case_data (c, var);
4299 if (var_is_numeric (var) && value->f == SYSMIS)
4302 if (ctables_categories_match (t->categories [var_get_dict_index (var)],
4304 ctables_value_insert (t, value, var_get_width (var));
4309 compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
4311 const struct ctables_value *const *ap = a_;
4312 const struct ctables_value *const *bp = b_;
4313 const struct ctables_value *a = *ap;
4314 const struct ctables_value *b = *bp;
4315 const int *width = width_;
4316 return value_compare_3way (&a->value, &b->value, *width);
4320 ctables_sort_clabels_values (struct ctables_table *t)
4322 const struct variable *v0 = t->clabels_example;
4323 int width = var_get_width (v0);
4325 struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
4328 const struct val_labs *val_labs = var_get_value_labels (v0);
4329 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4330 vl = val_labs_next (val_labs, vl))
4331 if (ctables_categories_match (c0, &vl->value, v0))
4332 ctables_value_insert (t, &vl->value, width);
4335 size_t n = hmap_count (&t->clabels_values_map);
4336 t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
4338 struct ctables_value *clv;
4340 HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
4341 t->clabels_values[i++] = clv;
4342 t->n_clabels_values = n;
4345 sort (t->clabels_values, n, sizeof *t->clabels_values,
4346 compare_clabels_values_3way, &width);
4348 for (size_t i = 0; i < n; i++)
4349 t->clabels_values[i]->leaf = i;
4353 ctables_add_category_occurrences (const struct variable *var,
4354 struct hmap *occurrences,
4355 const struct ctables_categories *cats)
4357 const struct val_labs *val_labs = var_get_value_labels (var);
4359 for (size_t i = 0; i < cats->n_cats; i++)
4361 const struct ctables_category *c = &cats->cats[i];
4365 ctables_add_occurrence (var, &(const union value) { .f = c->number },
4371 int width = var_get_width (var);
4373 value_init (&value, width);
4374 value_copy_buf_rpad (&value, width,
4375 CHAR_CAST (uint8_t *, c->string.string),
4376 c->string.length, ' ');
4377 ctables_add_occurrence (var, &value, occurrences);
4378 value_destroy (&value, width);
4383 assert (var_is_numeric (var));
4384 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4385 vl = val_labs_next (val_labs, vl))
4386 if (vl->value.f >= c->nrange[0] && vl->value.f <= c->nrange[1])
4387 ctables_add_occurrence (var, &vl->value, occurrences);
4391 assert (var_is_alpha (var));
4392 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4393 vl = val_labs_next (val_labs, vl))
4394 if (in_string_range (&vl->value, var, c->srange))
4395 ctables_add_occurrence (var, &vl->value, occurrences);
4399 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4400 vl = val_labs_next (val_labs, vl))
4401 if (var_is_value_missing (var, &vl->value))
4402 ctables_add_occurrence (var, &vl->value, occurrences);
4406 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4407 vl = val_labs_next (val_labs, vl))
4408 ctables_add_occurrence (var, &vl->value, occurrences);
4411 case CCT_POSTCOMPUTE:
4421 for (const struct val_lab *vl = val_labs_first (val_labs); vl;
4422 vl = val_labs_next (val_labs, vl))
4423 if (c->include_missing || !var_is_value_missing (var, &vl->value))
4424 ctables_add_occurrence (var, &vl->value, occurrences);
4427 case CCT_EXCLUDED_MISSING:
4434 ctables_section_recurse_add_empty_categories (
4435 struct ctables_section *s,
4436 const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c,
4437 enum pivot_axis_type a, size_t a_idx)
4439 if (a >= PIVOT_N_AXES)
4440 ctables_cell_insert__ (s, c, cats);
4441 else if (!s->nests[a] || a_idx >= s->nests[a]->n)
4442 ctables_section_recurse_add_empty_categories (s, cats, c, a + 1, 0);
4445 const struct variable *var = s->nests[a]->vars[a_idx];
4446 const struct ctables_categories *categories = s->table->categories[
4447 var_get_dict_index (var)];
4448 int width = var_get_width (var);
4449 const struct hmap *occurrences = &s->occurrences[a][a_idx];
4450 const struct ctables_occurrence *o;
4451 HMAP_FOR_EACH (o, struct ctables_occurrence, node, occurrences)
4453 union value *value = case_data_rw (c, var);
4454 value_destroy (value, width);
4455 value_clone (value, &o->value, width);
4456 cats[a][a_idx] = ctables_categories_match (categories, value, var);
4457 assert (cats[a][a_idx] != NULL);
4458 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4461 for (size_t i = 0; i < categories->n_cats; i++)
4463 const struct ctables_category *cat = &categories->cats[i];
4464 if (cat->type == CCT_POSTCOMPUTE)
4466 cats[a][a_idx] = cat;
4467 ctables_section_recurse_add_empty_categories (s, cats, c, a, a_idx + 1);
4474 ctables_section_add_empty_categories (struct ctables_section *s)
4476 bool show_empty = false;
4477 for (size_t a = 0; a < PIVOT_N_AXES; a++)
4479 for (size_t k = 0; k < s->nests[a]->n; k++)
4480 if (k != s->nests[a]->scale_idx)
4482 const struct variable *var = s->nests[a]->vars[k];
4483 const struct ctables_categories *cats = s->table->categories[
4484 var_get_dict_index (var)];
4485 if (cats->show_empty)
4488 ctables_add_category_occurrences (var, &s->occurrences[a][k], cats);
4494 const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
4495 struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict));
4496 ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0);
4501 ctables_execute (struct dataset *ds, struct ctables *ct)
4503 for (size_t i = 0; i < ct->n_tables; i++)
4505 struct ctables_table *t = ct->tables[i];
4506 t->sections = xnmalloc (MAX (1, t->stacks[PIVOT_AXIS_ROW].n) *
4507 MAX (1, t->stacks[PIVOT_AXIS_COLUMN].n) *
4508 MAX (1, t->stacks[PIVOT_AXIS_LAYER].n),
4509 sizeof *t->sections);
4510 size_t ix[PIVOT_N_AXES];
4511 ctables_table_add_section (t, 0, ix);
4514 struct casereader *input = proc_open (ds);
4515 bool warn_on_invalid = true;
4516 for (struct ccase *c = casereader_read (input); c;
4517 case_unref (c), c = casereader_read (input))
4519 double d_weight = dict_get_case_weight (dataset_dict (ds), c,
4521 double e_weight = (ct->e_weight
4522 ? var_force_valid_weight (ct->e_weight,
4523 case_num (c, ct->e_weight),
4527 for (size_t i = 0; i < ct->n_tables; i++)
4529 struct ctables_table *t = ct->tables[i];
4531 for (size_t j = 0; j < t->n_sections; j++)
4532 ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
4534 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
4535 if (t->label_axis[a] != a)
4536 ctables_insert_clabels_values (t, c, a);
4539 casereader_destroy (input);
4541 for (size_t i = 0; i < ct->n_tables; i++)
4543 struct ctables_table *t = ct->tables[i];
4545 if (t->clabels_example)
4546 ctables_sort_clabels_values (t);
4548 for (size_t j = 0; j < t->n_sections; j++)
4549 ctables_section_add_empty_categories (&t->sections[j]);
4551 ctables_table_output (ct, ct->tables[i]);
4553 return proc_commit (ds);
/* Signature shared by the postcompute expression parsing levels.  The binary
   operator parsers take a pointer to a function of this type and call it to
   parse each operand. */
typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
                                                       struct dictionary *);
4562 ctables_pcexpr_destroy (struct ctables_pcexpr *e)
4568 case CTPO_CAT_STRING:
4569 ss_dealloc (&e->string);
4578 for (size_t i = 0; i < 2; i++)
4579 ctables_pcexpr_destroy (e->subs[i]);
4583 case CTPO_CAT_NUMBER:
4584 case CTPO_CAT_RANGE:
4585 case CTPO_CAT_MISSING:
4586 case CTPO_CAT_OTHERNM:
4587 case CTPO_CAT_SUBTOTAL:
4588 case CTPO_CAT_TOTAL:
4592 msg_location_destroy (e->location);
4597 static struct ctables_pcexpr *
4598 ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
4599 struct ctables_pcexpr *sub0,
4600 struct ctables_pcexpr *sub1)
4602 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4603 *e = (struct ctables_pcexpr) {
4605 .subs = { sub0, sub1 },
4606 .location = msg_location_merged (sub0->location, sub1->location),
/* How to parse an operator: pairs a lexer token with the postcompute
   operation that it denotes. */
struct operator
  {
    enum token_type token;              /* Token that introduces the operator. */
    enum ctables_postcompute_op op;     /* Operation for the resulting node. */
  };
4618 static const struct operator *
4619 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
4621 for (const struct operator *op = ops; op < ops + n_ops; op++)
4622 if (lex_token (lexer) == op->token)
4624 if (op->token != T_NEG_NUM)
4633 static struct ctables_pcexpr *
4634 parse_binary_operators__ (struct lexer *lexer, struct dictionary *dict,
4635 const struct operator ops[], size_t n_ops,
4636 parse_recursively_func *parse_next_level,
4637 const char *chain_warning,
4638 struct ctables_pcexpr *lhs)
4640 for (int op_count = 0; ; op_count++)
4642 const struct operator *op = match_operator (lexer, ops, n_ops);
4645 if (op_count > 1 && chain_warning)
4646 msg_at (SW, lhs->location, "%s", chain_warning);
4651 struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
4654 ctables_pcexpr_destroy (lhs);
4658 lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
4662 static struct ctables_pcexpr *
4663 parse_binary_operators (struct lexer *lexer, struct dictionary *dict,
4664 const struct operator ops[], size_t n_ops,
4665 parse_recursively_func *parse_next_level,
4666 const char *chain_warning)
4668 struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
4672 return parse_binary_operators__ (lexer, dict, ops, n_ops, parse_next_level,
4673 chain_warning, lhs);
4676 static struct ctables_pcexpr *parse_add (struct lexer *, struct dictionary *);
4678 static struct ctables_pcexpr
4679 ctpo_cat_range (double low, double high)
4681 return (struct ctables_pcexpr) {
4682 .op = CTPO_CAT_RANGE,
4683 .range = { low, high },
4687 static struct ctables_pcexpr *
4688 parse_primary (struct lexer *lexer, struct dictionary *dict)
4690 int start_ofs = lex_ofs (lexer);
4691 struct ctables_pcexpr e;
4692 if (lex_is_number (lexer))
4694 e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
4695 .number = lex_number (lexer) };
4698 else if (lex_match_id (lexer, "MISSING"))
4699 e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
4700 else if (lex_match_id (lexer, "OTHERNM"))
4701 e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
4702 else if (lex_match_id (lexer, "TOTAL"))
4703 e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
4704 else if (lex_match_id (lexer, "SUBTOTAL"))
4706 size_t subtotal_index = 0;
4707 if (lex_match (lexer, T_LBRACK))
4709 if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
4711 subtotal_index = lex_integer (lexer);
4713 if (!lex_force_match (lexer, T_RBRACK))
4716 e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
4717 .subtotal_index = subtotal_index };
4719 else if (lex_match (lexer, T_LBRACK))
4721 if (lex_match_id (lexer, "LO"))
4723 if (!lex_force_match_id (lexer, "THRU") || lex_force_num (lexer))
4725 e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
4728 else if (lex_is_number (lexer))
4730 double number = lex_number (lexer);
4732 if (lex_match_id (lexer, "THRU"))
4734 if (lex_match_id (lexer, "HI"))
4735 e = ctpo_cat_range (number, DBL_MAX);
4738 if (!lex_force_num (lexer))
4740 e = ctpo_cat_range (number, lex_number (lexer));
4745 e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
4748 else if (lex_is_string (lexer))
4750 struct substring s = recode_substring_pool (
4751 dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
4752 ss_rtrim (&s, ss_cstr (" "));
4754 e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
4759 lex_error (lexer, NULL);
4763 if (!lex_force_match (lexer, T_RBRACK))
4765 if (e.op == CTPO_CAT_STRING)
4766 ss_dealloc (&e.string);
4770 else if (lex_match (lexer, T_LPAREN))
4772 struct ctables_pcexpr *ep = parse_add (lexer, dict);
4775 if (!lex_force_match (lexer, T_RPAREN))
4777 ctables_pcexpr_destroy (ep);
4784 lex_error (lexer, NULL);
4788 e.location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
4789 return xmemdup (&e, sizeof e);
4792 static struct ctables_pcexpr *
4793 ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
4794 struct lexer *lexer, int start_ofs)
4796 struct ctables_pcexpr *e = xmalloc (sizeof *e);
4797 *e = (struct ctables_pcexpr) {
4800 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1),
4805 static struct ctables_pcexpr *
4806 parse_exp (struct lexer *lexer, struct dictionary *dict)
4808 static const struct operator op = { T_EXP, CTPO_POW };
4810 const char *chain_warning =
4811 _("The exponentiation operator (`**') is left-associative: "
4812 "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
4813 "To disable this warning, insert parentheses.");
4815 if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
4816 return parse_binary_operators (lexer, dict, &op, 1,
4817 parse_primary, chain_warning);
4819 /* Special case for situations like "-5**6", which must be parsed as
4822 int start_ofs = lex_ofs (lexer);
4823 struct ctables_pcexpr *lhs = xmalloc (sizeof *lhs);
4824 *lhs = (struct ctables_pcexpr) {
4825 .op = CTPO_CONSTANT,
4826 .number = -lex_tokval (lexer),
4827 .location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer)),
4831 struct ctables_pcexpr *node = parse_binary_operators__ (
4832 lexer, dict, &op, 1, parse_primary, chain_warning, lhs);
4836 return ctables_pcexpr_allocate_neg (node, lexer, start_ofs);
4839 /* Parses the unary minus level. */
4840 static struct ctables_pcexpr *
4841 parse_neg (struct lexer *lexer, struct dictionary *dict)
4843 int start_ofs = lex_ofs (lexer);
4844 if (!lex_match (lexer, T_DASH))
4845 return parse_exp (lexer, dict);
4847 struct ctables_pcexpr *inner = parse_neg (lexer, dict);
4851 return ctables_pcexpr_allocate_neg (inner, lexer, start_ofs);
4854 /* Parses the multiplication and division level. */
4855 static struct ctables_pcexpr *
4856 parse_mul (struct lexer *lexer, struct dictionary *dict)
4858 static const struct operator ops[] =
4860 { T_ASTERISK, CTPO_MUL },
4861 { T_SLASH, CTPO_DIV },
4864 return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
4868 /* Parses the addition and subtraction level. */
4869 static struct ctables_pcexpr *
4870 parse_add (struct lexer *lexer, struct dictionary *dict)
4872 static const struct operator ops[] =
4874 { T_PLUS, CTPO_ADD },
4875 { T_DASH, CTPO_SUB },
4876 { T_NEG_NUM, CTPO_ADD },
4879 return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
4883 static struct ctables_postcompute *
4884 ctables_find_postcompute (struct ctables *ct, const char *name)
4886 struct ctables_postcompute *pc;
4887 HMAP_FOR_EACH_WITH_HASH (pc, struct ctables_postcompute, hmap_node,
4888 utf8_hash_case_string (name, 0), &ct->postcomputes)
4889 if (!utf8_strcasecmp (pc->name, name))
4895 ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
4898 int pcompute_start = lex_ofs (lexer) - 1;
4900 if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
4903 char *name = ss_xstrdup (lex_tokss (lexer));
4906 if (!lex_force_match (lexer, T_EQUALS)
4907 || !lex_force_match_id (lexer, "EXPR")
4908 || !lex_force_match (lexer, T_LPAREN))
4914 int expr_start = lex_ofs (lexer);
4915 struct ctables_pcexpr *expr = parse_add (lexer, dict);
4916 int expr_end = lex_ofs (lexer) - 1;
4917 if (!expr || !lex_force_match (lexer, T_RPAREN))
4922 int pcompute_end = lex_ofs (lexer) - 1;
4924 struct msg_location *location = lex_ofs_location (lexer, pcompute_start,
4927 struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
4930 msg_at (SW, location, _("New definition of &%s will override the "
4931 "previous definition."),
4933 msg_at (SN, pc->location, _("This is the previous definition."));
4935 ctables_pcexpr_destroy (pc->expr);
4936 msg_location_destroy (pc->location);
4941 pc = xmalloc (sizeof *pc);
4942 *pc = (struct ctables_postcompute) { .name = name };
4943 hmap_insert (&ct->postcomputes, &pc->hmap_node,
4944 utf8_hash_case_string (pc->name, 0));
4947 pc->location = location;
4949 pc->label = lex_ofs_representation (lexer, expr_start, expr_end);
4954 ctables_parse_pproperties_format (struct lexer *lexer,
4955 struct ctables_summary_spec_set *sss)
4957 *sss = (struct ctables_summary_spec_set) { .n = 0 };
4959 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH
4960 && !(lex_token (lexer) == T_ID
4961 && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
4962 || lex_id_match (ss_cstr ("HIDESOURCECATS"),
4963 lex_tokss (lexer)))))
4965 /* Parse function. */
4966 enum ctables_summary_function function;
4967 if (!parse_ctables_summary_function (lexer, &function))
4970 /* Parse percentile. */
4971 double percentile = 0;
4972 if (function == CTSF_PTILE)
4974 if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
4976 percentile = lex_number (lexer);
4981 struct fmt_spec format;
4982 if (!parse_format_specifier (lexer, &format)
4983 || !fmt_check_output (&format)
4984 || !fmt_check_type_compat (&format, VAL_NUMERIC))
4987 if (sss->n >= sss->allocated)
4988 sss->specs = x2nrealloc (sss->specs, &sss->allocated,
4989 sizeof *sss->specs);
4990 sss->specs[sss->n++] = (struct ctables_summary_spec) {
4991 .function = function,
4992 .percentile = percentile,
4999 ctables_summary_spec_set_uninit (sss);
5004 ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
5006 struct ctables_postcompute **pcs = NULL;
5008 size_t allocated_pcs = 0;
5010 while (lex_match (lexer, T_AND))
5012 if (!lex_force_id (lexer))
5014 struct ctables_postcompute *pc
5015 = ctables_find_postcompute (ct, lex_tokcstr (lexer));
5018 msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
5023 if (n_pcs >= allocated_pcs)
5024 pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
5028 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5030 if (lex_match_id (lexer, "LABEL"))
5032 lex_match (lexer, T_EQUALS);
5033 if (!lex_force_string (lexer))
5036 for (size_t i = 0; i < n_pcs; i++)
5038 free (pcs[i]->label);
5039 pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
5044 else if (lex_match_id (lexer, "FORMAT"))
5046 lex_match (lexer, T_EQUALS);
5048 struct ctables_summary_spec_set sss;
5049 if (!ctables_parse_pproperties_format (lexer, &sss))
5052 for (size_t i = 0; i < n_pcs; i++)
5055 ctables_summary_spec_set_uninit (pcs[i]->specs);
5057 pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
5058 ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
5060 ctables_summary_spec_set_uninit (&sss);
5062 else if (lex_match_id (lexer, "HIDESOURCECATS"))
5064 lex_match (lexer, T_EQUALS);
5065 bool hide_source_cats;
5066 if (!parse_bool (lexer, &hide_source_cats))
5068 for (size_t i = 0; i < n_pcs; i++)
5069 pcs[i]->hide_source_cats = hide_source_cats;
5073 lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
5086 cmd_ctables (struct lexer *lexer, struct dataset *ds)
5088 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5089 enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
5090 enum settings_value_show tvars = settings_get_show_variables ();
5091 for (size_t i = 0; i < n_vars; i++)
5092 vlabels[i] = (enum ctables_vlabel) tvars;
5094 struct pivot_table_look *look = pivot_table_look_unshare (
5095 pivot_table_look_ref (pivot_table_look_get_default ()));
5096 look->omit_empty = false;
5098 struct ctables *ct = xmalloc (sizeof *ct);
5099 *ct = (struct ctables) {
5100 .dict = dataset_dict (ds),
5102 .ctables_formats = FMT_SETTINGS_INIT,
5104 .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
5110 const char *dot_string;
5111 const char *comma_string;
5113 static const struct ctf ctfs[4] = {
5114 { CTEF_NEGPAREN, "(,,,)", "(...)" },
5115 { CTEF_NEQUAL, "-,N=,,", "-.N=.." },
5116 { CTEF_PAREN, "-,(,),", "-.(.)." },
5117 { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." },
5119 bool is_dot = settings_get_fmt_settings ()->decimal == '.';
5120 for (size_t i = 0; i < 4; i++)
5122 const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string;
5123 fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type,
5124 fmt_number_style_from_string (s));
5127 if (!lex_force_match (lexer, T_SLASH))
5130 while (!lex_match_id (lexer, "TABLE"))
5132 if (lex_match_id (lexer, "FORMAT"))
5134 double widths[2] = { SYSMIS, SYSMIS };
5135 double units_per_inch = 72.0;
5137 while (lex_token (lexer) != T_SLASH)
5139 if (lex_match_id (lexer, "MINCOLWIDTH"))
5141 if (!parse_col_width (lexer, "MINCOLWIDTH", &widths[0]))
5144 else if (lex_match_id (lexer, "MAXCOLWIDTH"))
5146 if (!parse_col_width (lexer, "MAXCOLWIDTH", &widths[1]))
5149 else if (lex_match_id (lexer, "UNITS"))
5151 lex_match (lexer, T_EQUALS);
5152 if (lex_match_id (lexer, "POINTS"))
5153 units_per_inch = 72.0;
5154 else if (lex_match_id (lexer, "INCHES"))
5155 units_per_inch = 1.0;
5156 else if (lex_match_id (lexer, "CM"))
5157 units_per_inch = 2.54;
5160 lex_error_expecting (lexer, "POINTS", "INCHES", "CM");
5164 else if (lex_match_id (lexer, "EMPTY"))
5169 lex_match (lexer, T_EQUALS);
5170 if (lex_match_id (lexer, "ZERO"))
5172 /* Nothing to do. */
5174 else if (lex_match_id (lexer, "BLANK"))
5175 ct->zero = xstrdup ("");
5176 else if (lex_force_string (lexer))
5178 ct->zero = ss_xstrdup (lex_tokss (lexer));
5184 else if (lex_match_id (lexer, "MISSING"))
5186 lex_match (lexer, T_EQUALS);
5187 if (!lex_force_string (lexer))
5191 ct->missing = (strcmp (lex_tokcstr (lexer), ".")
5192 ? ss_xstrdup (lex_tokss (lexer))
5198 lex_error_expecting (lexer, "MINCOLWIDTH", "MAXCOLWIDTH",
5199 "UNITS", "EMPTY", "MISSING");
5204 if (widths[0] != SYSMIS && widths[1] != SYSMIS
5205 && widths[0] > widths[1])
5207 msg (SE, _("MINCOLWIDTH must not be greater than MAXCOLWIDTH."));
5211 for (size_t i = 0; i < 2; i++)
5212 if (widths[i] != SYSMIS)
5214 int *wr = ct->look->width_ranges[TABLE_HORZ];
5215 wr[i] = widths[i] / units_per_inch * 96.0;
5220 else if (lex_match_id (lexer, "VLABELS"))
5222 if (!lex_force_match_id (lexer, "VARIABLES"))
5224 lex_match (lexer, T_EQUALS);
5226 struct variable **vars;
5228 if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars,
5232 if (!lex_force_match_id (lexer, "DISPLAY"))
5237 lex_match (lexer, T_EQUALS);
5239 enum ctables_vlabel vlabel;
5240 if (lex_match_id (lexer, "DEFAULT"))
5241 vlabel = (enum ctables_vlabel) settings_get_show_variables ();
5242 else if (lex_match_id (lexer, "NAME"))
5244 else if (lex_match_id (lexer, "LABEL"))
5245 vlabel = CTVL_LABEL;
5246 else if (lex_match_id (lexer, "BOTH"))
5248 else if (lex_match_id (lexer, "NONE"))
5252 lex_error_expecting (lexer, "DEFAULT", "NAME", "LABEL",
5258 for (size_t i = 0; i < n_vars; i++)
5259 ct->vlabels[var_get_dict_index (vars[i])] = vlabel;
5262 else if (lex_match_id (lexer, "MRSETS"))
5264 if (!lex_force_match_id (lexer, "COUNTDUPLICATES"))
5266 lex_match (lexer, T_EQUALS);
5267 if (!parse_bool (lexer, &ct->mrsets_count_duplicates))
5270 else if (lex_match_id (lexer, "SMISSING"))
5272 if (lex_match_id (lexer, "VARIABLE"))
5273 ct->smissing_listwise = false;
5274 else if (lex_match_id (lexer, "LISTWISE"))
5275 ct->smissing_listwise = true;
5278 lex_error_expecting (lexer, "VARIABLE", "LISTWISE");
5282 else if (lex_match_id (lexer, "PCOMPUTE"))
5284 if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
5287 else if (lex_match_id (lexer, "PPROPERTIES"))
5289 if (!ctables_parse_pproperties (lexer, ct))
5292 else if (lex_match_id (lexer, "WEIGHT"))
5294 if (!lex_force_match_id (lexer, "VARIABLE"))
5296 lex_match (lexer, T_EQUALS);
5297 ct->e_weight = parse_variable (lexer, dataset_dict (ds));
5301 else if (lex_match_id (lexer, " HIDESMALLCOUNTS"))
5303 if (lex_match_id (lexer, "COUNT"))
5305 lex_match (lexer, T_EQUALS);
5306 if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
5309 ct->hide_threshold = lex_integer (lexer);
5312 else if (ct->hide_threshold == 0)
5313 ct->hide_threshold = 5;
5317 lex_error_expecting (lexer, "FORMAT", "VLABELS", "MRSETS",
5318 "SMISSING", "PCOMPUTE", "PPROPERTIES",
5319 "WEIGHT", "HIDESMALLCOUNTS", "TABLE");
5323 if (!lex_force_match (lexer, T_SLASH))
5327 size_t allocated_tables = 0;
5330 if (ct->n_tables >= allocated_tables)
5331 ct->tables = x2nrealloc (ct->tables, &allocated_tables,
5332 sizeof *ct->tables);
5334 struct ctables_category *cat = xmalloc (sizeof *cat);
5335 *cat = (struct ctables_category) {
5337 .include_missing = false,
5338 .sort_ascending = true,
5341 struct ctables_categories *c = xmalloc (sizeof *c);
5342 size_t n_vars = dict_get_n_vars (dataset_dict (ds));
5343 *c = (struct ctables_categories) {
5350 struct ctables_categories **categories = xnmalloc (n_vars,
5351 sizeof *categories);
5352 for (size_t i = 0; i < n_vars; i++)
5355 struct ctables_table *t = xmalloc (sizeof *t);
5356 *t = (struct ctables_table) {
5358 .slabels_axis = PIVOT_AXIS_COLUMN,
5359 .slabels_visible = true,
5360 .clabels_values_map = HMAP_INITIALIZER (t->clabels_values_map),
5362 [PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW,
5363 [PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN,
5364 [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER,
5366 .clabels_from_axis = PIVOT_AXIS_LAYER,
5367 .categories = categories,
5368 .n_categories = n_vars,
5371 ct->tables[ct->n_tables++] = t;
5373 lex_match (lexer, T_EQUALS);
5374 if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
5376 if (lex_match (lexer, T_BY))
5378 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5379 ct, t, PIVOT_AXIS_COLUMN))
5382 if (lex_match (lexer, T_BY))
5384 if (!ctables_axis_parse (lexer, dataset_dict (ds),
5385 ct, t, PIVOT_AXIS_LAYER))
5390 if (!t->axes[PIVOT_AXIS_ROW] && !t->axes[PIVOT_AXIS_COLUMN]
5391 && !t->axes[PIVOT_AXIS_LAYER])
5393 lex_error (lexer, _("At least one variable must be specified."));
5397 const struct ctables_axis *scales[PIVOT_N_AXES];
5398 size_t n_scales = 0;
5399 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5401 scales[a] = find_scale (t->axes[a]);
5407 msg (SE, _("Scale variables may appear only on one axis."));
5408 if (scales[PIVOT_AXIS_ROW])
5409 msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
5410 _("This scale variable appears on the rows axis."));
5411 if (scales[PIVOT_AXIS_COLUMN])
5412 msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
5413 _("This scale variable appears on the columns axis."));
5414 if (scales[PIVOT_AXIS_LAYER])
5415 msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
5416 _("This scale variable appears on the layer axis."));
5420 const struct ctables_axis *summaries[PIVOT_N_AXES];
5421 size_t n_summaries = 0;
5422 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5424 summaries[a] = (scales[a]
5426 : find_categorical_summary_spec (t->axes[a]));
5430 if (n_summaries > 1)
5432 msg (SE, _("Summaries may appear only on one axis."));
5433 if (summaries[PIVOT_AXIS_ROW])
5434 msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
5435 _("This variable on the rows axis has a summary."));
5436 if (summaries[PIVOT_AXIS_COLUMN])
5437 msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
5438 _("This variable on the columns axis has a summary."));
5439 if (summaries[PIVOT_AXIS_LAYER])
5440 msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
5441 _("This variable on the layers axis has a summary."));
5444 for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
5445 if (n_summaries ? summaries[a] : t->axes[a])
5447 t->summary_axis = a;
5451 if (lex_token (lexer) == T_ENDCMD)
5453 if (!ctables_prepare_table (t))
5457 if (!lex_force_match (lexer, T_SLASH))
5460 while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
5462 if (lex_match_id (lexer, "SLABELS"))
5464 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5466 if (lex_match_id (lexer, "POSITION"))
5468 lex_match (lexer, T_EQUALS);
5469 if (lex_match_id (lexer, "COLUMN"))
5470 t->slabels_axis = PIVOT_AXIS_COLUMN;
5471 else if (lex_match_id (lexer, "ROW"))
5472 t->slabels_axis = PIVOT_AXIS_ROW;
5473 else if (lex_match_id (lexer, "LAYER"))
5474 t->slabels_axis = PIVOT_AXIS_LAYER;
5477 lex_error_expecting (lexer, "COLUMN", "ROW", "LAYER");
5481 else if (lex_match_id (lexer, "VISIBLE"))
5483 lex_match (lexer, T_EQUALS);
5484 if (!parse_bool (lexer, &t->slabels_visible))
5489 lex_error_expecting (lexer, "POSITION", "VISIBLE");
5494 else if (lex_match_id (lexer, "CLABELS"))
5496 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
5498 if (lex_match_id (lexer, "AUTO"))
5500 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_ROW;
5501 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_COLUMN;
5503 else if (lex_match_id (lexer, "ROWLABELS"))
5505 lex_match (lexer, T_EQUALS);
5506 if (lex_match_id (lexer, "OPPOSITE"))
5507 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_COLUMN;
5508 else if (lex_match_id (lexer, "LAYER"))
5509 t->label_axis[PIVOT_AXIS_ROW] = PIVOT_AXIS_LAYER;
5512 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5516 else if (lex_match_id (lexer, "COLLABELS"))
5518 lex_match (lexer, T_EQUALS);
5519 if (lex_match_id (lexer, "OPPOSITE"))
5520 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_ROW;
5521 else if (lex_match_id (lexer, "LAYER"))
5522 t->label_axis[PIVOT_AXIS_COLUMN] = PIVOT_AXIS_LAYER;
5525 lex_error_expecting (lexer, "OPPOSITE", "LAYER");
5531 lex_error_expecting (lexer, "AUTO", "ROWLABELS",
5537 else if (lex_match_id (lexer, "CRITERIA"))
5539 if (!lex_force_match_id (lexer, "CILEVEL"))
5541 lex_match (lexer, T_EQUALS);
5543 if (!lex_force_num_range_halfopen (lexer, "CILEVEL", 0, 100))
5545 t->cilevel = lex_number (lexer);
5548 else if (lex_match_id (lexer, "CATEGORIES"))
5550 if (!ctables_table_parse_categories (lexer, dataset_dict (ds),
5554 else if (lex_match_id (lexer, "TITLES"))
5559 if (lex_match_id (lexer, "CAPTION"))
5560 textp = &t->caption;
5561 else if (lex_match_id (lexer, "CORNER"))
5563 else if (lex_match_id (lexer, "TITLE"))
5567 lex_error_expecting (lexer, "CAPTION", "CORNER", "TITLE");
5570 lex_match (lexer, T_EQUALS);
5572 struct string s = DS_EMPTY_INITIALIZER;
5573 while (lex_is_string (lexer))
5575 if (!ds_is_empty (&s))
5576 ds_put_byte (&s, ' ');
5577 ds_put_substring (&s, lex_tokss (lexer));
5581 *textp = ds_steal_cstr (&s);
5583 while (lex_token (lexer) != T_SLASH
5584 && lex_token (lexer) != T_ENDCMD);
5586 else if (lex_match_id (lexer, "SIGTEST"))
5590 t->chisq = xmalloc (sizeof *t->chisq);
5591 *t->chisq = (struct ctables_chisq) {
5593 .include_mrsets = true,
5594 .all_visible = true,
5600 if (lex_match_id (lexer, "TYPE"))
5602 lex_match (lexer, T_EQUALS);
5603 if (!lex_force_match_id (lexer, "CHISQUARE"))
5606 else if (lex_match_id (lexer, "ALPHA"))
5608 lex_match (lexer, T_EQUALS);
5609 if (!lex_force_num_range_halfopen (lexer, "ALPHA", 0, 1))
5611 t->chisq->alpha = lex_number (lexer);
5614 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5616 lex_match (lexer, T_EQUALS);
5617 if (parse_bool (lexer, &t->chisq->include_mrsets))
5620 else if (lex_match_id (lexer, "CATEGORIES"))
5622 lex_match (lexer, T_EQUALS);
5623 if (lex_match_id (lexer, "ALLVISIBLE"))
5624 t->chisq->all_visible = true;
5625 else if (lex_match_id (lexer, "SUBTOTALS"))
5626 t->chisq->all_visible = false;
5629 lex_error_expecting (lexer,
5630 "ALLVISIBLE", "SUBTOTALS");
5636 lex_error_expecting (lexer, "TYPE", "ALPHA",
5637 "INCLUDEMRSETS", "CATEGORIES");
5641 while (lex_token (lexer) != T_SLASH
5642 && lex_token (lexer) != T_ENDCMD);
5644 else if (lex_match_id (lexer, "COMPARETEST"))
5648 t->pairwise = xmalloc (sizeof *t->pairwise);
5649 *t->pairwise = (struct ctables_pairwise) {
5651 .alpha = { .05, .05 },
5652 .adjust = BONFERRONI,
5653 .include_mrsets = true,
5654 .meansvariance_allcats = true,
5655 .all_visible = true,
5664 if (lex_match_id (lexer, "TYPE"))
5666 lex_match (lexer, T_EQUALS);
5667 if (lex_match_id (lexer, "PROP"))
5668 t->pairwise->type = PROP;
5669 else if (lex_match_id (lexer, "MEAN"))
5670 t->pairwise->type = MEAN;
5673 lex_error_expecting (lexer, "PROP", "MEAN");
5677 else if (lex_match_id (lexer, "ALPHA"))
5679 lex_match (lexer, T_EQUALS);
5681 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5683 double a0 = lex_number (lexer);
5686 lex_match (lexer, T_COMMA);
5687 if (lex_is_number (lexer))
5689 if (!lex_force_num_range_open (lexer, "ALPHA", 0, 1))
5691 double a1 = lex_number (lexer);
5694 t->pairwise->alpha[0] = MIN (a0, a1);
5695 t->pairwise->alpha[1] = MAX (a0, a1);
5698 t->pairwise->alpha[0] = t->pairwise->alpha[1] = a0;
5700 else if (lex_match_id (lexer, "ADJUST"))
5702 lex_match (lexer, T_EQUALS);
5703 if (lex_match_id (lexer, "BONFERRONI"))
5704 t->pairwise->adjust = BONFERRONI;
5705 else if (lex_match_id (lexer, "BH"))
5706 t->pairwise->adjust = BH;
5707 else if (lex_match_id (lexer, "NONE"))
5708 t->pairwise->adjust = 0;
5711 lex_error_expecting (lexer, "BONFERRONI", "BH",
5716 else if (lex_match_id (lexer, "INCLUDEMRSETS"))
5718 lex_match (lexer, T_EQUALS);
5719 if (!parse_bool (lexer, &t->pairwise->include_mrsets))
5722 else if (lex_match_id (lexer, "MEANSVARIANCE"))
5724 lex_match (lexer, T_EQUALS);
5725 if (lex_match_id (lexer, "ALLCATS"))
5726 t->pairwise->meansvariance_allcats = true;
5727 else if (lex_match_id (lexer, "TESTEDCATS"))
5728 t->pairwise->meansvariance_allcats = false;
5731 lex_error_expecting (lexer, "ALLCATS", "TESTEDCATS");
5735 else if (lex_match_id (lexer, "CATEGORIES"))
5737 lex_match (lexer, T_EQUALS);
5738 if (lex_match_id (lexer, "ALLVISIBLE"))
5739 t->pairwise->all_visible = true;
5740 else if (lex_match_id (lexer, "SUBTOTALS"))
5741 t->pairwise->all_visible = false;
5744 lex_error_expecting (lexer, "ALLVISIBLE",
5749 else if (lex_match_id (lexer, "MERGE"))
5751 lex_match (lexer, T_EQUALS);
5752 if (!parse_bool (lexer, &t->pairwise->merge))
5755 else if (lex_match_id (lexer, "STYLE"))
5757 lex_match (lexer, T_EQUALS);
5758 if (lex_match_id (lexer, "APA"))
5759 t->pairwise->apa_style = true;
5760 else if (lex_match_id (lexer, "SIMPLE"))
5761 t->pairwise->apa_style = false;
5764 lex_error_expecting (lexer, "APA", "SIMPLE");
5768 else if (lex_match_id (lexer, "SHOWSIG"))
5770 lex_match (lexer, T_EQUALS);
5771 if (!parse_bool (lexer, &t->pairwise->show_sig))
5776 lex_error_expecting (lexer, "TYPE", "ALPHA", "ADJUST",
5777 "INCLUDEMRSETS", "MEANSVARIANCE",
5778 "CATEGORIES", "MERGE", "STYLE",
5783 while (lex_token (lexer) != T_SLASH
5784 && lex_token (lexer) != T_ENDCMD);
5788 lex_error_expecting (lexer, "TABLE", "SLABELS", "CLABELS",
5789 "CRITERIA", "CATEGORIES", "TITLES",
5790 "SIGTEST", "COMPARETEST");
5794 if (!lex_match (lexer, T_SLASH))
5798 if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW
5799 && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN)
5801 msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
5805 if (!ctables_prepare_table (t))
5808 while (lex_token (lexer) != T_ENDCMD);
5810 bool ok = ctables_execute (ds, ct);
5811 ctables_destroy (ct);
5812 return ok ? CMD_SUCCESS : CMD_FAILURE;
5815 ctables_destroy (ct);